-rw-r--r-- .mailmap 10
-rw-r--r-- CREDITS 6
-rw-r--r-- Documentation/ABI/obsolete/procfs-i8k 10
-rw-r--r-- Documentation/ABI/stable/sysfs-block 49
-rw-r--r-- Documentation/ABI/stable/sysfs-devices-system-cpu 4
-rw-r--r-- Documentation/ABI/testing/debugfs-hisi-hpre 178
-rw-r--r-- Documentation/ABI/testing/debugfs-hisi-sec 146
-rw-r--r-- Documentation/ABI/testing/debugfs-hisi-zip 146
-rw-r--r-- Documentation/ABI/testing/sysfs-class-hwmon 8
-rw-r--r-- Documentation/ABI/testing/sysfs-class-power 1
-rw-r--r-- Documentation/ABI/testing/sysfs-class-thermal 2
-rw-r--r-- Documentation/ABI/testing/sysfs-devices-system-cpu 7
-rw-r--r-- Documentation/ABI/testing/sysfs-driver-aspeed-uart-routing 6
-rw-r--r-- Documentation/ABI/testing/sysfs-fs-f2fs 54
-rw-r--r-- Documentation/ABI/testing/sysfs-kernel-mm-damon 274
-rw-r--r-- Documentation/Makefile 2
-rw-r--r-- Documentation/accounting/psi.rst 3
-rw-r--r-- Documentation/admin-guide/acpi/fan_performance_states.rst 28
-rw-r--r-- Documentation/admin-guide/blockdev/zram.rst 20
-rw-r--r-- Documentation/admin-guide/cgroup-v1/memory.rst 2
-rw-r--r-- Documentation/admin-guide/cgroup-v2.rst 5
-rw-r--r-- Documentation/admin-guide/gpio/index.rst 1
-rw-r--r-- Documentation/admin-guide/hw-vuln/spectre.rst 50
-rw-r--r-- Documentation/admin-guide/index.rst 1
-rw-r--r-- Documentation/admin-guide/iostats.rst 6
-rw-r--r-- Documentation/admin-guide/kdump/vmcoreinfo.rst 8
-rw-r--r-- Documentation/admin-guide/kernel-parameters.txt 74
-rw-r--r-- Documentation/admin-guide/mm/damon/usage.rst 380
-rw-r--r-- Documentation/admin-guide/mm/pagemap.rst 2
-rw-r--r-- Documentation/admin-guide/mm/zswap.rst 22
-rw-r--r-- Documentation/admin-guide/perf/index.rst 1
-rw-r--r-- Documentation/admin-guide/pm/amd-pstate.rst 26
-rw-r--r-- Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst 60
-rw-r--r-- Documentation/admin-guide/pm/working-state.rst 1
-rw-r--r-- Documentation/admin-guide/reporting-issues.rst 73
-rw-r--r-- Documentation/admin-guide/reporting-regressions.rst 451
-rw-r--r-- Documentation/admin-guide/sysctl/kernel.rst 90
-rw-r--r-- Documentation/arm/marvell.rst 2
-rw-r--r-- Documentation/arm64/booting.rst 10
-rw-r--r-- Documentation/arm64/elf_hwcaps.rst 5
-rw-r--r-- Documentation/arm64/memory-tagging-extension.rst 54
-rw-r--r-- Documentation/arm64/silicon-errata.rst 16
-rw-r--r-- Documentation/asm-annotations.rst 11
-rw-r--r-- Documentation/block/biodoc.rst 1164
-rw-r--r-- Documentation/block/capability.rst 2
-rw-r--r-- Documentation/block/index.rst 1
-rw-r--r-- Documentation/cdrom/packet-writing.rst 4
-rw-r--r-- Documentation/conf.py 131
-rw-r--r-- Documentation/core-api/entry.rst 279
-rw-r--r-- Documentation/core-api/index.rst 8
-rw-r--r-- Documentation/core-api/mm-api.rst 19
-rw-r--r-- Documentation/core-api/pin_user_pages.rst 18
-rw-r--r-- Documentation/cpu-freq/cpu-drivers.rst 3
-rw-r--r-- Documentation/dev-tools/kfence.rst 12
-rw-r--r-- Documentation/dev-tools/kselftest.rst 8
-rw-r--r-- Documentation/dev-tools/ktap.rst 49
-rw-r--r-- Documentation/dev-tools/kunit/usage.rst 2
-rw-r--r-- Documentation/devicetree/bindings/arm/atmel-at91.yaml 3
-rw-r--r-- Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt 2
-rw-r--r-- Documentation/devicetree/bindings/arm/omap/omap.txt 3
-rw-r--r-- Documentation/devicetree/bindings/arm/pmu.yaml 2
-rw-r--r-- Documentation/devicetree/bindings/arm/qcom.yaml 6
-rw-r--r-- Documentation/devicetree/bindings/clock/qoriq-clock.txt 1
-rw-r--r-- Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml 19
-rw-r--r-- Documentation/devicetree/bindings/extcon/maxim,max77843.yaml 40
-rw-r--r-- Documentation/devicetree/bindings/gpio/sifive,gpio.yaml 1
-rw-r--r-- Documentation/devicetree/bindings/hwmon/adi,adm1275.yaml 68
-rw-r--r-- Documentation/devicetree/bindings/hwmon/national,lm90.yaml 4
-rw-r--r-- Documentation/devicetree/bindings/hwmon/ti,tmp464.yaml 114
-rw-r--r-- Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt 1
-rw-r--r-- Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml 31
-rw-r--r-- Documentation/devicetree/bindings/interrupt-controller/apple,aic2.yaml 98
-rw-r--r-- Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml 96
-rw-r--r-- Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml 1
-rw-r--r-- Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml 33
-rw-r--r-- Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml 1
-rw-r--r-- Documentation/devicetree/bindings/mfd/brcm,cru.yaml 4
-rw-r--r-- Documentation/devicetree/bindings/mfd/cirrus,lochnagar.yaml 6
-rw-r--r-- Documentation/devicetree/bindings/mfd/google,cros-ec.yaml 31
-rw-r--r-- Documentation/devicetree/bindings/mfd/max14577.txt 147
-rw-r--r-- Documentation/devicetree/bindings/mfd/max77802.txt 25
-rw-r--r-- Documentation/devicetree/bindings/mfd/maxim,max14577.yaml 195
-rw-r--r-- Documentation/devicetree/bindings/mfd/maxim,max77802.yaml 194
-rw-r--r-- Documentation/devicetree/bindings/mfd/maxim,max77843.yaml 144
-rw-r--r-- Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml 2
-rw-r--r-- Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml 3
-rw-r--r-- Documentation/devicetree/bindings/net/can/tcan4x5x.txt 2
-rw-r--r-- Documentation/devicetree/bindings/net/qcom,ipa.yaml 6
-rw-r--r-- Documentation/devicetree/bindings/perf/marvell-cn10k-ddr.yaml 37
-rw-r--r-- Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml 2
-rw-r--r-- Documentation/devicetree/bindings/phy/ti,tcan104x-can.yaml 7
-rw-r--r-- Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml 3
-rw-r--r-- Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml 84
-rw-r--r-- Documentation/devicetree/bindings/pwm/pwm-sifive.yaml 1
-rw-r--r-- Documentation/devicetree/bindings/regulator/max77802.txt 111
-rw-r--r-- Documentation/devicetree/bindings/regulator/maxim,max14577.yaml 78
-rw-r--r-- Documentation/devicetree/bindings/regulator/maxim,max77802.yaml 85
-rw-r--r-- Documentation/devicetree/bindings/regulator/maxim,max77843.yaml 65
-rw-r--r-- Documentation/devicetree/bindings/regulator/maxim,max8973.yaml 5
-rw-r--r-- Documentation/devicetree/bindings/regulator/pfuze100.yaml 6
-rw-r--r-- Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml 2
-rw-r--r-- Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml 141
-rw-r--r-- Documentation/devicetree/bindings/regulator/ti,tps62360.yaml 98
-rw-r--r-- Documentation/devicetree/bindings/regulator/ti,tps62864.yaml 63
-rw-r--r-- Documentation/devicetree/bindings/regulator/tps62360-regulator.txt 44
-rw-r--r-- Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml 1
-rw-r--r-- Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml 2
-rw-r--r-- Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml 1
-rw-r--r-- Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml 107
-rw-r--r-- Documentation/devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml 4
-rw-r--r-- Documentation/devicetree/bindings/spi/mediatek,spi-slave-mt27xx.yaml 58
-rw-r--r-- Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml 52
-rw-r--r-- Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml 3
-rw-r--r-- Documentation/devicetree/bindings/spi/renesas,rspi.yaml 4
-rw-r--r-- Documentation/devicetree/bindings/spi/samsung,spi-peripheral-props.yaml 33
-rw-r--r-- Documentation/devicetree/bindings/spi/samsung,spi.yaml 188
-rw-r--r-- Documentation/devicetree/bindings/spi/spi-controller.yaml 7
-rw-r--r-- Documentation/devicetree/bindings/spi/spi-mt65xx.txt 68
-rw-r--r-- Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml 3
-rw-r--r-- Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml 31
-rw-r--r-- Documentation/devicetree/bindings/spi/spi-pl022.yaml 4
-rw-r--r-- Documentation/devicetree/bindings/spi/spi-samsung.txt 122
-rw-r--r-- Documentation/devicetree/bindings/spi/spi-slave-mt27xx.txt 33
-rw-r--r-- Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml 78
-rw-r--r-- Documentation/devicetree/bindings/thermal/exynos-thermal.txt 106
-rw-r--r-- Documentation/devicetree/bindings/thermal/qcom-lmh.yaml 1
-rw-r--r-- Documentation/devicetree/bindings/thermal/qcom-tsens.yaml 1
-rw-r--r-- Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml 184
-rw-r--r-- Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml 150
-rw-r--r-- Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt 24
-rw-r--r-- Documentation/devicetree/bindings/timer/nvidia,tegra210-timer.txt 36
-rw-r--r-- Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt 28
-rw-r--r-- Documentation/devicetree/bindings/trivial-devices.yaml 5
-rw-r--r-- Documentation/devicetree/bindings/usb/dwc2.yaml 1
-rw-r--r-- Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml 2
-rw-r--r-- Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml 2
-rw-r--r-- Documentation/devicetree/bindings/vendor-prefixes.yaml 2
-rw-r--r-- Documentation/driver-api/gpio/board.rst 21
-rw-r--r-- Documentation/driver-api/mtd/index.rst 2
-rw-r--r-- Documentation/driver-api/mtd/spi-intel.rst (renamed from Documentation/driver-api/mtd/intel-spi.rst) 8
-rw-r--r-- Documentation/driver-api/serial/driver.rst 2
-rw-r--r-- Documentation/driver-api/thermal/index.rst 1
-rw-r--r-- Documentation/driver-api/thermal/intel_dptf.rst 272
-rw-r--r-- Documentation/filesystems/caching/netfs-api.rst 7
-rw-r--r-- Documentation/filesystems/dax.rst 6
-rw-r--r-- Documentation/filesystems/erofs.rst 2
-rw-r--r-- Documentation/filesystems/ext4/blocks.rst 2
-rw-r--r-- Documentation/filesystems/fscrypt.rst 25
-rw-r--r-- Documentation/filesystems/locking.rst 48
-rw-r--r-- Documentation/filesystems/netfs_library.rst 16
-rw-r--r-- Documentation/filesystems/porting.rst 6
-rw-r--r-- Documentation/filesystems/vfs.rst 62
-rw-r--r-- Documentation/firmware-guide/acpi/enumeration.rst 111
-rw-r--r-- Documentation/firmware-guide/acpi/gpio-properties.rst 26
-rw-r--r-- Documentation/gpu/todo.rst 24
-rw-r--r-- Documentation/hwmon/aquacomputer_d5next.rst 49
-rw-r--r-- Documentation/hwmon/asus_ec_sensors.rst 54
-rw-r--r-- Documentation/hwmon/dell-smm-hwmon.rst 180
-rw-r--r-- Documentation/hwmon/index.rst 3
-rw-r--r-- Documentation/hwmon/lm70.rst 7
-rw-r--r-- Documentation/hwmon/max6639.rst 2
-rw-r--r-- Documentation/hwmon/pli1209bc.rst 75
-rw-r--r-- Documentation/hwmon/sch5627.rst 4
-rw-r--r-- Documentation/hwmon/sysfs-interface.rst 4
-rw-r--r-- Documentation/hwmon/tmp464.rst 73
-rw-r--r-- Documentation/hwmon/xdpe12284.rst 12
-rw-r--r-- Documentation/index.rst 1
-rw-r--r-- Documentation/kernel-hacking/locking.rst 2
-rw-r--r-- Documentation/locking/locktypes.rst 2
-rw-r--r-- Documentation/process/applying-patches.rst 28
-rw-r--r-- Documentation/process/deprecated.rst 20
-rw-r--r-- Documentation/process/handling-regressions.rst 746
-rw-r--r-- Documentation/process/index.rst 2
-rw-r--r-- Documentation/process/researcher-guidelines.rst 143
-rw-r--r-- Documentation/process/submitting-patches.rst 3
-rw-r--r-- Documentation/scheduler/index.rst 2
-rw-r--r-- Documentation/scheduler/sched-debug.rst 54
-rw-r--r-- Documentation/scheduler/sched-domains.rst 8
-rw-r--r-- Documentation/scheduler/schedutil.rst (renamed from Documentation/scheduler/schedutil.txt) 30
-rw-r--r-- Documentation/security/SCTP.rst 26
-rw-r--r-- Documentation/security/keys/trusted-encrypted.rst 25
-rw-r--r-- Documentation/sphinx/kerneldoc-preamble.sty 226
-rw-r--r-- Documentation/sphinx/kfigure.py 134
-rw-r--r-- Documentation/spi/pxa2xx.rst 3
-rw-r--r-- Documentation/tools/index.rst 20
-rw-r--r-- Documentation/tools/rtla/common_hist_options.rst 2
-rw-r--r-- Documentation/tools/rtla/common_options.rst 19
-rw-r--r-- Documentation/tools/rtla/common_osnoise_description.rst 2
-rw-r--r-- Documentation/tools/rtla/common_osnoise_options.rst 10
-rw-r--r-- Documentation/tools/rtla/common_timerlat_options.rst 12
-rw-r--r-- Documentation/tools/rtla/index.rst 26
-rw-r--r-- Documentation/tools/rtla/rtla-osnoise-hist.rst 2
-rw-r--r-- Documentation/trace/osnoise-tracer.rst 4
-rw-r--r-- Documentation/translations/conf.py 12
-rw-r--r-- Documentation/translations/ja_JP/index.rst 4
-rw-r--r-- Documentation/translations/ko_KR/index.rst 5
-rw-r--r-- Documentation/translations/zh_CN/accounting/delay-accounting.rst 62
-rw-r--r-- Documentation/translations/zh_CN/admin-guide/index.rst 124
-rw-r--r-- Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst 28
-rw-r--r-- Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst 232
-rw-r--r-- Documentation/translations/zh_CN/admin-guide/mm/damon/start.rst 132
-rw-r--r-- Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst 286
-rw-r--r-- Documentation/translations/zh_CN/admin-guide/mm/index.rst 49
-rw-r--r-- Documentation/translations/zh_CN/admin-guide/mm/ksm.rst 148
-rw-r--r-- Documentation/translations/zh_CN/core-api/index.rst 2
-rw-r--r-- Documentation/translations/zh_CN/core-api/rbtree.rst 391
-rw-r--r-- Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst 2
-rw-r--r-- Documentation/translations/zh_CN/devicetree/index.rst 50
-rw-r--r-- Documentation/translations/zh_CN/devicetree/of_unittest.rst 189
-rw-r--r-- Documentation/translations/zh_CN/devicetree/usage-model.rst 330
-rw-r--r-- Documentation/translations/zh_CN/index.rst 21
-rw-r--r-- Documentation/translations/zh_CN/peci/index.rst 26
-rw-r--r-- Documentation/translations/zh_CN/peci/peci.rst 54
-rw-r--r-- Documentation/translations/zh_CN/power/energy-model.rst 190
-rw-r--r-- Documentation/translations/zh_CN/power/index.rst 56
-rw-r--r-- Documentation/translations/zh_CN/power/opp.rst 341
-rw-r--r-- Documentation/translations/zh_CN/riscv/index.rst 1
-rw-r--r-- Documentation/translations/zh_CN/riscv/vm-layout.rst 67
-rw-r--r-- Documentation/translations/zh_CN/scheduler/index.rst 9
-rw-r--r-- Documentation/translations/zh_CN/scheduler/sched-energy.rst 351
-rw-r--r-- Documentation/translations/zh_CN/scheduler/sched-nice-design.rst 99
-rw-r--r-- Documentation/translations/zh_CN/scheduler/sched-stats.rst 156
-rw-r--r-- Documentation/translations/zh_CN/vm/active_mm.rst 85
-rw-r--r-- Documentation/translations/zh_CN/vm/balance.rst 81
-rw-r--r-- Documentation/translations/zh_CN/vm/damon/api.rst 32
-rw-r--r-- Documentation/translations/zh_CN/vm/damon/design.rst 139
-rw-r--r-- Documentation/translations/zh_CN/vm/damon/faq.rst 48
-rw-r--r-- Documentation/translations/zh_CN/vm/damon/index.rst 33
-rw-r--r-- Documentation/translations/zh_CN/vm/free_page_reporting.rst 38
-rw-r--r-- Documentation/translations/zh_CN/vm/highmem.rst 128
-rw-r--r-- Documentation/translations/zh_CN/vm/index.rst 53
-rw-r--r-- Documentation/translations/zh_CN/vm/ksm.rst 70
-rw-r--r-- Documentation/translations/zh_TW/index.rst 4
-rw-r--r-- Documentation/userspace-api/ioctl/ioctl-number.rst 5
-rw-r--r-- Documentation/virt/kvm/api.rst 20
-rw-r--r-- Documentation/virt/uml/user_mode_linux_howto_v2.rst 6
-rw-r--r-- Documentation/vm/damon/design.rst 43
-rw-r--r-- Documentation/vm/damon/faq.rst 2
-rw-r--r-- Documentation/vm/page_owner.rst 10
-rw-r--r-- Documentation/vm/page_table_check.rst 2
-rw-r--r-- Documentation/x86/index.rst 1
-rw-r--r-- Documentation/x86/intel-hfi.rst 72
-rw-r--r-- Documentation/x86/sva.rst 53
-rw-r--r-- MAINTAINERS 314
-rw-r--r-- Makefile 4
-rw-r--r-- arch/Kconfig 80
-rw-r--r-- arch/alpha/Kconfig 1
-rw-r--r-- arch/alpha/configs/defconfig 1
-rw-r--r-- arch/alpha/include/asm/pgtable.h 1
-rw-r--r-- arch/alpha/include/asm/xor.h 53
-rw-r--r-- arch/alpha/mm/init.c 6
-rw-r--r-- arch/arc/include/asm/hugepage.h 1
-rw-r--r-- arch/arc/include/asm/pgtable-levels.h 1
-rw-r--r-- arch/arm/Kconfig 5
-rw-r--r-- arch/arm/boot/dts/Makefile 1
-rw-r--r-- arch/arm/boot/dts/am335x-wega.dtsi 2
-rw-r--r-- arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi 2
-rw-r--r-- arch/arm/boot/dts/bcm2711.dtsi 1
-rw-r--r-- arch/arm/boot/dts/dra7-l4.dtsi 5
-rw-r--r-- arch/arm/boot/dts/dra7.dtsi 28
-rw-r--r-- arch/arm/boot/dts/imx23-evk.dts 1
-rw-r--r-- arch/arm/boot/dts/imx6qdl-udoo.dtsi 5
-rw-r--r-- arch/arm/boot/dts/imx7ulp.dtsi 2
-rw-r--r-- arch/arm/boot/dts/meson.dtsi 8
-rw-r--r-- arch/arm/boot/dts/meson8.dtsi 24
-rw-r--r-- arch/arm/boot/dts/meson8b.dtsi 24
-rw-r--r-- arch/arm/boot/dts/omap3-beagle-ab4.dts 47
-rw-r--r-- arch/arm/boot/dts/omap3-beagle.dts 33
-rw-r--r-- arch/arm/boot/dts/omap3-devkit8000-common.dtsi 18
-rw-r--r-- arch/arm/boot/dts/omap3-devkit8000.dts 33
-rw-r--r-- arch/arm/boot/dts/rk322x.dtsi 4
-rw-r--r-- arch/arm/boot/dts/rk3288.dtsi 2
-rw-r--r-- arch/arm/boot/dts/spear320-hmi.dts 1
-rw-r--r-- arch/arm/boot/dts/ste-ux500-samsung-skomer.dts 4
-rw-r--r-- arch/arm/boot/dts/tegra124-nyan-big-fhd.dts 10
-rw-r--r-- arch/arm/boot/dts/tegra124-nyan-big.dts 15
-rw-r--r-- arch/arm/boot/dts/tegra124-nyan-blaze.dts 15
-rw-r--r-- arch/arm/boot/dts/tegra124-venice2.dts 14
-rw-r--r-- arch/arm/configs/davinci_all_defconfig 1
-rw-r--r-- arch/arm/configs/ezx_defconfig 1
-rw-r--r-- arch/arm/configs/imote2_defconfig 1
-rw-r--r-- arch/arm/configs/integrator_defconfig 1
-rw-r--r-- arch/arm/configs/iop32x_defconfig 1
-rw-r--r-- arch/arm/configs/keystone_defconfig 1
-rw-r--r-- arch/arm/configs/lart_defconfig 1
-rw-r--r-- arch/arm/configs/netwinder_defconfig 1
-rw-r--r-- arch/arm/configs/versatile_defconfig 1
-rw-r--r-- arch/arm/configs/viper_defconfig 1
-rw-r--r-- arch/arm/configs/zeus_defconfig 1
-rw-r--r-- arch/arm/crypto/aes-neonbs-core.S 105
-rw-r--r-- arch/arm/crypto/aes-neonbs-glue.c 35
-rw-r--r-- arch/arm/crypto/blake2s-shash.c 4
-rw-r--r-- arch/arm/include/asm/assembler.h 12
-rw-r--r-- arch/arm/include/asm/paravirt_api_clock.h 1
-rw-r--r-- arch/arm/include/asm/pgtable-2level.h 2
-rw-r--r-- arch/arm/include/asm/processor.h 1
-rw-r--r-- arch/arm/include/asm/spectre.h 38
-rw-r--r-- arch/arm/include/asm/uaccess.h 10
-rw-r--r-- arch/arm/include/asm/vmlinux.lds.h 43
-rw-r--r-- arch/arm/include/asm/xor.h 42
-rw-r--r-- arch/arm/kernel/Makefile 2
-rw-r--r-- arch/arm/kernel/entry-armv.S 79
-rw-r--r-- arch/arm/kernel/entry-common.S 24
-rw-r--r-- arch/arm/kernel/kgdb.c 36
-rw-r--r-- arch/arm/kernel/spectre.c 71
-rw-r--r-- arch/arm/kernel/traps.c 65
-rw-r--r-- arch/arm/lib/xor-neon.c 12
-rw-r--r-- arch/arm/mach-mstar/Kconfig 1
-rw-r--r-- arch/arm/mach-omap2/display.c 2
-rw-r--r-- arch/arm/mach-omap2/omap_hwmod.c 4
-rw-r--r-- arch/arm/mach-pxa/corgi.c 26
-rw-r--r-- arch/arm/mach-pxa/hx4700.c 10
-rw-r--r-- arch/arm/mach-pxa/icontrol.c 26
-rw-r--r-- arch/arm/mach-pxa/littleton.c 10
-rw-r--r-- arch/arm/mach-pxa/magician.c 12
-rw-r--r-- arch/arm/mach-pxa/poodle.c 14
-rw-r--r-- arch/arm/mach-pxa/spitz.c 26
-rw-r--r-- arch/arm/mach-pxa/stargate2.c 20
-rw-r--r-- arch/arm/mach-pxa/z2.c 20
-rw-r--r-- arch/arm/mach-s3c/Kconfig 12
-rw-r--r-- arch/arm/mach-s3c/devs.c 77
-rw-r--r-- arch/arm/mach-s3c/mach-crag6410-module.c 13
-rw-r--r-- arch/arm/mach-s3c/mach-crag6410.c 13
-rw-r--r-- arch/arm/mach-s3c/setup-spi-s3c64xx.c 9
-rw-r--r-- arch/arm/mach-s3c/spi-core-s3c24xx.h 6
-rw-r--r-- arch/arm/mach-socfpga/Kconfig 2
-rw-r--r-- arch/arm/mm/Kconfig 11
-rw-r--r-- arch/arm/mm/init.c 43
-rw-r--r-- arch/arm/mm/mmu.c 2
-rw-r--r-- arch/arm/mm/proc-v7-bugs.c 208
-rw-r--r-- arch/arm/probes/kprobes/Makefile 3
-rw-r--r-- arch/arm/vdso/Makefile 2
-rw-r--r-- arch/arm64/Kconfig 123
-rw-r--r-- arch/arm64/Kconfig.platforms 3
-rw-r--r-- arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi 6
-rw-r--r-- arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts 8
-rw-r--r-- arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi 4
-rw-r--r-- arch/arm64/boot/dts/amlogic/meson-gx.dtsi 6
-rw-r--r-- arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts 2
-rw-r--r-- arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi 2
-rw-r--r-- arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts 8
-rw-r--r-- arch/arm64/boot/dts/apple/t8103.dtsi 24
-rw-r--r-- arch/arm64/boot/dts/arm/juno-base.dtsi 3
-rw-r--r-- arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts 4
-rw-r--r-- arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi 24
-rw-r--r-- arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi 24
-rw-r--r-- arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi 24
-rw-r--r-- arch/arm64/boot/dts/freescale/imx8mm.dtsi 1
-rw-r--r-- arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi 4
-rw-r--r-- arch/arm64/boot/dts/freescale/imx8mq.dtsi 10
-rw-r--r-- arch/arm64/boot/dts/freescale/imx8ulp.dtsi 2
-rw-r--r-- arch/arm64/boot/dts/freescale/mba8mx.dtsi 2
-rw-r--r-- arch/arm64/boot/dts/intel/socfpga_agilex.dtsi 4
-rw-r--r-- arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts 8
-rw-r--r-- arch/arm64/boot/dts/marvell/armada-37xx.dtsi 2
-rw-r--r-- arch/arm64/boot/dts/nvidia/tegra194.dtsi 2
-rw-r--r-- arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts 5
-rw-r--r-- arch/arm64/boot/dts/qcom/sm8350.dtsi 28
-rw-r--r-- arch/arm64/boot/dts/qcom/sm8450.dtsi 8
-rw-r--r-- arch/arm64/boot/dts/rockchip/px30.dtsi 2
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3328.dtsi 2
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi 17
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts 1
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi 20
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3399.dtsi 6
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts 2
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3568.dtsi 6
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk356x.dtsi 4
-rw-r--r-- arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts 14
-rw-r--r-- arch/arm64/boot/dts/ti/k3-j721s2.dtsi 22
-rw-r--r-- arch/arm64/crypto/Kconfig 2
-rw-r--r-- arch/arm64/crypto/aes-glue.c 22
-rw-r--r-- arch/arm64/crypto/aes-modes.S 18
-rw-r--r-- arch/arm64/crypto/aes-neonbs-core.S 264
-rw-r--r-- arch/arm64/crypto/aes-neonbs-glue.c 97
-rw-r--r-- arch/arm64/crypto/sha3-ce-glue.c 2
-rw-r--r-- arch/arm64/crypto/sha512-armv8.pl 2
-rw-r--r-- arch/arm64/crypto/sha512-ce-glue.c 2
-rw-r--r-- arch/arm64/crypto/sm3-ce-glue.c 28
-rw-r--r-- arch/arm64/include/asm/apple_m1_pmu.h 64
-rw-r--r-- arch/arm64/include/asm/arch_gicv3.h 23
-rw-r--r-- arch/arm64/include/asm/archrandom.h 45
-rw-r--r-- arch/arm64/include/asm/asm_pointer_auth.h 3
-rw-r--r-- arch/arm64/include/asm/assembler.h 58
-rw-r--r-- arch/arm64/include/asm/cpufeature.h 33
-rw-r--r-- arch/arm64/include/asm/cputype.h 25
-rw-r--r-- arch/arm64/include/asm/debug-monitors.h 12
-rw-r--r-- arch/arm64/include/asm/el2_setup.h 2
-rw-r--r-- arch/arm64/include/asm/fixmap.h 6
-rw-r--r-- arch/arm64/include/asm/hwcap.h 1
-rw-r--r-- arch/arm64/include/asm/insn-def.h 14
-rw-r--r-- arch/arm64/include/asm/insn.h 81
-rw-r--r-- arch/arm64/include/asm/kvm_arm.h 4
-rw-r--r-- arch/arm64/include/asm/kvm_host.h 5
-rw-r--r-- arch/arm64/include/asm/kvm_hyp.h 1
-rw-r--r-- arch/arm64/include/asm/linkage.h 24
-rw-r--r-- arch/arm64/include/asm/lse.h 6
-rw-r--r-- arch/arm64/include/asm/module.lds.h 6
-rw-r--r-- arch/arm64/include/asm/mte-def.h 1
-rw-r--r-- arch/arm64/include/asm/mte-kasan.h 1
-rw-r--r-- arch/arm64/include/asm/mte.h 22
-rw-r--r-- arch/arm64/include/asm/paravirt_api_clock.h 1
-rw-r--r-- arch/arm64/include/asm/perf_event.h 312
-rw-r--r-- arch/arm64/include/asm/pgtable-hwdef.h 2
-rw-r--r-- arch/arm64/include/asm/pgtable-prot.h 4
-rw-r--r-- arch/arm64/include/asm/pgtable.h 11
-rw-r--r-- arch/arm64/include/asm/preempt.h 19
-rw-r--r-- arch/arm64/include/asm/processor.h 1
-rw-r--r-- arch/arm64/include/asm/rwonce.h 4
-rw-r--r-- arch/arm64/include/asm/sections.h 5
-rw-r--r-- arch/arm64/include/asm/spectre.h 7
-rw-r--r-- arch/arm64/include/asm/string.h 2
-rw-r--r-- arch/arm64/include/asm/sysreg.h 22
-rw-r--r-- arch/arm64/include/asm/topology.h 4
-rw-r--r-- arch/arm64/include/asm/vectors.h 73
-rw-r--r-- arch/arm64/include/asm/xor.h 21
-rw-r--r-- arch/arm64/include/uapi/asm/hwcap.h 1
-rw-r--r-- arch/arm64/include/uapi/asm/kvm.h 5
-rw-r--r-- arch/arm64/kernel/Makefile 1
-rw-r--r-- arch/arm64/kernel/cpu_errata.c 64
-rw-r--r-- arch/arm64/kernel/cpufeature.c 254
-rw-r--r-- arch/arm64/kernel/cpuidle.c 6
-rw-r--r-- arch/arm64/kernel/cpuinfo.c 1
-rw-r--r-- arch/arm64/kernel/crash_core.c 6
-rw-r--r-- arch/arm64/kernel/elfcore.c 134
-rw-r--r-- arch/arm64/kernel/entry-common.c 31
-rw-r--r-- arch/arm64/kernel/entry.S 223
-rw-r--r-- arch/arm64/kernel/idreg-override.c 16
-rw-r--r-- arch/arm64/kernel/image-vars.h 4
-rw-r--r-- arch/arm64/kernel/mte.c 36
-rw-r--r-- arch/arm64/kernel/perf_event.c 10
-rw-r--r-- arch/arm64/kernel/process.c 3
-rw-r--r-- arch/arm64/kernel/proton-pack.c 400
-rw-r--r-- arch/arm64/kernel/setup.c 3
-rw-r--r-- arch/arm64/kernel/signal.c 11
-rw-r--r-- arch/arm64/kernel/stacktrace.c 5
-rw-r--r-- arch/arm64/kernel/sys_compat.c 1
-rw-r--r-- arch/arm64/kernel/traps.c 1
-rw-r--r-- arch/arm64/kernel/vdso/Makefile 5
-rw-r--r-- arch/arm64/kernel/vmlinux.lds.S 2
-rw-r--r-- arch/arm64/kvm/arm.c 57
-rw-r--r-- arch/arm64/kvm/handle_exit.c 8
-rw-r--r-- arch/arm64/kvm/hyp/exception.c 5
-rw-r--r-- arch/arm64/kvm/hyp/hyp-entry.S 9
-rw-r--r-- arch/arm64/kvm/hyp/include/hyp/switch.h 27
-rw-r--r-- arch/arm64/kvm/hyp/include/nvhe/fixed_config.h 5
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/cache.S 5
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/mm.c 4
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/sys_regs.c 14
-rw-r--r-- arch/arm64/kvm/hyp/pgtable.c 18
-rw-r--r-- arch/arm64/kvm/hyp/vgic-v3-sr.c 3
-rw-r--r-- arch/arm64/kvm/hyp/vhe/switch.c 16
-rw-r--r-- arch/arm64/kvm/hypercalls.c 12
-rw-r--r-- arch/arm64/kvm/psci.c 21
-rw-r--r-- arch/arm64/kvm/sys_regs.c 5
-rw-r--r-- arch/arm64/kvm/vgic/vgic-mmio.c 2
-rw-r--r-- arch/arm64/kvm/vgic/vgic-v3.c 17
-rw-r--r-- arch/arm64/lib/clear_page.S 5
-rw-r--r-- arch/arm64/lib/copy_page.S 5
-rw-r--r-- arch/arm64/lib/crc32.S 87
-rw-r--r-- arch/arm64/lib/insn.c 187
-rw-r--r-- arch/arm64/lib/memchr.S 5
-rw-r--r-- arch/arm64/lib/memcmp.S 6
-rw-r--r-- arch/arm64/lib/memcpy.S 21
-rw-r--r-- arch/arm64/lib/memset.S 12
-rw-r--r-- arch/arm64/lib/mte.S 4
-rw-r--r-- arch/arm64/lib/strchr.S 6
-rw-r--r-- arch/arm64/lib/strcmp.S 246
-rw-r--r-- arch/arm64/lib/strlen.S 6
-rw-r--r-- arch/arm64/lib/strncmp.S 241
-rw-r--r-- arch/arm64/lib/strnlen.S 6
-rw-r--r-- arch/arm64/lib/strrchr.S 5
-rw-r--r-- arch/arm64/lib/xor-neon.c 46
-rw-r--r-- arch/arm64/mm/cache.S 35
-rw-r--r-- arch/arm64/mm/extable.c 4
-rw-r--r-- arch/arm64/mm/flush.c 7
-rw-r--r-- arch/arm64/mm/hugetlbpage.c 39
-rw-r--r-- arch/arm64/mm/init.c 36
-rw-r--r-- arch/arm64/mm/mmap.c 17
-rw-r--r-- arch/arm64/mm/mmu.c 69
-rw-r--r-- arch/arm64/mm/mteswap.c 2
-rw-r--r-- arch/arm64/mm/proc.S 2
-rw-r--r-- arch/arm64/net/bpf_jit.h 11
-rw-r--r-- arch/arm64/tools/Makefile 10
-rw-r--r-- arch/arm64/tools/cpucaps 11
-rw-r--r-- arch/csky/include/asm/pgtable.h 1
-rw-r--r-- arch/hexagon/include/asm/pgtable.h 5
-rw-r--r-- arch/hexagon/mm/init.c 2
-rw-r--r-- arch/ia64/Kconfig 3
-rw-r--r-- arch/ia64/configs/zx1_defconfig 1
-rw-r--r-- arch/ia64/include/asm/pgtable.h 1
-rw-r--r-- arch/ia64/include/asm/thread_info.h 6
-rw-r--r-- arch/ia64/include/asm/xor.h 21
-rw-r--r-- arch/ia64/kernel/topology.c 10
-rw-r--r-- arch/ia64/mm/discontig.c 11
-rw-r--r-- arch/ia64/pci/fixup.c 4
-rw-r--r-- arch/m68k/Kconfig 2
-rw-r--r-- arch/m68k/amiga/config.c 1
-rw-r--r-- arch/m68k/apollo/config.c 1
-rw-r--r-- arch/m68k/atari/config.c 1
-rw-r--r-- arch/m68k/atari/stdma.c 1
-rw-r--r-- arch/m68k/bvme6000/config.c 2
-rw-r--r-- arch/m68k/configs/amiga_defconfig 7
-rw-r--r-- arch/m68k/configs/apollo_defconfig 7
-rw-r--r-- arch/m68k/configs/atari_defconfig 6
-rw-r--r-- arch/m68k/configs/bvme6000_defconfig 6
-rw-r--r-- arch/m68k/configs/hp300_defconfig 7
-rw-r--r-- arch/m68k/configs/mac_defconfig 6
-rw-r--r-- arch/m68k/configs/multi_defconfig 7
-rw-r--r-- arch/m68k/configs/mvme147_defconfig 6
-rw-r--r-- arch/m68k/configs/mvme16x_defconfig 6
-rw-r--r-- arch/m68k/configs/q40_defconfig 6
-rw-r--r-- arch/m68k/configs/sun3_defconfig 9
-rw-r--r-- arch/m68k/configs/sun3x_defconfig 9
-rw-r--r-- arch/m68k/emu/nfblock.c 1
-rw-r--r-- arch/m68k/hp300/config.c 1
-rw-r--r-- arch/m68k/include/asm/cmpxchg.h 9
-rw-r--r-- arch/m68k/include/asm/config.h 33
-rw-r--r-- arch/m68k/include/asm/current.h 4
-rw-r--r-- arch/m68k/include/asm/mcf_pgtable.h 1
-rw-r--r-- arch/m68k/include/asm/motorola_pgtable.h 1
-rw-r--r-- arch/m68k/include/asm/sun3_pgtable.h 1
-rw-r--r-- arch/m68k/kernel/setup_mm.c 24
-rw-r--r-- arch/m68k/mac/config.c 1
-rw-r--r-- arch/m68k/mm/fault.c 2
-rw-r--r-- arch/m68k/mvme147/config.c 2
-rw-r--r-- arch/m68k/mvme16x/config.c 2
-rw-r--r-- arch/m68k/q40/config.c 1
-rw-r--r-- arch/microblaze/Kconfig 2
-rw-r--r-- arch/microblaze/include/asm/irq.h 3
-rw-r--r-- arch/microblaze/include/asm/pgtable.h 3
-rw-r--r-- arch/microblaze/kernel/irq.c 16
-rw-r--r-- arch/mips/boot/dts/ingenic/ci20.dts 15
-rw-r--r-- arch/mips/cavium-octeon/octeon-memcpy.S 2
-rw-r--r-- arch/mips/configs/cobalt_defconfig 1
-rw-r--r-- arch/mips/configs/decstation_64_defconfig 1
-rw-r--r-- arch/mips/configs/decstation_defconfig 1
-rw-r--r-- arch/mips/configs/decstation_r4k_defconfig 1
-rw-r--r-- arch/mips/configs/ip22_defconfig 1
-rw-r--r-- arch/mips/configs/ip32_defconfig 1
-rw-r--r-- arch/mips/configs/jazz_defconfig 1
-rw-r--r-- arch/mips/configs/malta_defconfig 1
-rw-r--r-- arch/mips/configs/malta_kvm_defconfig 1
-rw-r--r-- arch/mips/configs/maltaup_xpa_defconfig 1
-rw-r--r-- arch/mips/configs/rm200_defconfig 1
-rw-r--r-- arch/mips/configs/tb0219_defconfig 1
-rw-r--r-- arch/mips/configs/tb0226_defconfig 1
-rw-r--r-- arch/mips/configs/tb0287_defconfig 1
-rw-r--r-- arch/mips/configs/workpad_defconfig 1
-rw-r--r-- arch/mips/include/asm/asm.h 4
-rw-r--r-- arch/mips/include/asm/ftrace.h 4
-rw-r--r-- arch/mips/include/asm/pgtable.h 10
-rw-r--r-- arch/mips/include/asm/r4kcache.h 4
-rw-r--r-- arch/mips/include/asm/unaligned-emul.h 176
-rw-r--r-- arch/mips/kernel/mips-r2-to-r6-emul.c 104
-rw-r--r-- arch/mips/kernel/r2300_fpu.S 6
-rw-r--r-- arch/mips/kernel/r4k_fpu.S 2
-rw-r--r-- arch/mips/kernel/relocate_kernel.S 22
-rw-r--r-- arch/mips/kernel/scall32-o32.S 10
-rw-r--r-- arch/mips/kernel/scall64-n32.S 2
-rw-r--r-- arch/mips/kernel/scall64-n64.S 2
-rw-r--r-- arch/mips/kernel/scall64-o32.S 10
-rw-r--r-- arch/mips/kernel/setup.c 2
-rw-r--r-- arch/mips/kernel/smp.c 6
-rw-r--r-- arch/mips/kernel/syscall.c 8
-rw-r--r-- arch/mips/kernel/topology.c 5
-rw-r--r-- arch/mips/kvm/mips.c 50
-rw-r--r-- arch/mips/kvm/vz.c 12
-rw-r--r-- arch/mips/lib/csum_partial.S 4
-rw-r--r-- arch/mips/lib/memcpy.S 4
-rw-r--r-- arch/mips/lib/memset.S 2
-rw-r--r-- arch/mips/lib/strncpy_user.S 4
-rw-r--r-- arch/mips/lib/strnlen_user.S 2
-rw-r--r-- arch/mips/loongson64/vbios_quirk.c 9
-rw-r--r-- arch/mips/ralink/mt7621.c 36
-rw-r--r-- arch/nds32/include/asm/pgtable.h 1
-rw-r--r-- arch/nds32/mm/init.c 1
-rw-r--r-- arch/nios2/include/asm/pgtable.h 1
-rw-r--r-- arch/openrisc/include/asm/pgtable.h 1
-rw-r--r-- arch/openrisc/mm/init.c 2
-rw-r--r-- arch/parisc/Kconfig 14
-rw-r--r-- arch/parisc/Makefile 37
-rw-r--r-- arch/parisc/configs/generic-32bit_defconfig 1
-rw-r--r-- arch/parisc/include/asm/assembly.h 6
-rw-r--r-- arch/parisc/include/asm/bitops.h 8
-rw-r--r-- arch/parisc/include/asm/cache.h 11
-rw-r--r-- arch/parisc/include/asm/cacheflush.h 15
-rw-r--r-- arch/parisc/include/asm/current.h 8
-rw-r--r-- arch/parisc/include/asm/elf.h 15
-rw-r--r-- arch/parisc/include/asm/kprobes.h 5
-rw-r--r-- arch/parisc/include/asm/mmu.h 6
-rw-r--r-- arch/parisc/include/asm/mmu_context.h 16
-rw-r--r-- arch/parisc/include/asm/pgtable.h 12
-rw-r--r-- arch/parisc/include/asm/processor.h 2
-rw-r--r-- arch/parisc/include/asm/rt_sigframe.h 10
-rw-r--r-- arch/parisc/include/asm/special_insns.h 4
-rw-r--r-- arch/parisc/include/asm/tlbflush.h 2
-rw-r--r-- arch/parisc/include/asm/traps.h 1
-rw-r--r-- arch/parisc/include/asm/uaccess.h 47
-rw-r--r-- arch/parisc/include/asm/unistd.h 4
-rw-r--r-- arch/parisc/include/asm/vdso.h 24
-rw-r--r-- arch/parisc/include/uapi/asm/auxvec.h 8
-rw-r--r-- arch/parisc/kernel/Makefile 5
-rw-r--r-- arch/parisc/kernel/alternative.c 10
-rw-r--r-- arch/parisc/kernel/asm-offsets.c 9
-rw-r--r-- arch/parisc/kernel/cache.c 137
-rw-r--r-- arch/parisc/kernel/entry.S 72
-rw-r--r-- arch/parisc/kernel/kprobes.c 28
-rw-r--r-- arch/parisc/kernel/pci-dma.c 4
-rw-r--r-- arch/parisc/kernel/signal.c 225
-rw-r--r-- arch/parisc/kernel/signal32.h 19
-rw-r--r-- arch/parisc/kernel/topology.c 4
-rw-r--r-- arch/parisc/kernel/traps.c 12
-rw-r--r-- arch/parisc/kernel/unaligned.c 269
-rw-r--r-- arch/parisc/kernel/vdso.c 122
-rw-r--r-- arch/parisc/kernel/vdso32/Makefile 53
-rwxr-xr-x arch/parisc/kernel/vdso32/gen_vdso_offsets.sh 15
-rw-r--r-- arch/parisc/kernel/vdso32/note.S 26
-rw-r--r-- arch/parisc/kernel/vdso32/restart_syscall.S 32
-rw-r--r-- arch/parisc/kernel/vdso32/sigtramp.S 195
-rw-r--r-- arch/parisc/kernel/vdso32/vdso32.lds.S 111
-rw-r--r-- arch/parisc/kernel/vdso32/vdso32_wrapper.S 14
-rw-r--r-- arch/parisc/kernel/vdso64/Makefile 48
-rwxr-xr-x arch/parisc/kernel/vdso64/gen_vdso_offsets.sh 15
-rw-r--r-- arch/parisc/kernel/vdso64/note.S 2
-rw-r--r-- arch/parisc/kernel/vdso64/restart_syscall.S 3
-rw-r--r-- arch/parisc/kernel/vdso64/sigtramp.S 166
-rw-r--r-- arch/parisc/kernel/vdso64/vdso64.lds.S 109
-rw-r--r-- arch/parisc/kernel/vdso64/vdso64_wrapper.S 14
-rw-r--r-- arch/parisc/lib/iomap.c 18
-rw-r--r-- arch/parisc/lib/memcpy.c 16
-rw-r--r-- arch/parisc/mm/fault.c 89
-rw-r--r-- arch/parisc/mm/init.c 9
-rw-r--r-- arch/powerpc/Kconfig 1
-rw-r--r-- arch/powerpc/configs/linkstation_defconfig 1
-rw-r--r-- arch/powerpc/configs/mvme5100_defconfig 1
-rw-r--r-- arch/powerpc/include/asm/book3s/32/mmu-hash.h 2
-rw-r--r-- arch/powerpc/include/asm/book3s/32/pgtable.h 5
-rw-r--r-- arch/powerpc/include/asm/book3s/64/mmu.h 2
-rw-r--r-- arch/powerpc/include/asm/book3s/64/pgtable.h 2
-rw-r--r-- arch/powerpc/include/asm/fadump-internal.h 5
-rw-r--r-- arch/powerpc/include/asm/fixmap.h 6
-rw-r--r-- arch/powerpc/include/asm/hw_irq.h 2
-rw-r--r-- arch/powerpc/include/asm/kexec_ranges.h 2
-rw-r--r-- arch/powerpc/include/asm/kvm_book3s_64.h 1
-rw-r--r-- arch/powerpc/include/asm/kvm_host.h 1
-rw-r--r-- arch/powerpc/include/asm/mmu_context.h 1
-rw-r--r-- arch/powerpc/include/asm/nmi.h 2
-rw-r--r-- arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h 4
-rw-r--r-- arch/powerpc/include/asm/nohash/32/pgtable.h 8
-rw-r--r-- arch/powerpc/include/asm/nohash/64/pgtable.h 2
-rw-r--r-- arch/powerpc/include/asm/ppc-opcode.h 1
-rw-r--r-- arch/powerpc/include/asm/syscall.h 4
-rw-r--r-- arch/powerpc/include/asm/thread_info.h 2
-rw-r--r-- arch/powerpc/include/asm/xor_altivec.h 25
-rw-r--r-- arch/powerpc/kernel/fadump.c 8
-rw-r--r-- arch/powerpc/kernel/head_book3s_32.S 4
-rw-r--r-- arch/powerpc/kernel/interrupt_64.S 2
-rw-r--r-- arch/powerpc/kernel/sysfs.c 17
-rw-r--r-- arch/powerpc/kernel/time.c 5
-rw-r--r-- arch/powerpc/kvm/book3s_hv.c 3
-rw-r--r-- arch/powerpc/kvm/book3s_hv_nested.c 2
-rw-r--r-- arch/powerpc/kvm/book3s_hv_uvmem.c 2
-rw-r--r-- arch/powerpc/lib/sstep.c 2
-rw-r--r-- arch/powerpc/lib/xor_vmx.c 28
-rw-r--r-- arch/powerpc/lib/xor_vmx.h 27
-rw-r--r-- arch/powerpc/lib/xor_vmx_glue.c 32
-rw-r--r-- arch/powerpc/mm/book3s32/mmu.c 10
-rw-r--r-- arch/powerpc/mm/book3s64/pgtable.c 1
-rw-r--r-- arch/powerpc/mm/kasan/book3s_32.c 59
-rw-r--r-- arch/powerpc/mm/pgtable.c 9
-rw-r--r-- arch/powerpc/net/bpf_jit_comp.c 29
-rw-r--r-- arch/powerpc/net/bpf_jit_comp32.c 9
-rw-r--r-- arch/powerpc/net/bpf_jit_comp64.c 29
-rw-r--r-- arch/powerpc/perf/core-book3s.c 75
-rw-r--r-- arch/riscv/Kconfig 4
-rw-r--r-- arch/riscv/Kconfig.erratas 1
-rw-r--r-- arch/riscv/Kconfig.socs 4
-rw-r--r-- arch/riscv/Makefile 6
-rw-r--r-- arch/riscv/boot/dts/canaan/k210.dtsi 3
-rw-r--r-- arch/riscv/configs/nommu_k210_sdcard_defconfig 2
-rw-r--r-- arch/riscv/include/asm/page.h 2
-rw-r--r-- arch/riscv/include/asm/pgtable.h 1
-rw-r--r-- arch/riscv/kernel/Makefile 2
-rw-r--r-- arch/riscv/kernel/cpu-hotplug.c 2
-rw-r--r-- arch/riscv/kernel/entry.S 10
-rw-r--r-- arch/riscv/kernel/head.S 11
-rw-r--r-- arch/riscv/kernel/module.c 21
-rw-r--r-- arch/riscv/kernel/sbi.c 72
-rw-r--r-- arch/riscv/kernel/setup.c 3
-rw-r--r-- arch/riscv/kernel/stacktrace.c 9
-rw-r--r-- arch/riscv/kernel/trace_irq.c 27
-rw-r--r-- arch/riscv/kernel/trace_irq.h 11
-rw-r--r-- arch/riscv/kvm/vcpu.c 48
-rw-r--r-- arch/riscv/kvm/vcpu_sbi_base.c 3
-rw-r--r-- arch/riscv/mm/Makefile 3
-rw-r--r-- arch/riscv/mm/extable.c 6
-rw-r--r-- arch/riscv/mm/init.c 4
-rw-r--r-- arch/riscv/mm/kasan_init.c 8
-rw-r--r-- arch/riscv/mm/physaddr.c 4
-rw-r--r-- arch/s390/Kconfig 16
-rw-r--r-- arch/s390/configs/debug_defconfig 20
-rw-r--r-- arch/s390/configs/defconfig 16
-rw-r--r-- arch/s390/configs/zfcpdump_defconfig 3
-rw-r--r-- arch/s390/hypfs/hypfs_vm.c 6
-rw-r--r-- arch/s390/include/asm/extable.h 9
-rw-r--r-- arch/s390/include/asm/ftrace.h 10
-rw-r--r-- arch/s390/include/asm/ptrace.h 2
-rw-r--r-- arch/s390/include/asm/uaccess.h 4
-rw-r--r-- arch/s390/kernel/ftrace.c 37
-rw-r--r-- arch/s390/kernel/mcount.S 9
-rw-r--r-- arch/s390/kernel/module.c 37
-rw-r--r-- arch/s390/kernel/nmi.c 27
-rw-r--r-- arch/s390/kernel/numa.c 7
-rw-r--r-- arch/s390/kernel/setup.c 2
-rw-r--r-- arch/s390/kvm/kvm-s390.c 2
-rw-r--r-- arch/s390/lib/Makefile 3
-rw-r--r-- arch/s390/lib/test_modules.c 32
-rw-r--r-- arch/s390/lib/test_modules.h 53
-rw-r--r-- arch/s390/lib/test_modules_helpers.c 13
-rw-r--r-- arch/s390/lib/xor.c 21
-rw-r--r-- arch/sh/Kconfig 1
-rw-r--r-- arch/sh/configs/ap325rxa_defconfig 1
-rw-r--r-- arch/sh/configs/ecovec24_defconfig 1
-rw-r--r-- arch/sh/configs/landisk_defconfig 1
-rw-r--r-- arch/sh/configs/sdk7780_defconfig 1
-rw-r--r-- arch/sh/configs/se7724_defconfig 1
-rw-r--r-- arch/sh/configs/sh03_defconfig 1
-rw-r--r-- arch/sh/configs/sh7785lcr_32bit_defconfig 1
-rw-r--r-- arch/sh/configs/titan_defconfig 1
-rw-r--r-- arch/sh/include/asm/pgtable_32.h 1
-rw-r--r-- arch/sh/kernel/topology.c 5
-rw-r--r-- arch/sparc/include/asm/pgtable_32.h 7
-rw-r--r-- arch/sparc/include/asm/xor_32.h 21
-rw-r--r-- arch/sparc/include/asm/xor_64.h 42
-rw-r--r-- arch/sparc/kernel/sysfs.c 12
-rw-r--r-- arch/sparc/mm/hugetlbpage.c 1
-rw-r--r-- arch/um/include/asm/pgtable.h 1
-rw-r--r-- arch/um/os-Linux/execvp.c 1
-rw-r--r-- arch/x86/Kbuild 2
-rw-r--r-- arch/x86/Kconfig 27
-rw-r--r-- arch/x86/boot/compressed/head_32.S 3
-rw-r--r-- arch/x86/boot/compressed/head_64.S 3
-rw-r--r-- arch/x86/coco/Makefile 6
-rw-r--r-- arch/x86/coco/core.c (renamed from arch/x86/kernel/cc_platform.c) 56
-rw-r--r-- arch/x86/crypto/Makefile 3
-rw-r--r-- arch/x86/crypto/aes_ctrby8_avx-x86_64.S 63
-rw-r--r-- arch/x86/crypto/aesni-intel_asm.S 4
-rw-r--r-- arch/x86/crypto/blake2s-shash.c 4
-rw-r--r-- arch/x86/crypto/blowfish_glue.c 12
-rw-r--r-- arch/x86/crypto/des3_ede_glue.c 8
-rw-r--r-- arch/x86/crypto/sm3-avx-asm_64.S 517
-rw-r--r-- arch/x86/crypto/sm3_avx_glue.c 134
-rw-r--r-- arch/x86/events/intel/core.c 68
-rw-r--r-- arch/x86/events/intel/ds.c 14
-rw-r--r-- arch/x86/events/intel/lbr.c 172
-rw-r--r-- arch/x86/events/intel/pt.c 76
-rw-r--r-- arch/x86/events/intel/uncore.c 2
-rw-r--r-- arch/x86/events/intel/uncore.h 3
-rw-r--r-- arch/x86/events/intel/uncore_discovery.c 20
-rw-r--r-- arch/x86/events/intel/uncore_discovery.h 4
-rw-r--r-- arch/x86/events/intel/uncore_snb.c 214
-rw-r--r-- arch/x86/events/intel/uncore_snbep.c 2
-rw-r--r-- arch/x86/events/perf_event.h 10
-rw-r--r-- arch/x86/events/rapl.c 9
-rw-r--r-- arch/x86/include/asm/bug.h 20
-rw-r--r-- arch/x86/include/asm/coco.h 32
-rw-r--r-- arch/x86/include/asm/cpufeatures.h 9
-rw-r--r-- arch/x86/include/asm/cpumask.h 10
-rw-r--r-- arch/x86/include/asm/disabled-features.h 7
-rw-r--r-- arch/x86/include/asm/insn.h 2
-rw-r--r-- arch/x86/include/asm/intel_ds.h 5
-rw-r--r-- arch/x86/include/asm/intel_pt.h 2
-rw-r--r-- arch/x86/include/asm/kvm-x86-ops.h 2
-rw-r--r-- arch/x86/include/asm/kvm_host.h 12
-rw-r--r-- arch/x86/include/asm/msr-index.h 9
-rw-r--r-- arch/x86/include/asm/nospec-branch.h 16
-rw-r--r-- arch/x86/include/asm/page.h 10
-rw-r--r-- arch/x86/include/asm/paravirt_api_clock.h 1
-rw-r--r-- arch/x86/include/asm/paravirt_types.h 4
-rw-r--r-- arch/x86/include/asm/perf_event.h 2
-rw-r--r-- arch/x86/include/asm/pgtable.h 13
-rw-r--r-- arch/x86/include/asm/preempt.h 10
-rw-r--r-- arch/x86/include/asm/processor.h 2
-rw-r--r-- arch/x86/include/asm/ptrace.h 2
-rw-r--r-- arch/x86/include/asm/set_memory.h 2
-rw-r--r-- arch/x86/include/asm/svm.h 36
-rw-r--r-- arch/x86/include/asm/topology.h 16
-rw-r--r-- arch/x86/include/asm/x86_init.h 16
-rw-r--r-- arch/x86/include/asm/xen/cpuid.h 7
-rw-r--r-- arch/x86/include/asm/xen/hypervisor.h 14
-rw-r--r-- arch/x86/include/asm/xor.h 42
-rw-r--r-- arch/x86/include/asm/xor_32.h 42
-rw-r--r-- arch/x86/include/asm/xor_avx.h 21
-rw-r--r-- arch/x86/include/uapi/asm/kvm.h 3
-rw-r--r-- arch/x86/kernel/Makefile 5
-rw-r--r-- arch/x86/kernel/acpi/Makefile 2
-rw-r--r-- arch/x86/kernel/acpi/boot.c 24
-rw-r--r-- arch/x86/kernel/acpi/cppc.c 103
-rw-r--r-- arch/x86/kernel/acpi/cppc_msr.c 49
-rw-r--r-- arch/x86/kernel/acpi/sleep.c 23
-rw-r--r-- arch/x86/kernel/alternative.c 8
-rw-r--r-- arch/x86/kernel/cpu/amd.c 35
-rw-r--r-- arch/x86/kernel/cpu/aperfmperf.c 6
-rw-r--r-- arch/x86/kernel/cpu/bugs.c 204
-rw-r--r-- arch/x86/kernel/cpu/common.c 79
-rw-r--r-- arch/x86/kernel/cpu/mce/amd.c 9
-rw-r--r-- arch/x86/kernel/cpu/mce/core.c 15
-rw-r--r-- arch/x86/kernel/cpu/mce/intel.c 41
-rw-r--r-- arch/x86/kernel/cpu/mshyperv.c 6
-rw-r--r-- arch/x86/kernel/cpu/scattered.c 1
-rw-r--r-- arch/x86/kernel/cpu/sgx/encl.c 59
-rw-r--r-- arch/x86/kernel/cpu/sgx/main.c 10
-rw-r--r-- arch/x86/kernel/e820.c 41
-rw-r--r-- arch/x86/kernel/fpu/core.c 7
-rw-r--r-- arch/x86/kernel/fpu/regset.c 9
-rw-r--r-- arch/x86/kernel/fpu/xstate.c 5
-rw-r--r-- arch/x86/kernel/head64.c 2
-rw-r--r-- arch/x86/kernel/kdebugfs.c 37
-rw-r--r-- arch/x86/kernel/ksysfs.c 77
-rw-r--r-- arch/x86/kernel/kvm.c 13
-rw-r--r-- arch/x86/kernel/kvmclock.c 3
-rw-r--r-- arch/x86/kernel/module.c 13
-rw-r--r-- arch/x86/kernel/nmi.c 1
-rw-r--r-- arch/x86/kernel/process.c 5
-rw-r--r-- arch/x86/kernel/ptrace.c 4
-rw-r--r-- arch/x86/kernel/resource.c 23
-rw-r--r-- arch/x86/kernel/setup.c 34
-rw-r--r-- arch/x86/kernel/smpboot.c 72
-rw-r--r-- arch/x86/kernel/topology.c 5
-rw-r--r-- arch/x86/kernel/traps.c 56
-rw-r--r-- arch/x86/kernel/x86_init.c 16
-rw-r--r-- arch/x86/kvm/cpuid.c 107
-rw-r--r-- arch/x86/kvm/emulate.c 32
-rw-r--r-- arch/x86/kvm/kvm_emulate.h 6
-rw-r--r-- arch/x86/kvm/lapic.c 19
-rw-r--r-- arch/x86/kvm/mmu/mmu.c 15
-rw-r--r-- arch/x86/kvm/pmu.c 7
-rw-r--r-- arch/x86/kvm/pmu.h 2
-rw-r--r-- arch/x86/kvm/svm/avic.c 93
-rw-r--r-- arch/x86/kvm/svm/nested.c 35
-rw-r--r-- arch/x86/kvm/svm/sev.c 9
-rw-r--r-- arch/x86/kvm/svm/svm.c 288
-rw-r--r-- arch/x86/kvm/svm/svm.h 22
-rw-r--r-- arch/x86/kvm/svm/svm_onhyperv.h 12
-rw-r--r-- arch/x86/kvm/vmx/capabilities.h 1
-rw-r--r-- arch/x86/kvm/vmx/evmcs.c 4
-rw-r--r-- arch/x86/kvm/vmx/evmcs.h 48
-rw-r--r-- arch/x86/kvm/vmx/nested.c 93
-rw-r--r-- arch/x86/kvm/vmx/pmu_intel.c 4
-rw-r--r-- arch/x86/kvm/vmx/vmcs12.c 4
-rw-r--r-- arch/x86/kvm/vmx/vmcs12.h 6
-rw-r--r-- arch/x86/kvm/vmx/vmx.c 97
-rw-r--r-- arch/x86/kvm/vmx/vmx.h 5
-rw-r--r-- arch/x86/kvm/x86.c 167
-rw-r--r-- arch/x86/kvm/x86.h 52
-rw-r--r-- arch/x86/kvm/xen.c 107
-rw-r--r-- arch/x86/lib/memcpy_64.S 10
-rw-r--r-- arch/x86/lib/memmove_64.S 4
-rw-r--r-- arch/x86/lib/memset_64.S 6
-rw-r--r-- arch/x86/lib/retpoline.S 2
-rw-r--r-- arch/x86/lib/x86-opcode-map.txt 111
-rw-r--r-- arch/x86/mm/ioremap.c 57
-rw-r--r-- arch/x86/mm/maccess.c 7
-rw-r--r-- arch/x86/mm/mem_encrypt_amd.c 72
-rw-r--r-- arch/x86/mm/mem_encrypt_identity.c 12
-rw-r--r-- arch/x86/mm/numa.c 33
-rw-r--r-- arch/x86/mm/pat/set_memory.c 31
-rw-r--r-- arch/x86/net/bpf_jit_comp.c 2
-rw-r--r-- arch/x86/pci/fixup.c 4
-rw-r--r-- arch/x86/um/Kconfig 1
-rw-r--r-- arch/x86/um/user-offsets.c 9
-rw-r--r-- arch/x86/xen/enlighten_hvm.c 22
-rw-r--r-- arch/x86/xen/enlighten_pv.c 4
-rw-r--r-- arch/x86/xen/smp_pv.c 26
-rw-r--r-- arch/x86/xen/vga.c 16
-rw-r--r-- arch/xtensa/Kconfig 1
-rw-r--r-- arch/xtensa/include/asm/current.h 2
-rw-r--r-- arch/xtensa/include/asm/pgtable.h 1
-rw-r--r-- arch/xtensa/include/asm/stacktrace.h 8
-rw-r--r-- arch/xtensa/kernel/irq.c 3
-rw-r--r-- arch/xtensa/platforms/iss/simdisk.c 4
-rw-r--r-- block/Kconfig 13
-rw-r--r-- block/Makefile 3
-rw-r--r-- block/bdev.c 13
-rw-r--r-- block/bfq-cgroup.c 16
-rw-r--r-- block/bfq-iosched.c 41
-rw-r--r-- block/bfq-iosched.h 2
-rw-r--r-- block/bfq-wf2q.c 17
-rw-r--r-- block/bio-integrity.c 3
-rw-r--r-- block/bio.c 190
-rw-r--r-- block/blk-cgroup-rwstat.h 2
-rw-r--r-- block/blk-cgroup.c 20
-rw-r--r-- block/blk-cgroup.h 494
-rw-r--r-- block/blk-core.c 332
-rw-r--r-- block/blk-crypto-fallback.c 2
-rw-r--r-- block/blk-crypto-internal.h 12
-rw-r--r-- block/blk-crypto-sysfs.c 172
-rw-r--r-- block/blk-crypto.c 4
-rw-r--r-- block/blk-flush.c 4
-rw-r--r-- block/blk-ia-ranges.c 2
-rw-r--r-- block/blk-iocost.c 2
-rw-r--r-- block/blk-iolatency.c 4
-rw-r--r-- block/blk-ioprio.c 2
-rw-r--r-- block/blk-lib.c 46
-rw-r--r-- block/blk-map.c 2
-rw-r--r-- block/blk-merge.c 33
-rw-r--r-- block/blk-mq-debugfs.c 6
-rw-r--r-- block/blk-mq-debugfs.h 2
-rw-r--r-- block/blk-mq-sched.c 18
-rw-r--r-- block/blk-mq-sysfs.c 16
-rw-r--r-- block/blk-mq-tag.c 6
-rw-r--r-- block/blk-mq.c 344
-rw-r--r-- block/blk-mq.h 2
-rw-r--r-- block/blk-rq-qos.h 20
-rw-r--r-- block/blk-sysfs.c 44
-rw-r--r-- block/blk-throttle.c 110
-rw-r--r-- block/blk-throttle.h 19
-rw-r--r-- block/blk-zoned.c 14
-rw-r--r-- block/blk.h 10
-rw-r--r-- block/bounce.c 11
-rw-r--r-- block/disk-events.c 2
-rw-r--r-- block/elevator.c 18
-rw-r--r-- block/fops.c 73
-rw-r--r-- block/genhd.c 81
-rw-r--r-- block/holder.c 2
-rw-r--r-- block/partitions/check.h 1
-rw-r--r-- block/partitions/core.c 1
-rw-r--r-- block/partitions/efi.h 1
-rw-r--r-- block/partitions/ldm.h 1
-rw-r--r-- block/sed-opal.c 2
-rw-r--r-- certs/system_keyring.c 44
-rw-r--r-- crypto/Kconfig 25
-rw-r--r-- crypto/af_alg.c 3
-rw-r--r-- crypto/algapi.c 49
-rw-r--r-- crypto/api.c 20
-rw-r--r-- crypto/asymmetric_keys/Kconfig 21
-rw-r--r-- crypto/asymmetric_keys/Makefile 12
-rw-r--r-- crypto/asymmetric_keys/asym_tpm.c 957
-rw-r--r-- crypto/asymmetric_keys/pkcs7_verify.c 13
-rw-r--r-- crypto/asymmetric_keys/public_key.c 126
-rw-r--r-- crypto/asymmetric_keys/signature.c 2
-rw-r--r-- crypto/asymmetric_keys/tpm.asn1 5
-rw-r--r-- crypto/asymmetric_keys/tpm_parser.c 102
-rw-r--r-- crypto/asymmetric_keys/x509.asn1 2
-rw-r--r-- crypto/asymmetric_keys/x509_cert_parser.c 34
-rw-r--r-- crypto/asymmetric_keys/x509_parser.h 3
-rw-r--r-- crypto/asymmetric_keys/x509_public_key.c 24
-rw-r--r-- crypto/async_tx/async_xor.c 8
-rw-r--r-- crypto/async_tx/raid6test.c 4
-rw-r--r-- crypto/authenc.c 2
-rw-r--r-- crypto/blake2s_generic.c 4
-rw-r--r-- crypto/cfb.c 2
-rw-r--r-- crypto/crypto_engine.c 1
-rw-r--r-- crypto/dh.c 681
-rw-r--r-- crypto/dh_helper.c 42
-rw-r--r-- crypto/hmac.c 4
-rw-r--r-- crypto/kpp.c 29
-rw-r--r-- crypto/lrw.c 1
-rw-r--r-- crypto/memneq.c 22
-rw-r--r-- crypto/rsa-pkcs1pad.c 38
-rw-r--r-- crypto/sm2.c 40
-rw-r--r-- crypto/sm3_generic.c 142
-rw-r--r-- crypto/tcrypt.c 226
-rw-r--r-- crypto/testmgr.c 67
-rw-r--r-- crypto/testmgr.h 1456
-rw-r--r-- crypto/xts.c 1
-rw-r--r-- drivers/accessibility/speakup/speakup_dectlk.c 1
-rw-r--r-- drivers/acpi/Kconfig 3
-rw-r--r-- drivers/acpi/Makefile 3
-rw-r--r-- drivers/acpi/acpi_lpss.c 33
-rw-r--r-- drivers/acpi/acpi_platform.c 2
-rw-r--r-- drivers/acpi/acpica/nswalk.c 3
-rw-r--r-- drivers/acpi/apei/bert.c 10
-rw-r--r-- drivers/acpi/apei/erst.c 2
-rw-r--r-- drivers/acpi/apei/ghes.c 19
-rw-r--r-- drivers/acpi/apei/hest.c 2
-rw-r--r-- drivers/acpi/arm64/Kconfig 10
-rw-r--r-- drivers/acpi/arm64/Makefile 1
-rw-r--r-- drivers/acpi/arm64/agdi.c 116
-rw-r--r-- drivers/acpi/arm64/iort.c 14
-rw-r--r-- drivers/acpi/battery.c 12
-rw-r--r-- drivers/acpi/bus.c 46
-rw-r--r-- drivers/acpi/cppc_acpi.c 9
-rw-r--r-- drivers/acpi/ec.c 100
-rw-r--r-- drivers/acpi/fan.h 44
-rw-r--r-- drivers/acpi/fan_attr.c 137
-rw-r--r-- drivers/acpi/fan_core.c (renamed from drivers/acpi/fan.c) 204
-rw-r--r-- drivers/acpi/internal.h 2
-rw-r--r-- drivers/acpi/osl.c 19
-rw-r--r-- drivers/acpi/pci_link.c 2
-rw-r--r-- drivers/acpi/pci_root.c 3
-rw-r--r-- drivers/acpi/processor_idle.c 20
-rw-r--r-- drivers/acpi/property.c 29
-rw-r--r-- drivers/acpi/scan.c 15
-rw-r--r-- drivers/acpi/sleep.c 28
-rw-r--r-- drivers/acpi/tables.c 4
-rw-r--r-- drivers/acpi/video_detect.c 75
-rw-r--r-- drivers/acpi/x86/s2idle.c 12
-rw-r--r-- drivers/acpi/x86/utils.c 21
-rw-r--r-- drivers/amba/bus.c 73
-rw-r--r-- drivers/ata/libata-core.c 25
-rw-r--r-- drivers/ata/pata_hpt37x.c 18
-rw-r--r-- drivers/ata/pata_platform.c 2
-rw-r--r-- drivers/ata/sata_fsl.c 16
-rw-r--r-- drivers/atm/eni.c 2
-rw-r--r-- drivers/atm/firestream.c 2
-rw-r--r-- drivers/auxdisplay/lcd2s.c 24
-rw-r--r-- drivers/base/arch_topology.c 45
-rw-r--r-- drivers/base/class.c 2
-rw-r--r-- drivers/base/core.c 2
-rw-r--r-- drivers/base/cpu.c 2
-rw-r--r-- drivers/base/dd.c 5
-rw-r--r-- drivers/base/devtmpfs.c 2
-rw-r--r-- drivers/base/init.c 1
-rw-r--r-- drivers/base/memory.c 147
-rw-r--r-- drivers/base/node.c 48
-rw-r--r-- drivers/base/power/domain.c 42
-rw-r--r-- drivers/base/power/main.c 16
-rw-r--r-- drivers/base/power/runtime.c 5
-rw-r--r-- drivers/base/power/wakeirq.c 2
-rw-r--r-- drivers/base/power/wakeup.c 45
-rw-r--r-- drivers/base/regmap/internal.h 2
-rw-r--r-- drivers/base/regmap/regmap-irq.c 26
-rw-r--r-- drivers/base/regmap/regmap.c 11
-rw-r--r-- drivers/base/topology.c 20
-rw-r--r-- drivers/block/aoe/aoeblk.c 1
-rw-r--r-- drivers/block/aoe/aoecmd.c 5
-rw-r--r-- drivers/block/drbd/drbd_actlog.c 5
-rw-r--r-- drivers/block/drbd/drbd_bitmap.c 7
-rw-r--r-- drivers/block/drbd/drbd_int.h 4
-rw-r--r-- drivers/block/drbd/drbd_receiver.c 36
-rw-r--r-- drivers/block/drbd/drbd_req.c 8
-rw-r--r-- drivers/block/drbd/drbd_worker.c 10
-rw-r--r-- drivers/block/floppy.c 10
-rw-r--r-- drivers/block/loop.c 115
-rw-r--r-- drivers/block/loop.h 1
-rw-r--r-- drivers/block/mtip32xx/mtip32xx.c 7
-rw-r--r-- drivers/block/mtip32xx/mtip32xx.h 1
-rw-r--r-- drivers/block/null_blk/main.c 54
-rw-r--r-- drivers/block/pktcdvd.c 21
-rw-r--r-- drivers/block/rbd.c 2
-rw-r--r-- drivers/block/rnbd/rnbd-clt.c 28
-rw-r--r-- drivers/block/rnbd/rnbd-clt.h 1
-rw-r--r-- drivers/block/rnbd/rnbd-proto.h 4
-rw-r--r-- drivers/block/rnbd/rnbd-srv-dev.c 61
-rw-r--r-- drivers/block/rnbd/rnbd-srv-dev.h 18
-rw-r--r-- drivers/block/rnbd/rnbd-srv-sysfs.c 1
-rw-r--r-- drivers/block/rnbd/rnbd-srv.c 46
-rw-r--r-- drivers/block/rnbd/rnbd-srv.h 1
-rw-r--r-- drivers/block/sunvdc.c 1
-rw-r--r-- drivers/block/virtio_blk.c 94
-rw-r--r-- drivers/block/xen-blkback/blkback.c 25
-rw-r--r-- drivers/block/xen-blkback/xenbus.c 1
-rw-r--r-- drivers/block/xen-blkfront.c 70
-rw-r--r-- drivers/block/zram/zram_drv.c 26
-rw-r--r-- drivers/bus/mhi/pci_generic.c 2
-rw-r--r-- drivers/bus/moxtet.c 4
-rw-r--r-- drivers/cdrom/gdrom.c 1
-rw-r--r-- drivers/char/hw_random/Kconfig 2
-rw-r--r-- drivers/char/hw_random/atmel-rng.c 148
-rw-r--r-- drivers/char/hw_random/cavium-rng-vf.c 2
-rw-r--r-- drivers/char/hw_random/core.c 162
-rw-r--r-- drivers/char/hw_random/nomadik-rng.c 4
-rw-r--r-- drivers/char/random.c 2858
-rw-r--r-- drivers/char/tpm/st33zp24/i2c.c 5
-rw-r--r-- drivers/char/tpm/st33zp24/spi.c 9
-rw-r--r-- drivers/char/tpm/st33zp24/st33zp24.c 3
-rw-r--r-- drivers/char/tpm/st33zp24/st33zp24.h 2
-rw-r--r-- drivers/char/tpm/tpm-chip.c 46
-rw-r--r-- drivers/char/tpm/tpm-dev-common.c 8
-rw-r--r-- drivers/char/tpm/tpm.h 2
-rw-r--r-- drivers/char/tpm/tpm2-space.c 73
-rw-r--r-- drivers/char/tpm/tpm_tis_spi_main.c 3
-rw-r--r-- drivers/char/tpm/tpm_vtpm_proxy.c 2
-rw-r--r-- drivers/char/tpm/xen-tpmfront.c 8
-rw-r--r-- drivers/char/virtio_console.c 7
-rw-r--r-- drivers/clk/Kconfig 2
-rw-r--r-- drivers/clk/clk-lmk04832.c 4
-rw-r--r-- drivers/clk/ingenic/jz4725b-cgu.c 3
-rw-r--r-- drivers/clk/qcom/dispcc-sc7180.c 5
-rw-r--r-- drivers/clk/qcom/dispcc-sc7280.c 5
-rw-r--r-- drivers/clk/qcom/dispcc-sm8250.c 5
-rw-r--r-- drivers/clk/qcom/gcc-msm8994.c 106
-rw-r--r-- drivers/clk/qcom/gdsc.c 26
-rw-r--r-- drivers/clk/qcom/gdsc.h 8
-rw-r--r-- drivers/clocksource/Kconfig 1
-rw-r--r-- drivers/clocksource/acpi_pm.c 6
-rw-r--r-- drivers/clocksource/arm_arch_timer.c 13
-rw-r--r-- drivers/clocksource/exynos_mct.c 39
-rw-r--r-- drivers/clocksource/timer-imx-sysctr.c 2
-rw-r--r-- drivers/clocksource/timer-imx-tpm.c 14
-rw-r--r-- drivers/clocksource/timer-microchip-pit64b.c 8
-rw-r--r-- drivers/clocksource/timer-of.c 6
-rw-r--r-- drivers/clocksource/timer-ti-dm-systimer.c 7
-rw-r--r-- drivers/connector/cn_proc.c 2
-rw-r--r-- drivers/counter/counter-core.c 15
-rw-r--r-- drivers/counter/counter-sysfs.c 17
-rw-r--r-- drivers/cpufreq/amd-pstate-trace.h 22
-rw-r--r-- drivers/cpufreq/amd-pstate.c 59
-rw-r--r-- drivers/cpufreq/cpufreq.c 4
-rw-r--r-- drivers/cpufreq/cpufreq_conservative.c 10
-rw-r--r-- drivers/cpufreq/cpufreq_governor.c 6
-rw-r--r-- drivers/cpufreq/cpufreq_governor.h 12
-rw-r--r-- drivers/cpufreq/cpufreq_governor_attr_set.c 5
-rw-r--r-- drivers/cpufreq/cpufreq_ondemand.c 10
-rw-r--r-- drivers/cpufreq/intel_pstate.c 38
-rw-r--r-- drivers/cpufreq/longhaul.c 4
-rw-r--r-- drivers/cpufreq/powernow-k8.c 6
-rw-r--r-- drivers/cpufreq/qcom-cpufreq-hw.c 11
-rw-r--r-- drivers/cpuidle/cpuidle-haltpoll.c 4
-rw-r--r-- drivers/crypto/Kconfig 10
-rw-r--r-- drivers/crypto/Makefile 2
-rw-r--r-- drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c 3
-rw-r--r-- drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c 3
-rw-r--r-- drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c 3
-rw-r--r-- drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c 2
-rw-r--r-- drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c 3
-rw-r--r-- drivers/crypto/amlogic/amlogic-gxl-cipher.c 2
-rw-r--r-- drivers/crypto/atmel-aes.c 1
-rw-r--r-- drivers/crypto/atmel-sha.c 1
-rw-r--r-- drivers/crypto/atmel-tdes.c 1
-rw-r--r-- drivers/crypto/cavium/nitrox/nitrox_mbx.c 8
-rw-r--r-- drivers/crypto/cavium/nitrox/nitrox_req.h 2
-rw-r--r-- drivers/crypto/cavium/zip/zip_main.c 83
-rw-r--r-- drivers/crypto/ccp/ccp-crypto-aes.c 5
-rw-r--r-- drivers/crypto/ccp/ccp-dmaengine.c 16
-rw-r--r-- drivers/crypto/ccp/sev-dev.c 2
-rw-r--r-- drivers/crypto/ccree/cc_buffer_mgr.c 7
-rw-r--r-- drivers/crypto/ccree/cc_cipher.c 2
-rw-r--r-- drivers/crypto/gemini/sl3516-ce-cipher.c 6
-rw-r--r-- drivers/crypto/hisilicon/qm.c 4
-rw-r--r-- drivers/crypto/hisilicon/sec2/sec_crypto.c 43
-rw-r--r-- drivers/crypto/hisilicon/sec2/sec_crypto.h 6
-rw-r--r-- drivers/crypto/hisilicon/sec2/sec_main.c 59
-rw-r--r-- drivers/crypto/marvell/Kconfig 1
-rw-r--r-- drivers/crypto/marvell/octeontx/otx_cptvf_algs.c 5
-rw-r--r-- drivers/crypto/marvell/octeontx/otx_cptvf_main.c 1
-rw-r--r-- drivers/crypto/marvell/octeontx2/otx2_cpt_common.h 1
-rw-r--r-- drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c 14
-rw-r--r-- drivers/crypto/marvell/octeontx2/otx2_cptlf.h 19
-rw-r--r-- drivers/crypto/marvell/octeontx2/otx2_cptpf.h 1
-rw-r--r-- drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c 25
-rw-r--r-- drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c 27
-rw-r--r-- drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c 59
-rw-r--r-- drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c 17
-rw-r--r-- drivers/crypto/mxs-dcp.c 2
-rw-r--r-- drivers/crypto/nx/nx-common-pseries.c 4
-rw-r--r-- drivers/crypto/omap-aes.c 2
-rw-r--r-- drivers/crypto/omap-sham.c 2
-rw-r--r-- drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c 23
-rw-r--r-- drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h 24
-rw-r--r-- drivers/crypto/qat/qat_4xxx/adf_drv.c 7
-rw-r--r-- drivers/crypto/qat/qat_common/Makefile 1
-rw-r--r-- drivers/crypto/qat/qat_common/adf_accel_devices.h 2
-rw-r--r-- drivers/crypto/qat/qat_common/adf_admin.c 37
-rw-r--r-- drivers/crypto/qat/qat_common/adf_common_drv.h 4
-rw-r--r-- drivers/crypto/qat/qat_common/adf_ctl_drv.c 6
-rw-r--r-- drivers/crypto/qat/qat_common/adf_gen4_hw_data.h 14
-rw-r--r-- drivers/crypto/qat/qat_common/adf_gen4_pfvf.c 42
-rw-r--r-- drivers/crypto/qat/qat_common/adf_gen4_pm.c 137
-rw-r--r-- drivers/crypto/qat/qat_common/adf_gen4_pm.h 44
-rw-r--r-- drivers/crypto/qat/qat_common/adf_init.c 6
-rw-r--r-- drivers/crypto/qat/qat_common/adf_isr.c 42
-rw-r--r-- drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c 4
-rw-r--r-- drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h 1
-rw-r--r-- drivers/crypto/qat/qat_common/qat_crypto.c 7
-rw-r--r-- drivers/crypto/qat/qat_common/qat_uclo.c 9
-rw-r--r-- drivers/crypto/qcom-rng.c 17
-rw-r--r-- drivers/crypto/rockchip/rk3288_crypto_skcipher.c 1
-rw-r--r-- drivers/crypto/ux500/cryp/cryp_core.c 2
-rw-r--r-- drivers/crypto/ux500/hash/hash_core.c 2
-rw-r--r-- drivers/crypto/vmx/Kconfig 4
-rw-r--r-- drivers/crypto/xilinx/Makefile 1
-rw-r--r-- drivers/crypto/xilinx/zynqmp-sha.c 264
-rw-r--r-- drivers/dax/device.c 3
-rw-r--r-- drivers/dax/super.c 2
-rw-r--r-- drivers/dma-buf/dma-heap.c 2
-rw-r--r-- drivers/dma/at_xdmac.c 4
-rw-r--r-- drivers/dma/pl330.c 4
-rw-r--r-- drivers/dma/ptdma/ptdma-dev.c 17
-rw-r--r-- drivers/dma/sh/rcar-dmac.c 9
-rw-r--r-- drivers/dma/sh/shdma-base.c 4
-rw-r--r-- drivers/dma/stm32-dmamux.c 4
-rw-r--r-- drivers/edac/altera_edac.c 42
-rw-r--r-- drivers/edac/amd64_edac.c 109
-rw-r--r-- drivers/edac/amd64_edac.h 24
-rw-r--r-- drivers/edac/edac_device_sysfs.c 31
-rw-r--r-- drivers/edac/edac_mc.c 6
-rw-r--r-- drivers/edac/edac_pci_sysfs.c 26
-rw-r--r-- drivers/edac/xgene_edac.c 2
-rw-r--r-- drivers/firmware/Kconfig 1
-rw-r--r-- drivers/firmware/arm_scmi/driver.c 2
-rw-r--r-- drivers/firmware/arm_sdei.c 13
-rw-r--r-- drivers/firmware/efi/apple-properties.c 2
-rw-r--r-- drivers/firmware/efi/efi-pstore.c 2
-rw-r--r-- drivers/firmware/efi/efi.c 9
-rw-r--r-- drivers/firmware/efi/libstub/arm64-stub.c 6
-rw-r--r-- drivers/firmware/efi/libstub/riscv-stub.c 17
-rw-r--r-- drivers/firmware/efi/mokvar-table.c 2
-rw-r--r-- drivers/firmware/efi/vars.c 5
-rw-r--r-- drivers/firmware/xilinx/zynqmp.c 26
-rw-r--r-- drivers/gpio/gpio-74x164.c 4
-rw-r--r-- drivers/gpio/gpio-aggregator.c 18
-rw-r--r-- drivers/gpio/gpio-max3191x.c 4
-rw-r--r-- drivers/gpio/gpio-max7301.c 4
-rw-r--r-- drivers/gpio/gpio-mc33880.c 4
-rw-r--r-- drivers/gpio/gpio-mt7621.c 1
-rw-r--r-- drivers/gpio/gpio-omap.c 7
-rw-r--r-- drivers/gpio/gpio-pisosr.c 4
-rw-r--r-- drivers/gpio/gpio-rcar.c 2
-rw-r--r-- drivers/gpio/gpio-rockchip.c 56
-rw-r--r-- drivers/gpio/gpio-sifive.c 2
-rw-r--r-- drivers/gpio/gpio-sim.c 23
-rw-r--r-- drivers/gpio/gpio-tegra186.c 16
-rw-r--r-- drivers/gpio/gpio-tqmx86.c 3
-rw-r--r-- drivers/gpio/gpio-ts4900.c 24
-rw-r--r-- drivers/gpio/gpiolib-acpi.c 6
-rw-r--r-- drivers/gpio/gpiolib-cdev.c 6
-rw-r--r-- drivers/gpio/gpiolib-sysfs.c 7
-rw-r--r-- drivers/gpio/gpiolib.c 20
-rw-r--r-- drivers/gpio/gpiolib.h 12
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu.h 10
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 37
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 95
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 10
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c 7
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 8
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/soc15.c 9
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 2
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h 1
-rw-r--r-- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 19
-rw-r--r-- drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c 4
-rw-r--r-- drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c 1
-rw-r--r-- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c 2
-rw-r--r-- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c 16
-rw-r--r-- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c 20
-rw-r--r-- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c 6
-rw-r--r-- drivers/gpu/drm/amd/display/dc/core/dc.c 49
-rw-r--r-- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 26
-rw-r--r-- drivers/gpu/drm/amd/display/dc/core/dc_resource.c 58
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dc.h 2
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c 37
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c 15
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h 3
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c 2
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c 2
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c 2
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c 4
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c 13
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c 61
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c 3
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c 2
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c 2
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c 2
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c 2
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c 2
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h 2
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h 1
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c 3
-rw-r--r-- drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c 4
-rw-r--r-- drivers/gpu/drm/amd/display/dc/inc/core_types.h 1
-rw-r--r-- drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h 4
-rw-r--r-- drivers/gpu/drm/amd/display/dc/inc/resource.h 11
-rw-r--r-- drivers/gpu/drm/amd/pm/amdgpu_pm.c 3
-rw-r--r-- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 64
-rw-r--r-- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h 8
-rw-r--r-- drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c 9
-rw-r--r-- drivers/gpu/drm/arm/Kconfig 1
-rw-r--r-- drivers/gpu/drm/ast/ast_tables.h 2
-rw-r--r-- drivers/gpu/drm/bridge/Kconfig 2
-rw-r--r-- drivers/gpu/drm/bridge/nwl-dsi.c 12
-rw-r--r-- drivers/gpu/drm/bridge/ti-sn65dsi86.c 5
-rw-r--r-- drivers/gpu/drm/drm_atomic.c 12
-rw-r--r-- drivers/gpu/drm/drm_atomic_uapi.c 14
-rw-r--r-- drivers/gpu/drm/drm_cache.c 2
-rw-r--r-- drivers/gpu/drm/drm_connector.c 3
-rw-r--r-- drivers/gpu/drm/drm_edid.c 2
-rw-r--r-- drivers/gpu/drm/drm_gem_cma_helper.c 1
-rw-r--r-- drivers/gpu/drm/drm_panel_orientation_quirks.c 12
-rw-r--r-- drivers/gpu/drm/drm_privacy_screen.c 2
-rw-r--r-- drivers/gpu/drm/drm_privacy_screen_x86.c 3
-rw-r--r-- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c 4
-rw-r--r-- drivers/gpu/drm/exynos/exynos7_drm_decon.c 12
-rw-r--r-- drivers/gpu/drm/exynos/exynos_drm_dsi.c 6
-rw-r--r-- drivers/gpu/drm/exynos/exynos_drm_fimc.c 13
-rw-r--r-- drivers/gpu/drm/exynos/exynos_drm_fimd.c 13
-rw-r--r-- drivers/gpu/drm/exynos/exynos_drm_gsc.c 10
-rw-r--r-- drivers/gpu/drm/exynos/exynos_mixer.c 14
-rw-r--r-- drivers/gpu/drm/i915/Kconfig 1
-rw-r--r-- drivers/gpu/drm/i915/display/intel_bw.c 18
-rw-r--r-- drivers/gpu/drm/i915/display/intel_bw.h 8
-rw-r--r-- drivers/gpu/drm/i915/display/intel_display.c 1
-rw-r--r-- drivers/gpu/drm/i915/display/intel_drrs.c 8
-rw-r--r-- drivers/gpu/drm/i915/display/intel_fbc.c 3
-rw-r--r-- drivers/gpu/drm/i915/display/intel_opregion.c 15
-rw-r--r--drivers/gpu/drm/i915/display/intel_overlay.c3
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.c16
-rw-r--r--drivers/gpu/drm/i915/display/intel_snps_phy.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_tc.c29
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c9
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_types.h1
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c10
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm.c6
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c108
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c114
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.c4
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c2
-rw-r--r--drivers/gpu/drm/i915/i915_mm.h1
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h15
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c3
-rw-r--r--drivers/gpu/drm/i915/intel_pch.c2
-rw-r--r--drivers/gpu/drm/i915/intel_pch.h2
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c169
-rw-r--r--drivers/gpu/drm/i915/intel_pm.h1
-rw-r--r--drivers/gpu/drm/i915/intel_runtime_pm.c4
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.c26
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.h2
-rw-r--r--drivers/gpu/drm/imx/dcss/Kconfig1
-rw-r--r--drivers/gpu/drm/imx/parallel-display.c8
-rw-r--r--drivers/gpu/drm/kmb/kmb_plane.c6
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dsi.c167
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_pll.c6
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.c2
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_device.c18
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c11
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi.c7
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy.c4
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi.c7
-rw-r--r--drivers/gpu/drm/msm/msm_drv.c5
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.c3
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.h3
-rw-r--r--drivers/gpu/drm/msm/msm_gpu_devfreq.c21
-rw-r--r--drivers/gpu/drm/mxsfb/mxsfb_kms.c6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dmem.c3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_svm.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c2
-rw-r--r--drivers/gpu/drm/panel/Kconfig2
-rw-r--r--drivers/gpu/drm/panel/panel-abt-y030xx067a.c4
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9322.c4
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9341.c3
-rw-r--r--drivers/gpu/drm/panel/panel-innolux-ej030na.c4
-rw-r--r--drivers/gpu/drm/panel/panel-lg-lb035q02.c4
-rw-r--r--drivers/gpu/drm/panel/panel-lg-lg4573.c4
-rw-r--r--drivers/gpu/drm/panel/panel-nec-nl8048hl11.c4
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt39016.c4
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-db7430.c3
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-ld9040.c4
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6d27a1.c3
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c3
-rw-r--r--drivers/gpu/drm/panel/panel-simple.c3
-rw-r--r--drivers/gpu/drm/panel/panel-sitronix-st7789v.c4
-rw-r--r--drivers/gpu/drm/panel/panel-sony-acx565akm.c4
-rw-r--r--drivers/gpu/drm/panel/panel-tpo-td028ttec1.c4
-rw-r--r--drivers/gpu/drm/panel/panel-tpo-td043mtea1.c4
-rw-r--r--drivers/gpu/drm/panel/panel-tpo-tpg110.c3
-rw-r--r--drivers/gpu/drm/panel/panel-widechips-ws2401.c3
-rw-r--r--drivers/gpu/drm/radeon/atombios_encoders.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon_uvd.c8
-rw-r--r--drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c14
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_vop_reg.c8
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_mixer.h8
-rw-r--r--drivers/gpu/drm/tegra/Kconfig1
-rw-r--r--drivers/gpu/drm/tegra/dpaux.c7
-rw-r--r--drivers/gpu/drm/tegra/falcon.c2
-rw-r--r--drivers/gpu/drm/tiny/hx8357d.c4
-rw-r--r--drivers/gpu/drm/tiny/ili9163.c4
-rw-r--r--drivers/gpu/drm/tiny/ili9225.c4
-rw-r--r--drivers/gpu/drm/tiny/ili9341.c4
-rw-r--r--drivers/gpu/drm/tiny/ili9486.c4
-rw-r--r--drivers/gpu/drm/tiny/mi0283qt.c4
-rw-r--r--drivers/gpu/drm/tiny/repaper.c4
-rw-r--r--drivers/gpu/drm/tiny/st7586.c4
-rw-r--r--drivers/gpu/drm/tiny/st7735r.c4
-rw-r--r--drivers/gpu/drm/vc4/vc4_crtc.c9
-rw-r--r--drivers/gpu/drm/vc4/vc4_dsi.c14
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi.c37
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi.h4
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.h5
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c33
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_fence.c2
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.c2
-rw-r--r--drivers/gpu/host1x/syncpt.c35
-rw-r--r--drivers/hid/amd-sfh-hid/amd_sfh_pcie.c76
-rw-r--r--drivers/hid/amd-sfh-hid/amd_sfh_pcie.h4
-rw-r--r--drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c4
-rw-r--r--drivers/hid/hid-apple.c16
-rw-r--r--drivers/hid/hid-debug.c5
-rw-r--r--drivers/hid/hid-elo.c6
-rw-r--r--drivers/hid/hid-ids.h1
-rw-r--r--drivers/hid/hid-input.c3
-rw-r--r--drivers/hid/hid-logitech-dj.c1
-rw-r--r--drivers/hid/hid-nintendo.c4
-rw-r--r--drivers/hid/hid-quirks.c1
-rw-r--r--drivers/hid/hid-thrustmaster.c8
-rw-r--r--drivers/hid/hid-vivaldi.c2
-rw-r--r--drivers/hid/i2c-hid/i2c-hid-of-goodix.c28
-rw-r--r--drivers/hv/hv_balloon.c7
-rw-r--r--drivers/hv/hv_utils_transport.c2
-rw-r--r--drivers/hv/vmbus_drv.c9
-rw-r--r--drivers/hwmon/Kconfig49
-rw-r--r--drivers/hwmon/Makefile2
-rw-r--r--drivers/hwmon/adcxx.c4
-rw-r--r--drivers/hwmon/adt7310.c95
-rw-r--r--drivers/hwmon/adt7410.c82
-rw-r--r--drivers/hwmon/adt7470.c3
-rw-r--r--drivers/hwmon/adt7x10.c479
-rw-r--r--drivers/hwmon/adt7x10.h10
-rw-r--r--drivers/hwmon/aquacomputer_d5next.c379
-rw-r--r--drivers/hwmon/asus-ec-sensors.c716
-rw-r--r--drivers/hwmon/asus_wmi_ec_sensors.c3
-rw-r--r--drivers/hwmon/asus_wmi_sensors.c1
-rw-r--r--drivers/hwmon/axi-fan-control.c3
-rw-r--r--drivers/hwmon/dell-smm-hwmon.c84
-rw-r--r--drivers/hwmon/hwmon.c54
-rw-r--r--drivers/hwmon/lm70.c16
-rw-r--r--drivers/hwmon/lm83.c476
-rw-r--r--drivers/hwmon/lm90.c21
-rw-r--r--drivers/hwmon/max1111.c3
-rw-r--r--drivers/hwmon/max31722.c4
-rw-r--r--drivers/hwmon/max6639.c62
-rw-r--r--drivers/hwmon/mlxreg-fan.c84
-rw-r--r--drivers/hwmon/nct6775.c152
-rw-r--r--drivers/hwmon/ntc_thermistor.c2
-rw-r--r--drivers/hwmon/occ/common.c19
-rw-r--r--drivers/hwmon/occ/common.h2
-rw-r--r--drivers/hwmon/occ/sysfs.c46
-rw-r--r--drivers/hwmon/pmbus/Kconfig33
-rw-r--r--drivers/hwmon/pmbus/Makefile1
-rw-r--r--drivers/hwmon/pmbus/adm1275.c40
-rw-r--r--drivers/hwmon/pmbus/ir38064.c2
-rw-r--r--drivers/hwmon/pmbus/lm25066.c14
-rw-r--r--drivers/hwmon/pmbus/pli1209bc.c146
-rw-r--r--drivers/hwmon/pmbus/pmbus.h2
-rw-r--r--drivers/hwmon/pmbus/pmbus_core.c137
-rw-r--r--drivers/hwmon/pmbus/xdpe12284.c32
-rw-r--r--drivers/hwmon/powr1220.c235
-rw-r--r--drivers/hwmon/sch5627.c71
-rw-r--r--drivers/hwmon/sch5636.c10
-rw-r--r--drivers/hwmon/sch56xx-common.c44
-rw-r--r--drivers/hwmon/scpi-hwmon.c6
-rw-r--r--drivers/hwmon/tc654.c104
-rw-r--r--drivers/hwmon/tmp464.c712
-rw-r--r--drivers/hwmon/vexpress-hwmon.c6
-rw-r--r--drivers/hwtracing/intel_th/msu.c4
-rw-r--r--drivers/i2c/busses/Kconfig6
-rw-r--r--drivers/i2c/busses/i2c-bcm2835.c11
-rw-r--r--drivers/i2c/busses/i2c-brcmstb.c2
-rw-r--r--drivers/i2c/busses/i2c-qcom-cci.c16
-rw-r--r--drivers/idle/intel_idle.c111
-rw-r--r--drivers/iio/accel/bma400_spi.c4
-rw-r--r--drivers/iio/accel/bmc150-accel-core.c5
-rw-r--r--drivers/iio/accel/bmc150-accel-spi.c4
-rw-r--r--drivers/iio/accel/bmi088-accel-spi.c4
-rw-r--r--drivers/iio/accel/fxls8962af-core.c12
-rw-r--r--drivers/iio/accel/fxls8962af-i2c.c2
-rw-r--r--drivers/iio/accel/fxls8962af-spi.c2
-rw-r--r--drivers/iio/accel/fxls8962af.h3
-rw-r--r--drivers/iio/accel/kxcjk-1013.c5
-rw-r--r--drivers/iio/accel/kxsd9-spi.c4
-rw-r--r--drivers/iio/accel/mma7455_spi.c4
-rw-r--r--drivers/iio/accel/mma9551.c5
-rw-r--r--drivers/iio/accel/mma9553.c5
-rw-r--r--drivers/iio/accel/sca3000.c4
-rw-r--r--drivers/iio/adc/ad7124.c2
-rw-r--r--drivers/iio/adc/ad7266.c4
-rw-r--r--drivers/iio/adc/ltc2496.c4
-rw-r--r--drivers/iio/adc/mcp320x.c4
-rw-r--r--drivers/iio/adc/mcp3911.c4
-rw-r--r--drivers/iio/adc/men_z188_adc.c9
-rw-r--r--drivers/iio/adc/ti-adc12138.c4
-rw-r--r--drivers/iio/adc/ti-ads7950.c4
-rw-r--r--drivers/iio/adc/ti-ads8688.c4
-rw-r--r--drivers/iio/adc/ti-tlc4541.c4
-rw-r--r--drivers/iio/adc/ti-tsc2046.c4
-rw-r--r--drivers/iio/addac/ad74413r.c17
-rw-r--r--drivers/iio/amplifiers/ad8366.c4
-rw-r--r--drivers/iio/common/ssp_sensors/ssp_dev.c4
-rw-r--r--drivers/iio/dac/ad5360.c4
-rw-r--r--drivers/iio/dac/ad5380.c4
-rw-r--r--drivers/iio/dac/ad5446.c4
-rw-r--r--drivers/iio/dac/ad5449.c4
-rw-r--r--drivers/iio/dac/ad5504.c4
-rw-r--r--drivers/iio/dac/ad5592r.c4
-rw-r--r--drivers/iio/dac/ad5624r_spi.c4
-rw-r--r--drivers/iio/dac/ad5686-spi.c4
-rw-r--r--drivers/iio/dac/ad5761.c4
-rw-r--r--drivers/iio/dac/ad5764.c4
-rw-r--r--drivers/iio/dac/ad5791.c4
-rw-r--r--drivers/iio/dac/ad8801.c4
-rw-r--r--drivers/iio/dac/ltc1660.c4
-rw-r--r--drivers/iio/dac/ltc2632.c4
-rw-r--r--drivers/iio/dac/mcp4922.c4
-rw-r--r--drivers/iio/dac/ti-dac082s085.c4
-rw-r--r--drivers/iio/dac/ti-dac7311.c3
-rw-r--r--drivers/iio/frequency/adf4350.c4
-rw-r--r--drivers/iio/frequency/admv1013.c2
-rw-r--r--drivers/iio/gyro/bmg160_core.c5
-rw-r--r--drivers/iio/gyro/bmg160_spi.c4
-rw-r--r--drivers/iio/gyro/fxas21002c_spi.c4
-rw-r--r--drivers/iio/health/afe4403.c4
-rw-r--r--drivers/iio/imu/adis16480.c7
-rw-r--r--drivers/iio/imu/kmx61.c5
-rw-r--r--drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c6
-rw-r--r--drivers/iio/industrialio-buffer.c14
-rw-r--r--drivers/iio/magnetometer/bmc150_magn.c5
-rw-r--r--drivers/iio/magnetometer/bmc150_magn_spi.c4
-rw-r--r--drivers/iio/magnetometer/hmc5843_spi.c4
-rw-r--r--drivers/iio/potentiometer/max5487.c4
-rw-r--r--drivers/iio/pressure/ms5611_spi.c4
-rw-r--r--drivers/iio/pressure/zpa2326_spi.c4
-rw-r--r--drivers/infiniband/core/cm.c2
-rw-r--r--drivers/infiniband/core/cma.c62
-rw-r--r--drivers/infiniband/core/rw.c1
-rw-r--r--drivers/infiniband/core/ucma.c34
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib.h2
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib_main.c27
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib_tx.c38
-rw-r--r--drivers/infiniband/hw/mlx4/main.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_sysfs.c2
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c2
-rw-r--r--drivers/infiniband/sw/siw/siw.h7
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_rx.c20
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.c3
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt.c39
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c6
-rw-r--r--drivers/input/input.c6
-rw-r--r--drivers/input/keyboard/Kconfig2
-rw-r--r--drivers/input/keyboard/applespi.c4
-rw-r--r--drivers/input/misc/adxl34x-spi.c4
-rw-r--r--drivers/input/mouse/elan_i2c_core.c64
-rw-r--r--drivers/input/mouse/psmouse-smbus.c10
-rw-r--r--drivers/input/tablet/aiptek.c10
-rw-r--r--drivers/input/touchscreen/ads7846.c4
-rw-r--r--drivers/input/touchscreen/cyttsp4_spi.c4
-rw-r--r--drivers/input/touchscreen/goodix.c34
-rw-r--r--drivers/input/touchscreen/tsc2005.c4
-rw-r--r--drivers/input/touchscreen/wm97xx-core.c12
-rw-r--r--drivers/input/touchscreen/zinitix.c56
-rw-r--r--drivers/iommu/Kconfig6
-rw-r--r--drivers/iommu/Makefile2
-rw-r--r--drivers/iommu/amd/amd_iommu.h1
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h1
-rw-r--r--drivers/iommu/amd/init.c12
-rw-r--r--drivers/iommu/amd/io_pgtable.c12
-rw-r--r--drivers/iommu/amd/iommu.c10
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c5
-rw-r--r--drivers/iommu/intel/Kconfig2
-rw-r--r--drivers/iommu/intel/iommu.c6
-rw-r--r--drivers/iommu/intel/irq_remapping.c13
-rw-r--r--drivers/iommu/intel/svm.c9
-rw-r--r--drivers/iommu/ioasid.c38
-rw-r--r--drivers/iommu/iommu-sva-lib.c39
-rw-r--r--drivers/iommu/iommu-sva-lib.h7
-rw-r--r--drivers/iommu/iommu.c33
-rw-r--r--drivers/iommu/omap-iommu.c2
-rw-r--r--drivers/iommu/tegra-smmu.c4
-rw-r--r--drivers/irqchip/Kconfig8
-rw-r--r--drivers/irqchip/Makefile1
-rw-r--r--drivers/irqchip/irq-apple-aic.c552
-rw-r--r--drivers/irqchip/irq-ftintc010.c1
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c126
-rw-r--r--drivers/irqchip/irq-gic-v3.c2
-rw-r--r--drivers/irqchip/irq-gic.c104
-rw-r--r--drivers/irqchip/irq-imx-intmux.c8
-rw-r--r--drivers/irqchip/irq-loongson-pch-msi.c2
-rw-r--r--drivers/irqchip/irq-lpc32xx.c34
-rw-r--r--drivers/irqchip/irq-meson-gpio.c106
-rw-r--r--drivers/irqchip/irq-mvebu-pic.c28
-rw-r--r--drivers/irqchip/irq-nvic.c2
-rw-r--r--drivers/irqchip/irq-qcom-mpm.c461
-rw-r--r--drivers/irqchip/irq-realtek-rtl.c18
-rw-r--r--drivers/irqchip/irq-renesas-intc-irqpin.c3
-rw-r--r--drivers/irqchip/irq-renesas-irqc.c3
-rw-r--r--drivers/irqchip/irq-sifive-plic.c39
-rw-r--r--drivers/irqchip/irq-stm32-exti.c50
-rw-r--r--drivers/irqchip/irq-ts4800.c25
-rw-r--r--drivers/irqchip/irq-versatile-fpga.c46
-rw-r--r--drivers/irqchip/irq-xilinx-intc.c30
-rw-r--r--drivers/irqchip/qcom-pdc.c137
-rw-r--r--drivers/isdn/hardware/mISDN/hfcpci.c6
-rw-r--r--drivers/isdn/mISDN/dsp_pipeline.c6
-rw-r--r--drivers/leds/leds-cr0014114.c4
-rw-r--r--drivers/leds/leds-dac124s085.c4
-rw-r--r--drivers/leds/leds-el15203000.c4
-rw-r--r--drivers/leds/leds-spi-byte.c4
-rw-r--r--drivers/md/Kconfig1
-rw-r--r--drivers/md/bcache/btree.c6
-rw-r--r--drivers/md/bcache/io.c3
-rw-r--r--drivers/md/bcache/journal.c16
-rw-r--r--drivers/md/bcache/movinggc.c4
-rw-r--r--drivers/md/bcache/request.c26
-rw-r--r--drivers/md/bcache/super.c9
-rw-r--r--drivers/md/bcache/writeback.c21
-rw-r--r--drivers/md/dm-cache-target.c26
-rw-r--r--drivers/md/dm-core.h1
-rw-r--r--drivers/md/dm-crypt.c46
-rw-r--r--drivers/md/dm-integrity.c5
-rw-r--r--drivers/md/dm-io.c5
-rw-r--r--drivers/md/dm-log-writes.c39
-rw-r--r--drivers/md/dm-rq.c26
-rw-r--r--drivers/md/dm-snap.c21
-rw-r--r--drivers/md/dm-thin.c41
-rw-r--r--drivers/md/dm-writecache.c7
-rw-r--r--drivers/md/dm-zoned-metadata.c26
-rw-r--r--drivers/md/dm-zoned-target.c3
-rw-r--r--drivers/md/dm.c194
-rw-r--r--drivers/md/md-faulty.c4
-rw-r--r--drivers/md/md-multipath.c13
-rw-r--r--drivers/md/md.c39
-rw-r--r--drivers/md/raid1-10.c5
-rw-r--r--drivers/md/raid1.c58
-rw-r--r--drivers/md/raid1.h1
-rw-r--r--drivers/md/raid10.c47
-rw-r--r--drivers/md/raid10.h1
-rw-r--r--drivers/md/raid5-cache.c42
-rw-r--r--drivers/md/raid5-ppl.c29
-rw-r--r--drivers/md/raid5.c29
-rw-r--r--drivers/media/platform/omap3isp/ispstat.c5
-rw-r--r--drivers/media/spi/cxd2880-spi.c4
-rw-r--r--drivers/media/spi/gs1662.c4
-rw-r--r--drivers/media/tuners/msi001.c3
-rw-r--r--drivers/memstick/core/ms_block.c64
-rw-r--r--drivers/memstick/core/ms_block.h1
-rw-r--r--drivers/memstick/core/mspro_block.c57
-rw-r--r--drivers/mfd/arizona-spi.c4
-rw-r--r--drivers/mfd/da9052-spi.c3
-rw-r--r--drivers/mfd/ezx-pcap.c8
-rw-r--r--drivers/mfd/lpc_ich.c59
-rw-r--r--drivers/mfd/madera-spi.c4
-rw-r--r--drivers/mfd/mc13xxx-spi.c3
-rw-r--r--drivers/mfd/rsmu_spi.c4
-rw-r--r--drivers/mfd/stmpe-spi.c4
-rw-r--r--drivers/mfd/tps65912-spi.c4
-rw-r--r--drivers/misc/ad525x_dpot-spi.c3
-rw-r--r--drivers/misc/eeprom/at25.c4
-rw-r--r--drivers/misc/eeprom/ee1004.c3
-rw-r--r--drivers/misc/eeprom/eeprom_93xx46.c4
-rw-r--r--drivers/misc/fastrpc.c9
-rw-r--r--drivers/misc/hi6421v600-irq.c6
-rw-r--r--drivers/misc/lattice-ecp3-config.c4
-rw-r--r--drivers/misc/lis3lv02d/lis3lv02d_spi.c4
-rw-r--r--drivers/mmc/core/block.c30
-rw-r--r--drivers/mmc/core/mmc.c2
-rw-r--r--drivers/mmc/core/mmc_ops.c13
-rw-r--r--drivers/mmc/core/mmc_ops.h3
-rw-r--r--drivers/mmc/core/sd.c10
-rw-r--r--drivers/mmc/host/meson-gx-mmc.c15
-rw-r--r--drivers/mmc/host/mmc_spi.c3
-rw-r--r--drivers/mmc/host/moxart-mmc.c2
-rw-r--r--drivers/mmc/host/sdhci-of-esdhc.c8
-rw-r--r--drivers/mmc/host/sh_mmcif.c3
-rw-r--r--drivers/mtd/devices/mchp23k256.c4
-rw-r--r--drivers/mtd/devices/mchp48l640.c4
-rw-r--r--drivers/mtd/devices/mtd_dataflash.c4
-rw-r--r--drivers/mtd/devices/phram.c12
-rw-r--r--drivers/mtd/devices/sst25l.c4
-rw-r--r--drivers/mtd/mtdcore.c2
-rw-r--r--drivers/mtd/mtdswap.c2
-rw-r--r--drivers/mtd/nand/raw/Kconfig2
-rw-r--r--drivers/mtd/nand/raw/brcmnand/brcmnand.c2
-rw-r--r--drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c3
-rw-r--r--drivers/mtd/nand/raw/ingenic/ingenic_ecc.c7
-rw-r--r--drivers/mtd/nand/raw/qcom_nandc.c14
-rw-r--r--drivers/mtd/nand/raw/sharpsl.c1
-rw-r--r--drivers/mtd/parsers/qcomsmempart.c36
-rw-r--r--drivers/mtd/spi-nor/controllers/Kconfig36
-rw-r--r--drivers/mtd/spi-nor/controllers/Makefile3
-rw-r--r--drivers/mtd/spi-nor/controllers/intel-spi.h21
-rw-r--r--drivers/net/arcnet/com20020-pci.c3
-rw-r--r--drivers/net/bonding/bond_3ad.c33
-rw-r--r--drivers/net/bonding/bond_main.c9
-rw-r--r--drivers/net/can/flexcan/flexcan-core.c1
-rw-r--r--drivers/net/can/flexcan/flexcan.h2
-rw-r--r--drivers/net/can/m_can/m_can.c6
-rw-r--r--drivers/net/can/m_can/tcan4x5x-core.c4
-rw-r--r--drivers/net/can/m_can/tcan4x5x-regmap.c2
-rw-r--r--drivers/net/can/rcar/rcar_canfd.c6
-rw-r--r--drivers/net/can/spi/hi311x.c4
-rw-r--r--drivers/net/can/spi/mcp251x.c4
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c4
-rw-r--r--drivers/net/can/usb/etas_es58x/es58x_core.c9
-rw-r--r--drivers/net/can/usb/etas_es58x/es58x_core.h8
-rw-r--r--drivers/net/can/usb/gs_usb.c10
-rw-r--r--drivers/net/dsa/Kconfig2
-rw-r--r--drivers/net/dsa/b53/b53_spi.c4
-rw-r--r--drivers/net/dsa/bcm_sf2.c7
-rw-r--r--drivers/net/dsa/lan9303-core.c13
-rw-r--r--drivers/net/dsa/lantiq_gswip.c14
-rw-r--r--drivers/net/dsa/microchip/ksz8795_spi.c15
-rw-r--r--drivers/net/dsa/microchip/ksz9477_spi.c16
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c26
-rw-r--r--drivers/net/dsa/mt7530.c4
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c22
-rw-r--r--drivers/net/dsa/ocelot/felix_vsc9959.c4
-rw-r--r--drivers/net/dsa/ocelot/seville_vsc9953.c5
-rw-r--r--drivers/net/dsa/qca/ar9331.c3
-rw-r--r--drivers/net/dsa/sja1105/sja1105_main.c6
-rw-r--r--drivers/net/dsa/vitesse-vsc73xx-spi.c6
-rw-r--r--drivers/net/ethernet/3com/typhoon.c6
-rw-r--r--drivers/net/ethernet/8390/etherh.c6
-rw-r--r--drivers/net/ethernet/8390/mcf8390.c10
-rw-r--r--drivers/net/ethernet/amd/declance.c4
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-drv.c14
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-pci.c3
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_filters.c6
-rw-r--r--drivers/net/ethernet/arc/emac_mdio.c5
-rw-r--r--drivers/net/ethernet/asix/ax88796c_main.c4
-rw-r--r--drivers/net/ethernet/atheros/alx/main.c5
-rw-r--r--drivers/net/ethernet/atheros/atl1c/atl1c_main.c2
-rw-r--r--drivers/net/ethernet/broadcom/bgmac-platform.c23
-rw-r--r--drivers/net/ethernet/broadcom/bnx2.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x.h2
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c28
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c18
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c47
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c39
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c17
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c12
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h2
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c6
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c7
-rw-r--r--drivers/net/ethernet/broadcom/sb1250-mac.c4
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c27
-rw-r--r--drivers/net/ethernet/chelsio/cxgb3/t3_hw.c2
-rw-r--r--drivers/net/ethernet/faraday/ftgmac100.c243
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c6
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c4
-rw-r--r--drivers/net/ethernet/freescale/fec_mpc52xx.c12
-rw-r--r--drivers/net/ethernet/freescale/gianfar_ethtool.c1
-rw-r--r--drivers/net/ethernet/google/gve/gve.h2
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.c2
-rw-r--r--drivers/net/ethernet/google/gve/gve_main.c6
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx.c4
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx_dqo.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c3
-rw-r--r--drivers/net/ethernet/i825xx/ether1.c4
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c367
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.h2
-rw-r--r--drivers/net/ethernet/intel/e1000e/e1000.h4
-rw-r--r--drivers/net/ethernet/intel/e1000e/hw.h1
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.c28
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.h1
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c65
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h10
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_debugfs.c8
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c63
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_register.h3
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c160
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h6
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf.h7
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c187
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_virtchnl.c64
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h16
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_eswitch.c1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lag.c34
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c6
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c78
-rw-r--r--drivers/net/ethernet/intel/ice/ice_protocol_type.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.c5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_tc_lib.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c60
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h3
-rw-r--r--drivers/net/ethernet/intel/igc/igc_phy.c4
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c6
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c13
-rw-r--r--drivers/net/ethernet/lantiq_xrx200.c2
-rw-r--r--drivers/net/ethernet/litex/Kconfig2
-rw-r--r--drivers/net/ethernet/marvell/mv643xx_eth.c24
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c6
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mbox.h1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h70
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rpm.c66
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rpm.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.c7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.h1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c14
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c20
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c22
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c20
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c7
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_main.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tir.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/port.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c120
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h5
-rw-r--r--drivers/net/ethernet/micrel/ks8851_spi.c4
-rw-r--r--drivers/net/ethernet/microchip/enc28j60.c4
-rw-r--r--drivers/net/ethernet/microchip/encx24j600.c4
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_mac.c11
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.c6
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c2
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_main.h2
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_packet.c2
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_vlan.c20
-rw-r--r--drivers/net/ethernet/mscc/ocelot.c25
-rw-r--r--drivers/net/ethernet/mscc/ocelot_flower.c16
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/cmsg.h2
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c16
-rw-r--r--drivers/net/ethernet/nxp/lpc_eth.c5
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_sriov.c18
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_vf.c7
-rw-r--r--drivers/net/ethernet/qualcomm/qca_spi.c4
-rw-r--r--drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c6
-rw-r--r--drivers/net/ethernet/seeq/ether3.c4
-rw-r--r--drivers/net/ethernet/sfc/mcdi.c2
-rw-r--r--drivers/net/ethernet/smsc/smc911x.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c51
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c19
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c19
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c76
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c3
-rw-r--r--drivers/net/ethernet/sun/sunhme.c6
-rw-r--r--drivers/net/ethernet/ti/cpsw_priv.c2
-rw-r--r--drivers/net/ethernet/ti/cpts.c4
-rw-r--r--drivers/net/ethernet/tundra/tsi108_eth.c35
-rw-r--r--drivers/net/ethernet/vertexcom/mse102x.c4
-rw-r--r--drivers/net/ethernet/wiznet/w5100-spi.c4
-rw-r--r--drivers/net/ethernet/xilinx/ll_temac_main.c2
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_emaclite.c4
-rw-r--r--drivers/net/hamradio/6pack.c4
-rw-r--r--drivers/net/hamradio/yam.c4
-rw-r--r--drivers/net/hyperv/netvsc_drv.c3
-rw-r--r--drivers/net/ieee802154/adf7242.c4
-rw-r--r--drivers/net/ieee802154/at86rf230.c17
-rw-r--r--drivers/net/ieee802154/ca8210.c11
-rw-r--r--drivers/net/ieee802154/cc2520.c4
-rw-r--r--drivers/net/ieee802154/mac802154_hwsim.c1
-rw-r--r--drivers/net/ieee802154/mcr20a.c8
-rw-r--r--drivers/net/ieee802154/mrf24j40.c4
-rw-r--r--drivers/net/ipa/Kconfig2
-rw-r--r--drivers/net/ipa/ipa_power.c52
-rw-r--r--drivers/net/ipa/ipa_power.h7
-rw-r--r--drivers/net/ipa/ipa_uc.c5
-rw-r--r--drivers/net/macsec.c33
-rw-r--r--drivers/net/mctp/mctp-serial.c9
-rw-r--r--drivers/net/mdio/mdio-aspeed.c1
-rw-r--r--drivers/net/mdio/mdio-ipq4019.c6
-rw-r--r--drivers/net/mdio/mdio-mscc-miim.c9
-rw-r--r--drivers/net/netdevsim/fib.c4
-rw-r--r--drivers/net/phy/at803x.c26
-rw-r--r--drivers/net/phy/broadcom.c1
-rw-r--r--drivers/net/phy/dp83822.c2
-rw-r--r--drivers/net/phy/marvell.c25
-rw-r--r--drivers/net/phy/mediatek-ge.c3
-rw-r--r--drivers/net/phy/meson-gxl.c31
-rw-r--r--drivers/net/phy/mscc/mscc_main.c3
-rw-r--r--drivers/net/phy/phy_device.c6
-rw-r--r--drivers/net/phy/sfp-bus.c5
-rw-r--r--drivers/net/phy/spi_ks8995.c4
-rw-r--r--drivers/net/usb/ax88179_178a.c68
-rw-r--r--drivers/net/usb/cdc_ether.c12
-rw-r--r--drivers/net/usb/cdc_mbim.c5
-rw-r--r--drivers/net/usb/cdc_ncm.c8
-rw-r--r--drivers/net/usb/ipheth.c6
-rw-r--r--drivers/net/usb/lan78xx.c7
-rw-r--r--drivers/net/usb/qmi_wwan.c2
-rw-r--r--drivers/net/usb/smsc95xx.c28
-rw-r--r--drivers/net/usb/sr9700.c2
-rw-r--r--drivers/net/usb/zaurus.c12
-rw-r--r--drivers/net/veth.c13
-rw-r--r--drivers/net/wan/slic_ds26522.c3
-rw-r--r--drivers/net/wireguard/device.c38
-rw-r--r--drivers/net/wireless/ath/ath10k/wmi.c33
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c6
-rw-r--r--drivers/net/wireless/intel/Makefile1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/Kconfig13
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/acpi.c11
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/commands.h5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/filter.h88
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/rs.h1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/file.h2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/rs.c33
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-csr.h3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-drv.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mei/main.c45
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mei/net.c3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c214
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/fw.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c241
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mvm.h15
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ops.c1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/tx.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/vendor-cmd.c5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/trans.c3
-rw-r--r--drivers/net/wireless/intersil/p54/p54spi.c4
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c13
-rw-r--r--drivers/net/wireless/marvell/libertas/if_spi.c4
-rw-r--r--drivers/net/wireless/microchip/wilc1000/spi.c4
-rw-r--r--drivers/net/wireless/st/cw1200/cw1200_spi.c4
-rw-r--r--drivers/net/wireless/ti/wl1251/spi.c4
-rw-r--r--drivers/net/wireless/ti/wlcore/spi.c4
-rw-r--r--drivers/net/xen-netback/xenbus.c14
-rw-r--r--drivers/net/xen-netfront.c93
-rw-r--r--drivers/nfc/nfcmrvl/spi.c3
-rw-r--r--drivers/nfc/port100.c2
-rw-r--r--drivers/nfc/st-nci/spi.c4
-rw-r--r--drivers/nfc/st95hf/core.c4
-rw-r--r--drivers/nfc/trf7970a.c4
-rw-r--r--drivers/ntb/hw/intel/ntb_hw_gen4.c17
-rw-r--r--drivers/ntb/hw/intel/ntb_hw_gen4.h16
-rw-r--r--drivers/ntb/msi.c6
-rw-r--r--drivers/nvdimm/blk.c8
-rw-r--r--drivers/nvdimm/btt.c11
-rw-r--r--drivers/nvdimm/btt_devs.c1
-rw-r--r--drivers/nvdimm/bus.c1
-rw-r--r--drivers/nvdimm/nd_virtio.c6
-rw-r--r--drivers/nvdimm/pfn_devs.c1
-rw-r--r--drivers/nvdimm/pmem.h1
-rw-r--r--drivers/nvme/host/Kconfig8
-rw-r--r--drivers/nvme/host/Makefile2
-rw-r--r--drivers/nvme/host/constants.c185
-rw-r--r--drivers/nvme/host/core.c299
-rw-r--r--drivers/nvme/host/fabrics.c12
-rw-r--r--drivers/nvme/host/fabrics.h1
-rw-r--r--drivers/nvme/host/fc.c22
-rw-r--r--drivers/nvme/host/ioctl.c38
-rw-r--r--drivers/nvme/host/multipath.c34
-rw-r--r--drivers/nvme/host/nvme.h47
-rw-r--r--drivers/nvme/host/pci.c21
-rw-r--r--drivers/nvme/host/rdma.c118
-rw-r--r--drivers/nvme/host/tcp.c125
-rw-r--r--drivers/nvme/target/admin-cmd.c6
-rw-r--r--drivers/nvme/target/configfs.c66
-rw-r--r--drivers/nvme/target/core.c12
-rw-r--r--drivers/nvme/target/fc.c16
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c27
-rw-r--r--drivers/nvme/target/io-cmd-file.c17
-rw-r--r--drivers/nvme/target/loop.c6
-rw-r--r--drivers/nvme/target/nvmet.h4
-rw-r--r--drivers/nvme/target/passthru.c10
-rw-r--r--drivers/nvme/target/rdma.c8
-rw-r--r--drivers/nvme/target/tcp.c6
-rw-r--r--drivers/nvme/target/zns.c20
-rw-r--r--drivers/nvmem/core.c2
-rw-r--r--drivers/of/fdt.c2
-rw-r--r--drivers/of/of_reserved_mem.c9
-rw-r--r--drivers/of/unittest.c16
-rw-r--r--drivers/parisc/ccio-dma.c3
-rw-r--r--drivers/parisc/sba_iommu.c3
-rw-r--r--drivers/pci/controller/cadence/pci-j721e.c85
-rw-r--r--drivers/pci/controller/dwc/pcie-kirin.c31
-rw-r--r--drivers/pci/controller/pci-hyperv.c13
-rw-r--r--drivers/pci/controller/pci-mvebu.c3
-rw-r--r--drivers/pci/controller/pcie-apple.c2
-rw-r--r--drivers/pci/controller/pcie-mt7621.c11
-rw-r--r--drivers/pci/controller/vmd.c14
-rw-r--r--drivers/pci/msi/irqdomain.c4
-rw-r--r--drivers/pci/msi/legacy.c1
-rw-r--r--drivers/pci/msi/msi.c3
-rw-r--r--drivers/pci/pci-driver.c35
-rw-r--r--drivers/pci/pcie/portdrv_core.c47
-rw-r--r--drivers/pci/quirks.c14
-rw-r--r--drivers/perf/Kconfig16
-rw-r--r--drivers/perf/Makefile2
-rw-r--r--drivers/perf/apple_m1_cpu_pmu.c584
-rw-r--r--drivers/perf/arm-cci.c2
-rw-r--r--drivers/perf/arm-ccn.c10
-rw-r--r--drivers/perf/arm-cmn.c23
-rw-r--r--drivers/perf/arm_pmu.c6
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pmu.c2
-rw-r--r--drivers/perf/marvell_cn10k_ddr_pmu.c758
-rw-r--r--drivers/perf/marvell_cn10k_tad_pmu.c2
-rw-r--r--drivers/perf/thunderx2_pmu.c6
-rw-r--r--drivers/perf/xgene_pmu.c8
-rw-r--r--drivers/phy/amlogic/phy-meson-axg-mipi-dphy.c3
-rw-r--r--drivers/phy/broadcom/Kconfig3
-rw-r--r--drivers/phy/broadcom/phy-brcm-usb.c38
-rw-r--r--drivers/phy/cadence/phy-cadence-sierra.c35
-rw-r--r--drivers/phy/mediatek/phy-mtk-tphy.c2
-rw-r--r--drivers/phy/phy-core-mipi-dphy.c4
-rw-r--r--drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c3
-rw-r--r--drivers/phy/st/phy-stm32-usbphyc.c2
-rw-r--r--drivers/phy/ti/phy-j721e-wiz.c1
-rw-r--r--drivers/phy/xilinx/phy-zynqmp.c11
-rw-r--r--drivers/pinctrl/Makefile2
-rw-r--r--drivers/pinctrl/bcm/Kconfig1
-rw-r--r--drivers/pinctrl/bcm/pinctrl-bcm2835.c23
-rw-r--r--drivers/pinctrl/intel/pinctrl-cherryview.c5
-rw-r--r--drivers/pinctrl/intel/pinctrl-intel.c64
-rw-r--r--drivers/pinctrl/intel/pinctrl-tigerlake.c1
-rw-r--r--drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c25
-rw-r--r--drivers/pinctrl/pinctrl-k210.c4
-rw-r--r--drivers/pinctrl/pinctrl-microchip-sgpio.c3
-rw-r--r--drivers/pinctrl/pinctrl-starfive.c6
-rw-r--r--drivers/pinctrl/pinctrl-thunderbay.c90
-rw-r--r--drivers/pinctrl/pinctrl-zynqmp.c10
-rw-r--r--drivers/pinctrl/sunxi/pinctrl-sun50i-h616.c8
-rw-r--r--drivers/pinctrl/sunxi/pinctrl-sunxi.c15
-rw-r--r--drivers/platform/chrome/cros_ec.c4
-rw-r--r--drivers/platform/chrome/cros_ec.h2
-rw-r--r--drivers/platform/chrome/cros_ec_i2c.c4
-rw-r--r--drivers/platform/chrome/cros_ec_lpc.c4
-rw-r--r--drivers/platform/chrome/cros_ec_spi.c4
-rw-r--r--drivers/platform/olpc/olpc-xo175-ec.c4
-rw-r--r--drivers/platform/surface/Kconfig1
-rw-r--r--drivers/platform/surface/surface3_power.c13
-rw-r--r--drivers/platform/x86/amd-pmc.c57
-rw-r--r--drivers/platform/x86/asus-tf103c-dock.c4
-rw-r--r--drivers/platform/x86/asus-wmi.c2
-rw-r--r--drivers/platform/x86/intel/crystal_cove_charger.c26
-rw-r--r--drivers/platform/x86/intel/int3472/tps68470_board_data.c3
-rw-r--r--drivers/platform/x86/intel/speed_select_if/isst_if_common.c97
-rw-r--r--drivers/platform/x86/thinkpad_acpi.c26
-rw-r--r--drivers/platform/x86/touchscreen_dmi.c24
-rw-r--r--drivers/platform/x86/x86-android-tablets.c105
-rw-r--r--drivers/pnp/driver.c2
-rw-r--r--drivers/pnp/pnpacpi/core.c4
-rw-r--r--drivers/power/supply/bq256xx_charger.c3
-rw-r--r--drivers/power/supply/cw2015_battery.c2
-rw-r--r--drivers/powercap/Kconfig8
-rw-r--r--drivers/powercap/Makefile1
-rw-r--r--drivers/powercap/dtpm.c333
-rw-r--r--drivers/powercap/dtpm_cpu.c55
-rw-r--r--drivers/powercap/dtpm_devfreq.c203
-rw-r--r--drivers/powercap/dtpm_subsys.h22
-rw-r--r--drivers/ptp/ptp_ocp.c25
-rw-r--r--drivers/regulator/Kconfig20
-rw-r--r--drivers/regulator/Makefile2
-rw-r--r--drivers/regulator/core.c13
-rw-r--r--drivers/regulator/da9121-regulator.c16
-rw-r--r--drivers/regulator/max20086-regulator.c3
-rw-r--r--drivers/regulator/max8973-regulator.c2
-rw-r--r--drivers/regulator/qcom-rpmh-regulator.c37
-rw-r--r--drivers/regulator/qcom_smd-regulator.c4
-rw-r--r--drivers/regulator/rpi-panel-attiny-regulator.c291
-rw-r--r--drivers/regulator/rt5190a-regulator.c513
-rw-r--r--drivers/regulator/sc2731-regulator.c2
-rw-r--r--drivers/regulator/ti-abb-regulator.c6
-rw-r--r--drivers/regulator/tps6286x-regulator.c159
-rw-r--r--drivers/regulator/vctrl-regulator.c5
-rw-r--r--drivers/regulator/virtual.c41
-rw-r--r--drivers/regulator/wm8350-regulator.c2
-rw-r--r--drivers/remoteproc/Kconfig4
-rw-r--r--drivers/remoteproc/qcom_q6v5.c1
-rw-r--r--drivers/rpmsg/rpmsg_char.c22
-rw-r--r--drivers/rtc/rtc-ds1302.c3
-rw-r--r--drivers/rtc/rtc-ds1305.c4
-rw-r--r--drivers/rtc/rtc-ds1343.c4
-rw-r--r--drivers/s390/block/dasd_int.h1
-rw-r--r--drivers/s390/block/scm_blk.c1
-rw-r--r--drivers/s390/block/scm_blk.h1
-rw-r--r--drivers/s390/cio/device.c2
-rw-r--r--drivers/s390/scsi/zfcp_fc.c13
-rw-r--r--drivers/scsi/3w-sas.c4
-rw-r--r--drivers/scsi/53c700.c1
-rw-r--r--drivers/scsi/bfa/bfad.c6
-rw-r--r--drivers/scsi/bnx2fc/bnx2fc_fcoe.c41
-rw-r--r--drivers/scsi/elx/libefc/efc_els.c8
-rw-r--r--drivers/scsi/fnic/fnic_scsi.c13
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_main.c17
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_v3_hw.c2
-rw-r--r--drivers/scsi/lpfc/lpfc.h14
-rw-r--r--drivers/scsi/lpfc/lpfc_attr.c7
-rw-r--r--drivers/scsi/lpfc/lpfc_els.c20
-rw-r--r--drivers/scsi/lpfc/lpfc_init.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_nportdisc.c5
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.c8
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.c5
-rw-r--r--drivers/scsi/myrs.c3
-rw-r--r--drivers/scsi/pm8001/pm8001_hwi.c18
-rw-r--r--drivers/scsi/pm8001/pm8001_sas.c5
-rw-r--r--drivers/scsi/pm8001/pm80xx_hwi.c47
-rw-r--r--drivers/scsi/pm8001/pm80xx_hwi.h6
-rw-r--r--drivers/scsi/qedf/qedf_io.c1
-rw-r--r--drivers/scsi/qedf/qedf_main.c7
-rw-r--r--drivers/scsi/qedi/qedi_fw.c6
-rw-r--r--drivers/scsi/scsi_debug.c1
-rw-r--r--drivers/scsi/scsi_lib.c2
-rw-r--r--drivers/scsi/scsi_scan.c55
-rw-r--r--drivers/scsi/scsicam.c1
-rw-r--r--drivers/scsi/sd.c115
-rw-r--r--drivers/scsi/sd.h12
-rw-r--r--drivers/scsi/sr.c131
-rw-r--r--drivers/scsi/sr.h6
-rw-r--r--drivers/scsi/st.c1
-rw-r--r--drivers/scsi/st.h1
-rw-r--r--drivers/scsi/ufs/ufshcd-pltfrm.c7
-rw-r--r--drivers/scsi/ufs/ufshcd.c11
-rw-r--r--drivers/scsi/ufs/ufshci.h3
-rw-r--r--drivers/scsi/ufs/ufshpb.c4
-rw-r--r--drivers/scsi/xen-scsifront.c3
-rw-r--r--drivers/soc/aspeed/aspeed-lpc-ctrl.c7
-rw-r--r--drivers/soc/fsl/guts.c14
-rw-r--r--drivers/soc/fsl/qe/qe.c4
-rw-r--r--drivers/soc/fsl/qe/qe_io.c2
-rw-r--r--drivers/soc/imx/gpcv2.c3
-rw-r--r--drivers/soc/mediatek/mt8192-mmsys.h3
-rw-r--r--drivers/soc/mediatek/mtk-scpsys.c15
-rw-r--r--drivers/soc/rockchip/Kconfig8
-rw-r--r--drivers/soc/rockchip/Makefile1
-rw-r--r--drivers/soc/rockchip/dtpm.c65
-rw-r--r--drivers/soc/samsung/Kconfig2
-rw-r--r--drivers/soc/samsung/exynos-chipid.c2
-rw-r--r--drivers/spi/Kconfig50
-rw-r--r--drivers/spi/Makefile4
-rw-r--r--drivers/spi/spi-amd.c87
-rw-r--r--drivers/spi/spi-ath79.c34
-rw-r--r--drivers/spi/spi-bcm-qspi.c2
-rw-r--r--drivers/spi/spi-bcm2835aux.c21
-rw-r--r--drivers/spi/spi-bitbang-txrx.h66
-rw-r--r--drivers/spi/spi-cadence-xspi.c4
-rw-r--r--drivers/spi/spi-fsi.c10
-rw-r--r--drivers/spi/spi-geni-qcom.c7
-rw-r--r--drivers/spi/spi-gpio.c42
-rw-r--r--drivers/spi/spi-intel-pci.c (renamed from drivers/mtd/spi-nor/controllers/intel-spi-pci.c)50
-rw-r--r--drivers/spi/spi-intel-platform.c (renamed from drivers/mtd/spi-nor/controllers/intel-spi-platform.c)21
-rw-r--r--drivers/spi/spi-intel.c (renamed from drivers/mtd/spi-nor/controllers/intel-spi.c)850
-rw-r--r--drivers/spi/spi-intel.h19
-rw-r--r--drivers/spi/spi-lantiq-ssc.c8
-rw-r--r--drivers/spi/spi-mem.c6
-rw-r--r--drivers/spi/spi-meson-spicc.c5
-rw-r--r--drivers/spi/spi-mpc512x-psc.c47
-rw-r--r--drivers/spi/spi-mt65xx.c136
-rw-r--r--drivers/spi/spi-mtk-nor.c71
-rw-r--r--drivers/spi/spi-npcm-fiu.c14
-rw-r--r--drivers/spi/spi-pic32.c9
-rw-r--r--drivers/spi/spi-pxa2xx-pci.c320
-rw-r--r--drivers/spi/spi-pxa2xx.c68
-rw-r--r--drivers/spi/spi-qup.c5
-rw-r--r--drivers/spi/spi-rockchip-sfc.c4
-rw-r--r--drivers/spi/spi-rockchip.c135
-rw-r--r--drivers/spi/spi-s3c24xx.c47
-rw-r--r--drivers/spi/spi-s3c64xx.c80
-rw-r--r--drivers/spi/spi-slave-system-control.c3
-rw-r--r--drivers/spi/spi-slave-time.c3
-rw-r--r--drivers/spi/spi-st-ssc4.c31
-rw-r--r--drivers/spi/spi-stm32-qspi.c47
-rw-r--r--drivers/spi/spi-stm32.c9
-rw-r--r--drivers/spi/spi-sun4i.c2
-rw-r--r--drivers/spi/spi-sunplus-sp7021.c584
-rw-r--r--drivers/spi/spi-tegra114.c4
-rw-r--r--drivers/spi/spi-tegra20-slink.c8
-rw-r--r--drivers/spi/spi-tegra210-quad.c341
-rw-r--r--drivers/spi/spi-tle62x0.c3
-rw-r--r--drivers/spi/spi-topcliff-pch.c15
-rw-r--r--drivers/spi/spi-uniphier.c18
-rw-r--r--drivers/spi/spi-zynq-qspi.c3
-rw-r--r--drivers/spi/spi-zynqmp-gqspi.c5
-rw-r--r--drivers/spi/spi.c291
-rw-r--r--drivers/spi/spidev.c35
-rw-r--r--drivers/staging/fbtft/fb_st7789v.c2
-rw-r--r--drivers/staging/fbtft/fbtft.h97
-rw-r--r--drivers/staging/gdm724x/gdm_lte.c5
-rw-r--r--drivers/staging/greybus/gpio.c5
-rw-r--r--drivers/staging/pi433/pi433_if.c4
-rw-r--r--drivers/staging/rtl8723bs/core/rtw_mlme_ext.c7
-rw-r--r--drivers/staging/rtl8723bs/core/rtw_recv.c10
-rw-r--r--drivers/staging/rtl8723bs/core/rtw_sta_mgt.c22
-rw-r--r--drivers/staging/rtl8723bs/core/rtw_xmit.c16
-rw-r--r--drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c2
-rw-r--r--drivers/staging/rtl8723bs/include/rtw_mlme.h8
-rw-r--r--drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c20
-rw-r--r--drivers/staging/wfx/bus_spi.c3
-rw-r--r--drivers/target/iscsi/iscsi_target_tpg.c3
-rw-r--r--drivers/target/target_core_iblock.c12
-rw-r--r--drivers/target/target_core_pscsi.c1
-rw-r--r--drivers/tee/optee/core.c1
-rw-r--r--drivers/tee/optee/ffa_abi.c94
-rw-r--r--drivers/tee/optee/notif.c2
-rw-r--r--drivers/tee/optee/optee_private.h5
-rw-r--r--drivers/tee/optee/smc_abi.c60
-rw-r--r--drivers/thermal/broadcom/brcmstb_thermal.c2
-rw-r--r--drivers/thermal/intel/Kconfig14
-rw-r--r--drivers/thermal/intel/Makefile1
-rw-r--r--drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c23
-rw-r--r--drivers/thermal/intel/int340x_thermal/int3400_thermal.c157
-rw-r--r--drivers/thermal/intel/intel_hfi.c569
-rw-r--r--drivers/thermal/intel/intel_hfi.h17
-rw-r--r--drivers/thermal/intel/intel_powerclamp.c9
-rw-r--r--drivers/thermal/intel/therm_throt.c22
-rw-r--r--drivers/thermal/qcom/lmh.c62
-rw-r--r--drivers/thermal/qcom/tsens.c5
-rw-r--r--drivers/thermal/tegra/tegra-bpmp-thermal.c13
-rw-r--r--drivers/thermal/thermal_netlink.c58
-rw-r--r--drivers/thermal/thermal_netlink.h14
-rw-r--r--drivers/thermal/ti-soc-thermal/ti-thermal-common.c12
-rw-r--r--drivers/tty/n_gsm.c65
-rw-r--r--drivers/tty/n_tty.c10
-rw-r--r--drivers/tty/rpmsg_tty.c40
-rw-r--r--drivers/tty/serial/8250/8250_gsc.c2
-rw-r--r--drivers/tty/serial/8250/8250_of.c11
-rw-r--r--drivers/tty/serial/8250/8250_pci.c100
-rw-r--r--drivers/tty/serial/8250/8250_pericom.c2
-rw-r--r--drivers/tty/serial/8250/8250_port.c61
-rw-r--r--drivers/tty/serial/amba-pl011.c11
-rw-r--r--drivers/tty/serial/max3100.c5
-rw-r--r--drivers/tty/serial/max310x.c3
-rw-r--r--drivers/tty/serial/sc16is7xx.c7
-rw-r--r--drivers/tty/serial/serial_core.c34
-rw-r--r--drivers/tty/serial/stm32-usart.c14
-rw-r--r--drivers/tty/tty_io.c2
-rw-r--r--drivers/tty/vt/vt_ioctl.c3
-rw-r--r--drivers/usb/cdns3/drd.c6
-rw-r--r--drivers/usb/class/usbtmc.c13
-rw-r--r--drivers/usb/common/ulpi.c17
-rw-r--r--drivers/usb/core/hcd-pci.c4
-rw-r--r--drivers/usb/core/hcd.c14
-rw-r--r--drivers/usb/core/port.c9
-rw-r--r--drivers/usb/core/urb.c12
-rw-r--r--drivers/usb/dwc2/core.h2
-rw-r--r--drivers/usb/dwc2/drd.c6
-rw-r--r--drivers/usb/dwc2/gadget.c2
-rw-r--r--drivers/usb/dwc3/dwc3-pci.c21
-rw-r--r--drivers/usb/dwc3/dwc3-xilinx.c25
-rw-r--r--drivers/usb/dwc3/gadget.c15
-rw-r--r--drivers/usb/gadget/composite.c3
-rw-r--r--drivers/usb/gadget/function/f_fs.c56
-rw-r--r--drivers/usb/gadget/function/f_mass_storage.c1
-rw-r--r--drivers/usb/gadget/function/f_sourcesink.c1
-rw-r--r--drivers/usb/gadget/function/f_uac2.c4
-rw-r--r--drivers/usb/gadget/function/rndis.c18
-rw-r--r--drivers/usb/gadget/function/rndis.h1
-rw-r--r--drivers/usb/gadget/legacy/raw_gadget.c2
-rw-r--r--drivers/usb/gadget/udc/at91_udc.c2
-rw-r--r--drivers/usb/gadget/udc/core.c3
-rw-r--r--drivers/usb/gadget/udc/max3420_udc.c4
-rw-r--r--drivers/usb/gadget/udc/renesas_usb3.c2
-rw-r--r--drivers/usb/gadget/udc/udc-xilinx.c6
-rw-r--r--drivers/usb/host/max3421-hcd.c3
-rw-r--r--drivers/usb/host/xen-hcd.c26
-rw-r--r--drivers/usb/host/xhci-plat.c3
-rw-r--r--drivers/usb/host/xhci.c28
-rw-r--r--drivers/usb/misc/usb251xb.c4
-rw-r--r--drivers/usb/musb/omap2430.c1
-rw-r--r--drivers/usb/serial/ch341.c2
-rw-r--r--drivers/usb/serial/cp210x.c2
-rw-r--r--drivers/usb/serial/ftdi_sio.c3
-rw-r--r--drivers/usb/serial/ftdi_sio_ids.h3
-rw-r--r--drivers/usb/serial/option.c14
-rw-r--r--drivers/usb/storage/unusual_devs.h10
-rw-r--r--drivers/usb/typec/port-mapper.c10
-rw-r--r--drivers/usb/typec/tcpm/tcpci.c26
-rw-r--r--drivers/usb/typec/tcpm/tcpci.h1
-rw-r--r--drivers/usb/typec/tcpm/tcpm.c7
-rw-r--r--drivers/usb/typec/tipd/core.c7
-rw-r--r--drivers/usb/typec/ucsi/ucsi_ccg.c2
-rw-r--r--drivers/vdpa/mlx5/net/mlx5_vnet.c34
-rw-r--r--drivers/vdpa/vdpa.c2
-rw-r--r--drivers/vdpa/vdpa_user/iova_domain.c2
-rw-r--r--drivers/vdpa/virtio_pci/vp_vdpa.c2
-rw-r--r--drivers/vhost/iotlb.c11
-rw-r--r--drivers/vhost/vdpa.c2
-rw-r--r--drivers/vhost/vhost.c11
-rw-r--r--drivers/vhost/vsock.c24
-rw-r--r--drivers/video/backlight/ams369fg06.c3
-rw-r--r--drivers/video/backlight/corgi_lcd.c3
-rw-r--r--drivers/video/backlight/ili922x.c3
-rw-r--r--drivers/video/backlight/l4f00242t03.c3
-rw-r--r--drivers/video/backlight/lms501kf03.c3
-rw-r--r--drivers/video/backlight/ltv350qv.c3
-rw-r--r--drivers/video/backlight/tdo24m.c3
-rw-r--r--drivers/video/backlight/tosa_lcd.c4
-rw-r--r--drivers/video/backlight/vgg2432a4.c4
-rw-r--r--drivers/video/console/Kconfig20
-rw-r--r--drivers/video/fbdev/core/bitblit.c16
-rw-r--r--drivers/video/fbdev/core/fb_defio.c9
-rw-r--r--drivers/video/fbdev/core/fbcon.c554
-rw-r--r--drivers/video/fbdev/core/fbcon.h72
-rw-r--r--drivers/video/fbdev/core/fbcon_ccw.c28
-rw-r--r--drivers/video/fbdev/core/fbcon_cw.c28
-rw-r--r--drivers/video/fbdev/core/fbcon_rotate.h9
-rw-r--r--drivers/video/fbdev/core/fbcon_ud.c37
-rw-r--r--drivers/video/fbdev/core/fbmem.c2
-rw-r--r--drivers/video/fbdev/core/tileblit.c16
-rw-r--r--drivers/video/fbdev/hyperv_fb.c16
-rw-r--r--drivers/video/fbdev/omap/lcd_mipid.c4
-rw-r--r--drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c4
-rw-r--r--drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c4
-rw-r--r--drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c4
-rw-r--r--drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c4
-rw-r--r--drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c4
-rw-r--r--drivers/video/fbdev/skeletonfb.c12
-rw-r--r--drivers/video/fbdev/stifb.c45
-rw-r--r--drivers/virt/Kconfig11
-rw-r--r--drivers/virt/Makefile1
-rw-r--r--drivers/virt/vmgenid.c100
-rw-r--r--drivers/virtio/Kconfig1
-rw-r--r--drivers/virtio/virtio.c56
-rw-r--r--drivers/virtio/virtio_mem.c9
-rw-r--r--drivers/virtio/virtio_vdpa.c2
-rw-r--r--drivers/xen/gntalloc.c25
-rw-r--r--drivers/xen/grant-table.c71
-rw-r--r--drivers/xen/pci.c2
-rw-r--r--drivers/xen/pvcalls-front.c8
-rw-r--r--drivers/xen/xenbus/xenbus_client.c24
-rw-r--r--fs/9p/fid.c9
-rw-r--r--fs/9p/vfs_addr.c37
-rw-r--r--fs/9p/vfs_inode.c2
-rw-r--r--fs/Kconfig8
-rw-r--r--fs/Kconfig.binfmt13
-rw-r--r--fs/Makefile2
-rw-r--r--fs/adfs/inode.c3
-rw-r--r--fs/adfs/super.c2
-rw-r--r--fs/affs/file.c6
-rw-r--r--fs/affs/super.c2
-rw-r--r--fs/afs/dir.c18
-rw-r--r--fs/afs/file.c28
-rw-r--r--fs/afs/internal.h6
-rw-r--r--fs/afs/super.c2
-rw-r--r--fs/afs/write.c19
-rw-r--r--fs/aio.c2
-rw-r--r--fs/befs/linuxvfs.c2
-rw-r--r--fs/bfs/file.c3
-rw-r--r--fs/bfs/inode.c2
-rw-r--r--fs/binfmt_elf.c180
-rw-r--r--fs/binfmt_elf_fdpic.c20
-rw-r--r--fs/binfmt_elf_test.c64
-rw-r--r--fs/binfmt_flat.c7
-rw-r--r--fs/binfmt_misc.c6
-rw-r--r--fs/btrfs/backref.c7
-rw-r--r--fs/btrfs/block-group.c75
-rw-r--r--fs/btrfs/block-group.h1
-rw-r--r--fs/btrfs/btrfs_inode.h42
-rw-r--r--fs/btrfs/check-integrity.c1
-rw-r--r--fs/btrfs/compression.c63
-rw-r--r--fs/btrfs/compression.h10
-rw-r--r--fs/btrfs/ctree.c108
-rw-r--r--fs/btrfs/ctree.h104
-rw-r--r--fs/btrfs/delalloc-space.c18
-rw-r--r--fs/btrfs/dev-replace.c18
-rw-r--r--fs/btrfs/disk-io.c286
-rw-r--r--fs/btrfs/disk-io.h2
-rw-r--r--fs/btrfs/extent-io-tree.h4
-rw-r--r--fs/btrfs/extent-tree.c158
-rw-r--r--fs/btrfs/extent_io.c102
-rw-r--r--fs/btrfs/extent_map.c6
-rw-r--r--fs/btrfs/extent_map.h8
-rw-r--r--fs/btrfs/file-item.c76
-rw-r--r--fs/btrfs/file.c176
-rw-r--r--fs/btrfs/free-space-tree.c2
-rw-r--r--fs/btrfs/inode.c1301
-rw-r--r--fs/btrfs/ioctl.c665
-rw-r--r--fs/btrfs/lzo.c20
-rw-r--r--fs/btrfs/ordered-data.c132
-rw-r--r--fs/btrfs/ordered-data.h25
-rw-r--r--fs/btrfs/print-tree.c5
-rw-r--r--fs/btrfs/qgroup.c100
-rw-r--r--fs/btrfs/reflink.c43
-rw-r--r--fs/btrfs/relocation.c24
-rw-r--r--fs/btrfs/root-tree.c15
-rw-r--r--fs/btrfs/scrub.c2
-rw-r--r--fs/btrfs/send.c15
-rw-r--r--fs/btrfs/send.h2
-rw-r--r--fs/btrfs/space-info.c5
-rw-r--r--fs/btrfs/subpage.c2
-rw-r--r--fs/btrfs/super.c96
-rw-r--r--fs/btrfs/sysfs.c15
-rw-r--r--fs/btrfs/tests/extent-map-tests.c2
-rw-r--r--fs/btrfs/transaction.c120
-rw-r--r--fs/btrfs/transaction.h5
-rw-r--r--fs/btrfs/tree-checker.c68
-rw-r--r--fs/btrfs/tree-log.c1030
-rw-r--r--fs/btrfs/tree-log.h7
-rw-r--r--fs/btrfs/volumes.c147
-rw-r--r--fs/btrfs/volumes.h7
-rw-r--r--fs/btrfs/zoned.c167
-rw-r--r--fs/buffer.c118
-rw-r--r--fs/cachefiles/interface.c2
-rw-r--r--fs/cachefiles/io.c59
-rw-r--r--fs/cachefiles/xattr.c23
-rw-r--r--fs/ceph/addr.c105
-rw-r--r--fs/ceph/cache.h13
-rw-r--r--fs/ceph/caps.c55
-rw-r--r--fs/ceph/file.c9
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/super.c1
-rw-r--r--fs/ceph/super.h1
-rw-r--r--fs/cifs/cifs_swn.c6
-rw-r--r--fs/cifs/cifsacl.c9
-rw-r--r--fs/cifs/cifsfs.c17
-rw-r--r--fs/cifs/cifsproto.h3
-rw-r--r--fs/cifs/connect.c99
-rw-r--r--fs/cifs/dfs_cache.c2
-rw-r--r--fs/cifs/file.c270
-rw-r--r--fs/cifs/fs_context.c4
-rw-r--r--fs/cifs/fscache.c126
-rw-r--r--fs/cifs/fscache.h79
-rw-r--r--fs/cifs/inode.c8
-rw-r--r--fs/cifs/sess.c17
-rw-r--r--fs/cifs/smb1ops.c4
-rw-r--r--fs/cifs/smb2ops.c18
-rw-r--r--fs/cifs/transport.c5
-rw-r--r--fs/cifs/xattr.c2
-rw-r--r--fs/coda/file.c1
-rw-r--r--fs/coda/inode.c2
-rw-r--r--fs/compat_binfmt_elf.c2
-rw-r--r--fs/configfs/dir.c20
-rw-r--r--fs/coredump.c86
-rw-r--r--fs/crypto/bio.c13
-rw-r--r--fs/crypto/crypto.c8
-rw-r--r--fs/crypto/inline_crypt.c93
-rw-r--r--fs/dax.c1
-rw-r--r--fs/dcache.c3
-rw-r--r--fs/devpts/inode.c2
-rw-r--r--fs/direct-io.c5
-rw-r--r--fs/ecryptfs/mmap.c5
-rw-r--r--fs/ecryptfs/super.c2
-rw-r--r--fs/efs/super.c2
-rw-r--r--fs/erofs/data.c20
-rw-r--r--fs/erofs/dir.c21
-rw-r--r--fs/erofs/erofs_fs.h5
-rw-r--r--fs/erofs/inode.c4
-rw-r--r--fs/erofs/internal.h4
-rw-r--r--fs/erofs/namei.c54
-rw-r--r--fs/erofs/super.c40
-rw-r--r--fs/erofs/sysfs.c8
-rw-r--r--fs/erofs/zdata.c302
-rw-r--r--fs/erofs/zmap.c78
-rw-r--r--fs/exec.c32
-rw-r--r--fs/exfat/inode.c3
-rw-r--r--fs/exfat/super.c2
-rw-r--r--fs/ext2/ialloc.c5
-rw-r--r--fs/ext2/inode.c9
-rw-r--r--fs/ext2/super.c2
-rw-r--r--fs/ext4/acl.c8
-rw-r--r--fs/ext4/balloc.c1
-rw-r--r--fs/ext4/block_validity.c26
-rw-r--r--fs/ext4/ext4.h38
-rw-r--r--fs/ext4/ext4_jbd2.h2
-rw-r--r--fs/ext4/extents.c17
-rw-r--r--fs/ext4/fast_commit.c306
-rw-r--r--fs/ext4/fast_commit.h2
-rw-r--r--fs/ext4/file.c10
-rw-r--r--fs/ext4/hash.c2
-rw-r--r--fs/ext4/indirect.c2
-rw-r--r--fs/ext4/inline.c32
-rw-r--r--fs/ext4/inode.c173
-rw-r--r--fs/ext4/ioctl.c10
-rw-r--r--fs/ext4/mballoc.c371
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/namei.c41
-rw-r--r--fs/ext4/orphan.c4
-rw-r--r--fs/ext4/page-io.c13
-rw-r--r--fs/ext4/readpage.c8
-rw-r--r--fs/ext4/resize.c7
-rw-r--r--fs/ext4/super.c115
-rw-r--r--fs/ext4/sysfs.c8
-rw-r--r--fs/ext4/xattr.c6
-rw-r--r--fs/f2fs/Kconfig7
-rw-r--r--fs/f2fs/acl.c21
-rw-r--r--fs/f2fs/checkpoint.c89
-rw-r--r--fs/f2fs/compress.c17
-rw-r--r--fs/f2fs/data.c234
-rw-r--r--fs/f2fs/debug.c25
-rw-r--r--fs/f2fs/dir.c22
-rw-r--r--fs/f2fs/f2fs.h175
-rw-r--r--fs/f2fs/file.c175
-rw-r--r--fs/f2fs/gc.c53
-rw-r--r--fs/f2fs/hash.c2
-rw-r--r--fs/f2fs/inline.c4
-rw-r--r--fs/f2fs/inode.c7
-rw-r--r--fs/f2fs/namei.c82
-rw-r--r--fs/f2fs/node.c121
-rw-r--r--fs/f2fs/node.h3
-rw-r--r--fs/f2fs/recovery.c39
-rw-r--r--fs/f2fs/segment.c81
-rw-r--r--fs/f2fs/segment.h5
-rw-r--r--fs/f2fs/super.c115
-rw-r--r--fs/f2fs/sysfs.c50
-rw-r--r--fs/f2fs/verity.c4
-rw-r--r--fs/f2fs/xattr.c12
-rw-r--r--fs/fat/inode.c5
-rw-r--r--fs/file_table.c6
-rw-r--r--fs/freevxfs/vxfs_super.c2
-rw-r--r--fs/fs-writeback.c46
-rw-r--r--fs/fscache/io.c28
-rw-r--r--fs/fuse/control.c17
-rw-r--r--fs/fuse/dax.c3
-rw-r--r--fs/fuse/dev.c20
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/fuse/file.c34
-rw-r--r--fs/fuse/fuse_i.h1
-rw-r--r--fs/fuse/inode.c5
-rw-r--r--fs/fuse/ioctl.c9
-rw-r--r--fs/fuse/virtio_fs.c1
-rw-r--r--fs/gfs2/aops.c43
-rw-r--r--fs/gfs2/file.c7
-rw-r--r--fs/gfs2/glock.c3
-rw-r--r--fs/gfs2/lops.c8
-rw-r--r--fs/gfs2/meta_io.c10
-rw-r--r--fs/gfs2/ops_fstype.c4
-rw-r--r--fs/gfs2/super.c2
-rw-r--r--fs/gfs2/sys.c2
-rw-r--r--fs/hfs/inode.c6
-rw-r--r--fs/hfs/mdb.c2
-rw-r--r--fs/hfs/super.c2
-rw-r--r--fs/hfsplus/inode.c6
-rw-r--r--fs/hfsplus/super.c2
-rw-r--r--fs/hfsplus/wrapper.c5
-rw-r--r--fs/hostfs/hostfs_kern.c5
-rw-r--r--fs/hpfs/file.c3
-rw-r--r--fs/hpfs/super.c2
-rw-r--r--fs/hugetlbfs/inode.c4
-rw-r--r--fs/inode.c2
-rw-r--r--fs/internal.h9
-rw-r--r--fs/io-wq.c114
-rw-r--r--fs/io_uring.c1312
-rw-r--r--fs/ioctl.c4
-rw-r--r--fs/iomap/buffered-io.c124
-rw-r--r--fs/iomap/direct-io.c14
-rw-r--r--fs/iomap/fiemap.c1
-rw-r--r--fs/iomap/trace.h2
-rw-r--r--fs/isofs/inode.c2
-rw-r--r--fs/jbd2/commit.c21
-rw-r--r--fs/jbd2/journal.c10
-rw-r--r--fs/jbd2/transaction.c126
-rw-r--r--fs/jffs2/super.c2
-rw-r--r--fs/jfs/inode.c3
-rw-r--r--fs/jfs/jfs_logmgr.c11
-rw-r--r--fs/jfs/jfs_metapage.c23
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/ksmbd/auth.c27
-rw-r--r--fs/ksmbd/smb2pdu.c45
-rw-r--r--fs/ksmbd/smb_common.c5
-rw-r--r--fs/ksmbd/transport_rdma.c2
-rw-r--r--fs/ksmbd/vfs.c1
-rw-r--r--fs/ksmbd/vfs.h1
-rw-r--r--fs/libfs.c25
-rw-r--r--fs/lockd/svc.c24
-rw-r--r--fs/lockd/svcsubs.c18
-rw-r--r--fs/minix/inode.c5
-rw-r--r--fs/mpage.c82
-rw-r--r--fs/namei.c10
-rw-r--r--fs/namespace.c32
-rw-r--r--fs/nfs/blocklayout/blocklayout.c26
-rw-r--r--fs/nfs/blocklayout/rpc_pipefs.c1
-rw-r--r--fs/nfs/callback.c66
-rw-r--r--fs/nfs/callback.h2
-rw-r--r--fs/nfs/callback_proc.c2
-rw-r--r--fs/nfs/callback_xdr.c18
-rw-r--r--fs/nfs/client.c9
-rw-r--r--fs/nfs/dir.c170
-rw-r--r--fs/nfs/file.c32
-rw-r--r--fs/nfs/filelayout/filelayout.h2
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c4
-rw-r--r--fs/nfs/inode.c11
-rw-r--r--fs/nfs/internal.h1
-rw-r--r--fs/nfs/nfs3proc.c5
-rw-r--r--fs/nfs/nfs42proc.c13
-rw-r--r--fs/nfs/nfs4_fs.h14
-rw-r--r--fs/nfs/nfs4client.c5
-rw-r--r--fs/nfs/nfs4namespace.c19
-rw-r--r--fs/nfs/nfs4proc.c203
-rw-r--r--fs/nfs/nfs4state.c7
-rw-r--r--fs/nfs/nfs4xdr.c49
-rw-r--r--fs/nfs/sysfs.c3
-rw-r--r--fs/nfs/write.c22
-rw-r--r--fs/nfsd/Kconfig12
-rw-r--r--fs/nfsd/Makefile3
-rw-r--r--fs/nfsd/blocklayout.c1
-rw-r--r--fs/nfsd/filecache.c7
-rw-r--r--fs/nfsd/flexfilelayout.c2
-rw-r--r--fs/nfsd/nfs3proc.c19
-rw-r--r--fs/nfsd/nfs3xdr.c4
-rw-r--r--fs/nfsd/nfs4layouts.c2
-rw-r--r--fs/nfsd/nfs4proc.c13
-rw-r--r--fs/nfsd/nfs4state.c24
-rw-r--r--fs/nfsd/nfs4xdr.c20
-rw-r--r--fs/nfsd/nfscache.c33
-rw-r--r--fs/nfsd/nfsctl.c15
-rw-r--r--fs/nfsd/nfsd.h2
-rw-r--r--fs/nfsd/nfsfh.c4
-rw-r--r--fs/nfsd/nfsfh.h20
-rw-r--r--fs/nfsd/nfsproc.c2
-rw-r--r--fs/nfsd/nfssvc.c25
-rw-r--r--fs/nfsd/trace.h121
-rw-r--r--fs/nfsd/vfs.c67
-rw-r--r--fs/nfsd/vfs.h6
-rw-r--r--fs/nfsd/xdr.h2
-rw-r--r--fs/nilfs2/inode.c40
-rw-r--r--fs/nilfs2/mdt.c3
-rw-r--r--fs/nilfs2/segbuf.c63
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/notify/fanotify/fanotify_user.c9
-rw-r--r--fs/ntfs/aops.c21
-rw-r--r--fs/ntfs/inode.c6
-rw-r--r--fs/ntfs3/fsntfs.c36
-rw-r--r--fs/ntfs3/inode.c2
-rw-r--r--fs/ntfs3/super.c2
-rw-r--r--fs/ocfs2/alloc.c2
-rw-r--r--fs/ocfs2/aops.c6
-rw-r--r--fs/ocfs2/cluster/heartbeat.c4
-rw-r--r--fs/ocfs2/cluster/nodemanager.c2
-rw-r--r--fs/ocfs2/dir.c4
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c2
-rw-r--r--fs/ocfs2/file.c13
-rw-r--r--fs/ocfs2/inode.c2
-rw-r--r--fs/ocfs2/localalloc.c6
-rw-r--r--fs/ocfs2/namei.c2
-rw-r--r--fs/ocfs2/ocfs2.h4
-rw-r--r--fs/ocfs2/quota_global.c2
-rw-r--r--fs/ocfs2/stack_user.c18
-rw-r--r--fs/ocfs2/stackglue.c13
-rw-r--r--fs/ocfs2/suballoc.c25
-rw-r--r--fs/ocfs2/super.c24
-rw-r--r--fs/ocfs2/xattr.c2
-rw-r--r--fs/omfs/file.c3
-rw-r--r--fs/openpromfs/inode.c2
-rw-r--r--fs/orangefs/inode.c121
-rw-r--r--fs/orangefs/super.c2
-rw-r--r--fs/overlayfs/copy_up.c16
-rw-r--r--fs/overlayfs/super.c2
-rw-r--r--fs/pipe.c11
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/page.c1
-rw-r--r--fs/proc/task_mmu.c49
-rw-r--r--fs/pstore/platform.c38
-rw-r--r--fs/pstore/ram_core.c4
-rw-r--r--fs/qnx4/inode.c2
-rw-r--r--fs/qnx6/inode.c2
-rw-r--r--fs/quota/dquot.c11
-rw-r--r--fs/read_write.c34
-rw-r--r--fs/reiserfs/inode.c40
-rw-r--r--fs/reiserfs/journal.c4
-rw-r--r--fs/reiserfs/super.c2
-rw-r--r--fs/remap_range.c23
-rw-r--r--fs/romfs/super.c2
-rw-r--r--fs/splice.c24
-rw-r--r--fs/squashfs/block.c11
-rw-r--r--fs/squashfs/super.c2
-rw-r--r--fs/stat.c49
-rw-r--r--fs/super.c19
-rw-r--r--fs/sync.c18
-rw-r--r--fs/sysv/inode.c2
-rw-r--r--fs/sysv/itree.c3
-rw-r--r--fs/tracefs/inode.c5
-rw-r--r--fs/ubifs/file.c34
-rw-r--r--fs/ubifs/super.c2
-rw-r--r--fs/udf/file.c3
-rw-r--r--fs/udf/inode.c12
-rw-r--r--fs/udf/super.c2
-rw-r--r--fs/ufs/inode.c3
-rw-r--r--fs/ufs/super.c2
-rw-r--r--fs/unicode/Kconfig18
-rw-r--r--fs/unicode/Makefile6
-rw-r--r--fs/userfaultfd.c11
-rw-r--r--fs/vboxsf/file.c2
-rw-r--r--fs/vboxsf/super.c2
-rw-r--r--fs/vboxsf/utils.c1
-rw-r--r--fs/xfs/libxfs/xfs_btree.c2
-rw-r--r--fs/xfs/xfs_aops.c23
-rw-r--r--fs/xfs/xfs_bio_io.c14
-rw-r--r--fs/xfs/xfs_bmap_util.c9
-rw-r--r--fs/xfs/xfs_buf.c7
-rw-r--r--fs/xfs/xfs_file.c86
-rw-r--r--fs/xfs/xfs_icache.c2
-rw-r--r--fs/xfs/xfs_inode.h9
-rw-r--r--fs/xfs/xfs_ioctl.c2
-rw-r--r--fs/xfs/xfs_log.c14
-rw-r--r--fs/xfs/xfs_pnfs.c42
-rw-r--r--fs/xfs/xfs_super.c13
-rw-r--r--fs/zonefs/super.c15
-rw-r--r--include/acpi/acpi_bus.h2
-rw-r--r--include/acpi/actypes.h4
-rw-r--r--include/acpi/apei.h4
-rw-r--r--include/acpi/platform/aclinux.h5
-rw-r--r--include/asm-generic/bitops/instrumented-atomic.h12
-rw-r--r--include/asm-generic/bitops/instrumented-non-atomic.h16
-rw-r--r--include/asm-generic/vmlinux.lds.h11
-rw-r--r--include/asm-generic/xor.h84
-rw-r--r--include/clocksource/arm_arch_timer.h1
-rw-r--r--include/crypto/algapi.h10
-rw-r--r--include/crypto/asym_tpm_subtype.h19
-rw-r--r--include/crypto/dh.h26
-rw-r--r--include/crypto/internal/blake2s.h40
-rw-r--r--include/crypto/internal/kpp.h158
-rw-r--r--include/crypto/sm3.h34
-rw-r--r--include/dt-bindings/clock/dra7.h14
-rw-r--r--include/dt-bindings/interrupt-controller/apple-aic.h2
-rw-r--r--include/dt-bindings/regulator/richtek,rt5190a-regulator.h15
-rw-r--r--include/dt-bindings/regulator/ti,tps62864.h9
-rw-r--r--include/keys/system_keyring.h14
-rw-r--r--include/linux/acpi.h7
-rw-r--r--include/linux/acpi_agdi.h13
-rw-r--r--include/linux/amba/bus.h21
-rw-r--r--include/linux/arch_topology.h4
-rw-r--r--include/linux/arm-smccc.h5
-rw-r--r--include/linux/arm_sdei.h2
-rw-r--r--include/linux/ata.h2
-rw-r--r--include/linux/atomic/atomic-arch-fallback.h38
-rw-r--r--include/linux/backing-dev-defs.h8
-rw-r--r--include/linux/backing-dev.h50
-rw-r--r--include/linux/binfmts.h15
-rw-r--r--include/linux/bio.h38
-rw-r--r--include/linux/blk-cgroup.h461
-rw-r--r--include/linux/blk-mq.h6
-rw-r--r--include/linux/blk_types.h10
-rw-r--r--include/linux/blkdev.h300
-rw-r--r--include/linux/bpf.h20
-rw-r--r--include/linux/buffer_head.h9
-rw-r--r--include/linux/cacheflush.h2
-rw-r--r--include/linux/ceph/libceph.h1
-rw-r--r--include/linux/ceph/messenger.h5
-rw-r--r--include/linux/cgroup.h5
-rw-r--r--include/linux/cgroup_api.h1
-rw-r--r--include/linux/cma.h10
-rw-r--r--include/linux/compiler-gcc.h4
-rw-r--r--include/linux/compiler.h21
-rw-r--r--include/linux/coredump.h20
-rw-r--r--include/linux/cpufreq.h8
-rw-r--r--include/linux/cpuhotplug.h3
-rw-r--r--include/linux/cpumask.h18
-rw-r--r--include/linux/cpumask_api.h1
-rw-r--r--include/linux/crypto.h9
-rw-r--r--include/linux/damon.h87
-rw-r--r--include/linux/dtpm.h36
-rw-r--r--include/linux/elfcore.h4
-rw-r--r--include/linux/entry-common.h15
-rw-r--r--include/linux/ethtool.h2
-rw-r--r--include/linux/fault-inject.h2
-rw-r--r--include/linux/fb.h2
-rw-r--r--include/linux/firmware/xlnx-zynqmp.h8
-rw-r--r--include/linux/fs.h159
-rw-r--r--include/linux/fs_api.h1
-rw-r--r--include/linux/fscache.h8
-rw-r--r--include/linux/fscrypt.h18
-rw-r--r--include/linux/fsnotify.h49
-rw-r--r--include/linux/genhd.h291
-rw-r--r--include/linux/gfp.h10
-rw-r--r--include/linux/gfp_api.h1
-rw-r--r--include/linux/hashtable_api.h1
-rw-r--r--include/linux/highmem-internal.h10
-rw-r--r--include/linux/hmm.h9
-rw-r--r--include/linux/hrtimer_api.h1
-rw-r--r--include/linux/huge_mm.h59
-rw-r--r--include/linux/hugetlb.h13
-rw-r--r--include/linux/hw_random.h2
-rw-r--r--include/linux/hwmon.h2
-rw-r--r--include/linux/hyperv.h1
-rw-r--r--include/linux/if_arp.h1
-rw-r--r--include/linux/if_vlan.h6
-rw-r--r--include/linux/interrupt.h11
-rw-r--r--include/linux/io_uring.h5
-rw-r--r--include/linux/ioasid.h21
-rw-r--r--include/linux/iomap.h7
-rw-r--r--include/linux/irq.h9
-rw-r--r--include/linux/irqchip/versatile-fpga.h14
-rw-r--r--include/linux/irqdesc.h1
-rw-r--r--include/linux/irqdomain.h15
-rw-r--r--include/linux/jbd2.h20
-rw-r--r--include/linux/jump_label.h13
-rw-r--r--include/linux/kasan-enabled.h35
-rw-r--r--include/linux/kasan.h23
-rw-r--r--include/linux/kernel.h7
-rw-r--r--include/linux/kfence.h2
-rw-r--r--include/linux/kobject_api.h1
-rw-r--r--include/linux/kref_api.h1
-rw-r--r--include/linux/ksm.h6
-rw-r--r--include/linux/kthread.h22
-rw-r--r--include/linux/ktime_api.h1
-rw-r--r--include/linux/kvm_host.h112
-rw-r--r--include/linux/libata.h1
-rw-r--r--include/linux/linkage.h67
-rw-r--r--include/linux/list_lru.h17
-rw-r--r--include/linux/llist_api.h1
-rw-r--r--include/linux/local_lock_internal.h6
-rw-r--r--include/linux/lockdep_api.h1
-rw-r--r--include/linux/lsm_hook_defs.h4
-rw-r--r--include/linux/lsm_hooks.h5
-rw-r--r--include/linux/memcontrol.h51
-rw-r--r--include/linux/memory.h12
-rw-r--r--include/linux/memory_hotplug.h124
-rw-r--r--include/linux/memremap.h27
-rw-r--r--include/linux/mfd/lpc_ich.h2
-rw-r--r--include/linux/migrate.h8
-rw-r--r--include/linux/mlx5/mlx5_ifc.h5
-rw-r--r--include/linux/mm.h334
-rw-r--r--include/linux/mm_api.h1
-rw-r--r--include/linux/mm_inline.h108
-rw-r--r--include/linux/mm_types.h34
-rw-r--r--include/linux/mmzone.h22
-rw-r--r--include/linux/mod_devicetable.h2
-rw-r--r--include/linux/mutex_api.h1
-rw-r--r--include/linux/netdevice.h5
-rw-r--r--include/linux/netfilter_netdev.h4
-rw-r--r--include/linux/netfs.h7
-rw-r--r--include/linux/nfs.h8
-rw-r--r--include/linux/nfs_fs.h13
-rw-r--r--include/linux/nfs_fs_sb.h5
-rw-r--r--include/linux/nfs_xdr.h5
-rw-r--r--include/linux/node.h25
-rw-r--r--include/linux/nvme-fc-driver.h2
-rw-r--r--include/linux/nvme-tcp.h1
-rw-r--r--include/linux/nvme.h11
-rw-r--r--include/linux/nvmem-provider.h4
-rw-r--r--include/linux/overflow.h114
-rw-r--r--include/linux/page-flags.h92
-rw-r--r--include/linux/page_table_check.h19
-rw-r--r--include/linux/pageblock-flags.h7
-rw-r--r--include/linux/pagemap.h182
-rw-r--r--include/linux/part_stat.h2
-rw-r--r--include/linux/perf/arm_pmu.h2
-rw-r--r--include/linux/perf_event.h15
-rw-r--r--include/linux/perf_event_api.h1
-rw-r--r--include/linux/pgtable.h1
-rw-r--r--include/linux/pgtable_api.h1
-rw-r--r--include/linux/phy.h4
-rw-r--r--include/linux/pid_namespace.h5
-rw-r--r--include/linux/platform_data/spi-s3c64xx.h14
-rw-r--r--include/linux/platform_data/x86/spi-intel.h (renamed from include/linux/platform_data/x86/intel-spi.h)12
-rw-r--r--include/linux/pm.h8
-rw-r--r--include/linux/pm_runtime.h4
-rw-r--r--include/linux/psi.h14
-rw-r--r--include/linux/psi_types.h4
-rw-r--r--include/linux/pstore.h6
-rw-r--r--include/linux/ptrace_api.h1
-rw-r--r--include/linux/quota.h2
-rw-r--r--include/linux/raid/xor.h21
-rw-r--r--include/linux/random.h41
-rw-r--r--include/linux/randomize_kstack.h21
-rw-r--r--include/linux/rcupdate.h4
-rw-r--r--include/linux/rcutiny.h3
-rw-r--r--include/linux/rcutree.h4
-rw-r--r--include/linux/rcuwait.h6
-rw-r--r--include/linux/rcuwait_api.h1
-rw-r--r--include/linux/refcount_api.h1
-rw-r--r--include/linux/regmap.h6
-rw-r--r--include/linux/rfkill.h5
-rw-r--r--include/linux/rmap.h76
-rw-r--r--include/linux/sbitmap.h51
-rw-r--r--include/linux/sched.h41
-rw-r--r--include/linux/sched/affinity.h1
-rw-r--r--include/linux/sched/cond_resched.h1
-rw-r--r--include/linux/sched/deadline.h2
-rw-r--r--include/linux/sched/isolation.h43
-rw-r--r--include/linux/sched/mm.h26
-rw-r--r--include/linux/sched/posix-timers.h1
-rw-r--r--include/linux/sched/rseq_api.h1
-rw-r--r--include/linux/sched/sysctl.h14
-rw-r--r--include/linux/sched/task.h4
-rw-r--r--include/linux/sched/task_flags.h1
-rw-r--r--include/linux/sched/task_stack.h2
-rw-r--r--include/linux/sched/thread_info_api.h1
-rw-r--r--include/linux/sched/topology.h1
-rw-r--r--include/linux/sched_clock.h2
-rw-r--r--include/linux/security.h8
-rw-r--r--include/linux/seqlock_api.h1
-rw-r--r--include/linux/shmem_fs.h1
-rw-r--r--include/linux/skbuff.h2
-rw-r--r--include/linux/slab.h6
-rw-r--r--include/linux/softirq.h1
-rw-r--r--include/linux/spi/pxa2xx_spi.h1
-rw-r--r--include/linux/spi/s3c24xx.h5
-rw-r--r--include/linux/spi/spi.h41
-rw-r--r--include/linux/spinlock_api.h1
-rw-r--r--include/linux/sunrpc/svc.h26
-rw-r--r--include/linux/sunrpc/svc_xprt.h12
-rw-r--r--include/linux/sunrpc/xdr.h2
-rw-r--r--include/linux/suspend.h15
-rw-r--r--include/linux/swait_api.h1
-rw-r--r--include/linux/swap.h17
-rw-r--r--include/linux/syscalls_api.h1
-rw-r--r--include/linux/sysctl.h2
-rw-r--r--include/linux/thread_info.h5
-rw-r--r--include/linux/topology.h3
-rw-r--r--include/linux/trace_events.h22
-rw-r--r--include/linux/u64_stats_sync_api.h1
-rw-r--r--include/linux/uaccess.h2
-rw-r--r--include/linux/usb/role.h6
-rw-r--r--include/linux/vdpa.h18
-rw-r--r--include/linux/virtio.h1
-rw-r--r--include/linux/virtio_config.h3
-rw-r--r--include/linux/vm_event_item.h3
-rw-r--r--include/linux/vmalloc.h4
-rw-r--r--include/linux/wait_api.h1
-rw-r--r--include/linux/watch_queue.h3
-rw-r--r--include/linux/workqueue_api.h1
-rw-r--r--include/linux/xarray.h9
-rw-r--r--include/net/addrconf.h4
-rw-r--r--include/net/af_vsock.h3
-rw-r--r--include/net/ax25.h12
-rw-r--r--include/net/bluetooth/bluetooth.h3
-rw-r--r--include/net/bluetooth/hci_core.h8
-rw-r--r--include/net/bond_3ad.h2
-rw-r--r--include/net/bonding.h2
-rw-r--r--include/net/checksum.h52
-rw-r--r--include/net/dsa.h1
-rw-r--r--include/net/dst_metadata.h14
-rw-r--r--include/net/esp.h2
-rw-r--r--include/net/ip.h21
-rw-r--r--include/net/ip6_fib.h12
-rw-r--r--include/net/ipv6.h5
-rw-r--r--include/net/ndisc.h4
-rw-r--r--include/net/neighbour.h18
-rw-r--r--include/net/netfilter/nf_conntrack.h1
-rw-r--r--include/net/netfilter/nf_flow_table.h6
-rw-r--r--include/net/netfilter/nf_queue.h2
-rw-r--r--include/net/netfilter/nf_tables.h2
-rw-r--r--include/net/netfilter/nf_tables_offload.h2
-rw-r--r--include/net/netns/ipv6.h3
-rw-r--r--include/net/route.h2
-rw-r--r--include/net/sock.h4
-rw-r--r--include/net/tcp.h4
-rw-r--r--include/net/xfrm.h6
-rw-r--r--include/ras/ras_event.h1
-rw-r--r--include/scsi/scsi_cmnd.h9
-rw-r--r--include/scsi/scsi_driver.h9
-rw-r--r--include/soc/fsl/dpaa2-fd.h3
-rw-r--r--include/soc/fsl/qe/immap_qe.h3
-rw-r--r--include/soc/fsl/qe/qe_tdm.h4
-rw-r--r--include/soc/fsl/qe/ucc_fast.h2
-rw-r--r--include/soc/fsl/qe/ucc_slow.h2
-rw-r--r--include/sound/pcm.h15
-rw-r--r--include/trace/bpf_probe.h6
-rw-r--r--include/trace/events/block.h49
-rw-r--r--include/trace/events/btrfs.h1
-rw-r--r--include/trace/events/cachefiles.h2
-rw-r--r--include/trace/events/compaction.h26
-rw-r--r--include/trace/events/ext4.h328
-rw-r--r--include/trace/events/io_uring.h333
-rw-r--r--include/trace/events/random.h233
-rw-r--r--include/trace/events/rcu.h9
-rw-r--r--include/trace/events/sched.h11
-rw-r--r--include/trace/events/skb.h2
-rw-r--r--include/trace/events/sunrpc.h314
-rw-r--r--include/trace/events/vmscan.h10
-rw-r--r--include/trace/events/writeback.h28
-rw-r--r--include/trace/perf.h6
-rw-r--r--include/trace/stages/stage1_defines.h6
-rw-r--r--include/trace/stages/stage2_defines.h10
-rw-r--r--include/trace/stages/stage3_defines.h6
-rw-r--r--include/trace/stages/stage4_defines.h6
-rw-r--r--include/trace/stages/stage5_defines.h6
-rw-r--r--include/trace/stages/stage6_defines.h20
-rw-r--r--include/trace/stages/stage7_defines.h2
-rw-r--r--include/uapi/linux/btrfs.h133
-rw-r--r--include/uapi/linux/btrfs_tree.h3
-rw-r--r--include/uapi/linux/cyclades.h35
-rw-r--r--include/uapi/linux/elf.h10
-rw-r--r--include/uapi/linux/input-event-codes.h4
-rw-r--r--include/uapi/linux/io_uring.h17
-rw-r--r--include/uapi/linux/kvm.h8
-rw-r--r--include/uapi/linux/magic.h1
-rw-r--r--include/uapi/linux/netfilter/nf_conntrack_common.h2
-rw-r--r--include/uapi/linux/nvme_ioctl.h6
-rw-r--r--include/uapi/linux/omap3isp.h21
-rw-r--r--include/uapi/linux/perf_event.h4
-rw-r--r--include/uapi/linux/rseq.h20
-rw-r--r--include/uapi/linux/smc_diag.h11
-rw-r--r--include/uapi/linux/thermal.h6
-rw-r--r--include/uapi/linux/userfaultfd.h8
-rw-r--r--include/uapi/linux/xfrm.h6
-rw-r--r--include/uapi/sound/asound.h4
-rw-r--r--include/uapi/xen/gntdev.h8
-rw-r--r--include/xen/grant_table.h19
-rw-r--r--include/xen/xenbus_dev.h2
-rw-r--r--init/Kconfig1
-rw-r--r--init/Makefile3
-rw-r--r--init/do_mounts.c1
-rw-r--r--init/main.c2
-rw-r--r--ipc/mqueue.c2
-rw-r--r--ipc/sem.c4
-rw-r--r--kernel/Kconfig.preempt15
-rw-r--r--kernel/async.c3
-rw-r--r--kernel/audit.c62
-rw-r--r--kernel/audit.h4
-rw-r--r--kernel/auditsc.c89
-rw-r--r--kernel/bpf/bpf_lsm.c2
-rw-r--r--kernel/bpf/btf.c5
-rw-r--r--kernel/bpf/helpers.c2
-rw-r--r--kernel/bpf/ringbuf.c2
-rw-r--r--kernel/bpf/stackmap.c5
-rw-r--r--kernel/bpf/syscall.c3
-rw-r--r--kernel/bpf/trampoline.c5
-rw-r--r--kernel/cgroup/cgroup-v1.c16
-rw-r--r--kernel/cgroup/cgroup.c25
-rw-r--r--kernel/cgroup/cpuset.c83
-rw-r--r--kernel/configs/debug.config1
-rw-r--r--kernel/cpu.c15
-rw-r--r--kernel/cred.c9
-rw-r--r--kernel/dma/contiguous.c4
-rw-r--r--kernel/dma/swiotlb.c22
-rw-r--r--kernel/entry/common.c37
-rw-r--r--kernel/events/core.c294
-rw-r--r--kernel/events/uprobes.c13
-rw-r--r--kernel/exit.c3
-rw-r--r--kernel/fork.c312
-rw-r--r--kernel/futex/core.c2
-rw-r--r--kernel/irq/chip.c29
-rw-r--r--kernel/irq/cpuhotplug.c4
-rw-r--r--kernel/irq/debugfs.c8
-rw-r--r--kernel/irq/irqdesc.c27
-rw-r--r--kernel/irq/irqdomain.c9
-rw-r--r--kernel/irq/manage.c4
-rw-r--r--kernel/kthread.c4
-rw-r--r--kernel/locking/lockdep.c47
-rw-r--r--kernel/locking/lockdep_internals.h6
-rw-r--r--kernel/locking/lockdep_proc.c51
-rw-r--r--kernel/locking/percpu-rwsem.c5
-rw-r--r--kernel/locking/rwsem.c2
-rw-r--r--kernel/module.c27
-rw-r--r--kernel/module_decompress.c2
-rw-r--r--kernel/padata.c2
-rw-r--r--kernel/power/hibernate.c7
-rw-r--r--kernel/power/main.c5
-rw-r--r--kernel/power/process.c2
-rw-r--r--kernel/power/snapshot.c21
-rw-r--r--kernel/power/suspend.c2
-rw-r--r--kernel/power/suspend_test.c8
-rw-r--r--kernel/power/swap.c14
-rw-r--r--kernel/power/wakelock.c11
-rw-r--r--kernel/printk/printk.c85
-rw-r--r--kernel/printk/printk_ringbuffer.c52
-rw-r--r--kernel/printk/printk_ringbuffer.h2
-rw-r--r--kernel/printk/sysctl.c2
-rw-r--r--kernel/rcu/rcu_segcblist.h4
-rw-r--r--kernel/rcu/rcutorture.c41
-rw-r--r--kernel/rcu/tasks.h14
-rw-r--r--kernel/rcu/tree.c331
-rw-r--r--kernel/rcu/tree.h18
-rw-r--r--kernel/rcu/tree_exp.h5
-rw-r--r--kernel/rcu/tree_nocb.h18
-rw-r--r--kernel/rcu/tree_plugin.h37
-rw-r--r--kernel/rcu/tree_stall.h35
-rw-r--r--kernel/rcu/update.c7
-rw-r--r--kernel/rseq.c8
-rw-r--r--kernel/sched/Makefile28
-rw-r--r--kernel/sched/autogroup.c26
-rw-r--r--kernel/sched/autogroup.h6
-rw-r--r--kernel/sched/build_policy.c52
-rw-r--r--kernel/sched/build_utility.c109
-rw-r--r--kernel/sched/clock.c44
-rw-r--r--kernel/sched/completion.c2
-rw-r--r--kernel/sched/core.c532
-rw-r--r--kernel/sched/core_sched.c5
-rw-r--r--kernel/sched/cpuacct.c12
-rw-r--r--kernel/sched/cpudeadline.c3
-rw-r--r--kernel/sched/cpufreq.c3
-rw-r--r--kernel/sched/cpufreq_schedutil.c20
-rw-r--r--kernel/sched/cpupri.c1
-rw-r--r--kernel/sched/cputime.c1
-rw-r--r--kernel/sched/deadline.c155
-rw-r--r--kernel/sched/debug.c11
-rw-r--r--kernel/sched/fair.c231
-rw-r--r--kernel/sched/idle.c3
-rw-r--r--kernel/sched/isolation.c163
-rw-r--r--kernel/sched/loadavg.c1
-rw-r--r--kernel/sched/membarrier.c10
-rw-r--r--kernel/sched/pelt.c4
-rw-r--r--kernel/sched/pelt.h4
-rw-r--r--kernel/sched/psi.c206
-rw-r--r--kernel/sched/rt.c51
-rw-r--r--kernel/sched/sched.h354
-rw-r--r--kernel/sched/stats.c1
-rw-r--r--kernel/sched/stats.h4
-rw-r--r--kernel/sched/stop_task.c1
-rw-r--r--kernel/sched/swait.c1
-rw-r--r--kernel/sched/topology.c268
-rw-r--r--kernel/sched/wait.c1
-rw-r--r--kernel/sched/wait_bit.c2
-rw-r--r--kernel/seccomp.c10
-rw-r--r--kernel/signal.c45
-rw-r--r--kernel/softirq.c10
-rw-r--r--kernel/stackleak.c5
-rw-r--r--kernel/sys.c39
-rw-r--r--kernel/sysctl.c20
-rw-r--r--kernel/time/Kconfig9
-rw-r--r--kernel/time/clocksource.c8
-rw-r--r--kernel/time/tick-sched.c77
-rw-r--r--kernel/time/tick-sched.h4
-rw-r--r--kernel/torture.c6
-rw-r--r--kernel/trace/blktrace.c26
-rw-r--r--kernel/trace/fgraph.c4
-rw-r--r--kernel/trace/ftrace.c9
-rw-r--r--kernel/trace/trace.c14
-rw-r--r--kernel/trace/trace.h1
-rw-r--r--kernel/trace/trace_eprobe.c16
-rw-r--r--kernel/trace/trace_events.c14
-rw-r--r--kernel/trace/trace_events_hist.c6
-rw-r--r--kernel/trace/trace_events_trigger.c73
-rw-r--r--kernel/trace/trace_kprobe.c2
-rw-r--r--kernel/trace/trace_osnoise.c88
-rw-r--r--kernel/trace/trace_probe.c10
-rw-r--r--kernel/trace/trace_probe.h1
-rw-r--r--kernel/trace/trace_sched_switch.c1
-rw-r--r--kernel/trace/trace_sched_wakeup.c1
-rw-r--r--kernel/trace/trace_selftest.c6
-rw-r--r--kernel/ucount.c5
-rw-r--r--kernel/user_namespace.c14
-rw-r--r--kernel/watch_queue.c25
-rw-r--r--kernel/watchdog.c2
-rw-r--r--kernel/workqueue.c4
-rw-r--r--lib/Kconfig1
-rw-r--r--lib/Kconfig.debug38
-rw-r--r--lib/Kconfig.kfence12
-rw-r--r--lib/Makefile6
-rw-r--r--lib/crc32.c14
-rw-r--r--lib/crc32test.c2
-rw-r--r--lib/crypto/Kconfig3
-rw-r--r--lib/crypto/Makefile3
-rw-r--r--lib/crypto/blake2s.c4
-rw-r--r--lib/crypto/sm3.c246
-rw-r--r--lib/iov_iter.c2
-rw-r--r--lib/kunit/try-catch.c3
-rw-r--r--lib/mpi/mpi-bit.c1
-rw-r--r--lib/overflow_kunit.c (renamed from lib/test_overflow.c)518
-rw-r--r--lib/raid6/test/Makefile4
-rw-r--r--lib/raid6/test/test.c1
-rw-r--r--lib/raid6/vpermxor.uc2
-rw-r--r--lib/random32.c14
-rw-r--r--lib/sbitmap.c48
-rw-r--r--lib/stackinit_kunit.c (renamed from lib/test_stackinit.c)269
-rw-r--r--lib/test_hmm.c4
-rw-r--r--lib/test_kasan.c10
-rw-r--r--lib/vsprintf.c59
-rw-r--r--lib/xarray.c10
-rw-r--r--mm/Kconfig22
-rw-r--r--mm/Makefile1
-rw-r--r--mm/backing-dev.c57
-rw-r--r--mm/cma.c31
-rw-r--r--mm/cma.h1
-rw-r--r--mm/compaction.c60
-rw-r--r--mm/damon/Kconfig19
-rw-r--r--mm/damon/Makefile7
-rw-r--r--mm/damon/core-test.h21
-rw-r--r--mm/damon/core.c190
-rw-r--r--mm/damon/dbgfs-test.h85
-rw-r--r--mm/damon/dbgfs.c222
-rw-r--r--mm/damon/ops-common.c (renamed from mm/damon/prmtv-common.c)2
-rw-r--r--mm/damon/ops-common.h (renamed from mm/damon/prmtv-common.h)0
-rw-r--r--mm/damon/paddr.c98
-rw-r--r--mm/damon/reclaim.c9
-rw-r--r--mm/damon/sysfs.c2596
-rw-r--r--mm/damon/vaddr-test.h8
-rw-r--r--mm/damon/vaddr.c43
-rw-r--r--mm/debug.c18
-rw-r--r--mm/debug_vm_pgtable.c2
-rw-r--r--mm/early_ioremap.c1
-rw-r--r--mm/fadvise.c5
-rw-r--r--mm/filemap.c84
-rw-r--r--mm/folio-compat.c13
-rw-r--r--mm/gup.c637
-rw-r--r--mm/highmem.c9
-rw-r--r--mm/hmm.c3
-rw-r--r--mm/huge_memory.c219
-rw-r--r--mm/hugetlb.c49
-rw-r--r--mm/hugetlb_vmemmap.c68
-rw-r--r--mm/hwpoison-inject.c7
-rw-r--r--mm/init-mm.c4
-rw-r--r--mm/internal.h136
-rw-r--r--mm/kfence/Makefile2
-rw-r--r--mm/kfence/core.c144
-rw-r--r--mm/kfence/kfence_test.c11
-rw-r--r--mm/khugepaged.c52
-rw-r--r--mm/kmemleak.c13
-rw-r--r--mm/ksm.c38
-rw-r--r--mm/list_lru.c422
-rw-r--r--mm/maccess.c6
-rw-r--r--mm/madvise.c115
-rw-r--r--mm/memblock.c10
-rw-r--r--mm/memcontrol.c542
-rw-r--r--mm/memfd.c40
-rw-r--r--mm/memory-failure.c162
-rw-r--r--mm/memory.c145
-rw-r--r--mm/memory_hotplug.c145
-rw-r--r--mm/mempolicy.c31
-rw-r--r--mm/memremap.c68
-rw-r--r--mm/migrate.c986
-rw-r--r--mm/migrate_device.c773
-rw-r--r--mm/mlock.c641
-rw-r--r--mm/mmap.c50
-rw-r--r--mm/mmzone.c14
-rw-r--r--mm/mprotect.c17
-rw-r--r--mm/mremap.c4
-rw-r--r--mm/oom_kill.c5
-rw-r--r--mm/page-writeback.c48
-rw-r--r--mm/page_alloc.c414
-rw-r--r--mm/page_idle.c30
-rw-r--r--mm/page_io.c32
-rw-r--r--mm/page_isolation.c2
-rw-r--r--mm/page_table_check.c65
-rw-r--r--mm/page_vma_mapped.c58
-rw-r--r--mm/ptdump.c16
-rw-r--r--mm/readahead.c232
-rw-r--r--mm/rmap.c597
-rw-r--r--mm/secretmem.c2
-rw-r--r--mm/shmem.c48
-rw-r--r--mm/slab.c39
-rw-r--r--mm/slab.h25
-rw-r--r--mm/slob.c6
-rw-r--r--mm/slub.c42
-rw-r--r--mm/sparse-vmemmap.c70
-rw-r--r--mm/sparse.c2
-rw-r--r--mm/swap.c198
-rw-r--r--mm/swap_state.c4
-rw-r--r--mm/swapfile.c1
-rw-r--r--mm/truncate.c153
-rw-r--r--mm/usercopy.c39
-rw-r--r--mm/userfaultfd.c17
-rw-r--r--mm/util.c40
-rw-r--r--mm/vmalloc.c102
-rw-r--r--mm/vmscan.c445
-rw-r--r--mm/vmstat.c19
-rw-r--r--mm/workingset.c32
-rw-r--r--mm/zswap.c15
-rw-r--r--net/8021q/vlan.h2
-rw-r--r--net/8021q/vlan_dev.c15
-rw-r--r--net/8021q/vlan_netlink.c7
-rw-r--r--net/9p/trans_xen.c14
-rw-r--r--net/ax25/af_ax25.c35
-rw-r--r--net/ax25/ax25_dev.c28
-rw-r--r--net/ax25/ax25_route.c13
-rw-r--r--net/batman-adv/hard-interface.c29
-rw-r--r--net/bluetooth/hci_core.c1
-rw-r--r--net/bluetooth/hci_sync.c79
-rw-r--r--net/bluetooth/mgmt.c101
-rw-r--r--net/bluetooth/mgmt_util.c3
-rw-r--r--net/bridge/br_multicast.c4
-rw-r--r--net/bridge/br_vlan.c9
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c8
-rw-r--r--net/can/isotp.c29
-rw-r--r--net/can/j1939/transport.c2
-rw-r--r--net/ceph/ceph_common.c7
-rw-r--r--net/ceph/messenger.c4
-rw-r--r--net/ceph/messenger_v1.c54
-rw-r--r--net/ceph/messenger_v2.c250
-rw-r--r--net/core/drop_monitor.c11
-rw-r--r--net/core/filter.c3
-rw-r--r--net/core/gro.c25
-rw-r--r--net/core/neighbour.c18
-rw-r--r--net/core/net-procfs.c38
-rw-r--r--net/core/net-sysfs.c8
-rw-r--r--net/core/rtnetlink.c12
-rw-r--r--net/core/skbuff.c10
-rw-r--r--net/core/skmsg.c2
-rw-r--r--net/core/sock.c4
-rw-r--r--net/core/xdp.c3
-rw-r--r--net/dcb/dcbnl.c44
-rw-r--r--net/dsa/dsa.c1
-rw-r--r--net/dsa/dsa2.c34
-rw-r--r--net/dsa/dsa_priv.h1
-rw-r--r--net/dsa/master.c7
-rw-r--r--net/dsa/port.c29
-rw-r--r--net/dsa/tag_lan9303.c21
-rw-r--r--net/ieee802154/nl802154.c8
-rw-r--r--net/ipv4/af_inet.c5
-rw-r--r--net/ipv4/esp4.c7
-rw-r--r--net/ipv4/esp4_offload.c6
-rw-r--r--net/ipv4/fib_frontend.c3
-rw-r--r--net/ipv4/fib_lookup.h7
-rw-r--r--net/ipv4/fib_semantics.c6
-rw-r--r--net/ipv4/fib_trie.c22
-rw-r--r--net/ipv4/ip_output.c28
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/netfilter/Kconfig4
-rw-r--r--net/ipv4/ping.c11
-rw-r--r--net/ipv4/raw.c5
-rw-r--r--net/ipv4/route.c4
-rw-r--r--net/ipv4/tcp.c41
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/udp_tunnel_nic.c2
-rw-r--r--net/ipv6/addrconf.c41
-rw-r--r--net/ipv6/esp6.c10
-rw-r--r--net/ipv6/esp6_offload.c6
-rw-r--r--net/ipv6/ip6_fib.c23
-rw-r--r--net/ipv6/ip6_flowlabel.c4
-rw-r--r--net/ipv6/ip6_offload.c2
-rw-r--r--net/ipv6/ip6_output.c13
-rw-r--r--net/ipv6/ip6_tunnel.c8
-rw-r--r--net/ipv6/ip6mr.c2
-rw-r--r--net/ipv6/mcast.c34
-rw-r--r--net/ipv6/netfilter/Kconfig4
-rw-r--r--net/ipv6/netfilter/Makefile3
-rw-r--r--net/ipv6/netfilter/nf_flow_table_ipv6.c0
-rw-r--r--net/ipv6/route.c21
-rw-r--r--net/ipv6/xfrm6_output.c16
-rw-r--r--net/key/af_key.c4
-rw-r--r--net/mac80211/agg-tx.c10
-rw-r--r--net/mac80211/ieee80211_i.h2
-rw-r--r--net/mac80211/mlme.c45
-rw-r--r--net/mac80211/rx.c14
-rw-r--r--net/mctp/route.c11
-rw-r--r--net/mpls/af_mpls.c2
-rw-r--r--net/mptcp/mib.c2
-rw-r--r--net/mptcp/mib.h2
-rw-r--r--net/mptcp/pm.c8
-rw-r--r--net/mptcp/pm_netlink.c76
-rw-r--r--net/mptcp/protocol.c18
-rw-r--r--net/mptcp/protocol.h6
-rw-r--r--net/netfilter/core.c5
-rw-r--r--net/netfilter/nf_conntrack_core.c11
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c5
-rw-r--r--net/netfilter/nf_conntrack_netlink.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c9
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c59
-rw-r--r--net/netfilter/nf_flow_table_offload.c6
-rw-r--r--net/netfilter/nf_nat_core.c43
-rw-r--r--net/netfilter/nf_queue.c36
-rw-r--r--net/netfilter/nf_tables_api.c34
-rw-r--r--net/netfilter/nf_tables_offload.c3
-rw-r--r--net/netfilter/nfnetlink_queue.c12
-rw-r--r--net/netfilter/nft_byteorder.c12
-rw-r--r--net/netfilter/nft_connlimit.c11
-rw-r--r--net/netfilter/nft_ct.c5
-rw-r--r--net/netfilter/nft_dup_netdev.c6
-rw-r--r--net/netfilter/nft_exthdr.c2
-rw-r--r--net/netfilter/nft_fwd_netdev.c6
-rw-r--r--net/netfilter/nft_immediate.c12
-rw-r--r--net/netfilter/nft_limit.c18
-rw-r--r--net/netfilter/nft_payload.c9
-rw-r--r--net/netfilter/nft_synproxy.c4
-rw-r--r--net/netfilter/xt_socket.c4
-rw-r--r--net/openvswitch/actions.c46
-rw-r--r--net/packet/af_packet.c21
-rw-r--r--net/rxrpc/call_event.c8
-rw-r--r--net/rxrpc/output.c2
-rw-r--r--net/sched/act_api.c15
-rw-r--r--net/sched/act_ct.c18
-rw-r--r--net/sched/cls_api.c17
-rw-r--r--net/sched/sch_api.c24
-rw-r--r--net/sched/sch_generic.c29
-rw-r--r--net/sched/sch_htb.c20
-rw-r--r--net/sctp/diag.c9
-rw-r--r--net/sctp/sm_statefuns.c8
-rw-r--r--net/smc/af_smc.c210
-rw-r--r--net/smc/smc.h20
-rw-r--r--net/smc/smc_core.c5
-rw-r--r--net/smc/smc_diag.c2
-rw-r--r--net/smc/smc_pnet.c50
-rw-r--r--net/smc/smc_pnet.h2
-rw-r--r--net/socket.c6
-rw-r--r--net/sunrpc/auth_gss/gss_generic_token.c6
-rw-r--r--net/sunrpc/clnt.c5
-rw-r--r--net/sunrpc/rpc_pipe.c6
-rw-r--r--net/sunrpc/svc.c50
-rw-r--r--net/sunrpc/svc_xprt.c68
-rw-r--r--net/sunrpc/svcauth.c2
-rw-r--r--net/sunrpc/svcauth_unix.c60
-rw-r--r--net/sunrpc/sysfs.c52
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c4
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c4
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c4
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c2
-rw-r--r--net/sunrpc/xprtrdma/transport.c4
-rw-r--r--net/sunrpc/xprtrdma/verbs.c26
-rw-r--r--net/sunrpc/xprtsock.c9
-rw-r--r--net/tipc/bearer.c12
-rw-r--r--net/tipc/crypto.c2
-rw-r--r--net/tipc/link.c14
-rw-r--r--net/tipc/monitor.c2
-rw-r--r--net/tipc/name_distr.c2
-rw-r--r--net/tipc/name_table.c2
-rw-r--r--net/tipc/node.c13
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/vmw_vsock/af_vsock.c10
-rw-r--r--net/vmw_vsock/virtio_transport.c7
-rw-r--r--net/vmw_vsock/vmci_transport.c5
-rw-r--r--net/wireless/Makefile2
-rw-r--r--net/wireless/core.c17
-rw-r--r--net/wireless/nl80211.c15
-rw-r--r--net/xfrm/xfrm_device.c6
-rw-r--r--net/xfrm/xfrm_interface.c7
-rw-r--r--net/xfrm/xfrm_policy.c14
-rw-r--r--net/xfrm/xfrm_state.c29
-rw-r--r--net/xfrm/xfrm_user.c27
-rw-r--r--samples/seccomp/dropper.c9
-rw-r--r--samples/trace_events/trace_custom_sched.h3
-rw-r--r--scripts/Makefile.clang1
-rw-r--r--scripts/Makefile.extrawarn1
-rw-r--r--scripts/Makefile.gcc-plugins2
-rwxr-xr-xscripts/atomic/fallbacks/read_acquire11
-rwxr-xr-xscripts/atomic/fallbacks/set_release7
-rw-r--r--scripts/gcc-plugins/stackleak_plugin.c29
-rw-r--r--scripts/kconfig/confdata.c25
-rw-r--r--scripts/kconfig/preprocess.c2
-rwxr-xr-xscripts/kernel-doc354
-rwxr-xr-xscripts/mkcompile_h17
-rw-r--r--scripts/spelling.txt16
-rw-r--r--security/Kconfig.hardening10
-rw-r--r--security/integrity/Kconfig13
-rw-r--r--security/integrity/Makefile1
-rw-r--r--security/integrity/digsig.c15
-rw-r--r--security/integrity/digsig_asymmetric.c15
-rw-r--r--security/integrity/evm/evm_main.c2
-rw-r--r--security/integrity/ima/ima_api.c12
-rw-r--r--security/integrity/ima/ima_fs.c33
-rw-r--r--security/integrity/ima/ima_init.c5
-rw-r--r--security/integrity/ima/ima_main.c9
-rw-r--r--security/integrity/ima/ima_policy.c13
-rw-r--r--security/integrity/ima/ima_template.c10
-rw-r--r--security/integrity/ima/ima_template_lib.c7
-rw-r--r--security/integrity/integrity.h31
-rw-r--r--security/integrity/integrity_audit.c2
-rw-r--r--security/integrity/platform_certs/keyring_handler.c18
-rw-r--r--security/integrity/platform_certs/keyring_handler.h5
-rw-r--r--security/integrity/platform_certs/load_uefi.c4
-rw-r--r--security/integrity/platform_certs/machine_keyring.c77
-rw-r--r--security/keys/Kconfig19
-rw-r--r--security/keys/dh.c2
-rw-r--r--security/keys/encrypted-keys/encrypted.c71
-rw-r--r--security/keys/keyctl_pkey.c14
-rw-r--r--security/keys/trusted-keys/trusted_core.c6
-rw-r--r--security/security.c39
-rw-r--r--security/selinux/hooks.c299
-rw-r--r--security/selinux/ibpkey.c2
-rw-r--r--security/selinux/ima.c8
-rw-r--r--security/selinux/include/policycap.h21
-rw-r--r--security/selinux/include/policycap_names.h5
-rw-r--r--security/selinux/include/security.h31
-rw-r--r--security/selinux/netnode.c9
-rw-r--r--security/selinux/netport.c2
-rw-r--r--security/selinux/selinuxfs.c4
-rw-r--r--security/selinux/ss/avtab.c6
-rw-r--r--security/selinux/ss/conditional.c5
-rw-r--r--security/selinux/ss/ebitmap.c1
-rw-r--r--security/selinux/ss/ebitmap.h6
-rw-r--r--security/selinux/ss/mls.c1
-rw-r--r--security/selinux/ss/policydb.c4
-rw-r--r--security/selinux/ss/services.c10
-rw-r--r--security/selinux/ss/sidtab.c4
-rw-r--r--security/selinux/xfrm.c2
-rw-r--r--security/smack/smack_lsm.c2
-rw-r--r--sound/core/memalloc.c15
-rw-r--r--sound/core/pcm_native.c13
-rw-r--r--sound/hda/intel-sdw-acpi.c7
-rw-r--r--sound/pci/hda/cs35l41_hda_spi.c4
-rw-r--r--sound/pci/hda/hda_auto_parser.c2
-rw-r--r--sound/pci/hda/hda_codec.c4
-rw-r--r--sound/pci/hda/hda_generic.c17
-rw-r--r--sound/pci/hda/hda_generic.h3
-rw-r--r--sound/pci/hda/hda_intel.c6
-rw-r--r--sound/pci/hda/patch_realtek.c79
-rw-r--r--sound/soc/amd/acp/acp-mach-common.c4
-rw-r--r--sound/soc/amd/acp/acp-mach.h1
-rw-r--r--sound/soc/amd/acp/acp-sof-mach.c4
-rw-r--r--sound/soc/codecs/adau1761-spi.c3
-rw-r--r--sound/soc/codecs/adau1781-spi.c3
-rw-r--r--sound/soc/codecs/cpcap.c2
-rw-r--r--sound/soc/codecs/cs35l41-spi.c4
-rw-r--r--sound/soc/codecs/cs4265.c3
-rw-r--r--sound/soc/codecs/hdmi-codec.c2
-rw-r--r--sound/soc/codecs/lpass-rx-macro.c8
-rw-r--r--sound/soc/codecs/max9759.c3
-rw-r--r--sound/soc/codecs/pcm3168a-spi.c4
-rw-r--r--sound/soc/codecs/pcm512x-spi.c3
-rw-r--r--sound/soc/codecs/rt5668.c12
-rw-r--r--sound/soc/codecs/rt5682-i2c.c15
-rw-r--r--sound/soc/codecs/rt5682.c34
-rw-r--r--sound/soc/codecs/rt5682.h2
-rw-r--r--sound/soc/codecs/rt5682s.c12
-rw-r--r--sound/soc/codecs/tas2770.c7
-rw-r--r--sound/soc/codecs/tlv320aic32x4-spi.c4
-rw-r--r--sound/soc/codecs/tlv320aic3x-spi.c4
-rw-r--r--sound/soc/codecs/wcd938x.c31
-rw-r--r--sound/soc/codecs/wm0010.c4
-rw-r--r--sound/soc/codecs/wm8804-spi.c3
-rw-r--r--sound/soc/codecs/wm_adsp.c3
-rw-r--r--sound/soc/fsl/pcm030-audio-fabric.c11
-rw-r--r--sound/soc/generic/simple-card.c26
-rw-r--r--sound/soc/intel/skylake/skl.c1
-rw-r--r--sound/soc/mediatek/Kconfig2
-rw-r--r--sound/soc/qcom/lpass-platform.c8
-rw-r--r--sound/soc/qcom/qdsp6/q6apm-dai.c7
-rw-r--r--sound/soc/soc-acpi.c7
-rw-r--r--sound/soc/soc-ops.c70
-rw-r--r--sound/soc/soc-pcm.c18
-rw-r--r--sound/soc/sof/intel/hda.c1
-rw-r--r--sound/soc/xilinx/xlnx_formatter_pcm.c27
-rw-r--r--sound/spi/at73c213.c4
-rw-r--r--sound/usb/implicit.c4
-rw-r--r--sound/usb/mixer.c13
-rw-r--r--sound/usb/quirks-table.h2
-rw-r--r--sound/x86/intel_hdmi_audio.c2
-rw-r--r--tools/arch/arm64/include/uapi/asm/kvm.h5
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h4
-rw-r--r--tools/arch/x86/include/asm/insn.h2
-rw-r--r--tools/arch/x86/include/asm/msr-index.h1
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h3
-rw-r--r--tools/arch/x86/lib/memcpy_64.S10
-rw-r--r--tools/arch/x86/lib/memset_64.S6
-rw-r--r--tools/arch/x86/lib/x86-opcode-map.txt111
-rw-r--r--tools/bpf/resolve_btfids/Makefile6
-rw-r--r--tools/bpf/runqslower/runqslower.bpf.c2
-rw-r--r--tools/bpf/runqslower/runqslower.c2
-rw-r--r--tools/bpf/runqslower/runqslower.h2
-rw-r--r--tools/cgroup/memcg_slabinfo.py30
-rw-r--r--tools/include/uapi/linux/kvm.h8
-rw-r--r--tools/include/uapi/linux/lirc.h229
-rw-r--r--tools/include/uapi/linux/perf_event.h10
-rw-r--r--tools/include/uapi/linux/prctl.h3
-rw-r--r--tools/include/uapi/sound/asound.h11
-rw-r--r--tools/lib/perf/include/internal/cpumap.h6
-rw-r--r--tools/lib/perf/include/perf/cpumap.h5
-rw-r--r--tools/lib/perf/libperf.map1
-rw-r--r--tools/lib/perf/mmap.c98
-rw-r--r--tools/lib/perf/tests/test-cpumap.c11
-rw-r--r--tools/lib/perf/tests/test-evlist.c5
-rw-r--r--tools/lib/perf/tests/test-evsel.c5
-rw-r--r--tools/lib/subcmd/subcmd-util.h11
-rw-r--r--tools/memory-model/Documentation/explanation.txt51
-rw-r--r--tools/objtool/arch/x86/decode.c11
-rw-r--r--tools/objtool/check.c2
-rw-r--r--tools/perf/arch/x86/tests/insn-x86-dat-32.c918
-rw-r--r--tools/perf/arch/x86/tests/insn-x86-dat-64.c1426
-rw-r--r--tools/perf/arch/x86/tests/insn-x86-dat-src.c1189
-rw-r--r--tools/perf/arch/x86/util/evlist.c2
-rw-r--r--tools/perf/bench/epoll-ctl.c2
-rw-r--r--tools/perf/builtin-ftrace.c45
-rw-r--r--tools/perf/builtin-script.c2
-rw-r--r--tools/perf/builtin-trace.c23
-rw-r--r--tools/perf/tests/attr/README2
-rw-r--r--tools/perf/tests/attr/test-record-graph-default2
-rw-r--r--tools/perf/tests/attr/test-record-graph-default-aarch649
-rw-r--r--tools/perf/tests/attr/test-record-graph-fp2
-rw-r--r--tools/perf/tests/attr/test-record-graph-fp-aarch649
-rw-r--r--tools/perf/tests/sigtrap.c15
-rwxr-xr-xtools/perf/trace/beauty/prctl_option.sh2
-rw-r--r--tools/perf/util/annotate.c1
-rw-r--r--tools/perf/util/bpf-loader.c3
-rw-r--r--tools/perf/util/bpf_counter_cgroup.c2
-rw-r--r--tools/perf/util/branch.c4
-rw-r--r--tools/perf/util/cs-etm.c16
-rw-r--r--tools/perf/util/data.c7
-rw-r--r--tools/perf/util/evlist-hybrid.c4
-rw-r--r--tools/perf/util/evlist.c28
-rw-r--r--tools/perf/util/include/linux/linkage.h52
-rw-r--r--tools/perf/util/machine.c2
-rw-r--r--tools/perf/util/map_symbol.h1
-rw-r--r--tools/perf/util/parse-events.c8
-rw-r--r--tools/perf/util/perf_event_attr_fprintf.c2
-rw-r--r--tools/perf/util/session.c3
-rw-r--r--tools/perf/util/sort.c4
-rw-r--r--tools/perf/util/stat-display.c19
-rw-r--r--tools/perf/util/symbol.c2
-rw-r--r--tools/perf/util/synthetic-events.c19
-rw-r--r--tools/power/cpupower/Makefile6
-rw-r--r--tools/power/cpupower/TODO (renamed from tools/power/cpupower/ToDo)0
-rw-r--r--tools/power/cpupower/lib/acpi_cppc.c59
-rw-r--r--tools/power/cpupower/lib/acpi_cppc.h21
-rw-r--r--tools/power/cpupower/lib/cpufreq.c23
-rw-r--r--tools/power/cpupower/lib/cpufreq.h12
-rw-r--r--tools/power/cpupower/man/cpupower-frequency-info.13
-rw-r--r--tools/power/cpupower/man/cpupower-idle-set.12
-rw-r--r--tools/power/cpupower/utils/cpufreq-info.c87
-rw-r--r--tools/power/cpupower/utils/helpers/amd.c77
-rw-r--r--tools/power/cpupower/utils/helpers/cpuid.c13
-rw-r--r--tools/power/cpupower/utils/helpers/helpers.h22
-rw-r--r--tools/power/cpupower/utils/helpers/misc.c60
-rwxr-xr-xtools/power/x86/amd_pstate_tracer/amd_pstate_trace.py354
-rw-r--r--tools/power/x86/intel-speed-select/Build2
-rw-r--r--tools/power/x86/intel-speed-select/Makefile10
-rw-r--r--tools/power/x86/intel-speed-select/hfi-events.c309
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c54
-rw-r--r--tools/power/x86/intel-speed-select/isst-daemon.c244
-rw-r--r--tools/power/x86/intel-speed-select/isst.h13
-rwxr-xr-xtools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py260
-rw-r--r--tools/power/x86/turbostat/turbostat.c2
-rw-r--r--tools/scripts/Makefile.include2
-rw-r--r--tools/testing/kunit/kunit_kernel.py1
-rwxr-xr-xtools/testing/kunit/run_checks.py2
-rw-r--r--tools/testing/scatterlist/linux/mm.h3
-rw-r--r--tools/testing/selftests/arm64/abi/syscall-abi.c1
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace.c15
-rw-r--r--tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c11
-rw-r--r--tools/testing/selftests/arm64/mte/check_user_mem.c193
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals.h1
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals_utils.c34
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals_utils.h2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_crash.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_kern.h26
-rw-r--r--tools/testing/selftests/bpf/progs/timer_crash.c54
-rw-r--r--tools/testing/selftests/bpf/test_lirc_mode2_user.c1
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.c15
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.h1
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c78
-rw-r--r--tools/testing/selftests/clone3/clone3.c2
-rwxr-xr-xtools/testing/selftests/cpufreq/main.sh2
-rw-r--r--tools/testing/selftests/damon/Makefile1
-rw-r--r--tools/testing/selftests/damon/sysfs.sh306
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh2
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh3
-rw-r--r--tools/testing/selftests/exec/Makefile7
-rw-r--r--tools/testing/selftests/exec/null-argv.c78
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc2
-rw-r--r--tools/testing/selftests/futex/Makefile4
-rw-r--r--tools/testing/selftests/ir/ir_loopback.c10
-rw-r--r--tools/testing/selftests/kselftest_harness.h4
-rw-r--r--tools/testing/selftests/kvm/Makefile2
-rw-r--r--tools/testing/selftests/kvm/aarch64/arch_timer.c7
-rw-r--r--tools/testing/selftests/kvm/aarch64/vgic_irq.c4
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util_base.h1
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h1
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/vgic.c4
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c7
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c27
-rw-r--r--tools/testing/selftests/kvm/x86_64/amx_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c1
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c1
-rw-r--r--tools/testing/selftests/mincore/mincore_selftest.c20
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c4
-rw-r--r--tools/testing/selftests/net/ioam6_parser.c5
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh44
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh4
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh38
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh21
-rw-r--r--tools/testing/selftests/netfilter/.gitignore1
-rw-r--r--tools/testing/selftests/netfilter/Makefile4
-rw-r--r--tools/testing/selftests/netfilter/connect_close.c136
-rwxr-xr-xtools/testing/selftests/netfilter/nft_concat_range.sh74
-rwxr-xr-xtools/testing/selftests/netfilter/nft_fib.sh1
-rwxr-xr-xtools/testing/selftests/netfilter/nft_nat.sh157
-rwxr-xr-xtools/testing/selftests/netfilter/nft_queue.sh19
-rwxr-xr-xtools/testing/selftests/netfilter/nft_synproxy.sh117
-rwxr-xr-xtools/testing/selftests/netfilter/nft_zones_many.sh12
-rw-r--r--tools/testing/selftests/openat2/Makefile2
-rw-r--r--tools/testing/selftests/openat2/helpers.h12
-rw-r--r--tools/testing/selftests/openat2/openat2_test.c12
-rw-r--r--tools/testing/selftests/perf_events/sigtrap_threads.c17
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h13
-rw-r--r--tools/testing/selftests/pidfd/pidfd_fdinfo_test.c22
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c6
-rw-r--r--tools/testing/selftests/pidfd/pidfd_wait.c5
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/console-badness.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-again.sh4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-check-branches.sh4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-find-errors.sh6
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-remote.sh25
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh16
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh10
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh38
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/RUDE012
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE012
-rw-r--r--tools/testing/selftests/rseq/Makefile2
-rw-r--r--tools/testing/selftests/rseq/basic_percpu_ops_test.c2
-rw-r--r--tools/testing/selftests/rseq/compiler.h30
-rw-r--r--tools/testing/selftests/rseq/param_test.c8
-rw-r--r--tools/testing/selftests/rseq/rseq-abi.h151
-rw-r--r--tools/testing/selftests/rseq/rseq-arm.h110
-rw-r--r--tools/testing/selftests/rseq/rseq-arm64.h79
-rw-r--r--tools/testing/selftests/rseq/rseq-generic-thread-pointer.h25
-rw-r--r--tools/testing/selftests/rseq/rseq-mips.h71
-rw-r--r--tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h30
-rw-r--r--tools/testing/selftests/rseq/rseq-ppc.h128
-rw-r--r--tools/testing/selftests/rseq/rseq-s390.h55
-rw-r--r--tools/testing/selftests/rseq/rseq-skip.h2
-rw-r--r--tools/testing/selftests/rseq/rseq-thread-pointer.h19
-rw-r--r--tools/testing/selftests/rseq/rseq-x86-thread-pointer.h40
-rw-r--r--tools/testing/selftests/rseq/rseq-x86.h200
-rw-r--r--tools/testing/selftests/rseq/rseq.c164
-rw-r--r--tools/testing/selftests/rseq/rseq.h30
-rw-r--r--tools/testing/selftests/rtc/settings2
-rw-r--r--tools/testing/selftests/seccomp/Makefile2
-rw-r--r--tools/testing/selftests/sgx/Makefile2
-rw-r--r--tools/testing/selftests/sgx/load.c9
-rw-r--r--tools/testing/selftests/sgx/main.c9
-rw-r--r--tools/testing/selftests/tpm2/tpm2.py31
-rw-r--r--tools/testing/selftests/tpm2/tpm2_tests.py45
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_abi.c135
-rw-r--r--tools/testing/selftests/vm/.gitignore1
-rw-r--r--tools/testing/selftests/vm/Makefile13
-rw-r--r--tools/testing/selftests/vm/hugepage-mremap.c26
-rw-r--r--tools/testing/selftests/vm/hugepage-vmemmap.c144
-rw-r--r--tools/testing/selftests/vm/map_fixed_noreplace.c49
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests.sh14
-rw-r--r--tools/testing/selftests/vm/transhuge-stress.c35
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c12
-rw-r--r--tools/testing/selftests/x86/Makefile6
-rwxr-xr-xtools/testing/selftests/x86/check_cc.sh2
-rwxr-xr-xtools/testing/selftests/zram/zram.sh15
-rwxr-xr-xtools/testing/selftests/zram/zram01.sh33
-rwxr-xr-xtools/testing/selftests/zram/zram02.sh1
-rwxr-xr-xtools/testing/selftests/zram/zram_lib.sh134
-rw-r--r--tools/tracing/rtla/src/osnoise.c85
-rw-r--r--tools/tracing/rtla/src/osnoise.h8
-rw-r--r--tools/tracing/rtla/src/osnoise_hist.c99
-rw-r--r--tools/tracing/rtla/src/osnoise_top.c92
-rw-r--r--tools/tracing/rtla/src/timerlat_hist.c112
-rw-r--r--tools/tracing/rtla/src/timerlat_top.c103
-rw-r--r--tools/tracing/rtla/src/trace.c345
-rw-r--r--tools/tracing/rtla/src/trace.h23
-rw-r--r--tools/tracing/rtla/src/utils.c33
-rw-r--r--tools/tracing/rtla/src/utils.h1
-rw-r--r--tools/virtio/linux/mm_types.h3
-rw-r--r--tools/virtio/virtio_test.c1
-rw-r--r--usr/include/Makefile1
-rw-r--r--virt/kvm/eventfd.c8
-rw-r--r--virt/kvm/kvm_main.c12
3744 files changed, 87489 insertions, 39941 deletions
diff --git a/.mailmap b/.mailmap
index b157f88ce26a..97ccdf147111 100644
--- a/.mailmap
+++ b/.mailmap
@@ -70,6 +70,7 @@ Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@bootlin.com>
Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@free-electrons.com>
Brian Avery <b.avery@hp.com>
Brian King <brking@us.ibm.com>
+Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com>
Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com>
Changbin Du <changbin.du@intel.com> <changbin.du@intel.com>
Chao Yu <chao@kernel.org> <chao2.yu@samsung.com>
@@ -79,6 +80,9 @@ Chris Chiu <chris.chiu@canonical.com> <chiu@endlessos.org>
Christian Borntraeger <borntraeger@linux.ibm.com> <borntraeger@de.ibm.com>
Christian Borntraeger <borntraeger@linux.ibm.com> <cborntra@de.ibm.com>
Christian Borntraeger <borntraeger@linux.ibm.com> <borntrae@de.ibm.com>
+Christian Brauner <brauner@kernel.org> <christian@brauner.io>
+Christian Brauner <brauner@kernel.org> <christian.brauner@canonical.com>
+Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com>
Christophe Ricard <christophe.ricard@gmail.com>
Christoph Hellwig <hch@lst.de>
Colin Ian King <colin.king@intel.com> <colin.king@canonical.com>
@@ -183,6 +187,8 @@ Jiri Slaby <jirislaby@kernel.org> <jslaby@novell.com>
Jiri Slaby <jirislaby@kernel.org> <jslaby@suse.com>
Jiri Slaby <jirislaby@kernel.org> <jslaby@suse.cz>
Jiri Slaby <jirislaby@kernel.org> <xslaby@fi.muni.cz>
+Jisheng Zhang <jszhang@kernel.org> <jszhang@marvell.com>
+Jisheng Zhang <jszhang@kernel.org> <Jisheng.Zhang@synaptics.com>
Johan Hovold <johan@kernel.org> <jhovold@gmail.com>
Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
@@ -212,6 +218,7 @@ Koushik <raghavendra.koushik@neterion.com>
Krishna Manikandan <quic_mkrishn@quicinc.com> <mkrishn@codeaurora.org>
Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski.k@gmail.com>
Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski@samsung.com>
+Krzysztof Kozlowski <krzk@kernel.org> <krzysztof.kozlowski@canonical.com>
Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Kuogee Hsieh <quic_khsieh@quicinc.com> <khsieh@codeaurora.org>
Leonardo Bras <leobras.c@gmail.com> <leonardo@linux.ibm.com>
@@ -329,6 +336,9 @@ Rémi Denis-Courmont <rdenis@simphalempin.com>
Ricardo Ribalda <ribalda@kernel.org> <ricardo@ribalda.com>
Ricardo Ribalda <ribalda@kernel.org> Ricardo Ribalda Delgado <ribalda@kernel.org>
Ricardo Ribalda <ribalda@kernel.org> <ricardo.ribalda@gmail.com>
+Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com>
+Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com>
+Roman Gushchin <roman.gushchin@linux.dev> <klamm@yandex-team.ru>
Ross Zwisler <zwisler@kernel.org> <ross.zwisler@linux.intel.com>
Rudolf Marek <R.Marek@sh.cvut.cz>
Rui Saraiva <rmps@joel.ist.utl.pt>
diff --git a/CREDITS b/CREDITS
index b97256d5bc24..7e85a53b6a88 100644
--- a/CREDITS
+++ b/CREDITS
@@ -895,6 +895,12 @@ S: 3000 FORE Drive
S: Warrendale, Pennsylvania 15086
S: USA
+N: Ludovic Desroches
+E: ludovic.desroches@microchip.com
+D: Maintainer for ARM/Microchip (AT91) SoC support
+D: Author of ADC, pinctrl, XDMA and SDHCI drivers for this platform
+S: France
+
N: Martin Devera
E: devik@cdi.cz
W: http://luxik.cdi.cz/~devik/qos/
diff --git a/Documentation/ABI/obsolete/procfs-i8k b/Documentation/ABI/obsolete/procfs-i8k
new file mode 100644
index 000000000000..32df4d5bdd15
--- /dev/null
+++ b/Documentation/ABI/obsolete/procfs-i8k
@@ -0,0 +1,10 @@
+What: /proc/i8k
+Date: November 2001
+KernelVersion: 2.4.14
+Contact: Pali Rohár <pali@kernel.org>
+Description: Legacy interface for getting/setting sensor information such as
+ fan speed, temperature, serial number, hotkey status, etc.,
+ on Dell laptops.
+ Since the driver is now using the standard hwmon sysfs interface,
+ the procfs interface is deprecated.
+Users: https://github.com/vitorafsr/i8kutils
diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index 8dd3e84a8aad..e8797cd09aff 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -155,6 +155,55 @@ Description:
last zone of the device which may be smaller.
+What: /sys/block/<disk>/queue/crypto/
+Date: February 2022
+Contact: linux-block@vger.kernel.org
+Description:
+ The presence of this subdirectory of /sys/block/<disk>/queue/
+ indicates that the device supports inline encryption. This
+ subdirectory contains files which describe the inline encryption
+ capabilities of the device. For more information about inline
+ encryption, refer to Documentation/block/inline-encryption.rst.
+
+
+What: /sys/block/<disk>/queue/crypto/max_dun_bits
+Date: February 2022
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] This file shows the maximum length, in bits, of data unit
+ numbers accepted by the device in inline encryption requests.
+
+
+What: /sys/block/<disk>/queue/crypto/modes/<mode>
+Date: February 2022
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] For each crypto mode (i.e., encryption/decryption
+ algorithm) the device supports with inline encryption, a file
+ will exist at this location. It will contain a hexadecimal
+ number that is a bitmask of the supported data unit sizes, in
+ bytes, for that crypto mode.
+
+ Currently, the crypto modes that may be supported are:
+
+ * AES-256-XTS
+ * AES-128-CBC-ESSIV
+ * Adiantum
+
+ For example, if a device supports AES-256-XTS inline encryption
+ with data unit sizes of 512 and 4096 bytes, the file
+ /sys/block/<disk>/queue/crypto/modes/AES-256-XTS will exist and
+ will contain "0x1200".
+
+
+What: /sys/block/<disk>/queue/crypto/num_keyslots
+Date: February 2022
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] This file shows the number of keyslots the device has for
+ use with inline encryption.
+
+
What: /sys/block/<disk>/queue/dax
Date: June 2016
Contact: linux-block@vger.kernel.org
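A note on the modes files described above: bit n of the reported bitmask
corresponds to a data unit size of 2^n bytes, which is how 0x1200 maps to
512 and 4096. A minimal C sketch that expands such a mask; the mask value
is the example from the entry above, and nothing here is taken from the
kernel sources:

	#include <stdio.h>

	int main(void)
	{
		/* Example from the AES-256-XTS description above: bits 9
		 * and 12 set, i.e. 512-byte and 4096-byte data units. */
		unsigned long mask = 0x1200;

		for (int n = 0; n < 8 * (int)sizeof(mask); n++)
			if (mask & (1UL << n))
				printf("data unit size: %lu bytes\n", 1UL << n);
		return 0;
	}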
diff --git a/Documentation/ABI/stable/sysfs-devices-system-cpu b/Documentation/ABI/stable/sysfs-devices-system-cpu
index 3965ce504484..902392d7eddf 100644
--- a/Documentation/ABI/stable/sysfs-devices-system-cpu
+++ b/Documentation/ABI/stable/sysfs-devices-system-cpu
@@ -86,6 +86,10 @@ What: /sys/devices/system/cpu/cpuX/topology/die_cpus
Description: internal kernel map of CPUs within the same die.
Values: hexadecimal bitmask.
+What: /sys/devices/system/cpu/cpuX/topology/ppin
+Description: per-socket protected processor inventory number
+Values: hexadecimal.
+
What: /sys/devices/system/cpu/cpuX/topology/die_cpus_list
Description: human-readable list of CPUs within the same die.
The format is like 0-3, 8-11, 14,17.
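The die_cpus and die_cpus_list entries above expose the same CPU set as a
hexadecimal bitmask and as a human-readable range list. A small C sketch
of that conversion, using a hypothetical mask value chosen to match the
"0-3, 8-11, 14,17" example:

	#include <stdio.h>

	int main(void)
	{
		unsigned long mask = 0x24f0f; /* CPUs 0-3, 8-11, 14 and 17 */
		int first = 1;

		for (int cpu = 0; cpu < 64; cpu++) {
			int start;

			if (!(mask & (1UL << cpu)))
				continue;
			start = cpu;
			/* extend the run while consecutive bits are set */
			while (cpu + 1 < 64 && (mask & (1UL << (cpu + 1))))
				cpu++;
			if (!first)
				printf(",");
			if (start == cpu)
				printf("%d", start);
			else
				printf("%d-%d", start, cpu);
			first = 0;
		}
		printf("\n"); /* prints: 0-3,8-11,14,17 */
		return 0;
	}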
diff --git a/Documentation/ABI/testing/debugfs-hisi-hpre b/Documentation/ABI/testing/debugfs-hisi-hpre
index b4be5f1db4b7..396de7bc735d 100644
--- a/Documentation/ABI/testing/debugfs-hisi-hpre
+++ b/Documentation/ABI/testing/debugfs-hisi-hpre
@@ -1,140 +1,150 @@
-What: /sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/regs
-Date: Sep 2019
-Contact: linux-crypto@vger.kernel.org
-Description: Dump debug registers from the HPRE cluster.
+What: /sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/regs
+Date: Sep 2019
+Contact: linux-crypto@vger.kernel.org
+Description: Dump debug registers from the HPRE cluster.
Only available for PF.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/cluster_ctrl
-Date: Sep 2019
-Contact: linux-crypto@vger.kernel.org
-Description: Write the HPRE core selection in the cluster into this file,
+What: /sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/cluster_ctrl
+Date: Sep 2019
+Contact: linux-crypto@vger.kernel.org
+Description: Write the HPRE core selection in the cluster into this file,
and then we can read the debug information of the core.
Only available for PF.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/rdclr_en
-Date: Sep 2019
-Contact: linux-crypto@vger.kernel.org
-Description: HPRE cores debug registers read clear control. 1 means enable
+What: /sys/kernel/debug/hisi_hpre/<bdf>/rdclr_en
+Date: Sep 2019
+Contact: linux-crypto@vger.kernel.org
+Description: HPRE cores debug registers read clear control. 1 means enable
register read clear, otherwise 0. Writing to this file has no
functional effect other than enabling or disabling the clearing
of the counters after these registers are read.
Only available for PF.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/current_qm
-Date: Sep 2019
-Contact: linux-crypto@vger.kernel.org
-Description: One HPRE controller has one PF and multiple VFs, each function
+What: /sys/kernel/debug/hisi_hpre/<bdf>/current_qm
+Date: Sep 2019
+Contact: linux-crypto@vger.kernel.org
+Description: One HPRE controller has one PF and multiple VFs, each function
has a QM. Select the QM that the qm directory below refers to.
Only available for PF.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/regs
-Date: Sep 2019
-Contact: linux-crypto@vger.kernel.org
-Description: Dump debug registers from the HPRE.
+What: /sys/kernel/debug/hisi_hpre/<bdf>/alg_qos
+Date: Jun 2021
+Contact: linux-crypto@vger.kernel.org
+Description: The <bdf> identifies the function, for both PF and VF.
+ The HPRE driver supports configuring each function's QoS: in the
+ host, write "<bdf> value" to alg_qos, e.g. "echo <bdf> value >
+ alg_qos". The value ranges from 1 to 1000, meaning 1/1000 to
+ 1000/1000 of the total QoS. Reading alg_qos returns the related
+ QoS in the host and VM, e.g. "cat alg_qos".
+
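As a write-side illustration of the alg_qos interface just described: the
path follows the What: line above, while the BDF and the value 500 (half
of the total QoS) are hypothetical. A minimal C sketch:

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical BDF; substitute the real function address. */
		const char *path =
			"/sys/kernel/debug/hisi_hpre/0000:75:00.0/alg_qos";
		FILE *f = fopen(path, "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		/* "<bdf> value", value in 1..1000 = value/1000 of QoS */
		fprintf(f, "0000:75:00.0 500\n");
		return fclose(f) ? 1 : 0;
	}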
+What: /sys/kernel/debug/hisi_hpre/<bdf>/regs
+Date: Sep 2019
+Contact: linux-crypto@vger.kernel.org
+Description: Dump debug registers from the HPRE.
Only available for PF.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/regs
-Date: Sep 2019
-Contact: linux-crypto@vger.kernel.org
-Description: Dump debug registers from the QM.
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/regs
+Date: Sep 2019
+Contact: linux-crypto@vger.kernel.org
+Description: Dump debug registers from the QM.
Available for PF and VF in host. VF in guest currently only
has one debug register.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/current_q
-Date: Sep 2019
-Contact: linux-crypto@vger.kernel.org
-Description: One QM may contain multiple queues. Select specific queue to
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/current_q
+Date: Sep 2019
+Contact: linux-crypto@vger.kernel.org
+Description: One QM may contain multiple queues. Select specific queue to
show its debug registers in above regs.
Only available for PF.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/clear_enable
-Date: Sep 2019
-Contact: linux-crypto@vger.kernel.org
-Description: QM debug registers(regs) read clear control. 1 means enable
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/clear_enable
+Date: Sep 2019
+Contact: linux-crypto@vger.kernel.org
+Description: QM debug registers(regs) read clear control. 1 means enable
register read clear, otherwise 0.
Writing to this file has no functional effect, only enable or
disable counters clear after reading of these registers.
Only available for PF.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/err_irq
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the number of invalid interrupts for
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/err_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of invalid interrupts for
QM task completion.
Available for both PF and VF, and take no other effect on HPRE.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/aeq_irq
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the number of QM async event queue interrupts.
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/aeq_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of QM async event queue interrupts.
Available for both PF and VF, and take no other effect on HPRE.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/abnormal_irq
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the number of interrupts for QM abnormal event.
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/abnormal_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of interrupts for QM abnormal event.
Available for both PF and VF, and take no other effect on HPRE.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/create_qp_err
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the number of queue allocation errors.
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/create_qp_err
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of queue allocation errors.
Available for both PF and VF, and take no other effect on HPRE.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/mb_err
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the number of failed QM mailbox commands.
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/mb_err
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of failed QM mailbox commands.
Available for both PF and VF, and take no other effect on HPRE.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/status
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the status of the QM.
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/status
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the status of the QM.
Four states: initiated, started, stopped and closed.
Available for both PF and VF, and take no other effect on HPRE.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_cnt
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the total number of sent requests.
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of sent requests.
Available for both PF and VF, and take no other effect on HPRE.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/recv_cnt
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the total number of received requests.
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/recv_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of received requests.
Available for both PF and VF, and take no other effect on HPRE.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_busy_cnt
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the total number of requests sent
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_busy_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of requests sent
with returning busy.
Available for both PF and VF, and take no other effect on HPRE.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_fail_cnt
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the total number of completed but error requests.
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_fail_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of requests that completed with an error.
Available for both PF and VF, and take no other effect on HPRE.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/invalid_req_cnt
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the total number of invalid requests being received.
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/invalid_req_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of invalid requests being received.
Available for both PF and VF, and take no other effect on HPRE.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/overtime_thrhld
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Set the threshold time for counting the request which is
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/overtime_thrhld
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Set the threshold time for counting the request which is
processed longer than the threshold.
0: disable(default), 1: 1 microsecond.
Available for both PF and VF, and take no other effect on HPRE.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/over_thrhld_cnt
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the total number of time out requests.
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/over_thrhld_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of timed out requests.
Available for both PF and VF, and take no other effect on HPRE.
diff --git a/Documentation/ABI/testing/debugfs-hisi-sec b/Documentation/ABI/testing/debugfs-hisi-sec
index 85feb4408e0f..2bf84ced484b 100644
--- a/Documentation/ABI/testing/debugfs-hisi-sec
+++ b/Documentation/ABI/testing/debugfs-hisi-sec
@@ -1,113 +1,123 @@
-What: /sys/kernel/debug/hisi_sec2/<bdf>/clear_enable
-Date: Oct 2019
-Contact: linux-crypto@vger.kernel.org
-Description: Enabling/disabling of clear action after reading
+What: /sys/kernel/debug/hisi_sec2/<bdf>/clear_enable
+Date: Oct 2019
+Contact: linux-crypto@vger.kernel.org
+Description: Enabling/disabling of clear action after reading
the SEC debug registers.
0: disable, 1: enable.
Only available for PF, and take no other effect on SEC.
-What: /sys/kernel/debug/hisi_sec2/<bdf>/current_qm
-Date: Oct 2019
-Contact: linux-crypto@vger.kernel.org
-Description: One SEC controller has one PF and multiple VFs, each function
+What: /sys/kernel/debug/hisi_sec2/<bdf>/current_qm
+Date: Oct 2019
+Contact: linux-crypto@vger.kernel.org
+Description: One SEC controller has one PF and multiple VFs, each function
has a QM. This file can be used to select the QM which below
qm refers to.
Only available for PF.
-What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_regs
-Date: Oct 2019
-Contact: linux-crypto@vger.kernel.org
-Description: Dump of QM related debug registers.
+What: /sys/kernel/debug/hisi_sec2/<bdf>/alg_qos
+Date: Jun 2021
+Contact: linux-crypto@vger.kernel.org
+Description: The <bdf> identifies the function, either PF or VF.
+ The SEC driver supports configuring the QoS of each function.
+ In the host, the QoS is set by writing a value to alg_qos,
+ such as "echo <value> > alg_qos". The QoS value ranges from 1
+ to 1000, meaning 1/1000 to 1000/1000 of the total QoS. Reading
+ alg_qos returns the function's QoS in both the host and the
+ VM, such as "cat alg_qos".
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_regs
+Date: Oct 2019
+Contact: linux-crypto@vger.kernel.org
+Description: Dump of QM related debug registers.
Available for PF and VF in host. VF in guest currently only
has one debug register.
-What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/current_q
-Date: Oct 2019
-Contact: linux-crypto@vger.kernel.org
-Description: One QM of SEC may contain multiple queues. Select specific
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/current_q
+Date: Oct 2019
+Contact: linux-crypto@vger.kernel.org
+Description: One QM of SEC may contain multiple queues. Select specific
queue to show its debug registers in above 'regs'.
Only available for PF.
-What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/clear_enable
-Date: Oct 2019
-Contact: linux-crypto@vger.kernel.org
-Description: Enabling/disabling of clear action after reading
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/clear_enable
+Date: Oct 2019
+Contact: linux-crypto@vger.kernel.org
+Description: Enabling/disabling of clear action after reading
the SEC's QM debug registers.
0: disable, 1: enable.
Only available for PF, and take no other effect on SEC.
-What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/err_irq
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the number of invalid interrupts for
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/err_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of invalid interrupts for
QM task completion.
Available for both PF and VF, and take no other effect on SEC.
-What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/aeq_irq
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the number of QM async event queue interrupts.
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/aeq_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of QM async event queue interrupts.
Available for both PF and VF, and take no other effect on SEC.
-What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/abnormal_irq
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the number of interrupts for QM abnormal event.
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/abnormal_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of interrupts for QM abnormal event.
Available for both PF and VF, and take no other effect on SEC.
-What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/create_qp_err
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the number of queue allocation errors.
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/create_qp_err
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of queue allocation errors.
Available for both PF and VF, and take no other effect on SEC.
-What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/mb_err
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the number of failed QM mailbox commands.
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/mb_err
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of failed QM mailbox commands.
Available for both PF and VF, and take no other effect on SEC.
-What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/status
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the status of the QM.
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/status
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the status of the QM.
Four states: initiated, started, stopped and closed.
Available for both PF and VF, and take no other effect on SEC.
-What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_cnt
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the total number of sent requests.
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of sent requests.
Available for both PF and VF, and take no other effect on SEC.
-What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/recv_cnt
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the total number of received requests.
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/recv_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of received requests.
Available for both PF and VF, and take no other effect on SEC.
-What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_busy_cnt
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the total number of requests sent with returning busy.
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_busy_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of requests sent with returning busy.
Available for both PF and VF, and take no other effect on SEC.
-What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/err_bd_cnt
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the total number of BD type error requests
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/err_bd_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of BD type error requests
that have been received.
Available for both PF and VF, and take no other effect on SEC.
-What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/invalid_req_cnt
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the total number of invalid requests being received.
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/invalid_req_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of invalid requests being received.
Available for both PF and VF, and take no other effect on SEC.
-What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/done_flag_cnt
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the total number of completed but marked error requests
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/done_flag_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of completed but marked-error requests
that have been received.
Available for both PF and VF, and take no other effect on SEC.
diff --git a/Documentation/ABI/testing/debugfs-hisi-zip b/Documentation/ABI/testing/debugfs-hisi-zip
index 3034a2bf99ca..bf1258bc6495 100644
--- a/Documentation/ABI/testing/debugfs-hisi-zip
+++ b/Documentation/ABI/testing/debugfs-hisi-zip
@@ -1,114 +1,124 @@
-What: /sys/kernel/debug/hisi_zip/<bdf>/comp_core[01]/regs
-Date: Nov 2018
-Contact: linux-crypto@vger.kernel.org
-Description: Dump of compression cores related debug registers.
+What: /sys/kernel/debug/hisi_zip/<bdf>/comp_core[01]/regs
+Date: Nov 2018
+Contact: linux-crypto@vger.kernel.org
+Description: Dump of compression cores related debug registers.
Only available for PF.
-What: /sys/kernel/debug/hisi_zip/<bdf>/decomp_core[0-5]/regs
-Date: Nov 2018
-Contact: linux-crypto@vger.kernel.org
-Description: Dump of decompression cores related debug registers.
+What: /sys/kernel/debug/hisi_zip/<bdf>/decomp_core[0-5]/regs
+Date: Nov 2018
+Contact: linux-crypto@vger.kernel.org
+Description: Dump of decompression cores related debug registers.
Only available for PF.
-What: /sys/kernel/debug/hisi_zip/<bdf>/clear_enable
-Date: Nov 2018
-Contact: linux-crypto@vger.kernel.org
-Description: Compression/decompression core debug registers read clear
+What: /sys/kernel/debug/hisi_zip/<bdf>/clear_enable
+Date: Nov 2018
+Contact: linux-crypto@vger.kernel.org
+Description: Compression/decompression core debug registers read clear
control. 1 means enable register read clear, otherwise 0.
Writing to this file has no functional effect, only enable or
disable counters clear after reading of these registers.
Only available for PF.
-What: /sys/kernel/debug/hisi_zip/<bdf>/current_qm
-Date: Nov 2018
-Contact: linux-crypto@vger.kernel.org
-Description: One ZIP controller has one PF and multiple VFs, each function
+What: /sys/kernel/debug/hisi_zip/<bdf>/current_qm
+Date: Nov 2018
+Contact: linux-crypto@vger.kernel.org
+Description: One ZIP controller has one PF and multiple VFs, each function
has a QM. Select the QM which below qm refers to.
Only available for PF.
-What: /sys/kernel/debug/hisi_zip/<bdf>/qm/regs
-Date: Nov 2018
-Contact: linux-crypto@vger.kernel.org
-Description: Dump of QM related debug registers.
+What: /sys/kernel/debug/hisi_zip/<bdf>/alg_qos
+Date: Jun 2021
+Contact: linux-crypto@vger.kernel.org
+Description: The <bdf> identifies the function, either PF or VF.
+ The ZIP driver supports configuring the QoS of each function.
+ In the host, the QoS is set by writing a value to alg_qos,
+ such as "echo <value> > alg_qos". The QoS value ranges from 1
+ to 1000, meaning 1/1000 to 1000/1000 of the total QoS. Reading
+ alg_qos returns the function's QoS in both the host and the
+ VM, such as "cat alg_qos".
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/regs
+Date: Nov 2018
+Contact: linux-crypto@vger.kernel.org
+Description: Dump of QM related debug registers.
Available for PF and VF in host. VF in guest currently only
has one debug register.
-What: /sys/kernel/debug/hisi_zip/<bdf>/qm/current_q
-Date: Nov 2018
-Contact: linux-crypto@vger.kernel.org
-Description: One QM may contain multiple queues. Select specific queue to
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/current_q
+Date: Nov 2018
+Contact: linux-crypto@vger.kernel.org
+Description: One QM may contain multiple queues. Select specific queue to
show its debug registers in above regs.
Only available for PF.
-What: /sys/kernel/debug/hisi_zip/<bdf>/qm/clear_enable
-Date: Nov 2018
-Contact: linux-crypto@vger.kernel.org
-Description: QM debug registers(regs) read clear control. 1 means enable
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/clear_enable
+Date: Nov 2018
+Contact: linux-crypto@vger.kernel.org
+Description: QM debug registers(regs) read clear control. 1 means enable
register read clear, otherwise 0.
Writing to this file has no functional effect, only enable or
disable counters clear after reading of these registers.
Only available for PF.
-What: /sys/kernel/debug/hisi_zip/<bdf>/qm/err_irq
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the number of invalid interrupts for
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/err_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of invalid interrupts for
QM task completion.
Available for both PF and VF, and take no other effect on ZIP.
-What: /sys/kernel/debug/hisi_zip/<bdf>/qm/aeq_irq
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the number of QM async event queue interrupts.
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/aeq_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of QM async event queue interrupts.
Available for both PF and VF, and take no other effect on ZIP.
-What: /sys/kernel/debug/hisi_zip/<bdf>/qm/abnormal_irq
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the number of interrupts for QM abnormal event.
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/abnormal_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of interrupts for QM abnormal event.
Available for both PF and VF, and take no other effect on ZIP.
-What: /sys/kernel/debug/hisi_zip/<bdf>/qm/create_qp_err
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the number of queue allocation errors.
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/create_qp_err
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of queue allocation errors.
Available for both PF and VF, and take no other effect on ZIP.
-What: /sys/kernel/debug/hisi_zip/<bdf>/qm/mb_err
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the number of failed QM mailbox commands.
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/mb_err
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of failed QM mailbox commands.
Available for both PF and VF, and take no other effect on ZIP.
-What: /sys/kernel/debug/hisi_zip/<bdf>/qm/status
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the status of the QM.
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/status
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the status of the QM.
Four states: initiated, started, stopped and closed.
Available for both PF and VF, and take no other effect on ZIP.
-What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_cnt
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the total number of sent requests.
+What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of sent requests.
Available for both PF and VF, and take no other effect on ZIP.
-What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/recv_cnt
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the total number of received requests.
+What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/recv_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of received requests.
Available for both PF and VF, and take no other effect on ZIP.
-What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_busy_cnt
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the total number of requests received
+What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_busy_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of requests sent
with returning busy.
Available for both PF and VF, and take no other effect on ZIP.
-What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/err_bd_cnt
-Date: Apr 2020
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the total number of BD type error requests
+What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/err_bd_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of BD type error requests
that have been received.
Available for both PF and VF, and take no other effect on ZIP.
diff --git a/Documentation/ABI/testing/sysfs-class-hwmon b/Documentation/ABI/testing/sysfs-class-hwmon
index 1f20687def44..653d4c75eddb 100644
--- a/Documentation/ABI/testing/sysfs-class-hwmon
+++ b/Documentation/ABI/testing/sysfs-class-hwmon
@@ -9,6 +9,14 @@ Description:
RO
+What: /sys/class/hwmon/hwmonX/label
+Description:
+ A descriptive label that allows a device within the system
+ to be uniquely identified.
+ The contents of the label are free-form.
+
+ RO
+
What: /sys/class/hwmon/hwmonX/update_interval
Description:
The interval at which the chip will update readings.
diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power
index fde21d900420..859501366777 100644
--- a/Documentation/ABI/testing/sysfs-class-power
+++ b/Documentation/ABI/testing/sysfs-class-power
@@ -468,6 +468,7 @@ Description:
auto: Charge normally, respect thresholds
inhibit-charge: Do not charge while AC is attached
force-discharge: Force discharge while AC is attached
+ ================ ====================================
What: /sys/class/power_supply/<supply_name>/technology
Date: May 2007
diff --git a/Documentation/ABI/testing/sysfs-class-thermal b/Documentation/ABI/testing/sysfs-class-thermal
index 2c52bb1f864c..8eee37982b2a 100644
--- a/Documentation/ABI/testing/sysfs-class-thermal
+++ b/Documentation/ABI/testing/sysfs-class-thermal
@@ -203,7 +203,7 @@ Description:
- for generic ACPI: should be "Fan", "Processor" or "LCD"
- for memory controller device on intel_menlow platform:
- should be "Memory controller".
+ should be "Memory controller".
RO, Required
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 61f5676a7429..2ad01cad7f1c 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -73,6 +73,7 @@ What: /sys/devices/system/cpu/cpuX/topology/core_id
/sys/devices/system/cpu/cpuX/topology/physical_package_id
/sys/devices/system/cpu/cpuX/topology/thread_siblings
/sys/devices/system/cpu/cpuX/topology/thread_siblings_list
+ /sys/devices/system/cpu/cpuX/topology/ppin
Date: December 2008
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: CPU topology files that describe a logical CPU's relationship
@@ -103,6 +104,11 @@ Description: CPU topology files that describe a logical CPU's relationship
thread_siblings_list: human-readable list of cpuX's hardware
threads within the same core as cpuX
+ ppin: human-readable Protected Processor Identification
+ Number of the socket that cpuX belongs to. There should be
+ one per physical_package_id. The file is readable only by
+ admin.
+
See Documentation/admin-guide/cputopology.rst for more information.
@@ -662,6 +668,7 @@ Description: Preferred MTE tag checking mode
================ ==============================================
"sync" Prefer synchronous mode
+ "asymm" Prefer asymmetric mode
"async" Prefer asynchronous mode
================ ==============================================
diff --git a/Documentation/ABI/testing/sysfs-driver-aspeed-uart-routing b/Documentation/ABI/testing/sysfs-driver-aspeed-uart-routing
index b363827da437..910df0e5815a 100644
--- a/Documentation/ABI/testing/sysfs-driver-aspeed-uart-routing
+++ b/Documentation/ABI/testing/sysfs-driver-aspeed-uart-routing
@@ -1,4 +1,4 @@
-What: /sys/bus/platform/drivers/aspeed-uart-routing/*/uart*
+What: /sys/bus/platform/drivers/aspeed-uart-routing/\*/uart\*
Date: September 2021
Contact: Oskar Senft <osk@google.com>
Chia-Wei Wang <chiawei_wang@aspeedtech.com>
@@ -9,7 +9,7 @@ Description: Selects the RX source of the UARTx device.
depends on the selected file.
e.g.
- cat /sys/bus/platform/drivers/aspeed-uart-routing/*.uart_routing/uart1
+ cat /sys/bus/platform/drivers/aspeed-uart-routing/\*.uart_routing/uart1
[io1] io2 io3 io4 uart2 uart3 uart4 io6
In this case, UART1 gets its input from IO1 (physical serial port 1).
@@ -17,7 +17,7 @@ Description: Selects the RX source of the UARTx device.
Users: OpenBMC. Proposed changes should be mailed to
openbmc@lists.ozlabs.org
-What: /sys/bus/platform/drivers/aspeed-uart-routing/*/io*
+What: /sys/bus/platform/drivers/aspeed-uart-routing/\*/io\*
Date: September 2021
Contact: Oskar Senft <osk@google.com>
Chia-Wei Wang <chiawei_wang@aspeedtech.com>
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 2416b03ff283..9b583dd0298b 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -55,8 +55,9 @@ Description: Controls the in-place-update policy.
0x04 F2FS_IPU_UTIL
0x08 F2FS_IPU_SSR_UTIL
0x10 F2FS_IPU_FSYNC
- 0x20 F2FS_IPU_ASYNC,
+ 0x20 F2FS_IPU_ASYNC
0x40 F2FS_IPU_NOCACHE
+ 0x80 F2FS_IPU_HONOR_OPU_WRITE
==== =================
Refer to segment.h for details.
@@ -98,6 +99,33 @@ Description: Controls the issue rate of discard commands that consist of small
checkpoint is triggered, and issued during the checkpoint.
By default, it is disabled with 0.
+What: /sys/fs/f2fs/<disk>/max_discard_request
+Date: December 2021
+Contact: "Konstantin Vyshetsky" <vkon@google.com>
+Description: Controls the number of discards a thread will issue at a time.
+ A higher number will allow the discard thread to finish its
+ work faster, at the cost of higher latency for incoming I/O.
+
+What: /sys/fs/f2fs/<disk>/min_discard_issue_time
+Date: December 2021
+Contact: "Konstantin Vyshetsky" <vkon@google.com>
+Description: Controls the interval the discard thread will wait between
+ issuing discard requests when there are discards to be issued and
+ no I/O aware interruptions occur.
+
+What: /sys/fs/f2fs/<disk>/mid_discard_issue_time
+Date: December 2021
+Contact: "Konstantin Vyshetsky" <vkon@google.com>
+Description: Controls the interval the discard thread will wait between
+ issuing discard requests when there are discards to be issued and
+ an I/O aware interruption occurs.
+
+What: /sys/fs/f2fs/<disk>/max_discard_issue_time
+Date: December 2021
+Contact: "Konstantin Vyshetsky" <vkon@google.com>
+Description: Controls the interval the discard thread will wait when there are
+ no discard operations to be issued.
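+
+ An illustrative sketch tying these four knobs together (the
+ values below are hypothetical, not tuning recommendations):
+
+ echo 8 > /sys/fs/f2fs/<disk>/max_discard_request
+ echo 50 > /sys/fs/f2fs/<disk>/min_discard_issue_time
+ echo 500 > /sys/fs/f2fs/<disk>/mid_discard_issue_time
+ echo 1000 > /sys/fs/f2fs/<disk>/max_discard_issue_time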
+
What: /sys/fs/f2fs/<disk>/discard_granularity
Date: July 2017
Contact: "Chao Yu" <yuchao0@huawei.com>
@@ -269,11 +297,16 @@ Description: Shows current reserved blocks in system, it may be temporarily
What: /sys/fs/f2fs/<disk>/gc_urgent
Date: August 2017
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
-Description: Do background GC aggressively when set. When gc_urgent = 1,
- background thread starts to do GC by given gc_urgent_sleep_time
- interval. When gc_urgent = 2, F2FS will lower the bar of
- checking idle in order to process outstanding discard commands
- and GC a little bit aggressively. It is set to 0 by default.
+Description: Do background GC aggressively when set. Set to 0 by default.
+ gc urgent high(1): does GC forcibly in a period of given
+ gc_urgent_sleep_time and ignores the I/O idling check. Uses the
+ greedy GC approach and turns SSR mode on.
+ gc urgent low(2): lowers the bar of checking I/O idling in
+ order to process outstanding discard commands and do GC a
+ little bit aggressively. Uses the cost-benefit GC approach.
+ gc urgent mid(3): does GC forcibly in a period of given
+ gc_urgent_sleep_time and executes a mid level of I/O idling check.
+ Uses the cost-benefit GC approach.
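+ For example, writing as below selects gc urgent low mode:
+
+ echo 2 > /sys/fs/f2fs/<disk>/gc_urgent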
What: /sys/fs/f2fs/<disk>/gc_urgent_sleep_time
Date: August 2017
@@ -430,6 +463,7 @@ Description: Show status of f2fs superblock in real time.
0x800 SBI_QUOTA_SKIP_FLUSH skip flushing quota in current CP
0x1000 SBI_QUOTA_NEED_REPAIR quota file may be corrupted
0x2000 SBI_IS_RESIZEFS resizefs is in process
+ 0x4000 SBI_IS_FREEZING freezing is in process
====== ===================== =================================
What: /sys/fs/f2fs/<disk>/ckpt_thread_ioprio
@@ -503,7 +537,7 @@ Date: July 2021
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: Show how many segments have been reclaimed by GC during a specific
GC mode (0: GC normal, 1: GC idle CB, 2: GC idle greedy,
- 3: GC idle AT, 4: GC urgent high, 5: GC urgent low)
+ 3: GC idle AT, 4: GC urgent high, 5: GC urgent low, 6: GC urgent mid)
You can re-initialize this value to "0".
What: /sys/fs/f2fs/<disk>/gc_segment_mode
@@ -540,3 +574,9 @@ Contact: "Daeho Jeong" <daehojeong@google.com>
Description: You can set the trial count limit for GC urgent high mode with this value.
If GC thread gets to the limit, the mode will turn back to GC normal mode.
By default, the value is zero, which means there is no limit like before.
+
+What: /sys/fs/f2fs/<disk>/max_roll_forward_node_blocks
+Date: January 2022
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: Controls max # of node block writes to be used for roll forward
+ recovery. This can limit the roll forward recovery time.
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-damon b/Documentation/ABI/testing/sysfs-kernel-mm-damon
new file mode 100644
index 000000000000..9e282065cbcf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-damon
@@ -0,0 +1,274 @@
+What: /sys/kernel/mm/damon/
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Interface for Data Access MONitoring (DAMON). Contains files
+ for controlling DAMON. For more details on DAMON itself,
+ please refer to Documentation/admin-guide/mm/damon/index.rst.
+
+What: /sys/kernel/mm/damon/admin/
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Interface for privileged users of DAMON. Contains files for
+ controlling DAMON that are aimed to be used by privileged users.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/nr_kdamonds
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a number 'N' to this file creates the number of
+ directories for controlling each DAMON worker thread (kdamond)
+ named '0' to 'N-1' under the kdamonds/ directory.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/state
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing 'on' or 'off' to this file makes the kdamond start or
+ stop, respectively. Reading the file returns a keyword
+ based on the current status. Writing 'update_schemes_stats' to
+ the file updates the contents of the kdamond's schemes stats
+ files.
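+
+ A minimal end-to-end sketch using the files documented in this
+ file, assuming DAMON sysfs support is enabled; the target pid
+ 1234 is hypothetical:
+
+ cd /sys/kernel/mm/damon/admin/kdamonds
+ echo 1 > nr_kdamonds
+ echo 1 > 0/contexts/nr_contexts
+ echo vaddr > 0/contexts/0/operations
+ echo 1 > 0/contexts/0/targets/nr_targets
+ echo 1234 > 0/contexts/0/targets/0/pid_target
+ echo on > 0/state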
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/pid
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the pid of the kdamond if it is
+ running.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/nr_contexts
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a number 'N' to this file creates the number of
+ directories for controlling each DAMON context named '0' to
+ 'N-1' under the contexts/ directory.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/operations
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a keyword for a monitoring operations set ('vaddr' for
+ virtual address space monitoring, and 'paddr' for physical
+ address space monitoring) to this file makes the context use
+ that operations set. Reading the file returns the keyword for
+ the operations set the context is set to use.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/sample_us
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the sampling interval of the
+ DAMON context in microseconds as the value. Reading this file
+ returns the value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/aggr_us
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the aggregation interval of
+ the DAMON context in microseconds as the value. Reading this
+ file returns the value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/update_us
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the update interval of the
+ DAMON context in microseconds as the value. Reading this file
+ returns the value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/nr_regions/min
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the minimum number of
+ monitoring regions of the DAMON context as the value. Reading
+ this file returns the value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/nr_regions/max
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the maximum number of
+ monitoring regions of the DAMON context as the value. Reading
+ this file returns the value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/nr_targets
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a number 'N' to this file creates the number of
+ directories for controlling each DAMON target of the context
+ named '0' to 'N-1' under the targets/ directory.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/pid_target
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets,
+ respectively, the pid of the target process if the context is
+ for virtual address space monitoring.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/nr_regions
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a number 'N' to this file creates the number of
+ directories for setting each DAMON target memory region of the
+ context named '0' to 'N-1' under the regions/ directory. In
+ case of the virtual address space monitoring, DAMON
+ automatically sets the target memory region based on the target
+ processes' mappings.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/<R>/start
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the start
+ address of the monitoring region.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/<R>/end
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the end
+ address of the monitoring region.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/nr_schemes
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a number 'N' to this file creates the number of
+ directories for controlling each DAMON-based operation scheme
+ of the context named '0' to 'N-1' under the schemes/ directory.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/action
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the action
+ of the scheme.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/sz/min
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the minimum
+ size of the scheme's target regions in bytes.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/sz/max
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the maximum
+ size of the scheme's target regions in bytes.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/nr_accesses/min
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the minimum
+ 'nr_accesses' of the scheme's target regions.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/nr_accesses/max
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the maximum
+ 'nr_accesses' of the scheme's target regions.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/age/min
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the minimum
+ 'age' of the scheme's target regions.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/age/max
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the maximum
+ 'age' of the scheme's target regions.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/ms
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the time
+ quota of the scheme in milliseconds.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/bytes
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the size
+ quota of the scheme in bytes.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/reset_interval_ms
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the quotas
+ charge reset interval of the scheme in milliseconds.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/sz_permil
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the
+ under-quota limit regions prioritization weight for 'size' in
+ permil.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/nr_accesses_permil
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the
+ under-quota limit regions prioritization weight for
+ 'nr_accesses' in permil.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/age_permil
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the
+ under-quota limit regions prioritization weight for 'age' in
+ permil.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/metric
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the metric
+ of the watermarks for the scheme. The writable/readable
+ keywords for this file are 'none' for disabling the watermarks
+ feature, or 'free_mem_rate' for the system's global free memory
+ rate in permil.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/interval_us
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the metric
+ check interval of the watermarks for the scheme in
+ microseconds.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/high
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the high
+ watermark of the scheme in permil.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/mid
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the mid
+ watermark of the scheme in permil.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/low
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the low
+ watermark of the scheme in permil.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/nr_tried
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the number of regions that the
+ scheme's action has been tried on.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/sz_tried
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the total size, in bytes, of the
+ regions that the scheme's action has been tried on.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/nr_applied
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the number of regions that the
+ scheme's action has been successfully applied to.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/sz_applied
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the total size, in bytes, of the
+ regions that the scheme's action has been successfully applied
+ to.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/qt_exceeds
+Date: Mar 2022
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the number of times the scheme's
+ quotas have been exceeded.
diff --git a/Documentation/Makefile b/Documentation/Makefile
index 9f4bd42cef18..64d44c1ecad3 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -26,7 +26,7 @@ SPHINX_CONF = conf.py
PAPER =
BUILDDIR = $(obj)/output
PDFLATEX = xelatex
-LATEXOPTS = -interaction=batchmode
+LATEXOPTS = -interaction=batchmode -no-shell-escape
ifeq ($(KBUILD_VERBOSE),0)
SPHINXOPTS += "-q"
diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst
index f2b3439edcc2..860fe651d645 100644
--- a/Documentation/accounting/psi.rst
+++ b/Documentation/accounting/psi.rst
@@ -92,7 +92,8 @@ Triggers can be set on more than one psi metric and more than one trigger
for the same psi metric can be specified. However for each trigger a separate
file descriptor is required to be able to poll it separately from others,
therefore for each trigger a separate open() syscall should be made even
-when opening the same psi interface file.
+when opening the same psi interface file. Write operations to a file descriptor
+with an already existing psi trigger will fail with EBUSY.
Monitors activate only when system enters stall state for the monitored
psi metric and deactivates upon exit from the stall state. While system is
diff --git a/Documentation/admin-guide/acpi/fan_performance_states.rst b/Documentation/admin-guide/acpi/fan_performance_states.rst
index 98fe5c333121..b9e4b4d146c1 100644
--- a/Documentation/admin-guide/acpi/fan_performance_states.rst
+++ b/Documentation/admin-guide/acpi/fan_performance_states.rst
@@ -60,3 +60,31 @@ For example::
When a given field is not populated or its value provided by the platform
firmware is invalid, the "not-defined" string is shown instead of the value.
+
+ACPI Fan Fine Grain Control
+=============================
+
+When the _FIF object specifies support for fine grain control, the fan speed
+can be set from 0 to 100% with the recommended minimum "step size" via the
+_FSL object. The user can adjust the fan speed using the thermal sysfs
+cooling device.
+
+Here the user can look at the fan performance states for a reference speed
+(speed_rpm) and set it by changing the cooling device cur_state. If fine
+grain control is supported, the user can also adjust to other speeds that
+are not defined in the performance states.
+
+Support for fine grain control is indicated via the sysfs attribute
+"fine_grain_control". If fine grain control is present, this attribute
+will show "1", otherwise "0".
+
+This sysfs attribute is presented in the same directory as performance states.
+
+ACPI Fan Performance Feedback
+=============================
+
+The optional _FST object provides status information for the fan device.
+This includes a field providing the current speed, in revolutions per
+minute, at which the fan is rotating.
+
+This speed is presented in the sysfs using the attribute "fan_speed_rpm",
+in the same directory as performance states.
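+
+For example, both attributes can be read with "cat"; the ACPI device
+path below is hypothetical::
+
+  $ cat /sys/bus/acpi/devices/INT3404:00/fine_grain_control
+  $ cat /sys/bus/acpi/devices/INT3404:00/fan_speed_rpm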
diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index 3e11926a4df9..54fe63745ed8 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -315,8 +315,8 @@ To use the feature, admin should set up backing device via::
echo /dev/sda5 > /sys/block/zramX/backing_dev
-before disksize setting. It supports only partition at this moment.
-If admin wants to use incompressible page writeback, they could do via::
+before disksize setting. It supports only partitions at this moment.
+If admin wants to use incompressible page writeback, they could do it via::
echo huge > /sys/block/zramX/writeback
@@ -341,9 +341,9 @@ Admin can request writeback of those idle pages at right timing via::
echo idle > /sys/block/zramX/writeback
-With the command, zram writeback idle pages from memory to the storage.
+With the command, zram will writeback idle pages from memory to the storage.
-If admin want to write a specific page in zram device to backing device,
+If an admin wants to write a specific page in zram device to the backing device,
they could write a page index into the interface.
echo "page_index=1251" > /sys/block/zramX/writeback
@@ -354,7 +354,7 @@ to guarantee storage health for entire product life.
To overcome the concern, zram supports "writeback_limit" feature.
The "writeback_limit_enable"'s default value is 0 so that it doesn't limit
-any writeback. IOW, if admin wants to apply writeback budget, he should
+any writeback. IOW, if admin wants to apply writeback budget, they should
enable writeback_limit_enable via::
$ echo 1 > /sys/block/zramX/writeback_limit_enable
@@ -365,7 +365,7 @@ until admin sets the budget via /sys/block/zramX/writeback_limit.
(If admin doesn't enable writeback_limit_enable, writeback_limit's value
assigned via /sys/block/zramX/writeback_limit is meaningless.)
-If admin want to limit writeback as per-day 400M, he could do it
+If admin wants to limit writeback as per-day 400M, they could do it
like below::
$ MB_SHIFT=20
@@ -375,16 +375,16 @@ like below::
$ echo 1 > /sys/block/zram0/writeback_limit_enable
If admins want to allow further write again once the budget is exhausted,
-he could do it like below::
+they could do it like below::
$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
/sys/block/zram0/writeback_limit
-If admin wants to see remaining writeback budget since last set::
+If an admin wants to see the remaining writeback budget since last set::
$ cat /sys/block/zramX/writeback_limit
-If admin want to disable writeback limit, he could do::
+If an admin wants to disable writeback limit, they could do::
$ echo 0 > /sys/block/zramX/writeback_limit_enable
@@ -393,7 +393,7 @@ system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of
writeback happened until you reset the zram to allocate extra writeback
budget in next setting is user's job.
-If admin wants to measure writeback count in a certain period, he could
+If admin wants to measure writeback count in a certain period, they could
know it via /sys/block/zram0/bd_stat's 3rd column.
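A minimal sketch for reading just that column with standard awk::

	$ awk '{ print $3 }' /sys/block/zram0/bd_stat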
memory tracking
diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst
index faac50149a22..2cc502a75ef6 100644
--- a/Documentation/admin-guide/cgroup-v1/memory.rst
+++ b/Documentation/admin-guide/cgroup-v1/memory.rst
@@ -64,6 +64,7 @@ Brief summary of control files.
threads
cgroup.procs show list of processes
cgroup.event_control an interface for event_fd()
+ This knob is not available on CONFIG_PREEMPT_RT systems.
memory.usage_in_bytes show current usage for memory
(See 5.5 for details)
memory.memsw.usage_in_bytes show current usage for memory+Swap
@@ -75,6 +76,7 @@ Brief summary of control files.
memory.max_usage_in_bytes show max memory usage recorded
memory.memsw.max_usage_in_bytes show max memory+Swap usage recorded
memory.soft_limit_in_bytes set/show soft limit of memory usage
+ This knob is not available on CONFIG_PREEMPT_RT systems.
memory.stat show various statistics
memory.use_hierarchy set/show hierarchical account enabled
This knob is deprecated and shouldn't be
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 5aa368d165da..69d7a6983f78 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1301,6 +1301,11 @@ PAGE_SIZE multiple when read back.
Amount of memory used to cache filesystem data,
including tmpfs and shared memory.
+ kernel (npn)
+ Amount of total kernel memory, including
+ (kernel_stack, pagetables, percpu, vmalloc, slab) in
+ addition to other kernel memory use cases.
+
kernel_stack
Amount of memory allocated to kernel stacks.
diff --git a/Documentation/admin-guide/gpio/index.rst b/Documentation/admin-guide/gpio/index.rst
index 7db367572f30..f6861ca16ffe 100644
--- a/Documentation/admin-guide/gpio/index.rst
+++ b/Documentation/admin-guide/gpio/index.rst
@@ -10,6 +10,7 @@ gpio
gpio-aggregator
sysfs
gpio-mockup
+ gpio-sim
.. only:: subproject and html
diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
index a2b22d5640ec..9e9556826450 100644
--- a/Documentation/admin-guide/hw-vuln/spectre.rst
+++ b/Documentation/admin-guide/hw-vuln/spectre.rst
@@ -60,8 +60,8 @@ privileged data touched during the speculative execution.
Spectre variant 1 attacks take advantage of speculative execution of
conditional branches, while Spectre variant 2 attacks use speculative
execution of indirect branches to leak privileged memory.
-See :ref:`[1] <spec_ref1>` :ref:`[5] <spec_ref5>` :ref:`[7] <spec_ref7>`
-:ref:`[10] <spec_ref10>` :ref:`[11] <spec_ref11>`.
+See :ref:`[1] <spec_ref1>` :ref:`[5] <spec_ref5>` :ref:`[6] <spec_ref6>`
+:ref:`[7] <spec_ref7>` :ref:`[10] <spec_ref10>` :ref:`[11] <spec_ref11>`.
Spectre variant 1 (Bounds Check Bypass)
---------------------------------------
@@ -131,6 +131,19 @@ steer its indirect branch speculations to gadget code, and measure the
speculative execution's side effects left in level 1 cache to infer the
victim's data.
+Yet another variant 2 attack vector is for the attacker to poison the
+Branch History Buffer (BHB) to speculatively steer an indirect branch
+to a specific Branch Target Buffer (BTB) entry, even if the entry isn't
+associated with the source address of the indirect branch. Specifically,
+the BHB might be shared across privilege levels even in the presence of
+Enhanced IBRS.
+
+Currently the only known real-world BHB attack vector is via
+unprivileged eBPF. Therefore, it's highly recommended to not enable
+unprivileged eBPF, especially when eIBRS is used (without retpolines).
+For a full mitigation against BHB attacks, it's recommended to use
+retpolines (or eIBRS combined with retpolines).
+
Attack scenarios
----------------
@@ -364,13 +377,15 @@ The possible values in this file are:
- Kernel status:
- ==================================== =================================
- 'Not affected' The processor is not vulnerable
- 'Vulnerable' Vulnerable, no mitigation
- 'Mitigation: Full generic retpoline' Software-focused mitigation
- 'Mitigation: Full AMD retpoline' AMD-specific software mitigation
- 'Mitigation: Enhanced IBRS' Hardware-focused mitigation
- ==================================== =================================
+ ======================================== =================================
+ 'Not affected' The processor is not vulnerable
+ 'Mitigation: None' Vulnerable, no mitigation
+ 'Mitigation: Retpolines' Use Retpoline thunks
+ 'Mitigation: LFENCE' Use LFENCE instructions
+ 'Mitigation: Enhanced IBRS' Hardware-focused mitigation
+ 'Mitigation: Enhanced IBRS + Retpolines' Hardware-focused + Retpolines
+ 'Mitigation: Enhanced IBRS + LFENCE' Hardware-focused + LFENCE
+ ======================================== =================================
- Firmware status: Show if Indirect Branch Restricted Speculation (IBRS) is
used to protect against Spectre variant 2 attacks when calling firmware (x86 only).
@@ -583,12 +598,13 @@ kernel command line.
Specific mitigations can also be selected manually:
- retpoline
- replace indirect branches
- retpoline,generic
- google's original retpoline
- retpoline,amd
- AMD-specific minimal thunk
+ retpoline auto pick between generic,lfence
+ retpoline,generic Retpolines
+ retpoline,lfence LFENCE; indirect branch
+ retpoline,amd alias for retpoline,lfence
+ eibrs enhanced IBRS
+ eibrs,retpoline enhanced IBRS + Retpolines
+ eibrs,lfence enhanced IBRS + LFENCE
Not specifying this option is equivalent to
spectre_v2=auto.
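For example, a hypothetical boot entry selecting enhanced IBRS plus
retpolines would pass::

	spectre_v2=eibrs,retpoline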
@@ -599,7 +615,7 @@ kernel command line.
spectre_v2=off. Spectre variant 1 mitigations
cannot be disabled.
-For spectre_v2_user see :doc:`/admin-guide/kernel-parameters`.
+For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt
Mitigation selection guide
--------------------------
@@ -681,7 +697,7 @@ AMD white papers:
.. _spec_ref6:
-[6] `Software techniques for managing speculation on AMD processors <https://developer.amd.com/wp-content/resources/90343-B_SoftwareTechniquesforManagingSpeculation_WP_7-18Update_FNL.pdf>`_.
+[6] `Software techniques for managing speculation on AMD processors <https://developer.amd.com/wp-content/resources/Managing-Speculation-on-AMD-Processors.pdf>`_.
ARM white papers:
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 1bedab498104..5bfafcbb9562 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -35,6 +35,7 @@ problems and bugs in particular.
:maxdepth: 1
reporting-issues
+ reporting-regressions
security-bugs
bug-hunting
bug-bisect
diff --git a/Documentation/admin-guide/iostats.rst b/Documentation/admin-guide/iostats.rst
index 9b14b0c2c9c4..609a3201fd4e 100644
--- a/Documentation/admin-guide/iostats.rst
+++ b/Documentation/admin-guide/iostats.rst
@@ -76,7 +76,7 @@ Field 3 -- # of sectors read (unsigned long)
Field 4 -- # of milliseconds spent reading (unsigned int)
This is the total number of milliseconds spent by all reads (as
- measured from __make_request() to end_that_request_last()).
+ measured from blk_mq_alloc_request() to __blk_mq_end_request()).
Field 5 -- # of writes completed (unsigned long)
This is the total number of writes completed successfully.
@@ -89,7 +89,7 @@ Field 7 -- # of sectors written (unsigned long)
Field 8 -- # of milliseconds spent writing (unsigned int)
This is the total number of milliseconds spent by all writes (as
- measured from __make_request() to end_that_request_last()).
+ measured from blk_mq_alloc_request() to __blk_mq_end_request()).
Field 9 -- # of I/Os currently in progress (unsigned int)
The only field that should go to zero. Incremented as requests are
@@ -120,7 +120,7 @@ Field 14 -- # of sectors discarded (unsigned long)
Field 15 -- # of milliseconds spent discarding (unsigned int)
This is the total number of milliseconds spent by all discards (as
- measured from __make_request() to end_that_request_last()).
+ measured from blk_mq_alloc_request() to __blk_mq_end_request()).
Field 16 -- # of flush requests completed
This is the total number of flush requests completed successfully.
diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst
index 3861a25faae1..8419019b6a88 100644
--- a/Documentation/admin-guide/kdump/vmcoreinfo.rst
+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
@@ -494,6 +494,14 @@ architecture which is used to lookup the page-tables for the Virtual
addresses in the higher VA range (refer to ARMv8 ARM document for
more details).
+MODULES_VADDR|MODULES_END|VMALLOC_START|VMALLOC_END|VMEMMAP_START|VMEMMAP_END
+-----------------------------------------------------------------------------
+
+Used to get the correct ranges:
+ MODULES_VADDR ~ MODULES_END-1 : Kernel module space.
+ VMALLOC_START ~ VMALLOC_END-1 : vmalloc() / ioremap() space.
+ VMEMMAP_START ~ VMEMMAP_END-1 : vmemmap region, used for struct page array.
+
arm
===
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f6b7ee64ace8..c658e5d2d52c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -724,6 +724,12 @@
hvc<n> Use the hypervisor console device <n>. This is for
both Xen and PowerPC hypervisors.
+ { null | "" }
+ Use to disable console output, i.e., to have kernel
+ console messages discarded.
+ This must be the only console= parameter used on the
+ kernel command line.
+
If the device connected to the port is not a TTY but a braille
device, prepend "brl," before the device type, for instance
console=brl,ttyS0
@@ -944,6 +950,30 @@
dump out devices still on the deferred probe list after
retrying.
+ dell_smm_hwmon.ignore_dmi=
+ [HW] Continue probing hardware even if DMI data
+ indicates that the driver is running on unsupported
+ hardware.
+
+ dell_smm_hwmon.force=
+ [HW] Activate driver even if SMM BIOS signature does
+ not match list of supported models and enable otherwise
+ blacklisted features.
+
+ dell_smm_hwmon.power_status=
+ [HW] Report power status in /proc/i8k
+ (disabled by default).
+
+ dell_smm_hwmon.restricted=
+ [HW] Allow controlling fans only if SYS_ADMIN
+ capability is set.
+
+ dell_smm_hwmon.fan_mult=
+ [HW] Factor to multiply fan speed with.
+
+ dell_smm_hwmon.fan_max=
+ [HW] Maximum configurable fan speed.
+
dfltcc= [HW,S390]
Format: { on | off | def_only | inf_only | always }
on: s390 zlib hardware support for compression on
@@ -1633,7 +1663,7 @@
[KNL] Requires CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
enabled.
Allows heavy hugetlb users to free up some more
- memory (6 * PAGE_SIZE for each 2MB hugetlb page).
+ memory (7 * PAGE_SIZE for each 2MB hugetlb page).
Format: { on | off (default) }
on: enable the feature
@@ -1711,17 +1741,6 @@
i810= [HW,DRM]
- i8k.ignore_dmi [HW] Continue probing hardware even if DMI data
- indicates that the driver is running on unsupported
- hardware.
- i8k.force [HW] Activate i8k driver even if SMM BIOS signature
- does not match list of supported models.
- i8k.power_status
- [HW] Report power status in /proc/i8k
- (disabled by default)
- i8k.restricted [HW] Allow controlling fans only if SYS_ADMIN
- capability is set.
-
i915.invert_brightness=
[DRM] Invert the sense of the variable that is used to
set the brightness of the panel backlight. Normally a
@@ -2835,6 +2854,9 @@
For details see: Documentation/admin-guide/hw-vuln/mds.rst
+ mem=nn[KMG] [HEXAGON] Set the memory size.
+ Must be specified, otherwise memory size will be 0.
+
mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
Amount of memory to be used in cases as follows:
@@ -2842,6 +2864,13 @@
2 when the kernel is not able to see the whole system memory;
3 memory that lies after 'mem=' boundary is excluded from
the hypervisor, then assigned to KVM guests.
+ 4 to limit the memory available for kdump kernel.
+
+ [ARC,MICROBLAZE] - the limit applies only to low memory,
+ high memory is not affected.
+
+ [ARM64] - only limits memory covered by the linear
+ mapping. The NOMAP regions are not affected.
[X86] Work as limiting max address. Use together
with memmap= to avoid physical address space collisions.
@@ -2852,6 +2881,14 @@
in above case 3, memory may need be hot added after boot
if system memory of hypervisor is not sufficient.
+ mem=nn[KMG]@ss[KMG]
+ [ARM,MIPS] - override the memory layout reported by
+ firmware.
+ Define a memory region of size nn[KMG] starting at
+ ss[KMG].
+ Multiple different regions can be specified with
+ multiple mem= parameters on the command line.
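+ For example, "mem=64M@0x80000000 mem=64M@0xa0000000"
+ describes two separate 64 MiB regions; the addresses
+ used here are purely illustrative.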
+
mem=nopentium [BUGS=X86-32] Disable usage of 4MB pages for kernel
memory.
@@ -3493,8 +3530,7 @@
difficult since unequal pointers can no longer be
compared. However, if this command-line option is
specified, then all normal pointers will have their true
- value printed. Pointers printed via %pK may still be
- hashed. This option should only be specified when
+ value printed. This option should only be specified when
debugging the kernel. Please do not use on production
kernels.
@@ -4512,6 +4548,8 @@
(the least-favored priority). Otherwise, when
RCU_BOOST is not set, valid values are 0-99 and
the default is zero (non-realtime operation).
+ When RCU_NOCB_CPU is set, also adjust the
+ priority of NOCB callback kthreads.
rcutree.rcu_nocb_gp_stride= [KNL]
Set the number of NOCB callback kthreads in
@@ -5369,8 +5407,12 @@
Specific mitigations can also be selected manually:
retpoline - replace indirect branches
- retpoline,generic - google's original retpoline
- retpoline,amd - AMD-specific minimal thunk
+ retpoline,generic - Retpolines
+ retpoline,lfence - LFENCE; indirect branch
+ retpoline,amd - alias for retpoline,lfence
+ eibrs - enhanced IBRS
+ eibrs,retpoline - enhanced IBRS + Retpolines
+ eibrs,lfence - enhanced IBRS + LFENCE
Not specifying this option is equivalent to
spectre_v2=auto.
diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst
index 59b84904a854..592ea9a50881 100644
--- a/Documentation/admin-guide/mm/damon/usage.rst
+++ b/Documentation/admin-guide/mm/damon/usage.rst
@@ -4,7 +4,7 @@
Detailed Usages
===============
-DAMON provides below three interfaces for different users.
+DAMON provides the below interfaces for different users.
- *DAMON user space tool.*
`This <https://github.com/awslabs/damo>`_ is for privileged people such as
@@ -14,17 +14,21 @@ DAMON provides below three interfaces for different users.
virtual and physical address spaces monitoring. For more detail, please
refer to its `usage document
<https://github.com/awslabs/damo/blob/next/USAGE.md>`_.
-- *debugfs interface.*
- :ref:`This <debugfs_interface>` is for privileged user space programmers who
+- *sysfs interface.*
+ :ref:`This <sysfs_interface>` is for privileged user space programmers who
want more optimized use of DAMON. Using this, users can use DAMON’s major
- features by reading from and writing to special debugfs files. Therefore,
- you can write and use your personalized DAMON debugfs wrapper programs that
- reads/writes the debugfs files instead of you. The `DAMON user space tool
+ features by reading from and writing to special sysfs files. Therefore,
+ you can write and use your personalized DAMON sysfs wrapper programs that
+ reads/writes the sysfs files instead of you. The `DAMON user space tool
<https://github.com/awslabs/damo>`_ is one example of such programs. It
supports both virtual and physical address spaces monitoring. Note that this
interface provides only simple :ref:`statistics <damos_stats>` for the
monitoring results. For detailed monitoring results, DAMON provides a
:ref:`tracepoint <tracepoint>`.
+- *debugfs interface.*
+ :ref:`This <debugfs_interface>` is almost identical to :ref:`sysfs interface
+ <sysfs_interface>`. This will be removed after next LTS kernel is released,
+ so users should move to the :ref:`sysfs interface <sysfs_interface>`.
- *Kernel Space Programming Interface.*
:doc:`This </vm/damon/api>` is for kernel space programmers. Using this,
users can utilize every feature of DAMON most flexibly and efficiently by
@@ -32,6 +36,340 @@ DAMON provides below three interfaces for different users.
DAMON for various address spaces. For detail, please refer to the interface
:doc:`document </vm/damon/api>`.
+.. _sysfs_interface:
+
+sysfs Interface
+===============
+
+DAMON sysfs interface is built when ``CONFIG_DAMON_SYSFS`` is defined. It
+creates multiple directories and files under its sysfs directory,
+``<sysfs>/kernel/mm/damon/``. You can control DAMON by writing to and reading
+from the files under the directory.
+
+For a short example, users can monitor the virtual address space of a given
+workload as below. ::
+
+ # cd /sys/kernel/mm/damon/admin/
+ # echo 1 > kdamonds/nr_kdamonds && echo 1 > kdamonds/0/contexts/nr_contexts
+ # echo vaddr > kdamonds/0/contexts/0/operations
+ # echo 1 > kdamonds/0/contexts/0/targets/nr_targets
+ # echo $(pidof <workload>) > kdamonds/0/contexts/0/targets/0/pid_target
+ # echo on > kdamonds/0/state
+
+Files Hierarchy
+---------------
+
+The files hierarchy of the DAMON sysfs interface is shown below.  In the below
+figure, parent-child relations are represented with indentations, each
+directory has a ``/`` suffix, and files in each directory are separated by
+commas (","). ::
+
+ /sys/kernel/mm/damon/admin
+ │ kdamonds/nr_kdamonds
+ │ │ 0/state,pid
+ │ │ │ contexts/nr_contexts
+ │ │ │ │ 0/operations
+ │ │ │ │ │ monitoring_attrs/
+ │ │ │ │ │ │ intervals/sample_us,aggr_us,update_us
+ │ │ │ │ │ │ nr_regions/min,max
+ │ │ │ │ │ targets/nr_targets
+ │ │ │ │ │ │ 0/pid_target
+ │ │ │ │ │ │ │ regions/nr_regions
+ │ │ │ │ │ │ │ │ 0/start,end
+ │ │ │ │ │ │ │ │ ...
+ │ │ │ │ │ │ ...
+ │ │ │ │ │ schemes/nr_schemes
+ │ │ │ │ │ │ 0/action
+ │ │ │ │ │ │ │ access_pattern/
+ │ │ │ │ │ │ │ │ sz/min,max
+ │ │ │ │ │ │ │ │ nr_accesses/min,max
+ │ │ │ │ │ │ │ │ age/min,max
+ │ │ │ │ │ │ │ quotas/ms,bytes,reset_interval_ms
+ │ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil
+ │ │ │ │ │ │ │ watermarks/metric,interval_us,high,mid,low
+ │ │ │ │ │ │ │ stats/nr_tried,sz_tried,nr_applied,sz_applied,qt_exceeds
+ │ │ │ │ │ │ ...
+ │ │ │ │ ...
+ │ │ ...
+
+Root
+----
+
+The root of the DAMON sysfs interface is ``<sysfs>/kernel/mm/damon/``, and it
+has one directory named ``admin``. The directory contains the files for
+privileged user space programs' control of DAMON.  User space tools or daemons
+having the root permission could use this directory.
+
+kdamonds/
+---------
+
+The monitoring-related information including request specifications and results
+is called a DAMON context.  DAMON executes each context with a kernel thread
+called kdamond, and multiple kdamonds could run in parallel.
+
+Under the ``admin`` directory, one directory, ``kdamonds``, which has files for
+controlling the kdamonds, exists.  In the beginning, this directory has only
+one file, ``nr_kdamonds``.  Writing a number (``N``) to the file creates ``N``
+child directories named ``0`` to ``N-1``.  Each directory represents one
+kdamond.
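+
+For example, two kdamonds could be created and listed as below (the shown
+output follows from the description above)::
+
+    # echo 2 > kdamonds/nr_kdamonds
+    # ls kdamonds/
+    0  1  nr_kdamonds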
+
+kdamonds/<N>/
+-------------
+
+In each kdamond directory, two files (``state`` and ``pid``) and one directory
+(``contexts``) exist.
+
+Reading ``state`` returns ``on`` if the kdamond is currently running, or
+``off`` if it is not running.  Writing ``on`` or ``off`` makes the kdamond
+enter that state.  Writing ``update_schemes_stats`` to the ``state`` file
+updates the contents of the stats files for each DAMON-based operation scheme
+of the kdamond.
+For details of the stats, please refer to :ref:`stats section
+<sysfs_schemes_stats>`.
+
+If the state is ``on``, reading ``pid`` shows the pid of the kdamond thread.
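+
+For example, a kdamond could be turned on and checked as below; the pid value
+shown is of course hypothetical::
+
+    # echo on > kdamonds/0/state
+    # cat kdamonds/0/state
+    on
+    # cat kdamonds/0/pid
+    1369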
+
+``contexts`` directory contains files for controlling the monitoring contexts
+that this kdamond will execute.
+
+kdamonds/<N>/contexts/
+----------------------
+
+In the beginning, this directory has only one file, ``nr_contexts``.  Writing a
+number (``N``) to the file creates ``N`` child directories named ``0`` to
+``N-1``.  Each directory represents one monitoring context.  At the
+moment, only one context per kdamond is supported, so only ``0`` or ``1`` can
+be written to the file.
+
+contexts/<N>/
+-------------
+
+In each context directory, one file (``operations``) and three directories
+(``monitoring_attrs``, ``targets``, and ``schemes``) exist.
+
+DAMON supports multiple types of monitoring operations, including those for
+virtual address spaces and the physical address space.  You can set and get
+the type of monitoring operations DAMON will use for the context by writing
+one of the below keywords to, and reading it from, the ``operations`` file.
+
+ - vaddr: Monitor virtual address spaces of specific processes
+ - paddr: Monitor the physical address space of the system
+
+contexts/<N>/monitoring_attrs/
+------------------------------
+
+Files for specifying attributes of the monitoring, including the required
+quality and efficiency of the monitoring, are in the ``monitoring_attrs``
+directory.  Specifically, two directories, ``intervals`` and ``nr_regions``,
+exist in this directory.
+
+Under the ``intervals`` directory, three files for DAMON's sampling interval
+(``sample_us``), aggregation interval (``aggr_us``), and update interval
+(``update_us``) exist. You can set and get the values in micro-seconds by
+writing to and reading from the files.
+
+Under the ``nr_regions`` directory, two files for the lower-bound and
+upper-bound of DAMON's monitoring regions (``min`` and ``max``, respectively),
+which control the monitoring overhead, exist.  You can set and get the values
+by writing to and reading from the files.
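+
+For example, below commands set the monitoring attributes to sample every 5
+ms, aggregate every 100 ms, update regions every 1 second, and keep between
+10 and 1,000 regions::
+
+    # cd kdamonds/0/contexts/0/monitoring_attrs
+    # echo 5000 > intervals/sample_us
+    # echo 100000 > intervals/aggr_us
+    # echo 1000000 > intervals/update_us
+    # echo 10 > nr_regions/min
+    # echo 1000 > nr_regions/max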
+
+For more details about the intervals and monitoring regions range, please refer
+to the Design document (:doc:`/vm/damon/design`).
+
+contexts/<N>/targets/
+---------------------
+
+In the beginning, this directory has only one file, ``nr_targets``.  Writing a
+number (``N``) to the file creates ``N`` child directories named ``0``
+to ``N-1``.  Each directory represents one monitoring target.
+
+targets/<N>/
+------------
+
+In each target directory, one file (``pid_target``) and one directory
+(``regions``) exist.
+
+If you wrote ``vaddr`` to the ``contexts/<N>/operations`` file, each target
+should be a process.  You can specify the process to DAMON by writing the pid
+of the
+process to the ``pid_target`` file.
+
+targets/<N>/regions
+-------------------
+
+When the ``vaddr`` monitoring operations set is being used (``vaddr`` is
+written to the ``contexts/<N>/operations`` file), DAMON automatically sets and
+updates the monitoring target regions so that the entire memory mappings of
+the target processes can be covered.  However, users may want to set the
+initial monitoring regions to specific address ranges.
+
+In contrast, DAMON does not automatically set and update the monitoring target
+regions when the ``paddr`` monitoring operations set is being used (``paddr``
+is written to the ``contexts/<N>/operations`` file).  Therefore, users should
+set the monitoring target regions by themselves in this case.
+
+For such cases, users can explicitly set the initial monitoring target regions
+as they want, by writing proper values to the files under this directory.
+
+In the beginning, this directory has only one file, ``nr_regions``.  Writing a
+number (``N``) to the file creates ``N`` child directories named ``0``
+to ``N-1``.  Each directory represents one initial monitoring target region.
+
+regions/<N>/
+------------
+
+In each region directory, you will find two files (``start`` and ``end``). You
+can set and get the start and end addresses of the initial monitoring target
+region by writing to and reading from the files, respectively.
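+
+For example, a single initial monitoring target region covering the (purely
+illustrative) address range [4096, 8192) of the first target could be set as
+below::
+
+    # cd kdamonds/0/contexts/0/targets/0/regions
+    # echo 1 > nr_regions
+    # echo 4096 > 0/start
+    # echo 8192 > 0/end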
+
+contexts/<N>/schemes/
+---------------------
+
+For usual DAMON-based data access aware memory management optimizations, users
+would normally want the system to apply a memory management action to a memory
+region of a specific access pattern. DAMON receives such formalized operation
+schemes from the user and applies those to the target memory regions. Users
+can get and set the schemes by reading from and writing to files under this
+directory.
+
+In the beginning, this directory has only one file, ``nr_schemes``.  Writing a
+number (``N``) to the file creates ``N`` child directories named ``0``
+to ``N-1``.  Each directory represents one DAMON-based operation scheme.
+
+schemes/<N>/
+------------
+
+In each scheme directory, four directories (``access_pattern``, ``quotas``,
+``watermarks``, and ``stats``) and one file (``action``) exist.
+
+The ``action`` file is for setting and getting the action you want to apply to
+memory regions having the specified access pattern of interest.  The keywords
+that can be written to and read from the file, and their meanings, are as
+below.
+
+ - ``willneed``: Call ``madvise()`` for the region with ``MADV_WILLNEED``
+ - ``cold``: Call ``madvise()`` for the region with ``MADV_COLD``
+ - ``pageout``: Call ``madvise()`` for the region with ``MADV_PAGEOUT``
+ - ``hugepage``: Call ``madvise()`` for the region with ``MADV_HUGEPAGE``
+ - ``nohugepage``: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``
+ - ``stat``: Do nothing but count the statistics
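+
+For example, the ``action`` of the first scheme of the first context could be
+set to ``pageout`` as below::
+
+    # echo pageout > kdamonds/0/contexts/0/schemes/0/action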
+
+schemes/<N>/access_pattern/
+---------------------------
+
+The target access pattern of each DAMON-based operation scheme is constructed
+with three ranges including the size of the region in bytes, number of
+monitored accesses per aggregate interval, and number of aggregated intervals
+for the age of the region.
+
+Under the ``access_pattern`` directory, three directories (``sz``,
+``nr_accesses``, and ``age``) each having two files (``min`` and ``max``)
+exist. You can set and get the access pattern for the given scheme by writing
+to and reading from the ``min`` and ``max`` files under ``sz``,
+``nr_accesses``, and ``age`` directories, respectively.
+
+schemes/<N>/quotas/
+-------------------
+
+The optimal ``target access pattern`` for each ``action`` is workload
+dependent, so it is not easy to find.  Worse yet, setting a scheme's action too
+aggressively can cause severe overhead.  To avoid such overhead, users can
+limit the time and
+size quota for each scheme. In detail, users can ask DAMON to try to use only
+up to specific time (``time quota``) for applying the action, and to apply the
+action to only up to specific amount (``size quota``) of memory regions having
+the target access pattern within a given time interval (``reset interval``).
+
+When the quota limit is expected to be exceeded, DAMON prioritizes found memory
+regions of the ``target access pattern`` based on their size, access frequency,
+and age. For personalized prioritization, users can set the weights for the
+three properties.
+
+Under the ``quotas`` directory, three files (``ms``, ``bytes``, and
+``reset_interval_ms``) and one directory (``weights``), which contains three
+files (``sz_permil``, ``nr_accesses_permil``, and ``age_permil``), exist.
+
+You can set the ``time quota`` in milliseconds, ``size quota`` in bytes, and
+``reset interval`` in milliseconds by writing the values to the three files,
+respectively. You can also set the prioritization weights for size, access
+frequency, and age in per-thousand unit by writing the values to the three
+files under the ``weights`` directory.
+
+schemes/<N>/watermarks/
+-----------------------
+
+To allow easy activation and deactivation of each scheme based on system
+status, DAMON provides a feature called watermarks. The feature receives five
+values called ``metric``, ``interval``, ``high``, ``mid``, and ``low``. The
+``metric`` is the system metric such as free memory ratio that can be measured.
+If the metric value of the system is higher than the value in ``high`` or lower
+than ``low`` at the moment, the scheme is deactivated.  If the value is lower
+than ``mid``, the scheme is activated.
+
+Under the watermarks directory, five files (``metric``, ``interval_us``,
+``high``, ``mid``, and ``low``) for setting each value exist.  You can set and
+get the five values by writing to and reading from the files, respectively.
+
+The keywords that can be written to the ``metric`` file, and their meanings,
+are as below.
+
+ - none: Ignore the watermarks
+ - free_mem_rate: System's free memory rate (per thousand)
+
+The ``interval`` should be written in microseconds.
+
+.. _sysfs_schemes_stats:
+
+schemes/<N>/stats/
+------------------
+
+DAMON counts the total number and bytes of regions that each scheme is tried
+to be applied to, the two numbers for the regions that each scheme is
+successfully applied to, and the total number of quota limit exceeds.  These
+statistics can be used for online analysis or tuning of the schemes.
+
+The statistics can be retrieved by reading the files under the ``stats``
+directory (``nr_tried``, ``sz_tried``, ``nr_applied``, ``sz_applied``, and
+``qt_exceeds``), respectively.  The files are not updated in real time, so you
+should ask the DAMON sysfs interface to update the content of the files for the
+stats by writing a special keyword, ``update_schemes_stats``, to the relevant
+``kdamonds/<N>/state`` file.
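+
+For example, the stats of the first scheme could be read as below; the
+numbers shown are of course hypothetical::
+
+    # echo update_schemes_stats > kdamonds/0/state
+    # cat kdamonds/0/contexts/0/schemes/0/stats/nr_tried
+    42
+    # cat kdamonds/0/contexts/0/schemes/0/stats/sz_tried
+    172032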
+
+Example
+~~~~~~~
+
+The below commands apply a scheme saying "If a memory region of size in [4KiB,
+8KiB] is showing accesses per aggregate interval in [0, 5] for an age in [10,
+20] aggregate intervals, page out the region.  For the paging out, use only up
+to 10ms per second, and also don't page out more than 1GiB per second.  Under
+the limitation, page out memory regions having longer age first.  Also, check
+the free memory rate of the system every 5 seconds, start the monitoring and
+paging out when the free memory rate becomes lower than 50%, but stop it if
+the free memory rate becomes larger than 60%, or lower than 30%". ::
+
+ # cd <sysfs>/kernel/mm/damon/admin
+ # # populate directories
+ # echo 1 > kdamonds/nr_kdamonds; echo 1 > kdamonds/0/contexts/nr_contexts;
+ # echo 1 > kdamonds/0/contexts/0/schemes/nr_schemes
+ # cd kdamonds/0/contexts/0/schemes/0
+ # # set the basic access pattern and the action
+ # echo 4096 > access_pattern/sz/min
+ # echo 8192 > access_pattern/sz/max
+ # echo 0 > access_pattern/nr_accesses/min
+ # echo 5 > access_pattern/nr_accesses/max
+ # echo 10 > access_pattern/age/min
+ # echo 20 > access_pattern/age/max
+ # echo pageout > action
+ # # set quotas
+ # echo 10 > quotas/ms
+ # echo $((1024*1024*1024)) > quotas/bytes
+ # echo 1000 > quotas/reset_interval_ms
+ # # set watermark
+ # echo free_mem_rate > watermarks/metric
+ # echo 5000000 > watermarks/interval_us
+ # echo 600 > watermarks/high
+ # echo 500 > watermarks/mid
+ # echo 300 > watermarks/low
+
+Please note that it's highly recommended to use user space tools like `damo
+<https://github.com/awslabs/damo>`_ rather than manually reading and writing
the files as above.  The above is only an example.
.. _debugfs_interface:
@@ -47,7 +385,7 @@ Attributes
----------
Users can get and set the ``sampling interval``, ``aggregation interval``,
-``regions update interval``, and min/max number of monitoring target regions by
+``update interval``, and min/max number of monitoring target regions by
reading from and writing to the ``attrs`` file. To know about the monitoring
attributes in detail, please refer to the :doc:`/vm/damon/design`. For
example, below commands set those values to 5 ms, 100 ms, 1,000 ms, 10 and
@@ -108,24 +446,28 @@ In such cases, users can explicitly set the initial monitoring target regions
as they want, by writing proper values to the ``init_regions`` file. Each line
of the input should represent one region in below form.::
- <target id> <start address> <end address>
+ <target idx> <start address> <end address>
-The ``target id`` should already in ``target_ids`` file, and the regions should
-be passed in address order. For example, below commands will set a couple of
-address ranges, ``1-100`` and ``100-200`` as the initial monitoring target
-region of process 42, and another couple of address ranges, ``20-40`` and
-``50-100`` as that of process 4242.::
+The ``target idx`` should be the index of the target in ``target_ids`` file,
+starting from ``0``, and the regions should be passed in address order. For
+example, below commands will set a couple of address ranges, ``1-100`` and
+``100-200`` as the initial monitoring target region of pid 42, which is the
+first one (index ``0``) in ``target_ids``, and another couple of address
+ranges, ``20-40`` and ``50-100`` as that of pid 4242, which is the second one
+(index ``1``) in ``target_ids``.::
# cd <debugfs>/damon
- # echo "42 1 100
- 42 100 200
- 4242 20 40
- 4242 50 100" > init_regions
+ # cat target_ids
+ 42 4242
+ # echo "0 1 100
+ 0 100 200
+ 1 20 40
+ 1 50 100" > init_regions
Note that this sets the initial monitoring target regions only. In case of
virtual memory monitoring, DAMON will automatically update the boundary of the
-regions after one ``regions update interval``. Therefore, users should set the
-``regions update interval`` large enough in this case, if they don't want the
+regions after one ``update interval``. Therefore, users should set the
+``update interval`` large enough in this case, if they don't want the
update.
diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst
index bfc28704856c..6e2e416af783 100644
--- a/Documentation/admin-guide/mm/pagemap.rst
+++ b/Documentation/admin-guide/mm/pagemap.rst
@@ -23,7 +23,7 @@ There are four components to pagemap:
* Bit 56 page exclusively mapped (since 4.2)
* Bit 57 pte is uffd-wp write-protected (since 5.13) (see
:ref:`Documentation/admin-guide/mm/userfaultfd.rst <userfaultfd>`)
- * Bits 57-60 zero
+ * Bits 58-60 zero
* Bit 61 page is file-page or shared-anon (since 3.5)
* Bit 62 page swapped
* Bit 63 page present
diff --git a/Documentation/admin-guide/mm/zswap.rst b/Documentation/admin-guide/mm/zswap.rst
index 8edb8d578caf..6e6f7b0d6562 100644
--- a/Documentation/admin-guide/mm/zswap.rst
+++ b/Documentation/admin-guide/mm/zswap.rst
@@ -130,9 +130,25 @@ attribute, e.g.::
echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
When zswap same-filled page identification is disabled at runtime, it will stop
-checking for the same-value filled pages during store operation. However, the
-existing pages which are marked as same-value filled pages remain stored
-unchanged in zswap until they are either loaded or invalidated.
+checking for the same-value filled pages during store operation.
+In other words, every page will then be considered non-same-value filled.
+However, the existing pages which are marked as same-value filled pages remain
+stored unchanged in zswap until they are either loaded or invalidated.
+
+In some circumstances it might be advantageous to make use of just the zswap
+ability to efficiently store same-filled pages without enabling the whole
+compressed page storage.
+In this case the handling of non-same-value pages by zswap (enabled by default)
+can be disabled by setting the ``non_same_filled_pages_enabled`` attribute
+to 0, e.g. ``zswap.non_same_filled_pages_enabled=0``.
+It can also be enabled and disabled at runtime using the sysfs
+``non_same_filled_pages_enabled`` attribute, e.g.::
+
+ echo 1 > /sys/module/zswap/parameters/non_same_filled_pages_enabled
+
+Disabling both ``zswap.same_filled_pages_enabled`` and
+``zswap.non_same_filled_pages_enabled`` effectively disables accepting any new
+pages by zswap.
To prevent zswap from shrinking pool when zswap is full and there's a high
pressure on swap (this will result in flipping pages in and out zswap pool
diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
index 5a8f2529a033..69b23f087c05 100644
--- a/Documentation/admin-guide/perf/index.rst
+++ b/Documentation/admin-guide/perf/index.rst
@@ -8,6 +8,7 @@ Performance monitor support
:maxdepth: 1
hisi-pmu
+ hisi-pcie-pmu
imx-ddr
qcom_l2_pmu
qcom_l3_pmu
diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
index 2f066df4ee9c..1923cb25073b 100644
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -369,6 +369,32 @@ governor (for the policies it is attached to), or by the ``CPUFreq`` core (for t
policies with other scaling governors).
+Tracer Tool
+-------------
+
+``amd_pstate_tracer.py`` can record and parse ``amd-pstate`` trace logs, then
+generate performance plots. This utility can be used to debug and tune the
+performance of the ``amd-pstate`` driver. The tracer tool needs to import the
+intel pstate tracer.
+
+The tracer tool is located in ``linux/tools/power/x86/amd_pstate_tracer``. It
+can be used in two ways. If a trace file is available, directly parse the file
+with the command ::
+
+ ./amd_pstate_trace.py [-c cpus] -t <trace_file> -n <test_name>
+
+Or generate a trace file with root privileges, then parse and plot it with the command ::
+
+ sudo ./amd_pstate_trace.py [-c cpus] -n <test_name> -i <interval> [-m kbytes]
+
+The test results can be found in ``results/test_name``. The following is an
+example of part of the output. ::
+
+ common_cpu common_secs common_usecs min_perf des_perf max_perf freq mperf apef tsc load duration_ms sample_num elapsed_time common_comm
+ CPU_005 712 116384 39 49 166 0.7565 9645075 2214891 38431470 25.1 11.646 469 2.496 kworker/5:0-40
+ CPU_006 712 116408 39 49 166 0.6769 8950227 1839034 37192089 24.06 11.272 470 2.496 kworker/6:0-1264
+
+
Reference
===========
diff --git a/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst b/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst
new file mode 100644
index 000000000000..09169d935835
--- /dev/null
+++ b/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst
@@ -0,0 +1,60 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+==============================
+Intel Uncore Frequency Scaling
+==============================
+
+:Copyright: |copy| 2022 Intel Corporation
+
+:Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+Introduction
+------------
+
+The uncore can consume a significant amount of power in Intel's Xeon servers,
+depending on the workload characteristics. To optimize the total power and
+improve overall
+performance, SoCs have internal algorithms for scaling uncore frequency. These
+algorithms monitor workload usage of uncore and set a desirable frequency.
+
+It is possible that users have different expectations of uncore performance and
+want to have control over it. The objective is similar to allowing users to set
+the scaling min/max frequencies via cpufreq sysfs to improve CPU performance.
+Users may have some latency sensitive workloads where they do not want any
+change to uncore frequency. Also, users may have workloads which require
+different core and uncore performance at distinct phases and they may want to
+use both cpufreq and the uncore scaling interface to distribute power and
+improve overall performance.
+
+Sysfs Interface
+---------------
+
+To control uncore frequency, a sysfs interface is provided in the directory:
+`/sys/devices/system/cpu/intel_uncore_frequency/`.
+
+There is one directory for each package and die combination as the scope of
+uncore scaling control is per die in multiple die/package SoCs or per
+package for single die per package SoCs. The name represents the
+scope of control. For example: 'package_00_die_00' is for package id 0 and
+die 0.
+
+Each package_*_die_* contains the following attributes:
+
+``initial_max_freq_khz``
+ Out of reset, this attribute represents the maximum possible frequency.
+ This is a read-only attribute. If users adjust max_freq_khz,
+ they can always go back to maximum using the value from this attribute.
+
+``initial_min_freq_khz``
+ Out of reset, this attribute represents the minimum possible frequency.
+ This is a read-only attribute. If users adjust min_freq_khz,
+ they can always go back to minimum using the value from this attribute.
+
+``max_freq_khz``
+ This attribute is used to set the maximum uncore frequency.
+
+``min_freq_khz``
+ This attribute is used to set the minimum uncore frequency.
+
+``current_freq_khz``
+ This attribute is used to get the current uncore frequency.
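+
+As an example sketch, the maximum uncore frequency of package 0, die 0 could
+be lowered to 2 GHz as below; the frequency values shown are hypothetical::
+
+    # cd /sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00
+    # cat initial_max_freq_khz
+    2400000
+    # echo 2000000 > max_freq_khz
+    # cat current_freq_khz
+    1900000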
diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst
index 5d2757e2de65..ee45887811ff 100644
--- a/Documentation/admin-guide/pm/working-state.rst
+++ b/Documentation/admin-guide/pm/working-state.rst
@@ -15,3 +15,4 @@ Working-State Power Management
cpufreq_drivers
intel_epb
intel-speed-select
+ intel_uncore_frequency_scaling
diff --git a/Documentation/admin-guide/reporting-issues.rst b/Documentation/admin-guide/reporting-issues.rst
index d7ac13f789cc..ec62151fe672 100644
--- a/Documentation/admin-guide/reporting-issues.rst
+++ b/Documentation/admin-guide/reporting-issues.rst
@@ -1,14 +1,5 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
-..
- If you want to distribute this text under CC-BY-4.0 only, please use 'The
- Linux kernel developers' for author attribution and link this as source:
- https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst
-..
- Note: Only the content of this RST file as found in the Linux kernel sources
- is available under CC-BY-4.0, as versions of this text that were processed
- (for example by the kernel's build system) might contain content taken from
- files which use a more restrictive license.
-
+.. See the bottom of this file for additional redistribution information.
Reporting issues
++++++++++++++++
@@ -395,22 +386,16 @@ fixed as soon as possible, hence there are 'issues of high priority' that get
handled slightly differently in the reporting process. Three types of cases
qualify: regressions, security issues, and really severe problems.
-You deal with a 'regression' if something that worked with an older version of
-the Linux kernel does not work with a newer one or somehow works worse with it.
-It thus is a regression when a WiFi driver that did a fine job with Linux 5.7
-somehow misbehaves with 5.8 or doesn't work at all. It's also a regression if
-an application shows erratic behavior with a newer kernel, which might happen
-due to incompatible changes in the interface between the kernel and the
-userland (like procfs and sysfs). Significantly reduced performance or
-increased power consumption also qualify as regression. But keep in mind: the
-new kernel needs to be built with a configuration that is similar to the one
-from the old kernel (see below how to achieve that). That's because the kernel
-developers sometimes can not avoid incompatibilities when implementing new
-features; but to avoid regressions such features have to be enabled explicitly
-during build time configuration.
+You deal with a regression if some application or practical use case running
+fine with one Linux kernel works worse or not at all with a newer version
+compiled using a similar configuration. The document
+Documentation/admin-guide/reporting-regressions.rst explains this in more
+detail. It also provides a good deal of other information about regressions you
+might want to be aware of; it for example explains how to add your issue to the
+list of tracked regressions, to ensure it won't fall through the cracks.
What qualifies as security issue is left to your judgment. Consider reading
-'Documentation/admin-guide/security-bugs.rst' before proceeding, as it
+Documentation/admin-guide/security-bugs.rst before proceeding, as it
provides additional details how to best handle security issues.
An issue is a 'really severe problem' when something totally unacceptably bad
@@ -517,7 +502,7 @@ line starting with 'CPU:'. It should end with 'Not tainted' if the kernel was
not tainted when it noticed the problem; it was tainted if you see 'Tainted:'
followed by a few spaces and some letters.
-If your kernel is tainted, study 'Documentation/admin-guide/tainted-kernels.rst'
+If your kernel is tainted, study Documentation/admin-guide/tainted-kernels.rst
to find out why. Try to eliminate the reason. Often it's caused by one of these
three things:
@@ -1043,7 +1028,7 @@ down the culprit, as maintainers often won't have the time or setup at hand to
reproduce it themselves.
To find the change there is a process called 'bisection' which the document
-'Documentation/admin-guide/bug-bisect.rst' describes in detail. That process
+Documentation/admin-guide/bug-bisect.rst describes in detail. That process
will often require you to build about ten to twenty kernel images, trying to
reproduce the issue with each of them before building the next. Yes, that takes
some time, but don't worry, it works a lot quicker than most people assume.
@@ -1073,10 +1058,11 @@ When dealing with regressions make sure the issue you face is really caused by
the kernel and not by something else, as outlined above already.
In the whole process keep in mind: an issue only qualifies as regression if the
-older and the newer kernel got built with a similar configuration. The best way
-to archive this: copy the configuration file (``.config``) from the old working
-kernel freshly to each newer kernel version you try. Afterwards run ``make
-olddefconfig`` to adjust it for the needs of the new version.
+older and the newer kernel got built with a similar configuration. This can be
+achieved by using ``make olddefconfig``, as explained in more detail by
+Documentation/admin-guide/reporting-regressions.rst; that document also
+provides a good deal of other information about regressions you might want to be
+aware of.
Write and send the report
@@ -1283,7 +1269,7 @@ them when sending the report by mail. If you filed it in a bug tracker, forward
the report's text to these addresses; but on top of it put a small note where
you mention that you filed it with a link to the ticket.
-See 'Documentation/admin-guide/security-bugs.rst' for more information.
+See Documentation/admin-guide/security-bugs.rst for more information.
Duties after the report went out
@@ -1571,7 +1557,7 @@ Once your report is out your might get asked to do a proper one, as it allows to
pinpoint the exact change that causes the issue (which then can easily get
reverted to fix the issue quickly). Hence consider to do a proper bisection
right away if time permits. See the section 'Special care for regressions' and
-the document 'Documentation/admin-guide/bug-bisect.rst' for details how to
+the document Documentation/admin-guide/bug-bisect.rst for details how to
perform one. In case of a successful bisection add the author of the culprit to
the recipients; also CC everyone in the signed-off-by chain, which you find at
the end of its commit message.
@@ -1594,7 +1580,7 @@ Some fixes are too complex
Even small and seemingly obvious code-changes sometimes introduce new and
totally unexpected problems. The maintainers of the stable and longterm kernels
are very aware of that and thus only apply changes to these kernels that are
-within rules outlined in 'Documentation/process/stable-kernel-rules.rst'.
+within rules outlined in Documentation/process/stable-kernel-rules.rst.
Complex or risky changes for example do not qualify and thus only get applied
to mainline. Other fixes are easy to get backported to the newest stable and
@@ -1756,10 +1742,23 @@ art will lay some groundwork to improve the situation over time.
..
- This text is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If you
- spot a typo or small mistake, feel free to let him know directly and he'll
- fix it. You are free to do the same in a mostly informal way if you want
- to contribute changes to the text, but for copyright reasons please CC
+ end-of-content
+..
+ This document is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If
+ you spot a typo or small mistake, feel free to let him know directly and
+ he'll fix it. You are free to do the same in a mostly informal way if you
+ want to contribute changes to the text, but for copyright reasons please CC
linux-doc@vger.kernel.org and "sign-off" your contribution as
Documentation/process/submitting-patches.rst outlines in the section "Sign
your work - the Developer's Certificate of Origin".
+..
+ This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
+ of the file. If you want to distribute this text under CC-BY-4.0 only,
+ please use "The Linux kernel developers" for author attribution and link
+ this as source:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst
+..
+ Note: Only the content of this RST file as found in the Linux kernel sources
+ is available under CC-BY-4.0, as versions of this text that were processed
+ (for example by the kernel's build system) might contain content taken from
+ files which use a more restrictive license.
diff --git a/Documentation/admin-guide/reporting-regressions.rst b/Documentation/admin-guide/reporting-regressions.rst
new file mode 100644
index 000000000000..d8adccdae23f
--- /dev/null
+++ b/Documentation/admin-guide/reporting-regressions.rst
@@ -0,0 +1,451 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
+.. [see the bottom of this file for redistribution information]
+
+Reporting regressions
++++++++++++++++++++++
+
+"*We don't cause regressions*" is the first rule of Linux kernel development;
+Linux founder and lead developer Linus Torvalds established it himself and
+ensures it's obeyed.
+
+This document describes what the rule means for users and how the Linux kernel's
+development model ensures all reported regressions are addressed; aspects relevant
+for kernel developers are left to Documentation/process/handling-regressions.rst.
+
+
+The important bits (aka "TL;DR")
+================================
+
+#. It's a regression if something running fine with one Linux kernel works worse
+ or not at all with a newer version. Note, the newer kernel has to be compiled
+ using a similar configuration; the detailed explanations below describe this
+ and other fine print in more detail.
+
+#. Report your issue as outlined in Documentation/admin-guide/reporting-issues.rst,
+ it already covers all aspects important for regressions, which are repeated
+ below for convenience. Two of them are important: start your report's subject
+ with "[REGRESSION]" and CC or forward it to `the regression mailing list
+ <https://lore.kernel.org/regressions/>`_ (regressions@lists.linux.dev).
+
+#. Optional, but recommended: when sending or forwarding your report, make the
+ Linux kernel regression tracking bot "regzbot" track the issue by specifying
+ when the regression started like this::
+
+ #regzbot introduced v5.13..v5.14-rc1
+
+
+All the details on Linux kernel regressions relevant for users
+==============================================================
+
+
+The important basics
+--------------------
+
+
+What is a "regression" and what is the "no regressions rule"?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It's a regression if some application or practical use case running fine with
+one Linux kernel works worse or not at all with a newer version compiled using a
+similar configuration. The "no regressions rule" forbids this from happening; if
+it happens by accident, developers that caused it are expected to quickly fix
+the issue.
+
+It thus is a regression when a WiFi driver from Linux 5.13 works fine, but with
+5.14 doesn't work at all, works significantly slower, or misbehaves somehow.
+It's also a regression if a perfectly working application suddenly shows erratic
+behavior with a newer kernel version; such issues can be caused by changes in
+procfs, sysfs, or one of the many other interfaces Linux provides to userland
+software. But keep in mind, as mentioned earlier: 5.14 in this example needs to
+be built from a configuration similar to the one from 5.13. This can be achieved
+using ``make olddefconfig``, as explained in more detail below.
+
+Note the "practical use case" in the first sentence of this section: developers
+despite the "no regressions" rule are free to change any aspect of the kernel
+and even APIs or ABIs to userland, as long as no existing application or use
+case breaks.
+
+Also be aware the "no regressions" rule covers only interfaces the kernel
+provides to the userland. It thus does not apply to kernel-internal interfaces
+like the module API, which some externally developed drivers use to hook into
+the kernel.
+
+How do I report a regression?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Just report the issue as outlined in
+Documentation/admin-guide/reporting-issues.rst, it already describes the
+important points. The following aspects outlined there are especially relevant
+for regressions:
+
+ * When checking for existing reports to join, also search the `archives of the
+ Linux regressions mailing list <https://lore.kernel.org/regressions/>`_ and
+ `regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_.
+
+ * Start your report's subject with "[REGRESSION]".
+
+ * In your report, clearly mention the last kernel version that worked fine and
+ the first broken one. Ideally try to find the exact change causing the
+ regression using a bisection, as explained below in more detail.
+
+ * Remember to let the Linux regressions mailing list
+ (regressions@lists.linux.dev) know about your report:
+
+ * If you report the regression by mail, CC the regressions list.
+
+ * If you report your regression to some bug tracker, forward the submitted
+ report by mail to the regressions list while CCing the maintainer and the
+ mailing list for the subsystem in question.
+
+ If it's a regression within a stable or longterm series (e.g.
+ v5.15.3..v5.15.5), remember to CC the `Linux stable mailing list
+ <https://lore.kernel.org/stable/>`_ (stable@vger.kernel.org).
+
+ In case you performed a successful bisection, add everyone to the CC the
+ culprit's commit message mentions in lines starting with "Signed-off-by:".
+
+When CCing or forwarding your report to the list, consider directly telling the
+aforementioned Linux kernel regression tracking bot about your report. To do
+that, include a paragraph like this in your mail::
+
+ #regzbot introduced: v5.13..v5.14-rc1
+
+Regzbot will then consider your mail a report for a regression introduced in the
+specified version range. In the above case, Linux v5.13 still worked fine and Linux
+v5.14-rc1 was the first version where you encountered the issue. If you
+performed a bisection to find the commit that caused the regression, specify the
+culprit's commit-id instead::
+
+ #regzbot introduced: 1f2e3d4c5d
+
+Placing such a "regzbot command" is in your interest, as it will ensure the
+report won't fall through the cracks unnoticed. If you omit this, the Linux
+kernel's regressions tracker will take care of telling regzbot about your
+regression, as long as you send a copy to the regressions mailing lists. But the
+regression tracker is just one human which sometimes has to rest or occasionally
+might even enjoy some time away from computers (as crazy as that might sound).
+Relying on this person thus will result in an unnecessary delay before the
+regression becomes mentioned `on the list of tracked and unresolved Linux
+kernel regressions <https://linux-regtracking.leemhuis.info/regzbot/>`_ and the
+weekly regression reports sent by regzbot. Such delays can result in Linus
+Torvalds being unaware of important regressions when deciding between "continue
+development or call this finished and release the final?".
+
+Are really all regressions fixed?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Nearly all of them are, as long as the change causing the regression (the
+"culprit commit") is reliably identified. Some regressions can be fixed without
+this, but often it's required.
+
+Who needs to find the root cause of a regression?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Developers of the affected code area should try to locate the culprit on their
+own. But for them that's often impossible to do with reasonable effort, as quite
+a lot of issues only occur in a particular environment outside the developer's
+reach -- for example, a specific hardware platform, firmware, Linux distro,
+system's configuration, or application. That's why in the end it's often up to
+the reporter to locate the culprit commit; sometimes users might even need to
+run additional tests afterwards to pinpoint the exact root cause. Developers
+should offer advice and reasonably help where they can, to make this process
+relatively easy and achievable for typical users.
+
+How can I find the culprit?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Perform a bisection, as roughly outlined in
+Documentation/admin-guide/reporting-issues.rst and described in more detail by
+Documentation/admin-guide/bug-bisect.rst. It might sound like a lot of work, but
+in many cases finds the culprit relatively quickly. If it's hard or
+time-consuming to reliably reproduce the issue, consider teaming up with other
+affected users to narrow down the search range together.
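+
+A minimal sketch of such a bisection, assuming Linux v5.13 was the last fine
+version and v5.14-rc1 the first broken one (the versions are just examples)::
+
+    $ git bisect start
+    $ git bisect good v5.13
+    $ git bisect bad v5.14-rc1
+    $ # now build, boot and test the kernel git checked out, then report
+    $ # the result with "git bisect good" or "git bisect bad" and repeat
+    $ # until git prints the first bad commit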
+
+Who can I ask for advice when it comes to regressions?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Send a mail to the regressions mailing list (regressions@lists.linux.dev) while
+CCing the Linux kernel's regression tracker (regressions@leemhuis.info); if the
+issue might better be dealt with in private, feel free to omit the list.
+
+
+Additional details about regressions
+------------------------------------
+
+
+What is the goal of the "no regressions rule"?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Users should feel safe when updating kernel versions and not have to worry
+something might break. It is in the kernel developers' interest to make
+updating attractive: they don't want users to stay on stable or longterm Linux
+series that are either abandoned or more than one and a half years old. That's
+in everybody's interest, as `those series might have known bugs, security
+issues, or other problematic aspects already fixed in later versions
+<http://www.kroah.com/log/blog/2018/08/24/what-stable-kernel-should-i-use/>`_.
+Additionally, the kernel developers want to make it simple and appealing for
+users to test the latest pre-release or regular release. That's also in
+everybody's interest, as it's a lot easier to track down and fix problems, if
+they are reported shortly after being introduced.
+
+Is the "no regressions" rule really adhered in practice?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It's taken really seriously, as can be seen by many mailing list posts from
+Linux creator and lead developer Linus Torvalds, some of which are quoted in
+Documentation/process/handling-regressions.rst.
+
+Exceptions to this rule are extremely rare; in the past developers almost always
+turned out to be wrong when they assumed a particular situation was warranting
+an exception.
+
+Who ensures the "no regressions" rule is actually followed?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The subsystem maintainers should take care of that; they are watched and
+supported by the tree maintainers -- e.g. Linus Torvalds for mainline and
+Greg Kroah-Hartman et al. for various stable/longterm series.
+
+All of them are helped by people trying to ensure no regression report falls
+through the cracks. One of them is Thorsten Leemhuis, who's currently acting as
+the Linux kernel's "regressions tracker"; to facilitate this work he relies on
+regzbot, the Linux kernel regression tracking bot. That's why you want to bring
+your report on the radar of these people by CCing or forwarding each report to
+the regressions mailing list, ideally with a "regzbot command" in your mail to
+get it tracked immediately.
+
+How quickly are regressions normally fixed?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Developers should fix any reported regression as quickly as possible, to provide
+affected users with a solution in a timely manner and prevent more users from
+running into the issue; nevertheless developers need to take enough time and
+care to ensure regression fixes do not cause additional damage.
+
+The answer thus depends on various factors like the impact of a regression, its
+age, or the Linux series in which it occurs. In the end though, most regressions
+should be fixed within two weeks.
+
+Is it a regression, if the issue can be avoided by updating some software?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Almost always: yes. If a developer tells you otherwise, ask the regression
+tracker for advice as outlined above.
+
+Is it a regression, if a newer kernel works slower or consumes more energy?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Yes, but the difference has to be significant. A five percent slow-down in a
+micro-benchmark thus is unlikely to qualify as a regression, unless it also
+influences the results of a broad benchmark by more than one percent. If in
+doubt, ask for advice.
+
+Is it a regression, if an external kernel module breaks when updating Linux?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+No, as the "no regression" rule is about interfaces and services the Linux
+kernel provides to the userland. It thus does not cover building or running
+externally developed kernel modules, as they run in kernel-space and hook into
+the kernel using internal interfaces occasionally changed.
+
+How are regressions handled that are caused by security fixes?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In extremely rare situations security issues can't be fixed without causing
+regressions; those fixes are applied anyway, as they are the lesser evil in
+the end. Luckily this dilemma almost always can be avoided, as key developers
+for the
+affected area and often Linus Torvalds himself try very hard to fix security
+issues without causing regressions.
+
+If you nevertheless face such a case, check the mailing list archives if people
+tried their best to avoid the regression. If not, report it; if in doubt, ask
+for advice as outlined above.
+
+What happens if fixing a regression is impossible without causing another?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Sadly these things happen, but luckily not very often; if they occur, expert
+developers of the affected code area should look into the issue to find a fix
+that avoids regressions or at least their impact. If you run into such a
+situation, do what was outlined already for regressions caused by security
+fixes: check earlier discussions if people already tried their best and ask for
+advice if in doubt.
+
+A quick note while at it: these situations could be avoided, if people would
+regularly give mainline pre-releases (say v5.15-rc1 or -rc3) from each
+development cycle a test run. This is best explained by imagining a change
+integrated between Linux v5.14 and v5.15-rc1 which causes a regression, but at
+the same time is a hard requirement for some other improvement applied for
+5.15-rc1. All these changes often can simply be reverted and the regression thus
+solved, if someone finds and reports it before 5.15 is released. A few days or
+weeks later this solution can become impossible, as some software might have
+started to rely on aspects introduced by one of the follow-up changes: reverting
+all changes would then cause a regression for users of said software and thus is
+out of the question.
+
+Is it a regression, if some feature I relied on was removed months ago?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It is, but often it's hard to fix such regressions due to the aspects outlined
+in the previous section. It hence needs to be dealt with on a case-by-case
+basis. This is another reason why it's in everybody's interest to regularly test
+mainline pre-releases.
+
+Does the "no regression" rule apply if I seem to be the only affected person?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It does, but only for practical usage: the Linux developers want to be free to
+remove support for hardware that can only be found in attics and museums.
+
+Note that sometimes regressions can't be avoided to make progress -- and the
+latter is needed to prevent Linux from stagnation. Hence, if only very few
+users seem to be affected by a regression, it might, for the greater good, be
+in their and everyone else's interest to let things pass -- especially if there
+is an easy way to circumvent the regression somehow, for example by updating
+some software or using a kernel parameter created just for this purpose.
+
+Does the regression rule apply for code in the staging tree as well?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Not according to the `help text for the configuration option covering all
+staging code <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/staging/Kconfig>`_,
+which since its early days states::
+
+ Please note that these drivers are under heavy development, may or
+ may not work, and may contain userspace interfaces that most likely
+ will be changed in the near future.
+
+The staging developers nevertheless often adhere to the "no regressions" rule,
+but sometimes bend it to make progress. That's for example why some users had to
+deal with (often negligible) regressions when a WiFi driver from the staging
+tree was replaced by a totally different one written from scratch.
+
+Why do later versions have to be "compiled with a similar configuration"?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Because the Linux kernel developers sometimes integrate changes known to cause
+regressions, but make them optional and disable them in the kernel's default
+configuration. This trick allows progress, as the "no regressions" rule
+otherwise would lead to stagnation.
+
+Consider for example a new security feature blocking access to some kernel
+interfaces often abused by malware, which at the same time are required to run a
+few rarely used applications. The outlined approach makes both camps happy:
+people using these applications can leave the new security feature off, while
+everyone else can enable it without running into trouble.
+
+How to create a configuration similar to the one of an older kernel?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Start your machine with a known-good kernel and configure the newer Linux
+version with ``make olddefconfig``. This makes the kernel's build scripts pick
+up the configuration file (the ".config" file) from the running kernel as the
+base for the new one you are about to compile; afterwards they set all new
+configuration options to their default value, which should disable new features
+that might cause regressions.
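+
+A minimal sketch of these steps, assuming your distribution exposes the running
+kernel's configuration in one of the usual places::
+
+    # in the source tree of the newer Linux version:
+    # take the running kernel's configuration as the starting point
+    # (/proc/config.gz or /boot/config-$(uname -r) are common sources)
+    zcat /proc/config.gz > .config
+    # set all options new in this version to their default values
+    make olddefconfig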
+
+Can I report a regression I found with pre-compiled vanilla kernels?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You need to ensure the newer kernel was compiled with a similar configuration
+file as the older one (see above), as those who built them might have enabled
+some known-to-be-incompatible feature for the newer kernel. If in doubt, report
+the matter to the kernel's provider and ask for advice.
+
+
+More about regression tracking with "regzbot"
+---------------------------------------------
+
+What is regression tracking and why should I care about it?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Rules like "no regressions" need someone to ensure they are followed, otherwise
+they are broken either accidentally or on purpose. History has shown this to be
+true for Linux kernel development as well. That's why Thorsten Leemhuis, the
+Linux kernel's regression tracker, and a few others try to ensure all
+regressions are fixed by keeping an eye on them until they are resolved. None
+of them are paid for this, which is why the work is done on a best-effort
+basis.
+
+Why and how are Linux kernel regressions tracked using a bot?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Tracking regressions completely manually has proven to be quite hard due to the
+distributed and loosely structured nature of the Linux kernel development
+process. That's why the Linux kernel's regression tracker developed regzbot to
+facilitate the work, with the long-term goal of automating regression tracking
+as much as possible for everyone involved.
+
+Regzbot works by watching for replies to reports of tracked regressions.
+Additionally, it looks out for posted or committed patches referencing such
+reports with "Link:" tags; replies to such patch postings are tracked as well.
+Combined, this data provides good insights into the current state of the fixing
+process.
+
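+For example, the footer of a fix for a tracked regression would reference the
+report with a tag like the following (the URL is the example report address
+used elsewhere in this document)::
+
+    Link: https://lore.kernel.org/r/30th.anniversary.repost@klaava.Helsinki.FI/
+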
+How to see which regressions regzbot tracks currently?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Check out `regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_.
+
+What kind of issues are supposed to be tracked by regzbot?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The bot is meant to track regressions, hence please don't involve regzbot for
+regular issues. But it's okay for the Linux kernel's regression tracker if you
+involve regzbot to track severe issues, like reports about hangs, corrupted
+data, or internal errors (Panic, Oops, BUG(), warning, ...).
+
+How to change aspects of a tracked regression?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By using a 'regzbot command' in a direct or indirect reply to the mail with the
+report. The easiest way to do that: find the report in your "Sent" folder or the
+mailing list archive and reply to it using your mailer's "Reply-all" function.
+In that mail, use one of the following commands in a stand-alone paragraph (IOW:
+use blank lines to separate one or multiple of these commands from the rest of
+the mail's text).
+
+ * Update when the regression started to happen, for example after performing a
+ bisection::
+
+ #regzbot introduced: 1f2e3d4c5d
+
+ * Set or update the title::
+
+ #regzbot title: foo
+
+ * Monitor a discussion or bugzilla.kernel.org ticket where additional aspects
+   of the issue or a fix are discussed::
+
+ #regzbot monitor: https://lore.kernel.org/r/30th.anniversary.repost@klaava.Helsinki.FI/
+ #regzbot monitor: https://bugzilla.kernel.org/show_bug.cgi?id=123456789
+
+ * Point to a place with further details of interest, like a mailing list post
+   or a ticket in a bug tracker that is slightly related, but about a different
+   topic::
+
+ #regzbot link: https://bugzilla.kernel.org/show_bug.cgi?id=123456789
+
+ * Mark a regression as invalid::
+
+ #regzbot invalid: wasn't a regression, problem has always existed
+
+Regzbot supports a few other commands primarily used by developers or people
+tracking regressions. They and more details about the aforementioned regzbot
+commands can be found in the `getting started guide
+<https://gitlab.com/knurd42/regzbot/-/blob/main/docs/getting_started.md>`_ and
+the `reference documentation <https://gitlab.com/knurd42/regzbot/-/blob/main/docs/reference.md>`_
+for regzbot.
+
+..
+ end-of-content
+..
+ This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
+ of the file. If you want to distribute this text under CC-BY-4.0 only,
+ please use "The Linux kernel developers" for author attribution and link
+ this as source:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-regressions.rst
+..
+ Note: Only the content of this RST file as found in the Linux kernel sources
+ is available under CC-BY-4.0, as versions of this text that were processed
+ (for example by the kernel's build system) might contain content taken from
+ files which use a more restrictive license.
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index d359bcfadd39..803c60bf21d3 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -595,65 +595,33 @@ Documentation/admin-guide/kernel-parameters.rst).
numa_balancing
==============
-Enables/disables automatic page fault based NUMA memory
-balancing. Memory is moved automatically to nodes
-that access it often.
+Enables/disables and configures automatic page fault based NUMA memory
+balancing. Memory is moved automatically to nodes that access it often.
+The value to set can be the result of ORing the following:
-Enables/disables automatic NUMA memory balancing. On NUMA machines, there
-is a performance penalty if remote memory is accessed by a CPU. When this
-feature is enabled the kernel samples what task thread is accessing memory
-by periodically unmapping pages and later trapping a page fault. At the
-time of the page fault, it is determined if the data being accessed should
-be migrated to a local memory node.
+= =================================
+0 NUMA_BALANCING_DISABLED
+1 NUMA_BALANCING_NORMAL
+2 NUMA_BALANCING_MEMORY_TIERING
+= =================================
+
+Use NUMA_BALANCING_NORMAL to optimize page placement among different
+NUMA nodes to reduce remote accesses. On NUMA machines, there is a
+performance penalty if remote memory is accessed by a CPU. When this
+feature is enabled the kernel samples what task or thread is accessing
+memory by periodically unmapping pages and later trapping a page
+fault. At the time of the page fault, it is determined if the data
+being accessed should be migrated to a local memory node.
The unmapping of pages and trapping faults incur additional overhead that
ideally is offset by improved memory locality but there is no universal
guarantee. If the target workload is already bound to NUMA nodes then this
-feature should be disabled. Otherwise, if the system overhead from the
-feature is too high then the rate the kernel samples for NUMA hinting
-faults may be controlled by the `numa_balancing_scan_period_min_ms,
-numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms,
-numa_balancing_scan_size_mb`_, and numa_balancing_settle_count sysctls.
-
-
-numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb
-===============================================================================================================================
-
-
-Automatic NUMA balancing scans tasks address space and unmaps pages to
-detect if pages are properly placed or if the data should be migrated to a
-memory node local to where the task is running. Every "scan delay" the task
-scans the next "scan size" number of pages in its address space. When the
-end of the address space is reached the scanner restarts from the beginning.
-
-In combination, the "scan delay" and "scan size" determine the scan rate.
-When "scan delay" decreases, the scan rate increases. The scan delay and
-hence the scan rate of every task is adaptive and depends on historical
-behaviour. If pages are properly placed then the scan delay increases,
-otherwise the scan delay decreases. The "scan size" is not adaptive but
-the higher the "scan size", the higher the scan rate.
-
-Higher scan rates incur higher system overhead as page faults must be
-trapped and potentially data must be migrated. However, the higher the scan
-rate, the more quickly a tasks memory is migrated to a local node if the
-workload pattern changes and minimises performance impact due to remote
-memory accesses. These sysctls control the thresholds for scan delays and
-the number of pages scanned.
-
-``numa_balancing_scan_period_min_ms`` is the minimum time in milliseconds to
-scan a tasks virtual memory. It effectively controls the maximum scanning
-rate for each task.
-
-``numa_balancing_scan_delay_ms`` is the starting "scan delay" used for a task
-when it initially forks.
-
-``numa_balancing_scan_period_max_ms`` is the maximum time in milliseconds to
-scan a tasks virtual memory. It effectively controls the minimum scanning
-rate for each task.
-
-``numa_balancing_scan_size_mb`` is how many megabytes worth of pages are
-scanned for a given scan.
+feature should be disabled.
+Or use NUMA_BALANCING_MEMORY_TIERING to optimize page placement among
+different types of memory (represented as different NUMA nodes) to
+place the hot pages in the fast memory. This, too, is implemented by
+unmapping pages and trapping page faults.
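+
+As the values can be ORed, writing 3 enables both behaviours at once; a
+sketch of setting this via procfs::
+
+    # NUMA_BALANCING_NORMAL | NUMA_BALANCING_MEMORY_TIERING = 1 | 2 = 3
+    echo 3 > /proc/sys/kernel/numa_balancing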
oops_all_cpu_backtrace
======================
@@ -1029,23 +997,17 @@ This is a directory, with the following entries:
* ``poolsize``: the entropy pool size, in bits;
* ``urandom_min_reseed_secs``: obsolete (used to determine the minimum
- number of seconds between urandom pool reseeding).
+ number of seconds between urandom pool reseeding). This file is
+ writable for compatibility purposes, but writing to it has no effect
+ on any RNG behavior.
* ``uuid``: a UUID generated every time this is retrieved (this can
thus be used to generate UUIDs at will);
* ``write_wakeup_threshold``: when the entropy count drops below this
(as a number of bits), processes waiting to write to ``/dev/random``
- are woken up.
-
-If ``drivers/char/random.c`` is built with ``ADD_INTERRUPT_BENCH``
-defined, these additional entries are present:
-
-* ``add_interrupt_avg_cycles``: the average number of cycles between
- interrupts used to feed the pool;
-
-* ``add_interrupt_avg_deviation``: the standard deviation seen on the
- number of cycles between interrupts used to feed the pool.
+ are woken up. This file is writable for compatibility purposes, but
+ writing to it has no effect on any RNG behavior.
randomize_va_space
diff --git a/Documentation/arm/marvell.rst b/Documentation/arm/marvell.rst
index 9485a5a2e2e9..2f41caa0096c 100644
--- a/Documentation/arm/marvell.rst
+++ b/Documentation/arm/marvell.rst
@@ -266,10 +266,12 @@ Avanta family
-------------
Flavors:
+ - 88F6500
- 88F6510
- 88F6530P
- 88F6550
- 88F6560
+ - 88F6601
Homepage:
https://web.archive.org/web/20181005145041/http://www.marvell.com/broadband/
diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst
index 52d060caf8bb..29884b261aa9 100644
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@@ -10,9 +10,9 @@ This document is based on the ARM booting document by Russell King and
is relevant to all public releases of the AArch64 Linux kernel.
The AArch64 exception model is made up of a number of exception levels
-(EL0 - EL3), with EL0 and EL1 having a secure and a non-secure
-counterpart. EL2 is the hypervisor level and exists only in non-secure
-mode. EL3 is the highest priority level and exists only in secure mode.
+(EL0 - EL3), with EL0, EL1 and EL2 having a secure and a non-secure
+counterpart. EL2 is the hypervisor level, EL3 is the highest priority
+level and exists only in secure mode. Both are architecturally optional.
For the purposes of this document, we will use the term `boot loader`
simply to define all software that executes on the CPU(s) before control
@@ -167,8 +167,8 @@ Before jumping into the kernel, the following conditions must be met:
All forms of interrupts must be masked in PSTATE.DAIF (Debug, SError,
IRQ and FIQ).
- The CPU must be in either EL2 (RECOMMENDED in order to have access to
- the virtualisation extensions) or non-secure EL1.
+ The CPU must be in non-secure state, either in EL2 (RECOMMENDED in order
+ to have access to the virtualisation extensions), or in EL1.
- Caches, MMUs
diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst
index b72ff17d600a..a8f30963e550 100644
--- a/Documentation/arm64/elf_hwcaps.rst
+++ b/Documentation/arm64/elf_hwcaps.rst
@@ -259,6 +259,11 @@ HWCAP2_RPRES
Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001.
+HWCAP2_MTE3
+
+ Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described
+ by Documentation/arm64/memory-tagging-extension.rst.
+
4. Unused AT_HWCAP bits
-----------------------
diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst
index 7b99c8f428eb..dd27f78d7608 100644
--- a/Documentation/arm64/memory-tagging-extension.rst
+++ b/Documentation/arm64/memory-tagging-extension.rst
@@ -76,6 +76,9 @@ configurable behaviours:
with ``.si_code = SEGV_MTEAERR`` and ``.si_addr = 0`` (the faulting
address is unknown).
+- *Asymmetric* - Reads are handled as for synchronous mode while writes
+ are handled as for asynchronous mode.
+
The user can select the above modes, per thread, using the
``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where ``flags``
contains any number of the following values in the ``PR_MTE_TCF_MASK``
@@ -91,8 +94,9 @@ mode is specified, the program will run in that mode. If multiple
modes are specified, the mode is selected as described in the "Per-CPU
preferred tag checking modes" section below.
-The current tag check fault mode can be read using the
-``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call.
+The current tag check fault configuration can be read using the
+``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call. If
+multiple modes were requested then all will be reported.
Tag checking can also be disabled for a user thread by setting the
``PSTATE.TCO`` bit with ``MSR TCO, #1``.
@@ -139,18 +143,25 @@ tag checking mode as the CPU's preferred tag checking mode.
The preferred tag checking mode for each CPU is controlled by
``/sys/devices/system/cpu/cpu<N>/mte_tcf_preferred``, to which a
-privileged user may write the value ``async`` or ``sync``. The default
-preferred mode for each CPU is ``async``.
+privileged user may write the value ``async``, ``sync`` or ``asymm``. The
+default preferred mode for each CPU is ``async``.
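+
+For example, a privileged user could make asymmetric mode the preference on
+CPU 0 (a sketch)::
+
+    echo asymm > /sys/devices/system/cpu/cpu0/mte_tcf_preferred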
To allow a program to potentially run in the CPU's preferred tag
checking mode, the user program may set multiple tag check fault mode
bits in the ``flags`` argument to the ``prctl(PR_SET_TAGGED_ADDR_CTRL,
-flags, 0, 0, 0)`` system call. If the CPU's preferred tag checking
-mode is in the task's set of provided tag checking modes (this will
-always be the case at present because the kernel only supports two
-tag checking modes, but future kernels may support more modes), that
-mode will be selected. Otherwise, one of the modes in the task's mode
-set will be selected in a currently unspecified manner.
+flags, 0, 0, 0)`` system call. If both synchronous and asynchronous
+modes are requested then asymmetric mode may also be selected by the
+kernel. If the CPU's preferred tag checking mode is in the task's set
+of provided tag checking modes, that mode will be selected. Otherwise,
+one of the modes in the task's mode set will be selected by the
+kernel using the preference order:
+
+ 1. Asynchronous
+ 2. Asymmetric
+ 3. Synchronous
+
+Note that there is no way for userspace to request multiple modes and
+also disable asymmetric mode.
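+
+A minimal userspace sketch of requesting multiple modes and reading the
+configuration back (the ``PR_*`` values mirror the kernel's UAPI definitions
+and are defined here only in case the libc headers lack them; error handling
+is minimal)::
+
+    #include <stdio.h>
+    #include <sys/prctl.h>
+
+    #ifndef PR_SET_TAGGED_ADDR_CTRL
+    #define PR_SET_TAGGED_ADDR_CTRL 55
+    #define PR_GET_TAGGED_ADDR_CTRL 56
+    #define PR_TAGGED_ADDR_ENABLE   (1UL << 0)
+    #endif
+    #ifndef PR_MTE_TCF_SYNC
+    #define PR_MTE_TCF_SYNC         (1UL << 1)
+    #define PR_MTE_TCF_ASYNC        (1UL << 2)
+    #endif
+
+    int main(void)
+    {
+            /*
+             * Request both synchronous and asynchronous tag check
+             * fault modes; the kernel selects one of them (possibly
+             * asymmetric mode, as described above) based on the
+             * CPU's preferred mode.
+             */
+            unsigned long flags = PR_TAGGED_ADDR_ENABLE |
+                                  PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC;
+
+            if (prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)) {
+                    perror("prctl(PR_SET_TAGGED_ADDR_CTRL)");
+                    return 1;
+            }
+
+            /* All requested modes are reported back. */
+            printf("tagged addr ctrl: 0x%x\n",
+                   prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0));
+            return 0;
+    }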
Initial process state
---------------------
@@ -213,6 +224,29 @@ address ABI control and MTE configuration of a process as per the
Documentation/arm64/tagged-address-abi.rst and above. The corresponding
``regset`` is 1 element of 8 bytes (``sizeof(long))``).
+Core dump support
+-----------------
+
+The allocation tags for user memory mapped with ``PROT_MTE`` are dumped
+in the core file as additional ``PT_ARM_MEMTAG_MTE`` segments. The
+program header for such segment is defined as:
+
+:``p_type``: ``PT_ARM_MEMTAG_MTE``
+:``p_flags``: 0
+:``p_offset``: segment file offset
+:``p_vaddr``: segment virtual address, same as the corresponding
+ ``PT_LOAD`` segment
+:``p_paddr``: 0
+:``p_filesz``: segment size in file, calculated as ``p_memsz / 32``
+ (two 4-bit tags cover 32 bytes of memory)
+:``p_memsz``: segment size in memory, same as the corresponding
+ ``PT_LOAD`` segment
+:``p_align``: 0
+
+The tags are stored in the core file at ``p_offset`` as two 4-bit tags
+in a byte. With the tag granule of 16 bytes, a 4K page requires 128
+bytes in the core file.
+
Example of correct usage
========================
diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index 5342e895fb60..466cb9e89047 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -52,6 +52,12 @@ stable kernels.
| Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2064142 | ARM64_ERRATUM_2064142 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2038923 | ARM64_ERRATUM_2038923 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #1902691 | ARM64_ERRATUM_1902691 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 |
@@ -92,12 +98,20 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2077057 | ARM64_ERRATUM_2077057 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2224489 | ARM64_ERRATUM_2224489 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1349291 | N/A |
@@ -122,7 +136,7 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 |
+----------------+-----------------+-----------------+-----------------------------+
-| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 |
+| Cavium | ThunderX GICv3 | #23154,38545 | CAVIUM_ERRATUM_23154 |
+----------------+-----------------+-----------------+-----------------------------+
| Cavium | ThunderX GICv3 | #38539 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
diff --git a/Documentation/asm-annotations.rst b/Documentation/asm-annotations.rst
index f4bf0f6395fb..a64f2ca469d4 100644
--- a/Documentation/asm-annotations.rst
+++ b/Documentation/asm-annotations.rst
@@ -130,14 +130,13 @@ denoting a range of code via ``SYM_*_START/END`` annotations.
In fact, this kind of annotation corresponds to the now deprecated ``ENTRY``
and ``ENDPROC`` macros.
-* ``SYM_FUNC_START_ALIAS`` and ``SYM_FUNC_START_LOCAL_ALIAS`` serve for those
- who decided to have two or more names for one function. The typical use is::
+* ``SYM_FUNC_ALIAS``, ``SYM_FUNC_ALIAS_LOCAL``, and ``SYM_FUNC_ALIAS_WEAK`` can
+ be used to define multiple names for a function. The typical use is::
- SYM_FUNC_START_ALIAS(__memset)
- SYM_FUNC_START(memset)
+ SYM_FUNC_START(__memset)
... asm insns ...
- SYM_FUNC_END(memset)
- SYM_FUNC_END_ALIAS(__memset)
+ SYM_FUNC_END(__memset)
+ SYM_FUNC_ALIAS(memset, __memset)
In this example, one can call ``__memset`` or ``memset`` with the same
result, except the debug information for the instructions is generated to
diff --git a/Documentation/block/biodoc.rst b/Documentation/block/biodoc.rst
deleted file mode 100644
index 2098477851a4..000000000000
--- a/Documentation/block/biodoc.rst
+++ /dev/null
@@ -1,1164 +0,0 @@
-=====================================================
-Notes on the Generic Block Layer Rewrite in Linux 2.5
-=====================================================
-
-.. note::
-
- It seems that there are lot of outdated stuff here. This seems
- to be written somewhat as a task list. Yet, eventually, something
- here might still be useful.
-
-Notes Written on Jan 15, 2002:
-
- - Jens Axboe <jens.axboe@oracle.com>
- - Suparna Bhattacharya <suparna@in.ibm.com>
-
-Last Updated May 2, 2002
-
-September 2003: Updated I/O Scheduler portions
- - Nick Piggin <npiggin@kernel.dk>
-
-Introduction
-============
-
-These are some notes describing some aspects of the 2.5 block layer in the
-context of the bio rewrite. The idea is to bring out some of the key
-changes and a glimpse of the rationale behind those changes.
-
-Please mail corrections & suggestions to suparna@in.ibm.com.
-
-Credits
-=======
-
-2.5 bio rewrite:
- - Jens Axboe <jens.axboe@oracle.com>
-
-Many aspects of the generic block layer redesign were driven by and evolved
-over discussions, prior patches and the collective experience of several
-people. See sections 8 and 9 for a list of some related references.
-
-The following people helped with review comments and inputs for this
-document:
-
- - Christoph Hellwig <hch@infradead.org>
- - Arjan van de Ven <arjanv@redhat.com>
- - Randy Dunlap <rdunlap@xenotime.net>
- - Andre Hedrick <andre@linux-ide.org>
-
-The following people helped with fixes/contributions to the bio patches
-while it was still work-in-progress:
-
- - David S. Miller <davem@redhat.com>
-
-
-.. Description of Contents:
-
- 1. Scope for tuning of logic to various needs
- 1.1 Tuning based on device or low level driver capabilities
- - Per-queue parameters
- - Highmem I/O support
- - I/O scheduler modularization
- 1.2 Tuning based on high level requirements/capabilities
- 1.2.1 Request Priority/Latency
- 1.3 Direct access/bypass to lower layers for diagnostics and special
- device operations
- 1.3.1 Pre-built commands
- 2. New flexible and generic but minimalist i/o structure or descriptor
- (instead of using buffer heads at the i/o layer)
- 2.1 Requirements/Goals addressed
- 2.2 The bio struct in detail (multi-page io unit)
- 2.3 Changes in the request structure
- 3. Using bios
- 3.1 Setup/teardown (allocation, splitting)
- 3.2 Generic bio helper routines
- 3.2.1 Traversing segments and completion units in a request
- 3.2.2 Setting up DMA scatterlists
- 3.2.3 I/O completion
- 3.2.4 Implications for drivers that do not interpret bios (don't handle
- multiple segments)
- 3.3 I/O submission
- 4. The I/O scheduler
- 5. Scalability related changes
- 5.1 Granular locking: Removal of io_request_lock
- 5.2 Prepare for transition to 64 bit sector_t
- 6. Other Changes/Implications
- 6.1 Partition re-mapping handled by the generic block layer
- 7. A few tips on migration of older drivers
- 8. A list of prior/related/impacted patches/ideas
- 9. Other References/Discussion Threads
-
-
-Bio Notes
-=========
-
-Let us discuss the changes in the context of how some overall goals for the
-block layer are addressed.
-
-1. Scope for tuning the generic logic to satisfy various requirements
-=====================================================================
-
-The block layer design supports adaptable abstractions to handle common
-processing with the ability to tune the logic to an appropriate extent
-depending on the nature of the device and the requirements of the caller.
-One of the objectives of the rewrite was to increase the degree of tunability
-and to enable higher level code to utilize underlying device/driver
-capabilities to the maximum extent for better i/o performance. This is
-important especially in the light of ever improving hardware capabilities
-and application/middleware software designed to take advantage of these
-capabilities.
-
-1.1 Tuning based on low level device / driver capabilities
-----------------------------------------------------------
-
-Sophisticated devices with large built-in caches, intelligent i/o scheduling
-optimizations, high memory DMA support, etc may find some of the
-generic processing an overhead, while for less capable devices the
-generic functionality is essential for performance or correctness reasons.
-Knowledge of some of the capabilities or parameters of the device should be
-used at the generic block layer to take the right decisions on
-behalf of the driver.
-
-How is this achieved ?
-
-Tuning at a per-queue level:
-
-i. Per-queue limits/values exported to the generic layer by the driver
-
-Various parameters that the generic i/o scheduler logic uses are set at
-a per-queue level (e.g maximum request size, maximum number of segments in
-a scatter-gather list, logical block size)
-
-Some parameters that were earlier available as global arrays indexed by
-major/minor are now directly associated with the queue. Some of these may
-move into the block device structure in the future. Some characteristics
-have been incorporated into a queue flags field rather than separate fields
-in themselves. There are blk_queue_xxx functions to set the parameters,
-rather than update the fields directly
-
-Some new queue property settings:
-
- blk_queue_bounce_limit(q, u64 dma_address)
- Enable I/O to highmem pages, dma_address being the
- limit. No highmem default.
-
- blk_queue_max_sectors(q, max_sectors)
- Sets two variables that limit the size of the request.
-
- - The request queue's max_sectors, which is a soft size in
- units of 512 byte sectors, and could be dynamically varied
- by the core kernel.
-
- - The request queue's max_hw_sectors, which is a hard limit
- and reflects the maximum size request a driver can handle
- in units of 512 byte sectors.
-
- The default for both max_sectors and max_hw_sectors is
- 255. The upper limit of max_sectors is 1024.
-
- blk_queue_max_phys_segments(q, max_segments)
- Maximum physical segments you can handle in a request. 128
- default (driver limit). (See 3.2.2)
-
- blk_queue_max_hw_segments(q, max_segments)
- Maximum dma segments the hardware can handle in a request. 128
- default (host adapter limit, after dma remapping).
- (See 3.2.2)
-
- blk_queue_max_segment_size(q, max_seg_size)
- Maximum size of a clustered segment, 64kB default.
-
- blk_queue_logical_block_size(q, logical_block_size)
- Lowest possible sector size that the hardware can operate
- on, 512 bytes default.
-
-New queue flags:
-
- - QUEUE_FLAG_CLUSTER (see 3.2.2)
- - QUEUE_FLAG_QUEUED (see 3.2.4)
-
-
-ii. High-mem i/o capabilities are now considered the default
-
-The generic bounce buffer logic, present in 2.4, where the block layer would
-by default copyin/out i/o requests on high-memory buffers to low-memory buffers
-assuming that the driver wouldn't be able to handle it directly, has been
-changed in 2.5. The bounce logic is now applied only for memory ranges
-for which the device cannot handle i/o. A driver can specify this by
-setting the queue bounce limit for the request queue for the device
-(blk_queue_bounce_limit()). This avoids the inefficiencies of the copyin/out
-where a device is capable of handling high memory i/o.
-
-In order to enable high-memory i/o where the device is capable of supporting
-it, the pci dma mapping routines and associated data structures have now been
-modified to accomplish a direct page -> bus translation, without requiring
-a virtual address mapping (unlike the earlier scheme of virtual address
--> bus translation). So this works uniformly for high-memory pages (which
-do not have a corresponding kernel virtual address space mapping) and
-low-memory pages.
-
-Note: Please refer to Documentation/core-api/dma-api-howto.rst for a discussion
-on PCI high mem DMA aspects and mapping of scatter gather lists, and support
-for 64 bit PCI.
-
-Special handling is required only for cases where i/o needs to happen on
-pages at physical memory addresses beyond what the device can support. In these
-cases, a bounce bio representing a buffer from the supported memory range
-is used for performing the i/o with copyin/copyout as needed depending on
-the type of the operation. For example, in case of a read operation, the
-data read has to be copied to the original buffer on i/o completion, so a
-callback routine is set up to do this, while for write, the data is copied
-from the original buffer to the bounce buffer prior to issuing the
-operation. Since an original buffer may be in a high memory area that's not
-mapped in kernel virtual addr, a kmap operation may be required for
-performing the copy, and special care may be needed in the completion path
-as it may not be in irq context. Special care is also required (by way of
-GFP flags) when allocating bounce buffers, to avoid certain highmem
-deadlock possibilities.
-
-It is also possible that a bounce buffer may be allocated from high-memory
-area that's not mapped in kernel virtual addr, but within the range that the
-device can use directly; so the bounce page may need to be kmapped during
-copy operations. [Note: This does not hold in the current implementation,
-though]
-
-There are some situations when pages from high memory may need to
-be kmapped, even if bounce buffers are not necessary. For example a device
-may need to abort DMA operations and revert to PIO for the transfer, in
-which case a virtual mapping of the page is required. For SCSI it is also
-done in some scenarios where the low level driver cannot be trusted to
-handle a single sg entry correctly. The driver is expected to perform the
-kmaps as needed on such occasions as appropriate. A driver could also use
-the blk_queue_bounce() routine on its own to bounce highmem i/o to low
-memory for specific requests if so desired.
-
-iii. The i/o scheduler algorithm itself can be replaced/set as appropriate
-
-As in 2.4, it is possible to plugin a brand new i/o scheduler for a particular
-queue or pick from (copy) existing generic schedulers and replace/override
-certain portions of it. The 2.5 rewrite provides improved modularization
-of the i/o scheduler. There are more pluggable callbacks, e.g for init,
-add request, extract request, which makes it possible to abstract specific
-i/o scheduling algorithm aspects and details outside of the generic loop.
-It also makes it possible to completely hide the implementation details of
-the i/o scheduler from block drivers.
-
-I/O scheduler wrappers are to be used instead of accessing the queue directly.
-See section 4. The I/O scheduler for details.
-
-1.2 Tuning Based on High level code capabilities
-------------------------------------------------
-
-i. Application capabilities for raw i/o
-
-This comes from some of the high-performance database/middleware
-requirements where an application prefers to make its own i/o scheduling
-decisions based on an understanding of the access patterns and i/o
-characteristics
-
-ii. High performance filesystems or other higher level kernel code's
-capabilities
-
-Kernel components like filesystems could also take their own i/o scheduling
-decisions for optimizing performance. Journalling filesystems may need
-some control over i/o ordering.
-
-What kind of support exists at the generic block layer for this ?
-
-The flags and rw fields in the bio structure can be used for some tuning
-from above e.g indicating that an i/o is just a readahead request, or priority
-settings (currently unused). As far as user applications are concerned they
-would need an additional mechanism either via open flags or ioctls, or some
-other upper level mechanism to communicate such settings to block.
-
-1.2.1 Request Priority/Latency
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Todo/Under discussion::
-
- Arjan's proposed request priority scheme allows higher levels some broad
- control (high/med/low) over the priority of an i/o request vs other pending
- requests in the queue. For example it allows reads for bringing in an
- executable page on demand to be given a higher priority over pending write
- requests which haven't aged too much on the queue. Potentially this priority
- could even be exposed to applications in some manner, providing higher level
- tunability. Time based aging avoids starvation of lower priority
- requests. Some bits in the bi_opf flags field in the bio structure are
- intended to be used for this priority information.
-
-
-1.3 Direct Access to Low level Device/Driver Capabilities (Bypass mode)
------------------------------------------------------------------------
-
-(e.g Diagnostics, Systems Management)
-
-There are situations where high-level code needs to have direct access to
-the low level device capabilities or requires the ability to issue commands
-to the device bypassing some of the intermediate i/o layers.
-These could, for example, be special control commands issued through ioctl
-interfaces, or could be raw read/write commands that stress the drive's
-capabilities for certain kinds of fitness tests. Having direct interfaces at
-multiple levels without having to pass through upper layers makes
-it possible to perform bottom up validation of the i/o path, layer by
-layer, starting from the media.
-
-The normal i/o submission interfaces, e.g submit_bio, could be bypassed
-for specially crafted requests which such ioctl or diagnostics
-interfaces would typically use, and the elevator add_request routine
-can instead be used to directly insert such requests in the queue or preferably
-the blk_do_rq routine can be used to place the request on the queue and
-wait for completion. Alternatively, sometimes the caller might just
-invoke a lower level driver specific interface with the request as a
-parameter.
-
-If the request is a means for passing on special information associated with
-the command, then such information is associated with the request->special
-field (rather than misuse the request->buffer field which is meant for the
-request data buffer's virtual mapping).
-
-For passing request data, the caller must build up a bio descriptor
-representing the concerned memory buffer if the underlying driver interprets
-bio segments or uses the block layer end*request* functions for i/o
-completion. Alternatively one could directly use the request->buffer field to
-specify the virtual address of the buffer, if the driver expects buffer
-addresses passed in this way and ignores bio entries for the request type
-involved. In the latter case, the driver would modify and manage the
-request->buffer, request->sector and request->nr_sectors or
-request->current_nr_sectors fields itself rather than using the block layer
-end_request or end_that_request_first completion interfaces.
-(See 2.3 or Documentation/block/request.rst for a brief explanation of
-the request structure fields)
-
-::
-
- [TBD: end_that_request_last should be usable even in this case;
- Perhaps an end_that_direct_request_first routine could be implemented to make
- handling direct requests easier for such drivers; Also for drivers that
- expect bios, a helper function could be provided for setting up a bio
- corresponding to a data buffer]
-
- <JENS: I dont understand the above, why is end_that_request_first() not
- usable? Or _last for that matter. I must be missing something>
-
- <SUP: What I meant here was that if the request doesn't have a bio, then
- end_that_request_first doesn't modify nr_sectors or current_nr_sectors,
- and hence can't be used for advancing request state settings on the
- completion of partial transfers. The driver has to modify these fields
- directly by hand.
- This is because end_that_request_first only iterates over the bio list,
- and always returns 0 if there are none associated with the request.
- _last works OK in this case, and is not a problem, as I mentioned earlier
- >
-
-1.3.1 Pre-built Commands
-^^^^^^^^^^^^^^^^^^^^^^^^
-
-A request can be created with a pre-built custom command to be sent directly
-to the device. The cmd block in the request structure has room for filling
-in the command bytes. (i.e rq->cmd is now 16 bytes in size, and meant for
-command pre-building, and the type of the request is now indicated
-through rq->flags instead of via rq->cmd)
-
-The request structure flags can be set up to indicate the type of request
-in such cases (REQ_PC: direct packet command passed to driver, REQ_BLOCK_PC:
-packet command issued via blk_do_rq, REQ_SPECIAL: special request).
-
-It can help to pre-build device commands for requests in advance.
-Drivers can now specify a request prepare function (q->prep_rq_fn) that the
-block layer would invoke to pre-build device commands for a given request,
-or perform other preparatory processing for the request. This is routine is
-called by elv_next_request(), i.e. typically just before servicing a request.
-(The prepare function would not be called for requests that have RQF_DONTPREP
-enabled)
-
-Aside:
- Pre-building could possibly even be done early, i.e before placing the
- request on the queue, rather than construct the command on the fly in the
- driver while servicing the request queue when it may affect latencies in
- interrupt context or responsiveness in general. One way to add early
- pre-building would be to do it whenever we fail to merge on a request.
- Now REQ_NOMERGE is set in the request flags to skip this one in the future,
- which means that it will not change before we feed it to the device. So
- the pre-builder hook can be invoked there.
-
-
-2. Flexible and generic but minimalist i/o structure/descriptor
-===============================================================
-
-2.1 Reason for a new structure and requirements addressed
----------------------------------------------------------
-
-Prior to 2.5, buffer heads were used as the unit of i/o at the generic block
-layer, and the low level request structure was associated with a chain of
-buffer heads for a contiguous i/o request. This led to certain inefficiencies
-when it came to large i/o requests and readv/writev style operations, as it
-forced such requests to be broken up into small chunks before being passed
-on to the generic block layer, only to be merged by the i/o scheduler
-when the underlying device was capable of handling the i/o in one shot.
-Also, using the buffer head as an i/o structure for i/os that didn't originate
-from the buffer cache unnecessarily added to the weight of the descriptors
-which were generated for each such chunk.
-
-The following were some of the goals and expectations considered in the
-redesign of the block i/o data structure in 2.5.
-
-1. Should be appropriate as a descriptor for both raw and buffered i/o -
- avoid cache related fields which are irrelevant in the direct/page i/o path,
- or filesystem block size alignment restrictions which may not be relevant
- for raw i/o.
-2. Ability to represent high-memory buffers (which do not have a virtual
- address mapping in kernel address space).
-3. Ability to represent large i/os w/o unnecessarily breaking them up (i.e
- greater than PAGE_SIZE chunks in one shot)
-4. At the same time, ability to retain independent identity of i/os from
- different sources or i/o units requiring individual completion (e.g. for
- latency reasons)
-5. Ability to represent an i/o involving multiple physical memory segments
- (including non-page aligned page fragments, as specified via readv/writev)
- without unnecessarily breaking it up, if the underlying device is capable of
- handling it.
-6. Preferably should be based on a memory descriptor structure that can be
- passed around different types of subsystems or layers, maybe even
- networking, without duplication or extra copies of data/descriptor fields
- themselves in the process
-7. Ability to handle the possibility of splits/merges as the structure passes
- through layered drivers (lvm, md, evms), with minimal overhead.
-
-The solution was to define a new structure (bio) for the block layer,
-instead of using the buffer head structure (bh) directly, the idea being
-avoidance of some associated baggage and limitations. The bio structure
-is uniformly used for all i/o at the block layer ; it forms a part of the
-bh structure for buffered i/o, and in the case of raw/direct i/o kiobufs are
-mapped to bio structures.
-
-2.2 The bio struct
-------------------
-
-The bio structure uses a vector representation pointing to an array of tuples
-of <page, offset, len> to describe the i/o buffer, and has various other
-fields describing i/o parameters and state that needs to be maintained for
-performing the i/o.
-
-Notice that this representation means that a bio has no virtual address
-mapping at all (unlike buffer heads).
-
-::
-
- struct bio_vec {
- struct page *bv_page;
- unsigned short bv_len;
- unsigned short bv_offset;
- };
-
- /*
- * main unit of I/O for the block layer and lower layers (ie drivers)
- */
- struct bio {
- struct bio *bi_next; /* request queue link */
- struct block_device *bi_bdev; /* target device */
- unsigned long bi_flags; /* status, command, etc */
- unsigned long bi_opf; /* low bits: r/w, high: priority */
-
- unsigned int bi_vcnt; /* how may bio_vec's */
- struct bvec_iter bi_iter; /* current index into bio_vec array */
-
- unsigned int bi_size; /* total size in bytes */
- unsigned short bi_hw_segments; /* segments after DMA remapping */
- unsigned int bi_max; /* max bio_vecs we can hold
- used as index into pool */
- struct bio_vec *bi_io_vec; /* the actual vec list */
- bio_end_io_t *bi_end_io; /* bi_end_io (bio) */
- atomic_t bi_cnt; /* pin count: free when it hits zero */
- void *bi_private;
- };
-
-With this multipage bio design:
-
-- Large i/os can be sent down in one go using a bio_vec list consisting
- of an array of <page, offset, len> fragments (similar to the way fragments
- are represented in the zero-copy network code)
-- Splitting of an i/o request across multiple devices (as in the case of
- lvm or raid) is achieved by cloning the bio (where the clone points to
- the same bi_io_vec array, but with the index and size accordingly modified)
-- A linked list of bios is used as before for unrelated merges [#]_ - this
- avoids reallocs and makes independent completions easier to handle.
-- Code that traverses the req list can find all the segments of a bio
- by using rq_for_each_segment. This handles the fact that a request
- has multiple bios, each of which can have multiple segments.
-- Drivers which can't process a large bio in one shot can use the bi_iter
- field to keep track of the next bio_vec entry to process.
- (e.g a 1MB bio_vec needs to be handled in max 128kB chunks for IDE)
- [TBD: Should preferably also have a bi_voffset and bi_vlen to avoid modifying
- bi_offset an len fields]
-
-.. [#]
-
- unrelated merges -- a request ends up containing two or more bios that
- didn't originate from the same place.
-
-bi_end_io() i/o callback gets called on i/o completion of the entire bio.
-
-At a lower level, drivers build a scatter gather list from the merged bios.
-The scatter gather list is in the form of an array of <page, offset, len>
-entries with their corresponding dma address mappings filled in at the
-appropriate time. As an optimization, contiguous physical pages can be
-covered by a single entry where <page> refers to the first page and <len>
-covers the range of pages (up to 16 contiguous pages could be covered this
-way). There is a helper routine (blk_rq_map_sg) which drivers can use to build
-the sg list.
-
-Note: Right now the only user of bios with more than one page is ll_rw_kio,
-which in turn means that only raw I/O uses it (direct i/o may not work
-right now). The intent however is to enable clustering of pages etc to
-become possible. The pagebuf abstraction layer from SGI also uses multi-page
-bios, but that is currently not included in the stock development kernels.
-The same is true of Andrew Morton's work-in-progress multipage bio writeout
-and readahead patches.
-
-2.3 Changes in the Request Structure
-------------------------------------
-
-The request structure is the structure that gets passed down to low level
-drivers. The block layer make_request function builds up a request structure,
-places it on the queue and invokes the drivers request_fn. The driver makes
-use of block layer helper routine elv_next_request to pull the next request
-off the queue. Control or diagnostic functions might bypass block and directly
-invoke underlying driver entry points passing in a specially constructed
-request structure.
-
-Only some relevant fields (mainly those which changed or may be referred
-to in some of the discussion here) are listed below, not necessarily in
-the order in which they occur in the structure (see include/linux/blkdev.h)
-Refer to Documentation/block/request.rst for details about all the request
-structure fields and a quick reference about the layers which are
-supposed to use or modify those fields::
-
- struct request {
- struct list_head queuelist; /* Not meant to be directly accessed by
- the driver.
- Used by q->elv_next_request_fn
- rq->queue is gone
- */
- .
- .
- unsigned char cmd[16]; /* prebuilt command data block */
- unsigned long flags; /* also includes earlier rq->cmd settings */
- .
- .
- sector_t sector; /* this field is now of type sector_t instead of int
- preparation for 64 bit sectors */
- .
- .
-
- /* Number of scatter-gather DMA addr+len pairs after
- * physical address coalescing is performed.
- */
- unsigned short nr_phys_segments;
-
- /* Number of scatter-gather addr+len pairs after
- * physical and DMA remapping hardware coalescing is performed.
- * This is the number of scatter-gather entries the driver
- * will actually have to deal with after DMA mapping is done.
- */
- unsigned short nr_hw_segments;
-
- /* Various sector counts */
- unsigned long nr_sectors; /* no. of sectors left: driver modifiable */
- unsigned long hard_nr_sectors; /* block internal copy of above */
- unsigned int current_nr_sectors; /* no. of sectors left in the
- current segment:driver modifiable */
- unsigned long hard_cur_sectors; /* block internal copy of the above */
- .
- .
- int tag; /* command tag associated with request */
- void *special; /* same as before */
- char *buffer; /* valid only for low memory buffers up to
- current_nr_sectors */
- .
- .
- struct bio *bio, *biotail; /* bio list instead of bh */
- struct request_list *rl;
- }
-
-See the req_ops and req_flag_bits definitions for an explanation of the various
-flags available. Some bits are used by the block layer or i/o scheduler.
-
-The behaviour of the various sector counts are almost the same as before,
-except that since we have multi-segment bios, current_nr_sectors refers
-to the numbers of sectors in the current segment being processed which could
-be one of the many segments in the current bio (i.e i/o completion unit).
-The nr_sectors value refers to the total number of sectors in the whole
-request that remain to be transferred (no change). The purpose of the
-hard_xxx values is for block to remember these counts every time it hands
-over the request to the driver. These values are updated by block on
-end_that_request_first, i.e. every time the driver completes a part of the
-transfer and invokes block end*request helpers to mark this. The
-driver should not modify these values. The block layer sets up the
-nr_sectors and current_nr_sectors fields (based on the corresponding
-hard_xxx values and the number of bytes transferred) and updates it on
-every transfer that invokes end_that_request_first. It does the same for the
-buffer, bio, bio->bi_iter fields too.
-
-The buffer field is just a virtual address mapping of the current segment
-of the i/o buffer in cases where the buffer resides in low-memory. For high
-memory i/o, this field is not valid and must not be used by drivers.
-
-Code that sets up its own request structures and passes them down to
-a driver needs to be careful about interoperation with the block layer helper
-functions which the driver uses. (Section 1.3)
-
-3. Using bios
-=============
-
-3.1 Setup/Teardown
-------------------
-
-There are routines for managing the allocation, and reference counting, and
-freeing of bios (bio_alloc, bio_get, bio_put).
-
-This makes use of Ingo Molnar's mempool implementation, which enables
-subsystems like bio to maintain their own reserve memory pools for guaranteed
-deadlock-free allocations during extreme VM load. For example, the VM
-subsystem makes use of the block layer to writeout dirty pages in order to be
-able to free up memory space, a case which needs careful handling. The
-allocation logic draws from the preallocated emergency reserve in situations
-where it cannot allocate through normal means. If the pool is empty and it
-can wait, then it would trigger action that would help free up memory or
-replenish the pool (without deadlocking) and wait for availability in the pool.
-If it is in IRQ context, and hence not in a position to do this, allocation
-could fail if the pool is empty. In general mempool always first tries to
-perform allocation without having to wait, even if it means digging into the
-pool as long it is not less that 50% full.
-
-On a free, memory is released to the pool or directly freed depending on
-the current availability in the pool. The mempool interface lets the
-subsystem specify the routines to be used for normal alloc and free. In the
-case of bio, these routines make use of the standard slab allocator.
-
-The caller of bio_alloc is expected to taken certain steps to avoid
-deadlocks, e.g. avoid trying to allocate more memory from the pool while
-already holding memory obtained from the pool.
-
-::
-
- [TBD: This is a potential issue, though a rare possibility
- in the bounce bio allocation that happens in the current code, since
- it ends up allocating a second bio from the same pool while
- holding the original bio ]
-
-Memory allocated from the pool should be released back within a limited
-amount of time (in the case of bio, that would be after the i/o is completed).
-This ensures that if part of the pool has been used up, some work (in this
-case i/o) must already be in progress and memory would be available when it
-is over. If allocating from multiple pools in the same code path, the order
-or hierarchy of allocation needs to be consistent, just the way one deals
-with multiple locks.
-
-The bio_alloc routine also needs to allocate the bio_vec_list (bvec_alloc())
-for a non-clone bio. There are the 6 pools setup for different size biovecs,
-so bio_alloc(gfp_mask, nr_iovecs) will allocate a vec_list of the
-given size from these slabs.
-
-The bio_get() routine may be used to hold an extra reference on a bio prior
-to i/o submission, if the bio fields are likely to be accessed after the
-i/o is issued (since the bio may otherwise get freed in case i/o completion
-happens in the meantime).
-
-The bio_clone_fast() routine may be used to duplicate a bio, where the clone
-shares the bio_vec_list with the original bio (i.e. both point to the
-same bio_vec_list). This would typically be used for splitting i/o requests
-in lvm or md.
-
-3.2 Generic bio helper Routines
--------------------------------
-
-3.2.1 Traversing segments and completion units in a request
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-The macro rq_for_each_segment() should be used for traversing the bios
-in the request list (drivers should avoid directly trying to do it
-themselves). Using these helpers should also make it easier to cope
-with block changes in the future.
-
-::
-
- struct req_iterator iter;
- rq_for_each_segment(bio_vec, rq, iter)
- /* bio_vec is now current segment */
-
-I/O completion callbacks are per-bio rather than per-segment, so drivers
-that traverse bio chains on completion need to keep that in mind. Drivers
-which don't make a distinction between segments and completion units would
-need to be reorganized to support multi-segment bios.
-
-3.2.2 Setting up DMA scatterlists
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-The blk_rq_map_sg() helper routine would be used for setting up scatter
-gather lists from a request, so a driver need not do it on its own.
-
- nr_segments = blk_rq_map_sg(q, rq, scatterlist);
-
-The helper routine provides a level of abstraction which makes it easier
-to modify the internals of request to scatterlist conversion down the line
-without breaking drivers. The blk_rq_map_sg routine takes care of several
-things like collapsing physically contiguous segments (if QUEUE_FLAG_CLUSTER
-is set) and correct segment accounting to avoid exceeding the limits which
-the i/o hardware can handle, based on various queue properties.
-
-- Prevents a clustered segment from crossing a 4GB mem boundary
-- Avoids building segments that would exceed the number of physical
- memory segments that the driver can handle (phys_segments) and the
- number that the underlying hardware can handle at once, accounting for
- DMA remapping (hw_segments) (i.e. IOMMU aware limits).
-
-Routines which the low level driver can use to set up the segment limits:
-
-blk_queue_max_hw_segments() : Sets an upper limit of the maximum number of
-hw data segments in a request (i.e. the maximum number of address/length
-pairs the host adapter can actually hand to the device at once)
-
-blk_queue_max_phys_segments() : Sets an upper limit on the maximum number
-of physical data segments in a request (i.e. the largest sized scatter list
-a driver could handle)
-
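-For example, a driver whose adapter can hand at most 128 address/length
-pairs to the device might do the following at init time (the value is
-illustrative only)::
-
-	blk_queue_max_hw_segments(q, 128);
-	blk_queue_max_phys_segments(q, 128);
-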
-3.2.3 I/O completion
-^^^^^^^^^^^^^^^^^^^^
-
-The existing generic block layer helper routines end_request,
-end_that_request_first and end_that_request_last can be used for i/o
-completion (and setting things up so the rest of the i/o or the next
-request can be kicked off) as before. With the introduction of multi-page
-bio support, end_that_request_first requires an additional argument indicating
-the number of sectors completed.
-
-3.2.4 Implications for drivers that do not interpret bios
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-(don't handle multiple segments)
-
-Drivers that do not interpret bios, e.g. those which do not handle multiple
-segments, do not support i/o into high memory addresses (i.e. require bounce
-buffers) and expect only virtually mapped buffers, can access the rq->buffer
-field. As before, the driver should use current_nr_sectors to determine the
-size of remaining data in the current segment (that is the maximum it can
-transfer in one go unless it interprets segments), and rely on the block layer
-end_request, or end_that_request_first/last to take care of all accounting
-and transparent mapping of the next bio segment when a segment boundary
-is crossed on completion of a transfer. (The end*request* functions should
-be used only if the request has come down from the block/bio path, not for
-direct access requests which only specify rq->buffer without a valid rq->bio.)
-
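-A rough per-segment sketch of such a driver's transfer path, where
-transfer_to_device() is a purely hypothetical PIO helper::
-
-	/* move one segment, then let the block layer do the accounting */
-	transfer_to_device(rq->buffer, rq->current_nr_sectors << 9);
-	end_request(rq, 1);
-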
-3.3 I/O Submission
-------------------
-
-The routine submit_bio() is used to submit a single i/o. Higher level i/o
-routines make use of this:
-
-(a) Buffered i/o:
-
-The routine submit_bh() invokes submit_bio() on a bio corresponding to the
-bh, allocating the bio if required. ll_rw_block() uses submit_bh() as before.
-
-(b) Kiobuf i/o (for raw/direct i/o):
-
-The ll_rw_kio() routine breaks up the kiobuf into page sized chunks and
-maps the array to one or more multi-page bios, issuing submit_bio() to
-perform the i/o on each of these.
-
-The embedded bh array in the kiobuf structure has been removed and no
-preallocation of bios is done for kiobufs. [The intent is to remove the
-blocks array as well, but it's currently in there to kludge around direct i/o.]
-Thus kiobuf allocation has switched back to using kmalloc rather than vmalloc.
-
-Todo/Observation:
-
- A single kiobuf structure is assumed to correspond to a contiguous range
- of data, so brw_kiovec() invokes ll_rw_kio for each kiobuf in a kiovec.
- So right now it wouldn't work for direct i/o on non-contiguous blocks.
- This is to be resolved. The eventual direction is to replace kiobuf
- by kvec's.
-
- Badari Pulavarty has a patch to implement direct i/o correctly using
- bio and kvec.
-
-
-(c) Page i/o:
-
-Todo/Under discussion:
-
- Andrew Morton's multi-page bio patches attempt to issue multi-page
- writeouts (and reads) from the page cache, by directly building up
- large bios for submission completely bypassing the usage of buffer
- heads. This work is still in progress.
-
- Christoph Hellwig had some code that uses bios for page-io (rather than
- bh). This isn't included in bio as yet. Christoph was also working on a
- design for representing virtual/real extents as an entity and modifying
- some of the address space ops interfaces to utilize this abstraction rather
- than buffer_heads. (This is somewhat along the lines of the SGI XFS pagebuf
- abstraction, but intended to be as lightweight as possible).
-
-(d) Direct access i/o:
-
-Direct access requests that do not contain bios would be submitted differently
-as discussed earlier in section 1.3.
-
-Aside:
-
- Kvec i/o:
-
- Ben LaHaise's aio code uses a slightly different structure instead
- of kiobufs, called a kvec_cb. This contains an array of <page, offset, len>
- tuples (very much like the networking code), together with a callback function
- and data pointer. This is embedded into a brw_cb structure when passed
- to brw_kvec_async().
-
- Now it should be possible to directly map these kvecs to a bio. Just as
- with cloning, in this case rather than PRE_BUILT bio_vecs, we set the
- bi_io_vec array pointer to point to the veclet array in the kvecs.
-
- TBD: In order for this to work, some changes are needed in the way multi-page
- bios are handled today. The values of the tuples in such a vector passed in
- from higher level code should not be modified by the block layer in the course
- of its request processing, since that would make it hard for the higher layer
- to continue to use the vector descriptor (kvec) after i/o completes. Instead,
- all such transient state should be maintained in the request structure
- and passed on in some way to the endio completion routine.
-
-
-4. The I/O scheduler
-====================
-
-The I/O scheduler, a.k.a. elevator, is implemented in two layers: the generic
-dispatch queue and the specific I/O schedulers. Unless stated otherwise,
-"elevator" is used to refer to both parts and "I/O scheduler" to the
-specific I/O schedulers.
-
-The block layer implements the generic dispatch queue in `block/*.c`.
-The generic dispatch queue is responsible for requeueing, handling non-fs
-requests and all other subtleties.
-
-Specific I/O schedulers are responsible for ordering normal filesystem
-requests. They can also choose to delay certain requests to improve
-throughput, or for other purposes. As the plural form indicates, there are
-multiple I/O schedulers. They can be built as modules, but at least one must
-be built into the kernel. Each queue can choose a different one and can also
-change to another one dynamically.
-
-A block layer call to the i/o scheduler follows the convention elv_xxx(). This
-calls elevator_xxx_fn in the elevator switch (block/elevator.c). The elv_xxx()
-and elevator_xxx_fn names might not match exactly, but the correspondence is
-close. If an elevator doesn't implement a function, the switch does nothing
-or some minimal housekeeping work.
-
-4.1. I/O scheduler API
-----------------------
-
-The functions an elevator may implement are: (* are mandatory)
-
-=============================== ================================================
-elevator_merge_fn called to query requests for merge with a bio
-
-elevator_merge_req_fn		called when two requests get merged. The one
-				which gets merged into the other one will
-				never be seen by the I/O scheduler again.
-				IOW, after being merged, the request is gone.
-
-elevator_merged_fn called when a request in the scheduler has been
- involved in a merge. It is used in the deadline
- scheduler for example, to reposition the request
- if its sorting order has changed.
-
-elevator_allow_merge_fn		called whenever the block layer determines
-				that a bio can be merged into an existing
-				request safely. The io scheduler may still
-				want to stop a merge at this point if it
-				would result in some sort of internal
-				conflict; this hook allows it to do that.
-				Note however that two *requests* can still
-				be merged at a later time. Currently the io
-				scheduler has no way to prevent that. It can
-				only learn about the fact from the
-				elevator_merge_req_fn callback.
-
-elevator_dispatch_fn* fills the dispatch queue with ready requests.
- I/O schedulers are free to postpone requests by
- not filling the dispatch queue unless @force
- is non-zero. Once dispatched, I/O schedulers
- are not allowed to manipulate the requests -
- they belong to generic dispatch queue.
-
-elevator_add_req_fn* called to add a new request into the scheduler
-
-elevator_former_req_fn
-elevator_latter_req_fn These return the request before or after the
- one specified in disk sort order. Used by the
- block layer to find merge possibilities.
-
-elevator_completed_req_fn called when a request is completed.
-
-elevator_set_req_fn
-elevator_put_req_fn Must be used to allocate and free any elevator
- specific storage for a request.
-
-elevator_activate_req_fn Called when device driver first sees a request.
- I/O schedulers can use this callback to
- determine when actual execution of a request
- starts.
-elevator_deactivate_req_fn Called when device driver decides to delay
- a request by requeueing it.
-
-elevator_init_fn*
-elevator_exit_fn Allocate and free any elevator specific storage
- for a queue.
-=============================== ================================================
-
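-A rough sketch of how an elevator of this era wired up these hooks, where
-iosched_foo and the foo_* functions are hypothetical::
-
-	static struct elevator_type iosched_foo = {
-		.ops = {
-			.elevator_merge_fn	= foo_merge,
-			.elevator_dispatch_fn	= foo_dispatch,
-			.elevator_add_req_fn	= foo_add_request,
-			.elevator_init_fn	= foo_init_queue,
-			.elevator_exit_fn	= foo_exit_queue,
-		},
-		.elevator_name = "foo",
-	};
-
-	elv_register(&iosched_foo);
-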
-4.2 Request flows seen by I/O schedulers
-----------------------------------------
-
-All requests seen by I/O schedulers strictly follow one of the following three
-flows.
-
- set_req_fn ->
-
- i. add_req_fn -> (merged_fn ->)* -> dispatch_fn -> activate_req_fn ->
- (deactivate_req_fn -> activate_req_fn ->)* -> completed_req_fn
- ii. add_req_fn -> (merged_fn ->)* -> merge_req_fn
- iii. [none]
-
- -> put_req_fn
-
-4.3 I/O scheduler implementation
---------------------------------
-
-The generic i/o scheduler algorithm attempts to sort/merge/batch requests for
-optimal disk scan and request servicing performance (based on generic
-principles and device capabilities), optimized for:
-
-i. improved throughput
-ii. improved latency
-iii. better utilization of h/w & CPU time
-
-Characteristics:
-
-i. Binary tree
-AS and deadline i/o schedulers use red-black trees for disk position
-sorting and searching, and a FIFO linked list for time-based searching. This
-gives good scalability and good availability of information. Requests are
-almost always dispatched in disk sort order, so a cache is kept of the next
-request in sort order to prevent binary tree lookups.
-
-This arrangement is not a generic block layer characteristic however, so
-elevators may implement queues as they please.
-
-ii. Merge hash
-AS and deadline use a hash table indexed by the last sector of a request. This
-enables merging code to quickly look up "back merge" candidates, even when
-multiple I/O streams are being performed at once on one disk.
-
-"Front merges", a new request being merged at the front of an existing request,
-are far less common than "back merges" due to the nature of most I/O patterns.
-Front merges are handled by the binary trees in AS and deadline schedulers.
-
-iii. Plugging the queue to batch requests in anticipation of opportunities for
- merge/sort optimizations
-
-Plugging is an approach that the current i/o scheduling algorithm resorts to so
-that it collects up enough requests in the queue to be able to take
-advantage of the sorting/merging logic in the elevator. If the
-queue is empty when a request comes in, then it plugs the request queue
-(sort of like plugging the bath tub of a vessel to get fluid to build up)
-till it fills up with a few more requests, before starting to service
-the requests. This provides an opportunity to merge/sort the requests before
-passing them down to the device. There are various conditions under which the
-queue is unplugged (to open up the flow again), either through a scheduled
-task or on demand. For example wait_on_buffer sets the unplugging going
-through sync_buffer() running blk_run_address_space(mapping). Or the caller
-can do it explicitly through blk_unplug(bdev). So in the read case,
-the queue gets explicitly unplugged as part of waiting for completion on that
-buffer.
-
-Aside:
- This is kind of controversial territory, as it's not clear if plugging is
- always the right thing to do. Devices typically have their own queues,
- and allowing a big queue to build up in software, while letting the device be
- idle for a while may not always make sense. The trick is to handle the fine
- balance between when to plug and when to open up. Also now that we have
- multi-page bios being queued in one shot, we may not need to wait to merge
- a big request from the broken up pieces coming by.
-
-4.4 I/O contexts
-----------------
-
-I/O contexts provide a dynamically allocated per-process data area. They may
-be used in I/O schedulers, and in the block layer (for I/O statistics or
-priorities, for example). See `*io_context` in block/ll_rw_blk.c, and
-as-iosched.c for an example of usage in an i/o scheduler.
-
-
-5. Scalability related changes
-==============================
-
-5.1 Granular Locking: io_request_lock replaced by a per-queue lock
-------------------------------------------------------------------
-
-The global io_request_lock has been removed as of 2.5, to avoid
-the scalability bottleneck it was causing, and has been replaced by more
-granular locking. The request queue structure has a pointer to the
-lock to be used for that queue. As a result, locking can now be
-per-queue, with a provision for sharing a lock across queues if
-necessary (e.g. the scsi layer sets the queue lock pointers to the
-corresponding adapter lock, which results in a per-host locking
-granularity). The locking semantics are the same, i.e. locking is
-still imposed by the block layer, which grabs the lock before
-request_fn execution. This means that lots of older drivers
-should still be SMP safe. Drivers are free to drop the queue
-lock themselves, if required. Drivers that explicitly used the
-io_request_lock for serialization need to be modified accordingly.
-Usually it's as easy as adding a global lock::
-
- static DEFINE_SPINLOCK(my_driver_lock);
-
-and passing the address to that lock to blk_init_queue().
-
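-For example (a sketch; my_request_fn stands in for the driver's request
-function)::
-
-	q = blk_init_queue(my_request_fn, &my_driver_lock);
-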
-5.2 64 bit sector numbers (sector_t prepares for 64 bit support)
-----------------------------------------------------------------
-
-The sector number used in the bio structure has been changed to sector_t,
-which could be defined as 64 bit in preparation for 64 bit sector support.
-
-6. Other Changes/Implications
-=============================
-
-6.1 Partition re-mapping handled by the generic block layer
------------------------------------------------------------
-
-In 2.5 some of the gendisk/partition related code has been reorganized.
-Now the generic block layer performs partition-remapping early and thus
-provides drivers with a sector number relative to whole device, rather than
-having to take partition number into account in order to arrive at the true
-sector number. The routine blk_partition_remap() is invoked by
-submit_bio_noacct even before invoking the queue specific ->submit_bio,
-so the i/o scheduler also gets to operate on whole disk sector numbers. This
-should typically not require changes to block drivers; a driver just never
-gets to invoke its own partition sector offset calculations since all bios
-sent are offset from the beginning of the device.
-
-
-7. A Few Tips on Migration of older drivers
-===========================================
-
-Old-style drivers that just use CURRENT and ignore clustered requests
-may not need much change. The generic layer will automatically handle
-clustered requests, multi-page bios, etc. for the driver.
-
-For a low-performance driver or hardware that is PIO driven or just doesn't
-support scatter-gather, changes should be minimal too.
-
-The following are some points to keep in mind when converting old drivers
-to bio.
-
-Drivers should use elv_next_request to pick up requests and are no longer
-supposed to handle looping directly over the request list.
-(struct request->queue has been removed)
-
-Now end_that_request_first takes an additional number_of_sectors argument.
-It used to always handle just the first buffer_head in a request; now
-it will loop and handle as many sectors (on a bio-segment granularity)
-as specified.
-
-Now bh->b_end_io is replaced by bio->bi_end_io, but most of the time the
-right thing to use is bio_endio(bio) instead.
-
-If the driver is dropping the io_request_lock from its request_fn strategy,
-then it just needs to replace that with q->queue_lock instead.
-
-As described in Sec 1.1, drivers can set max sector size, max segment size,
-etc. per queue now. Drivers that used to define their own merge functions
-to handle things like this can now just use the blk_queue_* functions at
-blk_init_queue time.
-
-Drivers no longer have to map a {partition, sector offset} into the
-correct absolute location; this is done by the block layer, so
-where a driver received a request like this before::
-
- rq->rq_dev = mk_kdev(3, 5); /* /dev/hda5 */
- rq->sector = 0; /* first sector on hda5 */
-
-it will now see::
-
- rq->rq_dev = mk_kdev(3, 0); /* /dev/hda */
- rq->sector = 123128; /* offset from start of disk */
-
-As mentioned, there is no virtual mapping of a bio. For DMA, this is
-not a problem as the driver probably never will need a virtual mapping.
-Instead it needs a bus mapping (dma_map_page for a single segment or
-dma_map_sg for scatter-gather) to be able to ship it to the device. For
-PIO drivers (or drivers that need to revert to PIO transfer once in a
-while, such as IDE), where the CPU is doing the actual data
-transfer, a virtual mapping is needed. If the driver supports highmem I/O
-(Sec 1.1, (ii)) it needs to use kmap_atomic or similar to temporarily map
-a bio into the virtual address space.
-
-
-8. Prior/Related/Impacted patches
-=================================
-
-8.1. Earlier kiobuf patches (sct/axboe/chait/hch/mkp)
------------------------------------------------------
-
-- orig kiobuf & raw i/o patches (now in 2.4 tree)
-- direct kiobuf based i/o to devices (no intermediate bh's)
-- page i/o using kiobuf
-- kiobuf splitting for lvm (mkp)
-- elevator support for kiobuf request merging (axboe)
-
-8.2. Zero-copy networking (Dave Miller)
----------------------------------------
-
-8.3. SGI XFS - pagebuf patches - use of kiobufs
------------------------------------------------
-8.4. Multi-page pioent patch for bio (Christoph Hellwig)
---------------------------------------------------------
-8.5. Direct i/o implementation (Andrea Arcangeli) since 2.4.10-pre11
---------------------------------------------------------------------
-8.6. Async i/o implementation patch (Ben LaHaise)
--------------------------------------------------
-8.7. EVMS layering design (IBM EVMS team)
------------------------------------------
-8.8. Larger page cache size patch (Ben LaHaise) and Large page size (Daniel Phillips)
--------------------------------------------------------------------------------------
-
- => larger contiguous physical memory buffers
-
-8.9. VM reservations patch (Ben LaHaise)
-----------------------------------------
-8.10. Write clustering patches ? (Marcelo/Quintela/Riel ?)
-----------------------------------------------------------
-8.11. Block device in page cache patch (Andrea Arcangeli) - now in 2.4.10+
----------------------------------------------------------------------------
-8.12. Multiple block-size transfers for faster raw i/o (Shailabh Nagar, Badari)
--------------------------------------------------------------------------------
-8.13 Priority based i/o scheduler - prepatches (Arjan van de Ven)
-------------------------------------------------------------------
-8.14 IDE Taskfile i/o patch (Andre Hedrick)
---------------------------------------------
-8.15 Multi-page writeout and readahead patches (Andrew Morton)
----------------------------------------------------------------
-8.16 Direct i/o patches for 2.5 using kvec and bio (Badari Pulavarty)
------------------------------------------------------------------------
-
-9. Other References
-===================
-
-9.1 The Splice I/O Model
-------------------------
-
-Larry McVoy (and subsequent discussions on lkml, and Linus' comments) - Jan 2001
-
-9.2 Discussions about kiobuf and bh design
-------------------------------------------
-
-On lkml between sct, linus, alan et al - Feb-March 2001 (many of the
-initial thoughts that led to bio were brought up in this discussion thread)
-
-9.3 Discussions on mempool on lkml - Dec 2001.
-----------------------------------------------
diff --git a/Documentation/block/capability.rst b/Documentation/block/capability.rst
index 160a5148b915..2ae7f064736a 100644
--- a/Documentation/block/capability.rst
+++ b/Documentation/block/capability.rst
@@ -7,4 +7,4 @@ This file documents the sysfs file ``block/<disk>/capability``.
``capability`` is a bitfield, printed in hexadecimal, indicating which
capabilities a specific block device supports:
-.. kernel-doc:: include/linux/genhd.h
+.. kernel-doc:: include/linux/blkdev.h
diff --git a/Documentation/block/index.rst b/Documentation/block/index.rst
index 3a41495dd77b..68f115f2b1c6 100644
--- a/Documentation/block/index.rst
+++ b/Documentation/block/index.rst
@@ -8,7 +8,6 @@ Block
:maxdepth: 1
bfq-iosched
- biodoc
biovecs
blk-mq
capability
diff --git a/Documentation/cdrom/packet-writing.rst b/Documentation/cdrom/packet-writing.rst
index c5c957195a5a..43db58c50d29 100644
--- a/Documentation/cdrom/packet-writing.rst
+++ b/Documentation/cdrom/packet-writing.rst
@@ -11,7 +11,7 @@ Getting started quick
- Compile and install kernel and modules, reboot.
- You need the udftools package (pktsetup, mkudffs, cdrwtool).
- Download from http://sourceforge.net/projects/linux-udf/
+ Download from https://github.com/pali/udftools
- Grab a new CD-RW disc and format it (assuming CD-RW is hdc, substitute
as appropriate)::
@@ -102,7 +102,7 @@ Using the pktcdvd sysfs interface
Since Linux 2.6.20, the pktcdvd module has a sysfs interface
and can be controlled by it. For example the "pktcdvd" tool uses
-this interface. (see http://tom.ist-im-web.de/download/pktcdvd )
+this interface. (see http://tom.ist-im-web.de/linux/software/pktcdvd )
"pktcdvd" works similar to "pktsetup", e.g.::
diff --git a/Documentation/conf.py b/Documentation/conf.py
index f07f2e9b9f2c..072ee31a301d 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -409,135 +409,25 @@ latex_elements = {
# Additional stuff for the LaTeX preamble.
'preamble': '''
- % Prevent column squeezing of tabulary.
- \\setlength{\\tymin}{20em}
% Use some font with UTF-8 support with XeLaTeX
\\usepackage{fontspec}
\\setsansfont{DejaVu Sans}
\\setromanfont{DejaVu Serif}
\\setmonofont{DejaVu Sans Mono}
- % Adjust \\headheight for fancyhdr
- \\addtolength{\\headheight}{1.6pt}
- \\addtolength{\\topmargin}{-1.6pt}
- ''',
+ ''',
}
-# Translations have Asian (CJK) characters which are only displayed if
-# xeCJK is used
-
-latex_elements['preamble'] += '''
- \\IfFontExistsTF{Noto Sans CJK SC}{
- % This is needed for translations
- \\usepackage{xeCJK}
- \\IfFontExistsTF{Noto Serif CJK SC}{
- \\setCJKmainfont{Noto Serif CJK SC}[AutoFakeSlant]
- }{
- \\setCJKmainfont{Noto Sans CJK SC}[AutoFakeSlant]
- }
- \\setCJKsansfont{Noto Sans CJK SC}[AutoFakeSlant]
- \\setCJKmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]
- % CJK Language-specific font choices
- \\IfFontExistsTF{Noto Serif CJK SC}{
- \\newCJKfontfamily[SCmain]\\scmain{Noto Serif CJK SC}[AutoFakeSlant]
- \\newCJKfontfamily[SCserif]\\scserif{Noto Serif CJK SC}[AutoFakeSlant]
- }{
- \\newCJKfontfamily[SCmain]\\scmain{Noto Sans CJK SC}[AutoFakeSlant]
- \\newCJKfontfamily[SCserif]\\scserif{Noto Sans CJK SC}[AutoFakeSlant]
- }
- \\newCJKfontfamily[SCsans]\\scsans{Noto Sans CJK SC}[AutoFakeSlant]
- \\newCJKfontfamily[SCmono]\\scmono{Noto Sans Mono CJK SC}[AutoFakeSlant]
- \\IfFontExistsTF{Noto Serif CJK TC}{
- \\newCJKfontfamily[TCmain]\\tcmain{Noto Serif CJK TC}[AutoFakeSlant]
- \\newCJKfontfamily[TCserif]\\tcserif{Noto Serif CJK TC}[AutoFakeSlant]
- }{
- \\newCJKfontfamily[TCmain]\\tcmain{Noto Sans CJK TC}[AutoFakeSlant]
- \\newCJKfontfamily[TCserif]\\tcserif{Noto Sans CJK TC}[AutoFakeSlant]
- }
- \\newCJKfontfamily[TCsans]\\tcsans{Noto Sans CJK TC}[AutoFakeSlant]
- \\newCJKfontfamily[TCmono]\\tcmono{Noto Sans Mono CJK TC}[AutoFakeSlant]
- \\IfFontExistsTF{Noto Serif CJK KR}{
- \\newCJKfontfamily[KRmain]\\krmain{Noto Serif CJK KR}[AutoFakeSlant]
- \\newCJKfontfamily[KRserif]\\krserif{Noto Serif CJK KR}[AutoFakeSlant]
- }{
- \\newCJKfontfamily[KRmain]\\krmain{Noto Sans CJK KR}[AutoFakeSlant]
- \\newCJKfontfamily[KRserif]\\krserif{Noto Sans CJK KR}[AutoFakeSlant]
- }
- \\newCJKfontfamily[KRsans]\\krsans{Noto Sans CJK KR}[AutoFakeSlant]
- \\newCJKfontfamily[KRmono]\\krmono{Noto Sans Mono CJK KR}[AutoFakeSlant]
- \\IfFontExistsTF{Noto Serif CJK JP}{
- \\newCJKfontfamily[JPmain]\\jpmain{Noto Serif CJK JP}[AutoFakeSlant]
- \\newCJKfontfamily[JPserif]\\jpserif{Noto Serif CJK JP}[AutoFakeSlant]
- }{
- \\newCJKfontfamily[JPmain]\\jpmain{Noto Sans CJK JP}[AutoFakeSlant]
- \\newCJKfontfamily[JPserif]\\jpserif{Noto Sans CJK JP}[AutoFakeSlant]
- }
- \\newCJKfontfamily[JPsans]\\jpsans{Noto Sans CJK JP}[AutoFakeSlant]
- \\newCJKfontfamily[JPmono]\\jpmono{Noto Sans Mono CJK JP}[AutoFakeSlant]
- % Dummy commands for Sphinx < 2.3 (no 'extrapackages' support)
- \\providecommand{\\onehalfspacing}{}
- \\providecommand{\\singlespacing}{}
- % Define custom macros to on/off CJK
- \\newcommand{\\kerneldocCJKon}{\\makexeCJKactive\\onehalfspacing}
- \\newcommand{\\kerneldocCJKoff}{\\makexeCJKinactive\\singlespacing}
- \\newcommand{\\kerneldocBeginSC}{%
- \\begingroup%
- \\scmain%
- }
- \\newcommand{\\kerneldocEndSC}{\\endgroup}
- \\newcommand{\\kerneldocBeginTC}{%
- \\begingroup%
- \\tcmain%
- \\renewcommand{\\CJKrmdefault}{TCserif}%
- \\renewcommand{\\CJKsfdefault}{TCsans}%
- \\renewcommand{\\CJKttdefault}{TCmono}%
- }
- \\newcommand{\\kerneldocEndTC}{\\endgroup}
- \\newcommand{\\kerneldocBeginKR}{%
- \\begingroup%
- \\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}%
- \\xeCJKDeclareCharClass{HalfRight}{`”,`’}%
- \\krmain%
- \\renewcommand{\\CJKrmdefault}{KRserif}%
- \\renewcommand{\\CJKsfdefault}{KRsans}%
- \\renewcommand{\\CJKttdefault}{KRmono}%
- \\xeCJKsetup{CJKspace = true} % For inter-phrase space
- }
- \\newcommand{\\kerneldocEndKR}{\\endgroup}
- \\newcommand{\\kerneldocBeginJP}{%
- \\begingroup%
- \\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}%
- \\xeCJKDeclareCharClass{HalfRight}{`”,`’}%
- \\jpmain%
- \\renewcommand{\\CJKrmdefault}{JPserif}%
- \\renewcommand{\\CJKsfdefault}{JPsans}%
- \\renewcommand{\\CJKttdefault}{JPmono}%
- }
- \\newcommand{\\kerneldocEndJP}{\\endgroup}
- % Single spacing in literal blocks
- \\fvset{baselinestretch=1}
- % To customize \\sphinxtableofcontents
- \\usepackage{etoolbox}
- % Inactivate CJK after tableofcontents
- \\apptocmd{\\sphinxtableofcontents}{\\kerneldocCJKoff}{}{}
- }{ % No CJK font found
- % Custom macros to on/off CJK (Dummy)
- \\newcommand{\\kerneldocCJKon}{}
- \\newcommand{\\kerneldocCJKoff}{}
- \\newcommand{\\kerneldocBeginSC}{}
- \\newcommand{\\kerneldocEndSC}{}
- \\newcommand{\\kerneldocBeginTC}{}
- \\newcommand{\\kerneldocEndTC}{}
- \\newcommand{\\kerneldocBeginKR}{}
- \\newcommand{\\kerneldocEndKR}{}
- \\newcommand{\\kerneldocBeginJP}{}
- \\newcommand{\\kerneldocEndJP}{}
- }
-'''
-
# Fix reference escape troubles with Sphinx 1.4.x
if major == 1:
latex_elements['preamble'] += '\\renewcommand*{\\DUrole}[2]{ #2 }\n'
+
+# Load kerneldoc specific LaTeX settings
+latex_elements['preamble'] += '''
+ % Load kerneldoc specific LaTeX settings
+ \\input{kerneldoc-preamble.sty}
+'''
+
# With Sphinx 1.6, it is possible to change the Bg color directly
# by using:
# \definecolor{sphinxnoteBgColor}{RGB}{204,255,255}
@@ -599,6 +489,11 @@ for fn in os.listdir('.'):
# If false, no module index is generated.
#latex_domain_indices = True
+# Additional LaTeX stuff to be copied to build directory
+latex_additional_files = [
+ 'sphinx/kerneldoc-preamble.sty',
+]
+
# -- Options for manual page output ---------------------------------------
diff --git a/Documentation/core-api/entry.rst b/Documentation/core-api/entry.rst
new file mode 100644
index 000000000000..e12f22ab33c7
--- /dev/null
+++ b/Documentation/core-api/entry.rst
@@ -0,0 +1,279 @@
+Entry/exit handling for exceptions, interrupts, syscalls and KVM
+================================================================
+
+All transitions between execution domains require state updates which are
+subject to strict ordering constraints. State updates are required for the
+following:
+
+ * Lockdep
+ * RCU / Context tracking
+ * Preemption counter
+ * Tracing
+ * Time accounting
+
+The update order depends on the transition type and is explained below in
+the transition type sections: `Syscalls`_, `KVM`_, `Interrupts and regular
+exceptions`_, `NMI and NMI-like exceptions`_.
+
+Non-instrumentable code - noinstr
+---------------------------------
+
+Most instrumentation facilities depend on RCU, so instrumentation is prohibited
+for entry code before RCU starts watching and exit code after RCU stops
+watching. In addition, many architectures must save and restore register state,
+which means that (for example) a breakpoint in the breakpoint entry code would
+overwrite the debug registers of the initial breakpoint.
+
+Such code must be marked with the 'noinstr' attribute, placing that code into a
+special section inaccessible to instrumentation and debug facilities. Some
+functions are partially instrumentable, which is handled by marking them
+noinstr and using instrumentation_begin() and instrumentation_end() to flag the
+instrumentable ranges of code:
+
+.. code-block:: c
+
+ noinstr void entry(void)
+ {
+ handle_entry(); // <-- must be 'noinstr' or '__always_inline'
+ ...
+
+ instrumentation_begin();
+ handle_context(); // <-- instrumentable code
+ instrumentation_end();
+
+ ...
+ handle_exit(); // <-- must be 'noinstr' or '__always_inline'
+ }
+
+This allows verification of the 'noinstr' restrictions via objtool on
+supported architectures.
+
+Invoking non-instrumentable functions from instrumentable context has no
+restrictions and is useful to protect e.g. state switching, which would
+malfunction if instrumented.
+
+All non-instrumentable entry/exit code sections before and after the RCU
+state transitions must run with interrupts disabled.
+
+Syscalls
+--------
+
+Syscall-entry code starts in assembly code and calls out into low-level C code
+after establishing low-level architecture-specific state and stack frames. This
+low-level C code must not be instrumented. A typical syscall handling function
+invoked from low-level assembly code looks like this:
+
+.. code-block:: c
+
+ noinstr void syscall(struct pt_regs *regs, int nr)
+ {
+ arch_syscall_enter(regs);
+ nr = syscall_enter_from_user_mode(regs, nr);
+
+ instrumentation_begin();
+ if (!invoke_syscall(regs, nr) && nr != -1)
+ result_reg(regs) = __sys_ni_syscall(regs);
+ instrumentation_end();
+
+ syscall_exit_to_user_mode(regs);
+ }
+
+syscall_enter_from_user_mode() first invokes enter_from_user_mode() which
+establishes state in the following order:
+
+ * Lockdep
+ * RCU / Context tracking
+ * Tracing
+
+and then invokes the various entry work functions like ptrace, seccomp, audit,
+syscall tracing, etc. After all that is done, the instrumentable invoke_syscall
+function can be invoked. The instrumentable code section then ends, after which
+syscall_exit_to_user_mode() is invoked.
+
+syscall_exit_to_user_mode() handles all work which needs to be done before
+returning to user space like tracing, audit, signals, task work etc. After
+that it invokes exit_to_user_mode() which again handles the state
+transition in the reverse order:
+
+ * Tracing
+ * RCU / Context tracking
+ * Lockdep
+
+syscall_enter_from_user_mode() and syscall_exit_to_user_mode() are also
+available as fine grained subfunctions in cases where the architecture code
+has to do extra work between the various steps. In such cases it has to
+ensure that enter_from_user_mode() is called first on entry and
+exit_to_user_mode() is called last on exit.
+
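+For example, such an architecture's syscall handler might be structured as
+follows (a hedged sketch; arch_establish_state() stands in for whatever
+extra architecture-specific work is needed between the steps):
+
+.. code-block:: c
+
+   noinstr void syscall(struct pt_regs *regs, int nr)
+   {
+           enter_from_user_mode(regs);     // <-- must be first on entry
+           arch_establish_state(regs);     // <-- hypothetical extra work
+           ...
+           exit_to_user_mode();            // <-- must be last on exit
+   }
+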
+Do not nest syscalls. Nested syscalls will cause RCU and/or context tracking
+to print a warning.
+
+KVM
+---
+
+Entering or exiting guest mode is very similar to syscalls. From the host
+kernel point of view the CPU goes off into user space when entering the
+guest and returns to the kernel on exit.
+
+kvm_guest_enter_irqoff() is a KVM-specific variant of exit_to_user_mode()
+and kvm_guest_exit_irqoff() is the KVM variant of enter_from_user_mode().
+The state operations have the same ordering.
+
+Task work handling is done separately for guests at the boundary of the
+vcpu_run() loop via xfer_to_guest_mode_handle_work(), which is a subset of
+the work handled on return to user space.
+
+Do not nest KVM entry/exit transitions because doing so is nonsensical.
+
+Interrupts and regular exceptions
+---------------------------------
+
+Interrupt entry and exit handling is slightly more complex than syscall
+and KVM transitions.
+
+If an interrupt is raised while the CPU executes in user space, the entry
+and exit handling is exactly the same as for syscalls.
+
+If the interrupt is raised while the CPU executes in kernel space the entry and
+exit handling is slightly different. RCU state is only updated when the
+interrupt is raised in the context of the CPU's idle task. Otherwise, RCU will
+already be watching. Lockdep and tracing have to be updated unconditionally.
+
+irqentry_enter() and irqentry_exit() provide the implementation for this.
+
+The architecture-specific part looks similar to syscall handling:
+
+.. code-block:: c
+
+ noinstr void interrupt(struct pt_regs *regs, int nr)
+ {
+ arch_interrupt_enter(regs);
+ state = irqentry_enter(regs);
+
+ instrumentation_begin();
+
+ irq_enter_rcu();
+ invoke_irq_handler(regs, nr);
+ irq_exit_rcu();
+
+ instrumentation_end();
+
+ irqentry_exit(regs, state);
+ }
+
+Note that the invocation of the actual interrupt handler is within an
+irq_enter_rcu() and irq_exit_rcu() pair.
+
+irq_enter_rcu() updates the preemption count which makes in_hardirq()
+return true, handles NOHZ tick state and interrupt time accounting. This
+means that up to the point where irq_enter_rcu() is invoked in_hardirq()
+returns false.
+
+irq_exit_rcu() handles interrupt time accounting, undoes the preemption
+count update and eventually handles soft interrupts and NOHZ tick state.
+
+In theory, the preemption count could be updated in irqentry_enter(). In
+practice, deferring this update to irq_enter_rcu() allows the preemption-count
+code to be traced, while also maintaining symmetry with irq_exit_rcu() and
+irqentry_exit(), which are described in the next paragraph. The only downside
+is that the early entry code up to irq_enter_rcu() must be aware that the
+preemption count has not yet been updated with the HARDIRQ_OFFSET state.
+
+Note that irq_exit_rcu() must remove HARDIRQ_OFFSET from the preemption count
+before it handles soft interrupts, whose handlers must run in BH context rather
+than irq-disabled context. In addition, irqentry_exit() might schedule, which
+also requires that HARDIRQ_OFFSET has been removed from the preemption count.
+
+Even though interrupt handlers are expected to run with local interrupts
+disabled, interrupt nesting is common from an entry/exit perspective. For
+example, softirq handling happens within an irqentry_{enter,exit}() block with
+local interrupts enabled. Also, although uncommon, nothing prevents an
+interrupt handler from re-enabling interrupts.
+
+Interrupt entry/exit code doesn't strictly need to handle reentrancy, since it
+runs with local interrupts disabled. But NMIs can happen anytime, and a lot of
+the entry code is shared between the two.
+
+NMI and NMI-like exceptions
+---------------------------
+
+NMIs and NMI-like exceptions (machine checks, double faults, debug
+interrupts, etc.) can hit any context and must be extra careful with
+the state.
+
+State changes for debug exceptions and machine-check exceptions depend on
+whether these exceptions happened in user-space (breakpoints or watchpoints) or
+in kernel mode (code patching). From user-space, they are treated like
+interrupts, while from kernel mode they are treated like NMIs.
+
+NMIs and other NMI-like exceptions handle state transitions without
+distinguishing between user-mode and kernel-mode origin.
+
+The state update on entry is handled in irqentry_nmi_enter() which updates
+state in the following order:
+
+ * Preemption counter
+ * Lockdep
+ * RCU / Context tracking
+ * Tracing
+
+The exit counterpart irqentry_nmi_exit() does the reverse operation in the
+reverse order.
+
+Note that the update of the preemption counter has to be the first
+operation on enter and the last operation on exit. The reason is that both
+lockdep and RCU rely on in_nmi() returning true in this case. The
+preemption count modification in the NMI entry/exit case must not be
+traced.
+
+Architecture-specific code looks like this:
+
+.. code-block:: c
+
+ noinstr void nmi(struct pt_regs *regs)
+ {
+ arch_nmi_enter(regs);
+ state = irqentry_nmi_enter(regs);
+
+ instrumentation_begin();
+ nmi_handler(regs);
+ instrumentation_end();
+
+ irqentry_nmi_exit(regs);
+ }
+
+and for e.g. a debug exception it can look like this:
+
+.. code-block:: c
+
+ noinstr void debug(struct pt_regs *regs)
+ {
+ arch_nmi_enter(regs);
+
+ debug_regs = save_debug_regs();
+
+ if (user_mode(regs)) {
+ state = irqentry_enter(regs);
+
+ instrumentation_begin();
+ user_mode_debug_handler(regs, debug_regs);
+ instrumentation_end();
+
+ irqentry_exit(regs, state);
+ } else {
+ state = irqentry_nmi_enter(regs);
+
+ instrumentation_begin();
+ kernel_mode_debug_handler(regs, debug_regs);
+ instrumentation_end();
+
+ irqentry_nmi_exit(regs, state);
+ }
+ }
+
+There is no combined irqentry_nmi_if_kernel() function available as the
+above cannot be handled in an exception-agnostic way.
+
+NMIs can happen in any context; for example, an NMI-like exception can be
+triggered while handling an NMI. So NMI entry code has to be reentrant and
+state updates need to handle nesting.
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index 5de2c7a4b1b3..972d46a5ddf6 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -44,6 +44,14 @@ Library functionality that is used throughout the kernel.
timekeeping
errseq
+Low level entry and exit
+========================
+
+.. toctree::
+ :maxdepth: 1
+
+ entry
+
Concurrency primitives
======================
diff --git a/Documentation/core-api/mm-api.rst b/Documentation/core-api/mm-api.rst
index 395835f9289f..f5b2f92822c8 100644
--- a/Documentation/core-api/mm-api.rst
+++ b/Documentation/core-api/mm-api.rst
@@ -58,15 +58,30 @@ Virtually Contiguous Mappings
File Mapping and Page Cache
===========================
-.. kernel-doc:: mm/readahead.c
- :export:
+Filemap
+-------
.. kernel-doc:: mm/filemap.c
:export:
+Readahead
+---------
+
+.. kernel-doc:: mm/readahead.c
+ :doc: Readahead Overview
+
+.. kernel-doc:: mm/readahead.c
+ :export:
+
+Writeback
+---------
+
.. kernel-doc:: mm/page-writeback.c
:export:
+Truncate
+--------
+
.. kernel-doc:: mm/truncate.c
:export:
diff --git a/Documentation/core-api/pin_user_pages.rst b/Documentation/core-api/pin_user_pages.rst
index fcf605be43d0..b18416f4500f 100644
--- a/Documentation/core-api/pin_user_pages.rst
+++ b/Documentation/core-api/pin_user_pages.rst
@@ -55,18 +55,18 @@ flags the caller provides. The caller is required to pass in a non-null struct
pages* array, and the function then pins pages by incrementing each by a special
value: GUP_PIN_COUNTING_BIAS.
-For huge pages (and in fact, any compound page of more than 2 pages), the
-GUP_PIN_COUNTING_BIAS scheme is not used. Instead, an exact form of pin counting
-is achieved, by using the 3rd struct page in the compound page. A new struct
-page field, hpage_pinned_refcount, has been added in order to support this.
+For compound pages, the GUP_PIN_COUNTING_BIAS scheme is not used. Instead,
+an exact form of pin counting is achieved, by using the 2nd struct page
+in the compound page. A new struct page field, compound_pincount, has
+been added in order to support this.
This approach for compound pages avoids the counting upper limit problems that
are discussed below. Those limitations would have been aggravated severely by
huge pages, because each tail page adds a refcount to the head page. And in
-fact, testing revealed that, without a separate hpage_pinned_refcount field,
+fact, testing revealed that, without a separate compound_pincount field,
page overflows were seen in some huge page stress tests.
-This also means that huge pages and compound pages (of order > 1) do not suffer
+This also means that huge pages and compound pages do not suffer
+from the false positives problem that is mentioned below::
Function
@@ -264,9 +264,9 @@ place.)
Other diagnostics
=================
-dump_page() has been enhanced slightly, to handle these new counting fields, and
-to better report on compound pages in general. Specifically, for compound pages
-with order > 1, the exact (hpage_pinned_refcount) pincount is reported.
+dump_page() has been enhanced slightly, to handle these new counting
+fields, and to better report on compound pages in general. Specifically,
+for compound pages, the exact (compound_pincount) pincount is reported.
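+
+For example, a minimal (hypothetical) diagnostic call site::
+
+    dump_page(page, "compound_pincount diagnostics");
+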
References
==========
diff --git a/Documentation/cpu-freq/cpu-drivers.rst b/Documentation/cpu-freq/cpu-drivers.rst
index 3b32336a7803..d84ededb66f9 100644
--- a/Documentation/cpu-freq/cpu-drivers.rst
+++ b/Documentation/cpu-freq/cpu-drivers.rst
@@ -75,6 +75,9 @@ And optionally
.resume - A pointer to a per-policy resume function which is called
with interrupts disabled and _before_ the governor is started again.
+ .ready - A pointer to a per-policy ready function which is called after
+ the policy is fully initialized.
+
.attr - A pointer to a NULL-terminated list of "struct freq_attr" which
allow to export values to sysfs.
diff --git a/Documentation/dev-tools/kfence.rst b/Documentation/dev-tools/kfence.rst
index ac6b89d1a8c3..936f6aaa75c8 100644
--- a/Documentation/dev-tools/kfence.rst
+++ b/Documentation/dev-tools/kfence.rst
@@ -41,6 +41,18 @@ guarded by KFENCE. The default is configurable via the Kconfig option
``CONFIG_KFENCE_SAMPLE_INTERVAL``. Setting ``kfence.sample_interval=0``
disables KFENCE.
+The sample interval controls a timer that sets up KFENCE allocations. By
+default, to keep the real sample interval predictable, the normal timer also
+causes CPU wake-ups when the system is completely idle. This may be undesirable
+on power-constrained systems. The boot parameter ``kfence.deferrable=1``
+instead switches to a "deferrable" timer which does not force CPU wake-ups on
+idle systems, at the risk of unpredictable sample intervals. The default is
+configurable via the Kconfig option ``CONFIG_KFENCE_DEFERRABLE``.
+
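+For example, to select a 500 millisecond sample interval together with the
+deferrable timer on the kernel command line (an illustrative combination)::
+
+    kfence.sample_interval=500 kfence.deferrable=1
+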
+.. warning::
+ The KUnit test suite is very likely to fail when using a deferrable timer
+ since it currently causes very unpredictable sample intervals.
+
The KFENCE memory pool is of fixed size, and if the pool is exhausted, no
further KFENCE allocations occur. With ``CONFIG_KFENCE_NUM_OBJECTS`` (default
255), the number of available guarded objects can be controlled. Each object
diff --git a/Documentation/dev-tools/kselftest.rst b/Documentation/dev-tools/kselftest.rst
index dcefee707ccd..a833ecf12fbc 100644
--- a/Documentation/dev-tools/kselftest.rst
+++ b/Documentation/dev-tools/kselftest.rst
@@ -7,6 +7,14 @@ directory. These are intended to be small tests to exercise individual code
paths in the kernel. Tests are intended to be run after building, installing
and booting a kernel.
+Kselftest from mainline can be run on older stable kernels. Running tests
+from mainline offers the best coverage. Several test rings run the mainline
+kselftest suite on stable releases. The reason is that when a new test
+gets added to regression-test a bug in existing code, we should be
+able to run that test on an older kernel. Hence, it is important to keep
+code that can still test an older kernel and make sure it skips the test
+gracefully on older releases.
+
You can find additional information on Kselftest framework, how to
write new tests using the framework on Kselftest wiki:
diff --git a/Documentation/dev-tools/ktap.rst b/Documentation/dev-tools/ktap.rst
index 878530cb9c27..5ee735c6687f 100644
--- a/Documentation/dev-tools/ktap.rst
+++ b/Documentation/dev-tools/ktap.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
-========================================
-The Kernel Test Anything Protocol (KTAP)
-========================================
+===================================================
+The Kernel Test Anything Protocol (KTAP), version 1
+===================================================
TAP, or the Test Anything Protocol, is a format for specifying test results used
by a number of projects. Its website and specification are found at this `link
@@ -68,7 +68,7 @@ Test case result lines
Test case result lines indicate the final status of a test.
They are required and must have the format:
-.. code-block::
+.. code-block:: none
<result> <number> [<description>][ # [<directive>] [<diagnostic data>]]
@@ -117,32 +117,32 @@ separator.
Example result lines include:
-.. code-block::
+.. code-block:: none
ok 1 test_case_name
The test "test_case_name" passed.
-.. code-block::
+.. code-block:: none
not ok 1 test_case_name
The test "test_case_name" failed.
-.. code-block::
+.. code-block:: none
ok 1 test # SKIP necessary dependency unavailable
The test "test" was SKIPPED with the diagnostic message "necessary dependency
unavailable".
-.. code-block::
+.. code-block:: none
not ok 1 test # TIMEOUT 30 seconds
The test "test" timed out, with diagnostic data "30 seconds".
-.. code-block::
+.. code-block:: none
ok 5 check return code # rcode=0
@@ -174,6 +174,13 @@ There may be lines within KTAP output that do not follow the format of one of
the four formats for lines described above. This is allowed, however, they will
not influence the status of the tests.
+This is an important difference from TAP. Kernel tests may print messages
+to the system console or a log file. Both of these destinations may contain
+messages either from unrelated kernel or userspace activity, or kernel
+messages from non-test code that is invoked by the test. The kernel code
+invoked by the test is likely not aware that a test is in progress and
+thus cannot print the message as a diagnostic message.
+
Nested tests
------------
@@ -186,13 +193,16 @@ starting with another KTAP version line and test plan, and end with the overall
result. If one of the subtests fail, for example, the parent test should also
fail.
-Additionally, all result lines in a subtest should be indented. One level of
+Additionally, all lines in a subtest should be indented. One level of
indentation is two spaces: " ". The indentation should begin at the version
line and should end before the parent test's result line.
+"Unknown lines" are not considered to be lines in a subtest and thus are
+allowed to be either indented or not indented.
+
An example of a test with two nested subtests:
-.. code-block::
+.. code-block:: none
KTAP version 1
1..1
@@ -205,7 +215,7 @@ An example of a test with two nested subtests:
An example format with multiple levels of nested testing:
-.. code-block::
+.. code-block:: none
KTAP version 1
1..2
@@ -224,10 +234,15 @@ An example format with multiple levels of nested testing:
Major differences between TAP and KTAP
--------------------------------------
-Note the major differences between the TAP and KTAP specification:
-- yaml and json are not recommended in diagnostic messages
-- TODO directive not recognized
-- KTAP allows for an arbitrary number of tests to be nested
+================================================== ========= ===============
+Feature                                            TAP       KTAP
+================================================== ========= ===============
+yaml and json in diagnostic message                ok        not recommended
+TODO directive                                     ok        not recognized
+allows an arbitrary number of tests to be nested   no        yes
+"Unknown lines" are in category of "Anything else" yes       no
+"Unknown lines" are incorrect                      yes       allowed
+================================================== ========= ===============
The TAP14 specification does permit nested tests, but instead of using another
nested version line, uses a line of the form
@@ -235,7 +250,7 @@ nested version line, uses a line of the form
Example KTAP output
--------------------
-.. code-block::
+.. code-block:: none
KTAP version 1
1..1
diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst
index 76af931a332c..1c83e7d60a8a 100644
--- a/Documentation/dev-tools/kunit/usage.rst
+++ b/Documentation/dev-tools/kunit/usage.rst
@@ -242,7 +242,7 @@ example:
int rectangle_area(struct shape *this)
{
- struct rectangle *self = container_of(this, struct shape, parent);
+ struct rectangle *self = container_of(this, struct rectangle, parent);
return self->length * self->width;
};
diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.yaml b/Documentation/devicetree/bindings/arm/atmel-at91.yaml
index c612e1f48dba..ff91df04f9f4 100644
--- a/Documentation/devicetree/bindings/arm/atmel-at91.yaml
+++ b/Documentation/devicetree/bindings/arm/atmel-at91.yaml
@@ -8,7 +8,8 @@ title: Atmel AT91 device tree bindings.
maintainers:
- Alexandre Belloni <alexandre.belloni@bootlin.com>
- - Ludovic Desroches <ludovic.desroches@microchip.com>
+ - Claudiu Beznea <claudiu.beznea@microchip.com>
+ - Nicolas Ferre <nicolas.ferre@microchip.com>
description: |
Boards with a SoC of the Atmel AT91 or SMART family shall have the following
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt
index b5cb374dc47d..10a91cc8b997 100644
--- a/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt
+++ b/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt
@@ -8,7 +8,7 @@ Required properties:
- compatible: Should contain a chip-specific compatible string,
Chip-specific strings are of the form "fsl,<chip>-dcfg",
The following <chip>s are known to be supported:
- ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
+ ls1012a, ls1021a, ls1043a, ls1046a, ls2080a, lx2160a
- reg : should contain base address and length of DCFG memory-mapped registers
diff --git a/Documentation/devicetree/bindings/arm/omap/omap.txt b/Documentation/devicetree/bindings/arm/omap/omap.txt
index e77635c5422c..fa8b31660cad 100644
--- a/Documentation/devicetree/bindings/arm/omap/omap.txt
+++ b/Documentation/devicetree/bindings/arm/omap/omap.txt
@@ -119,6 +119,9 @@ Boards (incomplete list of examples):
- OMAP3 BeagleBoard : Low cost community board
compatible = "ti,omap3-beagle", "ti,omap3430", "ti,omap3"
+- OMAP3 BeagleBoard A to B4 : Early BeagleBoard revisions A to B4 with a timer quirk
+ compatible = "ti,omap3-beagle-ab4", "ti,omap3-beagle", "ti,omap3430", "ti,omap3"
+
- OMAP3 Tobi with Overo : Commercial expansion board with daughter board
compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap3430", "ti,omap3"
diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml
index 981bac451698..7a04b8aaaec3 100644
--- a/Documentation/devicetree/bindings/arm/pmu.yaml
+++ b/Documentation/devicetree/bindings/arm/pmu.yaml
@@ -20,6 +20,8 @@ properties:
items:
- enum:
- apm,potenza-pmu
+ - apple,firestorm-pmu
+ - apple,icestorm-pmu
- arm,armv8-pmuv3 # Only for s/w models
- arm,arm1136-pmu
- arm,arm1176-pmu
diff --git a/Documentation/devicetree/bindings/arm/qcom.yaml b/Documentation/devicetree/bindings/arm/qcom.yaml
index 370aab274cd1..04ff0b55bb85 100644
--- a/Documentation/devicetree/bindings/arm/qcom.yaml
+++ b/Documentation/devicetree/bindings/arm/qcom.yaml
@@ -48,7 +48,6 @@ description: |
sdx65
sm7225
sm8150
- sdx65
sm8250
sm8350
sm8450
@@ -230,11 +229,6 @@ properties:
- items:
- enum:
- - qcom,sdx65-mtp
- - const: qcom,sdx65
-
- - items:
- - enum:
- qcom,ipq6018-cp01
- qcom,ipq6018-cp01-c1
- const: qcom,ipq6018
diff --git a/Documentation/devicetree/bindings/clock/qoriq-clock.txt b/Documentation/devicetree/bindings/clock/qoriq-clock.txt
index f7d48f23da44..10119d9ef4b1 100644
--- a/Documentation/devicetree/bindings/clock/qoriq-clock.txt
+++ b/Documentation/devicetree/bindings/clock/qoriq-clock.txt
@@ -44,6 +44,7 @@ Required properties:
* "fsl,ls1046a-clockgen"
* "fsl,ls1088a-clockgen"
* "fsl,ls2080a-clockgen"
+ * "fsl,lx2160a-clockgen"
Chassis-version clock strings include:
* "fsl,qoriq-clockgen-1.0": for chassis 1.0 clocks
* "fsl,qoriq-clockgen-2.0": for chassis 2.0 clocks
diff --git a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
index 1d3e88daca04..25b5ef3f759c 100644
--- a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
@@ -91,22 +91,7 @@ properties:
$ref: /schemas/graph.yaml#/$defs/port-base
unevaluatedProperties: false
description:
- MIPI DSI/DPI input.
-
- properties:
- endpoint:
- $ref: /schemas/media/video-interfaces.yaml#
- type: object
- additionalProperties: false
-
- properties:
- remote-endpoint: true
-
- bus-type:
- enum: [1, 5]
- default: 1
-
- data-lanes: true
+ Video port for MIPI DSI input.
port@1:
$ref: /schemas/graph.yaml#/properties/port
@@ -155,8 +140,6 @@ examples:
reg = <0>;
anx7625_in: endpoint {
remote-endpoint = <&mipi_dsi>;
- bus-type = <5>;
- data-lanes = <0 1 2 3>;
};
};
diff --git a/Documentation/devicetree/bindings/extcon/maxim,max77843.yaml b/Documentation/devicetree/bindings/extcon/maxim,max77843.yaml
new file mode 100644
index 000000000000..f9ffe3d6f957
--- /dev/null
+++ b/Documentation/devicetree/bindings/extcon/maxim,max77843.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/extcon/maxim,max77843.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX77843 MicroUSB and Companion Power Management IC Extcon
+
+maintainers:
+ - Chanwoo Choi <cw00.choi@samsung.com>
+ - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+ This is a part of device tree bindings for Maxim MAX77843 MicroUSB
+ Integrated Circuit (MUIC).
+
+ See also Documentation/devicetree/bindings/mfd/maxim,max77843.yaml for
+ additional information and example.
+
+properties:
+ compatible:
+ const: maxim,max77843-muic
+
+ connector:
+ $ref: /schemas/connector/usb-connector.yaml#
+
+ ports:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+ Any connector to the data bus of this controller should be modelled using
+ the OF graph bindings specified
+ properties:
+ port:
+ $ref: /schemas/graph.yaml#/properties/port
+
+required:
+ - compatible
+ - connector
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml b/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml
index e04349567eeb..427c5873f96a 100644
--- a/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml
+++ b/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml
@@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: SiFive GPIO controller
maintainers:
- - Yash Shah <yash.shah@sifive.com>
- Paul Walmsley <paul.walmsley@sifive.com>
properties:
diff --git a/Documentation/devicetree/bindings/hwmon/adi,adm1275.yaml b/Documentation/devicetree/bindings/hwmon/adi,adm1275.yaml
index 223393d7cafd..ab87f51c5aef 100644
--- a/Documentation/devicetree/bindings/hwmon/adi,adm1275.yaml
+++ b/Documentation/devicetree/bindings/hwmon/adi,adm1275.yaml
@@ -37,6 +37,72 @@ properties:
description:
Shunt resistor value in micro-Ohm.
+ adi,volt-curr-sample-average:
+ description: |
+ Number of samples to be used to report voltage and current values.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [1, 2, 4, 8, 16, 32, 64, 128]
+
+ adi,power-sample-average:
+ description: |
+ Number of samples to be used to report power values.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [1, 2, 4, 8, 16, 32, 64, 128]
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - adi,adm1075
+ - adi,adm1276
+ then:
+ properties:
+ adi,volt-curr-sample-average:
+ default: 128
+ adi,power-sample-average: false
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - adi,adm1275
+ then:
+ properties:
+ adi,volt-curr-sample-average:
+ default: 16
+ adi,power-sample-average: false
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - adi,adm1272
+ then:
+ properties:
+ adi,volt-curr-sample-average:
+ default: 128
+ adi,power-sample-average:
+ default: 128
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - adi,adm1278
+ - adi,adm1293
+ - adi,adm1294
+ then:
+ properties:
+ adi,volt-curr-sample-average:
+ default: 128
+ adi,power-sample-average:
+ default: 1
+
required:
- compatible
- reg
@@ -53,5 +119,7 @@ examples:
compatible = "adi,adm1272";
reg = <0x10>;
shunt-resistor-micro-ohms = <500>;
+ adi,volt-curr-sample-average = <128>;
+ adi,power-sample-average = <128>;
};
};
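For the adm1075/adm1275/adm1276 branches above, adi,power-sample-average is disallowed, so only the voltage/current averaging property applies. A minimal sketch (values illustrative, not taken from the binding):

    i2c {
        #address-cells = <1>;
        #size-cells = <0>;

        power-monitor@10 {
            compatible = "adi,adm1276";
            reg = <0x10>;
            shunt-resistor-micro-ohms = <500>;
            /* adm1276 disallows adi,power-sample-average; its volt/curr default is 128 */
            adi,volt-curr-sample-average = <64>;
        };
    };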
diff --git a/Documentation/devicetree/bindings/hwmon/national,lm90.yaml b/Documentation/devicetree/bindings/hwmon/national,lm90.yaml
index 6e1d54ff5d5b..30db92977937 100644
--- a/Documentation/devicetree/bindings/hwmon/national,lm90.yaml
+++ b/Documentation/devicetree/bindings/hwmon/national,lm90.yaml
@@ -60,7 +60,6 @@ additionalProperties: false
examples:
- |
- #include <dt-bindings/gpio/tegra-gpio.h>
#include <dt-bindings/interrupt-controller/irq.h>
i2c {
@@ -71,8 +70,7 @@ examples:
compatible = "onnn,nct1008";
reg = <0x4c>;
vcc-supply = <&palmas_ldo6_reg>;
- interrupt-parent = <&gpio>;
- interrupts = <TEGRA_GPIO(O, 4) IRQ_TYPE_LEVEL_LOW>;
+ interrupts = <4 IRQ_TYPE_LEVEL_LOW>;
#thermal-sensor-cells = <1>;
};
};
diff --git a/Documentation/devicetree/bindings/hwmon/ti,tmp464.yaml b/Documentation/devicetree/bindings/hwmon/ti,tmp464.yaml
new file mode 100644
index 000000000000..801ca9ba7d34
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/ti,tmp464.yaml
@@ -0,0 +1,114 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/hwmon/ti,tmp464.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TMP464 and TMP468 temperature sensors
+
+maintainers:
+ - Agathe Porte <agathe.porte@nokia.com>
+
+description: |
+ ±0.0625°C Remote and Local temperature sensor
+ https://www.ti.com/lit/ds/symlink/tmp464.pdf
+ https://www.ti.com/lit/ds/symlink/tmp468.pdf
+
+properties:
+ compatible:
+ enum:
+ - ti,tmp464
+ - ti,tmp468
+
+ reg:
+ maxItems: 1
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+patternProperties:
+ "^channel@([0-8])$":
+ type: object
+ description: |
+ Represents channels of the device and their specific configuration.
+
+ properties:
+ reg:
+ description: |
+ The channel number. 0 is the local channel, 1-8 are remote channels.
+ items:
+ minimum: 0
+ maximum: 8
+
+ label:
+ description: |
+ A descriptive name for this channel, like "ambient" or "psu".
+
+ ti,n-factor:
+ description: |
+ The value (two's complement) to be programmed in the channel-specific N correction register.
+ For remote channels only.
+ $ref: /schemas/types.yaml#/definitions/int32
+ items:
+ minimum: -128
+ maximum: 127
+
+ required:
+ - reg
+
+ additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ sensor@4b {
+ compatible = "ti,tmp464";
+ reg = <0x4b>;
+ };
+ };
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ sensor@4b {
+ compatible = "ti,tmp464";
+ reg = <0x4b>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ channel@0 {
+ reg = <0x0>;
+ label = "local";
+ };
+
+ channel@1 {
+ reg = <0x1>;
+ ti,n-factor = <(-10)>;
+ label = "external";
+ };
+
+ channel@2 {
+ reg = <0x2>;
+ ti,n-factor = <0x10>;
+ label = "somelabel";
+ };
+
+ channel@3 {
+ reg = <0x3>;
+ status = "disabled";
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
index 23b18b92c558..bde63f8f090e 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
@@ -18,6 +18,7 @@ Required properties:
"amlogic,meson-g12a-gpio-intc" for G12A SoCs (S905D2, S905X2, S905Y2)
"amlogic,meson-sm1-gpio-intc" for SM1 SoCs (S905D3, S905X3, S905Y3)
"amlogic,meson-a1-gpio-intc" for A1 SoCs (A113L)
+ "amlogic,meson-s4-gpio-intc" for S4 SoCs (S802X2, S905Y4, S805X2G, S905W2)
- reg : Specifies base physical address and size of the registers.
- interrupt-controller : Identifies the node as an interrupt controller.
- #interrupt-cells : Specifies the number of cells needed to encode an
diff --git a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
index 97359024709a..85c85b694217 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
@@ -56,6 +56,8 @@ properties:
- 1: virtual HV timer
- 2: physical guest timer
- 3: virtual guest timer
+ - 4: 'efficient' CPU PMU
+ - 5: 'performance' CPU PMU
The 3rd cell contains the interrupt flags. This is normally
IRQ_TYPE_LEVEL_HIGH (4).
@@ -68,6 +70,35 @@ properties:
power-domains:
maxItems: 1
+ affinities:
+ type: object
+ additionalProperties: false
+ description:
+ FIQ affinity can be expressed as a single "affinities" node,
+ containing a set of sub-nodes, one per FIQ with a non-default
+ affinity.
+ patternProperties:
+ "^.+-affinity$":
+ type: object
+ additionalProperties: false
+ properties:
+ apple,fiq-index:
+ description:
+ The interrupt number specified as a FIQ, and for which
+ the affinity is not the default.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ maximum: 5
+
+ cpus:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ description:
+ Should be a list of phandles to CPU nodes (as described in
+ Documentation/devicetree/bindings/arm/cpus.yaml).
+
+ required:
+ - apple,fiq-index
+ - cpus
+
required:
- compatible
- '#interrupt-cells'
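As a sketch, an affinities node for the two PMU FIQs added above might look like the following; the CPU phandle labels are hypothetical:

    affinities {
        e-core-pmu-affinity {
            apple,fiq-index = <4>;
            cpus = <&cpu_e0 &cpu_e1>;
        };

        p-core-pmu-affinity {
            apple,fiq-index = <5>;
            cpus = <&cpu_p0 &cpu_p1>;
        };
    };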
diff --git a/Documentation/devicetree/bindings/interrupt-controller/apple,aic2.yaml b/Documentation/devicetree/bindings/interrupt-controller/apple,aic2.yaml
new file mode 100644
index 000000000000..47a78a167aba
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/apple,aic2.yaml
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/apple,aic2.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Apple Interrupt Controller 2
+
+maintainers:
+ - Hector Martin <marcan@marcan.st>
+
+description: |
+ The Apple Interrupt Controller 2 is a simple interrupt controller present on
+ Apple ARM SoC platforms starting with t600x (M1 Pro and Max).
+
+ It provides the following features:
+
+ - Level-triggered hardware IRQs wired to SoC blocks
+ - Single mask bit per IRQ
+ - Automatic masking on event delivery (auto-ack)
+ - Software triggering (ORed with hw line)
+ - Automatic prioritization (single event/ack register per CPU, lower IRQs =
+ higher priority)
+ - Automatic masking on ack
+ - Support for multiple dies
+
+ This device also represents the FIQ interrupt sources on platforms using AIC,
+ which do not go through a discrete interrupt controller. It also handles
+ FIQ-based Fast IPIs.
+
+properties:
+ compatible:
+ items:
+ - const: apple,t6000-aic
+ - const: apple,aic2
+
+ interrupt-controller: true
+
+ '#interrupt-cells':
+ const: 4
+ description: |
+ The 1st cell contains the interrupt type:
+ - 0: Hardware IRQ
+ - 1: FIQ
+
+ The 2nd cell contains the die ID.
+
+ The next cell contains the interrupt number.
+ - HW IRQs: interrupt number
+ - FIQs:
+ - 0: physical HV timer
+ - 1: virtual HV timer
+ - 2: physical guest timer
+ - 3: virtual guest timer
+
+ The last cell contains the interrupt flags. This is normally
+ IRQ_TYPE_LEVEL_HIGH (4).
+
+ reg:
+ items:
+ - description: Address and size of the main AIC2 registers.
+ - description: Address and size of the AIC2 Event register.
+
+ reg-names:
+ items:
+ - const: core
+ - const: event
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - '#interrupt-cells'
+ - interrupt-controller
+ - reg
+ - reg-names
+
+additionalProperties: false
+
+allOf:
+ - $ref: /schemas/interrupt-controller.yaml#
+
+examples:
+ - |
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ aic: interrupt-controller@28e100000 {
+ compatible = "apple,t6000-aic", "apple,aic2";
+ #interrupt-cells = <4>;
+ interrupt-controller;
+ reg = <0x2 0x8e100000 0x0 0xc000>,
+ <0x2 0x8e10c000 0x0 0x4>;
+ reg-names = "core", "event";
+ };
+ };
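A consumer behind AIC2 would use the four-cell specifier documented above; a hypothetical device node (compatible, addresses and IRQ number are illustrative only):

    serial@39b200000 {
        compatible = "apple,s5l-uart";
        reg = <0x3 0x9b200000 0x0 0x4000>;
        interrupt-parent = <&aic>;
        /* type 0 = hardware IRQ, die 0, IRQ 605, IRQ_TYPE_LEVEL_HIGH (4) */
        interrupts = <0 0 605 4>;
    };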
diff --git a/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml b/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml
new file mode 100644
index 000000000000..509d20c091af
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/qcom,mpm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm MPM Interrupt Controller
+
+maintainers:
+ - Shawn Guo <shawn.guo@linaro.org>
+
+description:
+ Qualcomm Technologies Inc. SoCs based on the RPM architecture have an
+ MSM Power Manager (MPM) in the always-on domain. In addition to managing
+ resources during sleep, the hardware also has an interrupt controller that
+ monitors the interrupts when the system is asleep, wakes up the APSS when
+ one of these interrupts occurs, and replays it to the GIC interrupt
+ controller once the GIC becomes operational.
+
+allOf:
+ - $ref: /schemas/interrupt-controller.yaml#
+
+properties:
+ compatible:
+ items:
+ - const: qcom,mpm
+
+ reg:
+ maxItems: 1
+ description:
+ Specifies the base address and size of vMPM registers in RPM MSG RAM.
+
+ interrupts:
+ maxItems: 1
+ description:
+ Specify the IRQ used by RPM to wake up the APSS.
+
+ mboxes:
+ maxItems: 1
+ description:
+ Specify the mailbox used to notify RPM for writing vMPM registers.
+
+ interrupt-controller: true
+
+ '#interrupt-cells':
+ const: 2
+ description:
+ The first cell is the MPM pin number for the interrupt, and the second
+ is the trigger type.
+
+ qcom,mpm-pin-count:
+ description:
+ Specify the total MPM pin count that a SoC supports.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ qcom,mpm-pin-map:
+ description:
+ A set of MPM pin numbers and the corresponding GIC SPIs.
+ $ref: /schemas/types.yaml#/definitions/uint32-matrix
+ items:
+ items:
+ - description: MPM pin number
+ - description: GIC SPI number for the MPM pin
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - mboxes
+ - interrupt-controller
+ - '#interrupt-cells'
+ - qcom,mpm-pin-count
+ - qcom,mpm-pin-map
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ mpm: interrupt-controller@45f01b8 {
+ compatible = "qcom,mpm";
+ interrupts = <GIC_SPI 197 IRQ_TYPE_EDGE_RISING>;
+ reg = <0x45f01b8 0x1000>;
+ mboxes = <&apcs_glb 1>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&intc>;
+ qcom,mpm-pin-count = <96>;
+ qcom,mpm-pin-map = <2 275>,
+ <5 296>,
+ <12 422>,
+ <24 79>,
+ <86 183>,
+ <90 260>,
+ <91 260>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml
index 79d0358e2f61..620f01775e42 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml
@@ -36,6 +36,7 @@ properties:
- renesas,intc-ex-r8a77980 # R-Car V3H
- renesas,intc-ex-r8a77990 # R-Car E3
- renesas,intc-ex-r8a77995 # R-Car D3
+ - renesas,intc-ex-r8a779a0 # R-Car V3U
- const: renesas,irqc
'#interrupt-cells':
diff --git a/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml b/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml
index 28b6b17fe4b2..27092c6a86c4 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml
@@ -35,6 +35,10 @@ description:
contains a specific memory layout, which is documented in chapter 8 of the
SiFive U5 Coreplex Series Manual <https://static.dev.sifive.com/U54-MC-RVCoreIP.pdf>.
+ The thead,c900-plic differs from sifive,plic-1.0.0: in OpenSBI, the T-HEAD
+ PLIC implementation requires setting a delegation bit before S-mode may
+ access the PLIC. The separate thead,c900-plic compatible distinguishes the
+ two.
+
maintainers:
- Sagar Kadam <sagar.kadam@sifive.com>
- Paul Walmsley <paul.walmsley@sifive.com>
@@ -42,12 +46,17 @@ maintainers:
properties:
compatible:
- items:
- - enum:
- - sifive,fu540-c000-plic
- - starfive,jh7100-plic
- - canaan,k210-plic
- - const: sifive,plic-1.0.0
+ oneOf:
+ - items:
+ - enum:
+ - sifive,fu540-c000-plic
+ - starfive,jh7100-plic
+ - canaan,k210-plic
+ - const: sifive,plic-1.0.0
+ - items:
+ - enum:
+ - allwinner,sun20i-d1-plic
+ - const: thead,c900-plic
reg:
maxItems: 1
@@ -62,6 +71,7 @@ properties:
interrupts-extended:
minItems: 1
+ maxItems: 15872
description:
Specifies which contexts are connected to the PLIC, with "-1" specifying
that a context is not present. Each node pointed to should be a
@@ -90,12 +100,11 @@ examples:
#interrupt-cells = <1>;
compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0";
interrupt-controller;
- interrupts-extended = <
- &cpu0_intc 11
- &cpu1_intc 11 &cpu1_intc 9
- &cpu2_intc 11 &cpu2_intc 9
- &cpu3_intc 11 &cpu3_intc 9
- &cpu4_intc 11 &cpu4_intc 9>;
+ interrupts-extended = <&cpu0_intc 11>,
+ <&cpu1_intc 11>, <&cpu1_intc 9>,
+ <&cpu2_intc 11>, <&cpu2_intc 9>,
+ <&cpu3_intc 11>, <&cpu3_intc 9>,
+ <&cpu4_intc 11>, <&cpu4_intc 9>;
reg = <0xc000000 0x4000000>;
riscv,ndev = <10>;
};
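Under the new oneOf branch, a T-HEAD style PLIC such as the Allwinner D1's would follow the same shape with its own compatible pair; a sketch mirroring the example above (addresses, context links and riscv,ndev are illustrative assumptions):

    plic1: interrupt-controller@10000000 {
        #address-cells = <0>;
        #interrupt-cells = <1>;
        compatible = "allwinner,sun20i-d1-plic", "thead,c900-plic";
        interrupt-controller;
        interrupts-extended = <&cpu0_intc 11>, <&cpu0_intc 9>;
        reg = <0x10000000 0x4000000>;
        riscv,ndev = <175>;
    };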
diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml
index d19c881b4abc..e44daa09b137 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml
@@ -20,6 +20,7 @@ properties:
- items:
- enum:
- st,stm32mp1-exti
+ - st,stm32mp13-exti
- const: syscon
"#interrupt-cells":
diff --git a/Documentation/devicetree/bindings/mfd/brcm,cru.yaml b/Documentation/devicetree/bindings/mfd/brcm,cru.yaml
index be4a2df71c25..b85819fbb07c 100644
--- a/Documentation/devicetree/bindings/mfd/brcm,cru.yaml
+++ b/Documentation/devicetree/bindings/mfd/brcm,cru.yaml
@@ -39,7 +39,7 @@ patternProperties:
'^phy@[a-f0-9]+$':
$ref: ../phy/bcm-ns-usb2-phy.yaml
- '^pin-controller@[a-f0-9]+$':
+ '^pinctrl@[a-f0-9]+$':
$ref: ../pinctrl/brcm,ns-pinmux.yaml
'^syscon@[a-f0-9]+$':
@@ -94,7 +94,7 @@ examples:
reg = <0x180 0x4>;
};
- pin-controller@1c0 {
+ pinctrl@1c0 {
compatible = "brcm,bcm4708-pinmux";
reg = <0x1c0 0x24>;
reg-names = "cru_gpio_control";
diff --git a/Documentation/devicetree/bindings/mfd/cirrus,lochnagar.yaml b/Documentation/devicetree/bindings/mfd/cirrus,lochnagar.yaml
index c00ad3e21c21..ad285cb480c9 100644
--- a/Documentation/devicetree/bindings/mfd/cirrus,lochnagar.yaml
+++ b/Documentation/devicetree/bindings/mfd/cirrus,lochnagar.yaml
@@ -126,7 +126,7 @@ properties:
clock-frequency:
const: 12288000
- lochnagar-pinctrl:
+ pinctrl:
type: object
$ref: /schemas/pinctrl/cirrus,lochnagar.yaml#
@@ -255,7 +255,7 @@ required:
- reg
- reset-gpios
- lochnagar-clk
- - lochnagar-pinctrl
+ - pinctrl
additionalProperties: false
@@ -293,7 +293,7 @@ examples:
clock-frequency = <32768>;
};
- lochnagar-pinctrl {
+ pinctrl {
compatible = "cirrus,lochnagar-pinctrl";
gpio-controller;
diff --git a/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml b/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
index d1f53bd449f7..4caadf73fc4a 100644
--- a/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
+++ b/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
@@ -31,7 +31,7 @@ properties:
controller-data:
description:
- SPI controller data, see bindings/spi/spi-samsung.txt
+ SPI controller data, see bindings/spi/samsung,spi-peripheral-props.yaml
type: object
google,cros-ec-spi-pre-delay:
@@ -148,18 +148,21 @@ patternProperties:
required:
- compatible
-if:
- properties:
- compatible:
- contains:
- enum:
- - google,cros-ec-i2c
- - google,cros-ec-rpmsg
-then:
- properties:
- google,cros-ec-spi-pre-delay: false
- google,cros-ec-spi-msg-delay: false
- spi-max-frequency: false
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - google,cros-ec-i2c
+ - google,cros-ec-rpmsg
+ then:
+ properties:
+ google,cros-ec-spi-pre-delay: false
+ google,cros-ec-spi-msg-delay: false
+ spi-max-frequency: false
+ else:
+ $ref: /schemas/spi/spi-peripheral-props.yaml
additionalProperties: false
@@ -200,7 +203,7 @@ examples:
spi-max-frequency = <5000000>;
proximity {
- compatible = "google,cros-ec-mkbp-proximity";
+ compatible = "google,cros-ec-mkbp-proximity";
};
cbas {
diff --git a/Documentation/devicetree/bindings/mfd/max14577.txt b/Documentation/devicetree/bindings/mfd/max14577.txt
deleted file mode 100644
index be11943a0560..000000000000
--- a/Documentation/devicetree/bindings/mfd/max14577.txt
+++ /dev/null
@@ -1,147 +0,0 @@
-Maxim MAX14577/77836 Multi-Function Device
-
-MAX14577 is a Multi-Function Device with Micro-USB Interface Circuit, Li+
-Battery Charger and SFOUT LDO output for powering USB devices. It is
-interfaced to host controller using I2C.
-
-MAX77836 additionally contains PMIC (with two LDO regulators) and Fuel Gauge.
-For the description of Fuel Gauge low SOC alert interrupt see:
-../power/supply/max17040_battery.txt
-
-
-Required properties:
-- compatible : Must be "maxim,max14577" or "maxim,max77836".
-- reg : I2C slave address for the max14577 chip (0x25 for max14577/max77836)
-- interrupts : IRQ line for the chip.
-
-
-Required nodes:
- - charger :
- Node for configuring the charger driver.
- Required properties:
- - compatible : "maxim,max14577-charger"
- or "maxim,max77836-charger"
- - maxim,fast-charge-uamp : Current in uA for Fast Charge;
- Valid values:
- - for max14577: 90000 - 950000;
- - for max77836: 45000 - 475000;
- - maxim,eoc-uamp : Current in uA for End-Of-Charge mode;
- Valid values:
- - for max14577: 50000 - 200000;
- - for max77836: 5000 - 100000;
- - maxim,ovp-uvolt : OverVoltage Protection Threshold in uV;
- In an overvoltage condition, INT asserts and charging
- stops. Valid values:
- - 6000000, 6500000, 7000000, 7500000;
- - maxim,constant-uvolt : Battery Constant Voltage in uV;
- Valid values:
- - 4000000 - 4280000 (step by 20000);
- - 4350000;
-
-
-Optional nodes:
-- max14577-muic/max77836-muic :
- Node used only by extcon consumers.
- Required properties:
- - compatible : "maxim,max14577-muic" or "maxim,max77836-muic"
-
-- regulators :
- Required properties:
- - compatible : "maxim,max14577-regulator"
- or "maxim,max77836-regulator"
-
- May contain a sub-node per regulator from the list below. Each
- sub-node should contain the constraints and initialization information
- for that regulator. See regulator.txt for a description of standard
- properties for these sub-nodes.
-
- List of valid regulator names:
- - for max14577: CHARGER, SAFEOUT.
- - for max77836: CHARGER, SAFEOUT, LDO1, LDO2.
-
- The SAFEOUT is a fixed voltage regulator so there is no need to specify
- voltages for it.
-
-
-Example:
-
-#include <dt-bindings/interrupt-controller/irq.h>
-
-max14577@25 {
- compatible = "maxim,max14577";
- reg = <0x25>;
- interrupt-parent = <&gpx1>;
- interrupts = <5 IRQ_TYPE_LEVEL_LOW>;
-
- muic: max14577-muic {
- compatible = "maxim,max14577-muic";
- };
-
- regulators {
- compatible = "maxim,max14577-regulator";
-
- SAFEOUT {
- regulator-name = "SAFEOUT";
- };
- CHARGER {
- regulator-name = "CHARGER";
- regulator-min-microamp = <90000>;
- regulator-max-microamp = <950000>;
- regulator-boot-on;
- };
- };
-
- charger {
- compatible = "maxim,max14577-charger";
-
- maxim,constant-uvolt = <4350000>;
- maxim,fast-charge-uamp = <450000>;
- maxim,eoc-uamp = <50000>;
- maxim,ovp-uvolt = <6500000>;
- };
-};
-
-
-max77836@25 {
- compatible = "maxim,max77836";
- reg = <0x25>;
- interrupt-parent = <&gpx1>;
- interrupts = <5 IRQ_TYPE_LEVEL_LOW>;
-
- muic: max77836-muic {
- compatible = "maxim,max77836-muic";
- };
-
- regulators {
- compatible = "maxim,max77836-regulator";
-
- SAFEOUT {
- regulator-name = "SAFEOUT";
- };
- CHARGER {
- regulator-name = "CHARGER";
- regulator-min-microamp = <90000>;
- regulator-max-microamp = <950000>;
- regulator-boot-on;
- };
- LDO1 {
- regulator-name = "LDO1";
- regulator-min-microvolt = <2700000>;
- regulator-max-microvolt = <2700000>;
- };
- LDO2 {
- regulator-name = "LDO2";
- regulator-min-microvolt = <800000>;
- regulator-max-microvolt = <3950000>;
- };
- };
-
- charger {
- compatible = "maxim,max77836-charger";
-
- maxim,constant-uvolt = <4350000>;
- maxim,fast-charge-uamp = <225000>;
- maxim,eoc-uamp = <7500>;
- maxim,ovp-uvolt = <6500000>;
- };
-};
diff --git a/Documentation/devicetree/bindings/mfd/max77802.txt b/Documentation/devicetree/bindings/mfd/max77802.txt
deleted file mode 100644
index 09decac20d91..000000000000
--- a/Documentation/devicetree/bindings/mfd/max77802.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-Maxim MAX77802 multi-function device
-
-The Maxim MAX77802 is a Power Management IC (PMIC) that contains 10 high
-efficiency Buck regulators, 32 Low-DropOut (LDO) regulators used to power
-up application processors and peripherals, a 2-channel 32kHz clock outputs,
-a Real-Time-Clock (RTC) and a I2C interface to program the individual
-regulators, clocks outputs and the RTC.
-
-Bindings for the built-in 32k clock generator block and
-regulators are defined in ../clk/maxim,max77802.txt and
-../regulator/max77802.txt respectively.
-
-Required properties:
-- compatible : Must be "maxim,max77802"
-- reg : Specifies the I2C slave address of PMIC block.
-- interrupts : I2C device IRQ line connected to the main SoC.
-
-Example:
-
- max77802: pmic@9 {
- compatible = "maxim,max77802";
- interrupt-parent = <&intc>;
- interrupts = <26 IRQ_TYPE_NONE>;
- reg = <0x09>;
- };
diff --git a/Documentation/devicetree/bindings/mfd/maxim,max14577.yaml b/Documentation/devicetree/bindings/mfd/maxim,max14577.yaml
new file mode 100644
index 000000000000..27870b8760a6
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/maxim,max14577.yaml
@@ -0,0 +1,195 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/maxim,max14577.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX14577/MAX77836 MicroUSB and Companion Power Management IC
+
+maintainers:
+ - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+ This is a part of device tree bindings for Maxim MAX14577/MAX77836 MicroUSB
+ Integrated Circuit (MUIC).
+
+ The Maxim MAX14577 is a MicroUSB and Companion Power Management IC which
+ includes voltage safeout regulators, charger and MicroUSB management IC.
+
+ The Maxim MAX77836 is a MicroUSB and Companion Power Management IC which
+ includes voltage safeout and LDO regulators, charger, fuel-gauge and MicroUSB
+ management IC.
+
+properties:
+ compatible:
+ enum:
+ - maxim,max14577
+ - maxim,max77836
+
+ interrupts:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+ wakeup-source: true
+
+ charger:
+ $ref: /schemas/power/supply/maxim,max14577.yaml
+
+ extcon:
+ type: object
+ properties:
+ compatible:
+ enum:
+ - maxim,max14577-muic
+ - maxim,max77836-muic
+
+ required:
+ - compatible
+
+ regulators:
+ $ref: /schemas/regulator/maxim,max14577.yaml
+
+required:
+ - compatible
+ - interrupts
+ - reg
+ - charger
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: maxim,max14577
+ then:
+ properties:
+ charger:
+ properties:
+ compatible:
+ const: maxim,max14577-charger
+ extcon:
+ properties:
+ compatible:
+ const: maxim,max14577-muic
+ regulators:
+ properties:
+ compatible:
+ const: maxim,max14577-regulator
+ else:
+ properties:
+ charger:
+ properties:
+ compatible:
+ const: maxim,max77836-charger
+ extcon:
+ properties:
+ compatible:
+ const: maxim,max77836-muic
+ regulators:
+ properties:
+ compatible:
+ const: maxim,max77836-regulator
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pmic@25 {
+ compatible = "maxim,max14577";
+ reg = <0x25>;
+ interrupt-parent = <&gpx1>;
+ interrupts = <5 IRQ_TYPE_LEVEL_LOW>;
+
+ extcon {
+ compatible = "maxim,max14577-muic";
+ };
+
+ regulators {
+ compatible = "maxim,max14577-regulator";
+
+ SAFEOUT {
+ regulator-name = "SAFEOUT";
+ };
+
+ CHARGER {
+ regulator-name = "CHARGER";
+ regulator-min-microamp = <90000>;
+ regulator-max-microamp = <950000>;
+ regulator-boot-on;
+ };
+ };
+
+ charger {
+ compatible = "maxim,max14577-charger";
+
+ maxim,constant-uvolt = <4350000>;
+ maxim,fast-charge-uamp = <450000>;
+ maxim,eoc-uamp = <50000>;
+ maxim,ovp-uvolt = <6500000>;
+ };
+ };
+ };
+
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pmic@25 {
+ compatible = "maxim,max77836";
+ interrupt-parent = <&gpx1>;
+ interrupts = <5 IRQ_TYPE_NONE>;
+ reg = <0x25>;
+ wakeup-source;
+
+ extcon {
+ compatible = "maxim,max77836-muic";
+ };
+
+ regulators {
+ compatible = "maxim,max77836-regulator";
+
+ SAFEOUT {
+ regulator-name = "SAFEOUT";
+ };
+
+ CHARGER {
+ regulator-name = "CHARGER";
+ regulator-min-microamp = <45000>;
+ regulator-max-microamp = <475000>;
+ regulator-boot-on;
+ };
+
+ LDO1 {
+ regulator-name = "MOT_2.7V";
+ regulator-min-microvolt = <1100000>;
+ regulator-max-microvolt = <2700000>;
+ };
+
+ LDO2 {
+ regulator-name = "UNUSED_LDO2";
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <3950000>;
+ };
+ };
+
+ charger {
+ compatible = "maxim,max77836-charger";
+
+ maxim,constant-uvolt = <4350000>;
+ maxim,fast-charge-uamp = <225000>;
+ maxim,eoc-uamp = <7500>;
+ maxim,ovp-uvolt = <6500000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/maxim,max77802.yaml b/Documentation/devicetree/bindings/mfd/maxim,max77802.yaml
new file mode 100644
index 000000000000..baa1346ac5d5
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/maxim,max77802.yaml
@@ -0,0 +1,194 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/maxim,max77802.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX77802 Power Management IC
+
+maintainers:
+ - Javier Martinez Canillas <javier@dowhile0.org>
+ - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+ This is a part of device tree bindings for Maxim MAX77802 Power Management
+ Integrated Circuit (PMIC).
+
+ The Maxim MAX77802 is a Power Management IC which includes voltage and
+ current regulators (10 high efficiency Buck regulators and 32 Low-DropOut
+ (LDO)), RTC and clock outputs.
+
+ The MAX77802 provides two 32.768 kHz clock outputs that can be controlled
+ (gated/ungated) over I2C. The clock IDs are defined as preprocessor macros
+ in dt-bindings/clock/maxim,max77802.h.
+
+properties:
+ compatible:
+ const: maxim,max77802
+
+ '#clock-cells':
+ const: 1
+
+ interrupts:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+ regulators:
+ $ref: /schemas/regulator/maxim,max77802.yaml
+ description:
+ List of child nodes that specify the regulators.
+
+ inb1-supply:
+ description: Power supply for buck1
+ inb2-supply:
+ description: Power supply for buck2
+ inb3-supply:
+ description: Power supply for buck3
+ inb4-supply:
+ description: Power supply for buck4
+ inb5-supply:
+ description: Power supply for buck5
+ inb6-supply:
+ description: Power supply for buck6
+ inb7-supply:
+ description: Power supply for buck7
+ inb8-supply:
+ description: Power supply for buck8
+ inb9-supply:
+ description: Power supply for buck9
+ inb10-supply:
+ description: Power supply for buck10
+
+ inl1-supply:
+ description: Power supply for LDO8, LDO15
+ inl2-supply:
+ description: Power supply for LDO17, LDO27, LDO30, LDO35
+ inl3-supply:
+ description: Power supply for LDO3, LDO5, LDO6, LDO7
+ inl4-supply:
+ description: Power supply for LDO10, LDO11, LDO13, LDO14
+ inl5-supply:
+ description: Power supply for LDO9, LDO19
+ inl6-supply:
+ description: Power supply for LDO4, LDO21, LDO24, LDO33
+ inl7-supply:
+ description: Power supply for LDO18, LDO20, LDO28, LDO29
+ inl9-supply:
+ description: Power supply for LDO12, LDO23, LDO25, LDO26, LDO32, LDO34
+ inl10-supply:
+ description: Power supply for LDO1, LDO2
+
+ wakeup-source: true
+
+required:
+ - compatible
+ - '#clock-cells'
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/regulator/maxim,max77802.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pmic@9 {
+ compatible = "maxim,max77802";
+ interrupt-parent = <&gpx3>;
+ interrupts = <1 IRQ_TYPE_NONE>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&max77802_irq>, <&pmic_selb>,
+ <&pmic_dvs_1>, <&pmic_dvs_2>, <&pmic_dvs_3>;
+ wakeup-source;
+ reg = <0x9>;
+ #clock-cells = <1>;
+
+ inb1-supply = <&tps65090_dcdc2>;
+ inb2-supply = <&tps65090_dcdc1>;
+ inb3-supply = <&tps65090_dcdc2>;
+ inb4-supply = <&tps65090_dcdc2>;
+ inb5-supply = <&tps65090_dcdc1>;
+ inb6-supply = <&tps65090_dcdc2>;
+ inb7-supply = <&tps65090_dcdc1>;
+ inb8-supply = <&tps65090_dcdc1>;
+ inb9-supply = <&tps65090_dcdc1>;
+ inb10-supply = <&tps65090_dcdc1>;
+
+ inl1-supply = <&buck5_reg>;
+ inl2-supply = <&buck7_reg>;
+ inl3-supply = <&buck9_reg>;
+ inl4-supply = <&buck9_reg>;
+ inl5-supply = <&buck9_reg>;
+ inl6-supply = <&tps65090_dcdc2>;
+ inl7-supply = <&buck9_reg>;
+ inl9-supply = <&tps65090_dcdc2>;
+ inl10-supply = <&buck7_reg>;
+
+ regulators {
+ BUCK1 {
+ regulator-name = "vdd_mif";
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <1300000>;
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-ramp-delay = <12500>;
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ BUCK2 {
+ regulator-name = "vdd_arm";
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <1500000>;
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-ramp-delay = <12500>;
+ regulator-coupled-with = <&buck3_reg>;
+ regulator-coupled-max-spread = <300000>;
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ // ...
+
+ BUCK10 {
+ regulator-name = "vdd_1v8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ };
+ };
+
+ LDO1 {
+ regulator-name = "vdd_1v0";
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1000000>;
+ regulator-always-on;
+ regulator-initial-mode = <MAX77802_OPMODE_NORMAL>;
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-mode = <MAX77802_OPMODE_LP>;
+ };
+ };
+
+ // ...
+
+ LDO35 {
+ regulator-name = "ldo_35";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ };
+ };
+ };
+ };
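Consumers can gate either 32 kHz output through this node. A hedged sketch, assuming a max77802 label on the pmic node and the MAX77802_CLK_32K_AP ID from dt-bindings/clock/maxim,max77802.h; the rtc consumer is hypothetical:

    rtc@68 {
        compatible = "dallas,ds1307";
        reg = <0x68>;
        clocks = <&max77802 MAX77802_CLK_32K_AP>;
    };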
diff --git a/Documentation/devicetree/bindings/mfd/maxim,max77843.yaml b/Documentation/devicetree/bindings/mfd/maxim,max77843.yaml
new file mode 100644
index 000000000000..61a0f9dcb983
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/maxim,max77843.yaml
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/maxim,max77843.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX77843 MicroUSB and Companion Power Management IC
+
+maintainers:
+ - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+ This is a part of device tree bindings for Maxim MAX77843 MicroUSB
+ Integrated Circuit (MUIC).
+
+ The Maxim MAX77843 is a MicroUSB and Companion Power Management IC which
+ includes voltage and current regulators, a charger, a fuel-gauge, a haptic
+ motor driver and a MicroUSB management IC.
+
+properties:
+ compatible:
+ const: maxim,max77843
+
+ interrupts:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+ extcon:
+ $ref: /schemas/extcon/maxim,max77843.yaml
+
+ motor-driver:
+ type: object
+ properties:
+ compatible:
+ const: maxim,max77843-haptic
+
+ haptic-supply:
+ description: Power supply to the haptic motor
+
+ pwms:
+ maxItems: 1
+
+ required:
+ - compatible
+ - haptic-supply
+ - pwms
+
+ regulators:
+ $ref: /schemas/regulator/maxim,max77843.yaml
+
+required:
+ - compatible
+ - interrupts
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pmic@66 {
+ compatible = "maxim,max77843";
+ interrupt-parent = <&gpa1>;
+ interrupts = <5 IRQ_TYPE_EDGE_FALLING>;
+ reg = <0x66>;
+
+ extcon {
+ compatible = "maxim,max77843-muic";
+
+ connector {
+ compatible = "samsung,usb-connector-11pin",
+ "usb-b-connector";
+ label = "micro-USB";
+ type = "micro";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ /*
+ * TODO: The DTS this is based on does not have
+ * port@0 which is a required property. The ports
+ * look incomplete and need fixing.
+ * Add a disabled port just to satisfy dtschema.
+ */
+ reg = <0>;
+ status = "disabled";
+ };
+
+ port@3 {
+ reg = <3>;
+ endpoint {
+ remote-endpoint = <&mhl_to_musb_con>;
+ };
+ };
+ };
+ };
+
+ ports {
+ port {
+ endpoint {
+ remote-endpoint = <&usb_to_muic>;
+ };
+ };
+ };
+ };
+
+ regulators {
+ compatible = "maxim,max77843-regulator";
+
+ SAFEOUT1 {
+ regulator-name = "SAFEOUT1";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <4950000>;
+ };
+
+ SAFEOUT2 {
+ regulator-name = "SAFEOUT2";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <4950000>;
+ };
+
+ CHARGER {
+ regulator-name = "CHARGER";
+ regulator-min-microamp = <100000>;
+ regulator-max-microamp = <3150000>;
+ };
+ };
+
+ motor-driver {
+ compatible = "maxim,max77843-haptic";
+ haptic-supply = <&ldo38_reg>;
+ pwms = <&pwm 0 33670 0>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml b/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml
index 272832e9f8f2..fa86691ebf16 100644
--- a/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml
+++ b/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml
@@ -20,7 +20,7 @@ description: |
maintainers:
- Kishon Vijay Abraham I <kishon@ti.com>
- - Roger Quadros <rogerq@ti.com
+ - Roger Quadros <rogerq@kernel.org>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml
index 39421f7233e4..4abfb4cfc157 100644
--- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml
+++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml
@@ -47,7 +47,8 @@ properties:
identified by the JEDEC READ ID opcode (0x9F).
reg:
- maxItems: 1
+ minItems: 1
+ maxItems: 2
spi-max-frequency: true
spi-rx-bus-width: true
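The widened reg lets one node describe a flash accessed through two chip selects, e.g. stacked dies behind a single controller; a hypothetical sketch (frequency and chip-select numbers are assumptions):

    flash@0 {
        compatible = "jedec,spi-nor";
        reg = <0>, <1>;
        spi-max-frequency = <50000000>;
    };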
diff --git a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
index 0968b40aef1e..e3501bfa22e9 100644
--- a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
+++ b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
@@ -31,7 +31,7 @@ tcan4x5x: tcan4x5x@0 {
#address-cells = <1>;
#size-cells = <1>;
spi-max-frequency = <10000000>;
- bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>;
+ bosch,mram-cfg = <0x0 0 0 16 0 0 1 1>;
interrupt-parent = <&gpio1>;
interrupts = <14 IRQ_TYPE_LEVEL_LOW>;
device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
index b86edf67ce62..58ecc62adfaa 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
@@ -107,6 +107,10 @@ properties:
- const: imem
- const: config
+ qcom,qmp:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: phandle to the AOSS side-channel message RAM
+
qcom,smem-states:
$ref: /schemas/types.yaml#/definitions/phandle-array
description: State bits used by the AP to signal the modem.
@@ -222,6 +226,8 @@ examples:
"imem",
"config";
+ qcom,qmp = <&aoss_qmp>;
+
qcom,smem-states = <&ipa_smp2p_out 0>,
<&ipa_smp2p_out 1>;
qcom,smem-state-names = "ipa-clock-enabled-valid",
diff --git a/Documentation/devicetree/bindings/perf/marvell-cn10k-ddr.yaml b/Documentation/devicetree/bindings/perf/marvell-cn10k-ddr.yaml
new file mode 100644
index 000000000000..a18dd0a8c43a
--- /dev/null
+++ b/Documentation/devicetree/bindings/perf/marvell-cn10k-ddr.yaml
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/perf/marvell-cn10k-ddr.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell CN10K DDR performance monitor
+
+maintainers:
+ - Bharat Bhushan <bbhushan2@marvell.com>
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - marvell,cn10k-ddr-pmu
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ bus {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ pmu@87e1c0000000 {
+ compatible = "marvell,cn10k-ddr-pmu";
+ reg = <0x87e1 0xc0000000 0x0 0x10000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml b/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml
index cbbf5e8b1197..f78d3246fbdc 100644
--- a/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml
+++ b/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml
@@ -8,7 +8,7 @@ title: OMAP USB2 PHY
maintainers:
- Kishon Vijay Abraham I <kishon@ti.com>
- - Roger Quadros <rogerq@ti.com>
+ - Roger Quadros <rogerq@kernel.org>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/phy/ti,tcan104x-can.yaml b/Documentation/devicetree/bindings/phy/ti,tcan104x-can.yaml
index 6107880e5246..02b76f15e717 100644
--- a/Documentation/devicetree/bindings/phy/ti,tcan104x-can.yaml
+++ b/Documentation/devicetree/bindings/phy/ti,tcan104x-can.yaml
@@ -37,6 +37,12 @@ properties:
max bit rate supported in bps
minimum: 1
+ mux-states:
+ description:
+ mux controller node to route the signals from controller to
+ transceiver.
+ maxItems: 1
+
required:
- compatible
- '#phy-cells'
@@ -53,4 +59,5 @@ examples:
max-bitrate = <5000000>;
standby-gpios = <&wakeup_gpio1 16 GPIO_ACTIVE_LOW>;
enable-gpios = <&main_gpio1 67 GPIO_ACTIVE_HIGH>;
+ mux-states = <&mux0 1>;
};
diff --git a/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml b/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml
index c85f759ae5a3..8a90d8273767 100644
--- a/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml
@@ -107,9 +107,6 @@ properties:
additionalProperties: false
-allOf:
- - $ref: "pinctrl.yaml#"
-
required:
- pinctrl-0
- pinctrl-names
diff --git a/Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml b/Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml
new file mode 100644
index 000000000000..3978b48299de
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/supply/maxim,max14577.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX14577/MAX77836 MicroUSB and Companion Power Management IC Charger
+
+maintainers:
+ - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+ This is a part of device tree bindings for Maxim MAX14577/MAX77836 MicroUSB
+ Integrated Circuit (MUIC).
+
+ See also Documentation/devicetree/bindings/mfd/maxim,max14577.yaml for
+ additional information and example.
+
+properties:
+ compatible:
+ enum:
+ - maxim,max14577-charger
+ - maxim,max77836-charger
+
+ maxim,constant-uvolt:
+ description:
+ Battery Constant Voltage in uV
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 4000000
+ maximum: 4350000
+
+ maxim,eoc-uamp:
+ description: |
+ Current in uA for End-Of-Charge mode.
+ MAX14577: 50000-200000
+ MAX77836: 5000-100000
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ maxim,fast-charge-uamp:
+ description: |
+ Current in uA for Fast Charge
+ MAX14577: 90000-950000
+ MAX77836: 45000-475000
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ maxim,ovp-uvolt:
+ description:
+ OverVoltage Protection Threshold in uV. In an overvoltage condition, INT
+ asserts and charging stops.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [6000000, 6500000, 7000000, 7500000]
+
+required:
+ - compatible
+ - maxim,constant-uvolt
+ - maxim,eoc-uamp
+ - maxim,fast-charge-uamp
+ - maxim,ovp-uvolt
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: maxim,max14577-charger
+ then:
+ properties:
+ maxim,eoc-uamp:
+ minimum: 50000
+ maximum: 200000
+ maxim,fast-charge-uamp:
+ minimum: 90000
+ maximum: 950000
+ else:
+ # max77836
+ properties:
+ maxim,eoc-uamp:
+ minimum: 5000
+ maximum: 100000
+ maxim,fast-charge-uamp:
+ minimum: 45000
+ maximum: 475000
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/pwm/pwm-sifive.yaml b/Documentation/devicetree/bindings/pwm/pwm-sifive.yaml
index 84e66913d042..db41cd7bf150 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-sifive.yaml
+++ b/Documentation/devicetree/bindings/pwm/pwm-sifive.yaml
@@ -8,7 +8,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: SiFive PWM controller
maintainers:
- - Yash Shah <yash.shah@sifive.com>
- Sagar Kadam <sagar.kadam@sifive.com>
- Paul Walmsley <paul.walmsley@sifive.com>
diff --git a/Documentation/devicetree/bindings/regulator/max77802.txt b/Documentation/devicetree/bindings/regulator/max77802.txt
deleted file mode 100644
index b82943d83677..000000000000
--- a/Documentation/devicetree/bindings/regulator/max77802.txt
+++ /dev/null
@@ -1,111 +0,0 @@
-Binding for Maxim MAX77802 regulators
-
-This is a part of device tree bindings of MAX77802 multi-function device.
-More information can be found in bindings/mfd/max77802.txt file.
-
-The MAX77802 PMIC has 10 high-efficiency Buck and 32 Low-dropout (LDO)
-regulators that can be controlled over I2C.
-
-Following properties should be present in main device node of the MFD chip.
-
-Optional properties:
-- inb1-supply: The input supply for BUCK1
-- inb2-supply: The input supply for BUCK2
-- inb3-supply: The input supply for BUCK3
-- inb4-supply: The input supply for BUCK4
-- inb5-supply: The input supply for BUCK5
-- inb6-supply: The input supply for BUCK6
-- inb7-supply: The input supply for BUCK7
-- inb8-supply: The input supply for BUCK8
-- inb9-supply: The input supply for BUCK9
-- inb10-supply: The input supply for BUCK10
-- inl1-supply: The input supply for LDO8 and LDO15
-- inl2-supply: The input supply for LDO17, LDO27, LDO30 and LDO35
-- inl3-supply: The input supply for LDO3, LDO5, LDO6 and LDO7
-- inl4-supply: The input supply for LDO10, LDO11, LDO13 and LDO14
-- inl5-supply: The input supply for LDO9 and LDO19
-- inl6-supply: The input supply for LDO4, LDO21, LDO24 and LDO33
-- inl7-supply: The input supply for LDO18, LDO20, LDO28 and LDO29
-- inl9-supply: The input supply for LDO12, LDO23, LDO25, LDO26, LDO32 and LDO34
-- inl10-supply: The input supply for LDO1 and LDO2
-
-Optional nodes:
-- regulators : The regulators of max77802 have to be instantiated
- under subnode named "regulators" using the following format.
-
- regulator-name {
- standard regulator constraints....
- };
- refer Documentation/devicetree/bindings/regulator/regulator.txt
-
-The regulator node name should be initialized with a string to get matched
-with their hardware counterparts as follow. The valid names are:
-
- -LDOn : for LDOs, where n can lie in ranges 1-15, 17-21, 23-30
- and 32-35.
- example: LDO1, LDO2, LDO35.
- -BUCKn : for BUCKs, where n can lie in range 1 to 10.
- example: BUCK1, BUCK5, BUCK10.
-
-The max77802 regulator supports two different operating modes: Normal and Low
-Power Mode. Some regulators support the modes to be changed at startup or by
-the consumers during normal operation while others only support to change the
-mode during system suspend. The standard regulator suspend states binding can
-be used to configure the regulator operating mode.
-
-The regulators that support the standard "regulator-initial-mode" property,
-changing their mode during normal operation are: LDOs 1, 3, 20 and 21.
-
-The possible values for "regulator-initial-mode" and "regulator-mode" are:
- 1: Normal regulator voltage output mode.
- 3: Low Power which reduces the quiescent current down to only 1uA
-
-The valid modes list is defined in the dt-bindings/regulator/maxim,max77802.h
-header and can be included by device tree source files.
-
-The standard "regulator-mode" property can only be used for regulators that
-support changing their mode to Low Power Mode during suspend. These regulators
-are: BUCKs 2-4 and LDOs 1-35. Also, it only takes effect if the regulator has
-been enabled for the given suspend state using "regulator-on-in-suspend" and
-has not been disabled for that state using "regulator-off-in-suspend".
-
-Example:
-
- max77802@9 {
- compatible = "maxim,max77802";
- interrupt-parent = <&wakeup_eint>;
- interrupts = <26 0>;
- reg = <0x09>;
- #address-cells = <1>;
- #size-cells = <0>;
-
- inb1-supply = <&parent_reg>;
-
- regulators {
- ldo1_reg: LDO1 {
- regulator-name = "vdd_1v0";
- regulator-min-microvolt = <1000000>;
- regulator-max-microvolt = <1000000>;
- regulator-always-on;
- regulator-initial-mode = <MAX77802_OPMODE_LP>;
- };
-
- ldo11_reg: LDO11 {
- regulator-name = "vdd_ldo11";
- regulator-min-microvolt = <1900000>;
- regulator-max-microvolt = <1900000>;
- regulator-always-on;
- regulator-state-mem {
- regulator-on-in-suspend;
- regulator-mode = <MAX77802_OPMODE_LP>;
- };
- };
-
- buck1_reg: BUCK1 {
- regulator-name = "vdd_mif";
- regulator-min-microvolt = <950000>;
- regulator-max-microvolt = <1300000>;
- regulator-always-on;
- regulator-boot-on;
- };
- };
diff --git a/Documentation/devicetree/bindings/regulator/maxim,max14577.yaml b/Documentation/devicetree/bindings/regulator/maxim,max14577.yaml
new file mode 100644
index 000000000000..16f01886a601
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/maxim,max14577.yaml
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/maxim,max14577.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX14577/MAX77836 MicroUSB and Companion Power Management IC regulators
+
+maintainers:
+ - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+ This is a part of device tree bindings for Maxim MAX14577/MAX77836 MicroUSB
+ Integrated Circuit (MUIC).
+
+ See also Documentation/devicetree/bindings/mfd/maxim,max14577.yaml for
+ additional information and example.
+
+properties:
+ compatible:
+ enum:
+ - maxim,max14577-regulator
+ - maxim,max77836-regulator
+
+ CHARGER:
+ type: object
+ $ref: regulator.yaml#
+ unevaluatedProperties: false
+ description: |
+ Current regulator.
+
+ properties:
+ regulator-min-microvolt: false
+ regulator-max-microvolt: false
+
+ SAFEOUT:
+ type: object
+ $ref: regulator.yaml#
+ unevaluatedProperties: false
+ description: |
+ Safeout LDO regulator (fixed voltage).
+
+ properties:
+ regulator-min-microamp: false
+ regulator-max-microamp: false
+ regulator-min-microvolt:
+ const: 4900000
+ regulator-max-microvolt:
+ const: 4900000
+
+patternProperties:
+ "^LDO[12]$":
+ type: object
+ $ref: regulator.yaml#
+ unevaluatedProperties: false
+ description: |
+ Voltage (LDO) regulator.
+
+ properties:
+ regulator-min-microamp: false
+ regulator-max-microamp: false
+ regulator-min-microvolt:
+ minimum: 800000
+ regulator-max-microvolt:
+ maximum: 3950000
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: maxim,max14577-regulator
+ then:
+ properties:
+ LDO1: false
+ LDO2: false
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/regulator/maxim,max77802.yaml b/Documentation/devicetree/bindings/regulator/maxim,max77802.yaml
new file mode 100644
index 000000000000..f2b4dd15a0f3
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/maxim,max77802.yaml
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/maxim,max77802.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX77802 Power Management IC regulators
+
+maintainers:
+ - Javier Martinez Canillas <javier@dowhile0.org>
+ - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+ This is a part of device tree bindings for Maxim MAX77802 Power Management
+ Integrated Circuit (PMIC).
+
+ The Maxim MAX77802 provides 10 high-efficiency Buck and 32 Low-DropOut (LDO)
+ regulators.
+
+ See also Documentation/devicetree/bindings/mfd/maxim,max77802.yaml for
+ additional information and example.
+
+ Certain regulators support "regulator-initial-mode" and "regulator-mode".
+ The valid modes list is defined in dt-bindings/regulator/maxim,max77802.h
+ and their meaning is:
+ 1 - Normal regulator voltage output mode.
+ 3 - Low Power which reduces the quiescent current down to only 1uA
+
+ The standard "regulator-mode" property can only be used for regulators that
+ support changing their mode to Low Power Mode during suspend. These
+ regulators are: bucks 2-4 and LDOs 1-35. Also, it only takes effect if the
+ regulator has been enabled for the given suspend state using
+ "regulator-on-in-suspend" and has not been disabled for that state using
+ "regulator-off-in-suspend".
+
+patternProperties:
+ # LDO1, LDO3, LDO20, LDO21
+ "^LDO([13]|2[01])$":
+ type: object
+ $ref: regulator.yaml#
+ unevaluatedProperties: false
+ description:
+ LDOs supporting the regulator-initial-mode property and changing their
+ mode during normal operation.
+
+ # LDO2, LDO4-15, LDO17-19, LDO23-30, LDO32-35
+ "^LDO([24-9]|1[0-5789]|2[3-9]|3[02345])$":
+ type: object
+ $ref: regulator.yaml#
+ unevaluatedProperties: false
+ description:
+ LDOs supporting the regulator-mode property (changing mode to Low Power
+ Mode during suspend).
+
+ properties:
+ regulator-initial-mode: false
+
+ # buck2-4
+ "^BUCK[2-4]$":
+ type: object
+ $ref: regulator.yaml#
+ unevaluatedProperties: false
+ description:
+ bucks supporting the regulator-mode property (changing mode to Low Power
+ Mode during suspend).
+
+ properties:
+ regulator-initial-mode: false
+
+ # buck1, buck5-10
+ "^BUCK([15-9]|10)$":
+ type: object
+ $ref: regulator.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ regulator-initial-mode: false
+
+ patternProperties:
+ regulator-state-(standby|mem|disk):
+ type: object
+ properties:
+ regulator-mode: false
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/regulator/maxim,max77843.yaml b/Documentation/devicetree/bindings/regulator/maxim,max77843.yaml
new file mode 100644
index 000000000000..a963025e96c1
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/maxim,max77843.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/maxim,max77843.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX77843 MicroUSB and Companion Power Management IC regulators
+
+maintainers:
+ - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+ This is a part of device tree bindings for Maxim MAX77843 MicroUSB Integrated
+ Circuit (MUIC).
+
+ See also Documentation/devicetree/bindings/mfd/maxim,max77843.yaml for
+ additional information and example.
+
+properties:
+ compatible:
+ const: maxim,max77843-regulator
+
+ CHARGER:
+ type: object
+ $ref: regulator.yaml#
+ additionalProperties: false
+ description: |
+ Current regulator.
+
+ properties:
+ regulator-name: true
+ regulator-always-on: true
+ regulator-boot-on: true
+ regulator-min-microamp:
+ minimum: 100000
+ regulator-max-microamp:
+ maximum: 3150000
+
+ required:
+ - regulator-name
+
+patternProperties:
+ "^SAFEOUT[12]$":
+ type: object
+ $ref: regulator.yaml#
+ additionalProperties: false
+ description: |
+ Safeout LDO regulator.
+
+ properties:
+ regulator-name: true
+ regulator-always-on: true
+ regulator-boot-on: true
+ regulator-min-microvolt:
+ minimum: 3300000
+ regulator-max-microvolt:
+ maximum: 4950000
+
+ required:
+ - regulator-name
+
+required:
+ - compatible
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/regulator/maxim,max8973.yaml b/Documentation/devicetree/bindings/regulator/maxim,max8973.yaml
index 35c53e27f78c..5898dcf10f06 100644
--- a/Documentation/devicetree/bindings/regulator/maxim,max8973.yaml
+++ b/Documentation/devicetree/bindings/regulator/maxim,max8973.yaml
@@ -113,7 +113,7 @@ examples:
};
- |
- #include <dt-bindings/gpio/tegra-gpio.h>
+ #include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/interrupt-controller/irq.h>
i2c {
@@ -123,8 +123,7 @@ examples:
regulator@1b {
compatible = "maxim,max77621";
reg = <0x1b>;
- interrupt-parent = <&gpio>;
- interrupts = <TEGRA_GPIO(Y, 1) IRQ_TYPE_LEVEL_LOW>;
+ interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
regulator-always-on;
regulator-boot-on;
diff --git a/Documentation/devicetree/bindings/regulator/pfuze100.yaml b/Documentation/devicetree/bindings/regulator/pfuze100.yaml
index f578e72778a7..a26bbd68b729 100644
--- a/Documentation/devicetree/bindings/regulator/pfuze100.yaml
+++ b/Documentation/devicetree/bindings/regulator/pfuze100.yaml
@@ -70,7 +70,11 @@ properties:
$ref: "regulator.yaml#"
type: object
- "^(vsnvs|vref|vrefddr|swbst|coin)$":
+ "^vldo[1-4]$":
+ $ref: "regulator.yaml#"
+ type: object
+
+ "^(vsnvs|vref|vrefddr|swbst|coin|v33|vccsd)$":
$ref: "regulator.yaml#"
type: object
diff --git a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
index 5c73d3f639c7..e28ee9e46788 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
@@ -48,6 +48,7 @@ description: |
For PMI8998, bob
For PMR735A, smps1 - smps3, ldo1 - ldo7
For PMX55, smps1 - smps7, ldo1 - ldo16
+ For PMX65, smps1 - smps8, ldo1 - ldo21
properties:
compatible:
@@ -70,6 +71,7 @@ properties:
- qcom,pmm8155au-rpmh-regulators
- qcom,pmr735a-rpmh-regulators
- qcom,pmx55-rpmh-regulators
+ - qcom,pmx65-rpmh-regulators
qcom,pmic-id:
description: |
diff --git a/Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml b/Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml
new file mode 100644
index 000000000000..28725c5467fc
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml
@@ -0,0 +1,141 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/richtek,rt5190a-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Richtek RT5190A PMIC Regulator
+
+maintainers:
+ - ChiYuan Huang <cy_huang@richtek.com>
+
+description: |
+ The RT5190A integrates one buck controller channel, three high-efficiency
+ synchronous buck converters, one LDO, an I2C control interface and
+ peripheral logic control.
+
+ It also supports muting of the AC OFF depop sound and quick-setting storage
+ while input power is removed.
+
+properties:
+ compatible:
+ enum:
+ - richtek,rt5190a
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ vin2-supply:
+ description: phandle to buck2 input voltage.
+
+ vin3-supply:
+ description: phandle to buck3 input voltage.
+
+ vin4-supply:
+ description: phandle to buck4 input voltage.
+
+ vinldo-supply:
+ description: phandle to ldo input voltage
+
+ richtek,mute-enable:
+ description: |
+ The mute function uses the 'mutein', 'muteout' and 'vdet' pins as its
+ control signals. When enabled, the normal behavior is to pass the 'mutein'
+ signal through to 'muteout'. But if power source removal is detected on
+ 'vdet', 'muteout' is pulled down regardless of 'mutein' to force the
+ speakers to mute. This function is commonly used to prevent speaker pop
+ noise when AC power is turned off in modern TV system designs.
+ type: boolean
+
+ regulators:
+ type: object
+
+ patternProperties:
+ "^buck[1-4]$|^ldo$":
+ type: object
+ $ref: regulator.yaml#
+ description: |
+ Regulator description for buck1 to buck4 and the ldo.
+
+ properties:
+ regulator-allowed-modes:
+ description: |
+ Buck operating mode; only buck1 and buck4 support mode selection.
+ 0: auto mode
+ 1: force pwm mode
+ items:
+ enum: [0, 1]
+
+ richtek,latchup-enable:
+ type: boolean
+ description: |
+ If specified, the undervoltage protection mode changes from the
+ default hiccup to latchup.
+
+ unevaluatedProperties: false
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - regulators
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/regulator/richtek,rt5190a-regulator.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pmic@64 {
+ compatible = "richtek,rt5190a";
+ reg = <0x64>;
+ interrupts-extended = <&gpio26 0 IRQ_TYPE_LEVEL_LOW>;
+ vin2-supply = <&rt5190_buck1>;
+ vin3-supply = <&rt5190_buck1>;
+ vin4-supply = <&rt5190_buck1>;
+
+ regulators {
+ rt5190_buck1: buck1 {
+ regulator-name = "rt5190a-buck1";
+ regulator-min-microvolt = <5090000>;
+ regulator-max-microvolt = <5090000>;
+ regulator-allowed-modes = <RT5190A_OPMODE_AUTO RT5190A_OPMODE_FPWM>;
+ regulator-boot-on;
+ };
+ buck2 {
+ regulator-name = "rt5190a-buck2";
+ regulator-min-microvolt = <600000>;
+ regulator-max-microvolt = <1400000>;
+ regulator-boot-on;
+ };
+ buck3 {
+ regulator-name = "rt5190a-buck3";
+ regulator-min-microvolt = <600000>;
+ regulator-max-microvolt = <1400000>;
+ regulator-boot-on;
+ };
+ buck4 {
+ regulator-name = "rt5190a-buck4";
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <850000>;
+ regulator-allowed-modes = <RT5190A_OPMODE_AUTO RT5190A_OPMODE_FPWM>;
+ regulator-boot-on;
+ };
+ ldo {
+ regulator-name = "rt5190a-ldo";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-boot-on;
+ };
+ };
+ };
+ };
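
The richtek,mute-enable flag described above is not exercised by this
example; it would be a one-line addition at the PMIC node (a sketch, reusing
the node from the example above):

    pmic@64 {
        compatible = "richtek,rt5190a";
        reg = <0x64>;
        /* bypass 'mutein' to 'muteout' unless 'vdet' reports power loss */
        richtek,mute-enable;
        /* regulators { ... } as in the example above */
    };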
diff --git a/Documentation/devicetree/bindings/regulator/ti,tps62360.yaml b/Documentation/devicetree/bindings/regulator/ti,tps62360.yaml
new file mode 100644
index 000000000000..12aeddedde05
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/ti,tps62360.yaml
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/ti,tps62360.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments TPS6236x Voltage Regulators
+
+maintainers:
+ - Laxman Dewangan <ldewangan@nvidia.com>
+
+description: |
+ The TPS6236x are a family of step-down DC-DC converters with
+ an input voltage range of 2.5V to 5.5V. The devices provide
+ up to 3A peak load current, and an output voltage range of
+ 0.77V to 1.4V (TPS62360/62) and 0.5V to 1.77V (TPS62361B/63).
+
+ Datasheet is available at:
+ https://www.ti.com/lit/gpn/tps62360
+
+allOf:
+ - $ref: "regulator.yaml#"
+
+properties:
+ compatible:
+ enum:
+ - ti,tps62360
+ - ti,tps62361
+ - ti,tps62362
+ - ti,tps62363
+
+ reg:
+ maxItems: 1
+
+ ti,vsel0-gpio:
+ description: |
+ GPIO for controlling the VSEL0 line. If this property
+ is missing, then assume that there is no GPIO for
+ VSEL0 control.
+ maxItems: 1
+
+ ti,vsel1-gpio:
+ description: |
+ GPIO for controlling the VSEL1 line. If this property
+ is missing, then assume that there is no GPIO for
+ VSEL1 control.
+ maxItems: 1
+
+ ti,enable-vout-discharge:
+ description: Enable output discharge.
+ type: boolean
+
+ ti,enable-pull-down:
+ description: Enable pull down.
+ type: boolean
+
+ ti,vsel0-state-high:
+ description: |
+ Initial state of VSEL0 input is high. If this property
+ is missing, then assume the state is low.
+ type: boolean
+
+ ti,vsel1-state-high:
+ description: |
+ Initial state of VSEL1 input is high. If this property
+ is missing, then assume the state is low.
+ type: boolean
+
+required:
+ - compatible
+ - reg
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ regulator@60 {
+ compatible = "ti,tps62361";
+ reg = <0x60>;
+ regulator-name = "tps62361-vout";
+ regulator-min-microvolt = <500000>;
+ regulator-max-microvolt = <1500000>;
+ regulator-boot-on;
+ ti,vsel0-gpio = <&gpio1 16 GPIO_ACTIVE_HIGH>;
+ ti,vsel1-gpio = <&gpio1 17 GPIO_ACTIVE_HIGH>;
+ ti,vsel0-state-high;
+ ti,vsel1-state-high;
+ ti,enable-pull-down;
+ ti,enable-vout-discharge;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/regulator/ti,tps62864.yaml b/Documentation/devicetree/bindings/regulator/ti,tps62864.yaml
new file mode 100644
index 000000000000..0f29c75f42ea
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/ti,tps62864.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/ti,tps62864.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI TPS62864/TPS62866/TPS62868/TPS62869 voltage regulator
+
+maintainers:
+ - Vincent Whitchurch <vincent.whitchurch@axis.com>
+
+properties:
+ compatible:
+ enum:
+ - ti,tps62864
+ - ti,tps62866
+ - ti,tps62868
+ - ti,tps62869
+
+ reg:
+ maxItems: 1
+
+ regulators:
+ type: object
+
+ properties:
+ "SW":
+ type: object
+ $ref: regulator.yaml#
+ unevaluatedProperties: false
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - regulators
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/regulator/ti,tps62864.h>
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ regulator@48 {
+ compatible = "ti,tps62864";
+ reg = <0x48>;
+
+ regulators {
+ SW {
+ regulator-name = "+0.85V";
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <890000>;
+ regulator-initial-mode = <TPS62864_MODE_FPWM>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/regulator/tps62360-regulator.txt b/Documentation/devicetree/bindings/regulator/tps62360-regulator.txt
deleted file mode 100644
index 1b20c3dbcdb8..000000000000
--- a/Documentation/devicetree/bindings/regulator/tps62360-regulator.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-TPS62360 Voltage regulators
-
-Required properties:
-- compatible: Must be one of the following.
- "ti,tps62360"
- "ti,tps62361",
- "ti,tps62362",
- "ti,tps62363",
-- reg: I2C slave address
-
-Optional properties:
-- ti,enable-vout-discharge: Enable output discharge. This is boolean value.
-- ti,enable-pull-down: Enable pull down. This is boolean value.
-- ti,vsel0-gpio: GPIO for controlling VSEL0 line.
- If this property is missing, then assume that there is no GPIO
- for vsel0 control.
-- ti,vsel1-gpio: Gpio for controlling VSEL1 line.
- If this property is missing, then assume that there is no GPIO
- for vsel1 control.
-- ti,vsel0-state-high: Initial state of vsel0 input is high.
- If this property is missing, then assume the state as low (0).
-- ti,vsel1-state-high: Initial state of vsel1 input is high.
- If this property is missing, then assume the state as low (0).
-
-Any property defined as part of the core regulator binding, defined in
-regulator.txt, can also be used.
-
-Example:
-
- abc: tps62360 {
- compatible = "ti,tps62361";
- reg = <0x60>;
- regulator-name = "tps62361-vout";
- regulator-min-microvolt = <500000>;
- regulator-max-microvolt = <1500000>;
- regulator-boot-on
- ti,vsel0-gpio = <&gpio1 16 0>;
- ti,vsel1-gpio = <&gpio1 17 0>;
- ti,vsel0-state-high;
- ti,vsel1-state-high;
- ti,enable-pull-down;
- ti,enable-force-pwm;
- ti,enable-vout-discharge;
- };
diff --git a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml
index 2b1f91603897..e2d330bd4608 100644
--- a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml
+++ b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml
@@ -9,7 +9,6 @@ title: SiFive L2 Cache Controller
maintainers:
- Sagar Kadam <sagar.kadam@sifive.com>
- - Yash Shah <yash.shah@sifive.com>
- Paul Walmsley <paul.walmsley@sifive.com>
description:
diff --git a/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml b/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml
index 273f2d95a043..e72b6a3fae99 100644
--- a/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml
+++ b/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml
@@ -22,7 +22,7 @@ description: |
[1] Documentation/devicetree/bindings/serial/samsung_uart.yaml
[2] Documentation/devicetree/bindings/i2c/i2c-exynos5.txt
- [3] Documentation/devicetree/bindings/spi/spi-samsung.txt
+ [3] Documentation/devicetree/bindings/spi/samsung,spi.yaml
properties:
$nodename:
diff --git a/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml b/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml
index 77adbebed824..c3e9f3485449 100644
--- a/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml
+++ b/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml
@@ -8,6 +8,7 @@ title: Audio codec controlled by ChromeOS EC
maintainers:
- Cheng-Yi Chiang <cychiang@chromium.org>
+ - Tzung-Bi Shih <tzungbi@google.com>
description: |
Google's ChromeOS EC codec is a digital mic codec provided by the
diff --git a/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml b/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml
new file mode 100644
index 000000000000..818130b11bb9
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/mediatek,spi-mt65xx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SPI Bus controller for MediaTek ARM SoCs
+
+maintainers:
+ - Leilk Liu <leilk.liu@mediatek.com>
+
+allOf:
+ - $ref: "/schemas/spi/spi-controller.yaml#"
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - mediatek,mt7629-spi
+ - const: mediatek,mt7622-spi
+ - items:
+ - enum:
+ - mediatek,mt8516-spi
+ - const: mediatek,mt2712-spi
+ - items:
+ - enum:
+ - mediatek,mt6779-spi
+ - mediatek,mt8186-spi
+ - mediatek,mt8192-spi
+ - mediatek,mt8195-spi
+ - const: mediatek,mt6765-spi
+ - items:
+ - enum:
+ - mediatek,mt7986-spi-ipm
+ - const: mediatek,spi-ipm
+ - items:
+ - enum:
+ - mediatek,mt2701-spi
+ - mediatek,mt2712-spi
+ - mediatek,mt6589-spi
+ - mediatek,mt6765-spi
+ - mediatek,mt6893-spi
+ - mediatek,mt7622-spi
+ - mediatek,mt8135-spi
+ - mediatek,mt8173-spi
+ - mediatek,mt8183-spi
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: the parent (PLL) clock
+ - description: the clock mux
+ - description: the clock gate
+
+ clock-names:
+ items:
+ - const: parent-clk
+ - const: sel-clk
+ - const: spi-clk
+
+ mediatek,pad-select:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 4
+ items:
+ enum: [0, 1, 2, 3]
+ description:
+ Specifies which pin group (ck/mi/mo/cs) the SPI controller uses.
+ This is an array.
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - '#address-cells'
+ - '#size-cells'
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/mt8173-clk.h>
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ spi@1100a000 {
+ compatible = "mediatek,mt8173-spi";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x1100a000 0x1000>;
+ interrupts = <GIC_SPI 110 IRQ_TYPE_LEVEL_LOW>;
+ clocks = <&topckgen CLK_TOP_SYSPLL3_D2>,
+ <&topckgen CLK_TOP_SPI_SEL>,
+ <&pericfg CLK_PERI_SPI0>;
+ clock-names = "parent-clk", "sel-clk", "spi-clk";
+ cs-gpios = <&pio 105 GPIO_ACTIVE_LOW>, <&pio 72 GPIO_ACTIVE_LOW>;
+ mediatek,pad-select = <1>, <0>;
+ };
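
Note that the YAML conversion drops the MT8173 pin-group table for
mediatek,pad-select; for reference, the spi-mt65xx.txt binding removed later
in this series documented the mapping, reproduced here as a comment:

    /* mediatek,pad-select values on MT8173 (from the removed text binding):
     * 0: GPIO69,70,71,72      2: GPIO128,129,130,131
     * 1: GPIO102,103,104,105  3: GPIO5,6,7,8
     */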
diff --git a/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml b/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml
index 4e4694e3d539..be3cc7faed53 100644
--- a/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml
+++ b/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml
@@ -30,6 +30,7 @@ properties:
- mediatek,mt7622-nor
- mediatek,mt7623-nor
- mediatek,mt7629-nor
+ - mediatek,mt8186-nor
- mediatek,mt8192-nor
- mediatek,mt8195-nor
- enum:
@@ -49,6 +50,8 @@ properties:
- description: clock used for controller
- description: clock used for nor dma bus. this depends on hardware
design, so this is optional.
+ - description: clock used for controller axi slave bus.
+ this depends on hardware design, so it is optional.
clock-names:
minItems: 2
@@ -56,6 +59,7 @@ properties:
- const: spi
- const: sf
- const: axi
+ - const: axi_s
required:
- compatible
diff --git a/Documentation/devicetree/bindings/spi/mediatek,spi-slave-mt27xx.yaml b/Documentation/devicetree/bindings/spi/mediatek,spi-slave-mt27xx.yaml
new file mode 100644
index 000000000000..7977799a8ee1
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/mediatek,spi-slave-mt27xx.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/mediatek,spi-slave-mt27xx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SPI Slave controller for MediaTek ARM SoCs
+
+maintainers:
+ - Leilk Liu <leilk.liu@mediatek.com>
+
+allOf:
+ - $ref: "/schemas/spi/spi-controller.yaml#"
+
+properties:
+ compatible:
+ enum:
+ - mediatek,mt2712-spi-slave
+ - mediatek,mt8195-spi-slave
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: spi
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/mt2712-clk.h>
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ spi@10013000 {
+ compatible = "mediatek,mt2712-spi-slave";
+ reg = <0x10013000 0x100>;
+ interrupts = <GIC_SPI 283 IRQ_TYPE_LEVEL_LOW>;
+ clocks = <&infracfg CLK_INFRA_AO_SPI1>;
+ clock-names = "spi";
+ assigned-clocks = <&topckgen CLK_TOP_SPISLV_SEL>;
+ assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL1_D2>;
+ };
diff --git a/Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml b/Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml
new file mode 100644
index 000000000000..ece261b8e963
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/microchip,mpfs-spi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip MPFS {Q,}SPI Controller Device Tree Bindings
+
+maintainers:
+ - Conor Dooley <conor.dooley@microchip.com>
+
+allOf:
+ - $ref: spi-controller.yaml#
+
+properties:
+ compatible:
+ enum:
+ - microchip,mpfs-spi
+ - microchip,mpfs-qspi
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clock-names:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include "dt-bindings/clock/microchip,mpfs-clock.h"
+ spi@20108000 {
+ compatible = "microchip,mpfs-spi";
+ reg = <0x20108000 0x1000>;
+ clocks = <&clkcfg CLK_SPI0>;
+ interrupt-parent = <&plic>;
+ interrupts = <54>;
+ spi-max-frequency = <25000000>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml
index 35a8045b2c70..0296edd1de22 100644
--- a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml
+++ b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml
@@ -19,6 +19,7 @@ properties:
- nvidia,tegra210-qspi
- nvidia,tegra186-qspi
- nvidia,tegra194-qspi
+ - nvidia,tegra234-qspi
reg:
maxItems: 1
@@ -106,7 +107,7 @@ examples:
dma-names = "rx", "tx";
flash@0 {
- compatible = "spi-nor";
+ compatible = "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <104000000>;
spi-tx-bus-width = <2>;
diff --git a/Documentation/devicetree/bindings/spi/renesas,rspi.yaml b/Documentation/devicetree/bindings/spi/renesas,rspi.yaml
index 76e6d9e52fc7..2c3c6bd6ec45 100644
--- a/Documentation/devicetree/bindings/spi/renesas,rspi.yaml
+++ b/Documentation/devicetree/bindings/spi/renesas,rspi.yaml
@@ -22,7 +22,8 @@ properties:
- renesas,rspi-r7s72100 # RZ/A1H
- renesas,rspi-r7s9210 # RZ/A2
- renesas,r9a07g044-rspi # RZ/G2{L,LC}
- - const: renesas,rspi-rz # RZ/A and RZ/G2{L,LC}
+ - renesas,r9a07g054-rspi # RZ/V2L
+ - const: renesas,rspi-rz
- items:
- enum:
@@ -124,6 +125,7 @@ allOf:
enum:
- renesas,qspi
- renesas,r9a07g044-rspi
+ - renesas,r9a07g054-rspi
then:
required:
- resets
diff --git a/Documentation/devicetree/bindings/spi/samsung,spi-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/samsung,spi-peripheral-props.yaml
new file mode 100644
index 000000000000..f0db3fb3d688
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/samsung,spi-peripheral-props.yaml
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/samsung,spi-peripheral-props.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Peripheral-specific properties for Samsung S3C/S5P/Exynos SoC SPI controller
+
+maintainers:
+ - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description:
+ See spi-peripheral-props.yaml for more info.
+
+properties:
+ controller-data:
+ type: object
+ additionalProperties: false
+
+ properties:
+ samsung,spi-feedback-delay:
+ description: |
+ The sampling phase shift to be applied on the miso line (to account
+ for any lag in the miso line). Valid values:
+ - 0: No phase shift.
+ - 1: 90 degree phase shift sampling.
+ - 2: 180 degree phase shift sampling.
+ - 3: 270 degree phase shift sampling.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+ default: 0
+
+additionalProperties: true
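
As a sketch, a peripheral whose miso line lags enough to need 180 degree
phase-shift sampling would carry (the value 2 here is illustrative):

    controller-data {
        samsung,spi-feedback-delay = <2>; /* 180 degree phase shift */
    };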
diff --git a/Documentation/devicetree/bindings/spi/samsung,spi.yaml b/Documentation/devicetree/bindings/spi/samsung,spi.yaml
new file mode 100644
index 000000000000..bf9a76d931d2
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/samsung,spi.yaml
@@ -0,0 +1,188 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/samsung,spi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung S3C/S5P/Exynos SoC SPI controller
+
+maintainers:
+ - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description:
+ All the SPI controller nodes should be represented in the aliases node using
+ the following format 'spi{n}' where n is a unique number for the alias.
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - samsung,s3c2443-spi # for S3C2443, S3C2416 and S3C2450
+ - samsung,s3c6410-spi
+ - samsung,s5pv210-spi # for S5PV210 and S5PC110
+ - samsung,exynos5433-spi
+ - tesla,fsd-spi
+ - const: samsung,exynos7-spi
+ deprecated: true
+
+ clocks:
+ minItems: 2
+ maxItems: 3
+
+ clock-names:
+ minItems: 2
+ maxItems: 3
+
+ cs-gpios: true
+
+ dmas:
+ minItems: 2
+ maxItems: 2
+
+ dma-names:
+ items:
+ - const: tx
+ - const: rx
+
+ interrupts:
+ maxItems: 1
+
+ no-cs-readback:
+ description:
+ The CS line is disconnected, therefore the device should not operate
+ based on CS signalling.
+ type: boolean
+
+ num-cs:
+ minimum: 1
+ maximum: 4
+ default: 1
+
+ samsung,spi-src-clk:
+ description:
+ If the spi controller includes an internal clock mux to select the clock
+ source for the spi bus clock, this property can be used to indicate the
+ clock to be used for driving the spi bus clock. If not specified, clock
+ number 0 is used by default.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ default: 0
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - dmas
+ - dma-names
+ - interrupts
+ - reg
+
+allOf:
+ - $ref: spi-controller.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5433-spi
+ then:
+ properties:
+ clocks:
+ minItems: 3
+ maxItems: 3
+ clock-names:
+ items:
+ - const: spi
+ - enum:
+ - spi_busclk0
+ - spi_busclk1
+ - spi_busclk2
+ - spi_busclk3
+ - const: spi_ioclk
+ else:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ items:
+ - const: spi
+ - enum:
+ - spi_busclk0
+ - spi_busclk1
+ - spi_busclk2
+ - spi_busclk3
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/exynos5433.h>
+ #include <dt-bindings/clock/samsung,s2mps11.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/gpio/gpio.h>
+
+ spi@14d30000 {
+ compatible = "samsung,exynos5433-spi";
+ reg = <0x14d30000 0x100>;
+ interrupts = <GIC_SPI 433 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&pdma0 11>, <&pdma0 10>;
+ dma-names = "tx", "rx";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&cmu_peric CLK_PCLK_SPI1>,
+ <&cmu_peric CLK_SCLK_SPI1>,
+ <&cmu_peric CLK_SCLK_IOCLK_SPI1>;
+ clock-names = "spi",
+ "spi_busclk0",
+ "spi_ioclk";
+ samsung,spi-src-clk = <0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi1_bus>;
+ num-cs = <1>;
+
+ cs-gpios = <&gpd6 3 GPIO_ACTIVE_HIGH>;
+
+ audio-codec@0 {
+ compatible = "wlf,wm5110";
+ reg = <0x0>;
+ spi-max-frequency = <20000000>;
+ interrupt-parent = <&gpa0>;
+ interrupts = <4 IRQ_TYPE_NONE>;
+ clocks = <&pmu_system_controller 0>,
+ <&s2mps13_osc S2MPS11_CLK_BT>;
+ clock-names = "mclk1", "mclk2";
+
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ wlf,micd-detect-debounce = <300>;
+ wlf,micd-bias-start-time = <0x1>;
+ wlf,micd-rate = <0x7>;
+ wlf,micd-dbtime = <0x2>;
+ wlf,micd-force-micbias;
+ wlf,micd-configs = <0x0 1 0>;
+ wlf,hpdet-channel = <1>;
+ wlf,gpsw = <0x1>;
+ wlf,inmode = <2 0 2 0>;
+
+ wlf,reset = <&gpc0 7 GPIO_ACTIVE_HIGH>;
+ wlf,ldoena = <&gpf0 0 GPIO_ACTIVE_HIGH>;
+
+ /* core supplies */
+ AVDD-supply = <&ldo18_reg>;
+ DBVDD1-supply = <&ldo18_reg>;
+ CPVDD-supply = <&ldo18_reg>;
+ DBVDD2-supply = <&ldo18_reg>;
+ DBVDD3-supply = <&ldo18_reg>;
+ SPKVDDL-supply = <&ldo18_reg>;
+ SPKVDDR-supply = <&ldo18_reg>;
+
+ controller-data {
+ samsung,spi-feedback-delay = <0>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/spi/spi-controller.yaml b/Documentation/devicetree/bindings/spi/spi-controller.yaml
index 36b72518f565..0f4d40218400 100644
--- a/Documentation/devicetree/bindings/spi/spi-controller.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-controller.yaml
@@ -139,4 +139,11 @@ examples:
spi-max-frequency = <100000>;
reg = <1>;
};
+
+ flash@2 {
+ compatible = "jedec,spi-nor";
+ spi-max-frequency = <50000000>;
+ reg = <2>, <3>;
+ stacked-memories = /bits/ 64 <0x10000000 0x10000000>;
+ };
};
diff --git a/Documentation/devicetree/bindings/spi/spi-mt65xx.txt b/Documentation/devicetree/bindings/spi/spi-mt65xx.txt
deleted file mode 100644
index 2a24969159cc..000000000000
--- a/Documentation/devicetree/bindings/spi/spi-mt65xx.txt
+++ /dev/null
@@ -1,68 +0,0 @@
-Binding for MTK SPI controller
-
-Required properties:
-- compatible: should be one of the following.
- - mediatek,mt2701-spi: for mt2701 platforms
- - mediatek,mt2712-spi: for mt2712 platforms
- - mediatek,mt6589-spi: for mt6589 platforms
- - mediatek,mt6765-spi: for mt6765 platforms
- - mediatek,mt7622-spi: for mt7622 platforms
- - "mediatek,mt7629-spi", "mediatek,mt7622-spi": for mt7629 platforms
- - mediatek,mt8135-spi: for mt8135 platforms
- - mediatek,mt8173-spi: for mt8173 platforms
- - mediatek,mt8183-spi: for mt8183 platforms
- - mediatek,mt6893-spi: for mt6893 platforms
- - "mediatek,mt8192-spi", "mediatek,mt6765-spi": for mt8192 platforms
- - "mediatek,mt8195-spi", "mediatek,mt6765-spi": for mt8195 platforms
- - "mediatek,mt8516-spi", "mediatek,mt2712-spi": for mt8516 platforms
- - "mediatek,mt6779-spi", "mediatek,mt6765-spi": for mt6779 platforms
-
-- #address-cells: should be 1.
-
-- #size-cells: should be 0.
-
-- reg: Address and length of the register set for the device
-
-- interrupts: Should contain spi interrupt
-
-- clocks: phandles to input clocks.
- The first should be one of the following. It's PLL.
- - <&clk26m>: specify parent clock 26MHZ.
- - <&topckgen CLK_TOP_SYSPLL3_D2>: specify parent clock 109MHZ.
- It's the default one.
- - <&topckgen CLK_TOP_SYSPLL4_D2>: specify parent clock 78MHZ.
- - <&topckgen CLK_TOP_UNIVPLL2_D4>: specify parent clock 104MHZ.
- - <&topckgen CLK_TOP_UNIVPLL1_D8>: specify parent clock 78MHZ.
- The second should be <&topckgen CLK_TOP_SPI_SEL>. It's clock mux.
- The third is <&pericfg CLK_PERI_SPI0>. It's clock gate.
-
-- clock-names: shall be "parent-clk" for the parent clock, "sel-clk" for the
- muxes clock, and "spi-clk" for the clock gate.
-
-Optional properties:
--cs-gpios: see spi-bus.txt.
-
-- mediatek,pad-select: specify which pins group(ck/mi/mo/cs) spi
- controller used. This is an array, the element value should be 0~3,
- only required for MT8173.
- 0: specify GPIO69,70,71,72 for spi pins.
- 1: specify GPIO102,103,104,105 for spi pins.
- 2: specify GPIO128,129,130,131 for spi pins.
- 3: specify GPIO5,6,7,8 for spi pins.
-
-Example:
-
-- SoC Specific Portion:
-spi: spi@1100a000 {
- compatible = "mediatek,mt8173-spi";
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0 0x1100a000 0 0x1000>;
- interrupts = <GIC_SPI 110 IRQ_TYPE_LEVEL_LOW>;
- clocks = <&topckgen CLK_TOP_SYSPLL3_D2>,
- <&topckgen CLK_TOP_SPI_SEL>,
- <&pericfg CLK_PERI_SPI0>;
- clock-names = "parent-clk", "sel-clk", "spi-clk";
- cs-gpios = <&pio 105 GPIO_ACTIVE_LOW>, <&pio 72 GPIO_ACTIVE_LOW>;
- mediatek,pad-select = <1>, <0>;
-};
diff --git a/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml b/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml
index 283815d59e85..1b552c298277 100644
--- a/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml
@@ -7,7 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: NXP Flex Serial Peripheral Interface (FSPI)
maintainers:
- - Kuldeep Singh <kuldeep.singh@nxp.com>
+ - Han Xu <han.xu@nxp.com>
+ - Kuldeep Singh <singh.kuldeep87k@gmail.com>
allOf:
- $ref: "spi-controller.yaml#"
diff --git a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml
index 5dd209206e88..5e32928c4fc3 100644
--- a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml
@@ -23,8 +23,9 @@ properties:
minItems: 1
maxItems: 256
items:
- minimum: 0
- maximum: 256
+ items:
+ - minimum: 0
+ maximum: 256
description:
Chip select used by the device.
@@ -82,8 +83,34 @@ properties:
description:
Delay, in microseconds, after a write transfer.
+ stacked-memories:
+ description: Several SPI memories can be wired in stacked mode.
+ This means that either a single device features several chip
+ selects, or that different devices must be seen as a single
+ bigger chip. This at least doubles the total address space
+ with only a single additional wire, while still requiring
+ commands to be repeated when crossing a chip boundary. The size
+ of each chip should be provided as members of the array.
+ $ref: /schemas/types.yaml#/definitions/uint64-array
+ minItems: 2
+ maxItems: 4
+
+ parallel-memories:
+ description: Several SPI memories can be wired in parallel mode.
+ The devices are physically on different buses but always act
+ synchronously, as each data word is spread across the
+ different memories (e.g. even bits are stored in one memory, odd
+ bits in the other). This doubles the address space and the
+ throughput while greatly complicating the wiring, because as
+ many buses as devices must be wired. The size of each chip should
+ be provided as members of the array.
+ $ref: /schemas/types.yaml#/definitions/uint64-array
+ minItems: 2
+ maxItems: 4
+
# The controller specific properties go here.
allOf:
- $ref: cdns,qspi-nor-peripheral-props.yaml#
+ - $ref: samsung,spi-peripheral-props.yaml#
additionalProperties: true
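
The spi-controller.yaml example in this series demonstrates stacked-memories;
a matching parallel-memories sketch would look like this (chip sizes and chip
selects are assumptions):

    flash@0 {
        compatible = "jedec,spi-nor";
        reg = <0>, <1>;         /* one chip select per parallel bus */
        parallel-memories = /bits/ 64 <0x10000000 0x10000000>; /* 2x 256 MiB */
    };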
diff --git a/Documentation/devicetree/bindings/spi/spi-pl022.yaml b/Documentation/devicetree/bindings/spi/spi-pl022.yaml
index 6d633728fc2b..bda45ff3d294 100644
--- a/Documentation/devicetree/bindings/spi/spi-pl022.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-pl022.yaml
@@ -38,9 +38,7 @@ properties:
clock-names:
items:
- - enum:
- - SSPCLK
- - sspclk
+ - const: sspclk
- const: apb_pclk
pl022,autosuspend-delay:
diff --git a/Documentation/devicetree/bindings/spi/spi-samsung.txt b/Documentation/devicetree/bindings/spi/spi-samsung.txt
deleted file mode 100644
index 49028a4f5df1..000000000000
--- a/Documentation/devicetree/bindings/spi/spi-samsung.txt
+++ /dev/null
@@ -1,122 +0,0 @@
-* Samsung SPI Controller
-
-The Samsung SPI controller is used to interface with various devices such as flash
-and display controllers using the SPI communication interface.
-
-Required SoC Specific Properties:
-
-- compatible: should be one of the following.
- - samsung,s3c2443-spi: for s3c2443, s3c2416 and s3c2450 platforms
- - samsung,s3c6410-spi: for s3c6410 platforms
- - samsung,s5pv210-spi: for s5pv210 and s5pc110 platforms
- - samsung,exynos5433-spi: for exynos5433 compatible controllers
- - samsung,exynos7-spi: for exynos7 platforms <DEPRECATED>
-
-- reg: physical base address of the controller and length of memory mapped
- region.
-
-- interrupts: The interrupt number to the cpu. The interrupt specifier format
- depends on the interrupt controller.
-
-- dmas : Two or more DMA channel specifiers following the convention outlined
- in bindings/dma/dma.txt
-
-- dma-names: Names for the dma channels. There must be at least one channel
- named "tx" for transmit and named "rx" for receive.
-
-- clocks: specifies the clock IDs provided to the SPI controller; they are
- required for interacting with the controller itself, for synchronizing the bus
- and as I/O clock (the latter is required by exynos5433 and exynos7).
-
-- clock-names: string names of the clocks in the 'clocks' property; for all the
- the devices the names must be "spi", "spi_busclkN" (where N is determined by
- "samsung,spi-src-clk"), while Exynos5433 should specify a third clock
- "spi_ioclk" for the I/O clock.
-
-Required Board Specific Properties:
-
-- #address-cells: should be 1.
-- #size-cells: should be 0.
-
-Optional Board Specific Properties:
-
-- samsung,spi-src-clk: If the spi controller includes a internal clock mux to
- select the clock source for the spi bus clock, this property can be used to
- indicate the clock to be used for driving the spi bus clock. If not specified,
- the clock number 0 is used as default.
-
-- num-cs: Specifies the number of chip select lines supported. If
- not specified, the default number of chip select lines is set to 1.
-
-- cs-gpios: should specify GPIOs used for chipselects (see spi-bus.txt)
-
-- no-cs-readback: the CS line is disconnected, therefore the device should not
- operate based on CS signalling.
-
-SPI Controller specific data in SPI slave nodes:
-
-- The spi slave nodes should provide the following information which is required
- by the spi controller.
-
- - samsung,spi-feedback-delay: The sampling phase shift to be applied on the
- miso line (to account for any lag in the miso line). The following are the
- valid values.
-
- - 0: No phase shift.
- - 1: 90 degree phase shift sampling.
- - 2: 180 degree phase shift sampling.
- - 3: 270 degree phase shift sampling.
-
-Aliases:
-
-- All the SPI controller nodes should be represented in the aliases node using
- the following format 'spi{n}' where n is a unique number for the alias.
-
-
-Example:
-
-- SoC Specific Portion:
-
- spi_0: spi@12d20000 {
- compatible = "samsung,exynos4210-spi";
- reg = <0x12d20000 0x100>;
- interrupts = <0 66 0>;
- dmas = <&pdma0 5
- &pdma0 4>;
- dma-names = "tx", "rx";
- #address-cells = <1>;
- #size-cells = <0>;
- };
-
-- Board Specific Portion:
-
- spi_0: spi@12d20000 {
- #address-cells = <1>;
- #size-cells = <0>;
- pinctrl-names = "default";
- pinctrl-0 = <&spi0_bus>;
- cs-gpios = <&gpa2 5 0>;
-
- w25q80bw@0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "w25x80";
- reg = <0>;
- spi-max-frequency = <10000>;
-
- controller-data {
- samsung,spi-feedback-delay = <0>;
- };
-
- partition@0 {
- label = "U-Boot";
- reg = <0x0 0x40000>;
- read-only;
- };
-
- partition@40000 {
- label = "Kernel";
- reg = <0x40000 0xc0000>;
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/spi/spi-slave-mt27xx.txt b/Documentation/devicetree/bindings/spi/spi-slave-mt27xx.txt
deleted file mode 100644
index 9192724540fd..000000000000
--- a/Documentation/devicetree/bindings/spi/spi-slave-mt27xx.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Binding for MTK SPI Slave controller
-
-Required properties:
-- compatible: should be one of the following.
- - mediatek,mt2712-spi-slave: for mt2712 platforms
- - mediatek,mt8195-spi-slave: for mt8195 platforms
-- reg: Address and length of the register set for the device.
-- interrupts: Should contain spi interrupt.
-- clocks: phandles to input clocks.
- It's clock gate, and should be <&infracfg CLK_INFRA_AO_SPI1>.
-- clock-names: should be "spi" for the clock gate.
-
-Optional properties:
-- assigned-clocks: it's mux clock, should be <&topckgen CLK_TOP_SPISLV_SEL>.
-- assigned-clock-parents: parent of mux clock.
- It's PLL, and should be one of the following.
- - <&topckgen CLK_TOP_UNIVPLL1_D2>: specify parent clock 312MHZ.
- It's the default one.
- - <&topckgen CLK_TOP_UNIVPLL1_D4>: specify parent clock 156MHZ.
- - <&topckgen CLK_TOP_UNIVPLL2_D4>: specify parent clock 104MHZ.
- - <&topckgen CLK_TOP_UNIVPLL1_D8>: specify parent clock 78MHZ.
-
-Example:
-- SoC Specific Portion:
-spis1: spi@10013000 {
- compatible = "mediatek,mt2712-spi-slave";
- reg = <0 0x10013000 0 0x100>;
- interrupts = <GIC_SPI 283 IRQ_TYPE_LEVEL_LOW>;
- clocks = <&infracfg CLK_INFRA_AO_SPI1>;
- clock-names = "spi";
- assigned-clocks = <&topckgen CLK_TOP_SPISLV_SEL>;
- assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL1_D2>;
-};
diff --git a/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml b/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml
new file mode 100644
index 000000000000..3a58cf0f1ec8
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright (C) Sunplus Co., Ltd. 2021
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/spi-sunplus-sp7021.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sunplus sp7021 SPI controller
+
+allOf:
+ - $ref: "spi-controller.yaml"
+
+maintainers:
+ - Li-hao Kuo <lhjeff911@gmail.com>
+
+properties:
+ compatible:
+ enum:
+ - sunplus,sp7021-spi
+
+ reg:
+ items:
+ - description: the SPI master registers
+ - description: the SPI slave registers
+
+ reg-names:
+ items:
+ - const: master
+ - const: slave
+
+ interrupt-names:
+ items:
+ - const: dma_w
+ - const: master_risc
+ - const: slave_risc
+
+ interrupts:
+ minItems: 3
+
+ clocks:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - interrupts
+ - interrupt-names
+ - clocks
+ - resets
+ - pinctrl-names
+ - pinctrl-0
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ spi@9C002D80 {
+ compatible = "sunplus,sp7021-spi";
+ reg = <0x9C002D80 0x80>, <0x9C002E00 0x80>;
+ reg-names = "master", "slave";
+ interrupt-parent = <&intc>;
+ interrupt-names = "dma_w",
+ "master_risc",
+ "slave_risc";
+ interrupts = <144 IRQ_TYPE_LEVEL_HIGH>,
+ <146 IRQ_TYPE_LEVEL_HIGH>,
+ <145 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clkc 0x32>;
+ resets = <&rstc 0x22>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_spi0>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt
deleted file mode 100644
index 33004ce7e5df..000000000000
--- a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt
+++ /dev/null
@@ -1,106 +0,0 @@
-* Exynos Thermal Management Unit (TMU)
-
-** Required properties:
-
-- compatible : One of the following:
- "samsung,exynos3250-tmu"
- "samsung,exynos4412-tmu"
- "samsung,exynos4210-tmu"
- "samsung,exynos5250-tmu"
- "samsung,exynos5260-tmu"
- "samsung,exynos5420-tmu" for TMU channel 0, 1 on Exynos5420
- "samsung,exynos5420-tmu-ext-triminfo" for TMU channels 2, 3 and 4
- Exynos5420 (Must pass triminfo base and triminfo clock)
- "samsung,exynos5433-tmu"
- "samsung,exynos7-tmu"
-- reg : Address range of the thermal registers. For soc's which has multiple
- instances of TMU and some registers are shared across all TMU's like
- interrupt related then 2 set of register has to supplied. First set
- belongs to register set of TMU instance and second set belongs to
- registers shared with the TMU instance.
-
- NOTE: On Exynos5420, the TRIMINFO register is misplaced for TMU
- channels 2, 3 and 4
- Use "samsung,exynos5420-tmu-ext-triminfo" in cases, there is a misplaced
- register, also provide clock to access that base.
-
- TRIMINFO at 0x1006c000 contains data for TMU channel 3
- TRIMINFO at 0x100a0000 contains data for TMU channel 4
- TRIMINFO at 0x10068000 contains data for TMU channel 2
-
-- interrupts : Should contain interrupt for thermal system
-- clocks : The main clocks for TMU device
- -- 1. operational clock for TMU channel
- -- 2. optional clock to access the shared registers of TMU channel
- -- 3. optional special clock for functional operation
-- clock-names : Thermal system clock name
- -- "tmu_apbif" operational clock for current TMU channel
- -- "tmu_triminfo_apbif" clock to access the shared triminfo register
- for current TMU channel
- -- "tmu_sclk" clock for functional operation of the current TMU
- channel
-
-The Exynos TMU supports generating interrupts when reaching given
-temperature thresholds. Number of supported thermal trip points depends
-on the SoC (only first trip points defined in DT will be configured):
- - most of SoC: 4
- - samsung,exynos5433-tmu: 8
- - samsung,exynos7-tmu: 8
-
-** Optional properties:
-
-- vtmu-supply: This entry is optional and provides the regulator node supplying
- voltage to TMU. If needed this entry can be placed inside
- board/platform specific dts file.
-
-Example 1):
-
- tmu@100c0000 {
- compatible = "samsung,exynos4412-tmu";
- interrupt-parent = <&combiner>;
- reg = <0x100C0000 0x100>;
- interrupts = <2 4>;
- clocks = <&clock 383>;
- clock-names = "tmu_apbif";
- vtmu-supply = <&tmu_regulator_node>;
- #thermal-sensor-cells = <0>;
- };
-
-Example 2): (In case of Exynos5420 "with misplaced TRIMINFO register")
- tmu_cpu2: tmu@10068000 {
- compatible = "samsung,exynos5420-tmu-ext-triminfo";
- reg = <0x10068000 0x100>, <0x1006c000 0x4>;
- interrupts = <0 184 0>;
- clocks = <&clock 318>, <&clock 318>;
- clock-names = "tmu_apbif", "tmu_triminfo_apbif";
- #thermal-sensor-cells = <0>;
- };
-
- tmu_cpu3: tmu@1006c000 {
- compatible = "samsung,exynos5420-tmu-ext-triminfo";
- reg = <0x1006c000 0x100>, <0x100a0000 0x4>;
- interrupts = <0 185 0>;
- clocks = <&clock 318>, <&clock 319>;
- clock-names = "tmu_apbif", "tmu_triminfo_apbif";
- #thermal-sensor-cells = <0>;
- };
-
- tmu_gpu: tmu@100a0000 {
- compatible = "samsung,exynos5420-tmu-ext-triminfo";
- reg = <0x100a0000 0x100>, <0x10068000 0x4>;
- interrupts = <0 215 0>;
- clocks = <&clock 319>, <&clock 318>;
- clock-names = "tmu_apbif", "tmu_triminfo_apbif";
- #thermal-sensor-cells = <0>;
- };
-
-Note: For multi-instance tmu each instance should have an alias correctly
-numbered in "aliases" node.
-
-Example:
-
-aliases {
- tmuctrl0 = &tmuctrl_0;
- tmuctrl1 = &tmuctrl_1;
- tmuctrl2 = &tmuctrl_2;
-};
diff --git a/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml b/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
index 289e9a845600..a9b7388ca9ac 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
+++ b/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
@@ -19,6 +19,7 @@ properties:
compatible:
enum:
- qcom,sdm845-lmh
+ - qcom,sm8150-lmh
reg:
items:
diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
index d3b9e9b600a2..b6406bcc683f 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
+++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
@@ -43,6 +43,7 @@ properties:
- description: v2 of TSENS
items:
- enum:
+ - qcom,msm8953-tsens
- qcom,msm8996-tsens
- qcom,msm8998-tsens
- qcom,sc7180-tsens
diff --git a/Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml b/Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml
new file mode 100644
index 000000000000..17129f75d962
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml
@@ -0,0 +1,184 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/samsung,exynos-thermal.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC Thermal Management Unit (TMU)
+
+maintainers:
+ - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+ For a multi-instance TMU, each instance should have a correctly numbered
+ alias in the "aliases" node.
+
+properties:
+ compatible:
+ enum:
+ - samsung,exynos3250-tmu
+ - samsung,exynos4412-tmu
+ - samsung,exynos4210-tmu
+ - samsung,exynos5250-tmu
+ - samsung,exynos5260-tmu
+ # For TMU channel 0, 1 on Exynos5420:
+ - samsung,exynos5420-tmu
+ # For TMU channels 2, 3 and 4 of Exynos5420:
+ - samsung,exynos5420-tmu-ext-triminfo
+ - samsung,exynos5433-tmu
+ - samsung,exynos7-tmu
+
+ clocks:
+ minItems: 1
+ maxItems: 3
+
+ clock-names:
+ minItems: 1
+ maxItems: 3
+
+ interrupts:
+ description: |
+ The Exynos TMU supports generating interrupts when reaching given
+ temperature thresholds. The number of supported thermal trip points
+ depends on the SoC (only the first trip points defined in DT will be
+ configured)::
+ - most SoCs: 4
+ - samsung,exynos5433-tmu: 8
+ - samsung,exynos7-tmu: 8
+ maxItems: 1
+
+ reg:
+ items:
+ - description: TMU instance registers.
+ - description: |
+ Shared TMU registers.
+
+ Note:: On Exynos5420, the TRIMINFO register is misplaced for TMU
+ channels 2, 3 and 4. Use "samsung,exynos5420-tmu-ext-triminfo" in
+ cases where the register is misplaced, and also provide a clock to
+ access that base.
+ TRIMINFO at 0x1006c000 contains data for TMU channel 3
+ TRIMINFO at 0x100a0000 contains data for TMU channel 4
+ TRIMINFO at 0x10068000 contains data for TMU channel 2
+ minItems: 1
+
+ '#thermal-sensor-cells': true
+
+ vtmu-supply:
+ description: The regulator node supplying voltage to TMU.
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - interrupts
+ - reg
+
+allOf:
+ - $ref: /schemas/thermal/thermal-sensor.yaml
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5420-tmu-ext-triminfo
+ then:
+ properties:
+ clocks:
+ items:
+ - description:
+ Operational clock for TMU channel.
+ - description:
+ Optional clock to access the shared registers (e.g. TRIMINFO) of TMU
+ channel.
+ clock-names:
+ items:
+ - const: tmu_apbif
+ - const: tmu_triminfo_apbif
+ reg:
+ minItems: 2
+ maxItems: 2
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - samsung,exynos5433-tmu
+ - samsung,exynos7-tmu
+ then:
+ properties:
+ clocks:
+ items:
+ - description:
+ Operational clock for TMU channel.
+ - description:
+ Optional special clock for functional operation of TMU channel.
+ clock-names:
+ items:
+ - const: tmu_apbif
+ - const: tmu_sclk
+ reg:
+ minItems: 1
+ maxItems: 1
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - samsung,exynos3250-tmu
+ - samsung,exynos4412-tmu
+ - samsung,exynos4210-tmu
+ - samsung,exynos5250-tmu
+ - samsung,exynos5260-tmu
+ - samsung,exynos5420-tmu
+ then:
+ properties:
+ clocks:
+ minItems: 1
+ maxItems: 1
+ reg:
+ minItems: 1
+ maxItems: 1
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/exynos4.h>
+
+ tmu@100c0000 {
+ compatible = "samsung,exynos4412-tmu";
+ reg = <0x100C0000 0x100>;
+ interrupt-parent = <&combiner>;
+ interrupts = <2 4>;
+ #thermal-sensor-cells = <0>;
+ clocks = <&clock CLK_TMU_APBIF>;
+ clock-names = "tmu_apbif";
+ vtmu-supply = <&ldo10_reg>;
+ };
+
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ tmu@10068000 {
+ compatible = "samsung,exynos5420-tmu-ext-triminfo";
+ reg = <0x10068000 0x100>, <0x1006c000 0x4>;
+ interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>;
+ #thermal-sensor-cells = <0>;
+ clocks = <&clock 318>, <&clock 318>; /* CLK_TMU */
+ clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+ vtmu-supply = <&ldo7_reg>;
+ };
+
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ tmu@10060000 {
+ compatible = "samsung,exynos5433-tmu";
+ reg = <0x10060000 0x200>;
+ interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>;
+ #thermal-sensor-cells = <0>;
+ clocks = <&cmu_peris 3>, /* CLK_PCLK_TMU0_APBIF */
+ <&cmu_peris 35>; /* CLK_SCLK_TMU0 */
+ clock-names = "tmu_apbif", "tmu_sclk";
+ vtmu-supply = <&ldo3_reg>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml b/Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml
new file mode 100644
index 000000000000..b78209cd0f28
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml
@@ -0,0 +1,150 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/timer/nvidia,tegra-timer.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: NVIDIA Tegra timer
+
+maintainers:
+ - Stephen Warren <swarren@nvidia.com>
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: nvidia,tegra210-timer
+ then:
+ properties:
+ interrupts:
+ # Either a single combined interrupt or up to 14 individual interrupts
+ minItems: 1
+ maxItems: 14
+ description: >
+ A list of 14 interrupts; one for each of timer channels 0 through 13.
+
+ - if:
+ properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - nvidia,tegra114-timer
+ - nvidia,tegra124-timer
+ - nvidia,tegra132-timer
+ - const: nvidia,tegra30-timer
+ - items:
+ - const: nvidia,tegra30-timer
+ - const: nvidia,tegra20-timer
+ then:
+ properties:
+ interrupts:
+ # Either a single combined interrupt or up to 6 individual interrupts
+ minItems: 1
+ maxItems: 6
+ description: >
+ A list of 6 interrupts; one for each of timer channels 1 through 5,
+ and one for the shared interrupt for the remaining channels.
+
+ - if:
+ properties:
+ compatible:
+ const: nvidia,tegra20-timer
+ then:
+ properties:
+ interrupts:
+ # Either a single combined interrupt or up to 4 individual interrupts
+ minItems: 1
+ maxItems: 4
+ description: |
+ A list of 4 interrupts; one per timer channel.
+
+properties:
+ compatible:
+ oneOf:
+ - const: nvidia,tegra210-timer
+ description: >
+ The Tegra210 timer provides fourteen 29-bit timer counters and one 32-bit
+ timestamp counter. The TMRs run at either a fixed 1 MHz clock rate derived
+ from the oscillator clock (TMR0-TMR9) or directly at the oscillator clock
+ (TMR10-TMR13). Each TMR can be programmed to generate one-shot, periodic,
+ or watchdog interrupts.
+ - items:
+ - enum:
+ - nvidia,tegra114-timer
+ - nvidia,tegra124-timer
+ - nvidia,tegra132-timer
+ - const: nvidia,tegra30-timer
+ - items:
+ - const: nvidia,tegra30-timer
+ - const: nvidia,tegra20-timer
+ description: >
+ The Tegra30 timer provides ten 29-bit timer channels, a single 32-bit free
+ running counter, and 5 watchdog modules. The first two channels may also
+ trigger a legacy watchdog reset.
+ - const: nvidia,tegra20-timer
+ description: >
+ The Tegra20 timer provides four 29-bit timer channels and a single 32-bit free
+ running counter. The first two channels may also trigger a watchdog reset.
+
+ reg:
+ maxItems: 1
+
+ interrupts: true
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: timer
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ timer@60005000 {
+ compatible = "nvidia,tegra30-timer", "nvidia,tegra20-timer";
+ reg = <0x60005000 0x400>;
+ interrupts = <0 0 IRQ_TYPE_LEVEL_HIGH>,
+ <0 1 IRQ_TYPE_LEVEL_HIGH>,
+ <0 41 IRQ_TYPE_LEVEL_HIGH>,
+ <0 42 IRQ_TYPE_LEVEL_HIGH>,
+ <0 121 IRQ_TYPE_LEVEL_HIGH>,
+ <0 122 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car 214>;
+ };
+ - |
+ #include <dt-bindings/clock/tegra210-car.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ timer@60005000 {
+ compatible = "nvidia,tegra210-timer";
+ reg = <0x60005000 0x400>;
+ interrupts = <GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 176 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 178 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 179 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_TIMER>;
+ clock-names = "timer";
+ };
diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt b/Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt
deleted file mode 100644
index 4a864bd10d3d..000000000000
--- a/Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-NVIDIA Tegra20 timer
-
-The Tegra20 timer provides four 29-bit timer channels and a single 32-bit free
-running counter. The first two channels may also trigger a watchdog reset.
-
-Required properties:
-
-- compatible : should be "nvidia,tegra20-timer".
-- reg : Specifies base physical address and size of the registers.
-- interrupts : A list of 4 interrupts; one per timer channel.
-- clocks : Must contain one entry, for the module clock.
- See ../clocks/clock-bindings.txt for details.
-
-Example:
-
-timer {
- compatible = "nvidia,tegra20-timer";
- reg = <0x60005000 0x60>;
- interrupts = <0 0 0x04
- 0 1 0x04
- 0 41 0x04
- 0 42 0x04>;
- clocks = <&tegra_car 132>;
-};
diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra210-timer.txt b/Documentation/devicetree/bindings/timer/nvidia,tegra210-timer.txt
deleted file mode 100644
index 032cda96fe0d..000000000000
--- a/Documentation/devicetree/bindings/timer/nvidia,tegra210-timer.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-NVIDIA Tegra210 timer
-
-The Tegra210 timer provides fourteen 29-bit timer counters and one 32-bit
-timestamp counter. The TMRs run at either a fixed 1 MHz clock rate derived
-from the oscillator clock (TMR0-TMR9) or directly at the oscillator clock
-(TMR10-TMR13). Each TMR can be programmed to generate one-shot, periodic,
-or watchdog interrupts.
-
-Required properties:
-- compatible : "nvidia,tegra210-timer".
-- reg : Specifies base physical address and size of the registers.
-- interrupts : A list of 14 interrupts; one per each timer channels 0 through
- 13.
-- clocks : Must contain one entry, for the module clock.
- See ../clocks/clock-bindings.txt for details.
-
-timer@60005000 {
- compatible = "nvidia,tegra210-timer";
- reg = <0x0 0x60005000 0x0 0x400>;
- interrupts = <GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 176 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 178 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 179 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&tegra_car TEGRA210_CLK_TIMER>;
- clock-names = "timer";
-};
diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt b/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt
deleted file mode 100644
index 1761f53ee36f..000000000000
--- a/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-NVIDIA Tegra30 timer
-
-The Tegra30 timer provides ten 29-bit timer channels, a single 32-bit free
-running counter, and 5 watchdog modules. The first two channels may also
-trigger a legacy watchdog reset.
-
-Required properties:
-
-- compatible : For Tegra30, must contain "nvidia,tegra30-timer". Otherwise,
- must contain '"nvidia,<chip>-timer", "nvidia,tegra30-timer"' where
- <chip> is tegra124 or tegra132.
-- reg : Specifies base physical address and size of the registers.
-- interrupts : A list of 6 interrupts; one per each of timer channels 1
- through 5, and one for the shared interrupt for the remaining channels.
-- clocks : Must contain one entry, for the module clock.
- See ../clocks/clock-bindings.txt for details.
-
-timer {
- compatible = "nvidia,tegra30-timer", "nvidia,tegra20-timer";
- reg = <0x60005000 0x400>;
- interrupts = <0 0 0x04
- 0 1 0x04
- 0 41 0x04
- 0 42 0x04
- 0 121 0x04
- 0 122 0x04>;
- clocks = <&tegra_car 214>;
-};
diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index 091792ba993e..da929cb08463 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -137,6 +137,8 @@ properties:
- infineon,slb9645tt
# Infineon TLV493D-A1B6 I2C 3D Magnetic Sensor
- infineon,tlv493d-a1b6
+ # Infineon Multi-phase Digital VR Controller xdpe11280
+ - infineon,xdpe11280
# Infineon Multi-phase Digital VR Controller xdpe12254
- infineon,xdpe12254
# Infineon Multi-phase Digital VR Controller xdpe12284
@@ -337,6 +339,7 @@ properties:
# Thermometer with SPI interface
- ti,tmp121
- ti,tmp122
+ - ti,tmp125
# Digital Temperature Sensor
- ti,tmp275
# TI DC-DC converter on PMBus
@@ -354,6 +357,8 @@ properties:
- ti,tps544c25
# Winbond/Nuvoton H/W Monitor
- winbond,w83793
+ # Vicor Corporation Digital Supervisor
+ - vicor,pli1209bc
# i2c trusted platform module (TPM)
- winbond,wpct301
diff --git a/Documentation/devicetree/bindings/usb/dwc2.yaml b/Documentation/devicetree/bindings/usb/dwc2.yaml
index f00867ebc147..481aaa09f3f2 100644
--- a/Documentation/devicetree/bindings/usb/dwc2.yaml
+++ b/Documentation/devicetree/bindings/usb/dwc2.yaml
@@ -53,6 +53,7 @@ properties:
- const: st,stm32mp15-hsotg
- const: snps,dwc2
- const: samsung,s3c6400-hsotg
+ - const: intel,socfpga-agilex-hsotg
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml
index a634774c537c..eedde385d299 100644
--- a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml
+++ b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml
@@ -7,7 +7,7 @@ $schema: "http://devicetree.org/meta-schemas/core.yaml#"
title: Bindings for the TI wrapper module for the Cadence USBSS-DRD controller
maintainers:
- - Roger Quadros <rogerq@ti.com>
+ - Roger Quadros <rogerq@kernel.org>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml b/Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml
index f6e91a5fd8fe..4f7a212fddd3 100644
--- a/Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml
+++ b/Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: TI Keystone Soc USB Controller
maintainers:
- - Roger Quadros <rogerq@ti.com>
+ - Roger Quadros <rogerq@kernel.org>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 294093d45a23..047a83a089ce 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -1298,6 +1298,8 @@ patternProperties:
description: Vertexcom Technologies, Inc.
"^via,.*":
description: VIA Technologies, Inc.
+ "^vicor,.*":
+ description: Vicor Corporation
"^videostrong,.*":
description: Videostrong Technology Co., Ltd.
"^virtio,.*":
diff --git a/Documentation/driver-api/gpio/board.rst b/Documentation/driver-api/gpio/board.rst
index 191fa867826a..4e3adf31c8d1 100644
--- a/Documentation/driver-api/gpio/board.rst
+++ b/Documentation/driver-api/gpio/board.rst
@@ -71,14 +71,14 @@ with the help of _DSD (Device Specific Data), introduced in ACPI 5.1::
Device (FOO) {
Name (_CRS, ResourceTemplate () {
- GpioIo (Exclusive, ..., IoRestrictionOutputOnly,
- "\\_SB.GPI0") {15} // red
- GpioIo (Exclusive, ..., IoRestrictionOutputOnly,
- "\\_SB.GPI0") {16} // green
- GpioIo (Exclusive, ..., IoRestrictionOutputOnly,
- "\\_SB.GPI0") {17} // blue
- GpioIo (Exclusive, ..., IoRestrictionOutputOnly,
- "\\_SB.GPI0") {1} // power
+ GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly,
+ "\\_SB.GPI0", 0, ResourceConsumer) { 15 } // red
+ GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly,
+ "\\_SB.GPI0", 0, ResourceConsumer) { 16 } // green
+ GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly,
+ "\\_SB.GPI0", 0, ResourceConsumer) { 17 } // blue
+ GpioIo (Exclusive, PullNone, 0, 0, IoRestrictionOutputOnly,
+ "\\_SB.GPI0", 0, ResourceConsumer) { 1 } // power
})
Name (_DSD, Package () {
@@ -92,10 +92,7 @@ with the help of _DSD (Device Specific Data), introduced in ACPI 5.1::
^FOO, 2, 0, 1,
}
},
- Package () {
- "power-gpios",
- Package () {^FOO, 3, 0, 0},
- },
+ Package () { "power-gpios", Package () { ^FOO, 3, 0, 0 } },
}
})
}
diff --git a/Documentation/driver-api/mtd/index.rst b/Documentation/driver-api/mtd/index.rst
index 436ba5a851d7..6a4278f409d7 100644
--- a/Documentation/driver-api/mtd/index.rst
+++ b/Documentation/driver-api/mtd/index.rst
@@ -7,6 +7,6 @@ Memory Technology Device (MTD)
.. toctree::
:maxdepth: 1
- intel-spi
+ spi-intel
nand_ecc
spi-nor
diff --git a/Documentation/driver-api/mtd/intel-spi.rst b/Documentation/driver-api/mtd/spi-intel.rst
index 0465f6879262..df854f20ead1 100644
--- a/Documentation/driver-api/mtd/intel-spi.rst
+++ b/Documentation/driver-api/mtd/spi-intel.rst
@@ -1,5 +1,5 @@
==============================
-Upgrading BIOS using intel-spi
+Upgrading BIOS using spi-intel
==============================
Many Intel CPUs like Baytrail and Braswell include SPI serial flash host
@@ -11,12 +11,12 @@ avoid accidental (or on purpose) overwrite of the content.
Not all manufacturers protect the SPI serial flash, mainly because it
allows upgrading the BIOS image directly from an OS.
-The intel-spi driver makes it possible to read and write the SPI serial
+The spi-intel driver makes it possible to read and write the SPI serial
flash, if certain protection bits are not set and locked. If it finds
any of them set, the whole MTD device is made read-only to prevent
partial overwrites. By default the driver exposes SPI serial flash
contents as read-only but it can be changed from kernel command line,
-passing "intel-spi.writeable=1".
+passing "spi_intel.writeable=1".
Please keep in mind that overwriting the BIOS image on SPI serial flash
might render the machine unbootable and requires special equipment like
@@ -32,7 +32,7 @@ Linux.
serial flash. Distros like Debian and Fedora have this prepackaged with
name "mtd-utils".
- 3) Add "intel-spi.writeable=1" to the kernel command line and reboot
+ 3) Add "spi_intel.writeable=1" to the kernel command line and reboot
the board (you can also reload the driver passing "writeable=1" as
module parameter to modprobe).
diff --git a/Documentation/driver-api/serial/driver.rst b/Documentation/driver-api/serial/driver.rst
index 31bd4e16fb1f..06ec04ba086f 100644
--- a/Documentation/driver-api/serial/driver.rst
+++ b/Documentation/driver-api/serial/driver.rst
@@ -311,7 +311,7 @@ hardware.
This call must not sleep
set_ldisc(port,termios)
- Notifier for discipline change. See Documentation/driver-api/serial/tty.rst.
+ Notifier for discipline change. See Documentation/tty/tty_ldisc.rst.
Locking: caller holds tty_port->mutex
diff --git a/Documentation/driver-api/thermal/index.rst b/Documentation/driver-api/thermal/index.rst
index 4cb0b9b6bfb8..030306ffa408 100644
--- a/Documentation/driver-api/thermal/index.rst
+++ b/Documentation/driver-api/thermal/index.rst
@@ -17,3 +17,4 @@ Thermal
intel_powerclamp
nouveau_thermal
x86_pkg_temperature_thermal
+ intel_dptf
diff --git a/Documentation/driver-api/thermal/intel_dptf.rst b/Documentation/driver-api/thermal/intel_dptf.rst
new file mode 100644
index 000000000000..96668dca753a
--- /dev/null
+++ b/Documentation/driver-api/thermal/intel_dptf.rst
@@ -0,0 +1,272 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================================
+Intel(R) Dynamic Platform and Thermal Framework Sysfs Interface
+===============================================================
+
+:Copyright: |copy| 2022 Intel Corporation
+
+:Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+Introduction
+------------
+
+Intel(R) Dynamic Platform and Thermal Framework (DPTF) is a platform
+level hardware/software solution for power and thermal management.
+
+As a container for multiple power/thermal technologies, DPTF provides
+a coordinated approach for different policies to effect the hardware
+state of a system.
+
+Since it is a platform level framework, it has several components.
+Some parts of the technology are implemented in the firmware, which
+uses ACPI and PCI devices to expose various features for monitoring
+and control. Linux has a set of kernel drivers exposing the hardware
+interface to user space. This allows user space thermal solutions like
+"Linux Thermal Daemon" to read platform specific thermal and power
+tables to deliver adequate performance while keeping the system under
+thermal limits.
+
+DPTF ACPI Drivers interface
+----------------------------
+
+:file:`/sys/bus/platform/devices/<N>/uuids`, where <N>
+=INT3400|INTC1040|INTC1041|INTC10A0
+
+``available_uuids`` (RO)
+ A set of UUID strings presenting available policies,
+ which should be notified to the firmware when
+ user space can support those policies.
+
+ UUID strings:
+
+ "42A441D6-AE6A-462b-A84B-4A8CE79027D3" : Passive 1
+
+ "3A95C389-E4B8-4629-A526-C52C88626BAE" : Active
+
+ "97C68AE7-15FA-499c-B8C9-5DA81D606E0A" : Critical
+
+ "63BE270F-1C11-48FD-A6F7-3AF253FF3E2D" : Adaptive performance
+
+ "5349962F-71E6-431D-9AE8-0A635B710AEE" : Emergency call
+
+ "9E04115A-AE87-4D1C-9500-0F3E340BFE75" : Passive 2
+
+ "F5A35014-C209-46A4-993A-EB56DE7530A1" : Power Boss
+
+ "6ED722A7-9240-48A5-B479-31EEF723D7CF" : Virtual Sensor
+
+ "16CAF1B7-DD38-40ED-B1C1-1B8A1913D531" : Cooling mode
+
+ "BE84BABF-C4D4-403D-B495-3128FD44dAC1" : HDC
+
+``current_uuid`` (RW)
+ User space can write strings from available UUIDs, one at a
+ time.
+
+:file:`/sys/bus/platform/devices/<N>/`, where <N>
+=INT3400|INTC1040|INTC1041|INTC10A0
+
+``imok`` (WO)
+ The user space daemon writes 1 to respond to the firmware
+ event asking for a keep alive notification. User space
+ receives a THERMAL_EVENT_KEEP_ALIVE kobject uevent
+ notification when the firmware calls for user space to
+ respond via the imok ACPI method.
+
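+A minimal user space sketch of acknowledging the keep alive event by
+writing to ``imok`` (the device instance name "INTC1040:00" below is
+only an example and differs between platforms)::
+
+	#include <fcntl.h>
+	#include <unistd.h>
+
+	static int send_imok(void)
+	{
+		int ret = -1;
+		int fd = open("/sys/bus/platform/devices/INTC1040:00/imok",
+			      O_WRONLY);
+
+		if (fd < 0)
+			return -1;
+		/* Tell the firmware that user space is alive and responding. */
+		if (write(fd, "1", 1) == 1)
+			ret = 0;
+		close(fd);
+		return ret;
+	}
+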
+``odvp*`` (RO)
+ Firmware thermal status variable values. Thermal tables
+ call for different processing based on these variable
+ values.
+
+``data_vault`` (RO)
+ Binary thermal table. Refer to
+ https://github.com/intel/thermal_daemon for decoding
+ the thermal table.
+
+
+ACPI Thermal Relationship table interface
+------------------------------------------
+
+:file:`/dev/acpi_thermal_rel`
+
+ This device provides IOCTL interface to read standard ACPI
+ thermal relationship tables via ACPI methods _TRT and _ART.
+ These IOCTLs are defined in
+ drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.h
+
+ IOCTLs:
+
+ ACPI_THERMAL_GET_TRT_LEN: Get length of TRT table
+
+ ACPI_THERMAL_GET_ART_LEN: Get length of ART table
+
+ ACPI_THERMAL_GET_TRT_COUNT: Number of records in TRT table
+
+ ACPI_THERMAL_GET_ART_COUNT: Number of records in ART table
+
+ ACPI_THERMAL_GET_TRT: Read binary TRT table, length to read is
+ provided via argument to ioctl().
+
+ ACPI_THERMAL_GET_ART: Read binary ART table, length to read is
+ provided via argument to ioctl().
+
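+As an illustration, a user space program could query the TRT sizes
+roughly as follows (a sketch, assuming the IOCTL definitions have been
+copied from acpi_thermal_rel.h; not a supported tool)::
+
+	#include <err.h>
+	#include <fcntl.h>
+	#include <stdio.h>
+	#include <sys/ioctl.h>
+	#include "acpi_thermal_rel.h"	/* local copy of the IOCTL defines */
+
+	int main(void)
+	{
+		unsigned long trt_len = 0, trt_count = 0;
+		int fd = open("/dev/acpi_thermal_rel", O_RDONLY);
+
+		if (fd < 0)
+			err(1, "open");
+		if (ioctl(fd, ACPI_THERMAL_GET_TRT_LEN, &trt_len) ||
+		    ioctl(fd, ACPI_THERMAL_GET_TRT_COUNT, &trt_count))
+			err(1, "ioctl");
+		printf("TRT: %lu records, %lu bytes\n", trt_count, trt_len);
+		return 0;
+	}
+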
+DPTF ACPI Sensor drivers
+-------------------------
+
+DPTF Sensor drivers are presented as standard thermal sysfs thermal_zone.
+
+
+DPTF ACPI Cooling drivers
+--------------------------
+
+DPTF cooling drivers are presented as standard thermal sysfs cooling_device.
+
+
+DPTF Processor thermal PCI Driver interface
+--------------------------------------------
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/power_limits/`
+
+Refer to Documentation/power/powercap/powercap.rst for powercap
+ABI.
+
+``power_limit_0_max_uw`` (RO)
+ Maximum powercap sysfs constraint_0_power_limit_uw for Intel RAPL
+
+``power_limit_0_step_uw`` (RO)
+ Power limit increment/decrements for Intel RAPL constraint 0 power limit
+
+``power_limit_0_min_uw`` (RO)
+ Minimum powercap sysfs constraint_0_power_limit_uw for Intel RAPL
+
+``power_limit_0_tmin_us`` (RO)
+ Minimum powercap sysfs constraint_0_time_window_us for Intel RAPL
+
+``power_limit_0_tmax_us`` (RO)
+ Maximum powercap sysfs constraint_0_time_window_us for Intel RAPL
+
+``power_limit_1_max_uw`` (RO)
+ Maximum powercap sysfs constraint_1_power_limit_uw for Intel RAPL
+
+``power_limit_1_step_uw`` (RO)
+ Power limit increment/decrements for Intel RAPL constraint 1 power limit
+
+``power_limit_1_min_uw`` (RO)
+ Minimum powercap sysfs constraint_1_power_limit_uw for Intel RAPL
+
+``power_limit_1_tmin_us`` (RO)
+ Minimum powercap sysfs constraint_1_time_window_us for Intel RAPL
+
+``power_limit_1_tmax_us`` (RO)
+ Maximum powercap sysfs constraint_1_time_window_us for Intel RAPL
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/`
+
+``tcc_offset_degree_celsius`` (RW)
+ TCC offset from the critical temperature where hardware will throttle
+ CPU.
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/workload_request`
+
+``workload_available_types`` (RO)
+ Available workload types. User space can specify one of the
+ workload types it is currently executing via workload_type.
+ For example: idle, bursty, sustained, etc.
+
+``workload_type`` (RW)
+ User space can specify any one of the available workload types
+ using this interface.
+
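+For instance, a hint could be sent with a sketch like the following
+(the chosen type must be one of those listed in
+``workload_available_types``; "bursty" is only an example)::
+
+	#include <fcntl.h>
+	#include <string.h>
+	#include <unistd.h>
+
+	static int set_workload_type(const char *type)
+	{
+		int ret = -1;
+		int fd = open("/sys/bus/pci/devices/0000:00:04.0/"
+			      "workload_request/workload_type", O_WRONLY);
+
+		if (fd < 0)
+			return -1;
+		if (write(fd, type, strlen(type)) == (ssize_t)strlen(type))
+			ret = 0;
+		close(fd);
+		return ret;
+	}
+
+	/* e.g. set_workload_type("bursty"); */
+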
+DPTF Processor thermal RFIM interface
+--------------------------------------------
+
+The RFIM interface allows adjustment of FIVR (Fully Integrated Voltage
+Regulator) and DDR (Double Data Rate) frequencies to avoid RF interference
+with WiFi and 5G.
+
+Switching voltage regulators (VR) generate radiated EMI or RFI at the
+fundamental frequency and its harmonics. Some harmonics may interfere
+with very sensitive wireless receivers such as Wi-Fi and cellular that
+are integrated into host systems like notebook PCs. One mitigation
+method is to shift the SOC integrated VR (IVR) switching frequency by a
+small percentage, moving the switching noise harmonics away from
+radio channels. OEMs or ODMs can use the driver to control SOC IVR
+operation within the range where it does not impact IVR performance.
+
+DRAM devices of DDR IO interface and their power plane can generate EMI
+at the data rates. Similar to IVR control mechanism, Intel offers a
+mechanism by which DDR data rates can be changed if several conditions
+are met: there is strong RFI interference because of DDR; CPU power
+management has no other restriction in changing DDR data rates;
+PC ODMs enable this feature (real time DDR RFI Mitigation referred to as
+DDR-RFIM) for Wi-Fi from BIOS.
+
+
+FIVR attributes
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/fivr/`
+
+``vco_ref_code_lo`` (RW)
+ The VCO reference code is an 11-bit field and controls the FIVR
+ switching frequency. This is the 3-bit LSB field.
+
+``vco_ref_code_hi`` (RW)
+ The VCO reference code is an 11-bit field and controls the FIVR
+ switching frequency. This is the 8-bit MSB field.
+
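+The two fields can be combined into the full 11-bit code with a helper
+along these lines (illustrative only, not a driver API)::
+
+	/* The 8-bit MSB field sits above the 3-bit LSB field. */
+	static unsigned int fivr_vco_ref_code(unsigned int lo, unsigned int hi)
+	{
+		return ((hi & 0xff) << 3) | (lo & 0x7);
+	}
+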
+``spread_spectrum_pct`` (RW)
+ Set the FIVR spread spectrum clocking percentage
+
+``spread_spectrum_clk_enable`` (RW)
+ Enable/disable of the FIVR spread spectrum clocking feature
+
+``rfi_vco_ref_code`` (RW)
+ This field is a read only status register which reflects the
+ current FIVR switching frequency
+
+``fivr_fffc_rev`` (RW)
+ This field indicates the revision of the FIVR HW.
+
+
+DVFS attributes
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/dvfs/`
+
+``rfi_restriction_run_busy`` (RW)
+ Request the restriction of a specific DDR data rate by setting
+ this value to 1. It self-resets to 0 after the operation.
+
+``rfi_restriction_err_code`` (RW)
+ 0: request is accepted, 1: feature disabled,
+ 2: the request restricts more points than allowed
+
+``rfi_restriction_data_rate_Delta`` (RW)
+ Restricted DDR data rate for RFI protection: Lower Limit
+
+``rfi_restriction_data_rate_Base`` (RW)
+ Restricted DDR data rate for RFI protection: Upper Limit
+
+``ddr_data_rate_point_0`` (RO)
+ DDR data rate selection 1st point
+
+``ddr_data_rate_point_1`` (RO)
+ DDR data rate selection 2nd point
+
+``ddr_data_rate_point_2`` (RO)
+ DDR data rate selection 3rd point
+
+``ddr_data_rate_point_3`` (RO)
+ DDR data rate selection 4th point
+
+``rfi_disable`` (RW)
+ Disable DDR rate change feature
+
+DPTF Power supply and Battery Interface
+----------------------------------------
+
+Refer to Documentation/ABI/testing/sysfs-platform-dptf
+
+DPTF Fan Control
+----------------------------------------
+
+Refer to Documentation/admin-guide/acpi/fan_performance_states.rst
diff --git a/Documentation/filesystems/caching/netfs-api.rst b/Documentation/filesystems/caching/netfs-api.rst
index f84e9ffdf0b4..5066113acad5 100644
--- a/Documentation/filesystems/caching/netfs-api.rst
+++ b/Documentation/filesystems/caching/netfs-api.rst
@@ -345,8 +345,9 @@ The following facilities are provided to manage this:
To support this, the following functions are provided::
- int fscache_set_page_dirty(struct page *page,
- struct fscache_cookie *cookie);
+ bool fscache_dirty_folio(struct address_space *mapping,
+ struct folio *folio,
+ struct fscache_cookie *cookie);
void fscache_unpin_writeback(struct writeback_control *wbc,
struct fscache_cookie *cookie);
void fscache_clear_inode_writeback(struct fscache_cookie *cookie,
@@ -354,7 +355,7 @@ To support this, the following functions are provided::
const void *aux);
The *set* function is intended to be called from the filesystem's
-``set_page_dirty`` address space operation. If ``I_PINNING_FSCACHE_WB`` is not
+``dirty_folio`` address space operation. If ``I_PINNING_FSCACHE_WB`` is not
set, it sets that flag and increments the use count on the cookie (the caller
must already have called ``fscache_use_cookie()``).
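+As an illustration, a network filesystem might wrap it like this (the
+``foo_i()`` inode accessor and its ``cookie`` member are hypothetical
+names)::
+
+	static bool foo_dirty_folio(struct address_space *mapping,
+				    struct folio *folio)
+	{
+		return fscache_dirty_folio(mapping, folio,
+					   foo_i(mapping->host)->cookie);
+	}
+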
diff --git a/Documentation/filesystems/dax.rst b/Documentation/filesystems/dax.rst
index e3b30429d703..c04609d8ee24 100644
--- a/Documentation/filesystems/dax.rst
+++ b/Documentation/filesystems/dax.rst
@@ -23,11 +23,11 @@ on it as usual. The `DAX` code currently only supports files with a block
size equal to your kernel's `PAGE_SIZE`, so you may need to specify a block
size when creating the filesystem.
-Currently 4 filesystems support `DAX`: ext2, ext4, xfs and virtiofs.
+Currently 5 filesystems support `DAX`: ext2, ext4, xfs, virtiofs and erofs.
Enabling `DAX` on them is different.
-Enabling DAX on ext2
---------------------
+Enabling DAX on ext2 and erofs
+------------------------------
When mounting the filesystem, use the ``-o dax`` option on the command line or
add 'dax' to the options in ``/etc/fstab``. This works to enable `DAX` on all files
diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
index 7119aa213be7..bef6d3040ce4 100644
--- a/Documentation/filesystems/erofs.rst
+++ b/Documentation/filesystems/erofs.rst
@@ -40,7 +40,7 @@ Here is the main features of EROFS:
Inode metadata size 32 bytes 64 bytes
Max file size 4 GB 16 EB (also limited by max. vol size)
Max uids/gids 65536 4294967296
- File change time no yes (64 + 32-bit timestamp)
+ Per-inode timestamp no yes (64 + 32-bit timestamp)
Max hardlinks 65536 4294967296
Metadata reserved 4 bytes 14 bytes
===================== ============ =====================================
diff --git a/Documentation/filesystems/ext4/blocks.rst b/Documentation/filesystems/ext4/blocks.rst
index bd722ecd92d6..b0f80ea87c90 100644
--- a/Documentation/filesystems/ext4/blocks.rst
+++ b/Documentation/filesystems/ext4/blocks.rst
@@ -39,7 +39,7 @@ For 32-bit filesystems, limits are as follows:
- 4TiB
- 8TiB
- 16TiB
- - 256PiB
+ - 256TiB
* - Blocks Per Block Group
- 8,192
- 16,384
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index 4d5d50dca65c..6ccd5efb25b7 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -1047,8 +1047,8 @@ astute users may notice some differences in behavior:
may be used to overwrite the source files but isn't guaranteed to be
effective on all filesystems and storage devices.
-- Direct I/O is not supported on encrypted files. Attempts to use
- direct I/O on such files will fall back to buffered I/O.
+- Direct I/O is supported on encrypted files only under some
+ circumstances. For details, see `Direct I/O support`_.
- The fallocate operations FALLOC_FL_COLLAPSE_RANGE and
FALLOC_FL_INSERT_RANGE are not supported on encrypted files and will
@@ -1179,6 +1179,27 @@ Inline encryption doesn't affect the ciphertext or other aspects of
the on-disk format, so users may freely switch back and forth between
using "inlinecrypt" and not using "inlinecrypt".
+Direct I/O support
+==================
+
+For direct I/O on an encrypted file to work, the following conditions
+must be met (in addition to the conditions for direct I/O on an
+unencrypted file):
+
+* The file must be using inline encryption. Usually this means that
+ the filesystem must be mounted with ``-o inlinecrypt`` and inline
+ encryption hardware must be present. However, a software fallback
+ is also available. For details, see `Inline encryption support`_.
+
+* The I/O request must be fully aligned to the filesystem block size.
+ This means that the file position the I/O is targeting, the lengths
+ of all I/O segments, and the memory addresses of all I/O buffers
+ must be multiples of this value. Note that the filesystem block
+ size may be greater than the logical block size of the block device.
+
+If either of the above conditions is not met, then direct I/O on the
+encrypted file will fall back to buffered I/O.
+
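+As an illustration, user space could issue a correctly aligned direct
+read with a sketch like the following (FIGETBSZ reports the filesystem
+block size)::
+
+	#define _GNU_SOURCE	/* for O_DIRECT */
+	#include <err.h>
+	#include <fcntl.h>
+	#include <stdlib.h>
+	#include <unistd.h>
+	#include <sys/ioctl.h>
+	#include <linux/fs.h>
+
+	static void *read_first_block(const char *path)
+	{
+		void *buf;
+		int bsz = 0;
+		int fd = open(path, O_RDONLY | O_DIRECT);
+
+		if (fd < 0)
+			err(1, "open");
+		if (ioctl(fd, FIGETBSZ, &bsz))
+			err(1, "FIGETBSZ");
+		/* Buffer address, length and file position are all aligned. */
+		if (posix_memalign(&buf, bsz, bsz))
+			err(1, "posix_memalign");
+		if (pread(fd, buf, bsz, 0) < 0)
+			err(1, "pread");
+		close(fd);
+		return buf;
+	}
+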
Implementation details
======================
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 3f9b1497ebb8..2998cec9af4b 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -239,7 +239,7 @@ prototypes::
int (*writepage)(struct page *page, struct writeback_control *wbc);
int (*readpage)(struct file *, struct page *);
int (*writepages)(struct address_space *, struct writeback_control *);
- int (*set_page_dirty)(struct page *page);
+ bool (*dirty_folio)(struct address_space *, struct folio *folio);
void (*readahead)(struct readahead_control *);
int (*readpages)(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages);
@@ -250,21 +250,21 @@ prototypes::
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata);
sector_t (*bmap)(struct address_space *, sector_t);
- void (*invalidatepage) (struct page *, unsigned int, unsigned int);
+ void (*invalidate_folio) (struct folio *, size_t start, size_t len);
int (*releasepage) (struct page *, int);
void (*freepage)(struct page *);
int (*direct_IO)(struct kiocb *, struct iov_iter *iter);
bool (*isolate_page) (struct page *, isolate_mode_t);
int (*migratepage)(struct address_space *, struct page *, struct page *);
void (*putback_page) (struct page *);
- int (*launder_page)(struct page *);
- int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long);
+ int (*launder_folio)(struct folio *);
+ bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count);
int (*error_remove_page)(struct address_space *, struct page *);
int (*swap_activate)(struct file *);
int (*swap_deactivate)(struct file *);
locking rules:
- All except set_page_dirty and freepage may block
+ All except dirty_folio and freepage may block
====================== ======================== ========= ===============
ops PageLocked(page) i_rwsem invalidate_lock
@@ -272,20 +272,20 @@ ops PageLocked(page) i_rwsem invalidate_lock
writepage: yes, unlocks (see below)
readpage: yes, unlocks shared
writepages:
-set_page_dirty no
+dirty_folio maybe
readahead: yes, unlocks shared
readpages: no shared
write_begin: locks the page exclusive
write_end: yes, unlocks exclusive
bmap:
-invalidatepage: yes exclusive
+invalidate_folio: yes exclusive
releasepage: yes
freepage: yes
direct_IO:
isolate_page: yes
migratepage: yes (both)
putback_page: yes
-launder_page: yes
+launder_folio: yes
is_partially_uptodate: yes
error_remove_page: yes
swap_activate: no
@@ -361,22 +361,22 @@ If nr_to_write is NULL, all dirty pages must be written.
writepages should _only_ write pages which are present on
mapping->io_pages.
-->set_page_dirty() is called from various places in the kernel
-when the target page is marked as needing writeback. It may be called
-under spinlock (it cannot block) and is sometimes called with the page
-not locked.
+->dirty_folio() is called from various places in the kernel when
+the target folio is marked as needing writeback. The folio cannot be
+truncated because either the caller holds the folio lock, or the caller
+has found the folio while holding the page table lock which will block
+truncation.
->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some
filesystems and by the swapper. The latter will eventually go away. Please,
keep it that way and don't breed new callers.
-->invalidatepage() is called when the filesystem must attempt to drop
+->invalidate_folio() is called when the filesystem must attempt to drop
some or all of the buffers from the page when it is being truncated. It
-returns zero on success. If ->invalidatepage is zero, the kernel uses
-block_invalidatepage() instead. The filesystem must exclusively acquire
-invalidate_lock before invalidating page cache in truncate / hole punch path
-(and thus calling into ->invalidatepage) to block races between page cache
-invalidation and page cache filling functions (fault, read, ...).
+returns zero on success. The filesystem must exclusively acquire
+invalidate_lock before invalidating page cache in truncate / hole punch
+path (and thus calling into ->invalidate_folio) to block races between page
+cache invalidation and page cache filling functions (fault, read, ...).
->releasepage() is called when the kernel is about to try to drop the
buffers from the page in preparation for freeing it. It returns zero to
@@ -386,9 +386,9 @@ the kernel assumes that the fs has no private interest in the buffers.
->freepage() is called when the kernel is done dropping the page
from the page cache.
-->launder_page() may be called prior to releasing a page if
-it is still found to be dirty. It returns zero if the page was successfully
-cleaned, or an error value if not. Note that in order to prevent the page
+->launder_folio() may be called prior to releasing a folio if
+it is still found to be dirty. It returns zero if the folio was successfully
+cleaned, or an error value if not. Note that in order to prevent the folio
getting mapped back in and redirtied, it needs to be kept locked
across the entire operation.
@@ -438,13 +438,13 @@ prototypes::
locking rules:
====================== ============= ================= =========
-ops inode->i_lock blocked_lock_lock may block
+ops flc_lock blocked_lock_lock may block
====================== ============= ================= =========
-lm_notify: yes yes no
+lm_notify: no yes no
lm_grant: no no no
lm_break: yes no no
lm_change yes no no
-lm_breaker_owns_lease: no no no
+lm_breaker_owns_lease: yes no no
====================== ============= ================= =========
buffer_head
diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst
index 136f8da3d0e2..4f373a8ec47b 100644
--- a/Documentation/filesystems/netfs_library.rst
+++ b/Documentation/filesystems/netfs_library.rst
@@ -462,6 +462,10 @@ operation table looks like the following::
struct iov_iter *iter,
netfs_io_terminated_t term_func,
void *term_func_priv);
+
+ int (*query_occupancy)(struct netfs_cache_resources *cres,
+ loff_t start, size_t len, size_t granularity,
+ loff_t *_data_start, size_t *_data_len);
};
With a termination handler function pointer::
@@ -536,6 +540,18 @@ The methods defined in the table are:
indicating whether the termination is definitely happening in the caller's
context.
+ * ``query_occupancy()``
+
+ [Required] Called to find out where the next piece of data is within a
+ particular region of the cache. The start and length of the region to be
+ queried are passed in, along with the granularity to which the answer needs
+ to be aligned. The function passes back the start and length of the data,
+ if any, available within that region. Note that there may be a hole at the
+ front.
+
+ It returns 0 if some data was found, -ENODATA if there was no usable data
+ within the region or -ENOBUFS if there is no caching on this file.
+
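+For example, a caller holding valid cache resources might probe the
+first megabyte of a file like this (a minimal sketch)::
+
+	loff_t data_start;
+	size_t data_len;
+	int ret;
+
+	/* Look for data in the first 1MiB, aligned to 256KiB granules. */
+	ret = cres->ops->query_occupancy(cres, 0, SZ_1M, SZ_256K,
+					 &data_start, &data_len);
+	if (ret == -ENODATA)
+		; /* The whole region is a hole. */
+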
Note that these methods are passed a pointer to the cache resource structure,
not the read request structure as they could be used in other situations where
there isn't a read request structure as well, such as writing dirty data to the
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index bf19fd6b86e7..7c1583dbeb59 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -45,6 +45,12 @@ typically between calling iget_locked() and unlocking the inode.
At some point that will become mandatory.
+**mandatory**
+
+The foo_inode_info should always be allocated through alloc_inode_sb()
+rather than kmem_cache_alloc() or kmalloc(), so that the inode reclaim
+context is set up correctly.
+
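+For example (``foo_inode_cachep`` and the embedded ``vfs_inode`` member
+are illustrative names)::
+
+	static struct inode *foo_alloc_inode(struct super_block *sb)
+	{
+		struct foo_inode_info *fi;
+
+		fi = alloc_inode_sb(sb, foo_inode_cachep, GFP_KERNEL);
+		if (!fi)
+			return NULL;
+		return &fi->vfs_inode;
+	}
+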
---
**mandatory**
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index bf5c48066fac..4f14edf93941 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -658,7 +658,7 @@ pages, however the address_space has finer control of write sizes.
The read process essentially only requires 'readpage'. The write
process is more complicated and uses write_begin/write_end or
-set_page_dirty to write data into the address_space, and writepage and
+dirty_folio to write data into the address_space, and writepage and
writepages to writeback data to storage.
Adding and removing pages to/from an address_space is protected by the
@@ -724,7 +724,7 @@ cache in your filesystem. The following members are defined:
int (*writepage)(struct page *page, struct writeback_control *wbc);
int (*readpage)(struct file *, struct page *);
int (*writepages)(struct address_space *, struct writeback_control *);
- int (*set_page_dirty)(struct page *page);
+ bool (*dirty_folio)(struct address_space *, struct folio *);
void (*readahead)(struct readahead_control *);
int (*readpages)(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages);
@@ -735,7 +735,7 @@ cache in your filesystem. The following members are defined:
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata);
sector_t (*bmap)(struct address_space *, sector_t);
- void (*invalidatepage) (struct page *, unsigned int, unsigned int);
+ void (*invalidate_folio) (struct folio *, size_t start, size_t len);
int (*releasepage) (struct page *, int);
void (*freepage)(struct page *);
ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
@@ -745,10 +745,10 @@ cache in your filesystem. The following members are defined:
int (*migratepage) (struct page *, struct page *);
/* put migration-failed page back to right list */
void (*putback_page) (struct page *);
- int (*launder_page) (struct page *);
+ int (*launder_folio) (struct folio *);
- int (*is_partially_uptodate) (struct page *, unsigned long,
- unsigned long);
+ bool (*is_partially_uptodate) (struct folio *, size_t from,
+ size_t count);
void (*is_dirty_writeback) (struct page *, bool *, bool *);
int (*error_remove_page) (struct mapping *mapping, struct page *page);
int (*swap_activate)(struct file *);
@@ -793,25 +793,29 @@ cache in your filesystem. The following members are defined:
This will choose pages from the address space that are tagged as
DIRTY and will pass them to ->writepage.
-``set_page_dirty``
- called by the VM to set a page dirty. This is particularly
- needed if an address space attaches private data to a page, and
- that data needs to be updated when a page is dirtied. This is
+``dirty_folio``
+ called by the VM to mark a folio as dirty. This is particularly
+ needed if an address space attaches private data to a folio, and
+ that data needs to be updated when a folio is dirtied. This is
called, for example, when a memory mapped page gets modified.
- If defined, it should set the PageDirty flag, and the
- PAGECACHE_TAG_DIRTY tag in the radix tree.
+ If defined, it should set the folio dirty flag, and the
+ PAGECACHE_TAG_DIRTY search mark in i_pages.
``readahead``
Called by the VM to read pages associated with the address_space
object. The pages are consecutive in the page cache and are
locked. The implementation should decrement the page refcount
after starting I/O on each page. Usually the page will be
- unlocked by the I/O completion handler. If the filesystem decides
- to stop attempting I/O before reaching the end of the readahead
- window, it can simply return. The caller will decrement the page
- refcount and unlock the remaining pages for you. Set PageUptodate
- if the I/O completes successfully. Setting PageError on any page
- will be ignored; simply unlock the page if an I/O error occurs.
+ unlocked by the I/O completion handler. The set of pages are
+ divided into some sync pages followed by some async pages,
+ rac->ra->async_size gives the number of async pages. The
+ filesystem should attempt to read all sync pages but may decide
+ to stop once it reaches the async pages. If it does decide to
+ stop attempting I/O, it can simply return. The caller will
+ remove the remaining pages from the address space, unlock them
+ and decrement the page refcount. Set PageUptodate if the I/O
+ completes successfully. Setting PageError on any page will be
+ ignored; simply unlock the page if an I/O error occurs.
``readpages``
called by the VM to read pages associated with the address_space
@@ -868,15 +872,15 @@ cache in your filesystem. The following members are defined:
to find out where the blocks in the file are and uses those
addresses directly.
-``invalidatepage``
- If a page has PagePrivate set, then invalidatepage will be
- called when part or all of the page is to be removed from the
+``invalidate_folio``
+ If a folio has private data, then invalidate_folio will be
+ called when part or all of the folio is to be removed from the
address space. This generally corresponds to either a
truncation, punch hole or a complete invalidation of the address
space (in the latter case 'offset' will always be 0 and 'length'
- will be PAGE_SIZE). Any private data associated with the page
+ will be folio_size()). Any private data associated with the page
should be updated to reflect this truncation. If offset is 0
- and length is PAGE_SIZE, then the private data should be
+ and length is folio_size(), then the private data should be
released, because the page must be able to be completely
discarded. This may be done by calling the ->releasepage
function, but in this case the release MUST succeed.
@@ -930,16 +934,16 @@ cache in your filesystem. The following members are defined:
``putback_page``
Called by the VM when isolated page's migration fails.
-``launder_page``
- Called before freeing a page - it writes back the dirty page.
- To prevent redirtying the page, it is kept locked during the
+``launder_folio``
+ Called before freeing a folio - it writes back the dirty folio.
+ To prevent redirtying the folio, it is kept locked during the
whole operation.
``is_partially_uptodate``
Called by the VM when reading a file through the pagecache when
- the underlying blocksize != pagesize. If the required block is
- up to date then the read can complete without needing the IO to
- bring the whole page up to date.
+ the underlying blocksize is smaller than the size of the folio.
+ If the required block is up to date then the read can complete
+ without needing I/O to bring the whole page up to date.
``is_dirty_writeback``
Called by the VM when attempting to reclaim a page. The VM uses
diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst
index 74b830b2fd59..c414646a1bb4 100644
--- a/Documentation/firmware-guide/acpi/enumeration.rst
+++ b/Documentation/firmware-guide/acpi/enumeration.rst
@@ -19,16 +19,17 @@ possible we decided to do following:
platform devices.
- Devices behind real busses where there is a connector resource
- are represented as struct spi_device or struct i2c_device
- (standard UARTs are not busses so there is no struct uart_device).
+ are represented as struct spi_device or struct i2c_device. Note
+ that standard UARTs are not busses so there is no struct uart_device,
+ although some of them may be represented by struct serdev_device.
As both ACPI and Device Tree represent a tree of devices (and their
resources) this implementation follows the Device Tree way as much as
possible.
-The ACPI implementation enumerates devices behind busses (platform, SPI and
-I2C), creates the physical devices and binds them to their ACPI handle in
-the ACPI namespace.
+The ACPI implementation enumerates devices behind busses (platform, SPI,
+I2C, and in some cases UART), creates the physical devices and binds them
+to their ACPI handle in the ACPI namespace.
This means that when ACPI_HANDLE(dev) returns non-NULL the device was
enumerated from ACPI namespace. This handle can be used to extract other
@@ -46,18 +47,16 @@ some minor changes.
Adding ACPI support for an existing driver should be pretty
straightforward. Here is the simplest example::
- #ifdef CONFIG_ACPI
static const struct acpi_device_id mydrv_acpi_match[] = {
/* ACPI IDs here */
{ }
};
MODULE_DEVICE_TABLE(acpi, mydrv_acpi_match);
- #endif
static struct platform_driver my_driver = {
...
.driver = {
- .acpi_match_table = ACPI_PTR(mydrv_acpi_match),
+ .acpi_match_table = mydrv_acpi_match,
},
};
@@ -155,7 +154,7 @@ Here is what the ACPI namespace for a SPI slave might look like::
Device (EEP0)
{
Name (_ADR, 1)
- Name (_CID, Package() {
+ Name (_CID, Package () {
"ATML0025",
"AT25",
})
@@ -172,59 +171,51 @@ The SPI device drivers only need to add ACPI IDs in a similar way than with
the platform device drivers. Below is an example where we add ACPI support
to at25 SPI eeprom driver (this is meant for the above ACPI snippet)::
- #ifdef CONFIG_ACPI
static const struct acpi_device_id at25_acpi_match[] = {
{ "AT25", 0 },
- { },
+ { }
};
MODULE_DEVICE_TABLE(acpi, at25_acpi_match);
- #endif
static struct spi_driver at25_driver = {
.driver = {
...
- .acpi_match_table = ACPI_PTR(at25_acpi_match),
+ .acpi_match_table = at25_acpi_match,
},
};
Note that this driver actually needs more information like page size of the
-eeprom etc. but at the time writing this there is no standard way of
-passing those. One idea is to return this in _DSM method like::
+eeprom, etc. This information can be passed via _DSD method like::
Device (EEP0)
{
...
- Method (_DSM, 4, NotSerialized)
+ Name (_DSD, Package ()
{
- Store (Package (6)
+ ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+ Package ()
{
- "byte-len", 1024,
- "addr-mode", 2,
- "page-size, 32
- }, Local0)
-
- // Check UUIDs etc.
-
- Return (Local0)
- }
-
-Then the at25 SPI driver can get this configuration by calling _DSM on its
-ACPI handle like::
-
- struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
- struct acpi_object_list input;
- acpi_status status;
+ Package () { "size", 1024 },
+ Package () { "pagesize", 32 },
+ Package () { "address-width", 16 },
+ }
+ })
+ }
- /* Fill in the input buffer */
+Then the at25 SPI driver can get this configuration by calling device property
+APIs during ->probe() phase like::
- status = acpi_evaluate_object(ACPI_HANDLE(&spi->dev), "_DSM",
- &input, &output);
- if (ACPI_FAILURE(status))
- /* Handle the error */
+ err = device_property_read_u32(dev, "size", &size);
+ if (err)
+ ...error handling...
- /* Extract the data here */
+ err = device_property_read_u32(dev, "pagesize", &page_size);
+ if (err)
+ ...error handling...
- kfree(output.pointer);
+ err = device_property_read_u32(dev, "address-width", &addr_width);
+ if (err)
+ ...error handling...
I2C serial bus support
======================
@@ -237,26 +228,24 @@ registered.
Below is an example of how to add ACPI support to the existing mpu3050
input driver::
- #ifdef CONFIG_ACPI
static const struct acpi_device_id mpu3050_acpi_match[] = {
{ "MPU3050", 0 },
- { },
+ { }
};
MODULE_DEVICE_TABLE(acpi, mpu3050_acpi_match);
- #endif
static struct i2c_driver mpu3050_i2c_driver = {
.driver = {
.name = "mpu3050",
- .owner = THIS_MODULE,
.pm = &mpu3050_pm,
.of_match_table = mpu3050_of_match,
- .acpi_match_table = ACPI_PTR(mpu3050_acpi_match),
+ .acpi_match_table = mpu3050_acpi_match,
},
.probe = mpu3050_probe,
.remove = mpu3050_remove,
.id_table = mpu3050_ids,
};
+ module_i2c_driver(mpu3050_i2c_driver);
Reference to PWM device
=======================
@@ -282,9 +271,9 @@ introduced, i.e.::
}
}
}
-
})
...
+ }
In the above example the PWM-based LED driver references to the PWM channel 0
of \_SB.PCI0.PWM device with initial period setting equal to 600 ms (note that
@@ -306,26 +295,13 @@ For example::
{
Name (SBUF, ResourceTemplate()
{
- ...
// Used to power on/off the device
- GpioIo (Exclusive, PullDefault, 0x0000, 0x0000,
- IoRestrictionOutputOnly, "\\_SB.PCI0.GPI0",
- 0x00, ResourceConsumer,,)
- {
- // Pin List
- 0x0055
- }
+ GpioIo (Exclusive, PullNone, 0, 0, IoRestrictionOutputOnly,
+ "\\_SB.PCI0.GPI0", 0, ResourceConsumer) { 85 }
// Interrupt for the device
- GpioInt (Edge, ActiveHigh, ExclusiveAndWake, PullNone,
- 0x0000, "\\_SB.PCI0.GPI0", 0x00, ResourceConsumer,,)
- {
- // Pin list
- 0x0058
- }
-
- ...
-
+ GpioInt (Edge, ActiveHigh, ExclusiveAndWake, PullNone, 0,
+ "\\_SB.PCI0.GPI0", 0, ResourceConsumer) { 88 }
}
Return (SBUF)
@@ -337,11 +313,12 @@ For example::
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
Package ()
{
- Package () {"power-gpios", Package() {^DEV, 0, 0, 0 }},
- Package () {"irq-gpios", Package() {^DEV, 1, 0, 0 }},
+ Package () { "power-gpios", Package () { ^DEV, 0, 0, 0 } },
+ Package () { "irq-gpios", Package () { ^DEV, 1, 0, 0 } },
}
})
...
+ }
These GPIO numbers are controller relative and path "\\_SB.PCI0.GPI0"
specifies the path to the controller. In order to use these GPIOs in Linux
@@ -460,10 +437,10 @@ namespace link::
Device (TMP0)
{
Name (_HID, "PRP0001")
- Name (_DSD, Package() {
+ Name (_DSD, Package () {
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
Package () {
- Package (2) { "compatible", "ti,tmp75" },
+ Package () { "compatible", "ti,tmp75" },
}
})
Method (_CRS, 0, Serialized)
diff --git a/Documentation/firmware-guide/acpi/gpio-properties.rst b/Documentation/firmware-guide/acpi/gpio-properties.rst
index df4b711053ee..eaec732cc77c 100644
--- a/Documentation/firmware-guide/acpi/gpio-properties.rst
+++ b/Documentation/firmware-guide/acpi/gpio-properties.rst
@@ -21,18 +21,18 @@ index, like the ASL example below shows::
Name (_CRS, ResourceTemplate ()
{
GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly,
- "\\_SB.GPO0", 0, ResourceConsumer) {15}
+ "\\_SB.GPO0", 0, ResourceConsumer) { 15 }
GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly,
- "\\_SB.GPO0", 0, ResourceConsumer) {27, 31}
+ "\\_SB.GPO0", 0, ResourceConsumer) { 27, 31 }
})
Name (_DSD, Package ()
{
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
Package ()
- {
- Package () {"reset-gpios", Package() {^BTH, 1, 1, 0 }},
- Package () {"shutdown-gpios", Package() {^BTH, 0, 0, 0 }},
+ {
+ Package () { "reset-gpios", Package () { ^BTH, 1, 1, 0 } },
+ Package () { "shutdown-gpios", Package () { ^BTH, 0, 0, 0 } },
}
})
}
@@ -123,17 +123,17 @@ Example::
// _DSD Hierarchical Properties Extension UUID
ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
Package () {
- Package () {"hog-gpio8", "G8PU"}
+ Package () { "hog-gpio8", "G8PU" }
}
})
Name (G8PU, Package () {
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
Package () {
- Package () {"gpio-hog", 1},
- Package () {"gpios", Package () {8, 0}},
- Package () {"output-high", 1},
- Package () {"line-name", "gpio8-pullup"},
+ Package () { "gpio-hog", 1 },
+ Package () { "gpios", Package () { 8, 0 } },
+ Package () { "output-high", 1 },
+ Package () { "line-name", "gpio8-pullup" },
}
})
@@ -266,15 +266,17 @@ have a device like below::
Name (_CRS, ResourceTemplate () {
GpioIo (Exclusive, PullNone, 0, 0, IoRestrictionNone,
- "\\_SB.GPO0", 0, ResourceConsumer) {15}
+ "\\_SB.GPO0", 0, ResourceConsumer) { 15 }
GpioIo (Exclusive, PullNone, 0, 0, IoRestrictionNone,
- "\\_SB.GPO0", 0, ResourceConsumer) {27}
+ "\\_SB.GPO0", 0, ResourceConsumer) { 27 }
})
}
The driver might expect to get the right GPIO when it does::
desc = gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+ if (IS_ERR(desc))
+ ...error handling...
but since there is no way to know the mapping between "reset" and
the GpioIo() in _CRS desc will hold ERR_PTR(-ENOENT).
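+One way for the driver to provide such a mapping is a sketch along
+these lines (names are illustrative; the full description of these
+structures is given elsewhere in this document)::
+
+	static const struct acpi_gpio_params reset_gpio = { 0, 0, false };
+	static const struct acpi_gpio_params shutdown_gpio = { 1, 0, false };
+
+	static const struct acpi_gpio_mapping bar_acpi_gpios[] = {
+		{ "reset-gpios", &reset_gpio, 1 },
+		{ "shutdown-gpios", &shutdown_gpio, 1 },
+		{ }
+	};
+
+	...
+	ret = devm_acpi_dev_add_driver_gpios(dev, bar_acpi_gpios);
+	if (ret)
+		...error handling...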
diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst
index da138dd39883..a1212b5b3026 100644
--- a/Documentation/gpu/todo.rst
+++ b/Documentation/gpu/todo.rst
@@ -300,30 +300,6 @@ Contact: Daniel Vetter, Noralf Tronnes
Level: Advanced
-Garbage collect fbdev scrolling acceleration
---------------------------------------------
-
-Scroll acceleration has been disabled in fbcon. Now it works as the old
-SCROLL_REDRAW mode. A ton of code was removed in fbcon.c and the hook bmove was
-removed from fbcon_ops.
-Remaining tasks:
-
-- a bunch of the hooks in fbcon_ops could be removed or simplified by calling
- directly instead of the function table (with a switch on p->rotate)
-
-- fb_copyarea is unused after this, and can be deleted from all drivers
-
-- after that, fb_copyarea can be deleted from fb_ops in include/linux/fb.h as
- well as cfb_copyarea
-
-Note that not all acceleration code can be deleted, since clearing and cursor
-support is still accelerated, which might be good candidates for further
-deletion projects.
-
-Contact: Daniel Vetter
-
-Level: Intermediate
-
idr_init_base()
---------------
diff --git a/Documentation/hwmon/aquacomputer_d5next.rst b/Documentation/hwmon/aquacomputer_d5next.rst
index 1f4bb4ba2e4b..3373e27b707d 100644
--- a/Documentation/hwmon/aquacomputer_d5next.rst
+++ b/Documentation/hwmon/aquacomputer_d5next.rst
@@ -6,22 +6,21 @@ Kernel driver aquacomputer-d5next
Supported devices:
* Aquacomputer D5 Next watercooling pump
+* Aquacomputer Farbwerk 360 RGB controller
Author: Aleksa Savic
Description
-----------
-This driver exposes hardware sensors of the Aquacomputer D5 Next watercooling
-pump, which communicates through a proprietary USB HID protocol.
+This driver exposes hardware sensors of the listed Aquacomputer devices, which
+communicate through proprietary USB HID protocols.
-Available sensors are pump and fan speed, power, voltage and current, as
-well as coolant temperature. Also available through debugfs are the serial
-number, firmware version and power-on count.
-
-Attaching a fan is optional and allows it to be controlled using temperature
-curves directly from the pump. If it's not connected, the fan-related sensors
-will report zeroes.
+For the D5 Next pump, available sensors are pump and fan speed, power, voltage
+and current, as well as coolant temperature. Also available through debugfs are
+the serial number, firmware version and power-on count. Attaching a fan to it is
+optional and allows it to be controlled using temperature curves directly from the
+pump. If it's not connected, the fan-related sensors will report zeroes.
The pump can be configured either through software or via its physical
interface. Configuring the pump through this driver is not implemented, as it
@@ -29,33 +28,31 @@ seems to require sending it a complete configuration. That includes addressable
RGB LEDs, for which there is no standard sysfs interface. Thus, that task is
better suited for userspace tools.
+The Farbwerk 360 exposes four temperature sensors. Depending on the device,
+not all sysfs and debugfs entries will be available.
+
Usage notes
-----------
-The pump communicates via HID reports. The driver is loaded automatically by
+The devices communicate via HID reports. The driver is loaded automatically by
the kernel and supports hotswapping.
Sysfs entries
-------------
-============ =============================================
-temp1_input Coolant temperature (in millidegrees Celsius)
-fan1_input Pump speed (in RPM)
-fan2_input Fan speed (in RPM)
-power1_input Pump power (in micro Watts)
-power2_input Fan power (in micro Watts)
-in0_input Pump voltage (in milli Volts)
-in1_input Fan voltage (in milli Volts)
-in2_input +5V rail voltage (in milli Volts)
-curr1_input Pump current (in milli Amperes)
-curr2_input Fan current (in milli Amperes)
-============ =============================================
+================ =============================================
+temp[1-4]_input Temperature sensors (in millidegrees Celsius)
+fan[1-2]_input Pump/fan speed (in RPM)
+power[1-2]_input Pump/fan power (in micro Watts)
+in[0-2]_input Pump/fan voltage (in milli Volts)
+curr[1-2]_input Pump/fan current (in milli Amperes)
+================ =============================================
Debugfs entries
---------------
-================ ===============================================
-serial_number Serial number of the pump
+================ =================================================
+serial_number Serial number of the device
firmware_version Version of installed firmware
-power_cycles Count of how many times the pump was powered on
-================ ===============================================
+power_cycles Count of how many times the device was powered on
+================ =================================================
diff --git a/Documentation/hwmon/asus_ec_sensors.rst b/Documentation/hwmon/asus_ec_sensors.rst
new file mode 100644
index 000000000000..e7e8f1640f45
--- /dev/null
+++ b/Documentation/hwmon/asus_ec_sensors.rst
@@ -0,0 +1,54 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+Kernel driver asus_ec_sensors
+=================================
+
+Supported boards:
+ * PRIME X570-PRO,
+ * Pro WS X570-ACE,
+ * ROG CROSSHAIR VIII DARK HERO,
+ * ROG CROSSHAIR VIII HERO (WI-FI)
+ * ROG CROSSHAIR VIII FORMULA,
+ * ROG CROSSHAIR VIII HERO,
+ * ROG CROSSHAIR VIII IMPACT,
+ * ROG STRIX B550-E GAMING,
+ * ROG STRIX B550-I GAMING,
+ * ROG STRIX X570-E GAMING,
+ * ROG STRIX X570-F GAMING,
+ * ROG STRIX X570-I GAMING
+
+Authors:
+ - Eugene Shalygin <eugene.shalygin@gmail.com>
+
+Description:
+------------
+ASUS mainboards publish hardware monitoring information via the Super I/O
+chip and the ACPI embedded controller (EC) registers. Some of the sensors
+are only available via the EC.
+
+The driver is aware of and reads the following sensors:
+
+1. Chipset (PCH) temperature
+2. CPU package temperature
+3. Motherboard temperature
+4. Readings from the T_Sensor header
+5. VRM temperature
+6. CPU_Opt fan RPM
+7. VRM heatsink fan RPM
+8. Chipset fan RPM
+9. Readings from the "Water flow meter" header (RPM)
+10. Readings from the "Water In" and "Water Out" temperature headers
+11. CPU current
+12. CPU core voltage
+
+Sensor values are read from the EC registers, and to avoid racing with
+the board firmware the driver acquires the ACPI mutex, the one used by
+the WMI when its methods access the EC.
+
+Module Parameters
+-----------------
+ * mutex_path: string
+ The driver holds the path to the ACPI mutex for each board (actually,
+ the path is mostly identical for them). If ASUS changes this path
+ in a future BIOS update, this parameter can be used to override
+ the value stored in the driver until the driver gets updated.
diff --git a/Documentation/hwmon/dell-smm-hwmon.rst b/Documentation/hwmon/dell-smm-hwmon.rst
index beec88491171..d3323a96665d 100644
--- a/Documentation/hwmon/dell-smm-hwmon.rst
+++ b/Documentation/hwmon/dell-smm-hwmon.rst
@@ -165,3 +165,183 @@ obtain the same information and to control the fan status. The ioctl
interface can be accessed from C programs or from shell using the
i8kctl utility. See the source file of ``i8kutils`` for more
information on how to use the ioctl interface.
+
+SMM Interface
+-------------
+
+.. warning:: The SMM interface was reverse-engineered by trial-and-error
+ since Dell did not provide any documentation;
+ please keep that in mind.
+
+The driver uses the SMM interface to send commands to the system BIOS.
+This interface is normally used by Dell's 32-bit diagnostic program or
+on newer notebook models by the built-in BIOS diagnostics.
+The SMM is triggered by writing to the special ioports ``0xb2`` and ``0x84``,
+and may cause short hangs when the BIOS code takes too long to
+execute.
+
+The SMM handler inside the system BIOS looks at the contents of the
+``eax``, ``ebx``, ``ecx``, ``edx``, ``esi`` and ``edi`` registers.
+Each register has a special purpose:
+
+=============== ==================================
+Register Purpose
+=============== ==================================
+eax Holds the command code before SMM,
+ holds the first result after SMM.
+ebx Holds the arguments.
+ecx Unknown, set to 0.
+edx Holds the second result after SMM.
+esi Unknown, set to 0.
+edi Unknown, set to 0.
+=============== ==================================
+
+The SMM handler can signal a failure by either:
+
+- setting the lower sixteen bits of ``eax`` to ``0xffff``
+- not modifying ``eax`` at all
+- setting the carry flag
+
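+Expressed as a C structure, the register file handed to the SMM handler
+can be modelled like this (purely illustrative)::
+
+	struct smm_regs {
+		unsigned int eax;	/* command code in, first result out */
+		unsigned int ebx;	/* arguments */
+		unsigned int ecx;	/* unknown, set to 0 */
+		unsigned int edx;	/* second result out */
+		unsigned int esi;	/* unknown, set to 0 */
+		unsigned int edi;	/* unknown, set to 0 */
+	};
+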
+SMM command codes
+-----------------
+
+=============== ======================= ================================================
+Command Code Command Name Description
+=============== ======================= ================================================
+``0x0025`` Get Fn key status Returns the Fn key pressed after SMM:
+
+ - 9th bit in ``eax`` indicates Volume up
+ - 10th bit in ``eax`` indicates Volume down
+ - both bits indicate Volume mute
+
+``0xa069`` Get power status Returns current power status after SMM:
+
+ - 1st bit in ``eax`` indicates Battery connected
+ - 3rd bit in ``eax`` indicates AC connected
+
+``0x00a3`` Get fan state Returns current fan state after SMM:
+
+ - 1st byte in ``eax`` holds the current
+ fan state (0 - 2 or 3)
+
+``0x01a3`` Set fan state Sets the fan speed:
+
+ - 1st byte in ``ebx`` holds the fan number
+ - 2nd byte in ``ebx`` holds the desired
+ fan state (0 - 2 or 3)
+
+``0x02a3`` Get fan speed Returns the current fan speed in RPM:
+
+ - 1st byte in ``ebx`` holds the fan number
+ - 1st word in ``eax`` holds the current
+ fan speed in RPM (after SMM)
+
+``0x03a3`` Get fan type Returns the fan type:
+
+ - 1st byte in ``ebx`` holds the fan number
+ - 1st byte in ``eax`` holds the
+ fan type (after SMM):
+
+ - 5th bit indicates docking fan
+ - 1 indicates Processor fan
+ - 2 indicates Motherboard fan
+ - 3 indicates Video fan
+ - 4 indicates Power supply fan
+ - 5 indicates Chipset fan
+ - 6 indicates other fan type
+
+``0x04a3`` Get nominal fan speed Returns the nominal RPM in each fan state:
+
+ - 1st byte in ``ebx`` holds the fan number
+ - 2nd byte in ``ebx`` holds the fan state
+ in question (0 - 2 or 3)
+ - 1st word in ``eax`` holds the nominal
+ fan speed in RPM (after SMM)
+
+``0x05a3`` Get fan speed tolerance Returns the speed tolerance for each fan state:
+
+ - 1st byte in ``ebx`` holds the fan number
+ - 2nd byte in ``ebx`` holds the fan state
+ in question (0 - 2 or 3)
+ - 1st byte in ``eax`` returns the speed
+ tolerance
+
+``0x10a3`` Get sensor temperature Returns the measured temperature:
+
+ - 1st byte in ``ebx`` holds the sensor number
+ - 1st byte in ``eax`` holds the measured
+ temperature (after SMM)
+
+``0x11a3`` Get sensor type Returns the sensor type:
+
+ - 1st byte in ``ebx`` holds the sensor number
+ - 1st byte in ``eax`` holds the
+ temperature type (after SMM):
+
+ - 1 indicates CPU sensor
+ - 2 indicates GPU sensor
+ - 3 indicates SODIMM sensor
+ - 4 indicates other sensor type
+ - 5 indicates Ambient sensor
+ - 6 indicates other sensor type
+
+``0xfea3`` Get SMM signature Returns Dell signature if interface
+ is supported (after SMM):
+
+ - ``eax`` holds 1145651527
+ (0x44494147 or "DIAG")
+ - ``edx`` holds 1145392204
+ (0x44454c4c or "DELL")
+
+``0xffa3`` Get SMM signature Same as ``0xfea3``, check both.
+=============== ======================= ================================================
+
+There are additional commands for enabling (``0x31a3`` or ``0x35a3``) and
+disabling (``0x30a3`` or ``0x34a3``) automatic fan speed control.
+However, these commands cause severe side effects on many machines, so
+they are not used by default.
+
+On several machines (Inspiron 3505, Precision 490, Vostro 1720, ...), the
+fans support a 4th "magic" state, which signals the BIOS that automatic
+fan control should be enabled for a specific fan.
+However, some machines support a 4th regular fan state as well. In case
+of the "magic" state, the nominal RPM reported for this state is a
+placeholder value, which unfortunately is not always detectable.
+
+Firmware Bugs
+-------------
+
+The SMM calls can behave erratically on some machines:
+
+======================================================= =================
+Firmware Bug Affected Machines
+======================================================= =================
+Reading of fan states returns spurious errors.          Precision 490
+
+Reading of fan types causes erratic fan behaviour. Studio XPS 8000
+
+ Studio XPS 8100
+
+ Inspiron 580
+
+Fan-related SMM calls take too long (about 500ms). Inspiron 7720
+
+ Vostro 3360
+
+ XPS 13 9333
+
+ XPS 15 L502X
+======================================================= =================
+
+In case you experience similar issues on your Dell machine, please
+submit a bug report on bugzilla so we can apply workarounds.
+
+Limitations
+-----------
+
+The SMM calls can take too long to execute on some machines, causing
+short hangs and/or audio glitches.
+Also, the fan state and the automatic mode settings need to be
+restored after suspend.
+When reading a temperature sensor, values above 127 degrees indicate
+a BIOS read error or a deactivated sensor.
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index df20022c741f..9d2787a12a69 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -43,6 +43,7 @@ Hardware Monitoring Kernel Drivers
asb100
asc7621
aspeed-pwm-tacho
+ asus_ec_sensors
asus_wmi_ec_sensors
asus_wmi_sensors
bcm54140
@@ -160,6 +161,7 @@ Hardware Monitoring Kernel Drivers
pc87427
pcf8591
pim4328
+ pli1209bc
pm6764tr
pmbus
powr1220
@@ -193,6 +195,7 @@ Hardware Monitoring Kernel Drivers
tmp108
tmp401
tmp421
+ tmp464
tmp513
tps23861
tps40422
diff --git a/Documentation/hwmon/lm70.rst b/Documentation/hwmon/lm70.rst
index 6ddc5b67ccb5..11303a7e16a8 100644
--- a/Documentation/hwmon/lm70.rst
+++ b/Documentation/hwmon/lm70.rst
@@ -15,6 +15,10 @@ Supported chips:
Information: https://www.ti.com/product/tmp122
+ * Texas Instruments TMP125
+
+ Information: https://www.ti.com/product/tmp125
+
* National Semiconductor LM71
Datasheet: https://www.ti.com/product/LM71
@@ -53,6 +57,9 @@ The LM74 and TMP121/TMP122/TMP123/TMP124 are very similar; main difference is
The TMP122/TMP124 also feature configurable temperature thresholds.
+The TMP125 is less accurate and provides 10-bit temperature data
+with 0.25 degrees Celsius resolution.
+
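+A raw TMP125 sample could be converted to millidegrees Celsius with a
+hypothetical helper like this (assuming the 10-bit two's complement
+reading has already been extracted from the SPI frame)::
+
+	#include <linux/bitops.h>
+	#include <linux/types.h>
+
+	/* 0.25 degrees Celsius per LSB */
+	static inline int tmp125_raw_to_mcelsius(u16 raw)
+	{
+		return sign_extend32(raw, 9) * 250;
+	}
+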
The LM71 is also very similar; main difference is 14-bit temperature
data (0.03125 degrees celsius resolution).
diff --git a/Documentation/hwmon/max6639.rst b/Documentation/hwmon/max6639.rst
index 3da54225f83c..c85d285a3489 100644
--- a/Documentation/hwmon/max6639.rst
+++ b/Documentation/hwmon/max6639.rst
@@ -9,7 +9,7 @@ Supported chips:
Addresses scanned: I2C 0x2c, 0x2e, 0x2f
- Datasheet: http://pdfserv.maxim-ic.com/en/ds/MAX6639.pdf
+ Datasheet: https://datasheets.maximintegrated.com/en/ds/MAX6639-MAX6639F.pdf
Authors:
- He Changqing <hechangqing@semptian.com>
diff --git a/Documentation/hwmon/pli1209bc.rst b/Documentation/hwmon/pli1209bc.rst
new file mode 100644
index 000000000000..ea5b3f68a515
--- /dev/null
+++ b/Documentation/hwmon/pli1209bc.rst
@@ -0,0 +1,75 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver pli1209bc
+=======================
+
+Supported chips:
+
+ * Digital Supervisor PLI1209BC
+
+ Prefix: 'pli1209bc'
+
+ Addresses scanned: 0x50 - 0x5F
+
+ Datasheet: https://www.vicorpower.com/documents/datasheets/ds-PLI1209BCxyzz-VICOR.pdf
+
+Authors:
+ - Marcello Sylvester Bauer <sylv@sylv.io>
+
+Description
+-----------
+
+The Vicor PLI1209BC is an isolated digital power system supervisor that provides
+a communication interface between a host processor and one Bus Converter Module
+(BCM). The PLI communicates with a system controller via a PMBus-compatible
+interface over an isolated UART link. Through the PLI, the host processor
+can configure the BCM, set its protection limits, and monitor it.
+
+Sysfs entries
+-------------
+
+======================= ========================================================
+in1_label               "vin2"
+in1_input               Input voltage.
+in1_rated_min           Minimum rated input voltage.
+in1_rated_max           Maximum rated input voltage.
+in1_max                 Maximum input voltage.
+in1_max_alarm           Input voltage high alarm.
+in1_crit                Critical input voltage.
+in1_crit_alarm          Input voltage critical alarm.
+
+in2_label               "vout2"
+in2_input               Output voltage.
+in2_rated_min           Minimum rated output voltage.
+in2_rated_max           Maximum rated output voltage.
+in2_alarm               Output voltage alarm.
+
+curr1_label             "iin2"
+curr1_input             Input current.
+curr1_max               Maximum input current.
+curr1_max_alarm         Input current high alarm.
+curr1_crit              Critical input current.
+curr1_crit_alarm        Input current critical alarm.
+
+curr2_label             "iout2"
+curr2_input             Output current.
+curr2_crit              Critical output current.
+curr2_crit_alarm        Output current critical alarm.
+curr2_max               Maximum output current.
+curr2_max_alarm         Output current high alarm.
+
+power1_label            "pin2"
+power1_input            Input power.
+power1_alarm            Input power alarm.
+
+power2_label            "pout2"
+power2_input            Output power.
+power2_rated_max        Maximum rated output power.
+
+temp1_input             Die temperature.
+temp1_alarm             Die temperature alarm.
+temp1_max               Maximum die temperature.
+temp1_max_alarm         Die temperature high alarm.
+temp1_crit              Critical die temperature.
+temp1_crit_alarm        Die temperature critical alarm.
+======================= ========================================================
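+
+For example, the input voltage could be read like this (a sketch; the
+hwmon device number varies per system)::
+
+	# cat /sys/class/hwmon/hwmon4/in1_input
+	12000
+
+Following the standard hwmon sysfs conventions, the value is reported
+in millivolts.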
diff --git a/Documentation/hwmon/sch5627.rst b/Documentation/hwmon/sch5627.rst
index 187682e99114..ecb4fc84d045 100644
--- a/Documentation/hwmon/sch5627.rst
+++ b/Documentation/hwmon/sch5627.rst
@@ -20,6 +20,10 @@ Description
SMSC SCH5627 Super I/O chips include complete hardware monitoring
capabilities. They can monitor up to 5 voltages, 4 fans and 8 temperatures.
+In addition, the SCH5627 exports data describing which temperature sensors
+affect the speed of each fan. Setting pwmX_auto_channels_temp to 0 forces
+the corresponding fan to full speed until another value is written.
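+
+For example, fan 1 could be forced to full speed and later returned to
+automatic control like this (a sketch; the hwmon device number varies
+per system)::
+
+	# cd /sys/class/hwmon/hwmon2
+	# orig=$(cat pwm1_auto_channels_temp)       # remember current mapping
+	# echo 0 > pwm1_auto_channels_temp          # force fan 1 to full speed
+	# echo "$orig" > pwm1_auto_channels_temp    # restore automatic control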
+
The SMSC SCH5627 hardware monitoring part also contains an integrated
watchdog. In order for this watchdog to function some motherboard specific
initialization most be done by the BIOS, so if the watchdog is not enabled
diff --git a/Documentation/hwmon/sysfs-interface.rst b/Documentation/hwmon/sysfs-interface.rst
index 85652a6aaa3e..209626fb2405 100644
--- a/Documentation/hwmon/sysfs-interface.rst
+++ b/Documentation/hwmon/sysfs-interface.rst
@@ -99,6 +99,10 @@ Global attributes
`name`
The chip name.
+`label`
+ A descriptive label that allows a device to be uniquely identified
+ within the system.
+
`update_interval`
The interval at which the chip will update readings.
diff --git a/Documentation/hwmon/tmp464.rst b/Documentation/hwmon/tmp464.rst
new file mode 100644
index 000000000000..7596e7623d06
--- /dev/null
+++ b/Documentation/hwmon/tmp464.rst
@@ -0,0 +1,73 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver tmp464
+====================
+
+Supported chips:
+
+ * Texas Instruments TMP464
+
+ Prefix: 'tmp464'
+
+ Addresses scanned: I2C 0x48, 0x49, 0x4a and 0x4b
+
+ Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp464.html
+
+ * Texas Instruments TMP468
+
+ Prefix: 'tmp468'
+
+ Addresses scanned: I2C 0x48, 0x49, 0x4a and 0x4b
+
+ Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp468.html
+
+Authors:
+
+ Agathe Porte <agathe.porte@nokia.com>
+ Guenter Roeck <linux@roeck-us.net>
+
+Description
+-----------
+
+This driver implements support for Texas Instruments TMP464 and TMP468
+temperature sensor chips. TMP464 provides one local and four remote
+sensors. TMP468 provides one local and eight remote sensors.
+Temperature is measured in degrees Celsius. The chips communicate over
+I2C/SMBus and are specified over a temperature range of -40 to +125
+degrees Celsius. Resolution for both the local and remote channels is
+0.0625 degrees Celsius.
+
+The chips support only temperature measurements. The driver exports
+temperature values, limits, and alarms via the following sysfs files:
+
+**temp[1-9]_input**
+
+**temp[1-9]_max**
+
+**temp[1-9]_max_hyst**
+
+**temp[1-9]_max_alarm**
+
+**temp[1-9]_crit**
+
+**temp[1-9]_crit_alarm**
+
+**temp[1-9]_crit_hyst**
+
+**temp[2-9]_offset**
+
+**temp[2-9]_fault**
+
+Each sensor can be individually disabled via Devicetree or from sysfs
+via:
+
+**temp[1-9]_enable**
+
+If labels were specified in Devicetree, additional sysfs files will
+be present:
+
+**temp[1-9]_label**
+
+The update interval is configurable with the following sysfs attribute.
+
+**update_interval**
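+
+For example, the second temperature sensor could be disabled and the
+update interval changed like this (a sketch; the hwmon device number
+and values are illustrative)::
+
+	# echo 0 > /sys/class/hwmon/hwmon3/temp2_enable
+	# echo 1000 > /sys/class/hwmon/hwmon3/update_interval    # in ms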
diff --git a/Documentation/hwmon/xdpe12284.rst b/Documentation/hwmon/xdpe12284.rst
index 67d1f87808e5..a224dc74ad35 100644
--- a/Documentation/hwmon/xdpe12284.rst
+++ b/Documentation/hwmon/xdpe12284.rst
@@ -5,6 +5,10 @@ Kernel driver xdpe122
Supported chips:
+ * Infineon XDPE11280
+
+ Prefix: 'xdpe11280'
+
* Infineon XDPE12254
Prefix: 'xdpe12254'
@@ -20,10 +24,10 @@ Authors:
Description
-----------
-This driver implements support for Infineon Multi-phase XDPE122 family
-dual loop voltage regulators.
-The family includes XDPE12284 and XDPE12254 devices.
-The devices from this family complaint with:
+This driver implements support for Infineon Multi-phase XDPE112 and XDPE122
+family dual loop voltage regulators.
+These families include XDPE11280, XDPE12284 and XDPE12254 devices.
+The devices from these families comply with:
- Intel VR13 and VR13HC rev 1.3, IMVP8 rev 1.2 and IMPVP9 rev 1.3 DC-DC
converter specification.
diff --git a/Documentation/index.rst b/Documentation/index.rst
index 2b4de3926858..b58692d687f6 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -166,6 +166,7 @@ to ReStructured Text format, or are simply too old.
.. toctree::
:maxdepth: 2
+ tools/index
staging/index
watch_queue
diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst
index e6cd40663ea5..4cbd50edf277 100644
--- a/Documentation/kernel-hacking/locking.rst
+++ b/Documentation/kernel-hacking/locking.rst
@@ -295,7 +295,7 @@ Pete Zaitcev gives the following summary:
- If you are in a process context (any syscall) and want to lock other
process out, use a mutex. You can take a mutex and sleep
- (``copy_from_user*(`` or ``kmalloc(x,GFP_KERNEL)``).
+ (``copy_from_user()`` or ``kmalloc(x,GFP_KERNEL)``).
- Otherwise (== data can be touched in an interrupt), use
spin_lock_irqsave() and
diff --git a/Documentation/locking/locktypes.rst b/Documentation/locking/locktypes.rst
index 4fd7b70fcde1..bfa75ea1b66a 100644
--- a/Documentation/locking/locktypes.rst
+++ b/Documentation/locking/locktypes.rst
@@ -247,7 +247,7 @@ based on rt_mutex which changes the semantics:
Non-PREEMPT_RT kernels disable preemption to get this effect.
PREEMPT_RT kernels use a per-CPU lock for serialization which keeps
- preemption disabled. The lock disables softirq handlers and also
+ preemption enabled. The lock disables softirq handlers and also
prevents reentrancy due to task preemption.
PREEMPT_RT kernels preserve all other spinlock_t semantics:
diff --git a/Documentation/process/applying-patches.rst b/Documentation/process/applying-patches.rst
index c2121c1e55d7..c269f5e1a0a3 100644
--- a/Documentation/process/applying-patches.rst
+++ b/Documentation/process/applying-patches.rst
@@ -249,6 +249,10 @@ The 5.x.y (-stable) and 5.x patches live at
https://www.kernel.org/pub/linux/kernel/v5.x/
+The 5.x.y incremental patches live at
+
+ https://www.kernel.org/pub/linux/kernel/v5.x/incr/
+
The -rc patches are not stored on the webserver but are generated on
demand from git tags such as
@@ -308,12 +312,11 @@ versions.
If no 5.x.y kernel is available, then the highest numbered 5.x kernel is
the current stable kernel.
-.. note::
+The -stable team provides normal as well as incremental patches. The sections
+below describe how to apply these patches.
- The -stable team usually do make incremental patches available as well
- as patches against the latest mainline release, but I only cover the
- non-incremental ones below. The incremental ones can be found at
- https://www.kernel.org/pub/linux/kernel/v5.x/incr/
+Normal patches
+~~~~~~~~~~~~~~
These patches are not incremental, meaning that for example the 5.7.3
patch does not apply on top of the 5.7.2 kernel source, but rather on top
@@ -331,6 +334,21 @@ Here's a small example::
$ cd ..
$ mv linux-5.7.2 linux-5.7.3 # rename the kernel source dir
+Incremental patches
+~~~~~~~~~~~~~~~~~~~
+
+Incremental patches are different: instead of being applied on top
+of the base 5.x kernel, they are applied on top of the previous
+stable kernel (5.x.y-1).
+
+Here's an example of how to apply them::
+
+ $ cd ~/linux-5.7.2 # change to the kernel source dir
+ $ patch -p1 < ../patch-5.7.2-3 # apply the new 5.7.3 patch
+ $ cd ..
+ $ mv linux-5.7.2 linux-5.7.3 # rename the kernel source dir
+
+
The -rc kernels
===============
diff --git a/Documentation/process/deprecated.rst b/Documentation/process/deprecated.rst
index 388cb19f5dbb..a6e36d9c3d14 100644
--- a/Documentation/process/deprecated.rst
+++ b/Documentation/process/deprecated.rst
@@ -71,6 +71,9 @@ Instead, the 2-factor form of the allocator should be used::
foo = kmalloc_array(count, size, GFP_KERNEL);
+Specifically, kmalloc() can be replaced with kmalloc_array(), and
+kzalloc() can be replaced with kcalloc().
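+
+For instance, a zeroed array allocation would change like this (a minimal
+sketch; ``foo`` and ``count`` are placeholders)::
+
+	/* Before: the open-coded multiplication can overflow. */
+	foo = kzalloc(count * sizeof(*foo), GFP_KERNEL);
+
+	/* After: kcalloc() checks the multiplication and returns NULL on overflow. */
+	foo = kcalloc(count, sizeof(*foo), GFP_KERNEL);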
+
If no 2-factor form is available, the saturate-on-overflow helpers should
be used::
@@ -91,9 +94,20 @@ Instead, use the helper::
array usage and switch to a `flexible array member
<#zero-length-and-one-element-arrays>`_ instead.
-See array_size(), array3_size(), and struct_size(),
-for more details as well as the related check_add_overflow() and
-check_mul_overflow() family of functions.
+For other calculations, please use the size_mul(), size_add(), and
+size_sub() helpers in combination. For example, instead of::
+
+	foo = krealloc(ptr, current_size + chunk_size * (count - 3), GFP_KERNEL);
+
+use the helpers::
+
+	foo = krealloc(ptr, size_add(current_size,
+				     size_mul(chunk_size,
+					      size_sub(count, 3))), GFP_KERNEL);
+
+For more details, also see array3_size() and flex_array_size(),
+as well as the related check_mul_overflow(), check_add_overflow(),
+check_sub_overflow(), and check_shl_overflow() family of functions.
simple_strtol(), simple_strtoll(), simple_strtoul(), simple_strtoull()
----------------------------------------------------------------------
diff --git a/Documentation/process/handling-regressions.rst b/Documentation/process/handling-regressions.rst
new file mode 100644
index 000000000000..abb741b1aeee
--- /dev/null
+++ b/Documentation/process/handling-regressions.rst
@@ -0,0 +1,746 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
+.. See the bottom of this file for additional redistribution information.
+
+Handling regressions
+++++++++++++++++++++
+
+*We don't cause regressions* -- this document describes what this "first rule of
+Linux kernel development" means in practice for developers. It complements
+Documentation/admin-guide/reporting-regressions.rst, which covers the topic from a
+user's point of view; if you never read that text, go and at least skim over it
+before continuing here.
+
+The important bits (aka "The TL;DR")
+====================================
+
+#. Ensure subscribers of the `regression mailing list <https://lore.kernel.org/regressions/>`_
+ (regressions@lists.linux.dev) quickly become aware of any new regression
+ report:
+
+ * When receiving a mailed report that did not CC the list, bring it into the
+ loop by immediately sending at least a brief "Reply-all" with the list
+ CCed.
+
+ * Forward or bounce any reports submitted in bug trackers to the list.
+
+#. Make the Linux kernel regression tracking bot "regzbot" track the issue (this
+ is optional, but recommended):
+
+ * For mailed reports, check if the reporter included a line like ``#regzbot
+ introduced v5.13..v5.14-rc1``. If not, send a reply (with the regressions
+ list in CC) containing a paragraph like the following, which tells regzbot
+ when the issue started to happen::
+
+ #regzbot ^introduced 1f2e3d4c5b6a
+
+ * When forwarding reports from a bug tracker to the regressions list (see
+ above), include a paragraph like the following::
+
+ #regzbot introduced: v5.13..v5.14-rc1
+ #regzbot from: Some N. Ice Human <some.human@example.com>
+ #regzbot monitor: http://some.bugtracker.example.com/ticket?id=123456789
+
+#. When submitting fixes for regressions, add "Link:" tags to the patch
+ description pointing to all places where the issue was reported, as
+ mandated by Documentation/process/submitting-patches.rst and
+ :ref:`Documentation/process/5.Posting.rst <development_posting>`.
+
+#. Try to fix regressions quickly once the culprit has been identified; fixes
+ for most regressions should be merged within two weeks, but some need to be
+ resolved within two or three days.
+
+
+All the details on Linux kernel regressions relevant for developers
+===================================================================
+
+
+The important basics in more detail
+-----------------------------------
+
+
+What to do when receiving regression reports
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Ensure the Linux kernel's regression tracker and other subscribers of the
+`regression mailing list <https://lore.kernel.org/regressions/>`_
+(regressions@lists.linux.dev) become aware of any newly reported regression:
+
+ * When you receive a report by mail that did not CC the list, immediately bring
+ it into the loop by sending at least a brief "Reply-all" with the list CCed;
+ try to ensure it gets CCed again in case you reply to a reply that omitted
+ the list.
+
+ * If a report submitted in a bug tracker hits your Inbox, forward or bounce it
+ to the list. Consider checking the list archives beforehand to see whether
+ the reporter already forwarded the report as instructed by
+ Documentation/admin-guide/reporting-issues.rst.
+
+When doing either, consider making the Linux kernel regression tracking bot
+"regzbot" immediately start tracking the issue:
+
+ * For mailed reports, check if the reporter included a "regzbot command" like
+ ``#regzbot introduced 1f2e3d4c5b6a``. If not, send a reply (with the
+ regressions list in CC) with a paragraph like the following::
+
+ #regzbot ^introduced: v5.13..v5.14-rc1
+
+ This tells regzbot the version range in which the issue started to happen;
+ you can specify a range using commit-ids as well or state a single commit-id
+ in case the reporter bisected the culprit.
+
+ Note the caret (^) before the "introduced": it tells regzbot to treat the
+ parent mail (the one you reply to) as the initial report for the regression
+ you want to see tracked; that's important, as regzbot will later look out
+ for patches with "Link:" tags pointing to the report in the archives on
+ lore.kernel.org.
+
+ * When forwarding a regression reported in a bug tracker, include a paragraph
+ with these regzbot commands::
+
+ #regzbot introduced: 1f2e3d4c5b6a
+ #regzbot from: Some N. Ice Human <some.human@example.com>
+ #regzbot monitor: http://some.bugtracker.example.com/ticket?id=123456789
+
+ Regzbot will then automatically associate with the report any patches that
+ contain "Link:" tags pointing to your mail or the mentioned ticket.
+
+What's important when fixing regressions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You don't need to do anything special when submitting fixes for regressions; just
+remember to do what Documentation/process/submitting-patches.rst,
+:ref:`Documentation/process/5.Posting.rst <development_posting>`, and
+Documentation/process/stable-kernel-rules.rst already explain in more detail:
+
+ * Point to all places where the issue was reported using "Link:" tags::
+
+ Link: https://lore.kernel.org/r/30th.anniversary.repost@klaava.Helsinki.FI/
+ Link: https://bugzilla.kernel.org/show_bug.cgi?id=1234567890
+
+ * Add a "Fixes:" tag to specify the commit causing the regression.
+
+ * If the culprit was merged in an earlier development cycle, explicitly mark
+ the fix for backporting using the ``Cc: stable@vger.kernel.org`` tag.
+
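+Taken together, the tags in a fix for a regression might look like this
+(a made-up example reusing the placeholder IDs from above)::
+
+    Link: https://lore.kernel.org/r/30th.anniversary.repost@klaava.Helsinki.FI/
+    Fixes: 1f2e3d4c5b6a ("example: rework foo handling")
+    Cc: stable@vger.kernel.org
+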
+All this is expected from you and important when it comes to regressions, as
+these tags are of great value for everyone (you included) who might be looking
+into the issue weeks, months, or years later. These tags are also crucial for
+tools and scripts used by other kernel developers or Linux distributions; one of
+these tools is regzbot, which heavily relies on the "Link:" tags to associate
+reports of regressions with the changes resolving them.
+
+Prioritize work on fixing regressions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You should fix any reported regression as quickly as possible, to provide
+affected users with a solution in a timely manner and prevent more users from
+running into the issue; nevertheless developers need to take enough time and
+care to ensure regression fixes do not cause additional damage.
+
+In the end though, developers should do their best to prevent users from
+running into situations where a regression leaves them with only three options:
+"run a kernel with a regression that seriously impacts usage", "continue
+running an outdated and thus potentially insecure kernel version for more than
+two weeks after a regression's culprit was identified", and "downgrade to a
+still supported kernel series that lacks required features".
+
+How to realize this depends a lot on the situation. Here are a few rules of
+thumb for you, in order of importance:
+
+ * Prioritize work on handling regression reports and fixing regressions over all
+ other Linux kernel work, unless the latter concerns acute security issues or
+ bugs causing data loss or damage.
+
+ * Always consider reverting the culprit commits and reapplying them later
+ together with necessary fixes, as this might be the least dangerous and
+ quickest way to fix a regression.
+
+ * Developers should handle regressions in all supported kernel series, but are
+ free to delegate the work to the stable team, if the issue most likely never
+ occurred with mainline.
+
+ * Try to resolve any regressions introduced in the current development cycle
+ before its end. If you fear a fix might be too risky to apply only days
+ before a new
+ mainline release, let Linus decide: submit the fix separately to him as soon
+ as possible with the explanation of the situation. He then can make a call
+ and postpone the release if necessary, for example if multiple such changes
+ show up in his inbox.
+
+ * Address regressions in stable, longterm, or proper mainline releases with
+ more urgency than regressions in mainline pre-releases. That changes after
+ the release of the fifth pre-release, aka "-rc5": mainline then becomes as
+ important, to ensure all the improvements and fixes are ideally tested
+ together for at least one week before Linus releases a new mainline version.
+
+ * Fix regressions within two or three days, if they are critical for some
+ reason -- for example, if the issue is likely to affect many users of the
+ kernel series in question on all or certain architectures. Note, this
+ includes mainline, as issues like compile errors otherwise might prevent many
+ testers or continuous integration systems from testing the series.
+
+ * Aim to fix regressions within one week after the culprit was identified, if
+ the issue was introduced in either:
+
+ * a recent stable/longterm release
+
+ * the development cycle of the latest proper mainline release
+
+ In the latter case (say Linux v5.14), try to address regressions even
+ quicker, if the stable series for the predecessor (v5.13) will be abandoned
+ soon or already was stamped "End-of-Life" (EOL) -- this usually happens about
+ three to four weeks after a new mainline release.
+
+ * Try to fix all other regressions within two weeks after the culprit was
+ found. Two or three additional weeks are acceptable for performance
+ regressions and other issues which are annoying, but don't prevent anyone
+ from running Linux (unless it's an issue in the current development cycle,
+ as those should ideally be addressed before the release). A few weeks in
+ total are acceptable if a regression can only be fixed with a risky change
+ and at the same time affects only a few users; as much time is
+ also okay if the regression is already present in the second newest longterm
+ kernel series.
+
+Note: The aforementioned time frames for resolving regressions are meant to
+include getting the fix tested, reviewed, and merged into mainline, ideally with
+the fix being in linux-next at least briefly. This leads to delays you need to
+account for.
+
+Subsystem maintainers are expected to help meet those time frames by doing
+timely reviews and quick handling of accepted patches. They thus might have to
+send git-pull requests earlier or more often than usual; depending on the fix,
+it might even be acceptable to skip testing in linux-next. Especially fixes for
+regressions in stable and longterm kernels need to be handled quickly, as fixes
+need to be merged in mainline before they can be backported to older series.
+
+
+More aspects regarding regressions developers should be aware of
+----------------------------------------------------------------
+
+
+How to deal with changes where a risk of regression is known
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Evaluate how big the risk of regressions is, for example by performing a code
+search in Linux distributions and Git forges. Also consider asking other
+developers or projects likely to be affected to evaluate or even test the
+proposed change; if problems surface, maybe some solution acceptable for all
+can be found.
+
+If the risk of regressions in the end seems to be relatively small, go ahead
+with the change, but let all involved parties know about the risk. Hence, make
+sure your patch description makes this aspect obvious. Once the change is
+merged, tell the Linux kernel's regression tracker and the regressions mailing
+list about the risk, so everyone has the change on the radar in case reports
+trickle in. Depending on the risk, you also might want to ask the subsystem
+maintainer to mention the issue in their mainline pull request.
+
+What else is there to know about regressions?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Check out Documentation/admin-guide/reporting-regressions.rst; it covers a lot
+of other aspects you might want to be aware of:
+
+ * the purpose of the "no regressions rule"
+
+ * what issues actually qualify as regression
+
+ * who's in charge of finding the root cause of a regression
+
+ * how to handle tricky situations, e.g. when a regression is caused by a
+ security fix or when fixing a regression might cause another one
+
+Whom to ask for advice when it comes to regressions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Send a mail to the regressions mailing list (regressions@lists.linux.dev) while
+CCing the Linux kernel's regression tracker (regressions@leemhuis.info); if the
+issue might better be dealt with in private, feel free to omit the list.
+
+
+More about regression tracking and regzbot
+------------------------------------------
+
+
+Why the Linux kernel has a regression tracker, and why is regzbot used?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Rules like "no regressions" need someone to ensure they are followed, otherwise
+they are broken either accidentally or on purpose. History has shown this to be
+true for the Linux kernel as well. That's why Thorsten Leemhuis volunteered to
+keep an eye on things as the Linux kernel's regression tracker, who's
+occasionally helped by other people. None of them are paid to do this,
+which is why regression tracking is done on a best-effort basis.
+
+Earlier attempts to manually track regressions have shown that it's exhausting
+and frustrating work, which is why they were abandoned after a while. To prevent
+this from happening again, Thorsten developed regzbot to facilitate the work,
+with the long term goal to automate regression tracking as much as possible for
+everyone involved.
+
+How does regression tracking work with regzbot?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The bot watches for replies to reports of tracked regressions. Additionally,
+it's looking out for posted or committed patches referencing such reports
+with "Link:" tags; replies to such patch postings are tracked as well.
+Combined, this data provides good insights into the current state of the fixing
+process.
+
+Regzbot tries to do its job with as little overhead as possible for both
+reporters and developers. In fact, only reporters are burdened with an extra
+duty: they need to tell regzbot about the regression report using the ``#regzbot
+introduced`` command outlined above; if they don't do that, someone else can
+take care of that using ``#regzbot ^introduced``.
+
+For developers there normally is no extra work involved; they just need to make
+sure to do something that was expected long before regzbot came to light: add
+"Link:" tags to the patch description pointing to all reports about the issue
+fixed.
+
+Do I have to use regzbot?
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It's in the interest of everyone if you do, as kernel maintainers like Linus
+Torvalds partly rely on regzbot's tracking in their work -- for example when
+deciding to release a new version or extend the development phase. For this they
+need to be aware of all unfixed regressions; to do that, Linus is known to look
+into the weekly reports sent by regzbot.
+
+Do I have to tell regzbot about every regression I stumble upon?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Ideally yes: we are all humans and easily forget problems when something more
+important unexpectedly comes up -- for example a bigger problem in the Linux
+kernel or something in real life that's keeping us away from keyboards for a
+while. Hence, it's best to tell regzbot about every regression, except when you
+immediately write a fix and commit it to a tree regularly merged to the affected
+kernel series.
+
+How to see which regressions regzbot tracks currently?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Check `regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_
+for the latest info; alternatively, `search for the latest regression report
+<https://lore.kernel.org/lkml/?q=%22Linux+regressions+report%22+f%3Aregzbot>`_,
+which regzbot normally sends out once a week on Sunday evening (UTC), a few
+hours before Linus usually publishes new (pre-)releases.
+
+What places is regzbot monitoring?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Regzbot is watching the most important Linux mailing lists as well as the git
+repositories of linux-next, mainline, and stable/longterm.
+
+What kind of issues are supposed to be tracked by regzbot?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The bot is meant to track regressions, hence please don't involve regzbot for
+regular issues. But it's okay for the Linux kernel's regression tracker if you
+use regzbot to track severe issues, like reports about hangs, corrupted data,
+or internal errors (Panic, Oops, BUG(), warning, ...).
+
+Can I add regressions found by CI systems to regzbot's tracking?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Feel free to do so, if the particular regression likely has an impact on practical
+use cases and thus might be noticed by users; hence, please don't involve
+regzbot for theoretical regressions unlikely to show themselves in real world
+usage.
+
+How to interact with regzbot?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By using a 'regzbot command' in a direct or indirect reply to the mail with the
+regression report. These commands need to be in their own paragraph (IOW: they
+need to be separated from the rest of the mail using blank lines).
+
+One such command is ``#regzbot introduced <version or commit>``, which makes
+regzbot consider your mail as a regression report and add it to the tracking, as
+already described above; ``#regzbot ^introduced <version or commit>`` is another
+such command, which makes regzbot consider the parent mail as a report for a
+regression which it starts to track.
+
+Once one of those two commands has been utilized, other regzbot commands can be
+used in direct or indirect replies to the report. You can write them below one
+of the `introduced` commands or in any direct or indirect reply to the mail
+that used one of them:
+
+ * Set or update the title::
+
+ #regzbot title: foo
+
+ * Monitor a discussion or bugzilla.kernel.org ticket where additional aspects of
+ the issue or a fix are discussed -- for example the posting of a patch fixing
+ the regression::
+
+ #regzbot monitor: https://lore.kernel.org/all/30th.anniversary.repost@klaava.Helsinki.FI/
+
+ Monitoring only works for lore.kernel.org and bugzilla.kernel.org; regzbot
+ will consider all messages in that thread or ticket as related to the fixing
+ process.
+
+ * Point to a place with further details of interest, like a mailing list post
+ or a ticket in a bug tracker that is slightly related, but about a different
+ topic::
+
+ #regzbot link: https://bugzilla.kernel.org/show_bug.cgi?id=123456789
+
+ * Mark a regression as fixed by a commit that is heading upstream or already
+ landed::
+
+ #regzbot fixed-by: 1f2e3d4c5d
+
+ * Mark a regression as a duplicate of another one already tracked by regzbot::
+
+ #regzbot dup-of: https://lore.kernel.org/all/30th.anniversary.repost@klaava.Helsinki.FI/
+
+ * Mark a regression as invalid::
+
+ #regzbot invalid: wasn't a regression, problem has always existed
+
+Is there more to tell about regzbot and its commands?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+More detailed and up-to-date information about the Linux
+kernel's regression tracking bot can be found on its
+`project page <https://gitlab.com/knurd42/regzbot>`_, which among other things
+contains a `getting started guide <https://gitlab.com/knurd42/regzbot/-/blob/main/docs/getting_started.md>`_
+and `reference documentation <https://gitlab.com/knurd42/regzbot/-/blob/main/docs/reference.md>`_
+which both cover more details than the above section.
+
+Quotes from Linus about regressions
+-----------------------------------
+
+Find below a few real life examples of how Linus Torvalds expects regressions to
+be handled:
+
+ * From `2017-10-26 (1/2)
+ <https://lore.kernel.org/lkml/CA+55aFwiiQYJ+YoLKCXjN_beDVfu38mg=Ggg5LFOcqHE8Qi7Zw@mail.gmail.com/>`_::
+
+ If you break existing user space setups THAT IS A REGRESSION.
+
+ It's not ok to say "but we'll fix the user space setup".
+
+ Really. NOT OK.
+
+ [...]
+
+ The first rule is:
+
+ - we don't cause regressions
+
+ and the corollary is that when regressions *do* occur, we admit to
+ them and fix them, instead of blaming user space.
+
+ The fact that you have apparently been denying the regression now for
+ three weeks means that I will revert, and I will stop pulling apparmor
+ requests until the people involved understand how kernel development
+ is done.
+
+ * From `2017-10-26 (2/2)
+ <https://lore.kernel.org/lkml/CA+55aFxW7NMAMvYhkvz1UPbUTUJewRt6Yb51QAx5RtrWOwjebg@mail.gmail.com/>`_::
+
+ People should basically always feel like they can update their kernel
+ and simply not have to worry about it.
+
+ I refuse to introduce "you can only update the kernel if you also
+ update that other program" kind of limitations. If the kernel used to
+ work for you, the rule is that it continues to work for you.
+
+ There have been exceptions, but they are few and far between, and they
+ generally have some major and fundamental reasons for having happened,
+ that were basically entirely unavoidable, and people _tried_hard_ to
+ avoid them. Maybe we can't practically support the hardware any more
+ after it is decades old and nobody uses it with modern kernels any
+ more. Maybe there's a serious security issue with how we did things,
+ and people actually depended on that fundamentally broken model. Maybe
+ there was some fundamental other breakage that just _had_ to have a
+ flag day for very core and fundamental reasons.
+
+ And notice that this is very much about *breaking* peoples environments.
+
+ Behavioral changes happen, and maybe we don't even support some
+ feature any more. There's a number of fields in /proc/<pid>/stat that
+ are printed out as zeroes, simply because they don't even *exist* in
+ the kernel any more, or because showing them was a mistake (typically
+ an information leak). But the numbers got replaced by zeroes, so that
+ the code that used to parse the fields still works. The user might not
+ see everything they used to see, and so behavior is clearly different,
+ but things still _work_, even if they might no longer show sensitive
+ (or no longer relevant) information.
+
+ But if something actually breaks, then the change must get fixed or
+ reverted. And it gets fixed in the *kernel*. Not by saying "well, fix
+ your user space then". It was a kernel change that exposed the
+ problem, it needs to be the kernel that corrects for it, because we
+ have a "upgrade in place" model. We don't have a "upgrade with new
+ user space".
+
+ And I seriously will refuse to take code from people who do not
+ understand and honor this very simple rule.
+
+ This rule is also not going to change.
+
+ And yes, I realize that the kernel is "special" in this respect. I'm
+ proud of it.
+
+ I have seen, and can point to, lots of projects that go "We need to
+ break that use case in order to make progress" or "you relied on
+ undocumented behavior, it sucks to be you" or "there's a better way to
+ do what you want to do, and you have to change to that new better
+ way", and I simply don't think that's acceptable outside of very early
+ alpha releases that have experimental users that know what they signed
+ up for. The kernel hasn't been in that situation for the last two
+ decades.
+
+ We do API breakage _inside_ the kernel all the time. We will fix
+ internal problems by saying "you now need to do XYZ", but then it's
+ about internal kernel API's, and the people who do that then also
+ obviously have to fix up all the in-kernel users of that API. Nobody
+ can say "I now broke the API you used, and now _you_ need to fix it
+ up". Whoever broke something gets to fix it too.
+
+ And we simply do not break user space.
+
+ * From `2020-05-21
+ <https://lore.kernel.org/all/CAHk-=wiVi7mSrsMP=fLXQrXK_UimybW=ziLOwSzFTtoXUacWVQ@mail.gmail.com/>`_::
+
+ The rules about regressions have never been about any kind of
+ documented behavior, or where the code lives.
+
+ The rules about regressions are always about "breaks user workflow".
+
+ Users are literally the _only_ thing that matters.
+
+ No amount of "you shouldn't have used this" or "that behavior was
+ undefined, it's your own fault your app broke" or "that used to work
+ simply because of a kernel bug" is at all relevant.
+
+ Now, reality is never entirely black-and-white. So we've had things
+ like "serious security issue" etc that just forces us to make changes
+ that may break user space. But even then the rule is that we don't
+ really have other options that would allow things to continue.
+
+ And obviously, if users take years to even notice that something
+ broke, or if we have sane ways to work around the breakage that
+ doesn't make for too much trouble for users (ie "ok, there are a
+ handful of users, and they can use a kernel command line to work
+ around it" kind of things) we've also been a bit less strict.
+
+ But no, "that was documented to be broken" (whether it's because the
+ code was in staging or because the man-page said something else) is
+ irrelevant. If staging code is so useful that people end up using it,
+ that means that it's basically regular kernel code with a flag saying
+ "please clean this up".
+
+ The other side of the coin is that people who talk about "API
+ stability" are entirely wrong. API's don't matter either. You can make
+ any changes to an API you like - as long as nobody notices.
+
+ Again, the regression rule is not about documentation, not about
+ API's, and not about the phase of the moon.
+
+ It's entirely about "we caused problems for user space that used to work".
+
+ * From `2017-11-05
+ <https://lore.kernel.org/all/CA+55aFzUvbGjD8nQ-+3oiMBx14c_6zOj2n7KLN3UsJ-qsd4Dcw@mail.gmail.com/>`_::
+
+ And our regression rule has never been "behavior doesn't change".
+ That would mean that we could never make any changes at all.
+
+ For example, we do things like add new error handling etc all the
+ time, which we then sometimes even add tests for in our kselftest
+ directory.
+
+ So clearly behavior changes all the time and we don't consider that a
+ regression per se.
+
+ The rule for a regression for the kernel is that some real user
+ workflow breaks. Not some test. Not a "look, I used to be able to do
+ X, now I can't".
+
+ * From `2018-08-03
+ <https://lore.kernel.org/all/CA+55aFwWZX=CXmWDTkDGb36kf12XmTehmQjbiMPCqCRG2hi9kw@mail.gmail.com/>`_::
+
+ YOU ARE MISSING THE #1 KERNEL RULE.
+
+ We do not regress, and we do not regress exactly because your are 100% wrong.
+
+ And the reason you state for your opinion is in fact exactly *WHY* you
+ are wrong.
+
+ Your "good reasons" are pure and utter garbage.
+
+ The whole point of "we do not regress" is so that people can upgrade
+ the kernel and never have to worry about it.
+
+ > Kernel had a bug which has been fixed
+
+ That is *ENTIRELY* immaterial.
+
+ Guys, whether something was buggy or not DOES NOT MATTER.
+
+ Why?
+
+ Bugs happen. That's a fact of life. Arguing that "we had to break
+ something because we were fixing a bug" is completely insane. We fix
+ tens of bugs every single day, thinking that "fixing a bug" means that
+ we can break something is simply NOT TRUE.
+
+ So bugs simply aren't even relevant to the discussion. They happen,
+ they get found, they get fixed, and it has nothing to do with "we
+ break users".
+
+ Because the only thing that matters IS THE USER.
+
+ How hard is that to understand?
+
+ Anybody who uses "but it was buggy" as an argument is entirely missing
+ the point. As far as the USER was concerned, it wasn't buggy - it
+ worked for him/her.
+
+ Maybe it worked *because* the user had taken the bug into account,
+ maybe it worked because the user didn't notice - again, it doesn't
+ matter. It worked for the user.
+
+ Breaking a user workflow for a "bug" is absolutely the WORST reason
+ for breakage you can imagine.
+
+ It's basically saying "I took something that worked, and I broke it,
+ but now it's better". Do you not see how f*cking insane that statement
+ is?
+
+ And without users, your program is not a program, it's a pointless
+ piece of code that you might as well throw away.
+
+ Seriously. This is *why* the #1 rule for kernel development is "we
+ don't break users". Because "I fixed a bug" is absolutely NOT AN
+ ARGUMENT if that bug fix broke a user setup. You actually introduced a
+ MUCH BIGGER bug by "fixing" something that the user clearly didn't
+ even care about.
+
+ And dammit, we upgrade the kernel ALL THE TIME without upgrading any
+ other programs at all. It is absolutely required, because flag-days
+ and dependencies are horribly bad.
+
+ And it is also required simply because I as a kernel developer do not
+ upgrade random other tools that I don't even care about as I develop
+ the kernel, and I want any of my users to feel safe doing the same
+ time.
+
+ So no. Your rule is COMPLETELY wrong. If you cannot upgrade a kernel
+ without upgrading some other random binary, then we have a problem.
+
+ * From `2021-06-05
+ <https://lore.kernel.org/all/CAHk-=wiUVqHN76YUwhkjZzwTdjMMJf_zN4+u7vEJjmEGh3recw@mail.gmail.com/>`_::
+
+ THERE ARE NO VALID ARGUMENTS FOR REGRESSIONS.
+
+ Honestly, security people need to understand that "not working" is not
+ a success case of security. It's a failure case.
+
+ Yes, "not working" may be secure. But security in that case is *pointless*.
+
+ * From `2011-05-06 (1/3)
+ <https://lore.kernel.org/all/BANLkTim9YvResB+PwRp7QTK-a5VNg2PvmQ@mail.gmail.com/>`_::
+
+ Binary compatibility is more important.
+
+ And if binaries don't use the interface to parse the format (or just
+ parse it wrongly - see the fairly recent example of adding uuid's to
+ /proc/self/mountinfo), then it's a regression.
+
+ And regressions get reverted, unless there are security issues or
+ similar that makes us go "Oh Gods, we really have to break things".
+
+ I don't understand why this simple logic is so hard for some kernel
+ developers to understand. Reality matters. Your personal wishes matter
+ NOT AT ALL.
+
+ If you made an interface that can be used without parsing the
+ interface description, then we're stuck with the interface. Theory
+ simply doesn't matter.
+
+ You could help fix the tools, and try to avoid the compatibility
+ issues that way. There aren't that many of them.
+
+ From `2011-05-06 (2/3)
+ <https://lore.kernel.org/all/BANLkTi=KVXjKR82sqsz4gwjr+E0vtqCmvA@mail.gmail.com/>`_::
+
+ it's clearly NOT an internal tracepoint. By definition. It's being
+ used by powertop.
+
+ From `2011-05-06 (3/3)
+ <https://lore.kernel.org/all/BANLkTinazaXRdGovYL7rRVp+j6HbJ7pzhg@mail.gmail.com/>`_::
+
+ We have programs that use that ABI and thus it's a regression if they break.
+
+ * From `2012-07-06 <https://lore.kernel.org/all/CA+55aFwnLJ+0sjx92EGREGTWOx84wwKaraSzpTNJwPVV8edw8g@mail.gmail.com/>`_::
+
+ > Now this got me wondering if Debian _unstable_ actually qualifies as a
+ > standard distro userspace.
+
+ Oh, if the kernel breaks some standard user space, that counts. Tons
+ of people run Debian unstable
+
+ * From `2019-09-15
+ <https://lore.kernel.org/lkml/CAHk-=wiP4K8DRJWsCo=20hn_6054xBamGKF2kPgUzpB5aMaofA@mail.gmail.com/>`_::
+
+ One _particularly_ last-minute revert is the top-most commit (ignoring
+ the version change itself) done just before the release, and while
+ it's very annoying, it's perhaps also instructive.
+
+ What's instructive about it is that I reverted a commit that wasn't
+ actually buggy. In fact, it was doing exactly what it set out to do,
+ and did it very well. In fact it did it _so_ well that the much
+ improved IO patterns it caused then ended up revealing a user-visible
+ regression due to a real bug in a completely unrelated area.
+
+ The actual details of that regression are not the reason I point that
+ revert out as instructive, though. It's more that it's an instructive
+ example of what counts as a regression, and what the whole "no
+ regressions" kernel rule means. The reverted commit didn't change any
+ API's, and it didn't introduce any new bugs. But it ended up exposing
+ another problem, and as such caused a kernel upgrade to fail for a
+ user. So it got reverted.
+
+ The point here being that we revert based on user-reported _behavior_,
+ not based on some "it changes the ABI" or "it caused a bug" concept.
+ The problem was really pre-existing, and it just didn't happen to
+ trigger before. The better IO patterns introduced by the change just
+ happened to expose an old bug, and people had grown to depend on the
+ previously benign behavior of that old issue.
+
+ And never fear, we'll re-introduce the fix that improved on the IO
+ patterns once we've decided just how to handle the fact that we had a
+ bad interaction with an interface that people had then just happened
+ to rely on incidental behavior for before. It's just that we'll have
+ to hash through how to do that (there are no less than three different
+ patches by three different developers being discussed, and there might
+ be more coming...). In the meantime, I reverted the thing that exposed
+ the problem to users for this release, even if I hope it will be
+ re-introduced (perhaps even backported as a stable patch) once we have
+ consensus about the issue it exposed.
+
+ Take-away from the whole thing: it's not about whether you change the
+ kernel-userspace ABI, or fix a bug, or about whether the old code
+ "should never have worked in the first place". It's about whether
+ something breaks existing users' workflow.
+
+ Anyway, that was my little aside on the whole regression thing. Since
+ it's that "first rule of kernel programming", I felt it is perhaps
+ worth just bringing it up every once in a while
+
+..
+ end-of-content
+..
+ This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
+ of the file. If you want to distribute this text under CC-BY-4.0 only,
+ please use "The Linux kernel developers" for author attribution and link
+ this as source:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/process/handling-regressions.rst
+..
+ Note: Only the content of this RST file as found in the Linux kernel sources
+ is available under CC-BY-4.0, as versions of this text that were processed
+ (for example by the kernel's build system) might contain content taken from
+ files which use a more restrictive license.
diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst
index 9f1b88492bb3..3587dae4d0ef 100644
--- a/Documentation/process/index.rst
+++ b/Documentation/process/index.rst
@@ -25,6 +25,7 @@ Below are the essential guides that every developer should read.
code-of-conduct-interpretation
development-process
submitting-patches
+ handling-regressions
programming-language
coding-style
maintainer-handbooks
@@ -48,6 +49,7 @@ Other guides to the community that are of interest to most developers are:
deprecated
embargoed-hardware-issues
maintainers
+ researcher-guidelines
These are some overall technical guides that have been put here for now for
lack of a better place.
diff --git a/Documentation/process/researcher-guidelines.rst b/Documentation/process/researcher-guidelines.rst
new file mode 100644
index 000000000000..afc944e0e898
--- /dev/null
+++ b/Documentation/process/researcher-guidelines.rst
@@ -0,0 +1,143 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _researcher_guidelines:
+
+Researcher Guidelines
++++++++++++++++++++++
+
+The Linux kernel community welcomes transparent research on the Linux
+kernel, the activities involved in producing it, and any other byproducts
+of its development. Linux benefits greatly from this kind of research, and
+most aspects of Linux are driven by research in one form or another.
+
+The community greatly appreciates it if researchers share preliminary
+findings before making their results public, especially if such research
+involves security. Getting involved early improves both the quality of the
+research and Linux's ability to benefit from it. In any case, sharing open
+access copies of the published research with the community is recommended.
+
+This document seeks to clarify what the Linux kernel community considers
+acceptable and non-acceptable practices when conducting such research. At
+the very least, such research and related activities should follow
+standard research ethics rules. For more background on research ethics
+generally, ethics in technology, and research of developer communities
+in particular, see:
+
+* `History of Research Ethics <https://www.unlv.edu/research/ORI-HSR/history-ethics>`_
+* `IEEE Ethics <https://www.ieee.org/about/ethics/index.html>`_
+* `Developer and Researcher Views on the Ethics of Experiments on Open-Source Projects <https://arxiv.org/pdf/2112.13217.pdf>`_
+
+The Linux kernel community expects that everyone interacting with the
+project is participating in good faith to make Linux better. Research on
+any publicly-available artifact (including, but not limited to source
+code) produced by the Linux kernel community is welcome, though research
+on developers must be distinctly opt-in.
+
+Passive research that is based entirely on publicly available sources,
+including posts to public mailing lists and commits to public
+repositories, is clearly permissible. However, as with any research,
+standard ethics must still be followed.
+
+Active research on developer behavior, however, must be done with the
+explicit agreement of, and full disclosure to, the individual developers
+involved. Developers cannot be interacted with/experimented on without
+consent; this, too, is standard research ethics.
+
+To help clarify: sending patches to developers *is* interacting
+with them, but they have already consented to receiving *good faith
+contributions*. Sending intentionally flawed/vulnerable patches or
+contributing misleading information to discussions is not consented
+to. Such communication can be damaging to the developer (e.g. draining
+time, effort, and morale) and damaging to the project by eroding
+the entire developer community's trust in the contributor (and the
+contributor's organization as a whole), undermining efforts to provide
+constructive feedback to contributors, and putting end users at risk of
+software flaws.
+
+Participation in the development of Linux itself by researchers, as
+with anyone, is welcomed and encouraged. Research into Linux code is
+a common practice, especially when it comes to developing or running
+analysis tools that produce actionable results.
+
+When engaging with the developer community, sending a patch has
+traditionally been the best way to make an impact. Linux already has
+plenty of known bugs -- what's much more helpful is having vetted fixes.
+Before contributing, carefully read the appropriate documentation:
+
+* Documentation/process/development-process.rst
+* Documentation/process/submitting-patches.rst
+* Documentation/admin-guide/reporting-issues.rst
+* Documentation/admin-guide/security-bugs.rst
+
+Then send a patch (including a commit log with all the details listed
+below) and follow up on any feedback from other developers.
+
+When sending patches produced from research, the commit logs should
+contain at least the following details, so that developers have
+appropriate context for understanding the contribution. Answer:
+
+* What is the specific problem that has been found?
+* How could the problem be reached on a running system?
+* What effect would encountering the problem have on the system?
+* How was the problem found? Specifically include details about any
+ testing, static or dynamic analysis programs, and any other tools or
+ methods used to perform the work.
+* Which version of Linux was the problem found on? Using the most recent
+ release or a recent linux-next branch is strongly preferred (see
+ Documentation/process/howto.rst).
+* What was changed to fix the problem, and why is it believed to be correct?
+* How was the change build-tested and run-time tested?
+* What prior commit does this change fix? This should go in a "Fixes:"
+ tag as the documentation describes.
+* Who else has reviewed this patch? This should go in appropriate
+ "Reviewed-by:" tags; see below.
+
+For example::
+
+ From: Author <author@email>
+ Subject: [PATCH] drivers/foo_bar: Add missing kfree()
+
+ The error path in foo_bar driver does not correctly free the allocated
+ struct foo_bar_info. This can happen if the attached foo_bar device
+ rejects the initialization packets sent during foo_bar_probe(). This
+ would result in a 64 byte slab memory leak once per device attach,
+ wasting memory resources over time.
+
+ This flaw was found using an experimental static analysis tool we are
+ developing, LeakMagic[1], which reported the following warning when
+ analyzing the v5.15 kernel release:
+
+ path/to/foo_bar.c:187: missing kfree() call?
+
+ Add the missing kfree() to the error path. No other references to
+ this memory exist outside the probe function, so this is the only
+ place it can be freed.
+
+ x86_64 and arm64 defconfig builds with CONFIG_FOO_BAR=y using GCC
+ 11.2 show no new warnings, and LeakMagic no longer warns about this
+ code path. As we don't have a FooBar device to test with, no runtime
+ testing was able to be performed.
+
+ [1] https://url/to/leakmagic/details
+
+ Reported-by: Researcher <researcher@email>
+ Fixes: aaaabbbbccccdddd ("Introduce support for FooBar")
+ Signed-off-by: Author <author@email>
+ Reviewed-by: Reviewer <reviewer@email>
+
+If you are a first-time contributor, it is recommended that the patch
+itself be vetted by others privately before being posted to public lists.
+(This is required if you have been explicitly told your patches need
+more careful internal review.) These people are expected to have their
+"Reviewed-by" tag included in the resulting patch. Finding another
+developer familiar with Linux contribution, especially within your own
+organization, and having them help with reviews before you send patches to
+the public mailing lists tends to significantly improve the quality of the
+resulting patches, and thereby reduces the burden on other developers.
+
+If no one can be found to internally review patches and you need
+help finding such a person, or if you have any other questions
+related to this document and the developer community's expectations,
+please reach out to the private Technical Advisory Board mailing list:
+<tech-board@lists.linux-foundation.org>.
diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index 31ea120ce531..fb496b2ebfd3 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -495,7 +495,8 @@ Using Reported-by:, Tested-by:, Reviewed-by:, Suggested-by: and Fixes:
The Reported-by tag gives credit to people who find bugs and report them and it
hopefully inspires them to help us again in the future. Please note that if
the bug was reported in private, then ask for permission first before using the
-Reported-by tag.
+Reported-by tag. The tag is intended for bugs; please do not use it to credit
+feature requests.
A Tested-by: tag indicates that the patch has been successfully tested (in
some environment) by the person named. This tag informs maintainers that
diff --git a/Documentation/scheduler/index.rst b/Documentation/scheduler/index.rst
index 88900aabdbf7..b430d856056a 100644
--- a/Documentation/scheduler/index.rst
+++ b/Documentation/scheduler/index.rst
@@ -14,9 +14,11 @@ Linux Scheduler
sched-domains
sched-capacity
sched-energy
+ schedutil
sched-nice-design
sched-rt-group
sched-stats
+ sched-debug
text_files
diff --git a/Documentation/scheduler/sched-debug.rst b/Documentation/scheduler/sched-debug.rst
new file mode 100644
index 000000000000..4d3d24f2a439
--- /dev/null
+++ b/Documentation/scheduler/sched-debug.rst
@@ -0,0 +1,54 @@
+=================
+Scheduler debugfs
+=================
+
+Booting a kernel with CONFIG_SCHED_DEBUG=y will give access to
+scheduler specific debug files under /sys/kernel/debug/sched. Some of
+those files are described below.
+
+numa_balancing
+==============
+
+The `numa_balancing` directory holds files that control the NUMA
+balancing feature. If the system overhead of the feature is too
+high, the rate at which the kernel samples for NUMA hinting faults
+can be controlled via the `scan_period_min_ms`, `scan_delay_ms`,
+`scan_period_max_ms`, and `scan_size_mb` files.
+
+
+scan_period_min_ms, scan_delay_ms, scan_period_max_ms, scan_size_mb
+-------------------------------------------------------------------
+
+Automatic NUMA balancing scans a task's address space and unmaps pages to
+detect if pages are properly placed or if the data should be migrated to a
+memory node local to where the task is running. Every "scan delay" the task
+scans the next "scan size" number of pages in its address space. When the
+end of the address space is reached the scanner restarts from the beginning.
+
+In combination, the "scan delay" and "scan size" determine the scan rate.
+When "scan delay" decreases, the scan rate increases. The scan delay and
+hence the scan rate of every task is adaptive and depends on historical
+behaviour. If pages are properly placed then the scan delay increases,
+otherwise the scan delay decreases. The "scan size" is not adaptive but
+the higher the "scan size", the higher the scan rate.
+
+Higher scan rates incur higher system overhead as page faults must be
+trapped and potentially data must be migrated. However, the higher the scan
+rate, the more quickly a task's memory is migrated to a local node when the
+workload pattern changes, which minimises the performance impact of remote
+memory accesses. These files control the thresholds for scan delays and
+the number of pages scanned.
+
+``scan_period_min_ms`` is the minimum time in milliseconds to scan a
+task's virtual memory. It effectively controls the maximum scanning
+rate for each task.
+
+``scan_delay_ms`` is the starting "scan delay" used for a task when it
+initially forks.
+
+``scan_period_max_ms`` is the maximum time in milliseconds to scan a
+task's virtual memory. It effectively controls the minimum scanning
+rate for each task.
+
+``scan_size_mb`` is how many megabytes worth of pages are scanned for
+a given scan.
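+
+For example, halving the maximum scanning rate of every task amounts to
+doubling ``scan_period_min_ms`` (a sketch; the value read below is only
+illustrative)::
+
+	# cd /sys/kernel/debug/sched/numa_balancing
+	# cat scan_period_min_ms
+	1000
+	# echo 2000 > scan_period_min_ms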
diff --git a/Documentation/scheduler/sched-domains.rst b/Documentation/scheduler/sched-domains.rst
index 84dcdcd2911c..e57ad28301bd 100644
--- a/Documentation/scheduler/sched-domains.rst
+++ b/Documentation/scheduler/sched-domains.rst
@@ -37,10 +37,10 @@ rebalancing event for the current runqueue has arrived. The actual load
balancing workhorse, run_rebalance_domains()->rebalance_domains(), is then run
in softirq context (SCHED_SOFTIRQ).
-The latter function takes two arguments: the current CPU and whether it was idle
-at the time the scheduler_tick() happened and iterates over all sched domains
-our CPU is on, starting from its base domain and going up the ->parent chain.
-While doing that, it checks to see if the current domain has exhausted its
+The latter function takes two arguments: the runqueue of the current CPU and
+whether the CPU was idle at the time the scheduler_tick() happened, and iterates
+over all sched domains our CPU is on, starting from its base domain and going up
+the ->parent chain. While doing that, it checks to see if the current domain has
+exhausted its
rebalance interval. If so, it runs load_balance() on that domain. It then checks
the parent sched_domain (if it exists), and the parent of the parent and so
forth.
diff --git a/Documentation/scheduler/schedutil.txt b/Documentation/scheduler/schedutil.rst
index 78f6b91e2291..32c7d69fc86c 100644
--- a/Documentation/scheduler/schedutil.txt
+++ b/Documentation/scheduler/schedutil.rst
@@ -1,11 +1,15 @@
+=========
+Schedutil
+=========
+
+.. note::
-NOTE; all this assumes a linear relation between frequency and work capacity,
-we know this is flawed, but it is the best workable approximation.
+   All this assumes a linear relation between frequency and work capacity;
+   we know this is flawed, but it is the best workable approximation.
PELT (Per Entity Load Tracking)
--------------------------------
+===============================
With PELT we track some metrics across the various scheduler entities, from
individual tasks to task-group slices to CPU runqueues. As the basis for this
@@ -38,8 +42,8 @@ while 'runnable' will increase to reflect the amount of contention.
For more detail see: kernel/sched/pelt.c
-Frequency- / CPU Invariance
----------------------------
+Frequency / CPU Invariance
+==========================
Because consuming the CPU for 50% at 1GHz is not the same as consuming the CPU
for 50% at 2GHz, nor is running 50% on a LITTLE CPU the same as running 50% on
@@ -47,7 +51,7 @@ a big CPU, we allow architectures to scale the time delta with two ratios, one
Dynamic Voltage and Frequency Scaling (DVFS) ratio and one microarch ratio.
For simple DVFS architectures (where software is in full control) we trivially
-compute the ratio as:
+compute the ratio as::
f_cur
r_dvfs := -----
@@ -55,7 +59,7 @@ compute the ratio as:
For more dynamic systems where the hardware is in control of DVFS we use
hardware counters (Intel APERF/MPERF, ARMv8.4-AMU) to provide us this ratio.
-For Intel specifically, we use:
+For Intel specifically, we use::
APERF
f_cur := ----- * P0
@@ -87,7 +91,7 @@ For more detail see:
UTIL_EST / UTIL_EST_FASTUP
---------------------------
+==========================
Because periodic tasks have their averages decayed while they sleep, even
though when running their expected utilization will be the same, they suffer a
@@ -106,7 +110,7 @@ For more detail see: kernel/sched/fair.c:util_est_dequeue()
UCLAMP
-------
+======
It is possible to set effective u_min and u_max clamps on each CFS or RT task;
the runqueue keeps a max aggregate of these clamps for all running tasks.
@@ -115,7 +119,7 @@ For more detail see: include/uapi/linux/sched/types.h
Schedutil / DVFS
-----------------
+================
Every time the scheduler load tracking is updated (task wakeup, task
migration, time progression) we call out to schedutil to update the hardware
@@ -123,7 +127,7 @@ DVFS state.
The basis is the CPU runqueue's 'running' metric, which per the above is
the frequency invariant utilization estimate of the CPU. From this we compute
-a desired frequency like:
+a desired frequency like::
max( running, util_est ); if UTIL_EST
u_cfs := { running; otherwise
@@ -135,7 +139,7 @@ a desired frequency like:
f_des := min( f_max, 1.25 u * f_max )
-XXX IO-wait; when the update is due to a task wakeup from IO-completion we
+XXX IO-wait: when the update is due to a task wakeup from IO-completion we
boost 'u' above.
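For example (illustrative numbers only), ``u = 0.4`` yields::

	f_des = min( f_max, 1.25 * 0.4 * f_max ) = 0.5 * f_max

i.e. the 1.25 factor keeps roughly 20% of headroom above the estimated
utilization.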
This frequency is then used to select a P-state/OPP or directly munged into a
@@ -153,7 +157,7 @@ For more information see: kernel/sched/cpufreq_schedutil.c
NOTES
------
+=====
- On low-load scenarios, where DVFS is most relevant, the 'running' numbers
will closely reflect utilization.
diff --git a/Documentation/security/SCTP.rst b/Documentation/security/SCTP.rst
index d5fd6ccc3dcb..b73eb764a001 100644
--- a/Documentation/security/SCTP.rst
+++ b/Documentation/security/SCTP.rst
@@ -15,10 +15,7 @@ For security module support, three SCTP specific hooks have been implemented::
security_sctp_assoc_request()
security_sctp_bind_connect()
security_sctp_sk_clone()
-
-Also the following security hook has been utilised::
-
- security_inet_conn_established()
+ security_sctp_assoc_established()
The usage of these hooks is described below with the SELinux implementation
described in the `SCTP SELinux Support`_ chapter.
@@ -122,11 +119,12 @@ calls **sctp_peeloff**\(3).
@newsk - pointer to new sock structure.
-security_inet_conn_established()
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Called when a COOKIE ACK is received::
+security_sctp_assoc_established()
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Called when a COOKIE ACK is received, and the peer secid will be
+saved into ``@asoc->peer_secid`` for the client::
- @sk - pointer to sock structure.
+ @asoc - pointer to sctp association structure.
@skb - pointer to skbuff of the COOKIE ACK packet.
@@ -134,7 +132,7 @@ Security Hooks used for Association Establishment
-------------------------------------------------
The following diagram shows the use of ``security_sctp_bind_connect()``,
-``security_sctp_assoc_request()``, ``security_inet_conn_established()`` when
+``security_sctp_assoc_request()``, ``security_sctp_assoc_established()`` when
establishing an association.
::
@@ -172,7 +170,7 @@ establishing an association.
<------------------------------------------- COOKIE ACK
| |
sctp_sf_do_5_1E_ca |
- Call security_inet_conn_established() |
+ Call security_sctp_assoc_established() |
to set the peer label. |
| |
| If SCTP_SOCKET_TCP or peeled off
@@ -198,7 +196,7 @@ hooks with the SELinux specifics expanded below::
security_sctp_assoc_request()
security_sctp_bind_connect()
security_sctp_sk_clone()
- security_inet_conn_established()
+ security_sctp_assoc_established()
security_sctp_assoc_request()
@@ -271,12 +269,12 @@ sockets sid and peer sid to that contained in the ``@asoc sid`` and
@newsk - pointer to new sock structure.
-security_inet_conn_established()
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+security_sctp_assoc_established()
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Called when a COOKIE ACK is received where it sets the connection's peer sid
to that in ``@skb``::
- @sk - pointer to sock structure.
+ @asoc - pointer to sctp association structure.
@skb - pointer to skbuff of the COOKIE ACK packet.
diff --git a/Documentation/security/keys/trusted-encrypted.rst b/Documentation/security/keys/trusted-encrypted.rst
index 80d5a5af62a1..f614dad7de12 100644
--- a/Documentation/security/keys/trusted-encrypted.rst
+++ b/Documentation/security/keys/trusted-encrypted.rst
@@ -107,12 +107,13 @@ Encrypted Keys
--------------
Encrypted keys do not depend on a trust source, and are faster, as they use AES
-for encryption/decryption. New keys are created from kernel-generated random
-numbers, and are encrypted/decrypted using a specified ‘master’ key. The
-‘master’ key can either be a trusted-key or user-key type. The main disadvantage
-of encrypted keys is that if they are not rooted in a trusted key, they are only
-as secure as the user key encrypting them. The master user key should therefore
-be loaded in as secure a way as possible, preferably early in boot.
+for encryption/decryption. New keys are created either from kernel-generated
+random numbers or user-provided decrypted data, and are encrypted/decrypted
+using a specified ‘master’ key. The ‘master’ key can either be a trusted-key or
+user-key type. The main disadvantage of encrypted keys is that if they are not
+rooted in a trusted key, they are only as secure as the user key encrypting
+them. The master user key should therefore be loaded in as secure a way as
+possible, preferably early in boot.
Usage
@@ -199,6 +200,8 @@ Usage::
keyctl add encrypted name "new [format] key-type:master-key-name keylen"
ring
+ keyctl add encrypted name "new [format] key-type:master-key-name keylen
+ decrypted-data" ring
keyctl add encrypted name "load hex_blob" ring
keyctl update keyid "update key-type:master-key-name"
@@ -303,6 +306,16 @@ Load an encrypted key "evm" from saved blob::
82dbbc55be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e0
24717c64 5972dcb82ab2dde83376d82b2e3c09ffc
+Instantiate an encrypted key "evm" using user-provided decrypted data::
+
+ $ keyctl add encrypted evm "new default user:kmk 32 `cat evm_decrypted_data.blob`" @u
+ 794890253
+
+ $ keyctl print 794890253
+ default user:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b382d
+ bbc55be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e0247
+ 17c64 5972dcb82ab2dde83376d82b2e3c09ffc
+
Other uses for trusted and encrypted keys, such as for disk and file encryption
are anticipated. In particular the new format 'ecryptfs' has been defined
in order to use encrypted keys to mount an eCryptfs filesystem. More details
diff --git a/Documentation/sphinx/kerneldoc-preamble.sty b/Documentation/sphinx/kerneldoc-preamble.sty
new file mode 100644
index 000000000000..9d0204dc38be
--- /dev/null
+++ b/Documentation/sphinx/kerneldoc-preamble.sty
@@ -0,0 +1,226 @@
+% -*- coding: utf-8 -*-
+% SPDX-License-Identifier: GPL-2.0
+%
+% LaTeX preamble for "make latexdocs" or "make pdfdocs" including:
+% - TOC width settings
+% - Setting of tabulary (\tymin)
+% - Headheight setting for fancyhdr
+% - Fontfamily settings for CJK (Chinese, Japanese, and Korean) translations
+%
+% Note on the suffix of .sty:
+% This is not implemented as a LaTeX style file, but as a file containing
+% plain LaTeX code to be included into preamble.
+% ".sty" is chosen because ".tex" would cause the build scripts to confuse
+% this file with a LaTeX main file.
+%
+% Copyright (C) 2022 Akira Yokosawa
+
+% Custom width parameters for TOC
+% - Redefine low-level commands defined in report.cls.
+% - Indent of 2 chars is preserved for ease of comparison.
+% Summary of changes from default params:
+% Width of page number (\@pnumwidth): 1.55em -> 2.7em
+% Width of chapter number: 1.5em -> 1.8em
+% Indent of section number: 1.5em -> 1.8em
+% Width of section number: 2.6em -> 3.2em
+%   Indent of subsection number: 4.1em -> 5em
+% Width of subsection number: 3.5em -> 4.3em
+%
+% These params can have 4 digit page counts, 2 digit chapter counts,
+% section counts of 4 digits + 1 period (e.g., 18.10), and subsection counts
+% of 5 digits + 2 periods (e.g., 18.7.13).
+\makeatletter
+%% Redefine \@pnumwidth (page number width)
+\renewcommand*\@pnumwidth{2.7em}
+%% Redefine \l@chapter (chapter list entry)
+\renewcommand*\l@chapter[2]{%
+ \ifnum \c@tocdepth >\m@ne
+ \addpenalty{-\@highpenalty}%
+ \vskip 1.0em \@plus\p@
+ \setlength\@tempdima{1.8em}%
+ \begingroup
+ \parindent \z@ \rightskip \@pnumwidth
+ \parfillskip -\@pnumwidth
+ \leavevmode \bfseries
+ \advance\leftskip\@tempdima
+ \hskip -\leftskip
+ #1\nobreak\hfil
+ \nobreak\hb@xt@\@pnumwidth{\hss #2%
+ \kern-\p@\kern\p@}\par
+ \penalty\@highpenalty
+ \endgroup
+ \fi}
+%% Redefine \l@section and \l@subsection
+\renewcommand*\l@section{\@dottedtocline{1}{1.8em}{3.2em}}
+\renewcommand*\l@subsection{\@dottedtocline{2}{5em}{4.3em}}
+\makeatother
+%% Sphinx < 1.8 doesn't have \sphinxtableofcontentshook
+\providecommand{\sphinxtableofcontentshook}{}
+%% Undefine it for compatibility with Sphinx 1.7.9
+\renewcommand{\sphinxtableofcontentshook}{} % Empty the hook
+
+% Prevent column squeezing of tabulary. \tymin is set by Sphinx as:
+% \setlength{\tymin}{3\fontcharwd\font`0 }
+% , which is too short.
+\setlength{\tymin}{20em}
+
+% Adjust \headheight for fancyhdr
+\addtolength{\headheight}{1.6pt}
+\addtolength{\topmargin}{-1.6pt}
+
+% Translations have Asian (CJK) characters which are only displayed if
+% xeCJK is used
+\IfFontExistsTF{Noto Sans CJK SC}{
+ % Load xeCJK when CJK font is available
+ \usepackage{xeCJK}
+ % Noto CJK fonts don't provide slant shape. [AutoFakeSlant] permits
+ % its emulation.
+  % Select the KR variant at the beginning of each document so that
+  % half-width quotation and apostrophe symbols are used in the TOC of
+  % Latin documents.
+ \IfFontExistsTF{Noto Serif CJK KR}{
+ \setCJKmainfont{Noto Serif CJK KR}[AutoFakeSlant]
+ }{
+ \setCJKmainfont{Noto Sans CJK KR}[AutoFakeSlant]
+ }
+ \setCJKsansfont{Noto Sans CJK KR}[AutoFakeSlant]
+ \setCJKmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant]
+ % Teach xeCJK of half-width symbols
+  \xeCJKDeclareCharClass{HalfLeft}{`“,`‘}
+  \xeCJKDeclareCharClass{HalfRight}{`”,`’}
+ % CJK Language-specific font choices
+ %% for Simplified Chinese
+ \IfFontExistsTF{Noto Serif CJK SC}{
+ \newCJKfontfamily[SCmain]\scmain{Noto Serif CJK SC}[AutoFakeSlant]
+ \newCJKfontfamily[SCserif]\scserif{Noto Serif CJK SC}[AutoFakeSlant]
+ }{
+ \newCJKfontfamily[SCmain]\scmain{Noto Sans CJK SC}[AutoFakeSlant]
+ \newCJKfontfamily[SCserif]\scserif{Noto Sans CJK SC}[AutoFakeSlant]
+ }
+ \newCJKfontfamily[SCsans]\scsans{Noto Sans CJK SC}[AutoFakeSlant]
+ \newCJKfontfamily[SCmono]\scmono{Noto Sans Mono CJK SC}[AutoFakeSlant]
+ %% for Traditional Chinese
+ \IfFontExistsTF{Noto Serif CJK TC}{
+ \newCJKfontfamily[TCmain]\tcmain{Noto Serif CJK TC}[AutoFakeSlant]
+ \newCJKfontfamily[TCserif]\tcserif{Noto Serif CJK TC}[AutoFakeSlant]
+ }{
+ \newCJKfontfamily[TCmain]\tcmain{Noto Sans CJK TC}[AutoFakeSlant]
+ \newCJKfontfamily[TCserif]\tcserif{Noto Sans CJK TC}[AutoFakeSlant]
+ }
+ \newCJKfontfamily[TCsans]\tcsans{Noto Sans CJK TC}[AutoFakeSlant]
+ \newCJKfontfamily[TCmono]\tcmono{Noto Sans Mono CJK TC}[AutoFakeSlant]
+ %% for Korean
+ \IfFontExistsTF{Noto Serif CJK KR}{
+ \newCJKfontfamily[KRmain]\krmain{Noto Serif CJK KR}[AutoFakeSlant]
+ \newCJKfontfamily[KRserif]\krserif{Noto Serif CJK KR}[AutoFakeSlant]
+ }{
+ \newCJKfontfamily[KRmain]\krmain{Noto Sans CJK KR}[AutoFakeSlant]
+ \newCJKfontfamily[KRserif]\krserif{Noto Sans CJK KR}[AutoFakeSlant]
+ }
+ \newCJKfontfamily[KRsans]\krsans{Noto Sans CJK KR}[AutoFakeSlant]
+ \newCJKfontfamily[KRmono]\krmono{Noto Sans Mono CJK KR}[AutoFakeSlant]
+ %% for Japanese
+ \IfFontExistsTF{Noto Serif CJK JP}{
+ \newCJKfontfamily[JPmain]\jpmain{Noto Serif CJK JP}[AutoFakeSlant]
+ \newCJKfontfamily[JPserif]\jpserif{Noto Serif CJK JP}[AutoFakeSlant]
+ }{
+ \newCJKfontfamily[JPmain]\jpmain{Noto Sans CJK JP}[AutoFakeSlant]
+ \newCJKfontfamily[JPserif]\jpserif{Noto Sans CJK JP}[AutoFakeSlant]
+ }
+ \newCJKfontfamily[JPsans]\jpsans{Noto Sans CJK JP}[AutoFakeSlant]
+ \newCJKfontfamily[JPmono]\jpmono{Noto Sans Mono CJK JP}[AutoFakeSlant]
+ % Dummy commands for Sphinx < 2.3 (no 'extrapackages' support)
+ \providecommand{\onehalfspacing}{}
+ \providecommand{\singlespacing}{}
+ % Define custom macros to on/off CJK
+ %% One and half spacing for CJK contents
+ \newcommand{\kerneldocCJKon}{\makexeCJKactive\onehalfspacing}
+ \newcommand{\kerneldocCJKoff}{\makexeCJKinactive\singlespacing}
+ % Define custom macros for switching CJK font setting
+ %% for Simplified Chinese
+ \newcommand{\kerneldocBeginSC}{%
+ \begingroup%
+ \scmain%
+    \xeCJKDeclareCharClass{FullLeft}{`“,`‘}% Full-width in SC
+    \xeCJKDeclareCharClass{FullRight}{`”,`’}% Full-width in SC
+ \renewcommand{\CJKrmdefault}{SCserif}%
+ \renewcommand{\CJKsfdefault}{SCsans}%
+ \renewcommand{\CJKttdefault}{SCmono}%
+ \xeCJKsetup{CJKspace = false}% gobble white spaces by ' '
+ % For CJK ascii-art alignment
+ \setmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]%
+ }
+ \newcommand{\kerneldocEndSC}{\endgroup}
+ %% for Traditional Chinese
+ \newcommand{\kerneldocBeginTC}{%
+ \begingroup%
+ \tcmain%
+    \xeCJKDeclareCharClass{FullLeft}{`“,`‘}% Full-width in TC
+    \xeCJKDeclareCharClass{FullRight}{`”,`’}% Full-width in TC
+ \renewcommand{\CJKrmdefault}{TCserif}%
+ \renewcommand{\CJKsfdefault}{TCsans}%
+ \renewcommand{\CJKttdefault}{TCmono}%
+ \xeCJKsetup{CJKspace = false}% gobble white spaces by ' '
+ % For CJK ascii-art alignment
+ \setmonofont{Noto Sans Mono CJK TC}[AutoFakeSlant]%
+ }
+ \newcommand{\kerneldocEndTC}{\endgroup}
+ %% for Korean
+ \newcommand{\kerneldocBeginKR}{%
+ \begingroup%
+ \krmain%
+ \renewcommand{\CJKrmdefault}{KRserif}%
+ \renewcommand{\CJKsfdefault}{KRsans}%
+ \renewcommand{\CJKttdefault}{KRmono}%
+ % \xeCJKsetup{CJKspace = true} % true by default
+ % For CJK ascii-art alignment (still misaligned for Hangul)
+ \setmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant]%
+ }
+ \newcommand{\kerneldocEndKR}{\endgroup}
+ %% for Japanese
+ \newcommand{\kerneldocBeginJP}{%
+ \begingroup%
+ \jpmain%
+ \renewcommand{\CJKrmdefault}{JPserif}%
+ \renewcommand{\CJKsfdefault}{JPsans}%
+ \renewcommand{\CJKttdefault}{JPmono}%
+ \xeCJKsetup{CJKspace = false}% gobble white space by ' '
+ % For CJK ascii-art alignment
+ \setmonofont{Noto Sans Mono CJK JP}[AutoFakeSlant]%
+ }
+ \newcommand{\kerneldocEndJP}{\endgroup}
+
+ % Single spacing in literal blocks
+ \fvset{baselinestretch=1}
+ % To customize \sphinxtableofcontents
+ \usepackage{etoolbox}
+ % Inactivate CJK after tableofcontents
+ \apptocmd{\sphinxtableofcontents}{\kerneldocCJKoff}{}{}
+ \xeCJKsetup{CJKspace = true}% For inter-phrase space of Korean TOC
+}{ % No CJK font found
+ % Custom macros to on/off CJK and switch CJK fonts (Dummy)
+ \newcommand{\kerneldocCJKon}{}
+ \newcommand{\kerneldocCJKoff}{}
+  %% By defining \kerneldocBegin(SC|TC|KR|JP) as commands with an argument
+  %% and ignoring the argument (#1) in their definitions, the whole contents
+  %% of the CJK chapters can be ignored.
+ \newcommand{\kerneldocBeginSC}[1]{%
+ %% Put a note on missing CJK fonts in place of zh_CN translation.
+ \begin{sphinxadmonition}{note}{Note on missing fonts:}
+ Translations of Simplified Chinese (zh\_CN), Traditional Chinese
+ (zh\_TW), Korean (ko\_KR), and Japanese (ja\_JP) were skipped
+ due to the lack of suitable font families.
+
+ If you want them, please install ``Noto Sans CJK'' font families
+ by following instructions from
+ \sphinxcode{./scripts/sphinx-pre-install}.
+ Having optional ``Noto Serif CJK'' font families will improve
+ the looks of those translations.
+ \end{sphinxadmonition}}
+ \newcommand{\kerneldocEndSC}{}
+ \newcommand{\kerneldocBeginTC}[1]{}
+ \newcommand{\kerneldocEndTC}{}
+ \newcommand{\kerneldocBeginKR}[1]{}
+ \newcommand{\kerneldocEndKR}{}
+ \newcommand{\kerneldocBeginJP}[1]{}
+ \newcommand{\kerneldocEndJP}{}
+}
diff --git a/Documentation/sphinx/kfigure.py b/Documentation/sphinx/kfigure.py
index 3c78828330be..24d2b2addcce 100644
--- a/Documentation/sphinx/kfigure.py
+++ b/Documentation/sphinx/kfigure.py
@@ -31,10 +31,13 @@ u"""
* ``dot(1)``: Graphviz (https://www.graphviz.org). If Graphviz is not
available, the DOT language is inserted as literal-block.
+ For conversion to PDF, ``rsvg-convert(1)`` of librsvg
+ (https://gitlab.gnome.org/GNOME/librsvg) is used when available.
* SVG to PDF: To generate PDF, you need at least one of these tools:
- ``convert(1)``: ImageMagick (https://www.imagemagick.org)
+ - ``inkscape(1)``: Inkscape (https://inkscape.org/)
List of customizations:
@@ -49,6 +52,7 @@ import os
from os import path
import subprocess
from hashlib import sha1
+import re
from docutils import nodes
from docutils.statemachine import ViewList
from docutils.parsers.rst import directives
@@ -109,10 +113,20 @@ def pass_handle(self, node): # pylint: disable=W0613
# Graphviz's dot(1) support
dot_cmd = None
+# dot(1) -Tpdf should be used
+dot_Tpdf = False
# ImageMagick' convert(1) support
convert_cmd = None
+# librsvg's rsvg-convert(1) support
+rsvg_convert_cmd = None
+
+# Inkscape's inkscape(1) support
+inkscape_cmd = None
+# Inkscape prior to 1.0 uses different command options
+inkscape_ver_one = False
+
def setup(app):
# check toolchain first
@@ -160,23 +174,62 @@ def setupTools(app):
This function is called once, when the builder is initiated.
"""
- global dot_cmd, convert_cmd # pylint: disable=W0603
+ global dot_cmd, dot_Tpdf, convert_cmd, rsvg_convert_cmd # pylint: disable=W0603
+ global inkscape_cmd, inkscape_ver_one # pylint: disable=W0603
kernellog.verbose(app, "kfigure: check installed tools ...")
dot_cmd = which('dot')
convert_cmd = which('convert')
+ rsvg_convert_cmd = which('rsvg-convert')
+ inkscape_cmd = which('inkscape')
if dot_cmd:
kernellog.verbose(app, "use dot(1) from: " + dot_cmd)
+
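+        # 'dot -Thelp' exits with a non-zero status but still prints the
+        # supported output formats, so keep the output captured from
+        # CalledProcessError as well and search it for PDF support below.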
+ try:
+ dot_Thelp_list = subprocess.check_output([dot_cmd, '-Thelp'],
+ stderr=subprocess.STDOUT)
+ except subprocess.CalledProcessError as err:
+ dot_Thelp_list = err.output
+ pass
+
+ dot_Tpdf_ptn = b'pdf'
+ dot_Tpdf = re.search(dot_Tpdf_ptn, dot_Thelp_list)
else:
kernellog.warn(app, "dot(1) not found, for better output quality install "
"graphviz from https://www.graphviz.org")
- if convert_cmd:
- kernellog.verbose(app, "use convert(1) from: " + convert_cmd)
+ if inkscape_cmd:
+ kernellog.verbose(app, "use inkscape(1) from: " + inkscape_cmd)
+ inkscape_ver = subprocess.check_output([inkscape_cmd, '--version'],
+ stderr=subprocess.DEVNULL)
+ ver_one_ptn = b'Inkscape 1'
+ inkscape_ver_one = re.search(ver_one_ptn, inkscape_ver)
+ convert_cmd = None
+ rsvg_convert_cmd = None
+ dot_Tpdf = False
+
else:
- kernellog.warn(app,
- "convert(1) not found, for SVG to PDF conversion install "
- "ImageMagick (https://www.imagemagick.org)")
+ if convert_cmd:
+ kernellog.verbose(app, "use convert(1) from: " + convert_cmd)
+ else:
+ kernellog.warn(app,
+ "Neither inkscape(1) nor convert(1) found.\n"
+ "For SVG to PDF conversion, "
+ "install either Inkscape (https://inkscape.org/) (preferred) or\n"
+ "ImageMagick (https://www.imagemagick.org)")
+
+ if rsvg_convert_cmd:
+ kernellog.verbose(app, "use rsvg-convert(1) from: " + rsvg_convert_cmd)
+ kernellog.verbose(app, "use 'dot -Tsvg' and rsvg-convert(1) for DOT -> PDF conversion")
+ dot_Tpdf = False
+ else:
+ kernellog.verbose(app,
+ "rsvg-convert(1) not found.\n"
+ " SVG rendering of convert(1) is done by ImageMagick-native renderer.")
+ if dot_Tpdf:
+ kernellog.verbose(app, "use 'dot -Tpdf' for DOT -> PDF conversion")
+ else:
+ kernellog.verbose(app, "use 'dot -Tsvg' and convert(1) for DOT -> PDF conversion")
# integrate conversion tools
@@ -242,7 +295,7 @@ def convert_image(img_node, translator, src_fname=None):
elif in_ext == '.svg':
if translator.builder.format == 'latex':
- if convert_cmd is None:
+ if not inkscape_cmd and convert_cmd is None:
kernellog.verbose(app,
"no SVG to PDF conversion available / include SVG raw.")
img_node.replace_self(file2literal(src_fname))
@@ -266,7 +319,14 @@ def convert_image(img_node, translator, src_fname=None):
if in_ext == '.dot':
kernellog.verbose(app, 'convert DOT to: {out}/' + _name)
- ok = dot2format(app, src_fname, dst_fname)
+ if translator.builder.format == 'latex' and not dot_Tpdf:
+ svg_fname = path.join(translator.builder.outdir, fname + '.svg')
+ ok1 = dot2format(app, src_fname, svg_fname)
+ ok2 = svg2pdf_by_rsvg(app, svg_fname, dst_fname)
+ ok = ok1 and ok2
+
+ else:
+ ok = dot2format(app, src_fname, dst_fname)
elif in_ext == '.svg':
kernellog.verbose(app, 'convert SVG to: {out}/' + _name)
@@ -303,22 +363,70 @@ def dot2format(app, dot_fname, out_fname):
return bool(exit_code == 0)
def svg2pdf(app, svg_fname, pdf_fname):
- """Converts SVG to PDF with ``convert(1)`` command.
+ """Converts SVG to PDF with ``inkscape(1)`` or ``convert(1)`` command.
- Uses ``convert(1)`` from ImageMagick (https://www.imagemagick.org) for
- conversion. Returns ``True`` on success and ``False`` if an error occurred.
+ Uses ``inkscape(1)`` from Inkscape (https://inkscape.org/) or ``convert(1)``
+ from ImageMagick (https://www.imagemagick.org) for conversion.
+ Returns ``True`` on success and ``False`` if an error occurred.
* ``svg_fname`` pathname of the input SVG file with extension (``.svg``)
* ``pdf_name`` pathname of the output PDF file with extension (``.pdf``)
"""
cmd = [convert_cmd, svg_fname, pdf_fname]
- # use stdout and stderr from parent
- exit_code = subprocess.call(cmd)
+ cmd_name = 'convert(1)'
+
+ if inkscape_cmd:
+ cmd_name = 'inkscape(1)'
+ if inkscape_ver_one:
+ cmd = [inkscape_cmd, '-o', pdf_fname, svg_fname]
+ else:
+ cmd = [inkscape_cmd, '-z', '--export-pdf=%s' % pdf_fname, svg_fname]
+
+ try:
+ warning_msg = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
+ exit_code = 0
+ except subprocess.CalledProcessError as err:
+ warning_msg = err.output
+ exit_code = err.returncode
+ pass
+
if exit_code != 0:
kernellog.warn(app, "Error #%d when calling: %s" % (exit_code, " ".join(cmd)))
+ if warning_msg:
+ kernellog.warn(app, "Warning msg from %s: %s"
+ % (cmd_name, str(warning_msg, 'utf-8')))
+ elif warning_msg:
+ kernellog.verbose(app, "Warning msg from %s (likely harmless):\n%s"
+ % (cmd_name, str(warning_msg, 'utf-8')))
+
return bool(exit_code == 0)
+def svg2pdf_by_rsvg(app, svg_fname, pdf_fname):
+ """Convert SVG to PDF with ``rsvg-convert(1)`` command.
+
+ * ``svg_fname`` pathname of input SVG file, including extension ``.svg``
+ * ``pdf_fname`` pathname of output PDF file, including extension ``.pdf``
+
+ Input SVG file should be the one generated by ``dot2format()``.
+ SVG -> PDF conversion is done by ``rsvg-convert(1)``.
+
+ If ``rsvg-convert(1)`` is unavailable, fall back to ``svg2pdf()``.
+
+ """
+
+ if rsvg_convert_cmd is None:
+ ok = svg2pdf(app, svg_fname, pdf_fname)
+ else:
+ cmd = [rsvg_convert_cmd, '--format=pdf', '-o', pdf_fname, svg_fname]
+ # use stdout and stderr from parent
+ exit_code = subprocess.call(cmd)
+ if exit_code != 0:
+ kernellog.warn(app, "Error #%d when calling: %s" % (exit_code, " ".join(cmd)))
+ ok = bool(exit_code == 0)
+
+ return ok
+
# image handling
# ---------------------
diff --git a/Documentation/spi/pxa2xx.rst b/Documentation/spi/pxa2xx.rst
index 6347580826be..716f65d87d04 100644
--- a/Documentation/spi/pxa2xx.rst
+++ b/Documentation/spi/pxa2xx.rst
@@ -101,7 +101,6 @@ device. All fields are optional.
u8 rx_threshold;
u8 dma_burst_size;
u32 timeout;
- int gpio_cs;
};
The "pxa2xx_spi_chip.tx_threshold" and "pxa2xx_spi_chip.rx_threshold" fields are
@@ -146,7 +145,6 @@ field. Below is a sample configuration using the PXA255 NSSP.
.rx_threshold = 8, /* SSP hardward FIFO threshold */
.dma_burst_size = 8, /* Byte wide transfers used so 8 byte bursts */
.timeout = 235, /* See Intel documentation */
- .gpio_cs = 2, /* Use external chip select */
};
static struct pxa2xx_spi_chip cs8405a_chip_info = {
@@ -154,7 +152,6 @@ field. Below is a sample configuration using the PXA255 NSSP.
.rx_threshold = 8, /* SSP hardward FIFO threshold */
.dma_burst_size = 8, /* Byte wide transfers used so 8 byte bursts */
.timeout = 235, /* See Intel documentation */
- .gpio_cs = 3, /* Use external chip select */
};
static struct spi_board_info streetracer_spi_board_info[] __initdata = {
diff --git a/Documentation/tools/index.rst b/Documentation/tools/index.rst
new file mode 100644
index 000000000000..0bb1e61bdcc0
--- /dev/null
+++ b/Documentation/tools/index.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Kernel tools
+============
+
+This book covers user-space tools that are shipped with the kernel source;
+more additions are needed here:
+
+.. toctree::
+ :maxdepth: 1
+
+ rtla/index
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/tools/rtla/common_hist_options.rst b/Documentation/tools/rtla/common_hist_options.rst
index 0266cd08a6c9..df53ff835bfb 100644
--- a/Documentation/tools/rtla/common_hist_options.rst
+++ b/Documentation/tools/rtla/common_hist_options.rst
@@ -2,7 +2,7 @@
Set the histogram bucket size (default *1*).
-**-e**, **--entries** *N*
+**-E**, **--entries** *N*
Set the number of entries of the histogram (default 256).
diff --git a/Documentation/tools/rtla/common_options.rst b/Documentation/tools/rtla/common_options.rst
index 721790ad984e..af76df6205d4 100644
--- a/Documentation/tools/rtla/common_options.rst
+++ b/Documentation/tools/rtla/common_options.rst
@@ -14,6 +14,25 @@
Save the stopped trace to [*file|osnoise_trace.txt*].
+**-e**, **--event** *sys:event*
+
+ Enable an event in the trace (**-t**) session. The argument can be a specific event, e.g., **-e** *sched:sched_switch*, or all events of a system group, e.g., **-e** *sched*. Multiple **-e** are allowed. It is only active when **-t** or **-a** is set.
+
+**--filter** *<filter>*
+
+ Filter the previous **-e** *sys:event* event with *<filter>*. For further information about event filtering see https://www.kernel.org/doc/html/latest/trace/events.html#event-filtering.
+
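+  For example, the illustrative command below would trace only the
+  sched_switch events in which the previous task's name was *rtla* (the
+  available filter fields depend on the chosen event):
+
+   rtla <command> <mode> -t -e sched:sched_switch --filter 'prev_comm == "rtla"'
+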
+**--trigger** *<trigger>*
+ Enable a trace event trigger on the previous **-e** *sys:event*.
+ If the *hist:* trigger is activated, the output histogram will be automatically saved to a file named *system_event_hist.txt*.
+ For example, the command:
+
+ rtla <command> <mode> -t -e osnoise:irq_noise --trigger="hist:key=desc,duration/1000:sort=desc,duration/1000:vals=hitcount"
+
+ will automatically save the content of the histogram associated with the *osnoise:irq_noise* event in *osnoise_irq_noise_hist.txt*.
+
+ For further information about event triggers see https://www.kernel.org/doc/html/latest/trace/events.html#event-triggers.
+
**-P**, **--priority** *o:prio|r:prio|f:prio|d:runtime:period*
Set scheduling parameters to the osnoise tracer threads, the format to set the priority are:
diff --git a/Documentation/tools/rtla/common_osnoise_description.rst b/Documentation/tools/rtla/common_osnoise_description.rst
index 8973c5df888f..d5d61615b967 100644
--- a/Documentation/tools/rtla/common_osnoise_description.rst
+++ b/Documentation/tools/rtla/common_osnoise_description.rst
@@ -1,7 +1,7 @@
The **rtla osnoise** tool is an interface for the *osnoise* tracer. The
*osnoise* tracer dispatches a kernel thread per-cpu. These threads read the
time in a loop while with preemption, softirq and IRQs enabled, thus
-allowing all the sources of operating systme noise during its execution.
+allowing all the sources of operating system noise during its execution.
The *osnoise*'s tracer threads take note of the delta between each time
read, along with an interference counter of all sources of interference.
At the end of each period, the *osnoise* tracer displays a summary of
diff --git a/Documentation/tools/rtla/common_osnoise_options.rst b/Documentation/tools/rtla/common_osnoise_options.rst
index d556883e4e26..f792ca58c211 100644
--- a/Documentation/tools/rtla/common_osnoise_options.rst
+++ b/Documentation/tools/rtla/common_osnoise_options.rst
@@ -1,3 +1,8 @@
+**-a**, **--auto** *us*
+
+ Set the automatic trace mode. This mode sets some commonly used options
+ while debugging the system. It is equivalent to using **-s** *us* **-T 1 -t**.
+
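+  For example, the hypothetical invocation below stops and saves the trace
+  once a single sample accumulates more than 40 us of noise:
+
+   rtla osnoise top -a 40
+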
**-p**, **--period** *us*
Set the *osnoise* tracer period in microseconds.
@@ -15,3 +20,8 @@
Stop the trace if the total sample is higher than the argument in microseconds.
If **-T** is set, it will also save the trace to the output.
+
+**-T**, **--threshold** *us*
+
+ Specify the minimum delta between two time reads to be considered noise.
+ The default threshold is *5 us*.
diff --git a/Documentation/tools/rtla/common_timerlat_options.rst b/Documentation/tools/rtla/common_timerlat_options.rst
index e9c1bfd55d48..bacdea6de7a3 100644
--- a/Documentation/tools/rtla/common_timerlat_options.rst
+++ b/Documentation/tools/rtla/common_timerlat_options.rst
@@ -1,3 +1,10 @@
+**-a**, **--auto** *us*
+
+ Set the automatic trace mode. This mode sets some commonly used options
+ while debugging the system. It is equivalent to using **-T** *us* **-s** *us*
+ **-t**. By default, the *timerlat* tracer uses FIFO:95 for *timerlat* threads,
+ thus equivalent to **-P** *f:95*.
+
**-p**, **--period** *us*
Set the *timerlat* tracer period in microseconds.
@@ -14,3 +21,8 @@
Save the stack trace at the *IRQ* if a *Thread* latency is higher than the
argument in us.
+
+**--dma-latency** *us*
+ Set /dev/cpu_dma_latency to *us*, aiming to bound exit-from-idle latencies.
+ *cyclictest* sets this value to *0* by default; use **--dma-latency** *0* to get
+ similar results.
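+
+ For example, to reproduce *cyclictest*'s default behavior (an illustrative
+ invocation):
+
+  rtla timerlat top --dma-latency 0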
diff --git a/Documentation/tools/rtla/index.rst b/Documentation/tools/rtla/index.rst
new file mode 100644
index 000000000000..840f0bf3e803
--- /dev/null
+++ b/Documentation/tools/rtla/index.rst
@@ -0,0 +1,26 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================
+The realtime Linux analysis tool
+================================
+
+RTLA provides a set of tools for the analysis of the kernel's realtime
+behavior on specific hardware.
+
+.. toctree::
+ :maxdepth: 1
+
+ rtla
+ rtla-osnoise
+ rtla-osnoise-hist
+ rtla-osnoise-top
+ rtla-timerlat
+ rtla-timerlat-hist
+ rtla-timerlat-top
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/tools/rtla/rtla-osnoise-hist.rst b/Documentation/tools/rtla/rtla-osnoise-hist.rst
index 52298ddd8701..f2e79d22c4c4 100644
--- a/Documentation/tools/rtla/rtla-osnoise-hist.rst
+++ b/Documentation/tools/rtla/rtla-osnoise-hist.rst
@@ -36,7 +36,7 @@ default). The reason for reducing the runtime is to avoid starving the
**rtla** tool. The tool is also set to run for *one minute*. The output
histogram is set to group outputs in buckets of *10us* and *25* entries::
- [root@f34 ~/]# rtla osnoise hist -P F:1 -c 0-11 -r 900000 -d 1M -b 10 -e 25
+ [root@f34 ~/]# rtla osnoise hist -P F:1 -c 0-11 -r 900000 -d 1M -b 10 -E 25
# RTLA osnoise histogram
# Time unit is microseconds (us)
# Duration: 0 00:01:00
diff --git a/Documentation/trace/osnoise-tracer.rst b/Documentation/trace/osnoise-tracer.rst
index b648cb9bf1f0..963def9f97c6 100644
--- a/Documentation/trace/osnoise-tracer.rst
+++ b/Documentation/trace/osnoise-tracer.rst
@@ -51,7 +51,7 @@ For example::
[root@f32 ~]# cd /sys/kernel/tracing/
[root@f32 tracing]# echo osnoise > current_tracer
-It is possible to follow the trace by reading the trace trace file::
+It is possible to follow the trace by reading the trace file::
[root@f32 tracing]# cat trace
# tracer: osnoise
@@ -108,7 +108,7 @@ The tracer has a set of options inside the osnoise directory, they are:
option.
- tracing_threshold: the minimum delta between two time() reads to be
considered as noise, in us. When set to 0, the default value will
- will be used, which is currently 5 us.
+ be used, which is currently 5 us.
Additional Tracing
------------------
diff --git a/Documentation/translations/conf.py b/Documentation/translations/conf.py
deleted file mode 100644
index 92cdbba74229..000000000000
--- a/Documentation/translations/conf.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-# SPDX-License-Identifier: GPL-2.0
-
-# -- Additinal options for LaTeX output ----------------------------------
-# font config for ascii-art alignment
-
-latex_elements['preamble'] += '''
- \\IfFontExistsTF{Noto Sans CJK SC}{
- % For CJK ascii-art alignment
- \\setmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]
- }{}
-'''
diff --git a/Documentation/translations/ja_JP/index.rst b/Documentation/translations/ja_JP/index.rst
index 88d4d98eed15..20738c931d02 100644
--- a/Documentation/translations/ja_JP/index.rst
+++ b/Documentation/translations/ja_JP/index.rst
@@ -3,7 +3,7 @@
\renewcommand\thesection*
\renewcommand\thesubsection*
\kerneldocCJKon
- \kerneldocBeginJP
+ \kerneldocBeginJP{
Japanese translations
=====================
@@ -15,4 +15,4 @@ Japanese translations
.. raw:: latex
- \kerneldocEndJP
+ }\kerneldocEndJP
diff --git a/Documentation/translations/ko_KR/index.rst b/Documentation/translations/ko_KR/index.rst
index f636b482fb4c..4add6b2fe1f2 100644
--- a/Documentation/translations/ko_KR/index.rst
+++ b/Documentation/translations/ko_KR/index.rst
@@ -3,7 +3,7 @@
\renewcommand\thesection*
\renewcommand\thesubsection*
\kerneldocCJKon
- \kerneldocBeginKR
+ \kerneldocBeginKR{
한국어 번역
===========
@@ -26,5 +26,4 @@
.. raw:: latex
- \normalsize
- \kerneldocEndKR
+ }\kerneldocEndKR
diff --git a/Documentation/translations/zh_CN/accounting/delay-accounting.rst b/Documentation/translations/zh_CN/accounting/delay-accounting.rst
index 67d5606e5401..f1849411018e 100644
--- a/Documentation/translations/zh_CN/accounting/delay-accounting.rst
+++ b/Documentation/translations/zh_CN/accounting/delay-accounting.rst
@@ -17,6 +17,8 @@ a) waiting for a CPU (while being runnable)
b) completion of synchronous block I/O initiated by the task
c) swapping in pages
d) memory reclaim
+e) thrashing page cache
+f) direct compact
and makes these statistics available to userspace through the taskstats
interface.
@@ -37,10 +39,10 @@ d) memory reclaim
returns a generic data structure to userspace, corresponding to per-pid or
per-tgid statistics. The delay accounting functionality fills in specific
fields of this structure. See
- include/linux/taskstats.h
+	include/uapi/linux/taskstats.h
for a description of the delay accounting related fields. The kernel usually
returns, as counters, the cumulative delays for CPU, synchronous block I/O,
-swap, memory reclaim etc.
+swap, memory reclaim, page cache thrashing, direct compaction etc.
Taking the difference of two successive readings of a given counter gives the
total delay the task waited for the corresponding resource in that interval.
@@ -72,40 +74,36 @@ toggled via kernel.task_delayacct. Note that only tasks started after enabling
General format of the getdelays command::
- getdelays [-t tgid] [-p pid] [-c cmd...]
+	getdelays [-dilv] [-t tgid] [-p pid]
Get delays, since system boot, for the task with pid 10::
- # ./getdelays -p 10
+	# ./getdelays -d -p 10
(output similar to the next case)
Get the sum of delays, since system boot, for all tasks with tgid 5::
- # ./getdelays -t 5
-
-
- CPU count real total virtual total delay total
- 7876 92005750 100000000 24001500
- IO count delay total
- 0 0
- SWAP count delay total
- 0 0
- RECLAIM count delay total
- 0 0
-
-Get delays seen when running a simple command::
-
- # ./getdelays -c ls /
-
- bin data1 data3 data5 dev home media opt root srv sys usr
- boot data2 data4 data6 etc lib mnt proc sbin subdomain tmp var
-
-
- CPU count real total virtual total delay total
- 6 4000250 4000000 0
- IO count delay total
- 0 0
- SWAP count delay total
- 0 0
- RECLAIM count delay total
- 0 0
+ # ./getdelays -d -t 5
+ print delayacct stats ON
+ TGID 5
+
+
+ CPU count real total virtual total delay total delay average
+ 8 7000000 6872122 3382277 0.423ms
+ IO count delay total delay average
+ 0 0 0ms
+ SWAP count delay total delay average
+ 0 0 0ms
+ RECLAIM count delay total delay average
+ 0 0 0ms
+ THRASHING count delay total delay average
+ 0 0 0ms
+ COMPACT count delay total delay average
+ 0 0 0ms
+
+Get IO accounting for pid 1; it works only with -p::
+ # ./getdelays -i -p 1
+ printing IO accounting
+ linuxrc: read=65536, write=0, cancelled_write=0
+
+The above command can be used with -v to get more debug information.
diff --git a/Documentation/translations/zh_CN/admin-guide/index.rst b/Documentation/translations/zh_CN/admin-guide/index.rst
index 548e57f4b3f1..be535ffaf4b0 100644
--- a/Documentation/translations/zh_CN/admin-guide/index.rst
+++ b/Documentation/translations/zh_CN/admin-guide/index.rst
@@ -20,15 +20,15 @@ Linux kernel user's and administrator's guide
Todolist:
- kernel-parameters
- devices
- sysctl/index
+* kernel-parameters
+* devices
+* sysctl/index
This section describes CPU vulnerabilities and their mitigations.
Todolist:
- hw-vuln/index
+* hw-vuln/index
The following set of documents is aimed at users trying to track down
problems and bugs.
@@ -44,18 +44,18 @@ Todolist:
Todolist:
- reporting-bugs
- ramoops
- dynamic-debug-howto
- kdump/index
- perf/index
+* reporting-bugs
+* ramoops
+* dynamic-debug-howto
+* kdump/index
+* perf/index
This is the beginning of the chapters of interest to application developers.
Documents covering various aspects of the kernel ABI can be found here.
Todolist:
- sysfs-rules
+* sysfs-rules
The rest of this manual consists of various guides describing how to
configure specific behaviors of the kernel to your liking.
@@ -69,61 +69,61 @@ Todolist:
lockup-watchdogs
unicode
sysrq
+ mm/index
Todolist:
- acpi/index
- aoe/index
- auxdisplay/index
- bcache
- binderfs
- binfmt-misc
- blockdev/index
- bootconfig
- braille-console
- btmrvl
- cgroup-v1/index
- cgroup-v2
- cifs/index
- dell_rbu
- device-mapper/index
- edid
- efi-stub
- ext4
- nfs/index
- gpio/index
- highuid
- hw_random
- initrd
- iostats
- java
- jfs
- kernel-per-CPU-kthreads
- laptops/index
- lcd-panel-cgram
- ldm
- LSM/index
- md
- media/index
- mm/index
- module-signing
- mono
- namespaces/index
- numastat
- parport
- perf-security
- pm/index
- pnp
- rapidio
- ras
- rtc
- serial-console
- svga
- thunderbolt
- ufs
- vga-softcursor
- video-output
- xfs
+* acpi/index
+* aoe/index
+* auxdisplay/index
+* bcache
+* binderfs
+* binfmt-misc
+* blockdev/index
+* bootconfig
+* braille-console
+* btmrvl
+* cgroup-v1/index
+* cgroup-v2
+* cifs/index
+* dell_rbu
+* device-mapper/index
+* edid
+* efi-stub
+* ext4
+* nfs/index
+* gpio/index
+* highuid
+* hw_random
+* initrd
+* iostats
+* java
+* jfs
+* kernel-per-CPU-kthreads
+* laptops/index
+* lcd-panel-cgram
+* ldm
+* LSM/index
+* md
+* media/index
+* module-signing
+* mono
+* namespaces/index
+* numastat
+* parport
+* perf-security
+* pm/index
+* pnp
+* rapidio
+* ras
+* rtc
+* serial-console
+* svga
+* thunderbolt
+* ufs
+* vga-softcursor
+* video-output
+* xfs
.. only:: subproject and html
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst
new file mode 100644
index 000000000000..0c8276109fc0
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst
@@ -0,0 +1,28 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../../../disclaimer-zh_CN.rst
+
+:Original: Documentation/admin-guide/mm/damon/index.rst
+
+:Translator:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:Proofreader:
+
+========================
+Monitoring Data Accesses
+========================
+
+:doc:`DAMON </vm/damon/index>` allows light-weight data access monitoring.
+Using DAMON, users can analyze the memory access patterns of their systems
+and optimize those.
+
+.. toctree::
+ :maxdepth: 2
+
+ start
+ usage
+ reclaim
+
+
+
+
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst
new file mode 100644
index 000000000000..9e541578f38d
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst
@@ -0,0 +1,232 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../../../disclaimer-zh_CN.rst
+
+:Original: Documentation/admin-guide/mm/damon/reclaim.rst
+
+:Translator:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:Proofreader:
+
+=======================
+DAMON-based Reclamation
+=======================
+
+DAMON-based Reclamation (DAMON_RECLAIM) is a static kernel module intended
+for proactive and lightweight reclamation under light memory pressure. It
+does not aim to replace the LRU-list based page-granularity reclamation, but
+to be selectively used for different levels of memory pressure and
+requirements.
+
+Where Proactive Reclamation is Required?
+========================================
+
+On general memory over-committed systems, proactively reclaiming cold pages
+helps save memory and reduce the latency spikes that are incurred by the
+direct reclaim of processes or the CPU consumption of kswapd, while causing
+only minimal performance degradation [1]_ [2]_.
+
+Free Pages Reporting [3]_ based memory over-commit virtualization systems
+are a good example. In such systems, the guests report their free memory to
+the host, and the host reallocates the reported memory to other guests. As
+a result, the memory of the systems is fully utilized. However, the guests
+might not be so memory-frugal, mainly because some kernel subsystems and
+user-space applications are designed to use as much memory as available.
+Then, the guests would report only a small amount of memory as free to the
+host, resulting in a drop of the memory utilization of the systems. Running
+the proactive reclamation in the guests mitigates this problem.
+
+How It Works?
+=============
+
+DAMON_RECLAIM finds memory regions that have not been accessed for a
+specific time and pages them out. To avoid it consuming too much CPU for
+the paging out operations, a speed limit can be configured. Under the speed
+limit, it pages out the memory regions that have not been accessed for the
+longest time first. System administrators can also configure under what
+situation this scheme should automatically be activated and deactivated,
+with three memory pressure watermarks.
+
+Interface: Module Parameters
+============================
+
+To use this feature, you should first ensure your system is running on a
+kernel built with ``CONFIG_DAMON_RECLAIM=y``.
+
+To let sysadmins enable or disable it and tune it for a given system,
+DAMON_RECLAIM utilizes module parameters. That is, you can put
+``damon_reclaim.<parameter>=<value>`` on the kernel boot command line, or
+write proper values to the
+``/sys/modules/damon_reclaim/parameters/<parameter>`` files.
+
+Note that the parameter values except ``enabled`` are applied only when
+DAMON_RECLAIM starts. Therefore, if you want to apply new parameter values
+at runtime while DAMON_RECLAIM is already enabled, you should disable and
+re-enable it via the ``enabled`` parameter file, writing the new values to
+the proper parameter files before the re-enablement.
+
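+For example, to change ``min_age`` to 60 seconds at runtime (a sketch that
+follows the parameter paths given above)::
+
+    # cd /sys/modules/damon_reclaim/parameters
+    # echo N > enabled
+    # echo 60000000 > min_age
+    # echo Y > enabled
+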
+Below are the descriptions of each parameter.
+
+enabled
+-------
+
+Enable or disable DAMON_RECLAIM.
+
+You can enable DAMON_RECLAIM by setting the value of this parameter as
+``Y``. Setting it as ``N`` disables DAMON_RECLAIM. Note that DAMON_RECLAIM
+could do no real monitoring and reclamation due to the watermarks-based
+activation condition. Refer to the descriptions of the watermark parameters
+below for this.
+
+min_age
+-------
+
+Time threshold for cold memory regions identification in microseconds.
+
+If a memory region is not accessed for this or a longer time, DAMON_RECLAIM
+identifies the region as cold, and reclaims it.
+
+120 seconds by default.
+
+quota_ms
+--------
+
+Limit of time for the reclamation in milliseconds.
+
+DAMON_RECLAIM tries to use only up to this time within a time window
+(quota_reset_interval_ms) for trying the reclamation of cold pages. This
+can be used to limit the CPU consumption of DAMON_RECLAIM. If the value is
+zero, the limit is disabled.
+
+10 ms by default.
+
+quota_sz
+--------
+
+Limit of size of memory for the reclamation in bytes.
+
+DAMON_RECLAIM charges the amount of memory which it tried to reclaim within
+a time window (quota_reset_interval_ms) and makes it not exceed this limit.
+This can be used to limit the consumption of CPU and IO. If this value is
+zero, the limit is disabled.
+
+128 MiB by default.
+
+quota_reset_interval_ms
+-----------------------
+
+The time/size quota charge reset interval in milliseconds.
+
+The charge reset interval for the quota of time (quota_ms) and size
+(quota_sz). That is, DAMON_RECLAIM tries to reclaim 'no more than' quota_ms
+milliseconds or quota_sz bytes of memory within this interval.
+
+1 second by default.
+
+wmarks_interval
+---------------
+
+The minimal time to wait before checking the watermarks, when DAMON_RECLAIM
+is enabled but inactive due to its watermarks rule.
+
+wmarks_high
+-----------
+
+Free memory rate (per thousand) for the high watermark.
+
+If the free memory of the system, in bytes per thousand bytes, is higher
+than this, DAMON_RECLAIM becomes inactive, so it does nothing but
+periodically checks the watermarks.
+
+wmarks_mid
+----------
+
+Free memory rate (per thousand) for the middle watermark.
+
+If the free memory of the system, in bytes per thousand bytes, is between
+this and the low watermark, DAMON_RECLAIM becomes active, so it starts the
+monitoring and the reclaiming.
+
+wmarks_low
+----------
+
+Free memory rate (per thousand) for the low watermark.
+
+If the free memory of the system, in bytes per thousand bytes, is lower
+than this, DAMON_RECLAIM becomes inactive, so it does nothing but
+periodically checks the watermarks. In this case, the system falls back to
+the LRU-list based page-granularity reclamation logic.
+
+sample_interval
+---------------
+
+Sampling interval for the monitoring in microseconds.
+
+The sampling interval of DAMON for the cold memory monitoring. Please refer
+to the DAMON documentation (:doc:`usage`) for more detail.
+
+aggr_interval
+-------------
+
+Aggregation interval for the monitoring in microseconds.
+
+The aggregation interval of DAMON for the cold memory monitoring. Please
+refer to the DAMON documentation (:doc:`usage`) for more detail.
+
+min_nr_regions
+--------------
+
+Minimum number of monitoring regions.
+
+The minimal number of monitoring regions of DAMON for the cold memory
+monitoring. This can be used to set a lower bound on the monitoring
+quality. However, setting this too high could result in increased
+monitoring overhead. Please refer to the DAMON documentation (:doc:`usage`)
+for more detail.
+
+max_nr_regions
+--------------
+
+Maximum number of monitoring regions.
+
+The maximum number of monitoring regions of DAMON for the cold memory
+monitoring. This can be used to set an upper bound on the monitoring
+overhead. However, setting this too low could result in bad monitoring
+quality. Please refer to the DAMON documentation (:doc:`usage`) for more
+detail.
+
+monitor_region_start
+--------------------
+
+Start of the target memory region in physical address.
+
+The start physical address of the memory region that DAMON_RECLAIM will do
+its work against. That is, DAMON_RECLAIM will find cold memory regions in
+this region and reclaim them. By default, the biggest System RAM region is
+used.
+
+monitor_region_end
+------------------
+
+End of the target memory region in physical address.
+
+The end physical address of the memory region that DAMON_RECLAIM will do
+its work against. That is, DAMON_RECLAIM will find cold memory regions in
+this region and reclaim them. By default, the biggest System RAM region is
+used.
+
+kdamond_pid
+-----------
+
+PID of the DAMON thread.
+
+If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread.
+Else, -1.
+
+nr_reclaim_tried_regions
+------------------------
+
+Number of memory regions that DAMON_RECLAIM has tried to reclaim.
+
+bytes_reclaim_tried_regions
+---------------------------
+
+Total bytes of memory regions that DAMON_RECLAIM has tried to reclaim.
+
+nr_reclaimed_regions
+--------------------
+
+Number of memory regions that DAMON_RECLAIM has successfully reclaimed.
+
+bytes_reclaimed_regions
+-----------------------
+
+Total bytes of memory regions that DAMON_RECLAIM has successfully reclaimed.
+
+nr_quota_exceeds
+----------------
+
+Number of times that the time/space quota limits have been exceeded.
+
+Example
+=======
+
+The runtime example commands below make DAMON_RECLAIM find memory regions
+that have not been accessed for 30 seconds or more and page them out. To
+avoid DAMON_RECLAIM consuming too much CPU time for the paging out
+operations, the reclamation is limited to 1 GiB per second. They also ask
+DAMON_RECLAIM to do nothing if the system's free memory rate is higher than
+50%, but to start the real work if it becomes lower than 40%. If
+DAMON_RECLAIM doesn't make progress and the free memory rate therefore
+falls below 20%, they ask DAMON_RECLAIM to do nothing again, so that we can
+fall back to the LRU-list based page-granularity reclamation::
+
+    # cd /sys/modules/damon_reclaim/parameters
+    # echo 30000000 > min_age
+    # echo $((1 * 1024 * 1024 * 1024)) > quota_sz
+    # echo 1000 > quota_reset_interval_ms
+    # echo 500 > wmarks_high
+    # echo 400 > wmarks_mid
+    # echo 200 > wmarks_low
+    # echo Y > enabled
+
+.. [1] https://research.google/pubs/pub48551/
+.. [2] https://lwn.net/Articles/787611/
+.. [3] https://www.kernel.org/doc/html/latest/vm/free_page_reporting.html
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/start.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/start.rst
new file mode 100644
index 000000000000..67d1b49481dc
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/start.rst
@@ -0,0 +1,132 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../../../disclaimer-zh_CN.rst
+
+:Original: Documentation/admin-guide/mm/damon/start.rst
+
+:Translator:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:Proofreader:
+
+===============
+Getting Started
+===============
+
+This document briefly describes how you can use DAMON by demonstrating its
+default user space tool. Please note that, for brevity, this document
+describes only a part of its features. Please refer to the tool's usage
+`doc <https://github.com/awslabs/damo/blob/next/USAGE.md>`_ for more details.
+
+
+Prerequisites
+=============
+
+Kernel
+------
+
+You should first ensure your system is running on a kernel built with
+``CONFIG_DAMON_*=y``.
+
+
+User Space Tool
+---------------
+
+For the demonstration, we will use the default user space tool for DAMON,
+called DAMON Operator (DAMO). It is available at
+https://github.com/awslabs/damo. The examples below assume that DAMO is on
+your ``$PATH``; that is not mandatory, though.
+
+Because DAMO uses the debugfs interface of DAMON (refer to :doc:`usage` for
+the details), you should ensure debugfs is mounted. Mount it manually as
+below::
+
+ # mount -t debugfs none /sys/kernel/debug/
+
+or append the following line to your ``/etc/fstab`` file so that your system
+can automatically mount debugfs upon booting::
+
+ debugfs /sys/kernel/debug debugfs defaults 0 0
+
+
+Recording Data Access Patterns
+==============================
+
+The commands below record the memory access patterns of a program and save
+the monitoring results to a file. ::
+
+ $ git clone https://github.com/sjp38/masim
+ $ cd masim; make; ./masim ./configs/zigzag.cfg &
+ $ sudo damo record -o damon.data $(pidof masim)
+
+The first two lines of the commands download an artificial memory access
+generator program and run it in the background. The generator will
+repeatedly access two 100 MiB memory regions one by one. You can substitute
+this with your real workload. The last line asks ``damo`` to record the
+access pattern in the ``damon.data`` file.
+
+
+Visualizing Recorded Patterns
+=============================
+
+You can visualize the pattern in a heatmap, showing which memory region
+(x-axis) got accessed when (y-axis) and how frequently (number).::
+
+ $ sudo damo report heats --heatmap stdout
+ 22222222222222222222222222222222222222211111111111111111111111111111111111111100
+ 44444444444444444444444444444444444444434444444444444444444444444444444444443200
+ 44444444444444444444444444444444444444433444444444444444444444444444444444444200
+ 33333333333333333333333333333333333333344555555555555555555555555555555555555200
+ 33333333333333333333333333333333333344444444444444444444444444444444444444444200
+ 22222222222222222222222222222222222223355555555555555555555555555555555555555200
+ 00000000000000000000000000000000000000288888888888888888888888888888888888888400
+ 00000000000000000000000000000000000000288888888888888888888888888888888888888400
+ 33333333333333333333333333333333333333355555555555555555555555555555555555555200
+ 88888888888888888888888888888888888888600000000000000000000000000000000000000000
+ 88888888888888888888888888888888888888600000000000000000000000000000000000000000
+ 33333333333333333333333333333333333333444444444444444444444444444444444444443200
+ 00000000000000000000000000000000000000288888888888888888888888888888888888888400
+ [...]
+ # access_frequency: 0 1 2 3 4 5 6 7 8 9
+ # x-axis: space (139728247021568-139728453431248: 196.848 MiB)
+ # y-axis: time (15256597248362-15326899978162: 1 m 10.303 s)
+ # resolution: 80x40 (2.461 MiB and 1.758 s for each character)
+
+You can also visualize the distribution of the working set size, sorted by
+the size.::
+
+ $ sudo damo report wss --range 0 101 10
+ # <percentile> <wss>
+ # target_id 18446632103789443072
+ # avr: 107.708 MiB
+ 0 0 B | |
+ 10 95.328 MiB |**************************** |
+ 20 95.332 MiB |**************************** |
+ 30 95.340 MiB |**************************** |
+ 40 95.387 MiB |**************************** |
+ 50 95.387 MiB |**************************** |
+ 60 95.398 MiB |**************************** |
+ 70 95.398 MiB |**************************** |
+ 80 95.504 MiB |**************************** |
+ 90 190.703 MiB |********************************************************* |
+ 100 196.875 MiB |***********************************************************|
+
+Using the ``--sortby`` option with the above command, you can show how the
+working set size has chronologically changed.::
+
+ $ sudo damo report wss --range 0 101 10 --sortby time
+ # <percentile> <wss>
+ # target_id 18446632103789443072
+ # avr: 107.708 MiB
+ 0 3.051 MiB | |
+ 10 190.703 MiB |***********************************************************|
+ 20 95.336 MiB |***************************** |
+ 30 95.328 MiB |***************************** |
+ 40 95.387 MiB |***************************** |
+ 50 95.332 MiB |***************************** |
+ 60 95.320 MiB |***************************** |
+ 70 95.398 MiB |***************************** |
+ 80 95.398 MiB |***************************** |
+ 90 95.340 MiB |***************************** |
+ 100 95.398 MiB |***************************** |
+
+
+Data Access Pattern Aware Memory Management
+===========================================
+
+The below three commands make every memory region of size >=4K that has not
+been accessed for >=60 seconds in your workload to be swapped out. ::
+
+ $ echo "#min-size max-size min-acc max-acc min-age max-age action" > test_scheme
+ $ echo "4K max 0 0 60s max pageout" >> test_scheme
+ $ damo schemes -c test_scheme <pid of your workload>
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst
new file mode 100644
index 000000000000..5d7533347216
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst
@@ -0,0 +1,286 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../../../disclaimer-zh_CN.rst
+
+:Original: Documentation/admin-guide/mm/damon/usage.rst
+
+:Translator:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:Proofreader:
+
+===============
+Detailed Usages
+===============
+
+DAMON provides the three interfaces below for different users.
+
+- *DAMON user space tool.*
+  `This <https://github.com/awslabs/damo>`_ is for privileged people such as
+  system administrators who want a just-working human-friendly interface.
+  Using this, users can use DAMON's major features in a human-friendly way.
+  It may not be highly tuned for special cases, though. It supports both
+  virtual and physical address spaces monitoring. For more details, please
+  refer to its `usage document
+  <https://github.com/awslabs/damo/blob/next/USAGE.md>`_.
+- *debugfs interface.*
+  :ref:`This <debugfs_interface>` is for privileged user space programmers
+  who want a more optimized use of DAMON. Using this, users can use DAMON's
+  major features by reading from and writing to special debugfs files.
+  Therefore, you can write and use your personalized DAMON debugfs wrapper
+  programs that read/write the debugfs files for you. The `DAMON user space
+  tool <https://github.com/awslabs/damo>`_ is one example of such programs.
+  It supports both virtual and physical address spaces monitoring. Note that
+  this interface provides only simple :ref:`statistics <damos_stats>` for the
+  monitoring results. For detailed monitoring results, DAMON provides a
+  :ref:`tracepoint <tracepoint>`.
+
+- *Kernel space programming interface.*
+  :doc:`This </vm/damon/api>` is for kernel space programmers. Using this,
+  users can utilize every feature of DAMON most flexibly and efficiently by
+  writing kernel space DAMON application programs. You can even extend DAMON
+  for various address spaces. For details, please refer to the interface
+  :doc:`document </vm/damon/api>`.
+
+
+debugfs Interface
+=================
+
+DAMON exports eight files, ``attrs``, ``target_ids``, ``init_regions``,
+``schemes``, ``monitor_on``, ``kdamond_pid``, ``mk_contexts`` and
+``rm_contexts`` under its debugfs directory, ``<debugfs>/damon/``.
+
+
+Attributes
+----------
+
+Users can get and set the ``sampling interval``, ``aggregation interval``,
+``regions update interval``, and min/max number of monitoring target regions
+by reading from and writing to the ``attrs`` file. To know about the
+monitoring attributes in detail, please refer to :doc:`/vm/damon/design`.
+For example, the commands below set those values to 5 ms, 100 ms, 1,000 ms,
+10 and 1000, and then check them again::
+
+ # cd <debugfs>/damon
+ # echo 5000 100000 1000000 10 1000 > attrs
+ # cat attrs
+ 5000 100000 1000000 10 1000
+
+
+Target IDs
+----------
+
+Some types of address spaces support multiple monitoring targets. For
+example, the virtual memory address space monitoring can have multiple
+processes as the monitoring targets. Users can set the targets by writing
+the relevant id values of the targets to the ``target_ids`` file, and get
+the ids of the current targets by reading from it. In case of the virtual
+address space monitoring, the values should be the pids of the monitoring
+target processes. For example, the commands below set the processes having
+pids 42 and 4242 as the monitoring targets and check it again::
+
+ # cd <debugfs>/damon
+ # echo 42 4242 > target_ids
+ # cat target_ids
+ 42 4242
+
+Users can also monitor the physical memory address space of the system by
+writing a special keyword, "paddr\n", to the file. Because physical address
+space monitoring doesn't support multiple targets, reading the file shows a
+fake value, ``42``, as below::
+
+ # cd <debugfs>/damon
+ # echo paddr > target_ids
+ # cat target_ids
+ 42
+
+Note that setting the target ids doesn't start the monitoring.
+
+
+Initial Monitoring Target Regions
+---------------------------------
+
+In case of the virtual address space monitoring, DAMON automatically sets
+and updates the monitoring target regions so that the entire memory mappings
+of the target processes can be covered. However, users could want to limit
+the monitoring region to specific address ranges, such as the heap, the
+stack, or specific file-mapped areas. Or, some users could know the initial
+access pattern of their workloads and therefore want to set optimal initial
+regions for the 'adaptive regions adjustment'.
+
+In contrast, DAMON does not automatically set and update the monitoring
+target regions in case of the physical memory monitoring. Therefore, users
+should set the monitoring target regions by themselves.
+
+In such cases, users can explicitly set the initial monitoring target
+regions as they want, by writing proper values to the ``init_regions`` file.
+Each line of the input should represent one region in the below form::
+
+ <target idx> <start address> <end address>
+
+The ``target idx`` should be the index of the target in the ``target_ids``
+file, starting from ``0``, and the regions should be passed in address
+order. For example, the commands below will set a couple of address ranges,
+``1-100`` and ``100-200``, as the initial monitoring target regions of pid
+42, which is the first one (index ``0``) in ``target_ids``, and another
+couple of address ranges, ``20-40`` and ``50-100``, as those of pid 4242,
+which is the second one (index ``1``) in ``target_ids``::
+
+ # cd <debugfs>/damon
+ # cat target_ids
+ 42 4242
+ # echo "0 1 100
+ 0 100 200
+ 1 20 40
+ 1 50 100" > init_regions
+
+Note that this sets the initial monitoring target regions only.  In case
+of virtual memory monitoring, DAMON will automatically update the
+boundaries of the regions after one ``update interval``.  Therefore, users
+should set the ``update interval`` large enough in this case if they don't
+want the update.
+
+
+Schemes
+-------
+
+For usual DAMON-based data access aware memory management optimizations,
+users would simply want the system to apply a memory management action to
+a memory region of a specific access pattern.  DAMON receives such
+formalized operation schemes from the user and applies those to the
+target processes.
+
+Users can get and set the schemes by reading from and writing to the
+``schemes`` debugfs file.  Reading the file also shows the statistics of
+each scheme.  To the file, each of the schemes should be represented in
+each line in below form::
+
+ <target access pattern> <action> <quota> <watermarks>
+
+You can disable schemes by simply writing an empty string to the file.
+
+Target Access Pattern
+~~~~~~~~~~~~~~~~~~~~~
+
+The ``<target access pattern>`` is constructed with three ranges in below
+form::
+
+ min-size max-size min-acc max-acc min-age max-age
+
+Specifically, bytes for the size of regions (``min-size`` and
+``max-size``), number of monitored accesses per aggregation interval for
+access frequency (``min-acc`` and ``max-acc``), and number of aggregation
+intervals for the age of regions (``min-age`` and ``max-age``) are
+specified.  Note that the ranges are closed intervals.
+
+Action
+~~~~~~
+
+The ``<action>`` is a predefined integer for memory management actions,
+which DAMON will apply to the regions having the target access pattern.
+The supported numbers and their meanings are as below::
+
+ - 0: Call ``madvise()`` for the region with ``MADV_WILLNEED``
+ - 1: Call ``madvise()`` for the region with ``MADV_COLD``
+ - 2: Call ``madvise()`` for the region with ``MADV_PAGEOUT``
+ - 3: Call ``madvise()`` for the region with ``MADV_HUGEPAGE``
+ - 4: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``
+ - 5: Do nothing but count the statistics
+
+Quota
+~~~~~
+
+The optimal ``target access pattern`` for each ``action`` is workload
+dependent, so not easy to find.  Worse yet, setting a scheme of some
+action too aggressively can cause severe overhead.  To avoid such
+overhead, users can limit the time and size quota for the scheme via the
+``<quota>`` in below form::
+
+ <ms> <sz> <reset interval> <priority weights>
+
+This makes DAMON try to use only up to ``<ms>`` milliseconds for applying
+the action to memory regions of the ``target access pattern`` within the
+``<reset interval>`` milliseconds, and to apply the action to only up to
+``<sz>`` bytes of memory regions within the ``<reset interval>``.
+Setting both ``<ms>`` and ``<sz>`` to zero disables the quota limits.
+
+When the quota limit is expected to be exceeded, DAMON prioritizes found
+memory regions of the ``target access pattern`` based on their size,
+access frequency, and age.  For personalized prioritization, users can
+set the weights for the three properties in ``<priority weights>`` in
+below form::
+
+ <size weight> <access frequency weight> <age weight>
+
+Watermarks
+~~~~~~~~~~
+
+Some schemes would need to run based on the current value of the system's
+specific metrics like the free memory ratio.  For such cases, users can
+specify watermarks for the condition.::
+
+ <metric> <check interval> <high mark> <middle mark> <low mark>
+
+``<metric>`` is a predefined integer for the metric to be checked.  The
+supported numbers and their meanings are as below.
+
+ - 0: Ignore the watermarks
+ - 1: Free memory rate (per thousand) of the system
+
+The value of the metric is checked every ``<check interval>``
+microseconds.
+
+If the value is higher than ``<high mark>`` or lower than ``<low mark>``,
+the scheme is deactivated.  If the value is lower than ``<middle mark>``,
+the scheme is activated.
+
+Statistics
+~~~~~~~~~~
+
+It also counts the total number and bytes of regions that each scheme was
+tried to be applied to, the two numbers for the regions that each scheme
+was successfully applied to, and the total number of quota limit exceeds.
+These statistics can be used for online analysis or tuning of the
+schemes.
+
+The statistics can be shown by reading the schemes file.  Reading the
+file will show each scheme you entered in each line, and the five numbers
+for the statistics will be appended at the end of each line.
+
+Example
+~~~~~~~
+
+Below commands apply a scheme saying "If a memory region of size in
+[4KiB, 8KiB] is showing accesses per aggregation interval in [0, 5] for
+an aggregation-interval age in [10, 20], page out the region.  For the
+paging out, use only up to 10ms per second, and also don't page out more
+than 1GiB per second.  Under that limitation, page out memory regions
+having longer age first.  Also, check the free memory rate of the system
+every 5 seconds, start the monitoring and paging out when the free memory
+rate becomes lower than 50%, but stop it if the free memory rate becomes
+larger than 60% or lower than 30%"::
+
+ # cd <debugfs>/damon
+ # scheme="4096 8192 0 5 10 20 2" # target access pattern and action
+ # scheme+=" 10 $((1024*1024*1024)) 1000" # quotas
+ # scheme+=" 0 0 100" # prioritization weights
+ # scheme+=" 1 5000000 600 500 300" # watermarks
+ # echo "$scheme" > schemes
+
+
+Turning On/Off
+--------------
+
+Setting the files as described above doesn't incur any effect unless you
+explicitly start the monitoring.  You can start, stop, and check the
+current status of the monitoring by writing to and reading from the
+``monitor_on`` file.  Writing ``on`` to the file starts the monitoring of
+the targets with the attributes.  Writing ``off`` to the file stops
+those.  DAMON also stops if every target process is terminated.  Below
+example commands turn on, off, and check the status of DAMON::
+
+ # cd <debugfs>/damon
+ # echo on > monitor_on
+ # echo off > monitor_on
+ # cat monitor_on
+ off
+
+Please note that you cannot write to the above-mentioned debugfs files
+while the monitoring is turned on.  If you write to the files while DAMON
+is running, an error code such as ``-EBUSY`` will be returned.
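+
+For instance, a personalized wrapper program as mentioned earlier could
+guard its writes like the following minimal C sketch (an assumption for
+illustration, not part of the original document)::
+
+ #include <errno.h>
+ #include <fcntl.h>
+ #include <stdio.h>
+ #include <string.h>
+ #include <unistd.h>
+
+ /* Write a value to a DAMON debugfs file; report -EBUSY gracefully. */
+ static int damon_write(const char *path, const char *val)
+ {
+         int fd = open(path, O_WRONLY);
+         int ret = 0;
+
+         if (fd < 0)
+                 return -1;
+         if (write(fd, val, strlen(val)) < 0) {
+                 if (errno == EBUSY)
+                         fprintf(stderr,
+                                 "DAMON is running; write 'off' to monitor_on first\n");
+                 ret = -1;
+         }
+         close(fd);
+         return ret;
+ }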
+
+
+Monitoring Thread PID
+---------------------
+
+DAMON does requested monitoring with a kernel thread called ``kdamond``.
+You can get the ``pid`` of the thread by reading the ``kdamond_pid``
+file.  When the monitoring is turned ``off``, reading the file returns
+``none``::
+
+ # cd <debugfs>/damon
+ # cat monitor_on
+ off
+ # cat kdamond_pid
+ none
+ # echo on > monitor_on
+ # cat kdamond_pid
+ 18594
+
+
+Using Multiple Monitoring Threads
+---------------------------------
+
+One ``kdamond`` thread is created for each monitoring context.  You can
+create and remove monitoring contexts for multiple-``kdamond`` use cases
+using the ``mk_contexts`` and ``rm_contexts`` files.
+
+Writing the name of the new context to the ``mk_contexts`` file creates a
+directory of the name on the DAMON debugfs directory.  The directory will
+have the DAMON debugfs files for the context::
+
+ # cd <debugfs>/damon
+ # ls foo
+ # ls: cannot access 'foo': No such file or directory
+ # echo foo > mk_contexts
+ # ls foo
+ # attrs init_regions kdamond_pid schemes target_ids
+
+If a context is not needed anymore, you can remove it and the
+corresponding directory by putting the name of the context into the
+``rm_contexts`` file::
+
+ # echo foo > rm_contexts
+ # ls foo
+ # ls: cannot access 'foo': No such file or directory
+
+Note that the ``mk_contexts``, ``rm_contexts``, and ``monitor_on`` files
+are in the root directory only.
+
+
+Tracepoint for Monitoring Results
+=================================
+
+DAMON provides the monitoring results via a tracepoint,
+``damon:damon_aggregated``.  While the monitoring is turned on, you can
+record the tracepoint events and show the results using tracepoint
+supporting tools like perf.  For example::
+
+ # echo on > monitor_on
+ # perf record -e damon:damon_aggregated &
+ # sleep 5
+ # kill -9 $(pidof perf)
+ # echo off > monitor_on
+ # perf script
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/index.rst b/Documentation/translations/zh_CN/admin-guide/mm/index.rst
new file mode 100644
index 000000000000..702271c5b683
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/mm/index.rst
@@ -0,0 +1,49 @@
+.. include:: ../../disclaimer-zh_CN.rst
+
+:Original: Documentation/admin-guide/mm/index.rst
+
+:翻译:
+
+ å¾é‘« xu xin <xu.xin16@zte.com.cn>
+
+
+=================
+Memory Management
+=================
+
+The Linux memory management subsystem is, as the name implies,
+responsible for managing the memory in the system.  This includes the
+implementation of virtual memory and demand paging, memory allocation
+both for kernel internal structures and user space programs, mapping of
+files into the process address space and many other cool things.
+
+Linux memory management is a complex system with many configurable
+settings, and most of those settings are available via the ``/proc``
+filesystem and can be queried and adjusted using ``sysctl``.  These APIs
+are described in Documentation/admin-guide/sysctl/vm.rst and in
+`man 5 proc`_.
+
+.. _man 5 proc: http://man7.org/linux/man-pages/man5/proc.5.html
+
+Linux memory management has its own jargon; if you are not yet familiar
+with it, consider reading
+:ref:`Documentation/admin-guide/mm/concepts.rst <mm_concepts>`.
+
+Here we document in detail how to interact with various mechanisms in
+the Linux memory management.
+
+.. toctree::
+ :maxdepth: 1
+
+ damon/index
+ ksm
+
+Todolist:
+* concepts
+* cma_debugfs
+* hugetlbpage
+* idle_page_tracking
+* memory-hotplug
+* nommu-mmap
+* numa_memory_policy
+* numaperf
+* pagemap
+* soft-dirty
+* swap_numa
+* transhuge
+* userfaultfd
+* zswap
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/ksm.rst b/Documentation/translations/zh_CN/admin-guide/mm/ksm.rst
new file mode 100644
index 000000000000..4829156ef1ae
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/mm/ksm.rst
@@ -0,0 +1,148 @@
+.. include:: ../../disclaimer-zh_CN.rst
+
+:Original: Documentation/admin-guide/mm/ksm.rst
+
+:翻译:
+
+ å¾é‘« xu xin <xu.xin16@zte.com.cn>
+
+
+=======================
+Kernel Samepage Merging
+=======================
+
+
+Overview
+========
+
+KSM is a memory-saving de-duplication feature, enabled by CONFIG_KSM=y
+and added to the Linux kernel in 2.6.32.  See ``mm/ksm.c`` for its
+implementation, and http://lwn.net/Articles/306704 and
+https://lwn.net/Articles/330589
+
+KSM was originally developed for use with KVM (where it was known as
+Kernel Shared Memory), to fit more virtual machines into physical
+memory by sharing the data common between them.  But it can be useful
+to any application which generates many instances of the same data.
+
+The KSM daemon ksmd periodically scans those areas of user memory which
+have been registered with it, looking for pages of identical content
+which can be replaced by a single write-protected page (which is
+automatically copied if a process later wants to update its content).
+The amount of pages that ksmd scans in a single pass and the time
+between the passes are configured using the :ref:`sysfs interface
+<ksm_sysfs>`.
+
+KSM only merges anonymous (private) pages, never pagecache (file)
+pages.  KSM's merged pages were originally locked into kernel memory,
+but can now be swapped out just like other user pages (but sharing is
+broken when they are swapped back in: ksmd must rediscover their
+identity and merge again).
+
+Controlling KSM with madvise
+============================
+
+KSM only operates on those areas of address space which an application
+has advised to be likely candidates for merging, by using the
+madvise(2) system call::
+
+ int madvise(addr, length, MADV_MERGEABLE)
+
+The app may also call::
+
+ int madvise(addr, length, MADV_UNMERGEABLE)
+
+to cancel that advice and restore unshared pages: whereupon KSM
+unmerges whatever it merged in that range.  Note: this unmerging call
+may suddenly require more memory than is available - possibly failing
+with EAGAIN, but more probably arousing the OOM killer.
+
+If KSM is not configured into the running kernel, madvise
+MADV_MERGEABLE and MADV_UNMERGEABLE simply fail with EINVAL.  If the
+running kernel was built with CONFIG_SYSFS and CONFIG_KSM=y, those
+calls will normally succeed: even if the KSM daemon is not currently
+running, MADV_MERGEABLE still registers the range for whenever the KSM
+daemon is started; even if the range cannot contain any pages which KSM
+could actually merge; even if MADV_UNMERGEABLE is applied to a range
+which was never MADV_MERGEABLE.
+
+If a region of memory must be split into at least one new
+MADV_MERGEABLE or MADV_UNMERGEABLE region, the madvise may return
+ENOMEM if the process would exceed ``vm.max_map_count`` (see
+Documentation/admin-guide/sysctl/vm.rst).
+
+Like other madvise calls, they are intended for use on mapped areas of
+the user address space: they will report ENOMEM if the range specified
+spans unmapped gaps (though working on the intervening mapped areas),
+and might fail with EAGAIN if there is not enough memory for the
+internal structures.
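+
+As an illustration only (a hedged user-space sketch, not part of the
+original document), a small program that registers two identical
+anonymous buffers as merge candidates could look like::
+
+ #include <stdio.h>
+ #include <string.h>
+ #include <sys/mman.h>
+ #include <unistd.h>
+
+ int main(void)
+ {
+         size_t len = 64 * 4096;
+         /* Two private anonymous mappings filled with identical bytes. */
+         char *a = mmap(NULL, len, PROT_READ | PROT_WRITE,
+                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+         char *b = mmap(NULL, len, PROT_READ | PROT_WRITE,
+                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+         if (a == MAP_FAILED || b == MAP_FAILED)
+                 return 1;
+         memset(a, 0x5a, len);
+         memset(b, 0x5a, len);
+
+         /* Advise KSM that both ranges are merge candidates; pages are
+          * only merged once ksmd runs (run set to 1 via sysfs). */
+         if (madvise(a, len, MADV_MERGEABLE) ||
+             madvise(b, len, MADV_MERGEABLE)) {
+                 perror("madvise");
+                 return 1;
+         }
+         pause();  /* keep the mappings alive while watching sysfs */
+         return 0;
+ }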
+
+KSM daemon sysfs interface
+==========================
+
+The KSM daemon is controlled by sysfs files in ``/sys/kernel/mm/ksm/``,
+readable by all but writable only by root.  Each interface is explained
+below:
+
+
+pages_to_scan
+ how many pages to scan before ksmd goes to sleep.
+ e.g. ``echo 100 > /sys/kernel/mm/ksm/pages_to_scan``
+
+ Default: 100 (chosen for demonstration purposes)
+
+sleep_millisecs
+ how many milliseconds ksmd should sleep before the next scan.
+ e.g. ``echo 20 > /sys/kernel/mm/ksm/sleep_millisecs``
+
+ Default: 20 (chosen for demonstration purposes)
+
+merge_across_nodes
+ specifies if pages from different NUMA nodes can be merged.  When set
+ to 0, ksm merges only pages which physically reside in the memory area
+ of the same NUMA node.  That brings lower latency to access of shared
+ pages.  Systems with more nodes, at significant NUMA distances, are
+ likely to benefit from the lower latency of setting 0.  Smaller
+ systems, which need to minimize memory usage, are likely to benefit
+ from the greater sharing of setting 1 (default).  You may wish to
+ compare how your system performs under each setting, before deciding
+ on which to use.  The ``merge_across_nodes`` setting can be changed
+ only when there are no ksm shared pages in the system: first set the
+ ``run`` interface to 2 to unmerge pages, then set ``run`` back to 1
+ after changing ``merge_across_nodes``, to remerge according to the new
+ setting.
+
+ Default: 1 (merging across nodes as in earlier releases)
+
+run
+ * set to 0 to stop ksmd from running but keep merged pages,
+ * set to 1 to run ksmd, e.g. ``echo 1 > /sys/kernel/mm/ksm/run``,
+ * set to 2 to stop ksmd and unmerge all pages currently merged, but
+   leave mergeable areas registered for the next run.
+
+ Default: 0 (must be set to 1 to activate KSM, except if CONFIG_SYSFS
+ is disabled)
+
+use_zero_pages
+ specifies whether empty pages (i.e. allocated pages that only contain
+ zeroes) should be treated specially.  When set to 1, empty pages are
+ merged with the kernel zero page(s) instead of with each other as it
+ would happen normally.  This can improve performance on architectures
+ with coloured zero pages, depending on the workload.  Care should be
+ taken when enabling this setting, as it can potentially degrade the
+ performance of KSM for some workloads, for example if the checksums
+ of pages candidate for merging match the checksum of an empty page.
+ This setting can be changed at any time; it is only effective for
+ pages merged after the change.
+
+ Default: 0 (normal KSM behaviour as in earlier releases)
+
+max_page_sharing
+ maximum sharing allowed for each KSM page.  This enforces a
+ deduplication limit to avoid high latency for virtual memory
+ operations that involve traversal of the virtual mappings sharing the
+ KSM page.  The minimum value is 2 as a newly created KSM page will
+ have at least two sharers.  The higher this value, the faster KSM
+ merges memory and the higher the deduplication factor, but the slower
+ the worst case traversal of the virtual mappings of any given KSM
+ page for swapping, compaction, NUMA balancing and page migration.
+ Slowing that traversal means certain virtual memory operations will
+ have higher latency, reducing the responsiveness of their callers.
+ The scheduling latency of other tasks not performing VM operations
+ that do the traversals is unaffected by this parameter, as the
+ traversals themselves are schedule friendly.
+
+stable_node_chains_prune_millisecs
+ specifies how frequently, in milliseconds, KSM checks for stale
+ information in the metadata of the pages that hit the deduplication
+ limit.  Smaller millisecond values will free up the KSM metadata with
+ lower latency, but they will make ksmd use more CPU during the scan.
+ It's a noop if not a single KSM page has hit ``max_page_sharing``
+ yet.
+
+The effectiveness of KSM and MADV_MERGEABLE is shown in
+``/sys/kernel/mm/ksm/``:
+
+pages_shared
+ how many shared pages are being used
+pages_sharing
+ how many more sites are sharing them i.e. how much is saved
+pages_unshared
+ how many pages are unique but repeatedly checked for merging
+pages_volatile
+ how many pages are changing too fast to be placed in a tree
+full_scans
+ how many times all mergeable areas have been scanned
+stable_node_chains
+ the number of KSM pages that hit the ``max_page_sharing`` limit
+stable_node_dups
+ number of duplicated KSM pages
+
+The maximum of the ratio ``pages_sharing/pages_shared`` is limited by
+the ``max_page_sharing`` setting.  To increase the ratio,
+``max_page_sharing`` must be increased accordingly.
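+
+As a final illustration (a hypothetical helper, not part of the original
+document), the ratio above can be computed from the sysfs counters::
+
+ #include <stdio.h>
+
+ /* Read one numeric counter from /sys/kernel/mm/ksm/. */
+ static long ksm_counter(const char *name)
+ {
+         char path[128];
+         long val = -1;
+         FILE *f;
+
+         snprintf(path, sizeof(path), "/sys/kernel/mm/ksm/%s", name);
+         f = fopen(path, "r");
+         if (!f)
+                 return -1;
+         if (fscanf(f, "%ld", &val) != 1)
+                 val = -1;
+         fclose(f);
+         return val;
+ }
+
+ int main(void)
+ {
+         long shared = ksm_counter("pages_shared");
+         long sharing = ksm_counter("pages_sharing");
+
+         if (shared > 0)
+                 printf("pages_sharing/pages_shared = %.2f\n",
+                        (double)sharing / shared);
+         return 0;
+ }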
diff --git a/Documentation/translations/zh_CN/core-api/index.rst b/Documentation/translations/zh_CN/core-api/index.rst
index d10191c45cf1..26d9913fc8b6 100644
--- a/Documentation/translations/zh_CN/core-api/index.rst
+++ b/Documentation/translations/zh_CN/core-api/index.rst
@@ -42,6 +42,7 @@
kref
assoc_array
xarray
+ rbtree
Todolist:
@@ -49,7 +50,6 @@ Todolist:
idr
circular-buffers
- rbtree
generic-radix-tree
packing
bus-virt-phys-mapping
diff --git a/Documentation/translations/zh_CN/core-api/rbtree.rst b/Documentation/translations/zh_CN/core-api/rbtree.rst
new file mode 100644
index 000000000000..a3e1555cb974
--- /dev/null
+++ b/Documentation/translations/zh_CN/core-api/rbtree.rst
@@ -0,0 +1,391 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/core-api/rbtree.rst
+
+:翻译:
+
+ å”艺舟 Tang Yizhou <tangyeechou@gmail.com>
+
+=================================
+Red-black Trees (rbtree) in Linux
+=================================
+
+
+:Date: January 18, 2007
+:Author: Rob Landley <rob@landley.net>
+
+What are red-black trees, and what are they for?
+------------------------------------------------
+
+Red-black trees are a type of self-balancing binary search tree, used
+for storing sortable key/value data pairs.  This differs from radix
+trees (which are used to efficiently store sparse arrays and thus use
+long integer indexes to insert/access/delete nodes) and hash tables
+(which are not kept sorted to be easily traversed in order, and must be
+tuned for a specific size and hash function, whereas rbtrees scale
+gracefully storing arbitrary keys).
+
+Red-black trees are similar to AVL trees, but provide faster real-time
+bounded worst case performance for insertion and deletion (at most two
+rotations and three rotations, respectively, to balance the tree), with
+slightly slower (but still O(log n)) lookup time.
+
+To quote Linux Weekly News:
+
+    There are a number of red-black trees in use in the kernel.  The
+    deadline and CFQ I/O schedulers employ rbtrees to track requests;
+    the packet CD/DVD driver does the same.  The high-resolution timer
+    code uses an rbtree to organize outstanding timer requests.  The
+    ext3 filesystem tracks directory entries in a red-black tree.
+    Virtual memory areas (VMAs) are tracked with red-black trees, as
+    are epoll file descriptors, cryptographic keys, and network packets
+    in the "hierarchical token bucket" scheduler.
+
+This document covers use of the Linux rbtree implementation.  For more
+information on the nature and implementation of Red Black Trees, see:
+
+  Linux Weekly News article on red-black trees
+    https://lwn.net/Articles/184495/
+
+  Wikipedia entry on red-black trees
+    https://en.wikipedia.org/wiki/Red-black_tree
+
+Linux implementation of red-black trees
+---------------------------------------
+
+Linux's rbtree implementation lives in the file "lib/rbtree.c".  To use
+it, "#include <linux/rbtree.h>".
+
+The Linux rbtree implementation is optimized for speed, and thus has
+one less layer of indirection (and better cache locality) than more
+traditional tree implementations.  Instead of using pointers to
+separate rb_node and data structures, each instance of struct rb_node
+is embedded in the data structure it organizes.  And instead of using a
+comparison callback function pointer, users are expected to write their
+own tree search and insert functions which call the provided rbtree
+functions.  Locking is also left up to the user of the rbtree code.
+
+Creating a new rbtree
+---------------------
+
+Data nodes in an rbtree tree are structures containing a struct rb_node
+member::
+
+ struct mytype {
+ struct rb_node node;
+ char *keystring;
+ };
+
+When dealing with a pointer to the embedded struct rb_node, the
+containing data structure may be accessed with the standard
+container_of() macro.  In addition, individual members may be accessed
+directly via rb_entry(node, type, member).
+
+At the root of each rbtree is an rb_root structure, which is
+initialized to be empty via::
+
+ struct rb_root mytree = RB_ROOT;
+
+Searching for a value in an rbtree
+----------------------------------
+
+Writing a search function for your tree is fairly straightforward:
+start at the root, compare each value, and follow the left or right
+branch as necessary.
+
+Example::
+
+ struct mytype *my_search(struct rb_root *root, char *string)
+ {
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct mytype *data = container_of(node, struct mytype, node);
+ int result;
+
+ result = strcmp(string, data->keystring);
+
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return data;
+ }
+ return NULL;
+ }
+
+Inserting data into an rbtree
+-----------------------------
+
+Inserting data in the tree involves first searching for the place to
+insert the new node, then inserting the node and rebalancing
+("recoloring") the tree.
+
+The search for insertion differs from the previous search by finding
+the location of the pointer on which to graft the new node.  The new
+node also needs a link to its parent node for rebalancing purposes.
+
+Example::
+
+ int my_insert(struct rb_root *root, struct mytype *data)
+ {
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct mytype *this = container_of(*new, struct mytype, node);
+ int result = strcmp(data->keystring, this->keystring);
+
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else
+ return FALSE;
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+
+ return TRUE;
+ }
+
+Removing or replacing existing data in an rbtree
+------------------------------------------------
+
+To remove an existing node from a tree, call::
+
+ void rb_erase(struct rb_node *victim, struct rb_root *tree);
+
+Example::
+
+ struct mytype *data = my_search(&mytree, "walrus");
+
+ if (data) {
+ rb_erase(&data->node, &mytree);
+ myfree(data);
+ }
+
+To replace an existing node in a tree with a new one with the same key,
+call::
+
+ void rb_replace_node(struct rb_node *old, struct rb_node *new,
+ struct rb_root *tree);
+
+Replacing a node this way does not re-sort the tree: if the new node
+doesn't have the same key as the old node, the rbtree will probably
+become corrupted.
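+
+A minimal sketch (an assumption for illustration, not from the original
+text) of a replacement that preserves the ordering invariant::
+
+ void my_replace(struct rb_root *root, struct mytype *old,
+                 struct mytype *new)
+ {
+         /* Only legal because both nodes sort identically. */
+         BUG_ON(strcmp(old->keystring, new->keystring) != 0);
+         rb_replace_node(&old->node, &new->node, root);
+ }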
+
+Iterating through the elements stored in an rbtree (in sort order)
+------------------------------------------------------------------
+
+Four functions are provided for iterating through an rbtree's contents
+in sorted order.  These work on arbitrary trees, and should not need to
+be modified or wrapped (except for locking purposes)::
+
+ struct rb_node *rb_first(struct rb_root *tree);
+ struct rb_node *rb_last(struct rb_root *tree);
+ struct rb_node *rb_next(struct rb_node *node);
+ struct rb_node *rb_prev(struct rb_node *node);
+
+To start iterating, call rb_first() or rb_last() with a pointer to the
+root of the tree, which will return a pointer to the node structure
+contained in the first or last element in the tree.  To continue, fetch
+the next or previous node by calling rb_next() or rb_prev() on the
+current node.  This will return NULL when there are no more nodes left.
+
+The iterator functions return a pointer to the embedded struct rb_node,
+from which the containing data structure may be accessed with the
+standard container_of() macro, and individual members may be accessed
+directly via rb_entry(node, type, member).
+
+Example::
+
+ struct rb_node *node;
+ for (node = rb_first(&mytree); node; node = rb_next(node))
+ printk("key=%s\n", rb_entry(node, struct mytype, node)->keystring);
+
+Cached rbtrees
+--------------
+
+Computing the leftmost (smallest) node is quite a common task for
+binary search trees, such as for traversals or users relying on a
+particular order for their own logic.  To this end, users can use
+'struct rb_root_cached' to optimize O(logN) rb_first() calls to a
+simple pointer fetch, avoiding potentially expensive tree iterations.
+This is done at negligible runtime overhead for maintenance; albeit
+with a larger memory footprint.
+
+Similar to the rb_root structure, cached rbtrees are initialized to be
+empty via::
+
+ struct rb_root_cached mytree = RB_ROOT_CACHED;
+
+A cached rbtree is simply a regular rb_root with an extra pointer to
+cache the leftmost node.  This allows rb_root_cached to exist wherever
+rb_root does, and only a few extra interfaces are needed to support
+cached trees::
+
+ struct rb_node *rb_first_cached(struct rb_root_cached *tree);
+ void rb_insert_color_cached(struct rb_node *, struct rb_root_cached *, bool);
+ void rb_erase_cached(struct rb_node *node, struct rb_root_cached *);
+
+The insertion and erase calls for augmented trees also have their
+cached-tree counterparts::
+
+ void rb_insert_augmented_cached(struct rb_node *node, struct rb_root_cached *,
+ bool, struct rb_augment_callbacks *);
+ void rb_erase_augmented_cached(struct rb_node *, struct rb_root_cached *,
+ struct rb_augment_callbacks *);
+
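+As a hedged sketch (an assumption, not from the original text), an
+insertion routine for a cached tree tracks whether the new node became
+the leftmost one, so that rb_first_cached() stays a pointer fetch::
+
+ struct mytype_c {
+         struct rb_node node;
+         unsigned long key;
+ };
+
+ void my_insert_cached(struct rb_root_cached *root, struct mytype_c *data)
+ {
+         struct rb_node **new = &root->rb_root.rb_node, *parent = NULL;
+         bool leftmost = true;
+
+         while (*new) {
+                 struct mytype_c *this =
+                         rb_entry(*new, struct mytype_c, node);
+
+                 parent = *new;
+                 if (data->key < this->key) {
+                         new = &(*new)->rb_left;
+                 } else {
+                         new = &(*new)->rb_right;
+                         /* Went right at least once: not the new minimum. */
+                         leftmost = false;
+                 }
+         }
+
+         rb_link_node(&data->node, parent, new);
+         rb_insert_color_cached(&data->node, root, leftmost);
+ }
+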
+
+Support for Augmented rbtrees
+-----------------------------
+
+An augmented rbtree is an rbtree with "some" additional data stored in
+each node, where the additional data for node N must be a function of
+the contents of all nodes in the subtree rooted at N.  It is an
+optional feature built on top of the basic rbtree infrastructure.  An
+rbtree user who wants this feature will have to call the augmented
+interfaces and provide augmentation callbacks when inserting and
+erasing nodes.
+
+C files implementing augmented rbtree manipulation must include
+<linux/rbtree_augmented.h> instead of <linux/rbtree.h>.  Note that
+linux/rbtree_augmented.h exposes some rbtree implementation details
+which you are not expected to rely on; please stick to the documented
+APIs there, and do not include <linux/rbtree_augmented.h> from header
+files either, so as to minimize the chances of your users accidentally
+relying on those implementation details.
+
+æ’入时,用户必须更新通往被æ’入节点的路径上的增强信æ¯ï¼Œç„¶åŽåƒå¾€å¸¸ä¸€æ ·è°ƒç”¨rb_link_node(),
+然åŽæ˜¯rb_augment_inserted()而ä¸æ˜¯å¹³æ—¶çš„rb_insert_color()调用。如果
+rb_augment_inserted()å†å¹³è¡¡äº†çº¢é»‘树,它将回调至一个用户æ供的函数æ¥æ›´æ–°å—å½±å“çš„
+å­æ ‘上的增强信æ¯ã€‚
+
+删除一个结点时,用户必须调用rb_erase_augmented()而ä¸æ˜¯rb_erase()。
+rb_erase_augmented()回调至一个用户æ供的函数æ¥æ›´æ–°å—å½±å“çš„å­æ ‘上的增强信æ¯ã€‚
+
+In both cases, the callbacks are provided through struct
+rb_augment_callbacks.  3 callbacks must be defined:
+
+- A propagation callback, which updates the augmented value for a given
+  node and its ancestors, up to a given stop point (or NULL to update
+  all the way to the root).
+
+- A copy callback, which copies the augmented value for a given subtree
+  to a newly assigned subtree root.
+
+- A tree rotation callback, which copies the augmented value for a
+  given subtree to a newly assigned subtree root AND recomputes the
+  augmented information for the former subtree root.
+
+The compiled code for rb_erase_augmented() may inline the propagation
+and copy callbacks, which results in a large function, so each
+augmented rbtree user should have a single rb_erase_augmented() call
+site in order to limit compiled code size.
+
+
+Sample usage
+^^^^^^^^^^^^
+
+Interval trees are an example of augmented rb trees.  Reference -
+"Introduction to Algorithms" by Cormen, Leiserson, Rivest and Stein.
+More details about interval trees:
+
+A classical rbtree has a single key and it cannot be directly used to
+store interval ranges like [lo:hi], nor to do a quick lookup for any
+overlap with a new lo:hi, nor to find whether there is an exact match
+for a new lo:hi.
+
+However, an rbtree can be augmented to store such interval ranges in a
+structured way, making it possible to do efficient lookup and exact
+match.
+
+This "extra information" stored in each node is the maximum hi
+(max_hi) value among all the nodes that are its descendants.  This
+information can be maintained at each node just by looking at the node
+and its immediate children.  And this will be used in an O(log n)
+lookup for the lowest match (lowest start address among all possible
+matches) with something like::
+
+ struct interval_tree_node *
+ interval_tree_first_match(struct rb_root *root,
+ unsigned long start, unsigned long last)
+ {
+ struct interval_tree_node *node;
+
+ if (!root->rb_node)
+ return NULL;
+ node = rb_entry(root->rb_node, struct interval_tree_node, rb);
+
+ while (true) {
+ if (node->rb.rb_left) {
+ struct interval_tree_node *left =
+ rb_entry(node->rb.rb_left,
+ struct interval_tree_node, rb);
+ if (left->__subtree_last >= start) {
+ /*
+ * Some nodes in left subtree satisfy Cond2.
+ * Iterate to find the leftmost such node N.
+ * If it also satisfies Cond1, that's the match
+ * we are looking for. Otherwise, there is no
+ * matching interval as nodes to the right of N
+ * can't satisfy Cond1 either.
+ */
+ node = left;
+ continue;
+ }
+ }
+ if (node->start <= last) { /* Cond1 */
+ if (node->last >= start) /* Cond2 */
+ return node; /* node is leftmost match */
+ if (node->rb.rb_right) {
+ node = rb_entry(node->rb.rb_right,
+ struct interval_tree_node, rb);
+ if (node->__subtree_last >= start)
+ continue;
+ }
+ }
+ return NULL; /* No match */
+ }
+ }
+
+Insertion/removal are defined using the following augmented callbacks::
+
+ static inline unsigned long
+ compute_subtree_last(struct interval_tree_node *node)
+ {
+ unsigned long max = node->last, subtree_last;
+ if (node->rb.rb_left) {
+ subtree_last = rb_entry(node->rb.rb_left,
+ struct interval_tree_node, rb)->__subtree_last;
+ if (max < subtree_last)
+ max = subtree_last;
+ }
+ if (node->rb.rb_right) {
+ subtree_last = rb_entry(node->rb.rb_right,
+ struct interval_tree_node, rb)->__subtree_last;
+ if (max < subtree_last)
+ max = subtree_last;
+ }
+ return max;
+ }
+
+ static void augment_propagate(struct rb_node *rb, struct rb_node *stop)
+ {
+ while (rb != stop) {
+ struct interval_tree_node *node =
+ rb_entry(rb, struct interval_tree_node, rb);
+ unsigned long subtree_last = compute_subtree_last(node);
+ if (node->__subtree_last == subtree_last)
+ break;
+ node->__subtree_last = subtree_last;
+ rb = rb_parent(&node->rb);
+ }
+ }
+
+ static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new)
+ {
+ struct interval_tree_node *old =
+ rb_entry(rb_old, struct interval_tree_node, rb);
+ struct interval_tree_node *new =
+ rb_entry(rb_new, struct interval_tree_node, rb);
+
+ new->__subtree_last = old->__subtree_last;
+ }
+
+ static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
+ {
+ struct interval_tree_node *old =
+ rb_entry(rb_old, struct interval_tree_node, rb);
+ struct interval_tree_node *new =
+ rb_entry(rb_new, struct interval_tree_node, rb);
+
+ new->__subtree_last = old->__subtree_last;
+ old->__subtree_last = compute_subtree_last(old);
+ }
+
+ static const struct rb_augment_callbacks augment_callbacks = {
+ augment_propagate, augment_copy, augment_rotate
+ };
+
+ void interval_tree_insert(struct interval_tree_node *node,
+ struct rb_root *root)
+ {
+ struct rb_node **link = &root->rb_node, *rb_parent = NULL;
+ unsigned long start = node->start, last = node->last;
+ struct interval_tree_node *parent;
+
+ while (*link) {
+ rb_parent = *link;
+ parent = rb_entry(rb_parent, struct interval_tree_node, rb);
+ if (parent->__subtree_last < last)
+ parent->__subtree_last = last;
+ if (start < parent->start)
+ link = &parent->rb.rb_left;
+ else
+ link = &parent->rb.rb_right;
+ }
+
+ node->__subtree_last = last;
+ rb_link_node(&node->rb, rb_parent, link);
+ rb_insert_augmented(&node->rb, root, &augment_callbacks);
+ }
+
+ void interval_tree_remove(struct interval_tree_node *node,
+ struct rb_root *root)
+ {
+ rb_erase_augmented(&node->rb, root, &augment_callbacks);
+ }
diff --git a/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst b/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst
index 87a36044f828..2ca92042767b 100644
--- a/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst
+++ b/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst
@@ -84,6 +84,8 @@ The CPUfreq core layer registers a cpufreq_driver structure.
 .resume - A pointer to a per-policy resume function, which is called
 with interrupts disabled and before the governor is started again.
+ .ready - A pointer to a per-policy ready function, which is called
+ after the policy is fully initialized.
+
 .attr - A pointer to a NULL-terminated list of "struct freq_attr"
 which allows exporting values to sysfs.
diff --git a/Documentation/translations/zh_CN/devicetree/index.rst b/Documentation/translations/zh_CN/devicetree/index.rst
new file mode 100644
index 000000000000..23d0b6fccd58
--- /dev/null
+++ b/Documentation/translations/zh_CN/devicetree/index.rst
@@ -0,0 +1,50 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/devicetree/index.rst
+
+:翻译:
+
+ å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+============================
+Open Firmware and Devicetree
+============================
+
+This is the top-level index of the devicetree documentation.  Most of
+the headings below are established terms of the field and are kept
+as-is; the sub-documents may render them more freely according to
+context.
+
+Kernel Devicetree Usage
+=======================
+.. toctree::
+ :maxdepth: 1
+
+ usage-model
+ of_unittest
+
+Todolist:
+
+* kernel-api
+
+Devicetree Overlays
+===================
+.. toctree::
+ :maxdepth: 1
+
+Todolist:
+
+* changesets
+* dynamic-resolution-notes
+* overlay-notes
+
+Devicetree Bindings
+===================
+.. toctree::
+ :maxdepth: 1
+
+Todolist:
+
+* bindings/index
diff --git a/Documentation/translations/zh_CN/devicetree/of_unittest.rst b/Documentation/translations/zh_CN/devicetree/of_unittest.rst
new file mode 100644
index 000000000000..abd94e771ef8
--- /dev/null
+++ b/Documentation/translations/zh_CN/devicetree/of_unittest.rst
@@ -0,0 +1,189 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/devicetree/of_unittest.rst
+
+:翻译:
+
+ å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+=================================
+Open Firmware Devicetree Unittest
+=================================
+
+Author: Gaurav Minocha <gaurav.minocha.os@gmail.com>
+
+1. Overview
+===========
+
+This document explains how the test data required for executing OF
+unittest is attached to the live tree dynamically, independent of the
+machine's architecture.
+
+It is recommended to read the following documents before moving ahead.
+
+(1) Documentation/devicetree/usage-model.rst
+(2) http://www.devicetree.org/Device_Tree_Usage
+
+OF Selftest has been designed to test the interface
+(include/linux/of.h) provided to device driver developers to fetch
+device information etc. from the unflattened device tree data
+structure.  This interface is used by most of the device drivers in
+various use cases.
+
+
+2. Test-data
+============
+
+The Device Tree Source file (drivers/of/unittest-data/testcases.dts)
+contains the test data required for executing the unit tests automated
+in drivers/of/unittest.c.  Currently, the following Device Tree Source
+Include files (.dtsi) are included in testcases.dts::
+
+ drivers/of/unittest-data/tests-interrupts.dtsi
+ drivers/of/unittest-data/tests-platform.dtsi
+ drivers/of/unittest-data/tests-phandle.dtsi
+ drivers/of/unittest-data/tests-match.dtsi
+
+When the kernel is built with OF_SELFTEST enabled, then the following
+make rule::
+
+ $(obj)/%.dtb: $(src)/%.dts FORCE
+ $(call if_changed_dep, dtc)
+
+is used to compile the DT source file (testcases.dts) into a binary
+blob (testcases.dtb), also referred to as the flattened DT.
+
+After that, the following rule is used to wrap the above binary blob
+into an assembly file (testcases.dtb.S)::
+
+ $(obj)/%.dtb.S: $(obj)/%.dtb
+ $(call cmd, dt_S_dtb)
+
+The assembly file is compiled into an object file (testcases.dtb.o)
+that is linked into the kernel image.
+
+
+2.1. Adding the test data
+-------------------------
+
+Un-flattened device tree structure:
+
+The un-flattened device tree consists of connected device_node(s) in
+the form of a tree structure described below::
+
+ // following struct members are used to construct the tree
+ struct device_node {
+ ...
+ struct device_node *parent;
+ struct device_node *child;
+ struct device_node *sibling;
+ ...
+ };
+
+Figure 1 describes a generic structure of a machine's un-flattened
+device tree considering only child and sibling pointers.  There exists
+another pointer, ``*parent``, that is used to traverse the tree in the
+reverse direction.  So, at a particular level the child node and all
+the sibling nodes will have a parent pointer pointing to a common node
+(e.g. child1, sibling2, sibling3, sibling4's parent points to the root
+node)::
+
+ root ('/')
+ |
+ child1 -> sibling2 -> sibling3 -> sibling4 -> null
+ | | | |
+ | | | null
+ | | |
+ | | child31 -> sibling32 -> null
+ | | | |
+ | | null null
+ | |
+ | child21 -> sibling22 -> sibling23 -> null
+ | | | |
+ | null null null
+ |
+ child11 -> sibling12 -> sibling13 -> sibling14 -> null
+ | | | |
+ | | | null
+ | | |
+ null null child131 -> null
+ |
+ null
+
+Figure 1: Generic structure of an un-flattened device tree
+
+
+Before executing the OF unittest, it is required to attach the test
+data to the machine's device tree (if present).  So, when
+selftest_data_add() is called, at first it reads the flattened device
+tree data linked into the kernel image via the following kernel
+symbols::
+
+ __dtb_testcases_begin - address marking the start of test data blob
+ __dtb_testcases_end - address marking the end of test data blob
+
+Secondly, it calls of_fdt_unflatten_tree() to unflatten the flattened
+blob.  And finally, if the machine's device tree (i.e. the live tree)
+is present, then it attaches the unflattened test data tree to the live
+tree, else it attaches itself as a live device tree.
+
+attach_node_and_children() uses of_attach_node() to attach the nodes
+into the live tree as explained below.  To explain the same, the test
+data tree described in Figure 2 is attached to the live tree described
+in Figure 1::
+
+ root ('/')
+ |
+ testcase-data
+ |
+ test-child0 -> test-sibling1 -> test-sibling2 -> test-sibling3 -> null
+ | | | |
+ test-child01 null null null
+
+
+Figure 2: Example test data tree to be attached to the live tree.
+
+According to the scheme above, the live tree is already present, so it
+isn't required to attach the root('/') node.  All other nodes are
+attached by calling of_attach_node() on each node.
+
+In the function of_attach_node(), the new node is attached as the child
+of the given parent in the live tree.  But, if the parent already has a
+child then the new node replaces the current child and turns it into
+its sibling.  So, when the testcase data node is attached to the live
+tree above (Figure 1), the final structure is as shown in Figure 3::
+
+ root ('/')
+ |
+ testcase-data -> child1 -> sibling2 -> sibling3 -> sibling4 -> null
+ | | | | |
+ (...) | | | null
+ | | child31 -> sibling32 -> null
+ | | | |
+ | | null null
+ | |
+ | child21 -> sibling22 -> sibling23 -> null
+ | | | |
+ | null null null
+ |
+ child11 -> sibling12 -> sibling13 -> sibling14 -> null
+ | | | |
+ null null | null
+ |
+ child131 -> null
+ |
+ null
+ -----------------------------------------------------------------------
+
+ root ('/')
+ |
+ testcase-data -> child1 -> sibling2 -> sibling3 -> sibling4 -> null
+ | | | | |
+ | (...) (...) (...) null
+ |
+ test-sibling3 -> test-sibling2 -> test-sibling1 -> test-child0 -> null
+ | | | |
+ null null null test-child01
+
+
+Figure 3: Live device tree structure after attaching the testcase-data.
+
+
+Astute readers would have noticed that the test-child0 node becomes the
+last sibling compared to the earlier structure (Figure 2).  After
+attaching the first test-child0, the test-sibling1 node is attached,
+which pushes the child node (i.e. test-child0) to become a sibling and
+makes itself the child node, as mentioned above.
+
+If a duplicate node is found (i.e. if a node with the same full_name
+property already exists in the live tree), then the node isn't
+attached; rather its properties are updated into the live tree's node
+by calling the function update_node_properties().
+
+
+2.2. Removing the test data
+---------------------------
+
+Once the test case execution is complete, selftest_data_remove is
+called in order to remove the device nodes attached initially (first
+the leaf nodes are detached, then moving up the parent nodes are
+removed, and eventually the whole tree is gone).
+selftest_data_remove() calls detach_node_and_children(), which uses
+of_detach_node() to detach the nodes from the live device tree.
+
+To detach a node, of_detach_node() either updates the child pointer of
+the given node's parent to its sibling, or attaches the previous
+sibling to the given node's sibling, as the case may be.  That is it :)
diff --git a/Documentation/translations/zh_CN/devicetree/usage-model.rst b/Documentation/translations/zh_CN/devicetree/usage-model.rst
new file mode 100644
index 000000000000..318a3c6a0114
--- /dev/null
+++ b/Documentation/translations/zh_CN/devicetree/usage-model.rst
@@ -0,0 +1,330 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/devicetree/usage-model.rst
+
+:翻译:
+
+ å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+====================
+Linux and Devicetree
+====================
+
+The Linux usage model for device tree data
+
+:Author: Grant Likely <grant.likely@secretlab.ca>
+
+This article describes how Linux uses the device tree.  An overview of
+the device tree data format can be found on the device tree usage page
+at devicetree.org\ [1]_.
+
+.. [1] https://www.devicetree.org/specifications/
+
+"Open Firmware Device Tree",或简称为Devicetree(DT),是一ç§ç”¨äºŽæ述硬
+件的数æ®ç»“构和语言。更确切地说,它是一ç§æ“作系统å¯è¯»çš„硬件æ述,这样æ“作系统就ä¸
+需è¦å¯¹æœºå™¨çš„细节进行硬编ç ã€‚
+
+从结构上看,DT是一棵树,或者说是带有命å节点的无环图,节点å¯ä»¥æœ‰ä»»æ„æ•°é‡çš„命å
+属性æ¥å°è£…ä»»æ„çš„æ•°æ®ã€‚还存在一ç§æœºåˆ¶ï¼Œå¯ä»¥åœ¨è‡ªç„¶çš„树状结构之外创建从一个节点到
+å¦ä¸€ä¸ªèŠ‚点的任æ„链接。
+
+从概念上讲,一套通用的使用惯例,称为 "bindings"(åŽæ–‡è¯‘为绑定),被定义为数æ®
+应该如何出现在树中,以æ述典型的硬件特性,包括数æ®æ€»çº¿ã€ä¸­æ–­çº¿ã€GPIO连接和外围
+设备。
+
+å°½å¯èƒ½ä½¿ç”¨çŽ°æœ‰çš„绑定æ¥æ述硬件,以最大é™åº¦åœ°åˆ©ç”¨çŽ°æœ‰çš„支æŒä»£ç ï¼Œä½†ç”±äºŽå±žæ€§å’ŒèŠ‚
+点å称是简å•çš„文本字符串,通过定义新的节点和属性æ¥æ‰©å±•çŽ°æœ‰çš„绑定或创建新的绑定
+很容易。然而,è¦è­¦æƒ•çš„是,在创建一个新的绑定之å‰ï¼Œæœ€å¥½å…ˆå¯¹å·²ç»å­˜åœ¨çš„东西åšä¸€äº›
+功课。目å‰æœ‰ä¸¤ç§ä¸åŒçš„ã€ä¸å…¼å®¹çš„i2c总线的绑定,这是因为在创建新的绑定时没有事先
+调查i2c设备在现有系统中是如何被枚举的。
+
+1. History
+----------
+The DT was originally created by Open Firmware as part of the
+communication method for passing data from Open Firmware to a client
+program (like an operating system).  An operating system used the
+Device Tree to discover the topology of the hardware at runtime, and
+thereby support a majority of available hardware without hard coded
+information (assuming drivers were available for all devices).
+
+Since Open Firmware is commonly used on PowerPC and SPARC platforms,
+the Linux support for those architectures has for a long time used the
+Device Tree.
+
+In 2005, when PowerPC Linux began a major cleanup and to merge 32-bit
+and 64-bit support, the decision was made to require DT support on all
+powerpc platforms, regardless of whether or not they used Open
+Firmware.  To do this, a DT representation called the Flattened Device
+Tree (FDT) was created which could be passed to the kernel as a binary
+blob without requiring a real Open Firmware implementation.  U-Boot,
+kexec, and other bootloaders were modified to support both passing a
+Device Tree Binary (dtb) and to modify a dtb at boot time.  DT was also
+added to the PowerPC boot wrapper (arch/powerpc/boot/\*) so that a dtb
+could be wrapped up with the kernel image to support booting existing
+non-DT aware firmware.
+
+Some time later, the FDT infrastructure was generalized to be usable by
+all architectures.  At the time of this writing, 6 mainline
+architectures (arm, microblaze, mips, powerpc, sparc, and x86) and 1
+out of mainline (nios) have some level of DT support.
+
+2. Data Model
+-------------
+If you haven't already read the Device Tree Usage\ [1]_ page, then go
+read it now.  It's okay, I'll wait....
+
+2.1 High Level View
+-------------------
+The most important thing to understand is that the DT is simply a data
+structure that describes the hardware.  There is nothing magical about
+it, and it doesn't magically make all hardware configuration problems
+go away.  What it does do is provide a language for decoupling the
+hardware configuration from the board and device driver support in the
+Linux kernel (or any other operating system for that matter).  Using it
+allows board and device support to become data driven; to make setup
+decisions based on data passed into the kernel instead of on
+per-machine hard coded selections.
+
+Ideally, data driven platform setup should result in less code
+duplication and make it easier to support a wide range of hardware with
+a single kernel image.
+
+Linux uses DT data for three major purposes:
+
+1) platform identification,
+2) runtime configuration, and
+3) device population.
+
+2.2 Platform Identification
+---------------------------
+First and foremost, the kernel will use data in the DT to identify the
+specific machine.  In a perfect world, the specific platform shouldn't
+matter to the kernel, because all platform details would be described
+perfectly by the device tree in a consistent and reliable manner.
+Hardware isn't perfect though, and so the kernel must identify the
+machine during early boot so that it has the opportunity to run
+machine-specific fixups.
+
+In the majority of cases, the machine identity is irrelevant, and the
+kernel will instead select setup code based on the machine's core CPU
+or SoC.  On ARM for example, setup_arch() in arch/arm/kernel/setup.c
+will call setup_machine_fdt() in arch/arm/kernel/devtree.c, which
+searches through the machine_desc table and selects the machine_desc
+which best matches the device tree data.  It determines the best match
+by looking at the 'compatible' property in the root device tree node,
+and comparing it with the dt_compat list in struct machine_desc (which
+is defined in arch/arm/include/asm/mach/arch.h if you're curious).
+
+The 'compatible' property contains a sorted list of strings starting
+with the exact name of the machine, followed by an optional list of
+boards it is compatible with sorted from most compatible to least.  For
+example, the root compatible properties for the TI BeagleBoard and its
+successor, the BeagleBoard xM board, might look like, respectively::
+
+ compatible = "ti,omap3-beagleboard", "ti,omap3450", "ti,omap3";
+ compatible = "ti,omap3-beagleboard-xm", "ti,omap3450", "ti,omap3";
+
+Where "ti,omap3-beagleboard-xm" specifies the exact model, it also
+claims that it is compatible with the OMAP 3450 SoC, and the omap3
+family of SoCs in general.  You'll notice that the list is sorted from
+most specific (exact board) to least specific (SoC family).
+
+Astute readers might point out that the Beagle xM could also claim
+compatibility with the original Beagle board.  However, one should be
+cautious about doing so at the board level, since there is typically a
+high level of change from one board to another, even within the same
+product line, and it is hard to nail down exactly what is meant when
+one board claims to be compatible with another.  For the top level, it
+is better to err on the side of caution and not claim one board is
+compatible with another.  The notable exception would be when one board
+is a carrier for another, such as a CPU module attached to a carrier
+board.
+
+One more note on compatible values: any string used in a compatible
+property must be documented as to what it indicates.  Add documentation
+for compatible strings in Documentation/devicetree/bindings.
+
+Again on ARM, for each machine_desc, the kernel looks to see if any of
+the dt_compat list entries appear in the compatible property.  If one
+does, then that machine_desc is a candidate for driving the machine.
+After searching the entire table of machine_descs,
+setup_machine_fdt() returns the 'most compatible' machine_desc based on
+which entry in the compatible property each machine_desc matches
+against.  If no matching machine_desc is found, then it returns NULL.
+
+The reasoning behind this scheme is the observation that, in the
+majority of cases, a single machine_desc can support a large number of
+boards if they all use the same SoC, or the same family of SoCs.
+However, invariably there will be some exceptions where a specific
+board will require special setup code that is not useful in the generic
+case.  Special cases could be handled by explicitly checking for the
+troublesome board(s) in generic setup code, but doing so very quickly
+becomes ugly and/or unmaintainable if it is more than just a couple of
+cases.
+
+Instead, the compatible list allows a generic machine_desc to provide
+support for a wide common set of boards by specifying "less compatible"
+values in the dt_compat list.  In the example above, generic board
+support can claim compatibility with "ti,omap3" or "ti,omap3450".  If a
+bug was discovered on the original beagleboard that required special
+workaround code during early boot, then a new machine_desc could be
+added which implements the workarounds and only matches on
+"ti,omap3-beagleboard".
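+
+A hedged sketch (an assumption, not from the original text) of such a
+quirk-only machine_desc on ARM, where the helper name is hypothetical::
+
+ static const char *const beagle_quirk_compat[] = {
+         "ti,omap3-beagleboard",
+         NULL,
+ };
+
+ /* DT_MACHINE_START/MACHINE_END come from <asm/mach/arch.h> on ARM. */
+ DT_MACHINE_START(OMAP3_BEAGLE_QUIRK, "TI BeagleBoard (early-boot quirk)")
+         .dt_compat  = beagle_quirk_compat,
+         .init_early = beagle_workaround_init_early, /* hypothetical */
+ MACHINE_END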
+
+PowerPC uses a slightly different scheme where it calls the .probe()
+hook from each machine_desc, and the first one returning TRUE is used.
+However, this approach does not take into account the priority of the
+compatible list, and probably should be avoided for new architecture
+support.
+
+2.3 Runtime configuration
+-------------------------
+In most cases, a DT will be the only method of communicating data from
+firmware to the kernel, so it also gets used to pass in runtime and
+configuration data like the kernel parameters string and the location
+of an initrd image.
+
+Most of this data is contained in the /chosen node, and when booting
+Linux it will look something like this::
+
+ chosen {
+ bootargs = "console=ttyS0,115200 loglevel=8";
+ initrd-start = <0xc8000000>;
+ initrd-end = <0xc8200000>;
+ };
+
+The bootargs property contains the kernel arguments, and the initrd-*
+properties define the address and size of an initrd blob.  Note that
+initrd-end is the first address after the initrd image, so this doesn't
+match the usual semantic of struct resource.  The chosen node may also
+optionally contain an arbitrary number of additional properties for
+platform-specific configuration data.
+
+During early boot, the architecture setup code calls of_scan_flat_dt()
+several times with different helper callbacks to parse device tree data
+before paging is set up.  The of_scan_flat_dt() code scans through the
+device tree and uses the helpers to extract information required during
+early boot.  Typically the early_init_dt_scan_chosen() helper is used
+to parse the chosen node including kernel parameters,
+early_init_dt_scan_root() to initialize the DT address space model, and
+early_init_dt_scan_memory() to determine the size and location of
+usable RAM.
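+
+As a minimal sketch (an assumption, not taken from this document) of
+how such a flat-tree scan is wired up::
+
+ #include <linux/of_fdt.h>
+ #include <linux/string.h>
+
+ /* Called by of_scan_flat_dt() for every node; non-zero stops the scan. */
+ static int __init my_scan_chosen(unsigned long node, const char *uname,
+                                  int depth, void *data)
+ {
+         if (depth != 1 || strcmp(uname, "chosen") != 0)
+                 return 0;  /* not the node we want, keep scanning */
+
+         /* Pull a property out of the flattened blob. */
+         *(const char **)data = of_get_flat_dt_prop(node, "bootargs", NULL);
+         return 1;          /* found it, stop the scan */
+ }
+
+ static void __init my_early_setup(void)
+ {
+         const char *bootargs = NULL;
+
+         of_scan_flat_dt(my_scan_chosen, &bootargs);
+ }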
+
+On ARM, the function setup_machine_fdt() is responsible for early
+scanning of the device tree after selecting the correct machine_desc
+that supports the board.
+
+2.4 Device population
+---------------------
+After the board has been identified, and after the early configuration
+data has been parsed, kernel initialization can proceed in the normal
+way.  At some point in this process, unflatten_device_tree() is called
+to convert the data into a more efficient runtime representation.  This
+is also when machine-specific setup hooks will get called, like the
+machine_desc .init_early(), .init_irq() and .init_machine() hooks on
+ARM.  The remainder of this section uses examples from the ARM
+implementation, but all architectures will do pretty much the same
+thing when using a DT.
+
+As can be guessed by the names, .init_early() is used for any
+machine-specific setup that needs to be executed early in the boot
+process, and .init_irq() is used to set up interrupt handling.  Using a
+DT doesn't materially change the behaviour of either of these
+functions.  If a DT is provided, then both .init_early() and
+.init_irq() are able to call any of the DT query functions (of_* in
+include/linux/of*.h) to get additional data about the platform.
+
+The most interesting hook in the DT context is .init_machine(), which
+is primarily responsible for populating the Linux device model with
+data about the platform.  Historically this has been implemented on
+embedded platforms by defining a set of static clock structures,
+platform_devices, and other data in the board support .c file, and
+registering it en-masse in .init_machine().  When DT is used, then
+instead of hard coding static devices for each platform, the list of
+devices can be obtained by parsing the DT, and allocating device
+structures dynamically.
+
+The simplest case is when .init_machine() is only responsible for
+registering a block of platform_devices.  A platform_device is a
+concept used by Linux for memory or I/O mapped devices which cannot be
+detected by hardware, and for 'composite' or 'virtual' devices (more on
+those later).  While there is no 'platform device' terminology for the
+DT, platform devices roughly correspond to device nodes at the root of
+the tree and children of simple memory mapped bus nodes.
+
+About now is a good time to lay out an example.  Here is part of the
+device tree for the NVIDIA Tegra board::
+
+ /{
+ compatible = "nvidia,harmony", "nvidia,tegra20";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-parent = <&intc>;
+
+ chosen { };
+ aliases { };
+
+ memory {
+ device_type = "memory";
+ reg = <0x00000000 0x40000000>;
+ };
+
+ soc {
+ compatible = "nvidia,tegra20-soc", "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ intc: interrupt-controller@50041000 {
+ compatible = "nvidia,tegra20-gic";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ reg = <0x50041000 0x1000>, < 0x50040100 0x0100 >;
+ };
+
+ serial@70006300 {
+ compatible = "nvidia,tegra20-uart";
+ reg = <0x70006300 0x100>;
+ interrupts = <122>;
+ };
+
+ i2s1: i2s@70002800 {
+ compatible = "nvidia,tegra20-i2s";
+ reg = <0x70002800 0x100>;
+ interrupts = <77>;
+ codec = <&wm8903>;
+ };
+
+ i2c@7000c000 {
+ compatible = "nvidia,tegra20-i2c";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x7000c000 0x100>;
+ interrupts = <70>;
+
+ wm8903: codec@1a {
+ compatible = "wlf,wm8903";
+ reg = <0x1a>;
+ interrupts = <347>;
+ };
+ };
+ };
+
+ sound {
+ compatible = "nvidia,harmony-sound";
+ i2s-controller = <&i2s1>;
+ i2s-codec = <&wm8903>;
+ };
+ };
+
+At .init_machine() time, Tegra board support code will need to look at
+this DT and decide which nodes to create platform_devices for.
+However, looking at the tree, it is not immediately obvious what kind
+of device each node represents, or even if a node represents a device
+at all.  The /chosen, /aliases, and /memory nodes are informational
+nodes that don't describe devices (although arguably memory could be
+considered a device).  The children of the /soc node are memory mapped
+devices, but the codec@1a is an i2c device, and the sound node
+represents not a device, but rather how other devices are connected
+together to create the audio subsystem.  I know what each device is
+because I'm familiar with the board design, but how does the kernel
+know what to do with each node?
+
+The trick is that the kernel starts at the root of the tree and looks
+for nodes that have a 'compatible' property.  First, it is generally
+assumed that any node with a 'compatible' property represents a device
+of some kind, and second, it can be assumed that any node at the root
+of the tree is either directly attached to the processor bus, or is a
+miscellaneous system device that cannot be described any other way.
+For each of these nodes, Linux allocates and registers a
+platform_device, which in turn may get bound to a platform_driver.
+
+Why is using a platform_device for these nodes a safe assumption?
+Well, for the way that Linux models devices, just about all bus_types
+assume that their devices are children of a bus controller.  For
+example, each i2c_client is a child of an i2c_master.  Each spi_device
+is a child of an SPI bus.  Similarly for USB, PCI, MDIO, etc.  The same
+hierarchy is also found in the DT, where I2C device nodes only ever
+appear as children of an I2C bus node.  Ditto for SPI, MDIO, USB, etc.
+The only devices which do not require a specific type of parent device
+are platform_devices (and amba_devices, but more on that later), which
+will happily live at the base of the Linux /sys/devices tree.
+Therefore, if a DT node is at the root of the tree, then it really
+probably is best registered as a platform_device.
+
+Linux board support code calls of_platform_populate(NULL, NULL, NULL,
+NULL) to kick off discovery of devices at the root of the tree.  The
+parameters are all NULL because, when starting from the root of the
+tree, there is no need to provide a starting node (the first NULL) or a
+parent struct device (the last NULL), and we're not using a match table
+(yet).  For a board that only needs to register devices,
+.init_machine() can be completely empty except for the
+of_platform_populate() call.
+
+In the Tegra example, this accounts for the /soc and /sound nodes, but
+what about the children of the SoC node?  Shouldn't they get registered
+as platform devices too?  For Linux DT support, the generic behaviour
+is for child devices to be registered by the parent's device driver at
+driver .probe() time.  So, an i2c bus device driver will register an
+i2c_client for each child node, an SPI bus driver will register its
+spi_device children, and similarly for other bus_types.  According to
+that model, a driver could be written that binds to the SoC node and
+simply registers platform_devices for each of its children.  The board
+support code would allocate and register an SoC device, a (theoretical)
+SoC device driver could bind to the SoC device, and register
+platform_devices for /soc/interruptcontroller, /soc/serial, /soc/i2s,
+and /soc/i2c in its .probe() hook.  Easy, right?
+
+Actually, it turns out that registering the children of some
+platform_devices as more platform_devices is a common pattern, and the
+device tree support code reflects that and makes the above example
+simpler.  The second argument to of_platform_populate() is an
+of_device_id table, and any node that matches an entry in that table
+will also get its child nodes registered.  In the Tegra case, the code
+can look something like this::
+
+ static void __init harmony_init_machine(void)
+ {
+ /* ... */
+ of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
+ }
+
+"simple-bus" is defined in the Devicetree Specification as a property
+meaning a simple memory mapped bus, so the of_platform_populate() code
+could be written to just assume simple-bus compatible nodes will always
+be traversed.  However, we pass it in as an argument so that board
+support code can always override the default behaviour.
+
+[Need to add discussion of adding i2c/spi/etc child devices]
+
+Appendix A: AMBA devices
+------------------------
+
+ARM Primecells are a certain kind of device attached to the ARM AMBA
+bus, which include some support for hardware detection and power
+management.  In Linux, struct amba_device and the amba_bus_type are
+used to represent Primecell devices.  However, the fiddly bit is that
+not all devices on an AMBA bus are Primecells, and for Linux it is
+typical for both amba_device and platform_device instances to be
+siblings of the same bus segment.
+
+When using the DT, this creates problems for of_platform_populate(),
+because it must decide whether to register each node as either a
+platform_device or an amba_device.  This unfortunately complicates the
+device creation model a little bit, but the solution turns out not to
+be too invasive.  If a node is compatible with "arm,primecell", then
+of_platform_populate() will register it as an amba_device instead of a
+platform_device.
diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst
index 46e14ec9963d..88d8df957a78 100644
--- a/Documentation/translations/zh_CN/index.rst
+++ b/Documentation/translations/zh_CN/index.rst
@@ -5,7 +5,7 @@
\renewcommand\thesection*
\renewcommand\thesubsection*
\kerneldocCJKon
- \kerneldocBeginSC
+ \kerneldocBeginSC{
.. _linux_doc_zh:
@@ -56,10 +56,14 @@ TODOList:
The following documents describe the platform firmware related
information required by the kernel.
+.. toctree::
+ :maxdepth: 2
+
+ devicetree/index
+
TODOList:
* firmware-guide/index
-* devicetree/index
Documentation for application developers
----------------------------------------
@@ -104,14 +108,17 @@ TODOList:
:maxdepth: 2
core-api/index
+ accounting/index
cpu-freq/index
iio/index
+ infiniband/index
+ power/index
+ virt/index
sound/index
filesystems/index
- virt/index
- infiniband/index
- accounting/index
scheduler/index
+ vm/index
+ peci/index
TODOList:
@@ -129,7 +136,6 @@ TODOList:
* netlabel/index
* networking/index
* pcmcia/index
-* power/index
* target/index
* timers/index
* spi/index
@@ -140,7 +146,6 @@ TODOList:
* gpu/index
* security/index
* crypto/index
-* vm/index
* bpf/index
* usb/index
* PCI/index
@@ -198,4 +203,4 @@ TODOList:
.. raw:: latex
- \kerneldocEndSC
+ }\kerneldocEndSC
diff --git a/Documentation/translations/zh_CN/peci/index.rst b/Documentation/translations/zh_CN/peci/index.rst
new file mode 100644
index 000000000000..4f6694c828fa
--- /dev/null
+++ b/Documentation/translations/zh_CN/peci/index.rst
@@ -0,0 +1,26 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/peci/index.rst
+
+:翻译:
+
+ å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+====================
+Linux PECI Subsystem
+====================
+
+.. toctree::
+
+
+ peci
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/translations/zh_CN/peci/peci.rst b/Documentation/translations/zh_CN/peci/peci.rst
new file mode 100644
index 000000000000..a3b4f99b994c
--- /dev/null
+++ b/Documentation/translations/zh_CN/peci/peci.rst
@@ -0,0 +1,54 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/peci/peci.rst
+
+:翻译:
+
+ å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+========
+Overview
+========
+
+The Platform Environment Control Interface (PECI) is a communication
+interface between Intel processors and management controllers (e.g.
+Baseboard Management Controller, BMC).  PECI provides services that
+allow the management controller to configure, monitor and debug a
+platform by accessing various registers.  It defines a dedicated
+command protocol, where the management controller is acting as a PECI
+originator and the processor as a PECI responder.  PECI can be used in
+both single-processor and multiple-processor based systems.
+
+NOTE: The Intel PECI specification is not released as a dedicated
+document; instead it is a part of the External Design Specification
+(EDS) for a given Intel CPU.  External Design Specifications are
+usually not publicly available.
+
+PECI Wire
+---------
+
+The PECI Wire interface uses a single wire for self-clocking and data
+transfer.  It does not require any additional control lines - the
+physical layer is a self-clocked one-wire bus signal that begins each
+bit with a driven, rising edge from an idle state near zero volts.  The
+duration of the signal driven high determines whether the bit value is
+a logic '0' or a logic '1'.  PECI Wire also includes variable data rate
+established with every message.
+
+For PECI Wire, each processor package will utilize a unique, fixed
+address within a defined range, and that address should have a fixed
+relationship with the processor socket ID - if one of the processors is
+removed, it does not affect the addresses of the remaining processors.
+
+PECI subsystem code documentation
+---------------------------------
+
+The API is documented in the following kernel code:
+
+include/linux/peci.h
+
+drivers/peci/internal.h
+
+drivers/peci/core.c
+
+drivers/peci/request.c
+
+PECI CPU Driver API
+-------------------
+
+The API is documented in the following kernel code:
+
+drivers/peci/cpu.c
diff --git a/Documentation/translations/zh_CN/power/energy-model.rst b/Documentation/translations/zh_CN/power/energy-model.rst
new file mode 100644
index 000000000000..c7da1b6aefee
--- /dev/null
+++ b/Documentation/translations/zh_CN/power/energy-model.rst
@@ -0,0 +1,190 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/power/energy-model.rst
+
+:翻译:
+
+ å”艺舟 Tang Yizhou <tangyeechou@gmail.com>
+
+=======================
+Energy Model of devices
+=======================
+
+1. Overview
+-----------
+
+The Energy Model (EM) framework serves as an interface between drivers
+knowing the power consumed by devices at various performance levels,
+and the kernel subsystems willing to use that information to make
+energy-aware decisions.
+
+The source of the information about the power consumed by devices can
+vary greatly from one platform to another.  These power costs can be
+estimated using devicetree data in some cases.  In others, the firmware
+will know better.  Alternatively, userspace might be best positioned.
+And so on.  In order to avoid each and every client subsystem
+re-implementing support for each and every possible source of
+information on its own, the EM framework intervenes as an abstraction
+layer which standardizes the format of power cost tables in the kernel,
+hence enabling to avoid redundant work.
+
+The power values might be expressed in milli-Watts or in an 'abstract
+scale'.  Multiple subsystems might use the EM and it is up to the
+system integrator to check that the requirements for the power value
+scale types are met.  An example can be found in the Energy-Aware
+Scheduler documentation
+Documentation/scheduler/sched-energy.rst.  For some subsystems like
+thermal or powercap, power values expressed in an 'abstract scale'
+might cause issues.  These subsystems are more interested in estimation
+of power used in the past, thus the real milli-Watts might be needed.
+An example of these requirements can be found in the Intelligent Power
+Allocation document
+Documentation/driver-api/thermal/power_allocator.rst.
+
+Kernel subsystems might implement automatic detection to check whether
+EM registered devices have inconsistent scale (based on an EM internal
+flag).  The important thing to keep in mind is that when the power
+values are expressed in an 'abstract scale', deriving real energy
+consumption in milli-Joules is not possible.
+
+The figure below depicts an example of drivers (Arm-specific here, but
+the approach is applicable to any architecture) providing power costs
+to the EM framework, and interested clients reading the data from it::
+
+ +---------------+ +-----------------+ +---------------+
+ | Thermal (IPA) | | Scheduler (EAS) | | Other |
+ +---------------+ +-----------------+ +---------------+
+ | | em_cpu_energy() |
+ | | em_cpu_get() |
+ +---------+ | +---------+
+ | | |
+ v v v
+ +---------------------+
+ | Energy Model |
+ | Framework |
+ +---------------------+
+ ^ ^ ^
+ | | | em_dev_register_perf_domain()
+ +----------+ | +---------+
+ | | |
+ +---------------+ +---------------+ +--------------+
+ | cpufreq-dt | | arm_scmi | | Other |
+ +---------------+ +---------------+ +--------------+
+ ^ ^ ^
+ | | |
+ +--------------+ +---------------+ +--------------+
+ | Device Tree | | Firmware | | ? |
+ +--------------+ +---------------+ +--------------+
+
+In case of CPU devices, the EM framework manages power cost tables per
+'performance domain' in the system.  A performance domain is a group of
+CPUs whose performance is scaled together.  Performance domains
+generally have a 1-to-1 mapping with CPUFreq policies.  All CPUs in a
+performance domain are required to have the same micro-architecture.
+CPUs in different performance domains can have different
+micro-architectures.
+
+
+2. Core APIs
+------------
+
+2.1 Config options
+^^^^^^^^^^^^^^^^^^
+
+CONFIG_ENERGY_MODEL must be enabled to use the EM framework.
+
+
+2.2 Registration of performance domains
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Registration of 'advanced' EM
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The 'advanced' EM gets its name due to the fact that the driver is
+allowed to provide a more precise power model.  It isn't limited to
+some implemented math formula in the framework (as it is in the
+'simple' EM case).  It can better reflect the real power measurements
+performed for each performance state.  Thus, this registration method
+should be preferred in case considering EM static power (leakage) is
+important.
+
+Drivers are expected to register performance domains into the EM
+framework by calling the following API::
+ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
+ struct em_data_callback *cb, cpumask_t *cpus, bool milliwatts);
+
+Drivers must provide a callback function returning <frequency, power>
+tuples for each performance state.  The callback function provided by
+the driver is free to fetch data from any relevant location (DT,
+firmware, ...), and by any means deemed necessary.  Only for CPU
+devices, drivers must specify the CPUs of the performance domains using
+a cpumask.  For other devices than CPUs, the last parameter must be set
+to NULL.
+
+It is important to set the last argument, 'milliwatts', to the correct
+value.  Kernel subsystems which use the EM might rely on this flag to
+check if all EM devices use the same scale.  If there are different
+scales, these subsystems might decide to return a warning/error, stop
+working or panic.
+
+See Section 3. for an example of a driver implementing this callback,
+or Section 2.4 for further documentation on this API.
+
+
+Registration of 'simple' EM
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The 'simple' EM is registered using the framework helper function
+cpufreq_register_em_with_opp().  It implements a power model which is
+tightly coupled to the math formula::
+
+ Power = C * V^2 * f
+
+The EM which is registered using this method might not reflect
+correctly the physics of a real device, e.g. when static power
+(leakage) is important.
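+
+As a minimal sketch (an assumption, not from the original text), a
+cpufreq driver relying on OPP data only needs to point its
+``register_em`` callback at the helper::
+
+ #include <linux/cpufreq.h>
+
+ static struct cpufreq_driver foo_simple_cpufreq_driver = {
+         /* ... other callbacks ... */
+         /* Let the core build the EM from the OPP table, using the
+          * Power = C * V^2 * f approximation described above. */
+         .register_em = cpufreq_register_em_with_opp,
+ };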
+
+
+2.3 Accessing performance domains
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+There are two API functions which provide access to the energy model:
+em_cpu_get(), which takes a CPU id as an argument, and em_pd_get(),
+which takes a device pointer as an argument.  It depends on the
+subsystem which interface it is going to use, but in case of CPU
+devices both functions return the same performance domain.
+
+Subsystems interested in the energy model of a CPU can retrieve it
+using the em_cpu_get() API.  The energy model tables are allocated once
+upon creation of the performance domains, and kept in memory untouched.
+
+The energy consumed by a performance domain can be estimated using the
+em_cpu_energy() API.  The estimation is performed assuming that the
+schedutil CPUfreq governor is in use in case of CPU device.  Currently
+this calculation is not provided for other types of devices.
+
+More details about the above APIs can be found in
+``<linux/energy_model.h>`` or in Section 2.4.
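+
+A short sketch (an assumption, not part of the original document) of a
+client walking the performance states of a CPU's domain::
+
+ #include <linux/energy_model.h>
+
+ static void dump_cpu_em(int cpu)
+ {
+         struct em_perf_domain *pd = em_cpu_get(cpu);
+         int i;
+
+         if (!pd)
+                 return;  /* no EM registered for this CPU */
+
+         /* Each entry pairs a frequency with its power cost. */
+         for (i = 0; i < pd->nr_perf_states; i++)
+                 pr_info("state %d: %lu kHz -> power %lu\n", i,
+                         pd->table[i].frequency, pd->table[i].power);
+ }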
+
+
+2.4 Description details of this API
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+See the kernel doc in include/linux/energy_model.h and
+kernel/power/energy_model.c.
+
+3. Example driver
+-----------------
+
+The CPUFreq framework supports a dedicated callback for registering the
+EM for a given CPU(s) 'policy' object:
+cpufreq_driver::register_em().  That callback has to be implemented
+properly for a given driver, because the framework calls it at the
+right time during setup.  This section provides a simple example of a
+CPUFreq driver registering a performance domain in the Energy Model
+framework using the (fake) 'foo' protocol.  The driver implements an
+est_power() function to be provided to the EM framework::
+
+ -> drivers/cpufreq/foo_cpufreq.c
+
+ static int est_power(unsigned long *mW, unsigned long *KHz,
+                      struct device *dev)
+ {
+         long freq, power;
+
+         /* Use the 'foo' protocol to ceil the frequency */
+         freq = foo_get_freq_ceil(dev, *KHz);
+         if (freq < 0)
+                 return freq;
+
+         /* Estimate the power cost for the device at this frequency */
+         power = foo_estimate_power(dev, freq);
+         if (power < 0)
+                 return power;
+
+         /* Return the values to the EM framework */
+         *mW = power;
+         *KHz = freq;
+
+         return 0;
+ }
+
+ static void foo_cpufreq_register_em(struct cpufreq_policy *policy)
+ {
+         struct em_data_callback em_cb = EM_DATA_CB(est_power);
+         struct device *cpu_dev;
+         int nr_opp;
+
+         cpu_dev = get_cpu_device(cpumask_first(policy->cpus));
+
+         /* Find the number of OPPs for this policy */
+         nr_opp = foo_get_nr_opp(policy);
+
+         /* And register the new performance domain */
+         em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus,
+                                     true);
+ }
+
+ static struct cpufreq_driver foo_cpufreq_driver = {
+         .register_em = foo_cpufreq_register_em,
+ };
diff --git a/Documentation/translations/zh_CN/power/index.rst b/Documentation/translations/zh_CN/power/index.rst
new file mode 100644
index 000000000000..bc54983ba515
--- /dev/null
+++ b/Documentation/translations/zh_CN/power/index.rst
@@ -0,0 +1,56 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/power/index.rst
+
+:翻译:
+
+ å”艺舟 Tang Yizhou <tangyeechou@gmail.com>
+
+================
+Power Management
+================
+
+.. toctree::
+ :maxdepth: 1
+
+ energy-model
+ opp
+
+TODOList:
+
+ * apm-acpi
+ * basic-pm-debugging
+ * charger-manager
+ * drivers-testing
+ * freezing-of-tasks
+ * pci
+ * pm_qos_interface
+ * power_supply_class
+ * runtime_pm
+ * s2ram
+ * suspend-and-cpuhotplug
+ * suspend-and-interrupts
+ * swsusp-and-swap-files
+ * swsusp-dmcrypt
+ * swsusp
+ * video
+ * tricks
+
+ * userland-swsusp
+
+ * powercap/powercap
+ * powercap/dtpm
+
+ * regulator/consumer
+ * regulator/design
+ * regulator/machine
+ * regulator/overview
+ * regulator/regulator
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/translations/zh_CN/power/opp.rst b/Documentation/translations/zh_CN/power/opp.rst
new file mode 100644
index 000000000000..8d6e3f6f6202
--- /dev/null
+++ b/Documentation/translations/zh_CN/power/opp.rst
@@ -0,0 +1,341 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/power/opp.rst
+
+:翻译:
+
+ å”艺舟 Tang Yizhou <tangyeechou@gmail.com>
+
+==========================================
+Operating Performance Points (OPP) Library
+==========================================
+
+(C) 2009-2010 Nishanth Menon <nm@ti.com>, Texas Instruments Incorporated
+
+.. Contents
+
+  1. Introduction
+  2. Initial OPP List Registration
+  3. OPP Search Functions
+  4. OPP Availability Control Functions
+  5. OPP Data Retrieval Functions
+  6. Data Structures
+
+1. Introduction
+===============
+
+1.1 What is an Operating Performance Point (OPP)?
+-------------------------------------------------
+
+Complex SoCs of today consist of multiple sub-modules working in
+conjunction.  In an operational system executing varied use cases, not
+all modules in the SoC need to function at their highest performing
+frequency all the time.  To facilitate this, sub-modules in a SoC are
+grouped into domains, allowing some domains to run at lower voltage and
+frequency while other domains run at higher voltage/frequency pairs.
+
+The set of discrete tuples consisting of frequency and voltage pairs
+that the device will support per domain are called Operating
+Performance Points or OPPs.
+
+As an example:
+
+Let us consider a Memory Protection Unit (MPU) device which supports
+the following:
+{300MHz at minimum voltage of 1V}, {800MHz at minimum voltage of 1.2V},
+{1GHz at minimum voltage of 1.3V}
+
+We can represent these as three OPPs as the following {Hz, uV} tuples
+(frequency in hertz, voltage in microvolts):
+
+- {300000000, 1000000}
+- {800000000, 1200000}
+- {1000000000, 1300000}
+
+1.2 Operating Performance Points Library
+----------------------------------------
+
+The OPP library provides a set of helper functions to organize and
+query OPP information.  The library is located in the drivers/opp/
+directory and its header is in include/linux/pm_opp.h.  The OPP library
+can be enabled by turning on CONFIG_PM_OPP.  Certain SoCs, such as
+Texas Instruments' OMAP framework, allow optionally booting at a
+certain OPP without needing cpufreq.
+
+Typical usage of the OPP library is as follows::
+
+ (users) -> registers a set of default OPPs -> (library)
+ (SoC framework) -> modifies certain OPPs on required cases -> OPP layer
+ -> queries to search/retrieve information ->
+
+The OPP layer expects each domain to be represented by a unique device
+pointer.  The SoC framework registers a set of initial OPPs per device
+with the OPP layer.  This list is expected to be an optimally small
+number, typically around 5 per device.  This initial list contains a
+set of OPPs that the framework expects to be safely enabled on the
+system.
+
+Note on OPP Availability
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+As the system proceeds to operate, the SoC framework may choose to make
+certain OPPs available or not available on each device based on various
+external factors.  Example usage: thermal management or other
+exceptional situations where the SoC framework might choose to disable
+a higher frequency OPP to safely continue operations until that OPP can
+be re-enabled if possible.
+
+The OPP library facilitates this concept in its implementation.  The
+following operational functions operate only on available OPPs:
+dev_pm_opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage,
+dev_pm_opp_get_freq, dev_pm_opp_get_opp_count.
+
+dev_pm_opp_find_freq_exact is meant to be used to find the OPP pointer,
+which can then be used with the dev_pm_opp_enable/disable functions to
+make an OPP available as required.
+
+WARNING: If the dev_pm_opp_enable/disable functions are invoked for a
+device, users of the OPP library should refresh their availability
+count using dev_pm_opp_get_opp_count.  The exact mechanism to trigger
+these, or the notification mechanism to other dependent subsystems such
+as cpufreq, is left to the discretion of the SoC-specific framework
+that uses the OPP library.  Similar care needs to be taken to refresh
+the cpufreq table for these operations.
+
+2. Initial OPP List Registration
+================================
+The SoC implementation calls the dev_pm_opp_add function iteratively to
+add OPPs per device.  It is expected that the SoC framework will
+register the OPP entries optimally - typical numbers range to be less
+than 5.  The list generated by registering the OPPs is maintained by
+the OPP library throughout the device operation.  The SoC framework can
+subsequently control the availability of the OPPs dynamically using the
+dev_pm_opp_enable / disable functions.
+
+dev_pm_opp_add
+ 为设备指针所指å‘的特定域添加一个新的OPP。OPP是用频率和电压定义的。一旦完æˆ
+ 添加,OPP被认为是å¯ç”¨çš„,å¯ä»¥ç”¨dev_pm_opp_enable/disable函数æ¥æŽ§åˆ¶å…¶å¯ç”¨æ€§ã€‚
+ OPP库内部用dev_pm_opp结构体存储并管ç†è¿™äº›ä¿¡æ¯ã€‚这个函数å¯ä»¥è¢«SoC框架根æ®SoC
+ 的使用环境的需求æ¥å®šä¹‰ä¸€ä¸ªæœ€ä¼˜é“¾è¡¨ã€‚
+
+ 警告:
+ ä¸è¦åœ¨ä¸­æ–­ä¸Šä¸‹æ–‡ä½¿ç”¨è¿™ä¸ªå‡½æ•°ã€‚
+
+ 示例::
+
+ soc_pm_init()
+ {
+ /* åšä¸€äº›äº‹æƒ… */
+ r = dev_pm_opp_add(mpu_dev, 1000000, 900000);
+ if (!r) {
+ pr_err("%s: unable to register mpu opp(%d)\n", r);
+ goto no_cpufreq;
+ }
+ /* åšä¸€äº›å’Œcpufreq相关的事情 */
+ no_cpufreq:
+ /* åšå‰©ä½™çš„事情 */
+ }
+
+3. OPPæœç´¢å‡½æ•°
+==============
+cpufreq等高层框架对频率进行æ“作,为了将频率映射到相应的OPP,OPP库æ供了便利的函数
+æ¥æœç´¢OPP库内部管ç†çš„OPP链表。这些æœç´¢å‡½æ•°å¦‚果找到匹é…çš„OPP,将返回指å‘该OPP的指针,
+å¦åˆ™è¿”回错误。这些错误预计由标准的错误检查,如IS_ERR()æ¥å¤„ç†ï¼Œå¹¶ç”±è°ƒç”¨è€…采å–适当的
+行动。
+
+这些函数的调用者应在使用完OPPåŽè°ƒç”¨dev_pm_opp_put()。å¦åˆ™ï¼ŒOPP的内存将永远ä¸ä¼š
+被释放,并导致内存泄露。
+
+dev_pm_opp_find_freq_exact
+ æ ¹æ® *精确的* 频率和å¯ç”¨æ€§æ¥æœç´¢OPP。这个函数对默认ä¸å¯ç”¨çš„OPP特别有用。
+ 例å­ï¼šåœ¨SoC框架检测到更高频率å¯ç”¨çš„情况下,它å¯ä»¥ä½¿ç”¨è¿™ä¸ªå‡½æ•°åœ¨è°ƒç”¨
+ dev_pm_opp_enable之å‰æ‰¾åˆ°OPP::
+
+ opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false);
+ dev_pm_opp_put(opp);
+ /* ä¸è¦æ“作指针.. åªæ˜¯åšæœ‰æ•ˆæ€§æ£€æŸ¥.. */
+ if (IS_ERR(opp)) {
+ pr_err("frequency not disabled!\n");
+ /* 触å‘åˆé€‚çš„æ“作.. */
+ } else {
+ dev_pm_opp_enable(dev,1000000000);
+ }
+
+ 注æ„:
+ 这是唯一一个å¯ä»¥æœç´¢ä¸å¯ç”¨OPP的函数。
+
+dev_pm_opp_find_freq_floor
+ æœç´¢ä¸€ä¸ª *最多* æ供指定频率的å¯ç”¨OPP。这个函数在æœç´¢è¾ƒå°çš„匹é…或按频率
+ 递å‡çš„顺åºæ“作OPPä¿¡æ¯æ—¶å¾ˆæœ‰ç”¨ã€‚
+ 例å­ï¼šè¦æ‰¾çš„一个设备的最高OPP::
+
+ freq = ULONG_MAX;
+ opp = dev_pm_opp_find_freq_floor(dev, &freq);
+ dev_pm_opp_put(opp);
+
+dev_pm_opp_find_freq_ceil
+ æœç´¢ä¸€ä¸ª *最少* æ供指定频率的å¯ç”¨OPP。这个函数在æœç´¢è¾ƒå¤§çš„匹é…或按频率
+ 递增的顺åºæ“作OPPä¿¡æ¯æ—¶å¾ˆæœ‰ç”¨ã€‚
+ 例1:找到一个设备最å°çš„OPP::
+
+ freq = 0;
+ opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+ dev_pm_opp_put(opp);
+
+ 例: 一个SoC的cpufreq_driver->target的简易实现::
+
+ soc_cpufreq_target(..)
+ {
+ /* åšç­–略检查等æ“作 */
+ /* 找到和请求最接近的频率 */
+ opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+ dev_pm_opp_put(opp);
+ if (!IS_ERR(opp))
+ soc_switch_to_freq_voltage(freq);
+ else
+ /* 当ä¸èƒ½æ»¡è¶³è¯·æ±‚时,è¦åšçš„事 */
+ /* åšå…¶å®ƒäº‹ */
+ }
+
+4. OPPå¯ç”¨æ€§æŽ§åˆ¶å‡½æ•°
+====================
+在OPP库中注册的默认OPP链表也许无法满足所有å¯èƒ½çš„场景。OPP库æ供了一套函数æ¥ä¿®æ”¹
+OPP链表中的æŸä¸ªOPPçš„å¯ç”¨æ€§ã€‚这使得SoC框架能够精细地动æ€æŽ§åˆ¶å“ªä¸€ç»„OPP是å¯ç”¨äºŽæ“作
+的。设计这些函数的目的是在诸如考虑温度时 *暂时地* 删除æŸä¸ªOPP(例如,在温度下é™
+之å‰ä¸è¦ä½¿ç”¨æŸOPP)。
+
+警告:
+ ä¸è¦åœ¨ä¸­æ–­ä¸Šä¸‹æ–‡ä½¿ç”¨è¿™äº›å‡½æ•°ã€‚
+
+dev_pm_opp_enable
+ 使一个OPPå¯ç”¨äºŽæ“作。
+ 例å­ï¼šå‡è®¾1GHzçš„OPPåªæœ‰åœ¨SoC温度低于æŸä¸ªé˜ˆå€¼æ—¶æ‰å¯ç”¨ã€‚SoC框架的实现å¯èƒ½
+ 会选择åšä»¥ä¸‹äº‹æƒ…::
+
+ if (cur_temp < temp_low_thresh) {
+ /* 若1GHz未使能,则使能 */
+ opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false);
+ dev_pm_opp_put(opp);
+ /* 仅仅是错误检查 */
+ if (!IS_ERR(opp))
+ ret = dev_pm_opp_enable(dev, 1000000000);
+ else
+ goto try_something_else;
+ }
+
+dev_pm_opp_disable
+ 使一个OPPä¸å¯ç”¨äºŽæ“作。
+ 例å­ï¼šå‡è®¾1GHzçš„OPPåªæœ‰åœ¨SoC温度高于æŸä¸ªé˜ˆå€¼æ—¶æ‰å¯ç”¨ã€‚SoC框架的实现å¯èƒ½
+ 会选择åšä»¥ä¸‹äº‹æƒ…::
+
+ if (cur_temp > temp_high_thresh) {
+ /* 若1GHz已使能,则关闭 */
+ opp = dev_pm_opp_find_freq_exact(dev, 1000000000, true);
+ dev_pm_opp_put(opp);
+ /* 仅仅是错误检查 */
+ if (!IS_ERR(opp))
+ ret = dev_pm_opp_disable(dev, 1000000000);
+ else
+ goto try_something_else;
+ }
+
+5. OPPæ•°æ®æ£€ç´¢å‡½æ•°
+==================
+由于OPP库对OPPä¿¡æ¯è¿›è¡Œäº†æŠ½è±¡åŒ–处ç†ï¼Œå› æ­¤éœ€è¦ä¸€ç»„函数æ¥ä»Ždev_pm_opp结构体中æå–
+ä¿¡æ¯ã€‚一旦使用æœç´¢å‡½æ•°æ£€ç´¢åˆ°ä¸€ä¸ªOPP指针,以下函数就å¯ä»¥è¢«SoC框架用æ¥æ£€ç´¢OPP层
+内部æè¿°çš„ä¿¡æ¯ã€‚
+
+dev_pm_opp_get_voltage
+ 检索OPP指针æ述的电压。
+ 例å­: 当cpufreq切æ¢åˆ°åˆ°ä¸åŒé¢‘率时,SoC框架需è¦ç”¨ç¨³åŽ‹å™¨æ¡†æž¶å°†OPPæè¿°
+ 的电压设置到æ供电压的电æºç®¡ç†èŠ¯ç‰‡ä¸­::
+
+ soc_switch_to_freq_voltage(freq)
+ {
+ /* åšä¸€äº›äº‹æƒ… */
+ opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+ v = dev_pm_opp_get_voltage(opp);
+ dev_pm_opp_put(opp);
+ if (v)
+ regulator_set_voltage(.., v);
+ /* åšå…¶å®ƒäº‹ */
+ }
+
+dev_pm_opp_get_freq
+ 检索OPP指针æ述的频率。
+ 例å­ï¼šæ¯”方说,SoC框架使用了几个辅助函数,通过这些函数,我们å¯ä»¥å°†OPP
+ 指针传入,而ä¸æ˜¯ä¼ å…¥é¢å¤–çš„å‚数,用æ¥å¤„ç†ä¸€ç³»åˆ—æ•°æ®å‚æ•°::
+
+ soc_cpufreq_target(..)
+ {
+ /* åšä¸€äº›äº‹æƒ….. */
+ max_freq = ULONG_MAX;
+ max_opp = dev_pm_opp_find_freq_floor(dev,&max_freq);
+ requested_opp = dev_pm_opp_find_freq_ceil(dev,&freq);
+ if (!IS_ERR(max_opp) && !IS_ERR(requested_opp))
+ r = soc_test_validity(max_opp, requested_opp);
+ dev_pm_opp_put(max_opp);
+ dev_pm_opp_put(requested_opp);
+ /* åšå…¶å®ƒäº‹ */
+ }
+ soc_test_validity(..)
+ {
+ if(dev_pm_opp_get_voltage(max_opp) < dev_pm_opp_get_voltage(requested_opp))
+ return -EINVAL;
+ if(dev_pm_opp_get_freq(max_opp) < dev_pm_opp_get_freq(requested_opp))
+ return -EINVAL;
+ /* åšä¸€äº›äº‹æƒ….. */
+ }
+
+dev_pm_opp_get_opp_count
+ 检索æŸä¸ªè®¾å¤‡å¯ç”¨çš„OPPæ•°é‡ã€‚
+ 例å­ï¼šå‡è®¾SoC中的一个å处ç†å™¨éœ€è¦çŸ¥é“æŸä¸ªè¡¨ä¸­çš„å¯ç”¨é¢‘率,主处ç†å™¨å¯ä»¥
+ 按如下方å¼å‘出通知::
+
+ soc_notify_coproc_available_frequencies()
+ {
+ /* åšä¸€äº›äº‹æƒ… */
+ num_available = dev_pm_opp_get_opp_count(dev);
+ speeds = kzalloc(sizeof(u32) * num_available, GFP_KERNEL);
+ /* 按å‡åºå¡«å……表 */
+ freq = 0;
+ while (!IS_ERR(opp = dev_pm_opp_find_freq_ceil(dev, &freq))) {
+ speeds[i] = freq;
+ freq++;
+ i++;
+ dev_pm_opp_put(opp);
+ }
+
+ soc_notify_coproc(AVAILABLE_FREQs, speeds, num_available);
+ /* åšå…¶å®ƒäº‹ */
+ }
+
+6. æ•°æ®ç»“æž„
+===========
+通常,一个SoC包å«å¤šä¸ªå¯å˜ç”µåŽ‹åŸŸã€‚æ¯ä¸ªåŸŸç”±ä¸€ä¸ªè®¾å¤‡æŒ‡é’ˆæ述。和OPP之间的关系å¯ä»¥
+按以下方å¼æè¿°::
+
+ SoC
+ |- device 1
+ | |- opp 1 (availability, freq, voltage)
+ | |- opp 2 ..
+ ... ...
+ | `- opp n ..
+ |- device 2
+ ...
+ `- device m
+
+OPP库维护ç€ä¸€ä¸ªå†…部链表,SoC框架使用上文æè¿°çš„å„个函数æ¥å¡«å……和访问。然而,æè¿°
+真实OPP和域的结构体是OPP库自身的内部组æˆï¼Œä»¥å…许åˆé€‚的抽象在ä¸åŒç³»ç»Ÿä¸­å¾—到å¤ç”¨ã€‚
+
+struct dev_pm_opp
+ OPP库的内部数æ®ç»“构,用于表示一个OPP。除了频率ã€ç”µåŽ‹ã€å¯ç”¨æ€§ä¿¡æ¯å¤–,
+ 它还包å«OPP库è¿è¡Œæ‰€éœ€çš„内部统计信æ¯ã€‚指å‘这个结构体的指针被æ供给
+ 用户(比如SoC框架)使用,在与OPP层的交互中作为OPP的标识符。
+
+ 警告:
+ 结构体dev_pm_opp的指针ä¸åº”该由用户解æžæˆ–修改。一个实例的默认值由
+ dev_pm_opp_add填充,但OPPçš„å¯ç”¨æ€§ç”±dev_pm_opp_enable/disable函数
+ 修改。
+
+struct device
+ 这用于å‘OPP层标识一个域。设备的性质和它的实现是由OPP库的用户决定的,
+ 如SoC框架。
+
+总体æ¥è¯´ï¼Œä»¥ä¸€ä¸ªç®€åŒ–的视角看,对数æ®ç»“æž„çš„æ“作å¯ä»¥æ述为下é¢å„图::
+
+ åˆå§‹åŒ– / 修改:
+ +-----+ /- dev_pm_opp_enable
+ dev_pm_opp_add --> | opp | <-------
+ | +-----+ \- dev_pm_opp_disable
+ \-------> domain_info(device)
+
+ æœç´¢å‡½æ•°:
+ /-- dev_pm_opp_find_freq_ceil ---\ +-----+
+ domain_info<---- dev_pm_opp_find_freq_exact -----> | opp |
+ \-- dev_pm_opp_find_freq_floor ---/ +-----+
+
+ 检索函数:
+ +-----+ /- dev_pm_opp_get_voltage
+ | opp | <---
+ +-----+ \- dev_pm_opp_get_freq
+
+ domain_info <- dev_pm_opp_get_opp_count
diff --git a/Documentation/translations/zh_CN/riscv/index.rst b/Documentation/translations/zh_CN/riscv/index.rst
index bbf5d7b3777a..614cde0c0997 100644
--- a/Documentation/translations/zh_CN/riscv/index.rst
+++ b/Documentation/translations/zh_CN/riscv/index.rst
@@ -18,6 +18,7 @@ RISC-V 体系结构
:maxdepth: 1
boot-image-header
+ vm-layout
pmu
patch-acceptance
diff --git a/Documentation/translations/zh_CN/riscv/vm-layout.rst b/Documentation/translations/zh_CN/riscv/vm-layout.rst
new file mode 100644
index 000000000000..585cb89317a3
--- /dev/null
+++ b/Documentation/translations/zh_CN/riscv/vm-layout.rst
@@ -0,0 +1,67 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/riscv/vm-layout.rst
+
+:Translated by:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+=====================================
+Virtual Memory Layout on RISC-V Linux
+=====================================
+
+:Author: Alexandre Ghiti <alex@ghiti.fr>
+:Date: 12 February 2021
+
+This document describes the virtual memory layout used by the RISC-V Linux
+kernel.
+
+32-bit RISC-V Linux Kernel
+==========================
+
+RISC-V Linux Kernel SV32
+------------------------
+
+TODO
+
+64-bit RISC-V Linux Kernel
+==========================
+
+The RISC-V privileged architecture document states that 64-bit addresses
+"must have bits 63-48 all equal to bit 47, or else a page-fault exception
+will occur.": that splits the virtual address space into two halves
+separated by a very big hole, the lower half being where userspace resides,
+and the upper half being where the RISC-V Linux kernel lives.
+
+RISC-V Linux Kernel SV39
+------------------------
+
+::
+
+ ========================================================================================================================
+   Start addr       |   Offset   |     End addr     |  Size   | VM area description
+ ========================================================================================================================
+ | | | |
+   0000000000000000 |     0      | 0000003fffffffff |  256 GB | user-space virtual memory, different per mm
+ __________________|____________|__________________|_________|___________________________________________________________
+ | | | |
+   0000004000000000 |  +256 GB   | ffffffbfffffffff | ~16M TB | ... huge, almost 64 bits wide hole of non-canonical
+                    |            |                  |         | virtual memory addresses up to the -256 GB starting
+                    |            |                  |         | offset of the kernel mappings.
+ | | | |
+ __________________|____________|__________________|_________|___________________________________________________________
+ |
+                                                              | Kernel-space virtual memory, shared between all processes:
+ ____________________________________________________________|___________________________________________________________
+ | | | |
+ ffffffc6fee00000 | -228 GB | ffffffc6feffffff | 2 MB | fixmap
+ ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io
+ ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap
+ ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space
+   ffffffd800000000 |  -160 GB   | fffffff6ffffffff |  124 GB | direct mapping of all physical memory
+ fffffff700000000 | -36 GB | fffffffeffffffff | 32 GB | kasan
+ __________________|____________|__________________|_________|____________________________________________________________
+ |
+ |
+ ____________________________________________________________|____________________________________________________________
+ | | | |
+ ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF
+ ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel
+ __________________|____________|__________________|_________|____________________________________________________________
diff --git a/Documentation/translations/zh_CN/scheduler/index.rst b/Documentation/translations/zh_CN/scheduler/index.rst
index f8f8f35d53c7..12bf3bd02ccf 100644
--- a/Documentation/translations/zh_CN/scheduler/index.rst
+++ b/Documentation/translations/zh_CN/scheduler/index.rst
@@ -5,6 +5,7 @@
 :Translated by:
 司延腾 Yanteng Si <siyanteng@loongson.cn>
+ 唐艺舟 Tang Yizhou <tangyeechou@gmail.com>
 :Proofread by:
@@ -23,16 +24,14 @@ Linux Scheduler
sched-design-CFS
sched-domains
sched-capacity
-
+ sched-energy
+ sched-nice-design
+ sched-stats
TODOList:
- sched-bwc
sched-deadline
- sched-energy
- sched-nice-design
sched-rt-group
- sched-stats
text_files
diff --git a/Documentation/translations/zh_CN/scheduler/sched-energy.rst b/Documentation/translations/zh_CN/scheduler/sched-energy.rst
new file mode 100644
index 000000000000..fdbf6cfeea93
--- /dev/null
+++ b/Documentation/translations/zh_CN/scheduler/sched-energy.rst
@@ -0,0 +1,351 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/scheduler/sched-energy.rst
+
+:Translated by:
+
+ 唐艺舟 Tang Yizhou <tangyeechou@gmail.com>
+
+=======================
+Energy Aware Scheduling
+=======================
+
+1. Introduction
+---------------
+
+Energy Aware Scheduling (EAS) gives the scheduler the ability to predict
+the impact of its decisions on the energy consumed by CPUs. EAS relies on
+an Energy Model (EM) to select an energy-efficient CPU for each task, with
+a minimal impact on throughput. This document aims at providing an
+introduction to how EAS works, the main design decisions behind it, and
+the details of what is needed to get it running.
+
+Before going any further, please note that at the time of writing::
+
+   /!\ EAS does not support platforms with symmetric CPU topologies /!\
+
+EAS operates only on heterogeneous CPU topologies (such as Arm big.LITTLE),
+because this is where the potential for saving energy through scheduling is
+the highest.
+
+The actual EM used by EAS is not maintained by the scheduler, but by a
+dedicated framework. For details about this framework and what it provides,
+please refer to its documentation (see
+Documentation/power/energy-model.rst).
+
+
+2. Background and Terminology
+-----------------------------
+
+To make it clear from the start:
+ - energy = [joule] (resource like a battery on powered devices)
+ - power = energy/time = [joule/second] = [watt]
+
+The goal of EAS is to minimize energy, while still getting the job done.
+That is, we want to maximize::
+
+	performance [inst/s]
+	--------------------
+	    power [W]
+
+which is equivalent to minimizing::
+
+	energy [J]
+	-----------
+	instruction
+
+while still getting 'good' performance. The current scheduler considers
+only the performance goal, so this expression is essentially an alternative
+optimization objective, one that considers both energy-efficiency and
+performance.
+
+The idea behind introducing an EM is to allow the scheduler to evaluate the
+implications of its decisions, rather than blindly applying energy-saving
+techniques that may have positive effects only on some platforms. At the
+same time, the EM must be as simple as possible to minimize the scheduler
+latency impact.
+
+In short, EAS changes the way CFS tasks are assigned to CPUs. When it is
+time for the scheduler to decide where a task should run (during wake-up),
+the EM is used to pick, among several good CPU candidates, the one that is
+predicted to yield the best energy consumption without harming the
+system's throughput. The predictions made by EAS rely on specific
+knowledge of the platform's topology, including the 'capacity' of CPUs and
+their respective energy costs.
+
+
+3. Topology information
+-----------------------
+
+EAS (as well as the rest of the scheduler) uses the notion of 'capacity' to
+differentiate CPUs with different computing throughput. The 'capacity' of a
+CPU represents the amount of work it can absorb when running at its highest
+frequency, compared to the most capable CPU of the system. Capacity values
+are normalized in a 1024 range, and are comparable with the utilization
+signals of tasks and CPUs computed by the Per-Entity Load Tracking (PELT)
+mechanism. Thanks to capacity and utilization values, EAS is able to
+estimate how big/busy a task/CPU is, and to take this into consideration
+when evaluating performance vs energy trade-offs. The capacity of CPUs is
+provided by architecture-specific code through the
+arch_scale_cpu_capacity() callback.
+
+The rest of the platform knowledge used by EAS is directly read from the
+Energy Model (EM) framework. The EM of a platform is composed of a power
+cost table per 'performance domain' in the system (see
+Documentation/power/energy-model.rst for further details about performance
+domains).
+
+The scheduler manages references to the EM objects in the topology code
+when the scheduling domains are built, or re-built. For each root domain
+(rd), the scheduler maintains a singly linked list of all performance
+domains intersecting the current rd->span. Each node in the list contains
+a pointer to a struct em_perf_domain as provided by the EM framework.
+
+The lists are attached to the root domains in order to cope with exclusive
+cpuset configurations. Since the boundaries of exclusive cpusets do not
+necessarily match those of performance domains, the lists of different
+root domains can contain duplicate elements.
+
+Example 1
+    Let us consider a platform with 12 CPUs, split into 3 performance
+    domains (pd0, pd4 and pd8), organized as follows::
+
+	          CPUs:   0 1 2 3 4 5 6 7 8 9 10 11
+	          PDs:   |--pd0--|--pd4--|---pd8---|
+	          RDs:   |----rd1----|-----rd2-----|
+
+    Now, consider that userspace decided to split the system into two
+    exclusive cpusets, hence creating two independent root domains, each
+    containing 6 CPUs. The two root domains are denoted rd1 and rd2 in the
+    figure above. Since pd4 intersects with both rd1 and rd2, it will be
+    present in the linked list '->pd' attached to each of them:
+
+       * rd1->pd: pd0 -> pd4
+       * rd2->pd: pd4 -> pd8
+
+    Please note that the scheduler will create two duplicate list nodes
+    for pd4 (one for each list). However, both just hold a pointer to the
+    same shared data structure of the EM framework.
+
+Since access to these lists can happen concurrently with hotplug and other
+events, they are protected by RCU, like the rest of the topology structures
+manipulated by the scheduler.
+
+EAS also maintains a static key (sched_energy_present), which is enabled
+when at least one root domain meets all conditions for EAS to start. Those
+conditions are summarized in Section 6.
+
+
+4. Energy-Aware task placement
+------------------------------
+
+EAS overrides the CFS task wake-up balancing code. It uses the EM of the
+platform and the PELT signals to choose an energy-efficient target CPU
+during wake-up balance. When EAS is enabled, select_task_rq_fair() calls
+find_energy_efficient_cpu() to make the placement decision. This function
+looks for the CPU with the highest spare capacity (CPU capacity - CPU
+utilization) in each performance domain, since it is the one which will
+allow us to keep the frequency the lowest. Then, the function checks if
+placing the task there could save energy compared to leaving it on
+prev_cpu, i.e. the CPU where the task ran previously.
+
+If the waking task is migrated, find_energy_efficient_cpu() uses
+compute_energy() to estimate what the energy consumed by the system would
+be. compute_energy() looks at the current utilization landscape of the
+CPUs and adjusts it to 'simulate' the task migration. The EM framework
+provides the em_pd_energy() API, which computes the expected energy
+consumption of each performance domain for the given utilization landscape.
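+
+The per-CPU numbers in the example below follow a simple estimate: within a
+performance domain, the OPP is assumed to be the lowest one whose capacity
+covers the highest utilization in the domain, and each CPU then contributes
+energy in proportion to its own utilization. A rough sketch of that
+estimate, for illustration only (a simplification, not the actual kernel
+code)::
+
+	/* one performance domain running at the OPP {cap, power} */
+	pd_energy = 0;
+	for_each_cpu(cpu, pd_cpus)		/* e.g. 200 / 341 * 150 = 88 */
+		pd_energy += util(cpu) * power / cap;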
+
+An example of an energy-optimized task placement decision is detailed
+below.
+
+Example 2
+    Let us consider a (fake) platform with 2 independent performance
+    domains, each composed of 2 CPUs. CPU0 and CPU1 are little CPUs; CPU2
+    and CPU3 are big.
+
+    The scheduler must decide where to place a task P whose util_avg
+    (average utilization) = 200 and prev_cpu = 0 (it last ran on CPU0).
+
+    The current utilization landscape of the CPUs is depicted in the graph
+    below. CPUs 0-3 have a util_avg of 400, 100, 600 and 500 respectively.
+    Each performance domain has three Operating Performance Points (OPPs).
+    The CPU capacity and power cost associated with each OPP is listed in
+    the Energy Model table. The util_avg of P is shown as "PP"::
+
+ CPU util.
+ 1024 - - - - - - - Energy Model
+ +-----------+-------------+
+ | Little | Big |
+ 768 ============= +-----+-----+------+------+
+ | Cap | Pwr | Cap | Pwr |
+ +-----+-----+------+------+
+ 512 =========== - ##- - - - - | 170 | 50 | 512 | 400 |
+ ## ## | 341 | 150 | 768 | 800 |
+ 341 -PP - - - - ## ## | 512 | 300 | 1024 | 1700 |
+ PP ## ## +-----+-----+------+------+
+ 170 -## - - - - ## ##
+ ## ## ## ##
+ ------------ -------------
+ CPU0 CPU1 CPU2 CPU3
+
+ Current OPP: ===== Other OPP: - - - util_avg (100 each): ##
+
+
+    find_energy_efficient_cpu() will first look for the CPUs with the
+    maximum spare capacity in the two performance domains. In this example,
+    these are CPU1 and CPU3. Then it will estimate the energy of the system
+    if P was placed on either of them, and check whether that would save
+    some energy compared to leaving P on CPU0. EAS assumes that OPPs follow
+    utilization (which is consistent with the behaviour of the schedutil
+    CPUFreq governor; see Section 6 for more details on this topic).
+
+    **Case 1. P is migrated to CPU1**::
+
+ 1024 - - - - - - -
+
+ Energy calculation:
+ 768 ============= * CPU0: 200 / 341 * 150 = 88
+ * CPU1: 300 / 341 * 150 = 131
+ * CPU2: 600 / 768 * 800 = 625
+ 512 - - - - - - - ##- - - - - * CPU3: 500 / 768 * 800 = 520
+ ## ## => total_energy = 1364
+ 341 =========== ## ##
+ PP ## ##
+ 170 -## - - PP- ## ##
+ ## ## ## ##
+ ------------ -------------
+ CPU0 CPU1 CPU2 CPU3
+
+
+    **Case 2. P is migrated to CPU3**::
+
+ 1024 - - - - - - -
+
+ Energy calculation:
+ 768 ============= * CPU0: 200 / 341 * 150 = 88
+ * CPU1: 100 / 341 * 150 = 43
+ PP * CPU2: 600 / 768 * 800 = 625
+ 512 - - - - - - - ##- - -PP - * CPU3: 700 / 768 * 800 = 729
+ ## ## => total_energy = 1485
+ 341 =========== ## ##
+ ## ##
+ 170 -## - - - - ## ##
+ ## ## ## ##
+ ------------ -------------
+ CPU0 CPU1 CPU2 CPU3
+
+    **Case 3. P stays on prev_cpu / CPU0**::
+
+ 1024 - - - - - - -
+
+ Energy calculation:
+ 768 ============= * CPU0: 400 / 512 * 300 = 234
+ * CPU1: 100 / 512 * 300 = 58
+ * CPU2: 600 / 768 * 800 = 625
+ 512 =========== - ##- - - - - * CPU3: 500 / 768 * 800 = 520
+ ## ## => total_energy = 1437
+ 341 -PP - - - - ## ##
+ PP ## ##
+ 170 -## - - - - ## ##
+ ## ## ## ##
+ ------------ -------------
+ CPU0 CPU1 CPU2 CPU3
+
+    From these calculations, Case 1 has the lowest total energy. So CPU1
+    is the best candidate from an energy-efficiency standpoint.
+
+Big CPUs are generally more power-hungry than the little ones and are thus
+used mainly when a task doesn't fit on the littles. However, little CPUs
+aren't always necessarily more energy-efficient than big CPUs. For some
+systems, the high OPPs of the little CPUs can be less energy-efficient
+than the low OPPs of the bigs, for example. So, if the little CPUs happen
+to have enough utilization at a specific point in time, a small task
+waking up at that moment could be better off executing on the big side in
+order to save energy, even though it would fit on the little side.
+
+And even in the case where all OPPs of the big CPUs are less
+energy-efficient than those of the little CPUs, using a big CPU for a
+small task might still, under specific conditions, save energy. Indeed,
+placing a task on a little CPU can result in raising the OPP of the entire
+performance domain, and that will increase the energy cost of the tasks
+already running there. If the waking task is placed on a big CPU, its own
+execution cost might be higher than if it was running on a little, but it
+won't impact the other tasks of the little domain, which will keep running
+at a lower OPP. So, when considering the total energy consumed by CPUs,
+the extra cost of running that one task on a big core can be smaller than
+the cost of raising the OPP on the little CPUs for all the other tasks.
+
+The examples above would be nearly impossible to get right in a generic
+way, and for all platforms, without knowing the cost of running at
+different OPPs on all CPUs of the system. Thanks to its EM-based design,
+EAS should cope with them correctly without too many troubles. However,
+in order to ensure a minimal impact on throughput for high-utilization
+scenarios, EAS also implements another mechanism called 'over-utilization'.
+
+
+5. Over-utilization
+-------------------
+
+From a general standpoint, the use-cases where EAS can help the most are
+those involving a light/medium CPU utilization. Whenever long CPU-bound
+tasks are being run, they will require all of the available CPU capacity,
+and there isn't much that can be done by the scheduler to save energy
+without severely harming throughput. In order to avoid hurting performance
+with EAS, CPUs are flagged as 'over-utilized' as soon as they are used at
+more than 80% of their compute capacity. As long as no CPU is over-utilized
+in a root domain, load balancing is disabled and EAS overrides the wake-up
+balancing code. EAS is likely to load the most energy-efficient CPUs of the
+system more than the others if that can be done without harming throughput.
+So, the load-balancer is disabled to prevent it from breaking the
+energy-efficient task placement found by EAS. It is safe to do so when the
+system isn't over-utilized, since being below the 80% tipping point implies
+that:
+
+    a. there is some idle time on all CPUs, so the utilization signals
+       used by EAS are likely to accurately represent the 'size' of the
+       various tasks in the system;
+    b. all tasks, regardless of their nice values, should be provided
+       with enough CPU capacity;
+    c. since there is spare capacity, all tasks must be blocking/sleeping
+       regularly and balancing at wake-up is sufficient.
+
+As soon as one CPU goes above the 80% tipping point, at least one of the
+three assumptions above becomes incorrect. In this scenario, the
+'over-utilized' flag is raised for the entire root domain, EAS is
+disabled, and the load-balancer is re-enabled. By doing so, the scheduler
+falls back onto load-based algorithms for wake-up and load balance under
+CPU-bound conditions. This provides a better respect of the nice values of
+tasks.
+
+Since the notion of over-utilization largely relies on detecting whether
+or not there is some idle time in the system, the CPU capacity 'stolen' by
+higher-priority (than CFS) scheduling classes (as well as IRQ) must be
+taken into account. As such, the detection of over-utilization accounts
+not only for the capacity used by CFS tasks, but also for the other
+scheduling classes and IRQ.
+
+
+6. Dependencies and requirements for EAS
+----------------------------------------
+
+Energy Aware Scheduling depends on the CPUs of the system having specific
+hardware properties, and on other features of the kernel being enabled.
+This section lists these dependencies and provides hints on how they can
+be met.
+
+
+6.1 - Asymmetric CPU topology
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+As mentioned in the introduction, EAS is only supported on platforms with
+asymmetric CPU topologies for now. This requirement is checked at run-time
+by looking for the presence of the SD_ASYM_CPUCAPACITY_FULL flag when the
+scheduling domains are built.
+
+See Documentation/scheduler/sched-capacity.rst for requirements to be met
+for this flag to be set in the sched_domain hierarchy.
+
+Please note that EAS is not fundamentally incompatible with SMP, but no
+significant savings on SMP platforms have been observed yet. This
+restriction could be amended in the future if proven otherwise.
+
+
+6.2 - Energy model presence
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+EAS uses the EM of a platform to estimate the impact of scheduling
+decisions on energy. So, your platform must provide power cost tables to
+the EM framework in order to make EAS start. To do so, please refer to
+the documentation of the independent EM framework in
+Documentation/power/energy-model.rst.
+
+Please also note that the scheduling domains need to be re-built after the
+EM has been registered in order to start EAS.
+
+EAS uses the EM to make a forecasting decision on energy usage, and thus
+it is more focused on the difference when checking possible options for
+task placement. For EAS it doesn't matter whether the EM power values are
+expressed in milli-Watts or in an 'abstract scale'.
+
+
+
+6.3 - Energy Model complexity
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The task wake-up path is latency-sensitive. When the EM of a platform is
+too complex (too many CPUs, too many performance domains, too many states,
+...), the cost of using it in the wake-up path can become prohibitive. The
+energy-aware wake-up algorithm has a complexity of::
+
+	C = Nd * (Nc + Ns)
+
+with: Nd the number of performance domains, Nc the number of CPUs, and Ns
+the total number of OPPs (e.g. for two performance domains with 4 OPPs
+each, Ns = 8).
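+
+As a worked check, using the platform of Example 2 above (2 performance
+domains, 4 CPUs, 3 OPPs per domain, so Ns = 6)::
+
+	C = 2 * (4 + 6) = 20
+
+which is far below the threshold mentioned below.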
+
+A complexity check is performed at the root domain level when the
+scheduling domains are built. EAS will not start on a root domain if its
+complexity C happens to be higher than the completely arbitrary
+EM_MAX_COMPLEXITY threshold (2048 at the time of writing).
+
+If you really want to use EAS but the complexity of your platform's Energy
+Model is too high to be used on a given root domain, you're left with only
+two possible options:
+
+    1. split your system into separate, smaller root domains using
+       exclusive cpusets, and enable EAS locally on each of them. This
+       option works out of the box, but has the drawback of preventing
+       load balance between root domains, which can result in an
+       unbalanced system overall;
+    2. submit patches to reduce the complexity of the EAS wake-up
+       algorithm, hence enabling it to cope with larger EMs in reasonable
+       times.
+
+
+6.4 - Schedutil governor
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+EAS tries to predict at which OPP the CPUs will be running in the close
+future, to estimate their energy consumption. To do so, it assumes that
+the OPPs of CPUs follow their utilization.
+
+Although it is very difficult to provide hard guarantees regarding the
+accuracy of this assumption in practice (because the hardware might not do
+what it is told to do, for example), schedutil, as opposed to other
+CPUFreq governors, at least _requests_ frequencies calculated using the
+utilization signals. Consequently, the only sane governor to use together
+with EAS is schedutil, because it is the only one providing some degree of
+consistency between frequency requests and energy predictions.
+
+Using EAS with any other governor than schedutil is not supported.
+
+
+6.5 Scale-invariant utilization signals
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In order to make accurate predictions across CPUs and for all performance
+states, EAS needs frequency-invariant and CPU-invariant PELT signals.
+These can be obtained using the architecture-defined
+arch_scale{cpu,freq}_capacity() callbacks.
+
+Using EAS on a platform that doesn't implement these two callbacks is not
+supported.
+
+
+6.6 Multithreading (SMT)
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+EAS in its current form is SMT unaware and is not able to leverage
+multithreaded hardware to save energy. EAS considers threads as
+independent CPUs, which can actually be counter-productive for both
+performance and energy.
+
+EAS on SMT is not supported.
diff --git a/Documentation/translations/zh_CN/scheduler/sched-nice-design.rst b/Documentation/translations/zh_CN/scheduler/sched-nice-design.rst
new file mode 100644
index 000000000000..9107f0c0b979
--- /dev/null
+++ b/Documentation/translations/zh_CN/scheduler/sched-nice-design.rst
@@ -0,0 +1,99 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/scheduler/sched-nice-design.rst
+
+:Translated by:
+
+ 唐艺舟 Tang Yizhou <tangyeechou@gmail.com>
+
+=====================
+Scheduler Nice Design
+=====================
+
+This document explains the thinking about the revamped and streamlined
+nice-levels implementation in the new Linux scheduler.
+
+Nice levels were always pretty weak under Linux, and people continuously
+pestered us to make nice +19 tasks use up much less CPU time.
+
+Unfortunately that was not that easy to implement under the old scheduler
+(otherwise we'd have done it long ago), because nice level support was
+historically coupled to timeslice length, and timeslice units were driven
+by the HZ tick, so the smallest timeslice was 1/HZ.
+
+In the O(1) scheduler (in 2003) we changed negative nice levels to be much
+stronger than they were in the 2.4 kernel (and people were happy about
+that change), and we also intentionally calibrated the linear timeslice
+rule so that the nice +19 level would be _exactly_ 1 jiffy. To better
+understand it, the timeslice graph went like this (cheesy ASCII art
+alert!)::
+
+
+ A
+ \ | [timeslice length]
+ \ |
+ \ |
+ \ |
+ \ |
+ \|___100msecs
+ |^ . _
+ | ^ . _
+ | ^ . _
+ -*----------------------------------*-----> [nice level]
+ -20 | +19
+ |
+ |
+
+So if someone really wanted to renice tasks, +19 would give a much bigger
+hit than the normal linear rule would do. (The solution of changing the
+ABI to extend priorities was discarded early on.)
+
+This approach worked to some degree for some time, but later on, with
+HZ=1000, it caused 1 jiffy to be 1 msec, which meant 0.1% CPU usage, which
+we felt to be a bit excessive. Excessive _not_ because it's too small of a
+CPU utilization, but because it causes too frequent (once per millisecond)
+rescheduling (and would thus trash the cache, etc. Remember, this was long
+ago when hardware was weaker and caches were smaller, and people were
+running number-crunching apps at nice +19).
+
+So for HZ=1000 we changed nice +19 to 5 msecs, because that felt like the
+right minimal granularity - and this translates to 5% CPU utilization. But
+the fundamental HZ-sensitive property of nice +19 still remained, and we
+never got a single complaint about nice +19 being too _weak_ in terms of
+CPU utilization; we only got complaints about it (still) being too
+_strong_ :-)
+
+To sum it up: we always wanted to make nice levels more consistent, but
+within the constraints of HZ and jiffies, and their nasty design-level
+coupling to timeslices and granularity, it was not really viable.
+
+The second, less frequent but still periodically occurring, complaint
+about Linux's nice level support was its asymmetry around the origin
+(which you can see demonstrated in the picture above), or more accurately:
+the fact that nice level behavior depended on the _absolute_ nice level as
+well, while the nice API itself is fundamentally "relative":
+
+   int nice(int inc);
+
+   asmlinkage long sys_nice(int increment)
+
+(the first one is the glibc API, the second one is the syscall API).
+Note that the 'inc' is relative to the current nice level, and tools like
+bash's "nice" command mirror this relative API.
+
+With the old scheduler, if you for example started a niced task with +1
+and another task with +2, the CPU split between the two tasks would depend
+on the nice level of the parent shell - if it was at nice -10 the CPU
+split was different than if it was at +5 or +10.
+
+A third complaint against Linux's nice level support was that negative
+nice levels were not 'punchy' enough, so lots of people had to resort to
+running audio (and other multimedia) apps under RT priorities such as
+SCHED_FIFO. But this caused other problems: SCHED_FIFO is not proven to be
+starvation-free, and a buggy SCHED_FIFO app can also lock up a
+well-behaved system for good.
+
+The new scheduler in v2.6.23 addresses all three types of complaints:
+
+To address the first complaint (of nice levels being not "punchy" enough),
+the scheduler was decoupled from the 'time slice' and HZ concepts (and
+granularity was made a separate concept from nice levels), thus making it
+possible to implement better and more consistent nice +19 support: with
+the new scheduler nice +19 tasks get a HZ-independent 1.5% CPU usage,
+instead of the variable 3%-5%-9% range they got in the old scheduler.
+
+To address the second complaint (of nice levels not being consistent), the
+new scheduler makes nice(1) have the same CPU utilization effect on tasks,
+regardless of their absolute nice levels. So on the new scheduler, running
+a nice +10 and a nice +11 task has the same CPU utilization "split"
+between them as running a nice -5 and a nice -4 task (one will get 55% of
+the CPU, the other 45%). That is why nice levels were changed to be
+"multiplicative" (or exponential) - that way it does not matter which nice
+level you start out from, the 'relative result' will always be the same.
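+
+As a back-of-the-envelope check (assuming the roughly 1.25x weight step
+per nice level that CFS uses; treat the exact factor as an assumption
+here)::
+
+	share(task at nice n)   = 1.25 / (1.25 + 1.00) ~= 55%
+	share(task at nice n+1) = 1.00 / (1.25 + 1.00) ~= 45%
+
+The split depends only on the one-level difference, not on the absolute
+level n.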
+
+The third complaint (of negative nice levels not being "punchy" enough and
+forcing audio apps to run under the more dangerous SCHED_FIFO scheduling
+policy) is addressed by the new scheduler almost automatically: stronger
+negative nice levels are an automatic side-effect of the recalibrated
+dynamic range of nice levels.
diff --git a/Documentation/translations/zh_CN/scheduler/sched-stats.rst b/Documentation/translations/zh_CN/scheduler/sched-stats.rst
new file mode 100644
index 000000000000..1c68c3d1c283
--- /dev/null
+++ b/Documentation/translations/zh_CN/scheduler/sched-stats.rst
@@ -0,0 +1,156 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/scheduler/sched-stats.rst
+
+:Translated by:
+
+ 唐艺舟 Tang Yizhou <tangyeechou@gmail.com>
+
+====================
+Scheduler Statistics
+====================
+
+Version 15 of schedstats dropped some sched_yield counters:
+yld_exp_empty, yld_act_empty and yld_both_empty. Otherwise, it is
+identical to version 14.
+
+Version 14 of schedstats includes support for sched_domains (scheduling
+domains), which hit the mainline kernel in 2.6.20, although it is
+identical to the version-12 stats of the 2.6.13-2.6.19 kernels (version 13
+never saw a kernel release). Some counters make more sense to be
+per-runqueue; others to be per-domain. Note that domains (and their
+associated information) will only be pertinent and available on machines
+utilizing CONFIG_SMP.
+
+In version 14 of schedstat, there is at least one level of domain
+statistics for each cpu listed, and there may well be more than one
+domain. Domains have no particular names in this implementation, but the
+highest-numbered one typically arbitrates balancing across all the cpus on
+the machine, while domain0 is the most tightly focused domain, sometimes
+balancing only between pairs of cpus. At this time, no architecture needs
+more than three domain levels. The first field in the domain stats is a
+bit map indicating which cpus are affected by that domain.
+
+These fields are counters, and only increment. Programs which make use of
+these will need to start with a baseline observation and then calculate
+the change in the counters at each subsequent observation. A perl script
+which can process many of the fields this way is available at
+
+    http://eaglet.pdxhosts.com/rick/linux/schedstat/
+
+Note that any such script will necessarily be version-specific, the main
+reason to change versions being changes in the output format. For those
+wishing to write their own scripts, the fields are described here.
+
+CPU statistics
+--------------
+cpu<N> 1 2 3 4 5 6 7 8 9
+
+The first field is a sched_yield() statistic:
+
+     1) # of times sched_yield() was called
+
+Next three are schedule() statistics:
+
+     2) This field is a legacy array expiration count field used in the
+        O(1) scheduler. We kept it for ABI compatibility, but it is
+        always set to zero.
+     3) # of times schedule() was called
+     4) # of times schedule() left the processor idle
+
+Next two are try_to_wake_up() statistics:
+
+     5) # of times try_to_wake_up() was called
+     6) # of times try_to_wake_up() was called to wake up the local cpu
+
+Next three are statistics describing scheduling latency:
+
+     7) sum of all time spent running by tasks on this processor (in
+        jiffies)
+     8) sum of all time spent waiting to run by tasks on this processor
+        (in jiffies)
+     9) # of timeslices run on this cpu
+
+Domain statistics
+-----------------
+
+One of these is produced per domain for each cpu described. (Note that if
+CONFIG_SMP is not defined, *no* domains are utilized and these lines will
+not appear in the output.)
+
+domain<N> <cpumask> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
+
+The first field is a bit mask indicating which cpus this domain operates
+over.
+
+The next 24 are a variety of load_balance() statistics, grouped by
+idleness type (idle, busy, and newly idle):
+
+
+    1)  # of times in this domain load_balance() was called when the cpu
+        was idle
+    2)  # of times in this domain load_balance() checked but found the
+        load did not require balancing when the cpu was idle
+    3)  # of times in this domain load_balance() tried to move one or
+        more tasks and failed, when the cpu was idle
+    4)  # of times in this domain load_balance() found an imbalance (if
+        any) when the cpu was idle
+    5)  # of times in this domain pull_task() was called when the cpu
+        was idle
+    6)  # of times in this domain pull_task() was called even though the
+        target task was cache-hot when idle
+    7)  # of times in this domain load_balance() was called but did not
+        find a busier queue while the cpu was idle
+    8)  # of times in this domain a busier queue was found while the cpu
+        was idle but no busier group was found
+    9)  # of times in this domain load_balance() was called when the cpu
+        was busy
+    10) # of times in this domain load_balance() checked but found the
+        load did not require balancing when busy
+    11) # of times in this domain load_balance() tried to move one or
+        more tasks and failed, when the cpu was busy
+    12) # of times in this domain load_balance() found an imbalance (if
+        any) when the cpu was busy
+    13) # of times in this domain pull_task() was called when busy
+    14) # of times in this domain pull_task() was called even though the
+        target task was cache-hot when busy
+    15) # of times in this domain load_balance() was called but did not
+        find a busier queue while the cpu was busy
+    16) # of times in this domain a busier queue was found while the cpu
+        was busy but no busier group was found
+    17) # of times in this domain load_balance() was called when the cpu
+        was just becoming idle
+    18) # of times in this domain load_balance() checked but found the
+        load did not require balancing when the cpu was just becoming
+        idle
+    19) # of times in this domain load_balance() tried to move one or
+        more tasks and failed, when the cpu was just becoming idle
+    20) # of times in this domain load_balance() found an imbalance (if
+        any) when the cpu was just becoming idle
+    21) # of times in this domain pull_task() was called when newly idle
+    22) # of times in this domain pull_task() was called even though the
+        target task was cache-hot when just becoming idle
+    23) # of times in this domain load_balance() was called but did not
+        find a busier queue while the cpu was just becoming idle
+    24) # of times in this domain a busier queue was found while the cpu
+        was just becoming idle but no busier group was found
+
+   Next three are active_load_balance() statistics:
+
+    25) # of times active_load_balance() was called
+    26) # of times active_load_balance() tried to move one or more tasks
+        and failed
+    27) # of times active_load_balance() successfully moved a task
+
+   Next three are sched_balance_exec() statistics:
+
+    28) sbe_cnt is not used
+    29) sbe_balanced is not used
+    30) sbe_pushed is not used
+
+   Next three are sched_balance_fork() statistics:
+
+    31) sbf_cnt is not used
+    32) sbf_balanced is not used
+    33) sbf_pushed is not used
+
+   Next three are try_to_wake_up() statistics:
+
+    34) # of times in this domain try_to_wake_up() awoke a task that last
+        ran on a different cpu in this domain
+    35) # of times in this domain try_to_wake_up() moved a task to the
+        waking cpu because it was cache-cold on its own cpu anyway
+    36) # of times in this domain try_to_wake_up() started passive
+        balancing
+
+/proc/<pid>/schedstat
+---------------------
+schedstats also adds a new /proc/<pid>/schedstat file to include some of
+the same information on a per-process level. There are three fields in
+this file correlating to that process:
+
+     1) time spent on the cpu
+     2) time spent waiting on a runqueue
+     3) # of timeslices run on this cpu
+
+A program could easily be written to make use of these extra fields to
+report on how well a particular process or set of processes is faring
+under the scheduler's policies. A simple version of such a program is
+available at
+
+    http://eaglet.pdxhosts.com/rick/linux/schedstat/v12/latency.c
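+
+A hedged sketch of such a program is shown below. It relies only on the
+three fields documented above and treats their time units as opaque (on
+current kernels the first two fields are reported in nanoseconds, but
+verify on your kernel, and note the file only exists when schedstats are
+enabled)::
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		unsigned long long run, wait, slices;
+		FILE *f = fopen("/proc/self/schedstat", "r");
+
+		if (!f)
+			return 1;
+		if (fscanf(f, "%llu %llu %llu", &run, &wait, &slices) != 3) {
+			fclose(f);
+			return 1;
+		}
+		fclose(f);
+		printf("run=%llu wait=%llu timeslices=%llu\n", run, wait, slices);
+		if (slices)	/* average wait per timeslice */
+			printf("avg wait/slice=%llu\n", wait / slices);
+		return 0;
+	}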
diff --git a/Documentation/translations/zh_CN/vm/active_mm.rst b/Documentation/translations/zh_CN/vm/active_mm.rst
new file mode 100644
index 000000000000..366609ea4f37
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/active_mm.rst
@@ -0,0 +1,85 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/active_mm.rst
+
+:Translated by:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:Proofread by:
+
+
+=========
+Active MM
+=========
+
+This is a mail from Linus replying to a developer; the mail format has
+been kept intact in translation.
+
+::
+
+ List: linux-kernel
+ Subject: Re: active_mm
+ From: Linus Torvalds <torvalds () transmeta ! com>
+ Date: 1999-07-30 21:36:24
+
+ Cc'd to linux-kernel, because I don't write explanations all that often,
+ and when I do I feel better about more people reading them.
+
+ On Fri, 30 Jul 1999, David Mosberger wrote:
+ >
+ > Is there a brief description someplace on how "mm" and "active_mm" in
+ > the task_struct are supposed to be used? (My apologies if this was
+ > discussed on the mailing lists - I just returned from vacation and
+ > wasn't able to follow linux-kernel for a while).
+
+ Basically, the new setup is:
+
+ - we have "real address spaces" and "anonymous address spaces". The
+   difference is that an anonymous address space doesn't care about the
+   user-level page tables at all, so when we do a context switch into an
+   anonymous address space we just leave the previous address space
+   active.
+
+   The obvious use for an "anonymous address space" is any thread that
+   doesn't need any user mappings - all kernel threads basically fall
+   into this category, but even "real" threads can temporarily say that
+   for some amount of time they are not going to be interested in user
+   space, and that the scheduler might as well try to avoid wasting time
+   on switching the VM state around. Currently only the old-style bdflush
+   sync does that.
+
+ - "tsk->mm" points to the "real address space". For an anonymous
+   process, tsk->mm will be NULL, for the logical reason that an
+   anonymous process really doesn't _have_ a real address space at all.
+
+ - however, we obviously need to keep track of which address space we
+   "stole" for such an anonymous user. For that, we have
+   "tsk->active_mm", which shows what the currently active address space
+   is.
+
+   The rule is that for a process with a real address space (ie tsk->mm
+   is non-NULL) the active_mm obviously always has to be the same as the
+   real one.
+
+   For an anonymous process, tsk->mm == NULL, and tsk->active_mm is the
+   "borrowed" mm while the anonymous process runs. When the anonymous
+   process gets scheduled away, the borrowed address space is returned
+   and cleared.
+
+ To support all that, the "struct mm_struct" now has two counters: a
+ "mm_users" counter that is how many "real address space users" there
+ are, and a "mm_count" counter that is the number of "lazy" users (ie
+ anonymous users) plus one if there are any real users.
+
+ Usually there is at least one real user, but it could be that the real
+ user exited on another CPU while a lazy user was still active, so you do
+ actually get cases where you have an address space that is _only_ used
+ by lazy users. That is often a short-lived state, because once that
+ thread gets scheduled away in favour of a real thread, the "zombie" mm
+ gets released because "mm_count" becomes zero.
+
+ Also, a new rule is that _nobody_ ever has "init_mm" as a real MM any
+ more. "init_mm" should be considered just a "lazy context when no other
+ context is available", and in fact it is mainly used just at bootup when
+ no real VM has yet been created. So code that used to check
+
+	if (current->mm == &init_mm)
+
+ should generally just do
+
+	if (!current->mm)
+
+ instead (which makes more sense anyway - the test is basically one of
+ "do we have a user context", and is generally done by the page fault
+ handler and things like that).
+
+ Anyway, I just put a pre-patch-2.3.13-1 on ftp.kernel.org, because it
+ slightly changes the interfaces to accommodate the alpha (who would have
+ thought it, but the alpha actually ends up having one of the ugliest
+ context switch codes - unlike the other architectures where the MM and
+ register state is separate, the alpha PALcode joins the two, and you
+ need to switch both together).
+
+ (Source: http://marc.info/?l=linux-kernel&m=93337278602211&w=2)
diff --git a/Documentation/translations/zh_CN/vm/balance.rst b/Documentation/translations/zh_CN/vm/balance.rst
new file mode 100644
index 000000000000..e98a47ef24a8
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/balance.rst
@@ -0,0 +1,81 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/balance.rst
+
+:Translated by:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:Proofread by:
+
+
+================
+Memory Balancing
+================
+
+Started Jan 2000 by Kanoj Sarcar <kanoj@sgi.com>
+
+Memory balancing is needed for !__GFP_HIGH and !__GFP_KSWAPD_RECLAIM as
+well as for non __GFP_IO allocations.
+
+The first reason why a caller may avoid reclaim is that the caller cannot
+sleep due to holding a spinlock or being in interrupt context. The second
+may be that the caller is willing to fail the allocation without incurring
+the overhead of page reclaim. This may happen for opportunistic high-order
+allocation requests that have order-0 fallback options. In such cases, the
+caller may also wish to avoid waking kswapd.
+
+__GFP_IO allocation requests are made to prevent file system deadlocks.
+
+In the absence of non-sleepable allocation requests, it seems detrimental
+to be doing balancing. Page reclamation can be kicked off lazily, that is,
+only when needed (i.e. a zone's free memory is 0), instead of making it a
+proactive process.
+
+That being said, the kernel should try to fulfill requests for direct
+mapped pages from the direct mapped pool, instead of falling back on the
+dma pool, so as to keep the dma pool filled for dma requests (atomic or
+not). A similar argument applies to highmem and direct mapped pages.
+Conversely, if there are a lot of free dma pages, it is preferable to
+satisfy regular memory requests by allocating one from the dma pool,
+instead of incurring the overhead of regular zone balancing.
+
+In 2.2, memory balancing/page reclamation would kick off only when the
+total number of free pages fell below 1/64th of total memory. With the
+right ratio of dma and regular memory, it is quite possible that balancing
+would not be done even when the dma zone was completely empty. 2.2 has
+been running on production machines of varying memory sizes, and seems to
+be doing fine even in the presence of this problem. In 2.3, due to
+HIGHMEM, this problem is aggravated.
+
+In 2.3, zone balancing can be done in one of two ways: depending on the
+zone size (and possibly the size of the lower-class zones), we can decide
+at init time how many free pages we should aim for while balancing any
+zone. The good part is that while balancing, we do not need to look at the
+sizes of lower-class zones; the bad part is that we might do too frequent
+balancing due to ignoring possibly lower usage in the lower-class zones.
+Also, with a slight change in the allocation routine, it is possible to
+reduce the memclass() macro to a simple equality.
+
+Another possible solution is that we balance only when the free memory of
+a zone, **together with** all its lower-class zones, falls below 1/64th of
+the total memory of the zone and its lower-class zones. This fixes the 2.2
+balancing problem, and stays as close to 2.2 behavior as possible. Also,
+the balancing algorithm works the same way on the various architectures,
+which have different numbers and types of memory zones. If we wanted to
+get fancy, we could assign different weights to free pages in different
+zones in the future.
+
+Note that if the size of the regular zone is huge compared to the dma
+zone, it becomes less significant to consider the free dma pages while
+deciding whether to balance the regular zone. The first solution then
+becomes more attractive.
+
+The appended patch implements the second solution. It also "fixes" two
+problems: first, kswapd is woken up as in 2.2 on low-memory conditions for
+non-sleepable allocations. Second, the HIGHMEM zone is also balanced, so
+as to give replace_with_highmem() a fighting chance of getting a HIGHMEM
+page, as well as to ensure that HIGHMEM allocations do not fall back into
+the regular zone. This also makes sure that HIGHMEM pages are not leaked
+(for example, in situations where a HIGHMEM page is in the swapcache but
+is not being used by anyone).
+
+kswapd also needs to know about the zones it should balance. kswapd is
+primarily needed in a situation where balancing cannot be done, probably
+because all allocation requests are coming from interrupt context while
+all process contexts are sleeping. For 2.3, kswapd does not really need to
+balance the highmem zone, since interrupt context does not request highmem
+pages. kswapd looks at the zone_wake_kswapd field in the zone structure to
+decide whether a zone needs balancing.
+
+Page stealing from process memory and shm is done if stealing the page
+would alleviate memory pressure on any zone in the page's node that has
+fallen below its watermark.
+
+watermark[WMARK_MIN/WMARK_LOW/WMARK_HIGH]/low_on_memory/zone_wake_kswapd:
+These are per-zone fields, used to determine when a zone needs to be
+balanced. When the number of pages falls below watermark[WMARK_MIN], the
+hysteretic field low_on_memory gets set. It stays set until the number of
+free pages reaches watermark[WMARK_HIGH]. When low_on_memory is set, page
+allocation requests will try to free some pages in the zone (provided
+GFP_WAIT is set in the request). Orthogonal to this is the decision to
+poke kswapd to free some zone pages. That decision is not hysteresis
+based, and is made when the number of free pages falls below
+watermark[WMARK_LOW]; in this case zone_wake_kswapd is also set.
+
+
+(Good) ideas that I have heard:
+
+1. Dynamic experience should influence balancing: the number of failed
+   requests for a zone can be tracked and fed into the balancing scheme
+   (jalvo@mbay.net).
+2. Implement a replace_with_highmem()-like replace_with_regular() to
+   preserve dma pages. (lkd@tantalophile.demon.co.uk)
diff --git a/Documentation/translations/zh_CN/vm/damon/api.rst b/Documentation/translations/zh_CN/vm/damon/api.rst
new file mode 100644
index 000000000000..21143eea4ebe
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/damon/api.rst
@@ -0,0 +1,32 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+:Original: Documentation/vm/damon/api.rst
+
+:Translated by:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:Proofread by:
+
+
+=============
+API Reference
+=============
+
+Kernel space programs can make use of every feature of DAMON using the
+APIs below. All you need to do is to include ``damon.h``, which is located
+in include/linux/ of the source tree.
+
+Structures
+==========
+
+This API is in the following kernel code:
+
+include/linux/damon.h
+
+
+Functions
+=========
+
+This API is in the following kernel code:
+
+mm/damon/core.c
diff --git a/Documentation/translations/zh_CN/vm/damon/design.rst b/Documentation/translations/zh_CN/vm/damon/design.rst
new file mode 100644
index 000000000000..05f66c02740a
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/damon/design.rst
@@ -0,0 +1,139 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+:Original: Documentation/vm/damon/design.rst
+
+:Translated by:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:Proofread by:
+
+
+======
+Design
+======
+
+Configurable Layers
+===================
+
+DAMON provides data access monitoring functionality while making the
+accuracy and the overhead controllable. The fundamental access monitoring
+requires primitives that depend on, and are optimized for, the target
+address space. On the other hand, the accuracy and overhead tradeoff
+mechanism, which is the core of DAMON, is in the pure logic space. DAMON
+separates the two parts in different layers and defines its interface to
+allow various low-level primitive implementations to be configured with
+the core logic.
+
+Due to this separated design and the configurable interface, users can
+extend DAMON to any address space by configuring the core logic with
+appropriate low-level primitive implementations. If an appropriate one is
+not provided, users can implement the primitives on their own.
+
+For example, physical memory, virtual memory, swap space, those for
+specific processes, NUMA nodes, files, and backing memory devices would be
+supportable. Also, if some architectures or devices support special
+optimized access check primitives, those will be easily configurable.
+
+
+Reference Implementations of Address Space Specific Primitives
+==============================================================
+
+The low-level primitives for the fundamental access monitoring are defined
+in two parts:
+
+1. Identification of the monitoring target address range for the address
+   space.
+2. Access check of specific address ranges in the target space.
+
+DAMON currently provides implementations of the primitives for the
+physical and virtual address spaces. The two subsections below describe
+how they work.
+
+
+VMA-based Target Address Range Construction
+-------------------------------------------
+
+This is only for the virtual address space primitives implementation. That
+for the physical address space simply asks users to manually set the
+monitoring target address ranges.
+
+Only small parts of the super-huge virtual address space of a process are
+mapped to physical memory and accessed. Thus, tracking the unmapped
+address regions is just wasteful. However, because DAMON can deal with
+some level of noise using the adaptive regions adjustment mechanism,
+tracking every mapping is not strictly required, and could even incur a
+high overhead in some cases. That said, too-huge unmapped areas inside the
+monitoring target should be removed so as not to take up time for the
+adaptive mechanism.
+
+For this reason, this implementation converts the complex mappings to
+three distinct regions that cover every mapped area of the address space.
+The two gaps between the three regions are the two biggest unmapped areas
+in the given address space. The two biggest unmapped areas would be the
+gap between the heap and the uppermost mmap()-ed region, and the gap
+between the lowermost mmap()-ed region and the stack, in most cases.
+Because these gaps are exceptionally huge in usual address spaces,
+excluding them is sufficient to make a reasonable trade-off. Below shows
+this in detail::
+
+ <heap>
+ <BIG UNMAPPED REGION 1>
+ <uppermost mmap()-ed region>
+ (small mmap()-ed regions and munmap()-ed regions)
+ <lowermost mmap()-ed region>
+ <BIG UNMAPPED REGION 2>
+ <stack>
+
+
+PTE Accessed-bit Based Access Check
+-----------------------------------
+
+Both of the implementations for the physical and virtual address spaces
+use the PTE Accessed-bit for basic access checks. The only difference is
+the way of finding the relevant Accessed bit(s) from the address. While
+the implementation for the virtual address space walks the page table of
+the target task of the address, the implementation for the physical
+address space walks every page table having a mapping to the address. In
+this way, the implementations find and clear the bit(s) for the next
+sampling target address, and check whether the bit(s) are set again after
+one sampling period. This could disturb other kernel subsystems using the
+Accessed bits, namely Idle page tracking and the reclaim logic. To avoid
+such disturbances, DAMON makes itself mutually exclusive with Idle page
+tracking, and uses the ``PG_idle`` and ``PG_young`` page flags to solve
+the conflict with the reclaim logic, as Idle page tracking does.
+
+
+Address Space Independent Core Mechanisms
+=========================================
+
+The four sections below describe each of the DAMON core mechanisms and the
+five monitoring attributes: the ``sampling interval``, the ``aggregation
+interval``, the ``regions update interval``, the ``minimum number of
+regions``, and the ``maximum number of regions``.
+
+
+Access Frequency Monitoring
+---------------------------
+
+The output of DAMON says which pages are how frequently accessed for a
+given duration. The resolution of the access frequency is controlled by
+setting the ``sampling interval`` and the ``aggregation interval``. In
+detail, DAMON checks access to each page per ``sampling interval`` and
+aggregates the results; in other words, it counts the number of accesses
+to each page. After each ``aggregation interval`` passes, DAMON calls the
+callback functions previously registered by users, so that users can read
+the aggregated results, and then clears the results. This can be described
+by the simple pseudo-code below::
+
+ while monitoring_on:
+ for page in monitoring_target:
+ if accessed(page):
+ nr_accesses[page] += 1
+ if time() % aggregation_interval == 0:
+ for callback in user_registered_callbacks:
+ callback(monitoring_target, nr_accesses)
+ for page in monitoring_target:
+ nr_accesses[page] = 0
+ sleep(sampling interval)
+
+The monitoring overhead of this mechanism will arbitrarily increase as the
+size of the target workload grows.
+
+
+Region Based Sampling
+---------------------
+
+To avoid the unbounded increase of the overhead, DAMON groups adjacent
+pages that are assumed to have the same access frequency into a region.
+As long as the assumption (pages in a region have the same access
+frequency) is kept, only one page in the region needs to be checked. Thus,
+for each ``sampling interval``, DAMON randomly picks one page in each
+region, waits for one ``sampling interval``, checks whether the page was
+accessed meanwhile, and increases the access frequency of the region if
+so. Therefore, the monitoring overhead is controllable by setting the
+number of regions. DAMON allows users to set the minimum and the maximum
+number of regions for the trade-off.
+
+This scheme, however, cannot preserve the quality of the output if the
+assumption is not guaranteed.
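+
+For clarity, one sampling step under this scheme might look like the
+following (illustrative C-like pseudo-code with invented helpers, not the
+actual implementation)::
+
+	/* once per sampling interval */
+	for_each_region(r) {
+		/* one randomly-picked page stands for the whole region */
+		r->sample_page = pick_random_page(r);
+		clear_accessed_bit(r->sample_page);
+	}
+	wait(sampling_interval);
+	for_each_region(r)
+		if (accessed_bit_set(r->sample_page))
+			r->nr_accesses++;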
+
+
+Adaptive Regions Adjustment
+---------------------------
+
+Even if the initial monitoring target regions are well constructed to
+fulfill the assumption (pages in the same region have similar access
+frequencies), the data access pattern can change dynamically. This will
+degrade the monitoring quality. To keep the assumption as much as
+possible, DAMON adaptively merges and splits each region based on its
+access frequency.
+
+For each ``aggregation interval``, it compares the access frequencies of
+adjacent regions and merges them if the frequency difference is small.
+Then, after it reports and clears the aggregated access frequency of each
+region, it splits each region into two or three regions, as long as the
+total number of regions does not exceed the user-specified maximum number
+of regions.
+
+In this way, DAMON provides its best-effort quality with minimal overhead,
+while keeping the bounds users set for the trade-off.
+
+
+Dynamic Target Space Updates Handling
+-------------------------------------
+
+The monitoring target address ranges can change dynamically. For example,
+virtual memory can be dynamically mapped and unmapped. Physical memory can
+be hot-plugged.
+
+As such changes could be quite frequent in some cases, DAMON checks the
+dynamic memory mapping changes and applies them to the abstracted target
+regions only once per a user-specified time interval (the ``regions
+update interval``).
diff --git a/Documentation/translations/zh_CN/vm/damon/faq.rst b/Documentation/translations/zh_CN/vm/damon/faq.rst
new file mode 100644
index 000000000000..07b4ac19407d
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/damon/faq.rst
@@ -0,0 +1,48 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+:Original: Documentation/vm/damon/faq.rst
+
+:Translated by:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:Proofread by:
+
+
+==========================
+Frequently Asked Questions
+==========================
+
+Why a new subsystem, instead of extending perf or other user space tools?
+==========================================================================
+
+First, because it needs to be as lightweight as possible so that it can be
+used online, any unnecessary overhead, such as the kernel/user-space
+context switching cost, should be avoided. Second, DAMON aims to be used
+by other programs, including the kernel. Therefore, having a dependency on
+specific tools like perf is not desirable. These are the two biggest
+reasons why DAMON is implemented in the kernel space.
+
+
+Can 'idle page tracking' or 'perf mem' substitute DAMON?
+========================================================
+
+Idle page tracking is a low-level primitive for access checks of the
+physical address space. 'perf mem' is similar, though it can use sampling
+to reduce the overhead. On the other hand, DAMON is a higher-level
+framework for the monitoring of various address spaces. It is focused on
+memory management optimization and provides sophisticated
+accuracy/overhead handling mechanisms. Therefore, 'idle page tracking' and
+'perf mem' could provide a subset of DAMON's output, but cannot substitute
+DAMON.
+
+
+Does DAMON support virtual memory only?
+=======================================
+
+No. The core of DAMON is address-space independent. Users can implement
+and configure the address-space-specific low-level primitive parts,
+including the construction of the monitoring target regions and the actual
+access checks, on top of the DAMON core. In this way, DAMON users can
+monitor any address space with any access check technique.
+
+Nonetheless, DAMON provides vma/rmap-tracking and PTE-Accessed-bit-check
+based implementations of the address-space-dependent functions for virtual
+memory and physical memory by default, for reference and convenient use.
+
+
+Can I simply monitor at page granularity?
+=========================================
+
+Yes. You can do so by setting the ``min_nr_regions`` attribute to a value
+higher than the working set size divided by the page size. Because the
+monitoring target region size is forced to be ``>=page size``, the region
+split will make no effect.
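+
+As a worked example with hypothetical numbers: for a 400 MiB working set
+on a system with 4 KiB pages, setting::
+
+    min_nr_regions >= 400 MiB / 4 KiB = 102400
+
+would force page-granularity regions.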
diff --git a/Documentation/translations/zh_CN/vm/damon/index.rst b/Documentation/translations/zh_CN/vm/damon/index.rst
new file mode 100644
index 000000000000..84d36d90c9b0
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/damon/index.rst
@@ -0,0 +1,33 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+:Original: Documentation/vm/damon/index.rst
+
+:Translated by:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:Proofread by:
+
+
+==========================
+DAMON: Data Access MONitor
+==========================
+
+DAMON is a data access monitoring framework subsystem for the Linux
+kernel. The core mechanisms of DAMON (for details, see
+Documentation/translations/zh_CN/vm/damon/design.rst) make it
+
+ - *accurate* (the monitoring output is useful enough for DRAM-level
+   memory management, though it might not be appropriate for CPU-cache
+   levels),
+ - *light-weight* (the monitoring overhead is low enough to be applied
+   online), and
+ - *scalable* (the upper bound of the overhead is in a constant range
+   regardless of the size of target workloads).
+
+Using this framework, therefore, the kernel's memory management mechanisms
+can make advanced decisions. Experimental memory management optimization
+works that would incur high data-access monitoring overhead can be
+implemented again. In user space, meanwhile, users with special workloads
+can write personalized applications to better understand and optimize
+their workloads and systems.
+
+.. toctree::
+ :maxdepth: 2
+
+ faq
+ design
+ api
+
diff --git a/Documentation/translations/zh_CN/vm/free_page_reporting.rst b/Documentation/translations/zh_CN/vm/free_page_reporting.rst
new file mode 100644
index 000000000000..31d6c34b956b
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/free_page_reporting.rst
@@ -0,0 +1,38 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/free_page_reporting.rst
+
+:Translated by:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:Proofread by:
+
+===================
+Free Page Reporting
+===================
+
+Free page reporting is an API by which a device can register to receive
+lists of pages that are currently unused by the system. This is useful in
+the case of virtualization, where a guest is then able to use this data to
+notify the hypervisor that it is no longer using certain pages in memory.
+
+For the driver, typically a balloon driver, to use this functionality, it
+will allocate and initialize a page_reporting_dev_info structure. The
+field within the structure it will populate is the "report" function
+pointer used to process the scatterlist. It must also guarantee that it
+can handle at least PAGE_REPORTING_CAPACITY worth of scatterlist entries
+per call to the function. A call to page_reporting_register will register
+the page reporting interface with the reporting framework, assuming no
+other page reporting devices are already registered.
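+
+A hedged sketch of such a registration is shown below; my_report and
+my_prdev are hypothetical names, and error handling and the rest of the
+driver plumbing are omitted::
+
+	#include <linux/page_reporting.h>
+
+	static int my_report(struct page_reporting_dev_info *prdev,
+			     struct scatterlist *sgl, unsigned int nents)
+	{
+		/*
+		 * Hand the scatterlist of free pages to the device here;
+		 * must cope with up to PAGE_REPORTING_CAPACITY entries.
+		 */
+		return 0;
+	}
+
+	static struct page_reporting_dev_info my_prdev = {
+		.report = my_report,
+	};
+
+	/* in the driver's probe path: */
+	err = page_reporting_register(&my_prdev);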
+
+Once registered, the page reporting API will begin reporting batches of
+pages to the driver. The API will start reporting pages 2 seconds after
+the interface is registered, and will continue to do so 2 seconds after
+any page of a sufficiently high order is freed.
+
+Pages reported will be stored in the scatterlist passed to the reporting
+function, with the final entry having the end bit set in entry nent - 1.
+While pages are being processed by the report function, they will not be
+accessible to the allocator. Once the report function has completed, the
+pages will be returned to the free area from which they were obtained.
+
+Prior to removing a driver that is making use of free page reporting, it
+is necessary to call page_reporting_unregister to have the
+page_reporting_dev_info structure that is currently in use by free page
+reporting removed. Doing this will prevent further reports from being
+issued via the interface. If another driver, or the same driver, is then
+registered, it is possible for it to resume where the previous driver left
+off in terms of reporting free pages.
+
+
+Alexander Duyck, Dec 04, 2019
diff --git a/Documentation/translations/zh_CN/vm/highmem.rst b/Documentation/translations/zh_CN/vm/highmem.rst
new file mode 100644
index 000000000000..018838e58c3e
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/highmem.rst
@@ -0,0 +1,128 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/highmem.rst
+
+:Translated by:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:Proofread by:
+
+====================
+High Memory Handling
+====================
+
+By: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+.. contents:: :local:
+
+What is high memory?
+====================
+
+High memory (highmem) is used when the size of physical memory approaches
+or exceeds the maximum size of virtual memory. At that point it becomes
+impossible for the kernel to keep all of the available physical memory
+mapped at all times. This means the kernel needs to start using temporary
+mappings of the pieces of physical memory that it wants to access.
+
+The part of (physical) memory not covered by a permanent mapping is what
+we refer to as "highmem". There are various architecture-dependent
+constraints on where exactly that border lies.
+
+In the i386 arch, for example, we choose to map the kernel into every
+process's VM space so that we don't have to pay the full TLB invalidation
+costs for kernel entry/exit. This means the available virtual memory
+space (4GiB on i386) has to be divided between user and kernel space.
+
+The traditional split for architectures using this approach is 3:1, 3GiB
+for userspace and the top 1GiB for kernel space::
+
+ +--------+ 0xffffffff
+ | Kernel |
+ +--------+ 0xc0000000
+ | |
+ | User |
+ | |
+ +--------+ 0x00000000
+
+This means that the kernel can at most map 1GiB of physical memory at any
+one time, but because we need virtual address space for other things -
+including temporary maps to access the rest of the physical memory - the
+actual direct map will typically be less (usually around ~896MiB).
+
+Other architectures that have mm-context-tagged TLBs can have separate
+kernel and user maps. Some hardware (like some ARMs), however, has limited
+virtual space when mm context tags are used.
+
+
+Temporary Virtual Mappings
+==========================
+
+The kernel contains several ways of creating temporary mappings:
+
+* vmap(). This can be used to make a long-duration mapping of multiple
+  physical pages into a contiguous virtual space. It needs synchronization
+  to unmap.
+
+* kmap(). This permits a short-duration mapping of a single page. It needs
+  synchronization, but is amortized somewhat. It is also prone to
+  deadlocks when used in a nested fashion, and so it is not recommended
+  for new code.
+
+* kmap_atomic(). This permits a very short-duration mapping of a single
+  page. Since the mapping is restricted to the CPU that issued it, it
+  performs well, but the issuing task is therefore required to stay on
+  that CPU until it has finished, lest some other task displace its
+  mappings.
+
+  kmap_atomic() may also be used by interrupt contexts, since it does not
+  sleep, and the caller may only sleep after kunmap_atomic() is called.
+
+  It may be assumed that k[un]map_atomic() won't fail.
+
+
+Using kmap_atomic
+=================
+
+When and where to use kmap_atomic() is straightforward. It is used when
+code wants to access the contents of a page that might be allocated from
+high memory (see __GFP_HIGHMEM), for example a page in the page cache.
+The API has two functions, and they can be used in a manner similar to
+the following::
+
+  /* Find the page of interest. */
+  struct page *page = find_get_page(mapping, offset);
+
+  /* Gain access to the contents of that page. */
+  void *vaddr = kmap_atomic(page);
+
+  /* Do something to the contents of that page. */
+  memset(vaddr, 0, PAGE_SIZE);
+
+  /* Unmap that page. */
+  kunmap_atomic(vaddr);
+
+Note that the kunmap_atomic() call takes the result of the kmap_atomic()
+call, not the argument.
+
+If you need to map two pages because you want to copy from one to the
+other, you need to keep the kmap_atomic calls strictly nested, like::
+
+ vaddr1 = kmap_atomic(page1);
+ vaddr2 = kmap_atomic(page2);
+
+ memcpy(vaddr1, vaddr2, PAGE_SIZE);
+
+ kunmap_atomic(vaddr2);
+ kunmap_atomic(vaddr1);
+
+
+Cost of Temporary Mappings
+==========================
+
+The cost of creating temporary mappings can be quite high. The arch has to
+manipulate the kernel's page tables, the data TLB and/or the MMU's
+registers.
+
+If CONFIG_HIGHMEM is not set, then the kernel will try to create a mapping
+simply with a bit of arithmetic that converts the page struct address into
+a pointer to the page contents, rather than juggling mappings about. In
+such a case, the unmap operation may be a null operation.
+
+If CONFIG_MMU is not set, then there can be no temporary mappings and no
+highmem. In such a case, the arithmetic approach will also be used.
+
+
+i386 PAE
+========
+
+The i386 arch, under some circumstances, will permit you to stick up to
+64GiB of RAM into your 32-bit machine. This has a number of consequences:
+
+* Linux needs a page-frame structure for each page in the system, and the
+  pageframes need to live in the permanent mapping, which means:
+
+* you can have 896M/sizeof(struct page) page-frames at most; with struct
+  page being 32 bytes, that would end up being something in the order of
+  112G worth of pages; the kernel, however, needs to store more than just
+  page-frames in that memory...
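+
+  (As a worked check: 896 MiB / 32 B = 29,360,128 page frames, and at
+  4 KiB per page that describes 29,360,128 * 4 KiB = 112 GiB of memory.)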
+
+* PAE makes your page tables larger - which slows the system down, as
+  more data has to be accessed to traverse in TLB fills and the like. One
+  advantage is that PAE has more PTE bits and can provide advanced
+  features like NX and PAT.
+
+The general recommendation is that you don't use more than 8GiB on a
+32-bit machine - although more might work for you and your workload,
+you're pretty much on your own - don't expect kernel developers to really
+care much if things come apart.
diff --git a/Documentation/translations/zh_CN/vm/index.rst b/Documentation/translations/zh_CN/vm/index.rst
new file mode 100644
index 000000000000..a1d2f0356cc1
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/index.rst
@@ -0,0 +1,53 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/index.rst
+
+:Translated by:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:Proofread by:
+
+=====================================
+Linux Memory Management Documentation
+=====================================
+
+This is a collection of documents about the Linux memory management (mm)
+subsystem internals, with different levels of detail, including notes and
+mailing-list responses that elaborate on data structures and algorithms.
+If you are looking for advice on simply allocating memory, see
+Documentation/translations/zh_CN/core-api/memory-allocation.rst. For
+controlling and tuning guides, see Documentation/admin-guide/mm/index.
+(TODO: please update the references here once the cited documents are
+translated.)
+
+.. toctree::
+ :maxdepth: 1
+
+ active_mm
+ balance
+ damon/index
+ free_page_reporting
+ highmem
+ ksm
+
+TODOLIST:
+* arch_pgtable_helpers
+* free_page_reporting
+* frontswap
+* hmm
+* hwpoison
+* hugetlbfs_reserv
+* memory-model
+* mmu_notifier
+* numa
+* overcommit-accounting
+* page_migration
+* page_frags
+* page_owner
+* page_table_check
+* remap_file_pages
+* slub
+* split_page_table_lock
+* transhuge
+* unevictable-lru
+* vmalloced-kernel-stacks
+* z3fold
+* zsmalloc
diff --git a/Documentation/translations/zh_CN/vm/ksm.rst b/Documentation/translations/zh_CN/vm/ksm.rst
new file mode 100644
index 000000000000..83b0c73984da
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/ksm.rst
@@ -0,0 +1,70 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/ksm.rst
+
+:Translated by:
+
+ 徐鑫 xu xin <xu.xin16@zte.com.cn>
+
+=======================
+Kernel Samepage Merging
+=======================
+
+KSM is a memory-saving de-duplication feature, enabled by CONFIG_KSM=y and
+added to the Linux kernel in version 2.6.32. See ``mm/ksm.c`` for its
+implementation, as well as http://lwn.net/Articles/306704 and
+https://lwn.net/Articles/330589
+
+The userspace interface of KSM is described in
+Documentation/translations/zh_CN/admin-guide/mm/ksm.rst.
+
+Design
+======
+
+Overview
+--------
+
+For an overview, see "DOC: Overview" in mm/ksm.c.
+
+Reverse mapping
+---------------
+KSM maintains reverse mapping information for KSM pages in the stable
+tree.
+
+If a KSM page is shared by fewer than ``max_page_sharing`` virtual memory
+areas (VMAs), the node of the stable tree that represents such a KSM page
+points to a list of rmap_item structures, and the ``page->mapping`` of the
+KSM page points to that stable tree node.
+
+When the sharing passes this threshold, KSM adds a second dimension to the
+stable tree. The tree node becomes a "chain" that links one or more stable
+tree "dups". Each "dup" keeps reverse mapping information for a KSM page,
+with ``page->mapping`` pointing to that "dup".
+
+Every "chain", and all "dups" linked into a "chain", enforce the invariant
+that they represent the same write-protected memory content, even if each
+"dup" is pointed to by a different KSM page copy of that content.
+
+This way, compared to an unlimited list of reverse mappings, the
+computational complexity of the stable tree lookup is unaffected. It is
+still enforced that there cannot be KSM page content duplicates in the
+stable tree itself.
+
+The deduplication limit enforced by ``max_page_sharing`` is required to
+avoid the virtual memory rmap lists growing too large. The rmap walk has
+O(N) complexity, where N is the number of rmap_items (i.e. virtual
+mappings) sharing the page, which is in turn capped by
+``max_page_sharing``. So this effectively spreads the linear O(N)
+computational complexity of the rmap walk across different KSM pages. The
+ksmd walk over the stable_node "chains" is also O(N), but that N is the
+number of stable tree "dups", not the number of rmap_items, so it has no
+significant impact on ksmd performance. In practice, the best stable tree
+"dup" candidate will be kept and found at the head of the "dups" list.
+
+High values of ``max_page_sharing`` result in faster memory merging
+(because there will be fewer stable tree "dups" queued into the
+stable_node chain->hlist to check as candidates) and a higher
+deduplication factor, at the expense of a slower worst case for rmap walks
+of any KSM page, which can happen during swapping, compression, NUMA
+balancing and page migration.
+
+The ``stable_node_dups/stable_node_chains`` ratio is also affected by the
+``max_page_sharing`` tunable; a high ratio may indicate fragmentation in
+the stable_node dups, which could be solved by introducing fragmentation
+algorithms in ksmd that would refile rmap_items from one stable_node dup
+to another, in order to free up stable_node "dups" containing only very
+few rmap_items, but that may increase the ksmd CPU usage and possibly
+slow down the read-only computations of applications on the KSM pages.
+
+The whole list of stable tree "dups" linked into the stable_node "chains"
+is scanned periodically in order to prune stale stable_nodes. The
+frequency of such scans is defined by the
+``stable_node_chains_prune_millisecs`` sysfs interface.
+
+Reference
+=========
+For the kernel code, see mm/ksm.c.
+Functions and structures involved: mm_slot, ksm_scan, stable_node,
+rmap_item.
diff --git a/Documentation/translations/zh_TW/index.rst b/Documentation/translations/zh_TW/index.rst
index f56f78ba7860..e1ce9d8c06f8 100644
--- a/Documentation/translations/zh_TW/index.rst
+++ b/Documentation/translations/zh_TW/index.rst
@@ -5,7 +5,7 @@
\renewcommand\thesection*
\renewcommand\thesubsection*
\kerneldocCJKon
- \kerneldocBeginTC
+ \kerneldocBeginTC{
.. _linux_doc_zh_tw:
@@ -174,4 +174,4 @@ TODOList:
.. raw:: latex
- \kerneldocEndTC
+ }\kerneldocEndTC
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 687efcf245c1..dfbc27d17ff7 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -115,6 +115,7 @@ Code Seq# Include File Comments
'B' 00-1F linux/cciss_ioctl.h conflict!
'B' 00-0F include/linux/pmu.h conflict!
'B' C0-FF advanced bbus <mailto:maassen@uni-freiburg.de>
+'B' 00-0F xen/xenbus_dev.h conflict!
'C' all linux/soundcard.h conflict!
'C' 01-2F linux/capi.h conflict!
'C' F0-FF drivers/net/wan/cosa.h conflict!
@@ -134,6 +135,7 @@ Code Seq# Include File Comments
'F' 80-8F linux/arcfb.h conflict!
'F' DD video/sstfb.h conflict!
'G' 00-3F drivers/misc/sgi-gru/grulib.h conflict!
+'G' 00-0F xen/gntalloc.h, xen/gntdev.h conflict!
'H' 00-7F linux/hiddev.h conflict!
'H' 00-0F linux/hidraw.h conflict!
'H' 01 linux/mei.h conflict!
@@ -176,6 +178,7 @@ Code Seq# Include File Comments
'P' 60-6F sound/sscape_ioctl.h conflict!
'P' 00-0F drivers/usb/class/usblp.c conflict!
'P' 01-09 drivers/misc/pci_endpoint_test.c conflict!
+'P' 00-0F xen/privcmd.h conflict!
'Q' all linux/soundcard.h
'R' 00-1F linux/random.h conflict!
'R' 01 linux/rfkill.h conflict!
@@ -253,7 +256,7 @@ Code Seq# Include File Comments
'l' 00-3F linux/tcfs_fs.h transparent cryptographic file system
<http://web.archive.org/web/%2A/http://mikonos.dia.unisa.it/tcfs>
'l' 40-7F linux/udf_fs_i.h in development:
- <http://sourceforge.net/projects/linux-udf/>
+ <https://github.com/pali/udftools>
'm' 00-09 linux/mmtimer.h conflict!
'm' all linux/mtio.h conflict!
'm' all linux/soundcard.h conflict!
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index bb8cfddbb22d..9f3172376ec3 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -1394,7 +1394,7 @@ documentation when it pops into existence).
-------------------
:Capability: KVM_CAP_ENABLE_CAP
-:Architectures: mips, ppc, s390
+:Architectures: mips, ppc, s390, x86
:Type: vcpu ioctl
:Parameters: struct kvm_enable_cap (in)
:Returns: 0 on success; -1 on error
@@ -3268,6 +3268,7 @@ number.
:Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
KVM_CAP_VCPU_ATTRIBUTES for vcpu device
+ KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device (no set)
:Type: device ioctl, vm ioctl, vcpu ioctl
:Parameters: struct kvm_device_attr
:Returns: 0 on success, -1 on error
@@ -3302,7 +3303,8 @@ transferred is defined by the particular attribute.
------------------------
:Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
- KVM_CAP_VCPU_ATTRIBUTES for vcpu device
+ KVM_CAP_VCPU_ATTRIBUTES for vcpu device
+ KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device
:Type: device ioctl, vm ioctl, vcpu ioctl
:Parameters: struct kvm_device_attr
:Returns: 0 on success, -1 on error
@@ -6995,6 +6997,20 @@ indicated by the fd to the VM this is called on.
This is intended to support intra-host migration of VMs between userspace VMMs,
upgrading the VMM process without interrupting the guest.
+7.30 KVM_CAP_PPC_AIL_MODE_3
+-------------------------------
+
+:Capability: KVM_CAP_PPC_AIL_MODE_3
+:Architectures: ppc
+:Type: vm
+
+This capability indicates that the kernel supports the mode 3 setting for the
+"Address Translation Mode on Interrupt" aka "Alternate Interrupt Location"
+resource that is controlled with the H_SET_MODE hypercall.
+
+This capability allows a guest kernel to use a better-performance mode for
+handling interrupts and system calls.
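+
+Userspace can probe for this capability with KVM_CHECK_EXTENSION before
+relying on it; a minimal sketch (error handling elided; the capability
+constant is taken from <linux/kvm.h>)::
+
+  #include <fcntl.h>
+  #include <linux/kvm.h>
+  #include <stdio.h>
+  #include <sys/ioctl.h>
+
+  int main(void)
+  {
+          int kvm = open("/dev/kvm", O_RDWR);
+          int vm = ioctl(kvm, KVM_CREATE_VM, 0);
+          /* A non-zero return means AIL mode 3 is supported. */
+          int ail3 = ioctl(vm, KVM_CHECK_EXTENSION, KVM_CAP_PPC_AIL_MODE_3);
+
+          printf("KVM_CAP_PPC_AIL_MODE_3: %s\n", ail3 > 0 ? "yes" : "no");
+          return 0;
+  }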
+
8. Other capabilities.
======================
diff --git a/Documentation/virt/uml/user_mode_linux_howto_v2.rst b/Documentation/virt/uml/user_mode_linux_howto_v2.rst
index 2cafd3c3c6cb..d5ad96c795f6 100644
--- a/Documentation/virt/uml/user_mode_linux_howto_v2.rst
+++ b/Documentation/virt/uml/user_mode_linux_howto_v2.rst
@@ -664,7 +664,11 @@ one is input, the second one output.
* The fd channel - use file descriptor numbers for input/output. Example:
``con1=fd:0,fd:1.``
-* The port channel - listen on TCP port number. Example: ``con1=port:4321``
+* The port channel - start a telnet server on TCP port number. Example:
+ ``con1=port:4321``. The host must have /usr/sbin/in.telnetd (usually part of
+ a telnetd package) and the port-helper from the UML utilities (see the
+ information for the xterm channel below). UML will not boot until a client
+ connects.
* The pty and pts channels - use system pty/pts.
diff --git a/Documentation/vm/damon/design.rst b/Documentation/vm/damon/design.rst
index 210f0f50efd8..0cff6fac6b7e 100644
--- a/Documentation/vm/damon/design.rst
+++ b/Documentation/vm/damon/design.rst
@@ -13,12 +13,13 @@ primitives that dependent on and optimized for the target address space. On
the other hand, the accuracy and overhead tradeoff mechanism, which is the core
of DAMON, is in the pure logic space. DAMON separates the two parts in
different layers and defines its interface to allow various low level
-primitives implementations configurable with the core logic.
+primitives implementations configurable with the core logic. We call the low
+level primitives implementations monitoring operations.
Due to this separated design and the configurable interface, users can extend
-DAMON for any address space by configuring the core logics with appropriate low
-level primitive implementations. If appropriate one is not provided, users can
-implement the primitives on their own.
+DAMON for any address space by configuring the core logic with appropriate
+monitoring operations. If an appropriate one is not provided, users can
+implement the operations on their own.
For example, physical memory, virtual memory, swap space, those for specific
processes, NUMA nodes, files, and backing memory devices would be supportable.
@@ -26,25 +27,24 @@ Also, if some architectures or devices support special optimized access check
primitives, those will be easily configurable.
-Reference Implementations of Address Space Specific Primitives
-==============================================================
+Reference Implementations of Address Space Specific Monitoring Operations
+=========================================================================
-The low level primitives for the fundamental access monitoring are defined in
-two parts:
+The monitoring operations are defined in two parts:
1. Identification of the monitoring target address range for the address space.
2. Access check of specific address range in the target space.
-DAMON currently provides the implementations of the primitives for the physical
+DAMON currently provides the implementations of the operations for the physical
and virtual address spaces. Below two subsections describe how those work.
VMA-based Target Address Range Construction
-------------------------------------------
-This is only for the virtual address space primitives implementation. That for
-the physical address space simply asks users to manually set the monitoring
-target address ranges.
+This is only for the virtual address space monitoring operations
+implementation. That for the physical address space simply asks users to
+manually set the monitoring target address ranges.
Only small parts in the super-huge virtual address space of the processes are
mapped to the physical memory and accessed. Thus, tracking the unmapped
@@ -84,9 +84,10 @@ table having a mapping to the address. In this way, the implementations find
and clear the bit(s) for next sampling target address and checks whether the
bit(s) set again after one sampling period. This could disturb other kernel
subsystems using the Accessed bits, namely Idle page tracking and the reclaim
-logic. To avoid such disturbances, DAMON makes it mutually exclusive with Idle
-page tracking and uses ``PG_idle`` and ``PG_young`` page flags to solve the
-conflict with the reclaim logic, as Idle page tracking does.
+logic. DAMON does nothing to avoid disturbing Idle page tracking, so handling
+the interference is the responsibility of sysadmins. However, it solves the
+conflict with the reclaim logic using ``PG_idle`` and ``PG_young`` page flags,
+as Idle page tracking does.
Address Space Independent Core Mechanisms
@@ -94,8 +95,8 @@ Address Space Independent Core Mechanisms
Below four sections describe each of the DAMON core mechanisms and the five
monitoring attributes, ``sampling interval``, ``aggregation interval``,
-``regions update interval``, ``minimum number of regions``, and ``maximum
-number of regions``.
+``update interval``, ``minimum number of regions``, and ``maximum number of
+regions``.
Access Frequency Monitoring
@@ -168,6 +169,8 @@ The monitoring target address range could dynamically changed. For example,
virtual memory could be dynamically mapped and unmapped. Physical memory could
be hot-plugged.
-As the changes could be quite frequent in some cases, DAMON checks the dynamic
-memory mapping changes and applies it to the abstracted target area only for
-each of a user-specified time interval (``regions update interval``).
+As the changes could be quite frequent in some cases, DAMON allows the
+monitoring operations to check dynamic changes, including memory mapping
+changes, and apply them to monitoring operations-related data structures such
+as the abstracted monitoring target memory area, only once per user-specified
+time interval (``update interval``).
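+
+As a rough illustration, the five attributes can be written in one go through
+the DAMON debugfs interface (field order as described in the admin guide: the
+three intervals in microseconds, then the region bounds; adjust the path and
+values if your kernel differs)::
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+          FILE *f = fopen("/sys/kernel/debug/damon/attrs", "w");
+
+          if (!f)
+                  return 1;
+          /* sampling, aggregation and update intervals in microseconds,
+           * then the minimum and maximum number of monitoring regions. */
+          fprintf(f, "5000 100000 1000000 10 1000\n");
+          return fclose(f);
+  }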
diff --git a/Documentation/vm/damon/faq.rst b/Documentation/vm/damon/faq.rst
index 11aea40eb328..dde7e2414ee6 100644
--- a/Documentation/vm/damon/faq.rst
+++ b/Documentation/vm/damon/faq.rst
@@ -31,7 +31,7 @@ Does DAMON support virtual memory only?
=======================================
No. The core of the DAMON is address space independent. The address space
-specific low level primitive parts including monitoring target regions
+specific monitoring operations including monitoring target regions
constructions and actual access checks can be implemented and configured on the
DAMON core by the users. In this way, DAMON users can monitor any address
space with any access check technique.
diff --git a/Documentation/vm/page_owner.rst b/Documentation/vm/page_owner.rst
index 9837fc8147dd..905555e3e483 100644
--- a/Documentation/vm/page_owner.rst
+++ b/Documentation/vm/page_owner.rst
@@ -26,9 +26,9 @@ fragmentation statistics can be obtained through gfp flag information of
each page. It is already implemented and activated if page owner is
enabled. Other usages are more than welcome.
-page owner is disabled in default. So, if you'd like to use it, you need
-to add "page_owner=on" into your boot cmdline. If the kernel is built
-with page owner and page owner is disabled in runtime due to no enabling
+page owner is disabled by default. So, if you'd like to use it, you need
+to add "page_owner=on" to your boot cmdline. If the kernel is built
+with page owner and page owner is disabled at runtime due to not enabling the
boot option, runtime overhead is marginal. If disabled in runtime, it
doesn't require memory to store owner information, so there is no runtime
memory overhead. And, page owner inserts just two unlikely branches into
@@ -85,7 +85,7 @@ Usage
cat /sys/kernel/debug/page_owner > page_owner_full.txt
./page_owner_sort page_owner_full.txt sorted_page_owner.txt
- The general output of ``page_owner_full.txt`` is as follows:
+ The general output of ``page_owner_full.txt`` is as follows::
Page allocated via order XXX, ...
PFN XXX ...
@@ -100,7 +100,7 @@ Usage
and pages of buf, and finally sorts them according to the times.
See the result about who allocated each page
- in the ``sorted_page_owner.txt``. General output:
+ in the ``sorted_page_owner.txt``. General output::
XXX times, XXX pages:
Page allocated via order XXX, ...
diff --git a/Documentation/vm/page_table_check.rst b/Documentation/vm/page_table_check.rst
index 81f521ff7ea7..1a09472f10a3 100644
--- a/Documentation/vm/page_table_check.rst
+++ b/Documentation/vm/page_table_check.rst
@@ -9,7 +9,7 @@ Page Table Check
Introduction
============
-Page table check allows to hardern the kernel by ensuring that some types of
+Page table check allows one to harden the kernel by ensuring that some types of
the memory corruptions are prevented.
Page table check performs extra verifications at the time when new pages become
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index f498f1d36cd3..982c8af853b9 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -21,6 +21,7 @@ x86-specific Documentation
tlb
mtrr
pat
+ intel-hfi
intel-iommu
intel_txt
amd-memory-encryption
diff --git a/Documentation/x86/intel-hfi.rst b/Documentation/x86/intel-hfi.rst
new file mode 100644
index 000000000000..49dea58ea4fb
--- /dev/null
+++ b/Documentation/x86/intel-hfi.rst
@@ -0,0 +1,72 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================================
+Hardware-Feedback Interface for scheduling on Intel Hardware
+============================================================
+
+Overview
+--------
+
+Intel has described the Hardware Feedback Interface (HFI) in the Intel 64 and
+IA-32 Architectures Software Developer's Manual (Intel SDM) Volume 3 Section
+14.6 [1]_.
+
+The HFI gives the operating system performance and energy efficiency
+capability data for each CPU in the system. Linux can use the information from
+the HFI to influence task placement decisions.
+
+The Hardware Feedback Interface
+-------------------------------
+
+The Hardware Feedback Interface provides the operating system with information
+about the performance and energy efficiency of each CPU in the system. Each
+capability is given as a unit-less quantity in the range [0-255]. Higher values
+indicate higher capability. Energy efficiency and performance are reported in
+separate capabilities. Even though on some systems these two metrics may be
+related, they are specified as independent capabilities in the Intel SDM.
+
+These capabilities may change at runtime as a result of changes in the
+operating conditions of the system or the action of external factors. The rate
+at which these capabilities are updated is specific to each processor model. On
+some models, capabilities are set at boot time and never change. On others,
+capabilities may change every tens of milliseconds. For instance, a remote
+mechanism may be used to lower Thermal Design Power. Such a change can be
+reflected in the HFI. Likewise, if the system needs to be throttled due to
+excessive heat, the HFI may reflect reduced performance on specific CPUs.
+
+The kernel or a userspace policy daemon can use these capabilities to modify
+task placement decisions. For instance, if either the performance or energy
+capability of a given logical processor becomes zero, it is an indication that
+the hardware recommends that the operating system not schedule any tasks on
+that processor for performance or energy efficiency reasons, respectively.
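+
+As a toy sketch of how a policy daemon might rank CPUs from the two [0-255]
+capabilities (the weighting below is invented; the HFI itself prescribes no
+policy)::
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  /* Rank a CPU for an energy-sensitive workload; 0 means "avoid". */
+  static unsigned int cpu_rank(uint8_t perf, uint8_t efficiency)
+  {
+          if (perf == 0 || efficiency == 0)
+                  return 0;       /* hardware recommends not scheduling here */
+          return perf + 3u * efficiency;  /* favor efficiency 3:1 (invented) */
+  }
+
+  int main(void)
+  {
+          printf("rank: %u\n", cpu_rank(180, 220));
+          return 0;
+  }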
+
+Implementation details for Linux
+--------------------------------
+
+The infrastructure to handle thermal event interrupts has two parts. In the
+Local Vector Table of a CPU's local APIC, there exists the Thermal Monitor
+Register. This register controls how interrupts are delivered to a CPU when
+the thermal monitor generates an interrupt. Further details
+can be found in the Intel SDM Vol. 3 Section 10.5 [1]_.
+
+The thermal monitor may generate interrupts per CPU or per package. The HFI
+generates package-level interrupts. This monitor is configured and initialized
+via a set of model-specific registers. Specifically, the HFI interrupt and
+status are controlled via designated bits in the IA32_PACKAGE_THERM_INTERRUPT
+and IA32_PACKAGE_THERM_STATUS registers, respectively. There exists one HFI
+table per package. Further details can be found in the Intel SDM Vol. 3
+Section 14.9 [1]_.
+
+The hardware issues an HFI interrupt after updating the HFI table, once the
+table is ready for the operating system to consume. CPUs receive such
+interrupts via the thermal entry in the Local APIC's Local Vector Table.
+
+When servicing such an interrupt, the HFI driver parses the updated table and
+relays the update to userspace using the thermal notification framework. Given
+that there may be many HFI updates every second, the updates relayed to
+userspace are throttled to at most one every CONFIG_HZ jiffies.
+
+References
+----------
+
+.. [1] https://www.intel.com/sdm
diff --git a/Documentation/x86/sva.rst b/Documentation/x86/sva.rst
index 076efd51ef1f..2e9b8b0f9a0f 100644
--- a/Documentation/x86/sva.rst
+++ b/Documentation/x86/sva.rst
@@ -104,18 +104,47 @@ The MSR must be configured on each logical CPU before any application
thread can interact with a device. Threads that belong to the same
process share the same page tables, thus the same MSR value.
-PASID is cleared when a process is created. The PASID allocation and MSR
-programming may occur long after a process and its threads have been created.
-One thread must call iommu_sva_bind_device() to allocate the PASID for the
-process. If a thread uses ENQCMD without the MSR first being populated, a #GP
-will be raised. The kernel will update the PASID MSR with the PASID for all
-threads in the process. A single process PASID can be used simultaneously
-with multiple devices since they all share the same address space.
-
-One thread can call iommu_sva_unbind_device() to free the allocated PASID.
-The kernel will clear the PASID MSR for all threads belonging to the process.
-
-New threads inherit the MSR value from the parent.
+PASID Life Cycle Management
+===========================
+
+PASID is initialized as INVALID_IOASID (-1) when a process is created.
+
+Only processes that access SVA-capable devices need to have a PASID
+allocated. This allocation happens when a process opens/binds an SVA-capable
+device but finds no PASID for this process. Subsequent binds of the same or
+other devices will share the same PASID.
+
+Although the PASID is allocated to the process by opening a device,
+it is not active in any of the threads of that process. It is loaded into the
+IA32_PASID MSR lazily when a thread tries to submit a work descriptor
+to a device using the ENQCMD instruction.
+
+That first access will trigger a #GP fault because the IA32_PASID MSR
+has not been initialized with the PASID value assigned to the process
+when the device was opened. The Linux #GP handler notes that a PASID has
+been allocated for the process, and so initializes the IA32_PASID MSR
+and returns so that the ENQCMD instruction is re-executed.
+
+On fork(2) or exec(2) the PASID is removed from the process as it no
+longer has the same address space that it had when the device was opened.
+
+On clone(2) the new task shares the same address space, so it will be
+able to use the PASID allocated to the process. The IA32_PASID MSR is not
+preemptively initialized: the PASID value might not be allocated yet, the
+kernel does not know whether this thread is going to access the device, and
+a cleared IA32_PASID MSR reduces context switch overhead via the xstate
+init optimization. Since #GP faults have to be handled on any threads that
+were created before the PASID was assigned to the mm of the process, newly
+created threads might as well be treated in a consistent way.
+
+Due to the complexity of freeing the PASID and clearing all IA32_PASID MSRs
+in all threads at unbind time, the PASID is freed lazily, only on mm exit.
+
+If a process does a close(2) of the device file descriptor and munmap(2)
+of the device MMIO portal, then the driver will unbind the device. The
+PASID is still marked VALID in the PASID_MSR for any threads in the
+process that accessed the device. But this is harmless as without the
+MMIO portal they cannot submit new work to the device.
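+
+A rough userspace sketch of the submission step (the device node path below is
+hypothetical and error handling is elided; the ENQCMD byte encoding follows
+the Intel SDM)::
+
+  #include <fcntl.h>
+  #include <stdint.h>
+  #include <sys/mman.h>
+
+  /* ENQCMD: enqueue the 64-byte descriptor at *desc to the portal.
+   * Returns non-zero (ZF set) if the command was not accepted. */
+  static inline int enqcmd(void *portal, const void *desc)
+  {
+          uint8_t not_accepted;
+
+          asm volatile(".byte 0xf2, 0x0f, 0x38, 0xf8, 0x02\n"
+                       "setz %0\n"
+                       : "=r" (not_accepted)
+                       : "a" (portal), "d" (desc) : "memory");
+          return not_accepted;
+  }
+
+  int main(void)
+  {
+          int fd = open("/dev/mydev/wq0", O_RDWR);      /* hypothetical node */
+          void *portal = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED, fd, 0);
+          uint64_t desc[8] __attribute__((aligned(64))) = { 0 };
+
+          /* The first ENQCMD in this thread #GPs; the kernel fills the
+           * IA32_PASID MSR and the instruction is re-executed. */
+          while (enqcmd(portal, desc))
+                  ;       /* retry while the device queue is full */
+          return 0;
+  }
+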
Relationships
=============
diff --git a/MAINTAINERS b/MAINTAINERS
index 61d127e3314f..c10fbd13080a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -190,8 +190,9 @@ M: Johannes Berg <johannes@sipsolutions.net>
L: linux-wireless@vger.kernel.org
S: Maintained
W: https://wireless.wiki.kernel.org/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
+Q: https://patchwork.kernel.org/project/linux-wireless/list/
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git
F: Documentation/driver-api/80211/cfg80211.rst
F: Documentation/networking/regulatory.rst
F: include/linux/ieee80211.h
@@ -1001,6 +1002,7 @@ L: linux-pm@vger.kernel.org
S: Supported
F: Documentation/admin-guide/pm/amd-pstate.rst
F: drivers/cpufreq/amd-pstate*
+F: tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
AMD PTDMA DRIVER
M: Sanjay R Mehta <sanju.mehta@amd.com>
@@ -1619,6 +1621,7 @@ M: Olof Johansson <olof@lixom.net>
M: soc@kernel.org
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
+C: irc://irc.libera.chat/armlinux
T: git git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git
F: arch/arm/boot/dts/Makefile
F: arch/arm64/boot/dts/Makefile
@@ -1626,6 +1629,7 @@ F: arch/arm64/boot/dts/Makefile
ARM SUB-ARCHITECTURES
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
+C: irc://irc.libera.chat/armlinux
T: git git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git
F: arch/arm/mach-*/
F: arch/arm/plat-*/
@@ -1766,7 +1770,7 @@ T: git https://github.com/AsahiLinux/linux.git
F: Documentation/devicetree/bindings/arm/apple.yaml
F: Documentation/devicetree/bindings/arm/apple/*
F: Documentation/devicetree/bindings/i2c/apple,i2c.yaml
-F: Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
+F: Documentation/devicetree/bindings/interrupt-controller/apple,*
F: Documentation/devicetree/bindings/mailbox/apple,mailbox.yaml
F: Documentation/devicetree/bindings/pci/apple,pcie.yaml
F: Documentation/devicetree/bindings/pinctrl/apple,pinctrl.yaml
@@ -1779,6 +1783,7 @@ F: drivers/irqchip/irq-apple-aic.c
F: drivers/mailbox/apple-mailbox.c
F: drivers/pinctrl/pinctrl-apple-gpio.c
F: drivers/soc/apple/*
+F: drivers/watchdog/apple_wdt.c
F: include/dt-bindings/interrupt-controller/apple-aic.h
F: include/dt-bindings/pinctrl/apple.h
F: include/linux/apple-mailbox.h
@@ -2250,7 +2255,7 @@ F: drivers/phy/mediatek/
ARM/Microchip (AT91) SoC support
M: Nicolas Ferre <nicolas.ferre@microchip.com>
M: Alexandre Belloni <alexandre.belloni@bootlin.com>
-M: Ludovic Desroches <ludovic.desroches@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@microchip.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
W: http://www.linux4sam.org
@@ -2568,11 +2573,14 @@ F: sound/soc/rockchip/
N: rockchip
ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES
-M: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+M: Krzysztof Kozlowski <krzk@kernel.org>
+R: Alim Akhtar <alim.akhtar@samsung.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: linux-samsung-soc@vger.kernel.org
S: Maintained
+C: irc://irc.libera.chat/linux-exynos
Q: https://patchwork.kernel.org/project/linux-samsung-soc/list/
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git
F: Documentation/arm/samsung/
F: Documentation/devicetree/bindings/arm/samsung/
F: Documentation/devicetree/bindings/power/pd-samsung.yaml
@@ -2732,7 +2740,7 @@ N: stm32
N: stm
ARM/Synaptics SoC support
-M: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
+M: Jisheng Zhang <jszhang@kernel.org>
M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
@@ -3049,6 +3057,12 @@ L: linux-hwmon@vger.kernel.org
S: Maintained
F: drivers/hwmon/asus_wmi_ec_sensors.c
+ASUS EC HARDWARE MONITOR DRIVER
+M: Eugene Shalygin <eugene.shalygin@gmail.com>
+L: linux-hwmon@vger.kernel.org
+S: Maintained
+F: drivers/hwmon/asus-ec-sensors.c
+
ASUS WIRELESS RADIO CONTROL DRIVER
M: João Paulo Rechi Vita <jprvita@gmail.com>
L: platform-driver-x86@vger.kernel.org
@@ -3132,11 +3146,9 @@ W: https://wireless.wiki.kernel.org/en/users/Drivers/ath5k
F: drivers/net/wireless/ath/ath5k/
ATHEROS ATH6KL WIRELESS DRIVER
-M: Kalle Valo <kvalo@kernel.org>
L: linux-wireless@vger.kernel.org
-S: Supported
+S: Orphan
W: https://wireless.wiki.kernel.org/en/users/Drivers/ath6kl
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
F: drivers/net/wireless/ath/ath6kl/
ATI_REMOTE2 DRIVER
@@ -3193,6 +3205,7 @@ ATOMIC INFRASTRUCTURE
M: Will Deacon <will@kernel.org>
M: Peter Zijlstra <peterz@infradead.org>
R: Boqun Feng <boqun.feng@gmail.com>
+R: Mark Rutland <mark.rutland@arm.com>
L: linux-kernel@vger.kernel.org
S: Maintained
F: arch/*/include/asm/atomic*.h
@@ -3435,6 +3448,7 @@ F: Documentation/ABI/stable/sysfs-block
F: Documentation/block/
F: block/
F: drivers/block/
+F: include/linux/bio.h
F: include/linux/blk*
F: kernel/trace/blktrace.c
F: lib/sbitmap.c
@@ -3900,7 +3914,7 @@ M: Scott Branden <sbranden@broadcom.com>
M: bcm-kernel-feedback-list@broadcom.com
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
-T: git git://github.com/broadcom/cygnus-linux.git
+T: git git://github.com/broadcom/stblinux.git
F: arch/arm64/boot/dts/broadcom/northstar2/*
F: arch/arm64/boot/dts/broadcom/stingray/*
F: drivers/clk/bcm/clk-ns*
@@ -4156,9 +4170,8 @@ N: csky
K: csky
CA8210 IEEE-802.15.4 RADIO DRIVER
-M: Harry Morris <h.morris@cascoda.com>
L: linux-wpan@vger.kernel.org
-S: Maintained
+S: Orphan
W: https://github.com/Cascoda/ca8210-linux.git
F: Documentation/devicetree/bindings/net/ieee802154/ca8210.txt
F: drivers/net/ieee802154/ca8210.c
@@ -4543,6 +4556,7 @@ F: drivers/platform/chrome/
CHROMEOS EC CODEC DRIVER
M: Cheng-Yi Chiang <cychiang@chromium.org>
+M: Tzung-Bi Shih <tzungbi@google.com>
R: Guenter Roeck <groeck@chromium.org>
S: Maintained
F: Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml
@@ -4908,7 +4922,8 @@ F: kernel/cgroup/cpuset.c
CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)
M: Johannes Weiner <hannes@cmpxchg.org>
M: Michal Hocko <mhocko@kernel.org>
-M: Vladimir Davydov <vdavydov.dev@gmail.com>
+M: Roman Gushchin <roman.gushchin@linux.dev>
+M: Shakeel Butt <shakeelb@google.com>
L: cgroups@vger.kernel.org
L: linux-mm@kvack.org
S: Maintained
@@ -5311,6 +5326,7 @@ DATA ACCESS MONITOR
M: SeongJae Park <sj@kernel.org>
L: linux-mm@kvack.org
S: Maintained
+F: Documentation/ABI/testing/sysfs-kernel-mm-damon
F: Documentation/admin-guide/mm/damon/
F: Documentation/vm/damon/
F: include/linux/damon.h
@@ -5404,6 +5420,7 @@ F: drivers/platform/x86/dell/dell-rbtn.*
DELL LAPTOP SMM DRIVER
M: Pali Rohár <pali@kernel.org>
S: Maintained
+F: Documentation/ABI/obsolete/procfs-i8k
F: drivers/hwmon/dell-smm-hwmon.c
F: include/uapi/linux/i8k.h
@@ -5772,7 +5789,7 @@ F: tools/testing/selftests/dma/
DMA-BUF HEAPS FRAMEWORK
M: Sumit Semwal <sumit.semwal@linaro.org>
-R: Benjamin Gaignard <benjamin.gaignard@linaro.org>
+R: Benjamin Gaignard <benjamin.gaignard@collabora.com>
R: Liam Mark <lmark@codeaurora.org>
R: Laura Abbott <labbott@redhat.com>
R: Brian Starkey <Brian.Starkey@arm.com>
@@ -6301,8 +6318,8 @@ T: git git://anongit.freedesktop.org/drm/drm-misc
F: drivers/gpu/drm/vboxvideo/
DRM DRIVER FOR VMWARE VIRTUAL GPU
-M: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
M: Zack Rusin <zackr@vmware.com>
+R: VMware Graphics Reviewers <linux-graphics-maintainer@vmware.com>
L: dri-devel@lists.freedesktop.org
S: Supported
T: git git://anongit.freedesktop.org/drm/drm-misc
@@ -6502,7 +6519,7 @@ F: Documentation/devicetree/bindings/display/rockchip/
F: drivers/gpu/drm/rockchip/
DRM DRIVERS FOR STI
-M: Benjamin Gaignard <benjamin.gaignard@linaro.org>
+M: Alain Volmat <alain.volmat@foss.st.com>
L: dri-devel@lists.freedesktop.org
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
@@ -6511,8 +6528,8 @@ F: drivers/gpu/drm/sti
DRM DRIVERS FOR STM
M: Yannick Fertre <yannick.fertre@foss.st.com>
+M: Raphael Gallais-Pou <raphael.gallais-pou@foss.st.com>
M: Philippe Cornu <philippe.cornu@foss.st.com>
-M: Benjamin Gaignard <benjamin.gaignard@linaro.org>
L: dri-devel@lists.freedesktop.org
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
@@ -7006,12 +7023,6 @@ L: linux-edac@vger.kernel.org
S: Maintained
F: drivers/edac/sb_edac.c
-EDAC-SIFIVE
-M: Yash Shah <yash.shah@sifive.com>
-L: linux-edac@vger.kernel.org
-S: Supported
-F: drivers/edac/sifive_edac.c
-
EDAC-SKYLAKE
M: Tony Luck <tony.luck@intel.com>
L: linux-edac@vger.kernel.org
@@ -7182,7 +7193,7 @@ F: drivers/net/can/usb/etas_es58x/
ETHERNET BRIDGE
M: Roopa Prabhu <roopa@nvidia.com>
-M: Nikolay Aleksandrov <nikolay@nvidia.com>
+M: Nikolay Aleksandrov <razor@blackwall.org>
L: bridge@lists.linux-foundation.org (moderated for non-subscribers)
L: netdev@vger.kernel.org
S: Maintained
@@ -7208,8 +7219,10 @@ F: drivers/net/mdio/of_mdio.c
F: drivers/net/pcs/
F: drivers/net/phy/
F: include/dt-bindings/net/qca-ar803x.h
+F: include/linux/linkmode.h
F: include/linux/*mdio*.h
F: include/linux/mdio/*.h
+F: include/linux/mii.h
F: include/linux/of_net.h
F: include/linux/phy.h
F: include/linux/phy_fixed.h
@@ -7223,6 +7236,9 @@ F: net/core/of_net.c
EXEC & BINFMT API
R: Eric Biederman <ebiederm@xmission.com>
R: Kees Cook <keescook@chromium.org>
+L: linux-mm@kvack.org
+S: Supported
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve
F: arch/alpha/kernel/binfmt_loader.c
F: arch/x86/ia32/ia32_aout.c
F: fs/*binfmt_*.c
@@ -7230,6 +7246,7 @@ F: fs/exec.c
F: include/linux/binfmts.h
F: include/linux/elf.h
F: include/uapi/linux/binfmts.h
+F: include/uapi/linux/elf.h
F: tools/testing/selftests/exec/
N: asm/elf.h
N: binfmt
@@ -7265,7 +7282,9 @@ Extended Verification Module (EVM)
M: Mimi Zohar <zohar@linux.ibm.com>
L: linux-integrity@vger.kernel.org
S: Supported
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git
F: security/integrity/evm/
+F: security/integrity/
EXTENSIBLE FIRMWARE INTERFACE (EFI)
M: Ard Biesheuvel <ardb@kernel.org>
@@ -7573,6 +7592,12 @@ S: Maintained
W: http://floatingpoint.sourceforge.net/emulator/index.html
F: arch/x86/math-emu/
+FRAMEBUFFER CORE
+M: Daniel Vetter <daniel@ffwll.ch>
+F: drivers/video/fbdev/core/
+S: Odd Fixes
+T: git git://anongit.freedesktop.org/drm/drm-misc
+
FRAMEBUFFER LAYER
M: Helge Deller <deller@gmx.de>
L: linux-fbdev@vger.kernel.org
@@ -7736,8 +7761,7 @@ M: Qiang Zhao <qiang.zhao@nxp.com>
L: linuxppc-dev@lists.ozlabs.org
S: Maintained
F: drivers/soc/fsl/qe/
-F: include/soc/fsl/*qe*.h
-F: include/soc/fsl/*ucc*.h
+F: include/soc/fsl/qe/
FREESCALE QUICC ENGINE UCC ETHERNET DRIVER
M: Li Yang <leoyang.li@nxp.com>
@@ -7768,6 +7792,7 @@ F: Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml
F: Documentation/devicetree/bindings/soc/fsl/
F: drivers/soc/fsl/
F: include/linux/fsl/
+F: include/soc/fsl/
FREESCALE SOC FS_ENET DRIVER
M: Pantelis Antoniou <pantelis.antoniou@gmail.com>
@@ -8635,7 +8660,7 @@ S: Maintained
F: drivers/gpio/gpio-hisi.c
HISILICON HIGH PERFORMANCE RSA ENGINE DRIVER (HPRE)
-M: Zaibo Xu <xuzaibo@huawei.com>
+M: Longfang Liu <liulongfang@huawei.com>
L: linux-crypto@vger.kernel.org
S: Maintained
F: Documentation/ABI/testing/debugfs-hisi-hpre
@@ -8715,8 +8740,8 @@ F: Documentation/devicetree/bindings/scsi/hisilicon-sas.txt
F: drivers/scsi/hisi_sas/
HISILICON SECURITY ENGINE V2 DRIVER (SEC2)
-M: Zaibo Xu <xuzaibo@huawei.com>
M: Kai Ye <yekai13@huawei.com>
+M: Longfang Liu <liulongfang@huawei.com>
L: linux-crypto@vger.kernel.org
S: Maintained
F: Documentation/ABI/testing/debugfs-hisi-sec
@@ -8747,7 +8772,7 @@ F: Documentation/devicetree/bindings/mfd/hisilicon,hi6421-spmi-pmic.yaml
F: drivers/mfd/hi6421-spmi-pmic.c
HISILICON TRUE RANDOM NUMBER GENERATOR V2 SUPPORT
-M: Zaibo Xu <xuzaibo@huawei.com>
+M: Weili Qian <qianweili@huawei.com>
S: Maintained
F: drivers/crypto/hisilicon/trng/trng.c
@@ -9250,6 +9275,15 @@ S: Maintained
W: https://github.com/o2genum/ideapad-slidebar
F: drivers/input/misc/ideapad_slidebar.c
+IDMAPPED MOUNTS
+M: Christian Brauner <brauner@kernel.org>
+L: linux-fsdevel@vger.kernel.org
+S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git
+F: Documentation/filesystems/idmappings.rst
+F: tools/testing/selftests/mount_setattr/
+F: include/linux/mnt_idmapping.h
+
IDT VersaClock 5 CLOCK DRIVER
M: Luca Ceresoli <luca@lucaceresoli.net>
S: Maintained
@@ -9507,6 +9541,7 @@ L: linux-integrity@vger.kernel.org
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git
F: security/integrity/ima/
+F: security/integrity/
INTEL 810/815 FRAMEBUFFER DRIVER
M: Antonino Daplas <adaplas@gmail.com>
@@ -10438,6 +10473,8 @@ KERNEL REGRESSIONS
M: Thorsten Leemhuis <linux@leemhuis.info>
L: regressions@lists.linux.dev
S: Supported
+F: Documentation/admin-guide/reporting-regressions.rst
+F: Documentation/process/handling-regressions.rst
KERNEL SELFTEST FRAMEWORK
M: Shuah Khan <shuah@kernel.org>
@@ -10643,6 +10680,14 @@ F: include/linux/keyctl.h
F: include/uapi/linux/keyctl.h
F: security/keys/
+KEYS/KEYRINGS_INTEGRITY
+M: Jarkko Sakkinen <jarkko@kernel.org>
+M: Mimi Zohar <zohar@linux.ibm.com>
+L: linux-integrity@vger.kernel.org
+L: keyrings@vger.kernel.org
+S: Supported
+F: security/integrity/platform_certs
+
KFENCE
M: Alexander Potapenko <glider@google.com>
M: Marco Elver <elver@google.com>
@@ -10877,6 +10922,12 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
F: drivers/ata/pata_arasan_cf.c
F: include/linux/pata_arasan_cf_data.h
+LIBATA PATA DRIVERS
+R: Sergey Shtylyov <s.shtylyov@omp.ru>
+L: linux-ide@vger.kernel.org
+F: drivers/ata/ata_*.c
+F: drivers/ata/pata_*.c
+
LIBATA PATA FARADAY FTIDE010 AND GEMINI SATA BRIDGE DRIVERS
M: Linus Walleij <linus.walleij@linaro.org>
L: linux-ide@vger.kernel.org
@@ -11366,8 +11417,9 @@ M: Johannes Berg <johannes@sipsolutions.net>
L: linux-wireless@vger.kernel.org
S: Maintained
W: https://wireless.wiki.kernel.org/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
+Q: https://patchwork.kernel.org/project/linux-wireless/list/
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git
F: Documentation/networking/mac80211-injection.rst
F: Documentation/networking/mac80211_hwsim/mac80211_hwsim.rst
F: drivers/net/wireless/mac80211_hwsim.[ch]
@@ -11651,7 +11703,7 @@ F: drivers/iio/proximity/mb1232.c
MAXIM MAX17040 FAMILY FUEL GAUGE DRIVERS
R: Iskren Chernev <iskren.chernev@gmail.com>
-R: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+R: Krzysztof Kozlowski <krzk@kernel.org>
R: Marek Szyprowski <m.szyprowski@samsung.com>
R: Matheus Castello <matheus@castello.eng.br>
L: linux-pm@vger.kernel.org
@@ -11661,7 +11713,7 @@ F: drivers/power/supply/max17040_battery.c
MAXIM MAX17042 FAMILY FUEL GAUGE DRIVERS
R: Hans de Goede <hdegoede@redhat.com>
-R: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+R: Krzysztof Kozlowski <krzk@kernel.org>
R: Marek Szyprowski <m.szyprowski@samsung.com>
R: Sebastian Krzyszkowiak <sebastian.krzyszkowiak@puri.sm>
R: Purism Kernel Team <kernel@puri.sm>
@@ -11695,7 +11747,7 @@ MAXIM MAX77802 PMIC REGULATOR DEVICE DRIVER
M: Javier Martinez Canillas <javier@dowhile0.org>
L: linux-kernel@vger.kernel.org
S: Supported
-F: Documentation/devicetree/bindings/*/*max77802.txt
+F: Documentation/devicetree/bindings/*/*max77802.yaml
F: drivers/regulator/max77802-regulator.c
F: include/dt-bindings/*/*max77802.h
@@ -11706,23 +11758,26 @@ F: Documentation/devicetree/bindings/power/supply/maxim,max77976.yaml
F: drivers/power/supply/max77976_charger.c
MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
L: linux-pm@vger.kernel.org
S: Supported
+F: Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml
F: drivers/power/supply/max14577_charger.c
F: drivers/power/supply/max77693_charger.c
MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
M: Chanwoo Choi <cw00.choi@samsung.com>
-M: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
L: linux-kernel@vger.kernel.org
S: Supported
+F: Documentation/devicetree/bindings/*/maxim,max14577.yaml
F: Documentation/devicetree/bindings/*/maxim,max77686.yaml
+F: Documentation/devicetree/bindings/*/maxim,max77843.yaml
F: Documentation/devicetree/bindings/clock/maxim,max77686.txt
-F: Documentation/devicetree/bindings/mfd/max14577.txt
F: Documentation/devicetree/bindings/mfd/max77693.txt
+F: drivers/*/*max77843.c
F: drivers/*/max14577*.c
F: drivers/*/max77686*.c
F: drivers/*/max77693*.c
@@ -12396,7 +12451,7 @@ F: include/uapi/linux/membarrier.h
F: kernel/sched/membarrier.c
MEMBLOCK
-M: Mike Rapoport <rppt@linux.ibm.com>
+M: Mike Rapoport <rppt@kernel.org>
L: linux-mm@kvack.org
S: Maintained
F: Documentation/core-api/boot-time-mm.rst
@@ -12404,7 +12459,7 @@ F: include/linux/memblock.h
F: mm/memblock.c
MEMORY CONTROLLER DRIVERS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl.git
@@ -13294,8 +13349,8 @@ W: http://www.iptables.org/
W: http://www.nftables.org/
Q: http://patchwork.ozlabs.org/project/netfilter-devel/list/
C: irc://irc.libera.chat/netfilter
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next.git
F: include/linux/netfilter*
F: include/linux/netfilter/
F: include/net/netfilter/
@@ -13352,6 +13407,7 @@ F: net/core/drop_monitor.c
NETWORKING DRIVERS
M: "David S. Miller" <davem@davemloft.net>
M: Jakub Kicinski <kuba@kernel.org>
+M: Paolo Abeni <pabeni@redhat.com>
L: netdev@vger.kernel.org
S: Maintained
Q: https://patchwork.kernel.org/project/netdevbpf/list/
@@ -13374,9 +13430,10 @@ NETWORKING DRIVERS (WIRELESS)
M: Kalle Valo <kvalo@kernel.org>
L: linux-wireless@vger.kernel.org
S: Maintained
-Q: http://patchwork.kernel.org/project/linux-wireless/list/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers.git
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-next.git
+W: https://wireless.wiki.kernel.org/
+Q: https://patchwork.kernel.org/project/linux-wireless/list/
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git
F: Documentation/devicetree/bindings/net/wireless/
F: drivers/net/wireless/
@@ -13397,6 +13454,7 @@ F: tools/testing/selftests/drivers/net/dsa/
NETWORKING [GENERAL]
M: "David S. Miller" <davem@davemloft.net>
M: Jakub Kicinski <kuba@kernel.org>
+M: Paolo Abeni <pabeni@redhat.com>
L: netdev@vger.kernel.org
S: Maintained
Q: https://patchwork.kernel.org/project/netdevbpf/list/
@@ -13449,7 +13507,11 @@ L: netdev@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
F: arch/x86/net/*
+F: include/linux/ip.h
+F: include/linux/ipv6*
+F: include/net/fib*
F: include/net/ip*
+F: include/net/route.h
F: net/ipv4/
F: net/ipv6/
@@ -13510,10 +13572,6 @@ F: include/net/tls.h
F: include/uapi/linux/tls.h
F: net/tls/*
-NETWORKING [WIRELESS]
-L: linux-wireless@vger.kernel.org
-Q: http://patchwork.kernel.org/project/linux-wireless/list/
-
NETXEN (1/10) GbE SUPPORT
M: Manish Chopra <manishc@marvell.com>
M: Rahul Verma <rahulv@marvell.com>
@@ -13540,7 +13598,7 @@ F: include/uapi/linux/nexthop.h
F: net/ipv4/nexthop.c
NFC SUBSYSTEM
-M: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-nfc@lists.01.org (subscribers-only)
L: netdev@vger.kernel.org
S: Maintained
@@ -13561,7 +13619,7 @@ F: tools/testing/selftests/nci/
NFS, SUNRPC, AND LOCKD CLIENTS
M: Trond Myklebust <trond.myklebust@hammerspace.com>
-M: Anna Schumaker <anna.schumaker@netapp.com>
+M: Anna Schumaker <anna@kernel.org>
L: linux-nfs@vger.kernel.org
S: Maintained
W: http://client.linux-nfs.org
@@ -13674,7 +13732,7 @@ F: scripts/nsdeps
NTB AMD DRIVER
M: Sanjay R Mehta <sanju.mehta@amd.com>
M: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
-L: linux-ntb@googlegroups.com
+L: ntb@lists.linux.dev
S: Supported
F: drivers/ntb/hw/amd/
@@ -13682,7 +13740,7 @@ NTB DRIVER CORE
M: Jon Mason <jdmason@kudzu.us>
M: Dave Jiang <dave.jiang@intel.com>
M: Allen Hubbe <allenbh@gmail.com>
-L: linux-ntb@googlegroups.com
+L: ntb@lists.linux.dev
S: Supported
W: https://github.com/jonmason/ntb/wiki
T: git git://github.com/jonmason/ntb.git
@@ -13694,13 +13752,13 @@ F: tools/testing/selftests/ntb/
NTB IDT DRIVER
M: Serge Semin <fancer.lancer@gmail.com>
-L: linux-ntb@googlegroups.com
+L: ntb@lists.linux.dev
S: Supported
F: drivers/ntb/hw/idt/
NTB INTEL DRIVER
M: Dave Jiang <dave.jiang@intel.com>
-L: linux-ntb@googlegroups.com
+L: ntb@lists.linux.dev
S: Supported
W: https://github.com/davejiang/linux/wiki
T: git https://github.com/davejiang/linux.git
@@ -13854,7 +13912,7 @@ F: Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml
F: drivers/regulator/pf8x00-regulator.c
NXP PTN5150A CC LOGIC AND EXTCON DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/extcon/extcon-ptn5150.yaml
@@ -14379,6 +14437,7 @@ M: Rob Herring <robh+dt@kernel.org>
M: Frank Rowand <frowand.list@gmail.com>
L: devicetree@vger.kernel.org
S: Maintained
+C: irc://irc.libera.chat/devicetree
W: http://www.devicetree.org/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git
F: Documentation/ABI/testing/sysfs-firmware-ofw
@@ -14390,6 +14449,7 @@ OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS
M: Rob Herring <robh+dt@kernel.org>
L: devicetree@vger.kernel.org
S: Maintained
+C: irc://irc.libera.chat/devicetree
Q: http://patchwork.ozlabs.org/project/devicetree-bindings/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git
F: Documentation/devicetree/
@@ -14576,8 +14636,9 @@ F: include/uapi/linux/ppdev.h
PARAVIRT_OPS INTERFACE
M: Juergen Gross <jgross@suse.com>
-M: Deep Shah <sdeep@vmware.com>
-M: "VMware, Inc." <pv-drivers@vmware.com>
+M: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
+R: Alexey Makhalov <amakhalov@vmware.com>
+R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
L: virtualization@lists.linux-foundation.org
L: x86@kernel.org
S: Supported
@@ -15123,7 +15184,7 @@ M: Ingo Molnar <mingo@redhat.com>
M: Arnaldo Carvalho de Melo <acme@kernel.org>
R: Mark Rutland <mark.rutland@arm.com>
R: Alexander Shishkin <alexander.shishkin@linux.intel.com>
-R: Jiri Olsa <jolsa@redhat.com>
+R: Jiri Olsa <jolsa@kernel.org>
R: Namhyung Kim <namhyung@kernel.org>
L: linux-perf-users@vger.kernel.org
L: linux-kernel@vger.kernel.org
@@ -15278,11 +15339,13 @@ F: drivers/pinctrl/renesas/
PIN CONTROLLER - SAMSUNG
M: Tomasz Figa <tomasz.figa@gmail.com>
-M: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Sylwester Nawrocki <s.nawrocki@samsung.com>
+R: Alim Akhtar <alim.akhtar@samsung.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: linux-samsung-soc@vger.kernel.org
S: Maintained
+C: irc://irc.libera.chat/linux-exynos
Q: https://patchwork.kernel.org/project/linux-samsung-soc/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/samsung.git
F: Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt
@@ -15504,6 +15567,7 @@ F: drivers/net/ppp/pptp.c
PRESSURE STALL INFORMATION (PSI)
M: Johannes Weiner <hannes@cmpxchg.org>
+M: Suren Baghdasaryan <surenb@google.com>
S: Maintained
F: include/linux/psi*
F: kernel/sched/psi.c
@@ -15539,6 +15603,7 @@ M: Iurii Zaikin <yzaikin@google.com>
L: linux-kernel@vger.kernel.org
L: linux-fsdevel@vger.kernel.org
S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux.git sysctl-next
F: fs/proc/proc_sysctl.c
F: include/linux/sysctl.h
F: kernel/sysctl-test.c
@@ -15886,6 +15951,7 @@ S: Supported
W: https://wireless.wiki.kernel.org/en/users/Drivers/ath10k
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
F: drivers/net/wireless/ath/ath10k/
+F: Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
QUALCOMM ATHEROS ATH11K WIRELESS DRIVER
M: Kalle Valo <kvalo@kernel.org>
@@ -15893,11 +15959,12 @@ L: ath11k@lists.infradead.org
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
F: drivers/net/wireless/ath/ath11k/
+F: Documentation/devicetree/bindings/net/wireless/qcom,ath11k.txt
QUALCOMM ATHEROS ATH9K WIRELESS DRIVER
-M: ath9k-devel@qca.qualcomm.com
+M: Toke Høiland-Jørgensen <toke@toke.dk>
L: linux-wireless@vger.kernel.org
-S: Supported
+S: Maintained
W: https://wireless.wiki.kernel.org/en/users/Drivers/ath9k
F: Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml
F: drivers/net/wireless/ath/ath9k/
@@ -15972,14 +16039,6 @@ F: Documentation/devicetree/bindings/misc/qcom,fastrpc.txt
F: drivers/misc/fastrpc.c
F: include/uapi/misc/fastrpc.h
-QUALCOMM GENERIC INTERFACE I2C DRIVER
-M: Akash Asthana <akashast@codeaurora.org>
-M: Mukesh Savaliya <msavaliy@codeaurora.org>
-L: linux-i2c@vger.kernel.org
-L: linux-arm-msm@vger.kernel.org
-S: Supported
-F: drivers/i2c/busses/i2c-qcom-geni.c
-
QUALCOMM HEXAGON ARCHITECTURE
M: Brian Cain <bcain@codeaurora.org>
L: linux-hexagon@vger.kernel.org
@@ -16051,8 +16110,8 @@ F: Documentation/devicetree/bindings/mtd/qcom,nandc.yaml
F: drivers/mtd/nand/raw/qcom_nandc.c
QUALCOMM RMNET DRIVER
-M: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
-M: Sean Tranchetti <stranche@codeaurora.org>
+M: Subash Abhinov Kasiviswanathan <quic_subashab@quicinc.com>
+M: Sean Tranchetti <quic_stranche@quicinc.com>
L: netdev@vger.kernel.org
S: Maintained
F: Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst
@@ -16078,11 +16137,10 @@ F: Documentation/devicetree/bindings/media/*venus*
F: drivers/media/platform/qcom/venus/
QUALCOMM WCN36XX WIRELESS DRIVER
-M: Kalle Valo <kvalo@kernel.org>
+M: Loic Poulain <loic.poulain@linaro.org>
L: wcn36xx@lists.infradead.org
S: Supported
W: https://wireless.wiki.kernel.org/en/users/Drivers/wcn36xx
-T: git git://github.com/KrasnikovEugene/wcn36xx.git
F: drivers/net/wireless/ath/wcn36xx/
QUANTENNA QTNFMAC WIRELESS DRIVER
@@ -16184,6 +16242,7 @@ M: Jason A. Donenfeld <Jason@zx2c4.com>
T: git https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git
S: Maintained
F: drivers/char/random.c
+F: drivers/virt/vmgenid.c
RAPIDIO SUBSYSTEM
M: Matt Porter <mporter@kernel.crashing.org>
@@ -16298,6 +16357,8 @@ F: tools/testing/selftests/resctrl/
READ-COPY UPDATE (RCU)
M: "Paul E. McKenney" <paulmck@kernel.org>
+M: Frederic Weisbecker <frederic@kernel.org> (kernel/rcu/tree_nocb.h)
+M: Neeraj Upadhyay <quic_neeraju@quicinc.com> (kernel/rcu/tasks.h)
M: Josh Triplett <josh@joshtriplett.org>
R: Steven Rostedt <rostedt@goodmis.org>
R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
@@ -16345,6 +16406,7 @@ F: drivers/watchdog/realtek_otto_wdt.c
REALTEK RTL83xx SMI DSA ROUTER CHIPS
M: Linus Walleij <linus.walleij@linaro.org>
+M: Alvin Å ipraga <alsi@bang-olufsen.dk>
S: Maintained
F: Documentation/devicetree/bindings/net/dsa/realtek-smi.txt
F: drivers/net/dsa/realtek-smi*
@@ -16464,6 +16526,14 @@ F: Documentation/devicetree/bindings/i2c/renesas,rmobile-iic.yaml
F: drivers/i2c/busses/i2c-rcar.c
F: drivers/i2c/busses/i2c-sh_mobile.c
+RENESAS R-CAR SATA DRIVER
+R: Sergey Shtylyov <s.shtylyov@omp.ru>
+S: Supported
+L: linux-ide@vger.kernel.org
+L: linux-renesas-soc@vger.kernel.org
+F: Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml
+F: drivers/ata/sata_rcar.c
+
RENESAS R-CAR THERMAL DRIVERS
M: Niklas Söderlund <niklas.soderlund@ragnatech.se>
L: linux-renesas-soc@vger.kernel.org
@@ -16532,8 +16602,9 @@ M: Johannes Berg <johannes@sipsolutions.net>
L: linux-wireless@vger.kernel.org
S: Maintained
W: https://wireless.wiki.kernel.org/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
+Q: https://patchwork.kernel.org/project/linux-wireless/list/
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git
F: Documentation/ABI/stable/sysfs-class-rfkill
F: Documentation/driver-api/rfkill.rst
F: include/linux/rfkill.h
@@ -16798,8 +16869,8 @@ F: drivers/video/fbdev/savage/
S390
M: Heiko Carstens <hca@linux.ibm.com>
M: Vasily Gorbik <gor@linux.ibm.com>
-M: Christian Borntraeger <borntraeger@linux.ibm.com>
-R: Alexander Gordeev <agordeev@linux.ibm.com>
+M: Alexander Gordeev <agordeev@linux.ibm.com>
+R: Christian Borntraeger <borntraeger@linux.ibm.com>
R: Sven Schnelle <svens@linux.ibm.com>
L: linux-s390@vger.kernel.org
S: Supported
@@ -16914,7 +16985,7 @@ W: http://www.ibm.com/developerworks/linux/linux390/
F: drivers/s390/scsi/zfcp_*
S3C ADC BATTERY DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-samsung-soc@vger.kernel.org
S: Odd Fixes
F: drivers/power/supply/s3c_adc_battery.c
@@ -16959,7 +17030,7 @@ F: Documentation/admin-guide/LSM/SafeSetID.rst
F: security/safesetid/
SAMSUNG AUDIO (ASoC) DRIVERS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Sylwester Nawrocki <s.nawrocki@samsung.com>
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
S: Supported
@@ -16967,7 +17038,7 @@ F: Documentation/devicetree/bindings/sound/samsung*
F: sound/soc/samsung/
SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-crypto@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
@@ -17002,7 +17073,7 @@ S: Maintained
F: drivers/platform/x86/samsung-laptop.c
SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
L: linux-kernel@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
@@ -17028,7 +17099,7 @@ F: drivers/media/platform/s3c-camif/
F: include/media/drv-intf/s3c_camif.h
SAMSUNG S3FWRN5 NFC DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Krzysztof Opasiak <k.opasiak@samsung.com>
L: linux-nfc@lists.01.org (subscribers-only)
S: Maintained
@@ -17050,7 +17121,7 @@ S: Supported
F: drivers/media/i2c/s5k5baf.c
SAMSUNG S5P Security SubSystem (SSS) DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Vladimir Zapolskiy <vz@mleia.com>
L: linux-crypto@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
@@ -17070,6 +17141,7 @@ SAMSUNG SOC CLOCK DRIVERS
M: Sylwester Nawrocki <s.nawrocki@samsung.com>
M: Tomasz Figa <tomasz.figa@gmail.com>
M: Chanwoo Choi <cw00.choi@samsung.com>
+R: Alim Akhtar <alim.akhtar@samsung.com>
L: linux-samsung-soc@vger.kernel.org
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/snawrocki/clk.git
@@ -17084,12 +17156,12 @@ F: include/linux/clk/samsung.h
F: include/linux/platform_data/clk-s3c2410.h
SAMSUNG SPI DRIVERS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Andi Shyti <andi@etezian.org>
L: linux-spi@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
-F: Documentation/devicetree/bindings/spi/spi-samsung.txt
+F: Documentation/devicetree/bindings/spi/samsung,spi*.yaml
F: drivers/spi/spi-s3c*
F: include/linux/platform_data/spi-s3c64xx.h
F: include/linux/spi/s3c24xx-fiq.h
@@ -17101,11 +17173,12 @@ S: Supported
F: drivers/net/ethernet/samsung/sxgbe/
SAMSUNG THERMAL DRIVER
-M: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
+M: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-pm@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
-S: Supported
-T: git https://github.com/lmajewski/linux-samsung-thermal.git
+S: Maintained
+F: Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml
F: drivers/thermal/samsung/
SAMSUNG USB2 PHY DRIVER
@@ -17706,6 +17779,21 @@ S: Maintained
W: http://www.winischhofer.at/linuxsisusbvga.shtml
F: drivers/usb/misc/sisusbvga/
+SL28 CPLD MFD DRIVER
+M: Michael Walle <michael@walle.cc>
+S: Maintained
+F: Documentation/devicetree/bindings/gpio/kontron,sl28cpld-gpio.yaml
+F: Documentation/devicetree/bindings/hwmon/kontron,sl28cpld-hwmon.yaml
+F: Documentation/devicetree/bindings/interrupt-controller/kontron,sl28cpld-intc.yaml
+F: Documentation/devicetree/bindings/mfd/kontron,sl28cpld.yaml
+F: Documentation/devicetree/bindings/pwm/kontron,sl28cpld-pwm.yaml
+F: Documentation/devicetree/bindings/watchdog/kontron,sl28cpld-wdt.yaml
+F: drivers/gpio/gpio-sl28cpld.c
+F: drivers/hwmon/sl28cpld-hwmon.c
+F: drivers/irqchip/irq-sl28cpld.c
+F: drivers/pwm/pwm-sl28cpld.c
+F: drivers/watchdog/sl28cpld_wdt.c
+
SLAB ALLOCATOR
M: Christoph Lameter <cl@linux.com>
M: Pekka Enberg <penberg@kernel.org>
@@ -17713,8 +17801,10 @@ M: David Rientjes <rientjes@google.com>
M: Joonsoo Kim <iamjoonsoo.kim@lge.com>
M: Andrew Morton <akpm@linux-foundation.org>
M: Vlastimil Babka <vbabka@suse.cz>
+R: Roman Gushchin <roman.gushchin@linux.dev>
L: linux-mm@kvack.org
S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git
F: include/linux/sl?b*.h
F: mm/sl?b*
@@ -18422,7 +18512,7 @@ F: Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt
F: sound/soc/sti/
STI CEC DRIVER
-M: Benjamin Gaignard <benjamin.gaignard@linaro.org>
+M: Alain Volmat <alain.volmat@foss.st.com>
S: Maintained
F: Documentation/devicetree/bindings/media/stih-cec.txt
F: drivers/media/cec/platform/sti/
@@ -18491,6 +18581,13 @@ S: Maintained
F: Documentation/devicetree/bindings/rtc/sunplus,sp7021-rtc.yaml
F: drivers/rtc/rtc-sunplus.c
+SUNPLUS SPI CONTROLLER INTERFACE DRIVER
+M: Li-hao Kuo <lhjeff911@gmail.com>
+L: linux-spi@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml
+F: drivers/spi/spi-sunplus-sp7021.c
+
SUPERH
M: Yoshinori Sato <ysato@users.sourceforge.jp>
M: Rich Felker <dalias@libc.org>
@@ -19438,6 +19535,15 @@ S: Maintained
F: Documentation/hwmon/tmp401.rst
F: drivers/hwmon/tmp401.c
+TMP464 HARDWARE MONITOR DRIVER
+M: Agathe Porte <agathe.porte@nokia.com>
+M: Guenter Roeck <linux@roeck-us.net>
+L: linux-hwmon@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/hwmon/ti,tmp464.yaml
+F: Documentation/hwmon/tmp464.rst
+F: drivers/hwmon/tmp464.c
+
TMP513 HARDWARE MONITOR DRIVER
M: Eric Tremblay <etremblay@distech-controls.com>
L: linux-hwmon@vger.kernel.org
@@ -20587,30 +20693,33 @@ F: tools/testing/vsock/
VMWARE BALLOON DRIVER
M: Nadav Amit <namit@vmware.com>
-M: "VMware, Inc." <pv-drivers@vmware.com>
+R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
L: linux-kernel@vger.kernel.org
S: Maintained
F: drivers/misc/vmw_balloon.c
VMWARE HYPERVISOR INTERFACE
-M: Deep Shah <sdeep@vmware.com>
-M: "VMware, Inc." <pv-drivers@vmware.com>
+M: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
+M: Alexey Makhalov <amakhalov@vmware.com>
+R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
L: virtualization@lists.linux-foundation.org
+L: x86@kernel.org
S: Supported
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/vmware
F: arch/x86/include/asm/vmware.h
F: arch/x86/kernel/cpu/vmware.c
VMWARE PVRDMA DRIVER
M: Bryan Tan <bryantan@vmware.com>
M: Vishnu Dasa <vdasa@vmware.com>
-M: VMware PV-Drivers <pv-drivers@vmware.com>
+R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
L: linux-rdma@vger.kernel.org
S: Maintained
F: drivers/infiniband/hw/vmw_pvrdma/
VMware PVSCSI driver
M: Vishal Bhakta <vbhakta@vmware.com>
-M: VMware PV-Drivers <pv-drivers@vmware.com>
+R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
L: linux-scsi@vger.kernel.org
S: Maintained
F: drivers/scsi/vmw_pvscsi.c
@@ -20618,7 +20727,7 @@ F: drivers/scsi/vmw_pvscsi.h
VMWARE VIRTUAL PTP CLOCK DRIVER
M: Vivek Thampi <vithampi@vmware.com>
-M: "VMware, Inc." <pv-drivers@vmware.com>
+R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
L: netdev@vger.kernel.org
S: Supported
F: drivers/ptp/ptp_vmw.c
@@ -20626,14 +20735,15 @@ F: drivers/ptp/ptp_vmw.c
VMWARE VMCI DRIVER
M: Jorgen Hansen <jhansen@vmware.com>
M: Vishnu Dasa <vdasa@vmware.com>
+R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
L: linux-kernel@vger.kernel.org
-L: pv-drivers@vmware.com (private)
S: Maintained
F: drivers/misc/vmw_vmci/
VMWARE VMMOUSE SUBDRIVER
-M: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
-M: "VMware, Inc." <pv-drivers@vmware.com>
+M: Zack Rusin <zackr@vmware.com>
+R: VMware Graphics Reviewers <linux-graphics-maintainer@vmware.com>
+R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
L: linux-input@vger.kernel.org
S: Maintained
F: drivers/input/mouse/vmmouse.c
@@ -20641,7 +20751,7 @@ F: drivers/input/mouse/vmmouse.h
VMWARE VMXNET3 ETHERNET DRIVER
M: Ronak Doshi <doshir@vmware.com>
-M: pv-drivers@vmware.com
+R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/vmxnet3/
@@ -21237,6 +21347,11 @@ T: git https://github.com/Xilinx/linux-xlnx.git
F: Documentation/devicetree/bindings/phy/xlnx,zynqmp-psgtr.yaml
F: drivers/phy/xilinx/phy-zynqmp.c
+XILINX ZYNQMP SHA3 DRIVER
+M: Harsha <harsha.harsha@xilinx.com>
+S: Maintained
+F: drivers/crypto/xilinx/zynqmp-sha.c
+
XILINX EVENT MANAGEMENT DRIVER
M: Abhyuday Godhasara <abhyuday.godhasara@xilinx.com>
S: Maintained
@@ -21417,7 +21532,6 @@ THE REST
M: Linus Torvalds <torvalds@linux-foundation.org>
L: linux-kernel@vger.kernel.org
S: Buried alive in reporters
-Q: http://patchwork.kernel.org/project/LKML/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
F: *
F: */
diff --git a/Makefile b/Makefile
index 0fb4f94a6885..7214f075e1f0 100644
--- a/Makefile
+++ b/Makefile
@@ -2,8 +2,8 @@
VERSION = 5
PATCHLEVEL = 17
SUBLEVEL = 0
-EXTRAVERSION = -rc1
-NAME = Gobble Gobble
+EXTRAVERSION =
+NAME = Superb Owl
# *DOCUMENTATION*
# To see a list of typical targets execute "make help"
diff --git a/arch/Kconfig b/arch/Kconfig
index 678a80713b21..17709ac1030a 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -599,21 +599,22 @@ config STACKPROTECTOR_STRONG
config ARCH_SUPPORTS_SHADOW_CALL_STACK
bool
help
- An architecture should select this if it supports Clang's Shadow
- Call Stack and implements runtime support for shadow stack
+ An architecture should select this if it supports the compiler's
+ Shadow Call Stack and implements runtime support for shadow stack
switching.
config SHADOW_CALL_STACK
- bool "Clang Shadow Call Stack"
- depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
+ bool "Shadow Call Stack"
+ depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
help
- This option enables Clang's Shadow Call Stack, which uses a
- shadow stack to protect function return addresses from being
- overwritten by an attacker. More information can be found in
- Clang's documentation:
+ This option enables the compiler's Shadow Call Stack, which
+ uses a shadow stack to protect function return addresses from
+ being overwritten by an attacker. More information can be found
+ in the compiler's documentation:
- https://clang.llvm.org/docs/ShadowCallStack.html
+ - Clang: https://clang.llvm.org/docs/ShadowCallStack.html
+ - GCC: https://gcc.gnu.org/onlinedocs/gcc/Instrumentation-Options.html#Instrumentation-Options
Note that security guarantees in the kernel differ from the
ones documented for user space. The kernel must store addresses
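A rough C-level model of the feature may help readers here. This is purely illustrative: the real mechanism is compiler-emitted prologue/epilogue code (enabled with -fsanitize=shadow-call-stack) that keeps the shadow stack pointer in a reserved register, x18 on arm64; the array and helper names below are hypothetical.

    /* Illustrative model only -- not the kernel's implementation. */
    #define SCS_DEPTH 512
    static void *shadow_stack[SCS_DEPTH];   /* hypothetical per-task store */
    static unsigned int scs_top;

    /* prologue: save the return address to the shadow stack */
    #define SCS_PROLOGUE() (shadow_stack[scs_top++] = __builtin_return_address(0))
    /* epilogue: return via the shadow copy, not the writable stack slot */
    #define SCS_EPILOGUE() (scs_top--)

An attacker who overwrites the return address on the ordinary stack no longer redirects control flow, because the epilogue takes the address from the shadow copy.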
@@ -1159,16 +1160,30 @@ config HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
to the compiler, so it will attempt to add canary checks regardless
of the static branch state.
-config RANDOMIZE_KSTACK_OFFSET_DEFAULT
- bool "Randomize kernel stack offset on syscall entry"
+config RANDOMIZE_KSTACK_OFFSET
+ bool "Support for randomizing kernel stack offset on syscall entry" if EXPERT
+ default y
depends on HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
+ depends on INIT_STACK_NONE || !CC_IS_CLANG || CLANG_VERSION >= 140000
help
The kernel stack offset can be randomized (after pt_regs) by
roughly 5 bits of entropy, frustrating memory corruption
attacks that depend on stack address determinism or
- cross-syscall address exposures. This feature is controlled
- by kernel boot param "randomize_kstack_offset=on/off", and this
- config chooses the default boot state.
+ cross-syscall address exposures.
+
+ The feature is controlled via the "randomize_kstack_offset=on/off"
+ kernel boot param, and if turned off has zero overhead due to its use
+ of static branches (see JUMP_LABEL).
+
+ If unsure, say Y.
+
+config RANDOMIZE_KSTACK_OFFSET_DEFAULT
+ bool "Default state of kernel stack offset randomization"
+ depends on RANDOMIZE_KSTACK_OFFSET
+ help
+ Kernel stack offset randomization is controlled by kernel boot param
+ "randomize_kstack_offset=on/off", and this config chooses the default
+ boot state.
config ARCH_OPTIONAL_KERNEL_RWX
def_bool n
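A minimal sketch of the mechanism, condensed from include/linux/randomize_kstack.h (the real macro additionally sits behind the static branch the help text mentions; the function name here is invented):

    #define KSTACK_OFFSET_MAX(x)    ((x) & 0x3FF)

    static __always_inline void add_random_kstack_offset_sketch(u32 rand)
    {
            u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(rand));

            asm volatile("" : : "r"(ptr) : "memory"); /* keep the alloca alive */
    }

The alloca() shifts everything after pt_regs by a per-syscall random amount; after stack alignment the effective entropy is the "roughly 5 bits" quoted in the help text.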
@@ -1278,12 +1293,41 @@ config HAVE_STATIC_CALL_INLINE
config HAVE_PREEMPT_DYNAMIC
bool
+
+config HAVE_PREEMPT_DYNAMIC_CALL
+ bool
depends on HAVE_STATIC_CALL
- depends on GENERIC_ENTRY
+ select HAVE_PREEMPT_DYNAMIC
help
- Select this if the architecture support boot time preempt setting
- on top of static calls. It is strongly advised to support inline
- static call to avoid any overhead.
+ An architecture should select this if it can handle the preemption
+ model being selected at boot time using static calls.
+
+ Where an architecture selects HAVE_STATIC_CALL_INLINE, any call to a
+ preemption function will be patched directly.
+
+ Where an architecture does not select HAVE_STATIC_CALL_INLINE, any
+ call to a preemption function will go through a trampoline, and the
+ trampoline will be patched.
+
+ It is strongly advised to support inline static call to avoid any
+ overhead.
+
+config HAVE_PREEMPT_DYNAMIC_KEY
+ bool
+ depends on HAVE_ARCH_JUMP_LABEL && CC_HAS_ASM_GOTO
+ select HAVE_PREEMPT_DYNAMIC
+ help
+ An architecture should select this if it can handle the preemption
+ model being selected at boot time using static keys.
+
+ Each preemption function will be given an early return based on a
+ static key. This should have slightly lower overhead than non-inline
+ static calls, as this effectively inlines each trampoline into the
+ start of its callee. This may avoid redundant work, and may
+ integrate better with CFI schemes.
+
+ This will have greater overhead than using inline static calls as
+ the call to the preemption function cannot be entirely elided.
config ARCH_WANT_LD_ORPHAN_WARN
bool
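The two dispatch styles read side by side like this (a hedged sketch; the key and call names are illustrative, see kernel/sched/core.c for the real ones):

    /* HAVE_PREEMPT_DYNAMIC_CALL: the call site or its trampoline is patched. */
    DEFINE_STATIC_CALL(preempt_fn, preempt_schedule);
    /* callers run: static_call(preempt_fn)(); */

    /* HAVE_PREEMPT_DYNAMIC_KEY: each entry point gets an early return. */
    DEFINE_STATIC_KEY_TRUE(sk_preempt_example);

    void dynamic_preempt_schedule(void)
    {
            if (!static_branch_unlikely(&sk_preempt_example))
                    return;                 /* "none" / "voluntary" models */
            preempt_schedule();             /* "full" model */
    }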
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 4e87783c90ad..14c97acea351 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -12,7 +12,6 @@ config ALPHA
select FORCE_PCI if !ALPHA_JENSEN
select PCI_DOMAINS if PCI
select PCI_SYSCALL if PCI
- select HAVE_AOUT
select HAVE_ASM_MODVERSIONS
select HAVE_PCSPKR_PLATFORM
select HAVE_PERF_EVENTS
diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig
index 7f1ca30b115b..7e9336930880 100644
--- a/arch/alpha/configs/defconfig
+++ b/arch/alpha/configs/defconfig
@@ -62,7 +62,6 @@ CONFIG_TMPFS=y
CONFIG_NFS_FS=m
CONFIG_NFS_V3=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 02f0429f1068..170451fde043 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -233,6 +233,7 @@ pmd_page_vaddr(pmd_t pmd)
return ((pmd_val(pmd) & _PFN_MASK) >> (32-PAGE_SHIFT)) + PAGE_OFFSET;
}
+#define pmd_pfn(pmd) (pmd_val(pmd) >> 32)
#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> 32))
#define pud_page(pud) (pfn_to_page(pud_val(pud) >> 32))
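Several architectures gain a pmd_pfn() definition in this series (alpha here, arc and 32-bit ARM further down) so that generic MM code can rely on one invariant; a sketch of what callers may now assume:

    /* Equivalent to each architecture's own pmd_page() (illustrative). */
    static inline struct page *pmd_page_generic(pmd_t pmd)
    {
            return pfn_to_page(pmd_pfn(pmd));
    }

Alpha's definition simply shifts the PFN field out of the entry, matching the pmd_page() definition next to it.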
diff --git a/arch/alpha/include/asm/xor.h b/arch/alpha/include/asm/xor.h
index 5aeb4fb3cb7c..e0de0c233ab9 100644
--- a/arch/alpha/include/asm/xor.h
+++ b/arch/alpha/include/asm/xor.h
@@ -5,24 +5,43 @@
* Optimized RAID-5 checksumming functions for alpha EV5 and EV6
*/
-extern void xor_alpha_2(unsigned long, unsigned long *, unsigned long *);
-extern void xor_alpha_3(unsigned long, unsigned long *, unsigned long *,
- unsigned long *);
-extern void xor_alpha_4(unsigned long, unsigned long *, unsigned long *,
- unsigned long *, unsigned long *);
-extern void xor_alpha_5(unsigned long, unsigned long *, unsigned long *,
- unsigned long *, unsigned long *, unsigned long *);
+extern void
+xor_alpha_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+extern void
+xor_alpha_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3);
+extern void
+xor_alpha_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+extern void
+xor_alpha_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5);
-extern void xor_alpha_prefetch_2(unsigned long, unsigned long *,
- unsigned long *);
-extern void xor_alpha_prefetch_3(unsigned long, unsigned long *,
- unsigned long *, unsigned long *);
-extern void xor_alpha_prefetch_4(unsigned long, unsigned long *,
- unsigned long *, unsigned long *,
- unsigned long *);
-extern void xor_alpha_prefetch_5(unsigned long, unsigned long *,
- unsigned long *, unsigned long *,
- unsigned long *, unsigned long *);
+extern void
+xor_alpha_prefetch_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+extern void
+xor_alpha_prefetch_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3);
+extern void
+xor_alpha_prefetch_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+extern void
+xor_alpha_prefetch_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5);
asm(" \n\
.text \n\
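The prototype churn is worth a word: __restrict promises the compiler that the destination and source buffers never alias, which is what lets it keep values in registers and vectorize. A minimal illustration, not taken from the patch:

    static void xor_words(unsigned long * __restrict p1,
                          const unsigned long * __restrict p2,
                          unsigned long words)
    {
            while (words--)
                    *p1++ ^= *p2++;         /* now freely reorderable */
    }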
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index f6114d03357c..7511723b7669 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -76,14 +76,14 @@ pgd_alloc(struct mm_struct *mm)
pmd_t *
__bad_pagetable(void)
{
- memset((void *) EMPTY_PGT, 0, PAGE_SIZE);
+ memset(absolute_pointer(EMPTY_PGT), 0, PAGE_SIZE);
return (pmd_t *) EMPTY_PGT;
}
pte_t
__bad_page(void)
{
- memset((void *) EMPTY_PGE, 0, PAGE_SIZE);
+ memset(absolute_pointer(EMPTY_PGE), 0, PAGE_SIZE);
return pte_mkdirty(mk_pte(virt_to_page(EMPTY_PGE), PAGE_SHARED));
}
@@ -253,7 +253,7 @@ void __init paging_init(void)
free_area_init(max_zone_pfn);
/* Initialize the kernel's ZERO_PGE. */
- memset((void *)ZERO_PGE, 0, PAGE_SIZE);
+ memset(absolute_pointer(ZERO_PGE), 0, PAGE_SIZE);
}
#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SRM)
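absolute_pointer() exists for exactly this pattern: EMPTY_PGT, EMPTY_PGE and ZERO_PGE are fixed addresses, and recent GCC otherwise "proves" the memset() out of bounds against whatever object it believes lives there. Its definition in include/linux/compiler.h is essentially:

    /* Launder a build-time constant address so the optimizer cannot
     * track its origin (hedged paraphrase of the real definition). */
    #define absolute_pointer(val)   RELOC_HIDE((void *)(val), 0)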
diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h
index 11b0ff26b97b..5001b796fb8d 100644
--- a/arch/arc/include/asm/hugepage.h
+++ b/arch/arc/include/asm/hugepage.h
@@ -31,7 +31,6 @@ static inline pmd_t pte_pmd(pte_t pte)
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
-#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd))
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define mk_pmd(page, prot) pte_pmd(mk_pte(page, prot))
diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h
index 8084ef2f6491..7848348719b2 100644
--- a/arch/arc/include/asm/pgtable-levels.h
+++ b/arch/arc/include/asm/pgtable-levels.h
@@ -161,6 +161,7 @@
#define pmd_present(x) (pmd_val(x))
#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0)
#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK)
+#define pmd_pfn(pmd) ((pmd_val(pmd) & PAGE_MASK) >> PAGE_SHIFT)
#define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd))
#define set_pmd(pmdp, pmd) (*(pmdp) = pmd)
#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd))
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4c97cb40eebb..49652ef9eff4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -5,6 +5,7 @@ config ARM
select ARCH_32BIT_OFF_T
select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE if HAVE_KRETPROBES && FRAME_POINTER && !ARM_UNWIND
select ARCH_HAS_BINFMT_FLAT
+ select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
select ARCH_HAS_ELF_RANDOMIZE
@@ -37,6 +38,7 @@ config ARM
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_USE_MEMTEST
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
+ select ARCH_WANT_GENERAL_HUGETLB
select ARCH_WANT_IPC_PARSE_VERSION
select ARCH_WANT_LD_ORPHAN_WARN
select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
@@ -1508,9 +1510,6 @@ config HW_PERF_EVENTS
def_bool y
depends on ARM_PMU
-config ARCH_WANT_GENERAL_HUGETLB
- def_bool y
-
config ARM_MODULE_PLTS
bool "Use PLTs to allow module memory to spill over into vmalloc area"
depends on MODULES
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 235ad559acb2..e41eca79c950 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -806,6 +806,7 @@ dtb-$(CONFIG_ARCH_OMAP3) += \
logicpd-som-lv-37xx-devkit.dtb \
omap3430-sdp.dtb \
omap3-beagle.dtb \
+ omap3-beagle-ab4.dtb \
omap3-beagle-xm.dtb \
omap3-beagle-xm-ab.dtb \
omap3-cm-t3517.dtb \
diff --git a/arch/arm/boot/dts/am335x-wega.dtsi b/arch/arm/boot/dts/am335x-wega.dtsi
index 673159d93a6a..f957fea8208e 100644
--- a/arch/arm/boot/dts/am335x-wega.dtsi
+++ b/arch/arm/boot/dts/am335x-wega.dtsi
@@ -55,7 +55,7 @@
2 1 0 0 /* # 0: INACTIVE, 1: TX, 2: RX */
>;
tx-num-evt = <16>;
- rt-num-evt = <16>;
+ rx-num-evt = <16>;
status = "okay";
};
diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
index 6dde51c2aed3..e4775bbceecc 100644
--- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
+++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
@@ -118,7 +118,7 @@
};
pinctrl_fwqspid_default: fwqspid_default {
- function = "FWQSPID";
+ function = "FWSPID";
groups = "FWQSPID";
};
diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi
index dff18fc9a906..21294f775a20 100644
--- a/arch/arm/boot/dts/bcm2711.dtsi
+++ b/arch/arm/boot/dts/bcm2711.dtsi
@@ -290,6 +290,7 @@
hvs: hvs@7e400000 {
compatible = "brcm,bcm2711-hvs";
+ reg = <0x7e400000 0x8000>;
interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
};
diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi
index 956a26d52a4c..0a11bacffc1f 100644
--- a/arch/arm/boot/dts/dra7-l4.dtsi
+++ b/arch/arm/boot/dts/dra7-l4.dtsi
@@ -3482,8 +3482,7 @@
ti,timer-pwm;
};
};
-
- target-module@2c000 { /* 0x4882c000, ap 17 02.0 */
+ timer15_target: target-module@2c000 { /* 0x4882c000, ap 17 02.0 */
compatible = "ti,sysc-omap4-timer", "ti,sysc";
reg = <0x2c000 0x4>,
<0x2c010 0x4>;
@@ -3511,7 +3510,7 @@
};
};
- target-module@2e000 { /* 0x4882e000, ap 19 14.0 */
+ timer16_target: target-module@2e000 { /* 0x4882e000, ap 19 14.0 */
compatible = "ti,sysc-omap4-timer", "ti,sysc";
reg = <0x2e000 0x4>,
<0x2e010 0x4>;
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 6b485cbed8d5..97ce0c4f1df7 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -160,7 +160,7 @@
target-module@48210000 {
compatible = "ti,sysc-omap4-simple", "ti,sysc";
power-domains = <&prm_mpu>;
- clocks = <&mpu_clkctrl DRA7_MPU_CLKCTRL 0>;
+ clocks = <&mpu_clkctrl DRA7_MPU_MPU_CLKCTRL 0>;
clock-names = "fck";
#address-cells = <1>;
#size-cells = <1>;
@@ -875,10 +875,10 @@
<0x58000014 4>;
reg-names = "rev", "syss";
ti,syss-mask = <1>;
- clocks = <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 0>,
- <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 9>,
- <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 10>,
- <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 11>;
+ clocks = <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 0>,
+ <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 9>,
+ <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 10>,
+ <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 11>;
clock-names = "fck", "hdmi_clk", "sys_clk", "tv_clk";
#address-cells = <1>;
#size-cells = <1>;
@@ -912,7 +912,7 @@
SYSC_OMAP2_SOFTRESET |
SYSC_OMAP2_AUTOIDLE)>;
ti,syss-mask = <1>;
- clocks = <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 8>;
+ clocks = <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 8>;
clock-names = "fck";
#address-cells = <1>;
#size-cells = <1>;
@@ -939,8 +939,8 @@
<SYSC_IDLE_SMART>,
<SYSC_IDLE_SMART_WKUP>;
ti,sysc-mask = <(SYSC_OMAP4_SOFTRESET)>;
- clocks = <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 9>,
- <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 8>;
+ clocks = <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 9>,
+ <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 8>;
clock-names = "fck", "dss_clk";
#address-cells = <1>;
#size-cells = <1>;
@@ -979,7 +979,7 @@
compatible = "vivante,gc";
reg = <0x0 0x700>;
interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&dss_clkctrl DRA7_BB2D_CLKCTRL 0>;
+ clocks = <&dss_clkctrl DRA7_DSS_BB2D_CLKCTRL 0>;
clock-names = "core";
};
};
@@ -1333,26 +1333,26 @@
ti,no-reset-on-init;
ti,no-idle;
timer@0 {
- assigned-clocks = <&wkupaon_clkctrl DRA7_TIMER1_CLKCTRL 24>;
+ assigned-clocks = <&wkupaon_clkctrl DRA7_WKUPAON_TIMER1_CLKCTRL 24>;
assigned-clock-parents = <&sys_32k_ck>;
};
};
/* Local timers, see ARM architected timer wrap erratum i940 */
-&timer3_target {
+&timer15_target {
ti,no-reset-on-init;
ti,no-idle;
timer@0 {
- assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER3_CLKCTRL 24>;
+ assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>;
assigned-clock-parents = <&timer_sys_clk_div>;
};
};
-&timer4_target {
+&timer16_target {
ti,no-reset-on-init;
ti,no-idle;
timer@0 {
- assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER4_CLKCTRL 24>;
+ assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>;
assigned-clock-parents = <&timer_sys_clk_div>;
};
};
diff --git a/arch/arm/boot/dts/imx23-evk.dts b/arch/arm/boot/dts/imx23-evk.dts
index 8cbaf1c81174..3b609d987d88 100644
--- a/arch/arm/boot/dts/imx23-evk.dts
+++ b/arch/arm/boot/dts/imx23-evk.dts
@@ -79,7 +79,6 @@
MX23_PAD_LCD_RESET__GPIO_1_18
MX23_PAD_PWM3__GPIO_1_29
MX23_PAD_PWM4__GPIO_1_30
- MX23_PAD_SSP1_DETECT__SSP1_DETECT
>;
fsl,drive-strength = <MXS_DRIVE_4mA>;
fsl,voltage = <MXS_VOLTAGE_HIGH>;
diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi b/arch/arm/boot/dts/imx6qdl-udoo.dtsi
index d07d8f83456d..ccfa8e320be6 100644
--- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi
@@ -5,6 +5,8 @@
* Author: Fabio Estevam <fabio.estevam@freescale.com>
*/
+#include <dt-bindings/gpio/gpio.h>
+
/ {
aliases {
backlight = &backlight;
@@ -226,6 +228,7 @@
MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17059
MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17059
MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17059
+ MX6QDL_PAD_SD3_DAT5__GPIO7_IO00 0x1b0b0
>;
};
@@ -304,7 +307,7 @@
&usdhc3 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_usdhc3>;
- non-removable;
+ cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>;
status = "okay";
};
diff --git a/arch/arm/boot/dts/imx7ulp.dtsi b/arch/arm/boot/dts/imx7ulp.dtsi
index b7ea37ad4e55..bcec98b96411 100644
--- a/arch/arm/boot/dts/imx7ulp.dtsi
+++ b/arch/arm/boot/dts/imx7ulp.dtsi
@@ -259,7 +259,7 @@
interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&pcc2 IMX7ULP_CLK_WDG1>;
assigned-clocks = <&pcc2 IMX7ULP_CLK_WDG1>;
- assigned-clocks-parents = <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>;
+ assigned-clock-parents = <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>;
timeout-sec = <40>;
};
diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi
index 3be7cba603d5..26eaba3fa96f 100644
--- a/arch/arm/boot/dts/meson.dtsi
+++ b/arch/arm/boot/dts/meson.dtsi
@@ -59,7 +59,7 @@
};
uart_A: serial@84c0 {
- compatible = "amlogic,meson6-uart", "amlogic,meson-uart";
+ compatible = "amlogic,meson6-uart";
reg = <0x84c0 0x18>;
interrupts = <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>;
fifo-size = <128>;
@@ -67,7 +67,7 @@
};
uart_B: serial@84dc {
- compatible = "amlogic,meson6-uart", "amlogic,meson-uart";
+ compatible = "amlogic,meson6-uart";
reg = <0x84dc 0x18>;
interrupts = <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>;
status = "disabled";
@@ -105,7 +105,7 @@
};
uart_C: serial@8700 {
- compatible = "amlogic,meson6-uart", "amlogic,meson-uart";
+ compatible = "amlogic,meson6-uart";
reg = <0x8700 0x18>;
interrupts = <GIC_SPI 93 IRQ_TYPE_EDGE_RISING>;
status = "disabled";
@@ -228,7 +228,7 @@
};
uart_AO: serial@4c0 {
- compatible = "amlogic,meson6-uart", "amlogic,meson-ao-uart", "amlogic,meson-uart";
+ compatible = "amlogic,meson6-uart", "amlogic,meson-ao-uart";
reg = <0x4c0 0x18>;
interrupts = <GIC_SPI 90 IRQ_TYPE_EDGE_RISING>;
status = "disabled";
diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi
index f80ddc98d3a2..9997a5d0333a 100644
--- a/arch/arm/boot/dts/meson8.dtsi
+++ b/arch/arm/boot/dts/meson8.dtsi
@@ -736,27 +736,27 @@
};
&uart_AO {
- compatible = "amlogic,meson8-uart", "amlogic,meson-uart";
- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_CLK81>;
- clock-names = "baud", "xtal", "pclk";
+ compatible = "amlogic,meson8-uart", "amlogic,meson-ao-uart";
+ clocks = <&xtal>, <&clkc CLKID_CLK81>, <&clkc CLKID_CLK81>;
+ clock-names = "xtal", "pclk", "baud";
};
&uart_A {
- compatible = "amlogic,meson8-uart", "amlogic,meson-uart";
- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART0>;
- clock-names = "baud", "xtal", "pclk";
+ compatible = "amlogic,meson8-uart";
+ clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>;
+ clock-names = "xtal", "pclk", "baud";
};
&uart_B {
- compatible = "amlogic,meson8-uart", "amlogic,meson-uart";
- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART1>;
- clock-names = "baud", "xtal", "pclk";
+ compatible = "amlogic,meson8-uart";
+ clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>;
+ clock-names = "xtal", "pclk", "baud";
};
&uart_C {
- compatible = "amlogic,meson8-uart", "amlogic,meson-uart";
- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART2>;
- clock-names = "baud", "xtal", "pclk";
+ compatible = "amlogic,meson8-uart";
+ clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>;
+ clock-names = "xtal", "pclk", "baud";
};
&usb0 {
diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi
index b49b7cbaed4e..94f1c03decce 100644
--- a/arch/arm/boot/dts/meson8b.dtsi
+++ b/arch/arm/boot/dts/meson8b.dtsi
@@ -724,27 +724,27 @@
};
&uart_AO {
- compatible = "amlogic,meson8b-uart", "amlogic,meson-uart";
- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_CLK81>;
- clock-names = "baud", "xtal", "pclk";
+ compatible = "amlogic,meson8b-uart", "amlogic,meson-ao-uart";
+ clocks = <&xtal>, <&clkc CLKID_CLK81>, <&clkc CLKID_CLK81>;
+ clock-names = "xtal", "pclk", "baud";
};
&uart_A {
- compatible = "amlogic,meson8b-uart", "amlogic,meson-uart";
- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART0>;
- clock-names = "baud", "xtal", "pclk";
+ compatible = "amlogic,meson8b-uart";
+ clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>;
+ clock-names = "xtal", "pclk", "baud";
};
&uart_B {
- compatible = "amlogic,meson8b-uart", "amlogic,meson-uart";
- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART1>;
- clock-names = "baud", "xtal", "pclk";
+ compatible = "amlogic,meson8b-uart";
+ clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>;
+ clock-names = "xtal", "pclk", "baud";
};
&uart_C {
- compatible = "amlogic,meson8b-uart", "amlogic,meson-uart";
- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART2>;
- clock-names = "baud", "xtal", "pclk";
+ compatible = "amlogic,meson8b-uart";
+ clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>;
+ clock-names = "xtal", "pclk", "baud";
};
&usb0 {
diff --git a/arch/arm/boot/dts/omap3-beagle-ab4.dts b/arch/arm/boot/dts/omap3-beagle-ab4.dts
new file mode 100644
index 000000000000..990ff2d84686
--- /dev/null
+++ b/arch/arm/boot/dts/omap3-beagle-ab4.dts
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/dts-v1/;
+
+#include "omap3-beagle.dts"
+
+/ {
+ model = "TI OMAP3 BeagleBoard A to B4";
+ compatible = "ti,omap3-beagle-ab4", "ti,omap3-beagle", "ti,omap3430", "ti,omap3";
+};
+
+/*
+ * Workaround for capacitor C70 issue, see "Boards revision A and < B5"
+ * section at https://elinux.org/BeagleBoard_Community
+ */
+
+/* Unusable as clocksource because of unreliable oscillator */
+&counter32k {
+ status = "disabled";
+};
+
+/* Unusable as clockevent because of unreliable oscillator, allow to idle */
+&timer1_target {
+ /delete-property/ti,no-reset-on-init;
+ /delete-property/ti,no-idle;
+ timer@0 {
+ /delete-property/ti,timer-alwon;
+ };
+};
+
+/* Preferred always-on timer for clocksource */
+&timer12_target {
+ ti,no-reset-on-init;
+ ti,no-idle;
+ timer@0 {
+ /* Always clocked by secure_32k_fck */
+ };
+};
+
+/* Preferred timer for clockevent */
+&timer2_target {
+ ti,no-reset-on-init;
+ ti,no-idle;
+ timer@0 {
+ assigned-clocks = <&gpt2_fck>;
+ assigned-clock-parents = <&sys_ck>;
+ };
+};
diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts
index f9f34b8458e9..0548b391334f 100644
--- a/arch/arm/boot/dts/omap3-beagle.dts
+++ b/arch/arm/boot/dts/omap3-beagle.dts
@@ -304,39 +304,6 @@
phys = <0 &hsusb2_phy>;
};
-/* Unusable as clocksource because of unreliable oscillator */
-&counter32k {
- status = "disabled";
-};
-
-/* Unusable as clockevent because if unreliable oscillator, allow to idle */
-&timer1_target {
- /delete-property/ti,no-reset-on-init;
- /delete-property/ti,no-idle;
- timer@0 {
- /delete-property/ti,timer-alwon;
- };
-};
-
-/* Preferred always-on timer for clocksource */
-&timer12_target {
- ti,no-reset-on-init;
- ti,no-idle;
- timer@0 {
- /* Always clocked by secure_32k_fck */
- };
-};
-
-/* Preferred timer for clockevent */
-&timer2_target {
- ti,no-reset-on-init;
- ti,no-idle;
- timer@0 {
- assigned-clocks = <&gpt2_fck>;
- assigned-clock-parents = <&sys_ck>;
- };
-};
-
&twl_gpio {
ti,use-leds;
/* pullups: BIT(1) */
diff --git a/arch/arm/boot/dts/omap3-devkit8000-common.dtsi b/arch/arm/boot/dts/omap3-devkit8000-common.dtsi
index 5e55198e4576..54cd37336be7 100644
--- a/arch/arm/boot/dts/omap3-devkit8000-common.dtsi
+++ b/arch/arm/boot/dts/omap3-devkit8000-common.dtsi
@@ -158,6 +158,24 @@
status = "disabled";
};
+/* Unusable as clockevent because of unreliable oscillator, allow to idle */
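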
+&timer1_target {
+ /delete-property/ti,no-reset-on-init;
+ /delete-property/ti,no-idle;
+ timer@0 {
+ /delete-property/ti,timer-alwon;
+ };
+};
+
+/* Preferred timer for clockevent */
+&timer12_target {
+ ti,no-reset-on-init;
+ ti,no-idle;
+ timer@0 {
+ /* Always clocked by secure_32k_fck */
+ };
+};
+
&twl_gpio {
ti,use-leds;
/*
diff --git a/arch/arm/boot/dts/omap3-devkit8000.dts b/arch/arm/boot/dts/omap3-devkit8000.dts
index c2995a280729..162d0726b008 100644
--- a/arch/arm/boot/dts/omap3-devkit8000.dts
+++ b/arch/arm/boot/dts/omap3-devkit8000.dts
@@ -14,36 +14,3 @@
display2 = &tv0;
};
};
-
-/* Unusable as clocksource because of unreliable oscillator */
-&counter32k {
- status = "disabled";
-};
-
-/* Unusable as clockevent because if unreliable oscillator, allow to idle */
-&timer1_target {
- /delete-property/ti,no-reset-on-init;
- /delete-property/ti,no-idle;
- timer@0 {
- /delete-property/ti,timer-alwon;
- };
-};
-
-/* Preferred always-on timer for clocksource */
-&timer12_target {
- ti,no-reset-on-init;
- ti,no-idle;
- timer@0 {
- /* Always clocked by secure_32k_fck */
- };
-};
-
-/* Preferred timer for clockevent */
-&timer2_target {
- ti,no-reset-on-init;
- ti,no-idle;
- timer@0 {
- assigned-clocks = <&gpt2_fck>;
- assigned-clock-parents = <&sys_ck>;
- };
-};
diff --git a/arch/arm/boot/dts/rk322x.dtsi b/arch/arm/boot/dts/rk322x.dtsi
index 8eed9e3a92e9..5868eb512f69 100644
--- a/arch/arm/boot/dts/rk322x.dtsi
+++ b/arch/arm/boot/dts/rk322x.dtsi
@@ -718,8 +718,8 @@
interrupts = <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>;
assigned-clocks = <&cru SCLK_HDMI_PHY>;
assigned-clock-parents = <&hdmi_phy>;
- clocks = <&cru SCLK_HDMI_HDCP>, <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_CEC>;
- clock-names = "isfr", "iahb", "cec";
+ clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_HDCP>, <&cru SCLK_HDMI_CEC>;
+ clock-names = "iahb", "isfr", "cec";
pinctrl-names = "default";
pinctrl-0 = <&hdmii2c_xfer &hdmi_hpd &hdmi_cec>;
resets = <&cru SRST_HDMI_P>;
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index aaaa61875701..45a9d9b908d2 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -971,7 +971,7 @@
status = "disabled";
};
- crypto: cypto-controller@ff8a0000 {
+ crypto: crypto@ff8a0000 {
compatible = "rockchip,rk3288-crypto";
reg = <0x0 0xff8a0000 0x0 0x4000>;
interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/arch/arm/boot/dts/spear320-hmi.dts b/arch/arm/boot/dts/spear320-hmi.dts
index 367ba48aac3e..b587e4ec11e5 100644
--- a/arch/arm/boot/dts/spear320-hmi.dts
+++ b/arch/arm/boot/dts/spear320-hmi.dts
@@ -235,7 +235,6 @@
#address-cells = <1>;
#size-cells = <0>;
reg = <0x41>;
- irq-over-gpio;
irq-gpios = <&gpiopinctrl 29 0x4>;
id = <0>;
blocks = <0x5>;
diff --git a/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts b/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts
index 580ca497f312..f8c5899fbdba 100644
--- a/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts
+++ b/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts
@@ -185,10 +185,6 @@
cap-sd-highspeed;
cap-mmc-highspeed;
/* All direction control is used */
- st,sig-dir-cmd;
- st,sig-dir-dat0;
- st,sig-dir-dat2;
- st,sig-dir-dat31;
st,sig-pin-fbclk;
full-pwr-cycle;
vmmc-supply = <&ab8500_ldo_aux3_reg>;
diff --git a/arch/arm/boot/dts/tegra124-nyan-big-fhd.dts b/arch/arm/boot/dts/tegra124-nyan-big-fhd.dts
index d35fb79d2f51..4db43324dafa 100644
--- a/arch/arm/boot/dts/tegra124-nyan-big-fhd.dts
+++ b/arch/arm/boot/dts/tegra124-nyan-big-fhd.dts
@@ -5,7 +5,13 @@
/ {
/* Version of Nyan Big with 1080p panel */
- panel {
- compatible = "auo,b133htn01";
+ host1x@50000000 {
+ dpaux@545c0000 {
+ aux-bus {
+ panel: panel {
+ compatible = "auo,b133htn01";
+ };
+ };
+ };
};
};
diff --git a/arch/arm/boot/dts/tegra124-nyan-big.dts b/arch/arm/boot/dts/tegra124-nyan-big.dts
index 1d2aac2cb6d0..fdc1d64dfff9 100644
--- a/arch/arm/boot/dts/tegra124-nyan-big.dts
+++ b/arch/arm/boot/dts/tegra124-nyan-big.dts
@@ -13,12 +13,15 @@
"google,nyan-big-rev1", "google,nyan-big-rev0",
"google,nyan-big", "google,nyan", "nvidia,tegra124";
- panel: panel {
- compatible = "auo,b133xtn01";
-
- power-supply = <&vdd_3v3_panel>;
- backlight = <&backlight>;
- ddc-i2c-bus = <&dpaux>;
+ host1x@50000000 {
+ dpaux@545c0000 {
+ aux-bus {
+ panel: panel {
+ compatible = "auo,b133xtn01";
+ backlight = <&backlight>;
+ };
+ };
+ };
};
mmc@700b0400 { /* SD Card on this bus */
diff --git a/arch/arm/boot/dts/tegra124-nyan-blaze.dts b/arch/arm/boot/dts/tegra124-nyan-blaze.dts
index 677babde6460..abdf4456826f 100644
--- a/arch/arm/boot/dts/tegra124-nyan-blaze.dts
+++ b/arch/arm/boot/dts/tegra124-nyan-blaze.dts
@@ -15,12 +15,15 @@
"google,nyan-blaze-rev0", "google,nyan-blaze",
"google,nyan", "nvidia,tegra124";
- panel: panel {
- compatible = "samsung,ltn140at29-301";
-
- power-supply = <&vdd_3v3_panel>;
- backlight = <&backlight>;
- ddc-i2c-bus = <&dpaux>;
+ host1x@50000000 {
+ dpaux@545c0000 {
+ aux-bus {
+ panel: panel {
+ compatible = "samsung,ltn140at29-301";
+ backlight = <&backlight>;
+ };
+ };
+ };
};
sound {
diff --git a/arch/arm/boot/dts/tegra124-venice2.dts b/arch/arm/boot/dts/tegra124-venice2.dts
index 232c90604df9..6a9592ceb5f2 100644
--- a/arch/arm/boot/dts/tegra124-venice2.dts
+++ b/arch/arm/boot/dts/tegra124-venice2.dts
@@ -48,6 +48,13 @@
dpaux@545c0000 {
vdd-supply = <&vdd_3v3_panel>;
status = "okay";
+
+ aux-bus {
+ panel: panel {
+ compatible = "lg,lp129qe";
+ backlight = <&backlight>;
+ };
+ };
};
};
@@ -1080,13 +1087,6 @@
};
};
- panel: panel {
- compatible = "lg,lp129qe";
- power-supply = <&vdd_3v3_panel>;
- backlight = <&backlight>;
- ddc-i2c-bus = <&dpaux>;
- };
-
vdd_mux: regulator-mux {
compatible = "regulator-fixed";
regulator-name = "+VDD_MUX";
diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig
index e849367c0566..b58d45a03607 100644
--- a/arch/arm/configs/davinci_all_defconfig
+++ b/arch/arm/configs/davinci_all_defconfig
@@ -258,7 +258,6 @@ CONFIG_MINIX_FS=m
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ASCII=m
CONFIG_NLS_ISO8859_1=y
diff --git a/arch/arm/configs/ezx_defconfig b/arch/arm/configs/ezx_defconfig
index ec84d80096b1..0788a892e160 100644
--- a/arch/arm/configs/ezx_defconfig
+++ b/arch/arm/configs/ezx_defconfig
@@ -309,7 +309,6 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_NFSD_V3_ACL=y
CONFIG_SMB_FS=m
CONFIG_CIFS=m
diff --git a/arch/arm/configs/imote2_defconfig b/arch/arm/configs/imote2_defconfig
index 6db871d4e077..015b7ef237de 100644
--- a/arch/arm/configs/imote2_defconfig
+++ b/arch/arm/configs/imote2_defconfig
@@ -283,7 +283,6 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_NFSD_V3_ACL=y
CONFIG_SMB_FS=m
CONFIG_CIFS=m
diff --git a/arch/arm/configs/integrator_defconfig b/arch/arm/configs/integrator_defconfig
index 4dfe321a79f6..5b485722ccf9 100644
--- a/arch/arm/configs/integrator_defconfig
+++ b/arch/arm/configs/integrator_defconfig
@@ -79,7 +79,6 @@ CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/arm/configs/iop32x_defconfig b/arch/arm/configs/iop32x_defconfig
index 18a21faa834c..de2cc6759cae 100644
--- a/arch/arm/configs/iop32x_defconfig
+++ b/arch/arm/configs/iop32x_defconfig
@@ -94,7 +94,6 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig
index 33c917df7b32..b1bcb858216b 100644
--- a/arch/arm/configs/keystone_defconfig
+++ b/arch/arm/configs/keystone_defconfig
@@ -213,7 +213,6 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
CONFIG_NFSD_V3_ACL=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
diff --git a/arch/arm/configs/lart_defconfig b/arch/arm/configs/lart_defconfig
index b6ddb9884326..2dfa33d7dca3 100644
--- a/arch/arm/configs/lart_defconfig
+++ b/arch/arm/configs/lart_defconfig
@@ -59,7 +59,6 @@ CONFIG_CRAMFS=m
CONFIG_NFS_FS=m
CONFIG_NFS_V3=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_NLS=y
CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_CODEPAGE_850=m
diff --git a/arch/arm/configs/netwinder_defconfig b/arch/arm/configs/netwinder_defconfig
index 2e3b20ef0db1..c3c171cec91b 100644
--- a/arch/arm/configs/netwinder_defconfig
+++ b/arch/arm/configs/netwinder_defconfig
@@ -71,7 +71,6 @@ CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
CONFIG_SMB_FS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_NLS_CODEPAGE_437=y
diff --git a/arch/arm/configs/versatile_defconfig b/arch/arm/configs/versatile_defconfig
index d06aa64e05a1..c2d79a67f81f 100644
--- a/arch/arm/configs/versatile_defconfig
+++ b/arch/arm/configs/versatile_defconfig
@@ -86,7 +86,6 @@ CONFIG_ROMFS_FS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
CONFIG_NLS_CODEPAGE_850=m
CONFIG_NLS_ISO8859_1=m
CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/arm/configs/viper_defconfig b/arch/arm/configs/viper_defconfig
index 989599ce5300..c28539bfd128 100644
--- a/arch/arm/configs/viper_defconfig
+++ b/arch/arm/configs/viper_defconfig
@@ -145,7 +145,6 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_CODEPAGE_850=m
diff --git a/arch/arm/configs/zeus_defconfig b/arch/arm/configs/zeus_defconfig
index d3b98c4d225b..25bb6995f105 100644
--- a/arch/arm/configs/zeus_defconfig
+++ b/arch/arm/configs/zeus_defconfig
@@ -160,7 +160,6 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_CODEPAGE_850=m
diff --git a/arch/arm/crypto/aes-neonbs-core.S b/arch/arm/crypto/aes-neonbs-core.S
index 7d0cc7f226a5..7b61032f29fa 100644
--- a/arch/arm/crypto/aes-neonbs-core.S
+++ b/arch/arm/crypto/aes-neonbs-core.S
@@ -758,29 +758,24 @@ ENTRY(aesbs_cbc_decrypt)
ENDPROC(aesbs_cbc_decrypt)
.macro next_ctr, q
- vmov.32 \q\()h[1], r10
+ vmov \q\()h, r9, r10
adds r10, r10, #1
- vmov.32 \q\()h[0], r9
adcs r9, r9, #0
- vmov.32 \q\()l[1], r8
+ vmov \q\()l, r7, r8
adcs r8, r8, #0
- vmov.32 \q\()l[0], r7
adc r7, r7, #0
vrev32.8 \q, \q
.endm
/*
* aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
- * int rounds, int blocks, u8 ctr[], u8 final[])
+ * int rounds, int bytes, u8 ctr[])
*/
ENTRY(aesbs_ctr_encrypt)
mov ip, sp
push {r4-r10, lr}
- ldm ip, {r5-r7} // load args 4-6
- teq r7, #0
- addne r5, r5, #1 // one extra block if final != 0
-
+ ldm ip, {r5, r6} // load args 4-5
vld1.8 {q0}, [r6] // load counter
vrev32.8 q1, q0
vmov r9, r10, d3
@@ -792,20 +787,19 @@ ENTRY(aesbs_ctr_encrypt)
adc r7, r7, #0
99: vmov q1, q0
+ sub lr, r5, #1
vmov q2, q0
+ adr ip, 0f
vmov q3, q0
+ and lr, lr, #112
vmov q4, q0
+ cmp r5, #112
vmov q5, q0
+ sub ip, ip, lr, lsl #1
vmov q6, q0
+ add ip, ip, lr, lsr #2
vmov q7, q0
-
- adr ip, 0f
- sub lr, r5, #1
- and lr, lr, #7
- cmp r5, #8
- sub ip, ip, lr, lsl #5
- sub ip, ip, lr, lsl #2
- movlt pc, ip // computed goto if blocks < 8
+ movle pc, ip // computed goto if bytes < 112
next_ctr q1
next_ctr q2
@@ -820,12 +814,14 @@ ENTRY(aesbs_ctr_encrypt)
bl aesbs_encrypt8
adr ip, 1f
- and lr, r5, #7
- cmp r5, #8
- movgt r4, #0
- ldrle r4, [sp, #40] // load final in the last round
- sub ip, ip, lr, lsl #2
- movlt pc, ip // computed goto if blocks < 8
+ sub lr, r5, #1
+ cmp r5, #128
+ bic lr, lr, #15
+ ands r4, r5, #15 // preserves C flag
+ teqcs r5, r5 // set Z flag if not last iteration
+ sub ip, ip, lr, lsr #2
+ rsb r4, r4, #16
+ movcc pc, ip // computed goto if bytes < 128
vld1.8 {q8}, [r1]!
vld1.8 {q9}, [r1]!
@@ -834,46 +830,70 @@ ENTRY(aesbs_ctr_encrypt)
vld1.8 {q12}, [r1]!
vld1.8 {q13}, [r1]!
vld1.8 {q14}, [r1]!
- teq r4, #0 // skip last block if 'final'
-1: bne 2f
+1: subne r1, r1, r4
vld1.8 {q15}, [r1]!
-2: adr ip, 3f
- cmp r5, #8
- sub ip, ip, lr, lsl #3
- movlt pc, ip // computed goto if blocks < 8
+ add ip, ip, #2f - 1b
veor q0, q0, q8
- vst1.8 {q0}, [r0]!
veor q1, q1, q9
- vst1.8 {q1}, [r0]!
veor q4, q4, q10
- vst1.8 {q4}, [r0]!
veor q6, q6, q11
- vst1.8 {q6}, [r0]!
veor q3, q3, q12
- vst1.8 {q3}, [r0]!
veor q7, q7, q13
- vst1.8 {q7}, [r0]!
veor q2, q2, q14
+ bne 3f
+ veor q5, q5, q15
+
+ movcc pc, ip // computed goto if bytes < 128
+
+ vst1.8 {q0}, [r0]!
+ vst1.8 {q1}, [r0]!
+ vst1.8 {q4}, [r0]!
+ vst1.8 {q6}, [r0]!
+ vst1.8 {q3}, [r0]!
+ vst1.8 {q7}, [r0]!
vst1.8 {q2}, [r0]!
- teq r4, #0 // skip last block if 'final'
- W(bne) 5f
-3: veor q5, q5, q15
+2: subne r0, r0, r4
vst1.8 {q5}, [r0]!
-4: next_ctr q0
+ next_ctr q0
- subs r5, r5, #8
+ subs r5, r5, #128
bgt 99b
vst1.8 {q0}, [r6]
pop {r4-r10, pc}
-5: vst1.8 {q5}, [r4]
- b 4b
+3: adr lr, .Lpermute_table + 16
+ cmp r5, #16 // Z flag remains cleared
+ sub lr, lr, r4
+ vld1.8 {q8-q9}, [lr]
+ vtbl.8 d16, {q5}, d16
+ vtbl.8 d17, {q5}, d17
+ veor q5, q8, q15
+ bcc 4f // have to reload prev if R5 < 16
+ vtbx.8 d10, {q2}, d18
+ vtbx.8 d11, {q2}, d19
+ mov pc, ip // branch back to VST sequence
+
+4: sub r0, r0, r4
+ vshr.s8 q9, q9, #7 // create mask for VBIF
+ vld1.8 {q8}, [r0] // reload
+ vbif q5, q8, q9
+ vst1.8 {q5}, [r0]
+ pop {r4-r10, pc}
ENDPROC(aesbs_ctr_encrypt)
+ .align 6
+.Lpermute_table:
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+
.macro next_tweak, out, in, const, tmp
vshr.s64 \tmp, \in, #63
vand \tmp, \tmp, \const
@@ -888,6 +908,7 @@ ENDPROC(aesbs_ctr_encrypt)
* aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, u8 iv[], int reorder_last_tweak)
*/
+ .align 6
__xts_prepare8:
vld1.8 {q14}, [r7] // load iv
vmov.i32 d30, #0x87 // compose tweak mask vector
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index 5c6cd3c63cbc..f00f042ef357 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -37,7 +37,7 @@ asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]);
asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
- int rounds, int blocks, u8 ctr[], u8 final[]);
+ int rounds, int blocks, u8 ctr[]);
asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[], int);
@@ -243,32 +243,25 @@ static int ctr_encrypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, false);
while (walk.nbytes > 0) {
- unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
- u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ int bytes = walk.nbytes;
- if (walk.nbytes < walk.total) {
- blocks = round_down(blocks,
- walk.stride / AES_BLOCK_SIZE);
- final = NULL;
- }
+ if (unlikely(bytes < AES_BLOCK_SIZE))
+ src = dst = memcpy(buf + sizeof(buf) - bytes,
+ src, bytes);
+ else if (walk.nbytes < walk.total)
+ bytes &= ~(8 * AES_BLOCK_SIZE - 1);
kernel_neon_begin();
- aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->rk, ctx->rounds, blocks, walk.iv, final);
+ aesbs_ctr_encrypt(dst, src, ctx->rk, ctx->rounds, bytes, walk.iv);
kernel_neon_end();
- if (final) {
- u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
- u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+ if (unlikely(bytes < AES_BLOCK_SIZE))
+ memcpy(walk.dst.virt.addr,
+ buf + sizeof(buf) - bytes, bytes);
- crypto_xor_cpy(dst, src, final,
- walk.total % AES_BLOCK_SIZE);
-
- err = skcipher_walk_done(&walk, 0);
- break;
- }
- err = skcipher_walk_done(&walk,
- walk.nbytes - blocks * AES_BLOCK_SIZE);
+ err = skcipher_walk_done(&walk, walk.nbytes - bytes);
}
return err;
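A worked example of the new rounding may help (the numbers are invented, not from the patch):

    /* Invented example: walk.nbytes == 300 with more data still queued.
     *   8 * AES_BLOCK_SIZE - 1 == 127
     *   300 & ~127 == 256
     * Two full 8-block batches run now; 44 bytes carry over. */

A final chunk shorter than one block is instead bounced through the tail of the stack buffer, so the assembly always operates on whole 16-byte quantities.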
diff --git a/arch/arm/crypto/blake2s-shash.c b/arch/arm/crypto/blake2s-shash.c
index 17c1c3bfe2f5..763c73beea2d 100644
--- a/arch/arm/crypto/blake2s-shash.c
+++ b/arch/arm/crypto/blake2s-shash.c
@@ -13,12 +13,12 @@
static int crypto_blake2s_update_arm(struct shash_desc *desc,
const u8 *in, unsigned int inlen)
{
- return crypto_blake2s_update(desc, in, inlen, blake2s_compress);
+ return crypto_blake2s_update(desc, in, inlen, false);
}
static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out)
{
- return crypto_blake2s_final(desc, out, blake2s_compress);
+ return crypto_blake2s_final(desc, out, false);
}
#define BLAKE2S_ALG(name, driver_name, digest_size) \
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 7d23d4bb2168..aee73ef5b3dc 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -107,6 +107,16 @@
.endm
#endif
+#if __LINUX_ARM_ARCH__ < 7
+ .macro dsb, args
+ mcr p15, 0, r0, c7, c10, 4
+ .endm
+
+ .macro isb, args
+ mcr p15, 0, r0, c7, c5, 4
+ .endm
+#endif
+
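For context: ARMv6 cannot encode DSB/ISB as instructions, so the macros above substitute the equivalent CP15 operations (c7, c10, 4 is the data synchronization barrier; c7, c5, 4 flushes the prefetch buffer). A hypothetical C rendering of the same fallbacks:

    static inline void v6_dsb(void)
    {
            asm volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory");
    }

    static inline void v6_isb(void)
    {
            asm volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory");
    }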
.macro asm_trace_hardirqs_off, save=1
#if defined(CONFIG_TRACE_IRQFLAGS)
.if \save
@@ -288,6 +298,7 @@
*/
#define ALT_UP(instr...) \
.pushsection ".alt.smp.init", "a" ;\
+ .align 2 ;\
.long 9998b - . ;\
9997: instr ;\
.if . - 9997b == 2 ;\
@@ -299,6 +310,7 @@
.popsection
#define ALT_UP_B(label) \
.pushsection ".alt.smp.init", "a" ;\
+ .align 2 ;\
.long 9998b - . ;\
W(b) . + (label - 9998b) ;\
.popsection
diff --git a/arch/arm/include/asm/paravirt_api_clock.h b/arch/arm/include/asm/paravirt_api_clock.h
new file mode 100644
index 000000000000..65ac7cee0dad
--- /dev/null
+++ b/arch/arm/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include <asm/paravirt.h>
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index 70fe69bdcce2..92abd4cd8ca2 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -208,6 +208,8 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
}
#define pmd_offset pmd_offset
+#define pmd_pfn(pmd) (__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
+
#define pmd_large(pmd) (pmd_val(pmd) & 2)
#define pmd_leaf(pmd) (pmd_val(pmd) & 2)
#define pmd_bad(pmd) (pmd_val(pmd) & 2)
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 6af68edfa53a..bdc35c0e8dfb 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -96,6 +96,7 @@ unsigned long __get_wchan(struct task_struct *p);
#define __ALT_SMP_ASM(smp, up) \
"9998: " smp "\n" \
" .pushsection \".alt.smp.init\", \"a\"\n" \
+ " .align 2\n" \
" .long 9998b - .\n" \
" " up "\n" \
" .popsection\n"
diff --git a/arch/arm/include/asm/spectre.h b/arch/arm/include/asm/spectre.h
new file mode 100644
index 000000000000..85f9e538fb32
--- /dev/null
+++ b/arch/arm/include/asm/spectre.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ASM_SPECTRE_H
+#define __ASM_SPECTRE_H
+
+enum {
+ SPECTRE_UNAFFECTED,
+ SPECTRE_MITIGATED,
+ SPECTRE_VULNERABLE,
+};
+
+enum {
+ __SPECTRE_V2_METHOD_BPIALL,
+ __SPECTRE_V2_METHOD_ICIALLU,
+ __SPECTRE_V2_METHOD_SMC,
+ __SPECTRE_V2_METHOD_HVC,
+ __SPECTRE_V2_METHOD_LOOP8,
+};
+
+enum {
+ SPECTRE_V2_METHOD_BPIALL = BIT(__SPECTRE_V2_METHOD_BPIALL),
+ SPECTRE_V2_METHOD_ICIALLU = BIT(__SPECTRE_V2_METHOD_ICIALLU),
+ SPECTRE_V2_METHOD_SMC = BIT(__SPECTRE_V2_METHOD_SMC),
+ SPECTRE_V2_METHOD_HVC = BIT(__SPECTRE_V2_METHOD_HVC),
+ SPECTRE_V2_METHOD_LOOP8 = BIT(__SPECTRE_V2_METHOD_LOOP8),
+};
+
+#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
+void spectre_v2_update_state(unsigned int state, unsigned int methods);
+#else
+static inline void spectre_v2_update_state(unsigned int state,
+ unsigned int methods)
+{}
+#endif
+
+int spectre_bhb_update_vectors(unsigned int method);
+
+#endif
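A sketch of the expected caller, e.g. a per-CPU errata fixup that installs the BPIALL-based hardening (the function name is invented; the API is the one declared above):

    static void harden_branch_predictor_example(void)
    {
            /* ... switch this CPU to the hardened vector stubs ... */
            spectre_v2_update_state(SPECTRE_MITIGATED,
                                    SPECTRE_V2_METHOD_BPIALL);
    }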
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 36fbc3329252..32dbfd81f42a 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -11,6 +11,7 @@
#include <linux/string.h>
#include <asm/memory.h>
#include <asm/domain.h>
+#include <asm/unaligned.h>
#include <asm/unified.h>
#include <asm/compiler.h>
@@ -497,7 +498,10 @@ do { \
} \
default: __err = __get_user_bad(); break; \
} \
- *(type *)(dst) = __val; \
+ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) \
+ put_unaligned(__val, (type *)(dst)); \
+ else \
+ *(type *)(dst) = __val; /* aligned by caller */ \
if (__err) \
goto err_label; \
} while (0)
@@ -507,7 +511,9 @@ do { \
const type *__pk_ptr = (dst); \
unsigned long __dst = (unsigned long)__pk_ptr; \
int __err = 0; \
- type __val = *(type *)src; \
+ type __val = IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \
+ ? get_unaligned((type *)(src)) \
+ : *(type *)(src); /* aligned by caller */ \
switch (sizeof(type)) { \
case 1: __put_user_asm_byte(__val, __dst, __err, ""); break; \
case 2: __put_user_asm_half(__val, __dst, __err, ""); break; \
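What the helpers guarantee, for readers unfamiliar with them: a load or store that is legal at any address, compiled to a plain access where CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS says the hardware copes, and to safe byte-wise code elsewhere. A minimal illustration:

    #include <asm/unaligned.h>

    static u32 read_u32_any_alignment(const void *p)
    {
            return get_unaligned((const u32 *)p);
    }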
diff --git a/arch/arm/include/asm/vmlinux.lds.h b/arch/arm/include/asm/vmlinux.lds.h
index 4a91428c324d..fad45c884e98 100644
--- a/arch/arm/include/asm/vmlinux.lds.h
+++ b/arch/arm/include/asm/vmlinux.lds.h
@@ -26,6 +26,19 @@
#define ARM_MMU_DISCARD(x) x
#endif
+/*
+ * ld.lld does not support NOCROSSREFS:
+ * https://github.com/ClangBuiltLinux/linux/issues/1609
+ */
+#ifdef CONFIG_LD_IS_LLD
+#define NOCROSSREFS
+#endif
+
+/* Set start/end symbol names to the LMA for the section */
+#define ARM_LMA(sym, section) \
+ sym##_start = LOADADDR(section); \
+ sym##_end = LOADADDR(section) + SIZEOF(section)
+
#define PROC_INFO \
. = ALIGN(4); \
__proc_info_begin = .; \
@@ -110,19 +123,31 @@
* only thing that matters is their relative offsets
*/
#define ARM_VECTORS \
- __vectors_start = .; \
- .vectors 0xffff0000 : AT(__vectors_start) { \
- *(.vectors) \
+ __vectors_lma = .; \
+ OVERLAY 0xffff0000 : NOCROSSREFS AT(__vectors_lma) { \
+ .vectors { \
+ *(.vectors) \
+ } \
+ .vectors.bhb.loop8 { \
+ *(.vectors.bhb.loop8) \
+ } \
+ .vectors.bhb.bpiall { \
+ *(.vectors.bhb.bpiall) \
+ } \
} \
- . = __vectors_start + SIZEOF(.vectors); \
- __vectors_end = .; \
+ ARM_LMA(__vectors, .vectors); \
+ ARM_LMA(__vectors_bhb_loop8, .vectors.bhb.loop8); \
+ ARM_LMA(__vectors_bhb_bpiall, .vectors.bhb.bpiall); \
+ . = __vectors_lma + SIZEOF(.vectors) + \
+ SIZEOF(.vectors.bhb.loop8) + \
+ SIZEOF(.vectors.bhb.bpiall); \
\
- __stubs_start = .; \
- .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) { \
+ __stubs_lma = .; \
+ .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_lma) { \
*(.stubs) \
} \
- . = __stubs_start + SIZEOF(.stubs); \
- __stubs_end = .; \
+ ARM_LMA(__stubs, .stubs); \
+ . = __stubs_lma + SIZEOF(.stubs); \
\
PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors));
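One ARM_LMA() invocation, expanded by hand (a worked example, mechanically following the macro definition above):

    /* ARM_LMA(__vectors_bhb_loop8, .vectors.bhb.loop8) becomes:
     *   __vectors_bhb_loop8_start = LOADADDR(.vectors.bhb.loop8);
     *   __vectors_bhb_loop8_end   = LOADADDR(.vectors.bhb.loop8) +
     *                               SIZEOF(.vectors.bhb.loop8);
     */

The OVERLAY gives all three stub sets the same run-time address (0xffff0000) but distinct load addresses, and these _start/_end symbols are what the Spectre-BHB code later in this diff copies from when it selects a hardened vector set at boot.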
diff --git a/arch/arm/include/asm/xor.h b/arch/arm/include/asm/xor.h
index aefddec79286..669cad5194d3 100644
--- a/arch/arm/include/asm/xor.h
+++ b/arch/arm/include/asm/xor.h
@@ -44,7 +44,8 @@
: "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4))
static void
-xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_arm4regs_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
unsigned int lines = bytes / sizeof(unsigned long) / 4;
register unsigned int a1 __asm__("r4");
@@ -64,8 +65,9 @@ xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_arm4regs_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
unsigned int lines = bytes / sizeof(unsigned long) / 4;
register unsigned int a1 __asm__("r4");
@@ -86,8 +88,10 @@ xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_arm4regs_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
unsigned int lines = bytes / sizeof(unsigned long) / 2;
register unsigned int a1 __asm__("r8");
@@ -105,8 +109,11 @@ xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_arm4regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_arm4regs_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
unsigned int lines = bytes / sizeof(unsigned long) / 2;
register unsigned int a1 __asm__("r8");
@@ -146,7 +153,8 @@ static struct xor_block_template xor_block_arm4regs = {
extern struct xor_block_template const xor_block_neon_inner;
static void
-xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
if (in_interrupt()) {
xor_arm4regs_2(bytes, p1, p2);
@@ -158,8 +166,9 @@ xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
if (in_interrupt()) {
xor_arm4regs_3(bytes, p1, p2, p3);
@@ -171,8 +180,10 @@ xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
if (in_interrupt()) {
xor_arm4regs_4(bytes, p1, p2, p3, p4);
@@ -184,8 +195,11 @@ xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
if (in_interrupt()) {
xor_arm4regs_5(bytes, p1, p2, p3, p4, p5);
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index ae295a3bcfef..6ef3b535b7bf 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -106,4 +106,6 @@ endif
obj-$(CONFIG_HAVE_ARM_SMCCC) += smccc-call.o
+obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += spectre.o
+
extra-y := $(head-y) vmlinux.lds
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 5cd057859fe9..ee3f7a599181 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -1002,12 +1002,11 @@ vector_\name:
sub lr, lr, #\correction
.endif
- @
- @ Save r0, lr_<exception> (parent PC) and spsr_<exception>
- @ (parent CPSR)
- @
+ @ Save r0, lr_<exception> (parent PC)
stmia sp, {r0, lr} @ save r0, lr
- mrs lr, spsr
+
+ @ Save spsr_<exception> (parent CPSR)
+2: mrs lr, spsr
str lr, [sp, #8] @ save spsr
@
@@ -1028,6 +1027,44 @@ vector_\name:
movs pc, lr @ branch to handler in SVC mode
ENDPROC(vector_\name)
+#ifdef CONFIG_HARDEN_BRANCH_HISTORY
+ .subsection 1
+ .align 5
+vector_bhb_loop8_\name:
+ .if \correction
+ sub lr, lr, #\correction
+ .endif
+
+ @ Save r0, lr_<exception> (parent PC)
+ stmia sp, {r0, lr}
+
+ @ bhb workaround
+ mov r0, #8
+3: b . + 4
+ subs r0, r0, #1
+ bne 3b
+ dsb
+ isb
+ b 2b
+ENDPROC(vector_bhb_loop8_\name)
+
+vector_bhb_bpiall_\name:
+ .if \correction
+ sub lr, lr, #\correction
+ .endif
+
+ @ Save r0, lr_<exception> (parent PC)
+ stmia sp, {r0, lr}
+
+ @ bhb workaround
+ mcr p15, 0, r0, c7, c5, 6 @ BPIALL
+ @ isb not needed due to "movs pc, lr" in the vector stub
+ @ which gives a "context synchronisation".
+ b 2b
+ENDPROC(vector_bhb_bpiall_\name)
+ .previous
+#endif
+
.align 2
@ handler addresses follow this label
1:
@@ -1036,6 +1073,10 @@ ENDPROC(vector_\name)
.section .stubs, "ax", %progbits
@ This must be the first word
.word vector_swi
+#ifdef CONFIG_HARDEN_BRANCH_HISTORY
+ .word vector_bhb_loop8_swi
+ .word vector_bhb_bpiall_swi
+#endif
vector_rst:
ARM( swi SYS_ERROR0 )
@@ -1150,8 +1191,10 @@ vector_addrexcptn:
* FIQ "NMI" handler
*-----------------------------------------------------------------------------
* Handle a FIQ using the SVC stack, allowing FIQ to act like NMI on x86
- * systems.
+ * systems. This must be the last vector stub, so let's place it in its own
+ * subsection.
*/
+ .subsection 2
vector_stub fiq, FIQ_MODE, 4
.long __fiq_usr @ 0 (USR_26 / USR_32)
@@ -1184,6 +1227,30 @@ vector_addrexcptn:
W(b) vector_irq
W(b) vector_fiq
+#ifdef CONFIG_HARDEN_BRANCH_HISTORY
+ .section .vectors.bhb.loop8, "ax", %progbits
+.L__vectors_bhb_loop8_start:
+ W(b) vector_rst
+ W(b) vector_bhb_loop8_und
+ W(ldr) pc, .L__vectors_bhb_loop8_start + 0x1004
+ W(b) vector_bhb_loop8_pabt
+ W(b) vector_bhb_loop8_dabt
+ W(b) vector_addrexcptn
+ W(b) vector_bhb_loop8_irq
+ W(b) vector_bhb_loop8_fiq
+
+ .section .vectors.bhb.bpiall, "ax", %progbits
+.L__vectors_bhb_bpiall_start:
+ W(b) vector_rst
+ W(b) vector_bhb_bpiall_und
+ W(ldr) pc, .L__vectors_bhb_bpiall_start + 0x1008
+ W(b) vector_bhb_bpiall_pabt
+ W(b) vector_bhb_bpiall_dabt
+ W(b) vector_addrexcptn
+ W(b) vector_bhb_bpiall_irq
+ W(b) vector_bhb_bpiall_fiq
+#endif
+
.data
.align 2
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index ac86c34682bb..dbc1913ee30b 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -154,12 +154,36 @@ ENDPROC(ret_from_fork)
*/
.align 5
+#ifdef CONFIG_HARDEN_BRANCH_HISTORY
+ENTRY(vector_bhb_loop8_swi)
+ sub sp, sp, #PT_REGS_SIZE
+ stmia sp, {r0 - r12}
+ mov r8, #8
+1: b 2f
+2: subs r8, r8, #1
+ bne 1b
+ dsb
+ isb
+ b 3f
+ENDPROC(vector_bhb_loop8_swi)
+
+ .align 5
+ENTRY(vector_bhb_bpiall_swi)
+ sub sp, sp, #PT_REGS_SIZE
+ stmia sp, {r0 - r12}
+ mcr p15, 0, r8, c7, c5, 6 @ BPIALL
+ isb
+ b 3f
+ENDPROC(vector_bhb_bpiall_swi)
+#endif
+ .align 5
ENTRY(vector_swi)
#ifdef CONFIG_CPU_V7M
v7m_exception_entry
#else
sub sp, sp, #PT_REGS_SIZE
stmia sp, {r0 - r12} @ Calling r0 - r12
+3:
ARM( add r8, sp, #S_PC )
ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr
THUMB( mov r8, sp )
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index 7bd30c0a4280..22f937e6f3ff 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -154,22 +154,38 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr)
return 0;
}
-static struct undef_hook kgdb_brkpt_hook = {
+static struct undef_hook kgdb_brkpt_arm_hook = {
.instr_mask = 0xffffffff,
.instr_val = KGDB_BREAKINST,
- .cpsr_mask = MODE_MASK,
+ .cpsr_mask = PSR_T_BIT | MODE_MASK,
.cpsr_val = SVC_MODE,
.fn = kgdb_brk_fn
};
-static struct undef_hook kgdb_compiled_brkpt_hook = {
+static struct undef_hook kgdb_brkpt_thumb_hook = {
+ .instr_mask = 0xffff,
+ .instr_val = KGDB_BREAKINST & 0xffff,
+ .cpsr_mask = PSR_T_BIT | MODE_MASK,
+ .cpsr_val = PSR_T_BIT | SVC_MODE,
+ .fn = kgdb_brk_fn
+};
+
+static struct undef_hook kgdb_compiled_brkpt_arm_hook = {
.instr_mask = 0xffffffff,
.instr_val = KGDB_COMPILED_BREAK,
- .cpsr_mask = MODE_MASK,
+ .cpsr_mask = PSR_T_BIT | MODE_MASK,
.cpsr_val = SVC_MODE,
.fn = kgdb_compiled_brk_fn
};
+static struct undef_hook kgdb_compiled_brkpt_thumb_hook = {
+ .instr_mask = 0xffff,
+ .instr_val = KGDB_COMPILED_BREAK & 0xffff,
+ .cpsr_mask = PSR_T_BIT | MODE_MASK,
+ .cpsr_val = PSR_T_BIT | SVC_MODE,
+ .fn = kgdb_compiled_brk_fn
+};
+
static int __kgdb_notify(struct die_args *args, unsigned long cmd)
{
struct pt_regs *regs = args->regs;
@@ -210,8 +226,10 @@ int kgdb_arch_init(void)
if (ret != 0)
return ret;
- register_undef_hook(&kgdb_brkpt_hook);
- register_undef_hook(&kgdb_compiled_brkpt_hook);
+ register_undef_hook(&kgdb_brkpt_arm_hook);
+ register_undef_hook(&kgdb_brkpt_thumb_hook);
+ register_undef_hook(&kgdb_compiled_brkpt_arm_hook);
+ register_undef_hook(&kgdb_compiled_brkpt_thumb_hook);
return 0;
}
@@ -224,8 +242,10 @@ int kgdb_arch_init(void)
*/
void kgdb_arch_exit(void)
{
- unregister_undef_hook(&kgdb_brkpt_hook);
- unregister_undef_hook(&kgdb_compiled_brkpt_hook);
+ unregister_undef_hook(&kgdb_brkpt_arm_hook);
+ unregister_undef_hook(&kgdb_brkpt_thumb_hook);
+ unregister_undef_hook(&kgdb_compiled_brkpt_arm_hook);
+ unregister_undef_hook(&kgdb_compiled_brkpt_thumb_hook);
unregister_die_notifier(&kgdb_notifier);
}
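
The split is needed because a Thumb-2 kernel takes the KGDB breakpoint in Thumb state: the trapping instruction is 16 bits wide and CPSR.T is set, so the previous ARM-only hooks, with a 32-bit mask and no T-bit check, never matched. Simplified sketch of the matching rule the undef-hook core applies (not the kernel function itself):

	static bool undef_hook_matches(const struct undef_hook *hook,
				       u32 instr, u32 cpsr)
	{
		/* Both the masked instruction and the masked CPSR must
		 * agree; the Thumb hooks compare only the low 16
		 * instruction bits and require PSR_T_BIT to be set. */
		return (instr & hook->instr_mask) == hook->instr_val &&
		       (cpsr & hook->cpsr_mask) == hook->cpsr_val;
	}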
diff --git a/arch/arm/kernel/spectre.c b/arch/arm/kernel/spectre.c
new file mode 100644
index 000000000000..0dcefc36fb7a
--- /dev/null
+++ b/arch/arm/kernel/spectre.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/bpf.h>
+#include <linux/cpu.h>
+#include <linux/device.h>
+
+#include <asm/spectre.h>
+
+static bool _unprivileged_ebpf_enabled(void)
+{
+#ifdef CONFIG_BPF_SYSCALL
+ return !sysctl_unprivileged_bpf_disabled;
+#else
+ return false;
+#endif
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+}
+
+static unsigned int spectre_v2_state;
+static unsigned int spectre_v2_methods;
+
+void spectre_v2_update_state(unsigned int state, unsigned int method)
+{
+ if (state > spectre_v2_state)
+ spectre_v2_state = state;
+ spectre_v2_methods |= method;
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ const char *method;
+
+ if (spectre_v2_state == SPECTRE_UNAFFECTED)
+ return sprintf(buf, "%s\n", "Not affected");
+
+ if (spectre_v2_state != SPECTRE_MITIGATED)
+ return sprintf(buf, "%s\n", "Vulnerable");
+
+ if (_unprivileged_ebpf_enabled())
+ return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n");
+
+ switch (spectre_v2_methods) {
+ case SPECTRE_V2_METHOD_BPIALL:
+ method = "Branch predictor hardening";
+ break;
+
+ case SPECTRE_V2_METHOD_ICIALLU:
+ method = "I-cache invalidation";
+ break;
+
+ case SPECTRE_V2_METHOD_SMC:
+ case SPECTRE_V2_METHOD_HVC:
+ method = "Firmware call";
+ break;
+
+ case SPECTRE_V2_METHOD_LOOP8:
+ method = "History overwrite";
+ break;
+
+ default:
+ method = "Multiple mitigations";
+ break;
+ }
+
+ return sprintf(buf, "Mitigation: %s\n", method);
+}
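
Note the accumulation semantics: spectre_v2_update_state() only ever raises the state and ORs method bits together, so once CPUs report different methods the switch above no longer matches a single bit and falls through to "Multiple mitigations". A hypothetical sequence:

	/* CPU0 mitigated with BPIALL, CPU4 with the loop8 sequence: */
	spectre_v2_update_state(SPECTRE_MITIGATED, SPECTRE_V2_METHOD_BPIALL);
	spectre_v2_update_state(SPECTRE_MITIGATED, SPECTRE_V2_METHOD_LOOP8);

	/* spectre_v2_methods now has two bits set, so reading
	 * /sys/devices/system/cpu/vulnerabilities/spectre_v2 yields
	 * "Mitigation: Multiple mitigations". */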
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index da04ed85855a..cae4a748811f 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -30,6 +30,7 @@
#include <linux/atomic.h>
#include <asm/cacheflush.h>
#include <asm/exception.h>
+#include <asm/spectre.h>
#include <asm/unistd.h>
#include <asm/traps.h>
#include <asm/ptrace.h>
@@ -789,10 +790,59 @@ static inline void __init kuser_init(void *vectors)
}
#endif
+#ifndef CONFIG_CPU_V7M
+static void copy_from_lma(void *vma, void *lma_start, void *lma_end)
+{
+ memcpy(vma, lma_start, lma_end - lma_start);
+}
+
+static void flush_vectors(void *vma, size_t offset, size_t size)
+{
+ unsigned long start = (unsigned long)vma + offset;
+ unsigned long end = start + size;
+
+ flush_icache_range(start, end);
+}
+
+#ifdef CONFIG_HARDEN_BRANCH_HISTORY
+int spectre_bhb_update_vectors(unsigned int method)
+{
+ extern char __vectors_bhb_bpiall_start[], __vectors_bhb_bpiall_end[];
+ extern char __vectors_bhb_loop8_start[], __vectors_bhb_loop8_end[];
+ void *vec_start, *vec_end;
+
+ if (system_state >= SYSTEM_FREEING_INITMEM) {
+ pr_err("CPU%u: Spectre BHB workaround too late - system vulnerable\n",
+ smp_processor_id());
+ return SPECTRE_VULNERABLE;
+ }
+
+ switch (method) {
+ case SPECTRE_V2_METHOD_LOOP8:
+ vec_start = __vectors_bhb_loop8_start;
+ vec_end = __vectors_bhb_loop8_end;
+ break;
+
+ case SPECTRE_V2_METHOD_BPIALL:
+ vec_start = __vectors_bhb_bpiall_start;
+ vec_end = __vectors_bhb_bpiall_end;
+ break;
+
+ default:
+ pr_err("CPU%u: unknown Spectre BHB state %d\n",
+ smp_processor_id(), method);
+ return SPECTRE_VULNERABLE;
+ }
+
+ copy_from_lma(vectors_page, vec_start, vec_end);
+ flush_vectors(vectors_page, 0, vec_end - vec_start);
+
+ return SPECTRE_MITIGATED;
+}
+#endif
+
void __init early_trap_init(void *vectors_base)
{
-#ifndef CONFIG_CPU_V7M
- unsigned long vectors = (unsigned long)vectors_base;
extern char __stubs_start[], __stubs_end[];
extern char __vectors_start[], __vectors_end[];
unsigned i;
@@ -813,17 +863,20 @@ void __init early_trap_init(void *vectors_base)
* into the vector page, mapped at 0xffff0000, and ensure these
* are visible to the instruction stream.
*/
- memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start);
- memcpy((void *)vectors + 0x1000, __stubs_start, __stubs_end - __stubs_start);
+ copy_from_lma(vectors_base, __vectors_start, __vectors_end);
+ copy_from_lma(vectors_base + 0x1000, __stubs_start, __stubs_end);
kuser_init(vectors_base);
- flush_icache_range(vectors, vectors + PAGE_SIZE * 2);
+ flush_vectors(vectors_base, 0, PAGE_SIZE * 2);
+}
#else /* ifndef CONFIG_CPU_V7M */
+void __init early_trap_init(void *vectors_base)
+{
/*
* on V7-M there is no need to copy the vector table to a dedicated
* memory area. The address is configurable and so a table in the kernel
* image can be used.
*/
-#endif
}
+#endif
diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c
index b99dd8e1c93f..522510baed49 100644
--- a/arch/arm/lib/xor-neon.c
+++ b/arch/arm/lib/xor-neon.c
@@ -17,17 +17,11 @@ MODULE_LICENSE("GPL");
/*
* Pull in the reference implementations while instructing GCC (through
* -ftree-vectorize) to attempt to exploit implicit parallelism and emit
- * NEON instructions.
+ * NEON instructions. Clang does this by default at -O2, so no pragma
+ * is needed.
*/
-#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+#ifdef CONFIG_CC_IS_GCC
#pragma GCC optimize "tree-vectorize"
-#else
-/*
- * While older versions of GCC do not generate incorrect code, they fail to
- * recognize the parallel nature of these functions, and emit plain ARM code,
- * which is known to be slower than the optimized ARM code in asm-arm/xor.h.
- */
-#warning This code requires at least version 4.6 of GCC
#endif
#pragma GCC diagnostic ignored "-Wunused-variable"
diff --git a/arch/arm/mach-mstar/Kconfig b/arch/arm/mach-mstar/Kconfig
index cd300eeedc20..0bf4d312bcfd 100644
--- a/arch/arm/mach-mstar/Kconfig
+++ b/arch/arm/mach-mstar/Kconfig
@@ -3,6 +3,7 @@ menuconfig ARCH_MSTARV7
depends on ARCH_MULTI_V7
select ARM_GIC
select ARM_HEAVY_MB
+ select HAVE_ARM_ARCH_TIMER
select MST_IRQ
select MSTAR_MSC313_MPLL
help
diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c
index 6daaa645ae5d..21413a9b7b6c 100644
--- a/arch/arm/mach-omap2/display.c
+++ b/arch/arm/mach-omap2/display.c
@@ -263,9 +263,9 @@ static int __init omapdss_init_of(void)
}
r = of_platform_populate(node, NULL, NULL, &pdev->dev);
+ put_device(&pdev->dev);
if (r) {
pr_err("Unable to populate DSS submodule devices\n");
- put_device(&pdev->dev);
return r;
}
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index ccb0e3732c0d..31d1a21f6041 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -752,8 +752,10 @@ static int __init _init_clkctrl_providers(void)
for_each_matching_node(np, ti_clkctrl_match_table) {
ret = _setup_clkctrl_provider(np);
- if (ret)
+ if (ret) {
+ of_node_put(np);
break;
+ }
}
return ret;
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index 593c7f793da5..44659fbc37ba 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -530,6 +530,16 @@ static struct pxa2xx_spi_controller corgi_spi_info = {
.num_chipselect = 3,
};
+static struct gpiod_lookup_table corgi_spi_gpio_table = {
+ .dev_id = "pxa2xx-spi.1",
+ .table = {
+ GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_MAX1111_CS, "cs", 2, GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static void corgi_wait_for_hsync(void)
{
while (gpio_get_value(CORGI_GPIO_HSYNC))
@@ -548,10 +558,6 @@ static struct ads7846_platform_data corgi_ads7846_info = {
.wait_for_sync = corgi_wait_for_hsync,
};
-static struct pxa2xx_spi_chip corgi_ads7846_chip = {
- .gpio_cs = CORGI_GPIO_ADS7846_CS,
-};
-
static void corgi_bl_kick_battery(void)
{
void (*kick_batt)(void);
@@ -580,14 +586,6 @@ static struct corgi_lcd_platform_data corgi_lcdcon_info = {
.kick_battery = corgi_bl_kick_battery,
};
-static struct pxa2xx_spi_chip corgi_lcdcon_chip = {
- .gpio_cs = CORGI_GPIO_LCDCON_CS,
-};
-
-static struct pxa2xx_spi_chip corgi_max1111_chip = {
- .gpio_cs = CORGI_GPIO_MAX1111_CS,
-};
-
static struct spi_board_info corgi_spi_devices[] = {
{
.modalias = "ads7846",
@@ -595,7 +593,6 @@ static struct spi_board_info corgi_spi_devices[] = {
.bus_num = 1,
.chip_select = 0,
.platform_data = &corgi_ads7846_info,
- .controller_data= &corgi_ads7846_chip,
.irq = PXA_GPIO_TO_IRQ(CORGI_GPIO_TP_INT),
}, {
.modalias = "corgi-lcd",
@@ -603,18 +600,17 @@ static struct spi_board_info corgi_spi_devices[] = {
.bus_num = 1,
.chip_select = 1,
.platform_data = &corgi_lcdcon_info,
- .controller_data= &corgi_lcdcon_chip,
}, {
.modalias = "max1111",
.max_speed_hz = 450000,
.bus_num = 1,
.chip_select = 2,
- .controller_data= &corgi_max1111_chip,
},
};
static void __init corgi_init_spi(void)
{
+ gpiod_add_lookup_table(&corgi_spi_gpio_table);
pxa2xx_set_spi_info(1, &corgi_spi_info);
gpiod_add_lookup_table(&corgi_lcdcon_gpio_table);
spi_register_board_info(ARRAY_AND_SIZE(corgi_spi_devices));
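
This conversion is the recurring pattern across the PXA board files below: the per-device pxa2xx_spi_chip.gpio_cs field is dropped and the controller gains a GPIO lookup table whose "cs" entries are indexed to match each device's chip_select. On the consumer side, a lookup along these lines resolves the descriptor (a sketch under the assumption that the SPI core requests the line from the controller's device; the exact call site may differ):

	/* Hypothetical lookup for chip_select 1 on bus 1; matches the
	 * GPIO_LOOKUP_IDX(..., "cs", 1, ...) entry registered above for
	 * dev_id "pxa2xx-spi.1". */
	struct gpio_desc *cs = gpiod_get_index(controller_dev, "cs", 1,
					       GPIOD_OUT_HIGH);
	if (IS_ERR(cs))
		return PTR_ERR(cs);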
diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c
index 1d4c5db54be2..e1870fbb19e7 100644
--- a/arch/arm/mach-pxa/hx4700.c
+++ b/arch/arm/mach-pxa/hx4700.c
@@ -616,7 +616,6 @@ static struct pxa2xx_spi_chip tsc2046_chip = {
.tx_threshold = 1,
.rx_threshold = 2,
.timeout = 64,
- .gpio_cs = GPIO88_HX4700_TSC2046_CS,
};
static struct spi_board_info tsc2046_board_info[] __initdata = {
@@ -635,6 +634,14 @@ static struct pxa2xx_spi_controller pxa_ssp2_master_info = {
.enable_dma = 1,
};
+static struct gpiod_lookup_table pxa_ssp2_gpio_table = {
+ .dev_id = "pxa2xx-spi.2",
+ .table = {
+ GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_HX4700_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
/*
* External power
*/
@@ -896,6 +903,7 @@ static void __init hx4700_init(void)
pxa_set_i2c_info(NULL);
i2c_register_board_info(0, ARRAY_AND_SIZE(i2c_board_info));
i2c_register_board_info(1, ARRAY_AND_SIZE(pi2c_board_info));
+ gpiod_add_lookup_table(&pxa_ssp2_gpio_table);
pxa2xx_set_spi_info(2, &pxa_ssp2_master_info);
spi_register_board_info(ARRAY_AND_SIZE(tsc2046_board_info));
diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c
index 04a12523cdee..753fe166ab68 100644
--- a/arch/arm/mach-pxa/icontrol.c
+++ b/arch/arm/mach-pxa/icontrol.c
@@ -13,7 +13,7 @@
#include <linux/irq.h>
#include <linux/platform_device.h>
#include <linux/property.h>
-#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <asm/mach-types.h>
#include <asm/mach/arch.h>
@@ -42,7 +42,6 @@ static struct pxa2xx_spi_chip mcp251x_chip_info1 = {
.rx_threshold = 128,
.dma_burst_size = 8,
.timeout = 235,
- .gpio_cs = ICONTROL_MCP251x_nCS1
};
static struct pxa2xx_spi_chip mcp251x_chip_info2 = {
@@ -50,7 +49,6 @@ static struct pxa2xx_spi_chip mcp251x_chip_info2 = {
.rx_threshold = 128,
.dma_burst_size = 8,
.timeout = 235,
- .gpio_cs = ICONTROL_MCP251x_nCS2
};
static struct pxa2xx_spi_chip mcp251x_chip_info3 = {
@@ -58,7 +56,6 @@ static struct pxa2xx_spi_chip mcp251x_chip_info3 = {
.rx_threshold = 128,
.dma_burst_size = 8,
.timeout = 235,
- .gpio_cs = ICONTROL_MCP251x_nCS3
};
static struct pxa2xx_spi_chip mcp251x_chip_info4 = {
@@ -66,7 +63,6 @@ static struct pxa2xx_spi_chip mcp251x_chip_info4 = {
.rx_threshold = 128,
.dma_burst_size = 8,
.timeout = 235,
- .gpio_cs = ICONTROL_MCP251x_nCS4
};
static const struct property_entry mcp251x_properties[] = {
@@ -143,6 +139,24 @@ struct platform_device pxa_spi_ssp4 = {
}
};
+static struct gpiod_lookup_table pxa_ssp3_gpio_table = {
+ .dev_id = "pxa2xx-spi.3",
+ .table = {
+ GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS1, "cs", 0, GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS2, "cs", 1, GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
+static struct gpiod_lookup_table pxa_ssp4_gpio_table = {
+ .dev_id = "pxa2xx-spi.4",
+ .table = {
+ GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS3, "cs", 0, GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS4, "cs", 1, GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static struct platform_device *icontrol_spi_devices[] __initdata = {
&pxa_spi_ssp3,
&pxa_spi_ssp4,
@@ -175,6 +189,8 @@ static mfp_cfg_t mfp_can_cfg[] __initdata = {
static void __init icontrol_can_init(void)
{
pxa3xx_mfp_config(ARRAY_AND_SIZE(mfp_can_cfg));
+ gpiod_add_lookup_table(&pxa_ssp3_gpio_table);
+ gpiod_add_lookup_table(&pxa_ssp4_gpio_table);
platform_add_devices(ARRAY_AND_SIZE(icontrol_spi_devices));
spi_register_board_info(ARRAY_AND_SIZE(mcp251x_board_info));
}
diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c
index 793f61375ee8..73f5953b3bb6 100644
--- a/arch/arm/mach-pxa/littleton.c
+++ b/arch/arm/mach-pxa/littleton.c
@@ -195,7 +195,6 @@ static struct pxa2xx_spi_controller littleton_spi_info = {
static struct pxa2xx_spi_chip littleton_tdo24m_chip = {
.rx_threshold = 1,
.tx_threshold = 1,
- .gpio_cs = LITTLETON_GPIO_LCD_CS,
};
static struct spi_board_info littleton_spi_devices[] __initdata = {
@@ -208,8 +207,17 @@ static struct spi_board_info littleton_spi_devices[] __initdata = {
},
};
+static struct gpiod_lookup_table littleton_spi_gpio_table = {
+ .dev_id = "pxa2xx-spi.2",
+ .table = {
+ GPIO_LOOKUP_IDX("gpio-pxa", LITTLETON_GPIO_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static void __init littleton_init_spi(void)
{
+ gpiod_add_lookup_table(&littleton_spi_gpio_table);
pxa2xx_set_spi_info(2, &littleton_spi_info);
spi_register_board_info(ARRAY_AND_SIZE(littleton_spi_devices));
}
diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index cd9fa465b9b2..200fd35168e0 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -938,8 +938,6 @@ struct pxa2xx_spi_chip tsc2046_chip_info = {
.tx_threshold = 1,
.rx_threshold = 2,
.timeout = 64,
- /* NOTICE must be GPIO, incompatibility with hw PXA SPI framing */
- .gpio_cs = GPIO14_MAGICIAN_TSC2046_CS,
};
static struct pxa2xx_spi_controller magician_spi_info = {
@@ -947,6 +945,15 @@ static struct pxa2xx_spi_controller magician_spi_info = {
.enable_dma = 1,
};
+static struct gpiod_lookup_table magician_spi_gpio_table = {
+ .dev_id = "pxa2xx-spi.2",
+ .table = {
+ /* NOTICE must be GPIO, incompatibility with hw PXA SPI framing */
+ GPIO_LOOKUP_IDX("gpio-pxa", GPIO14_MAGICIAN_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static struct spi_board_info ads7846_spi_board_info[] __initdata = {
{
.modalias = "ads7846",
@@ -1031,6 +1038,7 @@ static void __init magician_init(void)
} else
pr_err("LCD detection: CPLD mapping failed\n");
+ gpiod_add_lookup_table(&magician_spi_gpio_table);
pxa2xx_set_spi_info(2, &magician_spi_info);
spi_register_board_info(ARRAY_AND_SIZE(ads7846_spi_board_info));
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index 3a4ecc3c8f8b..58cfa434afde 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -197,6 +197,14 @@ static struct pxa2xx_spi_controller poodle_spi_info = {
.num_chipselect = 1,
};
+static struct gpiod_lookup_table poodle_spi_gpio_table = {
+ .dev_id = "pxa2xx-spi.1",
+ .table = {
+ GPIO_LOOKUP_IDX("gpio-pxa", POODLE_GPIO_TP_CS, "cs", 0, GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static struct ads7846_platform_data poodle_ads7846_info = {
.model = 7846,
.vref_delay_usecs = 100,
@@ -205,23 +213,19 @@ static struct ads7846_platform_data poodle_ads7846_info = {
.gpio_pendown = POODLE_GPIO_TP_INT,
};
-static struct pxa2xx_spi_chip poodle_ads7846_chip = {
- .gpio_cs = POODLE_GPIO_TP_CS,
-};
-
static struct spi_board_info poodle_spi_devices[] = {
{
.modalias = "ads7846",
.max_speed_hz = 10000,
.bus_num = 1,
.platform_data = &poodle_ads7846_info,
- .controller_data= &poodle_ads7846_chip,
.irq = PXA_GPIO_TO_IRQ(POODLE_GPIO_TP_INT),
},
};
static void __init poodle_init_spi(void)
{
+ gpiod_add_lookup_table(&poodle_spi_gpio_table);
pxa2xx_set_spi_info(1, &poodle_spi_info);
spi_register_board_info(ARRAY_AND_SIZE(poodle_spi_devices));
}
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 371008e9bb02..a648e7094e84 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -510,10 +510,6 @@ static struct ads7846_platform_data spitz_ads7846_info = {
.wait_for_sync = spitz_ads7846_wait_for_hsync,
};
-static struct pxa2xx_spi_chip spitz_ads7846_chip = {
- .gpio_cs = SPITZ_GPIO_ADS7846_CS,
-};
-
static void spitz_bl_kick_battery(void)
{
void (*kick_batt)(void);
@@ -555,14 +551,6 @@ static struct corgi_lcd_platform_data spitz_lcdcon_info = {
.kick_battery = spitz_bl_kick_battery,
};
-static struct pxa2xx_spi_chip spitz_lcdcon_chip = {
- .gpio_cs = SPITZ_GPIO_LCDCON_CS,
-};
-
-static struct pxa2xx_spi_chip spitz_max1111_chip = {
- .gpio_cs = SPITZ_GPIO_MAX1111_CS,
-};
-
static struct spi_board_info spitz_spi_devices[] = {
{
.modalias = "ads7846",
@@ -570,7 +558,6 @@ static struct spi_board_info spitz_spi_devices[] = {
.bus_num = 2,
.chip_select = 0,
.platform_data = &spitz_ads7846_info,
- .controller_data = &spitz_ads7846_chip,
.irq = PXA_GPIO_TO_IRQ(SPITZ_GPIO_TP_INT),
}, {
.modalias = "corgi-lcd",
@@ -578,13 +565,11 @@ static struct spi_board_info spitz_spi_devices[] = {
.bus_num = 2,
.chip_select = 1,
.platform_data = &spitz_lcdcon_info,
- .controller_data = &spitz_lcdcon_chip,
}, {
.modalias = "max1111",
.max_speed_hz = 450000,
.bus_num = 2,
.chip_select = 2,
- .controller_data = &spitz_max1111_chip,
},
};
@@ -592,6 +577,16 @@ static struct pxa2xx_spi_controller spitz_spi_info = {
.num_chipselect = 3,
};
+static struct gpiod_lookup_table spitz_spi_gpio_table = {
+ .dev_id = "pxa2xx-spi.2",
+ .table = {
+ GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_MAX1111_CS, "cs", 2, GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static void __init spitz_spi_init(void)
{
if (machine_is_akita())
@@ -599,6 +594,7 @@ static void __init spitz_spi_init(void)
else
gpiod_add_lookup_table(&spitz_lcdcon_gpio_table);
+ gpiod_add_lookup_table(&spitz_spi_gpio_table);
pxa2xx_set_spi_info(2, &spitz_spi_info);
spi_register_board_info(ARRAY_AND_SIZE(spitz_spi_devices));
}
diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c
index 8ca02ec1d44c..b43e2f4536a5 100644
--- a/arch/arm/mach-pxa/stargate2.c
+++ b/arch/arm/mach-pxa/stargate2.c
@@ -346,6 +346,22 @@ static struct pxa2xx_spi_controller pxa_ssp_master_2_info = {
.num_chipselect = 1,
};
+static struct gpiod_lookup_table pxa_ssp1_gpio_table = {
+ .dev_id = "pxa2xx-spi.1",
+ .table = {
+ GPIO_LOOKUP_IDX("gpio-pxa", 24, "cs", 0, GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
+static struct gpiod_lookup_table pxa_ssp3_gpio_table = {
+ .dev_id = "pxa2xx-spi.3",
+ .table = {
+ GPIO_LOOKUP_IDX("gpio-pxa", 39, "cs", 0, GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
/* An upcoming kernel change will scrap SFRM usage, so these
* drivers have been moved to use GPIOs */
static struct pxa2xx_spi_chip staccel_chip_info = {
@@ -353,7 +369,6 @@ static struct pxa2xx_spi_chip staccel_chip_info = {
.rx_threshold = 8,
.dma_burst_size = 8,
.timeout = 235,
- .gpio_cs = 24,
};
static struct pxa2xx_spi_chip cc2420_info = {
@@ -361,7 +376,6 @@ static struct pxa2xx_spi_chip cc2420_info = {
.rx_threshold = 8,
.dma_burst_size = 8,
.timeout = 235,
- .gpio_cs = 39,
};
static struct spi_board_info spi_board_info[] __initdata = {
@@ -410,6 +424,8 @@ static void __init imote2_stargate2_init(void)
pxa_set_btuart_info(NULL);
pxa_set_stuart_info(NULL);
+ gpiod_add_lookup_table(&pxa_ssp1_gpio_table);
+ gpiod_add_lookup_table(&pxa_ssp3_gpio_table);
pxa2xx_set_spi_info(1, &pxa_ssp_master_0_info);
pxa2xx_set_spi_info(2, &pxa_ssp_master_1_info);
pxa2xx_set_spi_info(3, &pxa_ssp_master_2_info);
diff --git a/arch/arm/mach-pxa/z2.c b/arch/arm/mach-pxa/z2.c
index 8e74fbb0a96e..7eaeda269927 100644
--- a/arch/arm/mach-pxa/z2.c
+++ b/arch/arm/mach-pxa/z2.c
@@ -570,7 +570,6 @@ static struct pxa2xx_spi_chip z2_lbs_chip_info = {
.rx_threshold = 8,
.tx_threshold = 8,
.timeout = 1000,
- .gpio_cs = GPIO24_ZIPITZ2_WIFI_CS,
};
static struct libertas_spi_platform_data z2_lbs_pdata = {
@@ -584,7 +583,6 @@ static struct pxa2xx_spi_chip lms283_chip_info = {
.rx_threshold = 1,
.tx_threshold = 1,
.timeout = 64,
- .gpio_cs = GPIO88_ZIPITZ2_LCD_CS,
};
static struct gpiod_lookup_table lms283_gpio_table = {
@@ -624,8 +622,26 @@ static struct pxa2xx_spi_controller pxa_ssp2_master_info = {
.num_chipselect = 1,
};
+static struct gpiod_lookup_table pxa_ssp1_gpio_table = {
+ .dev_id = "pxa2xx-spi.1",
+ .table = {
+ GPIO_LOOKUP_IDX("gpio-pxa", GPIO24_ZIPITZ2_WIFI_CS, "cs", 0, GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
+static struct gpiod_lookup_table pxa_ssp2_gpio_table = {
+ .dev_id = "pxa2xx-spi.2",
+ .table = {
+ GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_ZIPITZ2_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static void __init z2_spi_init(void)
{
+ gpiod_add_lookup_table(&pxa_ssp1_gpio_table);
+ gpiod_add_lookup_table(&pxa_ssp2_gpio_table);
pxa2xx_set_spi_info(1, &pxa_ssp1_master_info);
pxa2xx_set_spi_info(2, &pxa_ssp2_master_info);
gpiod_add_lookup_table(&lms283_gpio_table);
diff --git a/arch/arm/mach-s3c/Kconfig b/arch/arm/mach-s3c/Kconfig
index 25606e668cf9..1899fc3f44fd 100644
--- a/arch/arm/mach-s3c/Kconfig
+++ b/arch/arm/mach-s3c/Kconfig
@@ -191,18 +191,6 @@ config S3C64XX_DEV_SPI0
Compile in platform device definitions for S3C64XX's type
SPI controller 0
-config S3C64XX_DEV_SPI1
- bool
- help
- Compile in platform device definitions for S3C64XX's type
- SPI controller 1
-
-config S3C64XX_DEV_SPI2
- bool
- help
- Compile in platform device definitions for S3C64XX's type
- SPI controller 2
-
config SAMSUNG_DEV_TS
bool
help
diff --git a/arch/arm/mach-s3c/devs.c b/arch/arm/mach-s3c/devs.c
index 06dec64848f9..1e266fc24f9b 100644
--- a/arch/arm/mach-s3c/devs.c
+++ b/arch/arm/mach-s3c/devs.c
@@ -1107,8 +1107,7 @@ struct platform_device s3c64xx_device_spi0 = {
},
};
-void __init s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
- int num_cs)
+void __init s3c64xx_spi0_set_platdata(int src_clk_nr, int num_cs)
{
struct s3c64xx_spi_info pd;
@@ -1120,80 +1119,8 @@ void __init s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
pd.num_cs = num_cs;
pd.src_clk_nr = src_clk_nr;
- pd.cfg_gpio = (cfg_gpio) ? cfg_gpio : s3c64xx_spi0_cfg_gpio;
+ pd.cfg_gpio = s3c64xx_spi0_cfg_gpio;
s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi0);
}
#endif /* CONFIG_S3C64XX_DEV_SPI0 */
-
-#ifdef CONFIG_S3C64XX_DEV_SPI1
-static struct resource s3c64xx_spi1_resource[] = {
- [0] = DEFINE_RES_MEM(S3C_PA_SPI1, SZ_256),
- [1] = DEFINE_RES_IRQ(IRQ_SPI1),
-};
-
-struct platform_device s3c64xx_device_spi1 = {
- .name = "s3c6410-spi",
- .id = 1,
- .num_resources = ARRAY_SIZE(s3c64xx_spi1_resource),
- .resource = s3c64xx_spi1_resource,
- .dev = {
- .dma_mask = &samsung_device_dma_mask,
- .coherent_dma_mask = DMA_BIT_MASK(32),
- },
-};
-
-void __init s3c64xx_spi1_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
- int num_cs)
-{
- struct s3c64xx_spi_info pd;
-
- /* Reject invalid configuration */
- if (!num_cs || src_clk_nr < 0) {
- pr_err("%s: Invalid SPI configuration\n", __func__);
- return;
- }
-
- pd.num_cs = num_cs;
- pd.src_clk_nr = src_clk_nr;
- pd.cfg_gpio = (cfg_gpio) ? cfg_gpio : s3c64xx_spi1_cfg_gpio;
-
- s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi1);
-}
-#endif /* CONFIG_S3C64XX_DEV_SPI1 */
-
-#ifdef CONFIG_S3C64XX_DEV_SPI2
-static struct resource s3c64xx_spi2_resource[] = {
- [0] = DEFINE_RES_MEM(S3C_PA_SPI2, SZ_256),
- [1] = DEFINE_RES_IRQ(IRQ_SPI2),
-};
-
-struct platform_device s3c64xx_device_spi2 = {
- .name = "s3c6410-spi",
- .id = 2,
- .num_resources = ARRAY_SIZE(s3c64xx_spi2_resource),
- .resource = s3c64xx_spi2_resource,
- .dev = {
- .dma_mask = &samsung_device_dma_mask,
- .coherent_dma_mask = DMA_BIT_MASK(32),
- },
-};
-
-void __init s3c64xx_spi2_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
- int num_cs)
-{
- struct s3c64xx_spi_info pd;
-
- /* Reject invalid configuration */
- if (!num_cs || src_clk_nr < 0) {
- pr_err("%s: Invalid SPI configuration\n", __func__);
- return;
- }
-
- pd.num_cs = num_cs;
- pd.src_clk_nr = src_clk_nr;
- pd.cfg_gpio = (cfg_gpio) ? cfg_gpio : s3c64xx_spi2_cfg_gpio;
-
- s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi2);
-}
-#endif /* CONFIG_S3C64XX_DEV_SPI2 */
diff --git a/arch/arm/mach-s3c/mach-crag6410-module.c b/arch/arm/mach-s3c/mach-crag6410-module.c
index 407ad493493e..5d1d4b67a4b7 100644
--- a/arch/arm/mach-s3c/mach-crag6410-module.c
+++ b/arch/arm/mach-s3c/mach-crag6410-module.c
@@ -32,10 +32,6 @@
#include "crag6410.h"
-static struct s3c64xx_spi_csinfo wm0010_spi_csinfo = {
- .line = S3C64XX_GPC(3),
-};
-
static struct wm0010_pdata wm0010_pdata = {
.gpio_reset = S3C64XX_GPN(6),
.reset_active_high = 1, /* Active high for Glenfarclas Rev 2 */
@@ -49,7 +45,6 @@ static struct spi_board_info wm1253_devs[] = {
.chip_select = 0,
.mode = SPI_MODE_0,
.irq = S3C_EINT(4),
- .controller_data = &wm0010_spi_csinfo,
.platform_data = &wm0010_pdata,
},
};
@@ -62,7 +57,6 @@ static struct spi_board_info balblair_devs[] = {
.chip_select = 0,
.mode = SPI_MODE_0,
.irq = S3C_EINT(4),
- .controller_data = &wm0010_spi_csinfo,
.platform_data = &wm0010_pdata,
},
};
@@ -229,10 +223,6 @@ static struct arizona_pdata wm5102_reva_pdata = {
},
};
-static struct s3c64xx_spi_csinfo codec_spi_csinfo = {
- .line = S3C64XX_GPN(5),
-};
-
static struct spi_board_info wm5102_reva_spi_devs[] = {
[0] = {
.modalias = "wm5102",
@@ -242,7 +232,6 @@ static struct spi_board_info wm5102_reva_spi_devs[] = {
.mode = SPI_MODE_0,
.irq = GLENFARCLAS_PMIC_IRQ_BASE +
WM831X_IRQ_GPIO_2,
- .controller_data = &codec_spi_csinfo,
.platform_data = &wm5102_reva_pdata,
},
};
@@ -275,7 +264,6 @@ static struct spi_board_info wm5102_spi_devs[] = {
.mode = SPI_MODE_0,
.irq = GLENFARCLAS_PMIC_IRQ_BASE +
WM831X_IRQ_GPIO_2,
- .controller_data = &codec_spi_csinfo,
.platform_data = &wm5102_pdata,
},
};
@@ -298,7 +286,6 @@ static struct spi_board_info wm5110_spi_devs[] = {
.mode = SPI_MODE_0,
.irq = GLENFARCLAS_PMIC_IRQ_BASE +
WM831X_IRQ_GPIO_2,
- .controller_data = &codec_spi_csinfo,
.platform_data = &wm5102_reva_pdata,
},
};
diff --git a/arch/arm/mach-s3c/mach-crag6410.c b/arch/arm/mach-s3c/mach-crag6410.c
index 4a12c75d407f..e3e0fe897bcc 100644
--- a/arch/arm/mach-s3c/mach-crag6410.c
+++ b/arch/arm/mach-s3c/mach-crag6410.c
@@ -825,6 +825,15 @@ static const struct gpio_led_platform_data gpio_leds_pdata = {
static struct dwc2_hsotg_plat crag6410_hsotg_pdata;
+static struct gpiod_lookup_table crag_spi0_gpiod_table = {
+ .dev_id = "s3c6410-spi.0",
+ .table = {
+ GPIO_LOOKUP_IDX("GPIOC", 3, "cs", 0, GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP_IDX("GPION", 5, "cs", 1, GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
static void __init crag6410_machine_init(void)
{
/* Open drain IRQs need pullups */
@@ -856,7 +865,9 @@ static void __init crag6410_machine_init(void)
i2c_register_board_info(1, i2c_devs1, ARRAY_SIZE(i2c_devs1));
samsung_keypad_set_platdata(&crag6410_keypad_data);
- s3c64xx_spi0_set_platdata(NULL, 0, 2);
+
+ gpiod_add_lookup_table(&crag_spi0_gpiod_table);
+ s3c64xx_spi0_set_platdata(0, 2);
pwm_add_table(crag6410_pwm_lookup, ARRAY_SIZE(crag6410_pwm_lookup));
platform_add_devices(crag6410_devices, ARRAY_SIZE(crag6410_devices));
diff --git a/arch/arm/mach-s3c/setup-spi-s3c64xx.c b/arch/arm/mach-s3c/setup-spi-s3c64xx.c
index efcf78d41585..497aff71c29c 100644
--- a/arch/arm/mach-s3c/setup-spi-s3c64xx.c
+++ b/arch/arm/mach-s3c/setup-spi-s3c64xx.c
@@ -16,12 +16,3 @@ int s3c64xx_spi0_cfg_gpio(void)
return 0;
}
#endif
-
-#ifdef CONFIG_S3C64XX_DEV_SPI1
-int s3c64xx_spi1_cfg_gpio(void)
-{
- s3c_gpio_cfgall_range(S3C64XX_GPC(4), 3,
- S3C_GPIO_SFN(2), S3C_GPIO_PULL_UP);
- return 0;
-}
-#endif
diff --git a/arch/arm/mach-s3c/spi-core-s3c24xx.h b/arch/arm/mach-s3c/spi-core-s3c24xx.h
index 057667469cc3..919c5fd0c9af 100644
--- a/arch/arm/mach-s3c/spi-core-s3c24xx.h
+++ b/arch/arm/mach-s3c/spi-core-s3c24xx.h
@@ -16,12 +16,6 @@ static inline void s3c24xx_spi_setname(char *name)
#ifdef CONFIG_S3C64XX_DEV_SPI0
s3c64xx_device_spi0.name = name;
#endif
-#ifdef CONFIG_S3C64XX_DEV_SPI1
- s3c64xx_device_spi1.name = name;
-#endif
-#ifdef CONFIG_S3C64XX_DEV_SPI2
- s3c64xx_device_spi2.name = name;
-#endif
}
#endif /* __PLAT_S3C_SPI_CORE_S3C24XX_H */
diff --git a/arch/arm/mach-socfpga/Kconfig b/arch/arm/mach-socfpga/Kconfig
index 43ddec677c0b..594edf9bbea4 100644
--- a/arch/arm/mach-socfpga/Kconfig
+++ b/arch/arm/mach-socfpga/Kconfig
@@ -2,6 +2,7 @@
menuconfig ARCH_INTEL_SOCFPGA
bool "Altera SOCFPGA family"
depends on ARCH_MULTI_V7
+ select ARCH_HAS_RESET_CONTROLLER
select ARCH_SUPPORTS_BIG_ENDIAN
select ARM_AMBA
select ARM_GIC
@@ -18,6 +19,7 @@ menuconfig ARCH_INTEL_SOCFPGA
select PL310_ERRATA_727915
select PL310_ERRATA_753970 if PL310
select PL310_ERRATA_769419
+ select RESET_CONTROLLER
if ARCH_INTEL_SOCFPGA
config SOCFPGA_SUSPEND
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 58afba346729..9724c16e9076 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -830,6 +830,7 @@ config CPU_BPREDICT_DISABLE
config CPU_SPECTRE
bool
+ select GENERIC_CPU_VULNERABILITIES
config HARDEN_BRANCH_PREDICTOR
bool "Harden the branch predictor against aliasing attacks" if EXPERT
@@ -850,6 +851,16 @@ config HARDEN_BRANCH_PREDICTOR
If unsure, say Y.
+config HARDEN_BRANCH_HISTORY
+ bool "Harden Spectre style attacks against branch history" if EXPERT
+ depends on CPU_SPECTRE
+ default y
+ help
+ Speculation attacks against some high-performance processors can
+ make use of branch history to influence future speculation. When
+ taking an exception, a sequence of branches overwrites the branch
+ history, or the branch history is invalidated.
+
config TLS_REG_EMUL
bool
select NEED_KUSER_HELPERS
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 6d0cb0f7bc54..fe249ea91908 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -164,47 +164,6 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align)
return phys;
}
-static void __init arm_initrd_init(void)
-{
-#ifdef CONFIG_BLK_DEV_INITRD
- phys_addr_t start;
- unsigned long size;
-
- initrd_start = initrd_end = 0;
-
- if (!phys_initrd_size)
- return;
-
- /*
- * Round the memory region to page boundaries as per free_initrd_mem()
- * This allows us to detect whether the pages overlapping the initrd
- * are in use, but more importantly, reserves the entire set of pages
- * as we don't want these pages allocated for other purposes.
- */
- start = round_down(phys_initrd_start, PAGE_SIZE);
- size = phys_initrd_size + (phys_initrd_start - start);
- size = round_up(size, PAGE_SIZE);
-
- if (!memblock_is_region_memory(start, size)) {
- pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n",
- (u64)start, size);
- return;
- }
-
- if (memblock_is_region_reserved(start, size)) {
- pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n",
- (u64)start, size);
- return;
- }
-
- memblock_reserve(start, size);
-
- /* Now convert initrd to virtual addresses */
- initrd_start = __phys_to_virt(phys_initrd_start);
- initrd_end = initrd_start + phys_initrd_size;
-#endif
-}
-
#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND
void check_cpu_icache_size(int cpuid)
{
@@ -226,7 +185,7 @@ void __init arm_memblock_init(const struct machine_desc *mdesc)
/* Register the kernel text, kernel data and initrd with memblock. */
memblock_reserve(__pa(KERNEL_START), KERNEL_END - KERNEL_START);
- arm_initrd_init();
+ reserve_initrd_mem();
arm_mm_memblock_reserve();
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 274e4f73fd33..5e2be37a198e 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -212,12 +212,14 @@ early_param("ecc", early_ecc);
static int __init early_cachepolicy(char *p)
{
pr_warn("cachepolicy kernel parameter not supported without cp15\n");
+ return 0;
}
early_param("cachepolicy", early_cachepolicy);
static int __init noalign_setup(char *__unused)
{
pr_warn("noalign kernel parameter not supported without cp15\n");
+ return 1;
}
__setup("noalign", noalign_setup);
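
Both fixes above supply return values these handlers were missing; falling off the end of a non-void function is undefined behaviour. The conventions being restored, as generally used for kernel parameter handlers: early_param() callbacks return 0 on success, while __setup() callbacks return 1 to mark the option as consumed. Minimal sketch:

	static int __init example_early(char *p)
	{
		return 0;	/* early_param: 0 = handled */
	}
	early_param("example", example_early);

	static int __init example_setup(char *p)
	{
		return 1;	/* __setup: non-zero = option consumed */
	}
	__setup("example=", example_setup);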
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
index 114c05ab4dd9..06dbfb968182 100644
--- a/arch/arm/mm/proc-v7-bugs.c
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -6,8 +6,35 @@
#include <asm/cp15.h>
#include <asm/cputype.h>
#include <asm/proc-fns.h>
+#include <asm/spectre.h>
#include <asm/system_misc.h>
+#ifdef CONFIG_ARM_PSCI
+static int __maybe_unused spectre_v2_get_cpu_fw_mitigation_state(void)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+
+ switch ((int)res.a0) {
+ case SMCCC_RET_SUCCESS:
+ return SPECTRE_MITIGATED;
+
+ case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED:
+ return SPECTRE_UNAFFECTED;
+
+ default:
+ return SPECTRE_VULNERABLE;
+ }
+}
+#else
+static int __maybe_unused spectre_v2_get_cpu_fw_mitigation_state(void)
+{
+ return SPECTRE_VULNERABLE;
+}
+#endif
+
#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
DEFINE_PER_CPU(harden_branch_predictor_fn_t, harden_branch_predictor_fn);
@@ -36,13 +63,61 @@ static void __maybe_unused call_hvc_arch_workaround_1(void)
arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
}
-static void cpu_v7_spectre_init(void)
+static unsigned int spectre_v2_install_workaround(unsigned int method)
{
const char *spectre_v2_method = NULL;
int cpu = smp_processor_id();
if (per_cpu(harden_branch_predictor_fn, cpu))
- return;
+ return SPECTRE_MITIGATED;
+
+ switch (method) {
+ case SPECTRE_V2_METHOD_BPIALL:
+ per_cpu(harden_branch_predictor_fn, cpu) =
+ harden_branch_predictor_bpiall;
+ spectre_v2_method = "BPIALL";
+ break;
+
+ case SPECTRE_V2_METHOD_ICIALLU:
+ per_cpu(harden_branch_predictor_fn, cpu) =
+ harden_branch_predictor_iciallu;
+ spectre_v2_method = "ICIALLU";
+ break;
+
+ case SPECTRE_V2_METHOD_HVC:
+ per_cpu(harden_branch_predictor_fn, cpu) =
+ call_hvc_arch_workaround_1;
+ cpu_do_switch_mm = cpu_v7_hvc_switch_mm;
+ spectre_v2_method = "hypervisor";
+ break;
+
+ case SPECTRE_V2_METHOD_SMC:
+ per_cpu(harden_branch_predictor_fn, cpu) =
+ call_smc_arch_workaround_1;
+ cpu_do_switch_mm = cpu_v7_smc_switch_mm;
+ spectre_v2_method = "firmware";
+ break;
+ }
+
+ if (spectre_v2_method)
+ pr_info("CPU%u: Spectre v2: using %s workaround\n",
+ smp_processor_id(), spectre_v2_method);
+
+ return SPECTRE_MITIGATED;
+}
+#else
+static unsigned int spectre_v2_install_workaround(unsigned int method)
+{
+ pr_info("CPU%u: Spectre V2: workarounds disabled by configuration\n",
+ smp_processor_id());
+
+ return SPECTRE_VULNERABLE;
+}
+#endif
+
+static void cpu_v7_spectre_v2_init(void)
+{
+ unsigned int state, method = 0;
switch (read_cpuid_part()) {
case ARM_CPU_PART_CORTEX_A8:
@@ -51,69 +126,133 @@ static void cpu_v7_spectre_init(void)
case ARM_CPU_PART_CORTEX_A17:
case ARM_CPU_PART_CORTEX_A73:
case ARM_CPU_PART_CORTEX_A75:
- per_cpu(harden_branch_predictor_fn, cpu) =
- harden_branch_predictor_bpiall;
- spectre_v2_method = "BPIALL";
+ state = SPECTRE_MITIGATED;
+ method = SPECTRE_V2_METHOD_BPIALL;
break;
case ARM_CPU_PART_CORTEX_A15:
case ARM_CPU_PART_BRAHMA_B15:
- per_cpu(harden_branch_predictor_fn, cpu) =
- harden_branch_predictor_iciallu;
- spectre_v2_method = "ICIALLU";
+ state = SPECTRE_MITIGATED;
+ method = SPECTRE_V2_METHOD_ICIALLU;
break;
-#ifdef CONFIG_ARM_PSCI
case ARM_CPU_PART_BRAHMA_B53:
/* Requires no workaround */
+ state = SPECTRE_UNAFFECTED;
break;
+
default:
/* Other ARM CPUs require no workaround */
- if (read_cpuid_implementor() == ARM_CPU_IMP_ARM)
+ if (read_cpuid_implementor() == ARM_CPU_IMP_ARM) {
+ state = SPECTRE_UNAFFECTED;
break;
+ }
+
fallthrough;
- /* Cortex A57/A72 require firmware workaround */
- case ARM_CPU_PART_CORTEX_A57:
- case ARM_CPU_PART_CORTEX_A72: {
- struct arm_smccc_res res;
- arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_1, &res);
- if ((int)res.a0 != 0)
- return;
+ /* Cortex A57/A72 require firmware workaround */
+ case ARM_CPU_PART_CORTEX_A57:
+ case ARM_CPU_PART_CORTEX_A72:
+ state = spectre_v2_get_cpu_fw_mitigation_state();
+ if (state != SPECTRE_MITIGATED)
+ break;
switch (arm_smccc_1_1_get_conduit()) {
case SMCCC_CONDUIT_HVC:
- per_cpu(harden_branch_predictor_fn, cpu) =
- call_hvc_arch_workaround_1;
- cpu_do_switch_mm = cpu_v7_hvc_switch_mm;
- spectre_v2_method = "hypervisor";
+ method = SPECTRE_V2_METHOD_HVC;
break;
case SMCCC_CONDUIT_SMC:
- per_cpu(harden_branch_predictor_fn, cpu) =
- call_smc_arch_workaround_1;
- cpu_do_switch_mm = cpu_v7_smc_switch_mm;
- spectre_v2_method = "firmware";
+ method = SPECTRE_V2_METHOD_SMC;
break;
default:
+ state = SPECTRE_VULNERABLE;
break;
}
}
-#endif
+
+ if (state == SPECTRE_MITIGATED)
+ state = spectre_v2_install_workaround(method);
+
+ spectre_v2_update_state(state, method);
+}
+
+#ifdef CONFIG_HARDEN_BRANCH_HISTORY
+static int spectre_bhb_method;
+
+static const char *spectre_bhb_method_name(int method)
+{
+ switch (method) {
+ case SPECTRE_V2_METHOD_LOOP8:
+ return "loop";
+
+ case SPECTRE_V2_METHOD_BPIALL:
+ return "BPIALL";
+
+ default:
+ return "unknown";
}
+}
- if (spectre_v2_method)
- pr_info("CPU%u: Spectre v2: using %s workaround\n",
- smp_processor_id(), spectre_v2_method);
+static int spectre_bhb_install_workaround(int method)
+{
+ if (spectre_bhb_method != method) {
+ if (spectre_bhb_method) {
+ pr_err("CPU%u: Spectre BHB: method disagreement, system vulnerable\n",
+ smp_processor_id());
+
+ return SPECTRE_VULNERABLE;
+ }
+
+ if (spectre_bhb_update_vectors(method) == SPECTRE_VULNERABLE)
+ return SPECTRE_VULNERABLE;
+
+ spectre_bhb_method = method;
+ }
+
+ pr_info("CPU%u: Spectre BHB: using %s workaround\n",
+ smp_processor_id(), spectre_bhb_method_name(method));
+
+ return SPECTRE_MITIGATED;
}
#else
-static void cpu_v7_spectre_init(void)
+static int spectre_bhb_install_workaround(int method)
{
+ return SPECTRE_VULNERABLE;
}
#endif
+static void cpu_v7_spectre_bhb_init(void)
+{
+ unsigned int state, method = 0;
+
+ switch (read_cpuid_part()) {
+ case ARM_CPU_PART_CORTEX_A15:
+ case ARM_CPU_PART_BRAHMA_B15:
+ case ARM_CPU_PART_CORTEX_A57:
+ case ARM_CPU_PART_CORTEX_A72:
+ state = SPECTRE_MITIGATED;
+ method = SPECTRE_V2_METHOD_LOOP8;
+ break;
+
+ case ARM_CPU_PART_CORTEX_A73:
+ case ARM_CPU_PART_CORTEX_A75:
+ state = SPECTRE_MITIGATED;
+ method = SPECTRE_V2_METHOD_BPIALL;
+ break;
+
+ default:
+ state = SPECTRE_UNAFFECTED;
+ break;
+ }
+
+ if (state == SPECTRE_MITIGATED)
+ state = spectre_bhb_install_workaround(method);
+
+ spectre_v2_update_state(state, method);
+}
+
static __maybe_unused bool cpu_v7_check_auxcr_set(bool *warned,
u32 mask, const char *msg)
{
@@ -142,16 +281,17 @@ static bool check_spectre_auxcr(bool *warned, u32 bit)
void cpu_v7_ca8_ibe(void)
{
if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(6)))
- cpu_v7_spectre_init();
+ cpu_v7_spectre_v2_init();
}
void cpu_v7_ca15_ibe(void)
{
if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0)))
- cpu_v7_spectre_init();
+ cpu_v7_spectre_v2_init();
}
void cpu_v7_bugs_init(void)
{
- cpu_v7_spectre_init();
+ cpu_v7_spectre_v2_init();
+ cpu_v7_spectre_bhb_init();
}
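
Unlike the per-CPU Spectre v2 callbacks, the BHB workaround is system-wide: spectre_bhb_install_workaround() records the first method installed and rejects any CPU that later asks for a different one, since the single vectors page can hold only one table. A hypothetical big.LITTLE sequence:

	/* First CPU is a Cortex-A72, a later one a Cortex-A73: */
	cpu_v7_spectre_bhb_init();  /* A72: installs SPECTRE_V2_METHOD_LOOP8 */
	cpu_v7_spectre_bhb_init();  /* A73: wants BPIALL, disagrees with the
				     * recorded spectre_bhb_method, so the
				     * system is reported vulnerable rather
				     * than left half-mitigated. */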
diff --git a/arch/arm/probes/kprobes/Makefile b/arch/arm/probes/kprobes/Makefile
index 14db56f49f0a..6159010dac4a 100644
--- a/arch/arm/probes/kprobes/Makefile
+++ b/arch/arm/probes/kprobes/Makefile
@@ -1,4 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
+KASAN_SANITIZE_actions-common.o := n
+KASAN_SANITIZE_actions-arm.o := n
+KASAN_SANITIZE_actions-thumb.o := n
obj-$(CONFIG_KPROBES) += core.o actions-common.o checkers-common.o
obj-$(CONFIG_ARM_KPROBES_TEST) += test-kprobes.o
test-kprobes-objs := test-core.o
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index 7c9e395b77f7..ec52b776f926 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -18,7 +18,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO32
ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8
ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \
- -z max-page-size=4096 -nostdlib -shared $(ldflags-y) \
+ -z max-page-size=4096 -shared $(ldflags-y) \
--hash-style=sysv --build-id=sha1 \
-T
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6978140edfa4..13aa8d955c46 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -10,6 +10,7 @@ config ARM64
select ACPI_SPCR_TABLE if ACPI
select ACPI_PPTT if ACPI
select ARCH_HAS_DEBUG_WX
+ select ARCH_BINFMT_ELF_EXTRA_PHDRS
select ARCH_BINFMT_ELF_STATE
select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
@@ -18,6 +19,7 @@ config ARM64
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_HAS_CACHE_LINE_SIZE
+ select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DMA_PREP_COHERENT
@@ -192,6 +194,7 @@ config ARM64
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_DYNAMIC_KEY
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_FUNCTION_ARG_ACCESS_API
@@ -670,15 +673,42 @@ config ARM64_ERRATUM_1508412
config ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
bool
+config ARM64_ERRATUM_2051678
+ bool "Cortex-A510: 2051678: disable Hardware Update of the page table dirty bit"
+ default y
+ help
+ This option adds the workaround for ARM Cortex-A510 erratum 2051678.
+ Affected Cortex-A510 cores might not respect the ordering rules for
+ hardware update of the page table's dirty bit. The workaround
+ is to not enable the feature on affected CPUs.
+
+ If unsure, say Y.
+
+config ARM64_ERRATUM_2077057
+ bool "Cortex-A510: 2077057: workaround software-step corrupting SPSR_EL2"
+ help
+ This option adds the workaround for ARM Cortex-A510 erratum 2077057.
+ Affected Cortex-A510 cores may corrupt SPSR_EL2 when a step exception is
+ expected, but a Pointer Authentication trap is taken instead. The
+ erratum causes SPSR_EL1 to be copied to SPSR_EL2, which could allow
+ EL1 to cause a return to EL2 with a guest controlled ELR_EL2.
+
+ This can only happen when EL2 is stepping EL1.
+
+ When these conditions occur, the SPSR_EL2 value is unchanged from the
+ previous guest entry, and can be restored from the in-memory copy.
+
+ If unsure, say Y.
+
config ARM64_ERRATUM_2119858
- bool "Cortex-A710: 2119858: workaround TRBE overwriting trace data in FILL mode"
+ bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode"
default y
depends on CORESIGHT_TRBE
select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
help
- This option adds the workaround for ARM Cortex-A710 erratum 2119858.
+ This option adds the workaround for ARM Cortex-A710/X2 erratum 2119858.
- Affected Cortex-A710 cores could overwrite up to 3 cache lines of trace
+ Affected Cortex-A710/X2 cores could overwrite up to 3 cache lines of trace
data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in
the event of a WRAP event.
@@ -761,14 +791,14 @@ config ARM64_ERRATUM_2253138
If unsure, say Y.
config ARM64_ERRATUM_2224489
- bool "Cortex-A710: 2224489: workaround TRBE writing to address out-of-range"
+ bool "Cortex-A710/X2: 2224489: workaround TRBE writing to address out-of-range"
depends on CORESIGHT_TRBE
default y
select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
help
- This option adds the workaround for ARM Cortex-A710 erratum 2224489.
+ This option adds the workaround for ARM Cortex-A710/X2 erratum 2224489.
- Affected Cortex-A710 cores might write to an out-of-range address, not reserved
+ Affected Cortex-A710/X2 cores might write to an out-of-range address, not reserved
for TRBE. Under some conditions, the TRBE might generate a write to the next
virtually addressed page following the last page of the TRBE address space
(i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base.
@@ -778,6 +808,65 @@ config ARM64_ERRATUM_2224489
If unsure, say Y.
+config ARM64_ERRATUM_2064142
+ bool "Cortex-A510: 2064142: workaround TRBE register writes while disabled"
+ depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in
+ default y
+ help
+ This option adds the workaround for ARM Cortex-A510 erratum 2064142.
+
+ Affected Cortex-A510 cores might fail to write to system registers after the
+ TRBE has been disabled. Under some conditions, after the TRBE has been
+ disabled, writes to the TRBE registers TRBLIMITR_EL1, TRBPTR_EL1,
+ TRBBASER_EL1, TRBSR_EL1 and TRBTRG_EL1 will be ignored and will not take
+ effect.
+
+ Work around this in the driver by executing TSB CSYNC and DSB after collection
+ is stopped and before performing a system register write to one of the affected
+ registers.
+
+ If unsure, say Y.
+
+config ARM64_ERRATUM_2038923
+ bool "Cortex-A510: 2038923: workaround TRBE corruption with enable"
+ depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in
+ default y
+ help
+ This option adds the workaround for ARM Cortex-A510 erratum 2038923.
+
+ Affected Cortex-A510 cores might hold an inconsistent view, between parts of
+ the CPU, of whether trace is prohibited. As a result, the trace buffer or
+ trace buffer state might be corrupted. This happens after the TRBE buffer has
+ been enabled by setting TRBLIMITR_EL1.E, followed by just a single context
+ synchronization event before execution changes from a context in which trace
+ is prohibited to one where it isn't, or vice versa.
+
+ Work around this in the driver by immediately following any change to
+ TRBLIMITR_EL1.E with at least one ISB instruction before an ERET, or two ISB
+ instructions if no ERET is to take place, so that all parts of the CPU hold a
+ consistent view of whether trace is prohibited.
+
+ If unsure, say Y.
+
+config ARM64_ERRATUM_1902691
+ bool "Cortex-A510: 1902691: workaround TRBE trace corruption"
+ depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in
+ default y
+ help
+ This option adds the workaround for ARM Cortex-A510 erratum 1902691.
+
+ Affected Cortex-A510 cores might corrupt trace data as it is written to
+ memory. Effectively the TRBE is broken and hence cannot be used to capture
+ trace data.
+
+ Work around this problem in the driver by preventing TRBE initialization on
+ affected CPUs. The firmware must have disabled access to the TRBE for the
+ kernel on such implementations; this covers the kernel for any firmware that
+ doesn't do so already.
+
+ If unsure, say Y.
+
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
@@ -805,13 +894,17 @@ config CAVIUM_ERRATUM_23144
If unsure, say Y.
config CAVIUM_ERRATUM_23154
- bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed"
+ bool "Cavium errata 23154 and 38545: GICv3 lacks HW synchronisation"
default y
help
- The gicv3 of ThunderX requires a modified version for
+ The ThunderX GICv3 implementation requires a modified sequence for
reading the IAR status to ensure data synchronization
(access to icc_iar1_el1 is not sync'ed before and after).
+ It also suffers from erratum 38545 (also present on Marvell's
+ OcteonTX and OcteonTX2), resulting in deactivated interrupts being
+ spuriously presented to the CPU interface.
+
If unsure, say Y.
config CAVIUM_ERRATUM_27456
@@ -1166,10 +1259,7 @@ config HW_PERF_EVENTS
def_bool y
depends on ARM_PMU
-config ARCH_HAS_FILTER_PGPROT
- def_bool y
-
-# Supported by clang >= 7.0
+# Supported by clang >= 7.0 or GCC >= 12.0.0
config CC_HAVE_SHADOW_CALL_STACK
def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
@@ -1297,6 +1387,15 @@ config UNMAP_KERNEL_AT_EL0
If unsure, say Y.
+config MITIGATE_SPECTRE_BRANCH_HISTORY
+ bool "Mitigate Spectre style attacks against branch history" if EXPERT
+ default y
+ help
+ Speculation attacks against some high-performance processors can
+ make use of branch history to influence future speculation.
+ When taking an exception from user-space, a sequence of branches
+ or a firmware call overwrites the branch history.
+
config RODATA_FULL_DEFAULT_ENABLED
bool "Apply r/o permissions of VM areas also to their linear aliases"
default y
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 7d5d58800170..21697449d762 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -309,9 +309,6 @@ config ARCH_VISCONTI
help
This enables support for Toshiba Visconti SoCs Family.
-config ARCH_VULCAN
- def_bool n
-
config ARCH_XGENE
bool "AppliedMicro X-Gene SOC Family"
help
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
index 517519e6e87f..f84d4b489e0b 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
@@ -107,6 +107,12 @@
no-map;
};
+ /* 32 MiB reserved for ARM Trusted Firmware (BL32) */
+ secmon_reserved_bl32: secmon@5300000 {
+ reg = <0x0 0x05300000 0x0 0x2000000>;
+ no-map;
+ };
+
linux,cma {
compatible = "shared-dma-pool";
reusable;
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts
index d8838dde0f0f..4fb31c2ba31c 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts
@@ -157,14 +157,6 @@
regulator-always-on;
};
- reserved-memory {
- /* TEE Reserved Memory */
- bl32_reserved: bl32@5000000 {
- reg = <0x0 0x05300000 0x0 0x2000000>;
- no-map;
- };
- };
-
sdio_pwrseq: sdio-pwrseq {
compatible = "mmc-pwrseq-simple";
reset-gpios = <&gpio GPIOX_6 GPIO_ACTIVE_LOW>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi
index 3e968b244191..fd3fa82e4c33 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi
@@ -17,7 +17,7 @@
rtc1 = &vrtc;
};
- dioo2133: audio-amplifier-0 {
+ dio2133: audio-amplifier-0 {
compatible = "simple-audio-amplifier";
enable-gpios = <&gpio_ao GPIOAO_2 GPIO_ACTIVE_HIGH>;
VCC-supply = <&vcc_5v>;
@@ -219,7 +219,7 @@
audio-widgets = "Line", "Lineout";
audio-aux-devs = <&tdmout_b>, <&tdmout_c>, <&tdmin_a>,
<&tdmin_b>, <&tdmin_c>, <&tdmin_lb>,
- <&dioo2133>;
+ <&dio2133>;
audio-routing = "TDMOUT_B IN 0", "FRDDR_A OUT 1",
"TDMOUT_B IN 1", "FRDDR_B OUT 1",
"TDMOUT_B IN 2", "FRDDR_C OUT 1",
diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
index 6b457b2c30a4..aa14ea017a61 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
@@ -49,6 +49,12 @@
no-map;
};
+ /* 32 MiB reserved for ARM Trusted Firmware (BL32) */
+ secmon_reserved_bl32: secmon@5300000 {
+ reg = <0x0 0x05300000 0x0 0x2000000>;
+ no-map;
+ };
+
linux,cma {
compatible = "shared-dma-pool";
reusable;
diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts
index 212c6aa5a3b8..5751c48620ed 100644
--- a/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts
@@ -123,7 +123,7 @@
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <3300000>;
- enable-gpio = <&gpio GPIOE_2 GPIO_ACTIVE_HIGH>;
+ enable-gpio = <&gpio_ao GPIOE_2 GPIO_ACTIVE_HIGH>;
enable-active-high;
regulator-always-on;
diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi
index 0bd1e98a0eef..ddb1b345397f 100644
--- a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi
@@ -48,7 +48,7 @@
regulator-max-microvolt = <3300000>;
vin-supply = <&vcc_5v>;
- enable-gpio = <&gpio GPIOE_2 GPIO_ACTIVE_HIGH>;
+ enable-gpio = <&gpio_ao GPIOE_2 GPIO_OPEN_DRAIN>;
enable-active-high;
regulator-always-on;
diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts
index 427475846fc7..a5d79f2f7c19 100644
--- a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts
@@ -203,14 +203,6 @@
regulator-always-on;
};
- reserved-memory {
- /* TEE Reserved Memory */
- bl32_reserved: bl32@5000000 {
- reg = <0x0 0x05300000 0x0 0x2000000>;
- no-map;
- };
- };
-
sdio_pwrseq: sdio-pwrseq {
compatible = "mmc-pwrseq-simple";
reset-gpios = <&gpio GPIOX_6 GPIO_ACTIVE_LOW>;
diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi
index 19afbc91020a..9f8f4145db88 100644
--- a/arch/arm64/boot/dts/apple/t8103.dtsi
+++ b/arch/arm64/boot/dts/apple/t8103.dtsi
@@ -97,6 +97,18 @@
<AIC_FIQ AIC_TMR_HV_VIRT IRQ_TYPE_LEVEL_HIGH>;
};
+ pmu-e {
+ compatible = "apple,icestorm-pmu";
+ interrupt-parent = <&aic>;
+ interrupts = <AIC_FIQ AIC_CPU_PMU_E IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ pmu-p {
+ compatible = "apple,firestorm-pmu";
+ interrupt-parent = <&aic>;
+ interrupts = <AIC_FIQ AIC_CPU_PMU_P IRQ_TYPE_LEVEL_HIGH>;
+ };
+
clkref: clock-ref {
compatible = "fixed-clock";
#clock-cells = <0>;
@@ -213,6 +225,18 @@
interrupt-controller;
reg = <0x2 0x3b100000 0x0 0x8000>;
power-domains = <&ps_aic>;
+
+ affinities {
+ e-core-pmu-affinity {
+ apple,fiq-index = <AIC_CPU_PMU_E>;
+ cpus = <&cpu0 &cpu1 &cpu2 &cpu3>;
+ };
+
+ p-core-pmu-affinity {
+ apple,fiq-index = <AIC_CPU_PMU_P>;
+ cpus = <&cpu4 &cpu5 &cpu6 &cpu7>;
+ };
+ };
};
pmgr: power-management@23b700000 {
diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi
index 6288e104a089..a2635b14da30 100644
--- a/arch/arm64/boot/dts/arm/juno-base.dtsi
+++ b/arch/arm64/boot/dts/arm/juno-base.dtsi
@@ -543,8 +543,7 @@
<0x02000000 0x00 0x50000000 0x00 0x50000000 0x0 0x08000000>,
<0x42000000 0x40 0x00000000 0x40 0x00000000 0x1 0x00000000>;
/* Standard AXI Translation entries as programmed by EDK2 */
- dma-ranges = <0x02000000 0x0 0x2c1c0000 0x0 0x2c1c0000 0x0 0x00040000>,
- <0x02000000 0x0 0x80000000 0x0 0x80000000 0x0 0x80000000>,
+ dma-ranges = <0x02000000 0x0 0x80000000 0x0 0x80000000 0x0 0x80000000>,
<0x43000000 0x8 0x00000000 0x8 0x00000000 0x2 0x00000000>;
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 7>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts
index d74e738e4070..c03f4e183389 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts
@@ -157,6 +157,10 @@
};
};
+&ftm_alarm0 {
+ status = "okay";
+};
+
&gpio1 {
gpio-line-names =
"", "", "", "", "", "", "", "",
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
index 3ed1f2c51cad..18e529118476 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
@@ -253,18 +253,18 @@
interrupt-controller;
reg = <0x14 4>;
interrupt-map =
- <0 0 &gic 0 0 GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
- <1 0 &gic 0 0 GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
- <2 0 &gic 0 0 GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
- <3 0 &gic 0 0 GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
- <4 0 &gic 0 0 GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
- <5 0 &gic 0 0 GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
- <6 0 &gic 0 0 GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
- <7 0 &gic 0 0 GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
- <8 0 &gic 0 0 GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
- <9 0 &gic 0 0 GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
- <10 0 &gic 0 0 GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
- <11 0 &gic 0 0 GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+ <0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+ <1 0 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+ <2 0 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
+ <3 0 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
+ <4 0 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
+ <5 0 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
+ <6 0 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
+ <7 0 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
+ <8 0 &gic GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
+ <9 0 &gic GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
+ <10 0 &gic GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
+ <11 0 &gic GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
interrupt-map-mask = <0xffffffff 0x0>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
index 3cb9c21d2775..1282b61da8a5 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
@@ -293,18 +293,18 @@
interrupt-controller;
reg = <0x14 4>;
interrupt-map =
- <0 0 &gic 0 0 GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
- <1 0 &gic 0 0 GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
- <2 0 &gic 0 0 GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
- <3 0 &gic 0 0 GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
- <4 0 &gic 0 0 GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
- <5 0 &gic 0 0 GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
- <6 0 &gic 0 0 GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
- <7 0 &gic 0 0 GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
- <8 0 &gic 0 0 GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
- <9 0 &gic 0 0 GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
- <10 0 &gic 0 0 GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
- <11 0 &gic 0 0 GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+ <0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+ <1 0 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+ <2 0 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
+ <3 0 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
+ <4 0 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
+ <5 0 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
+ <6 0 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
+ <7 0 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
+ <8 0 &gic GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
+ <9 0 &gic GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
+ <10 0 &gic GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
+ <11 0 &gic GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
interrupt-map-mask = <0xffffffff 0x0>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
index 7032505f5ef3..3c611cb4f5fe 100644
--- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
@@ -680,18 +680,18 @@
interrupt-controller;
reg = <0x14 4>;
interrupt-map =
- <0 0 &gic 0 0 GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
- <1 0 &gic 0 0 GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
- <2 0 &gic 0 0 GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
- <3 0 &gic 0 0 GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
- <4 0 &gic 0 0 GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
- <5 0 &gic 0 0 GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
- <6 0 &gic 0 0 GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
- <7 0 &gic 0 0 GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
- <8 0 &gic 0 0 GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
- <9 0 &gic 0 0 GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
- <10 0 &gic 0 0 GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
- <11 0 &gic 0 0 GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+ <0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+ <1 0 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+ <2 0 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
+ <3 0 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
+ <4 0 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
+ <5 0 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
+ <6 0 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
+ <7 0 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
+ <8 0 &gic GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
+ <9 0 &gic GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
+ <10 0 &gic GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
+ <11 0 &gic GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
interrupt-map-mask = <0xffffffff 0x0>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
index f77f90ed416f..0c7a72c51a31 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
@@ -707,7 +707,6 @@
clocks = <&clk IMX8MM_CLK_VPU_DEC_ROOT>;
assigned-clocks = <&clk IMX8MM_CLK_VPU_BUS>;
assigned-clock-parents = <&clk IMX8MM_SYS_PLL1_800M>;
- resets = <&src IMX8MQ_RESET_VPU_RESET>;
};
pgc_vpu_g1: power-domain@7 {
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi
index f3e3418f7edc..2d4a472af6a9 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi
@@ -1115,8 +1115,8 @@
status = "okay";
ports {
- port@1 {
- reg = <1>;
+ port@0 {
+ reg = <0>;
mipi1_sensor_ep: endpoint {
remote-endpoint = <&camera1_ep>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
index 2df2510d0118..e92ebb6147e6 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
@@ -554,7 +554,7 @@
assigned-clock-rates = <0>, <0>, <0>, <594000000>;
status = "disabled";
- port@0 {
+ port {
lcdif_mipi_dsi: endpoint {
remote-endpoint = <&mipi_dsi_lcdif_in>;
};
@@ -1151,8 +1151,8 @@
#address-cells = <1>;
#size-cells = <0>;
- port@0 {
- reg = <0>;
+ port@1 {
+ reg = <1>;
csi1_mipi_ep: endpoint {
remote-endpoint = <&csi1_ep>;
@@ -1203,8 +1203,8 @@
#address-cells = <1>;
#size-cells = <0>;
- port@0 {
- reg = <0>;
+ port@1 {
+ reg = <1>;
csi2_mipi_ep: endpoint {
remote-endpoint = <&csi2_ep>;
diff --git a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi
index a987ff7156bd..09f7364dd1d0 100644
--- a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi
@@ -132,7 +132,7 @@
scmi_sensor: protocol@15 {
reg = <0x15>;
- #thermal-sensor-cells = <0>;
+ #thermal-sensor-cells = <1>;
};
};
};
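With #thermal-sensor-cells = <1>, every consumer reference must now carry one
extra cell selecting the sensor, e.g. (hypothetical thermal-zone consumer):

	thermal-sensors = <&scmi_sensor 0>;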
diff --git a/arch/arm64/boot/dts/freescale/mba8mx.dtsi b/arch/arm64/boot/dts/freescale/mba8mx.dtsi
index f27e3c8de916..ce6d5bdba0a8 100644
--- a/arch/arm64/boot/dts/freescale/mba8mx.dtsi
+++ b/arch/arm64/boot/dts/freescale/mba8mx.dtsi
@@ -91,7 +91,7 @@
sound {
compatible = "fsl,imx-audio-tlv320aic32x4";
- model = "tqm-tlv320aic32";
+ model = "imx-audio-tlv320aic32x4";
ssi-controller = <&sai3>;
audio-codec = <&tlv320aic3x04>;
};
diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi
index 0dd2d2ee765a..f4270cf18996 100644
--- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi
+++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi
@@ -502,7 +502,7 @@
};
usb0: usb@ffb00000 {
- compatible = "snps,dwc2";
+ compatible = "intel,socfpga-agilex-hsotg", "snps,dwc2";
reg = <0xffb00000 0x40000>;
interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
phys = <&usbphy0>;
@@ -515,7 +515,7 @@
};
usb1: usb@ffb40000 {
- compatible = "snps,dwc2";
+ compatible = "intel,socfpga-agilex-hsotg", "snps,dwc2";
reg = <0xffb40000 0x40000>;
interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
phys = <&usbphy0>;
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
index 04da07ae4420..1cee26479bfe 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
+++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
@@ -18,6 +18,7 @@
aliases {
spi0 = &spi0;
+ ethernet0 = &eth0;
ethernet1 = &eth1;
mmc0 = &sdhci0;
mmc1 = &sdhci1;
@@ -138,7 +139,9 @@
/*
* U-Boot port for Turris Mox has a bug which always expects that "ranges" DT property
* contains exactly 2 ranges with 3 (child) address cells, 2 (parent) address cells and
- * 2 size cells and also expects that the second range starts at 16 MB offset. If these
+ * 2 size cells and also expects that the second range starts at 16 MB offset. It also
+ * expects that the first range uses the same address for the PCI (child) and CPU (parent)
+ * cells (i.e. no remapping) and that this address is the lowest of all specified ranges. If these
* conditions are not met then U-Boot crashes during loading kernel DTB file. PCIe address
* space is 128 MB long, so the best split between MEM and IO is to use fixed 16 MB window
* for IO and the rest 112 MB (64+32+16) for MEM, despite that maximal IO size is just 64 kB.
@@ -147,6 +150,9 @@
* https://source.denx.de/u-boot/u-boot/-/commit/cb2ddb291ee6fcbddd6d8f4ff49089dfe580f5d7
* https://source.denx.de/u-boot/u-boot/-/commit/c64ac3b3185aeb3846297ad7391fc6df8ecd73bf
* https://source.denx.de/u-boot/u-boot/-/commit/4a82fca8e330157081fc132a591ebd99ba02ee33
+ * The bug related to the requirement of identical child and parent addresses for the
+ * first range is fixed in U-Boot version 2022.04 by the following commit:
+ * https://source.denx.de/u-boot/u-boot/-/commit/1fd54253bca7d43d046bba4853fe5fafd034bc17
*/
#address-cells = <3>;
#size-cells = <2>;
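A hypothetical ranges layout consistent with the constraints described above
(two entries, the first identity-mapped at the lowest address, the second at a
16 MB offset, a fixed 16 MB IO window plus 112 MB of MEM; the addresses here
are illustrative, not taken from the patch):

	ranges = <0x81000000 0 0xe8000000  0 0xe8000000  0 0x01000000    /* IO,  16 MB */
		  0x82000000 0 0xe9000000  0 0xe9000000  0 0x07000000>;  /* MEM, 112 MB */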
diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
index 673f4906eef9..fb78ef613b29 100644
--- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
@@ -499,7 +499,7 @@
* (totaling 127 MiB) for MEM.
*/
ranges = <0x82000000 0 0xe8000000 0 0xe8000000 0 0x07f00000 /* Port 0 MEM */
- 0x81000000 0 0xefff0000 0 0xefff0000 0 0x00010000>; /* Port 0 IO */
+ 0x81000000 0 0x00000000 0 0xefff0000 0 0x00010000>; /* Port 0 IO */
interrupt-map-mask = <0 0 0 7>;
interrupt-map = <0 0 0 1 &pcie_intc 0>,
<0 0 0 2 &pcie_intc 1>,
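For orientation, each ranges entry reads <flags PCI-child-addr CPU-parent-addr size>;
the change above remaps the IO window so that PCI bus address 0x0 is served by CPU
address 0xefff0000, rather than requiring the bus address to equal the CPU address:

	/* <flags      PCI (child)     CPU (parent)    size> */
	0x81000000 0 0x00000000  0 0xefff0000  0 0x00010000   /* 64 KiB of IO at bus 0x0 */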
diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
index 2d48c3715fc6..aaa00da5351d 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
@@ -1584,7 +1584,7 @@
#iommu-cells = <1>;
nvidia,memory-controller = <&mc>;
- status = "okay";
+ status = "disabled";
};
smmu: iommu@12000000 {
diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
index 58845a14805f..e2b9ec134cb1 100644
--- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
+++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
@@ -807,3 +807,8 @@
qcom,snoc-host-cap-8bit-quirk;
};
+
+&crypto {
+ /* FIXME: qce_start triggers an SError */
+ status = "disabled";
+};
diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi
index 53b39e718fb6..4b19744bcfb3 100644
--- a/arch/arm64/boot/dts/qcom/sm8350.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi
@@ -35,6 +35,24 @@
clock-frequency = <32000>;
#clock-cells = <0>;
};
+
+ ufs_phy_rx_symbol_0_clk: ufs-phy-rx-symbol-0 {
+ compatible = "fixed-clock";
+ clock-frequency = <1000>;
+ #clock-cells = <0>;
+ };
+
+ ufs_phy_rx_symbol_1_clk: ufs-phy-rx-symbol-1 {
+ compatible = "fixed-clock";
+ clock-frequency = <1000>;
+ #clock-cells = <0>;
+ };
+
+ ufs_phy_tx_symbol_0_clk: ufs-phy-tx-symbol-0 {
+ compatible = "fixed-clock";
+ clock-frequency = <1000>;
+ #clock-cells = <0>;
+ };
};
cpus {
@@ -603,9 +621,9 @@
<0>,
<0>,
<0>,
- <0>,
- <0>,
- <0>,
+ <&ufs_phy_rx_symbol_0_clk>,
+ <&ufs_phy_rx_symbol_1_clk>,
+ <&ufs_phy_tx_symbol_0_clk>,
<0>,
<0>;
};
@@ -1923,8 +1941,8 @@
<75000000 300000000>,
<0 0>,
<0 0>,
- <75000000 300000000>,
- <75000000 300000000>;
+ <0 0>,
+ <0 0>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi
index 10c25ad2d0c7..02b97e838c47 100644
--- a/arch/arm64/boot/dts/qcom/sm8450.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi
@@ -726,7 +726,7 @@
compatible = "qcom,sm8450-smmu-500", "arm,mmu-500";
reg = <0 0x15000000 0 0x100000>;
#iommu-cells = <2>;
- #global-interrupts = <2>;
+ #global-interrupts = <1>;
interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>,
@@ -813,6 +813,7 @@
<GIC_SPI 412 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 421 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 707 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 423 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 424 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 425 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 690 IRQ_TYPE_LEVEL_HIGH>,
@@ -1072,9 +1073,10 @@
<&gcc GCC_USB30_PRIM_MASTER_CLK>,
<&gcc GCC_AGGRE_USB3_PRIM_AXI_CLK>,
<&gcc GCC_USB30_PRIM_MOCK_UTMI_CLK>,
- <&gcc GCC_USB30_PRIM_SLEEP_CLK>;
+ <&gcc GCC_USB30_PRIM_SLEEP_CLK>,
+ <&gcc GCC_USB3_0_CLKREF_EN>;
clock-names = "cfg_noc", "core", "iface", "mock_utmi",
- "sleep";
+ "sleep", "xo";
assigned-clocks = <&gcc GCC_USB30_PRIM_MOCK_UTMI_CLK>,
<&gcc GCC_USB30_PRIM_MASTER_CLK>;
diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi
index f972704dfe7a..56dfbb2e2fa6 100644
--- a/arch/arm64/boot/dts/rockchip/px30.dtsi
+++ b/arch/arm64/boot/dts/rockchip/px30.dtsi
@@ -711,7 +711,7 @@
clock-names = "pclk", "timer";
};
- dmac: dmac@ff240000 {
+ dmac: dma-controller@ff240000 {
compatible = "arm,pl330", "arm,primecell";
reg = <0x0 0xff240000 0x0 0x4000>;
interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index 39db0b85b4da..b822533dc7f1 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -489,7 +489,7 @@
status = "disabled";
};
- dmac: dmac@ff1f0000 {
+ dmac: dma-controller@ff1f0000 {
compatible = "arm,pl330", "arm,primecell";
reg = <0x0 0xff1f0000 0x0 0x4000>;
interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
index 45a5ae5d2027..162f08bca0d4 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
@@ -286,7 +286,7 @@
sound: sound {
compatible = "rockchip,rk3399-gru-sound";
- rockchip,cpu = <&i2s0 &i2s2>;
+ rockchip,cpu = <&i2s0 &spdif>;
};
};
@@ -437,10 +437,6 @@ ap_i2c_audio: &i2c8 {
status = "okay";
};
-&i2s2 {
- status = "okay";
-};
-
&io_domains {
status = "okay";
@@ -537,6 +533,17 @@ ap_i2c_audio: &i2c8 {
vqmmc-supply = <&ppvar_sd_card_io>;
};
+&spdif {
+ status = "okay";
+
+ /*
+ * SPDIF is routed internally to DP; we either don't use these pins, or
+ * mux them to something else.
+ */
+ /delete-property/ pinctrl-0;
+ /delete-property/ pinctrl-names;
+};
+
&spi1 {
status = "okay";
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
index 292bb7e80cf3..3ae5d727e367 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
@@ -232,6 +232,7 @@
&usbdrd_dwc3_0 {
dr_mode = "otg";
+ extcon = <&extcon_usb3>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
index fb67db4619ea..08fa00364b42 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
@@ -25,6 +25,13 @@
};
};
+ extcon_usb3: extcon-usb3 {
+ compatible = "linux,extcon-usb-gpio";
+ id-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&usb3_id>;
+ };
+
clkin_gmac: external-gmac-clock {
compatible = "fixed-clock";
clock-frequency = <125000000>;
@@ -422,9 +429,22 @@
<4 RK_PA3 RK_FUNC_GPIO &pcfg_pull_none>;
};
};
+
+ usb3 {
+ usb3_id: usb3-id {
+ rockchip,pins =
+ <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
};
&sdhci {
+ /*
+ * Signal integrity isn't great at 200MHz but 100MHz has proven stable
+ * enough.
+ */
+ max-frequency = <100000000>;
+
bus-width = <8>;
mmc-hs400-1_8v;
mmc-hs400-enhanced-strobe;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
index d3cdf6f42a30..080457a68e3c 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
@@ -1881,10 +1881,10 @@
interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH 0>;
clocks = <&cru PCLK_HDMI_CTRL>,
<&cru SCLK_HDMI_SFR>,
- <&cru PLL_VPLL>,
+ <&cru SCLK_HDMI_CEC>,
<&cru PCLK_VIO_GRF>,
- <&cru SCLK_HDMI_CEC>;
- clock-names = "iahb", "isfr", "vpll", "grf", "cec";
+ <&cru PLL_VPLL>;
+ clock-names = "iahb", "isfr", "cec", "grf", "vpll";
power-domains = <&power RK3399_PD_HDCP>;
reg-io-width = <4>;
rockchip,grf = <&grf>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
index 166399b7f13f..d9eb92d59099 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
@@ -285,8 +285,6 @@
vcc_ddr: DCDC_REG3 {
regulator-always-on;
regulator-boot-on;
- regulator-min-microvolt = <1100000>;
- regulator-max-microvolt = <1100000>;
regulator-initial-mode = <0x2>;
regulator-name = "vcc_ddr";
regulator-state-mem {
diff --git a/arch/arm64/boot/dts/rockchip/rk3568.dtsi b/arch/arm64/boot/dts/rockchip/rk3568.dtsi
index 2fd313a295f8..d91df1cde736 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3568.dtsi
@@ -32,13 +32,11 @@
clocks = <&cru SCLK_GMAC0>, <&cru SCLK_GMAC0_RX_TX>,
<&cru SCLK_GMAC0_RX_TX>, <&cru CLK_MAC0_REFOUT>,
<&cru ACLK_GMAC0>, <&cru PCLK_GMAC0>,
- <&cru SCLK_GMAC0_RX_TX>, <&cru CLK_GMAC0_PTP_REF>,
- <&cru PCLK_XPCS>;
+ <&cru SCLK_GMAC0_RX_TX>, <&cru CLK_GMAC0_PTP_REF>;
clock-names = "stmmaceth", "mac_clk_rx",
"mac_clk_tx", "clk_mac_refout",
"aclk_mac", "pclk_mac",
- "clk_mac_speed", "ptp_ref",
- "pclk_xpcs";
+ "clk_mac_speed", "ptp_ref";
resets = <&cru SRST_A_GMAC0>;
reset-names = "stmmaceth";
rockchip,grf = <&grf>;
diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
index a68033a23975..8ccce54ee8e7 100644
--- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
@@ -651,7 +651,7 @@
status = "disabled";
};
- dmac0: dmac@fe530000 {
+ dmac0: dma-controller@fe530000 {
compatible = "arm,pl330", "arm,primecell";
reg = <0x0 0xfe530000 0x0 0x4000>;
interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>,
@@ -662,7 +662,7 @@
#dma-cells = <1>;
};
- dmac1: dmac@fe550000 {
+ dmac1: dma-controller@fe550000 {
compatible = "arm,pl330", "arm,primecell";
reg = <0x0 0xfe550000 0x0 0x4000>;
interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts b/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts
index a5a24f9f46c5..b210cc07c539 100644
--- a/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts
+++ b/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts
@@ -15,8 +15,18 @@
model = "Texas Instruments J721S2 EVM";
chosen {
- stdout-path = "serial10:115200n8";
- bootargs = "console=ttyS10,115200n8 earlycon=ns16550a,mmio32,2880000";
+ stdout-path = "serial2:115200n8";
+ bootargs = "console=ttyS2,115200n8 earlycon=ns16550a,mmio32,2880000";
+ };
+
+ aliases {
+ serial1 = &mcu_uart0;
+ serial2 = &main_uart8;
+ mmc0 = &main_sdhci0;
+ mmc1 = &main_sdhci1;
+ can0 = &main_mcan16;
+ can1 = &mcu_mcan0;
+ can2 = &mcu_mcan1;
};
evm_12v0: fixedregulator-evm12v0 {
diff --git a/arch/arm64/boot/dts/ti/k3-j721s2.dtsi b/arch/arm64/boot/dts/ti/k3-j721s2.dtsi
index 80d3cae03e88..fe5234c40f6c 100644
--- a/arch/arm64/boot/dts/ti/k3-j721s2.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721s2.dtsi
@@ -21,28 +21,6 @@
#address-cells = <2>;
#size-cells = <2>;
- aliases {
- serial0 = &wkup_uart0;
- serial1 = &mcu_uart0;
- serial2 = &main_uart0;
- serial3 = &main_uart1;
- serial4 = &main_uart2;
- serial5 = &main_uart3;
- serial6 = &main_uart4;
- serial7 = &main_uart5;
- serial8 = &main_uart6;
- serial9 = &main_uart7;
- serial10 = &main_uart8;
- serial11 = &main_uart9;
- mmc0 = &main_sdhci0;
- mmc1 = &main_sdhci1;
- can0 = &main_mcan16;
- can1 = &mcu_mcan0;
- can2 = &mcu_mcan1;
- can3 = &main_mcan3;
- can4 = &main_mcan5;
- };
-
chosen { };
cpus {
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index addfa413650b..2a965aa0188d 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -45,7 +45,7 @@ config CRYPTO_SM3_ARM64_CE
tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_HASH
- select CRYPTO_SM3
+ select CRYPTO_LIB_SM3
config CRYPTO_SM4_ARM64_CE
tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 30b7cc6a7079..561dd2332571 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -24,7 +24,6 @@
#ifdef USE_V8_CRYPTO_EXTENSIONS
#define MODE "ce"
#define PRIO 300
-#define STRIDE 5
#define aes_expandkey ce_aes_expandkey
#define aes_ecb_encrypt ce_aes_ecb_encrypt
#define aes_ecb_decrypt ce_aes_ecb_decrypt
@@ -42,7 +41,6 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
#else
#define MODE "neon"
#define PRIO 200
-#define STRIDE 4
#define aes_ecb_encrypt neon_aes_ecb_encrypt
#define aes_ecb_decrypt neon_aes_ecb_decrypt
#define aes_cbc_encrypt neon_aes_cbc_encrypt
@@ -89,7 +87,7 @@ asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int bytes, u8 const iv[]);
asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
- int rounds, int bytes, u8 ctr[], u8 finalbuf[]);
+ int rounds, int bytes, u8 ctr[]);
asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
int rounds, int bytes, u32 const rk2[], u8 iv[],
@@ -458,26 +456,21 @@ static int __maybe_unused ctr_encrypt(struct skcipher_request *req)
unsigned int nbytes = walk.nbytes;
u8 *dst = walk.dst.virt.addr;
u8 buf[AES_BLOCK_SIZE];
- unsigned int tail;
if (unlikely(nbytes < AES_BLOCK_SIZE))
- src = memcpy(buf, src, nbytes);
+ src = dst = memcpy(buf + sizeof(buf) - nbytes,
+ src, nbytes);
else if (nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
kernel_neon_begin();
aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
- walk.iv, buf);
+ walk.iv);
kernel_neon_end();
- tail = nbytes % (STRIDE * AES_BLOCK_SIZE);
- if (tail > 0 && tail < AES_BLOCK_SIZE)
- /*
- * The final partial block could not be returned using
- * an overlapping store, so it was passed via buf[]
- * instead.
- */
- memcpy(dst + nbytes - tail, buf, tail);
+ if (unlikely(nbytes < AES_BLOCK_SIZE))
+ memcpy(walk.dst.virt.addr,
+ buf + sizeof(buf) - nbytes, nbytes);
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
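The reworked tail handling stages a sub-block input at the end of buf[] so the
assembly can consume and produce it with a single overlapping 16-byte access
ending exactly at buf + AES_BLOCK_SIZE, removing the need for the separate
finalbuf[] out-parameter. Condensed from the hunk above:

	/* before the call: right-align the short tail in a full block */
	if (unlikely(nbytes < AES_BLOCK_SIZE))
		src = dst = memcpy(buf + sizeof(buf) - nbytes, src, nbytes);
	/* ... aes_ctr_encrypt(dst, src, ..., nbytes, walk.iv); ... */
	/* after the call: copy the right-aligned result back out */
	if (unlikely(nbytes < AES_BLOCK_SIZE))
		memcpy(walk.dst.virt.addr, buf + sizeof(buf) - nbytes, nbytes);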
@@ -983,6 +976,7 @@ module_cpu_feature_match(AES, aes_init);
module_init(aes_init);
EXPORT_SYMBOL(neon_aes_ecb_encrypt);
EXPORT_SYMBOL(neon_aes_cbc_encrypt);
+EXPORT_SYMBOL(neon_aes_ctr_encrypt);
EXPORT_SYMBOL(neon_aes_xts_encrypt);
EXPORT_SYMBOL(neon_aes_xts_decrypt);
#endif
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index ff01f0167ba2..dc35eb0245c5 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -321,7 +321,7 @@ AES_FUNC_END(aes_cbc_cts_decrypt)
/*
* aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
- * int bytes, u8 ctr[], u8 finalbuf[])
+ * int bytes, u8 ctr[])
*/
AES_FUNC_START(aes_ctr_encrypt)
@@ -414,8 +414,8 @@ ST5( st1 {v4.16b}, [x0], #16 )
.Lctrtail:
/* XOR up to MAX_STRIDE * 16 - 1 bytes of in/output with v0 ... v3/v4 */
mov x16, #16
- ands x13, x4, #0xf
- csel x13, x13, x16, ne
+ ands x6, x4, #0xf
+ csel x13, x6, x16, ne
ST5( cmp w4, #64 - (MAX_STRIDE << 4) )
ST5( csel x14, x16, xzr, gt )
@@ -424,10 +424,10 @@ ST5( csel x14, x16, xzr, gt )
cmp w4, #32 - (MAX_STRIDE << 4)
csel x16, x16, xzr, gt
cmp w4, #16 - (MAX_STRIDE << 4)
- ble .Lctrtail1x
adr_l x12, .Lcts_permute_table
add x12, x12, x13
+ ble .Lctrtail1x
ST5( ld1 {v5.16b}, [x1], x14 )
ld1 {v6.16b}, [x1], x15
@@ -462,11 +462,19 @@ ST5( st1 {v5.16b}, [x0], x14 )
b .Lctrout
.Lctrtail1x:
- csel x0, x0, x6, eq // use finalbuf if less than a full block
+ sub x7, x6, #16
+ csel x6, x6, x7, eq
+ add x1, x1, x6
+ add x0, x0, x6
ld1 {v5.16b}, [x1]
+ ld1 {v6.16b}, [x0]
ST5( mov v3.16b, v4.16b )
encrypt_block v3, w3, x2, x8, w7
+ ld1 {v10.16b-v11.16b}, [x12]
+ tbl v3.16b, {v3.16b}, v10.16b
+ sshr v11.16b, v11.16b, #7
eor v5.16b, v5.16b, v3.16b
+ bif v5.16b, v6.16b, v11.16b
st1 {v5.16b}, [x0]
b .Lctrout
AES_FUNC_END(aes_ctr_encrypt)
diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
index a3405b8c344b..d427f4556b6e 100644
--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -735,119 +735,67 @@ SYM_FUNC_END(aesbs_cbc_decrypt)
* int blocks, u8 iv[])
*/
SYM_FUNC_START_LOCAL(__xts_crypt8)
- mov x6, #1
- lsl x6, x6, x23
- subs w23, w23, #8
- csel x23, x23, xzr, pl
- csel x6, x6, xzr, mi
+ movi v18.2s, #0x1
+ movi v19.2s, #0x87
+ uzp1 v18.4s, v18.4s, v19.4s
+
+ ld1 {v0.16b-v3.16b}, [x1], #64
+ ld1 {v4.16b-v7.16b}, [x1], #64
+
+ next_tweak v26, v25, v18, v19
+ next_tweak v27, v26, v18, v19
+ next_tweak v28, v27, v18, v19
+ next_tweak v29, v28, v18, v19
+ next_tweak v30, v29, v18, v19
+ next_tweak v31, v30, v18, v19
+ next_tweak v16, v31, v18, v19
+ next_tweak v17, v16, v18, v19
- ld1 {v0.16b}, [x20], #16
- next_tweak v26, v25, v30, v31
eor v0.16b, v0.16b, v25.16b
- tbnz x6, #1, 0f
-
- ld1 {v1.16b}, [x20], #16
- next_tweak v27, v26, v30, v31
eor v1.16b, v1.16b, v26.16b
- tbnz x6, #2, 0f
-
- ld1 {v2.16b}, [x20], #16
- next_tweak v28, v27, v30, v31
eor v2.16b, v2.16b, v27.16b
- tbnz x6, #3, 0f
-
- ld1 {v3.16b}, [x20], #16
- next_tweak v29, v28, v30, v31
eor v3.16b, v3.16b, v28.16b
- tbnz x6, #4, 0f
-
- ld1 {v4.16b}, [x20], #16
- str q29, [sp, #.Lframe_local_offset]
eor v4.16b, v4.16b, v29.16b
- next_tweak v29, v29, v30, v31
- tbnz x6, #5, 0f
-
- ld1 {v5.16b}, [x20], #16
- str q29, [sp, #.Lframe_local_offset + 16]
- eor v5.16b, v5.16b, v29.16b
- next_tweak v29, v29, v30, v31
- tbnz x6, #6, 0f
-
- ld1 {v6.16b}, [x20], #16
- str q29, [sp, #.Lframe_local_offset + 32]
- eor v6.16b, v6.16b, v29.16b
- next_tweak v29, v29, v30, v31
- tbnz x6, #7, 0f
+ eor v5.16b, v5.16b, v30.16b
+ eor v6.16b, v6.16b, v31.16b
+ eor v7.16b, v7.16b, v16.16b
- ld1 {v7.16b}, [x20], #16
- str q29, [sp, #.Lframe_local_offset + 48]
- eor v7.16b, v7.16b, v29.16b
- next_tweak v29, v29, v30, v31
+ stp q16, q17, [sp, #16]
-0: mov bskey, x21
- mov rounds, x22
+ mov bskey, x2
+ mov rounds, x3
br x16
SYM_FUNC_END(__xts_crypt8)
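// The eight tweaks are now computed up front: next_tweak performs a doubling
// in GF(2^128), i.e. a left shift by one with a conditional xor of 0x87 into
// the low byte on carry-out (the constants loaded into v18/v19 above), which
// is the standard XTS tweak progression.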
.macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
- frame_push 6, 64
-
- mov x19, x0
- mov x20, x1
- mov x21, x2
- mov x22, x3
- mov x23, x4
- mov x24, x5
+ stp x29, x30, [sp, #-48]!
+ mov x29, sp
- movi v30.2s, #0x1
- movi v25.2s, #0x87
- uzp1 v30.4s, v30.4s, v25.4s
- ld1 {v25.16b}, [x24]
+ ld1 {v25.16b}, [x5]
-99: adr x16, \do8
+0: adr x16, \do8
bl __xts_crypt8
- ldp q16, q17, [sp, #.Lframe_local_offset]
- ldp q18, q19, [sp, #.Lframe_local_offset + 32]
+ eor v16.16b, \o0\().16b, v25.16b
+ eor v17.16b, \o1\().16b, v26.16b
+ eor v18.16b, \o2\().16b, v27.16b
+ eor v19.16b, \o3\().16b, v28.16b
- eor \o0\().16b, \o0\().16b, v25.16b
- eor \o1\().16b, \o1\().16b, v26.16b
- eor \o2\().16b, \o2\().16b, v27.16b
- eor \o3\().16b, \o3\().16b, v28.16b
+ ldp q24, q25, [sp, #16]
- st1 {\o0\().16b}, [x19], #16
- mov v25.16b, v26.16b
- tbnz x6, #1, 1f
- st1 {\o1\().16b}, [x19], #16
- mov v25.16b, v27.16b
- tbnz x6, #2, 1f
- st1 {\o2\().16b}, [x19], #16
- mov v25.16b, v28.16b
- tbnz x6, #3, 1f
- st1 {\o3\().16b}, [x19], #16
- mov v25.16b, v29.16b
- tbnz x6, #4, 1f
+ eor v20.16b, \o4\().16b, v29.16b
+ eor v21.16b, \o5\().16b, v30.16b
+ eor v22.16b, \o6\().16b, v31.16b
+ eor v23.16b, \o7\().16b, v24.16b
- eor \o4\().16b, \o4\().16b, v16.16b
- eor \o5\().16b, \o5\().16b, v17.16b
- eor \o6\().16b, \o6\().16b, v18.16b
- eor \o7\().16b, \o7\().16b, v19.16b
+ st1 {v16.16b-v19.16b}, [x0], #64
+ st1 {v20.16b-v23.16b}, [x0], #64
- st1 {\o4\().16b}, [x19], #16
- tbnz x6, #5, 1f
- st1 {\o5\().16b}, [x19], #16
- tbnz x6, #6, 1f
- st1 {\o6\().16b}, [x19], #16
- tbnz x6, #7, 1f
- st1 {\o7\().16b}, [x19], #16
+ subs x4, x4, #8
+ b.gt 0b
- cbz x23, 1f
- st1 {v25.16b}, [x24]
-
- b 99b
-
-1: st1 {v25.16b}, [x24]
- frame_pop
+ st1 {v25.16b}, [x5]
+ ldp x29, x30, [sp], #48
ret
.endm
@@ -869,133 +817,51 @@ SYM_FUNC_END(aesbs_xts_decrypt)
/*
* aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
- * int rounds, int blocks, u8 iv[], u8 final[])
+ * int rounds, int blocks, u8 iv[])
*/
SYM_FUNC_START(aesbs_ctr_encrypt)
- frame_push 8
-
- mov x19, x0
- mov x20, x1
- mov x21, x2
- mov x22, x3
- mov x23, x4
- mov x24, x5
- mov x25, x6
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
- cmp x25, #0
- cset x26, ne
- add x23, x23, x26 // do one extra block if final
-
- ldp x7, x8, [x24]
- ld1 {v0.16b}, [x24]
+ ldp x7, x8, [x5]
+ ld1 {v0.16b}, [x5]
CPU_LE( rev x7, x7 )
CPU_LE( rev x8, x8 )
adds x8, x8, #1
adc x7, x7, xzr
-99: mov x9, #1
- lsl x9, x9, x23
- subs w23, w23, #8
- csel x23, x23, xzr, pl
- csel x9, x9, xzr, le
-
- tbnz x9, #1, 0f
- next_ctr v1
- tbnz x9, #2, 0f
+0: next_ctr v1
next_ctr v2
- tbnz x9, #3, 0f
next_ctr v3
- tbnz x9, #4, 0f
next_ctr v4
- tbnz x9, #5, 0f
next_ctr v5
- tbnz x9, #6, 0f
next_ctr v6
- tbnz x9, #7, 0f
next_ctr v7
-0: mov bskey, x21
- mov rounds, x22
+ mov bskey, x2
+ mov rounds, x3
bl aesbs_encrypt8
- lsr x9, x9, x26 // disregard the extra block
- tbnz x9, #0, 0f
-
- ld1 {v8.16b}, [x20], #16
- eor v0.16b, v0.16b, v8.16b
- st1 {v0.16b}, [x19], #16
- tbnz x9, #1, 1f
+ ld1 { v8.16b-v11.16b}, [x1], #64
+ ld1 {v12.16b-v15.16b}, [x1], #64
- ld1 {v9.16b}, [x20], #16
- eor v1.16b, v1.16b, v9.16b
- st1 {v1.16b}, [x19], #16
- tbnz x9, #2, 2f
+ eor v8.16b, v0.16b, v8.16b
+ eor v9.16b, v1.16b, v9.16b
+ eor v10.16b, v4.16b, v10.16b
+ eor v11.16b, v6.16b, v11.16b
+ eor v12.16b, v3.16b, v12.16b
+ eor v13.16b, v7.16b, v13.16b
+ eor v14.16b, v2.16b, v14.16b
+ eor v15.16b, v5.16b, v15.16b
- ld1 {v10.16b}, [x20], #16
- eor v4.16b, v4.16b, v10.16b
- st1 {v4.16b}, [x19], #16
- tbnz x9, #3, 3f
+ st1 { v8.16b-v11.16b}, [x0], #64
+ st1 {v12.16b-v15.16b}, [x0], #64
- ld1 {v11.16b}, [x20], #16
- eor v6.16b, v6.16b, v11.16b
- st1 {v6.16b}, [x19], #16
- tbnz x9, #4, 4f
-
- ld1 {v12.16b}, [x20], #16
- eor v3.16b, v3.16b, v12.16b
- st1 {v3.16b}, [x19], #16
- tbnz x9, #5, 5f
-
- ld1 {v13.16b}, [x20], #16
- eor v7.16b, v7.16b, v13.16b
- st1 {v7.16b}, [x19], #16
- tbnz x9, #6, 6f
+ next_ctr v0
+ subs x4, x4, #8
+ b.gt 0b
- ld1 {v14.16b}, [x20], #16
- eor v2.16b, v2.16b, v14.16b
- st1 {v2.16b}, [x19], #16
- tbnz x9, #7, 7f
-
- ld1 {v15.16b}, [x20], #16
- eor v5.16b, v5.16b, v15.16b
- st1 {v5.16b}, [x19], #16
-
-8: next_ctr v0
- st1 {v0.16b}, [x24]
- cbz x23, .Lctr_done
-
- b 99b
-
-.Lctr_done:
- frame_pop
+ st1 {v0.16b}, [x5]
+ ldp x29, x30, [sp], #16
ret
-
- /*
- * If we are handling the tail of the input (x6 != NULL), return the
- * final keystream block back to the caller.
- */
-0: cbz x25, 8b
- st1 {v0.16b}, [x25]
- b 8b
-1: cbz x25, 8b
- st1 {v1.16b}, [x25]
- b 8b
-2: cbz x25, 8b
- st1 {v4.16b}, [x25]
- b 8b
-3: cbz x25, 8b
- st1 {v6.16b}, [x25]
- b 8b
-4: cbz x25, 8b
- st1 {v3.16b}, [x25]
- b 8b
-5: cbz x25, 8b
- st1 {v7.16b}, [x25]
- b 8b
-6: cbz x25, 8b
- st1 {v2.16b}, [x25]
- b 8b
-7: cbz x25, 8b
- st1 {v5.16b}, [x25]
- b 8b
SYM_FUNC_END(aesbs_ctr_encrypt)
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index 8df6ad8cb09d..bac4cabef607 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -34,7 +34,7 @@ asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]);
asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
- int rounds, int blocks, u8 iv[], u8 final[]);
+ int rounds, int blocks, u8 iv[]);
asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]);
@@ -46,6 +46,8 @@ asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks);
asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 iv[]);
+asmlinkage void neon_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
+ int rounds, int bytes, u8 ctr[]);
asmlinkage void neon_aes_xts_encrypt(u8 out[], u8 const in[],
u32 const rk1[], int rounds, int bytes,
u32 const rk2[], u8 iv[], int first);
@@ -58,7 +60,7 @@ struct aesbs_ctx {
int rounds;
} __aligned(AES_BLOCK_SIZE);
-struct aesbs_cbc_ctx {
+struct aesbs_cbc_ctr_ctx {
struct aesbs_ctx key;
u32 enc[AES_MAX_KEYLENGTH_U32];
};
@@ -128,10 +130,10 @@ static int ecb_decrypt(struct skcipher_request *req)
return __ecb_crypt(req, aesbs_ecb_decrypt);
}
-static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+static int aesbs_cbc_ctr_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
- struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
struct crypto_aes_ctx rk;
int err;
@@ -154,7 +156,7 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
static int cbc_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
int err;
@@ -177,7 +179,7 @@ static int cbc_encrypt(struct skcipher_request *req)
static int cbc_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
int err;
@@ -205,40 +207,32 @@ static int cbc_decrypt(struct skcipher_request *req)
static int ctr_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
- u8 buf[AES_BLOCK_SIZE];
int err;
err = skcipher_walk_virt(&walk, req, false);
while (walk.nbytes > 0) {
- unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
- u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
-
- if (walk.nbytes < walk.total) {
- blocks = round_down(blocks,
- walk.stride / AES_BLOCK_SIZE);
- final = NULL;
- }
+ int blocks = (walk.nbytes / AES_BLOCK_SIZE) & ~7;
+ int nbytes = walk.nbytes % (8 * AES_BLOCK_SIZE);
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
kernel_neon_begin();
- aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->rk, ctx->rounds, blocks, walk.iv, final);
- kernel_neon_end();
-
- if (final) {
- u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
- u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
-
- crypto_xor_cpy(dst, src, final,
- walk.total % AES_BLOCK_SIZE);
-
- err = skcipher_walk_done(&walk, 0);
- break;
+ if (blocks >= 8) {
+ aesbs_ctr_encrypt(dst, src, ctx->key.rk, ctx->key.rounds,
+ blocks, walk.iv);
+ dst += blocks * AES_BLOCK_SIZE;
+ src += blocks * AES_BLOCK_SIZE;
}
- err = skcipher_walk_done(&walk,
- walk.nbytes - blocks * AES_BLOCK_SIZE);
+ if (nbytes && walk.nbytes == walk.total) {
+ neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds,
+ nbytes, walk.iv);
+ nbytes = 0;
+ }
+ kernel_neon_end();
+ err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
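/*
 * blocks is rounded down to a multiple of 8 for the bit-sliced core, and the
 * remainder (anything under 8 blocks, including a partial final block) is
 * only processed once the walk reaches its end, via the plain NEON CTR
 * routine; this way no keystream has to be carried between walk chunks.
 */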
@@ -308,23 +302,18 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
return err;
while (walk.nbytes >= AES_BLOCK_SIZE) {
- unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
-
- if (walk.nbytes < walk.total || walk.nbytes % AES_BLOCK_SIZE)
- blocks = round_down(blocks,
- walk.stride / AES_BLOCK_SIZE);
-
+ int blocks = (walk.nbytes / AES_BLOCK_SIZE) & ~7;
out = walk.dst.virt.addr;
in = walk.src.virt.addr;
nbytes = walk.nbytes;
kernel_neon_begin();
- if (likely(blocks > 6)) { /* plain NEON is faster otherwise */
- if (first)
+ if (blocks >= 8) {
+ if (first == 1)
neon_aes_ecb_encrypt(walk.iv, walk.iv,
ctx->twkey,
ctx->key.rounds, 1);
- first = 0;
+ first = 2;
fn(out, in, ctx->key.rk, ctx->key.rounds, blocks,
walk.iv);
@@ -333,10 +322,17 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
in += blocks * AES_BLOCK_SIZE;
nbytes -= blocks * AES_BLOCK_SIZE;
}
-
- if (walk.nbytes == walk.total && nbytes > 0)
- goto xts_tail;
-
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ if (encrypt)
+ neon_aes_xts_encrypt(out, in, ctx->cts.key_enc,
+ ctx->key.rounds, nbytes,
+ ctx->twkey, walk.iv, first);
+ else
+ neon_aes_xts_decrypt(out, in, ctx->cts.key_dec,
+ ctx->key.rounds, nbytes,
+ ctx->twkey, walk.iv, first);
+ nbytes = first = 0;
+ }
kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
@@ -361,13 +357,12 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
nbytes = walk.nbytes;
kernel_neon_begin();
-xts_tail:
if (encrypt)
neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds,
- nbytes, ctx->twkey, walk.iv, first ?: 2);
+ nbytes, ctx->twkey, walk.iv, first);
else
neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds,
- nbytes, ctx->twkey, walk.iv, first ?: 2);
+ nbytes, ctx->twkey, walk.iv, first);
kernel_neon_end();
return skcipher_walk_done(&walk, 0);
@@ -402,14 +397,14 @@ static struct skcipher_alg aes_algs[] = { {
.base.cra_driver_name = "cbc-aes-neonbs",
.base.cra_priority = 250,
.base.cra_blocksize = AES_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
+ .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctr_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.walksize = 8 * AES_BLOCK_SIZE,
.ivsize = AES_BLOCK_SIZE,
- .setkey = aesbs_cbc_setkey,
+ .setkey = aesbs_cbc_ctr_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
@@ -417,7 +412,7 @@ static struct skcipher_alg aes_algs[] = { {
.base.cra_driver_name = "ctr-aes-neonbs",
.base.cra_priority = 250,
.base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct aesbs_ctx),
+ .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctr_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = AES_MIN_KEY_SIZE,
@@ -425,7 +420,7 @@ static struct skcipher_alg aes_algs[] = { {
.chunksize = AES_BLOCK_SIZE,
.walksize = 8 * AES_BLOCK_SIZE,
.ivsize = AES_BLOCK_SIZE,
- .setkey = aesbs_setkey,
+ .setkey = aesbs_cbc_ctr_setkey,
.encrypt = ctr_encrypt,
.decrypt = ctr_encrypt,
}, {
diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c
index 8c65cecf560a..250e1377c481 100644
--- a/arch/arm64/crypto/sha3-ce-glue.c
+++ b/arch/arm64/crypto/sha3-ce-glue.c
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+// SPDX-License-Identifier: GPL-2.0
/*
* sha3-ce-glue.c - core SHA-3 transform using v8.2 Crypto Extensions
*
diff --git a/arch/arm64/crypto/sha512-armv8.pl b/arch/arm64/crypto/sha512-armv8.pl
index 2d8655d5b1af..35ec9ae99fe1 100644
--- a/arch/arm64/crypto/sha512-armv8.pl
+++ b/arch/arm64/crypto/sha512-armv8.pl
@@ -43,7 +43,7 @@
# on Cortex-A53 (or by 4 cycles per round).
# (***) Super-impressive coefficients over gcc-generated code are
# indication of some compiler "pathology", most notably code
-# generated with -mgeneral-regs-only is significanty faster
+# generated with -mgeneral-regs-only is significantly faster
# and the gap is only 40-90%.
#
# October 2016.
diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c
index e62a094a9d52..94cb7580deb7 100644
--- a/arch/arm64/crypto/sha512-ce-glue.c
+++ b/arch/arm64/crypto/sha512-ce-glue.c
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+// SPDX-License-Identifier: GPL-2.0
/*
* sha512-ce-glue.c - SHA-384/SHA-512 using ARMv8 Crypto Extensions
*
diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c
index d71faca322f2..ee98954ae8ca 100644
--- a/arch/arm64/crypto/sm3-ce-glue.c
+++ b/arch/arm64/crypto/sm3-ce-glue.c
@@ -26,8 +26,10 @@ asmlinkage void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
static int sm3_ce_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- if (!crypto_simd_usable())
- return crypto_sm3_update(desc, data, len);
+ if (!crypto_simd_usable()) {
+ sm3_update(shash_desc_ctx(desc), data, len);
+ return 0;
+ }
kernel_neon_begin();
sm3_base_do_update(desc, data, len, sm3_ce_transform);
@@ -38,8 +40,10 @@ static int sm3_ce_update(struct shash_desc *desc, const u8 *data,
static int sm3_ce_final(struct shash_desc *desc, u8 *out)
{
- if (!crypto_simd_usable())
- return crypto_sm3_finup(desc, NULL, 0, out);
+ if (!crypto_simd_usable()) {
+ sm3_final(shash_desc_ctx(desc), out);
+ return 0;
+ }
kernel_neon_begin();
sm3_base_do_finalize(desc, sm3_ce_transform);
@@ -51,14 +55,22 @@ static int sm3_ce_final(struct shash_desc *desc, u8 *out)
static int sm3_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- if (!crypto_simd_usable())
- return crypto_sm3_finup(desc, data, len, out);
+ if (!crypto_simd_usable()) {
+ struct sm3_state *sctx = shash_desc_ctx(desc);
+
+ if (len)
+ sm3_update(sctx, data, len);
+ sm3_final(sctx, out);
+ return 0;
+ }
kernel_neon_begin();
- sm3_base_do_update(desc, data, len, sm3_ce_transform);
+ if (len)
+ sm3_base_do_update(desc, data, len, sm3_ce_transform);
+ sm3_base_do_finalize(desc, sm3_ce_transform);
kernel_neon_end();
- return sm3_ce_final(desc, out);
+ return sm3_base_finish(desc, out);
}
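/*
 * finup now performs the update and the finalization inside a single
 * kernel_neon_begin()/kernel_neon_end() section and returns through
 * sm3_base_finish(), instead of recursing into sm3_ce_final(), which
 * would re-check SIMD usability and open a second NEON context.
 */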
static struct shash_alg sm3_alg = {
diff --git a/arch/arm64/include/asm/apple_m1_pmu.h b/arch/arm64/include/asm/apple_m1_pmu.h
new file mode 100644
index 000000000000..99483b19b99f
--- /dev/null
+++ b/arch/arm64/include/asm/apple_m1_pmu.h
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef __ASM_APPLE_M1_PMU_h
+#define __ASM_APPLE_M1_PMU_h
+
+#include <linux/bits.h>
+#include <asm/sysreg.h>
+
+/* Counters */
+#define SYS_IMP_APL_PMC0_EL1 sys_reg(3, 2, 15, 0, 0)
+#define SYS_IMP_APL_PMC1_EL1 sys_reg(3, 2, 15, 1, 0)
+#define SYS_IMP_APL_PMC2_EL1 sys_reg(3, 2, 15, 2, 0)
+#define SYS_IMP_APL_PMC3_EL1 sys_reg(3, 2, 15, 3, 0)
+#define SYS_IMP_APL_PMC4_EL1 sys_reg(3, 2, 15, 4, 0)
+#define SYS_IMP_APL_PMC5_EL1 sys_reg(3, 2, 15, 5, 0)
+#define SYS_IMP_APL_PMC6_EL1 sys_reg(3, 2, 15, 6, 0)
+#define SYS_IMP_APL_PMC7_EL1 sys_reg(3, 2, 15, 7, 0)
+#define SYS_IMP_APL_PMC8_EL1 sys_reg(3, 2, 15, 9, 0)
+#define SYS_IMP_APL_PMC9_EL1 sys_reg(3, 2, 15, 10, 0)
+
+/* Core PMC control register */
+#define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0)
+#define PMCR0_CNT_ENABLE_0_7 GENMASK(7, 0)
+#define PMCR0_IMODE GENMASK(10, 8)
+#define PMCR0_IMODE_OFF 0
+#define PMCR0_IMODE_PMI 1
+#define PMCR0_IMODE_AIC 2
+#define PMCR0_IMODE_HALT 3
+#define PMCR0_IMODE_FIQ 4
+#define PMCR0_IACT BIT(11)
+#define PMCR0_PMI_ENABLE_0_7 GENMASK(19, 12)
+#define PMCR0_STOP_CNT_ON_PMI BIT(20)
+#define PMCR0_CNT_GLOB_L2C_EVT BIT(21)
+#define PMCR0_DEFER_PMI_TO_ERET BIT(22)
+#define PMCR0_ALLOW_CNT_EN_EL0 BIT(30)
+#define PMCR0_CNT_ENABLE_8_9 GENMASK(33, 32)
+#define PMCR0_PMI_ENABLE_8_9 GENMASK(45, 44)
+
+#define SYS_IMP_APL_PMCR1_EL1 sys_reg(3, 1, 15, 1, 0)
+#define PMCR1_COUNT_A64_EL0_0_7 GENMASK(15, 8)
+#define PMCR1_COUNT_A64_EL1_0_7 GENMASK(23, 16)
+#define PMCR1_COUNT_A64_EL0_8_9 GENMASK(41, 40)
+#define PMCR1_COUNT_A64_EL1_8_9 GENMASK(49, 48)
+
+#define SYS_IMP_APL_PMCR2_EL1 sys_reg(3, 1, 15, 2, 0)
+#define SYS_IMP_APL_PMCR3_EL1 sys_reg(3, 1, 15, 3, 0)
+#define SYS_IMP_APL_PMCR4_EL1 sys_reg(3, 1, 15, 4, 0)
+
+#define SYS_IMP_APL_PMESR0_EL1 sys_reg(3, 1, 15, 5, 0)
+#define PMESR0_EVT_CNT_2 GENMASK(7, 0)
+#define PMESR0_EVT_CNT_3 GENMASK(15, 8)
+#define PMESR0_EVT_CNT_4 GENMASK(23, 16)
+#define PMESR0_EVT_CNT_5 GENMASK(31, 24)
+
+#define SYS_IMP_APL_PMESR1_EL1 sys_reg(3, 1, 15, 6, 0)
+#define PMESR1_EVT_CNT_6 GENMASK(7, 0)
+#define PMESR1_EVT_CNT_7 GENMASK(15, 8)
+#define PMESR1_EVT_CNT_8 GENMASK(23, 16)
+#define PMESR1_EVT_CNT_9 GENMASK(31, 24)
+
+#define SYS_IMP_APL_PMSR_EL1 sys_reg(3, 1, 15, 13, 0)
+#define PMSR_OVERFLOW GENMASK(9, 0)
+
+#endif /* __ASM_APPLE_M1_PMU_h */
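A hypothetical usage sketch (not part of the patch), composing a PMCR0 value
with the bitfield helpers from <linux/bitfield.h>:

	#include <linux/bitfield.h>

	/* enable counters 0-7 and deliver their PMIs as FIQs */
	u64 pmcr0 = PMCR0_CNT_ENABLE_0_7 |
		    PMCR0_PMI_ENABLE_0_7 |
		    FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ);
	write_sysreg_s(pmcr0, SYS_IMP_APL_PMCR0_EL1);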
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 4ad22c3135db..8bd5afc7b692 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -53,17 +53,36 @@ static inline u64 gic_read_iar_common(void)
* The gicv3 of ThunderX requires a modified version for reading the
* IAR status to ensure data synchronization (access to icc_iar1_el1
* is not sync'ed before and after).
+ *
+ * Erratum 38545
+ *
+ * When an IAR register read races with a GIC interrupt RELEASE event,
+ * the GIC-CPU interface can wrongly return a valid INTID to the CPU
+ * for an interrupt that has already been released (deactivated), instead
+ * of the spurious INTID 0x3ff.
+ *
+ * To work around this, return a valid interrupt ID only if there is a
+ * change in the active priority list after the IAR read: a genuine
+ * acknowledge activates the interrupt and therefore alters the active
+ * priorities, while a spurious one leaves them untouched.
+ *
+ * A common function is used for both workarounds since:
+ * 1. On ThunderX 88xx 1.x both errata are applicable.
+ * 2. The extra nops have no side effects on silicon where erratum
+ *    23154 is not applicable.
*/
static inline u64 gic_read_iar_cavium_thunderx(void)
{
- u64 irqstat;
+ u64 irqstat, apr;
+ apr = read_sysreg_s(SYS_ICC_AP1R0_EL1);
nops(8);
irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
nops(4);
mb();
- return irqstat;
+ /* At most 32 priority groups are implemented */
+ if (likely(apr != read_sysreg_s(SYS_ICC_AP1R0_EL1)))
+ return irqstat;
+
+ return 0x3ff;
}
static inline void gic_write_ctlr(u32 val)
diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h
index 09e43272ccb0..d1bb5e71df25 100644
--- a/arch/arm64/include/asm/archrandom.h
+++ b/arch/arm64/include/asm/archrandom.h
@@ -42,13 +42,47 @@ static inline bool __arm64_rndr(unsigned long *v)
return ok;
}
+static inline bool __arm64_rndrrs(unsigned long *v)
+{
+ bool ok;
+
+ /*
+ * Reads of RNDRRS set PSTATE.NZCV to 0b0000 on success,
+ * and set PSTATE.NZCV to 0b0100 otherwise.
+ */
+ asm volatile(
+ __mrs_s("%0", SYS_RNDRRS_EL0) "\n"
+ " cset %w1, ne\n"
+ : "=r" (*v), "=r" (ok)
+ :
+ : "cc");
+
+ return ok;
+}
+
static inline bool __must_check arch_get_random_long(unsigned long *v)
{
+ /*
+ * Only support the generic interface after we have detected
+ * the system wide capability, avoiding complexity with the
+ * cpufeature code and with potential scheduling between CPUs
+ * with and without the feature.
+ */
+ if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v))
+ return true;
return false;
}
static inline bool __must_check arch_get_random_int(unsigned int *v)
{
+ if (cpus_have_const_cap(ARM64_HAS_RNG)) {
+ unsigned long val;
+
+ if (__arm64_rndr(&val)) {
+ *v = val;
+ return true;
+ }
+ }
return false;
}
@@ -71,12 +105,11 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
}
/*
- * Only support the generic interface after we have detected
- * the system wide capability, avoiding complexity with the
- * cpufeature code and with potential scheduling between CPUs
- * with and without the feature.
+ * RNDRRS is not backed by an entropy source but by a DRBG that is
+ * reseeded after each invocation. This is not a 100% fit but good
+ * enough to implement this API if no other entropy source exists.
*/
- if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v))
+ if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndrrs(v))
return true;
return false;
@@ -96,7 +129,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
}
if (cpus_have_const_cap(ARM64_HAS_RNG)) {
- if (__arm64_rndr(&val)) {
+ if (__arm64_rndrrs(&val)) {
*v = val;
return true;
}
diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h
index f1bba5fc61c4..ead62f7dd269 100644
--- a/arch/arm64/include/asm/asm_pointer_auth.h
+++ b/arch/arm64/include/asm/asm_pointer_auth.h
@@ -60,6 +60,9 @@ alternative_else_nop_endif
.macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3
mrs \tmp1, id_aa64isar1_el1
ubfx \tmp1, \tmp1, #ID_AA64ISAR1_APA_SHIFT, #8
+ mrs_s \tmp2, SYS_ID_AA64ISAR2_EL1
+ ubfx \tmp2, \tmp2, #ID_AA64ISAR2_APA3_SHIFT, #4
+ orr \tmp1, \tmp1, \tmp2
cbz \tmp1, .Lno_addr_auth\@
mov_q \tmp1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \
SCTLR_ELx_ENDA | SCTLR_ELx_ENDB)
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index e8bd0af0141c..8c5a61aeaf8e 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -109,6 +109,13 @@
.endm
/*
+ * Clear Branch History instruction
+ */
+ .macro clearbhb
+ hint #22
+ .endm
+
+/*
* Speculation barrier
*/
.macro sb
@@ -535,11 +542,6 @@ alternative_endif
#define EXPORT_SYMBOL_NOKASAN(name) EXPORT_SYMBOL(name)
#endif
-#ifdef CONFIG_KASAN_HW_TAGS
-#define EXPORT_SYMBOL_NOHWKASAN(name)
-#else
-#define EXPORT_SYMBOL_NOHWKASAN(name) EXPORT_SYMBOL_NOKASAN(name)
-#endif
/*
* Emit a 64-bit absolute little endian symbol reference in a way that
* ensures that it will be resolved at build time, even when building a
@@ -850,4 +852,50 @@ alternative_endif
#endif /* GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT */
+ .macro __mitigate_spectre_bhb_loop tmp
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+alternative_cb spectre_bhb_patch_loop_iter
+ mov \tmp, #32 // Patched to correct the immediate
+alternative_cb_end
+.Lspectre_bhb_loop\@:
+ b . + 4
+ subs \tmp, \tmp, #1
+ b.ne .Lspectre_bhb_loop\@
+ sb
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+ .endm
+
+ .macro mitigate_spectre_bhb_loop tmp
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+alternative_cb spectre_bhb_patch_loop_mitigation_enable
+ b .L_spectre_bhb_loop_done\@ // Patched to NOP
+alternative_cb_end
+ __mitigate_spectre_bhb_loop \tmp
+.L_spectre_bhb_loop_done\@:
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+ .endm
+
+ /* Save/restores x0-x3 to the stack */
+ .macro __mitigate_spectre_bhb_fw
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+ stp x0, x1, [sp, #-16]!
+ stp x2, x3, [sp, #-16]!
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_3
+alternative_cb smccc_patch_fw_mitigation_conduit
+ nop // Patched to SMC/HVC #0
+alternative_cb_end
+ ldp x2, x3, [sp], #16
+ ldp x0, x1, [sp], #16
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+ .endm
+
+ .macro mitigate_spectre_bhb_clear_insn
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+alternative_cb spectre_bhb_patch_clearbhb
+ /* Patched to NOP when not supported */
+ clearbhb
+ isb
+alternative_cb_end
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+ .endm
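// In __mitigate_spectre_bhb_loop above, each "b . + 4" is a taken branch to
// the next instruction, so the loop retires \tmp benign branches purely to
// overwrite the branch history; the trailing sb stops speculation past the
// sequence, and the iteration count is patched per-CPU via
// spectre_bhb_patch_loop_iter.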
#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index ef6be92b1921..c62e7e5e2f0c 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -356,6 +356,7 @@ struct arm64_cpu_capabilities {
struct { /* Feature register checking */
u32 sys_reg;
u8 field_pos;
+ u8 field_width;
u8 min_field_value;
u8 hwcap_type;
bool sign;
@@ -576,6 +577,8 @@ static inline u64 arm64_ftr_reg_user_value(const struct arm64_ftr_reg *reg)
static inline int __attribute_const__
cpuid_feature_extract_field_width(u64 features, int field, int width, bool sign)
{
+ if (WARN_ON_ONCE(!width))
+ width = 4;
return (sign) ?
cpuid_feature_extract_signed_field_width(features, field, width) :
cpuid_feature_extract_unsigned_field_width(features, field, width);
@@ -637,6 +640,35 @@ static inline bool cpu_supports_mixed_endian_el0(void)
return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
}
+
+static inline bool supports_csv2p3(int scope)
+{
+ u64 pfr0;
+ u8 csv2_val;
+
+ if (scope == SCOPE_LOCAL_CPU)
+ pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1);
+ else
+ pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+
+ csv2_val = cpuid_feature_extract_unsigned_field(pfr0,
+ ID_AA64PFR0_CSV2_SHIFT);
+ return csv2_val == 3;
+}
+
+static inline bool supports_clearbhb(int scope)
+{
+ u64 isar2;
+
+ if (scope == SCOPE_LOCAL_CPU)
+ isar2 = read_sysreg_s(SYS_ID_AA64ISAR2_EL1);
+ else
+ isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);
+
+ return cpuid_feature_extract_unsigned_field(isar2,
+ ID_AA64ISAR2_CLEARBHB_SHIFT);
+}
+
const struct cpumask *system_32bit_el0_cpumask(void);
DECLARE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0);
@@ -854,6 +886,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
extern struct arm64_ftr_override id_aa64mmfr1_override;
extern struct arm64_ftr_override id_aa64pfr1_override;
extern struct arm64_ftr_override id_aa64isar1_override;
+extern struct arm64_ftr_override id_aa64isar2_override;
u32 get_kvm_ipa_limit(void);
void dump_cpu_features(void);
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 19b8441aa8f2..232b439cbaf3 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -73,8 +73,14 @@
#define ARM_CPU_PART_CORTEX_A76 0xD0B
#define ARM_CPU_PART_NEOVERSE_N1 0xD0C
#define ARM_CPU_PART_CORTEX_A77 0xD0D
+#define ARM_CPU_PART_NEOVERSE_V1 0xD40
+#define ARM_CPU_PART_CORTEX_A78 0xD41
+#define ARM_CPU_PART_CORTEX_X1 0xD44
+#define ARM_CPU_PART_CORTEX_A510 0xD46
#define ARM_CPU_PART_CORTEX_A710 0xD47
+#define ARM_CPU_PART_CORTEX_X2 0xD48
#define ARM_CPU_PART_NEOVERSE_N2 0xD49
+#define ARM_CPU_PART_CORTEX_A78C 0xD4B
#define APM_CPU_PART_POTENZA 0x000
@@ -82,6 +88,13 @@
#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2
#define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3
#define CAVIUM_CPU_PART_THUNDERX2 0x0AF
+/* OcteonTx2 series */
+#define CAVIUM_CPU_PART_OCTX2_98XX 0x0B1
+#define CAVIUM_CPU_PART_OCTX2_96XX 0x0B2
+#define CAVIUM_CPU_PART_OCTX2_95XX 0x0B3
+#define CAVIUM_CPU_PART_OCTX2_95XXN 0x0B4
+#define CAVIUM_CPU_PART_OCTX2_95XXMM 0x0B5
+#define CAVIUM_CPU_PART_OCTX2_95XXO 0x0B6
#define BRCM_CPU_PART_BRAHMA_B53 0x100
#define BRCM_CPU_PART_VULCAN 0x516
@@ -115,11 +128,23 @@
#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
#define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1)
#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
+#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
+#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
+#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
+#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
+#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
+#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
+#define MIDR_OCTX2_98XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_98XX)
+#define MIDR_OCTX2_96XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_96XX)
+#define MIDR_OCTX2_95XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XX)
+#define MIDR_OCTX2_95XXN MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXN)
+#define MIDR_OCTX2_95XXMM MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXMM)
+#define MIDR_OCTX2_95XXO MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXO)
#define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2)
#define MIDR_BRAHMA_B53 MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_BRAHMA_B53)
#define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN)
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 657c921fd784..00c291067e57 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -34,18 +34,6 @@
*/
#define BREAK_INSTR_SIZE AARCH64_INSN_SIZE
-/*
- * BRK instruction encoding
- * The #imm16 value should be placed at bits[20:5] within BRK ins
- */
-#define AARCH64_BREAK_MON 0xd4200000
-
-/*
- * BRK instruction for provoking a fault on purpose
- * Unlike kgdb, #imm16 value with unallocated handler is used for faulting.
- */
-#define AARCH64_BREAK_FAULT (AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5))
-
#define AARCH64_BREAK_KGDB_DYN_DBG \
(AARCH64_BREAK_MON | (KGDB_DYN_DBG_BRK_IMM << 5))
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 3198acb2aad8..7f3c87f7a0ce 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -106,7 +106,7 @@
msr_s SYS_ICC_SRE_EL2, x0
isb // Make sure SRE is now set
mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back,
- tbz x0, #0, 1f // and check that it sticks
+ tbz x0, #0, .Lskip_gicv3_\@ // and check that it sticks
msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults
.Lskip_gicv3_\@:
.endm
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 4335800201c9..daff882883f9 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -62,9 +62,11 @@ enum fixed_addresses {
#endif /* CONFIG_ACPI_APEI_GHES */
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ FIX_ENTRY_TRAMP_TEXT3,
+ FIX_ENTRY_TRAMP_TEXT2,
+ FIX_ENTRY_TRAMP_TEXT1,
FIX_ENTRY_TRAMP_DATA,
- FIX_ENTRY_TRAMP_TEXT,
-#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT))
+#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1))
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
__end_of_permanent_fixed_addresses,
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index f68fbb207473..8db5ec0089db 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -108,6 +108,7 @@
#define KERNEL_HWCAP_ECV __khwcap2_feature(ECV)
#define KERNEL_HWCAP_AFP __khwcap2_feature(AFP)
#define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES)
+#define KERNEL_HWCAP_MTE3 __khwcap2_feature(MTE3)
/*
* This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/insn-def.h b/arch/arm64/include/asm/insn-def.h
index 2c075f615c6a..1a7d0d483698 100644
--- a/arch/arm64/include/asm/insn-def.h
+++ b/arch/arm64/include/asm/insn-def.h
@@ -3,7 +3,21 @@
#ifndef __ASM_INSN_DEF_H
#define __ASM_INSN_DEF_H
+#include <asm/brk-imm.h>
+
/* A64 instructions are always 32 bits. */
#define AARCH64_INSN_SIZE 4
+/*
+ * BRK instruction encoding
+ * The #imm16 value should be placed at bits[20:5] within BRK ins
+ */
+#define AARCH64_BREAK_MON 0xd4200000
+
+/*
+ * BRK instruction for provoking a fault on purpose
+ * Unlike kgdb, #imm16 value with unallocated handler is used for faulting.
+ */
+#define AARCH64_BREAK_FAULT (AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5))
+
#endif /* __ASM_INSN_DEF_H */
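For concreteness, the relocated fault opcode can be reproduced in a few lines. This is an illustration only, and assumes FAULT_BRK_IMM is 0x100 as defined in asm/brk-imm.h:

    /* Sketch: composing AARCH64_BREAK_FAULT by hand.
     * Assumption: FAULT_BRK_IMM == 0x100 (see asm/brk-imm.h). */
    #include <stdint.h>
    #include <stdio.h>

    #define AARCH64_BREAK_MON 0xd4200000u
    #define FAULT_BRK_IMM     0x100

    int main(void)
    {
        /* #imm16 occupies bits [20:5] of the BRK encoding. */
        uint32_t brk_fault = AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5);

        printf("AARCH64_BREAK_FAULT = 0x%08x\n", brk_fault); /* 0xd4202000 */
        return 0;
    }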
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 6b776c8667b2..1e5760d567ae 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -65,6 +65,7 @@ enum aarch64_insn_hint_cr_op {
AARCH64_INSN_HINT_PSB = 0x11 << 5,
AARCH64_INSN_HINT_TSB = 0x12 << 5,
AARCH64_INSN_HINT_CSDB = 0x14 << 5,
+ AARCH64_INSN_HINT_CLEARBHB = 0x16 << 5,
AARCH64_INSN_HINT_BTI = 0x20 << 5,
AARCH64_INSN_HINT_BTIC = 0x22 << 5,
@@ -205,7 +206,9 @@ enum aarch64_insn_ldst_type {
AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX,
AARCH64_INSN_LDST_LOAD_EX,
+ AARCH64_INSN_LDST_LOAD_ACQ_EX,
AARCH64_INSN_LDST_STORE_EX,
+ AARCH64_INSN_LDST_STORE_REL_EX,
};
enum aarch64_insn_adsb_type {
@@ -280,6 +283,36 @@ enum aarch64_insn_adr_type {
AARCH64_INSN_ADR_TYPE_ADR,
};
+enum aarch64_insn_mem_atomic_op {
+ AARCH64_INSN_MEM_ATOMIC_ADD,
+ AARCH64_INSN_MEM_ATOMIC_CLR,
+ AARCH64_INSN_MEM_ATOMIC_EOR,
+ AARCH64_INSN_MEM_ATOMIC_SET,
+ AARCH64_INSN_MEM_ATOMIC_SWP,
+};
+
+enum aarch64_insn_mem_order_type {
+ AARCH64_INSN_MEM_ORDER_NONE,
+ AARCH64_INSN_MEM_ORDER_ACQ,
+ AARCH64_INSN_MEM_ORDER_REL,
+ AARCH64_INSN_MEM_ORDER_ACQREL,
+};
+
+enum aarch64_insn_mb_type {
+ AARCH64_INSN_MB_SY,
+ AARCH64_INSN_MB_ST,
+ AARCH64_INSN_MB_LD,
+ AARCH64_INSN_MB_ISH,
+ AARCH64_INSN_MB_ISHST,
+ AARCH64_INSN_MB_ISHLD,
+ AARCH64_INSN_MB_NSH,
+ AARCH64_INSN_MB_NSHST,
+ AARCH64_INSN_MB_NSHLD,
+ AARCH64_INSN_MB_OSH,
+ AARCH64_INSN_MB_OSHST,
+ AARCH64_INSN_MB_OSHLD,
+};
+
#define __AARCH64_INSN_FUNCS(abbr, mask, val) \
static __always_inline bool aarch64_insn_is_##abbr(u32 code) \
{ \
@@ -303,6 +336,11 @@ __AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400)
__AARCH64_INSN_FUNCS(load_post, 0x3FE00C00, 0x38400400)
__AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800)
__AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000)
+__AARCH64_INSN_FUNCS(ldclr, 0x3F20FC00, 0x38201000)
+__AARCH64_INSN_FUNCS(ldeor, 0x3F20FC00, 0x38202000)
+__AARCH64_INSN_FUNCS(ldset, 0x3F20FC00, 0x38203000)
+__AARCH64_INSN_FUNCS(swp, 0x3F20FC00, 0x38208000)
+__AARCH64_INSN_FUNCS(cas, 0x3FA07C00, 0x08A07C00)
__AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800)
__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000)
__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
@@ -474,13 +512,6 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
enum aarch64_insn_register state,
enum aarch64_insn_size_type size,
enum aarch64_insn_ldst_type type);
-u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
- enum aarch64_insn_register address,
- enum aarch64_insn_register value,
- enum aarch64_insn_size_type size);
-u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
- enum aarch64_insn_register value,
- enum aarch64_insn_size_type size);
u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
enum aarch64_insn_register src,
int imm, enum aarch64_insn_variant variant,
@@ -541,6 +572,42 @@ u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base,
enum aarch64_insn_prfm_type type,
enum aarch64_insn_prfm_target target,
enum aarch64_insn_prfm_policy policy);
+#ifdef CONFIG_ARM64_LSE_ATOMICS
+u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result,
+ enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_mem_atomic_op op,
+ enum aarch64_insn_mem_order_type order);
+u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
+ enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_mem_order_type order);
+#else
+static inline
+u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result,
+ enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_mem_atomic_op op,
+ enum aarch64_insn_mem_order_type order)
+{
+ return AARCH64_BREAK_FAULT;
+}
+
+static inline
+u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
+ enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_mem_order_type order)
+{
+ return AARCH64_BREAK_FAULT;
+}
+#endif
+u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type);
+
s32 aarch64_get_branch_offset(u32 insn);
u32 aarch64_set_branch_offset(u32 insn, s32 offset);
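With the old ldadd/stadd generators folded into aarch64_insn_gen_atomic_ld_op(), callers now select the operation and ordering explicitly. A hedged sketch of a caller (the wrapper name is made up; the in-tree consumer is the eBPF JIT): using XZR as the result register degrades LDADD to the store-only STADD form, and on !CONFIG_ARM64_LSE_ATOMICS kernels the stub returns AARCH64_BREAK_FAULT, so a mistakenly emitted instruction traps instead of executing silently.

    /* Illustrative wrapper, not a kernel function. */
    static u32 emit_stadd64(enum aarch64_insn_register addr,
                            enum aarch64_insn_register val)
    {
        return aarch64_insn_gen_atomic_ld_op(AARCH64_INSN_REG_ZR,
                                             addr, val,
                                             AARCH64_INSN_SIZE_64,
                                             AARCH64_INSN_MEM_ATOMIC_ADD,
                                             AARCH64_INSN_MEM_ORDER_NONE);
    }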
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 01d47c5886dc..1767ded83888 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -355,8 +355,8 @@
ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
-#define CPACR_EL1_FPEN (3 << 20)
#define CPACR_EL1_TTA (1 << 28)
-#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN)
+#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\
+ CPACR_EL1_ZEN_EL1EN)
#endif /* __ARM64_KVM_ARM_H__ */
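The rewritten default is bit-for-bit identical to the old (3 << 20) encoding; it only names the two halves. A quick sanity check, assuming FPEN_EL1EN and FPEN_EL0EN are bits 20 and 21 as added in the sysreg.h hunk further down:

    #include <assert.h>

    #define BIT(n)               (1UL << (n))
    #define CPACR_EL1_FPEN_EL1EN BIT(20) /* per the sysreg.h hunk */
    #define CPACR_EL1_FPEN_EL0EN BIT(21)

    int main(void)
    {
        assert((CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN) == (3UL << 20));
        return 0;
    }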
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5bc01e62c08a..031e3a2537fc 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -714,6 +714,11 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr();
}
+static inline bool kvm_system_needs_idmapped_vectors(void)
+{
+ return cpus_have_const_cap(ARM64_SPECTRE_V3A);
+}
+
void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
static inline void kvm_arch_hardware_unsetup(void) {}
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 462882f356c7..aa7fa2a08f06 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -118,6 +118,7 @@ extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64pfr1_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64isar0_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64isar1_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64isar2_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val);
diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
index b77e9b3f5371..43f8c25b3fda 100644
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -39,28 +39,4 @@
SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \
bti c ;
-/*
- * Annotate a function as position independent, i.e., safe to be called before
- * the kernel virtual mapping is activated.
- */
-#define SYM_FUNC_START_PI(x) \
- SYM_FUNC_START_ALIAS(__pi_##x); \
- SYM_FUNC_START(x)
-
-#define SYM_FUNC_START_WEAK_PI(x) \
- SYM_FUNC_START_ALIAS(__pi_##x); \
- SYM_FUNC_START_WEAK(x)
-
-#define SYM_FUNC_START_WEAK_ALIAS_PI(x) \
- SYM_FUNC_START_ALIAS(__pi_##x); \
- SYM_START(x, SYM_L_WEAK, SYM_A_ALIGN)
-
-#define SYM_FUNC_END_PI(x) \
- SYM_FUNC_END(x); \
- SYM_FUNC_END_ALIAS(__pi_##x)
-
-#define SYM_FUNC_END_ALIAS_PI(x) \
- SYM_FUNC_END_ALIAS(x); \
- SYM_FUNC_END_ALIAS(__pi_##x)
-
#endif
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index 5d10051c3e62..29c85810ae69 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -17,12 +17,10 @@
#include <asm/cpucaps.h>
extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
-extern struct static_key_false arm64_const_caps_ready;
-static inline bool system_uses_lse_atomics(void)
+static __always_inline bool system_uses_lse_atomics(void)
{
- return (static_branch_likely(&arm64_const_caps_ready)) &&
- static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]);
+ return static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]);
}
#define __lse_ll_sc_body(op, ...) \
diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h
index a11ccadd47d2..094701ec5500 100644
--- a/arch/arm64/include/asm/module.lds.h
+++ b/arch/arm64/include/asm/module.lds.h
@@ -1,8 +1,8 @@
SECTIONS {
#ifdef CONFIG_ARM64_MODULE_PLTS
- .plt 0 (NOLOAD) : { BYTE(0) }
- .init.plt 0 (NOLOAD) : { BYTE(0) }
- .text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) }
+ .plt 0 : { BYTE(0) }
+ .init.plt 0 : { BYTE(0) }
+ .text.ftrace_trampoline 0 : { BYTE(0) }
#endif
#ifdef CONFIG_KASAN_SW_TAGS
diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h
index 626d359b396e..14ee86b019c2 100644
--- a/arch/arm64/include/asm/mte-def.h
+++ b/arch/arm64/include/asm/mte-def.h
@@ -11,6 +11,7 @@
#define MTE_TAG_SHIFT 56
#define MTE_TAG_SIZE 4
#define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
+#define MTE_PAGE_TAG_STORAGE (MTE_GRANULES_PER_PAGE * MTE_TAG_SIZE / 8)
#define __MTE_PREAMBLE ARM64_ASM_PREAMBLE ".arch_extension memtag\n"
diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
index e4704a403237..a857bcacf0fe 100644
--- a/arch/arm64/include/asm/mte-kasan.h
+++ b/arch/arm64/include/asm/mte-kasan.h
@@ -5,6 +5,7 @@
#ifndef __ASM_MTE_KASAN_H
#define __ASM_MTE_KASAN_H
+#include <asm/compiler.h>
#include <asm/mte-def.h>
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 075539f5f1c8..adcb937342f1 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -11,7 +11,9 @@
#ifndef __ASSEMBLY__
#include <linux/bitfield.h>
+#include <linux/kasan-enabled.h>
#include <linux/page-flags.h>
+#include <linux/sched.h>
#include <linux/types.h>
#include <asm/pgtable-types.h>
@@ -86,6 +88,26 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child,
#endif /* CONFIG_ARM64_MTE */
+static inline void mte_disable_tco_entry(struct task_struct *task)
+{
+ if (!system_supports_mte())
+ return;
+
+ /*
+ * Re-enable tag checking (TCO set on exception entry). This is only
+ * necessary if MTE is enabled in either the kernel or the userspace
+ * task in synchronous or asymmetric mode (SCTLR_EL1.TCF0 bit 0 is set
+ * for both). With MTE disabled in the kernel and disabled or
+ * asynchronous in userspace, tag check faults (including in uaccesses)
+ * are not reported, therefore there is no need to re-enable checking.
+ * This is beneficial on microarchitectures where re-enabling TCO is
+ * expensive.
+ */
+ if (kasan_hw_tags_enabled() ||
+ (task->thread.sctlr_user & (1UL << SCTLR_EL1_TCF0_SHIFT)))
+ asm volatile(SET_PSTATE_TCO(0));
+}
+
#ifdef CONFIG_KASAN_HW_TAGS
/* Whether the MTE asynchronous mode is enabled. */
DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
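The TCF0 test in mte_disable_tco_entry() relies on the field encoding (0 = off, 1 = sync, 2 = async, 3 = asymm), so bit 0 of the field is set exactly for the two modes that report faults synchronously. A worked check, assuming SCTLR_EL1_TCF0_SHIFT is 38 as in the kernel's sysreg definitions:

    #include <assert.h>

    #define TCF0_SHIFT 38 /* assumed: SCTLR_EL1_TCF0_SHIFT */
    #define TCF0_BIT0  (1UL << TCF0_SHIFT)

    int main(void)
    {
        assert( (1UL << TCF0_SHIFT) & TCF0_BIT0);  /* sync: re-enable checks */
        assert(!((2UL << TCF0_SHIFT) & TCF0_BIT0)); /* async: skip */
        assert( (3UL << TCF0_SHIFT) & TCF0_BIT0);  /* asymm: re-enable checks */
        return 0;
    }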
diff --git a/arch/arm64/include/asm/paravirt_api_clock.h b/arch/arm64/include/asm/paravirt_api_clock.h
new file mode 100644
index 000000000000..65ac7cee0dad
--- /dev/null
+++ b/arch/arm64/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include <asm/paravirt.h>
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 4ef6f19331f9..3eaf462f5752 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -15,70 +15,70 @@
/*
* Common architectural and microarchitectural event numbers.
*/
-#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00
-#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x01
-#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x02
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04
-#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x05
-#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x06
-#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x07
-#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08
-#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x09
-#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x0A
-#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x0B
-#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x0C
-#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x0D
-#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x0E
-#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x0F
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10
-#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11
-#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12
-#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x13
-#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x14
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x15
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x16
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x17
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x18
-#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x19
-#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x1A
-#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B
-#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x1C
-#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x1D
-#define ARMV8_PMUV3_PERFCTR_CHAIN 0x1E
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x1F
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x20
-#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x21
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x22
-#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x23
-#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x24
-#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x25
-#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x26
-#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x27
-#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x28
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x29
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x2A
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x2B
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x2C
-#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x2D
-#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x2E
-#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F
-#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x30
-#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x31
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x32
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x33
-#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x34
-#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x35
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x36
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x37
-#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x38
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x39
-#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x3A
-#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x3B
-#define ARMV8_PMUV3_PERFCTR_STALL 0x3C
-#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x3D
-#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x3E
-#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x3F
+#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x0000
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x0001
+#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x0002
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x0003
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x0004
+#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x0005
+#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x0006
+#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x0007
+#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x0008
+#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x0009
+#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x000A
+#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x000B
+#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x000C
+#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x000D
+#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x000E
+#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x000F
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x0010
+#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x0011
+#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x0012
+#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x0013
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x0014
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x0015
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x0016
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x0017
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x0018
+#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x0019
+#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x001A
+#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x001B
+#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x001C
+#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x001D
+#define ARMV8_PMUV3_PERFCTR_CHAIN 0x001E
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x001F
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x0020
+#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x0021
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x0022
+#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x0023
+#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x0024
+#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x0025
+#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x0026
+#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x0027
+#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x0028
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x0029
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x002A
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x002B
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x002C
+#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x002D
+#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x002E
+#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x002F
+#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x0030
+#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x0031
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x0032
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x0033
+#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x0034
+#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x0035
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x0036
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x0037
+#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x0038
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x0039
+#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x003A
+#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x003B
+#define ARMV8_PMUV3_PERFCTR_STALL 0x003C
+#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x003D
+#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x003E
+#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x003F
/* Statistical profiling extension microarchitectural events */
#define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000
@@ -96,6 +96,20 @@
#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS 0x400A
#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD 0x400B
+/* Trace buffer events */
+#define ARMV8_PMUV3_PERFCTR_TRB_WRAP 0x400C
+#define ARMV8_PMUV3_PERFCTR_TRB_TRIG 0x400E
+
+/* Trace unit events */
+#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT0 0x4010
+#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT1 0x4011
+#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT2 0x4012
+#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT3 0x4013
+#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4 0x4018
+#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5 0x4019
+#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6 0x401A
+#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7 0x401B
+
/* additional latency from alignment events */
#define ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT 0x4020
#define ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT 0x4021
@@ -107,91 +121,91 @@
#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR 0x4026
/* ARMv8 recommended implementation defined event types */
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x40
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x41
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x42
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x43
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x44
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x45
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x46
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x47
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x48
-
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x4C
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x4D
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x4E
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x4F
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x50
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x51
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x52
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x53
-
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x56
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x57
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x58
-
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x5C
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x5D
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x5E
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x5F
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x60
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x61
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x62
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x63
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x64
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x65
-#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x66
-#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x67
-#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x68
-#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x69
-#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x6A
-
-#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x6C
-#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x6D
-#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x6E
-#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x6F
-#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x70
-#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x71
-#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x72
-#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x73
-#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x74
-#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x75
-#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x76
-#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x77
-#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x78
-#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x79
-#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x7A
-
-#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x7C
-#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x7D
-#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x7E
-
-#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x81
-#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x82
-#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x83
-#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x84
-
-#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x86
-#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x87
-#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x88
-
-#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x8A
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x8B
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x8C
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x8D
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x8E
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x8F
-#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x90
-#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x91
-
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0xA0
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0xA1
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0xA2
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0xA3
-
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0xA6
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0xA7
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0xA8
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x0040
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x0041
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x0042
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x0043
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x0044
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x0045
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x0046
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x0047
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x0048
+
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x004C
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x004D
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x004E
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x004F
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x0050
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x0051
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x0052
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x0053
+
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x0056
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x0057
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x0058
+
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x005C
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x005D
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x005E
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x005F
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x0060
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x0061
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x0062
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x0063
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x0064
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x0065
+#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x0066
+#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x0067
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x0068
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x0069
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x006A
+
+#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x006C
+#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x006D
+#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x006E
+#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x006F
+#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x0070
+#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x0071
+#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x0072
+#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x0073
+#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x0074
+#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x0075
+#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x0076
+#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x0077
+#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x0078
+#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x0079
+#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x007A
+
+#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x007C
+#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x007D
+#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x007E
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x0081
+#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x0082
+#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x0083
+#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x0084
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x0086
+#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x0087
+#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x0088
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x008A
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x008B
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x008C
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x008D
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x008E
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x008F
+#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x0090
+#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x0091
+
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0x00A0
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0x00A1
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0x00A2
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0x00A3
+
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0x00A6
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0x00A7
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0x00A8
/*
* Per-CPU PMCR: config reg
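Since these constants are raw event numbers, widening them to four hex digits changes nothing for users: they are passed as perf_event_attr.config values. A small user-space sketch using the standard perf syscall (names and event choice are illustrative):

    #include <linux/perf_event.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Open a counter for one raw PMUv3 event on the calling thread. */
    static int open_raw_event(unsigned long long evt)
    {
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_RAW;
        attr.config = evt; /* e.g. 0x0004, L1D_CACHE */
        attr.disabled = 1;

        return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }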
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 40085e53f573..66671ff05183 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -273,6 +273,8 @@
#define TCR_NFD1 (UL(1) << 54)
#define TCR_E0PD0 (UL(1) << 55)
#define TCR_E0PD1 (UL(1) << 56)
+#define TCR_TCMA0 (UL(1) << 57)
+#define TCR_TCMA1 (UL(1) << 58)
/*
* TTBR.
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 7032f04c8ac6..b1e1b74d993c 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -92,7 +92,7 @@ extern bool arm64_use_ng_mappings;
#define __P001 PAGE_READONLY
#define __P010 PAGE_READONLY
#define __P011 PAGE_READONLY
-#define __P100 PAGE_EXECONLY
+#define __P100 PAGE_READONLY_EXEC /* PAGE_EXECONLY if Enhanced PAN */
#define __P101 PAGE_READONLY_EXEC
#define __P110 PAGE_READONLY_EXEC
#define __P111 PAGE_READONLY_EXEC
@@ -101,7 +101,7 @@ extern bool arm64_use_ng_mappings;
#define __S001 PAGE_READONLY
#define __S010 PAGE_SHARED
#define __S011 PAGE_SHARED
-#define __S100 PAGE_EXECONLY
+#define __S100 PAGE_READONLY_EXEC /* PAGE_EXECONLY if Enhanced PAN */
#define __S101 PAGE_READONLY_EXEC
#define __S110 PAGE_SHARED_EXEC
#define __S111 PAGE_SHARED_EXEC
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index c4ba047a82d2..94e147e5456c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1017,17 +1017,6 @@ static inline bool arch_wants_old_prefaulted_pte(void)
}
#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte
-static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
-{
- if (cpus_have_const_cap(ARM64_HAS_EPAN))
- return prot;
-
- if (pgprot_val(prot) != pgprot_val(PAGE_EXECONLY))
- return prot;
-
- return PAGE_READONLY_EXEC;
-}
-
static inline bool pud_sect_supported(void)
{
return PAGE_SIZE == SZ_4K;
diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h
index e83f0982b99c..0159b625cc7f 100644
--- a/arch/arm64/include/asm/preempt.h
+++ b/arch/arm64/include/asm/preempt.h
@@ -2,6 +2,7 @@
#ifndef __ASM_PREEMPT_H
#define __ASM_PREEMPT_H
+#include <linux/jump_label.h>
#include <linux/thread_info.h>
#define PREEMPT_NEED_RESCHED BIT(32)
@@ -80,10 +81,24 @@ static inline bool should_resched(int preempt_offset)
}
#ifdef CONFIG_PREEMPTION
+
void preempt_schedule(void);
-#define __preempt_schedule() preempt_schedule()
void preempt_schedule_notrace(void);
-#define __preempt_schedule_notrace() preempt_schedule_notrace()
+
+#ifdef CONFIG_PREEMPT_DYNAMIC
+
+DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
+void dynamic_preempt_schedule(void);
+#define __preempt_schedule() dynamic_preempt_schedule()
+void dynamic_preempt_schedule_notrace(void);
+#define __preempt_schedule_notrace() dynamic_preempt_schedule_notrace()
+
+#else /* CONFIG_PREEMPT_DYNAMIC */
+
+#define __preempt_schedule() preempt_schedule()
+#define __preempt_schedule_notrace() preempt_schedule_notrace()
+
+#endif /* CONFIG_PREEMPT_DYNAMIC */
#endif /* CONFIG_PREEMPTION */
#endif /* __ASM_PREEMPT_H */
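For context, the dynamic_* entry points are expected to gate the real call behind a boot-patched static key; a sketch of the assumed scheduler-side shape (mirroring kernel/sched/core.c, reproduced here only as an illustration):

    DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule);

    void dynamic_preempt_schedule(void)
    {
        /* Becomes a patched-out fall-through when preempt= selects
         * voluntary/none at boot. */
        if (!static_branch_unlikely(&sk_dynamic_preempt_schedule))
            return;
        preempt_schedule();
    }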
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 6f41b65f9962..73e38d9a540c 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -21,6 +21,7 @@
#define MTE_CTRL_TCF_SYNC (1UL << 16)
#define MTE_CTRL_TCF_ASYNC (1UL << 17)
+#define MTE_CTRL_TCF_ASYMM (1UL << 18)
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h
index 1bce62fa908a..56f7b1d4d54b 100644
--- a/arch/arm64/include/asm/rwonce.h
+++ b/arch/arm64/include/asm/rwonce.h
@@ -5,7 +5,7 @@
#ifndef __ASM_RWONCE_H
#define __ASM_RWONCE_H
-#ifdef CONFIG_LTO
+#if defined(CONFIG_LTO) && !defined(__ASSEMBLY__)
#include <linux/compiler_types.h>
#include <asm/alternative-macros.h>
@@ -66,7 +66,7 @@
})
#endif /* !BUILD_VDSO */
-#endif /* CONFIG_LTO */
+#endif /* CONFIG_LTO && !__ASSEMBLY__ */
#include <asm-generic/rwonce.h>
diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index 152cb35bf9df..40971ac1303f 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -23,4 +23,9 @@ extern char __mmuoff_data_start[], __mmuoff_data_end[];
extern char __entry_tramp_text_start[], __entry_tramp_text_end[];
extern char __relocate_new_kernel_start[], __relocate_new_kernel_end[];
+static inline size_t entry_tramp_text_size(void)
+{
+ return __entry_tramp_text_end - __entry_tramp_text_start;
+}
+
#endif /* __ASM_SECTIONS_H */
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
index f62ca39da6c5..aa3d3607d5c8 100644
--- a/arch/arm64/include/asm/spectre.h
+++ b/arch/arm64/include/asm/spectre.h
@@ -67,7 +67,8 @@ struct bp_hardening_data {
DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
-static inline void arm64_apply_bp_hardening(void)
+/* Called during entry so must be __always_inline */
+static __always_inline void arm64_apply_bp_hardening(void)
{
struct bp_hardening_data *d;
@@ -93,5 +94,9 @@ void spectre_v4_enable_task_mitigation(struct task_struct *tsk);
enum mitigation_state arm64_get_meltdown_state(void);
+enum mitigation_state arm64_get_spectre_bhb_state(void);
+bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope);
+u8 spectre_bhb_loop_affected(int scope);
+void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
#endif /* __ASSEMBLY__ */
#endif /* __ASM_SPECTRE_H */
diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h
index 95f7686b728d..3a3264ff47b9 100644
--- a/arch/arm64/include/asm/string.h
+++ b/arch/arm64/include/asm/string.h
@@ -12,13 +12,11 @@ extern char *strrchr(const char *, int c);
#define __HAVE_ARCH_STRCHR
extern char *strchr(const char *, int c);
-#ifndef CONFIG_KASAN_HW_TAGS
#define __HAVE_ARCH_STRCMP
extern int strcmp(const char *, const char *);
#define __HAVE_ARCH_STRNCMP
extern int strncmp(const char *, const char *, __kernel_size_t);
-#endif
#define __HAVE_ARCH_STRLEN
extern __kernel_size_t strlen(const char *);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 898bee0004ae..c7ca3a105528 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -773,6 +773,9 @@
#define ID_AA64ISAR1_GPI_IMP_DEF 0x1
/* id_aa64isar2 */
+#define ID_AA64ISAR2_CLEARBHB_SHIFT 28
+#define ID_AA64ISAR2_APA3_SHIFT 12
+#define ID_AA64ISAR2_GPA3_SHIFT 8
#define ID_AA64ISAR2_RPRES_SHIFT 4
#define ID_AA64ISAR2_WFXT_SHIFT 0
@@ -786,6 +789,16 @@
#define ID_AA64ISAR2_WFXT_NI 0x0
#define ID_AA64ISAR2_WFXT_SUPPORTED 0x2
+#define ID_AA64ISAR2_APA3_NI 0x0
+#define ID_AA64ISAR2_APA3_ARCHITECTED 0x1
+#define ID_AA64ISAR2_APA3_ARCH_EPAC 0x2
+#define ID_AA64ISAR2_APA3_ARCH_EPAC2 0x3
+#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC 0x4
+#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC_CMB 0x5
+
+#define ID_AA64ISAR2_GPA3_NI 0x0
+#define ID_AA64ISAR2_GPA3_ARCHITECTED 0x1
+
/* id_aa64pfr0 */
#define ID_AA64PFR0_CSV3_SHIFT 60
#define ID_AA64PFR0_CSV2_SHIFT 56
@@ -904,6 +917,7 @@
#endif
/* id_aa64mmfr1 */
+#define ID_AA64MMFR1_ECBHB_SHIFT 60
#define ID_AA64MMFR1_AFP_SHIFT 44
#define ID_AA64MMFR1_ETS_SHIFT 36
#define ID_AA64MMFR1_TWED_SHIFT 32
@@ -1097,13 +1111,11 @@
#define ZCR_ELx_LEN_SIZE 9
#define ZCR_ELx_LEN_MASK 0x1ff
+#define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */
+#define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */
+
#define CPACR_EL1_ZEN_EL1EN (BIT(16)) /* enable EL1 access */
#define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */
-#define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
-
-/* TCR EL1 Bit Definitions */
-#define SYS_TCR_EL1_TCMA1 (BIT(58))
-#define SYS_TCR_EL1_TCMA0 (BIT(57))
/* GCR_EL1 Definitions */
#define SYS_GCR_EL1_RRND (BIT(16))
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index f386b90a79c8..9fab663dd2de 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -24,6 +24,10 @@ void update_freq_counters_refs(void);
#define arch_scale_freq_capacity topology_get_freq_scale
#define arch_scale_freq_invariant topology_scale_freq_invariant
+#ifdef CONFIG_ACPI_CPPC_LIB
+#define arch_init_invariance_cppc topology_init_cpu_capacity_cppc
+#endif
+
/* Replace task scheduler's default cpu-invariant accounting */
#define arch_scale_cpu_capacity topology_get_cpu_scale
diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h
new file mode 100644
index 000000000000..bc9a2145f419
--- /dev/null
+++ b/arch/arm64/include/asm/vectors.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 ARM Ltd.
+ */
+#ifndef __ASM_VECTORS_H
+#define __ASM_VECTORS_H
+
+#include <linux/bug.h>
+#include <linux/percpu.h>
+
+#include <asm/fixmap.h>
+
+extern char vectors[];
+extern char tramp_vectors[];
+extern char __bp_harden_el1_vectors[];
+
+/*
+ * Note: the order of this enum corresponds to two arrays in entry.S:
+ * tramp_vecs and __bp_harden_el1_vectors. By default the canonical
+ * 'full fat' vectors are used directly.
+ */
+enum arm64_bp_harden_el1_vectors {
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+ /*
+ * Perform the BHB loop mitigation, before branching to the canonical
+ * vectors.
+ */
+ EL1_VECTOR_BHB_LOOP,
+
+ /*
+ * Make the SMC call for firmware mitigation, before branching to the
+ * canonical vectors.
+ */
+ EL1_VECTOR_BHB_FW,
+
+ /*
+ * Use the ClearBHB instruction, before branching to the canonical
+ * vectors.
+ */
+ EL1_VECTOR_BHB_CLEAR_INSN,
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+
+ /*
+ * Remap the kernel before branching to the canonical vectors.
+ */
+ EL1_VECTOR_KPTI,
+};
+
+#ifndef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+#define EL1_VECTOR_BHB_LOOP -1
+#define EL1_VECTOR_BHB_FW -1
+#define EL1_VECTOR_BHB_CLEAR_INSN -1
+#endif /* !CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+
+/* The vectors to use on return from EL0. e.g. to remap the kernel */
+DECLARE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector);
+
+#ifndef CONFIG_UNMAP_KERNEL_AT_EL0
+#define TRAMP_VALIAS 0ul
+#endif
+
+static inline const char *
+arm64_get_bp_hardening_vector(enum arm64_bp_harden_el1_vectors slot)
+{
+ if (arm64_kernel_unmapped_at_el0())
+ return (char *)(TRAMP_VALIAS + SZ_2K * slot);
+
+ WARN_ON_ONCE(slot == EL1_VECTOR_KPTI);
+
+ return __bp_harden_el1_vectors + SZ_2K * slot;
+}
+
+#endif /* __ASM_VECTORS_H */
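Each vector stub is SZ_2K, so slot n resolves to base + 2048 * n, with base either the trampoline alias (when the kernel is unmapped at EL0) or __bp_harden_el1_vectors. A hedged sketch of a CPU adopting a hardened vector; the real install happens from assembly, and the write_sysreg()/isb() pair here is only illustrative:

    static void sketch_adopt_vector(enum arm64_bp_harden_el1_vectors slot)
    {
        const char *v = arm64_get_bp_hardening_vector(slot);

        __this_cpu_write(this_cpu_vector, v);
        write_sysreg((unsigned long)v, vbar_el1); /* illustrative */
        isb();
    }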
diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h
index 947f6a4f1aa0..befcd8a7abc9 100644
--- a/arch/arm64/include/asm/xor.h
+++ b/arch/arm64/include/asm/xor.h
@@ -16,7 +16,8 @@
extern struct xor_block_template const xor_block_inner_neon;
static void
-xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
kernel_neon_begin();
xor_block_inner_neon.do_2(bytes, p1, p2);
@@ -24,8 +25,9 @@ xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
kernel_neon_begin();
xor_block_inner_neon.do_3(bytes, p1, p2, p3);
@@ -33,8 +35,10 @@ xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
kernel_neon_begin();
xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4);
@@ -42,8 +46,11 @@ xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
kernel_neon_begin();
xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5);
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index f03731847d9d..99cb5d383048 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -78,5 +78,6 @@
#define HWCAP2_ECV (1 << 19)
#define HWCAP2_AFP (1 << 20)
#define HWCAP2_RPRES (1 << 21)
+#define HWCAP2_MTE3 (1 << 22)
#endif /* _UAPI__ASM_HWCAP_H */
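Userspace can probe the new capability through the auxiliary vector; a minimal example, guarding the define for builds against older headers:

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef HWCAP2_MTE3
    #define HWCAP2_MTE3 (1 << 22)
    #endif

    int main(void)
    {
        unsigned long hwcap2 = getauxval(AT_HWCAP2);

        printf("MTE3 (asymmetric mode): %s\n",
               (hwcap2 & HWCAP2_MTE3) ? "present" : "absent");
        return 0;
    }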
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index b3edde68bc3e..323e251ed37b 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -281,6 +281,11 @@ struct kvm_arm_copy_mte_tags {
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 KVM_REG_ARM_FW_REG(3)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL 0
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL 1
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED 2
+
/* SVE registers */
#define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT)
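The new firmware pseudo-register is accessed with the usual ONE_REG ioctls; a sketch of a VMM querying a vCPU's Spectre-BHB mitigation state (error handling elided):

    #include <linux/kvm.h>
    #include <stdint.h>
    #include <sys/ioctl.h>

    static int read_wa3(int vcpu_fd, uint64_t *val)
    {
        struct kvm_one_reg reg = {
            .id   = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3,
            .addr = (uint64_t)(unsigned long)val,
        };

        /* 0 on success; *val then holds NOT_AVAIL/AVAIL/NOT_REQUIRED. */
        return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
    }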
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 88b3e2a21408..986837d7ec82 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
obj-$(CONFIG_PARAVIRT) += paravirt.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
+obj-$(CONFIG_ELF_CORE) += elfcore.o
obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \
cpu-reset.o
obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 9e1c1aef9ebd..4c9b5b4b7a0b 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -214,6 +214,21 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
};
#endif
+#ifdef CONFIG_CAVIUM_ERRATUM_23154
+const struct midr_range cavium_erratum_23154_cpus[] = {
+ MIDR_ALL_VERSIONS(MIDR_THUNDERX),
+ MIDR_ALL_VERSIONS(MIDR_THUNDERX_81XX),
+ MIDR_ALL_VERSIONS(MIDR_THUNDERX_83XX),
+ MIDR_ALL_VERSIONS(MIDR_OCTX2_98XX),
+ MIDR_ALL_VERSIONS(MIDR_OCTX2_96XX),
+ MIDR_ALL_VERSIONS(MIDR_OCTX2_95XX),
+ MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXN),
+ MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXMM),
+ MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXO),
+ {},
+};
+#endif
+
#ifdef CONFIG_CAVIUM_ERRATUM_27456
const struct midr_range cavium_erratum_27456_cpus[] = {
/* Cavium ThunderX, T88 pass 1.x - 2.1 */
@@ -347,6 +362,7 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = {
#endif
#ifdef CONFIG_ARM64_ERRATUM_2119858
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_RANGE(MIDR_CORTEX_X2, 0, 0, 2, 0),
#endif
{},
};
@@ -371,6 +387,7 @@ static struct midr_range trbe_write_out_of_range_cpus[] = {
#endif
#ifdef CONFIG_ARM64_ERRATUM_2224489
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_RANGE(MIDR_CORTEX_X2, 0, 0, 2, 0),
#endif
{},
};
@@ -423,10 +440,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
#endif
#ifdef CONFIG_CAVIUM_ERRATUM_23154
{
- /* Cavium ThunderX, pass 1.x */
- .desc = "Cavium erratum 23154",
+ .desc = "Cavium errata 23154 and 38545",
.capability = ARM64_WORKAROUND_CAVIUM_23154,
- ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1),
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ ERRATA_MIDR_RANGE_LIST(cavium_erratum_23154_cpus),
},
#endif
#ifdef CONFIG_CAVIUM_ERRATUM_27456
@@ -500,6 +517,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.matches = has_spectre_v4,
.cpu_enable = spectre_v4_enable_mitigation,
},
+ {
+ .desc = "Spectre-BHB",
+ .capability = ARM64_SPECTRE_BHB,
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ .matches = is_spectre_bhb_affected,
+ .cpu_enable = spectre_bhb_enable_mitigation,
+ },
#ifdef CONFIG_ARM64_ERRATUM_1418040
{
.desc = "ARM erratum 1418040",
@@ -598,6 +622,40 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus),
},
#endif
+#ifdef CONFIG_ARM64_ERRATUM_2077057
+ {
+ .desc = "ARM erratum 2077057",
+ .capability = ARM64_WORKAROUND_2077057,
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2),
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2064142
+ {
+ .desc = "ARM erratum 2064142",
+ .capability = ARM64_WORKAROUND_2064142,
+
+ /* Cortex-A510 r0p0 - r0p2 */
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2)
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2038923
+ {
+ .desc = "ARM erratum 2038923",
+ .capability = ARM64_WORKAROUND_2038923,
+
+ /* Cortex-A510 r0p0 - r0p2 */
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2)
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_1902691
+ {
+ .desc = "ARM erratum 1902691",
+ .capability = ARM64_WORKAROUND_1902691,
+
+ /* Cortex-A510 r0p0 - r0p1 */
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 1)
+ },
+#endif
{
}
};
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index a46ab3b1c4d5..d72c4b4d389c 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -73,6 +73,8 @@
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/kasan.h>
+#include <linux/percpu.h>
+
#include <asm/cpu.h>
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
@@ -85,6 +87,7 @@
#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/traps.h>
+#include <asm/vectors.h>
#include <asm/virt.h>
/* Kernel representation of AT_HWCAP and AT_HWCAP2 */
@@ -110,6 +113,8 @@ DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE);
bool arm64_use_ng_mappings = false;
EXPORT_SYMBOL(arm64_use_ng_mappings);
+DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors;
+
/*
* Permit PER_LINUX32 and execve() of 32-bit binaries even if not all CPUs
* support it?
@@ -226,6 +231,11 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_CLEARBHB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
+ FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_APA3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_GPA3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -596,6 +606,7 @@ static const struct arm64_ftr_bits ftr_raz[] = {
struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
+struct arm64_ftr_override __ro_after_init id_aa64isar2_override;
static const struct __ftr_reg_entry {
u32 sys_id;
@@ -644,6 +655,8 @@ static const struct __ftr_reg_entry {
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1,
&id_aa64isar1_override),
ARM64_FTR_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2,
+ &id_aa64isar2_override),
/* Op1 = 0, CRn = 0, CRm = 7 */
ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
@@ -1307,7 +1320,9 @@ u64 __read_sysreg_by_encoding(u32 sys_id)
static bool
feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
{
- int val = cpuid_feature_extract_field(reg, entry->field_pos, entry->sign);
+ int val = cpuid_feature_extract_field_width(reg, entry->field_pos,
+ entry->field_width,
+ entry->sign);
return val >= entry->min_field_value;
}
@@ -1590,6 +1605,12 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
int cpu = smp_processor_id();
+ if (__this_cpu_read(this_cpu_vector) == vectors) {
+ const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI);
+
+ __this_cpu_write(this_cpu_vector, v);
+ }
+
/*
* We don't need to rewrite the page-tables if either we've done
* it already or we have KASLR enabled and therefore have not
@@ -1646,6 +1667,9 @@ static bool cpu_has_broken_dbm(void)
/* Kryo4xx Silver (rdpe => r1p0) */
MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe),
#endif
+#ifdef CONFIG_ARM64_ERRATUM_2051678
+ MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2),
+#endif
{},
};
@@ -1772,14 +1796,6 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused)
write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
}
-static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused)
-{
- u64 val = read_sysreg_s(SYS_CLIDR_EL1);
-
- /* Check that CLIDR_EL1.LOU{U,IS} are both 0 */
- WARN_ON(CLIDR_LOUU(val) || CLIDR_LOUIS(val));
-}
-
#ifdef CONFIG_ARM64_PAN
static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
{
@@ -1826,21 +1842,27 @@ static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry,
/* Now check for the secondary CPUs with SCOPE_LOCAL_CPU scope */
sec_val = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg),
entry->field_pos, entry->sign);
- return sec_val == boot_val;
+ return (sec_val >= entry->min_field_value) && (sec_val == boot_val);
}
static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry,
int scope)
{
- return has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH], scope) ||
- has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope);
+ bool api = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope);
+ bool apa = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope);
+ bool apa3 = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope);
+
+ return apa || apa3 || api;
}
static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
int __unused)
{
- return __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH) ||
- __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF);
+ bool gpi = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF);
+ bool gpa = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5);
+ bool gpa3 = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3);
+
+ return gpa || gpa3 || gpi;
}
#endif /* CONFIG_ARM64_PTR_AUTH */
@@ -1942,6 +1964,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_useable_gicv3_cpuif,
.sys_reg = SYS_ID_AA64PFR0_EL1,
.field_pos = ID_AA64PFR0_GIC_SHIFT,
+ .field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = 1,
},
@@ -1952,6 +1975,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64MMFR0_EL1,
.field_pos = ID_AA64MMFR0_ECV_SHIFT,
+ .field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = 1,
},
@@ -1963,6 +1987,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64MMFR1_EL1,
.field_pos = ID_AA64MMFR1_PAN_SHIFT,
+ .field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = 1,
.cpu_enable = cpu_enable_pan,
@@ -1976,6 +2001,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64MMFR1_EL1,
.field_pos = ID_AA64MMFR1_PAN_SHIFT,
+ .field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = 3,
},
@@ -1988,6 +2014,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR0_EL1,
.field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
+ .field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = 2,
},
@@ -2012,6 +2039,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sys_reg = SYS_ID_AA64PFR0_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64PFR0_EL0_SHIFT,
+ .field_width = 4,
.min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT,
},
#ifdef CONFIG_KVM
@@ -2023,6 +2051,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sys_reg = SYS_ID_AA64PFR0_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64PFR0_EL1_SHIFT,
+ .field_width = 4,
.min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT,
},
{
@@ -2043,6 +2072,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
*/
.sys_reg = SYS_ID_AA64PFR0_EL1,
.field_pos = ID_AA64PFR0_CSV3_SHIFT,
+ .field_width = 4,
.min_field_value = 1,
.matches = unmap_kernel_at_el0,
.cpu_enable = kpti_install_ng_mappings,
@@ -2062,6 +2092,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR1_EL1,
.field_pos = ID_AA64ISAR1_DPB_SHIFT,
+ .field_width = 4,
.min_field_value = 1,
},
{
@@ -2072,6 +2103,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sys_reg = SYS_ID_AA64ISAR1_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64ISAR1_DPB_SHIFT,
+ .field_width = 4,
.min_field_value = 2,
},
#endif
@@ -2083,6 +2115,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sys_reg = SYS_ID_AA64PFR0_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64PFR0_SVE_SHIFT,
+ .field_width = 4,
.min_field_value = ID_AA64PFR0_SVE,
.matches = has_cpuid_feature,
.cpu_enable = sve_kernel_enable,
@@ -2097,6 +2130,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sys_reg = SYS_ID_AA64PFR0_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64PFR0_RAS_SHIFT,
+ .field_width = 4,
.min_field_value = ID_AA64PFR0_RAS_V1,
.cpu_enable = cpu_clear_disr,
},
@@ -2115,6 +2149,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sys_reg = SYS_ID_AA64PFR0_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64PFR0_AMU_SHIFT,
+ .field_width = 4,
.min_field_value = ID_AA64PFR0_AMU,
.cpu_enable = cpu_amu_enable,
},
@@ -2139,9 +2174,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sys_reg = SYS_ID_AA64MMFR2_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64MMFR2_FWB_SHIFT,
+ .field_width = 4,
.min_field_value = 1,
.matches = has_cpuid_feature,
- .cpu_enable = cpu_has_fwb,
},
{
.desc = "ARMv8.4 Translation Table Level",
@@ -2150,6 +2185,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sys_reg = SYS_ID_AA64MMFR2_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64MMFR2_TTL_SHIFT,
+ .field_width = 4,
.min_field_value = 1,
.matches = has_cpuid_feature,
},
@@ -2160,6 +2196,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR0_EL1,
.field_pos = ID_AA64ISAR0_TLB_SHIFT,
+ .field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = ID_AA64ISAR0_TLB_RANGE,
},
@@ -2178,6 +2215,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sys_reg = SYS_ID_AA64MMFR1_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64MMFR1_HADBS_SHIFT,
+ .field_width = 4,
.min_field_value = 2,
.matches = has_hw_dbm,
.cpu_enable = cpu_enable_hw_dbm,
@@ -2190,6 +2228,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR0_EL1,
.field_pos = ID_AA64ISAR0_CRC32_SHIFT,
+ .field_width = 4,
.min_field_value = 1,
},
{
@@ -2199,6 +2238,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64PFR1_EL1,
.field_pos = ID_AA64PFR1_SSBS_SHIFT,
+ .field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY,
},
@@ -2211,6 +2251,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sys_reg = SYS_ID_AA64MMFR2_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64MMFR2_CNP_SHIFT,
+ .field_width = 4,
.min_field_value = 1,
.cpu_enable = cpu_enable_cnp,
},
@@ -2222,27 +2263,41 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR1_EL1,
.field_pos = ID_AA64ISAR1_SB_SHIFT,
+ .field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = 1,
},
#ifdef CONFIG_ARM64_PTR_AUTH
{
- .desc = "Address authentication (architected algorithm)",
- .capability = ARM64_HAS_ADDRESS_AUTH_ARCH,
+ .desc = "Address authentication (architected QARMA5 algorithm)",
+ .capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5,
.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
.sys_reg = SYS_ID_AA64ISAR1_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64ISAR1_APA_SHIFT,
+ .field_width = 4,
.min_field_value = ID_AA64ISAR1_APA_ARCHITECTED,
.matches = has_address_auth_cpucap,
},
{
+ .desc = "Address authentication (architected QARMA3 algorithm)",
+ .capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .sys_reg = SYS_ID_AA64ISAR2_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64ISAR2_APA3_SHIFT,
+ .field_width = 4,
+ .min_field_value = ID_AA64ISAR2_APA3_ARCHITECTED,
+ .matches = has_address_auth_cpucap,
+ },
+ {
.desc = "Address authentication (IMP DEF algorithm)",
.capability = ARM64_HAS_ADDRESS_AUTH_IMP_DEF,
.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
.sys_reg = SYS_ID_AA64ISAR1_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64ISAR1_API_SHIFT,
+ .field_width = 4,
.min_field_value = ID_AA64ISAR1_API_IMP_DEF,
.matches = has_address_auth_cpucap,
},
@@ -2252,22 +2307,35 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_address_auth_metacap,
},
{
- .desc = "Generic authentication (architected algorithm)",
- .capability = ARM64_HAS_GENERIC_AUTH_ARCH,
+ .desc = "Generic authentication (architected QARMA5 algorithm)",
+ .capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.sys_reg = SYS_ID_AA64ISAR1_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64ISAR1_GPA_SHIFT,
+ .field_width = 4,
.min_field_value = ID_AA64ISAR1_GPA_ARCHITECTED,
.matches = has_cpuid_feature,
},
{
+ .desc = "Generic authentication (architected QARMA3 algorithm)",
+ .capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .sys_reg = SYS_ID_AA64ISAR2_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64ISAR2_GPA3_SHIFT,
+ .field_width = 4,
+ .min_field_value = ID_AA64ISAR2_GPA3_ARCHITECTED,
+ .matches = has_cpuid_feature,
+ },
+ {
.desc = "Generic authentication (IMP DEF algorithm)",
.capability = ARM64_HAS_GENERIC_AUTH_IMP_DEF,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.sys_reg = SYS_ID_AA64ISAR1_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64ISAR1_GPI_SHIFT,
+ .field_width = 4,
.min_field_value = ID_AA64ISAR1_GPI_IMP_DEF,
.matches = has_cpuid_feature,
},
@@ -2288,6 +2356,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = can_use_gic_priorities,
.sys_reg = SYS_ID_AA64PFR0_EL1,
.field_pos = ID_AA64PFR0_GIC_SHIFT,
+ .field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = 1,
},
@@ -2299,6 +2368,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.sys_reg = SYS_ID_AA64MMFR2_EL1,
.sign = FTR_UNSIGNED,
+ .field_width = 4,
.field_pos = ID_AA64MMFR2_E0PD_SHIFT,
.matches = has_cpuid_feature,
.min_field_value = 1,
@@ -2313,6 +2383,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR0_EL1,
.field_pos = ID_AA64ISAR0_RNDR_SHIFT,
+ .field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = 1,
},
@@ -2330,6 +2401,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.cpu_enable = bti_enable,
.sys_reg = SYS_ID_AA64PFR1_EL1,
.field_pos = ID_AA64PFR1_BT_SHIFT,
+ .field_width = 4,
.min_field_value = ID_AA64PFR1_BT_BTI,
.sign = FTR_UNSIGNED,
},
@@ -2342,6 +2414,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64PFR1_EL1,
.field_pos = ID_AA64PFR1_MTE_SHIFT,
+ .field_width = 4,
.min_field_value = ID_AA64PFR1_MTE,
.sign = FTR_UNSIGNED,
.cpu_enable = cpu_enable_mte,
@@ -2353,6 +2426,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64PFR1_EL1,
.field_pos = ID_AA64PFR1_MTE_SHIFT,
+ .field_width = 4,
.min_field_value = ID_AA64PFR1_MTE_ASYMM,
.sign = FTR_UNSIGNED,
},
@@ -2364,16 +2438,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sys_reg = SYS_ID_AA64ISAR1_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64ISAR1_LRCPC_SHIFT,
+ .field_width = 4,
.matches = has_cpuid_feature,
.min_field_value = 1,
},
{},
};
-#define HWCAP_CPUID_MATCH(reg, field, s, min_value) \
+#define HWCAP_CPUID_MATCH(reg, field, width, s, min_value) \
.matches = has_cpuid_feature, \
.sys_reg = reg, \
.field_pos = field, \
+ .field_width = width, \
.sign = s, \
.min_field_value = min_value,
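
The new .field_width member tells has_cpuid_feature() how many bits to pull out of the ID register before comparing against .min_field_value; until now a 4-bit field was implicitly assumed everywhere. A minimal sketch of the extract-and-compare step, using illustrative names rather than the kernel's own helpers:

    #include <stdbool.h>
    #include <stdint.h>

    /* Illustrative stand-in for the kernel's ID register field helpers. */
    static int64_t extract_field(uint64_t reg, int shift, int width, bool sign)
    {
            uint64_t mask = (width == 64) ? ~0ULL : (1ULL << width) - 1;
            uint64_t val = (reg >> shift) & mask;

            if (sign && width < 64 && (val & (1ULL << (width - 1))))
                    val |= ~mask;           /* sign-extend a signed field */
            return (int64_t)val;
    }

    static bool field_at_least(uint64_t reg, int shift, int width,
                               bool sign, int64_t min)
    {
            return extract_field(reg, shift, width, sign) >= min;
    }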
@@ -2383,10 +2459,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.hwcap_type = cap_type, \
.hwcap = cap, \
-#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \
+#define HWCAP_CAP(reg, field, width, s, min_value, cap_type, cap) \
{ \
__HWCAP_CAP(#cap, cap_type, cap) \
- HWCAP_CPUID_MATCH(reg, field, s, min_value) \
+ HWCAP_CPUID_MATCH(reg, field, width, s, min_value) \
}
#define HWCAP_MULTI_CAP(list, cap_type, cap) \
@@ -2406,11 +2482,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = {
{
HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_APA_SHIFT,
- FTR_UNSIGNED, ID_AA64ISAR1_APA_ARCHITECTED)
+ 4, FTR_UNSIGNED,
+ ID_AA64ISAR1_APA_ARCHITECTED)
+ },
+ {
+ HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_SHIFT,
+ 4, FTR_UNSIGNED, ID_AA64ISAR2_APA3_ARCHITECTED)
},
{
HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_SHIFT,
- FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF)
+ 4, FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF)
},
{},
};
@@ -2418,77 +2499,82 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = {
static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = {
{
HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPA_SHIFT,
- FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED)
+ 4, FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED)
+ },
+ {
+ HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_GPA3_SHIFT,
+ 4, FTR_UNSIGNED, ID_AA64ISAR2_GPA3_ARCHITECTED)
},
{
HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPI_SHIFT,
- FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF)
+ 4, FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF)
},
{},
};
#endif
static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM),
- HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM),
+ HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
#ifdef CONFIG_ARM64_SVE
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
#endif
- HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS),
+ HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS),
#ifdef CONFIG_ARM64_BTI
- HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI),
+ HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI),
#endif
#ifdef CONFIG_ARM64_PTR_AUTH
HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA),
HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG),
#endif
#ifdef CONFIG_ARM64_MTE
- HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE),
+ HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE),
+ HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE_ASYMM, CAP_HWCAP, KERNEL_HWCAP_MTE3),
#endif /* CONFIG_ARM64_MTE */
- HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
- HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
- HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
+ HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
+ HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
+ HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
{},
};
@@ -2517,15 +2603,15 @@ static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope)
static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = {
#ifdef CONFIG_COMPAT
HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON),
- HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4),
+ HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4),
/* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */
- HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP),
- HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
+ HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP),
+ HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
#endif
{},
};
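
Both tables feed the ELF auxiliary vector, so userspace consumes the result via getauxval(). A hedged sketch using the long-standing HWCAP2_MTE bit; the exact HWCAP2 bit for the new "mte3" entry is defined by this series' uapi header, so grepping /proc/cpuinfo for the string is the safer probe:

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef HWCAP2_MTE
    #define HWCAP2_MTE (1UL << 18)          /* from arm64's uapi hwcap.h */
    #endif

    int main(void)
    {
            if (getauxval(AT_HWCAP2) & HWCAP2_MTE)
                    puts("MTE present; see /proc/cpuinfo for \"mte3\"");
            return 0;
    }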
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index 03991eeff643..3006f4324808 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -54,6 +54,9 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu)
struct acpi_lpi_state *lpi;
struct acpi_processor *pr = per_cpu(processors, cpu);
+ if (unlikely(!pr || !pr->flags.has_lpi))
+ return -EINVAL;
+
/*
* If the PSCI cpu_suspend function hook has not been initialized
* idle states must not be enabled, so bail out
@@ -61,9 +64,6 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu)
if (!psci_ops.cpu_suspend)
return -EOPNOTSUPP;
- if (unlikely(!pr || !pr->flags.has_lpi))
- return -EINVAL;
-
count = pr->power.count - 1;
if (count <= 0)
return -ENODEV;
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 591c18a889a5..330b92ea863a 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -97,6 +97,7 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_ECV] = "ecv",
[KERNEL_HWCAP_AFP] = "afp",
[KERNEL_HWCAP_RPRES] = "rpres",
+ [KERNEL_HWCAP_MTE3] = "mte3",
};
#ifdef CONFIG_COMPAT
diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c
index 314391a156ee..2b65aae332ce 100644
--- a/arch/arm64/kernel/crash_core.c
+++ b/arch/arm64/kernel/crash_core.c
@@ -20,6 +20,12 @@ void arch_crash_save_vmcoreinfo(void)
{
VMCOREINFO_NUMBER(VA_BITS);
/* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */
+ vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR);
+ vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END);
+ vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START);
+ vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END);
+ vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START);
+ vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END);
vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n",
kimage_voffset);
vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n",
diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c
new file mode 100644
index 000000000000..3ed39c61a510
--- /dev/null
+++ b/arch/arm64/kernel/elfcore.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/coredump.h>
+#include <linux/elfcore.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+#include <asm/cpufeature.h>
+#include <asm/mte.h>
+
+#ifndef VMA_ITERATOR
+#define VMA_ITERATOR(name, mm, addr) \
+ struct mm_struct *name = mm
+#define for_each_vma(vmi, vma) \
+ for (vma = vmi->mmap; vma; vma = vma->vm_next)
+#endif
+
+#define for_each_mte_vma(vmi, vma) \
+ if (system_supports_mte()) \
+ for_each_vma(vmi, vma) \
+ if (vma->vm_flags & VM_MTE)
+
+static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma)
+{
+ if (vma->vm_flags & VM_DONTDUMP)
+ return 0;
+
+ return vma_pages(vma) * MTE_PAGE_TAG_STORAGE;
+}
+
+/* Derived from dump_user_range(); start/end must be page-aligned */
+static int mte_dump_tag_range(struct coredump_params *cprm,
+ unsigned long start, unsigned long end)
+{
+ unsigned long addr;
+
+ for (addr = start; addr < end; addr += PAGE_SIZE) {
+ char tags[MTE_PAGE_TAG_STORAGE];
+ struct page *page = get_dump_page(addr);
+
+ /*
+ * get_dump_page() returns NULL when encountering an empty
+ * page table entry that would otherwise have been filled with
+ * the zero page. Skip the equivalent tag dump which would
+ * have been all zeros.
+ */
+ if (!page) {
+ dump_skip(cprm, MTE_PAGE_TAG_STORAGE);
+ continue;
+ }
+
+ /*
+ * Pages mapped in user space as !pte_access_permitted() (e.g.
+ * PROT_EXEC only) may not have the PG_mte_tagged flag set.
+ */
+ if (!test_bit(PG_mte_tagged, &page->flags)) {
+ put_page(page);
+ dump_skip(cprm, MTE_PAGE_TAG_STORAGE);
+ continue;
+ }
+
+ mte_save_page_tags(page_address(page), tags);
+ put_page(page);
+ if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE))
+ return 0;
+ }
+
+ return 1;
+}
+
+Elf_Half elf_core_extra_phdrs(void)
+{
+ struct vm_area_struct *vma;
+ int vma_count = 0;
+ VMA_ITERATOR(vmi, current->mm, 0);
+
+ for_each_mte_vma(vmi, vma)
+ vma_count++;
+
+ return vma_count;
+}
+
+int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
+{
+ struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, current->mm, 0);
+
+ for_each_mte_vma(vmi, vma) {
+ struct elf_phdr phdr;
+
+ phdr.p_type = PT_ARM_MEMTAG_MTE;
+ phdr.p_offset = offset;
+ phdr.p_vaddr = vma->vm_start;
+ phdr.p_paddr = 0;
+ phdr.p_filesz = mte_vma_tag_dump_size(vma);
+ phdr.p_memsz = vma->vm_end - vma->vm_start;
+ offset += phdr.p_filesz;
+ phdr.p_flags = 0;
+ phdr.p_align = 0;
+
+ if (!dump_emit(cprm, &phdr, sizeof(phdr)))
+ return 0;
+ }
+
+ return 1;
+}
+
+size_t elf_core_extra_data_size(void)
+{
+ struct vm_area_struct *vma;
+ size_t data_size = 0;
+ VMA_ITERATOR(vmi, current->mm, 0);
+
+ for_each_mte_vma(vmi, vma)
+ data_size += mte_vma_tag_dump_size(vma);
+
+ return data_size;
+}
+
+int elf_core_write_extra_data(struct coredump_params *cprm)
+{
+ struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, current->mm, 0);
+
+ for_each_mte_vma(vmi, vma) {
+ if (vma->vm_flags & VM_DONTDUMP)
+ continue;
+
+ if (!mte_dump_tag_range(cprm, vma->vm_start, vma->vm_end))
+ return 0;
+ }
+
+ return 1;
+}
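
Each tagged VMA becomes one extra segment in the core file: the program header records the VA range, and the segment payload is the packed tag storage. A hedged reader sketch; PT_ARM_MEMTAG_MTE mirrors the segment type introduced alongside this series (later kernels renamed and renumbered the constant, so the fallback value below is an assumption):

    #include <elf.h>
    #include <stdio.h>

    #ifndef PT_ARM_MEMTAG_MTE
    #define PT_ARM_MEMTAG_MTE (PT_LOPROC + 0x1)     /* assumed value */
    #endif

    static void list_mte_segments(FILE *core)
    {
            Elf64_Ehdr eh;
            Elf64_Phdr ph;
            int i;

            if (fread(&eh, sizeof(eh), 1, core) != 1)
                    return;
            for (i = 0; i < eh.e_phnum; i++) {
                    if (fseek(core, (long)(eh.e_phoff + i * sizeof(ph)), SEEK_SET) ||
                        fread(&ph, sizeof(ph), 1, core) != 1)
                            return;
                    if (ph.p_type != PT_ARM_MEMTAG_MTE)
                            continue;
                    /* p_filesz bytes of packed tags cover p_memsz bytes of VA */
                    printf("tags for [0x%llx, 0x%llx) at offset 0x%llx\n",
                           (unsigned long long)ph.p_vaddr,
                           (unsigned long long)(ph.p_vaddr + ph.p_memsz),
                           (unsigned long long)ph.p_offset);
            }
    }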
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index ef7fcefb96bd..878c65aa7206 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -6,6 +6,7 @@
*/
#include <linux/context_tracking.h>
+#include <linux/kasan.h>
#include <linux/linkage.h>
#include <linux/lockdep.h>
#include <linux/ptrace.h>
@@ -56,6 +57,7 @@ static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
{
__enter_from_kernel_mode(regs);
mte_check_tfsr_entry();
+ mte_disable_tco_entry(current);
}
/*
@@ -103,6 +105,7 @@ static __always_inline void __enter_from_user_mode(void)
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit_irqoff();
trace_hardirqs_off_finish();
+ mte_disable_tco_entry(current);
}
static __always_inline void enter_from_user_mode(struct pt_regs *regs)
@@ -220,9 +223,26 @@ static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
lockdep_hardirqs_on(CALLER_ADDR0);
}
+#ifdef CONFIG_PREEMPT_DYNAMIC
+DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
+#define need_irq_preemption() \
+ (static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
+#else
+#define need_irq_preemption() (IS_ENABLED(CONFIG_PREEMPTION))
+#endif
+
static void __sched arm64_preempt_schedule_irq(void)
{
- lockdep_assert_irqs_disabled();
+ if (!need_irq_preemption())
+ return;
+
+ /*
+ * Note: thread_info::preempt_count includes both thread_info::count
+ * and thread_info::need_resched, and is not equivalent to
+ * preempt_count().
+ */
+ if (READ_ONCE(current_thread_info()->preempt_count) != 0)
+ return;
/*
* DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
@@ -438,14 +458,7 @@ static __always_inline void __el1_irq(struct pt_regs *regs,
do_interrupt_handler(regs, handler);
irq_exit_rcu();
- /*
- * Note: thread_info::preempt_count includes both thread_info::count
- * and thread_info::need_resched, and is not equivalent to
- * preempt_count().
- */
- if (IS_ENABLED(CONFIG_PREEMPTION) &&
- READ_ONCE(current_thread_info()->preempt_count) == 0)
- arm64_preempt_schedule_irq();
+ arm64_preempt_schedule_irq();
exit_to_kernel_mode(regs);
}
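
The single READ_ONCE() above is enough because arm64 folds the preemption state into one word. A hedged sketch of the layout (per asm/thread_info.h, little-endian; names are illustrative):

    /* The preempt count and an inverted need-resched flag share one
     * 64-bit word, so a single load tests both conditions at once. */
    union preempt_word {
            unsigned long long whole;
            struct {
                    unsigned int count;         /* 0 => preemptible */
                    unsigned int need_resched;  /* 0 => resched needed */
            } parts;
    };

    static int should_preempt(const union preempt_word *p)
    {
            /* Zero only when there is no nesting and a resched is due. */
            return p->whole == 0;
    }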
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 772ec2ecf488..ede028dee81b 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -37,18 +37,21 @@
.macro kernel_ventry, el:req, ht:req, regsize:req, label:req
.align 7
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+.Lventry_start\@:
.if \el == 0
-alternative_if ARM64_UNMAP_KERNEL_AT_EL0
+ /*
+ * This must be the first instruction of the EL0 vector entries. It is
+ * skipped by the trampoline vectors, to trigger the cleanup.
+ */
+ b .Lskip_tramp_vectors_cleanup\@
.if \regsize == 64
mrs x30, tpidrro_el0
msr tpidrro_el0, xzr
.else
mov x30, xzr
.endif
-alternative_else_nop_endif
+.Lskip_tramp_vectors_cleanup\@:
.endif
-#endif
sub sp, sp, #PT_REGS_SIZE
#ifdef CONFIG_VMAP_STACK
@@ -95,11 +98,15 @@ alternative_else_nop_endif
mrs x0, tpidrro_el0
#endif
b el\el\ht\()_\regsize\()_\label
+.org .Lventry_start\@ + 128 // Did we overflow the ventry slot?
.endm
- .macro tramp_alias, dst, sym
+ .macro tramp_alias, dst, sym, tmp
mov_q \dst, TRAMP_VALIAS
- add \dst, \dst, #(\sym - .entry.tramp.text)
+ adr_l \tmp, \sym
+ add \dst, \dst, \tmp
+ adr_l \tmp, .entry.tramp.text
+ sub \dst, \dst, \tmp
.endm
/*
@@ -116,7 +123,7 @@ alternative_cb_end
tbnz \tmp2, #TIF_SSBD, .L__asm_ssbd_skip\@
mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2
mov w1, #\state
-alternative_cb spectre_v4_patch_fw_mitigation_conduit
+alternative_cb smccc_patch_fw_mitigation_conduit
nop // Patched to SMC/HVC #0
alternative_cb_end
.L__asm_ssbd_skip\@:
@@ -300,6 +307,7 @@ alternative_else_nop_endif
str w21, [sp, #S_SYSCALLNO]
.endif
+#ifdef CONFIG_ARM64_PSEUDO_NMI
/* Save pmr */
alternative_if ARM64_HAS_IRQ_PRIO_MASKING
mrs_s x20, SYS_ICC_PMR_EL1
@@ -307,12 +315,6 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
mov x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET
msr_s SYS_ICC_PMR_EL1, x20
alternative_else_nop_endif
-
- /* Re-enable tag checking (TCO set on exception entry) */
-#ifdef CONFIG_ARM64_MTE
-alternative_if ARM64_MTE
- SET_PSTATE_TCO(0)
-alternative_else_nop_endif
#endif
/*
@@ -330,6 +332,7 @@ alternative_else_nop_endif
disable_daif
.endif
+#ifdef CONFIG_ARM64_PSEUDO_NMI
/* Restore pmr */
alternative_if ARM64_HAS_IRQ_PRIO_MASKING
ldr x20, [sp, #S_PMR_SAVE]
@@ -339,6 +342,7 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
dsb sy // Ensure priority change is seen by redistributor
.L__skip_pmr_sync\@:
alternative_else_nop_endif
+#endif
ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
@@ -413,21 +417,26 @@ alternative_else_nop_endif
ldp x24, x25, [sp, #16 * 12]
ldp x26, x27, [sp, #16 * 13]
ldp x28, x29, [sp, #16 * 14]
- ldr lr, [sp, #S_LR]
- add sp, sp, #PT_REGS_SIZE // restore sp
.if \el == 0
-alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
+alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
+ ldr lr, [sp, #S_LR]
+ add sp, sp, #PT_REGS_SIZE // restore sp
+ eret
+alternative_else_nop_endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
bne 4f
- msr far_el1, x30
- tramp_alias x30, tramp_exit_native
+ msr far_el1, x29
+ tramp_alias x30, tramp_exit_native, x29
br x30
4:
- tramp_alias x30, tramp_exit_compat
+ tramp_alias x30, tramp_exit_compat, x29
br x30
#endif
.else
+ ldr lr, [sp, #S_LR]
+ add sp, sp, #PT_REGS_SIZE // restore sp
+
/* Ensure any device/NC reads complete */
alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412
@@ -594,12 +603,6 @@ SYM_CODE_END(ret_to_user)
.popsection // .entry.text
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-/*
- * Exception vectors trampoline.
- */
- .pushsection ".entry.tramp.text", "ax"
-
// Move from tramp_pg_dir to swapper_pg_dir
.macro tramp_map_kernel, tmp
mrs \tmp, ttbr1_el1
@@ -633,12 +636,47 @@ alternative_else_nop_endif
*/
.endm
- .macro tramp_ventry, regsize = 64
+ .macro tramp_data_page dst
+ adr_l \dst, .entry.tramp.text
+ sub \dst, \dst, PAGE_SIZE
+ .endm
+
+ .macro tramp_data_read_var dst, var
+#ifdef CONFIG_RANDOMIZE_BASE
+ tramp_data_page \dst
+ add \dst, \dst, #:lo12:__entry_tramp_data_\var
+ ldr \dst, [\dst]
+#else
+ ldr \dst, =\var
+#endif
+ .endm
+
+#define BHB_MITIGATION_NONE 0
+#define BHB_MITIGATION_LOOP 1
+#define BHB_MITIGATION_FW 2
+#define BHB_MITIGATION_INSN 3
+
+ .macro tramp_ventry, vector_start, regsize, kpti, bhb
.align 7
1:
.if \regsize == 64
msr tpidrro_el0, x30 // Restored in kernel_ventry
.endif
+
+ .if \bhb == BHB_MITIGATION_LOOP
+ /*
+ * This sequence must appear before the first indirect branch, i.e. the
+ * ret out of tramp_ventry. It appears here because x30 is free.
+ */
+ __mitigate_spectre_bhb_loop x30
+ .endif // \bhb == BHB_MITIGATION_LOOP
+
+ .if \bhb == BHB_MITIGATION_INSN
+ clearbhb
+ isb
+ .endif // \bhb == BHB_MITIGATION_INSN
+
+ .if \kpti == 1
/*
* Defend against branch aliasing attacks by pushing a dummy
* entry onto the return stack and using a RET instruction to
@@ -648,46 +686,75 @@ alternative_else_nop_endif
b .
2:
tramp_map_kernel x30
-#ifdef CONFIG_RANDOMIZE_BASE
- adr x30, tramp_vectors + PAGE_SIZE
alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003
- ldr x30, [x30]
-#else
- ldr x30, =vectors
-#endif
+ tramp_data_read_var x30, vectors
alternative_if_not ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM
- prfm plil1strm, [x30, #(1b - tramp_vectors)]
+ prfm plil1strm, [x30, #(1b - \vector_start)]
alternative_else_nop_endif
+
msr vbar_el1, x30
- add x30, x30, #(1b - tramp_vectors)
isb
+ .else
+ ldr x30, =vectors
+ .endif // \kpti == 1
+
+ .if \bhb == BHB_MITIGATION_FW
+ /*
+ * The firmware sequence must appear before the first indirect branch,
+ * i.e. the ret out of tramp_ventry, but it also needs the stack to be
+ * mapped to save/restore the registers the SMC clobbers.
+ */
+ __mitigate_spectre_bhb_fw
+ .endif // \bhb == BHB_MITIGATION_FW
+
+ add x30, x30, #(1b - \vector_start + 4)
ret
+.org 1b + 128 // Did we overflow the ventry slot?
.endm
.macro tramp_exit, regsize = 64
- adr x30, tramp_vectors
+ tramp_data_read_var x30, this_cpu_vector
+ get_this_cpu_offset x29
+ ldr x30, [x30, x29]
+
msr vbar_el1, x30
- tramp_unmap_kernel x30
+ ldr lr, [sp, #S_LR]
+ tramp_unmap_kernel x29
.if \regsize == 64
- mrs x30, far_el1
+ mrs x29, far_el1
.endif
+ add sp, sp, #PT_REGS_SIZE // restore sp
eret
sb
.endm
- .align 11
-SYM_CODE_START_NOALIGN(tramp_vectors)
+ .macro generate_tramp_vector, kpti, bhb
+.Lvector_start\@:
.space 0x400
- tramp_ventry
- tramp_ventry
- tramp_ventry
- tramp_ventry
+ .rept 4
+ tramp_ventry .Lvector_start\@, 64, \kpti, \bhb
+ .endr
+ .rept 4
+ tramp_ventry .Lvector_start\@, 32, \kpti, \bhb
+ .endr
+ .endm
- tramp_ventry 32
- tramp_ventry 32
- tramp_ventry 32
- tramp_ventry 32
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+/*
+ * Exception vectors trampoline.
+ * The order must match __bp_harden_el1_vectors and the
+ * arm64_bp_harden_el1_vectors enum.
+ */
+ .pushsection ".entry.tramp.text", "ax"
+ .align 11
+SYM_CODE_START_NOALIGN(tramp_vectors)
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+ generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_LOOP
+ generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_FW
+ generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_INSN
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+ generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_NONE
SYM_CODE_END(tramp_vectors)
SYM_CODE_START(tramp_exit_native)
@@ -704,13 +771,57 @@ SYM_CODE_END(tramp_exit_compat)
.pushsection ".rodata", "a"
.align PAGE_SHIFT
SYM_DATA_START(__entry_tramp_data_start)
+__entry_tramp_data_vectors:
.quad vectors
+#ifdef CONFIG_ARM_SDE_INTERFACE
+__entry_tramp_data___sdei_asm_handler:
+ .quad __sdei_asm_handler
+#endif /* CONFIG_ARM_SDE_INTERFACE */
+__entry_tramp_data_this_cpu_vector:
+ .quad this_cpu_vector
SYM_DATA_END(__entry_tramp_data_start)
.popsection // .rodata
#endif /* CONFIG_RANDOMIZE_BASE */
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
/*
+ * Exception vectors for spectre mitigations on entry from EL1 when
+ * kpti is not in use.
+ */
+ .macro generate_el1_vector, bhb
+.Lvector_start\@:
+ kernel_ventry 1, t, 64, sync // Synchronous EL1t
+ kernel_ventry 1, t, 64, irq // IRQ EL1t
+ kernel_ventry 1, t, 64, fiq // FIQ EL1t
+ kernel_ventry 1, t, 64, error // Error EL1t
+
+ kernel_ventry 1, h, 64, sync // Synchronous EL1h
+ kernel_ventry 1, h, 64, irq // IRQ EL1h
+ kernel_ventry 1, h, 64, fiq // FIQ EL1h
+ kernel_ventry 1, h, 64, error // Error EL1h
+
+ .rept 4
+ tramp_ventry .Lvector_start\@, 64, 0, \bhb
+ .endr
+ .rept 4
+ tramp_ventry .Lvector_start\@, 32, 0, \bhb
+ .endr
+ .endm
+
+/* The order must match tramp_vecs and the arm64_bp_harden_el1_vectors enum. */
+ .pushsection ".entry.text", "ax"
+ .align 11
+SYM_CODE_START(__bp_harden_el1_vectors)
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+ generate_el1_vector bhb=BHB_MITIGATION_LOOP
+ generate_el1_vector bhb=BHB_MITIGATION_FW
+ generate_el1_vector bhb=BHB_MITIGATION_INSN
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+SYM_CODE_END(__bp_harden_el1_vectors)
+ .popsection
+
+
+/*
* Register switch for AArch64. The callee-saved registers need to be saved
* and restored. On entry:
* x0 = previous task_struct (must be preserved across the switch)
@@ -835,14 +946,7 @@ SYM_CODE_START(__sdei_asm_entry_trampoline)
* Remember whether to unmap the kernel on exit.
*/
1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)]
-
-#ifdef CONFIG_RANDOMIZE_BASE
- adr x4, tramp_vectors + PAGE_SIZE
- add x4, x4, #:lo12:__sdei_asm_trampoline_next_handler
- ldr x4, [x4]
-#else
- ldr x4, =__sdei_asm_handler
-#endif
+ tramp_data_read_var x4, __sdei_asm_handler
br x4
SYM_CODE_END(__sdei_asm_entry_trampoline)
NOKPROBE(__sdei_asm_entry_trampoline)
@@ -865,13 +969,6 @@ SYM_CODE_END(__sdei_asm_exit_trampoline)
NOKPROBE(__sdei_asm_exit_trampoline)
.ltorg
.popsection // .entry.tramp.text
-#ifdef CONFIG_RANDOMIZE_BASE
-.pushsection ".rodata", "a"
-SYM_DATA_START(__sdei_asm_trampoline_next_handler)
- .quad __sdei_asm_handler
-SYM_DATA_END(__sdei_asm_trampoline_next_handler)
-.popsection // .rodata
-#endif /* CONFIG_RANDOMIZE_BASE */
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
/*
@@ -981,7 +1078,7 @@ alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
alternative_else_nop_endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
- tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline
+ tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline, tmp=x3
br x5
#endif
SYM_CODE_END(__sdei_asm_handler)
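
With the trampoline spread over several pages, tramp_alias can no longer fold the symbol offset into an assembler constant and instead computes it at run time with a scratch register. The arithmetic, as a hedged C sketch:

    /* dst = TRAMP_VALIAS + (sym - start of .entry.tramp.text) */
    static inline unsigned long tramp_alias_addr(unsigned long tramp_valias,
                                                 unsigned long sym,
                                                 unsigned long tramp_text_start)
    {
            return tramp_valias + (sym - tramp_text_start);
    }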
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index d8e606fe3c21..8a2ceb591686 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -17,7 +17,7 @@
#define FTR_DESC_NAME_LEN 20
#define FTR_DESC_FIELD_LEN 10
#define FTR_ALIAS_NAME_LEN 30
-#define FTR_ALIAS_OPTION_LEN 80
+#define FTR_ALIAS_OPTION_LEN 116
struct ftr_set_desc {
char name[FTR_DESC_NAME_LEN];
@@ -71,6 +71,16 @@ static const struct ftr_set_desc isar1 __initconst = {
},
};
+static const struct ftr_set_desc isar2 __initconst = {
+ .name = "id_aa64isar2",
+ .override = &id_aa64isar2_override,
+ .fields = {
+ { "gpa3", ID_AA64ISAR2_GPA3_SHIFT },
+ { "apa3", ID_AA64ISAR2_APA3_SHIFT },
+ {}
+ },
+};
+
extern struct arm64_ftr_override kaslr_feature_override;
static const struct ftr_set_desc kaslr __initconst = {
@@ -88,6 +98,7 @@ static const struct ftr_set_desc * const regs[] __initconst = {
&mmfr1,
&pfr1,
&isar1,
+ &isar2,
&kaslr,
};
@@ -100,7 +111,8 @@ static const struct {
{ "arm64.nobti", "id_aa64pfr1.bt=0" },
{ "arm64.nopauth",
"id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
- "id_aa64isar1.api=0 id_aa64isar1.apa=0" },
+ "id_aa64isar1.api=0 id_aa64isar1.apa=0 "
+ "id_aa64isar2.gpa3=0 id_aa64isar2.apa3=0" },
{ "arm64.nomte", "id_aa64pfr1.mte=0" },
{ "nokaslr", "kaslr.disabled=1" },
};
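
With the isar2 set registered, booting with arm64.nopauth now masks the QARMA3 fields too: the alias expands to the id_aa64isar1 and id_aa64isar2 overrides listed above, and a single field can still be forced directly, e.g. by passing id_aa64isar2.apa3=0 on the kernel command line.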
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 7eaf1f7c4168..55a1ced8eb77 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -66,6 +66,10 @@ KVM_NVHE_ALIAS(kvm_patch_vector_branch);
KVM_NVHE_ALIAS(kvm_update_va_mask);
KVM_NVHE_ALIAS(kvm_get_kimage_voffset);
KVM_NVHE_ALIAS(kvm_compute_final_ctr_el0);
+KVM_NVHE_ALIAS(spectre_bhb_patch_loop_iter);
+KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable);
+KVM_NVHE_ALIAS(spectre_bhb_patch_wa3);
+KVM_NVHE_ALIAS(spectre_bhb_patch_clearbhb);
/* Global kernel state accessed by nVHE hyp code. */
KVM_NVHE_ALIAS(kvm_vgic_global_state);
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index f418ebc65f95..78b3e0f8e997 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -186,6 +186,11 @@ void mte_check_tfsr_el1(void)
}
#endif
+/*
+ * This is where we actually resolve the system and process MTE mode
+ * configuration into an actual value in SCTLR_EL1 that affects
+ * userspace.
+ */
static void mte_update_sctlr_user(struct task_struct *task)
{
/*
@@ -199,9 +204,20 @@ static void mte_update_sctlr_user(struct task_struct *task)
unsigned long pref, resolved_mte_tcf;
pref = __this_cpu_read(mte_tcf_preferred);
+ /*
+ * If there is no overlap between the system-preferred and
+ * program-requested values, go with what was requested.
+ */
resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
sctlr &= ~SCTLR_EL1_TCF0_MASK;
- if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
+ /*
+ * Pick an actual setting. The order in which we check for
+ * set bits and map into register values determines our
+ * default order.
+ */
+ if (resolved_mte_tcf & MTE_CTRL_TCF_ASYMM)
+ sctlr |= SCTLR_EL1_TCF0_ASYMM;
+ else if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
sctlr |= SCTLR_EL1_TCF0_ASYNC;
else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
sctlr |= SCTLR_EL1_TCF0_SYNC;
@@ -253,6 +269,9 @@ void mte_thread_switch(struct task_struct *next)
mte_update_sctlr_user(next);
mte_update_gcr_excl(next);
+ /* TCO may not have been disabled on exception entry for the current task. */
+ mte_disable_tco_entry(next);
+
/*
* Check if an async tag exception occurred at EL1.
*
@@ -293,6 +312,17 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
if (arg & PR_MTE_TCF_SYNC)
mte_ctrl |= MTE_CTRL_TCF_SYNC;
+ /*
+ * If the system supports it and both sync and async modes are
+ * specified then implicitly enable asymmetric mode.
+ * Userspace could see a mix of both sync and async anyway due
+ * to differing or changing defaults on CPUs.
+ */
+ if (cpus_have_cap(ARM64_MTE_ASYMM) &&
+ (arg & PR_MTE_TCF_ASYNC) &&
+ (arg & PR_MTE_TCF_SYNC))
+ mte_ctrl |= MTE_CTRL_TCF_ASYMM;
+
task->thread.mte_ctrl = mte_ctrl;
if (task == current) {
preempt_disable();
@@ -467,6 +497,8 @@ static ssize_t mte_tcf_preferred_show(struct device *dev,
return sysfs_emit(buf, "async\n");
case MTE_CTRL_TCF_SYNC:
return sysfs_emit(buf, "sync\n");
+ case MTE_CTRL_TCF_ASYMM:
+ return sysfs_emit(buf, "asymm\n");
default:
return sysfs_emit(buf, "???\n");
}
@@ -482,6 +514,8 @@ static ssize_t mte_tcf_preferred_store(struct device *dev,
tcf = MTE_CTRL_TCF_ASYNC;
else if (sysfs_streq(buf, "sync"))
tcf = MTE_CTRL_TCF_SYNC;
+ else if (cpus_have_cap(ARM64_MTE_ASYMM) && sysfs_streq(buf, "asymm"))
+ tcf = MTE_CTRL_TCF_ASYMM;
else
return -EINVAL;
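
Userspace opts into the combined mode via prctl(); on a CPU with FEAT_MTE_ASYMM the request may now resolve to the asymmetric mode. A hedged sketch (constants mirror include/uapi/linux/prctl.h, with fallbacks for older headers):

    #include <sys/prctl.h>

    #ifndef PR_SET_TAGGED_ADDR_CTRL
    #define PR_SET_TAGGED_ADDR_CTRL 55
    #define PR_TAGGED_ADDR_ENABLE   (1UL << 0)
    #endif
    #ifndef PR_MTE_TCF_SYNC
    #define PR_MTE_TCF_SYNC         (1UL << 1)
    #define PR_MTE_TCF_ASYNC        (2UL << 1)
    #define PR_MTE_TAG_SHIFT        3
    #endif

    int request_mte_asymm(void)
    {
            /* Ask for both modes and let the kernel pick; with this
             * series that can be the asymmetric mode. */
            unsigned long ctrl = PR_TAGGED_ADDR_ENABLE |
                                 PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC |
                                 (0xffffUL << PR_MTE_TAG_SHIFT);

            return prctl(PR_SET_TAGGED_ADDR_CTRL, ctrl, 0, 0, 0);
    }

The per-CPU default can likewise be steered from the new sysfs value, e.g. by writing "asymm" to /sys/devices/system/cpu/cpu0/mte_tcf_preferred on a system that supports it.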
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index cab678ed6618..cb69ff1e6138 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -242,6 +242,16 @@ static struct attribute *armv8_pmuv3_event_attrs[] = {
ARMV8_EVENT_ATTR(l2d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD),
ARMV8_EVENT_ATTR(l2i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS),
ARMV8_EVENT_ATTR(l3d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD),
+ ARMV8_EVENT_ATTR(trb_wrap, ARMV8_PMUV3_PERFCTR_TRB_WRAP),
+ ARMV8_EVENT_ATTR(trb_trig, ARMV8_PMUV3_PERFCTR_TRB_TRIG),
+ ARMV8_EVENT_ATTR(trcextout0, ARMV8_PMUV3_PERFCTR_TRCEXTOUT0),
+ ARMV8_EVENT_ATTR(trcextout1, ARMV8_PMUV3_PERFCTR_TRCEXTOUT1),
+ ARMV8_EVENT_ATTR(trcextout2, ARMV8_PMUV3_PERFCTR_TRCEXTOUT2),
+ ARMV8_EVENT_ATTR(trcextout3, ARMV8_PMUV3_PERFCTR_TRCEXTOUT3),
+ ARMV8_EVENT_ATTR(cti_trigout4, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4),
+ ARMV8_EVENT_ATTR(cti_trigout5, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5),
+ ARMV8_EVENT_ATTR(cti_trigout6, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6),
+ ARMV8_EVENT_ATTR(cti_trigout7, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7),
ARMV8_EVENT_ATTR(ldst_align_lat, ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT),
ARMV8_EVENT_ATTR(ld_align_lat, ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT),
ARMV8_EVENT_ATTR(st_align_lat, ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT),
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 5369e649fa79..7fa97df55e3a 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -635,7 +635,8 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
return -EINVAL;
if (system_supports_mte())
- valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK;
+ valid_mask |= PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC \
+ | PR_MTE_TAG_MASK;
if (arg & ~valid_mask)
return -EINVAL;
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index 902e4084c477..5777929d35bf 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -18,15 +18,18 @@
*/
#include <linux/arm-smccc.h>
+#include <linux/bpf.h>
#include <linux/cpu.h>
#include <linux/device.h>
#include <linux/nospec.h>
#include <linux/prctl.h>
#include <linux/sched/task_stack.h>
+#include <asm/debug-monitors.h>
#include <asm/insn.h>
#include <asm/spectre.h>
#include <asm/traps.h>
+#include <asm/vectors.h>
#include <asm/virt.h>
/*
@@ -96,14 +99,51 @@ static bool spectre_v2_mitigations_off(void)
return ret;
}
+static const char *get_bhb_affected_string(enum mitigation_state bhb_state)
+{
+ switch (bhb_state) {
+ case SPECTRE_UNAFFECTED:
+ return "";
+ default:
+ case SPECTRE_VULNERABLE:
+ return ", but not BHB";
+ case SPECTRE_MITIGATED:
+ return ", BHB";
+ }
+}
+
+static bool _unprivileged_ebpf_enabled(void)
+{
+#ifdef CONFIG_BPF_SYSCALL
+ return !sysctl_unprivileged_bpf_disabled;
+#else
+ return false;
+#endif
+}
+
ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr,
char *buf)
{
+ enum mitigation_state bhb_state = arm64_get_spectre_bhb_state();
+ const char *bhb_str = get_bhb_affected_string(bhb_state);
+ const char *v2_str = "Branch predictor hardening";
+
switch (spectre_v2_state) {
case SPECTRE_UNAFFECTED:
- return sprintf(buf, "Not affected\n");
+ if (bhb_state == SPECTRE_UNAFFECTED)
+ return sprintf(buf, "Not affected\n");
+
+ /*
+ * Platforms affected by Spectre-BHB can't report
+ * "Not affected" for Spectre-v2.
+ */
+ v2_str = "CSV2";
+ fallthrough;
case SPECTRE_MITIGATED:
- return sprintf(buf, "Mitigation: Branch predictor hardening\n");
+ if (bhb_state == SPECTRE_MITIGATED && _unprivileged_ebpf_enabled())
+ return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n");
+
+ return sprintf(buf, "Mitigation: %s%s\n", v2_str, bhb_str);
case SPECTRE_VULNERABLE:
fallthrough;
default:
@@ -193,17 +233,20 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn)
__this_cpu_write(bp_hardening_data.slot, HYP_VECTOR_SPECTRE_DIRECT);
}
-static void call_smc_arch_workaround_1(void)
+/* Called during entry so must be noinstr */
+static noinstr void call_smc_arch_workaround_1(void)
{
arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
}
-static void call_hvc_arch_workaround_1(void)
+/* Called during entry so must be noinstr */
+static noinstr void call_hvc_arch_workaround_1(void)
{
arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
}
-static void qcom_link_stack_sanitisation(void)
+/* Called during entry so must be noinstr */
+static noinstr void qcom_link_stack_sanitisation(void)
{
u64 tmp;
@@ -554,9 +597,9 @@ void __init spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt,
* Patch a NOP in the Spectre-v4 mitigation code with an SMC/HVC instruction
* to call into firmware to adjust the mitigation state.
*/
-void __init spectre_v4_patch_fw_mitigation_conduit(struct alt_instr *alt,
- __le32 *origptr,
- __le32 *updptr, int nr_inst)
+void __init smccc_patch_fw_mitigation_conduit(struct alt_instr *alt,
+ __le32 *origptr,
+ __le32 *updptr, int nr_inst)
{
u32 insn;
@@ -770,3 +813,344 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
return -ENODEV;
}
}
+
+/*
+ * Spectre BHB.
+ *
+ * A CPU is either:
+ * - Mitigated by a branchy loop run a CPU-specific number of times, and listed
+ * in our "loop mitigated list".
+ * - Mitigated in software by the firmware Spectre v2 call.
+ * - Has the ClearBHB instruction to perform the mitigation.
+ * - Has the 'Exception Clears Branch History Buffer' (ECBHB) feature, so no
+ * software mitigation in the vectors is needed.
+ * - Has CSV2.3, so is unaffected.
+ */
+static enum mitigation_state spectre_bhb_state;
+
+enum mitigation_state arm64_get_spectre_bhb_state(void)
+{
+ return spectre_bhb_state;
+}
+
+enum bhb_mitigation_bits {
+ BHB_LOOP,
+ BHB_FW,
+ BHB_HW,
+ BHB_INSN,
+};
+static unsigned long system_bhb_mitigations;
+
+/*
+ * This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any
+ * SCOPE_SYSTEM call will give the right answer.
+ */
+u8 spectre_bhb_loop_affected(int scope)
+{
+ u8 k = 0;
+ static u8 max_bhb_k;
+
+ if (scope == SCOPE_LOCAL_CPU) {
+ static const struct midr_range spectre_bhb_k32_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k24_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k8_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ {},
+ };
+
+ if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list))
+ k = 32;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list))
+ k = 24;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list))
+ k = 8;
+
+ max_bhb_k = max(max_bhb_k, k);
+ } else {
+ k = max_bhb_k;
+ }
+
+ return k;
+}
+
+static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void)
+{
+ int ret;
+ struct arm_smccc_res res;
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_WORKAROUND_3, &res);
+
+ ret = res.a0;
+ switch (ret) {
+ case SMCCC_RET_SUCCESS:
+ return SPECTRE_MITIGATED;
+ case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED:
+ return SPECTRE_UNAFFECTED;
+ default:
+ fallthrough;
+ case SMCCC_RET_NOT_SUPPORTED:
+ return SPECTRE_VULNERABLE;
+ }
+}
+
+static bool is_spectre_bhb_fw_affected(int scope)
+{
+ static bool system_affected;
+ enum mitigation_state fw_state;
+ bool has_smccc = arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE;
+ static const struct midr_range spectre_bhb_firmware_mitigated_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
+ {},
+ };
+ bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(),
+ spectre_bhb_firmware_mitigated_list);
+
+ if (scope != SCOPE_LOCAL_CPU)
+ return system_affected;
+
+ fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
+ if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) {
+ system_affected = true;
+ return true;
+ }
+
+ return false;
+}
+
+static bool supports_ecbhb(int scope)
+{
+ u64 mmfr1;
+
+ if (scope == SCOPE_LOCAL_CPU)
+ mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1);
+ else
+ mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+
+ return cpuid_feature_extract_unsigned_field(mmfr1,
+ ID_AA64MMFR1_ECBHB_SHIFT);
+}
+
+bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
+ int scope)
+{
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+
+ if (supports_csv2p3(scope))
+ return false;
+
+ if (supports_clearbhb(scope))
+ return true;
+
+ if (spectre_bhb_loop_affected(scope))
+ return true;
+
+ if (is_spectre_bhb_fw_affected(scope))
+ return true;
+
+ return false;
+}
+
+static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
+{
+ const char *v = arm64_get_bp_hardening_vector(slot);
+
+ if (slot < 0)
+ return;
+
+ __this_cpu_write(this_cpu_vector, v);
+
+ /*
+ * When KPTI is in use, the vectors are switched when exiting to
+ * user-space.
+ */
+ if (arm64_kernel_unmapped_at_el0())
+ return;
+
+ write_sysreg(v, vbar_el1);
+ isb();
+}
+
+void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
+{
+ bp_hardening_cb_t cpu_cb;
+ enum mitigation_state fw_state, state = SPECTRE_VULNERABLE;
+ struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
+
+ if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU))
+ return;
+
+ if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) {
+ /* No point mitigating Spectre-BHB alone. */
+ } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) {
+ pr_info_once("spectre-bhb mitigation disabled by compile time option\n");
+ } else if (cpu_mitigations_off()) {
+ pr_info_once("spectre-bhb mitigation disabled by command line option\n");
+ } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) {
+ state = SPECTRE_MITIGATED;
+ set_bit(BHB_HW, &system_bhb_mitigations);
+ } else if (supports_clearbhb(SCOPE_LOCAL_CPU)) {
+ /*
+ * Ensure KVM uses the indirect vector which will have ClearBHB
+ * added.
+ */
+ if (!data->slot)
+ data->slot = HYP_VECTOR_INDIRECT;
+
+ this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN);
+ state = SPECTRE_MITIGATED;
+ set_bit(BHB_INSN, &system_bhb_mitigations);
+ } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) {
+ /*
+ * Ensure KVM uses the indirect vector which will have the
+ * branchy-loop added. A57/A72-r0 will already have selected
+ * the spectre-indirect vector, which is sufficient for BHB
+ * too.
+ */
+ if (!data->slot)
+ data->slot = HYP_VECTOR_INDIRECT;
+
+ this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP);
+ state = SPECTRE_MITIGATED;
+ set_bit(BHB_LOOP, &system_bhb_mitigations);
+ } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) {
+ fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
+ if (fw_state == SPECTRE_MITIGATED) {
+ /*
+ * Ensure KVM uses one of the spectre bp_hardening
+ * vectors. The indirect vector doesn't include the EL3
+ * call, so needs upgrading to
+ * HYP_VECTOR_SPECTRE_INDIRECT.
+ */
+ if (!data->slot || data->slot == HYP_VECTOR_INDIRECT)
+ data->slot += 1;
+
+ this_cpu_set_vectors(EL1_VECTOR_BHB_FW);
+
+ /*
+ * The WA3 call in the vectors supersedes the WA1 call
+ * made during context-switch. Uninstall any firmware
+ * bp_hardening callback.
+ */
+ cpu_cb = spectre_v2_get_sw_mitigation_cb();
+ if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb)
+ __this_cpu_write(bp_hardening_data.fn, NULL);
+
+ state = SPECTRE_MITIGATED;
+ set_bit(BHB_FW, &system_bhb_mitigations);
+ }
+ }
+
+ update_mitigation_state(&spectre_bhb_state, state);
+}
+
+/* Patched to NOP when enabled */
+void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt,
+ __le32 *origptr,
+ __le32 *updptr, int nr_inst)
+{
+ BUG_ON(nr_inst != 1);
+
+ if (test_bit(BHB_LOOP, &system_bhb_mitigations))
+ *updptr++ = cpu_to_le32(aarch64_insn_gen_nop());
+}
+
+/* Patched to NOP when enabled */
+void noinstr spectre_bhb_patch_fw_mitigation_enabled(struct alt_instr *alt,
+ __le32 *origptr,
+ __le32 *updptr, int nr_inst)
+{
+ BUG_ON(nr_inst != 1);
+
+ if (test_bit(BHB_FW, &system_bhb_mitigations))
+ *updptr++ = cpu_to_le32(aarch64_insn_gen_nop());
+}
+
+/* Patched to correct the immediate */
+void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ u8 rd;
+ u32 insn;
+ u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM);
+
+ BUG_ON(nr_inst != 1); /* MOV -> MOV */
+
+ if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY))
+ return;
+
+ insn = le32_to_cpu(*origptr);
+ rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn);
+ insn = aarch64_insn_gen_movewide(rd, loop_count, 0,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_ZERO);
+ *updptr++ = cpu_to_le32(insn);
+}
+
+/* Patched to mov WA3 when supported */
+void noinstr spectre_bhb_patch_wa3(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ u8 rd;
+ u32 insn;
+
+ BUG_ON(nr_inst != 1); /* MOV -> MOV */
+
+ if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) ||
+ !test_bit(BHB_FW, &system_bhb_mitigations))
+ return;
+
+ insn = le32_to_cpu(*origptr);
+ rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn);
+
+ insn = aarch64_insn_gen_logical_immediate(AARCH64_INSN_LOGIC_ORR,
+ AARCH64_INSN_VARIANT_32BIT,
+ AARCH64_INSN_REG_ZR, rd,
+ ARM_SMCCC_ARCH_WORKAROUND_3);
+ if (WARN_ON_ONCE(insn == AARCH64_BREAK_FAULT))
+ return;
+
+ *updptr++ = cpu_to_le32(insn);
+}
+
+/* Patched to NOP when not supported */
+void __init spectre_bhb_patch_clearbhb(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ BUG_ON(nr_inst != 2);
+
+ if (test_bit(BHB_INSN, &system_bhb_mitigations))
+ return;
+
+ *updptr++ = cpu_to_le32(aarch64_insn_gen_nop());
+ *updptr++ = cpu_to_le32(aarch64_insn_gen_nop());
+}
+
+#ifdef CONFIG_BPF_SYSCALL
+#define EBPF_WARN "Unprivileged eBPF is enabled, data leaks possible via Spectre v2 BHB attacks!\n"
+void unpriv_ebpf_notify(int new_state)
+{
+ if (spectre_v2_state == SPECTRE_VULNERABLE ||
+ spectre_bhb_state != SPECTRE_MITIGATED)
+ return;
+
+ if (!new_state)
+ pr_err("WARNING: %s", EBPF_WARN);
+}
+#endif
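
The chosen state is what userspace sees through the existing vulnerabilities interface; a minimal sketch of reading it:

    #include <stdio.h>

    static void show_spectre_v2_status(void)
    {
            char line[128];
            FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spectre_v2", "r");

            if (!f)
                    return;
            if (fgets(line, sizeof(line), f))
                    fputs(line, stdout);    /* e.g. "Mitigation: CSV2, BHB" */
            fclose(f);
    }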
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index f70573928f1b..3505789cf4bd 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -406,9 +406,6 @@ static int __init topology_init(void)
{
int i;
- for_each_online_node(i)
- register_one_node(i);
-
for_each_possible_cpu(i) {
struct cpu *cpu = &per_cpu(cpu_data.cpu, i);
cpu->hotpluggable = cpu_can_disable(i);
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index d8aaf4b6f432..50fe8eaf7df0 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -11,7 +11,6 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/signal.h>
-#include <linux/personality.h>
#include <linux/freezer.h>
#include <linux/stddef.h>
#include <linux/uaccess.h>
@@ -577,10 +576,12 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
{
int err;
- err = sigframe_alloc(user, &user->fpsimd_offset,
- sizeof(struct fpsimd_context));
- if (err)
- return err;
+ if (system_supports_fpsimd()) {
+ err = sigframe_alloc(user, &user->fpsimd_offset,
+ sizeof(struct fpsimd_context));
+ if (err)
+ return err;
+ }
/* fault information, if valid */
if (add_all || current->thread.fault_code) {
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 0fb58fed54cb..e4103e085681 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -33,8 +33,8 @@
*/
-static void start_backtrace(struct stackframe *frame, unsigned long fp,
- unsigned long pc)
+static notrace void start_backtrace(struct stackframe *frame, unsigned long fp,
+ unsigned long pc)
{
frame->fp = fp;
frame->pc = pc;
@@ -55,6 +55,7 @@ static void start_backtrace(struct stackframe *frame, unsigned long fp,
frame->prev_fp = 0;
frame->prev_type = STACK_TYPE_UNKNOWN;
}
+NOKPROBE_SYMBOL(start_backtrace);
/*
* Unwind from one frame record (A) to the next frame record (B).
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index db5159a3055f..12c6864e51e1 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -9,7 +9,6 @@
#include <linux/compat.h>
#include <linux/cpufeature.h>
-#include <linux/personality.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 70fc42470f13..bb878f52ca0a 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -9,7 +9,6 @@
#include <linux/bug.h>
#include <linux/context_tracking.h>
#include <linux/signal.h>
-#include <linux/personality.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/spinlock.h>
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 60813497a381..172452f79e46 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -29,8 +29,11 @@ ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \
ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
+# -Wmissing-prototypes and -Wmissing-declarations are removed from
+# the CFLAGS of vgettimeofday.c to make it possible to build the
+# kernel with CONFIG_WERROR enabled.
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) \
- $(CC_FLAGS_LTO)
+ $(CC_FLAGS_LTO) -Wmissing-prototypes -Wmissing-declarations
KASAN_SANITIZE := n
KCSAN_SANITIZE := n
UBSAN_SANITIZE := n
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 50bab186c49b..edaf0faf766f 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -341,7 +341,7 @@ ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))
<= SZ_4K, "Hibernate exit text too big or misaligned")
#endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE,
+ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) <= 3*PAGE_SIZE,
"Entry trampoline text too big")
#endif
#ifdef CONFIG_KVM
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index a4a0063df456..946e401ef6b0 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -797,6 +797,24 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret)
xfer_to_guest_mode_work_pending();
}
+/*
+ * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
+ * the vCPU is running.
+ *
+ * This must be noinstr as instrumentation may make use of RCU, and this is not
+ * safe during the EQS.
+ */
+static int noinstr kvm_arm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ guest_state_enter_irqoff();
+ ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
+ guest_state_exit_irqoff();
+
+ return ret;
+}
+
/**
* kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
* @vcpu: The VCPU pointer
@@ -881,9 +899,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* Enter the guest
*/
trace_kvm_entry(*vcpu_pc(vcpu));
- guest_enter_irqoff();
+ guest_timing_enter_irqoff();
- ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
+ ret = kvm_arm_vcpu_enter_exit(vcpu);
vcpu->mode = OUTSIDE_GUEST_MODE;
vcpu->stat.exits++;
@@ -918,26 +936,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_arch_vcpu_ctxsync_fp(vcpu);
/*
- * We may have taken a host interrupt in HYP mode (ie
- * while executing the guest). This interrupt is still
- * pending, as we haven't serviced it yet!
+ * We must ensure that any pending interrupts are taken before
+ * we exit guest timing so that timer ticks are accounted as
+ * guest time. Transiently unmask interrupts so that any
+ * pending interrupts are taken.
*
- * We're now back in SVC mode, with interrupts
- * disabled. Enabling the interrupts now will have
- * the effect of taking the interrupt again, in SVC
- * mode this time.
+ * Per ARM DDI 0487G.b section D1.13.4, an ISB (or other
+ * context synchronization event) is necessary to ensure that
+ * pending interrupts are taken.
*/
local_irq_enable();
+ isb();
+ local_irq_disable();
+
+ guest_timing_exit_irqoff();
+
+ local_irq_enable();
- /*
- * We do local_irq_enable() before calling guest_exit() so
- * that if a timer interrupt hits while running the guest we
- * account that tick as being spent in the guest. We enable
- * preemption after calling guest_exit() so that if we get
- * preempted we make sure ticks after that is not counted as
- * guest time.
- */
- guest_exit();
trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
/* Exit types that need handling before we can be preempted */
@@ -1476,10 +1491,7 @@ static int kvm_init_vector_slots(void)
base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs));
kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT);
- if (!cpus_have_const_cap(ARM64_SPECTRE_V3A))
- return 0;
-
- if (!has_vhe()) {
+ if (kvm_system_needs_idmapped_vectors() && !has_vhe()) {
err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs),
__BP_HARDEN_HYP_VECS_SZ, &base);
if (err)
@@ -1855,6 +1867,7 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits)
kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1);
kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
+ kvm_nvhe_sym(id_aa64isar2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);
kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1);
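Taken together, the arm.c hunks above make the run loop follow the generic guest entry/exit timing protocol: enter timing, enter and leave the RCU EQS around the hyp call, then take any pending IRQs (transiently enabling interrupts plus an ISB) before leaving guest timing. Condensed pseudocode mirroring the hunks above:

	guest_timing_enter_irqoff();
	ret = kvm_arm_vcpu_enter_exit(vcpu);	/* RCU EQS while in guest */
	...
	local_irq_enable();	/* pending IRQs accounted as guest time */
	isb();			/* ensure they are actually taken */
	local_irq_disable();
	guest_timing_exit_irqoff();
	local_irq_enable();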
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index fd2dd26caf91..e3140abd2e2e 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -228,6 +228,14 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
{
struct kvm_run *run = vcpu->run;
+ if (ARM_SERROR_PENDING(exception_index)) {
+ /*
+ * The SError is handled by handle_exit_early(). If the guest
+ * survives it will re-execute the original instruction.
+ */
+ return 1;
+ }
+
exception_index = ARM_EXCEPTION_CODE(exception_index);
switch (exception_index) {
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 0418399e0a20..c5d009715402 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -38,7 +38,10 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val)
{
- write_sysreg_el1(val, SYS_SPSR);
+ if (has_vhe())
+ write_sysreg_el1(val, SYS_SPSR);
+ else
+ __vcpu_sys_reg(vcpu, SPSR_EL1) = val;
}
static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index b6b6801d96d5..7839d075729b 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -62,6 +62,10 @@ el1_sync: // Guest trapped into EL2
/* ARM_SMCCC_ARCH_WORKAROUND_2 handling */
eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \
ARM_SMCCC_ARCH_WORKAROUND_2)
+ cbz w1, wa_epilogue
+
+ eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_2 ^ \
+ ARM_SMCCC_ARCH_WORKAROUND_3)
cbnz w1, el1_trap
wa_epilogue:
@@ -192,7 +196,10 @@ SYM_CODE_END(__kvm_hyp_vector)
sub sp, sp, #(8 * 4)
stp x2, x3, [sp, #(8 * 0)]
stp x0, x1, [sp, #(8 * 2)]
+ alternative_cb spectre_bhb_patch_wa3
+ /* Patched to mov WA3 when supported */
mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1
+ alternative_cb_end
smc #0
ldp x2, x3, [sp, #(8 * 0)]
add sp, sp, #(8 * 2)
@@ -205,6 +212,8 @@ SYM_CODE_END(__kvm_hyp_vector)
spectrev2_smccc_wa1_smc
.else
stp x0, x1, [sp, #-16]!
+ mitigate_spectre_bhb_loop x0
+ mitigate_spectre_bhb_clear_insn
.endif
.if \indirect != 0
alternative_cb kvm_patch_vector_branch
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 58e14f8ead23..6379a1e3e6e5 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -174,9 +174,9 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
/* Valid trap. Switch the context: */
if (has_vhe()) {
- reg = CPACR_EL1_FPEN;
+ reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
if (sve_guest)
- reg |= CPACR_EL1_ZEN;
+ reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
sysreg_clear_set(cpacr_el1, 0, reg);
} else {
@@ -402,6 +402,24 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
}
+static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ /*
+ * Check for the conditions of Cortex-A510's #2077057. When these occur
+ * SPSR_EL2 can't be trusted, but isn't needed either as it is
+ * unchanged from the value in vcpu_gp_regs(vcpu)->pstate.
+ * Are we single-stepping the guest, and took a PAC exception from the
+ * active-not-pending state?
+ */
+ if (cpus_have_final_cap(ARM64_WORKAROUND_2077057) &&
+ vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
+ *vcpu_cpsr(vcpu) & DBG_SPSR_SS &&
+ ESR_ELx_EC(read_sysreg_el2(SYS_ESR)) == ESR_ELx_EC_PAC)
+ write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
+
+ vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
+}
+
/*
* Return true when we were able to fixup the guest exit and should return to
* the guest, false when we should restore the host state and return to the
@@ -413,7 +431,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
* Save PSTATE early so that we can evaluate the vcpu mode
* early on.
*/
- vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
+ synchronize_vcpu_pstate(vcpu, exit_code);
/*
* Check whether we want to repaint the state one way or
@@ -424,7 +442,8 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
- if (ARM_SERROR_PENDING(*exit_code)) {
+ if (ARM_SERROR_PENDING(*exit_code) &&
+ ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) {
u8 esr_ec = kvm_vcpu_trap_get_class(vcpu);
/*
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
index eea1f6a53723..5ad626527d41 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
@@ -192,6 +192,11 @@
ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \
)
+#define PVM_ID_AA64ISAR2_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) \
+ )
+
u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S
index 958734f4d6b0..0c367eb5f4e2 100644
--- a/arch/arm64/kvm/hyp/nvhe/cache.S
+++ b/arch/arm64/kvm/hyp/nvhe/cache.S
@@ -7,7 +7,8 @@
#include <asm/assembler.h>
#include <asm/alternative.h>
-SYM_FUNC_START_PI(dcache_clean_inval_poc)
+SYM_FUNC_START(__pi_dcache_clean_inval_poc)
dcache_by_line_op civac, sy, x0, x1, x2, x3
ret
-SYM_FUNC_END_PI(dcache_clean_inval_poc)
+SYM_FUNC_END(__pi_dcache_clean_inval_poc)
+SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc)
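This hunk is the first of many in the series converting the old *_PI linkage macros to explicitly named position-independent symbols plus aliases. The recurring shape across the rest of the series (a sketch, not an excerpt from any single file):

	/* old: the _PI macros implicitly emitted an extra __pi_func symbol */
	SYM_FUNC_START_PI(func)
		...
	SYM_FUNC_END_PI(func)

	/* new: define __pi_func directly, then alias func to it */
	SYM_FUNC_START(__pi_func)
		...
	SYM_FUNC_END(__pi_func)
	SYM_FUNC_ALIAS(func, __pi_func)

The overridable string routines below (memchr, memcmp, strcmp, ...) use SYM_FUNC_ALIAS_WEAK in the same position so that instrumented C implementations can take precedence.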
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index 526a7d6fa86f..cdbe8e246418 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -148,8 +148,10 @@ int hyp_map_vectors(void)
phys_addr_t phys;
void *bp_base;
- if (!cpus_have_const_cap(ARM64_SPECTRE_V3A))
+ if (!kvm_system_needs_idmapped_vectors()) {
+ __hyp_bp_vect_base = __bp_harden_hyp_vecs;
return 0;
+ }
phys = __hyp_pa(__bp_harden_hyp_vecs);
bp_base = (void *)__pkvm_create_private_mapping(phys,
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 792cf6e6ac92..33f5181af330 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -22,6 +22,7 @@ u64 id_aa64pfr0_el1_sys_val;
u64 id_aa64pfr1_el1_sys_val;
u64 id_aa64isar0_el1_sys_val;
u64 id_aa64isar1_el1_sys_val;
+u64 id_aa64isar2_el1_sys_val;
u64 id_aa64mmfr0_el1_sys_val;
u64 id_aa64mmfr1_el1_sys_val;
u64 id_aa64mmfr2_el1_sys_val;
@@ -183,6 +184,17 @@ static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu)
return id_aa64isar1_el1_sys_val & allow_mask;
}
+static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu)
+{
+ u64 allow_mask = PVM_ID_AA64ISAR2_ALLOW;
+
+ if (!vcpu_has_ptrauth(vcpu))
+ allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3));
+
+ return id_aa64isar2_el1_sys_val & allow_mask;
+}
+
static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu)
{
u64 set_mask;
@@ -225,6 +237,8 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
return get_pvm_id_aa64isar0(vcpu);
case SYS_ID_AA64ISAR1_EL1:
return get_pvm_id_aa64isar1(vcpu);
+ case SYS_ID_AA64ISAR2_EL1:
+ return get_pvm_id_aa64isar2(vcpu);
case SYS_ID_AA64MMFR0_EL1:
return get_pvm_id_aa64mmfr0(vcpu);
case SYS_ID_AA64MMFR1_EL1:
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 844a6f003fd5..2cb3867eb7c2 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -983,13 +983,9 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
*/
stage2_put_pte(ptep, mmu, addr, level, mm_ops);
- if (need_flush) {
- kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops);
-
- dcache_clean_inval_poc((unsigned long)pte_follow,
- (unsigned long)pte_follow +
- kvm_granule_size(level));
- }
+ if (need_flush && mm_ops->dcache_clean_inval_poc)
+ mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
+ kvm_granule_size(level));
if (childp)
mm_ops->put_page(childp);
@@ -1151,15 +1147,13 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
struct kvm_pgtable *pgt = arg;
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
kvm_pte_t pte = *ptep;
- kvm_pte_t *pte_follow;
if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
return 0;
- pte_follow = kvm_pte_follow(pte, mm_ops);
- dcache_clean_inval_poc((unsigned long)pte_follow,
- (unsigned long)pte_follow +
- kvm_granule_size(level));
+ if (mm_ops->dcache_clean_inval_poc)
+ mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
+ kvm_granule_size(level));
return 0;
}
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 20db2f281cf2..4fb419f7b8b6 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -983,6 +983,9 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT;
/* IDbits */
val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT;
+ /* SEIS */
+ if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK)
+ val |= BIT(ICC_CTLR_EL1_SEIS_SHIFT);
/* A3V */
val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT;
/* EOImode */
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 11d053fdd604..262dfe03134d 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -10,6 +10,7 @@
#include <linux/kvm_host.h>
#include <linux/types.h>
#include <linux/jump_label.h>
+#include <linux/percpu.h>
#include <uapi/linux/psci.h>
#include <kvm/arm_psci.h>
@@ -24,6 +25,8 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/vectors.h>
/* VHE specific context */
DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
@@ -38,7 +41,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val = read_sysreg(cpacr_el1);
val |= CPACR_EL1_TTA;
- val &= ~CPACR_EL1_ZEN;
+ val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN);
/*
* With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
@@ -53,9 +56,9 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
if (update_fp_enabled(vcpu)) {
if (vcpu_has_sve(vcpu))
- val |= CPACR_EL1_ZEN;
+ val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
} else {
- val &= ~CPACR_EL1_FPEN;
+ val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
__activate_traps_fpsimd32(vcpu);
}
@@ -67,7 +70,7 @@ NOKPROBE_SYMBOL(__activate_traps);
static void __deactivate_traps(struct kvm_vcpu *vcpu)
{
- extern char vectors[]; /* kernel exception vectors */
+ const char *host_vectors = vectors;
___deactivate_traps(vcpu);
@@ -81,7 +84,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
- write_sysreg(vectors, vbar_el1);
+
+ if (!arm64_kernel_unmapped_at_el0())
+ host_vectors = __this_cpu_read(this_cpu_vector);
+ write_sysreg(host_vectors, vbar_el1);
}
NOKPROBE_SYMBOL(__deactivate_traps);
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 30da78f72b3b..202b8c455724 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -107,6 +107,18 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
break;
}
break;
+ case ARM_SMCCC_ARCH_WORKAROUND_3:
+ switch (arm64_get_spectre_bhb_state()) {
+ case SPECTRE_VULNERABLE:
+ break;
+ case SPECTRE_MITIGATED:
+ val[0] = SMCCC_RET_SUCCESS;
+ break;
+ case SPECTRE_UNAFFECTED:
+ val[0] = SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED;
+ break;
+ }
+ break;
case ARM_SMCCC_HV_PV_TIME_FEATURES:
val[0] = SMCCC_RET_SUCCESS;
break;
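
With this case added, a guest discovers ARM_SMCCC_ARCH_WORKAROUND_3 the same way as the existing workarounds: query ARM_SMCCC_ARCH_FEATURES_FUNC_ID for the workaround ID and act on the returned level. A sketch of the guest-side probe (enable_smccc_workaround_3() is a hypothetical policy hook):

	struct arm_smccc_res res;

	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
			     ARM_SMCCC_ARCH_WORKAROUND_3, &res);
	switch ((int)res.a0) {
	case SMCCC_RET_SUCCESS:			/* firmware mitigates on each call */
		enable_smccc_workaround_3();	/* hypothetical */
		break;
	case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED:
		break;				/* nothing to do */
	default:
		break;				/* unsupported: use in-kernel sequences */
	}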
diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c
index 3eae32876897..5918095c90a5 100644
--- a/arch/arm64/kvm/psci.c
+++ b/arch/arm64/kvm/psci.c
@@ -46,8 +46,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
* specification (ARM DEN 0022A). This means all suspend states
* for KVM will preserve the register state.
*/
- kvm_vcpu_halt(vcpu);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+ kvm_vcpu_wfi(vcpu);
return PSCI_RET_SUCCESS;
}
@@ -406,7 +405,7 @@ int kvm_psci_call(struct kvm_vcpu *vcpu)
int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
{
- return 3; /* PSCI version and two workaround registers */
+ return 4; /* PSCI version and three workaround registers */
}
int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
@@ -420,6 +419,9 @@ int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++))
return -EFAULT;
+ if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3, uindices++))
+ return -EFAULT;
+
return 0;
}
@@ -459,6 +461,17 @@ static int get_kernel_wa_level(u64 regid)
case SPECTRE_VULNERABLE:
return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
}
+ break;
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
+ switch (arm64_get_spectre_bhb_state()) {
+ case SPECTRE_VULNERABLE:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL;
+ case SPECTRE_MITIGATED:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL;
+ case SPECTRE_UNAFFECTED:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED;
+ }
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL;
}
return -EINVAL;
@@ -475,6 +488,7 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
break;
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
break;
default:
@@ -520,6 +534,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
}
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
return -EINVAL;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 4dc2fba316ff..baa65292bbc2 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1097,6 +1097,11 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) |
ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI));
break;
+ case SYS_ID_AA64ISAR2_EL1:
+ if (!vcpu_has_ptrauth(vcpu))
+ val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3));
+ break;
case SYS_ID_AA64DFR0_EL1:
/* Limit debug to ARMv8.0 */
val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER);
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
index 7068da080799..49837d3a3ef5 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio.c
@@ -248,6 +248,8 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
IRQCHIP_STATE_PENDING,
&val);
WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
+ } else if (vgic_irq_is_mapped_level(irq)) {
+ val = vgic_get_phys_line_level(irq);
} else {
val = irq_is_pending(irq);
}
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index a33d4366b326..b549af8b1dc2 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -609,6 +609,18 @@ static int __init early_gicv4_enable(char *buf)
}
early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
+static const struct midr_range broken_seis[] = {
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
+ {},
+};
+
+static bool vgic_v3_broken_seis(void)
+{
+ return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) &&
+ is_midr_in_range_list(read_cpuid_id(), broken_seis));
+}
+
/**
* vgic_v3_probe - probe for a VGICv3 compatible interrupt controller
* @info: pointer to the GIC description
@@ -676,9 +688,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
group1_trap = true;
}
- if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) {
- kvm_info("GICv3 with locally generated SEI\n");
+ if (vgic_v3_broken_seis()) {
+ kvm_info("GICv3 with broken locally generated SEI\n");
+ kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_SEIS_MASK;
group0_trap = true;
group1_trap = true;
if (ich_vtr_el2 & ICH_VTR_TDS_MASK)
diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S
index 1fd5d790ab80..ebde40e7fa2b 100644
--- a/arch/arm64/lib/clear_page.S
+++ b/arch/arm64/lib/clear_page.S
@@ -14,7 +14,7 @@
* Parameters:
* x0 - dest
*/
-SYM_FUNC_START_PI(clear_page)
+SYM_FUNC_START(__pi_clear_page)
mrs x1, dczid_el0
tbnz x1, #4, 2f /* Branch if DC ZVA is prohibited */
and w1, w1, #0xf
@@ -35,5 +35,6 @@ SYM_FUNC_START_PI(clear_page)
tst x0, #(PAGE_SIZE - 1)
b.ne 2b
ret
-SYM_FUNC_END_PI(clear_page)
+SYM_FUNC_END(__pi_clear_page)
+SYM_FUNC_ALIAS(clear_page, __pi_clear_page)
EXPORT_SYMBOL(clear_page)
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index 29144f4cd449..c336d2ffdec5 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -17,7 +17,7 @@
* x0 - dest
* x1 - src
*/
-SYM_FUNC_START_PI(copy_page)
+SYM_FUNC_START(__pi_copy_page)
alternative_if ARM64_HAS_NO_HW_PREFETCH
// Prefetch three cache lines ahead.
prfm pldl1strm, [x1, #128]
@@ -75,5 +75,6 @@ alternative_else_nop_endif
stnp x16, x17, [x0, #112 - 256]
ret
-SYM_FUNC_END_PI(copy_page)
+SYM_FUNC_END(__pi_copy_page)
+SYM_FUNC_ALIAS(copy_page, __pi_copy_page)
EXPORT_SYMBOL(copy_page)
diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S
index 0f9e10ecda23..8340dccff46f 100644
--- a/arch/arm64/lib/crc32.S
+++ b/arch/arm64/lib/crc32.S
@@ -11,7 +11,44 @@
.arch armv8-a+crc
- .macro __crc32, c
+ .macro byteorder, reg, be
+ .if \be
+CPU_LE( rev \reg, \reg )
+ .else
+CPU_BE( rev \reg, \reg )
+ .endif
+ .endm
+
+ .macro byteorder16, reg, be
+ .if \be
+CPU_LE( rev16 \reg, \reg )
+ .else
+CPU_BE( rev16 \reg, \reg )
+ .endif
+ .endm
+
+ .macro bitorder, reg, be
+ .if \be
+ rbit \reg, \reg
+ .endif
+ .endm
+
+ .macro bitorder16, reg, be
+ .if \be
+ rbit \reg, \reg
+ lsr \reg, \reg, #16
+ .endif
+ .endm
+
+ .macro bitorder8, reg, be
+ .if \be
+ rbit \reg, \reg
+ lsr \reg, \reg, #24
+ .endif
+ .endm
+
+ .macro __crc32, c, be=0
+ bitorder w0, \be
cmp x2, #16
b.lt 8f // less than 16 bytes
@@ -24,10 +61,14 @@
add x8, x8, x1
add x1, x1, x7
ldp x5, x6, [x8]
-CPU_BE( rev x3, x3 )
-CPU_BE( rev x4, x4 )
-CPU_BE( rev x5, x5 )
-CPU_BE( rev x6, x6 )
+ byteorder x3, \be
+ byteorder x4, \be
+ byteorder x5, \be
+ byteorder x6, \be
+ bitorder x3, \be
+ bitorder x4, \be
+ bitorder x5, \be
+ bitorder x6, \be
tst x7, #8
crc32\c\()x w8, w0, x3
@@ -55,33 +96,43 @@ CPU_BE( rev x6, x6 )
32: ldp x3, x4, [x1], #32
sub x2, x2, #32
ldp x5, x6, [x1, #-16]
-CPU_BE( rev x3, x3 )
-CPU_BE( rev x4, x4 )
-CPU_BE( rev x5, x5 )
-CPU_BE( rev x6, x6 )
+ byteorder x3, \be
+ byteorder x4, \be
+ byteorder x5, \be
+ byteorder x6, \be
+ bitorder x3, \be
+ bitorder x4, \be
+ bitorder x5, \be
+ bitorder x6, \be
crc32\c\()x w0, w0, x3
crc32\c\()x w0, w0, x4
crc32\c\()x w0, w0, x5
crc32\c\()x w0, w0, x6
cbnz x2, 32b
-0: ret
+0: bitorder w0, \be
+ ret
8: tbz x2, #3, 4f
ldr x3, [x1], #8
-CPU_BE( rev x3, x3 )
+ byteorder x3, \be
+ bitorder x3, \be
crc32\c\()x w0, w0, x3
4: tbz x2, #2, 2f
ldr w3, [x1], #4
-CPU_BE( rev w3, w3 )
+ byteorder w3, \be
+ bitorder w3, \be
crc32\c\()w w0, w0, w3
2: tbz x2, #1, 1f
ldrh w3, [x1], #2
-CPU_BE( rev16 w3, w3 )
+ byteorder16 w3, \be
+ bitorder16 w3, \be
crc32\c\()h w0, w0, w3
1: tbz x2, #0, 0f
ldrb w3, [x1]
+ bitorder8 w3, \be
crc32\c\()b w0, w0, w3
-0: ret
+0: bitorder w0, \be
+ ret
.endm
.align 5
@@ -99,3 +150,11 @@ alternative_if_not ARM64_HAS_CRC32
alternative_else_nop_endif
__crc32 c
SYM_FUNC_END(__crc32c_le)
+
+ .align 5
+SYM_FUNC_START(crc32_be)
+alternative_if_not ARM64_HAS_CRC32
+ b crc32_be_base
+alternative_else_nop_endif
+ __crc32 be=1
+SYM_FUNC_END(crc32_be)
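The hardware CRC32 instructions implement the bit-reflected (little-endian) polynomial, so crc32_be cannot use them directly: the byteorder/bitorder macros above apply RBIT (and REV) to the accumulator and to every input chunk around each crc32x/w/h/b step, then reverse the result again before returning. The function being computed is the plain non-reflected CRC-32 over polynomial 0x04c11db7, the same thing the generic crc32_be_base fallback computes; a bit-at-a-time C model for reference:

	#include <stddef.h>
	#include <stdint.h>

	/* Bit-at-a-time model of the non-reflected (big-endian) CRC-32
	 * that the assembly above computes with the reflected CRC32X
	 * instruction plus RBIT/REV fixups. Polynomial 0x04c11db7.
	 */
	uint32_t crc32_be_ref(uint32_t crc, const uint8_t *p, size_t len)
	{
		while (len--) {
			crc ^= (uint32_t)*p++ << 24;
			for (int i = 0; i < 8; i++)
				crc = (crc & 0x80000000u) ?
				      (crc << 1) ^ 0x04c11db7u : crc << 1;
		}
		return crc;
	}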
diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c
index fccfe363e567..5e90887deec4 100644
--- a/arch/arm64/lib/insn.c
+++ b/arch/arm64/lib/insn.c
@@ -578,10 +578,16 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
switch (type) {
case AARCH64_INSN_LDST_LOAD_EX:
+ case AARCH64_INSN_LDST_LOAD_ACQ_EX:
insn = aarch64_insn_get_load_ex_value();
+ if (type == AARCH64_INSN_LDST_LOAD_ACQ_EX)
+ insn |= BIT(15);
break;
case AARCH64_INSN_LDST_STORE_EX:
+ case AARCH64_INSN_LDST_STORE_REL_EX:
insn = aarch64_insn_get_store_ex_value();
+ if (type == AARCH64_INSN_LDST_STORE_REL_EX)
+ insn |= BIT(15);
break;
default:
pr_err("%s: unknown load/store exclusive encoding %d\n", __func__, type);
@@ -603,12 +609,65 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
state);
}
-u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
- enum aarch64_insn_register address,
- enum aarch64_insn_register value,
- enum aarch64_insn_size_type size)
+#ifdef CONFIG_ARM64_LSE_ATOMICS
+static u32 aarch64_insn_encode_ldst_order(enum aarch64_insn_mem_order_type type,
+ u32 insn)
{
- u32 insn = aarch64_insn_get_ldadd_value();
+ u32 order;
+
+ switch (type) {
+ case AARCH64_INSN_MEM_ORDER_NONE:
+ order = 0;
+ break;
+ case AARCH64_INSN_MEM_ORDER_ACQ:
+ order = 2;
+ break;
+ case AARCH64_INSN_MEM_ORDER_REL:
+ order = 1;
+ break;
+ case AARCH64_INSN_MEM_ORDER_ACQREL:
+ order = 3;
+ break;
+ default:
+ pr_err("%s: unknown mem order %d\n", __func__, type);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn &= ~GENMASK(23, 22);
+ insn |= order << 22;
+
+ return insn;
+}
+
+u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result,
+ enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_mem_atomic_op op,
+ enum aarch64_insn_mem_order_type order)
+{
+ u32 insn;
+
+ switch (op) {
+ case AARCH64_INSN_MEM_ATOMIC_ADD:
+ insn = aarch64_insn_get_ldadd_value();
+ break;
+ case AARCH64_INSN_MEM_ATOMIC_CLR:
+ insn = aarch64_insn_get_ldclr_value();
+ break;
+ case AARCH64_INSN_MEM_ATOMIC_EOR:
+ insn = aarch64_insn_get_ldeor_value();
+ break;
+ case AARCH64_INSN_MEM_ATOMIC_SET:
+ insn = aarch64_insn_get_ldset_value();
+ break;
+ case AARCH64_INSN_MEM_ATOMIC_SWP:
+ insn = aarch64_insn_get_swp_value();
+ break;
+ default:
+ pr_err("%s: unimplemented mem atomic op %d\n", __func__, op);
+ return AARCH64_BREAK_FAULT;
+ }
switch (size) {
case AARCH64_INSN_SIZE_32:
@@ -621,6 +680,8 @@ u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
insn = aarch64_insn_encode_ldst_size(size, insn);
+ insn = aarch64_insn_encode_ldst_order(order, insn);
+
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
result);
@@ -631,17 +692,68 @@ u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
value);
}
-u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
- enum aarch64_insn_register value,
- enum aarch64_insn_size_type size)
+static u32 aarch64_insn_encode_cas_order(enum aarch64_insn_mem_order_type type,
+ u32 insn)
{
- /*
- * STADD is simply encoded as an alias for LDADD with XZR as
- * the destination register.
- */
- return aarch64_insn_gen_ldadd(AARCH64_INSN_REG_ZR, address,
- value, size);
+ u32 order;
+
+ switch (type) {
+ case AARCH64_INSN_MEM_ORDER_NONE:
+ order = 0;
+ break;
+ case AARCH64_INSN_MEM_ORDER_ACQ:
+ order = BIT(22);
+ break;
+ case AARCH64_INSN_MEM_ORDER_REL:
+ order = BIT(15);
+ break;
+ case AARCH64_INSN_MEM_ORDER_ACQREL:
+ order = BIT(15) | BIT(22);
+ break;
+ default:
+ pr_err("%s: unknown mem order %d\n", __func__, type);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn &= ~(BIT(15) | BIT(22));
+ insn |= order;
+
+ return insn;
+}
+
+u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
+ enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_mem_order_type order)
+{
+ u32 insn;
+
+ switch (size) {
+ case AARCH64_INSN_SIZE_32:
+ case AARCH64_INSN_SIZE_64:
+ break;
+ default:
+ pr_err("%s: unimplemented size encoding %d\n", __func__, size);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_get_cas_value();
+
+ insn = aarch64_insn_encode_ldst_size(size, insn);
+
+ insn = aarch64_insn_encode_cas_order(order, insn);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
+ result);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
+ address);
+
+ return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
+ value);
}
+#endif
static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type,
enum aarch64_insn_prfm_target target,
@@ -1379,7 +1491,7 @@ static u32 aarch64_encode_immediate(u64 imm,
* Compute the rotation to get a continuous set of
* ones, with the first bit set at position 0
*/
- ror = fls(~imm);
+ ror = fls64(~imm);
}
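
The fls() to fls64() change fixes a real truncation bug: ~imm is 64 bits wide, but fls() only examines the low 32, so a logical immediate whose run of ones wraps past bit 31 could get the wrong rotation. A standalone check of the arithmetic (using __builtin_clzll in place of the kernel's fls64()):

	#include <assert.h>
	#include <stdint.h>

	static uint64_t ror64(uint64_t v, unsigned int r)
	{
		r &= 63;
		return r ? (v >> r) | (v << (64 - r)) : v;
	}

	int main(void)
	{
		uint64_t imm = 0xff000000000000ffULL; /* 16 ones wrapping bit 63 */

		/* fls64(~imm) = 56: one past the highest zero bit. */
		unsigned int ror = 64 - __builtin_clzll(~imm);
		assert(ror == 56);
		/* Rotating right by 56 makes the ones contiguous from bit 0. */
		assert(ror64(imm, ror) == 0xffffULL);
		/* 32-bit fls(~imm) saw only 0xffffff00 and returned 32: wrong. */
		assert(ror64(imm, 32) != 0xffffULL);
		return 0;
	}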
/*
@@ -1456,3 +1568,48 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn);
return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm);
}
+
+u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type)
+{
+ u32 opt;
+ u32 insn;
+
+ switch (type) {
+ case AARCH64_INSN_MB_SY:
+ opt = 0xf;
+ break;
+ case AARCH64_INSN_MB_ST:
+ opt = 0xe;
+ break;
+ case AARCH64_INSN_MB_LD:
+ opt = 0xd;
+ break;
+ case AARCH64_INSN_MB_ISH:
+ opt = 0xb;
+ break;
+ case AARCH64_INSN_MB_ISHST:
+ opt = 0xa;
+ break;
+ case AARCH64_INSN_MB_ISHLD:
+ opt = 0x9;
+ break;
+ case AARCH64_INSN_MB_NSH:
+ opt = 0x7;
+ break;
+ case AARCH64_INSN_MB_NSHST:
+ opt = 0x6;
+ break;
+ case AARCH64_INSN_MB_NSHLD:
+ opt = 0x5;
+ break;
+ default:
+ pr_err("%s: unknown dmb type %d\n", __func__, type);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_get_dmb_value();
+ insn &= ~GENMASK(11, 8);
+ insn |= (opt << 8);
+
+ return insn;
+}
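Callers such as the BPF JIT now go through this unified helper instead of the removed gen_ldadd()/gen_stadd(). The old stadd behaviour, an LDADD with XZR as the destination register, becomes a single call, and ordering variants differ only in the last argument. A sketch of a call site (not code from this patch):

	/* Emit "stadd w1, [x0]": LDADD with XZR as the result register. */
	u32 insn = aarch64_insn_gen_atomic_ld_op(AARCH64_INSN_REG_ZR,	/* Rt */
						 AARCH64_INSN_REG_0,	/* Rn: address */
						 AARCH64_INSN_REG_1,	/* Rs: value */
						 AARCH64_INSN_SIZE_32,
						 AARCH64_INSN_MEM_ATOMIC_ADD,
						 AARCH64_INSN_MEM_ORDER_NONE);

Passing AARCH64_INSN_MEM_ORDER_ACQREL instead yields ldaddal, with the ordering folded into bits [23:22] by aarch64_insn_encode_ldst_order().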
diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S
index 7c2276fdab54..37a9f2a4f7f4 100644
--- a/arch/arm64/lib/memchr.S
+++ b/arch/arm64/lib/memchr.S
@@ -38,7 +38,7 @@
.p2align 4
nop
-SYM_FUNC_START_WEAK_PI(memchr)
+SYM_FUNC_START(__pi_memchr)
and chrin, chrin, #0xff
lsr wordcnt, cntin, #3
cbz wordcnt, L(byte_loop)
@@ -71,5 +71,6 @@ CPU_LE( rev tmp, tmp)
L(not_found):
mov result, #0
ret
-SYM_FUNC_END_PI(memchr)
+SYM_FUNC_END(__pi_memchr)
+SYM_FUNC_ALIAS_WEAK(memchr, __pi_memchr)
EXPORT_SYMBOL_NOKASAN(memchr)
diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S
index 7d956384222f..a5ccf2c55f91 100644
--- a/arch/arm64/lib/memcmp.S
+++ b/arch/arm64/lib/memcmp.S
@@ -32,7 +32,7 @@
#define tmp1 x7
#define tmp2 x8
-SYM_FUNC_START_WEAK_PI(memcmp)
+SYM_FUNC_START(__pi_memcmp)
subs limit, limit, 8
b.lo L(less8)
@@ -134,6 +134,6 @@ L(byte_loop):
b.eq L(byte_loop)
sub result, data1w, data2w
ret
-
-SYM_FUNC_END_PI(memcmp)
+SYM_FUNC_END(__pi_memcmp)
+SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp)
EXPORT_SYMBOL_NOKASAN(memcmp)
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index b82fd64ee1e1..4ab48d49c451 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -57,10 +57,7 @@
The loop tail is handled by always copying 64 bytes from the end.
*/
-SYM_FUNC_START_ALIAS(__memmove)
-SYM_FUNC_START_WEAK_ALIAS_PI(memmove)
-SYM_FUNC_START_ALIAS(__memcpy)
-SYM_FUNC_START_WEAK_PI(memcpy)
+SYM_FUNC_START(__pi_memcpy)
add srcend, src, count
add dstend, dstin, count
cmp count, 128
@@ -241,12 +238,16 @@ L(copy64_from_start):
stp B_l, B_h, [dstin, 16]
stp C_l, C_h, [dstin]
ret
+SYM_FUNC_END(__pi_memcpy)
-SYM_FUNC_END_PI(memcpy)
-EXPORT_SYMBOL(memcpy)
-SYM_FUNC_END_ALIAS(__memcpy)
+SYM_FUNC_ALIAS(__memcpy, __pi_memcpy)
EXPORT_SYMBOL(__memcpy)
-SYM_FUNC_END_ALIAS_PI(memmove)
-EXPORT_SYMBOL(memmove)
-SYM_FUNC_END_ALIAS(__memmove)
+SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
+EXPORT_SYMBOL(memcpy)
+
+SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy)
+
+SYM_FUNC_ALIAS(__memmove, __pi_memmove)
EXPORT_SYMBOL(__memmove)
+SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
+EXPORT_SYMBOL(memmove)
diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S
index a9c1c9a01ea9..a5aebe82ad73 100644
--- a/arch/arm64/lib/memset.S
+++ b/arch/arm64/lib/memset.S
@@ -42,8 +42,7 @@ dst .req x8
tmp3w .req w9
tmp3 .req x9
-SYM_FUNC_START_ALIAS(__memset)
-SYM_FUNC_START_WEAK_PI(memset)
+SYM_FUNC_START(__pi_memset)
mov dst, dstin /* Preserve return value. */
and A_lw, val, #255
orr A_lw, A_lw, A_lw, lsl #8
@@ -202,7 +201,10 @@ SYM_FUNC_START_WEAK_PI(memset)
ands count, count, zva_bits_x
b.ne .Ltail_maybe_long
ret
-SYM_FUNC_END_PI(memset)
-EXPORT_SYMBOL(memset)
-SYM_FUNC_END_ALIAS(__memset)
+SYM_FUNC_END(__pi_memset)
+
+SYM_FUNC_ALIAS(__memset, __pi_memset)
EXPORT_SYMBOL(__memset)
+
+SYM_FUNC_ALIAS_WEAK(memset, __pi_memset)
+EXPORT_SYMBOL(memset)
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
index f531dcb95174..8590af3c98c0 100644
--- a/arch/arm64/lib/mte.S
+++ b/arch/arm64/lib/mte.S
@@ -134,7 +134,7 @@ SYM_FUNC_END(mte_copy_tags_to_user)
/*
* Save the tags in a page
* x0 - page address
- * x1 - tag storage
+ * x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes
*/
SYM_FUNC_START(mte_save_page_tags)
multitag_transfer_size x7, x5
@@ -158,7 +158,7 @@ SYM_FUNC_END(mte_save_page_tags)
/*
* Restore the tags in a page
* x0 - page address
- * x1 - tag storage
+ * x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes
*/
SYM_FUNC_START(mte_restore_page_tags)
multitag_transfer_size x7, x5
diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S
index 1f47eae3b0d6..94ee67a6b212 100644
--- a/arch/arm64/lib/strchr.S
+++ b/arch/arm64/lib/strchr.S
@@ -18,7 +18,7 @@
* Returns:
* x0 - address of first occurrence of 'c' or 0
*/
-SYM_FUNC_START_WEAK(strchr)
+SYM_FUNC_START(__pi_strchr)
and w1, w1, #0xff
1: ldrb w2, [x0], #1
cmp w2, w1
@@ -28,5 +28,7 @@ SYM_FUNC_START_WEAK(strchr)
cmp w2, w1
csel x0, x0, xzr, eq
ret
-SYM_FUNC_END(strchr)
+SYM_FUNC_END(__pi_strchr)
+
+SYM_FUNC_ALIAS_WEAK(strchr, __pi_strchr)
EXPORT_SYMBOL_NOKASAN(strchr)
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
index 83bcad72ec97..9b89b4533607 100644
--- a/arch/arm64/lib/strcmp.S
+++ b/arch/arm64/lib/strcmp.S
@@ -1,9 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (c) 2012-2021, Arm Limited.
+ * Copyright (c) 2012-2022, Arm Limited.
*
* Adapted from the original at:
- * https://github.com/ARM-software/optimized-routines/blob/afd6244a1f8d9229/string/aarch64/strcmp.S
+ * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strcmp.S
*/
#include <linux/linkage.h>
@@ -11,166 +11,180 @@
/* Assumptions:
*
- * ARMv8-a, AArch64
+ * ARMv8-a, AArch64.
+ * MTE compatible.
*/
#define L(label) .L ## label
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
-/* Parameters and result. */
#define src1 x0
#define src2 x1
#define result x0
-/* Internal variables. */
#define data1 x2
#define data1w w2
#define data2 x3
#define data2w w3
#define has_nul x4
#define diff x5
+#define off1 x5
#define syndrome x6
-#define tmp1 x7
-#define tmp2 x8
-#define tmp3 x9
-#define zeroones x10
-#define pos x11
-
- /* Start of performance-critical section -- one 64B cache line. */
- .align 6
-SYM_FUNC_START_WEAK_PI(strcmp)
- eor tmp1, src1, src2
- mov zeroones, #REP8_01
- tst tmp1, #7
+#define tmp x6
+#define data3 x7
+#define zeroones x8
+#define shift x9
+#define off2 x10
+
+/* On big-endian early bytes are at MSB and on little-endian LSB.
+ LS_FW means shifting towards early bytes. */
+#ifdef __AARCH64EB__
+# define LS_FW lsl
+#else
+# define LS_FW lsr
+#endif
+
+/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+ (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+ can be done in parallel across the entire word.
+ Since carry propagation makes 0x1 bytes before a NUL byte appear
+ NUL too in big-endian, byte-reverse the data before the NUL check. */
+
+
+SYM_FUNC_START(__pi_strcmp)
+ sub off2, src2, src1
+ mov zeroones, REP8_01
+ and tmp, src1, 7
+ tst off2, 7
b.ne L(misaligned8)
- ands tmp1, src1, #7
- b.ne L(mutual_align)
- /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
- (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
- can be done in parallel across the entire word. */
+ cbnz tmp, L(mutual_align)
+
+ .p2align 4
+
L(loop_aligned):
- ldr data1, [src1], #8
- ldr data2, [src2], #8
+ ldr data2, [src1, off2]
+ ldr data1, [src1], 8
L(start_realigned):
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- eor diff, data1, data2 /* Non-zero if differences found. */
- bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
+#ifdef __AARCH64EB__
+ rev tmp, data1
+ sub has_nul, tmp, zeroones
+ orr tmp, tmp, REP8_7f
+#else
+ sub has_nul, data1, zeroones
+ orr tmp, data1, REP8_7f
+#endif
+ bics has_nul, has_nul, tmp /* Non-zero if NUL terminator. */
+ ccmp data1, data2, 0, eq
+ b.eq L(loop_aligned)
+#ifdef __AARCH64EB__
+ rev has_nul, has_nul
+#endif
+ eor diff, data1, data2
orr syndrome, diff, has_nul
- cbz syndrome, L(loop_aligned)
- /* End of performance-critical section -- one 64B cache line. */
-
L(end):
-#ifndef __AARCH64EB__
+#ifndef __AARCH64EB__
rev syndrome, syndrome
rev data1, data1
- /* The MS-non-zero bit of the syndrome marks either the first bit
- that is different, or the top bit of the first zero byte.
- Shifting left now will bring the critical information into the
- top bits. */
- clz pos, syndrome
rev data2, data2
- lsl data1, data1, pos
- lsl data2, data2, pos
- /* But we need to zero-extend (char is unsigned) the value and then
- perform a signed 32-bit subtraction. */
- lsr data1, data1, #56
- sub result, data1, data2, lsr #56
- ret
-#else
- /* For big-endian we cannot use the trick with the syndrome value
- as carry-propagation can corrupt the upper bits if the trailing
- bytes in the string contain 0x01. */
- /* However, if there is no NUL byte in the dword, we can generate
- the result directly. We can't just subtract the bytes as the
- MSB might be significant. */
- cbnz has_nul, 1f
- cmp data1, data2
- cset result, ne
- cneg result, result, lo
- ret
-1:
- /* Re-compute the NUL-byte detection, using a byte-reversed value. */
- rev tmp3, data1
- sub tmp1, tmp3, zeroones
- orr tmp2, tmp3, #REP8_7f
- bic has_nul, tmp1, tmp2
- rev has_nul, has_nul
- orr syndrome, diff, has_nul
- clz pos, syndrome
- /* The MS-non-zero bit of the syndrome marks either the first bit
- that is different, or the top bit of the first zero byte.
+#endif
+ clz shift, syndrome
+ /* The most-significant-non-zero bit of the syndrome marks either the
+ first bit that is different, or the top bit of the first zero byte.
Shifting left now will bring the critical information into the
top bits. */
- lsl data1, data1, pos
- lsl data2, data2, pos
+ lsl data1, data1, shift
+ lsl data2, data2, shift
/* But we need to zero-extend (char is unsigned) the value and then
perform a signed 32-bit subtraction. */
- lsr data1, data1, #56
- sub result, data1, data2, lsr #56
+ lsr data1, data1, 56
+ sub result, data1, data2, lsr 56
ret
-#endif
+
+ .p2align 4
L(mutual_align):
/* Sources are mutually aligned, but are not currently at an
alignment boundary. Round down the addresses and then mask off
- the bytes that preceed the start point. */
- bic src1, src1, #7
- bic src2, src2, #7
- lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
- ldr data1, [src1], #8
- neg tmp1, tmp1 /* Bits to alignment -64. */
- ldr data2, [src2], #8
- mov tmp2, #~0
-#ifdef __AARCH64EB__
- /* Big-endian. Early bytes are at MSB. */
- lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
-#else
- /* Little-endian. Early bytes are at LSB. */
- lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
-#endif
- orr data1, data1, tmp2
- orr data2, data2, tmp2
+ the bytes that precede the start point. */
+ bic src1, src1, 7
+ ldr data2, [src1, off2]
+ ldr data1, [src1], 8
+ neg shift, src2, lsl 3 /* Bits to alignment -64. */
+ mov tmp, -1
+ LS_FW tmp, tmp, shift
+ orr data1, data1, tmp
+ orr data2, data2, tmp
b L(start_realigned)
L(misaligned8):
/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
- checking to make sure that we don't access beyond page boundary in
- SRC2. */
- tst src1, #7
- b.eq L(loop_misaligned)
+ checking to make sure that we don't access beyond the end of SRC2. */
+ cbz tmp, L(src1_aligned)
L(do_misaligned):
- ldrb data1w, [src1], #1
- ldrb data2w, [src2], #1
- cmp data1w, #1
- ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
+ ldrb data1w, [src1], 1
+ ldrb data2w, [src2], 1
+ cmp data1w, 0
+ ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */
b.ne L(done)
- tst src1, #7
+ tst src1, 7
b.ne L(do_misaligned)
-L(loop_misaligned):
- /* Test if we are within the last dword of the end of a 4K page. If
- yes then jump back to the misaligned loop to copy a byte at a time. */
- and tmp1, src2, #0xff8
- eor tmp1, tmp1, #0xff8
- cbz tmp1, L(do_misaligned)
- ldr data1, [src1], #8
- ldr data2, [src2], #8
-
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- eor diff, data1, data2 /* Non-zero if differences found. */
- bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
+L(src1_aligned):
+ neg shift, src2, lsl 3
+ bic src2, src2, 7
+ ldr data3, [src2], 8
+#ifdef __AARCH64EB__
+ rev data3, data3
+#endif
+ lsr tmp, zeroones, shift
+ orr data3, data3, tmp
+ sub has_nul, data3, zeroones
+ orr tmp, data3, REP8_7f
+ bics has_nul, has_nul, tmp
+ b.ne L(tail)
+
+ sub off1, src2, src1
+
+ .p2align 4
+
+L(loop_unaligned):
+ ldr data3, [src1, off1]
+ ldr data2, [src1, off2]
+#ifdef __AARCH64EB__
+ rev data3, data3
+#endif
+ sub has_nul, data3, zeroones
+ orr tmp, data3, REP8_7f
+ ldr data1, [src1], 8
+ bics has_nul, has_nul, tmp
+ ccmp data1, data2, 0, eq
+ b.eq L(loop_unaligned)
+
+ lsl tmp, has_nul, shift
+#ifdef __AARCH64EB__
+ rev tmp, tmp
+#endif
+ eor diff, data1, data2
+ orr syndrome, diff, tmp
+ cbnz syndrome, L(end)
+L(tail):
+ ldr data1, [src1]
+ neg shift, shift
+ lsr data2, data3, shift
+ lsr has_nul, has_nul, shift
+#ifdef __AARCH64EB__
+ rev data2, data2
+ rev has_nul, has_nul
+#endif
+ eor diff, data1, data2
orr syndrome, diff, has_nul
- cbz syndrome, L(loop_misaligned)
b L(end)
L(done):
sub result, data1, data2
ret
-
-SYM_FUNC_END_PI(strcmp)
-EXPORT_SYMBOL_NOHWKASAN(strcmp)
+SYM_FUNC_END(__pi_strcmp)
+SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp)
+EXPORT_SYMBOL_NOHWKASAN(strcmp)
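The (X - 1) & ~(X | 0x7f) NUL-detection trick that the new strcmp comment describes is easiest to verify in C: subtracting 0x01 from a zero byte produces 0xff, and ~(X | 0x7f) keeps bit 7 only where the original byte was below 0x80, so the 0x80 bit survives exactly for zero bytes. A minimal demonstration:

	#include <stdint.h>
	#include <stdio.h>

	#define REP8_01 0x0101010101010101ULL
	#define REP8_7f 0x7f7f7f7f7f7f7f7fULL

	/* Non-zero iff some byte of x is zero; the lowest such byte sets
	 * its 0x80 bit (carry propagation can also mark 0x01 bytes above
	 * it, which is why the code byte-reverses the data on big-endian
	 * before the check).
	 */
	static uint64_t has_nul(uint64_t x)
	{
		return (x - REP8_01) & ~(x | REP8_7f);
	}

	int main(void)
	{
		printf("%d\n", has_nul(0x6161616161616161ULL) != 0); /* 0 */
		printf("%d\n", has_nul(0x6161006161616161ULL) != 0); /* 1 */
		return 0;
	}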
diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S
index 1648790e91b3..4919fe81ae54 100644
--- a/arch/arm64/lib/strlen.S
+++ b/arch/arm64/lib/strlen.S
@@ -79,7 +79,7 @@
whether the first fetch, which may be misaligned, crosses a page
boundary. */
-SYM_FUNC_START_WEAK_PI(strlen)
+SYM_FUNC_START(__pi_strlen)
and tmp1, srcin, MIN_PAGE_SIZE - 1
mov zeroones, REP8_01
cmp tmp1, MIN_PAGE_SIZE - 16
@@ -208,6 +208,6 @@ L(page_cross):
csel data1, data1, tmp4, eq
csel data2, data2, tmp2, eq
b L(page_cross_entry)
-
-SYM_FUNC_END_PI(strlen)
+SYM_FUNC_END(__pi_strlen)
+SYM_FUNC_ALIAS_WEAK(strlen, __pi_strlen)
EXPORT_SYMBOL_NOKASAN(strlen)
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
index e42bcfcd37e6..fe7bbc0b42a7 100644
--- a/arch/arm64/lib/strncmp.S
+++ b/arch/arm64/lib/strncmp.S
@@ -1,9 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (c) 2013-2021, Arm Limited.
+ * Copyright (c) 2013-2022, Arm Limited.
*
* Adapted from the original at:
- * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/strncmp.S
+ * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strncmp.S
*/
#include <linux/linkage.h>
@@ -11,14 +11,14 @@
/* Assumptions:
*
- * ARMv8-a, AArch64
+ * ARMv8-a, AArch64.
+ * MTE compatible.
*/
#define L(label) .L ## label
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
/* Parameters and result. */
#define src1 x0
@@ -39,12 +39,26 @@
#define tmp3 x10
#define zeroones x11
#define pos x12
-#define limit_wd x13
-#define mask x14
-#define endloop x15
+#define mask x13
+#define endloop x14
#define count mask
+#define offset pos
+#define neg_offset x15
-SYM_FUNC_START_WEAK_PI(strncmp)
+/* Define endian dependent shift operations.
+ On big-endian early bytes are at MSB and on little-endian LSB.
+ LS_FW means shifting towards early bytes.
+ LS_BK means shifting towards later bytes.
+ */
+#ifdef __AARCH64EB__
+#define LS_FW lsl
+#define LS_BK lsr
+#else
+#define LS_FW lsr
+#define LS_BK lsl
+#endif
+
+SYM_FUNC_START(__pi_strncmp)
cbz limit, L(ret0)
eor tmp1, src1, src2
mov zeroones, #REP8_01
@@ -52,9 +66,6 @@ SYM_FUNC_START_WEAK_PI(strncmp)
and count, src1, #7
b.ne L(misaligned8)
cbnz count, L(mutual_align)
- /* Calculate the number of full and partial words -1. */
- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
- lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
@@ -64,56 +75,52 @@ L(loop_aligned):
ldr data1, [src1], #8
ldr data2, [src2], #8
L(start_realigned):
- subs limit_wd, limit_wd, #1
+ subs limit, limit, #8
sub tmp1, data1, zeroones
orr tmp2, data1, #REP8_7f
eor diff, data1, data2 /* Non-zero if differences found. */
- csinv endloop, diff, xzr, pl /* Last Dword or differences. */
+ csinv endloop, diff, xzr, hi /* Last Dword or differences. */
bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
ccmp endloop, #0, #0, eq
b.eq L(loop_aligned)
/* End of main loop */
- /* Not reached the limit, must have found the end or a diff. */
- tbz limit_wd, #63, L(not_limit)
-
- /* Limit % 8 == 0 => all bytes significant. */
- ands limit, limit, #7
- b.eq L(not_limit)
-
- lsl limit, limit, #3 /* Bits -> bytes. */
- mov mask, #~0
-#ifdef __AARCH64EB__
- lsr mask, mask, limit
-#else
- lsl mask, mask, limit
-#endif
- bic data1, data1, mask
- bic data2, data2, mask
-
- /* Make sure that the NUL byte is marked in the syndrome. */
- orr has_nul, has_nul, mask
-
-L(not_limit):
+L(full_check):
+#ifndef __AARCH64EB__
orr syndrome, diff, has_nul
-
-#ifndef __AARCH64EB__
+ add limit, limit, 8 /* Rewind limit to before last subs. */
+L(syndrome_check):
+ /* Limit was reached. Check if the NUL byte or the difference
+ is before the limit. */
rev syndrome, syndrome
rev data1, data1
- /* The MS-non-zero bit of the syndrome marks either the first bit
- that is different, or the top bit of the first zero byte.
- Shifting left now will bring the critical information into the
- top bits. */
clz pos, syndrome
rev data2, data2
lsl data1, data1, pos
+ cmp limit, pos, lsr #3
lsl data2, data2, pos
/* But we need to zero-extend (char is unsigned) the value and then
perform a signed 32-bit subtraction. */
lsr data1, data1, #56
sub result, data1, data2, lsr #56
+ csel result, result, xzr, hi
ret
#else
+ /* Not reached the limit, must have found the end or a diff. */
+ tbz limit, #63, L(not_limit)
+ add tmp1, limit, 8
+ cbz limit, L(not_limit)
+
+ lsl limit, tmp1, #3 /* Bits -> bytes. */
+ mov mask, #~0
+ lsr mask, mask, limit
+ bic data1, data1, mask
+ bic data2, data2, mask
+
+ /* Make sure that the NUL byte is marked in the syndrome. */
+ orr has_nul, has_nul, mask
+
+L(not_limit):
/* For big-endian we cannot use the trick with the syndrome value
as carry-propagation can corrupt the upper bits if the trailing
bytes in the string contain 0x01. */
@@ -134,10 +141,11 @@ L(not_limit):
rev has_nul, has_nul
orr syndrome, diff, has_nul
clz pos, syndrome
- /* The MS-non-zero bit of the syndrome marks either the first bit
- that is different, or the top bit of the first zero byte.
+ /* The most-significant-non-zero bit of the syndrome marks either the
+ first bit that is different, or the top bit of the first zero byte.
Shifting left now will bring the critical information into the
top bits. */
+L(end_quick):
lsl data1, data1, pos
lsl data2, data2, pos
/* But we need to zero-extend (char is unsigned) the value and then
@@ -159,22 +167,12 @@ L(mutual_align):
neg tmp3, count, lsl #3 /* 64 - bits(bytes beyond align). */
ldr data2, [src2], #8
mov tmp2, #~0
- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
-#ifdef __AARCH64EB__
- /* Big-endian. Early bytes are at MSB. */
- lsl tmp2, tmp2, tmp3 /* Shift (count & 63). */
-#else
- /* Little-endian. Early bytes are at LSB. */
- lsr tmp2, tmp2, tmp3 /* Shift (count & 63). */
-#endif
- and tmp3, limit_wd, #7
- lsr limit_wd, limit_wd, #3
- /* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */
- add limit, limit, count
- add tmp3, tmp3, count
+ LS_FW tmp2, tmp2, tmp3 /* Shift (count & 63). */
+ /* Adjust the limit and ensure it doesn't overflow. */
+ adds limit, limit, count
+ csinv limit, limit, xzr, lo
orr data1, data1, tmp2
orr data2, data2, tmp2
- add limit_wd, limit_wd, tmp3, lsr #3
b L(start_realigned)
.p2align 4
@@ -197,13 +195,11 @@ L(done):
/* Align the SRC1 to a dword by doing a bytewise compare and then do
the dword loop. */
L(try_misaligned_words):
- lsr limit_wd, limit, #3
- cbz count, L(do_misaligned)
+ cbz count, L(src1_aligned)
neg count, count
and count, count, #7
sub limit, limit, count
- lsr limit_wd, limit, #3
L(page_end_loop):
ldrb data1w, [src1], #1
@@ -214,48 +210,101 @@ L(page_end_loop):
subs count, count, #1
b.hi L(page_end_loop)
-L(do_misaligned):
- /* Prepare ourselves for the next page crossing. Unlike the aligned
- loop, we fetch 1 less dword because we risk crossing bounds on
- SRC2. */
- mov count, #8
- subs limit_wd, limit_wd, #1
- b.lo L(done_loop)
-L(loop_misaligned):
- and tmp2, src2, #0xff8
- eor tmp2, tmp2, #0xff8
- cbz tmp2, L(page_end_loop)
+ /* The following diagram explains the comparison of misaligned strings.
+ The bytes are shown in natural order. For little-endian, it is
+ reversed in the registers. The "x" bytes are before the string.
+ The "|" separates data that is loaded at one time.
+ src1 | a a a a a a a a | b b b c c c c c | . . .
+ src2 | x x x x x a a a a a a a a b b b | c c c c c . . .
+
+ After shifting in each step, the data looks like this:
+ STEP_A STEP_B STEP_C
+ data1 a a a a a a a a b b b c c c c c b b b c c c c c
+ data2 a a a a a a a a b b b 0 0 0 0 0 0 0 0 c c c c c
+ The bytes with "0" are eliminated from the syndrome via mask.
+
+ Align SRC2 down to 16 bytes. This way we can read 16 bytes at a
+ time from SRC2. The comparison happens in 3 steps. After each step
+ the loop can exit, or read from SRC1 or SRC2. */
+L(src1_aligned):
+ /* Calculate offset from 8 byte alignment to string start in bits. No
+ need to mask offset since shifts are ignoring upper bits. */
+ lsl offset, src2, #3
+ bic src2, src2, #0xf
+ mov mask, -1
+ neg neg_offset, offset
ldr data1, [src1], #8
- ldr data2, [src2], #8
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- eor diff, data1, data2 /* Non-zero if differences found. */
- bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
- ccmp diff, #0, #0, eq
- b.ne L(not_limit)
- subs limit_wd, limit_wd, #1
- b.pl L(loop_misaligned)
+ ldp tmp1, tmp2, [src2], #16
+ LS_BK mask, mask, neg_offset
+ and neg_offset, neg_offset, #63 /* Need actual value for cmp later. */
+ /* Skip the first compare if data in tmp1 is irrelevant. */
+ tbnz offset, 6, L(misaligned_mid_loop)
-L(done_loop):
- /* We found a difference or a NULL before the limit was reached. */
- and limit, limit, #7
- cbz limit, L(not_limit)
- /* Read the last word. */
- sub src1, src1, 8
- sub src2, src2, 8
- ldr data1, [src1, limit]
- ldr data2, [src2, limit]
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
+L(loop_misaligned):
+ /* STEP_A: Compare full 8 bytes when there is enough data from SRC2.*/
+ LS_FW data2, tmp1, offset
+ LS_BK tmp1, tmp2, neg_offset
+ subs limit, limit, #8
+ orr data2, data2, tmp1 /* 8 bytes from SRC2 combined from two regs.*/
+ sub has_nul, data1, zeroones
eor diff, data1, data2 /* Non-zero if differences found. */
- bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
- ccmp diff, #0, #0, eq
- b.ne L(not_limit)
+ orr tmp3, data1, #REP8_7f
+ csinv endloop, diff, xzr, hi /* If limit, set to all ones. */
+ bic has_nul, has_nul, tmp3 /* Non-zero if NUL byte found in SRC1. */
+ orr tmp3, endloop, has_nul
+ cbnz tmp3, L(full_check)
+
+ ldr data1, [src1], #8
+L(misaligned_mid_loop):
+ /* STEP_B: Compare first part of data1 to second part of tmp2. */
+ LS_FW data2, tmp2, offset
+#ifdef __AARCH64EB__
+ /* For big-endian we do a byte reverse to avoid carry-propagation
+ problem described above. This way we can reuse the has_nul in the
+ next step and also use syndrome value trick at the end. */
+ rev tmp3, data1
+ #define data1_fixed tmp3
+#else
+ #define data1_fixed data1
+#endif
+ sub has_nul, data1_fixed, zeroones
+ orr tmp3, data1_fixed, #REP8_7f
+ eor diff, data2, data1 /* Non-zero if differences found. */
+ bic has_nul, has_nul, tmp3 /* Non-zero if NUL terminator. */
+#ifdef __AARCH64EB__
+ rev has_nul, has_nul
+#endif
+ cmp limit, neg_offset, lsr #3
+ orr syndrome, diff, has_nul
+ bic syndrome, syndrome, mask /* Ignore later bytes. */
+ csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */
+ cbnz tmp3, L(syndrome_check)
+
+ /* STEP_C: Compare second part of data1 to first part of tmp1. */
+ ldp tmp1, tmp2, [src2], #16
+ cmp limit, #8
+ LS_BK data2, tmp1, neg_offset
+ eor diff, data2, data1 /* Non-zero if differences found. */
+ orr syndrome, diff, has_nul
+ and syndrome, syndrome, mask /* Ignore earlier bytes. */
+ csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */
+ cbnz tmp3, L(syndrome_check)
+
+ ldr data1, [src1], #8
+ sub limit, limit, #8
+ b L(loop_misaligned)
+
+#ifdef __AARCH64EB__
+L(syndrome_check):
+ clz pos, syndrome
+ cmp pos, limit, lsl #3
+ b.lo L(end_quick)
+#endif
L(ret0):
mov result, #0
ret
-
-SYM_FUNC_END_PI(strncmp)
-EXPORT_SYMBOL_NOHWKASAN(strncmp)
+SYM_FUNC_END(__pi_strncmp)
+SYM_FUNC_ALIAS_WEAK(strncmp, __pi_strncmp)
+EXPORT_SYMBOL_NOHWKASAN(strncmp)
diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S
index b72913a99038..d5ac0e10a01d 100644
--- a/arch/arm64/lib/strnlen.S
+++ b/arch/arm64/lib/strnlen.S
@@ -47,7 +47,7 @@ limit_wd .req x14
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080
-SYM_FUNC_START_WEAK_PI(strnlen)
+SYM_FUNC_START(__pi_strnlen)
cbz limit, .Lhit_limit
mov zeroones, #REP8_01
bic src, srcin, #15
@@ -156,5 +156,7 @@ CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */
.Lhit_limit:
mov len, limit
ret
-SYM_FUNC_END_PI(strnlen)
+SYM_FUNC_END(__pi_strnlen)
+
+SYM_FUNC_ALIAS_WEAK(strnlen, __pi_strnlen)
EXPORT_SYMBOL_NOKASAN(strnlen)
diff --git a/arch/arm64/lib/strrchr.S b/arch/arm64/lib/strrchr.S
index 13132d1ed6d1..a5123cf0ce12 100644
--- a/arch/arm64/lib/strrchr.S
+++ b/arch/arm64/lib/strrchr.S
@@ -18,7 +18,7 @@
* Returns:
* x0 - address of last occurrence of 'c' or 0
*/
-SYM_FUNC_START_WEAK_PI(strrchr)
+SYM_FUNC_START(__pi_strrchr)
mov x3, #0
and w1, w1, #0xff
1: ldrb w2, [x0], #1
@@ -29,5 +29,6 @@ SYM_FUNC_START_WEAK_PI(strrchr)
b 1b
2: mov x0, x3
ret
-SYM_FUNC_END_PI(strrchr)
+SYM_FUNC_END(__pi_strrchr)
+SYM_FUNC_ALIAS_WEAK(strrchr, __pi_strrchr)
EXPORT_SYMBOL_NOKASAN(strrchr)
diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c
index d189cf4e70ea..96b171995d19 100644
--- a/arch/arm64/lib/xor-neon.c
+++ b/arch/arm64/lib/xor-neon.c
@@ -10,8 +10,8 @@
#include <linux/module.h>
#include <asm/neon-intrinsics.h>
-void xor_arm64_neon_2(unsigned long bytes, unsigned long *p1,
- unsigned long *p2)
+void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
uint64_t *dp1 = (uint64_t *)p1;
uint64_t *dp2 = (uint64_t *)p2;
@@ -37,8 +37,9 @@ void xor_arm64_neon_2(unsigned long bytes, unsigned long *p1,
} while (--lines > 0);
}
-void xor_arm64_neon_3(unsigned long bytes, unsigned long *p1,
- unsigned long *p2, unsigned long *p3)
+void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
uint64_t *dp1 = (uint64_t *)p1;
uint64_t *dp2 = (uint64_t *)p2;
@@ -72,8 +73,10 @@ void xor_arm64_neon_3(unsigned long bytes, unsigned long *p1,
} while (--lines > 0);
}
-void xor_arm64_neon_4(unsigned long bytes, unsigned long *p1,
- unsigned long *p2, unsigned long *p3, unsigned long *p4)
+void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
uint64_t *dp1 = (uint64_t *)p1;
uint64_t *dp2 = (uint64_t *)p2;
@@ -115,9 +118,11 @@ void xor_arm64_neon_4(unsigned long bytes, unsigned long *p1,
} while (--lines > 0);
}
-void xor_arm64_neon_5(unsigned long bytes, unsigned long *p1,
- unsigned long *p2, unsigned long *p3,
- unsigned long *p4, unsigned long *p5)
+void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
uint64_t *dp1 = (uint64_t *)p1;
uint64_t *dp2 = (uint64_t *)p2;
@@ -186,8 +191,10 @@ static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r)
return res;
}
-static void xor_arm64_eor3_3(unsigned long bytes, unsigned long *p1,
- unsigned long *p2, unsigned long *p3)
+static void xor_arm64_eor3_3(unsigned long bytes,
+ unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
uint64_t *dp1 = (uint64_t *)p1;
uint64_t *dp2 = (uint64_t *)p2;
@@ -219,9 +226,11 @@ static void xor_arm64_eor3_3(unsigned long bytes, unsigned long *p1,
} while (--lines > 0);
}
-static void xor_arm64_eor3_4(unsigned long bytes, unsigned long *p1,
- unsigned long *p2, unsigned long *p3,
- unsigned long *p4)
+static void xor_arm64_eor3_4(unsigned long bytes,
+ unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
uint64_t *dp1 = (uint64_t *)p1;
uint64_t *dp2 = (uint64_t *)p2;
@@ -261,9 +270,12 @@ static void xor_arm64_eor3_4(unsigned long bytes, unsigned long *p1,
} while (--lines > 0);
}
-static void xor_arm64_eor3_5(unsigned long bytes, unsigned long *p1,
- unsigned long *p2, unsigned long *p3,
- unsigned long *p4, unsigned long *p5)
+static void xor_arm64_eor3_5(unsigned long bytes,
+ unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
uint64_t *dp1 = (uint64_t *)p1;
uint64_t *dp2 = (uint64_t *)p2;
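
The point of the const/__restrict churn above is to promise the compiler that the destination and source blocks never alias, which is what lets it vectorize these XOR loops without reload-after-store checks. A minimal stand-alone sketch of the same idea (hypothetical function, not part of the patch):

void xor_blocks_2(unsigned long bytes, unsigned long * __restrict p1,
		  const unsigned long * __restrict p2)
{
	unsigned long i;

	/* Because p1 and p2 are declared non-aliasing, the compiler may
	 * batch the p2 loads and p1 stores into wide vector operations.
	 */
	for (i = 0; i < bytes / sizeof(*p1); i++)
		p1[i] ^= p2[i];
}
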
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 7d0563db4201..0ea6cc25dc66 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -107,10 +107,11 @@ SYM_FUNC_END(icache_inval_pou)
* - start - virtual start address of region
* - end - virtual end address of region
*/
-SYM_FUNC_START_PI(dcache_clean_inval_poc)
+SYM_FUNC_START(__pi_dcache_clean_inval_poc)
dcache_by_line_op civac, sy, x0, x1, x2, x3
ret
-SYM_FUNC_END_PI(dcache_clean_inval_poc)
+SYM_FUNC_END(__pi_dcache_clean_inval_poc)
+SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc)
/*
* dcache_clean_pou(start, end)
@@ -140,7 +141,7 @@ SYM_FUNC_END(dcache_clean_pou)
* - start - kernel start address of region
* - end - kernel end address of region
*/
-SYM_FUNC_START_PI(dcache_inval_poc)
+SYM_FUNC_START(__pi_dcache_inval_poc)
dcache_line_size x2, x3
sub x3, x2, #1
tst x1, x3 // end cache line aligned?
@@ -158,7 +159,8 @@ SYM_FUNC_START_PI(dcache_inval_poc)
b.lo 2b
dsb sy
ret
-SYM_FUNC_END_PI(dcache_inval_poc)
+SYM_FUNC_END(__pi_dcache_inval_poc)
+SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc)
/*
* dcache_clean_poc(start, end)
@@ -169,10 +171,11 @@ SYM_FUNC_END_PI(dcache_inval_poc)
* - start - virtual start address of region
* - end - virtual end address of region
*/
-SYM_FUNC_START_PI(dcache_clean_poc)
+SYM_FUNC_START(__pi_dcache_clean_poc)
dcache_by_line_op cvac, sy, x0, x1, x2, x3
ret
-SYM_FUNC_END_PI(dcache_clean_poc)
+SYM_FUNC_END(__pi_dcache_clean_poc)
+SYM_FUNC_ALIAS(dcache_clean_poc, __pi_dcache_clean_poc)
/*
* dcache_clean_pop(start, end)
@@ -183,13 +186,14 @@ SYM_FUNC_END_PI(dcache_clean_poc)
* - start - virtual start address of region
* - end - virtual end address of region
*/
-SYM_FUNC_START_PI(dcache_clean_pop)
+SYM_FUNC_START(__pi_dcache_clean_pop)
alternative_if_not ARM64_HAS_DCPOP
b dcache_clean_poc
alternative_else_nop_endif
dcache_by_line_op cvap, sy, x0, x1, x2, x3
ret
-SYM_FUNC_END_PI(dcache_clean_pop)
+SYM_FUNC_END(__pi_dcache_clean_pop)
+SYM_FUNC_ALIAS(dcache_clean_pop, __pi_dcache_clean_pop)
/*
* __dma_flush_area(start, size)
@@ -199,11 +203,12 @@ SYM_FUNC_END_PI(dcache_clean_pop)
* - start - virtual start address of region
* - size - size in question
*/
-SYM_FUNC_START_PI(__dma_flush_area)
+SYM_FUNC_START(__pi___dma_flush_area)
add x1, x0, x1
dcache_by_line_op civac, sy, x0, x1, x2, x3
ret
-SYM_FUNC_END_PI(__dma_flush_area)
+SYM_FUNC_END(__pi___dma_flush_area)
+SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area)
/*
* __dma_map_area(start, size, dir)
@@ -211,12 +216,13 @@ SYM_FUNC_END_PI(__dma_flush_area)
* - size - size of region
* - dir - DMA direction
*/
-SYM_FUNC_START_PI(__dma_map_area)
+SYM_FUNC_START(__pi___dma_map_area)
add x1, x0, x1
cmp w2, #DMA_FROM_DEVICE
b.eq __pi_dcache_inval_poc
b __pi_dcache_clean_poc
-SYM_FUNC_END_PI(__dma_map_area)
+SYM_FUNC_END(__pi___dma_map_area)
+SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area)
/*
* __dma_unmap_area(start, size, dir)
@@ -224,9 +230,10 @@ SYM_FUNC_END_PI(__dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-SYM_FUNC_START_PI(__dma_unmap_area)
+SYM_FUNC_START(__pi___dma_unmap_area)
add x1, x0, x1
cmp w2, #DMA_TO_DEVICE
b.ne __pi_dcache_inval_poc
ret
-SYM_FUNC_END_PI(__dma_unmap_area)
+SYM_FUNC_END(__pi___dma_unmap_area)
+SYM_FUNC_ALIAS(__dma_unmap_area, __pi___dma_unmap_area)
diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
index c0181e60cc98..489455309695 100644
--- a/arch/arm64/mm/extable.c
+++ b/arch/arm64/mm/extable.c
@@ -40,8 +40,8 @@ static bool
ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
struct pt_regs *regs)
{
- int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->type);
- int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->type);
+ int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data);
+ int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
unsigned long data, addr, offset;
addr = pt_regs_read_reg(regs, reg_addr);
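
The one-word fix above matters because the packed register indices live in the exception table entry's data field; reading them out of the type field yielded bogus register numbers. A reduced sketch of the FIELD_GET() unpacking (the bit positions are an assumption based on asm/asm-extable.h as recalled, not taken from this patch):

#include <linux/bits.h>
#include <linux/bitfield.h>

/* Assumed layout of ex->data for this handler:
 *   bits 4:0 - register that receives the loaded data
 *   bits 9:5 - register that holds the faulting address
 */
#define EX_DATA_REG_DATA	GENMASK(4, 0)
#define EX_DATA_REG_ADDR	GENMASK(9, 5)

static int example_unpack(unsigned long data)
{
	int reg_data = FIELD_GET(EX_DATA_REG_DATA, data); /* data & 0x1f */
	int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, data); /* (data >> 5) & 0x1f */

	return reg_data | (reg_addr << 8);	/* just to use both values */
}
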
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 2aaf950b906c..a06c6ac770d4 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -52,6 +52,13 @@ void __sync_icache_dcache(pte_t pte)
{
struct page *page = pte_page(pte);
+ /*
+ * HugeTLB pages are always fully mapped, so it is enough to set the
+ * head page's PG_dcache_clean flag.
+ */
+ if (PageHuge(page))
+ page = compound_head(page);
+
if (!test_bit(PG_dcache_clean, &page->flags)) {
sync_icache_aliases((unsigned long)page_address(page),
(unsigned long)page_address(page) +
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index ffb9c229610a..cbace1c9e137 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -56,25 +56,34 @@ void __init arm64_hugetlb_cma_reserve(void)
}
#endif /* CONFIG_CMA */
-#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
-bool arch_hugetlb_migration_supported(struct hstate *h)
+static bool __hugetlb_valid_size(unsigned long size)
{
- size_t pagesize = huge_page_size(h);
-
- switch (pagesize) {
+ switch (size) {
#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SIZE:
return pud_sect_supported();
#endif
- case PMD_SIZE:
case CONT_PMD_SIZE:
+ case PMD_SIZE:
case CONT_PTE_SIZE:
return true;
}
- pr_warn("%s: unrecognized huge page size 0x%lx\n",
- __func__, pagesize);
+
return false;
}
+
+#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
+bool arch_hugetlb_migration_supported(struct hstate *h)
+{
+ size_t pagesize = huge_page_size(h);
+
+ if (!__hugetlb_valid_size(pagesize)) {
+ pr_warn("%s: unrecognized huge page size 0x%lx\n",
+ __func__, pagesize);
+ return false;
+ }
+ return true;
+}
#endif
int pmd_huge(pmd_t pmd)
@@ -347,6 +356,7 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
size_t pagesize = 1UL << shift;
+ entry = pte_mkhuge(entry);
if (pagesize == CONT_PTE_SIZE) {
entry = pte_mkcont(entry);
} else if (pagesize == CONT_PMD_SIZE) {
@@ -506,16 +516,5 @@ arch_initcall(hugetlbpage_init);
bool __init arch_hugetlb_valid_size(unsigned long size)
{
- switch (size) {
-#ifndef __PAGETABLE_PMD_FOLDED
- case PUD_SIZE:
- return pud_sect_supported();
-#endif
- case CONT_PMD_SIZE:
- case PMD_SIZE:
- case CONT_PTE_SIZE:
- return true;
- }
-
- return false;
+ return __hugetlb_valid_size(size);
}
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index db63cc885771..9e26ec80d317 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -61,8 +61,34 @@ EXPORT_SYMBOL(memstart_addr);
* unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4).
* In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory,
* otherwise it is empty.
+ *
+ * Memory reservation for the crash kernel is either done early or
+ * deferred, depending on the DMA memory zone configs (ZONE_DMA and
+ * ZONE_DMA32) --
+ *
+ * In the absence of both ZONE_DMA configs, arm64_dma_phys_limit is
+ * initialized here instead of in max_zone_phys(). This allows early
+ * reservation of crash kernel memory, which depends on it, and in turn
+ * linear creation of block mappings (greater than page-granularity)
+ * for all the memory bank ranges, giving a comparatively quick boot.
+ *
+ * If ZONE_DMA configs are defined, crash kernel memory reservation is
+ * delayed until the DMA zone memory range sizes are initialized in
+ * zone_sizes_init(). The deferral keeps the reservation clear of the
+ * DMA zone memory range and avoids overlapping allocations. The crash
+ * kernel boundaries are therefore unknown while the bank memory ranges
+ * are mapped, so the crash kernel range cannot be excluded from block
+ * mappings and page-granularity mappings are created for the entire
+ * memory range. Hence a slightly slower boot is observed.
+ *
+ * Note: page-granularity mappings are necessary for the crash kernel memory
+ * range so that its size can be shrunk via /sys/kernel/kexec_crash_size.
*/
-phys_addr_t arm64_dma_phys_limit __ro_after_init;
+#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)
+phys_addr_t __ro_after_init arm64_dma_phys_limit;
+#else
+phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1;
+#endif
#ifdef CONFIG_KEXEC_CORE
/*
@@ -153,8 +179,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
if (!arm64_dma_phys_limit)
arm64_dma_phys_limit = dma32_phys_limit;
#endif
- if (!arm64_dma_phys_limit)
- arm64_dma_phys_limit = PHYS_MASK + 1;
max_zone_pfns[ZONE_NORMAL] = max;
free_area_init(max_zone_pfns);
@@ -315,6 +339,9 @@ void __init arm64_memblock_init(void)
early_init_fdt_scan_reserved_mem();
+ if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32))
+ reserve_crashkernel();
+
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
}
@@ -361,7 +388,8 @@ void __init bootmem_init(void)
* request_standard_resources() depends on crashkernel's memory being
* reserved, so do it here.
*/
- reserve_crashkernel();
+ if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32))
+ reserve_crashkernel();
memblock_dump_all();
}
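
Condensing the two hunks above, reserve_crashkernel() now has two possible call sites, chosen by the zone configuration (a paraphrase of the patch, not literal kernel code):

/* arm64_memblock_init(): without DMA zones, arm64_dma_phys_limit is
 * already final (PHYS_MASK + 1), so reserve early and keep block
 * mappings for the rest of memory.
 */
if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32))
	reserve_crashkernel();

/* bootmem_init(): with DMA zones, wait until zone_sizes_init() has
 * computed arm64_dma_phys_limit so the reservation avoids them.
 */
if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32))
	reserve_crashkernel();
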
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index a38f54cd638c..77ada00280d9 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -7,8 +7,10 @@
#include <linux/io.h>
#include <linux/memblock.h>
+#include <linux/mm.h>
#include <linux/types.h>
+#include <asm/cpufeature.h>
#include <asm/page.h>
/*
@@ -38,3 +40,18 @@ int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
{
return !(((pfn << PAGE_SHIFT) + size) & ~PHYS_MASK);
}
+
+static int __init adjust_protection_map(void)
+{
+ /*
+ * With Enhanced PAN we can honour the execute-only permissions as
+ * there is no PAN override with such mappings.
+ */
+ if (cpus_have_const_cap(ARM64_HAS_EPAN)) {
+ protection_map[VM_EXEC] = PAGE_EXECONLY;
+ protection_map[VM_EXEC | VM_SHARED] = PAGE_EXECONLY;
+ }
+
+ return 0;
+}
+arch_initcall(adjust_protection_map);
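
The observable effect of adjust_protection_map() is that, on CPUs with EPAN, a request for PROT_EXEC without PROT_READ really produces an execute-only mapping instead of a silently readable one. A hypothetical userspace probe (illustrative only, not from the patch):

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	/* With EPAN this resolves through protection_map[VM_EXEC], i.e.
	 * PAGE_EXECONLY; a subsequent data read of the region would
	 * fault instead of succeeding.
	 */
	void *p = mmap(NULL, 4096, PROT_EXEC,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("execute-only mapping at %p\n", p);
	return 0;
}
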
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index acfae9b41cc8..626ec32873c6 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -17,6 +17,7 @@
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
+#include <linux/memremap.h>
#include <linux/memory.h>
#include <linux/fs.h>
#include <linux/io.h>
@@ -63,6 +64,7 @@ static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
static DEFINE_SPINLOCK(swapper_pgdir_lock);
+static DEFINE_MUTEX(fixmap_lock);
void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
{
@@ -294,18 +296,6 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
} while (addr = next, addr != end);
}
-static inline bool use_1G_block(unsigned long addr, unsigned long next,
- unsigned long phys)
-{
- if (PAGE_SHIFT != 12)
- return false;
-
- if (((addr | next | phys) & ~PUD_MASK) != 0)
- return false;
-
- return true;
-}
-
static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(int),
@@ -329,6 +319,12 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
}
BUG_ON(p4d_bad(p4d));
+ /*
+ * No need for locking during early boot. And it doesn't work as
+ * expected with KASLR enabled.
+ */
+ if (system_state != SYSTEM_BOOTING)
+ mutex_lock(&fixmap_lock);
pudp = pud_set_fixmap_offset(p4dp, addr);
do {
pud_t old_pud = READ_ONCE(*pudp);
@@ -338,7 +334,8 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
/*
* For 4K granule only, attempt to put down a 1GB block
*/
- if (use_1G_block(addr, next, phys) &&
+ if (pud_sect_supported() &&
+ ((addr | next | phys) & ~PUD_MASK) == 0 &&
(flags & NO_BLOCK_MAPPINGS) == 0) {
pud_set_huge(pudp, phys, prot);
@@ -359,6 +356,8 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
} while (pudp++, addr = next, addr != end);
pud_clear_fixmap();
+ if (system_state != SYSTEM_BOOTING)
+ mutex_unlock(&fixmap_lock);
}
static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
@@ -517,7 +516,7 @@ static void __init map_mem(pgd_t *pgdp)
*/
BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
- if (can_set_direct_map() || crash_mem_map || IS_ENABLED(CONFIG_KFENCE))
+ if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE))
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*
@@ -528,6 +527,17 @@ static void __init map_mem(pgd_t *pgdp)
*/
memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
+#ifdef CONFIG_KEXEC_CORE
+ if (crash_mem_map) {
+ if (IS_ENABLED(CONFIG_ZONE_DMA) ||
+ IS_ENABLED(CONFIG_ZONE_DMA32))
+ flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
+ else if (crashk_res.end)
+ memblock_mark_nomap(crashk_res.start,
+ resource_size(&crashk_res));
+ }
+#endif
+
/* map all the memory banks */
for_each_mem_range(i, &start, &end) {
if (start >= end)
@@ -554,6 +564,25 @@ static void __init map_mem(pgd_t *pgdp)
__map_memblock(pgdp, kernel_start, kernel_end,
PAGE_KERNEL, NO_CONT_MAPPINGS);
memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
+
+ /*
+ * Use page-level mappings here so that we can shrink the region
+ * at page granularity and return unused memory to the buddy system
+ * through the /sys/kernel/kexec_crash_size interface.
+ */
+#ifdef CONFIG_KEXEC_CORE
+ if (crash_mem_map &&
+ !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) {
+ if (crashk_res.end) {
+ __map_memblock(pgdp, crashk_res.start,
+ crashk_res.end + 1,
+ PAGE_KERNEL,
+ NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
+ memblock_clear_nomap(crashk_res.start,
+ resource_size(&crashk_res));
+ }
+ }
+#endif
}
void mark_rodata_ro(void)
@@ -617,6 +646,8 @@ early_param("rodata", parse_rodata);
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
static int __init map_entry_trampoline(void)
{
+ int i;
+
pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);
@@ -625,11 +656,15 @@ static int __init map_entry_trampoline(void)
/* Map only the text into the trampoline page table */
memset(tramp_pg_dir, 0, PGD_SIZE);
- __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE,
- prot, __pgd_pgtable_alloc, 0);
+ __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS,
+ entry_tramp_text_size(), prot,
+ __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS);
/* Map both the text and data into the kernel page table */
- __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot);
+ for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++)
+ __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i,
+ pa_start + i * PAGE_SIZE, prot);
+
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
extern char __entry_tramp_data_start[];
diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
index 7c4ef56265ee..a9e50e930484 100644
--- a/arch/arm64/mm/mteswap.c
+++ b/arch/arm64/mm/mteswap.c
@@ -12,7 +12,7 @@ static DEFINE_XARRAY(mte_pages);
void *mte_allocate_tag_storage(void)
{
/* tags granule is 16 bytes, 2 tags stored per byte */
- return kmalloc(PAGE_SIZE / 16 / 2, GFP_KERNEL);
+ return kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL);
}
void mte_free_tag_storage(char *storage)
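
The named constant replaces open-coded arithmetic without changing the size: assuming the definitions in asm/mte-def.h (16-byte tag granules, 4-bit tags), a 4 KiB page needs 4096 / 16 = 256 tags at two per byte, i.e. 128 bytes. A compile-time restatement of that equivalence (example macros, not kernel code):

#define EXAMPLE_PAGE_SIZE	4096UL
#define MTE_GRANULE_SIZE	16UL	/* bytes covered by one tag */
#define MTE_TAG_SIZE		4	/* tag bits: two tags per byte */

#define EXAMPLE_TAG_STORAGE \
	(EXAMPLE_PAGE_SIZE / MTE_GRANULE_SIZE * MTE_TAG_SIZE / 8)

_Static_assert(EXAMPLE_TAG_STORAGE == EXAMPLE_PAGE_SIZE / 16 / 2,
	       "named constant equals the old open-coded size");
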
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index d35c90d2e47a..50bbed947bec 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -46,7 +46,7 @@
#endif
#ifdef CONFIG_KASAN_HW_TAGS
-#define TCR_MTE_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
+#define TCR_MTE_FLAGS TCR_TCMA1 | TCR_TBI1 | TCR_TBID1
#else
/*
* The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index cc0cf0f5c7c3..9d9250c7cc72 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -89,9 +89,16 @@
#define A64_STXR(sf, Rt, Rn, Rs) \
A64_LSX(sf, Rt, Rn, Rs, STORE_EX)
-/* LSE atomics */
+/*
+ * LSE atomics
+ *
+ * STADD is simply encoded as an alias for LDADD with XZR as
+ * the destination register.
+ */
#define A64_STADD(sf, Rn, Rs) \
- aarch64_insn_gen_stadd(Rn, Rs, A64_SIZE(sf))
+ aarch64_insn_gen_atomic_ld_op(A64_ZR, Rn, Rs, \
+ A64_SIZE(sf), AARCH64_INSN_MEM_ATOMIC_ADD, \
+ AARCH64_INSN_MEM_ORDER_NONE)
/* Add/subtract (immediate) */
#define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \
diff --git a/arch/arm64/tools/Makefile b/arch/arm64/tools/Makefile
index 932b4fe5c768..cf1307188150 100644
--- a/arch/arm64/tools/Makefile
+++ b/arch/arm64/tools/Makefile
@@ -5,18 +5,14 @@ kapi := $(gen)/asm
kapi-hdrs-y := $(kapi)/cpucaps.h
-targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y))
+targets += $(addprefix ../../../, $(kapi-hdrs-y))
PHONY += kapi
-kapi: $(kapi-hdrs-y) $(gen-y)
-
-# Create output directory if not already present
-_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
+kapi: $(kapi-hdrs-y)
quiet_cmd_gen_cpucaps = GEN $@
- cmd_gen_cpucaps = mkdir -p $(dir $@) && \
- $(AWK) -f $(filter-out $(PHONY),$^) > $@
+ cmd_gen_cpucaps = mkdir -p $(dir $@); $(AWK) -f $(real-prereqs) > $@
$(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE
$(call if_changed,gen_cpucaps)
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 870c39537dd0..3ed418f70e3b 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -7,7 +7,8 @@ BTI
HAS_32BIT_EL0_DO_NOT_USE
HAS_32BIT_EL1
HAS_ADDRESS_AUTH
-HAS_ADDRESS_AUTH_ARCH
+HAS_ADDRESS_AUTH_ARCH_QARMA3
+HAS_ADDRESS_AUTH_ARCH_QARMA5
HAS_ADDRESS_AUTH_IMP_DEF
HAS_AMU_EXTN
HAS_ARMv8_4_TTL
@@ -21,7 +22,8 @@ HAS_E0PD
HAS_ECV
HAS_EPAN
HAS_GENERIC_AUTH
-HAS_GENERIC_AUTH_ARCH
+HAS_GENERIC_AUTH_ARCH_QARMA3
+HAS_GENERIC_AUTH_ARCH_QARMA5
HAS_GENERIC_AUTH_IMP_DEF
HAS_IRQ_PRIO_MASKING
HAS_LDAPR
@@ -44,6 +46,7 @@ MTE_ASYMM
SPECTRE_V2
SPECTRE_V3A
SPECTRE_V4
+SPECTRE_BHB
SSBS
SVE
UNMAP_KERNEL_AT_EL0
@@ -55,6 +58,10 @@ WORKAROUND_1418040
WORKAROUND_1463225
WORKAROUND_1508412
WORKAROUND_1542419
+WORKAROUND_1902691
+WORKAROUND_2038923
+WORKAROUND_2064142
+WORKAROUND_2077057
WORKAROUND_TRBE_OVERWRITE_FILL_MODE
WORKAROUND_TSB_FLUSH_FAILURE
WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
index 151607ed5158..bbe245117777 100644
--- a/arch/csky/include/asm/pgtable.h
+++ b/arch/csky/include/asm/pgtable.h
@@ -30,6 +30,7 @@
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+#define pmd_pfn(pmd) (pmd_phys(pmd) >> PAGE_SHIFT)
#define pmd_page(pmd) (pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT))
#define pte_clear(mm, addr, ptep) set_pte((ptep), \
(((unsigned int) addr >= PAGE_OFFSET) ? __pte(_PAGE_GLOBAL) : __pte(0)))
diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h
index 18cd6ea9ab23..0610724d6a28 100644
--- a/arch/hexagon/include/asm/pgtable.h
+++ b/arch/hexagon/include/asm/pgtable.h
@@ -236,6 +236,11 @@ static inline int pmd_bad(pmd_t pmd)
}
/*
+ * pmd_pfn - converts a PMD entry to a page frame number
+ */
+#define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT)
+
+/*
* pmd_page - converts a PMD entry to a page pointer
*/
#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c
index f01e91e10d95..3167a3b5c97b 100644
--- a/arch/hexagon/mm/init.c
+++ b/arch/hexagon/mm/init.c
@@ -29,8 +29,6 @@ int max_kernel_seg = 0x303;
/* indicate pfn's of high memory */
unsigned long highstart_pfn, highend_pfn;
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
/* Default cache attribute for newly created page tables */
unsigned long _dflt_cache_att = CACHEDEF;
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 703952819e10..e003b2473c64 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -8,6 +8,7 @@ menu "Processor type and features"
config IA64
bool
+ select ARCH_BINFMT_ELF_EXTRA_PHDRS
select ARCH_HAS_DMA_MARK_CLEAN
select ARCH_HAS_STRNCPY_FROM_USER
select ARCH_HAS_STRNLEN_USER
@@ -318,7 +319,7 @@ config ARCH_PROC_KCORE_TEXT
depends on PROC_KCORE
config IA64_MCA_RECOVERY
- tristate "MCA recovery from errors other than TLB."
+ bool "MCA recovery from errors other than TLB."
config IA64_PALINFO
tristate "/proc/pal support"
diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig
index 629cb9cdf723..851d8594cdb8 100644
--- a/arch/ia64/configs/zx1_defconfig
+++ b/arch/ia64/configs/zx1_defconfig
@@ -106,7 +106,6 @@ CONFIG_HUGETLBFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V4=y
CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_737=y
CONFIG_NLS_CODEPAGE_775=y
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 9584b2c5f394..7aa8f2330fb1 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -267,6 +267,7 @@ ia64_phys_addr_valid (unsigned long addr)
#define pmd_present(pmd) (pmd_val(pmd) != 0UL)
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL)
#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & _PFN_MASK))
+#define pmd_pfn(pmd) ((pmd_val(pmd) & _PFN_MASK) >> PAGE_SHIFT)
#define pmd_page(pmd) virt_to_page((pmd_val(pmd) + PAGE_OFFSET))
#define pud_none(pud) (!pud_val(pud))
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index 51d20cb37706..1684716f0820 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -55,15 +55,15 @@ struct thread_info {
#ifndef ASM_OFFSETS_C
/* how to get the thread information struct from C */
#define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE))
-#define alloc_thread_stack_node(tsk, node) \
+#define arch_alloc_thread_stack_node(tsk, node) \
((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE))
#define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE))
#else
#define current_thread_info() ((struct thread_info *) 0)
-#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0)
+#define arch_alloc_thread_stack_node(tsk, node) ((unsigned long *) 0)
#define task_thread_info(tsk) ((struct thread_info *) 0)
#endif
-#define free_thread_stack(tsk) /* nothing */
+#define arch_free_thread_stack(tsk) /* nothing */
#define task_stack_page(tsk) ((void *)(tsk))
#define __HAVE_THREAD_FUNCTIONS
diff --git a/arch/ia64/include/asm/xor.h b/arch/ia64/include/asm/xor.h
index 673051bf9d7d..6785f70d3208 100644
--- a/arch/ia64/include/asm/xor.h
+++ b/arch/ia64/include/asm/xor.h
@@ -4,13 +4,20 @@
*/
-extern void xor_ia64_2(unsigned long, unsigned long *, unsigned long *);
-extern void xor_ia64_3(unsigned long, unsigned long *, unsigned long *,
- unsigned long *);
-extern void xor_ia64_4(unsigned long, unsigned long *, unsigned long *,
- unsigned long *, unsigned long *);
-extern void xor_ia64_5(unsigned long, unsigned long *, unsigned long *,
- unsigned long *, unsigned long *, unsigned long *);
+extern void xor_ia64_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+extern void xor_ia64_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3);
+extern void xor_ia64_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+extern void xor_ia64_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5);
static struct xor_block_template xor_block_ia64 = {
.name = "ia64",
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index e4992917a24b..94a848b06f15 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -70,16 +70,6 @@ static int __init topology_init(void)
{
int i, err = 0;
-#ifdef CONFIG_NUMA
- /*
- * MCD - Do we want to register all ONLINE nodes, or all POSSIBLE nodes?
- */
- for_each_online_node(i) {
- if ((err = register_one_node(i)))
- goto out;
- }
-#endif
-
sysfs_cpus = kcalloc(NR_CPUS, sizeof(struct ia64_cpu), GFP_KERNEL);
if (!sysfs_cpus)
panic("kzalloc in topology_init failed - NR_CPUS too big?");
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 791d4176e4a6..73d0db36edb6 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -608,17 +608,11 @@ void __init paging_init(void)
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}
-#ifdef CONFIG_MEMORY_HOTPLUG
-pg_data_t *arch_alloc_nodedata(int nid)
+pg_data_t * __init arch_alloc_nodedata(int nid)
{
unsigned long size = compute_pernodesize(nid);
- return kzalloc(size, GFP_KERNEL);
-}
-
-void arch_free_nodedata(pg_data_t *pgdat)
-{
- kfree(pgdat);
+ return memblock_alloc(size, SMP_CACHE_BYTES);
}
void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
@@ -626,7 +620,6 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
pgdat_list[update_node] = update_pgdat;
scatter_node_data();
}
-#endif
#ifdef CONFIG_SPARSEMEM_VMEMMAP
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c
index acb55a41260d..2bcdd7d3a1ad 100644
--- a/arch/ia64/pci/fixup.c
+++ b/arch/ia64/pci/fixup.c
@@ -76,5 +76,5 @@ static void pci_fixup_video(struct pci_dev *pdev)
}
}
}
-DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID,
+ PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 936e1803c7c7..936cce42ae9a 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -4,6 +4,7 @@ config M68K
default y
select ARCH_32BIT_OFF_T
select ARCH_HAS_BINFMT_FLAT
+ select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE
select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
@@ -17,7 +18,6 @@ config M68K
select GENERIC_CPU_DEVICES
select GENERIC_IOMAP
select GENERIC_IRQ_SHOW
- select HAVE_AOUT if MMU
select HAVE_ASM_MODVERSIONS
select HAVE_DEBUG_BUGVERBOSE
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_HAS_NO_UNALIGNED
diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index be2dfab48fd4..3137b45750df 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -37,6 +37,7 @@
#include <asm/irq.h>
#include <asm/machdep.h>
#include <asm/io.h>
+#include <asm/config.h>
static unsigned long amiga_model;
diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c
index 581a5f68d102..42a8b8e2b664 100644
--- a/arch/m68k/apollo/config.c
+++ b/arch/m68k/apollo/config.c
@@ -16,6 +16,7 @@
#include <asm/apollohw.h>
#include <asm/irq.h>
#include <asm/machdep.h>
+#include <asm/config.h>
u_long sio01_physaddr;
u_long sio23_physaddr;
diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c
index 261a0f57cc9a..38a7c0578105 100644
--- a/arch/m68k/atari/config.c
+++ b/arch/m68k/atari/config.c
@@ -46,6 +46,7 @@
#include <asm/machdep.h>
#include <asm/hwtest.h>
#include <asm/io.h>
+#include <asm/config.h>
u_long atari_mch_cookie;
EXPORT_SYMBOL(atari_mch_cookie);
diff --git a/arch/m68k/atari/stdma.c b/arch/m68k/atari/stdma.c
index ba65f942d0c7..ce6818eff75e 100644
--- a/arch/m68k/atari/stdma.c
+++ b/arch/m68k/atari/stdma.c
@@ -30,7 +30,6 @@
#include <linux/types.h>
#include <linux/kdev_t.h>
-#include <linux/genhd.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/interrupt.h>
diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c
index 0c6feafbbd11..3a1d90e399e0 100644
--- a/arch/m68k/bvme6000/config.c
+++ b/arch/m68k/bvme6000/config.c
@@ -23,7 +23,6 @@
#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/major.h>
-#include <linux/genhd.h>
#include <linux/rtc.h>
#include <linux/interrupt.h>
#include <linux/bcd.h>
@@ -36,6 +35,7 @@
#include <asm/traps.h>
#include <asm/machdep.h>
#include <asm/bvme6000hw.h>
+#include <asm/config.h>
static void bvme6000_get_model(char *model);
extern void bvme6000_sched_init(void);
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index bc9952f8be66..114aaa3f955a 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -104,7 +104,6 @@ CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
CONFIG_NFT_CT=m
CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_CONNLIMIT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
@@ -204,7 +203,6 @@ CONFIG_IP_SET_LIST_SET=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NF_LOG_IPV4=m
CONFIG_IP_NF_IPTABLES=m
@@ -229,7 +227,6 @@ CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -435,6 +432,7 @@ CONFIG_FB_AMIGA_ECS=y
CONFIG_FB_AMIGA_AGA=y
CONFIG_FB_FM2=y
CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y
CONFIG_LOGO=y
CONFIG_SOUND=m
CONFIG_DMASOUND_PAULA=m
@@ -500,7 +498,6 @@ CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
@@ -643,7 +640,7 @@ CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index a77269c6e5ba..30b9d932b930 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -100,7 +100,6 @@ CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
CONFIG_NFT_CT=m
CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_CONNLIMIT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
@@ -200,7 +199,6 @@ CONFIG_IP_SET_LIST_SET=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NF_LOG_IPV4=m
CONFIG_IP_NF_IPTABLES=m
@@ -225,7 +223,6 @@ CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -393,6 +390,7 @@ CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_VGA16 is not set
# CONFIG_LOGO_LINUX_CLUT224 is not set
@@ -457,7 +455,6 @@ CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
@@ -599,7 +596,7 @@ CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 7a74efa6b9a1..51ff3180e69d 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -107,7 +107,6 @@ CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
CONFIG_NFT_CT=m
CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_CONNLIMIT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
@@ -207,7 +206,6 @@ CONFIG_IP_SET_LIST_SET=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NF_LOG_IPV4=m
CONFIG_IP_NF_IPTABLES=m
@@ -232,7 +230,6 @@ CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -478,7 +475,6 @@ CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
@@ -621,7 +617,7 @@ CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index a5323bf2eb33..7d95ca4366e4 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -97,7 +97,6 @@ CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
CONFIG_NFT_CT=m
CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_CONNLIMIT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
@@ -197,7 +196,6 @@ CONFIG_IP_SET_LIST_SET=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NF_LOG_IPV4=m
CONFIG_IP_NF_IPTABLES=m
@@ -222,7 +220,6 @@ CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -450,7 +447,6 @@ CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
@@ -592,7 +588,7 @@ CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 5e80aa0869d5..e306e3813607 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -99,7 +99,6 @@ CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
CONFIG_NFT_CT=m
CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_CONNLIMIT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
@@ -199,7 +198,6 @@ CONFIG_IP_SET_LIST_SET=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NF_LOG_IPV4=m
CONFIG_IP_NF_IPTABLES=m
@@ -224,7 +222,6 @@ CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -395,6 +392,7 @@ CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
@@ -459,7 +457,6 @@ CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
@@ -601,7 +598,7 @@ CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index e84326a3f62d..41316cf02441 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -98,7 +98,6 @@ CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
CONFIG_NFT_CT=m
CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_CONNLIMIT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
@@ -198,7 +197,6 @@ CONFIG_IP_SET_LIST_SET=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NF_LOG_IPV4=m
CONFIG_IP_NF_IPTABLES=m
@@ -223,7 +221,6 @@ CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -480,7 +477,6 @@ CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
@@ -623,7 +619,7 @@ CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 337552f43339..2fc3f0df6d43 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -118,7 +118,6 @@ CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
CONFIG_NFT_CT=m
CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_CONNLIMIT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
@@ -218,7 +217,6 @@ CONFIG_IP_SET_LIST_SET=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NF_LOG_IPV4=m
CONFIG_IP_NF_IPTABLES=m
@@ -243,7 +241,6 @@ CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -497,6 +494,7 @@ CONFIG_FB_ATARI=y
CONFIG_FB_VALKYRIE=y
CONFIG_FB_MAC=y
CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y
CONFIG_LOGO=y
CONFIG_SOUND=m
CONFIG_DMASOUND_ATARI=m
@@ -565,7 +563,6 @@ CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
@@ -708,7 +705,7 @@ CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 7b688f7d272a..9603f4396469 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -96,7 +96,6 @@ CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
CONFIG_NFT_CT=m
CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_CONNLIMIT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
@@ -196,7 +195,6 @@ CONFIG_IP_SET_LIST_SET=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NF_LOG_IPV4=m
CONFIG_IP_NF_IPTABLES=m
@@ -221,7 +219,6 @@ CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -449,7 +446,6 @@ CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
@@ -591,7 +587,7 @@ CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 7c2cb31d63dd..c9cabd3344df 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -97,7 +97,6 @@ CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
CONFIG_NFT_CT=m
CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_CONNLIMIT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
@@ -197,7 +196,6 @@ CONFIG_IP_SET_LIST_SET=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NF_LOG_IPV4=m
CONFIG_IP_NF_IPTABLES=m
@@ -222,7 +220,6 @@ CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -450,7 +447,6 @@ CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
@@ -592,7 +588,7 @@ CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index ca43897af26d..5f994bf44fb8 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -98,7 +98,6 @@ CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
CONFIG_NFT_CT=m
CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_CONNLIMIT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
@@ -198,7 +197,6 @@ CONFIG_IP_SET_LIST_SET=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NF_LOG_IPV4=m
CONFIG_IP_NF_IPTABLES=m
@@ -223,7 +221,6 @@ CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -467,7 +464,6 @@ CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
@@ -610,7 +606,7 @@ CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index e3d515f37144..183e33f7d4a0 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -94,7 +94,6 @@ CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
CONFIG_NFT_CT=m
CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_CONNLIMIT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
@@ -194,7 +193,6 @@ CONFIG_IP_SET_LIST_SET=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NF_LOG_IPV4=m
CONFIG_IP_NF_IPTABLES=m
@@ -219,7 +217,6 @@ CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -388,9 +385,6 @@ CONFIG_NTP_PPS=y
CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
-CONFIG_FB=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_LOGO=y
CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
@@ -452,7 +446,6 @@ CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
@@ -593,7 +586,7 @@ CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index d601606c969b..8214263b9ab8 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -94,7 +94,6 @@ CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
CONFIG_NFT_CT=m
CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_CONNLIMIT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
@@ -194,7 +193,6 @@ CONFIG_IP_SET_LIST_SET=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NF_LOG_IPV4=m
CONFIG_IP_NF_IPTABLES=m
@@ -219,7 +217,6 @@ CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -387,9 +384,6 @@ CONFIG_NTP_PPS=y
CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
-CONFIG_FB=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_LOGO=y
CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
@@ -451,7 +445,6 @@ CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
# CONFIG_CIFS_STATS2 is not set
# CONFIG_CIFS_DEBUG is not set
@@ -593,7 +586,7 @@ CONFIG_TEST_UUID=m
CONFIG_TEST_XARRAY=m
CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
CONFIG_TEST_IDA=m
CONFIG_TEST_BITOPS=m
CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index 9c57b245dc12..267b02cc5655 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -13,7 +13,6 @@
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/types.h>
-#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/slab.h>
diff --git a/arch/m68k/hp300/config.c b/arch/m68k/hp300/config.c
index ce1eb3d3d55d..2c92843397c3 100644
--- a/arch/m68k/hp300/config.c
+++ b/arch/m68k/hp300/config.c
@@ -22,6 +22,7 @@
#include <asm/blinken.h>
#include <asm/io.h> /* readb() and writeb() */
#include <asm/hp300hw.h>
+#include <asm/config.h>
#include "time.h"
diff --git a/arch/m68k/include/asm/cmpxchg.h b/arch/m68k/include/asm/cmpxchg.h
index e8ca4b0ccefa..6cf464cdab06 100644
--- a/arch/m68k/include/asm/cmpxchg.h
+++ b/arch/m68k/include/asm/cmpxchg.h
@@ -4,8 +4,7 @@
#include <linux/irqflags.h>
-struct __xchg_dummy { unsigned long a[100]; };
-#define __xg(x) ((volatile struct __xchg_dummy *)(x))
+#define __xg(type, x) ((volatile type *)(x))
extern unsigned long __invalid_xchg_size(unsigned long, volatile void *, int);
@@ -50,7 +49,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
"1:\n\t"
"casb %0,%1,%2\n\t"
"jne 1b"
- : "=&d" (x) : "d" (x), "m" (*__xg(ptr)) : "memory");
+ : "=&d" (x) : "d" (x), "m" (*__xg(u8, ptr)) : "memory");
break;
case 2:
__asm__ __volatile__
@@ -58,7 +57,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
"1:\n\t"
"casw %0,%1,%2\n\t"
"jne 1b"
- : "=&d" (x) : "d" (x), "m" (*__xg(ptr)) : "memory");
+ : "=&d" (x) : "d" (x), "m" (*__xg(u16, ptr)) : "memory");
break;
case 4:
__asm__ __volatile__
@@ -66,7 +65,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
"1:\n\t"
"casl %0,%1,%2\n\t"
"jne 1b"
- : "=&d" (x) : "d" (x), "m" (*__xg(ptr)) : "memory");
+ : "=&d" (x) : "d" (x), "m" (*__xg(u32, ptr)) : "memory");
break;
default:
x = __invalid_xchg_size(x, ptr, size);
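
For context on the __xg() change: the old dummy-struct cast made every "m" constraint operand look like one opaque 400-byte object, while the typed cast tells the compiler exactly how wide each cas access is. A stand-alone reduction (hypothetical, not kernel code):

#include <stdint.h>

#define __xg(type, x) ((volatile type *)(x))

/* The volatile, correctly-sized lvalue gives the asm a memory operand
 * of matching width (u8 for casb, u16 for casw, u32 for casl).
 */
static inline uint8_t load_byte_example(volatile void *ptr)
{
	return *__xg(uint8_t, ptr);
}

int main(void)
{
	uint8_t v = 42;

	return load_byte_example(&v) == 42 ? 0 : 1;
}
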
diff --git a/arch/m68k/include/asm/config.h b/arch/m68k/include/asm/config.h
new file mode 100644
index 000000000000..e73ffa23c4f5
--- /dev/null
+++ b/arch/m68k/include/asm/config.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * This file contains prototypes provided by each m68k machine
+ * to parse bootinfo data structures and to configure the machine
+ */
+
+#ifndef _M68K_CONFIG_H
+#define _M68K_CONFIG_H
+
+extern int amiga_parse_bootinfo(const struct bi_record *record);
+extern int apollo_parse_bootinfo(const struct bi_record *record);
+extern int atari_parse_bootinfo(const struct bi_record *record);
+extern int bvme6000_parse_bootinfo(const struct bi_record *record);
+extern int hp300_parse_bootinfo(const struct bi_record *record);
+extern int mac_parse_bootinfo(const struct bi_record *record);
+extern int mvme147_parse_bootinfo(const struct bi_record *record);
+extern int mvme16x_parse_bootinfo(const struct bi_record *record);
+extern int q40_parse_bootinfo(const struct bi_record *record);
+
+extern void config_amiga(void);
+extern void config_apollo(void);
+extern void config_atari(void);
+extern void config_bvme6000(void);
+extern void config_hp300(void);
+extern void config_mac(void);
+extern void config_mvme147(void);
+extern void config_mvme16x(void);
+extern void config_q40(void);
+extern void config_sun3(void);
+extern void config_sun3x(void);
+
+#endif /* _M68K_CONFIG_H */
diff --git a/arch/m68k/include/asm/current.h b/arch/m68k/include/asm/current.h
index 6390ef2f7f86..c117907e1276 100644
--- a/arch/m68k/include/asm/current.h
+++ b/arch/m68k/include/asm/current.h
@@ -24,6 +24,8 @@ static inline struct task_struct *get_current(void)
#define current get_current()
-#endif /* CONFNIG_MMU */
+#endif /* CONFIG_MMU */
+
+register unsigned long current_stack_pointer __asm__("sp");
#endif /* !(_M68K_CURRENT_H) */
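
The new current_stack_pointer declaration is a GCC global register variable: every read compiles to a plain move from %sp, with no per-call-site inline asm. A minimal sketch of how code can use it on m68k (the helper is hypothetical):

register unsigned long current_stack_pointer __asm__("sp");

/* Assumes the m68k stack grows downward; illustrative only. */
static inline unsigned long stack_bytes_left(unsigned long stack_low)
{
	return current_stack_pointer - stack_low;
}
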
diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h
index 6f2b87d7a50d..94f38d76e278 100644
--- a/arch/m68k/include/asm/mcf_pgtable.h
+++ b/arch/m68k/include/asm/mcf_pgtable.h
@@ -322,6 +322,7 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD];
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) (__pte((x).val))
+#define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT)
#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h
index 022c3abc280d..7c9b56e2a750 100644
--- a/arch/m68k/include/asm/motorola_pgtable.h
+++ b/arch/m68k/include/asm/motorola_pgtable.h
@@ -147,6 +147,7 @@ static inline void pud_set(pud_t *pudp, pmd_t *pmdp)
#define pmd_present(pmd) (pmd_val(pmd) & _PAGE_TABLE)
#define pmd_clear(pmdp) ({ pmd_val(*pmdp) = 0; })
+#define pmd_pfn(pmd) ((pmd_val(pmd) & _TABLE_MASK) >> PAGE_SHIFT)
/*
* m68k does not have huge pages (020/030 actually could), but generic code
* expects pmd_page() to exists, only to then DCE it all. Provide a dummy to
diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h
index 5b24283a0a42..5e4e753f0d24 100644
--- a/arch/m68k/include/asm/sun3_pgtable.h
+++ b/arch/m68k/include/asm/sun3_pgtable.h
@@ -130,6 +130,7 @@ static inline void pte_clear (struct mm_struct *mm, unsigned long addr, pte_t *p
({ pte_t __pte; pte_val(__pte) = pfn | pgprot_val(pgprot); __pte; })
#define pte_page(pte) virt_to_page(__pte_page(pte))
+#define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT)
#define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd))
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index 49e573b94326..8f94feed969c 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -16,7 +16,6 @@
#include <linux/interrupt.h>
#include <linux/fs.h>
#include <linux/console.h>
-#include <linux/genhd.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/init.h>
@@ -47,6 +46,7 @@
#endif
#include <asm/macintosh.h>
#include <asm/natfeat.h>
+#include <asm/config.h>
#if !FPSTATESIZE || !NR_IRQS
#warning No CPU/platform type selected, your kernel will not work!
@@ -113,28 +113,6 @@ EXPORT_SYMBOL(isa_type);
EXPORT_SYMBOL(isa_sex);
#endif
-extern int amiga_parse_bootinfo(const struct bi_record *);
-extern int atari_parse_bootinfo(const struct bi_record *);
-extern int mac_parse_bootinfo(const struct bi_record *);
-extern int q40_parse_bootinfo(const struct bi_record *);
-extern int bvme6000_parse_bootinfo(const struct bi_record *);
-extern int mvme16x_parse_bootinfo(const struct bi_record *);
-extern int mvme147_parse_bootinfo(const struct bi_record *);
-extern int hp300_parse_bootinfo(const struct bi_record *);
-extern int apollo_parse_bootinfo(const struct bi_record *);
-
-extern void config_amiga(void);
-extern void config_atari(void);
-extern void config_mac(void);
-extern void config_sun3(void);
-extern void config_apollo(void);
-extern void config_mvme147(void);
-extern void config_mvme16x(void);
-extern void config_bvme6000(void);
-extern void config_hp300(void);
-extern void config_q40(void);
-extern void config_sun3x(void);
-
#define MASK_256K 0xfffc0000
extern void paging_init(void);
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index 5d16f9b47aa9..65d124ec80bb 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -47,6 +47,7 @@
#include <asm/mac_via.h>
#include <asm/mac_oss.h>
#include <asm/mac_psc.h>
+#include <asm/config.h>
/* Mac bootinfo struct */
struct mac_booter_data mac_bi_data;
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index 1493cf5eac1e..71aa9f6315dc 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -93,8 +93,6 @@ retry:
vma = find_vma(mm, address);
if (!vma)
goto map_err;
- if (vma->vm_flags & VM_IO)
- goto acc_err;
if (vma->vm_start <= address)
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index dfd6202fd403..4e6218115f43 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -22,7 +22,6 @@
#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/major.h>
-#include <linux/genhd.h>
#include <linux/rtc.h>
#include <linux/interrupt.h>
@@ -34,6 +33,7 @@
#include <asm/traps.h>
#include <asm/machdep.h>
#include <asm/mvme147hw.h>
+#include <asm/config.h>
static void mvme147_get_model(char *model);
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index b4422c2dfbbf..f00c7aa058de 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -24,7 +24,6 @@
#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/major.h>
-#include <linux/genhd.h>
#include <linux/rtc.h>
#include <linux/interrupt.h>
#include <linux/module.h>
@@ -37,6 +36,7 @@
#include <asm/traps.h>
#include <asm/machdep.h>
#include <asm/mvme16xhw.h>
+#include <asm/config.h>
extern t_bdid mvme_bdid;
diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c
index 5caf1e5be1c2..9237243077ce 100644
--- a/arch/m68k/q40/config.c
+++ b/arch/m68k/q40/config.c
@@ -34,6 +34,7 @@
#include <asm/traps.h>
#include <asm/machdep.h>
#include <asm/q40_master.h>
+#include <asm/config.h>
extern void q40_init_IRQ(void);
static void q40_get_model(char *model);
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 59798e43cdb0..da568e981604 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -45,6 +45,8 @@ config MICROBLAZE
select SET_FS
select ZONE_DMA
select TRACE_IRQFLAGS_SUPPORT
+ select GENERIC_IRQ_MULTI_HANDLER
+ select HANDLE_DOMAIN_IRQ
# Endianness selection
choice
diff --git a/arch/microblaze/include/asm/irq.h b/arch/microblaze/include/asm/irq.h
index 0a28e80bbab0..cb6ab55d1d01 100644
--- a/arch/microblaze/include/asm/irq.h
+++ b/arch/microblaze/include/asm/irq.h
@@ -11,7 +11,4 @@
struct pt_regs;
extern void do_IRQ(struct pt_regs *regs);
-/* should be defined in each interrupt controller driver */
-extern unsigned int xintc_get_irq(void);
-
#endif /* _ASM_MICROBLAZE_IRQ_H */
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index c136a01e467e..0c72646370e1 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -399,6 +399,9 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
return ((unsigned long) (pmd_val(pmd) & PAGE_MASK));
}
+/* returns pfn of the pmd entry*/
+#define pmd_pfn(pmd) (__pa(pmd_val(pmd)) >> PAGE_SHIFT)
+
/* returns struct *page of the pmd entry*/
#define pmd_page(pmd) (pfn_to_page(__pa(pmd_val(pmd)) >> PAGE_SHIFT))
diff --git a/arch/microblaze/kernel/irq.c b/arch/microblaze/kernel/irq.c
index 903dad822fad..1f8cb4c4f74f 100644
--- a/arch/microblaze/kernel/irq.c
+++ b/arch/microblaze/kernel/irq.c
@@ -20,27 +20,13 @@
#include <linux/irqchip.h>
#include <linux/of_irq.h>
-static u32 concurrent_irq;
-
void __irq_entry do_IRQ(struct pt_regs *regs)
{
- unsigned int irq;
struct pt_regs *old_regs = set_irq_regs(regs);
trace_hardirqs_off();
irq_enter();
- irq = xintc_get_irq();
-next_irq:
- BUG_ON(!irq);
- generic_handle_irq(irq);
-
- irq = xintc_get_irq();
- if (irq != -1U) {
- pr_debug("next irq: %d\n", irq);
- ++concurrent_irq;
- goto next_irq;
- }
-
+ handle_arch_irq(regs);
irq_exit();
set_irq_regs(old_regs);
trace_hardirqs_on();
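The two microblaze hunks above (the Kconfig one selecting GENERIC_IRQ_MULTI_HANDLER and this irq.c rework) move the architecture onto the generic IRQ entry path: the open-coded xintc polling loop is gone and do_IRQ() simply calls handle_arch_irq(), i.e. whatever top-level handler the irqchip driver registered. A minimal sketch of the driver side, assuming a hypothetical xintc_read_pending() helper and xintc_domain pointer (neither name is taken from this diff):

    /* Sketch only: the irqchip driver owns the polling loop and hands
     * its top-level handler to the core; xintc_read_pending() and
     * xintc_domain are illustrative names, not the real driver's API. */
    static struct irq_domain *xintc_domain;
    static u32 xintc_read_pending(void);        /* assumed helper */

    static void xintc_handle_irq(struct pt_regs *regs)
    {
        u32 hwirq;

        while ((hwirq = xintc_read_pending()) != -1U)
            generic_handle_domain_irq(xintc_domain, hwirq);
    }

    static int __init xintc_init(void)
    {
        /* ... map registers, create xintc_domain ... */
        set_handle_irq(xintc_handle_irq);       /* becomes handle_arch_irq */
        return 0;
    }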
diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts
index 3e336b3dbb10..ab6e3dc0bc1d 100644
--- a/arch/mips/boot/dts/ingenic/ci20.dts
+++ b/arch/mips/boot/dts/ingenic/ci20.dts
@@ -83,6 +83,8 @@
label = "HDMI OUT";
type = "a";
+ ddc-en-gpios = <&gpa 25 GPIO_ACTIVE_HIGH>;
+
port {
hdmi_con: endpoint {
remote-endpoint = <&dw_hdmi_out>;
@@ -114,17 +116,6 @@
gpio = <&gpf 14 GPIO_ACTIVE_LOW>;
enable-active-high;
};
-
- hdmi_power: fixedregulator@3 {
- compatible = "regulator-fixed";
-
- regulator-name = "hdmi_power";
- regulator-min-microvolt = <5000000>;
- regulator-max-microvolt = <5000000>;
-
- gpio = <&gpa 25 0>;
- enable-active-high;
- };
};
&ext {
@@ -576,8 +567,6 @@
pinctrl-names = "default";
pinctrl-0 = <&pins_hdmi_ddc>;
- hdmi-5v-supply = <&hdmi_power>;
-
ports {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/mips/cavium-octeon/octeon-memcpy.S b/arch/mips/cavium-octeon/octeon-memcpy.S
index 0a515cde1c18..25860fba6218 100644
--- a/arch/mips/cavium-octeon/octeon-memcpy.S
+++ b/arch/mips/cavium-octeon/octeon-memcpy.S
@@ -74,7 +74,7 @@
#define EXC(inst_reg,addr,handler) \
9: inst_reg, addr; \
.section __ex_table,"a"; \
- PTR 9b, handler; \
+ PTR_WD 9b, handler; \
.previous
/*
diff --git a/arch/mips/configs/cobalt_defconfig b/arch/mips/configs/cobalt_defconfig
index c6a652ad34f7..e835730ea7fa 100644
--- a/arch/mips/configs/cobalt_defconfig
+++ b/arch/mips/configs/cobalt_defconfig
@@ -69,6 +69,5 @@ CONFIG_CONFIGFS_FS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
CONFIG_NFSD_V3_ACL=y
CONFIG_LIBCRC32C=y
diff --git a/arch/mips/configs/decstation_64_defconfig b/arch/mips/configs/decstation_64_defconfig
index e2ed105f8c97..0021427a1bbe 100644
--- a/arch/mips/configs/decstation_64_defconfig
+++ b/arch/mips/configs/decstation_64_defconfig
@@ -159,7 +159,6 @@ CONFIG_NFS_V3_ACL=y
CONFIG_NFS_SWAP=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_NFSD_V3_ACL=y
# CONFIG_RPCSEC_GSS_KRB5 is not set
CONFIG_NLS_ISO8859_8=m
diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig
index 7e987d6f5e34..7a97a0818ce4 100644
--- a/arch/mips/configs/decstation_defconfig
+++ b/arch/mips/configs/decstation_defconfig
@@ -154,7 +154,6 @@ CONFIG_NFS_V3_ACL=y
CONFIG_NFS_SWAP=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_NFSD_V3_ACL=y
# CONFIG_RPCSEC_GSS_KRB5 is not set
CONFIG_NLS_ISO8859_8=m
diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig
index 6df5f6f2ac8e..a0643363526d 100644
--- a/arch/mips/configs/decstation_r4k_defconfig
+++ b/arch/mips/configs/decstation_r4k_defconfig
@@ -154,7 +154,6 @@ CONFIG_NFS_V3_ACL=y
CONFIG_NFS_SWAP=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_NFSD_V3_ACL=y
# CONFIG_RPCSEC_GSS_KRB5 is not set
CONFIG_NLS_ISO8859_8=m
diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig
index 21a1168ae301..70a4ba90f491 100644
--- a/arch/mips/configs/ip22_defconfig
+++ b/arch/mips/configs/ip22_defconfig
@@ -269,7 +269,6 @@ CONFIG_UFS_FS=m
CONFIG_NFS_FS=m
CONFIG_NFS_V3_ACL=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_NFSD_V3_ACL=y
CONFIG_CIFS=m
CONFIG_CIFS_UPCALL=y
diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig
index 1ae48f7d9ddd..74020aa3440b 100644
--- a/arch/mips/configs/ip32_defconfig
+++ b/arch/mips/configs/ip32_defconfig
@@ -112,7 +112,6 @@ CONFIG_CONFIGFS_FS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
CONFIG_NLS=y
CONFIG_NLS_CODEPAGE_437=m
diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig
index 8c223035921f..843f360da5f2 100644
--- a/arch/mips/configs/jazz_defconfig
+++ b/arch/mips/configs/jazz_defconfig
@@ -92,5 +92,4 @@ CONFIG_TMPFS=y
CONFIG_UFS_FS=m
CONFIG_NFS_FS=m
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig
index 3321bb576944..1c220d152a50 100644
--- a/arch/mips/configs/malta_defconfig
+++ b/arch/mips/configs/malta_defconfig
@@ -363,7 +363,6 @@ CONFIG_UFS_FS=m
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_CODEPAGE_737=m
CONFIG_NLS_CODEPAGE_775=m
diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig
index 009b30372226..b5ba08d7ab57 100644
--- a/arch/mips/configs/malta_kvm_defconfig
+++ b/arch/mips/configs/malta_kvm_defconfig
@@ -371,7 +371,6 @@ CONFIG_UFS_FS=m
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_CODEPAGE_737=m
CONFIG_NLS_CODEPAGE_775=m
diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig
index e214e136101c..8d58653f1b4e 100644
--- a/arch/mips/configs/maltaup_xpa_defconfig
+++ b/arch/mips/configs/maltaup_xpa_defconfig
@@ -370,7 +370,6 @@ CONFIG_UFS_FS=m
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_CODEPAGE_737=m
CONFIG_NLS_CODEPAGE_775=m
diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig
index 3dc2da2bee0d..7d6f235e8ccb 100644
--- a/arch/mips/configs/rm200_defconfig
+++ b/arch/mips/configs/rm200_defconfig
@@ -354,7 +354,6 @@ CONFIG_SYSV_FS=m
CONFIG_UFS_FS=m
CONFIG_NFS_FS=m
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
CONFIG_CODA_FS=m
CONFIG_AFS_FS=m
diff --git a/arch/mips/configs/tb0219_defconfig b/arch/mips/configs/tb0219_defconfig
index 6547f84750b5..c56d8ab14ba6 100644
--- a/arch/mips/configs/tb0219_defconfig
+++ b/arch/mips/configs/tb0219_defconfig
@@ -72,6 +72,5 @@ CONFIG_ROMFS_FS=m
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="cca=3 mem=64M console=ttyVR0,115200 ip=any root=/dev/nfs"
diff --git a/arch/mips/configs/tb0226_defconfig b/arch/mips/configs/tb0226_defconfig
index 7e099f7c2286..6e1423428f02 100644
--- a/arch/mips/configs/tb0226_defconfig
+++ b/arch/mips/configs/tb0226_defconfig
@@ -67,6 +67,5 @@ CONFIG_ROMFS_FS=m
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="cca=3 mem=32M console=ttyVR0,115200"
diff --git a/arch/mips/configs/tb0287_defconfig b/arch/mips/configs/tb0287_defconfig
index 0d881dd862c0..cf65a0879ece 100644
--- a/arch/mips/configs/tb0287_defconfig
+++ b/arch/mips/configs/tb0287_defconfig
@@ -77,7 +77,6 @@ CONFIG_ROMFS_FS=m
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_FONTS=y
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y
diff --git a/arch/mips/configs/workpad_defconfig b/arch/mips/configs/workpad_defconfig
index 4798dc86c9ce..7e16da0bde8c 100644
--- a/arch/mips/configs/workpad_defconfig
+++ b/arch/mips/configs/workpad_defconfig
@@ -63,6 +63,5 @@ CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_NFS_FS=m
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=ttyVR0,19200 ide0=0x170,0x376,49 mem=16M"
diff --git a/arch/mips/include/asm/asm.h b/arch/mips/include/asm/asm.h
index 6ffdd4b5e1d0..336ac9b65235 100644
--- a/arch/mips/include/asm/asm.h
+++ b/arch/mips/include/asm/asm.h
@@ -285,7 +285,7 @@ symbol = value
#define PTR_SCALESHIFT 2
-#define PTR .word
+#define PTR_WD .word
#define PTRSIZE 4
#define PTRLOG 2
#endif
@@ -310,7 +310,7 @@ symbol = value
#define PTR_SCALESHIFT 3
-#define PTR .dword
+#define PTR_WD .dword
#define PTRSIZE 8
#define PTRLOG 3
#endif
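Every remaining MIPS hunk below follows from this rename: the assembler macro PTR becomes PTR_WD (a pointer-width .word or .dword), presumably to keep the short name PTR from colliding with other definitions in shared headers. A hedged sketch of the pattern the converted call sites use when emitting a __ex_table entry from C inline assembly (load_word_nofault() is an illustrative name; STR() and PTR_WD come from <asm/asm.h>):

    /* Sketch: a faulting load whose fixup simply falls through to
     * label 2, leaving the preset value of v in place. */
    static inline u32 load_word_nofault(const u32 *addr)
    {
        u32 v = 0;

        asm("1:\tlw\t%0, 0(%1)\n"
            "2:\n"
            "\t.section\t__ex_table,\"a\"\n\t"
            STR(PTR_WD) "\t1b, 2b\n"
            "\t.previous"
            : "+&r"(v)
            : "r"(addr));
        return v;
    }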
diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h
index b463f2aa5a61..db497a8167da 100644
--- a/arch/mips/include/asm/ftrace.h
+++ b/arch/mips/include/asm/ftrace.h
@@ -32,7 +32,7 @@ do { \
".previous\n" \
\
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR) "\t1b, 3b\n\t" \
+ STR(PTR_WD) "\t1b, 3b\n\t" \
".previous\n" \
\
: [tmp_dst] "=&r" (dst), [tmp_err] "=r" (error)\
@@ -54,7 +54,7 @@ do { \
".previous\n" \
\
".section\t__ex_table,\"a\"\n\t"\
- STR(PTR) "\t1b, 3b\n\t" \
+ STR(PTR_WD) "\t1b, 3b\n\t" \
".previous\n" \
\
: [tmp_err] "=r" (error) \
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 7b8037f25d9e..374c6322775d 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -86,6 +86,11 @@ extern void paging_init(void);
*/
#define pmd_phys(pmd) virt_to_phys((void *)pmd_val(pmd))
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+ return pmd_val(pmd) >> _PFN_SHIFT;
+}
+
#ifndef CONFIG_MIPS_HUGE_TLB_SUPPORT
#define pmd_page(pmd) (pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT))
#endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */
@@ -422,11 +427,6 @@ static inline int pmd_write(pmd_t pmd)
return !!(pmd_val(pmd) & _PAGE_WRITE);
}
-static inline unsigned long pmd_pfn(pmd_t pmd)
-{
- return pmd_val(pmd) >> _PFN_SHIFT;
-}
-
static inline struct page *pmd_page(pmd_t pmd)
{
if (pmd_val(pmd) & _PAGE_HUGE)
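This series adds a pmd_pfn() definition on each architecture that lacked one (see also the microblaze, nds32, nios2, openrisc and parisc hunks), so generic mm code can rely on it unconditionally. A hedged sketch of the kind of generic helper this enables (illustrative name, not code from this diff):

    /* With pmd_pfn() universally available, a leaf PMD can be turned
     * into its struct page without arch-specific detours. */
    static inline struct page *pmd_leaf_page(pmd_t pmd)
    {
        return pfn_to_page(pmd_pfn(pmd));
    }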
diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
index af3788589ee6..431a1c9d53fc 100644
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -119,7 +119,7 @@ static inline void flush_scache_line(unsigned long addr)
" j 2b \n" \
" .previous \n" \
" .section __ex_table,\"a\" \n" \
- " "STR(PTR)" 1b, 3b \n" \
+ " "STR(PTR_WD)" 1b, 3b \n" \
" .previous" \
: "+r" (__err) \
: "i" (op), "r" (addr), "i" (-EFAULT)); \
@@ -142,7 +142,7 @@ static inline void flush_scache_line(unsigned long addr)
" j 2b \n" \
" .previous \n" \
" .section __ex_table,\"a\" \n" \
- " "STR(PTR)" 1b, 3b \n" \
+ " "STR(PTR_WD)" 1b, 3b \n" \
" .previous" \
: "+r" (__err) \
: "i" (op), "r" (addr), "i" (-EFAULT)); \
diff --git a/arch/mips/include/asm/unaligned-emul.h b/arch/mips/include/asm/unaligned-emul.h
index 2022b18944b9..9af0f4d3d288 100644
--- a/arch/mips/include/asm/unaligned-emul.h
+++ b/arch/mips/include/asm/unaligned-emul.h
@@ -20,8 +20,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -41,8 +41,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -74,10 +74,10 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -102,8 +102,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -125,8 +125,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -145,8 +145,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -178,10 +178,10 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -223,14 +223,14 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- STR(PTR)"\t5b, 11b\n\t" \
- STR(PTR)"\t6b, 11b\n\t" \
- STR(PTR)"\t7b, 11b\n\t" \
- STR(PTR)"\t8b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t5b, 11b\n\t" \
+ STR(PTR_WD)"\t6b, 11b\n\t" \
+ STR(PTR_WD)"\t7b, 11b\n\t" \
+ STR(PTR_WD)"\t8b, 11b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -255,8 +255,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT));\
@@ -276,8 +276,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT)); \
@@ -296,8 +296,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT)); \
@@ -325,10 +325,10 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
".previous" \
: "=&r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT) \
@@ -365,14 +365,14 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- STR(PTR)"\t5b, 11b\n\t" \
- STR(PTR)"\t6b, 11b\n\t" \
- STR(PTR)"\t7b, 11b\n\t" \
- STR(PTR)"\t8b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t5b, 11b\n\t" \
+ STR(PTR_WD)"\t6b, 11b\n\t" \
+ STR(PTR_WD)"\t7b, 11b\n\t" \
+ STR(PTR_WD)"\t8b, 11b\n\t" \
".previous" \
: "=&r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT) \
@@ -398,8 +398,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -419,8 +419,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -452,10 +452,10 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -481,8 +481,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -504,8 +504,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -524,8 +524,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -557,10 +557,10 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -602,14 +602,14 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- STR(PTR)"\t5b, 11b\n\t" \
- STR(PTR)"\t6b, 11b\n\t" \
- STR(PTR)"\t7b, 11b\n\t" \
- STR(PTR)"\t8b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t5b, 11b\n\t" \
+ STR(PTR_WD)"\t6b, 11b\n\t" \
+ STR(PTR_WD)"\t7b, 11b\n\t" \
+ STR(PTR_WD)"\t8b, 11b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -632,8 +632,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT));\
@@ -653,8 +653,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT)); \
@@ -673,8 +673,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT)); \
@@ -703,10 +703,10 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
".previous" \
: "=&r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT) \
@@ -743,14 +743,14 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- STR(PTR)"\t5b, 11b\n\t" \
- STR(PTR)"\t6b, 11b\n\t" \
- STR(PTR)"\t7b, 11b\n\t" \
- STR(PTR)"\t8b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t5b, 11b\n\t" \
+ STR(PTR_WD)"\t6b, 11b\n\t" \
+ STR(PTR_WD)"\t7b, 11b\n\t" \
+ STR(PTR_WD)"\t8b, 11b\n\t" \
".previous" \
: "=&r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT) \
diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
index a39ec755e4c2..750fe569862b 100644
--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
+++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
@@ -1258,10 +1258,10 @@ fpu_emul:
" j 10b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1333,10 +1333,10 @@ fpu_emul:
" j 10b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1404,10 +1404,10 @@ fpu_emul:
" j 9b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1474,10 +1474,10 @@ fpu_emul:
" j 9b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1589,14 +1589,14 @@ fpu_emul:
" j 9b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
- STR(PTR) " 5b,8b\n"
- STR(PTR) " 6b,8b\n"
- STR(PTR) " 7b,8b\n"
- STR(PTR) " 0b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
+ STR(PTR_WD) " 5b,8b\n"
+ STR(PTR_WD) " 6b,8b\n"
+ STR(PTR_WD) " 7b,8b\n"
+ STR(PTR_WD) " 0b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1708,14 +1708,14 @@ fpu_emul:
" j 9b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
- STR(PTR) " 5b,8b\n"
- STR(PTR) " 6b,8b\n"
- STR(PTR) " 7b,8b\n"
- STR(PTR) " 0b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
+ STR(PTR_WD) " 5b,8b\n"
+ STR(PTR_WD) " 6b,8b\n"
+ STR(PTR_WD) " 7b,8b\n"
+ STR(PTR_WD) " 0b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1827,14 +1827,14 @@ fpu_emul:
" j 9b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
- STR(PTR) " 5b,8b\n"
- STR(PTR) " 6b,8b\n"
- STR(PTR) " 7b,8b\n"
- STR(PTR) " 0b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
+ STR(PTR_WD) " 5b,8b\n"
+ STR(PTR_WD) " 6b,8b\n"
+ STR(PTR_WD) " 7b,8b\n"
+ STR(PTR_WD) " 0b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1945,14 +1945,14 @@ fpu_emul:
" j 9b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
- STR(PTR) " 5b,8b\n"
- STR(PTR) " 6b,8b\n"
- STR(PTR) " 7b,8b\n"
- STR(PTR) " 0b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
+ STR(PTR_WD) " 5b,8b\n"
+ STR(PTR_WD) " 6b,8b\n"
+ STR(PTR_WD) " 7b,8b\n"
+ STR(PTR_WD) " 0b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -2007,7 +2007,7 @@ fpu_emul:
"j 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
- STR(PTR) " 1b,3b\n"
+ STR(PTR_WD) " 1b,3b\n"
".previous\n"
: "=&r"(res), "+&r"(err)
: "r"(vaddr), "i"(SIGSEGV)
@@ -2065,7 +2065,7 @@ fpu_emul:
"j 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
- STR(PTR) " 1b,3b\n"
+ STR(PTR_WD) " 1b,3b\n"
".previous\n"
: "+&r"(res), "+&r"(err)
: "r"(vaddr), "i"(SIGSEGV));
@@ -2126,7 +2126,7 @@ fpu_emul:
"j 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
- STR(PTR) " 1b,3b\n"
+ STR(PTR_WD) " 1b,3b\n"
".previous\n"
: "=&r"(res), "+&r"(err)
: "r"(vaddr), "i"(SIGSEGV)
@@ -2189,7 +2189,7 @@ fpu_emul:
"j 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
- STR(PTR) " 1b,3b\n"
+ STR(PTR_WD) " 1b,3b\n"
".previous\n"
: "+&r"(res), "+&r"(err)
: "r"(vaddr), "i"(SIGSEGV));
diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S
index cbf6db98cfb3..2748c55820c2 100644
--- a/arch/mips/kernel/r2300_fpu.S
+++ b/arch/mips/kernel/r2300_fpu.S
@@ -23,14 +23,14 @@
#define EX(a,b) \
9: a,##b; \
.section __ex_table,"a"; \
- PTR 9b,fault; \
+ PTR_WD 9b,fault; \
.previous
#define EX2(a,b) \
9: a,##b; \
.section __ex_table,"a"; \
- PTR 9b,fault; \
- PTR 9b+4,fault; \
+ PTR_WD 9b,fault; \
+ PTR_WD 9b+4,fault; \
.previous
.set mips1
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index b91e91106475..2e687c60bc4f 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -31,7 +31,7 @@
.ex\@: \insn \reg, \src
.set pop
.section __ex_table,"a"
- PTR .ex\@, fault
+ PTR_WD .ex\@, fault
.previous
.endm
diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S
index f3c908abdbb8..cfde14b48fd8 100644
--- a/arch/mips/kernel/relocate_kernel.S
+++ b/arch/mips/kernel/relocate_kernel.S
@@ -147,10 +147,10 @@ LEAF(kexec_smp_wait)
kexec_args:
EXPORT(kexec_args)
-arg0: PTR 0x0
-arg1: PTR 0x0
-arg2: PTR 0x0
-arg3: PTR 0x0
+arg0: PTR_WD 0x0
+arg1: PTR_WD 0x0
+arg2: PTR_WD 0x0
+arg3: PTR_WD 0x0
.size kexec_args,PTRSIZE*4
#ifdef CONFIG_SMP
@@ -161,10 +161,10 @@ arg3: PTR 0x0
*/
secondary_kexec_args:
EXPORT(secondary_kexec_args)
-s_arg0: PTR 0x0
-s_arg1: PTR 0x0
-s_arg2: PTR 0x0
-s_arg3: PTR 0x0
+s_arg0: PTR_WD 0x0
+s_arg1: PTR_WD 0x0
+s_arg2: PTR_WD 0x0
+s_arg3: PTR_WD 0x0
.size secondary_kexec_args,PTRSIZE*4
kexec_flag:
LONG 0x1
@@ -173,17 +173,17 @@ kexec_flag:
kexec_start_address:
EXPORT(kexec_start_address)
- PTR 0x0
+ PTR_WD 0x0
.size kexec_start_address, PTRSIZE
kexec_indirection_page:
EXPORT(kexec_indirection_page)
- PTR 0
+ PTR_WD 0
.size kexec_indirection_page, PTRSIZE
relocate_new_kernel_end:
relocate_new_kernel_size:
EXPORT(relocate_new_kernel_size)
- PTR relocate_new_kernel_end - relocate_new_kernel
+ PTR_WD relocate_new_kernel_end - relocate_new_kernel
.size relocate_new_kernel_size, PTRSIZE
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index b1b2e106f711..9bfce5f75f60 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -72,10 +72,10 @@ loads_done:
.set pop
.section __ex_table,"a"
- PTR load_a4, bad_stack_a4
- PTR load_a5, bad_stack_a5
- PTR load_a6, bad_stack_a6
- PTR load_a7, bad_stack_a7
+ PTR_WD load_a4, bad_stack_a4
+ PTR_WD load_a5, bad_stack_a5
+ PTR_WD load_a6, bad_stack_a6
+ PTR_WD load_a7, bad_stack_a7
.previous
lw t0, TI_FLAGS($28) # syscall tracing enabled?
@@ -216,7 +216,7 @@ einval: li v0, -ENOSYS
#endif /* CONFIG_MIPS_MT_FPAFF */
#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
-#define __SYSCALL(nr, entry) PTR entry
+#define __SYSCALL(nr, entry) PTR_WD entry
.align 2
.type sys_call_table, @object
EXPORT(sys_call_table)
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index f650c55a17dc..97456b2ca7dc 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -101,7 +101,7 @@ not_n32_scall:
END(handle_sysn32)
-#define __SYSCALL(nr, entry) PTR entry
+#define __SYSCALL(nr, entry) PTR_WD entry
.type sysn32_call_table, @object
EXPORT(sysn32_call_table)
#include <asm/syscall_table_n32.h>
diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S
index 5d7bfc65e4d0..5f6ed4b4c399 100644
--- a/arch/mips/kernel/scall64-n64.S
+++ b/arch/mips/kernel/scall64-n64.S
@@ -109,7 +109,7 @@ illegal_syscall:
j n64_syscall_exit
END(handle_sys64)
-#define __SYSCALL(nr, entry) PTR entry
+#define __SYSCALL(nr, entry) PTR_WD entry
.align 3
.type sys_call_table, @object
EXPORT(sys_call_table)
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index cedc8bd88804..d3c2616cba22 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -73,10 +73,10 @@ load_a7: lw a7, 28(t0) # argument #8 from usp
loads_done:
.section __ex_table,"a"
- PTR load_a4, bad_stack_a4
- PTR load_a5, bad_stack_a5
- PTR load_a6, bad_stack_a6
- PTR load_a7, bad_stack_a7
+ PTR_WD load_a4, bad_stack_a4
+ PTR_WD load_a5, bad_stack_a5
+ PTR_WD load_a6, bad_stack_a6
+ PTR_WD load_a7, bad_stack_a7
.previous
li t1, _TIF_WORK_SYSCALL_ENTRY
@@ -214,7 +214,7 @@ einval: li v0, -ENOSYS
END(sys32_syscall)
#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat)
-#define __SYSCALL(nr, entry) PTR entry
+#define __SYSCALL(nr, entry) PTR_WD entry
.align 3
.type sys32_call_table,@object
EXPORT(sys32_call_table)
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index f979adfd4fc2..ef73ba1e0ec1 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -803,7 +803,7 @@ early_param("coherentio", setcoherentio);
static int __init setnocoherentio(char *str)
{
- dma_default_coherent = true;
+ dma_default_coherent = false;
pr_info("Software DMA cache coherency (command line)\n");
return 0;
}
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index d542fb7af3ba..1986d1309410 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -351,6 +351,9 @@ asmlinkage void start_secondary(void)
cpu = smp_processor_id();
cpu_data[cpu].udelay_val = loops_per_jiffy;
+ set_cpu_sibling_map(cpu);
+ set_cpu_core_map(cpu);
+
cpumask_set_cpu(cpu, &cpu_coherent_mask);
notify_cpu_starting(cpu);
@@ -362,9 +365,6 @@ asmlinkage void start_secondary(void)
/* The CPU is running and counters synchronised, now mark it online */
set_cpu_online(cpu, true);
- set_cpu_sibling_map(cpu);
- set_cpu_core_map(cpu);
-
calculate_cpu_foreign_map();
/*
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 5512cd586e6e..ae93a607ddf7 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -122,8 +122,8 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
" j 3b \n"
" .previous \n"
" .section __ex_table,\"a\" \n"
- " "STR(PTR)" 1b, 4b \n"
- " "STR(PTR)" 2b, 4b \n"
+ " "STR(PTR_WD)" 1b, 4b \n"
+ " "STR(PTR_WD)" 2b, 4b \n"
" .previous \n"
" .set pop \n"
: [old] "=&r" (old),
@@ -152,8 +152,8 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
" j 3b \n"
" .previous \n"
" .section __ex_table,\"a\" \n"
- " "STR(PTR)" 1b, 5b \n"
- " "STR(PTR)" 2b, 5b \n"
+ " "STR(PTR_WD)" 1b, 5b \n"
+ " "STR(PTR_WD)" 2b, 5b \n"
" .previous \n"
" .set pop \n"
: [old] "=&r" (old),
diff --git a/arch/mips/kernel/topology.c b/arch/mips/kernel/topology.c
index 08ad6371fbe0..9429d85a4703 100644
--- a/arch/mips/kernel/topology.c
+++ b/arch/mips/kernel/topology.c
@@ -12,11 +12,6 @@ static int __init topology_init(void)
{
int i, ret;
-#ifdef CONFIG_NUMA
- for_each_online_node(i)
- register_one_node(i);
-#endif /* CONFIG_NUMA */
-
for_each_present_cpu(i) {
struct cpu *c = &per_cpu(cpu_devices, i);
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index e59cb6246f76..a25e0b73ee70 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -414,6 +414,24 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
return -ENOIOCTLCMD;
}
+/*
+ * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
+ * the vCPU is running.
+ *
+ * This must be noinstr as instrumentation may make use of RCU, and this is not
+ * safe during the EQS.
+ */
+static int noinstr kvm_mips_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ guest_state_enter_irqoff();
+ ret = kvm_mips_callbacks->vcpu_run(vcpu);
+ guest_state_exit_irqoff();
+
+ return ret;
+}
+
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
int r = -EINTR;
@@ -434,7 +452,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
lose_fpu(1);
local_irq_disable();
- guest_enter_irqoff();
+ guest_timing_enter_irqoff();
trace_kvm_enter(vcpu);
/*
@@ -445,10 +463,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
smp_store_mb(vcpu->mode, IN_GUEST_MODE);
- r = kvm_mips_callbacks->vcpu_run(vcpu);
+ r = kvm_mips_vcpu_enter_exit(vcpu);
+
+ /*
+ * We must ensure that any pending interrupts are taken before
+ * we exit guest timing so that timer ticks are accounted as
+ * guest time. Transiently unmask interrupts so that any
+ * pending interrupts are taken.
+ *
+ * TODO: is there a barrier which ensures that pending interrupts are
+ * recognised? Currently this just hopes that the CPU takes any pending
+ * interrupts between the enable and disable.
+ */
+ local_irq_enable();
+ local_irq_disable();
trace_kvm_out(vcpu);
- guest_exit_irqoff();
+ guest_timing_exit_irqoff();
local_irq_enable();
out:
@@ -1168,7 +1199,7 @@ static void kvm_mips_set_c0_status(void)
/*
* Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
*/
-int kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
+static int __kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
u32 cause = vcpu->arch.host_cp0_cause;
@@ -1357,6 +1388,17 @@ int kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
return ret;
}
+int noinstr kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ guest_state_exit_irqoff();
+ ret = __kvm_mips_handle_exit(vcpu);
+ guest_state_enter_irqoff();
+
+ return ret;
+}
+
/* Enable FPU for guest and restore context */
void kvm_own_fpu(struct kvm_vcpu *vcpu)
{
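Taken together, the kvm/mips.c hunks above restructure the vCPU run path so that instrumentation never runs inside the RCU extended quiescent state, and so that interrupts pending at guest exit are accounted as guest time. A condensed, hedged sketch of the resulting ordering (not a drop-in implementation):

    local_irq_disable();
    guest_timing_enter_irqoff();          /* start guest time accounting */

    r = kvm_mips_vcpu_enter_exit(vcpu);   /* noinstr: EQS entered/left inside */

    /* transiently unmask so a pending IRQ is billed to the guest */
    local_irq_enable();
    local_irq_disable();

    guest_timing_exit_irqoff();           /* stop guest time accounting */
    local_irq_enable();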
diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c
index 4adca5abbc72..c706f5890a05 100644
--- a/arch/mips/kvm/vz.c
+++ b/arch/mips/kvm/vz.c
@@ -458,8 +458,8 @@ void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu)
/**
* _kvm_vz_save_htimer() - Switch to software emulation of guest timer.
* @vcpu: Virtual CPU.
- * @compare: Pointer to write compare value to.
- * @cause: Pointer to write cause value to.
+ * @out_compare: Pointer to write compare value to.
+ * @out_cause: Pointer to write cause value to.
*
* Save VZ guest timer state and switch to software emulation of guest CP0
* timer. The hard timer must already be in use, so preemption should be
@@ -1541,11 +1541,14 @@ static int kvm_trap_vz_handle_guest_exit(struct kvm_vcpu *vcpu)
}
/**
- * kvm_trap_vz_handle_cop_unusuable() - Guest used unusable coprocessor.
+ * kvm_trap_vz_handle_cop_unusable() - Guest used unusable coprocessor.
* @vcpu: Virtual CPU context.
*
* Handle when the guest attempts to use a coprocessor which hasn't been allowed
* by the root context.
+ *
+ * Return: value indicating whether to resume the host or the guest
+ * (RESUME_HOST or RESUME_GUEST)
*/
static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu)
{
@@ -1592,6 +1595,9 @@ static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu)
*
* Handle when the guest attempts to use MSA when it is disabled in the root
* context.
+ *
+ * Return: value indicating whether to resume the host or the guest
+ * (RESUME_HOST or RESUME_GUEST)
*/
static int kvm_trap_vz_handle_msa_disabled(struct kvm_vcpu *vcpu)
{
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index a46db0807195..7767137c3e49 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -347,7 +347,7 @@ EXPORT_SYMBOL(csum_partial)
.if \mode == LEGACY_MODE; \
9: insn reg, addr; \
.section __ex_table,"a"; \
- PTR 9b, .L_exc; \
+ PTR_WD 9b, .L_exc; \
.previous; \
/* This is enabled in EVA mode */ \
.else; \
@@ -356,7 +356,7 @@ EXPORT_SYMBOL(csum_partial)
((\to == USEROP) && (type == ST_INSN)); \
9: __BUILD_EVA_INSN(insn##e, reg, addr); \
.section __ex_table,"a"; \
- PTR 9b, .L_exc; \
+ PTR_WD 9b, .L_exc; \
.previous; \
.else; \
/* EVA without exception */ \
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index 277c32296636..18a43f2e29c8 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -116,7 +116,7 @@
.if \mode == LEGACY_MODE; \
9: insn reg, addr; \
.section __ex_table,"a"; \
- PTR 9b, handler; \
+ PTR_WD 9b, handler; \
.previous; \
/* This is assembled in EVA mode */ \
.else; \
@@ -125,7 +125,7 @@
((\to == USEROP) && (type == ST_INSN)); \
9: __BUILD_EVA_INSN(insn##e, reg, addr); \
.section __ex_table,"a"; \
- PTR 9b, handler; \
+ PTR_WD 9b, handler; \
.previous; \
.else; \
/* \
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index b0baa3c79fad..0b342bae9a98 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -52,7 +52,7 @@
9: ___BUILD_EVA_INSN(insn, reg, addr); \
.endif; \
.section __ex_table,"a"; \
- PTR 9b, handler; \
+ PTR_WD 9b, handler; \
.previous
.macro f_fill64 dst, offset, val, fixup, mode
diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S
index 556acf684d7b..13aaa9927ad1 100644
--- a/arch/mips/lib/strncpy_user.S
+++ b/arch/mips/lib/strncpy_user.S
@@ -15,7 +15,7 @@
#define EX(insn,reg,addr,handler) \
9: insn reg, addr; \
.section __ex_table,"a"; \
- PTR 9b, handler; \
+ PTR_WD 9b, handler; \
.previous
/*
@@ -59,7 +59,7 @@ LEAF(__strncpy_from_user_asm)
jr ra
.section __ex_table,"a"
- PTR 1b, .Lfault
+ PTR_WD 1b, .Lfault
.previous
EXPORT_SYMBOL(__strncpy_from_user_asm)
diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S
index 92b63f20ec05..6de31b616f9c 100644
--- a/arch/mips/lib/strnlen_user.S
+++ b/arch/mips/lib/strnlen_user.S
@@ -14,7 +14,7 @@
#define EX(insn,reg,addr,handler) \
9: insn reg, addr; \
.section __ex_table,"a"; \
- PTR 9b, handler; \
+ PTR_WD 9b, handler; \
.previous
/*
diff --git a/arch/mips/loongson64/vbios_quirk.c b/arch/mips/loongson64/vbios_quirk.c
index 9a29e94d3db1..3115d4de982c 100644
--- a/arch/mips/loongson64/vbios_quirk.c
+++ b/arch/mips/loongson64/vbios_quirk.c
@@ -3,7 +3,7 @@
#include <linux/pci.h>
#include <loongson.h>
-static void pci_fixup_radeon(struct pci_dev *pdev)
+static void pci_fixup_video(struct pci_dev *pdev)
{
struct resource *res = &pdev->resource[PCI_ROM_RESOURCE];
@@ -22,8 +22,7 @@ static void pci_fixup_radeon(struct pci_dev *pdev)
res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW |
IORESOURCE_PCI_FIXED;
- dev_info(&pdev->dev, "BAR %d: assigned %pR for Radeon ROM\n",
- PCI_ROM_RESOURCE, res);
+ dev_info(&pdev->dev, "Video device with shadowed ROM at %pR\n", res);
}
-DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, 0x9615,
- PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_radeon);
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_ATI, 0x9615,
+ PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c
index d6efffd4dd20..fb0565bc34fd 100644
--- a/arch/mips/ralink/mt7621.c
+++ b/arch/mips/ralink/mt7621.c
@@ -22,7 +22,9 @@
#include "common.h"
-static void *detect_magic __initdata = detect_memory_region;
+#define MT7621_MEM_TEST_PATTERN 0xaa5555aa
+
+static u32 detect_magic __initdata;
int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
{
@@ -58,24 +60,32 @@ phys_addr_t mips_cpc_default_phys_base(void)
panic("Cannot detect cpc address");
}
+static bool __init mt7621_addr_wraparound_test(phys_addr_t size)
+{
+ void *dm = (void *)KSEG1ADDR(&detect_magic);
+
+ if (CPHYSADDR(dm + size) >= MT7621_LOWMEM_MAX_SIZE)
+ return true;
+ __raw_writel(MT7621_MEM_TEST_PATTERN, dm);
+ if (__raw_readl(dm) != __raw_readl(dm + size))
+ return false;
+ __raw_writel(~MT7621_MEM_TEST_PATTERN, dm);
+ return __raw_readl(dm) == __raw_readl(dm + size);
+}
+
static void __init mt7621_memory_detect(void)
{
- void *dm = &detect_magic;
phys_addr_t size;
- for (size = 32 * SZ_1M; size < 256 * SZ_1M; size <<= 1) {
- if (!__builtin_memcmp(dm, dm + size, sizeof(detect_magic)))
- break;
+ for (size = 32 * SZ_1M; size <= 256 * SZ_1M; size <<= 1) {
+ if (mt7621_addr_wraparound_test(size)) {
+ memblock_add(MT7621_LOWMEM_BASE, size);
+ return;
+ }
}
- if ((size == 256 * SZ_1M) &&
- (CPHYSADDR(dm + size) < MT7621_LOWMEM_MAX_SIZE) &&
- __builtin_memcmp(dm, dm + size, sizeof(detect_magic))) {
- memblock_add(MT7621_LOWMEM_BASE, MT7621_LOWMEM_MAX_SIZE);
- memblock_add(MT7621_HIGHMEM_BASE, MT7621_HIGHMEM_SIZE);
- } else {
- memblock_add(MT7621_LOWMEM_BASE, size);
- }
+ memblock_add(MT7621_LOWMEM_BASE, MT7621_LOWMEM_MAX_SIZE);
+ memblock_add(MT7621_HIGHMEM_BASE, MT7621_HIGHMEM_SIZE);
}
void __init ralink_of_remap(void)
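The reworked mt7621 detection above probes for address-space wraparound through an uncached KSEG1 window with an explicit test pattern, rather than comparing memory contents that could match by accident: the memory size is the smallest power of two at which the pattern aliases, and the loop bound now includes the 256 MB case. A hedged sketch of the principle (illustrative helper, uncached mapping assumed):

    /* If the board has exactly `size` bytes of DRAM, physical
     * addresses A and A + size decode to the same cell, so the
     * pattern must read back at both offsets, for both polarities. */
    static bool __init dram_wraps_at(volatile u32 *base, size_t size)
    {
        size_t off = size / sizeof(u32);

        base[0] = 0xaa5555aa;
        if (base[0] != base[off])
            return false;                 /* distinct memory: no alias */
        base[0] = ~0xaa5555aa;
        return base[0] == base[off];
    }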
diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h
index 419f984eef70..7ff144467b29 100644
--- a/arch/nds32/include/asm/pgtable.h
+++ b/arch/nds32/include/asm/pgtable.h
@@ -308,6 +308,7 @@ static inline pmd_t __mk_pmd(pte_t * ptep, unsigned long prot)
return pmd;
}
+#define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT)
#define pmd_page(pmd) virt_to_page(__va(pmd_val(pmd)))
/*
diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c
index f63f839738c4..825c85cab1a1 100644
--- a/arch/nds32/mm/init.c
+++ b/arch/nds32/mm/init.c
@@ -18,7 +18,6 @@
#include <asm/tlb.h>
#include <asm/page.h>
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
DEFINE_SPINLOCK(anon_alias_lock);
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index 4a995fa628ee..262d0609268c 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -235,6 +235,7 @@ static inline void pte_clear(struct mm_struct *mm,
* and a page entry and page directory to the page they refer to.
*/
#define pmd_phys(pmd) virt_to_phys((void *)pmd_val(pmd))
+#define pmd_pfn(pmd) (pmd_phys(pmd) >> PAGE_SHIFT)
#define pmd_page(pmd) (pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT))
static inline unsigned long pmd_page_vaddr(pmd_t pmd)
diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h
index cdd657f80bfa..c3abbf71e09f 100644
--- a/arch/openrisc/include/asm/pgtable.h
+++ b/arch/openrisc/include/asm/pgtable.h
@@ -361,6 +361,7 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
pmd_val(*pmdp) = _KERNPG_TABLE | (unsigned long) ptep;
}
+#define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT)
#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
static inline unsigned long pmd_page_vaddr(pmd_t pmd)
diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
index 97305bde1b16..3a021ab6f1ae 100644
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -38,8 +38,6 @@
int mem_init_done;
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
static void __init zone_sizes_init(void)
{
unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 43c1c880def6..864b4756dcdf 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -10,10 +10,12 @@ config PARISC
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_HAS_PTE_SPECIAL
select ARCH_NO_SG_CHAIN
select ARCH_SUPPORTS_HUGETLBFS if PA20
select ARCH_SUPPORTS_MEMORY_FAILURE
select ARCH_STACKWALK
+ select ARCH_HAS_DEBUG_VM_PGTABLE
select HAVE_RELIABLE_STACKTRACE
select DMA_OPS
select RTC_CLASS
@@ -259,18 +261,6 @@ config PARISC_PAGE_SIZE_64KB
endchoice
-config PARISC_SELF_EXTRACT
- bool "Build kernel as self-extracting executable"
- default y
- help
- Say Y if you want to build the parisc kernel as a kind of
- self-extracting executable.
-
- If you say N here, the kernel will be compressed with gzip
- which can be loaded by the palo bootloader directly too.
-
- If you don't know what to do here, say Y.
-
config SMP
bool "Symmetric multi-processing support"
help
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 82d77f4b0d08..2a9387a93592 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -15,12 +15,8 @@
# Mike Shaver, Helge Deller and Martin K. Petersen
#
-ifdef CONFIG_PARISC_SELF_EXTRACT
boot := arch/parisc/boot
KBUILD_IMAGE := $(boot)/bzImage
-else
-KBUILD_IMAGE := vmlinuz
-endif
NM = sh $(srctree)/arch/parisc/nm
CHECKFLAGS += -D__hppa__=1
@@ -44,6 +40,16 @@ endif
export LD_BFD
+# Set default 32-bit cross compiler for the vdso
+CC_ARCHES_32 = hppa hppa2.0 hppa1.1
+CC_SUFFIXES = linux linux-gnu unknown-linux-gnu
+CROSS32_COMPILE := $(call cc-cross-prefix, \
+ $(foreach a,$(CC_ARCHES_32), \
+ $(foreach s,$(CC_SUFFIXES),$(a)-$(s)-)))
+CROSS32CC := $(CROSS32_COMPILE)gcc
+export CROSS32CC
+
+# Set default cross compiler for kernel build
ifdef cross_compiling
ifeq ($(CROSS_COMPILE),)
CC_SUFFIXES = linux linux-gnu unknown-linux-gnu
@@ -155,14 +161,29 @@ Image: vmlinux
bzImage: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
-ifdef CONFIG_PARISC_SELF_EXTRACT
vmlinuz: bzImage
$(OBJCOPY) $(boot)/bzImage $@
-else
-vmlinuz: vmlinux
- @$(KGZIP) -cf -9 $< > $@
+
+ifeq ($(KBUILD_EXTMOD),)
+# We need to generate vdso-offsets.h before compiling certain files in kernel/.
+# In order to do that, we should use the archprepare target, but we can't since
+# asm-offsets.h is included in some files used to generate vdso-offsets.h, and
+# asm-offsets.h is built in prepare0, for which archprepare is a dependency.
+# Therefore we need to generate the header after prepare0 has been made, hence
+# this hack.
+prepare: vdso_prepare
+vdso_prepare: prepare0
+ $(if $(CONFIG_64BIT),$(Q)$(MAKE) \
+ $(build)=arch/parisc/kernel/vdso64 include/generated/vdso64-offsets.h)
+ $(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso32 include/generated/vdso32-offsets.h
endif
+PHONY += vdso_install
+
+vdso_install:
+ $(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso $@
+ $(if $(CONFIG_COMPAT_VDSO), \
+ $(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso32 $@)
install:
$(CONFIG_SHELL) $(srctree)/arch/parisc/install.sh \
$(KERNELRELEASE) vmlinux System.map "$(INSTALL_PATH)"
diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig
index 53061cb2cf7f..a5fee10d76ee 100644
--- a/arch/parisc/configs/generic-32bit_defconfig
+++ b/arch/parisc/configs/generic-32bit_defconfig
@@ -210,7 +210,6 @@ CONFIG_TMPFS_XATTR=y
CONFIG_NFS_FS=m
# CONFIG_NFS_V2 is not set
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
CONFIG_CIFS_XATTR=y
CONFIG_CIFS_POSIX=y
diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
index 6369082c6c74..ea0cb318b13d 100644
--- a/arch/parisc/include/asm/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
@@ -47,6 +47,12 @@
#define PRIV_USER 3
#define PRIV_KERNEL 0
+/* Space registers used inside the kernel */
+#define SR_KERNEL 0
+#define SR_TEMP1 1
+#define SR_TEMP2 2
+#define SR_USER 3
+
#ifdef __ASSEMBLY__
#ifdef CONFIG_64BIT
diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h
index 0ec9cfc5131f..56ffd260c669 100644
--- a/arch/parisc/include/asm/bitops.h
+++ b/arch/parisc/include/asm/bitops.h
@@ -12,6 +12,14 @@
#include <asm/barrier.h>
#include <linux/atomic.h>
+/* compiler build environment sanity checks: */
+#if !defined(CONFIG_64BIT) && defined(__LP64__)
+#error "Please use 'ARCH=parisc' to build the 32-bit kernel."
+#endif
+#if defined(CONFIG_64BIT) && !defined(__LP64__)
+#error "Please use 'ARCH=parisc64' to build the 64-bit kernel."
+#endif
+
/* See http://marc.theaimsgroup.com/?t=108826637900003 for discussion
* on use of volatile and __*_bit() (set/clear/change):
* *_bit() want use of volatile.
diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h
index d53e9e27dba0..5032e758594e 100644
--- a/arch/parisc/include/asm/cache.h
+++ b/arch/parisc/include/asm/cache.h
@@ -39,16 +39,13 @@ extern int icache_stride;
extern struct pdc_cache_info cache_info;
void parisc_setup_cache_timing(void);
-#define pdtlb(addr) asm volatile("pdtlb 0(%%sr1,%0)" \
+#define pdtlb(sr, addr) asm volatile("pdtlb 0(%%sr%0,%1)" \
ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \
- : : "r" (addr) : "memory")
-#define pitlb(addr) asm volatile("pitlb 0(%%sr1,%0)" \
+ : : "i"(sr), "r" (addr) : "memory")
+#define pitlb(sr, addr) asm volatile("pitlb 0(%%sr%0,%1)" \
ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \
ALTERNATIVE(ALT_COND_NO_SPLIT_TLB, INSN_NOP) \
- : : "r" (addr) : "memory")
-#define pdtlb_kernel(addr) asm volatile("pdtlb 0(%0)" \
- ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \
- : : "r" (addr) : "memory")
+ : : "i"(sr), "r" (addr) : "memory")
#define asm_io_fdc(addr) asm volatile("fdc %%r0(%0)" \
ALTERNATIVE(ALT_COND_NO_DCACHE, INSN_NOP) \
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index 859b8a34adcf..e8b4a03343d3 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -9,16 +9,11 @@
/* The usual comment is "Caches aren't brain-dead on the <architecture>".
* Unfortunately, that doesn't apply to PA-RISC. */
-/* Internal implementation */
-void flush_data_cache_local(void *); /* flushes local data-cache only */
-void flush_instruction_cache_local(void *); /* flushes local code-cache only */
-#ifdef CONFIG_SMP
-void flush_data_cache(void); /* flushes data-cache only (all processors) */
-void flush_instruction_cache(void); /* flushes i-cache only (all processors) */
-#else
-#define flush_data_cache() flush_data_cache_local(NULL)
-#define flush_instruction_cache() flush_instruction_cache_local(NULL)
-#endif
+#include <linux/jump_label.h>
+
+DECLARE_STATIC_KEY_TRUE(parisc_has_cache);
+DECLARE_STATIC_KEY_TRUE(parisc_has_dcache);
+DECLARE_STATIC_KEY_TRUE(parisc_has_icache);
#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
diff --git a/arch/parisc/include/asm/current.h b/arch/parisc/include/asm/current.h
index 568b739e42af..dc7aea07c3f3 100644
--- a/arch/parisc/include/asm/current.h
+++ b/arch/parisc/include/asm/current.h
@@ -2,14 +2,16 @@
#ifndef _ASM_PARISC_CURRENT_H
#define _ASM_PARISC_CURRENT_H
-#include <asm/special_insns.h>
-
#ifndef __ASSEMBLY__
struct task_struct;
static __always_inline struct task_struct *get_current(void)
{
- return (struct task_struct *) mfctl(30);
+ struct task_struct *ts;
+
+ /* do not use the mfctl() macro, as it is marked volatile */
+ asm( "mfctl %%cr30,%0" : "=r" (ts) );
+ return ts;
}
#define current get_current()
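Dropping the volatile-marked mfctl() here lets the compiler treat the cr30 read as an ordinary pure operation, so repeated uses of current within one function can share a single mfctl instead of re-reading the control register each time; that is safe because cr30 only changes across a context switch. A hedged illustration:

    /* Both uses below may be folded into one mfctl %cr30 read. */
    struct task_struct *a = get_current();
    struct task_struct *b = get_current();   /* may reuse a's register */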
diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h
index 3bd465a27791..cc426d365892 100644
--- a/arch/parisc/include/asm/elf.h
+++ b/arch/parisc/include/asm/elf.h
@@ -359,4 +359,19 @@ struct mm_struct;
extern unsigned long arch_randomize_brk(struct mm_struct *);
#define arch_randomize_brk arch_randomize_brk
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+ int executable_stack);
+#define VDSO_AUX_ENT(a, b) NEW_AUX_ENT(a, b)
+#define VDSO_CURRENT_BASE current->mm->context.vdso_base
+
+#define ARCH_DLINFO \
+do { \
+ if (VDSO_CURRENT_BASE) { \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE);\
+ } \
+} while (0)
+
#endif
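The new ARCH_DLINFO block publishes the vDSO load address to user space through the auxiliary vector whenever a vDSO was mapped. A hedged sketch of the consumer side (userspace code, not part of this diff):

    #include <sys/auxv.h>

    /* The C library (or a debugger) locates the vDSO ELF header via
     * the aux vector entry added above. */
    void *vdso = (void *)getauxval(AT_SYSINFO_EHDR);   /* 0 if absent */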
diff --git a/arch/parisc/include/asm/kprobes.h b/arch/parisc/include/asm/kprobes.h
index 904034da4974..0a175ac87698 100644
--- a/arch/parisc/include/asm/kprobes.h
+++ b/arch/parisc/include/asm/kprobes.h
@@ -18,8 +18,9 @@
#include <linux/notifier.h>
#define PARISC_KPROBES_BREAK_INSN 0x3ff801f
+#define PARISC_KPROBES_BREAK_INSN2 0x3ff801e
#define __ARCH_WANT_KPROBES_INSN_SLOT
-#define MAX_INSN_SIZE 1
+#define MAX_INSN_SIZE 2
typedef u32 kprobe_opcode_t;
struct kprobe;
@@ -29,7 +30,7 @@ void arch_remove_kprobe(struct kprobe *p);
#define flush_insn_slot(p) \
flush_icache_range((unsigned long)&(p)->ainsn.insn[0], \
(unsigned long)&(p)->ainsn.insn[0] + \
- sizeof(kprobe_opcode_t))
+ MAX_INSN_SIZE*sizeof(kprobe_opcode_t))
#define kretprobe_blacklist_size 0
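Growing MAX_INSN_SIZE to two slots suggests the out-of-line single-step buffer now holds the copied instruction followed by a second break (PARISC_KPROBES_BREAK_INSN2) that traps back into the kprobes core, and the widened flush_insn_slot() keeps both words coherent in the I-cache. A hedged sketch of the slot layout this implies (assumed; the arch_prepare_kprobe() change itself is not shown in this diff):

    p->ainsn.insn[0] = *p->addr;                    /* copied instruction */
    p->ainsn.insn[1] = PARISC_KPROBES_BREAK_INSN2;  /* trap after the step */
    flush_insn_slot(p);                             /* now covers both words */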
diff --git a/arch/parisc/include/asm/mmu.h b/arch/parisc/include/asm/mmu.h
index 3fb70a601d5c..44fd062b62ed 100644
--- a/arch/parisc/include/asm/mmu.h
+++ b/arch/parisc/include/asm/mmu.h
@@ -2,7 +2,9 @@
#ifndef _PARISC_MMU_H_
#define _PARISC_MMU_H_
-/* On parisc, we store the space id here */
-typedef unsigned long mm_context_t;
+typedef struct {
+ unsigned long space_id;
+ unsigned long vdso_base;
+} mm_context_t;
#endif /* _PARISC_MMU_H_ */
diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h
index 726257648d9f..c9187fe836a3 100644
--- a/arch/parisc/include/asm/mmu_context.h
+++ b/arch/parisc/include/asm/mmu_context.h
@@ -20,7 +20,7 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
BUG_ON(atomic_read(&mm->mm_users) != 1);
- mm->context = alloc_sid();
+ mm->context.space_id = alloc_sid();
return 0;
}
@@ -28,22 +28,22 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
static inline void
destroy_context(struct mm_struct *mm)
{
- free_sid(mm->context);
- mm->context = 0;
+ free_sid(mm->context.space_id);
+ mm->context.space_id = 0;
}
static inline unsigned long __space_to_prot(mm_context_t context)
{
#if SPACEID_SHIFT == 0
- return context << 1;
+ return context.space_id << 1;
#else
- return context >> (SPACEID_SHIFT - 1);
+ return context.space_id >> (SPACEID_SHIFT - 1);
#endif
}
static inline void load_context(mm_context_t context)
{
- mtsp(context, 3);
+ mtsp(context.space_id, SR_USER);
mtctl(__space_to_prot(context), 8);
}
@@ -89,8 +89,8 @@ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
BUG_ON(next == &init_mm); /* Should never happen */
- if (next->context == 0)
- next->context = alloc_sid();
+ if (next->context.space_id == 0)
+ next->context.space_id = alloc_sid();
switch_mm(prev,next,current);
}
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 3e7cf882639f..939db6fe620b 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -70,9 +70,9 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
unsigned long flags;
purge_tlb_start(flags);
- mtsp(mm->context, 1);
- pdtlb(addr);
- pitlb(addr);
+ mtsp(mm->context.space_id, SR_TEMP1);
+ pdtlb(SR_TEMP1, addr);
+ pitlb(SR_TEMP1, addr);
purge_tlb_end(flags);
}
@@ -219,9 +219,10 @@ extern void __update_cache(pte_t pte);
#define _PAGE_PRESENT (1 << xlate_pabit(_PAGE_PRESENT_BIT))
#define _PAGE_HUGE (1 << xlate_pabit(_PAGE_HPAGE_BIT))
#define _PAGE_USER (1 << xlate_pabit(_PAGE_USER_BIT))
+#define _PAGE_SPECIAL (_PAGE_DMB)
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED)
-#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL)
#define _PAGE_KERNEL_RO (_PAGE_PRESENT | _PAGE_READ | _PAGE_DIRTY | _PAGE_ACCESSED)
#define _PAGE_KERNEL_EXEC (_PAGE_KERNEL_RO | _PAGE_EXEC)
#define _PAGE_KERNEL_RWX (_PAGE_KERNEL_EXEC | _PAGE_WRITE)
@@ -348,6 +349,7 @@ static inline void pud_clear(pud_t *pud) {
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; }
+static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; }
static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
@@ -355,6 +357,7 @@ static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_WRITE; ret
static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; }
static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; }
static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return pte; }
+static inline pte_t pte_mkspecial(pte_t pte) { pte_val(pte) |= _PAGE_SPECIAL; return pte; }
/*
* Huge pte definitions.
@@ -405,6 +408,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
return ((unsigned long) __va(pmd_address(pmd)));
}
+#define pmd_pfn(pmd) (pmd_address(pmd) >> PAGE_SHIFT)
#define __pmd_page(pmd) ((unsigned long) __va(pmd_address(pmd)))
#define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd))
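The _PAGE_SPECIAL bit plus the pte_special()/pte_mkspecial() accessors back the ARCH_HAS_PTE_SPECIAL selection in the parisc Kconfig hunk above: generic code can now tell raw PFN mappings apart from ordinary pages by looking at the PTE alone. A hedged sketch of the kind of check this enables (illustrative, not the actual mm/ code):

    /* A vm_normal_page()-style helper can reject special mappings
     * straight from the PTE, with no VMA bookkeeping needed. */
    static struct page *normal_page_sketch(pte_t pte)
    {
        if (pte_special(pte))
            return NULL;                  /* no struct page behind it */
        return pfn_to_page(pte_pfn(pte));
    }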
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index 3a3d05438408..006364212795 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -236,7 +236,7 @@ on downward growing arches, it looks like this:
#define start_thread(regs, new_pc, new_sp) do { \
elf_addr_t *sp = (elf_addr_t *)new_sp; \
- __u32 spaceid = (__u32)current->mm->context; \
+ __u32 spaceid = (__u32)current->mm->context.space_id; \
elf_addr_t pc = (elf_addr_t)new_pc | 3; \
elf_caddr_t *argv = (elf_caddr_t *)bprm->exec + 1; \
\
diff --git a/arch/parisc/include/asm/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h
index 2b3010ade00e..bb7fb4153327 100644
--- a/arch/parisc/include/asm/rt_sigframe.h
+++ b/arch/parisc/include/asm/rt_sigframe.h
@@ -2,16 +2,8 @@
#ifndef _ASM_PARISC_RT_SIGFRAME_H
#define _ASM_PARISC_RT_SIGFRAME_H
-#define SIGRETURN_TRAMP 4
-#define SIGRESTARTBLOCK_TRAMP 5
-#define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP)
-
struct rt_sigframe {
- /* XXX: Must match trampoline size in arch/parisc/kernel/signal.c
- Secondary to that it must protect the ERESTART_RESTARTBLOCK
- trampoline we left on the stack (we were bad and didn't
- change sp so we could run really fast.) */
- unsigned int tramp[TRAMP_SIZE];
+ unsigned int tramp[2]; /* holds original return address */
struct siginfo info;
struct ucontext uc;
};
diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h
index 16ee41e77174..41b3ddbd344c 100644
--- a/arch/parisc/include/asm/special_insns.h
+++ b/arch/parisc/include/asm/special_insns.h
@@ -55,8 +55,8 @@ static inline void set_eiem(unsigned long val)
#define mfsp(reg) ({ \
unsigned long cr; \
__asm__ __volatile__( \
- "mfsp " #reg ",%0" : \
- "=r" (cr) \
+ "mfsp %%sr%1,%0" \
+ : "=r" (cr) : "i"(reg) \
); \
cr; \
})
diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h
index c5ded01d45be..5ffd7c17f593 100644
--- a/arch/parisc/include/asm/tlbflush.h
+++ b/arch/parisc/include/asm/tlbflush.h
@@ -17,7 +17,7 @@ int __flush_tlb_range(unsigned long sid,
unsigned long start, unsigned long end);
#define flush_tlb_range(vma, start, end) \
- __flush_tlb_range((vma)->vm_mm->context, start, end)
+ __flush_tlb_range((vma)->vm_mm->context.space_id, start, end)
#define flush_tlb_kernel_range(start, end) \
__flush_tlb_range(0, start, end)
diff --git a/arch/parisc/include/asm/traps.h b/arch/parisc/include/asm/traps.h
index 34619f010c63..0ccdb738a9a3 100644
--- a/arch/parisc/include/asm/traps.h
+++ b/arch/parisc/include/asm/traps.h
@@ -18,6 +18,7 @@ unsigned long parisc_acctyp(unsigned long code, unsigned int inst);
const char *trap_name(unsigned long code);
void do_page_fault(struct pt_regs *regs, unsigned long code,
unsigned long address);
+int handle_nadtlb_fault(struct pt_regs *regs);
#endif
#endif
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index ebf8a845b017..b3eb4541e441 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -79,18 +79,18 @@ struct exception_table_entry {
#define __get_user(val, ptr) \
({ \
- __get_user_internal("%%sr3,", val, ptr); \
+ __get_user_internal(SR_USER, val, ptr); \
})
#define __get_user_asm(sr, val, ldx, ptr) \
{ \
register long __gu_val; \
\
- __asm__("1: " ldx " 0(" sr "%2),%0\n" \
+ __asm__("1: " ldx " 0(%%sr%2,%3),%0\n" \
"9:\n" \
ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
- : "=r"(__gu_val), "=r"(__gu_err) \
- : "r"(ptr), "1"(__gu_err)); \
+ : "=r"(__gu_val), "+r"(__gu_err) \
+ : "i"(sr), "r"(ptr)); \
\
(val) = (__force __typeof__(*(ptr))) __gu_val; \
}
@@ -100,7 +100,7 @@ struct exception_table_entry {
{ \
type __z; \
long __err; \
- __err = __get_user_internal("%%sr0,", __z, (type *)(src)); \
+ __err = __get_user_internal(SR_KERNEL, __z, (type *)(src)); \
if (unlikely(__err)) \
goto err_label; \
else \
@@ -118,13 +118,13 @@ struct exception_table_entry {
} __gu_tmp; \
\
__asm__(" copy %%r0,%R0\n" \
- "1: ldw 0(" sr "%2),%0\n" \
- "2: ldw 4(" sr "%2),%R0\n" \
+ "1: ldw 0(%%sr%2,%3),%0\n" \
+ "2: ldw 4(%%sr%2,%3),%R0\n" \
"9:\n" \
ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
- : "=&r"(__gu_tmp.l), "=r"(__gu_err) \
- : "r"(ptr), "1"(__gu_err)); \
+ : "=&r"(__gu_tmp.l), "+r"(__gu_err) \
+ : "i"(sr), "r"(ptr)); \
\
(val) = __gu_tmp.t; \
}
@@ -135,13 +135,12 @@ struct exception_table_entry {
#define __put_user_internal(sr, x, ptr) \
({ \
ASM_EXCEPTIONTABLE_VAR(__pu_err); \
- __typeof__(*(ptr)) __x = (__typeof__(*(ptr)))(x); \
\
switch (sizeof(*(ptr))) { \
- case 1: __put_user_asm(sr, "stb", __x, ptr); break; \
- case 2: __put_user_asm(sr, "sth", __x, ptr); break; \
- case 4: __put_user_asm(sr, "stw", __x, ptr); break; \
- case 8: STD_USER(sr, __x, ptr); break; \
+ case 1: __put_user_asm(sr, "stb", x, ptr); break; \
+ case 2: __put_user_asm(sr, "sth", x, ptr); break; \
+ case 4: __put_user_asm(sr, "stw", x, ptr); break; \
+ case 8: STD_USER(sr, x, ptr); break; \
default: BUILD_BUG(); \
} \
\
@@ -150,14 +149,16 @@ struct exception_table_entry {
#define __put_user(x, ptr) \
({ \
- __put_user_internal("%%sr3,", x, ptr); \
+ __typeof__(&*(ptr)) __ptr = ptr; \
+ __typeof__(*(__ptr)) __x = (__typeof__(*(__ptr)))(x); \
+ __put_user_internal(SR_USER, __x, __ptr); \
})
#define __put_kernel_nofault(dst, src, type, err_label) \
{ \
type __z = *(type *)(src); \
long __err; \
- __err = __put_user_internal("%%sr0,", __z, (type *)(dst)); \
+ __err = __put_user_internal(SR_KERNEL, __z, (type *)(dst)); \
if (unlikely(__err)) \
goto err_label; \
}
@@ -177,24 +178,24 @@ struct exception_table_entry {
#define __put_user_asm(sr, stx, x, ptr) \
__asm__ __volatile__ ( \
- "1: " stx " %2,0(" sr "%1)\n" \
+ "1: " stx " %1,0(%%sr%2,%3)\n" \
"9:\n" \
ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
- : "=r"(__pu_err) \
- : "r"(ptr), "r"(x), "0"(__pu_err))
+ : "+r"(__pu_err) \
+ : "r"(x), "i"(sr), "r"(ptr))
#if !defined(CONFIG_64BIT)
#define __put_user_asm64(sr, __val, ptr) do { \
__asm__ __volatile__ ( \
- "1: stw %2,0(" sr "%1)\n" \
- "2: stw %R2,4(" sr "%1)\n" \
+ "1: stw %1,0(%%sr%2,%3)\n" \
+ "2: stw %R1,4(%%sr%2,%3)\n" \
"9:\n" \
ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
- : "=r"(__pu_err) \
- : "r"(ptr), "r"(__val), "0"(__pu_err)); \
+ : "+r"(__pu_err) \
+ : "r"(__val), "i"(sr), "r"(ptr)); \
} while (0)
#endif /* !defined(CONFIG_64BIT) */
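
The uaccess.h rework above replaces out-of-line .fixup branches with ASM_EXCEPTIONTABLE_ENTRY_EFAULT entries: on a fault, the exception handler deposits -EFAULT in the error register and resumes right after the access. A userspace model of that control flow, with setjmp/longjmp standing in for the fault machinery (purely a sketch; the kernel does this via the exception table, not setjmp):

#include <stdio.h>
#include <setjmp.h>
#include <errno.h>

static jmp_buf fault_fixup;     /* stands in for the exception table */

static long model_get_user(int *val, const int *uptr, int simulate_fault)
{
        long err = 0;                   /* ASM_EXCEPTIONTABLE_VAR(err) */

        if (setjmp(fault_fixup) == 0) {
                if (simulate_fault)
                        longjmp(fault_fixup, 1);        /* "page fault" */
                *val = *uptr;           /* 1: ldw 0(%%sr3,ptr),val */
        } else {
                err = -EFAULT;          /* fixup resumes at label 9: */
        }
        return err;
}

int main(void)
{
        int x = 42, v = 0;
        long e = model_get_user(&v, &x, 0);

        printf("ok:    err=%ld v=%d\n", e, v);
        printf("fault: err=%ld\n", model_get_user(&v, &x, 1));
        return 0;
}
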
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index cd438e4150f6..7708a5806f09 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -63,10 +63,6 @@
); \
__sys_res = (long)__res; \
} \
- if ( (unsigned long)__sys_res >= (unsigned long)-4095 ){ \
- errno = -__sys_res; \
- __sys_res = -1; \
- } \
__sys_res; \
})
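
The deleted unistd.h lines implemented the usual Linux raw-syscall convention: a return value in [-4095, -1] encodes -errno. With the hunk applied, the in-kernel wrapper hands back the raw value and the caller is responsible for the conversion. A sketch of that convention for reference:

#include <stdio.h>
#include <errno.h>

/* A raw return in [-4095, -1] encodes -errno; anything else is success. */
static long syscall_ret_to_errno(long raw)
{
        if ((unsigned long)raw >= (unsigned long)-4095) {
                errno = -raw;
                return -1;
        }
        return raw;
}

int main(void)
{
        long r = syscall_ret_to_errno(-2);

        printf("%ld (errno=%d)\n", r, errno);   /* -1 (errno=ENOENT) */
        printf("%ld\n", syscall_ret_to_errno(42));      /* 42 */
        return 0;
}
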
diff --git a/arch/parisc/include/asm/vdso.h b/arch/parisc/include/asm/vdso.h
new file mode 100644
index 000000000000..ef8206193f82
--- /dev/null
+++ b/arch/parisc/include/asm/vdso.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PARISC_VDSO_H__
+#define __PARISC_VDSO_H__
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_64BIT
+#include <generated/vdso64-offsets.h>
+#endif
+#include <generated/vdso32-offsets.h>
+
+#define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name))
+#define VDSO32_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso32_offset_##name))
+
+extern struct vdso_data *vdso_data;
+
+#endif /* __ASSEMBLY__ */
+
+/* Default link addresses for the vDSOs */
+#define VDSO_LBASE 0
+
+#define VDSO_VERSION_STRING LINUX_5.18
+
+#endif /* __PARISC_VDSO_H__ */
diff --git a/arch/parisc/include/uapi/asm/auxvec.h b/arch/parisc/include/uapi/asm/auxvec.h
new file mode 100644
index 000000000000..90d2aa699cf3
--- /dev/null
+++ b/arch/parisc/include/uapi/asm/auxvec.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PARISC_AUXVEC_H
+#define _UAPI_PARISC_AUXVEC_H
+
+/* The vDSO location. */
+#define AT_SYSINFO_EHDR 33
+
+#endif /* _UAPI_PARISC_AUXVEC_H */
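
AT_SYSINFO_EHDR is how userspace learns where the new vDSO landed: the kernel puts the mapping address into the auxiliary vector at exec time, and glibc resolves vDSO symbols from the ELF header found there. A minimal consumer (Linux-specific, works on any architecture that provides a vDSO):

#include <stdio.h>
#include <elf.h>
#include <sys/auxv.h>

int main(void)
{
        unsigned long vdso = getauxval(AT_SYSINFO_EHDR);

        if (vdso)
                printf("vDSO ELF header mapped at %#lx\n", vdso);
        else
                printf("no vDSO on this kernel/arch\n");
        return 0;
}
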
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile
index 8fb819bbbb17..d579243edc2f 100644
--- a/arch/parisc/kernel/Makefile
+++ b/arch/parisc/kernel/Makefile
@@ -39,3 +39,8 @@ obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KEXEC_CORE) += kexec.o relocate_kernel.o
obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
+
+# vdso
+obj-y += vdso.o
+obj-$(CONFIG_64BIT) += vdso64/
+obj-y += vdso32/
diff --git a/arch/parisc/kernel/alternative.c b/arch/parisc/kernel/alternative.c
index fa28c4c9f972..daa1e9047275 100644
--- a/arch/parisc/kernel/alternative.c
+++ b/arch/parisc/kernel/alternative.c
@@ -8,6 +8,7 @@
#include <asm/processor.h>
#include <asm/sections.h>
#include <asm/alternative.h>
+#include <asm/cacheflush.h>
#include <linux/module.h>
@@ -107,5 +108,14 @@ void __init apply_alternatives_all(void)
apply_alternatives((struct alt_instr *) &__alt_instructions,
(struct alt_instr *) &__alt_instructions_end, NULL);
+ if (cache_info.dc_size == 0 && cache_info.ic_size == 0) {
+ pr_info("alternatives: optimizing cache-flushes.\n");
+ static_branch_disable(&parisc_has_cache);
+ }
+ if (cache_info.dc_size == 0)
+ static_branch_disable(&parisc_has_dcache);
+ if (cache_info.ic_size == 0)
+ static_branch_disable(&parisc_has_icache);
+
set_kernel_text_rw(0);
}
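
The alternative.c addition disables the cache-flush static keys once at boot when PDC reports zero-sized caches (as qemu's emulation does), turning every later flush into a near-free nop. A plain-C model of that one-time decision, with ordinary bools standing in for static keys (which really patch the instruction stream):

#include <stdio.h>
#include <stdbool.h>

struct cache_info { unsigned long dc_size, ic_size; };

static bool parisc_has_cache  = true;
static bool parisc_has_dcache = true;
static bool parisc_has_icache = true;

static void apply_cache_alternatives(const struct cache_info *ci)
{
        if (ci->dc_size == 0 && ci->ic_size == 0) {
                printf("alternatives: optimizing cache-flushes.\n");
                parisc_has_cache = false;
        }
        if (ci->dc_size == 0) parisc_has_dcache = false;
        if (ci->ic_size == 0) parisc_has_icache = false;
}

static void flush_cache_all(void)
{
        if (!parisc_has_cache)
                return;                 /* the patched-out fast path */
        puts("flushing all caches...");
}

int main(void)
{
        struct cache_info qemu = { 0, 0 };

        apply_cache_alternatives(&qemu);
        flush_cache_all();              /* no-op after the boot decision */
        return 0;
}
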
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index 2a83ef36d216..2673d57eeb00 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -26,7 +26,11 @@
#include <asm/ptrace.h>
#include <asm/processor.h>
#include <asm/pdc.h>
+#include <uapi/asm/sigcontext.h>
+#include <asm/ucontext.h>
+#include <asm/rt_sigframe.h>
#include <linux/uaccess.h>
+#include "signal32.h"
/* Add FRAME_SIZE to the size x and align it to y. All definitions
* that use align_frame will include space for a frame.
@@ -218,6 +222,11 @@ int main(void)
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_PRE_COUNT, offsetof(struct task_struct, thread_info.preempt_count));
BLANK();
+ DEFINE(ASM_SIGFRAME_SIZE, PARISC_RT_SIGFRAME_SIZE);
+ DEFINE(SIGFRAME_CONTEXT_REGS, offsetof(struct rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE);
+ DEFINE(ASM_SIGFRAME_SIZE32, PARISC_RT_SIGFRAME_SIZE32);
+ DEFINE(SIGFRAME_CONTEXT_REGS32, offsetof(struct compat_rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE32);
+ BLANK();
DEFINE(ICACHE_BASE, offsetof(struct pdc_cache_info, ic_base));
DEFINE(ICACHE_STRIDE, offsetof(struct pdc_cache_info, ic_stride));
DEFINE(ICACHE_COUNT, offsetof(struct pdc_cache_info, ic_count));
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 94150b91c96f..456e879d34a8 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -38,6 +38,9 @@ EXPORT_SYMBOL(flush_dcache_page_asm);
void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr);
+/* Internal implementation in arch/parisc/kernel/pacache.S */
+void flush_data_cache_local(void *); /* flushes local data-cache only */
+void flush_instruction_cache_local(void); /* flushes local code-cache only */
/* On some machines (i.e., ones with the Merced bus), there can be
* only a single PxTLB broadcast at a time; this must be guaranteed
@@ -58,26 +61,35 @@ struct pdc_cache_info cache_info __ro_after_init;
static struct pdc_btlb_info btlb_info __ro_after_init;
#endif
-#ifdef CONFIG_SMP
-void
-flush_data_cache(void)
+DEFINE_STATIC_KEY_TRUE(parisc_has_cache);
+DEFINE_STATIC_KEY_TRUE(parisc_has_dcache);
+DEFINE_STATIC_KEY_TRUE(parisc_has_icache);
+
+static void cache_flush_local_cpu(void *dummy)
{
- on_each_cpu(flush_data_cache_local, NULL, 1);
+ if (static_branch_likely(&parisc_has_icache))
+ flush_instruction_cache_local();
+ if (static_branch_likely(&parisc_has_dcache))
+ flush_data_cache_local(NULL);
}
-void
-flush_instruction_cache(void)
+
+void flush_cache_all_local(void)
{
- on_each_cpu(flush_instruction_cache_local, NULL, 1);
+ cache_flush_local_cpu(NULL);
}
-#endif
-void
-flush_cache_all_local(void)
+void flush_cache_all(void)
{
- flush_instruction_cache_local(NULL);
- flush_data_cache_local(NULL);
+ if (static_branch_likely(&parisc_has_cache))
+ on_each_cpu(cache_flush_local_cpu, NULL, 1);
}
-EXPORT_SYMBOL(flush_cache_all_local);
+
+static inline void flush_data_cache(void)
+{
+ if (static_branch_likely(&parisc_has_dcache))
+ on_each_cpu(flush_data_cache_local, NULL, 1);
+}
+
/* Virtual address of pfn. */
#define pfn_va(pfn) __va(PFN_PHYS(pfn))
@@ -303,6 +315,8 @@ static inline void
__flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
unsigned long physaddr)
{
+ if (!static_branch_likely(&parisc_has_cache))
+ return;
preempt_disable();
flush_dcache_page_asm(physaddr, vmaddr);
if (vma->vm_flags & VM_EXEC)
@@ -314,6 +328,8 @@ static inline void
__purge_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
unsigned long physaddr)
{
+ if (!static_branch_likely(&parisc_has_cache))
+ return;
preempt_disable();
purge_dcache_page_asm(physaddr, vmaddr);
if (vma->vm_flags & VM_EXEC)
@@ -375,7 +391,6 @@ EXPORT_SYMBOL(flush_dcache_page);
/* Defined in arch/parisc/kernel/pacache.S */
EXPORT_SYMBOL(flush_kernel_dcache_range_asm);
-EXPORT_SYMBOL(flush_data_cache_local);
EXPORT_SYMBOL(flush_kernel_icache_range_asm);
#define FLUSH_THRESHOLD 0x80000 /* 0.5MB */
@@ -388,7 +403,7 @@ void __init parisc_setup_cache_timing(void)
{
unsigned long rangetime, alltime;
unsigned long size;
- unsigned long threshold;
+ unsigned long threshold, threshold2;
alltime = mfctl(16);
flush_data_cache();
@@ -403,8 +418,20 @@ void __init parisc_setup_cache_timing(void)
alltime, size, rangetime);
threshold = L1_CACHE_ALIGN(size * alltime / rangetime);
- if (threshold > cache_info.dc_size)
- threshold = cache_info.dc_size;
+
+ /*
+ * The threshold computed above isn't very reliable since the
+ * flush times depend greatly on the percentage of dirty lines
+ * in the flush range. Further, the whole cache time doesn't
+ * include the time to refill lines that aren't in the mm/vma
+ * being flushed. By timing glibc build and checks on mako cpus,
+ * the following formula seems to work reasonably well. The
+ * value from the timing calculation is too small, and increases
+ * build and check times by almost a factor of two.
+ */
+ threshold2 = cache_info.dc_size * num_online_cpus();
+ if (threshold2 > threshold)
+ threshold = threshold2;
if (threshold)
parisc_cache_flush_threshold = threshold;
printk(KERN_INFO "Cache flush threshold set to %lu KiB\n",
@@ -457,7 +484,7 @@ void flush_kernel_dcache_page_addr(void *addr)
flush_kernel_dcache_page_asm(addr);
purge_tlb_start(flags);
- pdtlb_kernel(addr);
+ pdtlb(SR_KERNEL, addr);
purge_tlb_end(flags);
}
EXPORT_SYMBOL(flush_kernel_dcache_page_addr);
@@ -496,25 +523,15 @@ int __flush_tlb_range(unsigned long sid, unsigned long start,
but cause a purge request to be broadcast to other TLBs. */
while (start < end) {
purge_tlb_start(flags);
- mtsp(sid, 1);
- pdtlb(start);
- pitlb(start);
+ mtsp(sid, SR_TEMP1);
+ pdtlb(SR_TEMP1, start);
+ pitlb(SR_TEMP1, start);
purge_tlb_end(flags);
start += PAGE_SIZE;
}
return 0;
}
-static void cacheflush_h_tmp_function(void *dummy)
-{
- flush_cache_all_local();
-}
-
-void flush_cache_all(void)
-{
- on_each_cpu(cacheflush_h_tmp_function, NULL, 1);
-}
-
static inline unsigned long mm_total_size(struct mm_struct *mm)
{
struct vm_area_struct *vma;
@@ -558,15 +575,6 @@ static void flush_cache_pages(struct vm_area_struct *vma, struct mm_struct *mm,
}
}
-static void flush_user_cache_tlb(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
- flush_user_dcache_range_asm(start, end);
- if (vma->vm_flags & VM_EXEC)
- flush_user_icache_range_asm(start, end);
- flush_tlb_range(vma, start, end);
-}
-
void flush_cache_mm(struct mm_struct *mm)
{
struct vm_area_struct *vma;
@@ -575,23 +583,14 @@ void flush_cache_mm(struct mm_struct *mm)
rp3440, etc. So, avoid it if the mm isn't too big. */
if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
mm_total_size(mm) >= parisc_cache_flush_threshold) {
- if (mm->context)
+ if (mm->context.space_id)
flush_tlb_all();
flush_cache_all();
return;
}
- preempt_disable();
- if (mm->context == mfsp(3)) {
- for (vma = mm->mmap; vma; vma = vma->vm_next)
- flush_user_cache_tlb(vma, vma->vm_start, vma->vm_end);
- preempt_enable();
- return;
- }
-
for (vma = mm->mmap; vma; vma = vma->vm_next)
flush_cache_pages(vma, mm, vma->vm_start, vma->vm_end);
- preempt_enable();
}
void flush_cache_range(struct vm_area_struct *vma,
@@ -599,29 +598,21 @@ void flush_cache_range(struct vm_area_struct *vma,
{
if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
end - start >= parisc_cache_flush_threshold) {
- if (vma->vm_mm->context)
+ if (vma->vm_mm->context.space_id)
flush_tlb_range(vma, start, end);
flush_cache_all();
return;
}
- preempt_disable();
- if (vma->vm_mm->context == mfsp(3)) {
- flush_user_cache_tlb(vma, start, end);
- preempt_enable();
- return;
- }
-
- flush_cache_pages(vma, vma->vm_mm, vma->vm_start, vma->vm_end);
- preempt_enable();
+ flush_cache_pages(vma, vma->vm_mm, start, end);
}
void
flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
{
if (pfn_valid(pfn)) {
- if (likely(vma->vm_mm->context)) {
- flush_tlb_page(vma, vmaddr);
+ flush_tlb_page(vma, vmaddr);
+ if (likely(vma->vm_mm->context.space_id)) {
__flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
} else {
__purge_cache_page(vma, vmaddr, PFN_PHYS(pfn));
@@ -633,6 +624,7 @@ void flush_kernel_vmap_range(void *vaddr, int size)
{
unsigned long start = (unsigned long)vaddr;
unsigned long end = start + size;
+ unsigned long flags, physaddr;
if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
(unsigned long)size >= parisc_cache_flush_threshold) {
@@ -641,8 +633,14 @@ void flush_kernel_vmap_range(void *vaddr, int size)
return;
}
- flush_kernel_dcache_range_asm(start, end);
- flush_tlb_kernel_range(start, end);
+ while (start < end) {
+ physaddr = lpa(start);
+ purge_tlb_start(flags);
+ pdtlb(SR_KERNEL, start);
+ purge_tlb_end(flags);
+ flush_dcache_page_asm(physaddr, start);
+ start += PAGE_SIZE;
+ }
}
EXPORT_SYMBOL(flush_kernel_vmap_range);
@@ -650,6 +648,7 @@ void invalidate_kernel_vmap_range(void *vaddr, int size)
{
unsigned long start = (unsigned long)vaddr;
unsigned long end = start + size;
+ unsigned long flags, physaddr;
if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
(unsigned long)size >= parisc_cache_flush_threshold) {
@@ -658,7 +657,13 @@ void invalidate_kernel_vmap_range(void *vaddr, int size)
return;
}
- purge_kernel_dcache_range_asm(start, end);
- flush_tlb_kernel_range(start, end);
+ while (start < end) {
+ physaddr = lpa(start);
+ purge_tlb_start(flags);
+ pdtlb(SR_KERNEL, start);
+ purge_tlb_end(flags);
+ purge_dcache_page_asm(physaddr, start);
+ start += PAGE_SIZE;
+ }
}
EXPORT_SYMBOL(invalidate_kernel_vmap_range);
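
The new threshold logic in parisc_setup_cache_timing() keeps the measured break-even size but clamps it upward to dc_size * num_online_cpus(), per the comment's glibc-build measurements on Mako CPUs. A stand-alone model of the computation (all numbers illustrative):

#include <stdio.h>

#define L1_CACHE_BYTES 64
#define L1_CACHE_ALIGN(x) \
        (((x) + L1_CACHE_BYTES - 1) & ~(unsigned long)(L1_CACHE_BYTES - 1))

static unsigned long pick_threshold(unsigned long size,
                                    unsigned long alltime,
                                    unsigned long rangetime,
                                    unsigned long dc_size,
                                    unsigned int ncpus)
{
        /* measured break-even point between range and full flush */
        unsigned long threshold = L1_CACHE_ALIGN(size * alltime / rangetime);
        /* empirical floor: the whole d-cache, once per online CPU */
        unsigned long threshold2 = dc_size * ncpus;

        return threshold2 > threshold ? threshold2 : threshold;
}

int main(void)
{
        printf("threshold = %lu KiB\n",
               pick_threshold(1UL << 20, 1000, 4000, 1UL << 20, 4) >> 10);
        return 0;
}
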
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 6e9cdb269862..ecf50159359e 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -1288,74 +1288,12 @@ nadtlb_check_alias_20:
nadtlb_emulate:
/*
- * Non access misses can be caused by fdc,fic,pdc,lpa,probe and
- * probei instructions. We don't want to fault for these
- * instructions (not only does it not make sense, it can cause
- * deadlocks, since some flushes are done with the mmap
- * semaphore held). If the translation doesn't exist, we can't
- * insert a translation, so have to emulate the side effects
- * of the instruction. Since we don't insert a translation
- * we can get a lot of faults during a flush loop, so it makes
- * sense to try to do it here with minimum overhead. We only
- * emulate fdc,fic,pdc,probew,prober instructions whose base
- * and index registers are not shadowed. We defer everything
- * else to the "slow" path.
+ * Non-access misses can be caused by fdc,fic,pdc,lpa,probe and
+ * probei instructions. The kernel no longer faults doing flushes.
+ * Use of lpa and probe instructions is rare. Given the issue
+ * with shadow registers, we defer everything to the "slow" path.
*/
-
- mfctl %cr19,%r9 /* Get iir */
-
- /* PA 2.0 Arch Ref. Book pg 382 has a good description of the insn bits.
- Checks for fdc,fdce,pdc,"fic,4f",prober,probeir,probew, probeiw */
-
- /* Checks for fdc,fdce,pdc,"fic,4f" only */
- ldi 0x280,%r16
- and %r9,%r16,%r17
- cmpb,<>,n %r16,%r17,nadtlb_probe_check
- bb,>=,n %r9,26,nadtlb_nullify /* m bit not set, just nullify */
- BL get_register,%r25
- extrw,u %r9,15,5,%r8 /* Get index register # */
- cmpib,COND(=),n -1,%r1,nadtlb_fault /* have to use slow path */
- copy %r1,%r24
- BL get_register,%r25
- extrw,u %r9,10,5,%r8 /* Get base register # */
- cmpib,COND(=),n -1,%r1,nadtlb_fault /* have to use slow path */
- BL set_register,%r25
- add,l %r1,%r24,%r1 /* doesn't affect c/b bits */
-
-nadtlb_nullify:
- mfctl %ipsw,%r8
- ldil L%PSW_N,%r9
- or %r8,%r9,%r8 /* Set PSW_N */
- mtctl %r8,%ipsw
-
- rfir
- nop
-
- /*
- When there is no translation for the probe address then we
- must nullify the insn and return zero in the target register.
- This will indicate to the calling code that it does not have
- write/read privileges to this address.
-
- This should technically work for prober and probew in PA 1.1,
- and also probe,r and probe,w in PA 2.0
-
- WARNING: USE ONLY NON-SHADOW REGISTERS WITH PROBE INSN!
- THE SLOW-PATH EMULATION HAS NOT BEEN WRITTEN YET.
-
- */
-nadtlb_probe_check:
- ldi 0x80,%r16
- and %r9,%r16,%r17
- cmpb,<>,n %r16,%r17,nadtlb_fault /* Must be probe,[rw]*/
- BL get_register,%r25 /* Find the target register */
- extrw,u %r9,31,5,%r8 /* Get target register */
- cmpib,COND(=),n -1,%r1,nadtlb_fault /* have to use slow path */
- BL set_register,%r25
- copy %r0,%r1 /* Write zero to target register */
- b nadtlb_nullify /* Nullify return insn */
- nop
-
+ b,n nadtlb_fault
#ifdef CONFIG_64BIT
itlb_miss_20w:
diff --git a/arch/parisc/kernel/kprobes.c b/arch/parisc/kernel/kprobes.c
index e2bdb5a5f93e..3343d2fb7889 100644
--- a/arch/parisc/kernel/kprobes.c
+++ b/arch/parisc/kernel/kprobes.c
@@ -5,6 +5,7 @@
* PA-RISC kprobes implementation
*
* Copyright (c) 2019 Sven Schnelle <svens@stackframe.org>
+ * Copyright (c) 2022 Helge Deller <deller@gmx.de>
*/
#include <linux/types.h>
@@ -25,9 +26,14 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
if (!p->ainsn.insn)
return -ENOMEM;
- memcpy(p->ainsn.insn, p->addr,
- MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+ /*
+ * Set up the new instructions. The second break instruction will
+ * trigger a call to parisc_kprobe_ss_handler().
+ */
p->opcode = *p->addr;
+ p->ainsn.insn[0] = p->opcode;
+ p->ainsn.insn[1] = PARISC_KPROBES_BREAK_INSN2;
+
flush_insn_slot(p);
return 0;
}
@@ -73,9 +79,7 @@ static void __kprobes setup_singlestep(struct kprobe *p,
{
kcb->iaoq[0] = regs->iaoq[0];
kcb->iaoq[1] = regs->iaoq[1];
- regs->iaoq[0] = (unsigned long)p->ainsn.insn;
- mtctl(0, 0);
- regs->gr[0] |= PSW_R;
+ instruction_pointer_set(regs, (unsigned long)p->ainsn.insn);
}
int __kprobes parisc_kprobe_break_handler(struct pt_regs *regs)
@@ -165,9 +169,8 @@ int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs)
regs->iaoq[0] = kcb->iaoq[1];
break;
default:
- regs->iaoq[1] = kcb->iaoq[0];
- regs->iaoq[1] += (regs->iaoq[1] - regs->iaoq[0]) + 4;
regs->iaoq[0] = kcb->iaoq[1];
+ regs->iaoq[1] = regs->iaoq[0] + 4;
break;
}
kcb->kprobe_status = KPROBE_HIT_SSDONE;
@@ -191,14 +194,17 @@ static struct kprobe trampoline_p = {
static int __kprobes trampoline_probe_handler(struct kprobe *p,
struct pt_regs *regs)
{
- unsigned long orig_ret_address;
-
- orig_ret_address = __kretprobe_trampoline_handler(regs, NULL);
- instruction_pointer_set(regs, orig_ret_address);
+ __kretprobe_trampoline_handler(regs, NULL);
return 1;
}
+void arch_kretprobe_fixup_return(struct pt_regs *regs,
+ kprobe_opcode_t *correct_ret_addr)
+{
+ regs->gr[2] = (unsigned long)correct_ret_addr;
+}
+
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
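
The kprobes change stops copying MAX_INSN_SIZE words into the slot; instead the slot is exactly the probed instruction followed by a second BREAK, so the trap taken after single-stepping lands in parisc_kprobe_ss_handler(). A model of the slot layout (the opcode constants below are placeholders, not the real PA-RISC encodings):

#include <stdio.h>
#include <stdint.h>

typedef uint32_t kprobe_opcode_t;

/* Placeholder: not the real PARISC_KPROBES_BREAK_INSN2 encoding. */
#define KPROBES_BREAK_INSN2 0x00010004u

struct kprobe_slot { kprobe_opcode_t insn[2]; };

/* Copy the probed instruction, then a break that re-enters the handler. */
static void prepare_slot(struct kprobe_slot *slot, kprobe_opcode_t orig)
{
        slot->insn[0] = orig;                 /* instruction to single-step */
        slot->insn[1] = KPROBES_BREAK_INSN2;  /* traps into ss_handler */
}

int main(void)
{
        struct kprobe_slot s;

        prepare_slot(&s, 0x08000240u);        /* illustrative opcode */
        printf("slot: %#x %#x\n", (unsigned)s.insn[0], (unsigned)s.insn[1]);
        return 0;
}
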
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 36a57aa38e87..160996f2198e 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -91,7 +91,7 @@ static inline int map_pte_uncached(pte_t * pte,
printk(KERN_ERR "map_pte_uncached: page already exists\n");
purge_tlb_start(flags);
set_pte(pte, __mk_pte(*paddr_ptr, PAGE_KERNEL_UNC));
- pdtlb_kernel(orig_vaddr);
+ pdtlb(SR_KERNEL, orig_vaddr);
purge_tlb_end(flags);
vaddr += PAGE_SIZE;
orig_vaddr += PAGE_SIZE;
@@ -175,7 +175,7 @@ static inline void unmap_uncached_pte(pmd_t * pmd, unsigned long vaddr,
pte_clear(&init_mm, vaddr, pte);
purge_tlb_start(flags);
- pdtlb_kernel(orig_vaddr);
+ pdtlb(SR_KERNEL, orig_vaddr);
purge_tlb_end(flags);
vaddr += PAGE_SIZE;
orig_vaddr += PAGE_SIZE;
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index 46b1050640b8..24443908f905 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -1,16 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * linux/arch/parisc/kernel/signal.c: Architecture-specific signal
- * handling support.
+ * PA-RISC architecture-specific signal handling support.
*
* Copyright (C) 2000 David Huggins-Daines <dhd@debian.org>
* Copyright (C) 2000 Linuxcare, Inc.
+ * Copyright (C) 2000-2022 Helge Deller <deller@gmx.de>
+ * Copyright (C) 2022 John David Anglin <dave.anglin@bell.net>
*
* Based on the ia64, i386, and alpha versions.
- *
- * Like the IA-64, we are a recent enough port (we are *starting*
- * with glibc2.2) that we do not need to support the old non-realtime
- * Linux signals. Therefore we don't.
*/
#include <linux/sched.h>
@@ -32,6 +29,7 @@
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/asm-offsets.h>
+#include <asm/vdso.h>
#ifdef CONFIG_COMPAT
#include "signal32.h"
@@ -59,14 +57,6 @@
* Do a signal return - restore sigcontext.
*/
-/* Trampoline for calling rt_sigreturn() */
-#define INSN_LDI_R25_0 0x34190000 /* ldi 0,%r25 (in_syscall=0) */
-#define INSN_LDI_R25_1 0x34190002 /* ldi 1,%r25 (in_syscall=1) */
-#define INSN_LDI_R20 0x3414015a /* ldi __NR_rt_sigreturn,%r20 */
-#define INSN_BLE_SR2_R0 0xe4008200 /* be,l 0x100(%sr2,%r0),%sr0,%r31 */
-/* For debugging */
-#define INSN_DIE_HORRIBLY 0x68000ccc /* stw %r0,0x666(%sr0,%r0) */
-
static long
restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
{
@@ -77,9 +67,9 @@ restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
err |= __copy_from_user(regs->iaoq, sc->sc_iaoq, sizeof(regs->iaoq));
err |= __copy_from_user(regs->iasq, sc->sc_iasq, sizeof(regs->iasq));
err |= __get_user(regs->sar, &sc->sc_sar);
- DBG(2,"restore_sigcontext: iaoq is %#lx / %#lx\n",
- regs->iaoq[0],regs->iaoq[1]);
- DBG(2,"restore_sigcontext: r28 is %ld\n", regs->gr[28]);
+ DBG(2, "%s: iaoq is %#lx / %#lx\n",
+ __func__, regs->iaoq[0], regs->iaoq[1]);
+ DBG(2, "%s: r28 is %ld\n", __func__, regs->gr[28]);
return err;
}
@@ -102,7 +92,7 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall)
/* Unwind the user stack to get the rt_sigframe structure. */
frame = (struct rt_sigframe __user *)
(usp - sigframe_size);
- DBG(2,"sys_rt_sigreturn: frame is %p\n", frame);
+ DBG(2, "%s: frame is %p pid %d\n", __func__, frame, task_pid_nr(current));
regs->orig_r28 = 1; /* no restarts for sigreturn */
@@ -110,7 +100,6 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall)
compat_frame = (struct compat_rt_sigframe __user *)frame;
if (is_compat_task()) {
- DBG(2,"sys_rt_sigreturn: ELF32 process.\n");
if (get_compat_sigset(&set, &compat_frame->uc.uc_sigmask))
goto give_sigsegv;
} else
@@ -125,25 +114,25 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall)
/* Good thing we saved the old gr[30], eh? */
#ifdef CONFIG_64BIT
if (is_compat_task()) {
- DBG(1,"sys_rt_sigreturn: compat_frame->uc.uc_mcontext 0x%p\n",
- &compat_frame->uc.uc_mcontext);
+ DBG(1, "%s: compat_frame->uc.uc_mcontext 0x%p\n",
+ __func__, &compat_frame->uc.uc_mcontext);
// FIXME: Load upper half from register file
if (restore_sigcontext32(&compat_frame->uc.uc_mcontext,
&compat_frame->regs, regs))
goto give_sigsegv;
- DBG(1,"sys_rt_sigreturn: usp %#08lx stack 0x%p\n",
- usp, &compat_frame->uc.uc_stack);
+ DBG(1, "%s: usp %#08lx stack 0x%p\n",
+ __func__, usp, &compat_frame->uc.uc_stack);
if (compat_restore_altstack(&compat_frame->uc.uc_stack))
goto give_sigsegv;
} else
#endif
{
- DBG(1,"sys_rt_sigreturn: frame->uc.uc_mcontext 0x%p\n",
- &frame->uc.uc_mcontext);
+ DBG(1, "%s: frame->uc.uc_mcontext 0x%p\n",
+ __func__, &frame->uc.uc_mcontext);
if (restore_sigcontext(&frame->uc.uc_mcontext, regs))
goto give_sigsegv;
- DBG(1,"sys_rt_sigreturn: usp %#08lx stack 0x%p\n",
- usp, &frame->uc.uc_stack);
+ DBG(1, "%s: usp %#08lx stack 0x%p\n",
+ __func__, usp, &frame->uc.uc_stack);
if (restore_altstack(&frame->uc.uc_stack))
goto give_sigsegv;
}
@@ -155,14 +144,11 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall)
*/
if (in_syscall)
regs->gr[31] = regs->iaoq[0];
-#if DEBUG_SIG
- DBG(1,"sys_rt_sigreturn: returning to %#lx, DUMPING REGS:\n", regs->iaoq[0]);
- show_regs(regs);
-#endif
+
return;
give_sigsegv:
- DBG(1,"sys_rt_sigreturn: Sending SIGSEGV\n");
+ DBG(1, "%s: Sending SIGSEGV\n", __func__);
force_sig(SIGSEGV);
return;
}
@@ -177,15 +163,15 @@ get_sigframe(struct k_sigaction *ka, unsigned long sp, size_t frame_size)
/*FIXME: ELF32 vs. ELF64 has different frame_size, but since we
don't use the parameter it doesn't matter */
- DBG(1,"get_sigframe: ka = %#lx, sp = %#lx, frame_size = %#lx\n",
- (unsigned long)ka, sp, frame_size);
+ DBG(1, "%s: ka = %#lx, sp = %#lx, frame_size = %zu\n",
+ __func__, (unsigned long)ka, sp, frame_size);
/* Align alternate stack and reserve 64 bytes for the signal
handler's frame marker. */
if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && ! sas_ss_flags(sp))
sp = (current->sas_ss_sp + 0x7f) & ~0x3f; /* Stacks grow up! */
- DBG(1,"get_sigframe: Returning sp = %#lx\n", (unsigned long)sp);
+ DBG(1, "%s: Returning sp = %#lx\n", __func__, (unsigned long)sp);
return (void __user *) sp; /* Stacks grow up. Fun. */
}
@@ -205,20 +191,20 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, int in_sysc
err |= __put_user(regs->gr[31]+4, &sc->sc_iaoq[1]);
err |= __put_user(regs->sr[3], &sc->sc_iasq[0]);
err |= __put_user(regs->sr[3], &sc->sc_iasq[1]);
- DBG(1,"setup_sigcontext: iaoq %#lx / %#lx (in syscall)\n",
- regs->gr[31], regs->gr[31]+4);
+ DBG(1, "%s: iaoq %#lx / %#lx (in syscall)\n",
+ __func__, regs->gr[31], regs->gr[31]+4);
} else {
err |= __copy_to_user(sc->sc_iaoq, regs->iaoq, sizeof(regs->iaoq));
err |= __copy_to_user(sc->sc_iasq, regs->iasq, sizeof(regs->iasq));
- DBG(1,"setup_sigcontext: iaoq %#lx / %#lx (not in syscall)\n",
- regs->iaoq[0], regs->iaoq[1]);
+ DBG(1, "%s: iaoq %#lx / %#lx (not in syscall)\n",
+ __func__, regs->iaoq[0], regs->iaoq[1]);
}
err |= __put_user(flags, &sc->sc_flags);
err |= __copy_to_user(sc->sc_gr, regs->gr, sizeof(regs->gr));
err |= __copy_to_user(sc->sc_fr, regs->fr, sizeof(regs->fr));
err |= __put_user(regs->sar, &sc->sc_sar);
- DBG(1,"setup_sigcontext: r28 is %ld\n", regs->gr[28]);
+ DBG(1, "%s: r28 is %ld\n", __func__, regs->gr[28]);
return err;
}
@@ -230,7 +216,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
struct rt_sigframe __user *frame;
unsigned long rp, usp;
unsigned long haddr, sigframe_size;
- unsigned long start, end;
+ unsigned long start;
int err = 0;
#ifdef CONFIG_64BIT
struct compat_rt_sigframe __user * compat_frame;
@@ -247,8 +233,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
#endif
frame = get_sigframe(&ksig->ka, usp, sigframe_size);
- DBG(1,"SETUP_RT_FRAME: START\n");
- DBG(1,"setup_rt_frame: frame %p info %p\n", frame, ksig->info);
+ DBG(1, "%s: frame %p info %p\n", __func__, frame, &ksig->info);
start = (unsigned long) frame;
if (start >= user_addr_max() - sigframe_size)
@@ -259,11 +244,12 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
compat_frame = (struct compat_rt_sigframe __user *)frame;
if (is_compat_task()) {
- DBG(1,"setup_rt_frame: frame->info = 0x%p\n", &compat_frame->info);
+ DBG(1, "%s: frame->info = 0x%p\n", __func__, &compat_frame->info);
err |= copy_siginfo_to_user32(&compat_frame->info, &ksig->info);
err |= __compat_save_altstack( &compat_frame->uc.uc_stack, regs->gr[30]);
- DBG(1,"setup_rt_frame: frame->uc = 0x%p\n", &compat_frame->uc);
- DBG(1,"setup_rt_frame: frame->uc.uc_mcontext = 0x%p\n", &compat_frame->uc.uc_mcontext);
+ DBG(1, "%s: frame->uc = 0x%p\n", __func__, &compat_frame->uc);
+ DBG(1, "%s: frame->uc.uc_mcontext = 0x%p\n",
+ __func__, &compat_frame->uc.uc_mcontext);
err |= setup_sigcontext32(&compat_frame->uc.uc_mcontext,
&compat_frame->regs, regs, in_syscall);
err |= put_compat_sigset(&compat_frame->uc.uc_sigmask, set,
@@ -271,11 +257,12 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
} else
#endif
{
- DBG(1,"setup_rt_frame: frame->info = 0x%p\n", &frame->info);
+ DBG(1, "%s: frame->info = 0x%p\n", __func__, &frame->info);
err |= copy_siginfo_to_user(&frame->info, &ksig->info);
err |= __save_altstack(&frame->uc.uc_stack, regs->gr[30]);
- DBG(1,"setup_rt_frame: frame->uc = 0x%p\n", &frame->uc);
- DBG(1,"setup_rt_frame: frame->uc.uc_mcontext = 0x%p\n", &frame->uc.uc_mcontext);
+ DBG(1, "%s: frame->uc = 0x%p\n", __func__, &frame->uc);
+ DBG(1, "%s: frame->uc.uc_mcontext = 0x%p\n",
+ __func__, &frame->uc.uc_mcontext);
err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, in_syscall);
/* FIXME: Should probably be converted as well for the compat case */
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
@@ -284,32 +271,15 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
if (err)
return -EFAULT;
- /* Set up to return from userspace. If provided, use a stub
- already in userspace. The first words of tramp are used to
- save the previous sigrestartblock trampoline that might be
- on the stack. We start the sigreturn trampoline at
- SIGRESTARTBLOCK_TRAMP+X. */
- err |= __put_user(in_syscall ? INSN_LDI_R25_1 : INSN_LDI_R25_0,
- &frame->tramp[SIGRESTARTBLOCK_TRAMP+0]);
- err |= __put_user(INSN_LDI_R20,
- &frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
- err |= __put_user(INSN_BLE_SR2_R0,
- &frame->tramp[SIGRESTARTBLOCK_TRAMP+2]);
- err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]);
-
- start = (unsigned long) &frame->tramp[0];
- end = (unsigned long) &frame->tramp[TRAMP_SIZE];
- flush_user_dcache_range_asm(start, end);
- flush_user_icache_range_asm(start, end);
-
- /* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP
- * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP
- * So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP
- */
- rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP];
+#ifdef CONFIG_64BIT
+ if (!is_compat_task())
+ rp = VDSO64_SYMBOL(current, sigtramp_rt);
+ else
+#endif
+ rp = VDSO32_SYMBOL(current, sigtramp_rt);
- if (err)
- return -EFAULT;
+ if (in_syscall)
+ rp += 4*4; /* skip 4 instructions and start at ldi 1,%r25 */
haddr = A(ksig->ka.sa.sa_handler);
/* The sa_handler may be a pointer to a function descriptor */
@@ -340,8 +310,8 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
haddr = fdesc.addr;
regs->gr[19] = fdesc.gp;
- DBG(1,"setup_rt_frame: 64 bit signal, exe=%#lx, r19=%#lx, in_syscall=%d\n",
- haddr, regs->gr[19], in_syscall);
+ DBG(1, "%s: 64 bit signal, exe=%#lx, r19=%#lx, in_syscall=%d\n",
+ __func__, haddr, regs->gr[19], in_syscall);
}
#endif
@@ -351,7 +321,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
regs->gr[31] = haddr;
#ifdef CONFIG_64BIT
if (!test_thread_flag(TIF_32BIT))
- sigframe_size |= 1;
+ sigframe_size |= 1; /* XXX ???? */
#endif
} else {
unsigned long psw = USER_PSW;
@@ -373,11 +343,11 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
}
regs->gr[0] = psw;
- regs->iaoq[0] = haddr | 3;
+ regs->iaoq[0] = haddr | PRIV_USER;
regs->iaoq[1] = regs->iaoq[0] + 4;
}
- regs->gr[2] = rp; /* userland return pointer */
+ regs->gr[2] = rp; /* userland return pointer */
regs->gr[26] = ksig->sig; /* signal number */
#ifdef CONFIG_64BIT
@@ -391,15 +361,15 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
regs->gr[24] = A(&frame->uc); /* ucontext pointer */
}
- DBG(1,"setup_rt_frame: making sigreturn frame: %#lx + %#lx = %#lx\n",
+ DBG(1, "%s: making sigreturn frame: %#lx + %#lx = %#lx\n", __func__,
regs->gr[30], sigframe_size,
regs->gr[30] + sigframe_size);
/* Raise the user stack pointer to make a proper call frame. */
regs->gr[30] = (A(frame) + sigframe_size);
- DBG(1,"setup_rt_frame: sig deliver (%s,%d) frame=0x%p sp=%#lx iaoq=%#lx/%#lx rp=%#lx\n",
- current->comm, current->pid, frame, regs->gr[30],
+ DBG(1, "%s: sig deliver (%s,%d) frame=0x%p sp=%#lx iaoq=%#lx/%#lx rp=%#lx\n",
+ __func__, current->comm, current->pid, frame, regs->gr[30],
regs->iaoq[0], regs->iaoq[1], rp);
return 0;
@@ -415,8 +385,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs, int in_syscall)
int ret;
sigset_t *oldset = sigmask_to_save();
- DBG(1,"handle_signal: sig=%ld, ka=%p, info=%p, oldset=%p, regs=%p\n",
- ksig->sig, ksig->ka, ksig->info, oldset, regs);
+ DBG(1, "%s: sig=%d, ka=%p, info=%p, oldset=%p, regs=%p\n",
+ __func__, ksig->sig, &ksig->ka, &ksig->info, oldset, regs);
/* Set up the stack frame */
ret = setup_rt_frame(ksig, oldset, regs, in_syscall);
@@ -424,8 +394,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs, int in_syscall)
signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLESTEP) ||
test_thread_flag(TIF_BLOCKSTEP));
- DBG(1,KERN_DEBUG "do_signal: Exit (success), regs->gr[28] = %ld\n",
- regs->gr[28]);
+ DBG(1, "%s: Exit (success), regs->gr[28] = %ld\n",
+ __func__, regs->gr[28]);
}
/*
@@ -483,21 +453,27 @@ syscall_restart(struct pt_regs *regs, struct k_sigaction *ka)
if (regs->orig_r28)
return;
regs->orig_r28 = 1; /* no more restarts */
+
+ DBG(1, "%s: orig_r28 = %ld pid %d r20 %ld\n",
+ __func__, regs->orig_r28, task_pid_nr(current), regs->gr[20]);
+
/* Check the return code */
switch (regs->gr[28]) {
case -ERESTART_RESTARTBLOCK:
case -ERESTARTNOHAND:
- DBG(1,"ERESTARTNOHAND: returning -EINTR\n");
+ DBG(1, "%s: ERESTARTNOHAND: returning -EINTR\n", __func__);
regs->gr[28] = -EINTR;
break;
case -ERESTARTSYS:
if (!(ka->sa.sa_flags & SA_RESTART)) {
- DBG(1,"ERESTARTSYS: putting -EINTR\n");
+ DBG(1, "%s: ERESTARTSYS: putting -EINTR pid %d\n",
+ __func__, task_pid_nr(current));
regs->gr[28] = -EINTR;
break;
}
fallthrough;
case -ERESTARTNOINTR:
+ DBG(1, "%s: %ld\n", __func__, regs->gr[28]);
check_syscallno_in_delay_branch(regs);
break;
}
@@ -509,50 +485,52 @@ insert_restart_trampoline(struct pt_regs *regs)
if (regs->orig_r28)
return;
regs->orig_r28 = 1; /* no more restarts */
- switch(regs->gr[28]) {
+
+ DBG(2, "%s: gr28 = %ld pid %d\n",
+ __func__, regs->gr[28], task_pid_nr(current));
+
+ switch (regs->gr[28]) {
case -ERESTART_RESTARTBLOCK: {
/* Restart the system call - no handlers present */
unsigned int *usp = (unsigned int *)regs->gr[30];
- unsigned long start = (unsigned long) &usp[2];
- unsigned long end = (unsigned long) &usp[5];
+ unsigned long rp;
long err = 0;
/* check that we don't exceed the stack */
if (A(&usp[0]) >= user_addr_max() - 5 * sizeof(int))
return;
- /* Setup a trampoline to restart the syscall
- * with __NR_restart_syscall
+ /* Call trampoline in vdso to restart the syscall
+ * with __NR_restart_syscall.
+ * The original return addresses are on the stack like this:
*
* 0: <return address (orig r31)>
* 4: <2nd half for 64-bit>
- * 8: ldw 0(%sp), %r31
- * 12: be 0x100(%sr2, %r0)
- * 16: ldi __NR_restart_syscall, %r20
*/
#ifdef CONFIG_64BIT
- err |= put_user(regs->gr[31] >> 32, &usp[0]);
- err |= put_user(regs->gr[31] & 0xffffffff, &usp[1]);
- err |= put_user(0x0fc010df, &usp[2]);
-#else
- err |= put_user(regs->gr[31], &usp[0]);
- err |= put_user(0x0fc0109f, &usp[2]);
+ if (!is_compat_task()) {
+ err |= put_user(regs->gr[31] >> 32, &usp[0]);
+ err |= put_user(regs->gr[31] & 0xffffffff, &usp[1]);
+ rp = VDSO64_SYMBOL(current, restart_syscall);
+ } else
#endif
- err |= put_user(0xe0008200, &usp[3]);
- err |= put_user(0x34140000, &usp[4]);
-
+ {
+ err |= put_user(regs->gr[31], &usp[0]);
+ rp = VDSO32_SYMBOL(current, restart_syscall);
+ }
WARN_ON(err);
- /* flush data/instruction cache for new insns */
- flush_user_dcache_range_asm(start, end);
- flush_user_icache_range_asm(start, end);
-
- regs->gr[31] = regs->gr[30] + 8;
+ regs->gr[31] = rp;
+ DBG(1, "%s: ERESTART_RESTARTBLOCK\n", __func__);
return;
}
+ case -EINTR:
+ /* ok, was handled before and should be returned. */
+ break;
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
+ DBG(1, "%s: Type %ld\n", __func__, regs->gr[28]);
check_syscallno_in_delay_branch(regs);
return;
default:
@@ -567,30 +545,35 @@ insert_restart_trampoline(struct pt_regs *regs)
* registers). As noted below, the syscall number gets restored for
* us due to the magic of delayed branching.
*/
-asmlinkage void
-do_signal(struct pt_regs *regs, long in_syscall)
+static void do_signal(struct pt_regs *regs, long in_syscall)
{
struct ksignal ksig;
+ int restart_syscall;
+ bool has_handler;
- DBG(1,"\ndo_signal: regs=0x%p, sr7 %#lx, in_syscall=%d\n",
- regs, regs->sr[7], in_syscall);
+ has_handler = get_signal(&ksig);
- if (get_signal(&ksig)) {
- DBG(3,"do_signal: signr = %d, regs->gr[28] = %ld\n", signr, regs->gr[28]);
+ restart_syscall = 0;
+ if (in_syscall)
+ restart_syscall = 1;
+
+ if (has_handler) {
/* Restart a system call if necessary. */
- if (in_syscall)
+ if (restart_syscall)
syscall_restart(regs, &ksig.ka);
handle_signal(&ksig, regs, in_syscall);
+ DBG(1, "%s: Handled signal pid %d\n",
+ __func__, task_pid_nr(current));
return;
}
- /* Did we come from a system call? */
- if (in_syscall)
+ /* Do we need to restart the system call? */
+ if (restart_syscall)
insert_restart_trampoline(regs);
- DBG(1,"do_signal: Exit (not delivered), regs->gr[28] = %ld\n",
- regs->gr[28]);
+ DBG(1, "%s: Exit (not delivered), regs->gr[28] = %ld orig_r28 = %ld pid %d\n",
+ __func__, regs->gr[28], regs->orig_r28, task_pid_nr(current));
restore_saved_sigmask();
}
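
With the trampolines gone from the user stack, setup_rt_frame() computes the signal return pointer from a vDSO symbol, adding four instructions' worth of offset for the in-syscall entry point. A sketch of that selection (the vDSO address below is made up; the real one comes from VDSO32_SYMBOL/VDSO64_SYMBOL):

#include <stdio.h>
#include <stdbool.h>

/* Assumption: a fake vDSO symbol address for sigtramp_rt. */
static const unsigned long vdso_sigtramp_rt = 0xf0001000UL;

static unsigned long sigreturn_rp(bool in_syscall)
{
        unsigned long rp = vdso_sigtramp_rt;

        if (in_syscall)
                rp += 4 * 4;    /* skip 4 insns, start at "ldi 1,%r25" */
        return rp;
}

int main(void)
{
        printf("rp (not in syscall): %#lx\n", sigreturn_rp(false));
        printf("rp (in syscall):     %#lx\n", sigreturn_rp(true));
        return 0;
}
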
diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h
index f166250f2d06..c03eb1ed4c53 100644
--- a/arch/parisc/kernel/signal32.h
+++ b/arch/parisc/kernel/signal32.h
@@ -36,21 +36,12 @@ struct compat_regfile {
compat_int_t rf_sar;
};
-#define COMPAT_SIGRETURN_TRAMP 4
-#define COMPAT_SIGRESTARTBLOCK_TRAMP 5
-#define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \
- COMPAT_SIGRESTARTBLOCK_TRAMP)
-
struct compat_rt_sigframe {
- /* XXX: Must match trampoline size in arch/parisc/kernel/signal.c
- Secondary to that it must protect the ERESTART_RESTARTBLOCK
- trampoline we left on the stack (we were bad and didn't
- change sp so we could run really fast.) */
- compat_uint_t tramp[COMPAT_TRAMP_SIZE];
- compat_siginfo_t info;
- struct compat_ucontext uc;
- /* Hidden location of truncated registers, *must* be last. */
- struct compat_regfile regs;
+ unsigned int tramp[2]; /* holds original return address */
+ compat_siginfo_t info;
+ struct compat_ucontext uc;
+ /* Hidden location of truncated registers, *must* be last. */
+ struct compat_regfile regs;
};
/*
diff --git a/arch/parisc/kernel/topology.c b/arch/parisc/kernel/topology.c
index 0a10e4ddc528..e88a6ce7c96d 100644
--- a/arch/parisc/kernel/topology.c
+++ b/arch/parisc/kernel/topology.c
@@ -101,8 +101,8 @@ void __init store_cpu_topology(unsigned int cpuid)
update_siblings_masks(cpuid);
- pr_info("CPU%u: thread %d, cpu %d, socket %d\n",
- cpuid, cpu_topology[cpuid].thread_id,
+ pr_info("CPU%u: cpu core %d of socket %d\n",
+ cpuid,
cpu_topology[cpuid].core_id,
cpu_topology[cpuid].socket_id);
}
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index b6fdebddc8e9..a6e61cf2cad0 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -302,7 +302,10 @@ static void handle_break(struct pt_regs *regs)
parisc_kprobe_break_handler(regs);
return;
}
-
+ if (unlikely(iir == PARISC_KPROBES_BREAK_INSN2)) {
+ parisc_kprobe_ss_handler(regs);
+ return;
+ }
#endif
#ifdef CONFIG_KGDB
@@ -539,11 +542,6 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
/* Recovery counter trap */
regs->gr[0] &= ~PSW_R;
-#ifdef CONFIG_KPROBES
- if (parisc_kprobe_ss_handler(regs))
- return;
-#endif
-
#ifdef CONFIG_KGDB
if (kgdb_single_step) {
kgdb_handle_exception(0, SIGTRAP, 0, regs);
@@ -662,6 +660,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
by hand. Technically we need to emulate:
fdc,fdce,pdc,"fic,4f",prober,probeir,probew, probeiw
*/
+ if (code == 17 && handle_nadtlb_fault(regs))
+ return;
fault_address = regs->ior;
fault_space = regs->isr;
break;
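
handle_nadtlb_fault() is the new C-side slow path for the code-17 non-access faults that the entry.S hunk now defers. Conceptually it decodes the faulting instruction and, for flush/probe instructions with no translation, emulates the architectural effect: nullify the instruction, and for probes also write 0 to the target register. A toy model of that dispatch (instruction classes and register layout are invented for illustration):

#include <stdio.h>
#include <stdbool.h>

enum insn_class { INSN_FLUSH, INSN_PROBE, INSN_OTHER };

struct fake_regs { unsigned long gr[32]; unsigned long iaoq; };

static bool handle_nadtlb_fault(struct fake_regs *regs, enum insn_class c,
                                int target)
{
        switch (c) {
        case INSN_PROBE:
                regs->gr[target] = 0;   /* "no access rights" result */
                /* fall through */
        case INSN_FLUSH:
                regs->iaoq += 4;        /* nullify: skip the instruction */
                return true;
        default:
                return false;           /* genuine fault, take slow path */
        }
}

int main(void)
{
        struct fake_regs r = { .gr = { 0 }, .iaoq = 0x1000 };

        r.gr[5] = 0xdead;
        if (handle_nadtlb_fault(&r, INSN_PROBE, 5))
                printf("probe emulated: gr5=%lu iaoq=%#lx\n",
                       r.gr[5], r.iaoq);
        return 0;
}
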
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 237d20dd5622..ed1e88a74dc4 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -3,14 +3,11 @@
* Unaligned memory access handler
*
* Copyright (C) 2001 Randolph Chung <tausq@debian.org>
+ * Copyright (C) 2022 Helge Deller <deller@gmx.de>
* Significantly tweaked by LaMont Jones <lamont@debian.org>
*/
-#include <linux/jiffies.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/sched/signal.h>
-#include <linux/sched/debug.h>
#include <linux/signal.h>
#include <linux/ratelimit.h>
#include <linux/uaccess.h>
@@ -25,18 +22,7 @@
#define DPRINTF(fmt, args...)
#endif
-#ifdef CONFIG_64BIT
-#define RFMT "%016lx"
-#else
-#define RFMT "%08lx"
-#endif
-
-#define FIXUP_BRANCH(lbl) \
- "\tldil L%%" #lbl ", %%r1\n" \
- "\tldo R%%" #lbl "(%%r1), %%r1\n" \
- "\tbv,n %%r0(%%r1)\n"
-/* If you use FIXUP_BRANCH, then you must list this clobber */
-#define FIXUP_BRANCH_CLOBBER "r1"
+#define RFMT "%#08lx"
/* 1111 1100 0000 0000 0001 0011 1100 0000 */
#define OPCODE1(a,b,c) ((a)<<26|(b)<<12|(c)<<6)
@@ -114,37 +100,30 @@
#define IM14(i) IM((i),14)
#define ERR_NOTHANDLED -1
-#define ERR_PAGEFAULT -2
int unaligned_enabled __read_mostly = 1;
static int emulate_ldh(struct pt_regs *regs, int toreg)
{
unsigned long saddr = regs->ior;
- unsigned long val = 0;
- int ret;
+ unsigned long val = 0, temp1;
+ ASM_EXCEPTIONTABLE_VAR(ret);
DPRINTF("load " RFMT ":" RFMT " to r%d for 2 bytes\n",
regs->isr, regs->ior, toreg);
__asm__ __volatile__ (
" mtsp %4, %%sr1\n"
-"1: ldbs 0(%%sr1,%3), %%r20\n"
+"1: ldbs 0(%%sr1,%3), %2\n"
"2: ldbs 1(%%sr1,%3), %0\n"
-" depw %%r20, 23, 24, %0\n"
-" copy %%r0, %1\n"
+" depw %2, 23, 24, %0\n"
"3: \n"
-" .section .fixup,\"ax\"\n"
-"4: ldi -2, %1\n"
- FIXUP_BRANCH(3b)
-" .previous\n"
- ASM_EXCEPTIONTABLE_ENTRY(1b, 4b)
- ASM_EXCEPTIONTABLE_ENTRY(2b, 4b)
- : "=r" (val), "=r" (ret)
- : "0" (val), "r" (saddr), "r" (regs->isr)
- : "r20", FIXUP_BRANCH_CLOBBER );
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ : "+r" (val), "+r" (ret), "=&r" (temp1)
+ : "r" (saddr), "r" (regs->isr) );
- DPRINTF("val = 0x" RFMT "\n", val);
+ DPRINTF("val = " RFMT "\n", val);
if (toreg)
regs->gr[toreg] = val;
@@ -155,34 +134,28 @@ static int emulate_ldh(struct pt_regs *regs, int toreg)
static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
{
unsigned long saddr = regs->ior;
- unsigned long val = 0;
- int ret;
+ unsigned long val = 0, temp1, temp2;
+ ASM_EXCEPTIONTABLE_VAR(ret);
DPRINTF("load " RFMT ":" RFMT " to r%d for 4 bytes\n",
regs->isr, regs->ior, toreg);
__asm__ __volatile__ (
-" zdep %3,28,2,%%r19\n" /* r19=(ofs&3)*8 */
-" mtsp %4, %%sr1\n"
-" depw %%r0,31,2,%3\n"
-"1: ldw 0(%%sr1,%3),%0\n"
-"2: ldw 4(%%sr1,%3),%%r20\n"
-" subi 32,%%r19,%%r19\n"
-" mtctl %%r19,11\n"
-" vshd %0,%%r20,%0\n"
-" copy %%r0, %1\n"
+" zdep %4,28,2,%2\n" /* r19=(ofs&3)*8 */
+" mtsp %5, %%sr1\n"
+" depw %%r0,31,2,%4\n"
+"1: ldw 0(%%sr1,%4),%0\n"
+"2: ldw 4(%%sr1,%4),%3\n"
+" subi 32,%4,%2\n"
+" mtctl %2,11\n"
+" vshd %0,%3,%0\n"
"3: \n"
-" .section .fixup,\"ax\"\n"
-"4: ldi -2, %1\n"
- FIXUP_BRANCH(3b)
-" .previous\n"
- ASM_EXCEPTIONTABLE_ENTRY(1b, 4b)
- ASM_EXCEPTIONTABLE_ENTRY(2b, 4b)
- : "=r" (val), "=r" (ret)
- : "0" (val), "r" (saddr), "r" (regs->isr)
- : "r19", "r20", FIXUP_BRANCH_CLOBBER );
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ : "+r" (val), "+r" (ret), "=&r" (temp1), "=&r" (temp2)
+ : "r" (saddr), "r" (regs->isr) );
- DPRINTF("val = 0x" RFMT "\n", val);
+ DPRINTF("val = " RFMT "\n", val);
if (flop)
((__u32*)(regs->fr))[toreg] = val;
@@ -195,16 +168,15 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
{
unsigned long saddr = regs->ior;
__u64 val = 0;
- int ret;
+ ASM_EXCEPTIONTABLE_VAR(ret);
DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n",
regs->isr, regs->ior, toreg);
-#ifdef CONFIG_PA20
-#ifndef CONFIG_64BIT
- if (!flop)
- return -1;
-#endif
+ if (!IS_ENABLED(CONFIG_64BIT) && !flop)
+ return ERR_NOTHANDLED;
+
+#ifdef CONFIG_64BIT
__asm__ __volatile__ (
" depd,z %3,60,3,%%r19\n" /* r19=(ofs&7)*8 */
" mtsp %4, %%sr1\n"
@@ -214,44 +186,32 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
" subi 64,%%r19,%%r19\n"
" mtsar %%r19\n"
" shrpd %0,%%r20,%%sar,%0\n"
-" copy %%r0, %1\n"
"3: \n"
-" .section .fixup,\"ax\"\n"
-"4: ldi -2, %1\n"
- FIXUP_BRANCH(3b)
-" .previous\n"
- ASM_EXCEPTIONTABLE_ENTRY(1b,4b)
- ASM_EXCEPTIONTABLE_ENTRY(2b,4b)
- : "=r" (val), "=r" (ret)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ : "=r" (val), "+r" (ret)
: "0" (val), "r" (saddr), "r" (regs->isr)
- : "r19", "r20", FIXUP_BRANCH_CLOBBER );
+ : "r19", "r20" );
#else
{
- unsigned long valh=0,vall=0;
+ unsigned long shift, temp1;
__asm__ __volatile__ (
-" zdep %5,29,2,%%r19\n" /* r19=(ofs&3)*8 */
-" mtsp %6, %%sr1\n"
-" dep %%r0,31,2,%5\n"
-"1: ldw 0(%%sr1,%5),%0\n"
-"2: ldw 4(%%sr1,%5),%1\n"
-"3: ldw 8(%%sr1,%5),%%r20\n"
-" subi 32,%%r19,%%r19\n"
-" mtsar %%r19\n"
-" vshd %0,%1,%0\n"
-" vshd %1,%%r20,%1\n"
-" copy %%r0, %2\n"
+" zdep %2,29,2,%3\n" /* r19=(ofs&3)*8 */
+" mtsp %5, %%sr1\n"
+" dep %%r0,31,2,%2\n"
+"1: ldw 0(%%sr1,%2),%0\n"
+"2: ldw 4(%%sr1,%2),%R0\n"
+"3: ldw 8(%%sr1,%2),%4\n"
+" subi 32,%3,%3\n"
+" mtsar %3\n"
+" vshd %0,%R0,%0\n"
+" vshd %R0,%4,%R0\n"
"4: \n"
-" .section .fixup,\"ax\"\n"
-"5: ldi -2, %2\n"
- FIXUP_BRANCH(4b)
-" .previous\n"
- ASM_EXCEPTIONTABLE_ENTRY(1b,5b)
- ASM_EXCEPTIONTABLE_ENTRY(2b,5b)
- ASM_EXCEPTIONTABLE_ENTRY(3b,5b)
- : "=r" (valh), "=r" (vall), "=r" (ret)
- : "0" (valh), "1" (vall), "r" (saddr), "r" (regs->isr)
- : "r19", "r20", FIXUP_BRANCH_CLOBBER );
- val=((__u64)valh<<32)|(__u64)vall;
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b)
+ : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
+ : "r" (regs->isr) );
}
#endif
@@ -267,31 +227,25 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
static int emulate_sth(struct pt_regs *regs, int frreg)
{
- unsigned long val = regs->gr[frreg];
- int ret;
+ unsigned long val = regs->gr[frreg], temp1;
+ ASM_EXCEPTIONTABLE_VAR(ret);
if (!frreg)
val = 0;
- DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 2 bytes\n", frreg,
+ DPRINTF("store r%d (" RFMT ") to " RFMT ":" RFMT " for 2 bytes\n", frreg,
val, regs->isr, regs->ior);
__asm__ __volatile__ (
-" mtsp %3, %%sr1\n"
-" extrw,u %1, 23, 8, %%r19\n"
-"1: stb %1, 1(%%sr1, %2)\n"
-"2: stb %%r19, 0(%%sr1, %2)\n"
-" copy %%r0, %0\n"
+" mtsp %4, %%sr1\n"
+" extrw,u %2, 23, 8, %1\n"
+"1: stb %1, 0(%%sr1, %3)\n"
+"2: stb %2, 1(%%sr1, %3)\n"
"3: \n"
-" .section .fixup,\"ax\"\n"
-"4: ldi -2, %0\n"
- FIXUP_BRANCH(3b)
-" .previous\n"
- ASM_EXCEPTIONTABLE_ENTRY(1b,4b)
- ASM_EXCEPTIONTABLE_ENTRY(2b,4b)
- : "=r" (ret)
- : "r" (val), "r" (regs->ior), "r" (regs->isr)
- : "r19", FIXUP_BRANCH_CLOBBER );
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ : "+r" (ret), "=&r" (temp1)
+ : "r" (val), "r" (regs->ior), "r" (regs->isr) );
return ret;
}
@@ -299,7 +253,7 @@ static int emulate_sth(struct pt_regs *regs, int frreg)
static int emulate_stw(struct pt_regs *regs, int frreg, int flop)
{
unsigned long val;
- int ret;
+ ASM_EXCEPTIONTABLE_VAR(ret);
if (flop)
val = ((__u32*)(regs->fr))[frreg];
@@ -308,7 +262,7 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop)
else
val = 0;
- DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 4 bytes\n", frreg,
+ DPRINTF("store r%d (" RFMT ") to " RFMT ":" RFMT " for 4 bytes\n", frreg,
val, regs->isr, regs->ior);
@@ -328,24 +282,19 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop)
" or %%r1, %%r21, %%r21\n"
" stw %%r20,0(%%sr1,%2)\n"
" stw %%r21,4(%%sr1,%2)\n"
-" copy %%r0, %0\n"
"3: \n"
-" .section .fixup,\"ax\"\n"
-"4: ldi -2, %0\n"
- FIXUP_BRANCH(3b)
-" .previous\n"
- ASM_EXCEPTIONTABLE_ENTRY(1b,4b)
- ASM_EXCEPTIONTABLE_ENTRY(2b,4b)
- : "=r" (ret)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+ : "+r" (ret)
: "r" (val), "r" (regs->ior), "r" (regs->isr)
- : "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER );
+ : "r19", "r20", "r21", "r22", "r1" );
- return 0;
+ return ret;
}
static int emulate_std(struct pt_regs *regs, int frreg, int flop)
{
__u64 val;
- int ret;
+ ASM_EXCEPTIONTABLE_VAR(ret);
if (flop)
val = regs->fr[frreg];
@@ -357,11 +306,10 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
DPRINTF("store r%d (0x%016llx) to " RFMT ":" RFMT " for 8 bytes\n", frreg,
val, regs->isr, regs->ior);
-#ifdef CONFIG_PA20
-#ifndef CONFIG_64BIT
- if (!flop)
- return -1;
-#endif
+ if (!IS_ENABLED(CONFIG_64BIT) && !flop)
+ return ERR_NOTHANDLED;
+
+#ifdef CONFIG_64BIT
__asm__ __volatile__ (
" mtsp %3, %%sr1\n"
" depd,z %2, 60, 3, %%r19\n"
@@ -378,26 +326,21 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
" or %%r1, %%r21, %%r21\n"
"3: std %%r20,0(%%sr1,%2)\n"
"4: std %%r21,8(%%sr1,%2)\n"
-" copy %%r0, %0\n"
"5: \n"
-" .section .fixup,\"ax\"\n"
-"6: ldi -2, %0\n"
- FIXUP_BRANCH(5b)
-" .previous\n"
- ASM_EXCEPTIONTABLE_ENTRY(1b,6b)
- ASM_EXCEPTIONTABLE_ENTRY(2b,6b)
- ASM_EXCEPTIONTABLE_ENTRY(3b,6b)
- ASM_EXCEPTIONTABLE_ENTRY(4b,6b)
- : "=r" (ret)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b)
+ : "+r" (ret)
: "r" (val), "r" (regs->ior), "r" (regs->isr)
- : "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER );
+ : "r19", "r20", "r21", "r22", "r1" );
#else
{
unsigned long valh=(val>>32),vall=(val&0xffffffffl);
__asm__ __volatile__ (
" mtsp %4, %%sr1\n"
" zdep %2, 29, 2, %%r19\n"
-" dep %%r0, 31, 2, %2\n"
+" dep %%r0, 31, 2, %3\n"
" mtsar %%r19\n"
" zvdepi -2, 32, %%r19\n"
"1: ldw 0(%%sr1,%3),%%r20\n"
@@ -409,23 +352,18 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
" andcm %%r21, %%r19, %%r21\n"
" or %1, %%r20, %1\n"
" or %2, %%r21, %2\n"
-"3: stw %1,0(%%sr1,%1)\n"
+"3: stw %1,0(%%sr1,%3)\n"
"4: stw %%r1,4(%%sr1,%3)\n"
"5: stw %2,8(%%sr1,%3)\n"
-" copy %%r0, %0\n"
"6: \n"
-" .section .fixup,\"ax\"\n"
-"7: ldi -2, %0\n"
- FIXUP_BRANCH(6b)
-" .previous\n"
- ASM_EXCEPTIONTABLE_ENTRY(1b,7b)
- ASM_EXCEPTIONTABLE_ENTRY(2b,7b)
- ASM_EXCEPTIONTABLE_ENTRY(3b,7b)
- ASM_EXCEPTIONTABLE_ENTRY(4b,7b)
- ASM_EXCEPTIONTABLE_ENTRY(5b,7b)
- : "=r" (ret)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b)
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b)
+ : "+r" (ret)
: "r" (valh), "r" (vall), "r" (regs->ior), "r" (regs->isr)
- : "r19", "r20", "r21", "r1", FIXUP_BRANCH_CLOBBER );
+ : "r19", "r20", "r21", "r1" );
}
#endif
@@ -438,7 +376,6 @@ void handle_unaligned(struct pt_regs *regs)
unsigned long newbase = R1(regs->iir)?regs->gr[R1(regs->iir)]:0;
int modify = 0;
int ret = ERR_NOTHANDLED;
- register int flop=0; /* true if this is a flop */
__inc_irq_stat(irq_unaligned_count);
@@ -450,10 +387,10 @@ void handle_unaligned(struct pt_regs *regs)
if (!(current->thread.flags & PARISC_UAC_NOPRINT) &&
__ratelimit(&ratelimit)) {
- char buf[256];
- sprintf(buf, "%s(%d): unaligned access to 0x" RFMT " at ip=0x" RFMT "\n",
- current->comm, task_pid_nr(current), regs->ior, regs->iaoq[0]);
- printk(KERN_WARNING "%s", buf);
+ printk(KERN_WARNING "%s(%d): unaligned access to " RFMT
+ " at ip " RFMT " (iir " RFMT ")\n",
+ current->comm, task_pid_nr(current), regs->ior,
+ regs->iaoq[0], regs->iir);
#ifdef DEBUG_UNALIGNED
show_regs(regs);
#endif
@@ -547,7 +484,7 @@ void handle_unaligned(struct pt_regs *regs)
ret = emulate_stw(regs, R2(regs->iir),0);
break;
-#ifdef CONFIG_PA20
+#ifdef CONFIG_64BIT
case OPCODE_LDD_I:
case OPCODE_LDDA_I:
case OPCODE_LDD_S:
@@ -565,13 +502,11 @@ void handle_unaligned(struct pt_regs *regs)
case OPCODE_FLDWS:
case OPCODE_FLDWXR:
case OPCODE_FLDWSR:
- flop=1;
ret = emulate_ldw(regs,FR3(regs->iir),1);
break;
case OPCODE_FLDDX:
case OPCODE_FLDDS:
- flop=1;
ret = emulate_ldd(regs,R3(regs->iir),1);
break;
@@ -579,13 +514,11 @@ void handle_unaligned(struct pt_regs *regs)
case OPCODE_FSTWS:
case OPCODE_FSTWXR:
case OPCODE_FSTWSR:
- flop=1;
ret = emulate_stw(regs,FR3(regs->iir),1);
break;
case OPCODE_FSTDX:
case OPCODE_FSTDS:
- flop=1;
ret = emulate_std(regs,R3(regs->iir),1);
break;
@@ -596,37 +529,33 @@ void handle_unaligned(struct pt_regs *regs)
ret = ERR_NOTHANDLED; /* "undefined", but lets kill them. */
break;
}
-#ifdef CONFIG_PA20
switch (regs->iir & OPCODE2_MASK)
{
case OPCODE_FLDD_L:
- flop=1;
ret = emulate_ldd(regs,R2(regs->iir),1);
break;
case OPCODE_FSTD_L:
- flop=1;
ret = emulate_std(regs, R2(regs->iir),1);
break;
+#ifdef CONFIG_64BIT
case OPCODE_LDD_L:
ret = emulate_ldd(regs, R2(regs->iir),0);
break;
case OPCODE_STD_L:
ret = emulate_std(regs, R2(regs->iir),0);
break;
- }
#endif
+ }
switch (regs->iir & OPCODE3_MASK)
{
case OPCODE_FLDW_L:
- flop=1;
- ret = emulate_ldw(regs, R2(regs->iir),0);
+ ret = emulate_ldw(regs, R2(regs->iir), 1);
break;
case OPCODE_LDW_M:
- ret = emulate_ldw(regs, R2(regs->iir),1);
+ ret = emulate_ldw(regs, R2(regs->iir), 0);
break;
case OPCODE_FSTW_L:
- flop=1;
ret = emulate_stw(regs, R2(regs->iir),1);
break;
case OPCODE_STW_M:
@@ -673,7 +602,7 @@ void handle_unaligned(struct pt_regs *regs)
printk(KERN_CRIT "Unaligned handler failed, ret = %d\n", ret);
die_if_kernel("Unaligned data reference", regs, 28);
- if (ret == ERR_PAGEFAULT)
+ if (ret == -EFAULT)
{
force_sig_fault(SIGSEGV, SEGV_MAPERR,
(void __user *)regs->ior);
diff --git a/arch/parisc/kernel/vdso.c b/arch/parisc/kernel/vdso.c
new file mode 100644
index 000000000000..63dc44c4c246
--- /dev/null
+++ b/arch/parisc/kernel/vdso.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Helge Deller <deller@gmx.de>
+ *
+ * based on arch/s390/kernel/vdso.c which is
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/elf.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/compat.h>
+#include <linux/nsproxy.h>
+#include <linux/time_namespace.h>
+#include <linux/random.h>
+
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/sections.h>
+#include <asm/vdso.h>
+#include <asm/cacheflush.h>
+
+extern char vdso32_start, vdso32_end;
+extern char vdso64_start, vdso64_end;
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma)
+{
+ current->mm->context.vdso_base = vma->vm_start;
+ return 0;
+}
+
+#ifdef CONFIG_64BIT
+static struct vm_special_mapping vdso64_mapping = {
+ .name = "[vdso]",
+ .mremap = vdso_mremap,
+};
+#endif
+
+static struct vm_special_mapping vdso32_mapping = {
+ .name = "[vdso]",
+ .mremap = vdso_mremap,
+};
+
+/*
+ * This is called from binfmt_elf; we create the special vma for the
+ * vDSO and insert it into the mm struct tree.
+ */
+int arch_setup_additional_pages(struct linux_binprm *bprm,
+ int executable_stack)
+{
+
+ unsigned long vdso_text_start, vdso_text_len, map_base;
+ struct vm_special_mapping *vdso_mapping;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int rc;
+
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+
+#ifdef CONFIG_64BIT
+ if (!is_compat_task()) {
+ vdso_text_len = &vdso64_end - &vdso64_start;
+ vdso_mapping = &vdso64_mapping;
+ } else
+#endif
+ {
+ vdso_text_len = &vdso32_end - &vdso32_start;
+ vdso_mapping = &vdso32_mapping;
+ }
+
+ map_base = mm->mmap_base;
+ if (current->flags & PF_RANDOMIZE)
+ map_base -= (get_random_int() & 0x1f) * PAGE_SIZE;
+
+ vdso_text_start = get_unmapped_area(NULL, map_base, vdso_text_len, 0, 0);
+
+ /* VM_MAYWRITE for COW so gdb can set breakpoints */
+ vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ vdso_mapping);
+ if (IS_ERR(vma)) {
+ do_munmap(mm, vdso_text_start, PAGE_SIZE, NULL);
+ rc = PTR_ERR(vma);
+ } else {
+ current->mm->context.vdso_base = vdso_text_start;
+ rc = 0;
+ }
+
+ mmap_write_unlock(mm);
+ return rc;
+}
+
+static struct page ** __init vdso_setup_pages(void *start, void *end)
+{
+ int pages = (end - start) >> PAGE_SHIFT;
+ struct page **pagelist;
+ int i;
+
+ pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL);
+ if (!pagelist)
+ panic("%s: Cannot allocate page list for VDSO", __func__);
+ for (i = 0; i < pages; i++)
+ pagelist[i] = virt_to_page(start + i * PAGE_SIZE);
+ return pagelist;
+}
+
+static int __init vdso_init(void)
+{
+#ifdef CONFIG_64BIT
+ vdso64_mapping.pages = vdso_setup_pages(&vdso64_start, &vdso64_end);
+#endif
+ if (IS_ENABLED(CONFIG_COMPAT) || !IS_ENABLED(CONFIG_64BIT))
+ vdso32_mapping.pages = vdso_setup_pages(&vdso32_start, &vdso32_end);
+ return 0;
+}
+arch_initcall(vdso_init);
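A minimal userspace sketch (not part of this patch) of how a process can find the vDSO that arch_setup_additional_pages() maps in, assuming the architecture publishes the base address through the conventional AT_SYSINFO_EHDR auxiliary-vector entry:

	#include <elf.h>
	#include <stdio.h>
	#include <sys/auxv.h>

	int main(void)
	{
		/* AT_SYSINFO_EHDR points at the ELF header of the "[vdso]" mapping. */
		unsigned long base = getauxval(AT_SYSINFO_EHDR);

		if (!base) {
			puts("no vDSO mapped");
			return 1;
		}
		/* Elf32_Ehdr for 32-bit processes, Elf64_Ehdr for 64-bit ones. */
		Elf32_Ehdr *ehdr = (Elf32_Ehdr *)base;
		printf("vDSO at %#lx, %u program headers\n", base, ehdr->e_phnum);
		return 0;
	}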
diff --git a/arch/parisc/kernel/vdso32/Makefile b/arch/parisc/kernel/vdso32/Makefile
new file mode 100644
index 000000000000..85b1c6d261d1
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/Makefile
@@ -0,0 +1,53 @@
+# List of files in the vdso; it has to be asm only for now
+
+obj-vdso32 = note.o sigtramp.o restart_syscall.o
+
+# Build rules
+
+targets := $(obj-vdso32) vdso32.so
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+
+ccflags-y := -shared -fno-common -fbuiltin -mno-fast-indirect-calls -O2 -mno-long-calls
+# -march=1.1 -mschedule=7100LC
+ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
+ $(call ld-option, -Wl$(comma)--hash-style=sysv)
+asflags-y := -D__VDSO32__ -s
+
+KBUILD_AFLAGS += -DBUILD_VDSO
+KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
+
+VDSO_LIBGCC := $(shell $(CROSS32CC) -print-libgcc-file-name)
+
+obj-y += vdso32_wrapper.o
+extra-y += vdso32.lds
+CPPFLAGS_vdso32.lds += -P -C # -U$(ARCH)
+
+$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so FORCE
+
+# Force dependency (incbin is bad)
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32) $(obj-cvdso32) $(VDSO_LIBGCC)
+ $(call if_changed,vdso32ld)
+
+# assembly rules for the .S files
+$(obj-vdso32): %.o: %.S FORCE
+ $(call if_changed_dep,vdso32as)
+
+$(obj-cvdso32): %.o: %.c FORCE
+ $(call if_changed_dep,vdso32cc)
+
+# actual build commands
+quiet_cmd_vdso32ld = VDSO32L $@
+ cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso32as = VDSO32A $@
+ cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdso32cc = VDSO32C $@
+ cmd_vdso32cc = $(CROSS32CC) $(c_flags) -c -fPIC -mno-fast-indirect-calls -o $@ $<
+
+# Generate VDSO offsets using helper script
+gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+include/generated/vdso32-offsets.h: $(obj)/vdso32.so FORCE
+ $(call if_changed,vdsosym)
diff --git a/arch/parisc/kernel/vdso32/gen_vdso_offsets.sh b/arch/parisc/kernel/vdso32/gen_vdso_offsets.sh
new file mode 100755
index 000000000000..da39d6cff7f1
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/gen_vdso_offsets.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# Match symbols in the DSO that look like __kernel_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Inspired by arm64 version.
+#
+
+LC_ALL=C
+sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso32_offset_\2\t0x\1/p'
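For illustration, feeding the sed expression above a hypothetical nm line (the address is invented; the symbol name comes from restart_syscall.S later in this series) yields one offset macro:

	$ echo '00000120 T __kernel_restart_syscall' | ./gen_vdso_offsets.sh
	#define vdso32_offset_restart_syscall	0x00000120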
diff --git a/arch/parisc/kernel/vdso32/note.S b/arch/parisc/kernel/vdso32/note.S
new file mode 100644
index 000000000000..bb350918bebd
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/note.S
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+
+#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \
+ .section name, flags; \
+ .balign 4; \
+ .long 1f - 0f; /* name length */ \
+ .long 3f - 2f; /* data length */ \
+ .long type; /* note type */ \
+0: .asciz vendor; /* vendor name */ \
+1: .balign 4; \
+2:
+
+#define ASM_ELF_NOTE_END \
+3: .balign 4; /* pad out section */ \
+ .previous
+
+ ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0)
+ .long LINUX_VERSION_CODE
+ ASM_ELF_NOTE_END
diff --git a/arch/parisc/kernel/vdso32/restart_syscall.S b/arch/parisc/kernel/vdso32/restart_syscall.S
new file mode 100644
index 000000000000..7e82008d7e40
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/restart_syscall.S
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Syscall restart trampoline for 32- and 64-bit processes.
+ *
+ * Copyright (C) 2018-2022 Helge Deller <deller@gmx.de>
+ * Copyright (C) 2022 John David Anglin <dave.anglin@bell.net>
+ */
+
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+
+#include <linux/linkage.h>
+
+ .text
+
+ENTRY_CFI(__kernel_restart_syscall)
+ /*
+ * Setup a trampoline to restart the syscall
+ * with __NR_restart_syscall
+ */
+
+ /* load return pointer */
+#if defined(__VDSO64__)
+ ldd 0(%sp), %r31
+#elif defined(__VDSO32__)
+ ldw 0(%sp), %r31
+#endif
+
+ be 0x100(%sr2, %r0)
+ ldi __NR_restart_syscall, %r20
+
+ENDPROC_CFI(__kernel_restart_syscall)
diff --git a/arch/parisc/kernel/vdso32/sigtramp.S b/arch/parisc/kernel/vdso32/sigtramp.S
new file mode 100644
index 000000000000..192da7077869
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/sigtramp.S
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Signal trampolines for 32-bit processes.
+ *
+ * Copyright (C) 2006 Randolph Chung <tausq@debian.org>
+ * Copyright (C) 2018-2022 Helge Deller <deller@gmx.de>
+ * Copyright (C) 2022 John David Anglin <dave.anglin@bell.net>
+ */
+#include <asm/unistd.h>
+#include <linux/linkage.h>
+#include <generated/asm-offsets.h>
+
+ .text
+
+/* Gdb expects the trampoline to be on the stack and the pc to be offset from
+ a 64-byte boundary by 0, 4 or 5 instructions. Since the vdso trampoline
+ is not on the stack, we need a new variant with different offsets and
+ data to tell gdb where to find the signal context on the stack.
+
+ Here we put the offset to the context data at the start of the trampoline
+ region and offset the first trampoline by 2 instructions. Please do
+   not change the trampoline, as the code in gdb depends on exactly the
+   following instruction sequence.
+ */
+ .align 64
+ .word SIGFRAME_CONTEXT_REGS32
+
+/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
+ the return address to get an address in the middle of the presumed
+   call instruction. Since we don't have a call here, we artificially
+ extend the range covered by the unwind info by adding a nop before
+ the real start.
+ */
+ nop
+
+ .globl __kernel_sigtramp_rt
+ .type __kernel_sigtramp_rt, @function
+__kernel_sigtramp_rt:
+ .proc
+ .callinfo FRAME=ASM_SIGFRAME_SIZE32,CALLS,SAVE_RP
+ .entry
+
+.Lsigrt_start = . - 4
+0: ldi 0, %r25 /* (in_syscall=0) */
+ ldi __NR_rt_sigreturn, %r20
+ ble 0x100(%sr2, %r0)
+ nop
+
+1: ldi 1, %r25 /* (in_syscall=1) */
+ ldi __NR_rt_sigreturn, %r20
+ ble 0x100(%sr2, %r0)
+ nop
+.Lsigrt_end:
+ .exit
+ .procend
+ .size __kernel_sigtramp_rt,.-__kernel_sigtramp_rt
+
+
+ .section .eh_frame,"a",@progbits
+
+/* This is where the mcontext_t struct can be found on the stack. */
+#define PTREGS SIGFRAME_CONTEXT_REGS32 /* 32-bit process offset is -672 */
+
+/* Register REGNO can be found at offset OFS of the mcontext_t structure. */
+ .macro rsave regno,ofs
+ .byte 0x05 /* DW_CFA_offset_extended */
+ .uleb128 \regno; /* regno */
+ .uleb128 \ofs /* factored offset */
+ .endm
+
+.Lcie:
+ .long .Lcie_end - .Lcie_start
+.Lcie_start:
+ .long 0 /* CIE ID */
+ .byte 1 /* Version number */
+ .stringz "zRS" /* NUL-terminated augmentation string */
+ .uleb128 4 /* Code alignment factor */
+ .sleb128 4 /* Data alignment factor */
+ .byte 89 /* Return address register column, iaoq[0] */
+ .uleb128 1 /* Augmentation value length */
+ .byte 0x1b /* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */
+	.byte 0x0f			/* DW_CFA_def_cfa_expression */
+ .uleb128 9f - 1f /* length */
+1:
+ .byte 0x8e /* DW_OP_breg30 */
+ .sleb128 PTREGS
+9:
+ .balign 4
+.Lcie_end:
+
+ .long .Lfde0_end - .Lfde0_start
+.Lfde0_start:
+ .long .Lfde0_start - .Lcie /* CIE pointer. */
+ .long .Lsigrt_start - . /* PC start, length */
+ .long .Lsigrt_end - .Lsigrt_start
+ .uleb128 0 /* Augmentation */
+
+ /* General registers */
+ rsave 1, 2
+ rsave 2, 3
+ rsave 3, 4
+ rsave 4, 5
+ rsave 5, 6
+ rsave 6, 7
+ rsave 7, 8
+ rsave 8, 9
+ rsave 9, 10
+ rsave 10, 11
+ rsave 11, 12
+ rsave 12, 13
+ rsave 13, 14
+ rsave 14, 15
+ rsave 15, 16
+ rsave 16, 17
+ rsave 17, 18
+ rsave 18, 19
+ rsave 19, 20
+ rsave 20, 21
+ rsave 21, 22
+ rsave 22, 23
+ rsave 23, 24
+ rsave 24, 25
+ rsave 25, 26
+ rsave 26, 27
+ rsave 27, 28
+ rsave 28, 29
+ rsave 29, 30
+ rsave 30, 31
+ rsave 31, 32
+
+ /* Floating-point registers */
+ rsave 32, 42
+ rsave 33, 43
+ rsave 34, 44
+ rsave 35, 45
+ rsave 36, 46
+ rsave 37, 47
+ rsave 38, 48
+ rsave 39, 49
+ rsave 40, 50
+ rsave 41, 51
+ rsave 42, 52
+ rsave 43, 53
+ rsave 44, 54
+ rsave 45, 55
+ rsave 46, 56
+ rsave 47, 57
+ rsave 48, 58
+ rsave 49, 59
+ rsave 50, 60
+ rsave 51, 61
+ rsave 52, 62
+ rsave 53, 63
+ rsave 54, 64
+ rsave 55, 65
+ rsave 56, 66
+ rsave 57, 67
+ rsave 58, 68
+ rsave 59, 69
+ rsave 60, 70
+ rsave 61, 71
+ rsave 62, 72
+ rsave 63, 73
+ rsave 64, 74
+ rsave 65, 75
+ rsave 66, 76
+ rsave 67, 77
+ rsave 68, 78
+ rsave 69, 79
+ rsave 70, 80
+ rsave 71, 81
+ rsave 72, 82
+ rsave 73, 83
+ rsave 74, 84
+ rsave 75, 85
+ rsave 76, 86
+ rsave 77, 87
+ rsave 78, 88
+ rsave 79, 89
+ rsave 80, 90
+ rsave 81, 91
+ rsave 82, 92
+ rsave 83, 93
+ rsave 84, 94
+ rsave 85, 95
+ rsave 86, 96
+ rsave 87, 97
+
+ /* SAR register */
+ rsave 88, 102
+
+ /* iaoq[0] return address register */
+ rsave 89, 100
+ .balign 4
+.Lfde0_end:
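A worked reading of the encoding above, offered purely as an illustration: "rsave 1, 2" assembles to

	/*
	 *	.byte    0x05	DW_CFA_offset_extended
	 *	.uleb128 1	register number (r1)
	 *	.uleb128 2	factored offset
	 */

and with the CIE's data alignment factor of 4, the unwinder restores r1 from CFA + 2 * 4, i.e. 8 bytes into the signal context that the DW_CFA_def_cfa_expression locates at sp + PTREGS.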
diff --git a/arch/parisc/kernel/vdso32/vdso32.lds.S b/arch/parisc/kernel/vdso32/vdso32.lds.S
new file mode 100644
index 000000000000..d4aff3af5262
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/vdso32.lds.S
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This is the infamous ld script for the 32-bit vdso library
+ */
+#include <asm/vdso.h>
+#include <asm/page.h>
+
+/* Default link addresses for the vDSOs */
+OUTPUT_FORMAT("elf32-hppa-linux")
+OUTPUT_ARCH(hppa)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = VDSO_LBASE + SIZEOF_HEADERS;
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ . = ALIGN (16);
+ .text :
+ {
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+
+ /* Other stuff is appended to the text segment: */
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .rodata2 : { *(.data.rel.ro) }
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .gcc_except_table : { *(.gcc_except_table) }
+ .fixup : { *(.fixup) }
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+ .plt : { *(.plt) }
+ .got : { *(.got) }
+
+ _end = .;
+ __end = .;
+ PROVIDE (end = .);
+
+
+ /* Stabs debugging sections are here too
+ */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+
+ /DISCARD/ : { *(.note.GNU-stack) }
+ /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) }
+ /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
+}
+
+
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ __kernel_sigtramp_rt32;
+ __kernel_restart_syscall32;
+ local: *;
+ };
+}
diff --git a/arch/parisc/kernel/vdso32/vdso32_wrapper.S b/arch/parisc/kernel/vdso32/vdso32_wrapper.S
new file mode 100644
index 000000000000..5ac06093e8ec
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/vdso32_wrapper.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+ __PAGE_ALIGNED_DATA
+
+ .globl vdso32_start, vdso32_end
+ .balign PAGE_SIZE
+vdso32_start:
+ .incbin "arch/parisc/kernel/vdso32/vdso32.so"
+ .balign PAGE_SIZE
+vdso32_end:
+
+ .previous
diff --git a/arch/parisc/kernel/vdso64/Makefile b/arch/parisc/kernel/vdso64/Makefile
new file mode 100644
index 000000000000..a30f5ec5eb4b
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/Makefile
@@ -0,0 +1,48 @@
+# List of files in the vdso; it has to be asm only for now
+
+obj-vdso64 = note.o sigtramp.o restart_syscall.o
+
+# Build rules
+
+targets := $(obj-vdso64) vdso64.so
+obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+
+
+ccflags-y := -shared -fno-common -fno-builtin
+ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
+ $(call ld-option, -Wl$(comma)--hash-style=sysv)
+asflags-y := -D__VDSO64__ -s
+
+KBUILD_AFLAGS += -DBUILD_VDSO
+KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
+
+VDSO_LIBGCC := $(shell $(CC) -print-libgcc-file-name)
+
+obj-y += vdso64_wrapper.o
+extra-y += vdso64.lds
+CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
+
+$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so FORCE
+
+# Force dependency (incbin is bad)
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64) $(VDSO_LIBGCC)
+ $(call if_changed,vdso64ld)
+
+# assembly rules for the .S files
+$(obj-vdso64): %.o: %.S FORCE
+ $(call if_changed_dep,vdso64as)
+
+# actual build commands
+quiet_cmd_vdso64ld = VDSO64L $@
+ cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso64as = VDSO64A $@
+ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
+
+# Generate VDSO offsets using helper script
+gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+include/generated/vdso64-offsets.h: $(obj)/vdso64.so FORCE
+ $(call if_changed,vdsosym)
diff --git a/arch/parisc/kernel/vdso64/gen_vdso_offsets.sh b/arch/parisc/kernel/vdso64/gen_vdso_offsets.sh
new file mode 100755
index 000000000000..37f05cb38dad
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/gen_vdso_offsets.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# Match symbols in the DSO that look like __kernel_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Inspired by arm64 version.
+#
+
+LC_ALL=C
+sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso64_offset_\2\t0x\1/p'
diff --git a/arch/parisc/kernel/vdso64/note.S b/arch/parisc/kernel/vdso64/note.S
new file mode 100644
index 000000000000..bd1fa23597d6
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/note.S
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "../vdso32/note.S"
diff --git a/arch/parisc/kernel/vdso64/restart_syscall.S b/arch/parisc/kernel/vdso64/restart_syscall.S
new file mode 100644
index 000000000000..83004451f6b1
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/restart_syscall.S
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "../vdso32/restart_syscall.S"
diff --git a/arch/parisc/kernel/vdso64/sigtramp.S b/arch/parisc/kernel/vdso64/sigtramp.S
new file mode 100644
index 000000000000..66a6d2b241e1
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/sigtramp.S
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Signal trampolines for 64-bit processes.
+ *
+ * Copyright (C) 2006 Randolph Chung <tausq@debian.org>
+ * Copyright (C) 2018-2022 Helge Deller <deller@gmx.de>
+ * Copyright (C) 2022 John David Anglin <dave.anglin@bell.net>
+ */
+#include <asm/unistd.h>
+#include <linux/linkage.h>
+#include <generated/asm-offsets.h>
+
+ .text
+
+/* Gdb expects the trampoline to be on the stack and the pc to be offset from
+ a 64-byte boundary by 0, 4 or 5 instructions. Since the vdso trampoline
+ is not on the stack, we need a new variant with different offsets and
+ data to tell gdb where to find the signal context on the stack.
+
+ Here we put the offset to the context data at the start of the trampoline
+ region and offset the first trampoline by 2 instructions. Please do
+   not change the trampoline, as the code in gdb depends on exactly the
+   following instruction sequence.
+ */
+ .align 64
+ .word SIGFRAME_CONTEXT_REGS
+
+/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
+ the return address to get an address in the middle of the presumed
+   call instruction. Since we don't have a call here, we artificially
+ extend the range covered by the unwind info by adding a nop before
+ the real start.
+ */
+ nop
+
+ .globl __kernel_sigtramp_rt
+ .type __kernel_sigtramp_rt, @function
+__kernel_sigtramp_rt:
+ .proc
+ .callinfo FRAME=ASM_SIGFRAME_SIZE,CALLS,SAVE_RP
+ .entry
+
+.Lsigrt_start = . - 4
+0: ldi 0, %r25 /* (in_syscall=0) */
+ ldi __NR_rt_sigreturn, %r20
+ ble 0x100(%sr2, %r0)
+ nop
+
+1: ldi 1, %r25 /* (in_syscall=1) */
+ ldi __NR_rt_sigreturn, %r20
+ ble 0x100(%sr2, %r0)
+ nop
+.Lsigrt_end:
+ .exit
+ .procend
+ .size __kernel_sigtramp_rt,.-__kernel_sigtramp_rt
+
+ .section .eh_frame,"a",@progbits
+
+/* This is where the mcontext_t struct can be found on the stack. */
+#define PTREGS SIGFRAME_CONTEXT_REGS /* 64-bit process offset is -720 */
+
+/* Register REGNO can be found at offset OFS of the mcontext_t structure. */
+ .macro rsave regno,ofs
+ .byte 0x05 /* DW_CFA_offset_extended */
+ .uleb128 \regno; /* regno */
+ .uleb128 \ofs /* factored offset */
+ .endm
+
+.Lcie:
+ .long .Lcie_end - .Lcie_start
+.Lcie_start:
+ .long 0 /* CIE ID */
+ .byte 1 /* Version number */
+ .stringz "zRS" /* NUL-terminated augmentation string */
+ .uleb128 4 /* Code alignment factor */
+ .sleb128 8 /* Data alignment factor */
+ .byte 61 /* Return address register column, iaoq[0] */
+ .uleb128 1 /* Augmentation value length */
+ .byte 0x1b /* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */
+	.byte 0x0f			/* DW_CFA_def_cfa_expression */
+ .uleb128 9f - 1f /* length */
+1:
+ .byte 0x8e /* DW_OP_breg30 */
+ .sleb128 PTREGS
+9:
+ .balign 8
+.Lcie_end:
+
+ .long .Lfde0_end - .Lfde0_start
+.Lfde0_start:
+ .long .Lfde0_start - .Lcie /* CIE pointer. */
+ .long .Lsigrt_start - . /* PC start, length */
+ .long .Lsigrt_end - .Lsigrt_start
+ .uleb128 0 /* Augmentation */
+
+ /* General registers */
+ rsave 1, 2
+ rsave 2, 3
+ rsave 3, 4
+ rsave 4, 5
+ rsave 5, 6
+ rsave 6, 7
+ rsave 7, 8
+ rsave 8, 9
+ rsave 9, 10
+ rsave 10, 11
+ rsave 11, 12
+ rsave 12, 13
+ rsave 13, 14
+ rsave 14, 15
+ rsave 15, 16
+ rsave 16, 17
+ rsave 17, 18
+ rsave 18, 19
+ rsave 19, 20
+ rsave 20, 21
+ rsave 21, 22
+ rsave 22, 23
+ rsave 23, 24
+ rsave 24, 25
+ rsave 25, 26
+ rsave 26, 27
+ rsave 27, 28
+ rsave 28, 29
+ rsave 29, 30
+ rsave 30, 31
+ rsave 31, 32
+
+ /* Floating-point registers */
+ rsave 32, 36
+ rsave 33, 37
+ rsave 34, 38
+ rsave 35, 39
+ rsave 36, 40
+ rsave 37, 41
+ rsave 38, 42
+ rsave 39, 43
+ rsave 40, 44
+ rsave 41, 45
+ rsave 42, 46
+ rsave 43, 47
+ rsave 44, 48
+ rsave 45, 49
+ rsave 46, 50
+ rsave 47, 51
+ rsave 48, 52
+ rsave 49, 53
+ rsave 50, 54
+ rsave 51, 55
+ rsave 52, 56
+ rsave 53, 57
+ rsave 54, 58
+ rsave 55, 59
+ rsave 56, 60
+ rsave 57, 61
+ rsave 58, 62
+ rsave 59, 63
+
+ /* SAR register */
+ rsave 60, 67
+
+ /* iaoq[0] return address register */
+ rsave 61, 65
+ .balign 8
+.Lfde0_end:
diff --git a/arch/parisc/kernel/vdso64/vdso64.lds.S b/arch/parisc/kernel/vdso64/vdso64.lds.S
new file mode 100644
index 000000000000..de1fb4b19286
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/vdso64.lds.S
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This is the infamous ld script for the 64-bit vdso library
+ */
+#include <asm/vdso.h>
+
+/* Default link addresses for the vDSOs */
+OUTPUT_FORMAT("elf64-hppa-linux")
+OUTPUT_ARCH(hppa:hppa2.0w)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = VDSO_LBASE + SIZEOF_HEADERS;
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ . = ALIGN (16);
+ .text :
+ {
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+
+ /* Other stuff is appended to the text segment: */
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .gcc_except_table : { *(.gcc_except_table) }
+ .fixup : { *(.fixup) }
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+ .plt : { *(.plt) }
+ .got : { *(.got) }
+
+ _end = .;
+ __end = .;
+ PROVIDE (end = .);
+
+
+ /* Stabs debugging sections are here too
+ */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+
+ /DISCARD/ : { *(.note.GNU-stack) }
+ /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) }
+ /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
+}
+
+
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ __kernel_sigtramp_rt64;
+ __kernel_restart_syscall64;
+ local: *;
+ };
+}
diff --git a/arch/parisc/kernel/vdso64/vdso64_wrapper.S b/arch/parisc/kernel/vdso64/vdso64_wrapper.S
new file mode 100644
index 000000000000..66f929482d3d
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/vdso64_wrapper.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+ __PAGE_ALIGNED_DATA
+
+ .globl vdso64_start, vdso64_end
+ .balign PAGE_SIZE
+vdso64_start:
+ .incbin "arch/parisc/kernel/vdso64/vdso64.so"
+ .balign PAGE_SIZE
+vdso64_end:
+
+ .previous
diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c
index 367f6397bda7..860385058085 100644
--- a/arch/parisc/lib/iomap.c
+++ b/arch/parisc/lib/iomap.c
@@ -346,6 +346,16 @@ u64 ioread64be(const void __iomem *addr)
return *((u64 *)addr);
}
+u64 ioread64_lo_hi(const void __iomem *addr)
+{
+ u32 low, high;
+
+ low = ioread32(addr);
+ high = ioread32(addr + sizeof(u32));
+
+ return low + ((u64)high << 32);
+}
+
u64 ioread64_hi_lo(const void __iomem *addr)
{
u32 low, high;
@@ -419,6 +429,12 @@ void iowrite64be(u64 datum, void __iomem *addr)
}
}
+void iowrite64_lo_hi(u64 val, void __iomem *addr)
+{
+ iowrite32(val, addr);
+ iowrite32(val >> 32, addr + sizeof(u32));
+}
+
void iowrite64_hi_lo(u64 val, void __iomem *addr)
{
iowrite32(val >> 32, addr + sizeof(u32));
@@ -530,6 +546,7 @@ EXPORT_SYMBOL(ioread32);
EXPORT_SYMBOL(ioread32be);
EXPORT_SYMBOL(ioread64);
EXPORT_SYMBOL(ioread64be);
+EXPORT_SYMBOL(ioread64_lo_hi);
EXPORT_SYMBOL(ioread64_hi_lo);
EXPORT_SYMBOL(iowrite8);
EXPORT_SYMBOL(iowrite16);
@@ -538,6 +555,7 @@ EXPORT_SYMBOL(iowrite32);
EXPORT_SYMBOL(iowrite32be);
EXPORT_SYMBOL(iowrite64);
EXPORT_SYMBOL(iowrite64be);
+EXPORT_SYMBOL(iowrite64_lo_hi);
EXPORT_SYMBOL(iowrite64_hi_lo);
EXPORT_SYMBOL(ioread8_rep);
EXPORT_SYMBOL(ioread16_rep);
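A hypothetical driver-side sketch of when the new _lo_hi variant matters: some devices latch the high half of a 64-bit register when the low half is read, so the low word must be accessed first (function name and register offset are invented here):

	/* Requires <linux/io.h>; mirrors the helper exported above. */
	static u64 read_free_running_counter(void __iomem *regs)
	{
		/* The device snapshots the high word on the low-word read,
		 * so ioread64_lo_hi() returns a consistent 64-bit value.
		 */
		return ioread64_lo_hi(regs + 0x40);	/* 0x40: invented offset */
	}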
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
index ea70a0e08321..5fc0c852c84c 100644
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -13,8 +13,8 @@
#include <linux/compiler.h>
#include <linux/uaccess.h>
-#define get_user_space() (uaccess_kernel() ? 0 : mfsp(3))
-#define get_kernel_space() (0)
+#define get_user_space() mfsp(SR_USER)
+#define get_kernel_space() SR_KERNEL
/* Returns 0 for success, otherwise, returns number of bytes not transferred. */
extern unsigned long pa_memcpy(void *dst, const void *src,
@@ -23,8 +23,8 @@ extern unsigned long pa_memcpy(void *dst, const void *src,
unsigned long raw_copy_to_user(void __user *dst, const void *src,
unsigned long len)
{
- mtsp(get_kernel_space(), 1);
- mtsp(get_user_space(), 2);
+ mtsp(get_kernel_space(), SR_TEMP1);
+ mtsp(get_user_space(), SR_TEMP2);
return pa_memcpy((void __force *)dst, src, len);
}
EXPORT_SYMBOL(raw_copy_to_user);
@@ -32,16 +32,16 @@ EXPORT_SYMBOL(raw_copy_to_user);
unsigned long raw_copy_from_user(void *dst, const void __user *src,
unsigned long len)
{
- mtsp(get_user_space(), 1);
- mtsp(get_kernel_space(), 2);
+ mtsp(get_user_space(), SR_TEMP1);
+ mtsp(get_kernel_space(), SR_TEMP2);
return pa_memcpy(dst, (void __force *)src, len);
}
EXPORT_SYMBOL(raw_copy_from_user);
void * memcpy(void * dst,const void *src, size_t count)
{
- mtsp(get_kernel_space(), 1);
- mtsp(get_kernel_space(), 2);
+ mtsp(get_kernel_space(), SR_TEMP1);
+ mtsp(get_kernel_space(), SR_TEMP2);
pa_memcpy(dst, src, count);
return dst;
}
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index e9eabf8f14d7..f114e102aaf2 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -425,3 +425,92 @@ out_of_memory:
}
pagefault_out_of_memory();
}
+
+/* Handle non-access data TLB miss faults.
+ *
+ * For probe instructions, accesses to userspace are considered allowed
+ * if they lie in a valid VMA and the access type matches. We are not
+ * allowed to handle MM faults here, so there may be situations where an
+ * actual access would fail even though a probe was successful.
+ */
+int
+handle_nadtlb_fault(struct pt_regs *regs)
+{
+ unsigned long insn = regs->iir;
+ int breg, treg, xreg, val = 0;
+ struct vm_area_struct *vma, *prev_vma;
+ struct task_struct *tsk;
+ struct mm_struct *mm;
+ unsigned long address;
+ unsigned long acc_type;
+
+ switch (insn & 0x380) {
+ case 0x280:
+ /* FDC instruction */
+ fallthrough;
+ case 0x380:
+ /* PDC and FIC instructions */
+ if (printk_ratelimit()) {
+ pr_warn("BUG: nullifying cache flush/purge instruction\n");
+ show_regs(regs);
+ }
+ if (insn & 0x20) {
+ /* Base modification */
+ breg = (insn >> 21) & 0x1f;
+ xreg = (insn >> 16) & 0x1f;
+ if (breg && xreg)
+ regs->gr[breg] += regs->gr[xreg];
+ }
+ regs->gr[0] |= PSW_N;
+ return 1;
+
+ case 0x180:
+ /* PROBE instruction */
+ treg = insn & 0x1f;
+ if (regs->isr) {
+ tsk = current;
+ mm = tsk->mm;
+ if (mm) {
+ /* Search for VMA */
+ address = regs->ior;
+ mmap_read_lock(mm);
+ vma = find_vma_prev(mm, address, &prev_vma);
+ mmap_read_unlock(mm);
+
+ /*
+ * Check if access to the VMA is okay.
+ * We don't allow for stack expansion.
+ */
+ acc_type = (insn & 0x40) ? VM_WRITE : VM_READ;
+ if (vma
+ && address >= vma->vm_start
+ && (vma->vm_flags & acc_type) == acc_type)
+ val = 1;
+ }
+ }
+ if (treg)
+ regs->gr[treg] = val;
+ regs->gr[0] |= PSW_N;
+ return 1;
+
+ case 0x300:
+ /* LPA instruction */
+ if (insn & 0x20) {
+ /* Base modification */
+ breg = (insn >> 21) & 0x1f;
+ xreg = (insn >> 16) & 0x1f;
+ if (breg && xreg)
+ regs->gr[breg] += regs->gr[xreg];
+ }
+ treg = insn & 0x1f;
+ if (treg)
+ regs->gr[treg] = 0;
+ regs->gr[0] |= PSW_N;
+ return 1;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
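The field extraction above can be sanity-checked in isolation; a standalone userspace sketch with an invented instruction word, using the same shifts and masks as handle_nadtlb_fault():

	#include <stdio.h>

	int main(void)
	{
		unsigned long insn = 0x0345019cUL;	/* hypothetical encoding */

		/* prints: class=0x180 breg=26 xreg=5 treg=28 */
		printf("class=%#lx breg=%lu xreg=%lu treg=%lu\n",
		       insn & 0x380,		/* 0x180 selects the PROBE case */
		       (insn >> 21) & 0x1f,	/* base register */
		       (insn >> 16) & 0x1f,	/* index register */
		       insn & 0x1f);		/* target register */
		return 0;
	}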
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 1ae31db9988f..1dc2e88e7b04 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -337,9 +337,9 @@ static void __init setup_bootmem(void)
static bool kernel_set_to_readonly;
-static void __init map_pages(unsigned long start_vaddr,
- unsigned long start_paddr, unsigned long size,
- pgprot_t pgprot, int force)
+static void __ref map_pages(unsigned long start_vaddr,
+ unsigned long start_paddr, unsigned long size,
+ pgprot_t pgprot, int force)
{
pmd_t *pmd;
pte_t *pg_table;
@@ -449,7 +449,7 @@ void __init set_kernel_text_rw(int enable_read_write)
flush_tlb_all();
}
-void __ref free_initmem(void)
+void free_initmem(void)
{
unsigned long init_begin = (unsigned long)__init_begin;
unsigned long init_end = (unsigned long)__init_end;
@@ -463,7 +463,6 @@ void __ref free_initmem(void)
/* The init text pages are marked R-X. We have to
* flush the icache and mark them RW-
*
- * This is tricky, because map_pages is in the init section.
* Do a dummy remap of the data section first (the data
* section is already PAGE_KERNEL) to pull in the TLB entries
* for map_kernel */
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b779603978e1..7e7387bd7d53 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -108,6 +108,7 @@ config PPC
select ARCH_ENABLE_MEMORY_HOTPLUG
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_HAS_COPY_MC if PPC64
+ select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEBUG_WX if STRICT_KERNEL_RWX
diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig
index d4be64f190ff..fa707de761be 100644
--- a/arch/powerpc/configs/linkstation_defconfig
+++ b/arch/powerpc/configs/linkstation_defconfig
@@ -120,7 +120,6 @@ CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_CODEPAGE_932=m
diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig
index 1fed6be95d53..d1c7fd5bf34b 100644
--- a/arch/powerpc/configs/mvme5100_defconfig
+++ b/arch/powerpc/configs/mvme5100_defconfig
@@ -101,7 +101,6 @@ CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
CONFIG_NLS=y
CONFIG_NLS_CODEPAGE_437=m
diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
index 7be27862329f..78c6a5fde1d6 100644
--- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
@@ -223,6 +223,8 @@ static __always_inline void update_user_segments(u32 val)
update_user_segment(15, val);
}
+int __init find_free_bat(void);
+unsigned int bat_block_size(unsigned long base, unsigned long top);
#endif /* !__ASSEMBLY__ */
/* We happily ignore the smaller BATs on 601, we don't actually use
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 609c80f67194..95e06f2a8e23 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -178,6 +178,7 @@ static inline bool pte_user(pte_t pte)
#ifndef __ASSEMBLY__
int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
+void unmap_kernel_page(unsigned long va);
#endif /* !__ASSEMBLY__ */
@@ -371,8 +372,8 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
#define __HAVE_ARCH_PTE_SAME
#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HASHPTE) == 0)
-#define pmd_page(pmd) \
- pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
+#define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT)
+#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
/*
* Encode and decode a swap entry.
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index ba5b1becf518..006cbec70ffe 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -202,7 +202,6 @@ static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx)
/*
* The current system page and segment sizes
*/
-extern int mmu_linear_psize;
extern int mmu_virtual_psize;
extern int mmu_vmalloc_psize;
extern int mmu_io_psize;
@@ -213,6 +212,7 @@ extern int mmu_io_psize;
#define mmu_virtual_psize MMU_PAGE_4K
#endif
#endif
+extern int mmu_linear_psize;
extern int mmu_vmemmap_psize;
/* MMU initialization */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 33e073d6b0c4..875730d5af40 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1082,6 +1082,8 @@ static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t p
return hash__map_kernel_page(ea, pa, prot);
}
+void unmap_kernel_page(unsigned long va);
+
static inline int __meminit vmemmap_create_mapping(unsigned long start,
unsigned long page_size,
unsigned long phys)
diff --git a/arch/powerpc/include/asm/fadump-internal.h b/arch/powerpc/include/asm/fadump-internal.h
index 52189928ec08..81bcb9abb371 100644
--- a/arch/powerpc/include/asm/fadump-internal.h
+++ b/arch/powerpc/include/asm/fadump-internal.h
@@ -19,11 +19,6 @@
#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
-/* Alignment per CMA requirement. */
-#define FADUMP_CMA_ALIGNMENT (PAGE_SIZE << \
- max_t(unsigned long, MAX_ORDER - 1, \
- pageblock_order))
-
/* FAD commands */
#define FADUMP_REGISTER 1
#define FADUMP_UNREGISTER 2
diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
index 947b5b9c4424..a832aeafe560 100644
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -111,8 +111,10 @@ static inline void __set_fixmap(enum fixed_addresses idx,
BUILD_BUG_ON(idx >= __end_of_fixed_addresses);
else if (WARN_ON(idx >= __end_of_fixed_addresses))
return;
-
- map_kernel_page(__fix_to_virt(idx), phys, flags);
+ if (pgprot_val(flags))
+ map_kernel_page(__fix_to_virt(idx), phys, flags);
+ else
+ unmap_kernel_page(__fix_to_virt(idx));
}
#define __early_set_fixmap __set_fixmap
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index a58fb4aa6c81..674e5aaafcbd 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -473,7 +473,7 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
return !(regs->msr & MSR_EE);
}
-static inline bool should_hard_irq_enable(void)
+static __always_inline bool should_hard_irq_enable(void)
{
return false;
}
diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h
index 7a90000f8d15..f83866a19e87 100644
--- a/arch/powerpc/include/asm/kexec_ranges.h
+++ b/arch/powerpc/include/asm/kexec_ranges.h
@@ -9,7 +9,7 @@ struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges);
int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
int add_tce_mem_ranges(struct crash_mem **mem_ranges);
int add_initrd_mem_range(struct crash_mem **mem_ranges);
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
int add_htab_mem_range(struct crash_mem **mem_ranges);
#else
static inline int add_htab_mem_range(struct crash_mem **mem_ranges)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index fe07558173ef..827038a33064 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -39,7 +39,6 @@ struct kvm_nested_guest {
pgd_t *shadow_pgtable; /* our page table for this guest */
u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */
u64 process_table; /* process table entry for this guest */
- u64 hfscr; /* HFSCR that the L1 requested for this nested guest */
long refcnt; /* number of pointers to this struct */
struct mutex tlb_lock; /* serialize page faults and tlbies */
struct kvm_nested_guest *next;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index a770443cd6e0..d9bf60bf0816 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -818,6 +818,7 @@ struct kvm_vcpu_arch {
/* For support of nested guests */
struct kvm_nested_guest *nested;
+ u64 nested_hfscr; /* HFSCR that the L1 requested for the nested guest */
u32 nested_vcpu_id;
gpa_t nested_io_gpr;
#endif
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index fd277b15635c..b8527a74bd4d 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -21,7 +21,6 @@ extern void destroy_context(struct mm_struct *mm);
#ifdef CONFIG_SPAPR_TCE_IOMMU
struct mm_iommu_table_group_mem_t;
-extern int isolate_lru_page(struct page *page); /* from internal.h */
extern bool mm_iommu_preregistered(struct mm_struct *mm);
extern long mm_iommu_new(struct mm_struct *mm,
unsigned long ua, unsigned long entries,
diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
index 160abcb8e9fa..ea0e487f87b1 100644
--- a/arch/powerpc/include/asm/nmi.h
+++ b/arch/powerpc/include/asm/nmi.h
@@ -9,7 +9,7 @@ long soft_nmi_interrupt(struct pt_regs *regs);
static inline void arch_touch_nmi_watchdog(void) {}
#endif
-#if defined(CONFIG_NMI_IPI) && defined(CONFIG_STACKTRACE)
+#ifdef CONFIG_NMI_IPI
extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
bool exclude_self);
#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
index 64b6c608eca4..de092b04ee1a 100644
--- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
@@ -71,9 +71,9 @@ static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags
size_t size = 1UL << shift;
if (size == SZ_16K)
- return __pte(pte_val(entry) & ~_PAGE_HUGE);
+ return __pte(pte_val(entry) | _PAGE_SPS);
else
- return entry;
+ return __pte(pte_val(entry) | _PAGE_SPS | _PAGE_HUGE);
}
#define arch_make_huge_pte arch_make_huge_pte
#endif
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index b67742e2a9b2..dcc9b338e042 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -64,6 +64,7 @@ extern int icache_44x_need_flush;
#ifndef __ASSEMBLY__
int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
+void unmap_kernel_page(unsigned long va);
#endif /* !__ASSEMBLY__ */
@@ -348,15 +349,14 @@ static inline int pte_young(pte_t pte)
* of the pte page. -- paulus
*/
#ifndef CONFIG_BOOKE
-#define pmd_page(pmd) \
- pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
+#define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT)
#else
#define pmd_page_vaddr(pmd) \
((unsigned long)(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1)))
-#define pmd_page(pmd) \
- pfn_to_page((__pa(pmd_val(pmd)) >> PAGE_SHIFT))
+#define pmd_pfn(pmd) (__pa(pmd_val(pmd)) >> PAGE_SHIFT)
#endif
+#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
/*
* Encode and decode a swap entry.
* Note that the bits we use in a PTE for representing a swap entry
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index a3313e853e5e..78888b0c30f6 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -142,6 +142,7 @@ static inline pte_t pmd_pte(pmd_t pmd)
#define pmd_present(pmd) (!pmd_none(pmd))
#define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS)
extern struct page *pmd_page(pmd_t pmd);
+#define pmd_pfn(pmd) (page_to_pfn(pmd_page(pmd)))
static inline void pud_set(pud_t *pudp, unsigned long val)
{
@@ -308,6 +309,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
#define __swp_entry_to_pte(x) __pte((x).val)
int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot);
+void unmap_kernel_page(unsigned long va);
extern int __meminit vmemmap_create_mapping(unsigned long start,
unsigned long page_size,
unsigned long phys);
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index efad07081cc0..9675303b724e 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -500,6 +500,7 @@
#define PPC_RAW_LDX(r, base, b) (0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
#define PPC_RAW_LHZ(r, base, i) (0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
#define PPC_RAW_LHBRX(r, base, b) (0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+#define PPC_RAW_LWBRX(r, base, b) (0x7c00042c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
#define PPC_RAW_LDBRX(r, base, b) (0x7c000428 | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
#define PPC_RAW_STWCX(s, a, b) (0x7c00012d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
#define PPC_RAW_CMPWI(a, i) (0x2c000000 | ___PPC_RA(a) | IMM_L(i))
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 52d05b465e3e..25fc8ad9a27a 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -90,7 +90,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
unsigned long val, mask = -1UL;
unsigned int n = 6;
- if (is_32bit_task())
+ if (is_tsk_32bit_task(task))
mask = 0xffffffff;
while (n--) {
@@ -105,7 +105,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
static inline int syscall_get_arch(struct task_struct *task)
{
- if (is_32bit_task())
+ if (is_tsk_32bit_task(task))
return AUDIT_ARCH_PPC;
else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
return AUDIT_ARCH_PPC64LE;
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 5725029aaa29..d6e649b3c70b 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -168,8 +168,10 @@ static inline bool test_thread_local_flags(unsigned int flags)
#ifdef CONFIG_COMPAT
#define is_32bit_task() (test_thread_flag(TIF_32BIT))
+#define is_tsk_32bit_task(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT))
#else
#define is_32bit_task() (IS_ENABLED(CONFIG_PPC32))
+#define is_tsk_32bit_task(tsk) (IS_ENABLED(CONFIG_PPC32))
#endif
#if defined(CONFIG_PPC64)
diff --git a/arch/powerpc/include/asm/xor_altivec.h b/arch/powerpc/include/asm/xor_altivec.h
index 6ca923510b59..294620a25f80 100644
--- a/arch/powerpc/include/asm/xor_altivec.h
+++ b/arch/powerpc/include/asm/xor_altivec.h
@@ -3,17 +3,20 @@
#define _ASM_POWERPC_XOR_ALTIVEC_H
#ifdef CONFIG_ALTIVEC
-
-void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in);
-void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in);
-void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in);
-void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in, unsigned long *v5_in);
+void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3);
+void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5);
#endif
#endif /* _ASM_POWERPC_XOR_ALTIVEC_H */
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index d03e488cfe9c..4fdb7c77fda1 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -113,6 +113,12 @@ static int __init fadump_cma_init(void)
}
/*
+ * If CMA activation fails, keep the pages reserved, instead of
+	 * exposing them to the buddy allocator. Same as the 'fadump=nocma' case.
+ */
+ cma_reserve_pages_on_error(fadump_cma);
+
+ /*
* So we now have successfully initialized cma area for fadump.
*/
pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx "
@@ -544,7 +550,7 @@ int __init fadump_reserve_mem(void)
if (!fw_dump.nocma) {
fw_dump.boot_memory_size =
ALIGN(fw_dump.boot_memory_size,
- FADUMP_CMA_ALIGNMENT);
+ CMA_MIN_ALIGNMENT_BYTES);
}
#endif
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index fa84744d6b24..b876ef8c70a7 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -421,14 +421,14 @@ InstructionTLBMiss:
*/
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_IMISS
-#ifdef CONFIG_MODULES
+#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
lis r1, TASK_SIZE@h /* check if kernel address */
cmplw 0,r1,r3
#endif
mfspr r2, SPRN_SDR1
li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER
rlwinm r2, r2, 28, 0xfffff000
-#ifdef CONFIG_MODULES
+#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
bgt- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index 92088f848266..7bab2d7de372 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -30,6 +30,7 @@ COMPAT_SYS_CALL_TABLE:
.ifc \srr,srr
mfspr r11,SPRN_SRR0
ld r12,_NIP(r1)
+ clrrdi r11,r11,2
clrrdi r12,r12,2
100: tdne r11,r12
EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
@@ -40,6 +41,7 @@ COMPAT_SYS_CALL_TABLE:
.else
mfspr r11,SPRN_HSRR0
ld r12,_NIP(r1)
+ clrrdi r11,r11,2
clrrdi r12,r12,2
100: tdne r11,r12
EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index d45a415d5374..2069bbb90a9a 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -1110,14 +1110,6 @@ EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group);
/* NUMA stuff */
#ifdef CONFIG_NUMA
-static void __init register_nodes(void)
-{
- int i;
-
- for (i = 0; i < MAX_NUMNODES; i++)
- register_one_node(i);
-}
-
int sysfs_add_device_to_node(struct device *dev, int nid)
{
struct node *node = node_devices[nid];
@@ -1132,13 +1124,6 @@ void sysfs_remove_device_from_node(struct device *dev, int nid)
sysfs_remove_link(&node->dev.kobj, kobject_name(&dev->kobj));
}
EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node);
-
-#else
-static void __init register_nodes(void)
-{
- return;
-}
-
#endif
/* Only valid if CPU is present. */
@@ -1155,8 +1140,6 @@ static int __init topology_init(void)
{
int cpu, r;
- register_nodes();
-
for_each_possible_cpu(cpu) {
struct cpu *c = &per_cpu(cpu_devices, cpu);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 62361cc7281c..cd0b8b71ecdd 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -649,8 +649,9 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
__this_cpu_inc(irq_stat.timer_irqs_event);
} else {
now = *next_tb - now;
- if (now <= decrementer_max)
- set_dec_or_work(now);
+ if (now > decrementer_max)
+ now = decrementer_max;
+ set_dec_or_work(now);
__this_cpu_inc(irq_stat.timer_irqs_others);
}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d1817cd9a691..84c89f08ae9a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1816,7 +1816,6 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
{
- struct kvm_nested_guest *nested = vcpu->arch.nested;
int r;
int srcu_idx;
@@ -1922,7 +1921,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
* it into a HEAI.
*/
if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) ||
- (nested->hfscr & (1UL << cause))) {
+ (vcpu->arch.nested_hfscr & (1UL << cause))) {
vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST;
/*
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 8f8daaeeb3b7..9d373f8963ee 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -363,7 +363,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
/* set L1 state to L2 state */
vcpu->arch.nested = l2;
vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
- l2->hfscr = l2_hv.hfscr;
+ vcpu->arch.nested_hfscr = l2_hv.hfscr;
vcpu->arch.regs = l2_regs;
/* Guest must always run with ME enabled, HV disabled. */
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index e414ca44839f..8cabdb39cbbc 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -91,6 +91,7 @@
#include <linux/kvm_host.h>
#include <linux/ksm.h>
#include <linux/of.h>
+#include <linux/memremap.h>
#include <asm/ultravisor.h>
#include <asm/mman.h>
#include <asm/kvm_ppc.h>
@@ -712,7 +713,6 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
dpage = pfn_to_page(uvmem_pfn);
dpage->zone_device_data = pvt;
- get_page(dpage);
lock_page(dpage);
return dpage;
out_clear:
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index a94b0cd0bdc5..bd3734d5be89 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -3264,12 +3264,14 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op)
case BARRIER_EIEIO:
eieio();
break;
+#ifdef CONFIG_PPC64
case BARRIER_LWSYNC:
asm volatile("lwsync" : : : "memory");
break;
case BARRIER_PTESYNC:
asm volatile("ptesync" : : : "memory");
break;
+#endif
}
break;
diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c
index 54e61979e80e..aab49d056d18 100644
--- a/arch/powerpc/lib/xor_vmx.c
+++ b/arch/powerpc/lib/xor_vmx.c
@@ -49,8 +49,9 @@ typedef vector signed char unative_t;
V1##_3 = vec_xor(V1##_3, V2##_3); \
} while (0)
-void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in)
+void __xor_altivec_2(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in)
{
DEFINE(v1);
DEFINE(v2);
@@ -67,8 +68,10 @@ void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
} while (--lines > 0);
}
-void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in)
+void __xor_altivec_3(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in,
+ const unsigned long * __restrict v3_in)
{
DEFINE(v1);
DEFINE(v2);
@@ -89,9 +92,11 @@ void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
} while (--lines > 0);
}
-void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in)
+void __xor_altivec_4(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in,
+ const unsigned long * __restrict v3_in,
+ const unsigned long * __restrict v4_in)
{
DEFINE(v1);
DEFINE(v2);
@@ -116,9 +121,12 @@ void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
} while (--lines > 0);
}
-void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in, unsigned long *v5_in)
+void __xor_altivec_5(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in,
+ const unsigned long * __restrict v3_in,
+ const unsigned long * __restrict v4_in,
+ const unsigned long * __restrict v5_in)
{
DEFINE(v1);
DEFINE(v2);
diff --git a/arch/powerpc/lib/xor_vmx.h b/arch/powerpc/lib/xor_vmx.h
index 5c2b0839b179..573c41d90dac 100644
--- a/arch/powerpc/lib/xor_vmx.h
+++ b/arch/powerpc/lib/xor_vmx.h
@@ -6,16 +6,17 @@
* outside of the enable/disable altivec block.
*/
-void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in);
-
-void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in);
-
-void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in);
-
-void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in, unsigned long *v5_in);
+void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3);
+void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5);
diff --git a/arch/powerpc/lib/xor_vmx_glue.c b/arch/powerpc/lib/xor_vmx_glue.c
index 80dba916c367..35d917ece4d1 100644
--- a/arch/powerpc/lib/xor_vmx_glue.c
+++ b/arch/powerpc/lib/xor_vmx_glue.c
@@ -12,47 +12,51 @@
#include <asm/xor_altivec.h>
#include "xor_vmx.h"
-void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in)
+void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
preempt_disable();
enable_kernel_altivec();
- __xor_altivec_2(bytes, v1_in, v2_in);
+ __xor_altivec_2(bytes, p1, p2);
disable_kernel_altivec();
preempt_enable();
}
EXPORT_SYMBOL(xor_altivec_2);
-void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in)
+void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
preempt_disable();
enable_kernel_altivec();
- __xor_altivec_3(bytes, v1_in, v2_in, v3_in);
+ __xor_altivec_3(bytes, p1, p2, p3);
disable_kernel_altivec();
preempt_enable();
}
EXPORT_SYMBOL(xor_altivec_3);
-void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in)
+void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
preempt_disable();
enable_kernel_altivec();
- __xor_altivec_4(bytes, v1_in, v2_in, v3_in, v4_in);
+ __xor_altivec_4(bytes, p1, p2, p3, p4);
disable_kernel_altivec();
preempt_enable();
}
EXPORT_SYMBOL(xor_altivec_4);
-void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in, unsigned long *v5_in)
+void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
preempt_disable();
enable_kernel_altivec();
- __xor_altivec_5(bytes, v1_in, v2_in, v3_in, v4_in, v5_in);
+ __xor_altivec_5(bytes, p1, p2, p3, p4, p5);
disable_kernel_altivec();
preempt_enable();
}
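
Editor's note: the signature changes above add const and __restrict qualifiers to the xor helpers. __restrict promises the compiler that the destination and source buffers never alias, so loaded values can stay in (vector) registers across stores. A minimal stand-alone sketch of the idea, not the kernel routine:

#include <assert.h>
#include <stddef.h>

static void xor_blocks_2(size_t words, unsigned long *restrict p1,
			 const unsigned long *restrict p2)
{
	/* restrict: a store to p1[i] provably cannot change any p2[j],
	 * so the compiler need not reload p2 after every store */
	for (size_t i = 0; i < words; i++)
		p1[i] ^= p2[i];
}

int main(void)
{
	unsigned long a[4] = { 1, 2, 3, 4 }, b[4] = { 4, 3, 2, 1 };

	xor_blocks_2(4, a, b);
	assert(a[0] == 5 && a[3] == 5);
	return 0;
}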
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 94045b265b6b..203735caf691 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -76,7 +76,7 @@ unsigned long p_block_mapped(phys_addr_t pa)
return 0;
}
-static int __init find_free_bat(void)
+int __init find_free_bat(void)
{
int b;
int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
@@ -100,7 +100,7 @@ static int __init find_free_bat(void)
* - block size has to be a power of two. This is calculated by finding the
* highest bit set to 1.
*/
-static unsigned int block_size(unsigned long base, unsigned long top)
+unsigned int bat_block_size(unsigned long base, unsigned long top)
{
unsigned int max_size = SZ_256M;
unsigned int base_shift = (ffs(base) - 1) & 31;
@@ -145,7 +145,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to
int idx;
while ((idx = find_free_bat()) != -1 && base != top) {
- unsigned int size = block_size(base, top);
+ unsigned int size = bat_block_size(base, top);
if (size < 128 << 10)
break;
@@ -201,12 +201,12 @@ void mmu_mark_initmem_nx(void)
unsigned long size;
for (i = 0; i < nb - 1 && base < top;) {
- size = block_size(base, top);
+ size = bat_block_size(base, top);
setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
base += size;
}
if (base < top) {
- size = block_size(base, top);
+ size = bat_block_size(base, top);
if ((top - base) > size) {
size <<= 1;
if (strict_kernel_rwx_enabled() && base + size > border)
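
Editor's note: bat_block_size() (renamed and un-static'd above so the KASAN code further down can reuse it) picks the largest power-of-two block that is aligned to `base`, no larger than `top - base`, and capped at 256M. A stand-alone model of that calculation, with hypothetical test values:

#include <assert.h>

#define SZ_256M 0x10000000u

static unsigned int min3u(unsigned int a, unsigned int b, unsigned int c)
{
	unsigned int m = a < b ? a : b;

	return m < c ? m : c;
}

static unsigned int bat_block_size_model(unsigned int base, unsigned int top)
{
	unsigned int base_shift = (__builtin_ffs(base) - 1) & 31;	/* alignment of base */
	unsigned int block_shift = (32 - __builtin_clz(top - base) - 1) & 31;	/* fls(size) - 1 */

	return min3u(SZ_256M, 1u << base_shift, 1u << block_shift);
}

int main(void)
{
	assert(bat_block_size_model(0x00000000, 0x10000000) == SZ_256M);
	assert(bat_block_size_model(0x08000000, 0x10000000) == 0x08000000);
	assert(bat_block_size_model(0x00000000, 0x00030000) == 0x00020000);
	return 0;
}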
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 79ce3c22a29d..052e6590f84f 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -6,6 +6,7 @@
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/memblock.h>
+#include <linux/memremap.h>
#include <linux/debugfs.h>
#include <misc/cxl-base.h>
diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c
index 35b287b0a8da..450a67ef0bbe 100644
--- a/arch/powerpc/mm/kasan/book3s_32.c
+++ b/arch/powerpc/mm/kasan/book3s_32.c
@@ -10,48 +10,51 @@ int __init kasan_init_region(void *start, size_t size)
{
unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
- unsigned long k_cur = k_start;
- int k_size = k_end - k_start;
- int k_size_base = 1 << (ffs(k_size) - 1);
+ unsigned long k_nobat = k_start;
+ unsigned long k_cur;
+ phys_addr_t phys;
int ret;
- void *block;
- block = memblock_alloc(k_size, k_size_base);
-
- if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) {
- int shift = ffs(k_size - k_size_base);
- int k_size_more = shift ? 1 << (shift - 1) : 0;
-
- setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL);
- if (k_size_more >= SZ_128K)
- setbat(-1, k_start + k_size_base, __pa(block) + k_size_base,
- k_size_more, PAGE_KERNEL);
- if (v_block_mapped(k_start))
- k_cur = k_start + k_size_base;
- if (v_block_mapped(k_start + k_size_base))
- k_cur = k_start + k_size_base + k_size_more;
-
- update_bats();
+ while (k_nobat < k_end) {
+ unsigned int k_size = bat_block_size(k_nobat, k_end);
+ int idx = find_free_bat();
+
+ if (idx == -1)
+ break;
+ if (k_size < SZ_128K)
+ break;
+ phys = memblock_phys_alloc_range(k_size, k_size, 0,
+ MEMBLOCK_ALLOC_ANYWHERE);
+ if (!phys)
+ break;
+
+ setbat(idx, k_nobat, phys, k_size, PAGE_KERNEL);
+ k_nobat += k_size;
}
+ if (k_nobat != k_start)
+ update_bats();
- if (!block)
- block = memblock_alloc(k_size, PAGE_SIZE);
- if (!block)
- return -ENOMEM;
+ if (k_nobat < k_end) {
+ phys = memblock_phys_alloc_range(k_end - k_nobat, PAGE_SIZE, 0,
+ MEMBLOCK_ALLOC_ANYWHERE);
+ if (!phys)
+ return -ENOMEM;
+ }
ret = kasan_init_shadow_page_tables(k_start, k_end);
if (ret)
return ret;
- kasan_update_early_region(k_start, k_cur, __pte(0));
+ kasan_update_early_region(k_start, k_nobat, __pte(0));
- for (; k_cur < k_end; k_cur += PAGE_SIZE) {
+ for (k_cur = k_nobat; k_cur < k_end; k_cur += PAGE_SIZE) {
pmd_t *pmd = pmd_off_k(k_cur);
- void *va = block + k_cur - k_start;
- pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
+ pte_t pte = pfn_pte(PHYS_PFN(phys + k_cur - k_nobat), PAGE_KERNEL);
__set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
}
flush_tlb_kernel_range(k_start, k_end);
+ memset(kasan_mem_to_shadow(start), 0, k_end - k_start);
+
return 0;
}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index abb3198bd277..6ec5a7dd7913 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -206,6 +206,15 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
__set_pte_at(mm, addr, ptep, pte, 0);
}
+void unmap_kernel_page(unsigned long va)
+{
+ pmd_t *pmdp = pmd_off_k(va);
+ pte_t *ptep = pte_offset_kernel(pmdp, va);
+
+ pte_clear(&init_mm, va, ptep);
+ flush_tlb_kernel_range(va, va + PAGE_SIZE);
+}
+
/*
* This is called when relaxing access to a PTE. It's also called in the page
* fault path when we don't hit any of the major fault cases, ie, a minor
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index d6ffdd0f2309..56dd1f4e3e44 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -23,15 +23,15 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
}
-/* Fix the branch target addresses for subprog calls */
-static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
- struct codegen_context *ctx, u32 *addrs)
+/* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */
+static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image,
+ struct codegen_context *ctx, u32 *addrs)
{
const struct bpf_insn *insn = fp->insnsi;
bool func_addr_fixed;
u64 func_addr;
u32 tmp_idx;
- int i, ret;
+ int i, j, ret;
for (i = 0; i < fp->len; i++) {
/*
@@ -66,6 +66,23 @@ static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
* of the JITed sequence remains unchanged.
*/
ctx->idx = tmp_idx;
+ } else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) {
+ tmp_idx = ctx->idx;
+ ctx->idx = addrs[i] / 4;
+#ifdef CONFIG_PPC32
+ PPC_LI32(ctx->b2p[insn[i].dst_reg] - 1, (u32)insn[i + 1].imm);
+ PPC_LI32(ctx->b2p[insn[i].dst_reg], (u32)insn[i].imm);
+ for (j = ctx->idx - addrs[i] / 4; j < 4; j++)
+ EMIT(PPC_RAW_NOP());
+#else
+ func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32);
+ PPC_LI64(b2p[insn[i].dst_reg], func_addr);
+ /* overwrite rest with nops */
+ for (j = ctx->idx - addrs[i] / 4; j < 5; j++)
+ EMIT(PPC_RAW_NOP());
+#endif
+ ctx->idx = tmp_idx;
+ i++;
}
}
@@ -200,13 +217,13 @@ skip_init_ctx:
/*
* Do not touch the prologue and epilogue as they will remain
* unchanged. Only fix the branch target address for subprog
- * calls in the body.
+ * calls in the body, and ldimm64 instructions.
*
* This does not change the offsets and lengths of the subprog
* call instruction sequences and hence, the size of the JITed
* image as well.
*/
- bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);
+ bpf_jit_fixup_addresses(fp, code_base, &cgctx, addrs);
/* There is no need to perform the usual passes. */
goto skip_codegen_passes;
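
Editor's note: the extra-pass fixup above relies on every ldimm64 occupying a fixed-size slot, because PPC_LI32/PPC_LI64 emit a variable number of instructions depending on the immediate. Padding with nops (up to 4 instructions on ppc32, 5 on ppc64) keeps all JIT offsets stable so the value can be patched in place later. A rough stand-alone model of the instruction count, not the kernel macros:

#include <stdint.h>
#include <stdio.h>

/* crude model of how many instructions PPC_LI64 needs for an immediate */
static int li64_insns(int64_t imm)
{
	if (imm >= -32768 && imm < 32768)
		return 1;		/* li */
	if (imm >= INT32_MIN && imm <= INT32_MAX)
		return 2;		/* lis + ori */
	return 5;			/* lis, ori, sldi, oris, ori */
}

int main(void)
{
	int64_t imms[] = { 1, 0x11170, 0x0123456789abcdefLL };

	for (int i = 0; i < 3; i++)
		printf("imm=%#llx: %d insn(s) + %d nop(s) of padding\n",
		       (long long)imms[i], li64_insns(imms[i]),
		       5 - li64_insns(imms[i]));
	return 0;
}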
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index faaebd446cad..cf8dd8aea386 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -191,6 +191,9 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun
if (image && rel < 0x2000000 && rel >= -0x2000000) {
PPC_BL_ABS(func);
+ EMIT(PPC_RAW_NOP());
+ EMIT(PPC_RAW_NOP());
+ EMIT(PPC_RAW_NOP());
} else {
/* Load function address into r0 */
EMIT(PPC_RAW_LIS(_R0, IMM_H(func)));
@@ -290,6 +293,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
bool func_addr_fixed;
u64 func_addr;
u32 true_cond;
+ u32 tmp_idx;
+ int j;
/*
* addrs[] maps a BPF bytecode address into a real offset from
@@ -905,8 +910,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
* 16 byte instruction that uses two 'struct bpf_insn'
*/
case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
+ tmp_idx = ctx->idx;
PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm);
PPC_LI32(dst_reg, (u32)insn[i].imm);
+ /* padding to allow full 4 instructions for later patching */
+ for (j = ctx->idx - tmp_idx; j < 4; j++)
+ EMIT(PPC_RAW_NOP());
/* Adjust for two bpf instructions */
addrs[++i] = ctx->idx * 4;
break;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 9eae8d8ed340..e1e8c934308a 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -319,6 +319,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
u64 imm64;
u32 true_cond;
u32 tmp_idx;
+ int j;
/*
* addrs[] maps a BPF bytecode address into a real offset from
@@ -633,17 +634,21 @@ bpf_alu32_trunc:
EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
break;
case 64:
- /*
- * Way easier and faster(?) to store the value
- * into stack and then use ldbrx
- *
- * ctx->seen will be reliable in pass2, but
- * the instructions generated will remain the
- * same across all passes
- */
+ /* Store the value to stack and then use byte-reverse loads */
PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)));
- EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
+ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
+ } else {
+ EMIT(PPC_RAW_LWBRX(dst_reg, 0, b2p[TMP_REG_1]));
+ if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+ EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32));
+ EMIT(PPC_RAW_LI(b2p[TMP_REG_2], 4));
+ EMIT(PPC_RAW_LWBRX(b2p[TMP_REG_2], b2p[TMP_REG_2], b2p[TMP_REG_1]));
+ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+ EMIT(PPC_RAW_SLDI(b2p[TMP_REG_2], b2p[TMP_REG_2], 32));
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_2]));
+ }
break;
}
break;
@@ -848,9 +853,13 @@ emit_clear:
case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
imm64 = ((u64)(u32) insn[i].imm) |
(((u64)(u32) insn[i+1].imm) << 32);
+ tmp_idx = ctx->idx;
+ PPC_LI64(dst_reg, imm64);
+ /* padding to allow full 5 instructions for later patching */
+ for (j = ctx->idx - tmp_idx; j < 5; j++)
+ EMIT(PPC_RAW_NOP());
/* Adjust for two bpf instructions */
addrs[++i] = ctx->idx * 4;
- PPC_LI64(dst_reg, imm64);
break;
/*
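
Editor's note: the fallback above byte-swaps a 64-bit value with two 32-bit byte-reversed loads (lwbrx) when ldbrx (ISA 2.06) is unavailable; the half loaded from the lower address becomes the high half of the result, with the endianness deciding which load gets shifted. A hypothetical C model of the same combination (uses the GCC/Clang bswap builtin):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* a byte-reversed 32-bit load, standing in for lwbrx */
static uint32_t load32_byterev(const void *p)
{
	uint32_t w;

	memcpy(&w, p, 4);
	return __builtin_bswap32(w);
}

static uint64_t bswap64_two_loads(uint64_t v)
{
	uint8_t buf[8];
	uint64_t first, second;

	memcpy(buf, &v, 8);			/* "store to stack", as the JIT does */
	first = load32_byterev(buf);		/* lwbrx at offset 0 */
	second = load32_byterev(buf + 4);	/* lwbrx at offset 4 */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	return first << 32 | second;		/* SLDI the first load, then OR */
#else
	return second << 32 | first;		/* SLDI the second load, then OR */
#endif
}

int main(void)
{
	assert(bswap64_two_loads(0x0102030405060708ULL) == 0x0807060504030201ULL);
	return 0;
}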
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index a684901b6965..b5b42cf0a703 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -776,6 +776,34 @@ static void pmao_restore_workaround(bool ebb)
mtspr(SPRN_PMC6, pmcs[5]);
}
+/*
+ * If the perf subsystem wants performance monitor interrupts as soon as
+ * possible (e.g., to sample the instruction address and stack chain),
+ * this should return true. The IRQ masking code can then enable MSR[EE]
+ * in some places (e.g., interrupt handlers), which lets PMI interrupts
+ * through to improve the accuracy of profiles, at the cost of some
+ * performance.
+ *
+ * The PMU counters can be enabled by other means (e.g., sysfs raw SPR
+ * access), but in that case there is no need for prompt PMI handling.
+ *
+ * This currently returns true if any perf counter is being used. It
+ * could possibly return false if only events are being counted rather than
+ * samples being taken, but for now this is good enough.
+ */
+bool power_pmu_wants_prompt_pmi(void)
+{
+ struct cpu_hw_events *cpuhw;
+
+ /*
+ * This could simply test local_paca->pmcregs_in_use if that were not
+ * under ifdef KVM.
+ */
+ if (!ppmu)
+ return false;
+
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
+ return cpuhw->n_events;
+}
#endif /* CONFIG_PPC64 */
static void perf_event_interrupt(struct pt_regs *regs);
@@ -1327,9 +1355,20 @@ static void power_pmu_disable(struct pmu *pmu)
* Otherwise provide a warning if there is PMI pending, but
* no counter is found overflown.
*/
- if (any_pmc_overflown(cpuhw))
- clear_pmi_irq_pending();
- else
+ if (any_pmc_overflown(cpuhw)) {
+ /*
+ * Since power_pmu_disable runs under local_irq_save, it
+ * could happen that code hits a PMC overflow without PMI
+ * pending in paca. Hence only clear PMI pending if it was
+ * set.
+ *
+ * If a PMI is pending, then MSR[EE] must be disabled (because
+ * the masked PMI handler disables EE). So it is safe to
+ * call clear_pmi_irq_pending().
+ */
+ if (pmi_irq_pending())
+ clear_pmi_irq_pending();
+ } else
WARN_ON(pmi_irq_pending());
val = mmcra = cpuhw->mmcr.mmcra;
@@ -2438,36 +2477,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
perf_sample_event_took(sched_clock() - start_clock);
}
-/*
- * If the perf subsystem wants performance monitor interrupts as soon as
- * possible (e.g., to sample the instruction address and stack chain),
- * this should return true. The IRQ masking code can then enable MSR[EE]
- * in some places (e.g., interrupt handlers) that allows PMI interrupts
- * though to improve accuracy of profiles, at the cost of some performance.
- *
- * The PMU counters can be enabled by other means (e.g., sysfs raw SPR
- * access), but in that case there is no need for prompt PMI handling.
- *
- * This currently returns true if any perf counter is being used. It
- * could possibly return false if only events are being counted rather than
- * samples being taken, but for now this is good enough.
- */
-bool power_pmu_wants_prompt_pmi(void)
-{
- struct cpu_hw_events *cpuhw;
-
- /*
- * This could simply test local_paca->pmcregs_in_use if that were not
- * under ifdef KVM.
- */
-
- if (!ppmu)
- return false;
-
- cpuhw = this_cpu_ptr(&cpu_hw_events);
- return cpuhw->n_events;
-}
-
static int power_pmu_prepare_cpu(unsigned int cpu)
{
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
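
Editor's note: moving power_pmu_wants_prompt_pmi() under CONFIG_PPC64 matches its intended consumers, the 64-bit IRQ-soft-masking paths. A self-contained sketch of how such a predicate is consumed; the function names below are stubs made up for illustration:

#include <stdbool.h>
#include <stdio.h>

/* stub standing in for the real PMU state query */
static bool power_pmu_wants_prompt_pmi_stub(void)
{
	return true;	/* pretend perf has counters running */
}

/* hypothetical model of an IRQ-masking path deciding whether to
 * hard-enable MSR[EE] so a pending PMI can be taken promptly */
static bool should_hard_irq_enable_model(void)
{
	return power_pmu_wants_prompt_pmi_stub();
}

int main(void)
{
	printf("hard-enable EE for prompt PMI: %d\n",
	       should_hard_irq_enable_model());
	return 0;
}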
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 5adcbd9b5e88..0804b9a11934 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -40,6 +40,7 @@ config RISCV
select ARCH_USE_MEMTEST
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_FRAME_POINTERS
+ select ARCH_WANT_GENERAL_HUGETLB
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
select BUILDTIME_TABLE_SORT if MMU
@@ -171,9 +172,6 @@ config ARCH_SPARSEMEM_ENABLE
config ARCH_SELECT_MEMORY_MODEL
def_bool ARCH_SPARSEMEM_ENABLE
-config ARCH_WANT_GENERAL_HUGETLB
- def_bool y
-
config ARCH_SUPPORTS_UPROBES
def_bool y
diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
index b44d6ecdb46e..0aacd7052585 100644
--- a/arch/riscv/Kconfig.erratas
+++ b/arch/riscv/Kconfig.erratas
@@ -2,6 +2,7 @@ menu "CPU errata selection"
config RISCV_ERRATA_ALTERNATIVE
bool "RISC-V alternative scheme"
+ depends on !XIP_KERNEL
default y
help
This Kconfig allows the kernel to automatically patch the
diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index 6ec44a22278a..c112ab2a9052 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -14,8 +14,8 @@ config SOC_SIFIVE
select CLK_SIFIVE
select CLK_SIFIVE_PRCI
select SIFIVE_PLIC
- select RISCV_ERRATA_ALTERNATIVE
- select ERRATA_SIFIVE
+ select RISCV_ERRATA_ALTERNATIVE if !XIP_KERNEL
+ select ERRATA_SIFIVE if !XIP_KERNEL
help
This enables support for SiFive SoC platform hardware.
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 8a107ed18b0d..7d81102cffd4 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -50,6 +50,12 @@ riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima
riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima
riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd
riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c
+
+# Newer binutils versions default to ISA spec version 20191213 which moves some
+# instructions from the I extension to the Zicsr and Zifencei extensions.
+toolchain-need-zicsr-zifencei := $(call cc-option-yn, -march=$(riscv-march-y)_zicsr_zifencei)
+riscv-march-$(toolchain-need-zicsr-zifencei) := $(riscv-march-y)_zicsr_zifencei
+
KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y))
KBUILD_AFLAGS += -march=$(riscv-march-y)
diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi
index 56f57118c633..44d338514761 100644
--- a/arch/riscv/boot/dts/canaan/k210.dtsi
+++ b/arch/riscv/boot/dts/canaan/k210.dtsi
@@ -113,7 +113,8 @@
compatible = "canaan,k210-plic", "sifive,plic-1.0.0";
reg = <0xC000000 0x4000000>;
interrupt-controller;
- interrupts-extended = <&cpu0_intc 11>, <&cpu1_intc 11>;
+ interrupts-extended = <&cpu0_intc 11>, <&cpu0_intc 9>,
+ <&cpu1_intc 11>, <&cpu1_intc 9>;
riscv,ndev = <65>;
};
diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig
index 2a82a3b2992b..af64b95e88cc 100644
--- a/arch/riscv/configs/nommu_k210_sdcard_defconfig
+++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig
@@ -23,7 +23,7 @@ CONFIG_SLOB=y
CONFIG_SOC_CANAAN=y
CONFIG_SMP=y
CONFIG_NR_CPUS=2
-CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro"
+CONFIG_CMDLINE="earlycon console=ttySIF0 root=/dev/mmcblk0p1 rootwait ro"
CONFIG_CMDLINE_FORCE=y
# CONFIG_SECCOMP is not set
# CONFIG_STACKPROTECTOR is not set
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 160e3a1e8f8b..004372f8da54 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -119,7 +119,7 @@ extern phys_addr_t phys_ram_base;
((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size))
#define is_linear_mapping(x) \
- ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < kernel_map.virt_addr))
+ ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < PAGE_OFFSET + KERN_VIRT_SIZE))
#define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset))
#define kernel_mapping_pa_to_va(y) ({ \
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 7e949f25c933..e3549e50de95 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -13,6 +13,7 @@
#ifndef CONFIG_MMU
#define KERNEL_LINK_ADDR PAGE_OFFSET
+#define KERN_VIRT_SIZE (UL(-1))
#else
#define ADDRESS_SPACE_END (UL(-1))
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 612556faa527..ffc87e76b1dd 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -51,6 +51,8 @@ obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o
obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o
obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o
+obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o
+
obj-$(CONFIG_RISCV_BASE_PMU) += perf_event.o
obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o
diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c
index be7f05b542bb..f7a832e3a1d1 100644
--- a/arch/riscv/kernel/cpu-hotplug.c
+++ b/arch/riscv/kernel/cpu-hotplug.c
@@ -12,6 +12,7 @@
#include <linux/sched/hotplug.h>
#include <asm/irq.h>
#include <asm/cpu_ops.h>
+#include <asm/numa.h>
#include <asm/sbi.h>
bool cpu_has_hotplug(unsigned int cpu)
@@ -40,6 +41,7 @@ int __cpu_disable(void)
return ret;
remove_cpu_topology(cpu);
+ numa_remove_cpu(cpu);
set_cpu_online(cpu, false);
irq_migrate_all_off_this_cpu();
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index ed29e9c8f660..d6a46ed0bf05 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -108,7 +108,7 @@ _save_context:
.option pop
#ifdef CONFIG_TRACE_IRQFLAGS
- call trace_hardirqs_off
+ call __trace_hardirqs_off
#endif
#ifdef CONFIG_CONTEXT_TRACKING
@@ -143,7 +143,7 @@ skip_context_tracking:
li t0, EXC_BREAKPOINT
beq s4, t0, 1f
#ifdef CONFIG_TRACE_IRQFLAGS
- call trace_hardirqs_on
+ call __trace_hardirqs_on
#endif
csrs CSR_STATUS, SR_IE
@@ -234,7 +234,7 @@ ret_from_exception:
REG_L s0, PT_STATUS(sp)
csrc CSR_STATUS, SR_IE
#ifdef CONFIG_TRACE_IRQFLAGS
- call trace_hardirqs_off
+ call __trace_hardirqs_off
#endif
#ifdef CONFIG_RISCV_M_MODE
/* the MPP value is too large to be used as an immediate arg for addi */
@@ -270,10 +270,10 @@ restore_all:
REG_L s1, PT_STATUS(sp)
andi t0, s1, SR_PIE
beqz t0, 1f
- call trace_hardirqs_on
+ call __trace_hardirqs_on
j 2f
1:
- call trace_hardirqs_off
+ call __trace_hardirqs_off
2:
#endif
REG_L a0, PT_STATUS(sp)
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 2363b43312fc..ec07f991866a 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -22,14 +22,13 @@
add \reg, \reg, t0
.endm
.macro XIP_FIXUP_FLASH_OFFSET reg
- la t1, __data_loc
- li t0, XIP_OFFSET_MASK
- and t1, t1, t0
- li t1, XIP_OFFSET
- sub t0, t0, t1
- sub \reg, \reg, t0
+ la t0, __data_loc
+ REG_L t1, _xip_phys_offset
+ sub \reg, \reg, t1
+ add \reg, \reg, t0
.endm
_xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET
+_xip_phys_offset: .dword CONFIG_XIP_PHYS_ADDR + XIP_OFFSET
#else
.macro XIP_FIXUP_OFFSET reg
.endm
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 68a9e3d1fe16..4a48287513c3 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -13,6 +13,19 @@
#include <linux/pgtable.h>
#include <asm/sections.h>
+/*
+ * The auipc+jalr instruction pair can reach any PC-relative offset
+ * in the range [-2^31 - 2^11, 2^31 - 2^11)
+ */
+static bool riscv_insn_valid_32bit_offset(ptrdiff_t val)
+{
+#ifdef CONFIG_32BIT
+ return true;
+#else
+ return (-(1L << 31) - (1L << 11)) <= val && val < ((1L << 31) - (1L << 11));
+#endif
+}
+
static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v)
{
if (v != (u32)v) {
@@ -95,7 +108,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location,
ptrdiff_t offset = (void *)v - (void *)location;
s32 hi20;
- if (offset != (s32)offset) {
+ if (!riscv_insn_valid_32bit_offset(offset)) {
pr_err(
"%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
me->name, (long long)v, location);
@@ -197,10 +210,9 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location,
Elf_Addr v)
{
ptrdiff_t offset = (void *)v - (void *)location;
- s32 fill_v = offset;
u32 hi20, lo12;
- if (offset != fill_v) {
+ if (!riscv_insn_valid_32bit_offset(offset)) {
/* Only emit the plt entry if offset over 32-bit range */
if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) {
offset = module_emit_plt_entry(me, v);
@@ -224,10 +236,9 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location,
Elf_Addr v)
{
ptrdiff_t offset = (void *)v - (void *)location;
- s32 fill_v = offset;
u32 hi20, lo12;
- if (offset != fill_v) {
+ if (!riscv_insn_valid_32bit_offset(offset)) {
pr_err(
"%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
me->name, (long long)v, location);
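
Editor's note: the old `offset != (s32)offset` test accepted anything that fits in 32 bits, but auipc+jalr cannot reach the top 2^11 bytes of that range: the jalr immediate is sign-extended, so auipc must carry offset + 0x800, which itself has to fit in 32 bits. A small stand-alone check of the new bounds, assuming a 64-bit long as in the kernel's 64-bit branch:

#include <assert.h>
#include <stddef.h>

static int valid_auipc_jalr_offset(ptrdiff_t val)
{
	return (-(1L << 31) - (1L << 11)) <= val &&
	       val < ((1L << 31) - (1L << 11));
}

int main(void)
{
	/* 0x7ffff800 fits in s32, yet (0x7ffff800 + 0x800) overflows
	 * the 32 bits that auipc can supply */
	assert(valid_auipc_jalr_offset(0x7ffff7ffL));
	assert(!valid_auipc_jalr_offset(0x7ffff800L));
	/* the negative side gains those 2^11 bytes instead */
	assert(valid_auipc_jalr_offset(-(1L << 31) - (1L << 11)));
	assert(!valid_auipc_jalr_offset(-(1L << 31) - (1L << 11) - 1));
	return 0;
}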
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index f72527fcb347..775d3322b422 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -5,6 +5,7 @@
* Copyright (c) 2020 Western Digital Corporation or its affiliates.
*/
+#include <linux/bits.h>
#include <linux/init.h>
#include <linux/pm.h>
#include <linux/reboot.h>
@@ -85,7 +86,7 @@ static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mas
pr_warn("Unable to send any request to hartid > BITS_PER_LONG for SBI v0.1\n");
break;
}
- hmask |= 1 << hartid;
+ hmask |= BIT(hartid);
}
return hmask;
@@ -160,7 +161,7 @@ static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask)
{
unsigned long hart_mask;
- if (!cpu_mask)
+ if (!cpu_mask || cpumask_empty(cpu_mask))
cpu_mask = cpu_online_mask;
hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask);
@@ -176,7 +177,7 @@ static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask,
int result = 0;
unsigned long hart_mask;
- if (!cpu_mask)
+ if (!cpu_mask || cpumask_empty(cpu_mask))
cpu_mask = cpu_online_mask;
hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask);
@@ -249,26 +250,37 @@ static void __sbi_set_timer_v02(uint64_t stime_value)
static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask)
{
- unsigned long hartid, cpuid, hmask = 0, hbase = 0;
+ unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0;
struct sbiret ret = {0};
int result;
- if (!cpu_mask)
+ if (!cpu_mask || cpumask_empty(cpu_mask))
cpu_mask = cpu_online_mask;
for_each_cpu(cpuid, cpu_mask) {
hartid = cpuid_to_hartid_map(cpuid);
- if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) {
- ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI,
- hmask, hbase, 0, 0, 0, 0);
- if (ret.error)
- goto ecall_failed;
- hmask = 0;
- hbase = 0;
+ if (hmask) {
+ if (hartid + BITS_PER_LONG <= htop ||
+ hbase + BITS_PER_LONG <= hartid) {
+ ret = sbi_ecall(SBI_EXT_IPI,
+ SBI_EXT_IPI_SEND_IPI, hmask,
+ hbase, 0, 0, 0, 0);
+ if (ret.error)
+ goto ecall_failed;
+ hmask = 0;
+ } else if (hartid < hbase) {
+ /* shift the mask to fit lower hartid */
+ hmask <<= hbase - hartid;
+ hbase = hartid;
+ }
}
- if (!hmask)
+ if (!hmask) {
hbase = hartid;
- hmask |= 1UL << (hartid - hbase);
+ htop = hartid;
+ } else if (hartid > htop) {
+ htop = hartid;
+ }
+ hmask |= BIT(hartid - hbase);
}
if (hmask) {
@@ -344,25 +356,35 @@ static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5)
{
- unsigned long hartid, cpuid, hmask = 0, hbase = 0;
+ unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0;
int result;
- if (!cpu_mask)
+ if (!cpu_mask || cpumask_empty(cpu_mask))
cpu_mask = cpu_online_mask;
for_each_cpu(cpuid, cpu_mask) {
hartid = cpuid_to_hartid_map(cpuid);
- if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) {
- result = __sbi_rfence_v02_call(fid, hmask, hbase,
- start, size, arg4, arg5);
- if (result)
- return result;
- hmask = 0;
- hbase = 0;
+ if (hmask) {
+ if (hartid + BITS_PER_LONG <= htop ||
+ hbase + BITS_PER_LONG <= hartid) {
+ result = __sbi_rfence_v02_call(fid, hmask,
+ hbase, start, size, arg4, arg5);
+ if (result)
+ return result;
+ hmask = 0;
+ } else if (hartid < hbase) {
+ /* shift the mask to fit lower hartid */
+ hmask <<= hbase - hartid;
+ hbase = hartid;
+ }
}
- if (!hmask)
+ if (!hmask) {
hbase = hartid;
- hmask |= 1UL << (hartid - hbase);
+ htop = hartid;
+ } else if (hartid > htop) {
+ htop = hartid;
+ }
+ hmask |= BIT(hartid - hbase);
}
if (hmask) {
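
Editor's note: the rewritten loops above batch hart IDs into one BITS_PER_LONG-wide (hbase, hmask) window per SBI ecall and, unlike the old code, can lower hbase by shifting the accumulated mask when a smaller, out-of-order hart ID arrives, as long as the window still covers htop. A stand-alone model of that batching (hypothetical; prints instead of issuing ecalls):

#include <stdio.h>

#define BITS_PER_ULONG (8 * (int)sizeof(unsigned long))

static void flush_batch(unsigned long hmask, unsigned long hbase)
{
	printf("ecall: hbase=%lu hmask=%#lx\n", hbase, hmask);
}

static void send_ipis(const unsigned long *harts, int n)
{
	unsigned long hmask = 0, hbase = 0, htop = 0;

	for (int i = 0; i < n; i++) {
		unsigned long hartid = harts[i];

		if (hmask) {
			if (hartid + BITS_PER_ULONG <= htop ||
			    hbase + BITS_PER_ULONG <= hartid) {
				flush_batch(hmask, hbase);	/* window can't cover it */
				hmask = 0;
			} else if (hartid < hbase) {
				hmask <<= hbase - hartid;	/* rebase window downwards */
				hbase = hartid;
			}
		}
		if (!hmask) {
			hbase = hartid;
			htop = hartid;
		} else if (hartid > htop) {
			htop = hartid;
		}
		hmask |= 1UL << (hartid - hbase);
	}
	if (hmask)
		flush_batch(hmask, hbase);
}

int main(void)
{
	/* hart 64 first, then 1 (window rebases to 1), then 0 (needs a
	 * second batch: a window based at 0 could no longer reach 64) */
	unsigned long harts[] = { 64, 1, 0 };

	send_ipis(harts, 3);
	return 0;
}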
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index b42bfdc67482..834eb652a7b9 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -301,9 +301,6 @@ static int __init topology_init(void)
{
int i, ret;
- for_each_online_node(i)
- register_one_node(i);
-
for_each_possible_cpu(i) {
struct cpu *cpu = &per_cpu(cpu_devices, i);
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 201ee206fb57..14d2b53ec322 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -22,15 +22,16 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
bool (*fn)(void *, unsigned long), void *arg)
{
unsigned long fp, sp, pc;
+ int level = 0;
if (regs) {
fp = frame_pointer(regs);
sp = user_stack_pointer(regs);
pc = instruction_pointer(regs);
} else if (task == NULL || task == current) {
- fp = (unsigned long)__builtin_frame_address(1);
- sp = (unsigned long)__builtin_frame_address(0);
- pc = (unsigned long)__builtin_return_address(0);
+ fp = (unsigned long)__builtin_frame_address(0);
+ sp = sp_in_global;
+ pc = (unsigned long)walk_stackframe;
} else {
/* task blocked in __switch_to */
fp = task->thread.s[0];
@@ -42,7 +43,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
unsigned long low, high;
struct stackframe *frame;
- if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))
+ if (unlikely(!__kernel_text_address(pc) || (level++ >= 1 && !fn(arg, pc))))
break;
/* Validate frame pointer */
diff --git a/arch/riscv/kernel/trace_irq.c b/arch/riscv/kernel/trace_irq.c
new file mode 100644
index 000000000000..095ac976d7da
--- /dev/null
+++ b/arch/riscv/kernel/trace_irq.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com>
+ */
+
+#include <linux/irqflags.h>
+#include <linux/kprobes.h>
+#include "trace_irq.h"
+
+/*
+ * trace_hardirqs_on/off require the caller to set up the frame pointer
+ * properly. Otherwise, CALLER_ADDR1 might trigger a paging exception in
+ * the kernel. Here we add one extra call level so they can be safely
+ * called by low-level entry code in which $fp is used for other purposes.
+ */
+
+void __trace_hardirqs_on(void)
+{
+ trace_hardirqs_on();
+}
+NOKPROBE_SYMBOL(__trace_hardirqs_on);
+
+void __trace_hardirqs_off(void)
+{
+ trace_hardirqs_off();
+}
+NOKPROBE_SYMBOL(__trace_hardirqs_off);
diff --git a/arch/riscv/kernel/trace_irq.h b/arch/riscv/kernel/trace_irq.h
new file mode 100644
index 000000000000..99fe67377e5e
--- /dev/null
+++ b/arch/riscv/kernel/trace_irq.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com>
+ */
+#ifndef __TRACE_IRQ_H
+#define __TRACE_IRQ_H
+
+void __trace_hardirqs_on(void);
+void __trace_hardirqs_off(void);
+
+#endif /* __TRACE_IRQ_H */
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 0c5239e05721..624166004e36 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -90,6 +90,7 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *cntx;
+ struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
/* Mark this VCPU never ran */
vcpu->arch.ran_atleast_once = false;
@@ -106,6 +107,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
cntx->hstatus |= HSTATUS_SPVP;
cntx->hstatus |= HSTATUS_SPV;
+ /* By default, make CY, TM, and IR counters accessible in VU mode */
+ reset_csr->scounteren = 0x7;
+
/* Setup VCPU timer */
kvm_riscv_vcpu_timer_init(vcpu);
@@ -699,6 +703,20 @@ static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
csr_write(CSR_HVIP, csr->hvip);
}
+/*
+ * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
+ * the vCPU is running.
+ *
+ * This must be noinstr as instrumentation may make use of RCU, and this is not
+ * safe during the EQS.
+ */
+static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+{
+ guest_state_enter_irqoff();
+ __kvm_riscv_switch_to(&vcpu->arch);
+ guest_state_exit_irqoff();
+}
+
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
int ret;
@@ -790,9 +808,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
continue;
}
- guest_enter_irqoff();
+ guest_timing_enter_irqoff();
- __kvm_riscv_switch_to(&vcpu->arch);
+ kvm_riscv_vcpu_enter_exit(vcpu);
vcpu->mode = OUTSIDE_GUEST_MODE;
vcpu->stat.exits++;
@@ -812,25 +830,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_riscv_vcpu_sync_interrupts(vcpu);
/*
- * We may have taken a host interrupt in VS/VU-mode (i.e.
- * while executing the guest). This interrupt is still
- * pending, as we haven't serviced it yet!
+ * We must ensure that any pending interrupts are taken before
+ * we exit guest timing so that timer ticks are accounted as
+ * guest time. Transiently unmask interrupts so that any
+ * pending interrupts are taken.
*
- * We're now back in HS-mode with interrupts disabled
- * so enabling the interrupts now will have the effect
- * of taking the interrupt again, in HS-mode this time.
+ * There's no barrier which ensures that pending interrupts are
+ * recognised, so we just hope that the CPU takes any pending
+ * interrupts between the enable and disable.
*/
local_irq_enable();
+ local_irq_disable();
- /*
- * We do local_irq_enable() before calling guest_exit() so
- * that if a timer interrupt hits while running the guest
- * we account that tick as being spent in the guest. We
- * enable preemption after calling guest_exit() so that if
- * we get preempted we make sure ticks after that is not
- * counted as guest time.
- */
- guest_exit();
+ guest_timing_exit_irqoff();
+
+ local_irq_enable();
preempt_enable();
diff --git a/arch/riscv/kvm/vcpu_sbi_base.c b/arch/riscv/kvm/vcpu_sbi_base.c
index 4ecf377f483b..48f431091cdb 100644
--- a/arch/riscv/kvm/vcpu_sbi_base.c
+++ b/arch/riscv/kvm/vcpu_sbi_base.c
@@ -9,6 +9,7 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
+#include <linux/version.h>
#include <asm/csr.h>
#include <asm/sbi.h>
#include <asm/kvm_vcpu_timer.h>
@@ -32,7 +33,7 @@ static int kvm_sbi_ext_base_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
*out_val = KVM_SBI_IMPID;
break;
case SBI_EXT_BASE_GET_IMP_VERSION:
- *out_val = 0;
+ *out_val = LINUX_VERSION_CODE;
break;
case SBI_EXT_BASE_PROBE_EXT:
if ((cp->a0 >= SBI_EXT_EXPERIMENTAL_START &&
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index 7ebaef10ea1b..ac7a25298a04 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -24,6 +24,9 @@ obj-$(CONFIG_KASAN) += kasan_init.o
ifdef CONFIG_KASAN
KASAN_SANITIZE_kasan_init.o := n
KASAN_SANITIZE_init.o := n
+ifdef CONFIG_DEBUG_VIRTUAL
+KASAN_SANITIZE_physaddr.o := n
+endif
endif
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c
index 05978f78579f..35484d830fd6 100644
--- a/arch/riscv/mm/extable.c
+++ b/arch/riscv/mm/extable.c
@@ -33,7 +33,7 @@ static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset,
if (unlikely(offset > MAX_REG_OFFSET))
return;
- if (!offset)
+ if (offset)
*(unsigned long *)((unsigned long)regs + offset) = val;
}
@@ -43,8 +43,8 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
int reg_zero = FIELD_GET(EX_DATA_REG_ZERO, ex->data);
- regs_set_gpr(regs, reg_err, -EFAULT);
- regs_set_gpr(regs, reg_zero, 0);
+ regs_set_gpr(regs, reg_err * sizeof(unsigned long), -EFAULT);
+ regs_set_gpr(regs, reg_zero * sizeof(unsigned long), 0);
regs->epc = get_ex_fixup(ex);
return true;
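
Editor's note: two fixes land here: the inverted `!offset` test is corrected so that offset 0 means "do not write" (register number 0 is the riscv zero register, and its would-be pt_regs slot actually holds another field), and the callers now scale the register number from the extable data into a byte offset. A hypothetical stand-alone model:

#include <assert.h>
#include <string.h>

struct regs_model { unsigned long gprs[32]; };

static void regs_set_gpr(struct regs_model *regs, unsigned int offset,
			 unsigned long val)
{
	if (offset > 31 * sizeof(unsigned long))
		return;
	if (offset)	/* offset 0 == x0, the zero register: never written */
		*(unsigned long *)((char *)regs + offset) = val;
}

int main(void)
{
	struct regs_model regs;

	memset(&regs, 0, sizeof(regs));
	regs_set_gpr(&regs, 10 * sizeof(unsigned long), (unsigned long)-14); /* a0 = -EFAULT */
	regs_set_gpr(&regs, 0, 0xdead);
	assert(regs.gprs[10] == (unsigned long)-14);
	assert(regs.gprs[0] == 0);	/* write for register 0 was ignored */
	return 0;
}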
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index cf4d018b7d66..0d588032d6e6 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -125,7 +125,6 @@ void __init mem_init(void)
else
swiotlb_force = SWIOTLB_NO_FORCE;
#endif
- high_memory = (void *)(__va(PFN_PHYS(max_low_pfn)));
memblock_free_all();
print_vm_layout();
@@ -195,6 +194,7 @@ static void __init setup_bootmem(void)
min_low_pfn = PFN_UP(phys_ram_base);
max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end);
+ high_memory = (void *)(__va(PFN_PHYS(max_low_pfn)));
dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);
@@ -232,6 +232,7 @@ static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAG
#ifdef CONFIG_XIP_KERNEL
#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
+#define riscv_pfn_base (*(unsigned long *)XIP_FIXUP(&riscv_pfn_base))
#define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
#define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte))
#define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir))
@@ -522,6 +523,7 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
}
#ifdef CONFIG_XIP_KERNEL
+#define phys_ram_base (*(phys_addr_t *)XIP_FIXUP(&phys_ram_base))
extern char _xiprom[], _exiprom[], __data_loc;
/* called from head.S with MMU off */
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index f61f7ca6fe0f..cd1a145257b7 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -113,8 +113,11 @@ static void __init kasan_populate_pud(pgd_t *pgd,
base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd)));
} else {
base_pud = (pud_t *)pgd_page_vaddr(*pgd);
- if (base_pud == lm_alias(kasan_early_shadow_pud))
+ if (base_pud == lm_alias(kasan_early_shadow_pud)) {
base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE);
+ memcpy(base_pud, (void *)kasan_early_shadow_pud,
+ sizeof(pud_t) * PTRS_PER_PUD);
+ }
}
pudp = base_pud + pud_index(vaddr);
@@ -202,8 +205,7 @@ asmlinkage void __init kasan_early_init(void)
for (i = 0; i < PTRS_PER_PTE; ++i)
set_pte(kasan_early_shadow_pte + i,
- mk_pte(virt_to_page(kasan_early_shadow_page),
- PAGE_KERNEL));
+ pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL));
for (i = 0; i < PTRS_PER_PMD; ++i)
set_pmd(kasan_early_shadow_pmd + i,
diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c
index e7fd0c253c7b..19cf25a74ee2 100644
--- a/arch/riscv/mm/physaddr.c
+++ b/arch/riscv/mm/physaddr.c
@@ -8,12 +8,10 @@
phys_addr_t __virt_to_phys(unsigned long x)
{
- phys_addr_t y = x - PAGE_OFFSET;
-
/*
* Boundary checking against the kernel linear mapping space.
*/
- WARN(y >= KERN_VIRT_SIZE,
+ WARN(!is_linear_mapping(x) && !is_kernel_mapping(x),
"virt_to_phys used for non-linear address: %pK (%pS)\n",
(void *)x, (void *)x);
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 9750f92380f5..4845ab549dd1 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -60,6 +60,7 @@ config S390
select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
+ select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEBUG_WX
select ARCH_HAS_DEVMEM_IS_ALLOWED
@@ -945,6 +946,9 @@ config S390_GUEST
endmenu
+config S390_MODULES_SANITY_TEST_HELPERS
+ def_bool n
+
menu "Selftests"
config S390_UNWIND_SELFTEST
@@ -971,4 +975,16 @@ config S390_KPROBES_SANITY_TEST
Say N if you are unsure.
+config S390_MODULES_SANITY_TEST
+ def_tristate n
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+ prompt "Enable s390 specific modules tests"
+ select S390_MODULES_SANITY_TEST_HELPERS
+ help
+ This option enables an s390 specific modules test. This option is
+ not useful for distributions or general kernels, but only for
+ kernel developers working on architecture code.
+
+ Say N if you are unsure.
endmenu
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 7fe8975b49ec..498bed9b261b 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -63,6 +63,7 @@ CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
CONFIG_S390_UNWIND_SELFTEST=m
CONFIG_S390_KPROBES_SANITY_TEST=m
+CONFIG_S390_MODULES_SANITY_TEST=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
CONFIG_STATIC_KEYS_SELFTEST=y
@@ -96,7 +97,6 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_FRONTSWAP=y
CONFIG_CMA_DEBUG=y
CONFIG_CMA_DEBUGFS=y
CONFIG_CMA_SYSFS=y
@@ -109,6 +109,7 @@ CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_PERCPU_STATS=y
CONFIG_GUP_TEST=y
+CONFIG_ANON_VMA_NAME=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -116,7 +117,6 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
-CONFIG_NET_SWITCHDEV=y
CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
@@ -185,7 +185,6 @@ CONFIG_NF_CT_NETLINK_TIMEOUT=m
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_INET=y
CONFIG_NFT_CT=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
CONFIG_NFT_NAT=m
@@ -391,6 +390,7 @@ CONFIG_OPENVSWITCH=m
CONFIG_VSOCKETS=m
CONFIG_VIRTIO_VSOCKETS=m
CONFIG_NETLINK_DIAG=m
+CONFIG_NET_SWITCHDEV=y
CONFIG_CGROUP_NET_PRIO=y
CONFIG_NET_PKTGEN=m
CONFIG_PCI=y
@@ -400,6 +400,7 @@ CONFIG_PCI_IOV=y
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_SAFE=y
CONFIG_CONNECTOR=y
CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=m
@@ -501,6 +502,7 @@ CONFIG_NLMON=m
# CONFIG_NET_VENDOR_DEC is not set
# CONFIG_NET_VENDOR_DLINK is not set
# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_ENGLEDER is not set
# CONFIG_NET_VENDOR_EZCHIP is not set
# CONFIG_NET_VENDOR_GOOGLE is not set
# CONFIG_NET_VENDOR_HUAWEI is not set
@@ -511,7 +513,6 @@ CONFIG_NLMON=m
CONFIG_MLX4_EN=m
CONFIG_MLX5_CORE=m
CONFIG_MLX5_CORE_EN=y
-CONFIG_MLX5_ESWITCH=y
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
@@ -542,6 +543,7 @@ CONFIG_MLX5_ESWITCH=y
# CONFIG_NET_VENDOR_SYNOPSYS is not set
# CONFIG_NET_VENDOR_TEHUTI is not set
# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VERTEXCOM is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_NET_VENDOR_XILINX is not set
@@ -592,6 +594,7 @@ CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
CONFIG_VHOST_NET=m
CONFIG_VHOST_VSOCK=m
+# CONFIG_SURFACE_PLATFORMS is not set
CONFIG_S390_CCW_IOMMU=y
CONFIG_S390_AP_IOMMU=y
CONFIG_EXT4_FS=y
@@ -756,9 +759,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_STATS=y
-CONFIG_CRYPTO_LIB_BLAKE2S=m
-CONFIG_CRYPTO_LIB_CURVE25519=m
-CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
@@ -774,6 +774,8 @@ CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_CORDIC=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_CRC32_SELFTEST=y
CONFIG_CRC4=m
CONFIG_CRC7=m
@@ -807,7 +809,6 @@ CONFIG_SLUB_DEBUG_ON=y
CONFIG_SLUB_STATS=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_VM=y
-CONFIG_DEBUG_VM_VMACACHE=y
CONFIG_DEBUG_VM_PGFLAGS=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
@@ -819,12 +820,11 @@ CONFIG_PANIC_ON_OOPS=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_WQ_WATCHDOG=y
CONFIG_TEST_LOCKUP=m
-CONFIG_DEBUG_TIMEKEEPING=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCK_STAT=y
-CONFIG_DEBUG_LOCKDEP=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
+CONFIG_DEBUG_IRQFLAGS=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 466780c465f5..61e36b999f67 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -61,6 +61,7 @@ CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
CONFIG_S390_UNWIND_SELFTEST=m
CONFIG_S390_KPROBES_SANITY_TEST=m
+CONFIG_S390_MODULES_SANITY_TEST=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
# CONFIG_GCC_PLUGINS is not set
@@ -91,7 +92,6 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_FRONTSWAP=y
CONFIG_CMA_SYSFS=y
CONFIG_CMA_AREAS=7
CONFIG_MEM_SOFT_DIRTY=y
@@ -101,6 +101,7 @@ CONFIG_ZSMALLOC_STAT=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_PERCPU_STATS=y
+CONFIG_ANON_VMA_NAME=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -108,7 +109,6 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
-CONFIG_NET_SWITCHDEV=y
CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
@@ -177,7 +177,6 @@ CONFIG_NF_CT_NETLINK_TIMEOUT=m
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_INET=y
CONFIG_NFT_CT=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
CONFIG_NFT_NAT=m
@@ -382,6 +381,7 @@ CONFIG_OPENVSWITCH=m
CONFIG_VSOCKETS=m
CONFIG_VIRTIO_VSOCKETS=m
CONFIG_NETLINK_DIAG=m
+CONFIG_NET_SWITCHDEV=y
CONFIG_CGROUP_NET_PRIO=y
CONFIG_NET_PKTGEN=m
CONFIG_PCI=y
@@ -391,6 +391,7 @@ CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_UEVENT_HELPER=y
CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_SAFE=y
CONFIG_CONNECTOR=y
CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=m
@@ -492,6 +493,7 @@ CONFIG_NLMON=m
# CONFIG_NET_VENDOR_DEC is not set
# CONFIG_NET_VENDOR_DLINK is not set
# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_ENGLEDER is not set
# CONFIG_NET_VENDOR_EZCHIP is not set
# CONFIG_NET_VENDOR_GOOGLE is not set
# CONFIG_NET_VENDOR_HUAWEI is not set
@@ -502,7 +504,6 @@ CONFIG_NLMON=m
CONFIG_MLX4_EN=m
CONFIG_MLX5_CORE=m
CONFIG_MLX5_CORE_EN=y
-CONFIG_MLX5_ESWITCH=y
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
@@ -533,6 +534,7 @@ CONFIG_MLX5_ESWITCH=y
# CONFIG_NET_VENDOR_SYNOPSYS is not set
# CONFIG_NET_VENDOR_TEHUTI is not set
# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VERTEXCOM is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_NET_VENDOR_XILINX is not set
@@ -582,6 +584,7 @@ CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
CONFIG_VHOST_NET=m
CONFIG_VHOST_VSOCK=m
+# CONFIG_SURFACE_PLATFORMS is not set
CONFIG_S390_CCW_IOMMU=y
CONFIG_S390_AP_IOMMU=y
CONFIG_EXT4_FS=y
@@ -743,9 +746,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_STATS=y
-CONFIG_CRYPTO_LIB_BLAKE2S=m
-CONFIG_CRYPTO_LIB_CURVE25519=m
-CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
@@ -762,6 +762,8 @@ CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_CORDIC=m
CONFIG_PRIME_NUMBERS=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_CRC4=m
CONFIG_CRC7=m
CONFIG_CRC8=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index eed3b9acfa71..c55c668dc3c7 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -1,6 +1,7 @@
# CONFIG_SWAP is not set
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_SYSCALL=y
# CONFIG_CPU_ISOLATION is not set
# CONFIG_UTS_NS is not set
# CONFIG_TIME_NS is not set
@@ -34,6 +35,7 @@ CONFIG_NET=y
# CONFIG_PCPU_DEV_REFCNT is not set
# CONFIG_ETHTOOL_NETLINK is not set
CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_SAFE=y
CONFIG_BLK_DEV_RAM=y
# CONFIG_DCSSBLK is not set
# CONFIG_DASD is not set
@@ -58,6 +60,7 @@ CONFIG_ZFCP=y
# CONFIG_HID is not set
# CONFIG_VIRTIO_MENU is not set
# CONFIG_VHOST_MENU is not set
+# CONFIG_SURFACE_PLATFORMS is not set
# CONFIG_IOMMU_SUPPORT is not set
# CONFIG_DNOTIFY is not set
# CONFIG_INOTIFY_USER is not set
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index 33f973ff9744..e8f15dbb89d0 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -20,6 +20,7 @@
static char local_guest[] = " ";
static char all_guests[] = "* ";
+static char *all_groups = all_guests;
static char *guest_query;
struct diag2fc_data {
@@ -62,10 +63,11 @@ static int diag2fc(int size, char* query, void *addr)
memcpy(parm_list.userid, query, NAME_LEN);
ASCEBC(parm_list.userid, NAME_LEN);
- parm_list.addr = (unsigned long) addr ;
+ memcpy(parm_list.aci_grp, all_groups, NAME_LEN);
+ ASCEBC(parm_list.aci_grp, NAME_LEN);
+ parm_list.addr = (unsigned long)addr;
parm_list.size = size;
parm_list.fmt = 0x02;
- memset(parm_list.aci_grp, 0x40, NAME_LEN);
rc = -1;
diag_stat_inc(DIAG_STAT_X2FC);
diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h
index 16dc57dd90b3..8511f0e59290 100644
--- a/arch/s390/include/asm/extable.h
+++ b/arch/s390/include/asm/extable.h
@@ -69,8 +69,13 @@ static inline void swap_ex_entry_fixup(struct exception_table_entry *a,
{
a->fixup = b->fixup + delta;
b->fixup = tmp.fixup - delta;
- a->handler = b->handler + delta;
- b->handler = tmp.handler - delta;
+ a->handler = b->handler;
+ if (a->handler)
+ a->handler += delta;
+ b->handler = tmp.handler;
+ if (b->handler)
+ b->handler -= delta;
}
+#define swap_ex_entry_fixup swap_ex_entry_fixup
#endif
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 267f70f4393f..6f80ec9c04be 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -47,15 +47,17 @@ struct ftrace_regs {
static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs)
{
- return &fregs->regs;
+ struct pt_regs *regs = &fregs->regs;
+
+ if (test_pt_regs_flag(regs, PIF_FTRACE_FULL_REGS))
+ return regs;
+ return NULL;
}
static __always_inline void ftrace_instruction_pointer_set(struct ftrace_regs *fregs,
unsigned long ip)
{
- struct pt_regs *regs = arch_ftrace_get_regs(fregs);
-
- regs->psw.addr = ip;
+ fregs->regs.psw.addr = ip;
}
/*
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 4ffa8e7f0ed3..ddb70fb13fbc 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -15,11 +15,13 @@
#define PIF_EXECVE_PGSTE_RESTART 1 /* restart execve for PGSTE binaries */
#define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */
#define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */
+#define PIF_FTRACE_FULL_REGS 4 /* all register contents valid (ftrace) */
#define _PIF_SYSCALL BIT(PIF_SYSCALL)
#define _PIF_EXECVE_PGSTE_RESTART BIT(PIF_EXECVE_PGSTE_RESTART)
#define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET)
#define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT)
+#define _PIF_FTRACE_FULL_REGS BIT(PIF_FTRACE_FULL_REGS)
#ifndef __ASSEMBLY__
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 147cb3534ce4..d74e26b48604 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -47,8 +47,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n);
int __put_user_bad(void) __attribute__((noreturn));
int __get_user_bad(void) __attribute__((noreturn));
-#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
-
union oac {
unsigned int val;
struct {
@@ -71,6 +69,8 @@ union oac {
};
};
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+
#define __put_get_user_asm(to, from, size, oac_spec) \
({ \
int __rc; \
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 21d62d8b6b9a..89c0870d5679 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -159,9 +159,38 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
return 0;
}
+static struct ftrace_hotpatch_trampoline *ftrace_get_trampoline(struct dyn_ftrace *rec)
+{
+ struct ftrace_hotpatch_trampoline *trampoline;
+ struct ftrace_insn insn;
+ s64 disp;
+ u16 opc;
+
+ if (copy_from_kernel_nofault(&insn, (void *)rec->ip, sizeof(insn)))
+ return ERR_PTR(-EFAULT);
+ disp = (s64)insn.disp * 2;
+ trampoline = (void *)(rec->ip + disp);
+ if (get_kernel_nofault(opc, &trampoline->brasl_opc))
+ return ERR_PTR(-EFAULT);
+ if (opc != 0xc015)
+ return ERR_PTR(-EINVAL);
+ return trampoline;
+}
+
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
+ struct ftrace_hotpatch_trampoline *trampoline;
+ u64 old;
+
+ trampoline = ftrace_get_trampoline(rec);
+ if (IS_ERR(trampoline))
+ return PTR_ERR(trampoline);
+ if (get_kernel_nofault(old, &trampoline->interceptor))
+ return -EFAULT;
+ if (old != old_addr)
+ return -EINVAL;
+ s390_kernel_write(&trampoline->interceptor, &addr, sizeof(addr));
return 0;
}
@@ -188,6 +217,12 @@ static void brcl_enable(void *brcl)
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
+ struct ftrace_hotpatch_trampoline *trampoline;
+
+ trampoline = ftrace_get_trampoline(rec);
+ if (IS_ERR(trampoline))
+ return PTR_ERR(trampoline);
+ s390_kernel_write(&trampoline->interceptor, &addr, sizeof(addr));
brcl_enable((void *)rec->ip);
return 0;
}
@@ -291,7 +326,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
regs = ftrace_get_regs(fregs);
p = get_kprobe((kprobe_opcode_t *)ip);
- if (unlikely(!p) || kprobe_disabled(p))
+ if (!regs || unlikely(!p) || kprobe_disabled(p))
goto out;
if (kprobe_running()) {
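
Editor's note: ftrace_get_trampoline() above locates the per-call-site trampoline by decoding the brasl at rec->ip (brasl stores a signed 32-bit displacement counted in halfwords) and then verifies the trampoline really starts with a brasl %r1 opcode (0xc015). A hypothetical model of the displacement decode:

#include <assert.h>
#include <stdint.h>

struct insn_model {
	uint16_t opc;
	int32_t disp;	/* signed, in halfwords (2-byte units) */
} __attribute__((packed));

static uint64_t brasl_target(uint64_t ip, const struct insn_model *insn)
{
	return ip + (int64_t)insn->disp * 2;
}

int main(void)
{
	struct insn_model insn = { .opc = 0xc015, .disp = -0x100 };

	/* a branch 0x200 bytes backwards from the call site */
	assert(brasl_target(0x1000, &insn) == 0xe00);
	return 0;
}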
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 39bcc0e39a10..a24177dcd12a 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -27,6 +27,7 @@ ENDPROC(ftrace_stub)
#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW)
#define STACK_PTREGS_ORIG_GPR2 (STACK_PTREGS + __PT_ORIG_GPR2)
+#define STACK_PTREGS_FLAGS (STACK_PTREGS + __PT_FLAGS)
#ifdef __PACK_STACK
/* allocate just enough for r14, r15 and backchain */
#define TRACED_FUNC_FRAME_SIZE 24
@@ -57,6 +58,14 @@ ENDPROC(ftrace_stub)
.if \allregs == 1
stg %r14,(STACK_PTREGS_PSW)(%r15)
stosm (STACK_PTREGS_PSW)(%r15),0
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+ mvghi STACK_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS
+#else
+ lghi %r14,_PIF_FTRACE_FULL_REGS
+ stg %r14,STACK_PTREGS_FLAGS(%r15)
+#endif
+ .else
+ xc STACK_PTREGS_FLAGS(8,%r15),STACK_PTREGS_FLAGS(%r15)
.endif
lg %r14,(__SF_GPRS+8*8)(%r1) # restore original return address
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index d52d85367bf7..b032e556eeb7 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -33,7 +33,7 @@
#define DEBUGP(fmt , ...)
#endif
-#define PLT_ENTRY_SIZE 20
+#define PLT_ENTRY_SIZE 22
void *module_alloc(unsigned long size)
{
@@ -341,27 +341,26 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */
case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
if (info->plt_initialized == 0) {
- unsigned int insn[5];
- unsigned int *ip = me->core_layout.base +
- me->arch.plt_offset +
- info->plt_offset;
-
- insn[0] = 0x0d10e310; /* basr 1,0 */
- insn[1] = 0x100a0004; /* lg 1,10(1) */
+ unsigned char insn[PLT_ENTRY_SIZE];
+ char *plt_base;
+ char *ip;
+
+ plt_base = me->core_layout.base + me->arch.plt_offset;
+ ip = plt_base + info->plt_offset;
+ *(int *)insn = 0x0d10e310; /* basr 1,0 */
+ *(int *)&insn[4] = 0x100c0004; /* lg 1,12(1) */
if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) {
- unsigned int *ij;
- ij = me->core_layout.base +
- me->arch.plt_offset +
- me->arch.plt_size - PLT_ENTRY_SIZE;
- insn[2] = 0xa7f40000 + /* j __jump_r1 */
- (unsigned int)(u16)
- (((unsigned long) ij - 8 -
- (unsigned long) ip) / 2);
+ char *jump_r1;
+
+ jump_r1 = plt_base + me->arch.plt_size -
+ PLT_ENTRY_SIZE;
+ /* brcl 0xf,__jump_r1 */
+ *(short *)&insn[8] = 0xc0f4;
+ *(int *)&insn[10] = (jump_r1 - (ip + 8)) / 2;
} else {
- insn[2] = 0x07f10000; /* br %r1 */
+ *(int *)&insn[8] = 0x07f10000; /* br %r1 */
}
- insn[3] = (unsigned int) (val >> 32);
- insn[4] = (unsigned int) val;
+ *(long *)&insn[14] = val;
write(ip, insn, sizeof(insn));
info->plt_initialized = 1;
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 0c9e894913dc..651a51914e34 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -264,7 +264,14 @@ static int notrace s390_validate_registers(union mci mci, int umode)
/* Validate vector registers */
union ctlreg0 cr0;
- if (!mci.vr) {
+ /*
+ * The vector validity must only be checked if not running a
+ * KVM guest. For KVM guests the machine check is forwarded by
+ * KVM and it is the responsibility of the guest to take
+ * appropriate actions. The host vector or FPU values have been
+ * saved by KVM and will be restored by KVM.
+ */
+ if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST)) {
/*
* Vector registers can't be restored. If the kernel
* currently uses vector registers the system is
@@ -307,11 +314,21 @@ static int notrace s390_validate_registers(union mci mci, int umode)
if (cr2.gse) {
if (!mci.gs) {
/*
- * Guarded storage register can't be restored and
- * the current processes uses guarded storage.
- * It has to be terminated.
+ * 2 cases:
+ * - machine check in kernel or userspace
+ * - machine check while running SIE (KVM guest)
+ * For kernel or userspace the userspace values of
+ * guarded storage control cannot be recreated, so the
+ * process must be terminated.
+ * For SIE the guest values of guarded storage cannot
+ * be recreated. This is either due to a bug or due to
+ * GS being disabled in the guest. The guest will be
+ * notified by KVM code and the guest's machine check
+ * handling must take care of this. The host values
+ * are saved by KVM and are not affected.
*/
- kill_task = 1;
+ if (!test_cpu_flag(CIF_MCCK_GUEST))
+ kill_task = 1;
} else {
load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area);
}
diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c
index 51c5a9f6e525..23ab9f02f278 100644
--- a/arch/s390/kernel/numa.c
+++ b/arch/s390/kernel/numa.c
@@ -33,10 +33,3 @@ void __init numa_setup(void)
NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT;
NODE_DATA(0)->node_id = 0;
}
-
-static int __init numa_init_late(void)
-{
- register_one_node(0);
- return 0;
-}
-arch_initcall(numa_init_late);
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index f2c25d113e7b..05327be3a982 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -800,6 +800,8 @@ static void __init check_initrd(void)
static void __init reserve_kernel(void)
{
memblock_reserve(0, STARTUP_NORMAL_OFFSET);
+ memblock_reserve(OLDMEM_BASE, sizeof(unsigned long));
+ memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long));
memblock_reserve(__amode31_base, __eamode31 - __samode31);
memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP);
memblock_reserve(__pa(_stext), _end - _stext);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 577f1ead6a51..2296b1ff1e02 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -4667,6 +4667,8 @@ static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
return -EINVAL;
if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
return -E2BIG;
+ if (!kvm_s390_pv_cpu_is_protected(vcpu))
+ return -EINVAL;
switch (mop->op) {
case KVM_S390_MEMOP_SIDA_READ:
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 707cd4622c13..69feb8ed3312 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -17,4 +17,7 @@ KASAN_SANITIZE_uaccess.o := n
obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o
CFLAGS_test_unwind.o += -fno-optimize-sibling-calls
+obj-$(CONFIG_S390_MODULES_SANITY_TEST) += test_modules.o
+obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o
+
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/s390/lib/test_modules.c b/arch/s390/lib/test_modules.c
new file mode 100644
index 000000000000..9894009fc1f2
--- /dev/null
+++ b/arch/s390/lib/test_modules.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <kunit/test.h>
+#include <linux/module.h>
+
+#include "test_modules.h"
+
+/*
+ * Test that modules with many relocations are loaded properly.
+ */
+static void test_modules_many_vmlinux_relocs(struct kunit *test)
+{
+ int result = 0;
+
+#define CALL_RETURN(i) result += test_modules_return_ ## i()
+ REPEAT_10000(CALL_RETURN);
+ KUNIT_ASSERT_EQ(test, result, 49995000);
+}
+
+static struct kunit_case modules_testcases[] = {
+ KUNIT_CASE(test_modules_many_vmlinux_relocs),
+ {}
+};
+
+static struct kunit_suite modules_test_suite = {
+ .name = "modules_test_s390",
+ .test_cases = modules_testcases,
+};
+
+kunit_test_suites(&modules_test_suite);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/lib/test_modules.h b/arch/s390/lib/test_modules.h
new file mode 100644
index 000000000000..6371fcf17684
--- /dev/null
+++ b/arch/s390/lib/test_modules.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef TEST_MODULES_H
+#define TEST_MODULES_H
+
+#define __REPEAT_10000_3(f, x) \
+ f(x ## 0); \
+ f(x ## 1); \
+ f(x ## 2); \
+ f(x ## 3); \
+ f(x ## 4); \
+ f(x ## 5); \
+ f(x ## 6); \
+ f(x ## 7); \
+ f(x ## 8); \
+ f(x ## 9)
+#define __REPEAT_10000_2(f, x) \
+ __REPEAT_10000_3(f, x ## 0); \
+ __REPEAT_10000_3(f, x ## 1); \
+ __REPEAT_10000_3(f, x ## 2); \
+ __REPEAT_10000_3(f, x ## 3); \
+ __REPEAT_10000_3(f, x ## 4); \
+ __REPEAT_10000_3(f, x ## 5); \
+ __REPEAT_10000_3(f, x ## 6); \
+ __REPEAT_10000_3(f, x ## 7); \
+ __REPEAT_10000_3(f, x ## 8); \
+ __REPEAT_10000_3(f, x ## 9)
+#define __REPEAT_10000_1(f, x) \
+ __REPEAT_10000_2(f, x ## 0); \
+ __REPEAT_10000_2(f, x ## 1); \
+ __REPEAT_10000_2(f, x ## 2); \
+ __REPEAT_10000_2(f, x ## 3); \
+ __REPEAT_10000_2(f, x ## 4); \
+ __REPEAT_10000_2(f, x ## 5); \
+ __REPEAT_10000_2(f, x ## 6); \
+ __REPEAT_10000_2(f, x ## 7); \
+ __REPEAT_10000_2(f, x ## 8); \
+ __REPEAT_10000_2(f, x ## 9)
+#define REPEAT_10000(f) \
+ __REPEAT_10000_1(f, 0); \
+ __REPEAT_10000_1(f, 1); \
+ __REPEAT_10000_1(f, 2); \
+ __REPEAT_10000_1(f, 3); \
+ __REPEAT_10000_1(f, 4); \
+ __REPEAT_10000_1(f, 5); \
+ __REPEAT_10000_1(f, 6); \
+ __REPEAT_10000_1(f, 7); \
+ __REPEAT_10000_1(f, 8); \
+ __REPEAT_10000_1(f, 9)
+
+#define DECLARE_RETURN(i) int test_modules_return_ ## i(void)
+REPEAT_10000(DECLARE_RETURN);
+
+#endif
diff --git a/arch/s390/lib/test_modules_helpers.c b/arch/s390/lib/test_modules_helpers.c
new file mode 100644
index 000000000000..1670349a03eb
--- /dev/null
+++ b/arch/s390/lib/test_modules_helpers.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/export.h>
+
+#include "test_modules.h"
+
+#define DEFINE_RETURN(i) \
+ int test_modules_return_ ## i(void) \
+ { \
+ return 1 ## i - 10000; \
+ } \
+ EXPORT_SYMBOL_GPL(test_modules_return_ ## i)
+REPEAT_10000(DEFINE_RETURN);
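The 1 ## i - 10000 construction is worth spelling out: REPEAT_10000 pastes every four-digit suffix from 0000 to 9999, and a plain "return 0042;" would be parsed as an octal constant (while "0089" would not compile at all). Prefixing a 1 and subtracting 10000 keeps every literal decimal. One expansion, written out as an assumed example:

int test_modules_return_0042(void)
{
	return 10042 - 10000;	/* == 42; "return 0042;" would be octal */
}

Summing i over 0..9999 gives 9999 * 10000 / 2 = 49995000, which is the value asserted by the KUnit case in test_modules.c above.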
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c
index a963c3d8ad0d..fb924a8041dc 100644
--- a/arch/s390/lib/xor.c
+++ b/arch/s390/lib/xor.c
@@ -11,7 +11,8 @@
#include <linux/raid/xor.h>
#include <asm/xor.h>
-static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
asm volatile(
" larl 1,2f\n"
@@ -32,8 +33,9 @@ static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
: "0", "1", "cc", "memory");
}
-static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
asm volatile(
" larl 1,2f\n"
@@ -58,8 +60,10 @@ static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
: : "0", "1", "cc", "memory");
}
-static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
asm volatile(
" larl 1,2f\n"
@@ -88,8 +92,11 @@ static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
: : "0", "1", "cc", "memory");
}
-static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
asm volatile(
" larl 1,2f\n"
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 2474a04ceac4..1c2b53bf3093 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -7,6 +7,7 @@ config SUPERH
select ARCH_HAVE_CUSTOM_GPIO_H
select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
select ARCH_HAS_BINFMT_FLAT if !MMU
+ select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_PTE_SPECIAL
diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig
index 5193b3e099b9..4d83576b89c6 100644
--- a/arch/sh/configs/ap325rxa_defconfig
+++ b/arch/sh/configs/ap325rxa_defconfig
@@ -93,7 +93,6 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_932=y
CONFIG_NLS_ISO8859_1=y
diff --git a/arch/sh/configs/ecovec24_defconfig b/arch/sh/configs/ecovec24_defconfig
index 03cb916819fa..d90d29d44469 100644
--- a/arch/sh/configs/ecovec24_defconfig
+++ b/arch/sh/configs/ecovec24_defconfig
@@ -123,7 +123,6 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_932=y
CONFIG_NLS_ISO8859_1=y
diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig
index e6c5ddf070c0..492a0a2e0e36 100644
--- a/arch/sh/configs/landisk_defconfig
+++ b/arch/sh/configs/landisk_defconfig
@@ -108,7 +108,6 @@ CONFIG_UFS_FS=m
CONFIG_NFS_FS=m
CONFIG_NFS_V3=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_SMB_FS=m
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_932=y
diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig
index 6c719ab4332a..7d6d32359848 100644
--- a/arch/sh/configs/sdk7780_defconfig
+++ b/arch/sh/configs/sdk7780_defconfig
@@ -120,7 +120,6 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig
index a26f7f1841c7..d817df7cc4a7 100644
--- a/arch/sh/configs/se7724_defconfig
+++ b/arch/sh/configs/se7724_defconfig
@@ -122,7 +122,6 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_932=y
CONFIG_NLS_ISO8859_1=y
diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig
index ff502683132e..9fcf68b22dba 100644
--- a/arch/sh/configs/sh03_defconfig
+++ b/arch/sh/configs/sh03_defconfig
@@ -75,7 +75,6 @@ CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_CODEPAGE_737=m
diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig
index 5c725c75fcef..7eb3c10f28ad 100644
--- a/arch/sh/configs/sh7785lcr_32bit_defconfig
+++ b/arch/sh/configs/sh7785lcr_32bit_defconfig
@@ -129,7 +129,6 @@ CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_932=y
CONFIG_NLS_ISO8859_1=y
diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig
index cd5c58916c65..73a0d68b0de6 100644
--- a/arch/sh/configs/titan_defconfig
+++ b/arch/sh/configs/titan_defconfig
@@ -239,7 +239,6 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
CONFIG_SMB_FS=m
CONFIG_CIFS=m
CONFIG_PARTITION_ADVANCED=y
diff --git a/arch/sh/include/asm/pgtable_32.h b/arch/sh/include/asm/pgtable_32.h
index 41be43e99cff..d0240decacca 100644
--- a/arch/sh/include/asm/pgtable_32.h
+++ b/arch/sh/include/asm/pgtable_32.h
@@ -406,6 +406,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
return (unsigned long)pmd_val(pmd);
}
+#define pmd_pfn(pmd) (__pa(pmd_val(pmd)) >> PAGE_SHIFT)
#define pmd_page(pmd) (virt_to_page(pmd_val(pmd)))
#ifdef CONFIG_X2TLB
diff --git a/arch/sh/kernel/topology.c b/arch/sh/kernel/topology.c
index 76af6db9daa2..2d2a7509b565 100644
--- a/arch/sh/kernel/topology.c
+++ b/arch/sh/kernel/topology.c
@@ -46,11 +46,6 @@ static int __init topology_init(void)
{
int i, ret;
-#ifdef CONFIG_NUMA
- for_each_online_node(i)
- register_one_node(i);
-#endif
-
for_each_present_cpu(i) {
struct cpu *c = &per_cpu(cpu_devices, i);
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index ffccfe3b22ed..4866625da314 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -127,11 +127,16 @@ static inline int srmmu_device_memory(unsigned long x)
return ((x & 0xF0000000) != 0);
}
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+ return (pmd_val(pmd) & SRMMU_PTD_PMASK) >> (PAGE_SHIFT-4);
+}
+
static inline struct page *pmd_page(pmd_t pmd)
{
if (srmmu_device_memory(pmd_val(pmd)))
BUG();
- return pfn_to_page((pmd_val(pmd) & SRMMU_PTD_PMASK) >> (PAGE_SHIFT-4));
+ return pfn_to_page(pmd_pfn(pmd));
}
static inline unsigned long __pmd_page(pmd_t pmd)
diff --git a/arch/sparc/include/asm/xor_32.h b/arch/sparc/include/asm/xor_32.h
index 3e5af37e4b9c..0351813cf3af 100644
--- a/arch/sparc/include/asm/xor_32.h
+++ b/arch/sparc/include/asm/xor_32.h
@@ -13,7 +13,8 @@
*/
static void
-sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+sparc_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
int lines = bytes / (sizeof (long)) / 8;
@@ -50,8 +51,9 @@ sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-sparc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+sparc_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
int lines = bytes / (sizeof (long)) / 8;
@@ -101,8 +103,10 @@ sparc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-sparc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+sparc_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
int lines = bytes / (sizeof (long)) / 8;
@@ -165,8 +169,11 @@ sparc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-sparc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+sparc_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
int lines = bytes / (sizeof (long)) / 8;
diff --git a/arch/sparc/include/asm/xor_64.h b/arch/sparc/include/asm/xor_64.h
index 16169f3edcd5..caaddea8ad79 100644
--- a/arch/sparc/include/asm/xor_64.h
+++ b/arch/sparc/include/asm/xor_64.h
@@ -12,13 +12,20 @@
#include <asm/spitfire.h>
-void xor_vis_2(unsigned long, unsigned long *, unsigned long *);
-void xor_vis_3(unsigned long, unsigned long *, unsigned long *,
- unsigned long *);
-void xor_vis_4(unsigned long, unsigned long *, unsigned long *,
- unsigned long *, unsigned long *);
-void xor_vis_5(unsigned long, unsigned long *, unsigned long *,
- unsigned long *, unsigned long *, unsigned long *);
+void xor_vis_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+void xor_vis_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3);
+void xor_vis_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+void xor_vis_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5);
/* XXX Ugh, write cheetah versions... -DaveM */
@@ -30,13 +37,20 @@ static struct xor_block_template xor_block_VIS = {
.do_5 = xor_vis_5,
};
-void xor_niagara_2(unsigned long, unsigned long *, unsigned long *);
-void xor_niagara_3(unsigned long, unsigned long *, unsigned long *,
- unsigned long *);
-void xor_niagara_4(unsigned long, unsigned long *, unsigned long *,
- unsigned long *, unsigned long *);
-void xor_niagara_5(unsigned long, unsigned long *, unsigned long *,
- unsigned long *, unsigned long *, unsigned long *);
+void xor_niagara_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+void xor_niagara_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3);
+void xor_niagara_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+void xor_niagara_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5);
static struct xor_block_template xor_block_niagara = {
.name = "Niagara",
diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c
index 6d60d416f0dd..f19487e4cc71 100644
--- a/arch/sparc/kernel/sysfs.c
+++ b/arch/sparc/kernel/sysfs.c
@@ -244,22 +244,10 @@ static void __init check_mmu_stats(void)
mmu_stats_supported = 1;
}
-static void register_nodes(void)
-{
-#ifdef CONFIG_NUMA
- int i;
-
- for (i = 0; i < MAX_NUMNODES; i++)
- register_one_node(i);
-#endif
-}
-
static int __init topology_init(void)
{
int cpu, ret;
- register_nodes();
-
check_mmu_stats();
for_each_possible_cpu(cpu) {
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 0f49fada2093..d8e0e3c7038d 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -181,6 +181,7 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
pte_t pte;
+ entry = pte_mkhuge(entry);
pte = hugepage_shift_to_tte(entry, shift);
#ifdef CONFIG_SPARC64
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index b9e20bbe2f75..167e236d9bb8 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -109,6 +109,7 @@ extern unsigned long end_iomem;
#define p4d_newpage(x) (p4d_val(x) & _PAGE_NEWPAGE)
#define p4d_mkuptodate(x) (p4d_val(x) &= ~_PAGE_NEWPAGE)
+#define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT)
#define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK)
#define pte_page(x) pfn_to_page(pte_pfn(x))
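sh, sparc and um all gain a pmd_pfn() definition in this series, which suggests generic mm code now expects it on every architecture. The sparc hunk above makes the intended invariant explicit by rewriting pmd_page() in terms of pmd_pfn(); stated as a sketch:

/* Sketch: for a present leaf pmd, the two accessors must agree. */
static struct page *pmd_page_from_pfn(pmd_t pmd)
{
	return pfn_to_page(pmd_pfn(pmd));	/* == pmd_page(pmd) */
}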
diff --git a/arch/um/os-Linux/execvp.c b/arch/um/os-Linux/execvp.c
index 84a0777c2a45..c09a5fd5e225 100644
--- a/arch/um/os-Linux/execvp.c
+++ b/arch/um/os-Linux/execvp.c
@@ -93,6 +93,7 @@ int execvp_noalloc(char *buf, const char *file, char *const argv[])
up finding no executable we can use, we want to diagnose
that we did find one but were denied access. */
got_eacces = 1;
+ break;
case ENOENT:
case ESTALE:
case ENOTDIR:
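Judging from the stacked case labels, the added break is behavior-preserving: the ENOENT/ESTALE/ENOTDIR arm is empty, so EACCES previously fell through to the same empty body. The break makes the intent explicit (record the denial, then keep scanning PATH) and satisfies implicit-fallthrough warnings. The pattern reduced to a self-contained sketch, with the default arm assumed from the surrounding function:

#include <errno.h>

/* Sketch: classify an execv() errno while scanning PATH entries. */
static int classify_execv_errno(int err, int *got_eacces)
{
	switch (err) {
	case EACCES:
		*got_eacces = 1;	/* remember it for the final diagnosis */
		break;			/* explicit now; previously fell through */
	case ENOENT:
	case ESTALE:
	case ENOTDIR:
		break;			/* not in this directory; try the next */
	default:
		return -err;		/* found an executable but exec failed */
	}
	return 0;
}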
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index f384cb1a4f7a..5a83da703e87 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -1,4 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += coco/
+
obj-y += entry/
obj-$(CONFIG_PERF_EVENTS) += events/
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9f5bd41bf660..61e511c51890 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -69,6 +69,7 @@ config X86
select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_CACHE_LINE_SIZE
+ select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE
select ARCH_HAS_DEVMEM_IS_ALLOWED
@@ -118,8 +119,10 @@ config X86
select ARCH_WANT_DEFAULT_BPF_JIT if X86_64
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANTS_NO_INSTR
+ select ARCH_WANT_GENERAL_HUGETLB
select ARCH_WANT_HUGE_PMD_SHARE
select ARCH_WANT_LD_ORPHAN_WARN
+ select ARCH_WANTS_RT_DELAYED_SIGNALS
select ARCH_WANTS_THP_SWAP if X86_64
select ARCH_HAS_PARANOID_L1D_FLUSH
select BUILDTIME_TABLE_SORT
@@ -246,7 +249,7 @@ config X86
select HAVE_STACK_VALIDATION if X86_64
select HAVE_STATIC_CALL
select HAVE_STATIC_CALL_INLINE if HAVE_STACK_VALIDATION
- select HAVE_PREEMPT_DYNAMIC
+ select HAVE_PREEMPT_DYNAMIC_CALL
select HAVE_RSEQ
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNSTABLE_SCHED_CLOCK
@@ -347,9 +350,6 @@ config ARCH_NR_GPIO
config ARCH_SUSPEND_POSSIBLE
def_bool y
-config ARCH_WANT_GENERAL_HUGETLB
- def_bool y
-
config AUDIT_ARCH
def_bool y if X86_64
@@ -1275,23 +1275,6 @@ config TOSHIBA
Say Y if you intend to run this kernel on a Toshiba portable.
Say N otherwise.
-config I8K
- tristate "Dell i8k legacy laptop support"
- depends on HWMON
- depends on PROC_FS
- select SENSORS_DELL_SMM
- help
- This option enables legacy /proc/i8k userspace interface in hwmon
- dell-smm-hwmon driver. Character file /proc/i8k reports bios version,
- temperature and allows controlling fan speeds of Dell laptops via
- System Management Mode. For old Dell laptops (like Dell Inspiron 8000)
- it reports also power and hotkey status. For fan speed control is
- needed userspace package i8kutils.
-
- Say Y if you intend to run this kernel on old Dell laptops or want to
- use userspace package i8kutils.
- Say N otherwise.
-
config X86_REBOOTFIXUPS
bool "Enable X86 board specific fixups for reboot"
depends on X86_32
@@ -1638,7 +1621,7 @@ config ARCH_SPARSEMEM_DEFAULT
config ARCH_SELECT_MEMORY_MODEL
def_bool y
- depends on ARCH_SPARSEMEM_ENABLE
+ depends on ARCH_SPARSEMEM_ENABLE && ARCH_FLATMEM_ENABLE
config ARCH_MEMORY_PROBE
bool "Enable sysfs memory/probe interface"
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 659fad53ca82..3b354eb9516d 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -152,14 +152,13 @@ SYM_FUNC_END(startup_32)
#ifdef CONFIG_EFI_STUB
SYM_FUNC_START(efi32_stub_entry)
-SYM_FUNC_START_ALIAS(efi_stub_entry)
add $0x4, %esp
movl 8(%esp), %esi /* save boot_params pointer */
call efi_main
/* efi_main returns the possibly relocated address of startup_32 */
jmp *%eax
SYM_FUNC_END(efi32_stub_entry)
-SYM_FUNC_END_ALIAS(efi_stub_entry)
+SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry)
#endif
.text
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fd9441f40457..dea95301196b 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -535,7 +535,6 @@ SYM_CODE_END(startup_64)
#ifdef CONFIG_EFI_STUB
.org 0x390
SYM_FUNC_START(efi64_stub_entry)
-SYM_FUNC_START_ALIAS(efi_stub_entry)
and $~0xf, %rsp /* realign the stack */
movq %rdx, %rbx /* save boot_params pointer */
call efi_main
@@ -543,7 +542,7 @@ SYM_FUNC_START_ALIAS(efi_stub_entry)
leaq rva(startup_64)(%rax), %rax
jmp *%rax
SYM_FUNC_END(efi64_stub_entry)
-SYM_FUNC_END_ALIAS(efi_stub_entry)
+SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry)
#endif
.text
diff --git a/arch/x86/coco/Makefile b/arch/x86/coco/Makefile
new file mode 100644
index 000000000000..c1ead00017a7
--- /dev/null
+++ b/arch/x86/coco/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS_REMOVE_core.o = -pg
+KASAN_SANITIZE_core.o := n
+CFLAGS_core.o += -fno-stack-protector
+
+obj-y += core.o
diff --git a/arch/x86/kernel/cc_platform.c b/arch/x86/coco/core.c
index 6a6ffcd978f6..fc1365dd927e 100644
--- a/arch/x86/kernel/cc_platform.c
+++ b/arch/x86/coco/core.c
@@ -9,18 +9,16 @@
#include <linux/export.h>
#include <linux/cc_platform.h>
-#include <linux/mem_encrypt.h>
-#include <asm/mshyperv.h>
+#include <asm/coco.h>
#include <asm/processor.h>
-static bool __maybe_unused intel_cc_platform_has(enum cc_attr attr)
+static enum cc_vendor vendor __ro_after_init;
+static u64 cc_mask __ro_after_init;
+
+static bool intel_cc_platform_has(enum cc_attr attr)
{
-#ifdef CONFIG_INTEL_TDX_GUEST
- return false;
-#else
return false;
-#endif
}
/*
@@ -74,12 +72,46 @@ static bool hyperv_cc_platform_has(enum cc_attr attr)
bool cc_platform_has(enum cc_attr attr)
{
- if (sme_me_mask)
+ switch (vendor) {
+ case CC_VENDOR_AMD:
return amd_cc_platform_has(attr);
-
- if (hv_is_isolation_supported())
+ case CC_VENDOR_INTEL:
+ return intel_cc_platform_has(attr);
+ case CC_VENDOR_HYPERV:
return hyperv_cc_platform_has(attr);
-
- return false;
+ default:
+ return false;
+ }
}
EXPORT_SYMBOL_GPL(cc_platform_has);
+
+u64 cc_mkenc(u64 val)
+{
+ switch (vendor) {
+ case CC_VENDOR_AMD:
+ return val | cc_mask;
+ default:
+ return val;
+ }
+}
+
+u64 cc_mkdec(u64 val)
+{
+ switch (vendor) {
+ case CC_VENDOR_AMD:
+ return val & ~cc_mask;
+ default:
+ return val;
+ }
+}
+EXPORT_SYMBOL_GPL(cc_mkdec);
+
+__init void cc_set_vendor(enum cc_vendor v)
+{
+ vendor = v;
+}
+
+__init void cc_set_mask(u64 mask)
+{
+ cc_mask = mask;
+}
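With the vendor and mask cached in one place, callers can convert protection values without knowing which confidential-computing technology is active; for AMD SEV the mask is presumably the C-bit, set by cc_mkenc() and cleared by cc_mkdec(). A hedged usage sketch (the pgprot manipulation is illustrative, not taken from this patch):

/* Sketch: deriving encrypted and decrypted ("shared") protections. */
pgprot_t prot_enc = __pgprot(cc_mkenc(pgprot_val(PAGE_KERNEL)));
pgprot_t prot_dec = __pgprot(cc_mkdec(pgprot_val(prot_enc)));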
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index c3af959648e6..2831685adf6f 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -90,6 +90,9 @@ nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o
obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
+obj-$(CONFIG_CRYPTO_SM3_AVX_X86_64) += sm3-avx-x86_64.o
+sm3-avx-x86_64-y := sm3-avx-asm_64.o sm3_avx_glue.o
+
obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o
sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
index c799838242a6..43852ba6e19c 100644
--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
@@ -1,65 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause */
/*
- * Implement AES CTR mode by8 optimization with AVX instructions. (x86_64)
- *
- * This is AES128/192/256 CTR mode optimization implementation. It requires
- * the support of Intel(R) AESNI and AVX instructions.
- *
- * This work was inspired by the AES CTR mode optimization published
- * in Intel Optimized IPSEC Cryptograhpic library.
- * Additional information on it can be found at:
- * http://downloadcenter.intel.com/Detail_Desc.aspx?agr=Y&DwnldID=22972
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
+ * AES CTR mode by8 optimization with AVX instructions. (x86_64)
*
* Copyright(c) 2014 Intel Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
* Contact Information:
* James Guilford <james.guilford@intel.com>
* Sean Gulley <sean.m.gulley@intel.com>
* Chandramouli Narayanan <mouli@linux.intel.com>
+ */
+/*
+ * This is AES128/192/256 CTR mode optimization implementation. It requires
+ * the support of Intel(R) AESNI and AVX instructions.
*
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * This work was inspired by the AES CTR mode optimization published
+ * in Intel Optimized IPSEC Cryptographic library.
+ * Additional information on it can be found at:
+ * https://github.com/intel/intel-ipsec-mb
*/
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 363699dd7220..837c1e0aa021 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -1751,8 +1751,6 @@ SYM_FUNC_END(aesni_gcm_finalize)
#endif
-
-SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128)
SYM_FUNC_START_LOCAL(_key_expansion_256a)
pshufd $0b11111111, %xmm1, %xmm1
shufps $0b00010000, %xmm0, %xmm4
@@ -1764,7 +1762,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256a)
add $0x10, TKEYP
RET
SYM_FUNC_END(_key_expansion_256a)
-SYM_FUNC_END_ALIAS(_key_expansion_128)
+SYM_FUNC_ALIAS_LOCAL(_key_expansion_128, _key_expansion_256a)
SYM_FUNC_START_LOCAL(_key_expansion_192a)
pshufd $0b01010101, %xmm1, %xmm1
diff --git a/arch/x86/crypto/blake2s-shash.c b/arch/x86/crypto/blake2s-shash.c
index f9e2fecdb761..59ae28abe35c 100644
--- a/arch/x86/crypto/blake2s-shash.c
+++ b/arch/x86/crypto/blake2s-shash.c
@@ -18,12 +18,12 @@
static int crypto_blake2s_update_x86(struct shash_desc *desc,
const u8 *in, unsigned int inlen)
{
- return crypto_blake2s_update(desc, in, inlen, blake2s_compress);
+ return crypto_blake2s_update(desc, in, inlen, false);
}
static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out)
{
- return crypto_blake2s_final(desc, out, blake2s_compress);
+ return crypto_blake2s_final(desc, out, false);
}
#define BLAKE2S_ALG(name, driver_name, digest_size) \
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index a880e0b1c255..fda6066437aa 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -32,24 +32,12 @@ static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
__blowfish_enc_blk(ctx, dst, src, false);
}
-static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __blowfish_enc_blk(ctx, dst, src, true);
-}
-
static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk_4way(ctx, dst, src, false);
}
-static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __blowfish_enc_blk_4way(ctx, dst, src, true);
-}
-
static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src);
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
index 787c234d2469..abb8b1fe123b 100644
--- a/arch/x86/crypto/des3_ede_glue.c
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -45,14 +45,6 @@ static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
des3_ede_x86_64_crypt_blk(dec_ctx, dst, src);
}
-static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- u32 *enc_ctx = ctx->enc.expkey;
-
- des3_ede_x86_64_crypt_blk_3way(enc_ctx, dst, src);
-}
-
static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
const u8 *src)
{
diff --git a/arch/x86/crypto/sm3-avx-asm_64.S b/arch/x86/crypto/sm3-avx-asm_64.S
new file mode 100644
index 000000000000..71e6aae23e17
--- /dev/null
+++ b/arch/x86/crypto/sm3-avx-asm_64.S
@@ -0,0 +1,517 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM3 AVX accelerated transform.
+ * specified in: https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02
+ *
+ * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (C) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+/* Based on SM3 AES/BMI2 accelerated work by libgcrypt at:
+ * https://gnupg.org/software/libgcrypt/index.html
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+#define state_h5 20
+#define state_h6 24
+#define state_h7 28
+
+/* Constants */
+
+/* Round constant macros */
+
+#define K0 2043430169 /* 0x79cc4519 */
+#define K1 -208106958 /* 0xf3988a32 */
+#define K2 -416213915 /* 0xe7311465 */
+#define K3 -832427829 /* 0xce6228cb */
+#define K4 -1664855657 /* 0x9cc45197 */
+#define K5 965255983 /* 0x3988a32f */
+#define K6 1930511966 /* 0x7311465e */
+#define K7 -433943364 /* 0xe6228cbc */
+#define K8 -867886727 /* 0xcc451979 */
+#define K9 -1735773453 /* 0x988a32f3 */
+#define K10 823420391 /* 0x311465e7 */
+#define K11 1646840782 /* 0x6228cbce */
+#define K12 -1001285732 /* 0xc451979c */
+#define K13 -2002571463 /* 0x88a32f39 */
+#define K14 289824371 /* 0x11465e73 */
+#define K15 579648742 /* 0x228cbce6 */
+#define K16 -1651869049 /* 0x9d8a7a87 */
+#define K17 991229199 /* 0x3b14f50f */
+#define K18 1982458398 /* 0x7629ea1e */
+#define K19 -330050500 /* 0xec53d43c */
+#define K20 -660100999 /* 0xd8a7a879 */
+#define K21 -1320201997 /* 0xb14f50f3 */
+#define K22 1654563303 /* 0x629ea1e7 */
+#define K23 -985840690 /* 0xc53d43ce */
+#define K24 -1971681379 /* 0x8a7a879d */
+#define K25 351604539 /* 0x14f50f3b */
+#define K26 703209078 /* 0x29ea1e76 */
+#define K27 1406418156 /* 0x53d43cec */
+#define K28 -1482130984 /* 0xa7a879d8 */
+#define K29 1330705329 /* 0x4f50f3b1 */
+#define K30 -1633556638 /* 0x9ea1e762 */
+#define K31 1027854021 /* 0x3d43cec5 */
+#define K32 2055708042 /* 0x7a879d8a */
+#define K33 -183551212 /* 0xf50f3b14 */
+#define K34 -367102423 /* 0xea1e7629 */
+#define K35 -734204845 /* 0xd43cec53 */
+#define K36 -1468409689 /* 0xa879d8a7 */
+#define K37 1358147919 /* 0x50f3b14f */
+#define K38 -1578671458 /* 0xa1e7629e */
+#define K39 1137624381 /* 0x43cec53d */
+#define K40 -2019718534 /* 0x879d8a7a */
+#define K41 255530229 /* 0x0f3b14f5 */
+#define K42 511060458 /* 0x1e7629ea */
+#define K43 1022120916 /* 0x3cec53d4 */
+#define K44 2044241832 /* 0x79d8a7a8 */
+#define K45 -206483632 /* 0xf3b14f50 */
+#define K46 -412967263 /* 0xe7629ea1 */
+#define K47 -825934525 /* 0xcec53d43 */
+#define K48 -1651869049 /* 0x9d8a7a87 */
+#define K49 991229199 /* 0x3b14f50f */
+#define K50 1982458398 /* 0x7629ea1e */
+#define K51 -330050500 /* 0xec53d43c */
+#define K52 -660100999 /* 0xd8a7a879 */
+#define K53 -1320201997 /* 0xb14f50f3 */
+#define K54 1654563303 /* 0x629ea1e7 */
+#define K55 -985840690 /* 0xc53d43ce */
+#define K56 -1971681379 /* 0x8a7a879d */
+#define K57 351604539 /* 0x14f50f3b */
+#define K58 703209078 /* 0x29ea1e76 */
+#define K59 1406418156 /* 0x53d43cec */
+#define K60 -1482130984 /* 0xa7a879d8 */
+#define K61 1330705329 /* 0x4f50f3b1 */
+#define K62 -1633556638 /* 0x9ea1e762 */
+#define K63 1027854021 /* 0x3d43cec5 */
+
+/* Register macros */
+
+#define RSTATE %rdi
+#define RDATA %rsi
+#define RNBLKS %rdx
+
+#define t0 %eax
+#define t1 %ebx
+#define t2 %ecx
+
+#define a %r8d
+#define b %r9d
+#define c %r10d
+#define d %r11d
+#define e %r12d
+#define f %r13d
+#define g %r14d
+#define h %r15d
+
+#define W0 %xmm0
+#define W1 %xmm1
+#define W2 %xmm2
+#define W3 %xmm3
+#define W4 %xmm4
+#define W5 %xmm5
+
+#define XTMP0 %xmm6
+#define XTMP1 %xmm7
+#define XTMP2 %xmm8
+#define XTMP3 %xmm9
+#define XTMP4 %xmm10
+#define XTMP5 %xmm11
+#define XTMP6 %xmm12
+
+#define BSWAP_REG %xmm15
+
+/* Stack structure */
+
+#define STACK_W_SIZE (32 * 2 * 3)
+#define STACK_REG_SAVE_SIZE (64)
+
+#define STACK_W (0)
+#define STACK_REG_SAVE (STACK_W + STACK_W_SIZE)
+#define STACK_SIZE (STACK_REG_SAVE + STACK_REG_SAVE_SIZE)
+
+/* Instruction helpers. */
+
+#define roll2(v, reg) \
+ roll $(v), reg;
+
+#define roll3mov(v, src, dst) \
+ movl src, dst; \
+ roll $(v), dst;
+
+#define roll3(v, src, dst) \
+ rorxl $(32-(v)), src, dst;
+
+#define addl2(a, out) \
+ leal (a, out), out;
+
+/* Round function macros. */
+
+#define GG1(x, y, z, o, t) \
+ movl x, o; \
+ xorl y, o; \
+ xorl z, o;
+
+#define FF1(x, y, z, o, t) GG1(x, y, z, o, t)
+
+#define GG2(x, y, z, o, t) \
+ andnl z, x, o; \
+ movl y, t; \
+ andl x, t; \
+ addl2(t, o);
+
+#define FF2(x, y, z, o, t) \
+ movl y, o; \
+ xorl x, o; \
+ movl y, t; \
+ andl x, t; \
+ andl z, o; \
+ xorl t, o;
+
+#define R(i, a, b, c, d, e, f, g, h, round, widx, wtype) \
+ /* rol(a, 12) => t0 */ \
+ roll3mov(12, a, t0); /* rorxl here would reduce perf by 6% on zen3 */ \
+ /* rol (t0 + e + t), 7) => t1 */ \
+ leal K##round(t0, e, 1), t1; \
+ roll2(7, t1); \
+ /* h + w1 => h */ \
+ addl wtype##_W1_ADDR(round, widx), h; \
+ /* h + t1 => h */ \
+ addl2(t1, h); \
+ /* t1 ^ t0 => t0 */ \
+ xorl t1, t0; \
+ /* w1w2 + d => d */ \
+ addl wtype##_W1W2_ADDR(round, widx), d; \
+ /* FF##i(a,b,c) => t1 */ \
+ FF##i(a, b, c, t1, t2); \
+ /* d + t1 => d */ \
+ addl2(t1, d); \
+ /* GG#i(e,f,g) => t2 */ \
+ GG##i(e, f, g, t2, t1); \
+ /* h + t2 => h */ \
+ addl2(t2, h); \
+ /* rol (f, 19) => f */ \
+ roll2(19, f); \
+ /* d + t0 => d */ \
+ addl2(t0, d); \
+ /* rol (b, 9) => b */ \
+ roll2(9, b); \
+ /* P0(h) => h */ \
+ roll3(9, h, t2); \
+ roll3(17, h, t1); \
+ xorl t2, h; \
+ xorl t1, h;
+
+#define R1(a, b, c, d, e, f, g, h, round, widx, wtype) \
+ R(1, a, b, c, d, e, f, g, h, round, widx, wtype)
+
+#define R2(a, b, c, d, e, f, g, h, round, widx, wtype) \
+ R(2, a, b, c, d, e, f, g, h, round, widx, wtype)
+
+/* Input expansion macros. */
+
+/* Byte-swapped input address. */
+#define IW_W_ADDR(round, widx, offs) \
+ (STACK_W + ((round) / 4) * 64 + (offs) + ((widx) * 4))(%rsp)
+
+/* Expanded input address. */
+#define XW_W_ADDR(round, widx, offs) \
+ (STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4))(%rsp)
+
+/* Rounds 1-12, byte-swapped input block addresses. */
+#define IW_W1_ADDR(round, widx) IW_W_ADDR(round, widx, 0)
+#define IW_W1W2_ADDR(round, widx) IW_W_ADDR(round, widx, 32)
+
+/* Rounds 1-12, expanded input block addresses. */
+#define XW_W1_ADDR(round, widx) XW_W_ADDR(round, widx, 0)
+#define XW_W1W2_ADDR(round, widx) XW_W_ADDR(round, widx, 32)
+
+/* Input block loading. */
+#define LOAD_W_XMM_1() \
+ vmovdqu 0*16(RDATA), XTMP0; /* XTMP0: w3, w2, w1, w0 */ \
+ vmovdqu 1*16(RDATA), XTMP1; /* XTMP1: w7, w6, w5, w4 */ \
+ vmovdqu 2*16(RDATA), XTMP2; /* XTMP2: w11, w10, w9, w8 */ \
+ vmovdqu 3*16(RDATA), XTMP3; /* XTMP3: w15, w14, w13, w12 */ \
+ vpshufb BSWAP_REG, XTMP0, XTMP0; \
+ vpshufb BSWAP_REG, XTMP1, XTMP1; \
+ vpshufb BSWAP_REG, XTMP2, XTMP2; \
+ vpshufb BSWAP_REG, XTMP3, XTMP3; \
+ vpxor XTMP0, XTMP1, XTMP4; \
+ vpxor XTMP1, XTMP2, XTMP5; \
+ vpxor XTMP2, XTMP3, XTMP6; \
+ leaq 64(RDATA), RDATA; \
+ vmovdqa XTMP0, IW_W1_ADDR(0, 0); \
+ vmovdqa XTMP4, IW_W1W2_ADDR(0, 0); \
+ vmovdqa XTMP1, IW_W1_ADDR(4, 0); \
+ vmovdqa XTMP5, IW_W1W2_ADDR(4, 0);
+
+#define LOAD_W_XMM_2() \
+ vmovdqa XTMP2, IW_W1_ADDR(8, 0); \
+ vmovdqa XTMP6, IW_W1W2_ADDR(8, 0);
+
+#define LOAD_W_XMM_3() \
+ vpshufd $0b00000000, XTMP0, W0; /* W0: xx, w0, xx, xx */ \
+ vpshufd $0b11111001, XTMP0, W1; /* W1: xx, w3, w2, w1 */ \
+ vmovdqa XTMP1, W2; /* W2: xx, w6, w5, w4 */ \
+ vpalignr $12, XTMP1, XTMP2, W3; /* W3: xx, w9, w8, w7 */ \
+ vpalignr $8, XTMP2, XTMP3, W4; /* W4: xx, w12, w11, w10 */ \
+ vpshufd $0b11111001, XTMP3, W5; /* W5: xx, w15, w14, w13 */
+
+/* Message scheduling. Note: 3 words per XMM register. */
+#define SCHED_W_0(round, w0, w1, w2, w3, w4, w5) \
+ /* Load (w[i - 16]) => XTMP0 */ \
+ vpshufd $0b10111111, w0, XTMP0; \
+ vpalignr $12, XTMP0, w1, XTMP0; /* XTMP0: xx, w2, w1, w0 */ \
+ /* Load (w[i - 13]) => XTMP1 */ \
+ vpshufd $0b10111111, w1, XTMP1; \
+ vpalignr $12, XTMP1, w2, XTMP1; \
+ /* w[i - 9] == w3 */ \
+ /* XMM3 ^ XTMP0 => XTMP0 */ \
+ vpxor w3, XTMP0, XTMP0;
+
+#define SCHED_W_1(round, w0, w1, w2, w3, w4, w5) \
+ /* w[i - 3] == w5 */ \
+ /* rol(XMM5, 15) ^ XTMP0 => XTMP0 */ \
+ vpslld $15, w5, XTMP2; \
+ vpsrld $(32-15), w5, XTMP3; \
+ vpxor XTMP2, XTMP3, XTMP3; \
+ vpxor XTMP3, XTMP0, XTMP0; \
+ /* rol(XTMP1, 7) => XTMP1 */ \
+ vpslld $7, XTMP1, XTMP5; \
+ vpsrld $(32-7), XTMP1, XTMP1; \
+ vpxor XTMP5, XTMP1, XTMP1; \
+ /* XMM4 ^ XTMP1 => XTMP1 */ \
+ vpxor w4, XTMP1, XTMP1; \
+ /* w[i - 6] == XMM4 */ \
+ /* P1(XTMP0) ^ XTMP1 => XMM0 */ \
+ vpslld $15, XTMP0, XTMP5; \
+ vpsrld $(32-15), XTMP0, XTMP6; \
+ vpslld $23, XTMP0, XTMP2; \
+ vpsrld $(32-23), XTMP0, XTMP3; \
+ vpxor XTMP0, XTMP1, XTMP1; \
+ vpxor XTMP6, XTMP5, XTMP5; \
+ vpxor XTMP3, XTMP2, XTMP2; \
+ vpxor XTMP2, XTMP5, XTMP5; \
+ vpxor XTMP5, XTMP1, w0;
+
+#define SCHED_W_2(round, w0, w1, w2, w3, w4, w5) \
+ /* W1 in XMM12 */ \
+ vpshufd $0b10111111, w4, XTMP4; \
+ vpalignr $12, XTMP4, w5, XTMP4; \
+ vmovdqa XTMP4, XW_W1_ADDR((round), 0); \
+ /* W1 ^ W2 => XTMP1 */ \
+ vpxor w0, XTMP4, XTMP1; \
+ vmovdqa XTMP1, XW_W1W2_ADDR((round), 0);
+
+
+.section .rodata.cst16, "aM", @progbits, 16
+.align 16
+
+.Lbe32mask:
+ .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+
+.text
+
+/*
+ * Transform nblocks*64 bytes (nblocks*16 32-bit words) at DATA.
+ *
+ * void sm3_transform_avx(struct sm3_state *state,
+ * const u8 *data, int nblocks);
+ */
+.align 16
+SYM_FUNC_START(sm3_transform_avx)
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: data (64*nblks bytes)
+ * %rdx: nblocks
+ */
+ vzeroupper;
+
+ pushq %rbp;
+ movq %rsp, %rbp;
+
+ movq %rdx, RNBLKS;
+
+ subq $STACK_SIZE, %rsp;
+ andq $(~63), %rsp;
+
+ movq %rbx, (STACK_REG_SAVE + 0 * 8)(%rsp);
+ movq %r15, (STACK_REG_SAVE + 1 * 8)(%rsp);
+ movq %r14, (STACK_REG_SAVE + 2 * 8)(%rsp);
+ movq %r13, (STACK_REG_SAVE + 3 * 8)(%rsp);
+ movq %r12, (STACK_REG_SAVE + 4 * 8)(%rsp);
+
+ vmovdqa .Lbe32mask (%rip), BSWAP_REG;
+
+ /* Get the values of the chaining variables. */
+ movl state_h0(RSTATE), a;
+ movl state_h1(RSTATE), b;
+ movl state_h2(RSTATE), c;
+ movl state_h3(RSTATE), d;
+ movl state_h4(RSTATE), e;
+ movl state_h5(RSTATE), f;
+ movl state_h6(RSTATE), g;
+ movl state_h7(RSTATE), h;
+
+.align 16
+.Loop:
+ /* Load data part1. */
+ LOAD_W_XMM_1();
+
+ leaq -1(RNBLKS), RNBLKS;
+
+ /* Transform 0-3 + Load data part2. */
+ R1(a, b, c, d, e, f, g, h, 0, 0, IW); LOAD_W_XMM_2();
+ R1(d, a, b, c, h, e, f, g, 1, 1, IW);
+ R1(c, d, a, b, g, h, e, f, 2, 2, IW);
+ R1(b, c, d, a, f, g, h, e, 3, 3, IW); LOAD_W_XMM_3();
+
+ /* Transform 4-7 + Precalc 12-14. */
+ R1(a, b, c, d, e, f, g, h, 4, 0, IW);
+ R1(d, a, b, c, h, e, f, g, 5, 1, IW);
+ R1(c, d, a, b, g, h, e, f, 6, 2, IW); SCHED_W_0(12, W0, W1, W2, W3, W4, W5);
+ R1(b, c, d, a, f, g, h, e, 7, 3, IW); SCHED_W_1(12, W0, W1, W2, W3, W4, W5);
+
+ /* Transform 8-11 + Precalc 12-17. */
+ R1(a, b, c, d, e, f, g, h, 8, 0, IW); SCHED_W_2(12, W0, W1, W2, W3, W4, W5);
+ R1(d, a, b, c, h, e, f, g, 9, 1, IW); SCHED_W_0(15, W1, W2, W3, W4, W5, W0);
+ R1(c, d, a, b, g, h, e, f, 10, 2, IW); SCHED_W_1(15, W1, W2, W3, W4, W5, W0);
+ R1(b, c, d, a, f, g, h, e, 11, 3, IW); SCHED_W_2(15, W1, W2, W3, W4, W5, W0);
+
+ /* Transform 12-14 + Precalc 18-20 */
+ R1(a, b, c, d, e, f, g, h, 12, 0, XW); SCHED_W_0(18, W2, W3, W4, W5, W0, W1);
+ R1(d, a, b, c, h, e, f, g, 13, 1, XW); SCHED_W_1(18, W2, W3, W4, W5, W0, W1);
+ R1(c, d, a, b, g, h, e, f, 14, 2, XW); SCHED_W_2(18, W2, W3, W4, W5, W0, W1);
+
+ /* Transform 15-17 + Precalc 21-23 */
+ R1(b, c, d, a, f, g, h, e, 15, 0, XW); SCHED_W_0(21, W3, W4, W5, W0, W1, W2);
+ R2(a, b, c, d, e, f, g, h, 16, 1, XW); SCHED_W_1(21, W3, W4, W5, W0, W1, W2);
+ R2(d, a, b, c, h, e, f, g, 17, 2, XW); SCHED_W_2(21, W3, W4, W5, W0, W1, W2);
+
+ /* Transform 18-20 + Precalc 24-26 */
+ R2(c, d, a, b, g, h, e, f, 18, 0, XW); SCHED_W_0(24, W4, W5, W0, W1, W2, W3);
+ R2(b, c, d, a, f, g, h, e, 19, 1, XW); SCHED_W_1(24, W4, W5, W0, W1, W2, W3);
+ R2(a, b, c, d, e, f, g, h, 20, 2, XW); SCHED_W_2(24, W4, W5, W0, W1, W2, W3);
+
+ /* Transform 21-23 + Precalc 27-29 */
+ R2(d, a, b, c, h, e, f, g, 21, 0, XW); SCHED_W_0(27, W5, W0, W1, W2, W3, W4);
+ R2(c, d, a, b, g, h, e, f, 22, 1, XW); SCHED_W_1(27, W5, W0, W1, W2, W3, W4);
+ R2(b, c, d, a, f, g, h, e, 23, 2, XW); SCHED_W_2(27, W5, W0, W1, W2, W3, W4);
+
+ /* Transform 24-26 + Precalc 30-32 */
+ R2(a, b, c, d, e, f, g, h, 24, 0, XW); SCHED_W_0(30, W0, W1, W2, W3, W4, W5);
+ R2(d, a, b, c, h, e, f, g, 25, 1, XW); SCHED_W_1(30, W0, W1, W2, W3, W4, W5);
+ R2(c, d, a, b, g, h, e, f, 26, 2, XW); SCHED_W_2(30, W0, W1, W2, W3, W4, W5);
+
+ /* Transform 27-29 + Precalc 33-35 */
+ R2(b, c, d, a, f, g, h, e, 27, 0, XW); SCHED_W_0(33, W1, W2, W3, W4, W5, W0);
+ R2(a, b, c, d, e, f, g, h, 28, 1, XW); SCHED_W_1(33, W1, W2, W3, W4, W5, W0);
+ R2(d, a, b, c, h, e, f, g, 29, 2, XW); SCHED_W_2(33, W1, W2, W3, W4, W5, W0);
+
+ /* Transform 30-32 + Precalc 36-38 */
+ R2(c, d, a, b, g, h, e, f, 30, 0, XW); SCHED_W_0(36, W2, W3, W4, W5, W0, W1);
+ R2(b, c, d, a, f, g, h, e, 31, 1, XW); SCHED_W_1(36, W2, W3, W4, W5, W0, W1);
+ R2(a, b, c, d, e, f, g, h, 32, 2, XW); SCHED_W_2(36, W2, W3, W4, W5, W0, W1);
+
+ /* Transform 33-35 + Precalc 39-41 */
+ R2(d, a, b, c, h, e, f, g, 33, 0, XW); SCHED_W_0(39, W3, W4, W5, W0, W1, W2);
+ R2(c, d, a, b, g, h, e, f, 34, 1, XW); SCHED_W_1(39, W3, W4, W5, W0, W1, W2);
+ R2(b, c, d, a, f, g, h, e, 35, 2, XW); SCHED_W_2(39, W3, W4, W5, W0, W1, W2);
+
+ /* Transform 36-38 + Precalc 42-44 */
+ R2(a, b, c, d, e, f, g, h, 36, 0, XW); SCHED_W_0(42, W4, W5, W0, W1, W2, W3);
+ R2(d, a, b, c, h, e, f, g, 37, 1, XW); SCHED_W_1(42, W4, W5, W0, W1, W2, W3);
+ R2(c, d, a, b, g, h, e, f, 38, 2, XW); SCHED_W_2(42, W4, W5, W0, W1, W2, W3);
+
+ /* Transform 39-41 + Precalc 45-47 */
+ R2(b, c, d, a, f, g, h, e, 39, 0, XW); SCHED_W_0(45, W5, W0, W1, W2, W3, W4);
+ R2(a, b, c, d, e, f, g, h, 40, 1, XW); SCHED_W_1(45, W5, W0, W1, W2, W3, W4);
+ R2(d, a, b, c, h, e, f, g, 41, 2, XW); SCHED_W_2(45, W5, W0, W1, W2, W3, W4);
+
+ /* Transform 42-44 + Precalc 48-50 */
+ R2(c, d, a, b, g, h, e, f, 42, 0, XW); SCHED_W_0(48, W0, W1, W2, W3, W4, W5);
+ R2(b, c, d, a, f, g, h, e, 43, 1, XW); SCHED_W_1(48, W0, W1, W2, W3, W4, W5);
+ R2(a, b, c, d, e, f, g, h, 44, 2, XW); SCHED_W_2(48, W0, W1, W2, W3, W4, W5);
+
+ /* Transform 45-47 + Precalc 51-53 */
+ R2(d, a, b, c, h, e, f, g, 45, 0, XW); SCHED_W_0(51, W1, W2, W3, W4, W5, W0);
+ R2(c, d, a, b, g, h, e, f, 46, 1, XW); SCHED_W_1(51, W1, W2, W3, W4, W5, W0);
+ R2(b, c, d, a, f, g, h, e, 47, 2, XW); SCHED_W_2(51, W1, W2, W3, W4, W5, W0);
+
+ /* Transform 48-50 + Precalc 54-56 */
+ R2(a, b, c, d, e, f, g, h, 48, 0, XW); SCHED_W_0(54, W2, W3, W4, W5, W0, W1);
+ R2(d, a, b, c, h, e, f, g, 49, 1, XW); SCHED_W_1(54, W2, W3, W4, W5, W0, W1);
+ R2(c, d, a, b, g, h, e, f, 50, 2, XW); SCHED_W_2(54, W2, W3, W4, W5, W0, W1);
+
+ /* Transform 51-53 + Precalc 57-59 */
+ R2(b, c, d, a, f, g, h, e, 51, 0, XW); SCHED_W_0(57, W3, W4, W5, W0, W1, W2);
+ R2(a, b, c, d, e, f, g, h, 52, 1, XW); SCHED_W_1(57, W3, W4, W5, W0, W1, W2);
+ R2(d, a, b, c, h, e, f, g, 53, 2, XW); SCHED_W_2(57, W3, W4, W5, W0, W1, W2);
+
+ /* Transform 54-56 + Precalc 60-62 */
+ R2(c, d, a, b, g, h, e, f, 54, 0, XW); SCHED_W_0(60, W4, W5, W0, W1, W2, W3);
+ R2(b, c, d, a, f, g, h, e, 55, 1, XW); SCHED_W_1(60, W4, W5, W0, W1, W2, W3);
+ R2(a, b, c, d, e, f, g, h, 56, 2, XW); SCHED_W_2(60, W4, W5, W0, W1, W2, W3);
+
+ /* Transform 57-59 + Precalc 63 */
+ R2(d, a, b, c, h, e, f, g, 57, 0, XW); SCHED_W_0(63, W5, W0, W1, W2, W3, W4);
+ R2(c, d, a, b, g, h, e, f, 58, 1, XW);
+ R2(b, c, d, a, f, g, h, e, 59, 2, XW); SCHED_W_1(63, W5, W0, W1, W2, W3, W4);
+
+ /* Transform 60-62 + Precalc 63 */
+ R2(a, b, c, d, e, f, g, h, 60, 0, XW);
+ R2(d, a, b, c, h, e, f, g, 61, 1, XW); SCHED_W_2(63, W5, W0, W1, W2, W3, W4);
+ R2(c, d, a, b, g, h, e, f, 62, 2, XW);
+
+ /* Transform 63 */
+ R2(b, c, d, a, f, g, h, e, 63, 0, XW);
+
+ /* Update the chaining variables. */
+ xorl state_h0(RSTATE), a;
+ xorl state_h1(RSTATE), b;
+ xorl state_h2(RSTATE), c;
+ xorl state_h3(RSTATE), d;
+ movl a, state_h0(RSTATE);
+ movl b, state_h1(RSTATE);
+ movl c, state_h2(RSTATE);
+ movl d, state_h3(RSTATE);
+ xorl state_h4(RSTATE), e;
+ xorl state_h5(RSTATE), f;
+ xorl state_h6(RSTATE), g;
+ xorl state_h7(RSTATE), h;
+ movl e, state_h4(RSTATE);
+ movl f, state_h5(RSTATE);
+ movl g, state_h6(RSTATE);
+ movl h, state_h7(RSTATE);
+
+ cmpq $0, RNBLKS;
+ jne .Loop;
+
+ vzeroall;
+
+ movq (STACK_REG_SAVE + 0 * 8)(%rsp), %rbx;
+ movq (STACK_REG_SAVE + 1 * 8)(%rsp), %r15;
+ movq (STACK_REG_SAVE + 2 * 8)(%rsp), %r14;
+ movq (STACK_REG_SAVE + 3 * 8)(%rsp), %r13;
+ movq (STACK_REG_SAVE + 4 * 8)(%rsp), %r12;
+
+ vmovdqa %xmm0, IW_W1_ADDR(0, 0);
+ vmovdqa %xmm0, IW_W1W2_ADDR(0, 0);
+ vmovdqa %xmm0, IW_W1_ADDR(4, 0);
+ vmovdqa %xmm0, IW_W1W2_ADDR(4, 0);
+ vmovdqa %xmm0, IW_W1_ADDR(8, 0);
+ vmovdqa %xmm0, IW_W1W2_ADDR(8, 0);
+
+ movq %rbp, %rsp;
+ popq %rbp;
+ ret;
+SYM_FUNC_END(sm3_transform_avx)
diff --git a/arch/x86/crypto/sm3_avx_glue.c b/arch/x86/crypto/sm3_avx_glue.c
new file mode 100644
index 000000000000..661b6f22ffcd
--- /dev/null
+++ b/arch/x86/crypto/sm3_avx_glue.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SM3 Secure Hash Algorithm, AVX assembler accelerated.
+ * specified in: https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02
+ *
+ * Copyright (C) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <crypto/sm3.h>
+#include <crypto/sm3_base.h>
+#include <asm/simd.h>
+
+asmlinkage void sm3_transform_avx(struct sm3_state *state,
+ const u8 *data, int nblocks);
+
+static int sm3_avx_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sm3_state *sctx = shash_desc_ctx(desc);
+
+ if (!crypto_simd_usable() ||
+ (sctx->count % SM3_BLOCK_SIZE) + len < SM3_BLOCK_SIZE) {
+ sm3_update(sctx, data, len);
+ return 0;
+ }
+
+ /*
+ * Make sure struct sm3_state begins directly with the SM3
+ * 256-bit internal state, as this is what the asm functions expect.
+ */
+ BUILD_BUG_ON(offsetof(struct sm3_state, state) != 0);
+
+ kernel_fpu_begin();
+ sm3_base_do_update(desc, data, len, sm3_transform_avx);
+ kernel_fpu_end();
+
+ return 0;
+}
+
+static int sm3_avx_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ if (!crypto_simd_usable()) {
+ struct sm3_state *sctx = shash_desc_ctx(desc);
+
+ if (len)
+ sm3_update(sctx, data, len);
+
+ sm3_final(sctx, out);
+ return 0;
+ }
+
+ kernel_fpu_begin();
+ if (len)
+ sm3_base_do_update(desc, data, len, sm3_transform_avx);
+ sm3_base_do_finalize(desc, sm3_transform_avx);
+ kernel_fpu_end();
+
+ return sm3_base_finish(desc, out);
+}
+
+static int sm3_avx_final(struct shash_desc *desc, u8 *out)
+{
+ if (!crypto_simd_usable()) {
+ sm3_final(shash_desc_ctx(desc), out);
+ return 0;
+ }
+
+ kernel_fpu_begin();
+ sm3_base_do_finalize(desc, sm3_transform_avx);
+ kernel_fpu_end();
+
+ return sm3_base_finish(desc, out);
+}
+
+static struct shash_alg sm3_avx_alg = {
+ .digestsize = SM3_DIGEST_SIZE,
+ .init = sm3_base_init,
+ .update = sm3_avx_update,
+ .final = sm3_avx_final,
+ .finup = sm3_avx_finup,
+ .descsize = sizeof(struct sm3_state),
+ .base = {
+ .cra_name = "sm3",
+ .cra_driver_name = "sm3-avx",
+ .cra_priority = 300,
+ .cra_blocksize = SM3_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static int __init sm3_avx_mod_init(void)
+{
+ const char *feature_name;
+
+ if (!boot_cpu_has(X86_FEATURE_AVX)) {
+ pr_info("AVX instruction are not detected.\n");
+ return -ENODEV;
+ }
+
+ if (!boot_cpu_has(X86_FEATURE_BMI2)) {
+ pr_info("BMI2 instruction are not detected.\n");
+ return -ENODEV;
+ }
+
+ if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+ &feature_name)) {
+ pr_info("CPU feature '%s' is not supported.\n", feature_name);
+ return -ENODEV;
+ }
+
+ return crypto_register_shash(&sm3_avx_alg);
+}
+
+static void __exit sm3_avx_mod_exit(void)
+{
+ crypto_unregister_shash(&sm3_avx_alg);
+}
+
+module_init(sm3_avx_mod_init);
+module_exit(sm3_avx_mod_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
+MODULE_DESCRIPTION("SM3 Secure Hash Algorithm, AVX assembler accelerated");
+MODULE_ALIAS_CRYPTO("sm3");
+MODULE_ALIAS_CRYPTO("sm3-avx");
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index fd9f908debe5..e88791b420ee 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -181,6 +181,27 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
EVENT_CONSTRAINT_END
};
+static struct event_constraint intel_v5_gen_event_constraints[] __read_mostly =
+{
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
+ FIXED_EVENT_CONSTRAINT(0x0500, 4),
+ FIXED_EVENT_CONSTRAINT(0x0600, 5),
+ FIXED_EVENT_CONSTRAINT(0x0700, 6),
+ FIXED_EVENT_CONSTRAINT(0x0800, 7),
+ FIXED_EVENT_CONSTRAINT(0x0900, 8),
+ FIXED_EVENT_CONSTRAINT(0x0a00, 9),
+ FIXED_EVENT_CONSTRAINT(0x0b00, 10),
+ FIXED_EVENT_CONSTRAINT(0x0c00, 11),
+ FIXED_EVENT_CONSTRAINT(0x0d00, 12),
+ FIXED_EVENT_CONSTRAINT(0x0e00, 13),
+ FIXED_EVENT_CONSTRAINT(0x0f00, 14),
+ FIXED_EVENT_CONSTRAINT(0x1000, 15),
+ EVENT_CONSTRAINT_END
+};
+
static struct event_constraint intel_slm_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
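The pseudo-encodings in the new v5 table follow a readable pattern: starting with fixed counter 2, event select 0x00 with unit mask N+1 appears to name fixed counter N, extending the commented 0x0300 (fixed 2, CPU_CLK_UNHALTED.REF) and 0x0400 (fixed 3, SLOTS) entries up to 0x1000 (fixed 15). Reading two of the uncommented entries under that assumption:

	FIXED_EVENT_CONSTRAINT(0x0500, 4),	/* umask 0x05 -> fixed counter 4 */
	FIXED_EVENT_CONSTRAINT(0x1000, 15),	/* umask 0x10 -> fixed counter 15 */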
@@ -4703,6 +4724,19 @@ static __initconst const struct x86_pmu intel_pmu = {
.lbr_read = intel_pmu_lbr_read_64,
.lbr_save = intel_pmu_lbr_save,
.lbr_restore = intel_pmu_lbr_restore,
+
+ /*
+ * SMM has access to all 4 rings and while traditionally SMM code only
+ * ran in CPL0, 2021-era firmware is starting to make use of CPL3 in SMM.
+ *
+ * Since the EVENTSEL.{USR,OS} CPL filtering makes no distinction
+ * between SMM or not, this results in what should be pure userspace
+ * counters including SMM data.
+ *
+ * This is a clear privilege issue, therefore globally disable
+ * counting SMM by default.
+ */
+ .attr_freeze_on_smi = 1,
};
static __init void intel_clovertown_quirk(void)
@@ -6236,6 +6270,19 @@ __init int intel_pmu_init(void)
pmu->num_counters = x86_pmu.num_counters;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
}
+
+ /*
+ * Quirk: For some Alder Lake machines, when all E-cores are disabled
+ * in the BIOS, CPUID leaf 0xA enumerates all counters of the P-cores.
+ * However, X86_FEATURE_HYBRID_CPU is still set, so the code above
+ * would mistakenly add extra counters for the P-cores. Correct the
+ * number of counters here.
+ */
+ if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) {
+ pmu->num_counters = x86_pmu.num_counters;
+ pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+ }
+
pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
pmu->unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
@@ -6282,7 +6329,9 @@ __init int intel_pmu_init(void)
pr_cont("generic architected perfmon v1, ");
name = "generic_arch_v1";
break;
- default:
+ case 2:
+ case 3:
+ case 4:
/*
* default constraints for v2 and up
*/
@@ -6290,6 +6339,21 @@ __init int intel_pmu_init(void)
pr_cont("generic architected perfmon, ");
name = "generic_arch_v2+";
break;
+ default:
+ /*
+ * The default constraints for v5 and up can support up to
+ * 16 fixed counters. For fixed counters 4 and later, the
+ * pseudo-encoding is applied.
+ * The constraint list is truncated to match the CPUID enumeration
+ * by inserting EVENT_CONSTRAINT_END at that position.
+ */
+ if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED)
+ x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
+ intel_v5_gen_event_constraints[x86_pmu.num_counters_fixed].weight = -1;
+ x86_pmu.event_constraints = intel_v5_gen_event_constraints;
+ pr_cont("generic architected perfmon, ");
+ name = "generic_arch_v5+";
+ break;
}
}
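The v5 default case above truncates a statically sized constraint table at runtime. A minimal userspace sketch of the same trick, assuming (as the patch implies) that an entry with weight -1 acts as the end marker; GNU C range initializers are used:

#include <stdio.h>

struct constraint { int code; int weight; };

#define MAX_FIXED 16
static struct constraint table[MAX_FIXED + 1] = {
	[0 ... MAX_FIXED - 1] = { .code = 1, .weight = 1 },
	[MAX_FIXED]           = { .weight = -1 },	/* static end marker */
};

int main(void)
{
	int num_fixed = 4;	/* pretend CPUID enumerated 4 fixed counters */
	struct constraint *c;

	/* Cut the table down, as done for intel_v5_gen_event_constraints. */
	table[num_fixed].weight = -1;

	for (c = table; c->weight != -1; c++)
		printf("constraint %d active\n", (int)(c - table));
	return 0;
}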
@@ -6340,6 +6404,8 @@ __init int intel_pmu_init(void)
}
if (x86_pmu.lbr_nr) {
+ intel_pmu_lbr_init();
+
pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
/* only support branch_stack snapshot for perfmon >= v2 */
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 2e215369df4a..376cc3d66094 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1203,7 +1203,10 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
base = MSR_RELOAD_FIXED_CTR0;
idx = hwc->idx - INTEL_PMC_IDX_FIXED;
- value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx];
+ if (x86_pmu.intel_cap.pebs_format < 5)
+ value = ds->pebs_event_reset[MAX_PEBS_EVENTS_FMT4 + idx];
+ else
+ value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx];
}
wrmsrl(base + idx, value);
}
@@ -1232,8 +1235,12 @@ void intel_pmu_pebs_enable(struct perf_event *event)
}
}
- if (idx >= INTEL_PMC_IDX_FIXED)
- idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
+ if (idx >= INTEL_PMC_IDX_FIXED) {
+ if (x86_pmu.intel_cap.pebs_format < 5)
+ idx = MAX_PEBS_EVENTS_FMT4 + (idx - INTEL_PMC_IDX_FIXED);
+ else
+ idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
+ }
/*
* Use auto-reload if possible to save a MSR write in the PMI.
@@ -2204,6 +2211,7 @@ void __init intel_ds_init(void)
break;
case 4:
+ case 5:
x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
if (x86_pmu.intel_cap.pebs_baseline) {
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 8043213b75a5..fe1742c4ca49 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -8,14 +8,6 @@
#include "../perf_event.h"
-static const enum {
- LBR_EIP_FLAGS = 1,
- LBR_TSX = 2,
-} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
- [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
- [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
-};
-
/*
* Intel LBR_SELECT bits
* Intel Vol3a, April 2011, Section 16.7 Table 16-10
@@ -243,7 +235,7 @@ void intel_pmu_lbr_reset_64(void)
for (i = 0; i < x86_pmu.lbr_nr; i++) {
wrmsrl(x86_pmu.lbr_from + i, 0);
wrmsrl(x86_pmu.lbr_to + i, 0);
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ if (x86_pmu.lbr_has_info)
wrmsrl(x86_pmu.lbr_info + i, 0);
}
}
@@ -305,11 +297,10 @@ enum {
*/
static inline bool lbr_from_signext_quirk_needed(void)
{
- int lbr_format = x86_pmu.intel_cap.lbr_format;
bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
boot_cpu_has(X86_FEATURE_RTM);
- return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
+ return !tsx_support && x86_pmu.lbr_has_tsx;
}
static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
@@ -427,12 +418,12 @@ rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
void intel_pmu_lbr_restore(void *ctx)
{
- bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx = ctx;
- int i;
- unsigned lbr_idx, mask;
+ bool need_info = x86_pmu.lbr_has_info;
u64 tos = task_ctx->tos;
+ unsigned lbr_idx, mask;
+ int i;
mask = x86_pmu.lbr_nr - 1;
for (i = 0; i < task_ctx->valid_lbrs; i++) {
@@ -444,7 +435,7 @@ void intel_pmu_lbr_restore(void *ctx)
lbr_idx = (tos - i) & mask;
wrlbr_from(lbr_idx, 0);
wrlbr_to(lbr_idx, 0);
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ if (need_info)
wrlbr_info(lbr_idx, 0);
}
@@ -519,9 +510,9 @@ static void __intel_pmu_lbr_restore(void *ctx)
void intel_pmu_lbr_save(void *ctx)
{
- bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx = ctx;
+ bool need_info = x86_pmu.lbr_has_info;
unsigned lbr_idx, mask;
u64 tos;
int i;
@@ -816,7 +807,6 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
bool need_info = false, call_stack = false;
unsigned long mask = x86_pmu.lbr_nr - 1;
- int lbr_format = x86_pmu.intel_cap.lbr_format;
u64 tos = intel_pmu_lbr_tos();
int i;
int out = 0;
@@ -831,9 +821,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
for (i = 0; i < num; i++) {
unsigned long lbr_idx = (tos - i) & mask;
u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
- int skip = 0;
u16 cycles = 0;
- int lbr_flags = lbr_desc[lbr_format];
from = rdlbr_from(lbr_idx, NULL);
to = rdlbr_to(lbr_idx, NULL);
@@ -845,37 +833,39 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
if (call_stack && !from)
break;
- if (lbr_format == LBR_FORMAT_INFO && need_info) {
- u64 info;
-
- info = rdlbr_info(lbr_idx, NULL);
- mis = !!(info & LBR_INFO_MISPRED);
- pred = !mis;
- in_tx = !!(info & LBR_INFO_IN_TX);
- abort = !!(info & LBR_INFO_ABORT);
- cycles = (info & LBR_INFO_CYCLES);
- }
-
- if (lbr_format == LBR_FORMAT_TIME) {
- mis = !!(from & LBR_FROM_FLAG_MISPRED);
- pred = !mis;
- skip = 1;
- cycles = ((to >> 48) & LBR_INFO_CYCLES);
-
- to = (u64)((((s64)to) << 16) >> 16);
- }
-
- if (lbr_flags & LBR_EIP_FLAGS) {
- mis = !!(from & LBR_FROM_FLAG_MISPRED);
- pred = !mis;
- skip = 1;
- }
- if (lbr_flags & LBR_TSX) {
- in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
- abort = !!(from & LBR_FROM_FLAG_ABORT);
- skip = 3;
+ if (x86_pmu.lbr_has_info) {
+ if (need_info) {
+ u64 info;
+
+ info = rdlbr_info(lbr_idx, NULL);
+ mis = !!(info & LBR_INFO_MISPRED);
+ pred = !mis;
+ cycles = (info & LBR_INFO_CYCLES);
+ if (x86_pmu.lbr_has_tsx) {
+ in_tx = !!(info & LBR_INFO_IN_TX);
+ abort = !!(info & LBR_INFO_ABORT);
+ }
+ }
+ } else {
+ int skip = 0;
+
+ if (x86_pmu.lbr_from_flags) {
+ mis = !!(from & LBR_FROM_FLAG_MISPRED);
+ pred = !mis;
+ skip = 1;
+ }
+ if (x86_pmu.lbr_has_tsx) {
+ in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+ abort = !!(from & LBR_FROM_FLAG_ABORT);
+ skip = 3;
+ }
+ from = (u64)((((s64)from) << skip) >> skip);
+
+ if (x86_pmu.lbr_to_cycles) {
+ cycles = ((to >> 48) & LBR_INFO_CYCLES);
+ to = (u64)((((s64)to) << 16) >> 16);
+ }
}
- from = (u64)((((s64)from) << skip) >> skip);
/*
* Some CPUs report duplicated abort records,
@@ -903,37 +893,40 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
cpuc->lbr_stack.hw_idx = tos;
}
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);
+
static __always_inline int get_lbr_br_type(u64 info)
{
- if (!static_cpu_has(X86_FEATURE_ARCH_LBR) || !x86_pmu.lbr_br_type)
- return 0;
+ int type = 0;
- return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
+ if (static_branch_likely(&x86_lbr_type))
+ type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
+
+ return type;
}
static __always_inline bool get_lbr_mispred(u64 info)
{
- if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
- return 0;
+ bool mispred = 0;
- return !!(info & LBR_INFO_MISPRED);
-}
+ if (static_branch_likely(&x86_lbr_mispred))
+ mispred = !!(info & LBR_INFO_MISPRED);
-static __always_inline bool get_lbr_predicted(u64 info)
-{
- if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
- return 0;
-
- return !(info & LBR_INFO_MISPRED);
+ return mispred;
}
static __always_inline u16 get_lbr_cycles(u64 info)
{
+ u16 cycles = info & LBR_INFO_CYCLES;
+
if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
- !(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID))
- return 0;
+ (!static_branch_likely(&x86_lbr_cycles) ||
+ !(info & LBR_INFO_CYC_CNT_VALID)))
+ cycles = 0;
- return info & LBR_INFO_CYCLES;
+ return cycles;
}
static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
@@ -961,7 +954,7 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
e->from = from;
e->to = to;
e->mispred = get_lbr_mispred(info);
- e->predicted = get_lbr_predicted(info);
+ e->predicted = !e->mispred;
e->in_tx = !!(info & LBR_INFO_IN_TX);
e->abort = !!(info & LBR_INFO_ABORT);
e->cycles = get_lbr_cycles(info);
@@ -1120,7 +1113,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
(br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
- (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
+ x86_pmu.lbr_has_info)
reg->config |= LBR_NO_INFO;
return 0;
@@ -1336,10 +1329,10 @@ static int branch_map[X86_BR_TYPE_MAP_MAX] = {
PERF_BR_SYSCALL, /* X86_BR_SYSCALL */
PERF_BR_SYSRET, /* X86_BR_SYSRET */
PERF_BR_UNKNOWN, /* X86_BR_INT */
- PERF_BR_UNKNOWN, /* X86_BR_IRET */
+ PERF_BR_ERET, /* X86_BR_IRET */
PERF_BR_COND, /* X86_BR_JCC */
PERF_BR_UNCOND, /* X86_BR_JMP */
- PERF_BR_UNKNOWN, /* X86_BR_IRQ */
+ PERF_BR_IRQ, /* X86_BR_IRQ */
PERF_BR_IND_CALL, /* X86_BR_IND_CALL */
PERF_BR_UNKNOWN, /* X86_BR_ABORT */
PERF_BR_UNKNOWN, /* X86_BR_IN_TX */
@@ -1706,6 +1699,38 @@ void intel_pmu_lbr_init_knl(void)
x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
}
+void intel_pmu_lbr_init(void)
+{
+ switch (x86_pmu.intel_cap.lbr_format) {
+ case LBR_FORMAT_EIP_FLAGS2:
+ x86_pmu.lbr_has_tsx = 1;
+ fallthrough;
+ case LBR_FORMAT_EIP_FLAGS:
+ x86_pmu.lbr_from_flags = 1;
+ break;
+
+ case LBR_FORMAT_INFO:
+ x86_pmu.lbr_has_tsx = 1;
+ fallthrough;
+ case LBR_FORMAT_INFO2:
+ x86_pmu.lbr_has_info = 1;
+ break;
+
+ case LBR_FORMAT_TIME:
+ x86_pmu.lbr_from_flags = 1;
+ x86_pmu.lbr_to_cycles = 1;
+ break;
+ }
+
+ if (x86_pmu.lbr_has_info) {
+ /*
+ * Only used in combination with baseline PEBS.
+ */
+ static_branch_enable(&x86_lbr_mispred);
+ static_branch_enable(&x86_lbr_cycles);
+ }
+}
+
/*
* LBR state size is variable based on the max number of registers.
* This calculates the expected state size, which should match
@@ -1726,6 +1751,9 @@ static bool is_arch_lbr_xsave_available(void)
* Check the LBR state with the corresponding software structure.
* Disable LBR XSAVES support if the size doesn't match.
*/
+ if (xfeature_size(XFEATURE_LBR) == 0)
+ return false;
+
if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
return false;
@@ -1765,6 +1793,12 @@ void __init intel_pmu_arch_lbr_init(void)
x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
x86_pmu.lbr_nr = lbr_nr;
+ if (x86_pmu.lbr_mispred)
+ static_branch_enable(&x86_lbr_mispred);
+ if (x86_pmu.lbr_timed_lbr)
+ static_branch_enable(&x86_lbr_cycles);
+ if (x86_pmu.lbr_br_type)
+ static_branch_enable(&x86_lbr_type);
arch_lbr_xsave = is_arch_lbr_xsave_available();
if (arch_lbr_xsave) {
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 7f406c14715f..82ef87e9a897 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -13,6 +13,8 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/types.h>
+#include <linux/bits.h>
+#include <linux/limits.h>
#include <linux/slab.h>
#include <linux/device.h>
@@ -57,6 +59,8 @@ static struct pt_cap_desc {
PT_CAP(mtc, 0, CPUID_EBX, BIT(3)),
PT_CAP(ptwrite, 0, CPUID_EBX, BIT(4)),
PT_CAP(power_event_trace, 0, CPUID_EBX, BIT(5)),
+ PT_CAP(event_trace, 0, CPUID_EBX, BIT(7)),
+ PT_CAP(tnt_disable, 0, CPUID_EBX, BIT(8)),
PT_CAP(topa_output, 0, CPUID_ECX, BIT(0)),
PT_CAP(topa_multiple_entries, 0, CPUID_ECX, BIT(1)),
PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)),
@@ -108,6 +112,8 @@ PMU_FORMAT_ATTR(tsc, "config:10" );
PMU_FORMAT_ATTR(noretcomp, "config:11" );
PMU_FORMAT_ATTR(ptw, "config:12" );
PMU_FORMAT_ATTR(branch, "config:13" );
+PMU_FORMAT_ATTR(event, "config:31" );
+PMU_FORMAT_ATTR(notnt, "config:55" );
PMU_FORMAT_ATTR(mtc_period, "config:14-17" );
PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" );
PMU_FORMAT_ATTR(psb_period, "config:24-27" );
@@ -116,6 +122,8 @@ static struct attribute *pt_formats_attr[] = {
&format_attr_pt.attr,
&format_attr_cyc.attr,
&format_attr_pwr_evt.attr,
+ &format_attr_event.attr,
+ &format_attr_notnt.attr,
&format_attr_fup_on_ptw.attr,
&format_attr_mtc.attr,
&format_attr_tsc.attr,
@@ -296,6 +304,8 @@ fail:
RTIT_CTL_CYC_PSB | \
RTIT_CTL_MTC | \
RTIT_CTL_PWR_EVT_EN | \
+ RTIT_CTL_EVENT_EN | \
+ RTIT_CTL_NOTNT | \
RTIT_CTL_FUP_ON_PTW | \
RTIT_CTL_PTW_EN)
@@ -350,6 +360,14 @@ static bool pt_event_valid(struct perf_event *event)
!intel_pt_validate_hw_cap(PT_CAP_power_event_trace))
return false;
+ if (config & RTIT_CTL_EVENT_EN &&
+ !intel_pt_validate_hw_cap(PT_CAP_event_trace))
+ return false;
+
+ if (config & RTIT_CTL_NOTNT &&
+ !intel_pt_validate_hw_cap(PT_CAP_tnt_disable))
+ return false;
+
if (config & RTIT_CTL_PTW) {
if (!intel_pt_validate_hw_cap(PT_CAP_ptwrite))
return false;
@@ -472,7 +490,7 @@ static u64 pt_config_filters(struct perf_event *event)
pt->filters.filter[range].msr_b = filter->msr_b;
}
- rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
+ rtit_ctl |= (u64)filter->config << pt_address_ranges[range].reg_off;
}
return rtit_ctl;
@@ -897,8 +915,9 @@ static void pt_handle_status(struct pt *pt)
* means we are already losing data; need to let the decoder
* know.
*/
- if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
- buf->output_off == pt_buffer_region_size(buf)) {
+ if (!buf->single &&
+ (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
+ buf->output_off == pt_buffer_region_size(buf))) {
perf_aux_output_flag(&pt->handle,
PERF_AUX_FLAG_TRUNCATED);
advance++;
@@ -1347,10 +1366,26 @@ static void pt_addr_filters_fini(struct perf_event *event)
event->hw.addr_filters = NULL;
}
-static inline bool valid_kernel_ip(unsigned long ip)
+#ifdef CONFIG_X86_64
+/* Clamp to a canonical address greater-than-or-equal-to the address given */
+static u64 clamp_to_ge_canonical_addr(u64 vaddr, u8 vaddr_bits)
+{
+ return __is_canonical_address(vaddr, vaddr_bits) ?
+ vaddr :
+ -BIT_ULL(vaddr_bits - 1);
+}
+
+/* Clamp to a canonical address less-than-or-equal-to the address given */
+static u64 clamp_to_le_canonical_addr(u64 vaddr, u8 vaddr_bits)
{
- return virt_addr_valid(ip) && kernel_ip(ip);
+ return __is_canonical_address(vaddr, vaddr_bits) ?
+ vaddr :
+ BIT_ULL(vaddr_bits - 1) - 1;
}
+#else
+#define clamp_to_ge_canonical_addr(x, y) (x)
+#define clamp_to_le_canonical_addr(x, y) (x)
+#endif
static int pt_event_addr_filters_validate(struct list_head *filters)
{
@@ -1366,14 +1401,6 @@ static int pt_event_addr_filters_validate(struct list_head *filters)
filter->action == PERF_ADDR_FILTER_ACTION_START)
return -EOPNOTSUPP;
- if (!filter->path.dentry) {
- if (!valid_kernel_ip(filter->offset))
- return -EINVAL;
-
- if (!valid_kernel_ip(filter->offset + filter->size))
- return -EINVAL;
- }
-
if (++range > intel_pt_validate_hw_cap(PT_CAP_num_address_ranges))
return -EOPNOTSUPP;
}
@@ -1397,9 +1424,26 @@ static void pt_event_addr_filters_sync(struct perf_event *event)
if (filter->path.dentry && !fr[range].start) {
msr_a = msr_b = 0;
} else {
- /* apply the offset */
- msr_a = fr[range].start;
- msr_b = msr_a + fr[range].size - 1;
+ unsigned long n = fr[range].size - 1;
+ unsigned long a = fr[range].start;
+ unsigned long b;
+
+ if (a > ULONG_MAX - n)
+ b = ULONG_MAX;
+ else
+ b = a + n;
+ /*
+ * Apply the offset. 64-bit addresses written to the
+ * MSRs must be canonical, but the range can encompass
+ * non-canonical addresses. Since software cannot
+ * execute at non-canonical addresses, adjusting to
+ * canonical addresses does not affect the result of the
+ * address filter.
+ */
+ msr_a = clamp_to_ge_canonical_addr(a, boot_cpu_data.x86_virt_bits);
+ msr_b = clamp_to_le_canonical_addr(b, boot_cpu_data.x86_virt_bits);
+ if (msr_b < msr_a)
+ msr_a = msr_b = 0;
}
filters->filter[range].msr_a = msr_a;
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index f1ba6ab2e97e..e497da9bf427 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1762,7 +1762,7 @@ static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {
static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
.cpu_init = adl_uncore_cpu_init,
- .mmio_init = tgl_uncore_mmio_init,
+ .mmio_init = adl_uncore_mmio_init,
};
static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index b9687980aab6..2adeaf4de4df 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -584,10 +584,11 @@ void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
void skl_uncore_cpu_init(void);
void icl_uncore_cpu_init(void);
-void adl_uncore_cpu_init(void);
void tgl_uncore_cpu_init(void);
+void adl_uncore_cpu_init(void);
void tgl_uncore_mmio_init(void);
void tgl_l_uncore_mmio_init(void);
+void adl_uncore_mmio_init(void);
int snb_pci2phy_map_init(int devid);
/* uncore_snbep.c */
diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c
index 3049c646fa20..5fd72d4b8bbb 100644
--- a/arch/x86/events/intel/uncore_discovery.c
+++ b/arch/x86/events/intel/uncore_discovery.c
@@ -215,10 +215,18 @@ static int parse_discovery_table(struct pci_dev *dev, int die,
pci_read_config_dword(dev, bar_offset, &val);
- if (val & UNCORE_DISCOVERY_MASK)
+ if (val & ~PCI_BASE_ADDRESS_MEM_MASK & ~PCI_BASE_ADDRESS_MEM_TYPE_64)
return -EINVAL;
- addr = (resource_size_t)(val & ~UNCORE_DISCOVERY_MASK);
+ addr = (resource_size_t)(val & PCI_BASE_ADDRESS_MEM_MASK);
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+ if ((val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ u32 val2;
+
+ pci_read_config_dword(dev, bar_offset + 4, &val2);
+ addr |= ((resource_size_t)val2) << 32;
+ }
+#endif
size = UNCORE_DISCOVERY_GLOBAL_MAP_SIZE;
io_addr = ioremap(addr, size);
if (!io_addr)
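For reference, the 64-bit BAR decode added above in miniature: a 64-bit memory BAR spans two config dwords, the low flag bits are masked off, and the upper dword is spliced in. A userspace sketch with hard-coded register values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t lo = 0xfe000004;	/* type bits 2:1 = 10b: 64-bit BAR */
	uint32_t hi = 0x00000020;
	uint64_t addr = lo & ~0xfULL;	/* PCI_BASE_ADDRESS_MEM_MASK */

	if ((lo & 0x6) == 0x4)		/* PCI_BASE_ADDRESS_MEM_TYPE_64 */
		addr |= (uint64_t)hi << 32;

	printf("BAR = 0x%016llx\n", (unsigned long long)addr);
	return 0;
}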
@@ -444,7 +452,7 @@ static struct intel_uncore_ops generic_uncore_pci_ops = {
#define UNCORE_GENERIC_MMIO_SIZE 0x4000
-static unsigned int generic_uncore_mmio_box_ctl(struct intel_uncore_box *box)
+static u64 generic_uncore_mmio_box_ctl(struct intel_uncore_box *box)
{
struct intel_uncore_type *type = box->pmu->type;
@@ -456,7 +464,7 @@ static unsigned int generic_uncore_mmio_box_ctl(struct intel_uncore_box *box)
void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box)
{
- unsigned int box_ctl = generic_uncore_mmio_box_ctl(box);
+ u64 box_ctl = generic_uncore_mmio_box_ctl(box);
struct intel_uncore_type *type = box->pmu->type;
resource_size_t addr;
@@ -494,8 +502,8 @@ void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box)
writel(0, box->io_addr);
}
-static void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
- struct perf_event *event)
+void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h
index 6d735611c281..f4439357779a 100644
--- a/arch/x86/events/intel/uncore_discovery.h
+++ b/arch/x86/events/intel/uncore_discovery.h
@@ -18,8 +18,6 @@
#define UNCORE_DISCOVERY_BIR_BASE 0x10
/* Discovery table BAR step */
#define UNCORE_DISCOVERY_BIR_STEP 0x4
-/* Mask of the discovery table offset */
-#define UNCORE_DISCOVERY_MASK 0xf
/* Global discovery table size */
#define UNCORE_DISCOVERY_GLOBAL_MAP_SIZE 0x20
@@ -139,6 +137,8 @@ void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box);
void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box);
void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box,
struct perf_event *event);
+void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event);
void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box);
void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box);
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 0f63706cdadf..f698a55bde81 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Nehalem/SandyBridge/Haswell/Broadwell/Skylake uncore support */
#include "uncore.h"
+#include "uncore_discovery.h"
/* Uncore IMC PCI IDs */
#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
@@ -64,6 +65,20 @@
#define PCI_DEVICE_ID_INTEL_RKL_2_IMC 0x4c53
#define PCI_DEVICE_ID_INTEL_ADL_1_IMC 0x4660
#define PCI_DEVICE_ID_INTEL_ADL_2_IMC 0x4641
+#define PCI_DEVICE_ID_INTEL_ADL_3_IMC 0x4601
+#define PCI_DEVICE_ID_INTEL_ADL_4_IMC 0x4602
+#define PCI_DEVICE_ID_INTEL_ADL_5_IMC 0x4609
+#define PCI_DEVICE_ID_INTEL_ADL_6_IMC 0x460a
+#define PCI_DEVICE_ID_INTEL_ADL_7_IMC 0x4621
+#define PCI_DEVICE_ID_INTEL_ADL_8_IMC 0x4623
+#define PCI_DEVICE_ID_INTEL_ADL_9_IMC 0x4629
+#define PCI_DEVICE_ID_INTEL_ADL_10_IMC 0x4637
+#define PCI_DEVICE_ID_INTEL_ADL_11_IMC 0x463b
+#define PCI_DEVICE_ID_INTEL_ADL_12_IMC 0x4648
+#define PCI_DEVICE_ID_INTEL_ADL_13_IMC 0x4649
+#define PCI_DEVICE_ID_INTEL_ADL_14_IMC 0x4650
+#define PCI_DEVICE_ID_INTEL_ADL_15_IMC 0x4668
+#define PCI_DEVICE_ID_INTEL_ADL_16_IMC 0x4670
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
@@ -155,6 +170,7 @@
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(chmask, chmask, "config:8-11");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
@@ -1334,6 +1350,62 @@ static const struct pci_device_id tgl_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_2_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_3_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_4_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_5_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_6_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_7_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_8_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_9_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_10_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_11_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_12_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_13_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_14_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_15_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_16_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
{ /* end: all zeroes */ }
};
@@ -1390,7 +1462,8 @@ static struct pci_dev *tgl_uncore_get_mc_dev(void)
#define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000
#define TGL_UNCORE_PCI_IMC_MAP_SIZE 0xe000
-static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+static void __uncore_imc_init_box(struct intel_uncore_box *box,
+ unsigned int base_offset)
{
struct pci_dev *pdev = tgl_uncore_get_mc_dev();
struct intel_uncore_pmu *pmu = box->pmu;
@@ -1417,11 +1490,17 @@ static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
addr |= ((resource_size_t)mch_bar << 32);
#endif
+ addr += base_offset;
box->io_addr = ioremap(addr, type->mmio_map_size);
if (!box->io_addr)
pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
}
+static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+{
+ __uncore_imc_init_box(box, 0);
+}
+
static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = {
.init_box = tgl_uncore_imc_freerunning_init_box,
.exit_box = uncore_mmio_exit_box,
@@ -1469,3 +1548,136 @@ void tgl_uncore_mmio_init(void)
}
/* end of Tiger Lake MMIO uncore support */
+
+/* Alder Lake MMIO uncore support */
+#define ADL_UNCORE_IMC_BASE 0xd900
+#define ADL_UNCORE_IMC_MAP_SIZE 0x200
+#define ADL_UNCORE_IMC_CTR 0xe8
+#define ADL_UNCORE_IMC_CTRL 0xd0
+#define ADL_UNCORE_IMC_GLOBAL_CTL 0xc0
+#define ADL_UNCORE_IMC_BOX_CTL 0xc4
+#define ADL_UNCORE_IMC_FREERUNNING_BASE 0xd800
+#define ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE 0x100
+
+#define ADL_UNCORE_IMC_CTL_FRZ (1 << 0)
+#define ADL_UNCORE_IMC_CTL_RST_CTRL (1 << 1)
+#define ADL_UNCORE_IMC_CTL_RST_CTRS (1 << 2)
+#define ADL_UNCORE_IMC_CTL_INT (ADL_UNCORE_IMC_CTL_RST_CTRL | \
+ ADL_UNCORE_IMC_CTL_RST_CTRS)
+
+static void adl_uncore_imc_init_box(struct intel_uncore_box *box)
+{
+ __uncore_imc_init_box(box, ADL_UNCORE_IMC_BASE);
+
+ /* The global control in MC1 can control both MCs. */
+ if (box->io_addr && (box->pmu->pmu_idx == 1))
+ writel(ADL_UNCORE_IMC_CTL_INT, box->io_addr + ADL_UNCORE_IMC_GLOBAL_CTL);
+}
+
+static void adl_uncore_mmio_disable_box(struct intel_uncore_box *box)
+{
+ if (!box->io_addr)
+ return;
+
+ writel(ADL_UNCORE_IMC_CTL_FRZ, box->io_addr + uncore_mmio_box_ctl(box));
+}
+
+static void adl_uncore_mmio_enable_box(struct intel_uncore_box *box)
+{
+ if (!box->io_addr)
+ return;
+
+ writel(0, box->io_addr + uncore_mmio_box_ctl(box));
+}
+
+static struct intel_uncore_ops adl_uncore_mmio_ops = {
+ .init_box = adl_uncore_imc_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .disable_box = adl_uncore_mmio_disable_box,
+ .enable_box = adl_uncore_mmio_enable_box,
+ .disable_event = intel_generic_uncore_mmio_disable_event,
+ .enable_event = intel_generic_uncore_mmio_enable_event,
+ .read_counter = uncore_mmio_read_counter,
+};
+
+#define ADL_UNC_CTL_CHMASK_MASK 0x00000f00
+#define ADL_UNC_IMC_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
+ ADL_UNC_CTL_CHMASK_MASK | \
+ SNB_UNC_CTL_EDGE_DET)
+
+static struct attribute *adl_uncore_imc_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_chmask.attr,
+ &format_attr_edge.attr,
+ NULL,
+};
+
+static const struct attribute_group adl_uncore_imc_format_group = {
+ .name = "format",
+ .attrs = adl_uncore_imc_formats_attr,
+};
+
+static struct intel_uncore_type adl_uncore_imc = {
+ .name = "imc",
+ .num_counters = 5,
+ .num_boxes = 2,
+ .perf_ctr_bits = 64,
+ .perf_ctr = ADL_UNCORE_IMC_CTR,
+ .event_ctl = ADL_UNCORE_IMC_CTRL,
+ .event_mask = ADL_UNC_IMC_EVENT_MASK,
+ .box_ctl = ADL_UNCORE_IMC_BOX_CTL,
+ .mmio_offset = 0,
+ .mmio_map_size = ADL_UNCORE_IMC_MAP_SIZE,
+ .ops = &adl_uncore_mmio_ops,
+ .format_group = &adl_uncore_imc_format_group,
+};
+
+enum perf_adl_uncore_imc_freerunning_types {
+ ADL_MMIO_UNCORE_IMC_DATA_TOTAL,
+ ADL_MMIO_UNCORE_IMC_DATA_READ,
+ ADL_MMIO_UNCORE_IMC_DATA_WRITE,
+ ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX
+};
+
+static struct freerunning_counters adl_uncore_imc_freerunning[] = {
+ [ADL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0x40, 0x0, 0x0, 1, 64 },
+ [ADL_MMIO_UNCORE_IMC_DATA_READ] = { 0x58, 0x0, 0x0, 1, 64 },
+ [ADL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0xA0, 0x0, 0x0, 1, 64 },
+};
+
+static void adl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+{
+ __uncore_imc_init_box(box, ADL_UNCORE_IMC_FREERUNNING_BASE);
+}
+
+static struct intel_uncore_ops adl_uncore_imc_freerunning_ops = {
+ .init_box = adl_uncore_imc_freerunning_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .read_counter = uncore_mmio_read_counter,
+ .hw_config = uncore_freerunning_hw_config,
+};
+
+static struct intel_uncore_type adl_uncore_imc_free_running = {
+ .name = "imc_free_running",
+ .num_counters = 3,
+ .num_boxes = 2,
+ .num_freerunning_types = ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX,
+ .mmio_map_size = ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE,
+ .freerunning = adl_uncore_imc_freerunning,
+ .ops = &adl_uncore_imc_freerunning_ops,
+ .event_descs = tgl_uncore_imc_events,
+ .format_group = &tgl_uncore_imc_format_group,
+};
+
+static struct intel_uncore_type *adl_mmio_uncores[] = {
+ &adl_uncore_imc,
+ &adl_uncore_imc_free_running,
+ NULL
+};
+
+void adl_uncore_mmio_init(void)
+{
+ uncore_mmio_uncores = adl_mmio_uncores;
+}
+
+/* end of Alder Lake MMIO uncore support */
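Free-running counters such as those in adl_uncore_imc_freerunning cannot be started, stopped, or reprogrammed, so perf samples them and accumulates wrap-safe deltas. A small sketch of that arithmetic (illustrative; the kernel's accumulation lives in the generic uncore code):

#include <stdint.h>
#include <stdio.h>

static uint64_t fr_delta(uint64_t prev, uint64_t now, unsigned int bits)
{
	uint64_t mask = (bits >= 64) ? ~0ULL : ((1ULL << bits) - 1);

	return (now - prev) & mask;
}

int main(void)
{
	/* A 48-bit counter that wrapped between the two reads. */
	printf("%llu\n",
	       (unsigned long long)fr_delta(0xffffffffffffULL, 5, 48));
	return 0;
}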
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 3660f698fb2a..ed869443efb2 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -5482,7 +5482,7 @@ static struct intel_uncore_type icx_uncore_imc = {
.fixed_ctr_bits = 48,
.fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
.fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
- .event_descs = hswep_uncore_imc_events,
+ .event_descs = snr_uncore_imc_events,
.perf_ctr = SNR_IMC_MMIO_PMON_CTR0,
.event_ctl = SNR_IMC_MMIO_PMON_CTL0,
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 9d376e528dfc..150261d929b9 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -215,7 +215,8 @@ enum {
LBR_FORMAT_EIP_FLAGS2 = 0x04,
LBR_FORMAT_INFO = 0x05,
LBR_FORMAT_TIME = 0x06,
- LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME,
+ LBR_FORMAT_INFO2 = 0x07,
+ LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO2,
};
enum {
@@ -840,6 +841,11 @@ struct x86_pmu {
bool lbr_double_abort; /* duplicated lbr aborts */
bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */
+ unsigned int lbr_has_info:1;
+ unsigned int lbr_has_tsx:1;
+ unsigned int lbr_from_flags:1;
+ unsigned int lbr_to_cycles:1;
+
/*
* Intel Architectural LBR CPUID Enumeration
*/
@@ -1392,6 +1398,8 @@ void intel_pmu_lbr_init_skl(void);
void intel_pmu_lbr_init_knl(void);
+void intel_pmu_lbr_init(void);
+
void intel_pmu_arch_lbr_init(void);
void intel_pmu_pebs_data_source_nhm(void);
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 85feafacc445..77e3a47af5ad 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -536,11 +536,14 @@ static struct perf_msr intel_rapl_spr_msrs[] = {
* - perf_msr_probe(PERF_RAPL_MAX)
* - want to use same event codes across both architectures
*/
-static struct perf_msr amd_rapl_msrs[PERF_RAPL_MAX] = {
- [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr },
+static struct perf_msr amd_rapl_msrs[] = {
+ [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, 0, false, 0 },
+ [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK },
+ [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, 0, false, 0 },
+ [PERF_RAPL_PP1] = { 0, &rapl_events_gpu_group, 0, false, 0 },
+ [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, 0, false, 0 },
};
-
static int rapl_cpu_offline(unsigned int cpu)
{
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 84b87538a15d..bab883c0b6fe 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -22,7 +22,7 @@
#ifdef CONFIG_DEBUG_BUGVERBOSE
-#define _BUG_FLAGS(ins, flags) \
+#define _BUG_FLAGS(ins, flags, extra) \
do { \
asm_inline volatile("1:\t" ins "\n" \
".pushsection __bug_table,\"aw\"\n" \
@@ -31,7 +31,8 @@ do { \
"\t.word %c1" "\t# bug_entry::line\n" \
"\t.word %c2" "\t# bug_entry::flags\n" \
"\t.org 2b+%c3\n" \
- ".popsection" \
+ ".popsection\n" \
+ extra \
: : "i" (__FILE__), "i" (__LINE__), \
"i" (flags), \
"i" (sizeof(struct bug_entry))); \
@@ -39,14 +40,15 @@ do { \
#else /* !CONFIG_DEBUG_BUGVERBOSE */
-#define _BUG_FLAGS(ins, flags) \
+#define _BUG_FLAGS(ins, flags, extra) \
do { \
asm_inline volatile("1:\t" ins "\n" \
".pushsection __bug_table,\"aw\"\n" \
"2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
"\t.word %c0" "\t# bug_entry::flags\n" \
"\t.org 2b+%c1\n" \
- ".popsection" \
+ ".popsection\n" \
+ extra \
: : "i" (flags), \
"i" (sizeof(struct bug_entry))); \
} while (0)
@@ -55,7 +57,7 @@ do { \
#else
-#define _BUG_FLAGS(ins, flags) asm volatile(ins)
+#define _BUG_FLAGS(ins, flags, extra) asm volatile(ins)
#endif /* CONFIG_GENERIC_BUG */
@@ -63,8 +65,8 @@ do { \
#define BUG() \
do { \
instrumentation_begin(); \
- _BUG_FLAGS(ASM_UD2, 0); \
- unreachable(); \
+ _BUG_FLAGS(ASM_UD2, 0, ""); \
+ __builtin_unreachable(); \
} while (0)
/*
@@ -75,9 +77,9 @@ do { \
*/
#define __WARN_FLAGS(flags) \
do { \
+ __auto_type f = BUGFLAG_WARNING|(flags); \
instrumentation_begin(); \
- _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \
- annotate_reachable(); \
+ _BUG_FLAGS(ASM_UD2, f, ASM_REACHABLE); \
instrumentation_end(); \
} while (0)
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
new file mode 100644
index 000000000000..3d98c3a60d34
--- /dev/null
+++ b/arch/x86/include/asm/coco.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_COCO_H
+#define _ASM_X86_COCO_H
+
+#include <asm/types.h>
+
+enum cc_vendor {
+ CC_VENDOR_NONE,
+ CC_VENDOR_AMD,
+ CC_VENDOR_HYPERV,
+ CC_VENDOR_INTEL,
+};
+
+void cc_set_vendor(enum cc_vendor v);
+void cc_set_mask(u64 mask);
+
+#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
+u64 cc_mkenc(u64 val);
+u64 cc_mkdec(u64 val);
+#else
+static inline u64 cc_mkenc(u64 val)
+{
+ return val;
+}
+
+static inline u64 cc_mkdec(u64 val)
+{
+ return val;
+}
+#endif
+
+#endif /* _ASM_X86_COCO_H */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 6db4e2932b3d..3edf05e98e58 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -204,7 +204,7 @@
/* FREE! ( 7*32+10) */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
@@ -299,9 +299,6 @@
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
-#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
-#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
-#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
@@ -330,6 +327,7 @@
#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */
/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
@@ -390,7 +388,10 @@
#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
+#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */
+#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
+#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
index 3afa990d756b..c5aed9e9226c 100644
--- a/arch/x86/include/asm/cpumask.h
+++ b/arch/x86/include/asm/cpumask.h
@@ -20,11 +20,21 @@ static __always_inline bool arch_cpu_online(int cpu)
{
return arch_test_bit(cpu, cpumask_bits(cpu_online_mask));
}
+
+static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp)
+{
+ arch_clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
+}
#else
static __always_inline bool arch_cpu_online(int cpu)
{
return cpu == 0;
}
+
+static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp)
+{
+ return;
+}
#endif
#define arch_cpu_is_offline(cpu) unlikely(!arch_cpu_online(cpu))
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 8f28fafa98b3..1231d63f836d 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -56,8 +56,11 @@
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
-/* Force disable because it's broken beyond repair */
-#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
+#ifdef CONFIG_INTEL_IOMMU_SVM
+# define DISABLE_ENQCMD 0
+#else
+# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
+#endif
#ifdef CONFIG_X86_SGX
# define DISABLE_SGX 0
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 05a6ab940f45..1b29f58f730f 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -124,7 +124,7 @@ struct insn {
#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */
#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */
/* VEX bit fields */
-#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */
+#define X86_EVEX_M(vex) ((vex) & 0x07) /* EVEX Byte1 */
#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */
#define X86_VEX2_M 1 /* VEX2.M always 1 */
#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
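The widened mask matters because the EVEX opcode-map field in the byte after the 0x62 prefix is three bits wide, and a two-bit mask would misdecode maps 5 and 6 (introduced with AVX512-FP16). A trivial sketch of the extraction:

#include <stdio.h>

/* Extract the 3-bit EVEX map field from EVEX payload byte 1. */
static unsigned int evex_map(unsigned char p0)
{
	return p0 & 0x07;
}

int main(void)
{
	printf("map %u\n", evex_map(0x05));	/* map 5 needs bit 2 */
	return 0;
}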
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
index 8380c3ddd4b2..2f9eeb5c3069 100644
--- a/arch/x86/include/asm/intel_ds.h
+++ b/arch/x86/include/asm/intel_ds.h
@@ -7,8 +7,9 @@
#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS 8
-#define MAX_FIXED_PEBS_EVENTS 4
+#define MAX_PEBS_EVENTS_FMT4 8
+#define MAX_PEBS_EVENTS 32
+#define MAX_FIXED_PEBS_EVENTS 16
/*
* A debug store configuration.
diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h
index ebe8d2ea44fe..c796e9bc98b6 100644
--- a/arch/x86/include/asm/intel_pt.h
+++ b/arch/x86/include/asm/intel_pt.h
@@ -13,6 +13,8 @@ enum pt_capabilities {
PT_CAP_mtc,
PT_CAP_ptwrite,
PT_CAP_power_event_trace,
+ PT_CAP_event_trace,
+ PT_CAP_tnt_disable,
PT_CAP_topa_output,
PT_CAP_topa_multiple_entries,
PT_CAP_single_range_output,
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 631d5040b31e..d39e0de06be2 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -82,7 +82,7 @@ KVM_X86_OP_NULL(guest_apic_has_interrupt)
KVM_X86_OP(load_eoi_exitmap)
KVM_X86_OP(set_virtual_apic_mode)
KVM_X86_OP_NULL(set_apic_access_page_addr)
-KVM_X86_OP(deliver_posted_interrupt)
+KVM_X86_OP(deliver_interrupt)
KVM_X86_OP_NULL(sync_pir_to_irr)
KVM_X86_OP(set_tss_addr)
KVM_X86_OP(set_identity_map_addr)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1384517d7709..b61e46743184 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -498,6 +498,7 @@ struct kvm_pmc {
bool intr;
};
+#define KVM_PMC_MAX_FIXED 3
struct kvm_pmu {
unsigned nr_arch_gp_counters;
unsigned nr_arch_fixed_counters;
@@ -511,7 +512,7 @@ struct kvm_pmu {
u64 reserved_bits;
u8 version;
struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
- struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
+ struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
struct irq_work irq_work;
DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
@@ -703,7 +704,6 @@ struct kvm_vcpu_arch {
struct fpu_guest guest_fpu;
u64 xcr0;
- u64 guest_supported_xcr0;
struct kvm_pio_request pio;
void *pio_data;
@@ -1410,7 +1410,8 @@ struct kvm_x86_ops {
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
- int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
+ void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode,
+ int trig_mode, int vector);
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
@@ -1483,7 +1484,8 @@ struct kvm_x86_ops {
int (*get_msr_feature)(struct kvm_msr_entry *entry);
- bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, void *insn, int insn_len);
+ bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type,
+ void *insn, int insn_len);
bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
@@ -1496,6 +1498,7 @@ struct kvm_x86_ops {
};
struct kvm_x86_nested_ops {
+ void (*leave_nested)(struct kvm_vcpu *vcpu);
int (*check_events)(struct kvm_vcpu *vcpu);
bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
void (*triple_fault)(struct kvm_vcpu *vcpu);
@@ -1861,7 +1864,6 @@ int kvm_cpu_has_extint(struct kvm_vcpu *v);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
-void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 3faf0f97edb1..9f1741ac4769 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -205,6 +205,8 @@
#define RTIT_CTL_DISRETC BIT(11)
#define RTIT_CTL_PTW_EN BIT(12)
#define RTIT_CTL_BRANCH_EN BIT(13)
+#define RTIT_CTL_EVENT_EN BIT(31)
+#define RTIT_CTL_NOTNT BIT_ULL(55)
#define RTIT_CTL_MTC_RANGE_OFFSET 14
#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
#define RTIT_CTL_CYC_THRESH_OFFSET 19
@@ -476,6 +478,7 @@
#define MSR_AMD64_ICIBSEXTDCTL 0xc001103c
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b
#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e
#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
@@ -704,12 +707,14 @@
#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0)
#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10)
+#define PACKAGE_THERM_STATUS_HFI_UPDATED (1 << 26)
#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2
#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0)
#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1)
#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24)
+#define PACKAGE_THERM_INT_HFI_ENABLE (1 << 25)
/* Thermal Thresholds Support */
#define THERM_INT_THRESHOLD0_ENABLE (1 << 15)
@@ -958,4 +963,8 @@
#define MSR_VM_IGNNE 0xc0010115
#define MSR_VM_HSAVE_PA 0xc0010117
+/* Hardware Feedback Interface */
+#define MSR_IA32_HW_FEEDBACK_PTR 0x17d0
+#define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1
+
#endif /* _ASM_X86_MSR_INDEX_H */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index cc74dc584836..acbaeaf83b61 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -84,7 +84,7 @@
#ifdef CONFIG_RETPOLINE
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
__stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE
#else
jmp *%\reg
#endif
@@ -94,7 +94,7 @@
#ifdef CONFIG_RETPOLINE
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
__stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_LFENCE
#else
call *%\reg
#endif
@@ -146,7 +146,7 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[];
"lfence;\n" \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
- X86_FEATURE_RETPOLINE_AMD)
+ X86_FEATURE_RETPOLINE_LFENCE)
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
@@ -176,7 +176,7 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[];
"lfence;\n" \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
- X86_FEATURE_RETPOLINE_AMD)
+ X86_FEATURE_RETPOLINE_LFENCE)
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
#endif
@@ -188,9 +188,11 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[];
/* The Spectre V2 mitigation variants */
enum spectre_v2_mitigation {
SPECTRE_V2_NONE,
- SPECTRE_V2_RETPOLINE_GENERIC,
- SPECTRE_V2_RETPOLINE_AMD,
- SPECTRE_V2_IBRS_ENHANCED,
+ SPECTRE_V2_RETPOLINE,
+ SPECTRE_V2_LFENCE,
+ SPECTRE_V2_EIBRS,
+ SPECTRE_V2_EIBRS_RETPOLINE,
+ SPECTRE_V2_EIBRS_LFENCE,
};
/* The indirect branch speculation control variants */
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 4d5810c8fab7..9cc82f305f4b 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -71,6 +71,16 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
extern bool __virt_addr_valid(unsigned long kaddr);
#define virt_addr_valid(kaddr) __virt_addr_valid((unsigned long) (kaddr))
+static __always_inline u64 __canonical_address(u64 vaddr, u8 vaddr_bits)
+{
+ return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
+}
+
+static __always_inline u64 __is_canonical_address(u64 vaddr, u8 vaddr_bits)
+{
+ return __canonical_address(vaddr, vaddr_bits) == vaddr;
+}
+
#endif /* __ASSEMBLY__ */
#include <asm-generic/memory_model.h>
diff --git a/arch/x86/include/asm/paravirt_api_clock.h b/arch/x86/include/asm/paravirt_api_clock.h
new file mode 100644
index 000000000000..65ac7cee0dad
--- /dev/null
+++ b/arch/x86/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include <asm/paravirt.h>
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index a69012e1903f..e1591467668e 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -279,7 +279,7 @@ extern void (*paravirt_iret)(void);
#define paravirt_type(op) \
[paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
- [paravirt_opptr] "i" (&(pv_ops.op))
+ [paravirt_opptr] "m" (pv_ops.op)
#define paravirt_clobber(clobber) \
[paravirt_clobber] "i" (clobber)
@@ -316,7 +316,7 @@ int paravirt_disable_iospace(void);
*/
#define PARAVIRT_CALL \
ANNOTATE_RETPOLINE_SAFE \
- "call *%c[paravirt_opptr];"
+ "call *%[paravirt_opptr];"
/*
* These macros are intended to wrap calls through one of the paravirt
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8fc1b5003713..58d9e4b1fa0a 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -7,7 +7,7 @@
*/
#define INTEL_PMC_MAX_GENERIC 32
-#define INTEL_PMC_MAX_FIXED 4
+#define INTEL_PMC_MAX_FIXED 16
#define INTEL_PMC_IDX_FIXED 32
#define X86_PMC_IDX_MAX 64
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 8a9432fb3802..62ab07e24aef 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -15,17 +15,12 @@
cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \
: (prot))
-/*
- * Macros to add or remove encryption attribute
- */
-#define pgprot_encrypted(prot) __pgprot(__sme_set(pgprot_val(prot)))
-#define pgprot_decrypted(prot) __pgprot(__sme_clr(pgprot_val(prot)))
-
#ifndef __ASSEMBLY__
#include <linux/spinlock.h>
#include <asm/x86_init.h>
#include <asm/pkru.h>
#include <asm/fpu/api.h>
+#include <asm/coco.h>
#include <asm-generic/pgtable_uffd.h>
#include <linux/page_table_check.h>
@@ -38,6 +33,12 @@ void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
void ptdump_walk_pgd_level_checkwx(void);
void ptdump_walk_user_pgd_level_checkwx(void);
+/*
+ * Macros to add or remove encryption attribute
+ */
+#define pgprot_encrypted(prot) __pgprot(cc_mkenc(pgprot_val(prot)))
+#define pgprot_decrypted(prot) __pgprot(cc_mkdec(pgprot_val(prot)))
+
#ifdef CONFIG_DEBUG_WX
#define debug_checkwx() ptdump_walk_pgd_level_checkwx()
#define debug_checkwx_user() ptdump_walk_user_pgd_level_checkwx()
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index fe5efbcba824..5f6daea1ee24 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -108,16 +108,18 @@ static __always_inline bool should_resched(int preempt_offset)
extern asmlinkage void preempt_schedule(void);
extern asmlinkage void preempt_schedule_thunk(void);
-#define __preempt_schedule_func preempt_schedule_thunk
+#define preempt_schedule_dynamic_enabled preempt_schedule_thunk
+#define preempt_schedule_dynamic_disabled NULL
extern asmlinkage void preempt_schedule_notrace(void);
extern asmlinkage void preempt_schedule_notrace_thunk(void);
-#define __preempt_schedule_notrace_func preempt_schedule_notrace_thunk
+#define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace_thunk
+#define preempt_schedule_notrace_dynamic_disabled NULL
#ifdef CONFIG_PREEMPT_DYNAMIC
-DECLARE_STATIC_CALL(preempt_schedule, __preempt_schedule_func);
+DECLARE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled);
#define __preempt_schedule() \
do { \
@@ -125,7 +127,7 @@ do { \
asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule) : ASM_CALL_CONSTRAINT); \
} while (0)
-DECLARE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func);
+DECLARE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled);
#define __preempt_schedule_notrace() \
do { \
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2c5f12ae7d04..a87e7c33d5ac 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -119,6 +119,8 @@ struct cpuinfo_x86 {
int x86_cache_mbm_width_offset;
int x86_power;
unsigned long loops_per_jiffy;
+ /* protected processor identification number */
+ u64 ppin;
/* cpuid returned max cores value: */
u16 x86_max_cores;
u16 apicid;
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 703663175a5a..4357e0f2cd5f 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -137,7 +137,7 @@ static __always_inline int user_mode(struct pt_regs *regs)
#endif
}
-static inline int v8086_mode(struct pt_regs *regs)
+static __always_inline int v8086_mode(struct pt_regs *regs)
{
#ifdef CONFIG_X86_32
return (regs->flags & X86_VM_MASK);
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index ff0f2d90338a..78ca53512486 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -53,7 +53,6 @@ int set_memory_global(unsigned long addr, int numpages);
int set_pages_array_uc(struct page **pages, int addrinarray);
int set_pages_array_wc(struct page **pages, int addrinarray);
-int set_pages_array_wt(struct page **pages, int addrinarray);
int set_pages_array_wb(struct page **pages, int addrinarray);
/*
@@ -84,7 +83,6 @@ int set_pages_rw(struct page *page, int numpages);
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
bool kernel_page_present(struct page *page);
-void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc);
extern int kernel_set_to_readonly;
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index b00dbc5fac2b..bb2fb78523ce 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -220,6 +220,42 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define SVM_NESTED_CTL_SEV_ENABLE BIT(1)
#define SVM_NESTED_CTL_SEV_ES_ENABLE BIT(2)
+
+/* AVIC */
+#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
+#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
+#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
+
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
+#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
+#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
+#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
+#define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK (0xFF)
+
+#define AVIC_DOORBELL_PHYSICAL_ID_MASK (0xFF)
+
+#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
+#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
+#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
+
+enum avic_ipi_failure_cause {
+ AVIC_IPI_FAILURE_INVALID_INT_TYPE,
+ AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
+ AVIC_IPI_FAILURE_INVALID_TARGET,
+ AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
+};
+
+
+/*
+ * 0xff is broadcast, so the max index allowed for the physical APIC ID
+ * table is 0xfe. APIC IDs above 0xff are reserved.
+ */
+#define AVIC_MAX_PHYSICAL_ID_COUNT 0xff
+
+#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
+#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+
+
struct vmcb_seg {
u16 selector;
u16 attrib;
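As a standalone sketch, composing an AVIC physical APIC ID table entry from the masks above (hypothetical values; the field layout is taken directly from the defines): host CPU id in bits 7:0, backing-page address in bits 51:12, is-running in bit 62, valid in bit 63.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t host_id = 3;
	uint64_t backing_page_pa = 0x12345000ULL;
	uint64_t entry = 0;

	entry |= host_id & 0xffULL;				/* HOST_PHYSICAL_ID */
	entry |= backing_page_pa & (0xffffffffffULL << 12);	/* BACKING_PAGE */
	entry |= 1ULL << 62;					/* IS_RUNNING */
	entry |= 1ULL << 63;					/* VALID */

	printf("entry = 0x%016llx\n", (unsigned long long)entry);
	return 0;
}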
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 2f0b6be8eaab..9619385bf749 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -110,6 +110,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu);
#define topology_logical_die_id(cpu) (cpu_data(cpu).logical_die_id)
#define topology_die_id(cpu) (cpu_data(cpu).cpu_die_id)
#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
+#define topology_ppin(cpu) (cpu_data(cpu).ppin)
extern unsigned int __max_die_per_package;
@@ -215,15 +216,26 @@ extern void arch_scale_freq_tick(void);
#define arch_scale_freq_tick arch_scale_freq_tick
extern void arch_set_max_freq_ratio(bool turbo_disabled);
+void init_freq_invariance(bool secondary, bool cppc_ready);
#else
static inline void arch_set_max_freq_ratio(bool turbo_disabled)
{
}
+static inline void init_freq_invariance(bool secondary, bool cppc_ready)
+{
+}
#endif
-#if defined(CONFIG_ACPI_CPPC_LIB) && defined(CONFIG_SMP)
+#ifdef CONFIG_ACPI_CPPC_LIB
void init_freq_invariance_cppc(void);
-#define init_freq_invariance_cppc init_freq_invariance_cppc
+#define arch_init_invariance_cppc init_freq_invariance_cppc
+
+bool amd_set_max_freq_ratio(u64 *ratio);
+#else
+static inline bool amd_set_max_freq_ratio(u64 *ratio)
+{
+ return false;
+}
#endif
#endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 22b7412c08f6..e9170457697e 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -142,6 +142,21 @@ struct x86_init_acpi {
};
/**
+ * struct x86_guest - Functions used by misc guest incarnations like SEV, TDX, etc.
+ *
+ * @enc_status_change_prepare: Notify HV before the encryption status of a range is changed
+ * @enc_status_change_finish: Notify HV after the encryption status of a range is changed
+ * @enc_tlb_flush_required: Returns true if a TLB flush is needed before changing page encryption status
+ * @enc_cache_flush_required: Returns true if a cache flush is needed before changing page encryption status
+ */
+struct x86_guest {
+ void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
+ bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
+ bool (*enc_tlb_flush_required)(bool enc);
+ bool (*enc_cache_flush_required)(void);
+};
+
+/**
* struct x86_init_ops - functions for platform specific setup
*
*/
@@ -287,6 +302,7 @@ struct x86_platform_ops {
struct x86_legacy_features legacy;
void (*set_legacy_features)(void);
struct x86_hyper_runtime hyper;
+ struct x86_guest guest;
};
struct x86_apic_ops {
diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h
index a9630104f1c4..78e667a31d6c 100644
--- a/arch/x86/include/asm/xen/cpuid.h
+++ b/arch/x86/include/asm/xen/cpuid.h
@@ -100,6 +100,13 @@
/* Memory mapped from other domains has valid IOMMU entries */
#define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2)
#define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) /* vcpu id is present in EBX */
+#define XEN_HVM_CPUID_DOMID_PRESENT (1u << 4) /* domid is present in ECX */
+/*
+ * Bits 55:49 from the IO-APIC RTE and bits 11:5 from the MSI address can be
+ * used to store high bits for the Destination ID. This expands the Destination
+ * ID field from 8 to 15 bits, allowing APIC IDs of up to 32768 to be targeted.
+ */
+#define XEN_HVM_CPUID_EXT_DEST_ID (1u << 5)
/*
* Leaf 6 (0x40000x05)
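
A sketch of what the extended Destination ID layout implies for an MSI address once XEN_HVM_CPUID_EXT_DEST_ID is advertised (the field packing shown is illustrative):

static inline u32 msi_addr_ext_destid(u32 apic_id)
{
	u32 addr = 0xfee00000;			/* MSI address base */

	addr |= (apic_id & 0xff) << 12;		/* classic dest ID, bits 19:12 */
	addr |= ((apic_id >> 8) & 0x7f) << 5;	/* high 7 bits, bits 11:5 */
	return addr;				/* 8 + 7 = 15 bits of dest ID */
}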
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 1bf2ad34188a..16f548a661cf 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -43,20 +43,6 @@ static inline uint32_t xen_cpuid_base(void)
return hypervisor_cpuid_base("XenVMMXenVMM", 2);
}
-#ifdef CONFIG_XEN
-extern bool __init xen_hvm_need_lapic(void);
-
-static inline bool __init xen_x2apic_para_available(void)
-{
- return xen_hvm_need_lapic();
-}
-#else
-static inline bool __init xen_x2apic_para_available(void)
-{
- return (xen_cpuid_base() != 0);
-}
-#endif
-
struct pci_dev;
#ifdef CONFIG_XEN_PV_DOM0
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index 2ee95a7769e6..7b0307acc410 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -57,7 +57,8 @@
op(i + 3, 3)
static void
-xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_sse_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
unsigned long lines = bytes >> 8;
@@ -108,7 +109,8 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
unsigned long lines = bytes >> 8;
@@ -142,8 +144,9 @@ xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_sse_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
unsigned long lines = bytes >> 8;
@@ -201,8 +204,9 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
unsigned long lines = bytes >> 8;
@@ -238,8 +242,10 @@ xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_sse_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
unsigned long lines = bytes >> 8;
@@ -304,8 +310,10 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
unsigned long lines = bytes >> 8;
@@ -343,8 +351,11 @@ xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_sse_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
unsigned long lines = bytes >> 8;
@@ -416,8 +427,11 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
unsigned long lines = bytes >> 8;
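
The point of the __restrict conversions above: guaranteeing the buffers do not alias lets the compiler schedule loads and stores freely across the unrolled SSE blocks. A toy scalar equivalent of the contract being made:

static void
xor_2_toy(unsigned long bytes, unsigned long * __restrict p1,
	  const unsigned long * __restrict p2)
{
	unsigned long n = bytes / sizeof(unsigned long);

	while (n--)
		*p1++ ^= *p2++;	/* *p2 need not be reloaded after each store */
}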
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 67ceb790e639..7a6b9474591e 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -21,7 +21,8 @@
#include <asm/fpu/api.h>
static void
-xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_pII_mmx_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
unsigned long lines = bytes >> 7;
@@ -64,8 +65,9 @@ xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_pII_mmx_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
unsigned long lines = bytes >> 7;
@@ -113,8 +115,10 @@ xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_pII_mmx_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
unsigned long lines = bytes >> 7;
@@ -168,8 +172,11 @@ xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
static void
-xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_pII_mmx_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
unsigned long lines = bytes >> 7;
@@ -248,7 +255,8 @@ xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
#undef BLOCK
static void
-xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_p5_mmx_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
unsigned long lines = bytes >> 6;
@@ -295,8 +303,9 @@ xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_p5_mmx_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
unsigned long lines = bytes >> 6;
@@ -352,8 +361,10 @@ xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_p5_mmx_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
unsigned long lines = bytes >> 6;
@@ -418,8 +429,11 @@ xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_p5_mmx_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
unsigned long lines = bytes >> 6;
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 0c4e5b5e3852..7f81dd5897f4 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -26,7 +26,8 @@
BLOCK4(8) \
BLOCK4(12)
-static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
+static void xor_avx_2(unsigned long bytes, unsigned long * __restrict p0,
+ const unsigned long * __restrict p1)
{
unsigned long lines = bytes >> 9;
@@ -52,8 +53,9 @@ do { \
kernel_fpu_end();
}
-static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
- unsigned long *p2)
+static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0,
+ const unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
unsigned long lines = bytes >> 9;
@@ -82,8 +84,10 @@ do { \
kernel_fpu_end();
}
-static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
- unsigned long *p2, unsigned long *p3)
+static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0,
+ const unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
unsigned long lines = bytes >> 9;
@@ -115,8 +119,11 @@ do { \
kernel_fpu_end();
}
-static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
- unsigned long *p2, unsigned long *p3, unsigned long *p4)
+static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0,
+ const unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
unsigned long lines = bytes >> 9;
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 2da3316bb559..bf6e96011dfe 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -452,6 +452,9 @@ struct kvm_sync_regs {
#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
+/* attributes for system fd (group 0) */
+#define KVM_X86_XCOMP_GUEST_SUPP 0
+
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
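
A sketch of the intended userspace query for the new group-0 system attribute, assuming the VMM has already checked KVM_CAP_SYS_ATTRIBUTES (error handling elided):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static __u64 query_xcomp_guest_supp(int kvm_fd)	/* fd on /dev/kvm */
{
	__u64 supp = 0;
	struct kvm_device_attr attr = {
		.group = 0,
		.attr  = KVM_X86_XCOMP_GUEST_SUPP,
		.addr  = (__u64)(unsigned long)&supp,
	};

	ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
	return supp;	/* bitmap of xfeatures KVM can expose to guests */
}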
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 6aef9ee28a39..6462e3dd98f4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -21,7 +21,6 @@ CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
CFLAGS_REMOVE_head64.o = -pg
CFLAGS_REMOVE_sev.o = -pg
-CFLAGS_REMOVE_cc_platform.o = -pg
endif
KASAN_SANITIZE_head$(BITS).o := n
@@ -30,7 +29,6 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n
KASAN_SANITIZE_stacktrace.o := n
KASAN_SANITIZE_paravirt.o := n
KASAN_SANITIZE_sev.o := n
-KASAN_SANITIZE_cc_platform.o := n
# With some compiler versions the generated code results in boot hangs, caused
# by several compilation units. To be safe, disable all instrumentation.
@@ -49,7 +47,6 @@ endif
KCOV_INSTRUMENT := n
CFLAGS_head$(BITS).o += -fno-stack-protector
-CFLAGS_cc_platform.o += -fno-stack-protector
CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace
@@ -151,8 +148,6 @@ obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o
-obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += cc_platform.o
-
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index cf340d85946a..fc17b3f136fe 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -3,7 +3,7 @@
obj-$(CONFIG_ACPI) += boot.o
obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o
obj-$(CONFIG_ACPI_APEI) += apei.o
-obj-$(CONFIG_ACPI_CPPC_LIB) += cppc_msr.o
+obj-$(CONFIG_ACPI_CPPC_LIB) += cppc.o
ifneq ($(CONFIG_ACPI_PROCESSOR),)
obj-y += cstate.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 5b6d1a95776f..0d01e7f5078c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1328,6 +1328,17 @@ static int __init disable_acpi_pci(const struct dmi_system_id *d)
return 0;
}
+static int __init disable_acpi_xsdt(const struct dmi_system_id *d)
+{
+ if (!acpi_force) {
+ pr_notice("%s detected: force use of acpi=rsdt\n", d->ident);
+ acpi_gbl_do_not_use_xsdt = TRUE;
+ } else {
+ pr_notice("Warning: DMI blacklist says broken, but acpi XSDT forced\n");
+ }
+ return 0;
+}
+
static int __init dmi_disable_acpi(const struct dmi_system_id *d)
{
if (!acpi_force) {
@@ -1451,6 +1462,19 @@ static const struct dmi_system_id acpi_dmi_table[] __initconst = {
DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
},
},
+ /*
+ * Boxes that need ACPI XSDT use disabled due to corrupted tables
+ */
+ {
+ .callback = disable_acpi_xsdt,
+ .ident = "Advantech DAC-BJ01",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "NEC"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Bearlake CRB Board"),
+ DMI_MATCH(DMI_BIOS_VERSION, "V1.12"),
+ DMI_MATCH(DMI_BIOS_DATE, "02/01/2011"),
+ },
+ },
{}
};
diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c
new file mode 100644
index 000000000000..df1644d9b3b6
--- /dev/null
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * cppc.c: CPPC Interface for x86
+ * Copyright (c) 2016, Intel Corporation.
+ */
+
+#include <acpi/cppc_acpi.h>
+#include <asm/msr.h>
+#include <asm/processor.h>
+#include <asm/topology.h>
+
+/* Refer to drivers/acpi/cppc_acpi.c for the description of functions */
+
+bool cpc_ffh_supported(void)
+{
+ return true;
+}
+
+int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val)
+{
+ int err;
+
+ err = rdmsrl_safe_on_cpu(cpunum, reg->address, val);
+ if (!err) {
+ u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
+ reg->bit_offset);
+
+ *val &= mask;
+ *val >>= reg->bit_offset;
+ }
+ return err;
+}
+
+int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
+{
+ u64 rd_val;
+ int err;
+
+ err = rdmsrl_safe_on_cpu(cpunum, reg->address, &rd_val);
+ if (!err) {
+ u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
+ reg->bit_offset);
+
+ val <<= reg->bit_offset;
+ val &= mask;
+ rd_val &= ~mask;
+ rd_val |= val;
+ err = wrmsrl_safe_on_cpu(cpunum, reg->address, rd_val);
+ }
+ return err;
+}
+
+bool amd_set_max_freq_ratio(u64 *ratio)
+{
+ struct cppc_perf_caps perf_caps;
+ u64 highest_perf, nominal_perf;
+ u64 perf_ratio;
+ int rc;
+
+ if (!ratio)
+ return false;
+
+ rc = cppc_get_perf_caps(0, &perf_caps);
+ if (rc) {
+ pr_debug("Could not retrieve perf counters (%d)\n", rc);
+ return false;
+ }
+
+ highest_perf = amd_get_highest_perf();
+ nominal_perf = perf_caps.nominal_perf;
+
+ if (!highest_perf || !nominal_perf) {
+ pr_debug("Could not retrieve highest or nominal performance\n");
+ return false;
+ }
+
+ perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
+ /* midpoint between max_boost and max_P */
+ perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
+ if (!perf_ratio) {
+ pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
+ return false;
+ }
+
+ *ratio = perf_ratio;
+ arch_set_max_freq_ratio(false);
+
+ return true;
+}
+
+static DEFINE_MUTEX(freq_invariance_lock);
+
+void init_freq_invariance_cppc(void)
+{
+ static bool secondary;
+
+ mutex_lock(&freq_invariance_lock);
+
+ init_freq_invariance(secondary, true);
+ secondary = true;
+
+ mutex_unlock(&freq_invariance_lock);
+}
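
A worked pass through the midpoint math in amd_set_max_freq_ratio(), with illustrative numbers:

static u64 midpoint_ratio(u64 highest_perf, u64 nominal_perf)
{
	u64 r = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);

	/* highest=300, nominal=200: r = 1536, midpoint = 1280 (1.25x) */
	return (r + SCHED_CAPACITY_SCALE) >> 1;
}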
diff --git a/arch/x86/kernel/acpi/cppc_msr.c b/arch/x86/kernel/acpi/cppc_msr.c
deleted file mode 100644
index b961de569e7e..000000000000
--- a/arch/x86/kernel/acpi/cppc_msr.c
+++ /dev/null
@@ -1,49 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * cppc_msr.c: MSR Interface for CPPC
- * Copyright (c) 2016, Intel Corporation.
- */
-
-#include <acpi/cppc_acpi.h>
-#include <asm/msr.h>
-
-/* Refer to drivers/acpi/cppc_acpi.c for the description of functions */
-
-bool cpc_ffh_supported(void)
-{
- return true;
-}
-
-int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val)
-{
- int err;
-
- err = rdmsrl_safe_on_cpu(cpunum, reg->address, val);
- if (!err) {
- u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
- reg->bit_offset);
-
- *val &= mask;
- *val >>= reg->bit_offset;
- }
- return err;
-}
-
-int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
-{
- u64 rd_val;
- int err;
-
- err = rdmsrl_safe_on_cpu(cpunum, reg->address, &rd_val);
- if (!err) {
- u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
- reg->bit_offset);
-
- val <<= reg->bit_offset;
- val &= mask;
- rd_val &= ~mask;
- rd_val |= val;
- err = wrmsrl_safe_on_cpu(cpunum, reg->address, rd_val);
- }
- return err;
-}
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 1e97f944b47d..3b7f4cdbf2e0 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -15,6 +15,7 @@
#include <asm/desc.h>
#include <asm/cacheflush.h>
#include <asm/realmode.h>
+#include <asm/hypervisor.h>
#include <linux/ftrace.h>
#include "../../realmode/rm/wakeup.h"
@@ -140,9 +141,9 @@ static int __init acpi_sleep_setup(char *str)
acpi_realmode_flags |= 4;
#ifdef CONFIG_HIBERNATION
if (strncmp(str, "s4_hwsig", 8) == 0)
- acpi_check_s4_hw_signature(1);
+ acpi_check_s4_hw_signature = 1;
if (strncmp(str, "s4_nohwsig", 10) == 0)
- acpi_check_s4_hw_signature(0);
+ acpi_check_s4_hw_signature = 0;
#endif
if (strncmp(str, "nonvs", 5) == 0)
acpi_nvs_nosave();
@@ -160,3 +161,21 @@ static int __init acpi_sleep_setup(char *str)
}
__setup("acpi_sleep=", acpi_sleep_setup);
+
+#if defined(CONFIG_HIBERNATION) && defined(CONFIG_HYPERVISOR_GUEST)
+static int __init init_s4_sigcheck(void)
+{
+ /*
+ * If running on a hypervisor, honour the ACPI specification
+ * by default and trigger a clean reboot when the hardware
+ * signature in FACS is changed after hibernation.
+ */
+ if (acpi_check_s4_hw_signature == -1 &&
+ !hypervisor_is_type(X86_HYPER_NATIVE))
+ acpi_check_s4_hw_signature = 1;
+
+ return 0;
+}
+/* This must happen before acpi_init() which is a subsys initcall */
+arch_initcall(init_s4_sigcheck);
+#endif
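
The command-line overrides still take precedence over the new hypervisor default, since the initcall only acts while the flag is at its -1 initial value:

	acpi_sleep=s4_hwsig	# always check the FACS hardware signature
	acpi_sleep=s4_nohwsig	# never check it, even under a hypervisor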
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5007c3ffe96f..b4470eabf151 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -389,7 +389,7 @@ static int emit_indirect(int op, int reg, u8 *bytes)
*
* CALL *%\reg
*
- * It also tries to inline spectre_v2=retpoline,amd when size permits.
+ * It also tries to inline spectre_v2=retpoline,lfence when size permits.
*/
static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
{
@@ -407,7 +407,7 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
BUG_ON(reg == 4);
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
- !cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD))
+ !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE))
return -1;
op = insn->opcode.bytes[0];
@@ -438,9 +438,9 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
}
/*
- * For RETPOLINE_AMD: prepend the indirect CALL/JMP with an LFENCE.
+ * For RETPOLINE_LFENCE: prepend the indirect CALL/JMP with an LFENCE.
*/
- if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
bytes[i++] = 0x0f;
bytes[i++] = 0xae;
bytes[i++] = 0xe8; /* LFENCE */
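
For reference, the inlined sequence this emits for the LFENCE flavour, taking an indirect jump through %rax as an example (disassembly is illustrative):

	0f ae e8	lfence
	ff e0		jmp *%rax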
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 4edb6f0f628c..0c0b09796ced 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -394,35 +394,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
per_cpu(cpu_llc_id, cpu) = c->cpu_die_id = c->phys_proc_id;
}
-static void amd_detect_ppin(struct cpuinfo_x86 *c)
-{
- unsigned long long val;
-
- if (!cpu_has(c, X86_FEATURE_AMD_PPIN))
- return;
-
- /* When PPIN is defined in CPUID, still need to check PPIN_CTL MSR */
- if (rdmsrl_safe(MSR_AMD_PPIN_CTL, &val))
- goto clear_ppin;
-
- /* PPIN is locked in disabled mode, clear feature bit */
- if ((val & 3UL) == 1UL)
- goto clear_ppin;
-
- /* If PPIN is disabled, try to enable it */
- if (!(val & 2UL)) {
- wrmsrl_safe(MSR_AMD_PPIN_CTL, val | 2UL);
- rdmsrl_safe(MSR_AMD_PPIN_CTL, &val);
- }
-
- /* If PPIN_EN bit is 1, return from here; otherwise fall through */
- if (val & 2UL)
- return;
-
-clear_ppin:
- clear_cpu_cap(c, X86_FEATURE_AMD_PPIN);
-}
-
u32 amd_get_nodes_per_socket(void)
{
return nodes_per_socket;
@@ -585,6 +556,8 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
* the SME physical address space reduction value.
* If BIOS has not enabled SME then don't advertise the
* SME feature (set in scattered.c).
+ * If the kernel has not enabled SME via any means then
+ * don't advertise the SME feature.
* For SEV: If BIOS has not enabled SEV then don't advertise the
* SEV and SEV_ES feature (set in scattered.c).
*
@@ -607,6 +580,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
if (IS_ENABLED(CONFIG_X86_32))
goto clear_all;
+ if (!sme_me_mask)
+ setup_clear_cpu_cap(X86_FEATURE_SME);
+
rdmsrl(MSR_K7_HWCR, msr);
if (!(msr & MSR_K7_HWCR_SMMLOCK))
goto clear_sev;
@@ -947,7 +923,6 @@ static void init_amd(struct cpuinfo_x86 *c)
amd_detect_cmp(c);
amd_get_topology(c);
srat_detect_node(c);
- amd_detect_ppin(c);
init_amd_cacheinfo(c);
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index 22911deacb6e..9ca008f9e9b1 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -91,7 +91,7 @@ unsigned int aperfmperf_get_khz(int cpu)
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return 0;
- if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
+ if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
return 0;
if (rcu_is_idle_cpu(cpu))
@@ -114,7 +114,7 @@ void arch_freq_prepare_all(void)
return;
for_each_online_cpu(cpu) {
- if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
+ if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
continue;
if (rcu_is_idle_cpu(cpu))
continue; /* Idle CPUs are completely uninteresting. */
@@ -136,7 +136,7 @@ unsigned int arch_freq_get_on_cpu(int cpu)
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return 0;
- if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
+ if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
return 0;
if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 1c1f218a701d..6296e1ebed1d 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -16,6 +16,7 @@
#include <linux/prctl.h>
#include <linux/sched/smt.h>
#include <linux/pgtable.h>
+#include <linux/bpf.h>
#include <asm/spec-ctrl.h>
#include <asm/cmdline.h>
@@ -650,6 +651,32 @@ static inline const char *spectre_v2_module_string(void)
static inline const char *spectre_v2_module_string(void) { return ""; }
#endif
+#define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n"
+#define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n"
+#define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n"
+
+#ifdef CONFIG_BPF_SYSCALL
+void unpriv_ebpf_notify(int new_state)
+{
+ if (new_state)
+ return;
+
+ /* Unprivileged eBPF is enabled */
+
+ switch (spectre_v2_enabled) {
+ case SPECTRE_V2_EIBRS:
+ pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
+ break;
+ case SPECTRE_V2_EIBRS_LFENCE:
+ if (sched_smt_active())
+ pr_err(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG);
+ break;
+ default:
+ break;
+ }
+}
+#endif
+
static inline bool match_option(const char *arg, int arglen, const char *opt)
{
int len = strlen(opt);
@@ -664,7 +691,10 @@ enum spectre_v2_mitigation_cmd {
SPECTRE_V2_CMD_FORCE,
SPECTRE_V2_CMD_RETPOLINE,
SPECTRE_V2_CMD_RETPOLINE_GENERIC,
- SPECTRE_V2_CMD_RETPOLINE_AMD,
+ SPECTRE_V2_CMD_RETPOLINE_LFENCE,
+ SPECTRE_V2_CMD_EIBRS,
+ SPECTRE_V2_CMD_EIBRS_RETPOLINE,
+ SPECTRE_V2_CMD_EIBRS_LFENCE,
};
enum spectre_v2_user_cmd {
@@ -737,6 +767,13 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
return SPECTRE_V2_USER_CMD_AUTO;
}
+static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
+{
+ return (mode == SPECTRE_V2_EIBRS ||
+ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
+ mode == SPECTRE_V2_EIBRS_LFENCE);
+}
+
static void __init
spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
{
@@ -804,7 +841,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
*/
if (!boot_cpu_has(X86_FEATURE_STIBP) ||
!smt_possible ||
- spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+ spectre_v2_in_eibrs_mode(spectre_v2_enabled))
return;
/*
@@ -824,9 +861,11 @@ set_mode:
static const char * const spectre_v2_strings[] = {
[SPECTRE_V2_NONE] = "Vulnerable",
- [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
- [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
- [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
+ [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines",
+ [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE",
+ [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS",
+ [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE",
+ [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines",
};
static const struct {
@@ -837,8 +876,12 @@ static const struct {
{ "off", SPECTRE_V2_CMD_NONE, false },
{ "on", SPECTRE_V2_CMD_FORCE, true },
{ "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
- { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
+ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false },
+ { "retpoline,lfence", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false },
{ "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
+ { "eibrs", SPECTRE_V2_CMD_EIBRS, false },
+ { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false },
+ { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false },
{ "auto", SPECTRE_V2_CMD_AUTO, false },
};
@@ -875,10 +918,30 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
}
if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
- cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
- cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
+ cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE ||
+ cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC ||
+ cmd == SPECTRE_V2_CMD_EIBRS_LFENCE ||
+ cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) &&
!IS_ENABLED(CONFIG_RETPOLINE)) {
- pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
+ pr_err("%s selected but not compiled in. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if ((cmd == SPECTRE_V2_CMD_EIBRS ||
+ cmd == SPECTRE_V2_CMD_EIBRS_LFENCE ||
+ cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) &&
+ !boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
+ pr_err("%s selected but CPU doesn't have eIBRS. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if ((cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE ||
+ cmd == SPECTRE_V2_CMD_EIBRS_LFENCE) &&
+ !boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+ pr_err("%s selected, but CPU doesn't have a serializing LFENCE. Switching to AUTO select\n",
+ mitigation_options[i].option);
return SPECTRE_V2_CMD_AUTO;
}
@@ -887,6 +950,16 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return cmd;
}
+static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
+{
+ if (!IS_ENABLED(CONFIG_RETPOLINE)) {
+ pr_err("Kernel not compiled with retpoline; no mitigation available!");
+ return SPECTRE_V2_NONE;
+ }
+
+ return SPECTRE_V2_RETPOLINE;
+}
+
static void __init spectre_v2_select_mitigation(void)
{
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -907,49 +980,64 @@ static void __init spectre_v2_select_mitigation(void)
case SPECTRE_V2_CMD_FORCE:
case SPECTRE_V2_CMD_AUTO:
if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
- mode = SPECTRE_V2_IBRS_ENHANCED;
- /* Force it so VMEXIT will restore correctly */
- x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
- goto specv2_set_mode;
+ mode = SPECTRE_V2_EIBRS;
+ break;
}
- if (IS_ENABLED(CONFIG_RETPOLINE))
- goto retpoline_auto;
+
+ mode = spectre_v2_select_retpoline();
break;
- case SPECTRE_V2_CMD_RETPOLINE_AMD:
- if (IS_ENABLED(CONFIG_RETPOLINE))
- goto retpoline_amd;
+
+ case SPECTRE_V2_CMD_RETPOLINE_LFENCE:
+ pr_err(SPECTRE_V2_LFENCE_MSG);
+ mode = SPECTRE_V2_LFENCE;
break;
+
case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
- if (IS_ENABLED(CONFIG_RETPOLINE))
- goto retpoline_generic;
+ mode = SPECTRE_V2_RETPOLINE;
break;
+
case SPECTRE_V2_CMD_RETPOLINE:
- if (IS_ENABLED(CONFIG_RETPOLINE))
- goto retpoline_auto;
+ mode = spectre_v2_select_retpoline();
+ break;
+
+ case SPECTRE_V2_CMD_EIBRS:
+ mode = SPECTRE_V2_EIBRS;
+ break;
+
+ case SPECTRE_V2_CMD_EIBRS_LFENCE:
+ mode = SPECTRE_V2_EIBRS_LFENCE;
+ break;
+
+ case SPECTRE_V2_CMD_EIBRS_RETPOLINE:
+ mode = SPECTRE_V2_EIBRS_RETPOLINE;
break;
}
- pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");
- return;
-retpoline_auto:
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
- retpoline_amd:
- if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
- pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
- goto retpoline_generic;
- }
- mode = SPECTRE_V2_RETPOLINE_AMD;
- setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
- setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
- } else {
- retpoline_generic:
- mode = SPECTRE_V2_RETPOLINE_GENERIC;
+ if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+ pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
+
+ if (spectre_v2_in_eibrs_mode(mode)) {
+ /* Force it so VMEXIT will restore correctly */
+ x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ }
+
+ switch (mode) {
+ case SPECTRE_V2_NONE:
+ case SPECTRE_V2_EIBRS:
+ break;
+
+ case SPECTRE_V2_LFENCE:
+ case SPECTRE_V2_EIBRS_LFENCE:
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE);
+ fallthrough;
+
+ case SPECTRE_V2_RETPOLINE:
+ case SPECTRE_V2_EIBRS_RETPOLINE:
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ break;
}
-specv2_set_mode:
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
@@ -975,7 +1063,7 @@ specv2_set_mode:
* the CPU supports Enhanced IBRS, the kernel might unintentionally not
* enable IBRS around firmware calls.
*/
- if (boot_cpu_has(X86_FEATURE_IBRS) && mode != SPECTRE_V2_IBRS_ENHANCED) {
+ if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
@@ -1045,6 +1133,10 @@ void cpu_bugs_smt_update(void)
{
mutex_lock(&spec_ctrl_mutex);
+ if (sched_smt_active() && unprivileged_ebpf_enabled() &&
+ spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
+ pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG);
+
switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
break;
@@ -1684,7 +1776,7 @@ static ssize_t tsx_async_abort_show_state(char *buf)
static char *stibp_state(void)
{
- if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+ if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
return "";
switch (spectre_v2_user_stibp) {
@@ -1714,6 +1806,27 @@ static char *ibpb_state(void)
return "";
}
+static ssize_t spectre_v2_show_state(char *buf)
+{
+ if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
+ return sprintf(buf, "Vulnerable: LFENCE\n");
+
+ if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+ return sprintf(buf, "Vulnerable: eIBRS with unprivileged eBPF\n");
+
+ if (sched_smt_active() && unprivileged_ebpf_enabled() &&
+ spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
+ return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
+
+ return sprintf(buf, "%s%s%s%s%s%s\n",
+ spectre_v2_strings[spectre_v2_enabled],
+ ibpb_state(),
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ stibp_state(),
+ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
+ spectre_v2_module_string());
+}
+
static ssize_t srbds_show_state(char *buf)
{
return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
@@ -1739,12 +1852,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]);
case X86_BUG_SPECTRE_V2:
- return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
- ibpb_state(),
- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
- stibp_state(),
- boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
- spectre_v2_module_string());
+ return spectre_v2_show_state(buf);
case X86_BUG_SPEC_STORE_BYPASS:
return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7b8382c11788..64deb7727d00 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -88,6 +88,83 @@ EXPORT_SYMBOL_GPL(get_llc_id);
/* L2 cache ID of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id) = BAD_APICID;
+static struct ppin_info {
+ int feature;
+ int msr_ppin_ctl;
+ int msr_ppin;
+} ppin_info[] = {
+ [X86_VENDOR_INTEL] = {
+ .feature = X86_FEATURE_INTEL_PPIN,
+ .msr_ppin_ctl = MSR_PPIN_CTL,
+ .msr_ppin = MSR_PPIN
+ },
+ [X86_VENDOR_AMD] = {
+ .feature = X86_FEATURE_AMD_PPIN,
+ .msr_ppin_ctl = MSR_AMD_PPIN_CTL,
+ .msr_ppin = MSR_AMD_PPIN
+ },
+};
+
+static const struct x86_cpu_id ppin_cpuids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_AMD_PPIN, &ppin_info[X86_VENDOR_AMD]),
+ X86_MATCH_FEATURE(X86_FEATURE_INTEL_PPIN, &ppin_info[X86_VENDOR_INTEL]),
+
+ /* Legacy models without CPUID enumeration */
+ X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &ppin_info[X86_VENDOR_INTEL]),
+
+ {}
+};
+
+static void ppin_init(struct cpuinfo_x86 *c)
+{
+ const struct x86_cpu_id *id;
+ unsigned long long val;
+ struct ppin_info *info;
+
+ id = x86_match_cpu(ppin_cpuids);
+ if (!id)
+ return;
+
+ /*
+ * Testing the presence of the MSR is not enough. Need to check
+ * that the PPIN_CTL allows reading of the PPIN.
+ */
+ info = (struct ppin_info *)id->driver_data;
+
+ if (rdmsrl_safe(info->msr_ppin_ctl, &val))
+ goto clear_ppin;
+
+ if ((val & 3UL) == 1UL) {
+ /* PPIN locked in disabled mode */
+ goto clear_ppin;
+ }
+
+ /* If PPIN is disabled, try to enable */
+ if (!(val & 2UL)) {
+ wrmsrl_safe(info->msr_ppin_ctl, val | 2UL);
+ rdmsrl_safe(info->msr_ppin_ctl, &val);
+ }
+
+ /* Is the enable bit set? */
+ if (val & 2UL) {
+ c->ppin = __rdmsr(info->msr_ppin);
+ set_cpu_cap(c, info->feature);
+ return;
+ }
+
+clear_ppin:
+ clear_cpu_cap(c, info->feature);
+}
+
/* correctly size the local cpu masks */
void __init setup_cpu_local_masks(void)
{
@@ -1655,6 +1732,8 @@ static void identify_cpu(struct cpuinfo_x86 *c)
c->x86_capability[i] |= boot_cpu_data.x86_capability[i];
}
+ ppin_init(c);
+
/* Init Machine Check Exception if available. */
mcheck_cpu_init(c);
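
With the value cached by ppin_init(), consumers can use the topology_ppin() accessor added earlier instead of reading MSRs themselves; a sketch (the mce_setup() hunk below does exactly this):

static u64 get_ppin(int cpu)
{
	return topology_ppin(cpu);	/* assumption: reads as 0 when unsupported */
}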
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index a1e2f41796dc..1940d305db1c 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -423,7 +423,7 @@ static void threshold_restart_bank(void *_tr)
u32 hi, lo;
/* sysfs write might race against an offline operation */
- if (this_cpu_read(threshold_banks))
+ if (!this_cpu_read(threshold_banks) && !tr->set_lvt_off)
return;
rdmsr(tr->b->address, lo, hi);
@@ -993,6 +993,7 @@ static struct attribute *default_attrs[] = {
NULL, /* possibly interrupt_enable if supported, see below */
NULL,
};
+ATTRIBUTE_GROUPS(default);
#define to_block(k) container_of(k, struct threshold_block, kobj)
#define to_attr(a) container_of(a, struct threshold_attr, attr)
@@ -1029,7 +1030,7 @@ static void threshold_block_release(struct kobject *kobj);
static struct kobj_type threshold_ktype = {
.sysfs_ops = &threshold_ops,
- .default_attrs = default_attrs,
+ .default_groups = default_groups,
.release = threshold_block_release,
};
@@ -1101,10 +1102,10 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb
b->threshold_limit = THRESHOLD_MAX;
if (b->interrupt_capable) {
- threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
+ default_attrs[2] = &interrupt_enable.attr;
b->interrupt_enable = 1;
} else {
- threshold_ktype.default_attrs[2] = NULL;
+ default_attrs[2] = NULL;
}
INIT_LIST_HEAD(&b->miscj);
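
The default_attrs -> default_groups switch leans on ATTRIBUTE_GROUPS(); simplified from include/linux/sysfs.h, the macro roughly expands to:

static const struct attribute_group default_group = {
	.attrs = default_attrs,
};
static const struct attribute_group *default_groups[] = {
	&default_group,
	NULL,
};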
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5818b837fd4d..e53d3e6096fb 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -138,12 +138,7 @@ void mce_setup(struct mce *m)
m->socketid = cpu_data(m->extcpu).phys_proc_id;
m->apicid = cpu_data(m->extcpu).initial_apicid;
m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP);
-
- if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
- m->ppin = __rdmsr(MSR_PPIN);
- else if (this_cpu_has(X86_FEATURE_AMD_PPIN))
- m->ppin = __rdmsr(MSR_AMD_PPIN);
-
+ m->ppin = cpu_data(m->extcpu).ppin;
m->microcode = boot_cpu_data.microcode;
}
@@ -1304,10 +1299,12 @@ static void kill_me_maybe(struct callback_head *cb)
/*
* -EHWPOISON from memory_failure() means that it already sent SIGBUS
- * to the current process with the proper error info, so no need to
- * send SIGBUS here again.
+ * to the current process with the proper error info,
+ * -EOPNOTSUPP means hwpoison_filter() filtered the error event.
+ *
+ * In both cases, no further processing is required.
*/
- if (ret == -EHWPOISON)
+ if (ret == -EHWPOISON || ret == -EOPNOTSUPP)
return;
pr_err("Memory error not recovered");
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index bb9a46a804bf..95275a5e57e0 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -470,46 +470,6 @@ void intel_clear_lmce(void)
wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
}
-static void intel_ppin_init(struct cpuinfo_x86 *c)
-{
- unsigned long long val;
-
- /*
- * Even if testing the presence of the MSR would be enough, we don't
- * want to risk the situation where other models reuse this MSR for
- * other purposes.
- */
- switch (c->x86_model) {
- case INTEL_FAM6_IVYBRIDGE_X:
- case INTEL_FAM6_HASWELL_X:
- case INTEL_FAM6_BROADWELL_D:
- case INTEL_FAM6_BROADWELL_X:
- case INTEL_FAM6_SKYLAKE_X:
- case INTEL_FAM6_ICELAKE_X:
- case INTEL_FAM6_SAPPHIRERAPIDS_X:
- case INTEL_FAM6_XEON_PHI_KNL:
- case INTEL_FAM6_XEON_PHI_KNM:
-
- if (rdmsrl_safe(MSR_PPIN_CTL, &val))
- return;
-
- if ((val & 3UL) == 1UL) {
- /* PPIN locked in disabled mode */
- return;
- }
-
- /* If PPIN is disabled, try to enable */
- if (!(val & 2UL)) {
- wrmsrl_safe(MSR_PPIN_CTL, val | 2UL);
- rdmsrl_safe(MSR_PPIN_CTL, &val);
- }
-
- /* Is the enable bit set? */
- if (val & 2UL)
- set_cpu_cap(c, X86_FEATURE_INTEL_PPIN);
- }
-}
-
/*
* Enable additional error logs from the integrated
* memory controller on processors that support this.
@@ -534,7 +494,6 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
intel_init_cmci();
intel_init_lmce();
- intel_ppin_init(c);
intel_imc_init(c);
}
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 5a99f993e639..e0a572472052 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -33,6 +33,7 @@
#include <asm/nmi.h>
#include <clocksource/hyperv_timer.h>
#include <asm/numa.h>
+#include <asm/coco.h>
/* Is Linux running as the root partition? */
bool hv_root_partition;
@@ -344,6 +345,11 @@ static void __init ms_hyperv_init_platform(void)
*/
swiotlb_force = SWIOTLB_FORCE;
#endif
+ /* Isolation VMs are unenlightened SEV-based VMs, thus this check: */
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
+ if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE)
+ cc_set_vendor(CC_VENDOR_HYPERV);
+ }
}
if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 21d1f062895a..4143b1e4c5c6 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -26,6 +26,7 @@ struct cpuid_bit {
static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
+ { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
{ X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
{ X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 001808e3901c..7c63a1911fae 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -13,6 +13,30 @@
#include "sgx.h"
/*
+ * Calculate byte offset of a PCMD struct associated with an enclave page. PCMDs
+ * follow right after the EPC data in the backing storage. In addition to the
+ * visible enclave pages, there's one extra page slot for SECS, before PCMD
+ * structs.
+ */
+static inline pgoff_t sgx_encl_get_backing_page_pcmd_offset(struct sgx_encl *encl,
+ unsigned long page_index)
+{
+ pgoff_t epc_end_off = encl->size + sizeof(struct sgx_secs);
+
+ return epc_end_off + page_index * sizeof(struct sgx_pcmd);
+}
+
+/*
+ * Free a page from the backing storage at the given page index.
+ */
+static inline void sgx_encl_truncate_backing_page(struct sgx_encl *encl, unsigned long page_index)
+{
+ struct inode *inode = file_inode(encl->backing);
+
+ shmem_truncate_range(inode, PFN_PHYS(page_index), PFN_PHYS(page_index) + PAGE_SIZE - 1);
+}
+
+/*
* ELDU: Load an EPC page as unblocked. For more info, see "OS Management of EPC
* Pages" in the SDM.
*/
@@ -22,9 +46,11 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
{
unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
struct sgx_encl *encl = encl_page->encl;
+ pgoff_t page_index, page_pcmd_off;
struct sgx_pageinfo pginfo;
struct sgx_backing b;
- pgoff_t page_index;
+ bool pcmd_page_empty;
+ u8 *pcmd_page;
int ret;
if (secs_page)
@@ -32,14 +58,16 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
else
page_index = PFN_DOWN(encl->size);
+ page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
+
ret = sgx_encl_get_backing(encl, page_index, &b);
if (ret)
return ret;
pginfo.addr = encl_page->desc & PAGE_MASK;
pginfo.contents = (unsigned long)kmap_atomic(b.contents);
- pginfo.metadata = (unsigned long)kmap_atomic(b.pcmd) +
- b.pcmd_offset;
+ pcmd_page = kmap_atomic(b.pcmd);
+ pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;
if (secs_page)
pginfo.secs = (u64)sgx_get_epc_virt_addr(secs_page);
@@ -55,11 +83,24 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
ret = -EFAULT;
}
- kunmap_atomic((void *)(unsigned long)(pginfo.metadata - b.pcmd_offset));
+ memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd));
+
+ /*
+ * The area for the PCMD in the page was zeroed above. Check if the
+ * whole page is now empty, meaning that all PCMDs have been zeroed:
+ */
+ pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);
+
+ kunmap_atomic(pcmd_page);
kunmap_atomic((void *)(unsigned long)pginfo.contents);
sgx_encl_put_backing(&b, false);
+ sgx_encl_truncate_backing_page(encl, page_index);
+
+ if (pcmd_page_empty)
+ sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
+
return ret;
}
@@ -410,6 +451,8 @@ void sgx_encl_release(struct kref *ref)
}
kfree(entry);
+ /* Invoke scheduler to prevent soft lockups. */
+ cond_resched();
}
xa_destroy(&encl->page_array);
@@ -577,7 +620,7 @@ static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl,
int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
struct sgx_backing *backing)
{
- pgoff_t pcmd_index = PFN_DOWN(encl->size) + 1 + (page_index >> 5);
+ pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
struct page *contents;
struct page *pcmd;
@@ -585,7 +628,7 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
if (IS_ERR(contents))
return PTR_ERR(contents);
- pcmd = sgx_encl_get_backing_page(encl, pcmd_index);
+ pcmd = sgx_encl_get_backing_page(encl, PFN_DOWN(page_pcmd_off));
if (IS_ERR(pcmd)) {
put_page(contents);
return PTR_ERR(pcmd);
@@ -594,9 +637,7 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
backing->page_index = page_index;
backing->contents = contents;
backing->pcmd = pcmd;
- backing->pcmd_offset =
- (page_index & (PAGE_SIZE / sizeof(struct sgx_pcmd) - 1)) *
- sizeof(struct sgx_pcmd);
+ backing->pcmd_offset = page_pcmd_off & (PAGE_SIZE - 1);
return 0;
}
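
A worked example of the new offset math, assuming the architectural sizes sizeof(struct sgx_secs) == 4096 and sizeof(struct sgx_pcmd) == 128:

	/* encl->size = 0x10000, page_index = 3:
	 *   epc_end_off   = 0x10000 + 0x1000        = 0x11000
	 *   page_pcmd_off = 0x11000 + 3 * 128       = 0x11180
	 *   backing->pcmd_offset = 0x11180 & 0xfff  = 0x180
	 * 32 PCMD entries share one backing page, which is why the
	 * memchr_inv() check above can truncate a fully zeroed PCMD page.
	 */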
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 4b41efc9e367..8e4bc6453d26 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -344,10 +344,8 @@ static void sgx_reclaim_pages(void)
{
struct sgx_epc_page *chunk[SGX_NR_TO_SCAN];
struct sgx_backing backing[SGX_NR_TO_SCAN];
- struct sgx_epc_section *section;
struct sgx_encl_page *encl_page;
struct sgx_epc_page *epc_page;
- struct sgx_numa_node *node;
pgoff_t page_index;
int cnt = 0;
int ret;
@@ -418,13 +416,7 @@ skip:
kref_put(&encl_page->encl->refcount, sgx_encl_release);
epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
- section = &sgx_epc_sections[epc_page->section];
- node = section->node;
-
- spin_lock(&node->lock);
- list_add_tail(&epc_page->list, &node->free_page_list);
- spin_unlock(&node->lock);
- atomic_long_inc(&sgx_nr_free_pages);
+ sgx_free_epc_page(epc_page);
}
}
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index bc0657f0deed..f267205f2d5a 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -995,8 +995,10 @@ early_param("memmap", parse_memmap_opt);
*/
void __init e820__reserve_setup_data(void)
{
+ struct setup_indirect *indirect;
struct setup_data *data;
- u64 pa_data;
+ u64 pa_data, pa_next;
+ u32 len;
pa_data = boot_params.hdr.setup_data;
if (!pa_data)
@@ -1004,6 +1006,14 @@ void __init e820__reserve_setup_data(void)
while (pa_data) {
data = early_memremap(pa_data, sizeof(*data));
+ if (!data) {
+ pr_warn("e820: failed to memremap setup_data entry\n");
+ return;
+ }
+
+ len = sizeof(*data);
+ pa_next = data->next;
+
e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
/*
@@ -1015,18 +1025,27 @@ void __init e820__reserve_setup_data(void)
sizeof(*data) + data->len,
E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
- if (data->type == SETUP_INDIRECT &&
- ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
- e820__range_update(((struct setup_indirect *)data->data)->addr,
- ((struct setup_indirect *)data->data)->len,
- E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
- e820__range_update_kexec(((struct setup_indirect *)data->data)->addr,
- ((struct setup_indirect *)data->data)->len,
- E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+ if (data->type == SETUP_INDIRECT) {
+ len += data->len;
+ early_memunmap(data, sizeof(*data));
+ data = early_memremap(pa_data, len);
+ if (!data) {
+ pr_warn("e820: failed to memremap indirect setup_data\n");
+ return;
+ }
+
+ indirect = (struct setup_indirect *)data->data;
+
+ if (indirect->type != SETUP_INDIRECT) {
+ e820__range_update(indirect->addr, indirect->len,
+ E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+ e820__range_update_kexec(indirect->addr, indirect->len,
+ E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+ }
}
- pa_data = data->next;
- early_memunmap(data, sizeof(*data));
+ pa_data = pa_next;
+ early_memunmap(data, len);
}
e820__update_table(e820_table);
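
The same hardening pattern recurs in the kdebugfs.c and ksysfs.c hunks below; distilled into one sketch (the mapping primitive varies per call site):

static int walk_setup_data(u64 pa_data)
{
	struct setup_indirect *indirect;
	struct setup_data *data;
	u64 pa_next;
	u32 len;

	while (pa_data) {
		data = memremap(pa_data, sizeof(*data), MEMREMAP_WB);
		if (!data)
			return -ENOMEM;
		len = sizeof(*data);
		pa_next = data->next;	/* save before any remap */

		if (data->type == SETUP_INDIRECT) {
			/* remap header plus payload before using data->data */
			len += data->len;
			memunmap(data);
			data = memremap(pa_data, len, MEMREMAP_WB);
			if (!data)
				return -ENOMEM;
			indirect = (struct setup_indirect *)data->data;
			/* follow indirect->addr/len only when
			 * indirect->type != SETUP_INDIRECT */
			(void)indirect;
		}

		pa_data = pa_next;
		memunmap(data);
	}
	return 0;
}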
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 8dea01ffc5c1..19821f027cb3 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -612,6 +612,13 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags)
fpu_inherit_perms(dst_fpu);
fpregs_unlock();
+ /*
+ * Children never inherit PASID state.
+ * Force it to have its init value:
+ */
+ if (use_xsave())
+ dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID;
+
trace_x86_fpu_copy_src(src_fpu);
trace_x86_fpu_copy_dst(dst_fpu);
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 437d7c930c0b..75ffaef8c299 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -91,11 +91,9 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
const void *kbuf, const void __user *ubuf)
{
struct fpu *fpu = &target->thread.fpu;
- struct user32_fxsr_struct newstate;
+ struct fxregs_state newstate;
int ret;
- BUILD_BUG_ON(sizeof(newstate) != sizeof(struct fxregs_state));
-
if (!cpu_feature_enabled(X86_FEATURE_FXSR))
return -ENODEV;
@@ -116,9 +114,10 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
/* Copy the state */
memcpy(&fpu->fpstate->regs.fxsave, &newstate, sizeof(newstate));
- /* Clear xmm8..15 */
+ /* Clear xmm8..15 for 32-bit callers */
BUILD_BUG_ON(sizeof(fpu->__fpstate.regs.fxsave.xmm_space) != 16 * 16);
- memset(&fpu->fpstate->regs.fxsave.xmm_space[8], 0, 8 * 16);
+ if (in_ia32_syscall())
+ memset(&fpu->fpstate->regs.fxsave.xmm_space[8*4], 0, 8 * 16);
/* Mark FP and SSE as in use when XSAVE is enabled */
if (use_xsave())
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 02b3ddaf4f75..7c7824ae7862 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1558,7 +1558,10 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
fpregs_restore_userregs();
newfps->xfeatures = curfps->xfeatures | xfeatures;
- newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
+
+ if (!guest_fpu)
+ newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
+
newfps->xfd = curfps->xfd & ~xfeatures;
/* Do the final updates within the locked region */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index de563db9cdcd..4f5ecbbaae77 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -126,7 +126,7 @@ static bool __head check_la57_support(unsigned long physaddr)
}
#endif
-static unsigned long sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd)
+static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd)
{
unsigned long vaddr, vaddr_end;
int i;
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 64b6da95af98..e2e89bebcbc3 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -88,11 +88,13 @@ create_setup_data_node(struct dentry *parent, int no,
static int __init create_setup_data_nodes(struct dentry *parent)
{
+ struct setup_indirect *indirect;
struct setup_data_node *node;
struct setup_data *data;
- int error;
+ u64 pa_data, pa_next;
struct dentry *d;
- u64 pa_data;
+ int error;
+ u32 len;
int no = 0;
d = debugfs_create_dir("setup_data", parent);
@@ -112,12 +114,29 @@ static int __init create_setup_data_nodes(struct dentry *parent)
error = -ENOMEM;
goto err_dir;
}
-
- if (data->type == SETUP_INDIRECT &&
- ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
- node->paddr = ((struct setup_indirect *)data->data)->addr;
- node->type = ((struct setup_indirect *)data->data)->type;
- node->len = ((struct setup_indirect *)data->data)->len;
+ pa_next = data->next;
+
+ if (data->type == SETUP_INDIRECT) {
+ len = sizeof(*data) + data->len;
+ memunmap(data);
+ data = memremap(pa_data, len, MEMREMAP_WB);
+ if (!data) {
+ kfree(node);
+ error = -ENOMEM;
+ goto err_dir;
+ }
+
+ indirect = (struct setup_indirect *)data->data;
+
+ if (indirect->type != SETUP_INDIRECT) {
+ node->paddr = indirect->addr;
+ node->type = indirect->type;
+ node->len = indirect->len;
+ } else {
+ node->paddr = pa_data;
+ node->type = data->type;
+ node->len = data->len;
+ }
} else {
node->paddr = pa_data;
node->type = data->type;
@@ -125,7 +144,7 @@ static int __init create_setup_data_nodes(struct dentry *parent)
}
create_setup_data_node(d, no, node);
- pa_data = data->next;
+ pa_data = pa_next;
memunmap(data);
no++;
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index d0a19121c6a4..257892fcefa7 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -91,26 +91,41 @@ static int get_setup_data_paddr(int nr, u64 *paddr)
static int __init get_setup_data_size(int nr, size_t *size)
{
- int i = 0;
+ u64 pa_data = boot_params.hdr.setup_data, pa_next;
+ struct setup_indirect *indirect;
struct setup_data *data;
- u64 pa_data = boot_params.hdr.setup_data;
+ int i = 0;
+ u32 len;
while (pa_data) {
data = memremap(pa_data, sizeof(*data), MEMREMAP_WB);
if (!data)
return -ENOMEM;
+ pa_next = data->next;
+
if (nr == i) {
- if (data->type == SETUP_INDIRECT &&
- ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT)
- *size = ((struct setup_indirect *)data->data)->len;
- else
+ if (data->type == SETUP_INDIRECT) {
+ len = sizeof(*data) + data->len;
+ memunmap(data);
+ data = memremap(pa_data, len, MEMREMAP_WB);
+ if (!data)
+ return -ENOMEM;
+
+ indirect = (struct setup_indirect *)data->data;
+
+ if (indirect->type != SETUP_INDIRECT)
+ *size = indirect->len;
+ else
+ *size = data->len;
+ } else {
*size = data->len;
+ }
memunmap(data);
return 0;
}
- pa_data = data->next;
+ pa_data = pa_next;
memunmap(data);
i++;
}
@@ -120,9 +135,11 @@ static int __init get_setup_data_size(int nr, size_t *size)
static ssize_t type_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
+ struct setup_indirect *indirect;
+ struct setup_data *data;
int nr, ret;
u64 paddr;
- struct setup_data *data;
+ u32 len;
ret = kobj_to_setup_data_nr(kobj, &nr);
if (ret)
@@ -135,10 +152,20 @@ static ssize_t type_show(struct kobject *kobj,
if (!data)
return -ENOMEM;
- if (data->type == SETUP_INDIRECT)
- ret = sprintf(buf, "0x%x\n", ((struct setup_indirect *)data->data)->type);
- else
+ if (data->type == SETUP_INDIRECT) {
+ len = sizeof(*data) + data->len;
+ memunmap(data);
+ data = memremap(paddr, len, MEMREMAP_WB);
+ if (!data)
+ return -ENOMEM;
+
+ indirect = (struct setup_indirect *)data->data;
+
+ ret = sprintf(buf, "0x%x\n", indirect->type);
+ } else {
ret = sprintf(buf, "0x%x\n", data->type);
+ }
+
memunmap(data);
return ret;
}
@@ -149,9 +176,10 @@ static ssize_t setup_data_data_read(struct file *fp,
char *buf,
loff_t off, size_t count)
{
+ struct setup_indirect *indirect;
+ struct setup_data *data;
int nr, ret = 0;
u64 paddr, len;
- struct setup_data *data;
void *p;
ret = kobj_to_setup_data_nr(kobj, &nr);
@@ -165,10 +193,27 @@ static ssize_t setup_data_data_read(struct file *fp,
if (!data)
return -ENOMEM;
- if (data->type == SETUP_INDIRECT &&
- ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
- paddr = ((struct setup_indirect *)data->data)->addr;
- len = ((struct setup_indirect *)data->data)->len;
+ if (data->type == SETUP_INDIRECT) {
+ len = sizeof(*data) + data->len;
+ memunmap(data);
+ data = memremap(paddr, len, MEMREMAP_WB);
+ if (!data)
+ return -ENOMEM;
+
+ indirect = (struct setup_indirect *)data->data;
+
+ if (indirect->type != SETUP_INDIRECT) {
+ paddr = indirect->addr;
+ len = indirect->len;
+ } else {
+ /*
+ * Even though this is technically undefined, return
+ * the data as though it is a normal setup_data struct.
+ * This will at least allow it to be inspected.
+ */
+ paddr += sizeof(*data);
+ len = data->len;
+ }
} else {
paddr += sizeof(*data);
len = data->len;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index a438217cbfac..d77481ecb0d5 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -462,19 +462,24 @@ static bool pv_tlb_flush_supported(void)
{
return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) &&
+ !boot_cpu_has(X86_FEATURE_MWAIT) &&
+ (num_possible_cpus() != 1));
}
static bool pv_ipi_supported(void)
{
- return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI);
+ return (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) &&
+ (num_possible_cpus() != 1));
}
static bool pv_sched_yield_supported(void)
{
return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) &&
+ !boot_cpu_has(X86_FEATURE_MWAIT) &&
+ (num_possible_cpus() != 1));
}
#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
@@ -619,7 +624,7 @@ static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
/* Make sure other vCPUs get a chance to run if they need to. */
for_each_cpu(cpu, mask) {
- if (vcpu_is_preempted(cpu)) {
+ if (!idle_cpu(cpu) && vcpu_is_preempted(cpu)) {
kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
break;
}
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index a35cbf9107af..c5caa7311bd8 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -239,6 +239,9 @@ static void __init kvmclock_init_mem(void)
static int __init kvm_setup_vsyscall_timeinfo(void)
{
+ if (!kvm_para_available() || !kvmclock)
+ return 0;
+
kvmclock_init_mem();
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 95fa745e310a..96d7c27b7093 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -273,6 +273,14 @@ int module_finalize(const Elf_Ehdr *hdr,
retpolines = s;
}
+ /*
+ * See alternative_instructions() for the ordering rules between the
+ * various patching types.
+ */
+ if (para) {
+ void *pseg = (void *)para->sh_addr;
+ apply_paravirt(pseg, pseg + para->sh_size);
+ }
if (retpolines) {
void *rseg = (void *)retpolines->sh_addr;
apply_retpolines(rseg, rseg + retpolines->sh_size);
@@ -290,11 +298,6 @@ int module_finalize(const Elf_Ehdr *hdr,
tseg, tseg + text->sh_size);
}
- if (para) {
- void *pseg = (void *)para->sh_addr;
- apply_paravirt(pseg, pseg + para->sh_size);
- }
-
/* make jump label nops */
jump_label_apply_nops(me);
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 4bce802d25fb..e73f7df362f5 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -292,7 +292,6 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
reason, smp_processor_id());
- pr_emerg("Do you have a strange power saving mode enabled?\n");
if (unknown_nmi_panic || panic_on_unrecovered_nmi)
nmi_panic(regs, "NMI: Not continuing");
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 81d8ef036637..e131d71b3cae 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -765,8 +765,11 @@ void stop_this_cpu(void *dummy)
* without the encryption bit, they don't race each other when flushed
* and potentially end up with the wrong entry being committed to
* memory.
+ *
+ * Test the CPUID bit directly because the machine might've cleared
+ * X86_FEATURE_SME due to cmdline options.
*/
- if (boot_cpu_has(X86_FEATURE_SME))
+ if (cpuid_eax(0x8000001f) & BIT(0))
native_wbinvd();
for (;;) {
/*
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 6d2244c94799..8d2f2f995539 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1224,7 +1224,7 @@ static struct user_regset x86_64_regsets[] __ro_after_init = {
},
[REGSET_FP] = {
.core_note_type = NT_PRFPREG,
- .n = sizeof(struct user_i387_struct) / sizeof(long),
+ .n = sizeof(struct fxregs_state) / sizeof(long),
.size = sizeof(long), .align = sizeof(long),
.active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set
},
@@ -1271,7 +1271,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = {
},
[REGSET_XFP] = {
.core_note_type = NT_PRXFPREG,
- .n = sizeof(struct user32_fxsr_struct) / sizeof(u32),
+ .n = sizeof(struct fxregs_state) / sizeof(u32),
.size = sizeof(u32), .align = sizeof(u32),
.active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set
},
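
[Editor's note] Both regset fixes size .n from the buffer the getters and setters actually operate on — struct fxregs_state, the 512-byte FXSAVE image — rather than indirecting through userspace ABI structs that merely happen to have the same size, so the intent is now explicit. The arithmetic, for reference (512 bytes is the architectural FXSAVE size):

    #include <stdio.h>

    #define FXSAVE_BYTES 512    /* architectural size of the FXSAVE area */

    int main(void)
    {
        /* x86-64 regset: entries are longs (8 bytes) -> 64 entries */
        printf(".n (64-bit) = %d\n", FXSAVE_BYTES / 8);
        /* ia32 regset: entries are u32 (4 bytes) -> 128 entries */
        printf(".n (32-bit) = %d\n", FXSAVE_BYTES / 4);
        return 0;
    }
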
diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c
index 9ae64f9af956..9b9fb7882c20 100644
--- a/arch/x86/kernel/resource.c
+++ b/arch/x86/kernel/resource.c
@@ -1,5 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/dmi.h>
#include <linux/ioport.h>
#include <asm/e820/api.h>
@@ -24,31 +23,11 @@ static void resource_clip(struct resource *res, resource_size_t start,
res->start = end + 1;
}
-/*
- * Some BIOS-es contain a bug where they add addresses which map to
- * system RAM in the PCI host bridge window returned by the ACPI _CRS
- * method, see commit 4dc2287c1805 ("x86: avoid E820 regions when
- * allocating address space"). To avoid this Linux by default excludes
- * E820 reservations when allocating addresses since 2010.
- * In 2019 some systems have shown-up with E820 reservations which cover
- * the entire _CRS returned PCI host bridge window, causing all attempts
- * to assign memory to PCI BARs to fail if Linux uses E820 reservations.
- *
- * Ideally Linux would fully stop using E820 reservations, but then
- * the old systems this was added for will regress.
- * Instead keep the old behavior for old systems, while ignoring the
- * E820 reservations for any systems from now on.
- */
static void remove_e820_regions(struct resource *avail)
{
- int i, year = dmi_get_bios_year();
+ int i;
struct e820_entry *entry;
- if (year >= 2018)
- return;
-
- pr_info_once("PCI: Removing E820 reservations from host bridge windows\n");
-
for (i = 0; i < e820_table->nr_entries; i++) {
entry = &e820_table->entries[i];
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f7a132eb794d..90d7e1788c91 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -369,21 +369,41 @@ static void __init parse_setup_data(void)
static void __init memblock_x86_reserve_range_setup_data(void)
{
+ struct setup_indirect *indirect;
struct setup_data *data;
- u64 pa_data;
+ u64 pa_data, pa_next;
+ u32 len;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
data = early_memremap(pa_data, sizeof(*data));
+ if (!data) {
+ pr_warn("setup: failed to memremap setup_data entry\n");
+ return;
+ }
+
+ len = sizeof(*data);
+ pa_next = data->next;
+
memblock_reserve(pa_data, sizeof(*data) + data->len);
- if (data->type == SETUP_INDIRECT &&
- ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT)
- memblock_reserve(((struct setup_indirect *)data->data)->addr,
- ((struct setup_indirect *)data->data)->len);
+ if (data->type == SETUP_INDIRECT) {
+ len += data->len;
+ early_memunmap(data, sizeof(*data));
+ data = early_memremap(pa_data, len);
+ if (!data) {
+ pr_warn("setup: failed to memremap indirect setup_data\n");
+ return;
+ }
- pa_data = data->next;
- early_memunmap(data, sizeof(*data));
+ indirect = (struct setup_indirect *)data->data;
+
+ if (indirect->type != SETUP_INDIRECT)
+ memblock_reserve(indirect->addr, indirect->len);
+ }
+
+ pa_data = pa_next;
+ early_memunmap(data, len);
}
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 617012f4619f..2ef14772dc04 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -83,10 +83,6 @@
#include <asm/hw_irq.h>
#include <asm/stackprotector.h>
-#ifdef CONFIG_ACPI_CPPC_LIB
-#include <acpi/cppc_acpi.h>
-#endif
-
/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
@@ -155,8 +151,6 @@ static inline void smpboot_restore_warm_reset_vector(void)
*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
}
-static void init_freq_invariance(bool secondary, bool cppc_ready);
-
/*
* Report back to the Boot Processor during boot time or to the caller processor
* during CPU online.
@@ -2097,48 +2091,6 @@ out:
return true;
}
-#ifdef CONFIG_ACPI_CPPC_LIB
-static bool amd_set_max_freq_ratio(void)
-{
- struct cppc_perf_caps perf_caps;
- u64 highest_perf, nominal_perf;
- u64 perf_ratio;
- int rc;
-
- rc = cppc_get_perf_caps(0, &perf_caps);
- if (rc) {
- pr_debug("Could not retrieve perf counters (%d)\n", rc);
- return false;
- }
-
- highest_perf = amd_get_highest_perf();
- nominal_perf = perf_caps.nominal_perf;
-
- if (!highest_perf || !nominal_perf) {
- pr_debug("Could not retrieve highest or nominal performance\n");
- return false;
- }
-
- perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
- /* midpoint between max_boost and max_P */
- perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
- if (!perf_ratio) {
- pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
- return false;
- }
-
- arch_turbo_freq_ratio = perf_ratio;
- arch_set_max_freq_ratio(false);
-
- return true;
-}
-#else
-static bool amd_set_max_freq_ratio(void)
-{
- return false;
-}
-#endif
-
static void init_counter_refs(void)
{
u64 aperf, mperf;
@@ -2167,7 +2119,7 @@ static void register_freq_invariance_syscore_ops(void)
static inline void register_freq_invariance_syscore_ops(void) {}
#endif
-static void init_freq_invariance(bool secondary, bool cppc_ready)
+void init_freq_invariance(bool secondary, bool cppc_ready)
{
bool ret = false;
@@ -2187,7 +2139,7 @@ static void init_freq_invariance(bool secondary, bool cppc_ready)
if (!cppc_ready) {
return;
}
- ret = amd_set_max_freq_ratio();
+ ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio);
}
if (ret) {
@@ -2200,22 +2152,6 @@ static void init_freq_invariance(bool secondary, bool cppc_ready)
}
}
-#ifdef CONFIG_ACPI_CPPC_LIB
-static DEFINE_MUTEX(freq_invariance_lock);
-
-void init_freq_invariance_cppc(void)
-{
- static bool secondary;
-
- mutex_lock(&freq_invariance_lock);
-
- init_freq_invariance(secondary, true);
- secondary = true;
-
- mutex_unlock(&freq_invariance_lock);
-}
-#endif
-
static void disable_freq_invariance_workfn(struct work_struct *work)
{
static_branch_disable(&arch_scale_freq_key);
@@ -2264,8 +2200,4 @@ error:
pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
schedule_work(&disable_freq_invariance_work);
}
-#else
-static inline void init_freq_invariance(bool secondary, bool cppc_ready)
-{
-}
#endif /* CONFIG_X86_64 */
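
[Editor's note] The AMD ratio computation that moves out of this file (into the CPPC library, per the new amd_set_max_freq_ratio(&arch_turbo_freq_ratio) call) is plain fixed-point arithmetic: scale highest_perf/nominal_perf by SCHED_CAPACITY_SCALE, then take the midpoint with 1.0. A worked example with made-up CPPC values:

    #include <stdint.h>
    #include <stdio.h>

    #define SCHED_CAPACITY_SCALE 1024ULL

    int main(void)
    {
        /* illustrative CPPC values: boost is 1.4x nominal */
        uint64_t highest_perf = 280, nominal_perf = 200;

        uint64_t ratio = highest_perf * SCHED_CAPACITY_SCALE / nominal_perf;
        /* midpoint between max boost and max P-state, as in the kernel */
        ratio = (ratio + SCHED_CAPACITY_SCALE) >> 1;

        printf("turbo ratio = %llu/1024 (~%.2fx)\n",
               (unsigned long long)ratio, ratio / 1024.0);    /* ~1.20x */
        return 0;
    }
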
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index bd83748e2bde..8617d1ed9d31 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -154,11 +154,6 @@ static int __init topology_init(void)
{
int i;
-#ifdef CONFIG_NUMA
- for_each_online_node(i)
- register_one_node(i);
-#endif
-
for_each_present_cpu(i)
arch_register_cpu(i);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c9d566dcf89a..2e37862e3a8c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -39,6 +39,7 @@
#include <linux/io.h>
#include <linux/hardirq.h>
#include <linux/atomic.h>
+#include <linux/ioasid.h>
#include <asm/stacktrace.h>
#include <asm/processor.h>
@@ -559,6 +560,57 @@ static bool fixup_iopl_exception(struct pt_regs *regs)
return true;
}
+/*
+ * The unprivileged ENQCMD instruction generates #GPs if the
+ * IA32_PASID MSR has not been populated. If possible, populate
+ * the MSR from a PASID previously allocated to the mm.
+ */
+static bool try_fixup_enqcmd_gp(void)
+{
+#ifdef CONFIG_IOMMU_SVA
+ u32 pasid;
+
+ /*
+ * MSR_IA32_PASID is managed using XSAVE. Directly
+ * writing to the MSR is only possible when fpregs
+ * are valid and the fpstate is not. This is
+ * guaranteed when handling a userspace exception
+ * *before* interrupts are re-enabled.
+ */
+ lockdep_assert_irqs_disabled();
+
+ /*
+ * Hardware without ENQCMD will not generate
+ * #GPs that can be fixed up here.
+ */
+ if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
+ return false;
+
+ pasid = current->mm->pasid;
+
+ /*
+ * If the mm has not been allocated a
+ * PASID, the #GP cannot be fixed up.
+ */
+ if (!pasid_valid(pasid))
+ return false;
+
+ /*
+ * Did this thread already have its PASID activated?
+ * If so, the #GP must be from something else.
+ */
+ if (current->pasid_activated)
+ return false;
+
+ wrmsrl(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID);
+ current->pasid_activated = 1;
+
+ return true;
+#else
+ return false;
+#endif
+}
+
DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
{
char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
@@ -567,6 +619,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
unsigned long gp_addr;
int ret;
+ if (user_mode(regs) && try_fixup_enqcmd_gp())
+ return;
+
cond_local_irq_enable(regs);
if (static_cpu_has(X86_FEATURE_UMIP)) {
@@ -659,6 +714,7 @@ static bool do_int3(struct pt_regs *regs)
return res == NOTIFY_STOP;
}
+NOKPROBE_SYMBOL(do_int3);
static void do_int3_user(struct pt_regs *regs)
{
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 7d20c1d34a3c..e84ee5cdbd8c 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -129,6 +129,11 @@ struct x86_cpuinit_ops x86_cpuinit = {
static void default_nmi_init(void) { };
+static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { }
+static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; }
+static bool enc_tlb_flush_required_noop(bool enc) { return false; }
+static bool enc_cache_flush_required_noop(void) { return false; }
+
struct x86_platform_ops x86_platform __ro_after_init = {
.calibrate_cpu = native_calibrate_cpu_early,
.calibrate_tsc = native_calibrate_tsc,
@@ -138,9 +143,16 @@ struct x86_platform_ops x86_platform __ro_after_init = {
.is_untracked_pat_range = is_ISA_range,
.nmi_init = default_nmi_init,
.get_nmi_reason = default_get_nmi_reason,
- .save_sched_clock_state = tsc_save_sched_clock_state,
- .restore_sched_clock_state = tsc_restore_sched_clock_state,
+ .save_sched_clock_state = tsc_save_sched_clock_state,
+ .restore_sched_clock_state = tsc_restore_sched_clock_state,
.hyper.pin_vcpu = x86_op_int_noop,
+
+ .guest = {
+ .enc_status_change_prepare = enc_status_change_prepare_noop,
+ .enc_status_change_finish = enc_status_change_finish_noop,
+ .enc_tlb_flush_required = enc_tlb_flush_required_noop,
+ .enc_cache_flush_required = enc_cache_flush_required_noop,
+ },
};
EXPORT_SYMBOL_GPL(x86_platform);
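
[Editor's note] The .guest ops added above follow the usual x86_platform pattern: every hook gets a safe no-op default so call sites never need NULL checks, and platform code that actually needs the hooks overrides only those. A generic sketch of that pattern (struct and function names here are invented for illustration):

    #include <stdbool.h>
    #include <stdio.h>

    struct enc_ops {
        bool (*tlb_flush_required)(bool enc);
        bool (*cache_flush_required)(void);
    };

    static bool noop_tlb(bool enc) { (void)enc; return false; }
    static bool noop_cache(void) { return false; }

    /* Defaults are real functions, so call sites stay unconditional. */
    static struct enc_ops ops = {
        .tlb_flush_required = noop_tlb,
        .cache_flush_required = noop_cache,
    };

    static bool platform_cache(void) { return true; }    /* an override */

    int main(void)
    {
        printf("%d\n", ops.cache_flush_required());    /* 0: noop default */
        ops.cache_flush_required = platform_cache;     /* platform setup */
        printf("%d\n", ops.cache_flush_required());    /* 1 */
        return 0;
    }
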
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 3902c28fb6cb..8bf541b24b4b 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -133,6 +133,7 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2
orig = &vcpu->arch.cpuid_entries[i];
if (e2[i].function != orig->function ||
e2[i].index != orig->index ||
+ e2[i].flags != orig->flags ||
e2[i].eax != orig->eax || e2[i].ebx != orig->ebx ||
e2[i].ecx != orig->ecx || e2[i].edx != orig->edx)
return -EINVAL;
@@ -196,10 +197,26 @@ void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
vcpu->arch.pv_cpuid.features = best->eax;
}
+/*
+ * Calculate guest's supported XCR0 taking into account guest CPUID data and
+ * supported_xcr0 (comprised of host configuration and KVM_SUPPORTED_XCR0).
+ */
+static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = cpuid_entry2_find(entries, nent, 0xd, 0);
+ if (!best)
+ return 0;
+
+ return (best->eax | ((u64)best->edx << 32)) & supported_xcr0;
+}
+
static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
int nent)
{
struct kvm_cpuid_entry2 *best;
+ u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent);
best = cpuid_entry2_find(entries, nent, 1, 0);
if (best) {
@@ -238,6 +255,21 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
vcpu->arch.ia32_misc_enable_msr &
MSR_IA32_MISC_ENABLE_MWAIT);
}
+
+ /*
+ * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate
+ * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's
+ * requested XCR0 value. The enclave's XFRM must be a subset of XCR0
+ * at the time of EENTER, thus adjust the allowed XFRM by the guest's
+ * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to
+ * '1' even on CPUs that don't support XSAVE.
+ */
+ best = cpuid_entry2_find(entries, nent, 0x12, 0x1);
+ if (best) {
+ best->ecx &= guest_supported_xcr0 & 0xffffffff;
+ best->edx &= guest_supported_xcr0 >> 32;
+ best->ecx |= XFEATURE_MASK_FPSSE;
+ }
}
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
@@ -250,6 +282,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
struct kvm_cpuid_entry2 *best;
+ u64 guest_supported_xcr0;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
if (best && apic) {
@@ -261,27 +294,10 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_apic_set_version(vcpu);
}
- best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
- if (!best)
- vcpu->arch.guest_supported_xcr0 = 0;
- else
- vcpu->arch.guest_supported_xcr0 =
- (best->eax | ((u64)best->edx << 32)) & supported_xcr0;
+ guest_supported_xcr0 =
+ cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
- /*
- * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate
- * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's
- * requested XCR0 value. The enclave's XFRM must be a subset of XCRO
- * at the time of EENTER, thus adjust the allowed XFRM by the guest's
- * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to
- * '1' even on CPUs that don't support XSAVE.
- */
- best = kvm_find_cpuid_entry(vcpu, 0x12, 0x1);
- if (best) {
- best->ecx &= vcpu->arch.guest_supported_xcr0 & 0xffffffff;
- best->edx &= vcpu->arch.guest_supported_xcr0 >> 32;
- best->ecx |= XFEATURE_MASK_FPSSE;
- }
+ vcpu->arch.guest_fpu.fpstate->user_xfeatures = guest_supported_xcr0;
kvm_update_pv_runtime(vcpu);
@@ -346,8 +362,14 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
* KVM_SET_CPUID{,2} again. To support this legacy behavior, check
* whether the supplied CPUID data is equal to what's already set.
*/
- if (vcpu->arch.last_vmentry_cpu != -1)
- return kvm_cpuid_check_equal(vcpu, e2, nent);
+ if (vcpu->arch.last_vmentry_cpu != -1) {
+ r = kvm_cpuid_check_equal(vcpu, e2, nent);
+ if (r)
+ return r;
+
+ kvfree(e2);
+ return 0;
+ }
r = kvm_check_cpuid(vcpu, e2, nent);
if (r)
@@ -535,12 +557,13 @@ void kvm_set_cpu_caps(void)
);
kvm_cpu_cap_mask(CPUID_7_0_EBX,
- F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
- F(BMI2) | F(ERMS) | F(INVPCID) | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
- F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
- F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
- F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | 0 /*INTEL_PT*/
- );
+ F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) |
+ F(FDP_EXCPTN_ONLY) | F(SMEP) | F(BMI2) | F(ERMS) | F(INVPCID) |
+ F(RTM) | F(ZERO_FCS_FDS) | 0 /*MPX*/ | F(AVX512F) |
+ F(AVX512DQ) | F(RDSEED) | F(ADX) | F(SMAP) | F(AVX512IFMA) |
+ F(CLFLUSHOPT) | F(CLWB) | 0 /*INTEL_PT*/ | F(AVX512PF) |
+ F(AVX512ER) | F(AVX512CD) | F(SHA_NI) | F(AVX512BW) |
+ F(AVX512VL));
kvm_cpu_cap_mask(CPUID_7_ECX,
F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
@@ -856,7 +879,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
eax.split.bit_width = cap.bit_width_gp;
eax.split.mask_length = cap.events_mask_len;
- edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS);
+ edx.split.num_counters_fixed =
+ min(cap.num_counters_fixed, KVM_PMC_MAX_FIXED);
edx.split.bit_width_fixed = cap.bit_width_fixed;
if (cap.version)
edx.split.anythread_deprecated = 1;
@@ -887,13 +911,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
}
break;
case 0xd: {
- u64 guest_perm = xstate_get_guest_group_perm();
+ u64 permitted_xcr0 = supported_xcr0 & xstate_get_guest_group_perm();
+ u64 permitted_xss = supported_xss;
- entry->eax &= supported_xcr0 & guest_perm;
- entry->ebx = xstate_required_size(supported_xcr0, false);
+ entry->eax &= permitted_xcr0;
+ entry->ebx = xstate_required_size(permitted_xcr0, false);
entry->ecx = entry->ebx;
- entry->edx &= (supported_xcr0 & guest_perm) >> 32;
- if (!supported_xcr0)
+ entry->edx &= permitted_xcr0 >> 32;
+ if (!permitted_xcr0)
break;
entry = do_host_cpuid(array, function, 1);
@@ -902,20 +927,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
cpuid_entry_override(entry, CPUID_D_1_EAX);
if (entry->eax & (F(XSAVES)|F(XSAVEC)))
- entry->ebx = xstate_required_size(supported_xcr0 | supported_xss,
+ entry->ebx = xstate_required_size(permitted_xcr0 | permitted_xss,
true);
else {
- WARN_ON_ONCE(supported_xss != 0);
+ WARN_ON_ONCE(permitted_xss != 0);
entry->ebx = 0;
}
- entry->ecx &= supported_xss;
- entry->edx &= supported_xss >> 32;
+ entry->ecx &= permitted_xss;
+ entry->edx &= permitted_xss >> 32;
for (i = 2; i < 64; ++i) {
bool s_state;
- if (supported_xcr0 & BIT_ULL(i))
+ if (permitted_xcr0 & BIT_ULL(i))
s_state = false;
- else if (supported_xss & BIT_ULL(i))
+ else if (permitted_xss & BIT_ULL(i))
s_state = true;
else
continue;
@@ -929,7 +954,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
* invalid sub-leafs. Only valid sub-leafs should
* reach this point, and they should have a non-zero
* save state size. Furthermore, check whether the
- * processor agrees with supported_xcr0/supported_xss
+ * processor agrees with permitted_xcr0/permitted_xss
* on whether this is an XCR0- or IA32_XSS-managed area.
*/
if (WARN_ON_ONCE(!entry->eax || (entry->ecx & 0x1) != s_state)) {
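
[Editor's note] cpuid_get_supported_xcr0(), added earlier in this file, composes a 64-bit mask from CPUID.0xD.0 — EAX holds bits 31:0, EDX bits 63:32 — and clamps it to what the host and KVM support. The same bit-twiddling in isolation (the mask values below are examples only):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t guest_xcr0(uint32_t eax, uint32_t edx, uint64_t supported)
    {
        /* CPUID.0xD.0: EAX = low 32 bits, EDX = high 32 bits */
        return (eax | ((uint64_t)edx << 32)) & supported;
    }

    int main(void)
    {
        /* example: guest CPUID claims x87|SSE|AVX (0x7), host allows 0x7 */
        uint64_t x = guest_xcr0(0x7, 0x0, 0x7);
        printf("guest XCR0 = %#llx\n", (unsigned long long)x);    /* 0x7 */

        /* a host lacking AVX masks bit 2 off even if guest CPUID sets it */
        x = guest_xcr0(0x7, 0x0, 0x3);
        printf("guest XCR0 = %#llx\n", (unsigned long long)x);    /* 0x3 */
        return 0;
    }
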
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5719d8cfdbd9..50ca1db7247c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -429,8 +429,23 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
FOP_END
/* Special case for SETcc - 1 instruction per cc */
+
+/*
+ * Depending on .config the SETcc functions look like:
+ *
+ * SETcc %al [3 bytes]
+ * RET [1 byte]
+ * INT3 [1 byte; CONFIG_SLS]
+ *
+ * Which gives possible sizes 4 or 5. When rounded up to the
+ * next power-of-two alignment they become 4 or 8.
+ */
+#define SETCC_LENGTH (4 + IS_ENABLED(CONFIG_SLS))
+#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS))
+static_assert(SETCC_LENGTH <= SETCC_ALIGN);
+
#define FOP_SETCC(op) \
- ".align 4 \n\t" \
+ ".align " __stringify(SETCC_ALIGN) " \n\t" \
".type " #op ", @function \n\t" \
#op ": \n\t" \
#op " %al \n\t" \
@@ -665,7 +680,7 @@ static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
static inline bool emul_is_noncanonical_address(u64 la,
struct x86_emulate_ctxt *ctxt)
{
- return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la;
+ return !__is_canonical_address(la, ctxt_virt_addr_bits(ctxt));
}
/*
@@ -715,7 +730,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
case X86EMUL_MODE_PROT64:
*linear = la;
va_bits = ctxt_virt_addr_bits(ctxt);
- if (get_canonical(la, va_bits) != la)
+ if (!__is_canonical_address(la, va_bits))
goto bad;
*max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
@@ -1047,7 +1062,7 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
{
u8 rc;
- void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
+ void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf);
flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
asm("push %[flags]; popf; " CALL_NOSPEC
@@ -5380,8 +5395,13 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
- memset(&ctxt->rip_relative, 0,
- (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);
+ /* Clear fields that are set conditionally but read without a guard. */
+ ctxt->rip_relative = false;
+ ctxt->rex_prefix = 0;
+ ctxt->lock_prefix = 0;
+ ctxt->rep_prefix = 0;
+ ctxt->regs_valid = 0;
+ ctxt->regs_dirty = 0;
ctxt->io_read.pos = 0;
ctxt->io_read.end = 0;
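
[Editor's note] The SETCC_ALIGN change matters because test_cc() indexes a table of identically-sized stubs by multiplying the condition code by the stub pitch; hard-coding 4 breaks the moment CONFIG_SLS pads each stub to 8 bytes. The addressing scheme in miniature (pure pointer arithmetic, with an invented base address):

    #include <stdint.h>
    #include <stdio.h>

    /* 0 without CONFIG_SLS, 1 with it (an extra INT3 pads each stub) */
    #define SLS 1
    #define SETCC_ALIGN (4 << SLS)    /* stub pitch: 4 or 8 bytes */

    int main(void)
    {
        uintptr_t em_setcc = 0x1000;    /* illustrative table base */
        unsigned int cc[] = { 0x0, 0x1, 0xf };

        for (unsigned int i = 0; i < 3; i++) {
            uintptr_t stub = em_setcc + (uintptr_t)SETCC_ALIGN * (cc[i] & 0xf);
            printf("cc=%#x -> stub @ %#lx\n", cc[i], (unsigned long)stub);
        }
        return 0;
    }
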
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 39eded2426ff..840ddb4a9302 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -336,11 +336,7 @@ struct x86_emulate_ctxt {
fastop_t fop;
};
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
- /*
- * The following six fields are cleared together,
- * the rest are initialized unconditionally in x86_decode_insn
- * or elsewhere
- */
+
bool rip_relative;
u8 rex_prefix;
u8 lock_prefix;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index baca9fa37a91..9322e6340a74 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1096,14 +1096,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
apic->regs + APIC_TMR);
}
- if (static_call(kvm_x86_deliver_posted_interrupt)(vcpu, vector)) {
- kvm_lapic_set_irr(vector, apic);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
- kvm_vcpu_kick(vcpu);
- } else {
- trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
- trig_mode, vector);
- }
+ static_call(kvm_x86_deliver_interrupt)(apic, delivery_mode,
+ trig_mode, vector);
break;
case APIC_DM_REMRD:
@@ -2312,7 +2306,12 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
apic->irr_pending = true;
apic->isr_count = 1;
} else {
- apic->irr_pending = (apic_search_irr(apic) != -1);
+ /*
+ * Don't clear irr_pending, searching the IRR can race with
+ * updates from the CPU as APICv is still active from hardware's
+ * perspective. The flag will be cleared as appropriate when
+ * KVM injects the interrupt.
+ */
apic->isr_count = count_vectors(apic->regs + APIC_ISR);
}
}
@@ -2629,7 +2628,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
kvm_apic_set_version(vcpu);
apic_update_ppr(apic);
- hrtimer_cancel(&apic->lapic_timer.timer);
+ cancel_apic_timer(apic);
apic->lapic_timer.expired_tscdeadline = 0;
apic_update_lvtt(apic);
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 593093b52395..5628d0ba637e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3565,7 +3565,7 @@ set_root_pgd:
out_unlock:
write_unlock(&vcpu->kvm->mmu_lock);
- return 0;
+ return r;
}
static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
@@ -3889,12 +3889,23 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
walk_shadow_page_lockless_end(vcpu);
}
+static u32 alloc_apf_token(struct kvm_vcpu *vcpu)
+{
+ /* make sure the token value is not 0 */
+ u32 id = vcpu->arch.apf.id;
+
+ if (id << 12 == 0)
+ vcpu->arch.apf.id = 1;
+
+ return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
+}
+
static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
gfn_t gfn)
{
struct kvm_arch_async_pf arch;
- arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
+ arch.token = alloc_apf_token(vcpu);
arch.gfn = gfn;
arch.direct_map = vcpu->arch.mmu->direct_map;
arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu);
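
[Editor's note] alloc_apf_token() exists because the token is (id << 12) | vcpu_id and the 32-bit id field eventually wraps: whenever the low 20 bits of id are zero (including the initial 0), id << 12 truncates to 0, so a vCPU with vcpu_id 0 would hand the guest token 0, which means "no token". A standalone sketch of the wrap handling:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t apf_id;    /* per-vCPU counter in the kernel */

    static uint32_t alloc_apf_token(uint32_t vcpu_id)
    {
        /* make sure the resulting token is never 0 */
        if ((uint32_t)(apf_id << 12) == 0)
            apf_id = 1;

        return (apf_id++ << 12) | vcpu_id;
    }

    int main(void)
    {
        apf_id = 0;    /* fresh vCPU: would otherwise produce token 0 */
        printf("%#x\n", alloc_apf_token(0));    /* 0x1000, not 0 */

        apf_id = 1u << 20;    /* wrapped: id << 12 truncates to 0 again */
        printf("%#x\n", alloc_apf_token(0));    /* 0x1000 */
        return 0;
    }
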
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index f614f95acc6b..b1a02993782b 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -95,7 +95,7 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
}
static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
- unsigned config, bool exclude_user,
+ u64 config, bool exclude_user,
bool exclude_kernel, bool intr,
bool in_tx, bool in_tx_cp)
{
@@ -181,7 +181,8 @@ static int cmp_u64(const void *a, const void *b)
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
- unsigned config, type = PERF_TYPE_RAW;
+ u64 config;
+ u32 type = PERF_TYPE_RAW;
struct kvm *kvm = pmc->vcpu->kvm;
struct kvm_pmu_event_filter *filter;
bool allow_event = true;
@@ -220,7 +221,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
}
if (type == PERF_TYPE_RAW)
- config = eventsel & X86_RAW_EVENT_MASK;
+ config = eventsel & AMD64_RAW_EVENT_MASK;
if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
return;
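
[Editor's note] Widening config from unsigned to u64 is not cosmetic: AMD's PerfEvtSel places event-select bits 11:8 at bits 35:32 of the MSR, so masking with a 32-bit-wide raw-event mask silently drops them. A demonstration of the truncation with representative mask values (the masks below are simplified, not the kernel's exact definitions):

    #include <stdint.h>
    #include <stdio.h>

    /* simplified: low 8 event bits + unit mask */
    #define RAW_EVENT_MASK_32    0x0000ffffULL
    /* simplified AMD64 variant: adds event-select bits 35:32 */
    #define AMD64_RAW_EVENT_MASK (RAW_EVENT_MASK_32 | (0xfULL << 32))

    int main(void)
    {
        uint64_t eventsel = (0x1ULL << 32) | 0x76;    /* extended event 0x176 */

        uint32_t narrow = (uint32_t)(eventsel & RAW_EVENT_MASK_32);
        uint64_t wide   = eventsel & AMD64_RAW_EVENT_MASK;

        printf("32-bit config: %#x (extended bits lost)\n", narrow);
        printf("64-bit config: %#llx\n", (unsigned long long)wide);
        return 0;
    }
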
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 7a7b8d5b775e..9e66fba1d6a3 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -15,8 +15,6 @@
#define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001
#define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002
-#define MAX_FIXED_COUNTERS 3
-
struct kvm_event_hw_type_mapping {
u8 eventsel;
u8 unit_mask;
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 90364d02f22a..fb3e20791338 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -27,20 +27,6 @@
#include "irq.h"
#include "svm.h"
-#define SVM_AVIC_DOORBELL 0xc001011b
-
-#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
-
-/*
- * 0xff is broadcast, so the max index allowed for physical APIC ID
- * table is 0xfe. APIC IDs above 0xff are reserved.
- */
-#define AVIC_MAX_PHYSICAL_ID_COUNT 255
-
-#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
-#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
-#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
-
/* AVIC GATAG is encoded using VM and VCPU IDs */
#define AVIC_VCPU_ID_BITS 8
#define AVIC_VCPU_ID_MASK ((1 << AVIC_VCPU_ID_BITS) - 1)
@@ -73,12 +59,6 @@ struct amd_svm_iommu_ir {
void *data; /* Storing pointer to struct amd_ir_data */
};
-enum avic_ipi_failure_cause {
- AVIC_IPI_FAILURE_INVALID_INT_TYPE,
- AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
- AVIC_IPI_FAILURE_INVALID_TARGET,
- AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
-};
/* Note:
* This function is called from IOMMU driver to notify
@@ -289,6 +269,22 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
return 0;
}
+void avic_ring_doorbell(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Note, the vCPU could get migrated to a different pCPU at any point,
+ * which could result in signalling the wrong/previous pCPU. But if
+ * that happens the vCPU is guaranteed to do a VMRUN (after being
+ * migrated) and thus will process pending interrupts, i.e. a doorbell
+ * is not needed (and the spurious one is harmless).
+ */
+ int cpu = READ_ONCE(vcpu->cpu);
+
+ if (cpu != get_cpu())
+ wrmsrl(MSR_AMD64_SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu));
+ put_cpu();
+}
+
static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
u32 icrl, u32 icrh)
{
@@ -304,8 +300,13 @@ static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
kvm_for_each_vcpu(i, vcpu, kvm) {
if (kvm_apic_match_dest(vcpu, source, icrl & APIC_SHORT_MASK,
GET_APIC_DEST_FIELD(icrh),
- icrl & APIC_DEST_MASK))
- kvm_vcpu_wake_up(vcpu);
+ icrl & APIC_DEST_MASK)) {
+ vcpu->arch.apic->irr_pending = true;
+ svm_complete_interrupt_delivery(vcpu,
+ icrl & APIC_MODE_MASK,
+ icrl & APIC_INT_LEVELTRIG,
+ icrl & APIC_VECTOR_MASK);
+ }
}
}
@@ -345,8 +346,6 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh);
break;
case AVIC_IPI_FAILURE_INVALID_TARGET:
- WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
- index, vcpu->vcpu_id, icrh, icrl);
break;
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");
@@ -669,52 +668,6 @@ void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
return;
}
-int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
-{
- if (!vcpu->arch.apicv_active)
- return -1;
-
- kvm_lapic_set_irr(vec, vcpu->arch.apic);
-
- /*
- * Pairs with the smp_mb_*() after setting vcpu->guest_mode in
- * vcpu_enter_guest() to ensure the write to the vIRR is ordered before
- * the read of guest_mode, which guarantees that either VMRUN will see
- * and process the new vIRR entry, or that the below code will signal
- * the doorbell if the vCPU is already running in the guest.
- */
- smp_mb__after_atomic();
-
- /*
- * Signal the doorbell to tell hardware to inject the IRQ if the vCPU
- * is in the guest. If the vCPU is not in the guest, hardware will
- * automatically process AVIC interrupts at VMRUN.
- */
- if (vcpu->mode == IN_GUEST_MODE) {
- int cpu = READ_ONCE(vcpu->cpu);
-
- /*
- * Note, the vCPU could get migrated to a different pCPU at any
- * point, which could result in signalling the wrong/previous
- * pCPU. But if that happens the vCPU is guaranteed to do a
- * VMRUN (after being migrated) and thus will process pending
- * interrupts, i.e. a doorbell is not needed (and the spurious
- * one is harmless).
- */
- if (cpu != get_cpu())
- wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu));
- put_cpu();
- } else {
- /*
- * Wake the vCPU if it was blocking. KVM will then detect the
- * pending IRQ when checking if the vCPU has a wake event.
- */
- kvm_vcpu_wake_up(vcpu);
- }
-
- return 0;
-}
-
bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
{
return false;
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index cf206855ebf0..39d280e7e80e 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -983,9 +983,9 @@ void svm_free_nested(struct vcpu_svm *svm)
/*
* Forcibly leave nested mode in order to be able to reset the VCPU later on.
*/
-void svm_leave_nested(struct vcpu_svm *svm)
+void svm_leave_nested(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct vcpu_svm *svm = to_svm(vcpu);
if (is_guest_mode(vcpu)) {
svm->nested.nested_run_pending = 0;
@@ -1411,7 +1411,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
return -EINVAL;
if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
- svm_leave_nested(svm);
+ svm_leave_nested(vcpu);
svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
return 0;
}
@@ -1457,18 +1457,6 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
!__nested_vmcb_check_save(vcpu, &save_cached))
goto out_free;
- /*
- * While the nested guest CR3 is already checked and set by
- * KVM_SET_SREGS, it was set when nested state was yet loaded,
- * thus MMU might not be initialized correctly.
- * Set it again to fix this.
- */
-
- ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
- nested_npt_enabled(svm), false);
- if (WARN_ON_ONCE(ret))
- goto out_free;
-
/*
* All checks done, we can enter guest mode. Userspace provides
@@ -1478,7 +1466,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
*/
if (is_guest_mode(vcpu))
- svm_leave_nested(svm);
+ svm_leave_nested(vcpu);
else
svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
@@ -1494,6 +1482,20 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
svm_switch_vmcb(svm, &svm->nested.vmcb02);
nested_vmcb02_prepare_control(svm);
+
+ /*
+ * While the nested guest CR3 is already checked and set by
+ * KVM_SET_SREGS, it was set when the nested state was not yet loaded,
+ * thus MMU might not be initialized correctly.
+ * Set it again to fix this.
+ */
+
+ ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
+ nested_npt_enabled(svm), false);
+ if (WARN_ON_ONCE(ret))
+ goto out_free;
+
kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
ret = 0;
out_free:
@@ -1532,6 +1534,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
}
struct kvm_x86_nested_ops svm_nested_ops = {
+ .leave_nested = svm_leave_nested,
.check_events = svm_check_nested_events,
.triple_fault = nested_svm_triple_fault,
.get_nested_state_pages = svm_get_nested_state_pages,
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 6a22798eaaee..17b53457d866 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2100,8 +2100,13 @@ void __init sev_hardware_setup(void)
if (!sev_enabled || !npt_enabled)
goto out;
- /* Does the CPU support SEV? */
- if (!boot_cpu_has(X86_FEATURE_SEV))
+ /*
+ * SEV must obviously be supported in hardware. Sanity check that the
+ * CPU supports decode assists, which is mandatory for SEV guests to
+ * support instruction emulation.
+ */
+ if (!boot_cpu_has(X86_FEATURE_SEV) ||
+ WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_DECODEASSISTS)))
goto out;
/* Retrieve SEV CPUID information */
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 2c99b18d76c0..fd3a00c892c7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -290,7 +290,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
if (!(efer & EFER_SVME)) {
- svm_leave_nested(svm);
+ svm_leave_nested(vcpu);
svm_set_gif(svm, true);
/* #GP intercept is still needed for vmware backdoor */
if (!enable_vmware_backdoor)
@@ -312,7 +312,11 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
return ret;
}
- if (svm_gp_erratum_intercept)
+ /*
+ * Never intercept #GP for SEV guests, KVM can't
+ * decrypt guest memory to workaround the erratum.
+ */
+ if (svm_gp_erratum_intercept && !sev_guest(vcpu->kvm))
set_exception_intercept(svm, GP_VECTOR);
}
}
@@ -1010,9 +1014,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
* Guest access to VMware backdoor ports could legitimately
* trigger #GP because of TSS I/O permission bitmap.
* We intercept those #GP and allow access to them anyway
- * as VMware does.
+ * as VMware does. Don't intercept #GP for SEV guests as KVM can't
+ * decrypt guest memory to decode the faulting instruction.
*/
- if (enable_vmware_backdoor)
+ if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
set_exception_intercept(svm, GP_VECTOR);
svm_set_intercept(svm, INTERCEPT_INTR);
@@ -1580,6 +1585,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_svm *svm = to_svm(vcpu);
u64 hcr0 = cr0;
+ bool old_paging = is_paging(vcpu);
#ifdef CONFIG_X86_64
if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
@@ -1596,8 +1602,11 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
#endif
vcpu->arch.cr0 = cr0;
- if (!npt_enabled)
+ if (!npt_enabled) {
hcr0 |= X86_CR0_PG | X86_CR0_WP;
+ if (old_paging != is_paging(vcpu))
+ svm_set_cr4(vcpu, kvm_read_cr4(vcpu));
+ }
/*
* re-enable caching here because the QEMU bios
@@ -1641,8 +1650,12 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
svm_flush_tlb(vcpu);
vcpu->arch.cr4 = cr4;
- if (!npt_enabled)
+ if (!npt_enabled) {
cr4 |= X86_CR4_PAE;
+
+ if (!is_paging(vcpu))
+ cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
+ }
cr4 |= host_cr4_mce;
to_svm(vcpu)->vmcb->save.cr4 = cr4;
vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
@@ -2091,10 +2104,6 @@ static int gp_interception(struct kvm_vcpu *vcpu)
if (error_code)
goto reinject;
- /* All SVM instructions expect page aligned RAX */
- if (svm->vmcb->save.rax & ~PAGE_MASK)
- goto reinject;
-
/* Decode the instruction for usage later */
if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
goto reinject;
@@ -2112,8 +2121,13 @@ static int gp_interception(struct kvm_vcpu *vcpu)
if (!is_guest_mode(vcpu))
return kvm_emulate_instruction(vcpu,
EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
- } else
+ } else {
+ /* All SVM instructions expect page aligned RAX */
+ if (svm->vmcb->save.rax & ~PAGE_MASK)
+ goto reinject;
+
return emulate_svm_instr(vcpu, opcode);
+ }
reinject:
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
@@ -2679,8 +2693,23 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
u64 data = msr->data;
switch (ecx) {
case MSR_AMD64_TSC_RATIO:
- if (!msr->host_initiated && !svm->tsc_scaling_enabled)
- return 1;
+
+ if (!svm->tsc_scaling_enabled) {
+
+ if (!msr->host_initiated)
+ return 1;
+ /*
+ * In case TSC scaling is not enabled, always
+ * leave this MSR at the default value.
+ *
+ * Due to a bug in qemu 6.2.0, it would try to set
+ * this MSR to 0 if TSC scaling is not enabled.
+ * Ignore this value as well.
+ */
+ if (data != 0 && data != svm->tsc_ratio_msr)
+ return 1;
+ break;
+ }
if (data & TSC_RATIO_RSVD)
return 1;
@@ -3285,6 +3314,55 @@ static void svm_set_irq(struct kvm_vcpu *vcpu)
SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
}
+void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
+ int trig_mode, int vector)
+{
+ /*
+ * vcpu->arch.apicv_active must be read after vcpu->mode.
+ * Pairs with smp_store_release in vcpu_enter_guest.
+ */
+ bool in_guest_mode = (smp_load_acquire(&vcpu->mode) == IN_GUEST_MODE);
+
+ if (!READ_ONCE(vcpu->arch.apicv_active)) {
+ /* Process the interrupt via inject_pending_event */
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_vcpu_kick(vcpu);
+ return;
+ }
+
+ trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode, trig_mode, vector);
+ if (in_guest_mode) {
+ /*
+ * Signal the doorbell to tell hardware to inject the IRQ. If
+ * the vCPU exits the guest before the doorbell chimes, hardware
+ * will automatically process AVIC interrupts at the next VMRUN.
+ */
+ avic_ring_doorbell(vcpu);
+ } else {
+ /*
+ * Wake the vCPU if it was blocking. KVM will then detect the
+ * pending IRQ when checking if the vCPU has a wake event.
+ */
+ kvm_vcpu_wake_up(vcpu);
+ }
+}
+
+static void svm_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
+ int trig_mode, int vector)
+{
+ kvm_lapic_set_irr(vector, apic);
+
+ /*
+ * Pairs with the smp_mb_*() after setting vcpu->guest_mode in
+ * vcpu_enter_guest() to ensure the write to the vIRR is ordered before
+ * the read of guest_mode. This guarantees that either VMRUN will see
+ * and process the new vIRR entry, or that svm_complete_interrupt_delivery
+ * will signal the doorbell if the CPU has already entered the guest.
+ */
+ smp_mb__after_atomic();
+ svm_complete_interrupt_delivery(apic->vcpu, delivery_mode, trig_mode, vector);
+}
+
static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3332,11 +3410,13 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
if (svm->nested.nested_run_pending)
return -EBUSY;
+ if (svm_nmi_blocked(vcpu))
+ return 0;
+
/* An NMI must not be injected into L2 if it's supposed to VM-Exit. */
if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
return -EBUSY;
-
- return !svm_nmi_blocked(vcpu);
+ return 1;
}
static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
@@ -3388,9 +3468,13 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
struct vcpu_svm *svm = to_svm(vcpu);
+
if (svm->nested.nested_run_pending)
return -EBUSY;
+ if (svm_interrupt_blocked(vcpu))
+ return 0;
+
/*
* An IRQ must not be injected into L2 if it's supposed to VM-Exit,
* e.g. if the IRQ arrived asynchronously after checking nested events.
@@ -3398,7 +3482,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(svm))
return -EBUSY;
- return !svm_interrupt_blocked(vcpu);
+ return 1;
}
static void svm_enable_irq_window(struct kvm_vcpu *vcpu)
@@ -3609,7 +3693,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
unsigned long vmcb_pa = svm->current_vmcb->pa;
- kvm_guest_enter_irqoff();
+ guest_state_enter_irqoff();
if (sev_es_guest(vcpu->kvm)) {
__svm_sev_es_vcpu_run(vmcb_pa);
@@ -3629,7 +3713,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
vmload(__sme_page_pa(sd->save_area));
}
- kvm_guest_exit_irqoff();
+ guest_state_exit_irqoff();
}
static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
@@ -4129,11 +4213,14 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
if (svm->nested.nested_run_pending)
return -EBUSY;
+ if (svm_smi_blocked(vcpu))
+ return 0;
+
/* An SMI must not be injected into L2 if it's supposed to VM-Exit. */
if (for_injection && is_guest_mode(vcpu) && nested_exit_on_smi(svm))
return -EBUSY;
- return !svm_smi_blocked(vcpu);
+ return 1;
}
static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
@@ -4227,11 +4314,18 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
* Enter the nested guest now
*/
+ vmcb_mark_all_dirty(svm->vmcb01.ptr);
+
vmcb12 = map.hva;
nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
+ if (ret)
+ goto unmap_save;
+
+ svm->nested.nested_run_pending = 1;
+
unmap_save:
kvm_vcpu_unmap(vcpu, &map_save, true);
unmap_map:
@@ -4252,79 +4346,140 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu)
}
}
-static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len)
+static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
+ void *insn, int insn_len)
{
bool smep, smap, is_user;
unsigned long cr4;
+ u64 error_code;
+
+ /* Emulation is always possible when KVM has access to all guest state. */
+ if (!sev_guest(vcpu->kvm))
+ return true;
+
+ /* #UD and #GP should never be intercepted for SEV guests. */
+ WARN_ON_ONCE(emul_type & (EMULTYPE_TRAP_UD |
+ EMULTYPE_TRAP_UD_FORCED |
+ EMULTYPE_VMWARE_GP));
/*
- * When the guest is an SEV-ES guest, emulation is not possible.
+ * Emulation is impossible for SEV-ES guests as KVM doesn't have access
+ * to guest register state.
*/
if (sev_es_guest(vcpu->kvm))
return false;
/*
+ * Emulation is possible if the instruction is already decoded, e.g.
+ * when completing I/O after returning from userspace.
+ */
+ if (emul_type & EMULTYPE_NO_DECODE)
+ return true;
+
+ /*
+ * Emulation is possible for SEV guests if and only if a prefilled
+ * buffer containing the bytes of the intercepted instruction is
+ * available. SEV guest memory is encrypted with a guest specific key
+ * and cannot be decrypted by KVM, i.e. KVM would read ciphertext and
+ * decode garbage.
+ *
+ * Inject #UD if KVM reached this point without an instruction buffer.
+ * In practice, this path should never be hit by a well-behaved guest,
+ * e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path
+ * is still theoretically reachable, e.g. via unaccelerated fault-like
+ * AVIC access, and needs to be handled by KVM to avoid putting the
+ * guest into an infinite loop. Injecting #UD is somewhat arbitrary,
+ * but it's the least awful option given the lack of insight into the guest.
+ */
+ if (unlikely(!insn)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return false;
+ }
+
+ /*
+ * Emulate for SEV guests if the insn buffer is not empty. The buffer
+ * will be empty if the DecodeAssist microcode cannot fetch bytes for
+ * the faulting instruction because the code fetch itself faulted, e.g.
+ * the guest attempted to fetch from emulated MMIO or a guest page
+ * table used to translate CS:RIP resides in emulated MMIO.
+ */
+ if (likely(insn_len))
+ return true;
+
+ /*
* Detect and workaround Errata 1096 Fam_17h_00_0Fh.
*
* Errata:
- * When CPU raise #NPF on guest data access and vCPU CR4.SMAP=1, it is
- * possible that CPU microcode implementing DecodeAssist will fail
- * to read bytes of instruction which caused #NPF. In this case,
- * GuestIntrBytes field of the VMCB on a VMEXIT will incorrectly
- * return 0 instead of the correct guest instruction bytes.
- *
- * This happens because CPU microcode reading instruction bytes
- * uses a special opcode which attempts to read data using CPL=0
- * privileges. The microcode reads CS:RIP and if it hits a SMAP
- * fault, it gives up and returns no instruction bytes.
+ * When CPU raises #NPF on guest data access and vCPU CR4.SMAP=1, it is
+ * possible that CPU microcode implementing DecodeAssist will fail to
+ * read guest memory at CS:RIP and vmcb.GuestIntrBytes will incorrectly
+ * be '0'. This happens because microcode reads CS:RIP using a _data_
+ * load uop with CPL=0 privileges. If the load hits a SMAP #PF, ucode
+ * gives up and does not fill the instruction bytes buffer.
*
- * Detection:
- * We reach here in case CPU supports DecodeAssist, raised #NPF and
- * returned 0 in GuestIntrBytes field of the VMCB.
- * First, errata can only be triggered in case vCPU CR4.SMAP=1.
- * Second, if vCPU CR4.SMEP=1, errata could only be triggered
- * in case vCPU CPL==3 (Because otherwise guest would have triggered
- * a SMEP fault instead of #NPF).
- * Otherwise, vCPU CR4.SMEP=0, errata could be triggered by any vCPU CPL.
- * As most guests enable SMAP if they have also enabled SMEP, use above
- * logic in order to attempt minimize false-positive of detecting errata
- * while still preserving all cases semantic correctness.
+ * As above, KVM reaches this point iff the VM is an SEV guest, the CPU
+ * supports DecodeAssist, a #NPF was raised, KVM's page fault handler
+ * triggered emulation (e.g. for MMIO), and the CPU returned 0 in the
+ * GuestIntrBytes field of the VMCB.
*
- * Workaround:
- * To determine what instruction the guest was executing, the hypervisor
- * will have to decode the instruction at the instruction pointer.
+ * This does _not_ mean that the erratum has been encountered, as the
+ * DecodeAssist will also fail if the load for CS:RIP hits a legitimate
+ * #PF, e.g. if the guest attempts to execute from emulated MMIO and
+ * encountered a reserved/not-present #PF.
*
- * In non SEV guest, hypervisor will be able to read the guest
- * memory to decode the instruction pointer when insn_len is zero
- * so we return true to indicate that decoding is possible.
+ * To hit the erratum, the following conditions must be true:
+ * 1. CR4.SMAP=1 (obviously).
+ * 2. CR4.SMEP=0 || CPL=3. If SMEP=1 and CPL<3, the erratum cannot
+ * have been hit as the guest would have encountered a SMEP
+ * violation #PF, not a #NPF.
+ * 3. The #NPF is not due to a code fetch, in which case failure to
+ * retrieve the instruction bytes is legitimate (see above).
*
- * But in the SEV guest, the guest memory is encrypted with the
- * guest specific key and hypervisor will not be able to decode the
- * instruction pointer so we will not able to workaround it. Lets
- * print the error and request to kill the guest.
- */
- if (likely(!insn || insn_len))
- return true;
-
- /*
- * If RIP is invalid, go ahead with emulation which will cause an
- * internal error exit.
+ * In addition, don't apply the erratum workaround if the #NPF occurred
+ * while translating guest page tables (see below).
*/
- if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
- return true;
+ error_code = to_svm(vcpu)->vmcb->control.exit_info_1;
+ if (error_code & (PFERR_GUEST_PAGE_MASK | PFERR_FETCH_MASK))
+ goto resume_guest;
cr4 = kvm_read_cr4(vcpu);
smep = cr4 & X86_CR4_SMEP;
smap = cr4 & X86_CR4_SMAP;
is_user = svm_get_cpl(vcpu) == 3;
if (smap && (!smep || is_user)) {
- if (!sev_guest(vcpu->kvm))
- return true;
-
pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n");
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+
+ /*
+ * If the fault occurred in userspace, arbitrarily inject #GP
+ * to avoid killing the guest and to hopefully avoid confusing
+ * the guest kernel too much, e.g. injecting #PF would not be
+ * coherent with respect to the guest's page tables. Request
+ * triple fault if the fault occurred in the kernel as there's
+ * no fault that KVM can inject without confusing the guest.
+ * In practice, the triple fault is moot as no sane SEV kernel
+ * will execute from user memory while also running with SMAP=1.
+ */
+ if (is_user)
+ kvm_inject_gp(vcpu, 0);
+ else
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
}
+resume_guest:
+ /*
+ * If the erratum was not hit, simply resume the guest and let it fault
+ * again. While awful, e.g. the vCPU may get stuck in an infinite loop
+ * if the fault is at CPL=0, it's the lesser of all evils. Exiting to
+ * userspace will kill the guest, and letting the emulator read garbage
+ * will yield random behavior and potentially corrupt the guest.
+ *
+ * Simply resuming the guest is technically not a violation of the SEV
+ * architecture. AMD's APM states that all code fetches and page table
+ * accesses for SEV guests are encrypted, regardless of the C-Bit. The
+ * APM also states that encrypted accesses to MMIO are "ignored", but
+ * doesn't explicitly define "ignored", i.e. doing nothing and letting
+ * the guest spin is technically "ignoring" the access.
+ */
return false;
}
@@ -4478,7 +4633,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.pmu_ops = &amd_pmu_ops,
.nested_ops = &svm_nested_ops,
- .deliver_posted_interrupt = svm_deliver_avic_intr,
+ .deliver_interrupt = svm_deliver_interrupt,
.dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
.update_pi_irte = svm_update_pi_irte,
.setup_mce = svm_setup_mce,
@@ -4555,6 +4710,7 @@ static __init void svm_set_cpu_caps(void)
/* CPUID 0x80000001 and 0x8000000A (SVM features) */
if (nested) {
kvm_cpu_cap_set(X86_FEATURE_SVM);
+ kvm_cpu_cap_set(X86_FEATURE_VMCBCLEAN);
if (nrips)
kvm_cpu_cap_set(X86_FEATURE_NRIPS);
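
[Editor's note] The ordering contract in svm_complete_interrupt_delivery() — vcpu->mode is read with acquire semantics, pairing with a release store in vcpu_enter_guest — is the classic message-passing pattern. A self-contained C11 illustration of the pairing (threads and variable names are invented for the example; the calls below run sequentially just to show the observable states):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_int mode;    /* 0 = outside guest, 1 = IN_GUEST_MODE */
    static atomic_bool apicv_active;

    static void vcpu_enter_guest(void)
    {
        atomic_store_explicit(&apicv_active, true, memory_order_relaxed);
        /* release: everything above is visible once mode reads 1 */
        atomic_store_explicit(&mode, 1, memory_order_release);
    }

    static void deliver_interrupt(void)
    {
        /* acquire: pairs with the release store above */
        bool in_guest = atomic_load_explicit(&mode,
                                             memory_order_acquire) == 1;

        if (in_guest && atomic_load_explicit(&apicv_active,
                                             memory_order_relaxed))
            puts("ring doorbell");    /* state seen consistently */
        else
            puts("kick/wake vCPU");
    }

    int main(void)
    {
        deliver_interrupt();    /* "kick/wake vCPU" */
        vcpu_enter_guest();
        deliver_interrupt();    /* "ring doorbell" */
        return 0;
    }
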
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 47ef8f4a9358..fa98d6844728 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -304,11 +304,6 @@ static inline void vmcb_mark_all_clean(struct vmcb *vmcb)
& ~VMCB_ALWAYS_DIRTY_MASK;
}
-static inline bool vmcb_is_clean(struct vmcb *vmcb, int bit)
-{
- return (vmcb->control.clean & (1 << bit));
-}
-
static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit)
{
vmcb->control.clean &= ~(1 << bit);
@@ -494,6 +489,8 @@ void svm_set_gif(struct vcpu_svm *svm, bool value);
int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code);
void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
int read, int write);
+void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
+ int trig_mode, int vec);
/* nested.c */
@@ -525,7 +522,7 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun);
-void svm_leave_nested(struct vcpu_svm *svm);
+void svm_leave_nested(struct kvm_vcpu *vcpu);
void svm_free_nested(struct vcpu_svm *svm);
int svm_allocate_nested(struct vcpu_svm *svm);
int nested_svm_vmrun(struct kvm_vcpu *vcpu);
@@ -561,17 +558,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
/* avic.c */
-#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
-#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
-#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
-
-#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
-#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
-#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
-#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
-
-#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
-
int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
@@ -588,12 +574,12 @@ bool svm_check_apicv_inhibit_reasons(ulong bit);
void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr);
void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr);
-int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec);
bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu);
int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
+void avic_ring_doorbell(struct kvm_vcpu *vcpu);
/* sev.c */
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index c53b8bf8d013..489ca56212c6 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -46,6 +46,9 @@ static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
if (npt_enabled &&
ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB)
hve->hv_enlightenments_control.enlightened_npt_tlb = 1;
+
+ if (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)
+ hve->hv_enlightenments_control.msr_bitmap = 1;
}
static inline void svm_hv_hardware_setup(void)
@@ -83,14 +86,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments(
struct hv_enlightenments *hve =
(struct hv_enlightenments *)vmcb->control.reserved_sw;
- /*
- * vmcb can be NULL if called during early vcpu init.
- * And its okay not to mark vmcb dirty during vcpu init
- * as we mark it dirty unconditionally towards end of vcpu
- * init phase.
- */
- if (vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
- hve->hv_enlightenments_control.msr_bitmap)
+ if (hve->hv_enlightenments_control.msr_bitmap)
vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
}
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 959b59d13b5a..3f430e218375 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -54,7 +54,6 @@ struct nested_vmx_msrs {
struct vmcs_config {
int size;
- int order;
u32 basic_cap;
u32 revision_id;
u32 pin_based_exec_ctrl;
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index ba6f99f584ac..87e3dc10edf4 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -12,8 +12,6 @@
DEFINE_STATIC_KEY_FALSE(enable_evmcs);
-#if IS_ENABLED(CONFIG_HYPERV)
-
#define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x)
#define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \
{EVMCS1_OFFSET(name), clean_field}
@@ -296,6 +294,7 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = {
};
const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1);
+#if IS_ENABLED(CONFIG_HYPERV)
__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
{
vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL;
@@ -362,6 +361,7 @@ void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
case MSR_IA32_VMX_PROCBASED_CTLS2:
ctl_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
break;
+ case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
case MSR_IA32_VMX_PINBASED_CTLS:
ctl_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
break;
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
index 16731d2cf231..8d70f9aea94b 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/evmcs.h
@@ -59,12 +59,12 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs);
SECONDARY_EXEC_SHADOW_VMCS | \
SECONDARY_EXEC_TSC_SCALING | \
SECONDARY_EXEC_PAUSE_LOOP_EXITING)
-#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL \
+ (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \
+ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
#define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
-#if IS_ENABLED(CONFIG_HYPERV)
-
struct evmcs_field {
u16 offset;
u16 clean_field;
@@ -73,26 +73,56 @@ struct evmcs_field {
extern const struct evmcs_field vmcs_field_to_evmcs_1[];
extern const unsigned int nr_evmcs_1_fields;
-static __always_inline int get_evmcs_offset(unsigned long field,
- u16 *clean_field)
+static __always_inline int evmcs_field_offset(unsigned long field,
+ u16 *clean_field)
{
unsigned int index = ROL16(field, 6);
const struct evmcs_field *evmcs_field;
- if (unlikely(index >= nr_evmcs_1_fields)) {
- WARN_ONCE(1, "KVM: accessing unsupported EVMCS field %lx\n",
- field);
+ if (unlikely(index >= nr_evmcs_1_fields))
return -ENOENT;
- }
evmcs_field = &vmcs_field_to_evmcs_1[index];
+ /*
+ * Use offset=0 to detect holes in eVMCS. This offset belongs to
+ * 'revision_id' but this field has no encoding and is supposed to
+ * be accessed directly.
+ */
+ if (unlikely(!evmcs_field->offset))
+ return -ENOENT;
+
if (clean_field)
*clean_field = evmcs_field->clean_field;
return evmcs_field->offset;
}
+static inline u64 evmcs_read_any(struct hv_enlightened_vmcs *evmcs,
+ unsigned long field, u16 offset)
+{
+ /*
+ * vmcs12_read_any() doesn't care whether the supplied structure
+ * is 'struct vmcs12' or 'struct hv_enlightened_vmcs' as it takes
+ * the exact offset of the required field; use it for convenience
+ * here.
+ */
+ return vmcs12_read_any((void *)evmcs, field, offset);
+}
+
+#if IS_ENABLED(CONFIG_HYPERV)
+
+static __always_inline int get_evmcs_offset(unsigned long field,
+ u16 *clean_field)
+{
+ int offset = evmcs_field_offset(field, clean_field);
+
+ WARN_ONCE(offset < 0, "KVM: accessing unsupported EVMCS field %lx\n",
+ field);
+
+ return offset;
+}
+
static __always_inline void evmcs_write64(unsigned long field, u64 value)
{
u16 clean_field;
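
The offset==0 hole convention in evmcs_field_offset() is easy to see in isolation. A hedged, self-contained sketch (the table contents below are invented; only the lookup shape mirrors the code above):

#include <errno.h>
#include <stdio.h>

struct field_info {
	unsigned short offset;
};

/* Sparse table: unset entries default to offset 0, which doubles as the
 * "unsupported field" marker, mirroring the eVMCS convention that offset
 * 0 (revision_id) has no field encoding. */
static const struct field_info table[] = {
	[1] = { .offset = 16 },
	[3] = { .offset = 24 },
};

static int field_offset(unsigned int index)
{
	if (index >= sizeof(table) / sizeof(table[0]))
		return -ENOENT;
	if (!table[index].offset)
		return -ENOENT;		/* hole in the table */
	return table[index].offset;
}

int main(void)
{
	printf("%d %d %d\n", field_offset(1), field_offset(2), field_offset(9));
	return 0;	/* prints "16 -2 -2" */
}
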
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index f235f77cbc03..dc822a1d403d 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -7,6 +7,7 @@
#include <asm/mmu_context.h>
#include "cpuid.h"
+#include "evmcs.h"
#include "hyperv.h"
#include "mmu.h"
#include "nested.h"
@@ -245,8 +246,7 @@ static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
src = &prev->host_state;
dest = &vmx->loaded_vmcs->host_state;
- vmx_set_vmcs_host_state(dest, src->cr3, src->fs_sel, src->gs_sel,
- src->fs_base, src->gs_base);
+ vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
dest->ldt_sel = src->ldt_sel;
#ifdef CONFIG_X86_64
dest->ds_sel = src->ds_sel;
@@ -3055,7 +3055,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long cr4;
+ unsigned long cr3, cr4;
bool vm_fail;
if (!nested_early_check)
@@ -3078,6 +3078,12 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
*/
vmcs_writel(GUEST_RFLAGS, 0);
+ cr3 = __get_current_cr3_fast();
+ if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
+ vmcs_writel(HOST_CR3, cr3);
+ vmx->loaded_vmcs->host_state.cr3 = cr3;
+ }
+
cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
vmcs_writel(HOST_CR4, cr4);
@@ -4851,18 +4857,20 @@ static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
/*
- * We should allocate a shadow vmcs for vmcs01 only when L1
- * executes VMXON and free it when L1 executes VMXOFF.
- * As it is invalid to execute VMXON twice, we shouldn't reach
- * here when vmcs01 already have an allocated shadow vmcs.
+ * KVM allocates a shadow VMCS only when L1 executes VMXON and frees it
+ * when L1 executes VMXOFF or the vCPU is forced out of nested
+ * operation. VMXON faults if the CPU is already post-VMXON, so it
+ * should be impossible to already have an allocated shadow VMCS. KVM
+ * doesn't support virtualization of VMCS shadowing, so vmcs01 should
+ * always be the loaded VMCS.
*/
- WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);
+ if (WARN_ON(loaded_vmcs != &vmx->vmcs01 || loaded_vmcs->shadow_vmcs))
+ return loaded_vmcs->shadow_vmcs;
+
+ loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
+ if (loaded_vmcs->shadow_vmcs)
+ vmcs_clear(loaded_vmcs->shadow_vmcs);
- if (!loaded_vmcs->shadow_vmcs) {
- loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
- if (loaded_vmcs->shadow_vmcs)
- vmcs_clear(loaded_vmcs->shadow_vmcs);
- }
return loaded_vmcs->shadow_vmcs;
}
@@ -5099,27 +5107,49 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
- /*
- * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA,
- * any VMREAD sets the ALU flags for VMfailInvalid.
- */
- if (vmx->nested.current_vmptr == INVALID_GPA ||
- (is_guest_mode(vcpu) &&
- get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA))
- return nested_vmx_failInvalid(vcpu);
-
/* Decode instruction info and find the field to read */
field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
- offset = vmcs_field_to_offset(field);
- if (offset < 0)
- return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+ if (!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
+ /*
+ * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA,
+ * any VMREAD sets the ALU flags for VMfailInvalid.
+ */
+ if (vmx->nested.current_vmptr == INVALID_GPA ||
+ (is_guest_mode(vcpu) &&
+ get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA))
+ return nested_vmx_failInvalid(vcpu);
- if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
- copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
+ offset = get_vmcs12_field_offset(field);
+ if (offset < 0)
+ return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+
+ if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
+ copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
- /* Read the field, zero-extended to a u64 value */
- value = vmcs12_read_any(vmcs12, field, offset);
+ /* Read the field, zero-extended to a u64 value */
+ value = vmcs12_read_any(vmcs12, field, offset);
+ } else {
+ /*
+ * Hyper-V TLFS (as of 6.0b) explicitly states that, while an
+ * enlightened VMCS is active, VMREAD/VMWRITE instructions are
+ * unsupported. Unfortunately, certain versions of Windows 11
+ * don't comply with this requirement which is not enforced in
+ * genuine Hyper-V. Allow VMREAD from an enlightened VMCS as a
+ * workaround, as misbehaving guests will panic on VM-Fail.
+ * Note, enlightened VMCS is incompatible with shadow VMCS so
+ * all VMREADs from L2 should go to L1.
+ */
+ if (WARN_ON_ONCE(is_guest_mode(vcpu)))
+ return nested_vmx_failInvalid(vcpu);
+
+ offset = evmcs_field_offset(field, NULL);
+ if (offset < 0)
+ return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+
+ /* Read the field, zero-extended to a u64 value */
+ value = evmcs_read_any(vmx->nested.hv_evmcs, field, offset);
+ }
/*
* Now copy part of this value to register or memory, as requested.
@@ -5214,7 +5244,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
- offset = vmcs_field_to_offset(field);
+ offset = get_vmcs12_field_offset(field);
if (offset < 0)
return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
@@ -6462,7 +6492,7 @@ static u64 nested_vmx_calc_vmcs_enum_msr(void)
max_idx = 0;
for (i = 0; i < nr_vmcs12_fields; i++) {
/* The vmcs12 table is very, very sparsely populated. */
- if (!vmcs_field_to_offset_table[i])
+ if (!vmcs12_field_offsets[i])
continue;
idx = vmcs_field_index(VMCS12_IDX_TO_ENC(i));
@@ -6771,6 +6801,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
}
struct kvm_x86_nested_ops vmx_nested_ops = {
+ .leave_nested = vmx_leave_nested,
.check_events = vmx_check_nested_events,
.hv_timer_pending = nested_vmx_preemption_timer_pending,
.triple_fault = nested_vmx_triple_fault,
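
The `(instr_info >> 28) & 0xf` decode used by handle_vmread()/handle_vmwrite() extracts the register operand from bits 31:28 of the VM-instruction information field (SDM Vol. 3). A standalone sketch of just that decode, with an invented sample value:

#include <stdint.h>
#include <stdio.h>

/* Bits 31:28 of the VMX instruction-info field select the second
 * register operand. */
static unsigned int insn_info_reg2(uint32_t instr_info)
{
	return (instr_info >> 28) & 0xf;
}

int main(void)
{
	printf("reg2 = %u\n", insn_info_reg2(0xd0000000u));	/* 13, i.e. r13 */
	return 0;
}
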
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 466d18fc0c5d..9b26596099a1 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -565,7 +565,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
pmu->gp_counters[i].current_config = 0;
}
- for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
+ for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
pmu->fixed_counters[i].type = KVM_PMC_FIXED;
pmu->fixed_counters[i].vcpu = vcpu;
pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
@@ -591,7 +591,7 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
pmc->counter = pmc->eventsel = 0;
}
- for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
+ for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
pmc = &pmu->fixed_counters[i];
pmc_stop_counter(pmc);
diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c
index cab6ba7a5005..2251b60920f8 100644
--- a/arch/x86/kvm/vmx/vmcs12.c
+++ b/arch/x86/kvm/vmx/vmcs12.c
@@ -8,7 +8,7 @@
FIELD(number, name), \
[ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32)
-const unsigned short vmcs_field_to_offset_table[] = {
+const unsigned short vmcs12_field_offsets[] = {
FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
FIELD(POSTED_INTR_NV, posted_intr_nv),
FIELD(GUEST_ES_SELECTOR, guest_es_selector),
@@ -151,4 +151,4 @@ const unsigned short vmcs_field_to_offset_table[] = {
FIELD(HOST_RSP, host_rsp),
FIELD(HOST_RIP, host_rip),
};
-const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs_field_to_offset_table);
+const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs12_field_offsets);
diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
index 2a45f026ee11..746129ddd5ae 100644
--- a/arch/x86/kvm/vmx/vmcs12.h
+++ b/arch/x86/kvm/vmx/vmcs12.h
@@ -361,10 +361,10 @@ static inline void vmx_check_vmcs12_offsets(void)
CHECK_OFFSET(guest_pml_index, 996);
}
-extern const unsigned short vmcs_field_to_offset_table[];
+extern const unsigned short vmcs12_field_offsets[];
extern const unsigned int nr_vmcs12_fields;
-static inline short vmcs_field_to_offset(unsigned long field)
+static inline short get_vmcs12_field_offset(unsigned long field)
{
unsigned short offset;
unsigned int index;
@@ -377,7 +377,7 @@ static inline short vmcs_field_to_offset(unsigned long field)
return -ENOENT;
index = array_index_nospec(index, nr_vmcs12_fields);
- offset = vmcs_field_to_offset_table[index];
+ offset = vmcs12_field_offsets[index];
if (offset == 0)
return -ENOENT;
return offset;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4ac676066d60..b730d799c26e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1080,14 +1080,9 @@ static void pt_guest_exit(struct vcpu_vmx *vmx)
wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
}
-void vmx_set_vmcs_host_state(struct vmcs_host_state *host, unsigned long cr3,
- u16 fs_sel, u16 gs_sel,
- unsigned long fs_base, unsigned long gs_base)
+void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
+ unsigned long fs_base, unsigned long gs_base)
{
- if (unlikely(cr3 != host->cr3)) {
- vmcs_writel(HOST_CR3, cr3);
- host->cr3 = cr3;
- }
if (unlikely(fs_sel != host->fs_sel)) {
if (!(fs_sel & 7))
vmcs_write16(HOST_FS_SELECTOR, fs_sel);
@@ -1182,9 +1177,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
gs_base = segment_base(gs_sel);
#endif
- vmx_set_vmcs_host_state(host_state, __get_current_cr3_fast(),
- fs_sel, gs_sel, fs_base, gs_base);
-
+ vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
vmx->guest_state_loaded = true;
}
@@ -1487,11 +1480,12 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
return 0;
}
-static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len)
+static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
+ void *insn, int insn_len)
{
/*
* Emulation of instructions in SGX enclaves is impossible as RIP does
- * not point tthe failing instruction, and even if it did, the code
+ * not point at the failing instruction, and even if it did, the code
* stream is inaccessible. Inject #UD instead of exiting to userspace
* so that guest userspace can't DoS the guest simply by triggering
* emulation (enclaves are CPL3 only).
@@ -2603,7 +2597,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
return -EIO;
vmcs_conf->size = vmx_msr_high & 0x1fff;
- vmcs_conf->order = get_order(vmcs_conf->size);
vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
vmcs_conf->revision_id = vmx_msr_low;
@@ -2628,7 +2621,7 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
struct page *pages;
struct vmcs *vmcs;
- pages = __alloc_pages_node(node, flags, vmcs_config.order);
+ pages = __alloc_pages_node(node, flags, 0);
if (!pages)
return NULL;
vmcs = page_address(pages);
@@ -2647,7 +2640,7 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
void free_vmcs(struct vmcs *vmcs)
{
- free_pages((unsigned long)vmcs, vmcs_config.order);
+ free_page((unsigned long)vmcs);
}
/*
@@ -4041,6 +4034,21 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
return 0;
}
+static void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
+ int trig_mode, int vector)
+{
+ struct kvm_vcpu *vcpu = apic->vcpu;
+
+ if (vmx_deliver_posted_interrupt(vcpu, vector)) {
+ kvm_lapic_set_irr(vector, apic);
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_vcpu_kick(vcpu);
+ } else {
+ trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
+ trig_mode, vector);
+ }
+}
+
/*
* Set up the vmcs's constant host-state fields, i.e., host-state fields that
* will not change in the lifetime of the guest.
@@ -4094,10 +4102,14 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
/*
- * If 32-bit syscall is enabled, vmx_vcpu_load_vcms rewrites
- * HOST_IA32_SYSENTER_ESP.
+ * SYSENTER is used for 32-bit system calls on either 32-bit or
+ * 64-bit kernels. It is always zero if neither is allowed, otherwise
+ * vmx_vcpu_load_vmcs loads it with the per-CPU entry stack (and may
+ * have already done so!).
*/
- vmcs_writel(HOST_IA32_SYSENTER_ESP, 0);
+ if (!IS_ENABLED(CONFIG_IA32_EMULATION) && !IS_ENABLED(CONFIG_X86_32))
+ vmcs_writel(HOST_IA32_SYSENTER_ESP, 0);
+
rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */
@@ -4901,8 +4913,33 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
dr6 = vmx_get_exit_qual(vcpu);
if (!(vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+ /*
+ * If the #DB was due to ICEBP, a.k.a. INT1, skip the
+ * instruction. ICEBP generates a trap-like #DB, but
+ * despite its interception control being tied to #DB,
+ * is an instruction intercept, i.e. the VM-Exit occurs
+ * on the ICEBP itself. Note, skipping ICEBP also
+ * clears STI and MOVSS blocking.
+ *
+ * For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS
+ * if single-step is enabled in RFLAGS and STI or MOVSS
+ * blocking is active, as the CPU doesn't set the bit
+ * on VM-Exit due to #DB interception. VM-Entry has a
+ * consistency check that a single-step #DB is pending
+ * in this scenario as the previous instruction cannot
+ * have toggled RFLAGS.TF 0=>1 (because STI and POP/MOV
+ * don't modify RFLAGS), therefore the one instruction
+ * delay when activating single-step breakpoints must
+ * have already expired. Note, the CPU sets/clears BS
+ * as appropriate for all other VM-Exit types.
+ */
if (is_icebp(intr_info))
WARN_ON(!skip_emulated_instruction(vcpu));
+ else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
+ (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+ (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)))
+ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+ vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);
kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
return 1;
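
The new single-step condition above can be pulled out as a small predicate. A hedged sketch using the same bit encodings (X86_EFLAGS_TF is bit 8; the interruptibility-state STI/MOV-SS bits are 0x1 and 0x2 in the VMCS):

#include <stdbool.h>
#include <stdio.h>

#define X86_EFLAGS_TF		(1ul << 8)
#define GUEST_INTR_STATE_STI	0x1u
#define GUEST_INTR_STATE_MOV_SS	0x2u

/* BS must be injected manually when single-step is armed and an STI or
 * MOV-SS interruptibility shadow is active at #DB interception. */
static bool needs_pending_bs(unsigned long rflags, unsigned int intr_state)
{
	return (rflags & X86_EFLAGS_TF) &&
	       (intr_state & (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
}

int main(void)
{
	printf("%d\n", needs_pending_bs(X86_EFLAGS_TF, GUEST_INTR_STATE_STI)); /* 1 */
	printf("%d\n", needs_pending_bs(X86_EFLAGS_TF, 0));                    /* 0 */
	return 0;
}
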
@@ -5397,7 +5434,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
{
gpa_t gpa;
- if (!vmx_can_emulate_instruction(vcpu, NULL, 0))
+ if (!vmx_can_emulate_instruction(vcpu, EMULTYPE_PF, NULL, 0))
return 1;
/*
@@ -6725,7 +6762,7 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
struct vcpu_vmx *vmx)
{
- kvm_guest_enter_irqoff();
+ guest_state_enter_irqoff();
/* L1D Flush includes CPU buffer clear to mitigate MDS */
if (static_branch_unlikely(&vmx_l1d_should_flush))
@@ -6741,13 +6778,13 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vcpu->arch.cr2 = native_read_cr2();
- kvm_guest_exit_irqoff();
+ guest_state_exit_irqoff();
}
static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long cr4;
+ unsigned long cr3, cr4;
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!enable_vnmi &&
@@ -6790,6 +6827,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
vcpu->arch.regs_dirty = 0;
+ /*
+ * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately
+ * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
+ * it switches back to the current->mm, which can occur in KVM context
+ * when switching to a temporary mm to patch kernel code, e.g. if KVM
+ * toggles a static key while handling a VM-Exit.
+ */
+ cr3 = __get_current_cr3_fast();
+ if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
+ vmcs_writel(HOST_CR3, cr3);
+ vmx->loaded_vmcs->host_state.cr3 = cr3;
+ }
+
cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
vmcs_writel(HOST_CR4, cr4);
@@ -7615,6 +7665,7 @@ static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
if (ret)
return ret;
+ vmx->nested.nested_run_pending = 1;
vmx->nested.smm.guest_mode = false;
}
return 0;
@@ -7739,7 +7790,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.hwapic_isr_update = vmx_hwapic_isr_update,
.guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
.sync_pir_to_irr = vmx_sync_pir_to_irr,
- .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
+ .deliver_interrupt = vmx_deliver_interrupt,
.dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,
.set_tss_addr = vmx_set_tss_addr,
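
Both HOST_CR3 hunks in this file use the same shape: compare against a cached copy and only issue the (expensive) VMWRITE on a mismatch. A minimal sketch of that cached-write pattern, with vmwrite() standing in for vmcs_writel():

#include <stdio.h>

struct host_state {
	unsigned long cr3;
};

static void vmwrite(unsigned long val)
{
	printf("VMWRITE HOST_CR3 = %#lx\n", val);	/* stand-in */
}

/* Only touch the VMCS when the value actually changed, e.g. because the
 * kernel picked a new PCID for current->mm. */
static void refresh_host_cr3(struct host_state *hs, unsigned long cr3)
{
	if (cr3 != hs->cr3) {
		vmwrite(cr3);
		hs->cr3 = cr3;
	}
}

int main(void)
{
	struct host_state hs = { 0 };

	refresh_host_cr3(&hs, 0x1000);	/* writes */
	refresh_host_cr3(&hs, 0x1000);	/* cached; no write */
	refresh_host_cr3(&hs, 0x2000);	/* changed; writes again */
	return 0;
}
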
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 7f2c82e7f38f..9c6bfcd84008 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -374,9 +374,8 @@ int allocate_vpid(void);
void free_vpid(int vpid);
void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
-void vmx_set_vmcs_host_state(struct vmcs_host_state *host, unsigned long cr3,
- u16 fs_sel, u16 gs_sel,
- unsigned long fs_base, unsigned long gs_base);
+void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
+ unsigned long fs_base, unsigned long gs_base);
int vmx_get_cpl(struct kvm_vcpu *vcpu);
bool vmx_emulation_required(struct kvm_vcpu *vcpu);
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9e43d756312f..6db3a506b402 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -90,6 +90,8 @@
u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
+#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e))
+
#define emul_to_vcpu(ctxt) \
((struct kvm_vcpu *)(ctxt)->vcpu)
@@ -982,6 +984,18 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state);
+static inline u64 kvm_guest_supported_xcr0(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.guest_fpu.fpstate->user_xfeatures;
+}
+
+#ifdef CONFIG_X86_64
+static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu)
+{
+ return kvm_guest_supported_xcr0(vcpu) & XFEATURE_MASK_USER_DYNAMIC;
+}
+#endif
+
static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
u64 xcr0 = xcr;
@@ -1001,7 +1015,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
* saving. However, xcr0 bit 0 is always set, even if the
* emulated CPU does not support XSAVE (see kvm_vcpu_reset()).
*/
- valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
+ valid_bits = kvm_guest_supported_xcr0(vcpu) | XFEATURE_MASK_FP;
if (xcr0 & ~valid_bits)
return 1;
@@ -1735,7 +1749,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
* value, and that something deterministic happens if the guest
* invokes 64-bit SYSENTER.
*/
- data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
+ data = __canonical_address(data, vcpu_virt_addr_bits(vcpu));
break;
case MSR_TSC_AUX:
if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
@@ -2349,10 +2363,12 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
return tsc;
}
+#ifdef CONFIG_X86_64
static inline int gtod_is_based_on_tsc(int mode)
{
return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK;
}
+#endif
static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
{
@@ -3535,6 +3551,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data & ~supported_xss)
return 1;
vcpu->arch.ia32_xss = data;
+ kvm_update_cpuid_runtime(vcpu);
break;
case MSR_SMI_COUNT:
if (!msr_info->host_initiated)
@@ -3703,8 +3720,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
!guest_cpuid_has(vcpu, X86_FEATURE_XFD))
return 1;
- if (data & ~(XFEATURE_MASK_USER_DYNAMIC &
- vcpu->arch.guest_supported_xcr0))
+ if (data & ~kvm_guest_supported_xfd(vcpu))
return 1;
fpu_update_guest_xfd(&vcpu->arch.guest_fpu, data);
@@ -3714,8 +3730,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
!guest_cpuid_has(vcpu, X86_FEATURE_XFD))
return 1;
- if (data & ~(XFEATURE_MASK_USER_DYNAMIC &
- vcpu->arch.guest_supported_xcr0))
+ if (data & ~kvm_guest_supported_xfd(vcpu))
return 1;
vcpu->arch.guest_fpu.xfd_err = data;
@@ -4229,6 +4244,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SREGS2:
case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
case KVM_CAP_VCPU_ATTRIBUTES:
+ case KVM_CAP_SYS_ATTRIBUTES:
+ case KVM_CAP_ENABLE_CAP:
r = 1;
break;
case KVM_CAP_EXIT_HYPERCALL:
@@ -4331,7 +4348,49 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
}
return r;
+}
+
+static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr)
+{
+ void __user *uaddr = (void __user *)(unsigned long)attr->addr;
+ if ((u64)(unsigned long)uaddr != attr->addr)
+ return ERR_PTR_USR(-EFAULT);
+ return uaddr;
+}
+
+static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr)
+{
+ u64 __user *uaddr = kvm_get_attr_addr(attr);
+
+ if (attr->group)
+ return -ENXIO;
+
+ if (IS_ERR(uaddr))
+ return PTR_ERR(uaddr);
+
+ switch (attr->attr) {
+ case KVM_X86_XCOMP_GUEST_SUPP:
+ if (put_user(supported_xcr0, uaddr))
+ return -EFAULT;
+ return 0;
+ default:
+ return -ENXIO;
+ break;
+ }
+}
+
+static int kvm_x86_dev_has_attr(struct kvm_device_attr *attr)
+{
+ if (attr->group)
+ return -ENXIO;
+
+ switch (attr->attr) {
+ case KVM_X86_XCOMP_GUEST_SUPP:
+ return 0;
+ default:
+ return -ENXIO;
+ }
}
long kvm_arch_dev_ioctl(struct file *filp,
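
kvm_get_attr_addr() guards against a 64-bit attr->addr that doesn't survive the cast to a native pointer, which matters on 32-bit kernels. A userspace sketch of the same round-trip check:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Cast the u64 down to a pointer and back; if the value doesn't survive
 * the round trip it can't be a valid user address on this build. On a
 * 64-bit host this never fails. */
static void *attr_addr(uint64_t addr, int *err)
{
	void *uaddr = (void *)(unsigned long)addr;

	if ((uint64_t)(unsigned long)uaddr != addr) {
		*err = -EFAULT;
		return NULL;
	}
	*err = 0;
	return uaddr;
}

int main(void)
{
	int err;

	attr_addr(0x1000, &err);
	printf("err = %d\n", err);	/* 0 on LP64 */
	return 0;
}
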
@@ -4422,6 +4481,22 @@ long kvm_arch_dev_ioctl(struct file *filp,
case KVM_GET_SUPPORTED_HV_CPUID:
r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp);
break;
+ case KVM_GET_DEVICE_ATTR: {
+ struct kvm_device_attr attr;
+ r = -EFAULT;
+ if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+ break;
+ r = kvm_x86_dev_get_attr(&attr);
+ break;
+ }
+ case KVM_HAS_DEVICE_ATTR: {
+ struct kvm_device_attr attr;
+ r = -EFAULT;
+ if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+ break;
+ r = kvm_x86_dev_has_attr(&attr);
+ break;
+ }
default:
r = -EINVAL;
break;
@@ -4860,8 +4935,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.apic->sipi_vector = events->sipi_vector;
if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
- if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm)
+ if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
+ kvm_x86_ops.nested_ops->leave_nested(vcpu);
kvm_smm_changed(vcpu, events->smi.smm);
+ }
vcpu->arch.smi_pending = events->smi.pending;
@@ -5022,11 +5099,11 @@ static int kvm_arch_tsc_has_attr(struct kvm_vcpu *vcpu,
static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
- u64 __user *uaddr = (u64 __user *)(unsigned long)attr->addr;
+ u64 __user *uaddr = kvm_get_attr_addr(attr);
int r;
- if ((u64)(unsigned long)uaddr != attr->addr)
- return -EFAULT;
+ if (IS_ERR(uaddr))
+ return PTR_ERR(uaddr);
switch (attr->attr) {
case KVM_VCPU_TSC_OFFSET:
@@ -5045,12 +5122,12 @@ static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu,
static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
- u64 __user *uaddr = (u64 __user *)(unsigned long)attr->addr;
+ u64 __user *uaddr = kvm_get_attr_addr(attr);
struct kvm *kvm = vcpu->kvm;
int r;
- if ((u64)(unsigned long)uaddr != attr->addr)
- return -EFAULT;
+ if (IS_ERR(uaddr))
+ return PTR_ERR(uaddr);
switch (attr->attr) {
case KVM_VCPU_TSC_OFFSET: {
@@ -6452,7 +6529,7 @@ static void kvm_init_msr_list(void)
u32 dummy[2];
unsigned i;
- BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
+ BUILD_BUG_ON_MSG(KVM_PMC_MAX_FIXED != 3,
"Please update the fixed PMCs in msrs_to_saved_all[]");
perf_get_x86_pmu_capability(&x86_pmu);
@@ -6810,6 +6887,13 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
}
EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
+static int kvm_can_emulate_insn(struct kvm_vcpu *vcpu, int emul_type,
+ void *insn, int insn_len)
+{
+ return static_call(kvm_x86_can_emulate_instruction)(vcpu, emul_type,
+ insn, insn_len);
+}
+
int handle_ud(struct kvm_vcpu *vcpu)
{
static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
@@ -6817,7 +6901,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
char sig[5]; /* ud2; .ascii "kvm" */
struct x86_exception e;
- if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, NULL, 0)))
+ if (unlikely(!kvm_can_emulate_insn(vcpu, emul_type, NULL, 0)))
return 1;
if (force_emulation_prefix &&
@@ -8193,7 +8277,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
bool writeback = true;
bool write_fault_to_spt;
- if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, insn, insn_len)))
+ if (unlikely(!kvm_can_emulate_insn(vcpu, emulation_type, insn, insn_len)))
return 1;
vcpu->arch.l1tf_flush_l1d = true;
@@ -8769,7 +8853,7 @@ int kvm_arch_init(void *opaque)
}
if (pi_inject_timer == -1)
- pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
+ pi_inject_timer = housekeeping_enabled(HK_TYPE_TIMER);
#ifdef CONFIG_X86_64
pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
@@ -8871,6 +8955,13 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
return -KVM_EOPNOTSUPP;
+ /*
+ * When the TSC is in permanent catchup mode, guests won't be able to
+ * use the pvclock_read_retry loop to get a consistent view of pvclock.
+ */
+ if (vcpu->arch.tsc_always_catchup)
+ return -KVM_EOPNOTSUPP;
+
if (!kvm_get_walltime_and_clockread(&ts, &cycle))
return -KVM_EOPNOTSUPP;
@@ -9089,6 +9180,7 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
likely(!pic_in_kernel(vcpu->kvm));
}
+/* Called within kvm->srcu read side. */
static void post_kvm_run_save(struct kvm_vcpu *vcpu)
{
struct kvm_run *kvm_run = vcpu->run;
@@ -9097,16 +9189,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
- /*
- * The call to kvm_ready_for_interrupt_injection() may end up in
- * kvm_xen_has_interrupt() which may require the srcu lock to be
- * held, to protect against changes in the vcpu_info address.
- */
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
kvm_run->ready_for_interrupt_injection =
pic_in_kernel(vcpu->kvm) ||
kvm_vcpu_ready_for_interrupt_injection(vcpu);
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (is_smm(vcpu))
kvm_run->flags |= KVM_RUN_X86_SMM;
@@ -9706,7 +9791,7 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
}
-void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
+static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
if (!lapic_in_kernel(vcpu))
return;
@@ -9724,6 +9809,7 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
/*
+ * Called within kvm->srcu read side.
* Returns 1 to let vcpu_run() continue the guest execution loop without
* exiting to the userspace. Otherwise, the value will be returned to the
* userspace.
@@ -9912,7 +9998,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* result in virtual interrupt delivery.
*/
local_irq_disable();
- vcpu->mode = IN_GUEST_MODE;
+
+ /* Store vcpu->apicv_active before vcpu->mode. */
+ smp_store_release(&vcpu->mode, IN_GUEST_MODE);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
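
The smp_store_release() above pairs with an acquire load on the reader side: a CPU that observes mode == IN_GUEST_MODE is then guaranteed to also observe the earlier apicv_active store. A single-threaded C11 sketch of that release/acquire shape (values and names simplified):

#include <stdatomic.h>
#include <stdio.h>

enum { OUTSIDE_GUEST_MODE, IN_GUEST_MODE };

static _Atomic int mode;
static int apicv_active;

static void writer(void)
{
	apicv_active = 1;		/* plain store, published below */
	atomic_store_explicit(&mode, IN_GUEST_MODE, memory_order_release);
}

static int reader(void)
{
	if (atomic_load_explicit(&mode, memory_order_acquire) == IN_GUEST_MODE)
		return apicv_active;	/* guaranteed to see 1 */
	return -1;
}

int main(void)
{
	writer();
	printf("%d\n", reader());
	return 0;
}
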
@@ -9972,6 +10060,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
set_debugreg(0, 7);
}
+ guest_timing_enter_irqoff();
+
for (;;) {
/*
* Assert that vCPU vs. VM APICv state is consistent. An APICv
@@ -10056,7 +10146,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* of accounting via context tracking, but the loss of accuracy is
* acceptable for all known use cases.
*/
- vtime_account_guest_exit();
+ guest_timing_exit_irqoff();
if (lapic_in_kernel(vcpu)) {
s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
@@ -10098,6 +10188,7 @@ out:
return r;
}
+/* Called within kvm->srcu read side. */
static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
bool hv_timer;
@@ -10157,12 +10248,12 @@ static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
!vcpu->arch.apf.halted);
}
+/* Called within kvm->srcu read side. */
static int vcpu_run(struct kvm_vcpu *vcpu)
{
int r;
struct kvm *kvm = vcpu->kvm;
- vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
vcpu->arch.l1tf_flush_l1d = true;
for (;;) {
@@ -10190,14 +10281,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
if (__xfer_to_guest_mode_work_pending()) {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
r = xfer_to_guest_mode_handle_work(vcpu);
+ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
if (r)
return r;
- vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
}
}
- srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
-
return r;
}
@@ -10303,6 +10392,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
struct kvm_run *kvm_run = vcpu->run;
+ struct kvm *kvm = vcpu->kvm;
int r;
vcpu_load(vcpu);
@@ -10310,6 +10400,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_run->flags = 0;
kvm_load_guest_fpu(vcpu);
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
if (kvm_run->immediate_exit) {
r = -EINTR;
@@ -10320,7 +10411,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* use before KVM has ever run the vCPU.
*/
WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
+
+ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
kvm_vcpu_block(vcpu);
+ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+
if (kvm_apic_accept_events(vcpu) < 0) {
r = 0;
goto out;
@@ -10380,8 +10475,9 @@ out:
if (kvm_run->kvm_valid_regs)
store_regs(vcpu);
post_kvm_run_save(vcpu);
- kvm_sigset_deactivate(vcpu);
+ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+ kvm_sigset_deactivate(vcpu);
vcpu_put(vcpu);
return r;
}
@@ -11209,7 +11305,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.msr_misc_features_enables = 0;
- vcpu->arch.xcr0 = XFEATURE_MASK_FP;
+ __kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP);
+ __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true);
}
/* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */
@@ -11226,8 +11323,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
cpuid_0x1 = kvm_find_cpuid_entry(vcpu, 1, 0);
kvm_rdx_write(vcpu, cpuid_0x1 ? cpuid_0x1->eax : 0x600);
- vcpu->arch.ia32_xss = 0;
-
static_call(kvm_x86_vcpu_reset)(vcpu, init_event);
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
@@ -11571,8 +11666,6 @@ void kvm_arch_sync_events(struct kvm *kvm)
kvm_free_pit(kvm);
}
-#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e))
-
/**
* __x86_set_memory_region: Setup KVM internal memory slot
*
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 635b75f9e145..f11d945ac41f 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -10,51 +10,6 @@
void kvm_spurious_fault(void);
-static __always_inline void kvm_guest_enter_irqoff(void)
-{
- /*
- * VMENTER enables interrupts (host state), but the kernel state is
- * interrupts disabled when this is invoked. Also tell RCU about
- * it. This is the same logic as for exit_to_user_mode().
- *
- * This ensures that e.g. latency analysis on the host observes
- * guest mode as interrupt enabled.
- *
- * guest_enter_irqoff() informs context tracking about the
- * transition to guest mode and if enabled adjusts RCU state
- * accordingly.
- */
- instrumentation_begin();
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
- instrumentation_end();
-
- guest_enter_irqoff();
- lockdep_hardirqs_on(CALLER_ADDR0);
-}
-
-static __always_inline void kvm_guest_exit_irqoff(void)
-{
- /*
- * VMEXIT disables interrupts (host state), but tracing and lockdep
- * have them in state 'on' as recorded before entering guest mode.
- * Same as enter_from_user_mode().
- *
- * context_tracking_guest_exit() restores host context and reinstates
- * RCU if enabled and required.
- *
- * This needs to be done immediately after VM-Exit, before any code
- * that might contain tracepoints or call out to the greater world,
- * e.g. before x86_spec_ctrl_restore_host().
- */
- lockdep_hardirqs_off(CALLER_ADDR0);
- context_tracking_guest_exit();
-
- instrumentation_begin();
- trace_hardirqs_off_finish();
- instrumentation_end();
-}
-
#define KVM_NESTED_VMENTER_CONSISTENCY_CHECK(consistency_check) \
({ \
bool failed = (consistency_check); \
@@ -211,14 +166,9 @@ static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu)
return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
}
-static inline u64 get_canonical(u64 la, u8 vaddr_bits)
-{
- return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits);
-}
-
static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
{
- return get_canonical(la, vcpu_virt_addr_bits(vcpu)) != la;
+ return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu));
}
static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
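
__is_canonical_address(), which replaces the local get_canonical()/canonical_address() helpers here and in maccess.c, boils down to sign-extending the top implemented bit through bit 63 and comparing. A standalone sketch:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Shift the address up so bit (vaddr_bits - 1) becomes the sign bit,
 * then arithmetic-shift back down; canonical addresses are unchanged. */
static uint64_t canonical(uint64_t vaddr, unsigned int vaddr_bits)
{
	return (uint64_t)((int64_t)(vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits));
}

static bool is_canonical(uint64_t vaddr, unsigned int vaddr_bits)
{
	return canonical(vaddr, vaddr_bits) == vaddr;
}

int main(void)
{
	printf("%d\n", is_canonical(0x00007fffffffffffULL, 48));	/* 1 */
	printf("%d\n", is_canonical(0x0000800000000000ULL, 48));	/* 0 */
	printf("%d\n", is_canonical(0xffff800000000000ULL, 48));	/* 1 */
	return 0;
}
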
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 0e3f7d6e9fd7..74be1fda58e3 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -133,32 +133,57 @@ static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
{
struct kvm_vcpu_xen *vx = &v->arch.xen;
+ struct gfn_to_hva_cache *ghc = &vx->runstate_cache;
+ struct kvm_memslots *slots = kvm_memslots(v->kvm);
+ bool atomic = (state == RUNSTATE_runnable);
uint64_t state_entry_time;
- unsigned int offset;
+ int __user *user_state;
+ uint64_t __user *user_times;
kvm_xen_update_runstate(v, state);
if (!vx->runstate_set)
return;
- BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+ if (unlikely(slots->generation != ghc->generation || kvm_is_error_hva(ghc->hva)) &&
+ kvm_gfn_to_hva_cache_init(v->kvm, ghc, ghc->gpa, ghc->len))
+ return;
+
+ /* We made sure it fits in a single page */
+ BUG_ON(!ghc->memslot);
+
+ if (atomic)
+ pagefault_disable();
- offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
-#ifdef CONFIG_X86_64
/*
- * The only difference is alignment of uint64_t in 32-bit.
- * So the first field 'state' is accessed directly using
- * offsetof() (where its offset happens to be zero), while the
- * remaining fields which are all uint64_t, start at 'offset'
- * which we tweak here by adding 4.
+ * The only difference between 32-bit and 64-bit versions of the
+ * runstate struct is the alignment of uint64_t in 32-bit, which
+ * means that the 64-bit version has an additional 4 bytes of
+ * padding after the first field 'state'.
+ *
+ * So we use 'int __user *user_state' to point to the state field,
+ * and 'uint64_t __user *user_times' for runstate_entry_time. So
+ * the actual array of time[] in each state starts at user_times[1].
*/
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0);
+ BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0);
+ user_state = (int __user *)ghc->hva;
+
+ BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+
+ user_times = (uint64_t __user *)(ghc->hva +
+ offsetof(struct compat_vcpu_runstate_info,
+ state_entry_time));
+#ifdef CONFIG_X86_64
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
offsetof(struct compat_vcpu_runstate_info, time) + 4);
if (v->kvm->arch.xen.long_mode)
- offset = offsetof(struct vcpu_runstate_info, state_entry_time);
+ user_times = (uint64_t __user *)(ghc->hva +
+ offsetof(struct vcpu_runstate_info,
+ state_entry_time));
#endif
/*
* First write the updated state_entry_time at the appropriate
@@ -172,10 +197,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
sizeof(state_entry_time));
- if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
- &state_entry_time, offset,
- sizeof(state_entry_time)))
- return;
+ if (__put_user(state_entry_time, user_times))
+ goto out;
smp_wmb();
/*
@@ -189,11 +212,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
sizeof(vx->current_runstate));
- if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
- &vx->current_runstate,
- offsetof(struct vcpu_runstate_info, state),
- sizeof(vx->current_runstate)))
- return;
+ if (__put_user(vx->current_runstate, user_state))
+ goto out;
/*
* Write the actual runstate times immediately after the
@@ -208,24 +228,23 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
sizeof(vx->runstate_times));
- if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
- &vx->runstate_times[0],
- offset + sizeof(u64),
- sizeof(vx->runstate_times)))
- return;
-
+ if (__copy_to_user(user_times + 1, vx->runstate_times, sizeof(vx->runstate_times)))
+ goto out;
smp_wmb();
/*
* Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
* runstate_entry_time field.
*/
-
state_entry_time &= ~XEN_RUNSTATE_UPDATE;
- if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
- &state_entry_time, offset,
- sizeof(state_entry_time)))
- return;
+ __put_user(state_entry_time, user_times);
+ smp_wmb();
+
+ out:
+ mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
+
+ if (atomic)
+ pagefault_enable();
}
int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
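
The 4-byte layout skew the comment above describes (uint64_t is 8-byte aligned on x86-64 but only 4-byte aligned in the i386 ABI) can be demonstrated directly; the packing pragma below emulates the 32-bit alignment rule on a 64-bit host:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct runstate_64 {			/* natural (64-bit) alignment */
	int state;
	uint64_t state_entry_time;
};

#pragma pack(push, 4)			/* emulate i386's 4-byte u64 alignment */
struct runstate_32 {
	int state;
	uint64_t state_entry_time;
};
#pragma pack(pop)

int main(void)
{
	printf("64-bit offset: %zu\n",
	       offsetof(struct runstate_64, state_entry_time));	/* 8 */
	printf("32-bit offset: %zu\n",
	       offsetof(struct runstate_32, state_entry_time));	/* 4 */
	return 0;
}
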
@@ -316,10 +335,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
"\tnotq %0\n"
"\t" LOCK_PREFIX "andq %0, %2\n"
"2:\n"
- "\t.section .fixup,\"ax\"\n"
- "3:\tjmp\t2b\n"
- "\t.previous\n"
- _ASM_EXTABLE_UA(1b, 3b)
+ _ASM_EXTABLE_UA(1b, 2b)
: "=r" (evtchn_pending_sel),
"+m" (vi->evtchn_pending_sel),
"+m" (v->arch.xen.evtchn_pending_sel)
@@ -335,10 +351,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
"\tnotl %0\n"
"\t" LOCK_PREFIX "andl %0, %2\n"
"2:\n"
- "\t.section .fixup,\"ax\"\n"
- "3:\tjmp\t2b\n"
- "\t.previous\n"
- _ASM_EXTABLE_UA(1b, 3b)
+ _ASM_EXTABLE_UA(1b, 2b)
: "=r" (evtchn_pending_sel32),
"+m" (vi->evtchn_pending_sel),
"+m" (v->arch.xen.evtchn_pending_sel)
@@ -449,6 +462,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
break;
}
+ /* It must fit within a single page */
+ if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_info) > PAGE_SIZE) {
+ r = -EINVAL;
+ break;
+ }
+
r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.xen.vcpu_info_cache,
data->u.gpa,
@@ -466,6 +485,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
break;
}
+ /* It must fit within a single page */
+ if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct pvclock_vcpu_time_info) > PAGE_SIZE) {
+ r = -EINVAL;
+ break;
+ }
+
r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.xen.vcpu_time_info_cache,
data->u.gpa,
@@ -487,6 +512,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
break;
}
+ /* It must fit within a single page */
+ if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_runstate_info) > PAGE_SIZE) {
+ r = -EINVAL;
+ break;
+ }
+
r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.xen.runstate_cache,
data->u.gpa,
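
Each of the three "must fit within a single page" checks above uses the same arithmetic: the structure's offset within its page plus its size must not cross PAGE_SIZE. A runnable sketch of that predicate:

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE	4096ul
#define PAGE_MASK	(~(PAGE_SIZE - 1))

static bool fits_in_page(unsigned long gpa, unsigned long size)
{
	/* offset within the page, plus the object size, must stay <= 4K */
	return (gpa & ~PAGE_MASK) + size <= PAGE_SIZE;
}

int main(void)
{
	printf("%d\n", fits_in_page(0x1000, 64));	/* 1: page aligned   */
	printf("%d\n", fits_in_page(0x1fc0, 64));	/* 1: ends at 0x2000 */
	printf("%d\n", fits_in_page(0x1fc1, 64));	/* 0: crosses a page */
	return 0;
}
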
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 59cf2343f3d9..d0d7b9bc6cad 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -27,8 +27,7 @@
* Output:
* rax original destination
*/
-SYM_FUNC_START_ALIAS(__memcpy)
-SYM_FUNC_START_WEAK(memcpy)
+SYM_FUNC_START(__memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memcpy_erms", X86_FEATURE_ERMS
@@ -40,11 +39,12 @@ SYM_FUNC_START_WEAK(memcpy)
movl %edx, %ecx
rep movsb
RET
-SYM_FUNC_END(memcpy)
-SYM_FUNC_END_ALIAS(__memcpy)
-EXPORT_SYMBOL(memcpy)
+SYM_FUNC_END(__memcpy)
EXPORT_SYMBOL(__memcpy)
+SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
+EXPORT_SYMBOL(memcpy)
+
/*
* memcpy_erms() - enhanced fast string memcpy. This is faster and
* simpler than memcpy. Use memcpy_erms when possible.
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 50ea390df712..d83cba364e31 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -24,7 +24,6 @@
* Output:
* rax: dest
*/
-SYM_FUNC_START_WEAK(memmove)
SYM_FUNC_START(__memmove)
mov %rdi, %rax
@@ -207,6 +206,7 @@ SYM_FUNC_START(__memmove)
13:
RET
SYM_FUNC_END(__memmove)
-SYM_FUNC_END_ALIAS(memmove)
EXPORT_SYMBOL(__memmove)
+
+SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
EXPORT_SYMBOL(memmove)
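
The SYM_FUNC_ALIAS_WEAK() conversions in these assembly files make the double-underscore symbol the real implementation and the generic name a weak alias that instrumented builds can override. The same arrangement in userspace C, for ELF targets (my_memcpy and memcpy_alias are invented names):

#include <stdio.h>
#include <string.h>

void *my_memcpy(void *dst, const void *src, size_t n)
{
	return memcpy(dst, src, n);	/* the "strong" implementation */
}

/* Weak alias: another object file may provide its own memcpy_alias
 * (e.g. an instrumented one) and the linker will prefer it. */
void *memcpy_alias(void *dst, const void *src, size_t n)
	__attribute__((weak, alias("my_memcpy")));

int main(void)
{
	char buf[4];

	memcpy_alias(buf, "hi", 3);
	puts(buf);
	return 0;
}
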
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index d624f2bc42f1..fc9ffd3ff3b2 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -17,7 +17,6 @@
*
* rax original destination
*/
-SYM_FUNC_START_WEAK(memset)
SYM_FUNC_START(__memset)
/*
* Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
@@ -42,10 +41,11 @@ SYM_FUNC_START(__memset)
movq %r9,%rax
RET
SYM_FUNC_END(__memset)
-SYM_FUNC_END_ALIAS(memset)
-EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)
+SYM_FUNC_ALIAS_WEAK(memset, __memset)
+EXPORT_SYMBOL(memset)
+
/*
* ISO C memset - set a memory block to a byte value. This function uses
* enhanced rep stosb to override the fast string function.
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 89b3fb244e15..afbdda539b80 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -34,7 +34,7 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
__stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_AMD
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE
.endm
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index ec31f5b60323..d12d1358f96d 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -690,7 +690,10 @@ AVXcode: 2
45: vpsrlvd/q Vx,Hx,Wx (66),(v)
46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo)
47: vpsllvd/q Vx,Hx,Wx (66),(v)
-# Skip 0x48-0x4b
+# Skip 0x48
+49: TILERELEASE (v1),(000),(11B) | LDTILECFG Mtc (v1)(000) | STTILECFG Mtc (66),(v1),(000) | TILEZERO Vt (F2),(v1),(11B)
+# Skip 0x4a
+4b: TILELOADD Vt,Wsm (F2),(v1) | TILELOADDT1 Vt,Wsm (66),(v1) | TILESTORED Wsm,Vt (F3),(v)
4c: vrcp14ps/d Vpd,Wpd (66),(ev)
4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
@@ -705,7 +708,10 @@ AVXcode: 2
59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
-# Skip 0x5c-0x61
+5c: TDPBF16PS Vt,Wt,Ht (F3),(v1)
+# Skip 0x5d
+5e: TDPBSSD Vt,Wt,Ht (F2),(v1) | TDPBSUD Vt,Wt,Ht (F3),(v1) | TDPBUSD Vt,Wt,Ht (66),(v1) | TDPBUUD Vt,Wt,Ht (v1)
+# Skip 0x5f-0x61
62: vpexpandb/w Vx,Wx (66),(ev)
63: vpcompressb/w Wx,Vx (66),(ev)
64: vpblendmd/q Vx,Hx,Wx (66),(ev)
@@ -822,9 +828,9 @@ AVXcode: 3
05: vpermilpd Vx,Wx,Ib (66),(v)
06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
07:
-08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo)
+08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) | vrndscaleph Vx,Wx,Ib (evo)
09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo)
-0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo)
+0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) | vrndscalesh Vx,Hx,Wx,Ib (evo)
0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo)
0c: vblendps Vx,Hx,Wx,Ib (66)
0d: vblendpd Vx,Hx,Wx,Ib (66)
@@ -846,8 +852,8 @@ AVXcode: 3
22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev)
-26: vgetmantps/d Vx,Wx,Ib (66),(ev)
-27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev)
+26: vgetmantps/d Vx,Wx,Ib (66),(ev) | vgetmantph Vx,Wx,Ib (ev)
+27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) | vgetmantsh Vx,Hx,Wx,Ib (ev)
30: kshiftrb/w Vk,Uk,Ib (66),(v)
31: kshiftrd/q Vk,Uk,Ib (66),(v)
32: kshiftlb/w Vk,Uk,Ib (66),(v)
@@ -871,23 +877,102 @@ AVXcode: 3
51: vrangess/d Vx,Hx,Wx,Ib (66),(ev)
54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev)
55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev)
-56: vreduceps/d Vx,Wx,Ib (66),(ev)
-57: vreducess/d Vx,Hx,Wx,Ib (66),(ev)
+56: vreduceps/d Vx,Wx,Ib (66),(ev) | vreduceph Vx,Wx,Ib (ev)
+57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) | vreducesh Vx,Hx,Wx,Ib (ev)
60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
-66: vfpclassps/d Vk,Wx,Ib (66),(ev)
-67: vfpclassss/d Vk,Wx,Ib (66),(ev)
+66: vfpclassps/d Vk,Wx,Ib (66),(ev) | vfpclassph Vx,Wx,Ib (ev)
+67: vfpclassss/d Vk,Wx,Ib (66),(ev) | vfpclasssh Vx,Wx,Ib (ev)
70: vpshldw Vx,Hx,Wx,Ib (66),(ev)
71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev)
72: vpshrdw Vx,Hx,Wx,Ib (66),(ev)
73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev)
+c2: vcmpph Vx,Hx,Wx,Ib (ev) | vcmpsh Vx,Hx,Wx,Ib (F3),(ev)
cc: sha1rnds4 Vdq,Wdq,Ib
ce: vgf2p8affineqb Vx,Wx,Ib (66)
cf: vgf2p8affineinvqb Vx,Wx,Ib (66)
df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
-f0: RORX Gy,Ey,Ib (F2),(v)
+f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B)
+EndTable
+
+Table: EVEX map 5
+Referrer:
+AVXcode: 5
+10: vmovsh Vx,Hx,Wx (F3),(ev) | vmovsh Vx,Wx (F3),(ev)
+11: vmovsh Wx,Hx,Vx (F3),(ev) | vmovsh Wx,Vx (F3),(ev)
+1d: vcvtps2phx Vx,Wx (66),(ev) | vcvtss2sh Vx,Hx,Wx (ev)
+2a: vcvtsi2sh Vx,Hx,Wx (F3),(ev)
+2c: vcvttsh2si Vx,Wx (F3),(ev)
+2d: vcvtsh2si Vx,Wx (F3),(ev)
+2e: vucomish Vx,Wx (ev)
+2f: vcomish Vx,Wx (ev)
+51: vsqrtph Vx,Wx (ev) | vsqrtsh Vx,Hx,Wx (F3),(ev)
+58: vaddph Vx,Hx,Wx (ev) | vaddsh Vx,Hx,Wx (F3),(ev)
+59: vmulph Vx,Hx,Wx (ev) | vmulsh Vx,Hx,Wx (F3),(ev)
+5a: vcvtpd2ph Vx,Wx (66),(ev) | vcvtph2pd Vx,Wx (ev) | vcvtsd2sh Vx,Hx,Wx (F2),(ev) | vcvtsh2sd Vx,Hx,Wx (F3),(ev)
+5b: vcvtdq2ph Vx,Wx (ev) | vcvtph2dq Vx,Wx (66),(ev) | vcvtqq2ph Vx,Wx (ev) | vcvttph2dq Vx,Wx (F3),(ev)
+5c: vsubph Vx,Hx,Wx (ev) | vsubsh Vx,Hx,Wx (F3),(ev)
+5d: vminph Vx,Hx,Wx (ev) | vminsh Vx,Hx,Wx (F3),(ev)
+5e: vdivph Vx,Hx,Wx (ev) | vdivsh Vx,Hx,Wx (F3),(ev)
+5f: vmaxph Vx,Hx,Wx (ev) | vmaxsh Vx,Hx,Wx (F3),(ev)
+6e: vmovw Vx,Wx (66),(ev)
+78: vcvttph2udq Vx,Wx (ev) | vcvttph2uqq Vx,Wx (66),(ev) | vcvttsh2usi Vx,Wx (F3),(ev)
+79: vcvtph2udq Vx,Wx (ev) | vcvtph2uqq Vx,Wx (66),(ev) | vcvtsh2usi Vx,Wx (F3),(ev)
+7a: vcvttph2qq Vx,Wx (66),(ev) | vcvtudq2ph Vx,Wx (F2),(ev) | vcvtuqq2ph Vx,Wx (F2),(ev)
+7b: vcvtph2qq Vx,Wx (66),(ev) | vcvtusi2sh Vx,Hx,Wx (F3),(ev)
+7c: vcvttph2uw Vx,Wx (ev) | vcvttph2w Vx,Wx (66),(ev)
+7d: vcvtph2uw Vx,Wx (ev) | vcvtph2w Vx,Wx (66),(ev) | vcvtuw2ph Vx,Wx (F2),(ev) | vcvtw2ph Vx,Wx (F3),(ev)
+7e: vmovw Wx,Vx (66),(ev)
+EndTable
+
+Table: EVEX map 6
+Referrer:
+AVXcode: 6
+13: vcvtph2psx Vx,Wx (66),(ev) | vcvtsh2ss Vx,Hx,Wx (ev)
+2c: vscalefph Vx,Hx,Wx (66),(ev)
+2d: vscalefsh Vx,Hx,Wx (66),(ev)
+42: vgetexpph Vx,Wx (66),(ev)
+43: vgetexpsh Vx,Hx,Wx (66),(ev)
+4c: vrcpph Vx,Wx (66),(ev)
+4d: vrcpsh Vx,Hx,Wx (66),(ev)
+4e: vrsqrtph Vx,Wx (66),(ev)
+4f: vrsqrtsh Vx,Hx,Wx (66),(ev)
+56: vfcmaddcph Vx,Hx,Wx (F2),(ev) | vfmaddcph Vx,Hx,Wx (F3),(ev)
+57: vfcmaddcsh Vx,Hx,Wx (F2),(ev) | vfmaddcsh Vx,Hx,Wx (F3),(ev)
+96: vfmaddsub132ph Vx,Hx,Wx (66),(ev)
+97: vfmsubadd132ph Vx,Hx,Wx (66),(ev)
+98: vfmadd132ph Vx,Hx,Wx (66),(ev)
+99: vfmadd132sh Vx,Hx,Wx (66),(ev)
+9a: vfmsub132ph Vx,Hx,Wx (66),(ev)
+9b: vfmsub132sh Vx,Hx,Wx (66),(ev)
+9c: vfnmadd132ph Vx,Hx,Wx (66),(ev)
+9d: vfnmadd132sh Vx,Hx,Wx (66),(ev)
+9e: vfnmsub132ph Vx,Hx,Wx (66),(ev)
+9f: vfnmsub132sh Vx,Hx,Wx (66),(ev)
+a6: vfmaddsub213ph Vx,Hx,Wx (66),(ev)
+a7: vfmsubadd213ph Vx,Hx,Wx (66),(ev)
+a8: vfmadd213ph Vx,Hx,Wx (66),(ev)
+a9: vfmadd213sh Vx,Hx,Wx (66),(ev)
+aa: vfmsub213ph Vx,Hx,Wx (66),(ev)
+ab: vfmsub213sh Vx,Hx,Wx (66),(ev)
+ac: vfnmadd213ph Vx,Hx,Wx (66),(ev)
+ad: vfnmadd213sh Vx,Hx,Wx (66),(ev)
+ae: vfnmsub213ph Vx,Hx,Wx (66),(ev)
+af: vfnmsub213sh Vx,Hx,Wx (66),(ev)
+b6: vfmaddsub231ph Vx,Hx,Wx (66),(ev)
+b7: vfmsubadd231ph Vx,Hx,Wx (66),(ev)
+b8: vfmadd231ph Vx,Hx,Wx (66),(ev)
+b9: vfmadd231sh Vx,Hx,Wx (66),(ev)
+ba: vfmsub231ph Vx,Hx,Wx (66),(ev)
+bb: vfmsub231sh Vx,Hx,Wx (66),(ev)
+bc: vfnmadd231ph Vx,Hx,Wx (66),(ev)
+bd: vfnmadd231sh Vx,Hx,Wx (66),(ev)
+be: vfnmsub231ph Vx,Hx,Wx (66),(ev)
+bf: vfnmsub231sh Vx,Hx,Wx (66),(ev)
+d6: vfcmulcph Vx,Hx,Wx (F2),(ev) | vfmulcph Vx,Hx,Wx (F3),(ev)
+d7: vfcmulcsh Vx,Hx,Wx (F2),(ev) | vfmulcsh Vx,Hx,Wx (F3),(ev)
EndTable
GrpTable: Grp1
@@ -970,7 +1055,7 @@ GrpTable: Grp7
2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
3: LIDT Ms
4: SMSW Mw/Rv
-5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B)
+5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) | CLUI (F3),(110),(11B) | SERIALIZE (000),(11B) | STUI (F3),(111),(11B) | TESTUI (F3)(101)(11B) | UIRET (F3),(100),(11B) | XRESLDTRK (F2),(000),(11B) | XSUSLDTRK (F2),(001),(11B)
6: LMSW Ew
7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
EndTable
@@ -987,7 +1072,7 @@ GrpTable: Grp9
3: xrstors
4: xsavec
5: xsaves
-6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) | SENDUIPI Gq (F3)
7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
EndTable
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 026031b3b782..17a492c27306 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -615,6 +615,7 @@ static bool memremap_is_efi_data(resource_size_t phys_addr,
static bool memremap_is_setup_data(resource_size_t phys_addr,
unsigned long size)
{
+ struct setup_indirect *indirect;
struct setup_data *data;
u64 paddr, paddr_next;
@@ -627,6 +628,10 @@ static bool memremap_is_setup_data(resource_size_t phys_addr,
data = memremap(paddr, sizeof(*data),
MEMREMAP_WB | MEMREMAP_DEC);
+ if (!data) {
+ pr_warn("failed to memremap setup_data entry\n");
+ return false;
+ }
paddr_next = data->next;
len = data->len;
@@ -636,10 +641,21 @@ static bool memremap_is_setup_data(resource_size_t phys_addr,
return true;
}
- if (data->type == SETUP_INDIRECT &&
- ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
- paddr = ((struct setup_indirect *)data->data)->addr;
- len = ((struct setup_indirect *)data->data)->len;
+ if (data->type == SETUP_INDIRECT) {
+ memunmap(data);
+ data = memremap(paddr, sizeof(*data) + len,
+ MEMREMAP_WB | MEMREMAP_DEC);
+ if (!data) {
+ pr_warn("failed to memremap indirect setup_data\n");
+ return false;
+ }
+
+ indirect = (struct setup_indirect *)data->data;
+
+ if (indirect->type != SETUP_INDIRECT) {
+ paddr = indirect->addr;
+ len = indirect->len;
+ }
}
memunmap(data);
@@ -660,22 +676,51 @@ static bool memremap_is_setup_data(resource_size_t phys_addr,
static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
unsigned long size)
{
+ struct setup_indirect *indirect;
struct setup_data *data;
u64 paddr, paddr_next;
paddr = boot_params.hdr.setup_data;
while (paddr) {
- unsigned int len;
+ unsigned int len, size;
if (phys_addr == paddr)
return true;
data = early_memremap_decrypted(paddr, sizeof(*data));
+ if (!data) {
+ pr_warn("failed to early memremap setup_data entry\n");
+ return false;
+ }
+
+ size = sizeof(*data);
paddr_next = data->next;
len = data->len;
- early_memunmap(data, sizeof(*data));
+ if ((phys_addr > paddr) && (phys_addr < (paddr + len))) {
+ early_memunmap(data, sizeof(*data));
+ return true;
+ }
+
+ if (data->type == SETUP_INDIRECT) {
+ size += len;
+ early_memunmap(data, sizeof(*data));
+ data = early_memremap_decrypted(paddr, size);
+ if (!data) {
+ pr_warn("failed to early memremap indirect setup_data\n");
+ return false;
+ }
+
+ indirect = (struct setup_indirect *)data->data;
+
+ if (indirect->type != SETUP_INDIRECT) {
+ paddr = indirect->addr;
+ len = indirect->len;
+ }
+ }
+
+ early_memunmap(data, size);
if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
return true;
diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c
index 92ec176a7293..5a53c2cc169c 100644
--- a/arch/x86/mm/maccess.c
+++ b/arch/x86/mm/maccess.c
@@ -4,11 +4,6 @@
#include <linux/kernel.h>
#ifdef CONFIG_X86_64
-static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits)
-{
- return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
-}
-
bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
{
unsigned long vaddr = (unsigned long)unsafe_src;
@@ -19,7 +14,7 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
* we also need to include the userspace guard page.
*/
return vaddr >= TASK_SIZE_MAX + PAGE_SIZE &&
- canonical_address(vaddr, boot_cpu_data.x86_virt_bits) == vaddr;
+ __is_canonical_address(vaddr, boot_cpu_data.x86_virt_bits);
}
#else
bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 2b2d018ea345..6169053c2854 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -177,25 +177,6 @@ void __init sme_map_bootdata(char *real_mode_data)
__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
}
-void __init sme_early_init(void)
-{
- unsigned int i;
-
- if (!sme_me_mask)
- return;
-
- early_pmd_flags = __sme_set(early_pmd_flags);
-
- __supported_pte_mask = __sme_set(__supported_pte_mask);
-
- /* Update the protection map with memory encryption mask */
- for (i = 0; i < ARRAY_SIZE(protection_map); i++)
- protection_map[i] = pgprot_encrypted(protection_map[i]);
-
- if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
- swiotlb_force = SWIOTLB_FORCE;
-}
-
void __init sev_setup_arch(void)
{
phys_addr_t total_mem = memblock_phys_mem_size();
@@ -256,7 +237,17 @@ static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
return pfn;
}
-void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc)
+static bool amd_enc_tlb_flush_required(bool enc)
+{
+ return true;
+}
+
+static bool amd_enc_cache_flush_required(void)
+{
+ return !cpu_feature_enabled(X86_FEATURE_SME_COHERENT);
+}
+
+static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
{
#ifdef CONFIG_PARAVIRT
unsigned long sz = npages << PAGE_SHIFT;
@@ -287,6 +278,19 @@ void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc)
#endif
}
+static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
+{
+}
+
+/* Return true unconditionally: return value doesn't matter for the SEV side */
+static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
+{
+ if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
+ enc_dec_hypercall(vaddr, npages, enc);
+
+ return true;
+}
+
static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
{
pgprot_t old_prot, new_prot;
@@ -392,7 +396,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
ret = 0;
- notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
+ early_set_mem_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
out:
__flush_tlb_all();
return ret;
@@ -410,7 +414,31 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
{
- notify_range_enc_status_changed(vaddr, npages, enc);
+ enc_dec_hypercall(vaddr, npages, enc);
+}
+
+void __init sme_early_init(void)
+{
+ unsigned int i;
+
+ if (!sme_me_mask)
+ return;
+
+ early_pmd_flags = __sme_set(early_pmd_flags);
+
+ __supported_pte_mask = __sme_set(__supported_pte_mask);
+
+ /* Update the protection map with memory encryption mask */
+ for (i = 0; i < ARRAY_SIZE(protection_map); i++)
+ protection_map[i] = pgprot_encrypted(protection_map[i]);
+
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ swiotlb_force = SWIOTLB_FORCE;
+
+ x86_platform.guest.enc_status_change_prepare = amd_enc_status_change_prepare;
+ x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish;
+ x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required;
+ x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required;
}
void __init mem_encrypt_free_decrypted_mem(void)
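
The mem_encrypt_amd.c rework above replaces a direct call with hooks hung off x86_platform.guest, a standard kernel pattern: the vendor fills a table of function pointers at init time and generic code calls through it without knowing which vendor is active. A freestanding sketch of the shape (the four hook names and the fail-the-finish-hook policy follow the diff; the rest is invented scaffolding):

  #include <stdbool.h>
  #include <stdio.h>

  /* Hook table: generic code calls through it without knowing the vendor. */
  struct guest_ops {
  	void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
  	bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
  	bool (*enc_tlb_flush_required)(bool enc);
  	bool (*enc_cache_flush_required)(void);
  };

  static struct guest_ops guest;

  /* Vendor side: trivial stand-ins, registered once at init. */
  static void amd_prepare(unsigned long vaddr, int npages, bool enc) { }

  static bool amd_finish(unsigned long vaddr, int npages, bool enc)
  {
  	printf("notify: %d pages at %#lx now %s\n", npages, vaddr,
  	       enc ? "encrypted" : "decrypted");
  	return true;
  }

  static bool amd_tlb_flush(bool enc) { return true; }
  static bool amd_cache_flush(void) { return true; }

  static void amd_init(void)
  {
  	guest.enc_status_change_prepare = amd_prepare;
  	guest.enc_status_change_finish = amd_finish;
  	guest.enc_tlb_flush_required = amd_tlb_flush;
  	guest.enc_cache_flush_required = amd_cache_flush;
  }

  /* Generic side: flush as required, notify before and after the change. */
  static int set_memory_enc(unsigned long vaddr, int npages, bool enc)
  {
  	if (guest.enc_tlb_flush_required(enc))
  		printf("flush TLB%s\n",
  		       guest.enc_cache_flush_required() ? " and caches" : "");
  	guest.enc_status_change_prepare(vaddr, npages, enc);
  	/* ... the page-table attribute change would go here ... */
  	return guest.enc_status_change_finish(vaddr, npages, enc) ? 0 : -1;
  }

  int main(void)
  {
  	amd_init();
  	return set_memory_enc(0x1000, 4, true) ? 1 : 0;
  }
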
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 3f0abb403340..b43bc24d2bb6 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -44,6 +44,7 @@
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/cmdline.h>
+#include <asm/coco.h>
#include "mm_internal.h"
@@ -565,8 +566,7 @@ void __init sme_enable(struct boot_params *bp)
} else {
/* SEV state cannot be controlled by a command line option */
sme_me_mask = me_mask;
- physical_mask &= ~sme_me_mask;
- return;
+ goto out;
}
/*
@@ -600,6 +600,10 @@ void __init sme_enable(struct boot_params *bp)
sme_me_mask = 0;
else
sme_me_mask = active_by_default ? me_mask : 0;
-
- physical_mask &= ~sme_me_mask;
+out:
+ if (sme_me_mask) {
+ physical_mask &= ~sme_me_mask;
+ cc_set_vendor(CC_VENDOR_AMD);
+ cc_set_mask(sme_me_mask);
+ }
}
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index c6b1213086d6..e8b061557887 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -738,17 +738,6 @@ void __init x86_numa_init(void)
numa_init(dummy_numa_init);
}
-static void __init init_memory_less_node(int nid)
-{
- /* Allocate and initialize node data. Memory-less node is now online.*/
- alloc_node_data(nid);
- free_area_init_memoryless_node(nid);
-
- /*
- * All zonelists will be built later in start_kernel() after per cpu
- * areas are initialized.
- */
-}
/*
* A node may exist which has one or more Generic Initiators but no CPUs and no
@@ -766,9 +755,18 @@ void __init init_gi_nodes(void)
{
int nid;
+ /*
+ * Exclude this node from
+ * bringup_nonboot_cpus
+ * cpu_up
+ * __try_online_node
+ * register_one_node
+ * because node_subsys is not initialized yet.
+ * TODO remove dependency on node_online
+ */
for_each_node_state(nid, N_GENERIC_INITIATOR)
if (!node_online(nid))
- init_memory_less_node(nid);
+ node_set_online(nid);
}
/*
@@ -798,8 +796,17 @@ void __init init_cpu_to_node(void)
if (node == NUMA_NO_NODE)
continue;
+ /*
+ * Exclude this node from
+ * bringup_nonboot_cpus
+ * cpu_up
+ * __try_online_node
+ * register_one_node
+ * because node_subsys is not initialized yet.
+ * TODO remove dependency on node_online
+ */
if (!node_online(node))
- init_memory_less_node(node);
+ node_set_online(node);
numa_set_node(cpu, node);
}
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index b4072115c8ef..abf5ed76e4b7 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1989,6 +1989,7 @@ int set_memory_global(unsigned long addr, int numpages)
*/
static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
{
+ pgprot_t empty = __pgprot(0);
struct cpa_data cpa;
int ret;
@@ -1999,18 +2000,20 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
memset(&cpa, 0, sizeof(cpa));
cpa.vaddr = &addr;
cpa.numpages = numpages;
- cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0);
- cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC);
+ cpa.mask_set = enc ? pgprot_encrypted(empty) : pgprot_decrypted(empty);
+ cpa.mask_clr = enc ? pgprot_decrypted(empty) : pgprot_encrypted(empty);
cpa.pgd = init_mm.pgd;
/* Must avoid aliasing mappings in the highmem code */
kmap_flush_unused();
vm_unmap_aliases();
- /*
- * Before changing the encryption attribute, we need to flush caches.
- */
- cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT));
+ /* Flush the caches as needed before changing the encryption attribute. */
+ if (x86_platform.guest.enc_tlb_flush_required(enc))
+ cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required());
+
+ /* Notify hypervisor that we are about to set/clr encryption attribute. */
+ x86_platform.guest.enc_status_change_prepare(addr, numpages, enc);
ret = __change_page_attr_set_clr(&cpa, 1);
@@ -2023,11 +2026,11 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
*/
cpa_flush(&cpa, 0);
- /*
- * Notify hypervisor that a given memory range is mapped encrypted
- * or decrypted.
- */
- notify_range_enc_status_changed(addr, numpages, enc);
+ /* Notify hypervisor that we have successfully set/clr encryption attribute. */
+ if (!ret) {
+ if (!x86_platform.guest.enc_status_change_finish(addr, numpages, enc))
+ ret = -EIO;
+ }
return ret;
}
@@ -2121,12 +2124,6 @@ int set_pages_array_wc(struct page **pages, int numpages)
}
EXPORT_SYMBOL(set_pages_array_wc);
-int set_pages_array_wt(struct page **pages, int numpages)
-{
- return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WT);
-}
-EXPORT_SYMBOL_GPL(set_pages_array_wt);
-
int set_pages_wb(struct page *page, int numpages)
{
unsigned long addr = (unsigned long)page_address(page);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 2b1e266ff95c..0ecb140864b2 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -394,7 +394,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
u8 *prog = *pprog;
#ifdef CONFIG_RETPOLINE
- if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
EMIT_LFENCE();
EMIT2(0xFF, 0xE0 + reg);
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 2edd86649468..615a76d70019 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -353,8 +353,8 @@ static void pci_fixup_video(struct pci_dev *pdev)
}
}
}
-DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID,
+ PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
static const struct dmi_system_id msi_k8t_dmi_table[] = {
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 40d6a06e41c8..ead7e5b3a975 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -8,6 +8,7 @@ endmenu
config UML_X86
def_bool y
+ select ARCH_BINFMT_ELF_EXTRA_PHDRS if X86_32
config 64BIT
bool "64-bit kernel" if "$(SUBARCH)" = "x86"
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index bae61554abcc..e54a9814ccf1 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -8,12 +8,11 @@
#define __FRAME_OFFSETS
#include <linux/ptrace.h>
#include <asm/types.h>
+#include <linux/kbuild.h>
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define DEFINE_LONGS(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long)))
+#define DEFINE_LONGS(sym, val) \
+ COMMENT(#val " / sizeof(unsigned long)"); \
+ DEFINE(sym, val / sizeof(unsigned long))
void foo(void)
{
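
The user-offsets.c hunk above drops a local copy of DEFINE() in favor of the shared one from <linux/kbuild.h> and rebuilds DEFINE_LONGS() on top of it. The macro itself deserves a note: it plants "->SYM value" marker lines in the compiler's assembly output, which the build then rewrites into a generated header of #defines. A standalone illustration (struct and symbol names invented; inspect the markers with `gcc -S offsets.c -o - | grep '^->'`):

  /* Each DEFINE() emits a "->SYM $value comment" line into the .s file;
   * kbuild's sed script turns such lines into "#define SYM value". */
  #define DEFINE(sym, val) \
  	asm volatile("\n->" #sym " %0 " #val : : "i" (val))

  #define DEFINE_LONGS(sym, val) \
  	DEFINE(sym, val / sizeof(unsigned long))

  struct regs {
  	unsigned long r[16];
  	unsigned long ip;
  };

  void foo(void)
  {
  	DEFINE(REGS_SIZE, sizeof(struct regs));
  	DEFINE_LONGS(IP_INDEX, __builtin_offsetof(struct regs, ip));
  }
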
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 42300941ec29..517a9d8d8f94 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -9,6 +9,7 @@
#include <xen/events.h>
#include <xen/interface/memory.h>
+#include <asm/apic.h>
#include <asm/cpu.h>
#include <asm/smp.h>
#include <asm/io_apic.h>
@@ -184,8 +185,7 @@ static int xen_cpu_dead_hvm(unsigned int cpu)
if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock))
xen_teardown_timer(cpu);
-
- return 0;
+ return 0;
}
static bool no_vector_callback __initdata;
@@ -242,15 +242,14 @@ static __init int xen_parse_no_vector_callback(char *arg)
}
early_param("xen_no_vector_callback", xen_parse_no_vector_callback);
-bool __init xen_hvm_need_lapic(void)
+static __init bool xen_x2apic_available(void)
{
- if (xen_pv_domain())
- return false;
- if (!xen_hvm_domain())
- return false;
- if (xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback)
- return false;
- return true;
+ return x2apic_supported();
+}
+
+static bool __init msi_ext_dest_id(void)
+{
+ return cpuid_eax(xen_cpuid_base() + 4) & XEN_HVM_CPUID_EXT_DEST_ID;
}
static __init void xen_hvm_guest_late_init(void)
@@ -312,9 +311,10 @@ struct hypervisor_x86 x86_hyper_xen_hvm __initdata = {
.detect = xen_platform_hvm,
.type = X86_HYPER_XEN_HVM,
.init.init_platform = xen_hvm_guest_init,
- .init.x2apic_available = xen_x2apic_para_available,
+ .init.x2apic_available = xen_x2apic_available,
.init.init_mem_mapping = xen_hvm_init_mem_mapping,
.init.guest_late_init = xen_hvm_guest_late_init,
+ .init.msi_ext_dest_id = msi_ext_dest_id,
.runtime.pin_vcpu = xen_pin_vcpu,
.ignore_nopv = true,
};
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5004feb16783..d47c3d176ae4 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1341,10 +1341,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_acpi_sleep_register();
- /* Avoid searching for BIOS MP tables */
- x86_init.mpparse.find_smp_config = x86_init_noop;
- x86_init.mpparse.get_smp_config = x86_init_uint_noop;
-
xen_boot_params_init_edd();
#ifdef CONFIG_ACPI
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 6a8f3b53ab83..4a6019238ee7 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -148,28 +148,12 @@ int xen_smp_intr_init_pv(unsigned int cpu)
return rc;
}
-static void __init xen_fill_possible_map(void)
-{
- int i, rc;
-
- if (xen_initial_domain())
- return;
-
- for (i = 0; i < nr_cpu_ids; i++) {
- rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
- if (rc >= 0) {
- num_processors++;
- set_cpu_possible(i, true);
- }
- }
-}
-
-static void __init xen_filter_cpu_maps(void)
+static void __init _get_smp_config(unsigned int early)
{
int i, rc;
unsigned int subtract = 0;
- if (!xen_initial_domain())
+ if (early)
return;
num_processors = 0;
@@ -210,7 +194,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void)
* sure the old memory can be recycled. */
make_lowmem_page_readwrite(xen_initial_gdt);
- xen_filter_cpu_maps();
xen_setup_vcpu_info_placement();
/*
@@ -476,5 +459,8 @@ static const struct smp_ops xen_smp_ops __initconst = {
void __init xen_smp_init(void)
{
smp_ops = xen_smp_ops;
- xen_fill_possible_map();
+
+ /* Avoid searching for BIOS MP tables */
+ x86_init.mpparse.find_smp_config = x86_init_noop;
+ x86_init.mpparse.get_smp_config = _get_smp_config;
}
diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
index 31b1e3477cb6..14ea32e734d5 100644
--- a/arch/x86/xen/vga.c
+++ b/arch/x86/xen/vga.c
@@ -57,6 +57,14 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size;
screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos;
+ if (size >= offsetof(struct dom0_vga_console_info,
+ u.vesa_lfb.ext_lfb_base)
+ + sizeof(info->u.vesa_lfb.ext_lfb_base)
+ && info->u.vesa_lfb.ext_lfb_base) {
+ screen_info->ext_lfb_base = info->u.vesa_lfb.ext_lfb_base;
+ screen_info->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
+ }
+
if (info->video_type == XEN_VGATYPE_EFI_LFB) {
screen_info->orig_video_isVGA = VIDEO_TYPE_EFI;
break;
@@ -66,14 +74,6 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
u.vesa_lfb.mode_attrs)
+ sizeof(info->u.vesa_lfb.mode_attrs))
screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs;
-
- if (size >= offsetof(struct dom0_vga_console_info,
- u.vesa_lfb.ext_lfb_base)
- + sizeof(info->u.vesa_lfb.ext_lfb_base)
- && info->u.vesa_lfb.ext_lfb_base) {
- screen_info->ext_lfb_base = info->u.vesa_lfb.ext_lfb_base;
- screen_info->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
- }
break;
}
}
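
Both the new location of this hunk and the one it replaces lean on the same defensive idiom for versioned ABI structs: a late-addition field may be read only if the size the caller passed proves the field is actually present in their copy. A compact sketch of the guard (struct and field names are illustrative):

  #include <stddef.h>
  #include <stdio.h>

  struct console_info {
  	int width, height;		/* present since the first version */
  	unsigned long ext_lfb_base;	/* appended by a later version */
  };

  static void parse(const struct console_info *info, size_t size)
  {
  	printf("%dx%d\n", info->width, info->height);

  	/* Read ext_lfb_base only if the caller's copy is big enough. */
  	if (size >= offsetof(struct console_info, ext_lfb_base) +
  		    sizeof(info->ext_lfb_base) && info->ext_lfb_base)
  		printf("ext base: %#lx\n", info->ext_lfb_base);
  }

  int main(void)
  {
  	struct console_info info = { 80, 25, 0xfd000000 };

  	parse(&info, offsetof(struct console_info, ext_lfb_base)); /* old caller */
  	parse(&info, sizeof(info));                                /* new caller */
  	return 0;
  }
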
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 8ac599aa6d99..887432327613 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -3,6 +3,7 @@ config XTENSA
def_bool y
select ARCH_32BIT_OFF_T
select ARCH_HAS_BINFMT_FLAT if !MMU
+ select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DMA_PREP_COHERENT if MMU
select ARCH_HAS_SYNC_DMA_FOR_CPU if MMU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE if MMU
diff --git a/arch/xtensa/include/asm/current.h b/arch/xtensa/include/asm/current.h
index 5d98a7ad4251..08010dbf5e09 100644
--- a/arch/xtensa/include/asm/current.h
+++ b/arch/xtensa/include/asm/current.h
@@ -26,6 +26,8 @@ static inline struct task_struct *get_current(void)
#define current get_current()
+register unsigned long current_stack_pointer __asm__("a1");
+
#else
#define GET_CURRENT(reg,sp) \
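
The current_stack_pointer declaration above uses GCC's global register variable extension: a file-scope declaration that pins a C identifier to a named hardware register, so every read compiles to a plain register move. A hedged userspace analogue for x86-64 (xtensa binds a1; the register name below is the x86-64 stack pointer):

  #include <stdio.h>

  /* GCC extension: bind this identifier to the machine's stack pointer.
   * No storage is allocated; every read is a register read. */
  register unsigned long current_stack_pointer __asm__("rsp");

  int main(void)
  {
  	printf("stack pointer: %#lx\n", current_stack_pointer);
  	return 0;
  }
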
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index bd5aeb795567..8da562f5da73 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -241,6 +241,7 @@ static inline void paging_init(void) { }
* The pmd contains the kernel virtual address of the pte page.
*/
#define pmd_page_vaddr(pmd) ((unsigned long)(pmd_val(pmd) & PAGE_MASK))
+#define pmd_pfn(pmd) (__pa(pmd_val(pmd)) >> PAGE_SHIFT)
#define pmd_page(pmd) virt_to_page(pmd_val(pmd))
/*
diff --git a/arch/xtensa/include/asm/stacktrace.h b/arch/xtensa/include/asm/stacktrace.h
index fe06e8ed162b..a85e785a6288 100644
--- a/arch/xtensa/include/asm/stacktrace.h
+++ b/arch/xtensa/include/asm/stacktrace.h
@@ -19,14 +19,14 @@ struct stackframe {
static __always_inline unsigned long *stack_pointer(struct task_struct *task)
{
- unsigned long *sp;
+ unsigned long sp;
if (!task || task == current)
- __asm__ __volatile__ ("mov %0, a1\n" : "=a"(sp));
+ sp = current_stack_pointer;
else
- sp = (unsigned long *)task->thread.sp;
+ sp = task->thread.sp;
- return sp;
+ return (unsigned long *)sp;
}
void walk_stackframe(unsigned long *sp,
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index 15051a8a1539..529fe9245821 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -36,9 +36,8 @@ asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/* Debugging check for stack overflow: is there less than 1KB free? */
{
- unsigned long sp;
+ unsigned long sp = current_stack_pointer;
- __asm__ __volatile__ ("mov %0, a1\n" : "=a" (sp));
sp &= THREAD_SIZE - 1;
if (unlikely(sp < (sizeof(thread_info) + 1024)))
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 8eb6ad1a3a1d..0f0e0724397f 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -108,13 +108,13 @@ static void simdisk_submit_bio(struct bio *bio)
sector_t sector = bio->bi_iter.bi_sector;
bio_for_each_segment(bvec, bio, iter) {
- char *buffer = kmap_atomic(bvec.bv_page) + bvec.bv_offset;
+ char *buffer = bvec_kmap_local(&bvec);
unsigned len = bvec.bv_len >> SECTOR_SHIFT;
simdisk_transfer(dev, sector, len, buffer,
bio_data_dir(bio) == WRITE);
sector += len;
- kunmap_atomic(buffer);
+ kunmap_local(buffer);
}
bio_endio(bio);
diff --git a/block/Kconfig b/block/Kconfig
index d5d4197b7ed2..7eb5d6d53b3f 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -26,6 +26,16 @@ menuconfig BLOCK
if BLOCK
+config BLOCK_LEGACY_AUTOLOAD
+ bool "Legacy autoloading support"
+ default y
+ help
+ Enable loading modules and creating block device instances based on
+ accesses through their device special file. This is a historic Linux
+ feature and makes no sense in a udev world where device files are
+ created on demand, but scripts that manually create device nodes and
+ then call losetup might rely on this behavior.
+
config BLK_RQ_ALLOC_TIME
bool
@@ -218,6 +228,9 @@ config BLK_PM
config BLOCK_HOLDER_DEPRECATED
bool
+config BLK_MQ_STACKING
+ bool
+
source "block/Kconfig.iosched"
endif # BLOCK
diff --git a/block/Makefile b/block/Makefile
index f38eaa612929..3950ecbc5c26 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o
obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o
obj-$(CONFIG_BLK_PM) += blk-pm.o
-obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += blk-crypto.o blk-crypto-profile.o
+obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += blk-crypto.o blk-crypto-profile.o \
+ blk-crypto-sysfs.o
obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) += blk-crypto-fallback.o
obj-$(CONFIG_BLOCK_HOLDER_DEPRECATED) += holder.o
diff --git a/block/bdev.c b/block/bdev.c
index 102837a37051..13de871fa816 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -385,7 +385,7 @@ static struct kmem_cache * bdev_cachep __read_mostly;
static struct inode *bdev_alloc_inode(struct super_block *sb)
{
- struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
+ struct bdev_inode *ei = alloc_inode_sb(sb, bdev_cachep, GFP_KERNEL);
if (!ei)
return NULL;
@@ -678,7 +678,7 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
if (test_bit(GD_NEED_PART_SCAN, &disk->state))
bdev_disk_changed(disk, false);
bdev->bd_openers++;
- return 0;;
+ return 0;
}
static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
@@ -733,12 +733,15 @@ struct block_device *blkdev_get_no_open(dev_t dev)
struct inode *inode;
inode = ilookup(blockdev_superblock, dev);
- if (!inode) {
+ if (!inode && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) {
blk_request_module(dev);
inode = ilookup(blockdev_superblock, dev);
- if (!inode)
- return NULL;
+ if (inode)
+ pr_warn_ratelimited(
+"block device autoloading is deprecated and will be removed.\n");
}
+ if (!inode)
+ return NULL;
/* switch from the inode reference to a device mode one: */
bdev = &BDEV_I(inode)->bdev;
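
The blkdev_get_no_open() rework above restructures a lookup-miss fallback so the module-load retry only exists when CONFIG_BLOCK_LEGACY_AUTOLOAD is set, and a rate-limited warning fires when the fallback actually did the work. A userspace sketch of the same control flow (the registry and loader are stand-ins, not block-layer code):

  #include <stdbool.h>
  #include <stdio.h>
  #include <string.h>

  #define LEGACY_AUTOLOAD 1	/* stand-in for CONFIG_BLOCK_LEGACY_AUTOLOAD */

  static const char *registry[8];	/* stand-in for the bdev inode hash */

  static const char *lookup(const char *name)
  {
  	for (int i = 0; i < 8; i++)
  		if (registry[i] && strcmp(registry[i], name) == 0)
  			return registry[i];
  	return NULL;
  }

  static void request_module(const char *name)
  {
  	registry[0] = name;	/* pretend the loaded module registered itself */
  }

  static const char *get_device(const char *name)
  {
  	const char *dev = lookup(name);

  	if (!dev && LEGACY_AUTOLOAD) {
  		request_module(name);
  		dev = lookup(name);
  		if (dev)
  			fprintf(stderr, "autoloading is deprecated\n");
  	}
  	return dev;	/* both paths funnel through one NULL check */
  }

  int main(void)
  {
  	puts(get_device("loop") ? "found" : "missing");
  	return 0;
  }
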
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 24a5c5329bcd..420eda2589c0 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -645,8 +645,22 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
struct bfq_group *bfqg)
{
struct bfq_entity *entity = &bfqq->entity;
+ struct bfq_group *old_parent = bfqq_group(bfqq);
/*
+	 * No point in moving bfqq to the same group, which can happen when
+	 * the root group is offlined
+ */
+ if (old_parent == bfqg)
+ return;
+
+ /*
+	 * oom_bfqq must not be moved: it holds a reference to root_group
+	 * until the elevator exits.
+ */
+ if (bfqq == &bfqd->oom_bfqq)
+ return;
+ /*
* Get extra reference to prevent bfqq from being freed in
* next possible expire or deactivate.
*/
@@ -666,7 +680,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_deactivate_bfqq(bfqd, bfqq, false, false);
else if (entity->on_st_or_in_serv)
bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
- bfqg_and_blkg_put(bfqq_group(bfqq));
+ bfqg_and_blkg_put(old_parent);
if (entity->parent &&
entity->parent->last_bfqq_created == bfqq)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 0c612a911696..2e0dd68a3cbe 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -774,7 +774,7 @@ bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
if (!bfqq->next_rq)
return;
- bfqq->pos_root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
+ bfqq->pos_root = &bfqq_group(bfqq)->rq_pos_tree;
__bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root,
blk_rq_pos(bfqq->next_rq), &parent, &p);
if (!__bfqq) {
@@ -2153,7 +2153,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfqq->waker_detection_started = now_ns;
bfq_bfqq_name(bfqq->tentative_waker_bfqq, waker_name,
MAX_BFQQ_NAME_LENGTH);
- bfq_log_bfqq(bfqd, bfqq, "set tenative waker %s", waker_name);
+ bfq_log_bfqq(bfqd, bfqq, "set tentative waker %s", waker_name);
} else /* Same tentative waker queue detected again */
bfqq->num_waker_detections++;
@@ -2669,7 +2669,7 @@ static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd,
struct bfq_queue *bfqq,
sector_t sector)
{
- struct rb_root *root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
+ struct rb_root *root = &bfqq_group(bfqq)->rq_pos_tree;
struct rb_node *parent, *node;
struct bfq_queue *__bfqq;
@@ -2782,6 +2782,15 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
* are likely to increase the throughput.
*/
bfqq->new_bfqq = new_bfqq;
+ /*
+ * The above assignment schedules the following redirections:
+ * each time some I/O for bfqq arrives, the process that
+ * generated that I/O is disassociated from bfqq and
+	 * associated with new_bfqq. Here we increase new_bfqq->ref
+ * in advance, adding the number of processes that are
+ * expected to be associated with new_bfqq as they happen to
+ * issue I/O.
+ */
new_bfqq->ref += process_refs;
return new_bfqq;
}
@@ -2844,6 +2853,10 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
{
struct bfq_queue *in_service_bfqq, *new_bfqq;
+	/* if a merge has already been set up, then proceed with that first */
+ if (bfqq->new_bfqq)
+ return bfqq->new_bfqq;
+
/*
* Check delayed stable merge for rotational or non-queueing
* devs. For this branch to be executed, bfqq must not be
@@ -2945,9 +2958,6 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (bfq_too_late_for_merging(bfqq))
return NULL;
- if (bfqq->new_bfqq)
- return bfqq->new_bfqq;
-
if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq))
return NULL;
@@ -5181,7 +5191,7 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
struct request *rq;
struct bfq_queue *in_serv_queue;
- bool waiting_rq, idle_timer_disabled;
+ bool waiting_rq, idle_timer_disabled = false;
spin_lock_irq(&bfqd->lock);
@@ -5189,14 +5199,15 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue);
rq = __bfq_dispatch_request(hctx);
-
- idle_timer_disabled =
- waiting_rq && !bfq_bfqq_wait_request(in_serv_queue);
+ if (in_serv_queue == bfqd->in_service_queue) {
+ idle_timer_disabled =
+ waiting_rq && !bfq_bfqq_wait_request(in_serv_queue);
+ }
spin_unlock_irq(&bfqd->lock);
-
- bfq_update_dispatch_stats(hctx->queue, rq, in_serv_queue,
- idle_timer_disabled);
+ bfq_update_dispatch_stats(hctx->queue, rq,
+ idle_timer_disabled ? in_serv_queue : NULL,
+ idle_timer_disabled);
return rq;
}
@@ -5448,7 +5459,7 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
bfqq = bic_to_bfqq(bic, false);
if (bfqq) {
bfq_release_process_ref(bfqd, bfqq);
- bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic, true);
+ bfqq = bfq_get_queue(bfqd, bio, false, bic, true);
bic_set_bfqq(bic, bfqq, false);
}
@@ -7018,6 +7029,8 @@ static void bfq_exit_queue(struct elevator_queue *e)
spin_unlock_irq(&bfqd->lock);
#endif
+ wbt_enable_default(bfqd->queue);
+
kfree(bfqd);
}
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 07288b9da389..3b83e3d1c2e5 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -8,7 +8,6 @@
#include <linux/blktrace_api.h>
#include <linux/hrtimer.h>
-#include <linux/blk-cgroup.h>
#include "blk-cgroup-rwstat.h"
@@ -1051,7 +1050,6 @@ extern struct blkcg_policy blkcg_policy_bfq;
for (parent = NULL; entity ; entity = parent)
#endif /* CONFIG_BFQ_GROUP_IOSCHED */
-struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq);
struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
unsigned int bfq_tot_busy_queues(struct bfq_data *bfqd);
struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity);
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index b74cc0da118e..f8eb340381cf 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -142,16 +142,6 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
#ifdef CONFIG_BFQ_GROUP_IOSCHED
-struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
-{
- struct bfq_entity *group_entity = bfqq->entity.parent;
-
- if (!group_entity)
- group_entity = &bfqq->bfqd->root_group->entity;
-
- return container_of(group_entity, struct bfq_group, entity);
-}
-
/*
* Returns true if this budget changes may let next_in_service->parent
* become the next_in_service entity for its parent entity.
@@ -230,11 +220,6 @@ static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
#else /* CONFIG_BFQ_GROUP_IOSCHED */
-struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
-{
- return bfqq->bfqd->root_group;
-}
-
static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
{
return false;
@@ -519,7 +504,7 @@ unsigned short bfq_ioprio_to_weight(int ioprio)
static unsigned short bfq_weight_to_ioprio(int weight)
{
return max_t(int, 0,
- IOPRIO_NR_LEVELS * BFQ_WEIGHT_CONVERSION_COEFF - weight);
+ IOPRIO_NR_LEVELS - weight / BFQ_WEIGHT_CONVERSION_COEFF);
}
static void bfq_get_entity(struct bfq_entity *entity)
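
The bfq_weight_to_ioprio() fix just above is easiest to see with numbers. The forward map is weight = (IOPRIO_NR_LEVELS - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF, so the inverse has to divide by the coefficient before subtracting, not subtract the raw weight from a scaled constant. A checkable sketch, assuming the usual constants (8 levels, coefficient 10):

  #include <assert.h>
  #include <stdio.h>

  #define IOPRIO_NR_LEVELS		8
  #define BFQ_WEIGHT_CONVERSION_COEFF	10

  static int ioprio_to_weight(int ioprio)
  {
  	return (IOPRIO_NR_LEVELS - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF;
  }

  static int weight_to_ioprio_old(int weight)	/* the buggy inverse */
  {
  	int v = IOPRIO_NR_LEVELS * BFQ_WEIGHT_CONVERSION_COEFF - weight;

  	return v > 0 ? v : 0;
  }

  static int weight_to_ioprio_new(int weight)	/* the fixed inverse */
  {
  	int v = IOPRIO_NR_LEVELS - weight / BFQ_WEIGHT_CONVERSION_COEFF;

  	return v > 0 ? v : 0;
  }

  int main(void)
  {
  	for (int ioprio = 0; ioprio < IOPRIO_NR_LEVELS; ioprio++) {
  		int w = ioprio_to_weight(ioprio);

  		printf("ioprio %d -> weight %d -> old %d / new %d\n",
  		       ioprio, w, weight_to_ioprio_old(w),
  		       weight_to_ioprio_new(w));
  		assert(weight_to_ioprio_new(w) == ioprio);	/* round-trips */
  	}
  	return 0;
  }

For example, ioprio 4 maps to weight 40; the old inverse yields 80 - 40 = 40, which is not a valid ioprio at all, while the fixed one yields 8 - 4 = 4.
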
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index d25114715459..6996e7bd66e9 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -373,7 +373,7 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
- bip->bip_iter.bi_sector += bytes_done >> 9;
+ bip->bip_iter.bi_sector += bio_integrity_intervals(bi, bytes_done >> 9);
bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
}
@@ -420,7 +420,6 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
return 0;
}
-EXPORT_SYMBOL(bio_integrity_clone);
int bioset_integrity_create(struct bio_set *bs, int pool_size)
{
diff --git a/block/bio.c b/block/bio.c
index 4312a8085396..33979f306e9e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -15,7 +15,6 @@
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
-#include <linux/blk-cgroup.h>
#include <linux/highmem.h>
#include <linux/sched/sysctl.h>
#include <linux/blk-crypto.h>
@@ -24,6 +23,7 @@
#include <trace/events/block.h>
#include "blk.h"
#include "blk-rq-qos.h"
+#include "blk-cgroup.h"
struct bio_alloc_cache {
struct bio *free_list;
@@ -249,12 +249,12 @@ static void bio_free(struct bio *bio)
* they must remember to pair any call to bio_init() with bio_uninit()
* when IO has completed, or when the bio is released.
*/
-void bio_init(struct bio *bio, struct bio_vec *table,
- unsigned short max_vecs)
+void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
+ unsigned short max_vecs, unsigned int opf)
{
bio->bi_next = NULL;
- bio->bi_bdev = NULL;
- bio->bi_opf = 0;
+ bio->bi_bdev = bdev;
+ bio->bi_opf = opf;
bio->bi_flags = 0;
bio->bi_ioprio = 0;
bio->bi_write_hint = 0;
@@ -268,6 +268,8 @@ void bio_init(struct bio *bio, struct bio_vec *table,
#ifdef CONFIG_BLK_CGROUP
bio->bi_blkg = NULL;
bio->bi_issue.value = 0;
+ if (bdev)
+ bio_associate_blkg(bio);
#ifdef CONFIG_BLK_CGROUP_IOCOST
bio->bi_iocost_cost = 0;
#endif
@@ -293,6 +295,8 @@ EXPORT_SYMBOL(bio_init);
/**
* bio_reset - reinitialize a bio
* @bio: bio to reset
+ * @bdev: block device to use the bio for
+ * @opf: operation and flags for bio
*
* Description:
* After calling bio_reset(), @bio will be in the same state as a freshly
@@ -300,11 +304,15 @@ EXPORT_SYMBOL(bio_init);
* preserved are the ones that are initialized by bio_alloc_bioset(). See
* comment in struct bio.
*/
-void bio_reset(struct bio *bio)
+void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf)
{
bio_uninit(bio);
memset(bio, 0, BIO_RESET_BYTES);
atomic_set(&bio->__bi_remaining, 1);
+ bio->bi_bdev = bdev;
+ if (bio->bi_bdev)
+ bio_associate_blkg(bio);
+ bio->bi_opf = opf;
}
EXPORT_SYMBOL(bio_reset);
@@ -344,6 +352,20 @@ void bio_chain(struct bio *bio, struct bio *parent)
}
EXPORT_SYMBOL(bio_chain);
+struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
+ unsigned int nr_pages, unsigned int opf, gfp_t gfp)
+{
+ struct bio *new = bio_alloc(bdev, nr_pages, opf, gfp);
+
+ if (bio) {
+ bio_chain(bio, new);
+ submit_bio(bio);
+ }
+
+ return new;
+}
+EXPORT_SYMBOL_GPL(blk_next_bio);
+
static void bio_alloc_rescue(struct work_struct *work)
{
struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
@@ -400,8 +422,10 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
/**
* bio_alloc_bioset - allocate a bio for I/O
+ * @bdev: block device to allocate the bio for (can be %NULL)
+ * @nr_vecs: number of bvecs to pre-allocate
+ * @opf: operation and flags for bio
* @gfp_mask: the GFP_* mask given to the slab allocator
- * @nr_iovecs: number of iovecs to pre-allocate
* @bs: the bio_set to allocate from.
*
* Allocate a bio from the mempools in @bs.
@@ -430,15 +454,16 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
*
* Returns: Pointer to new bio on success, NULL on failure.
*/
-struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned short nr_iovecs,
+struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
+ unsigned int opf, gfp_t gfp_mask,
struct bio_set *bs)
{
gfp_t saved_gfp = gfp_mask;
struct bio *bio;
void *p;
- /* should not use nobvec bioset for nr_iovecs > 0 */
- if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_iovecs > 0))
+ /* should not use nobvec bioset for nr_vecs > 0 */
+ if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
return NULL;
/*
@@ -475,23 +500,23 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned short nr_iovecs,
return NULL;
bio = p + bs->front_pad;
- if (nr_iovecs > BIO_INLINE_VECS) {
+ if (nr_vecs > BIO_INLINE_VECS) {
struct bio_vec *bvl = NULL;
- bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask);
+ bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
if (!bvl && gfp_mask != saved_gfp) {
punt_bios_to_rescuer(bs);
gfp_mask = saved_gfp;
- bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask);
+ bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
}
if (unlikely(!bvl))
goto err_free;
- bio_init(bio, bvl, nr_iovecs);
- } else if (nr_iovecs) {
- bio_init(bio, bio->bi_inline_vecs, BIO_INLINE_VECS);
+ bio_init(bio, bdev, bvl, nr_vecs, opf);
+ } else if (nr_vecs) {
+ bio_init(bio, bdev, bio->bi_inline_vecs, BIO_INLINE_VECS, opf);
} else {
- bio_init(bio, NULL, 0);
+ bio_init(bio, bdev, NULL, 0, opf);
}
bio->bi_pool = bs;
@@ -522,7 +547,8 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs)
bio = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
if (unlikely(!bio))
return NULL;
- bio_init(bio, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs);
+ bio_init(bio, NULL, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs,
+ 0);
bio->bi_pool = NULL;
return bio;
}
@@ -702,80 +728,84 @@ void bio_put(struct bio *bio)
}
EXPORT_SYMBOL(bio_put);
-/**
- * __bio_clone_fast - clone a bio that shares the original bio's biovec
- * @bio: destination bio
- * @bio_src: bio to clone
- *
- * Clone a &bio. Caller will own the returned bio, but not
- * the actual data it points to. Reference count of returned
- * bio will be one.
- *
- * Caller must ensure that @bio_src is not freed before @bio.
- */
-void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
+static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
{
- WARN_ON_ONCE(bio->bi_pool && bio->bi_max_vecs);
-
- /*
- * most users will be overriding ->bi_bdev with a new target,
- * so we don't set nor calculate new physical/hw segment counts here
- */
- bio->bi_bdev = bio_src->bi_bdev;
bio_set_flag(bio, BIO_CLONED);
if (bio_flagged(bio_src, BIO_THROTTLED))
bio_set_flag(bio, BIO_THROTTLED);
- if (bio_flagged(bio_src, BIO_REMAPPED))
+ if (bio->bi_bdev == bio_src->bi_bdev &&
+ bio_flagged(bio_src, BIO_REMAPPED))
bio_set_flag(bio, BIO_REMAPPED);
- bio->bi_opf = bio_src->bi_opf;
bio->bi_ioprio = bio_src->bi_ioprio;
bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter = bio_src->bi_iter;
- bio->bi_io_vec = bio_src->bi_io_vec;
bio_clone_blkg_association(bio, bio_src);
blkcg_bio_issue_init(bio);
+
+ if (bio_crypt_clone(bio, bio_src, gfp) < 0)
+ return -ENOMEM;
+ if (bio_integrity(bio_src) &&
+ bio_integrity_clone(bio, bio_src, gfp) < 0)
+ return -ENOMEM;
+ return 0;
}
-EXPORT_SYMBOL(__bio_clone_fast);
/**
- * bio_clone_fast - clone a bio that shares the original bio's biovec
- * @bio: bio to clone
- * @gfp_mask: allocation priority
- * @bs: bio_set to allocate from
+ * bio_alloc_clone - clone a bio that shares the original bio's biovec
+ * @bdev: block_device to clone onto
+ * @bio_src: bio to clone from
+ * @gfp: allocation priority
+ * @bs: bio_set to allocate from
+ *
+ * Allocate a new bio that is a clone of @bio_src. The caller owns the returned
+ * bio, but not the actual data it points to.
*
- * Like __bio_clone_fast, only also allocates the returned bio
+ * The caller must ensure that the returned bio is not freed before @bio_src.
*/
-struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
+struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src,
+ gfp_t gfp, struct bio_set *bs)
{
- struct bio *b;
+ struct bio *bio;
- b = bio_alloc_bioset(gfp_mask, 0, bs);
- if (!b)
+ bio = bio_alloc_bioset(bdev, 0, bio_src->bi_opf, gfp, bs);
+ if (!bio)
return NULL;
- __bio_clone_fast(b, bio);
-
- if (bio_crypt_clone(b, bio, gfp_mask) < 0)
- goto err_put;
-
- if (bio_integrity(bio) &&
- bio_integrity_clone(b, bio, gfp_mask) < 0)
- goto err_put;
-
- return b;
+ if (__bio_clone(bio, bio_src, gfp) < 0) {
+ bio_put(bio);
+ return NULL;
+ }
+ bio->bi_io_vec = bio_src->bi_io_vec;
-err_put:
- bio_put(b);
- return NULL;
+ return bio;
}
-EXPORT_SYMBOL(bio_clone_fast);
+EXPORT_SYMBOL(bio_alloc_clone);
-const char *bio_devname(struct bio *bio, char *buf)
+/**
+ * bio_init_clone - clone a bio that shares the original bio's biovec
+ * @bdev: block_device to clone onto
+ * @bio: bio to clone into
+ * @bio_src: bio to clone from
+ * @gfp: allocation priority
+ *
+ * Initialize a new bio in caller provided memory that is a clone of @bio_src.
+ * The caller owns the returned bio, but not the actual data it points to.
+ *
+ * The caller must ensure that @bio_src is not freed before @bio.
+ */
+int bio_init_clone(struct block_device *bdev, struct bio *bio,
+ struct bio *bio_src, gfp_t gfp)
{
- return bdevname(bio->bi_bdev, buf);
+ int ret;
+
+ bio_init(bio, bdev, bio_src->bi_io_vec, 0, bio_src->bi_opf);
+ ret = __bio_clone(bio, bio_src, gfp);
+ if (ret)
+ bio_uninit(bio);
+ return ret;
}
-EXPORT_SYMBOL(bio_devname);
+EXPORT_SYMBOL(bio_init_clone);
/**
* bio_full - check if the bio is full
@@ -1054,7 +1084,7 @@ bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len,
size_t off)
{
if (len > UINT_MAX || off > UINT_MAX)
- return 0;
+ return false;
return bio_add_page(bio, &folio->page, len, off) > 0;
}
@@ -1486,8 +1516,7 @@ again:
if (!bio_integrity_endio(bio))
return;
- if (bio->bi_bdev && bio_flagged(bio, BIO_TRACKED))
- rq_qos_done_bio(bdev_get_queue(bio->bi_bdev), bio);
+ rq_qos_done_bio(bio);
if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio);
@@ -1541,7 +1570,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND))
return NULL;
- split = bio_clone_fast(bio, gfp, bs);
+ split = bio_alloc_clone(bio->bi_bdev, bio, gfp, bs);
if (!split)
return NULL;
@@ -1636,9 +1665,9 @@ EXPORT_SYMBOL(bioset_exit);
* Note that the bio must be embedded at the END of that structure always,
* or things will break badly.
* If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be allocated
- * for allocating iovecs. This pool is not needed e.g. for bio_clone_fast().
- * If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used to
- * dispatch queued requests when the mempool runs out of space.
+ * for allocating iovecs. This pool is not needed e.g. for bio_init_clone().
+ * If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used
+ * to dispatch queued requests when the mempool runs out of space.
*
*/
int bioset_init(struct bio_set *bs,
@@ -1708,7 +1737,9 @@ EXPORT_SYMBOL(bioset_init_from_src);
/**
* bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb
* @kiocb: kiocb describing the IO
+ * @bdev: block device to allocate the bio for (can be %NULL)
* @nr_vecs: number of iovecs to pre-allocate
+ * @opf: operation and flags for bio
* @bs: bio_set to allocate from
*
* Description:
@@ -1719,14 +1750,14 @@ EXPORT_SYMBOL(bioset_init_from_src);
* MUST be done from process context, not hard/soft IRQ.
*
*/
-struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
- struct bio_set *bs)
+struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
+ unsigned short nr_vecs, unsigned int opf, struct bio_set *bs)
{
struct bio_alloc_cache *cache;
struct bio *bio;
if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
- return bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);
+ return bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
cache = per_cpu_ptr(bs->cache, get_cpu());
if (cache->free_list) {
@@ -1734,13 +1765,14 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
cache->free_list = bio->bi_next;
cache->nr--;
put_cpu();
- bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);
+ bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL,
+ nr_vecs, opf);
bio->bi_pool = bs;
bio_set_flag(bio, BIO_PERCPU_CACHE);
return bio;
}
put_cpu();
- bio = bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);
+ bio = bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
bio_set_flag(bio, BIO_PERCPU_CACHE);
return bio;
}
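
The blk_next_bio() helper added near the top of this file's diff captures a streaming pattern: allocate the next container first, chain the previous one to it so the chain completes in order, then submit the previous one. The caller only ever holds the newest element. A toy userspace rendition of that shape (all names invented; "submission" completes immediately here):

  #include <stdio.h>
  #include <stdlib.h>

  struct unit {
  	int id;
  	struct unit *chained_to;	/* the element that waits on us */
  };

  static void submit(struct unit *u)
  {
  	printf("submitted unit %d (chained to %d)\n", u->id,
  	       u->chained_to ? u->chained_to->id : -1);
  	free(u);	/* in this toy, submission completes immediately */
  }

  /* Allocate the next unit; if an earlier one exists, chain it to the
   * new one and submit it, mirroring the shape of blk_next_bio(). */
  static struct unit *next_unit(struct unit *prev, int id)
  {
  	struct unit *new = calloc(1, sizeof(*new));

  	if (!new)
  		exit(1);
  	new->id = id;
  	if (prev) {
  		prev->chained_to = new;
  		submit(prev);
  	}
  	return new;
  }

  int main(void)
  {
  	struct unit *u = NULL;

  	for (int i = 0; i < 3; i++)
  		u = next_unit(u, i);
  	submit(u);	/* the caller finishes off the last unit */
  	return 0;
  }
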
diff --git a/block/blk-cgroup-rwstat.h b/block/blk-cgroup-rwstat.h
index ee746919c41f..9f2723b34b75 100644
--- a/block/blk-cgroup-rwstat.h
+++ b/block/blk-cgroup-rwstat.h
@@ -6,7 +6,7 @@
#ifndef _BLK_CGROUP_RWSTAT_H
#define _BLK_CGROUP_RWSTAT_H
-#include <linux/blk-cgroup.h>
+#include "blk-cgroup.h"
enum blkg_rwstat_type {
BLKG_RWSTAT_READ,
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 650f7e27989f..d53b0d69dd73 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -23,15 +23,14 @@
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/slab.h>
-#include <linux/genhd.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include <linux/ctype.h>
-#include <linux/blk-cgroup.h>
#include <linux/tracehook.h>
#include <linux/psi.h>
#include <linux/part_stat.h>
#include "blk.h"
+#include "blk-cgroup.h"
#include "blk-ioprio.h"
#include "blk-throttle.h"
@@ -83,6 +82,8 @@ static void blkg_free(struct blkcg_gq *blkg)
if (blkg->pd[i])
blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
+ if (blkg->q)
+ blk_put_queue(blkg->q);
free_percpu(blkg->iostat_cpu);
percpu_ref_exit(&blkg->refcnt);
kfree(blkg);
@@ -168,6 +169,9 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
if (!blkg->iostat_cpu)
goto err_free;
+ if (!blk_get_queue(q))
+ goto err_free;
+
blkg->q = q;
INIT_LIST_HEAD(&blkg->q_node);
spin_lock_init(&blkg->async_bio_lock);
@@ -857,11 +861,11 @@ static void blkcg_fill_root_iostats(void)
blk_queue_root_blkg(bdev_get_queue(bdev));
struct blkg_iostat tmp;
int cpu;
+ unsigned long flags;
memset(&tmp, 0, sizeof(tmp));
for_each_possible_cpu(cpu) {
struct disk_stats *cpu_dkstats;
- unsigned long flags;
cpu_dkstats = per_cpu_ptr(bdev->bd_stats, cpu);
tmp.ios[BLKG_IOSTAT_READ] +=
@@ -877,11 +881,11 @@ static void blkcg_fill_root_iostats(void)
cpu_dkstats->sectors[STAT_WRITE] << 9;
tmp.bytes[BLKG_IOSTAT_DISCARD] +=
cpu_dkstats->sectors[STAT_DISCARD] << 9;
-
- flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
- blkg_iostat_set(&blkg->iostat.cur, &tmp);
- u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
}
+
+ flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
+ blkg_iostat_set(&blkg->iostat.cur, &tmp);
+ u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
}
}
@@ -1176,6 +1180,8 @@ int blkcg_init_queue(struct request_queue *q)
bool preloaded;
int ret;
+ INIT_LIST_HEAD(&q->blkg_list);
+
new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
if (!new_blkg)
return -ENOMEM;
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
new file mode 100644
index 000000000000..47e1e38390c9
--- /dev/null
+++ b/block/blk-cgroup.h
@@ -0,0 +1,494 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BLK_CGROUP_PRIVATE_H
+#define _BLK_CGROUP_PRIVATE_H
+/*
+ * block cgroup private header
+ *
+ * Based on ideas and code from CFQ, CFS and BFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ *
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ * Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
+ * Nauman Rafique <nauman@google.com>
+ */
+
+#include <linux/blk-cgroup.h>
+#include <linux/blk-mq.h>
+
+/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
+#define BLKG_STAT_CPU_BATCH (INT_MAX / 2)
+
+#ifdef CONFIG_BLK_CGROUP
+
+/*
+ * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a
+ * request_queue (q). This is used by blkcg policies which need to track
+ * information per blkcg - q pair.
+ *
+ * There can be multiple active blkcg policies and each blkg:policy pair is
+ * represented by a blkg_policy_data which is allocated and freed by each
+ * policy's pd_alloc/free_fn() methods. A policy can allocate private data
+ * area by allocating larger data structure which embeds blkg_policy_data
+ * at the beginning.
+ */
+struct blkg_policy_data {
+ /* the blkg and policy id this per-policy data belongs to */
+ struct blkcg_gq *blkg;
+ int plid;
+};
+
+/*
+ * Policies that need to keep per-blkcg data which is independent from any
+ * request_queue associated to it should implement cpd_alloc/free_fn()
+ * methods. A policy can allocate private data area by allocating larger
+ * data structure which embeds blkcg_policy_data at the beginning.
+ * cpd_init() is invoked to let each policy handle per-blkcg data.
+ */
+struct blkcg_policy_data {
+ /* the blkcg and policy id this per-policy data belongs to */
+ struct blkcg *blkcg;
+ int plid;
+};
+
+typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
+typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
+typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
+typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
+typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp,
+ struct request_queue *q, struct blkcg *blkcg);
+typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
+typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
+ struct seq_file *s);
+
+struct blkcg_policy {
+ int plid;
+ /* cgroup files for the policy */
+ struct cftype *dfl_cftypes;
+ struct cftype *legacy_cftypes;
+
+ /* operations */
+ blkcg_pol_alloc_cpd_fn *cpd_alloc_fn;
+ blkcg_pol_init_cpd_fn *cpd_init_fn;
+ blkcg_pol_free_cpd_fn *cpd_free_fn;
+ blkcg_pol_bind_cpd_fn *cpd_bind_fn;
+
+ blkcg_pol_alloc_pd_fn *pd_alloc_fn;
+ blkcg_pol_init_pd_fn *pd_init_fn;
+ blkcg_pol_online_pd_fn *pd_online_fn;
+ blkcg_pol_offline_pd_fn *pd_offline_fn;
+ blkcg_pol_free_pd_fn *pd_free_fn;
+ blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn;
+ blkcg_pol_stat_pd_fn *pd_stat_fn;
+};
+
+extern struct blkcg blkcg_root;
+extern bool blkcg_debug_stats;
+
+struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
+ struct request_queue *q, bool update_hint);
+int blkcg_init_queue(struct request_queue *q);
+void blkcg_exit_queue(struct request_queue *q);
+
+/* Blkio controller policy registration */
+int blkcg_policy_register(struct blkcg_policy *pol);
+void blkcg_policy_unregister(struct blkcg_policy *pol);
+int blkcg_activate_policy(struct request_queue *q,
+ const struct blkcg_policy *pol);
+void blkcg_deactivate_policy(struct request_queue *q,
+ const struct blkcg_policy *pol);
+
+const char *blkg_dev_name(struct blkcg_gq *blkg);
+void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
+ u64 (*prfill)(struct seq_file *,
+ struct blkg_policy_data *, int),
+ const struct blkcg_policy *pol, int data,
+ bool show_total);
+u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
+
+struct blkg_conf_ctx {
+ struct block_device *bdev;
+ struct blkcg_gq *blkg;
+ char *body;
+};
+
+struct block_device *blkcg_conf_open_bdev(char **inputp);
+int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
+ char *input, struct blkg_conf_ctx *ctx);
+void blkg_conf_finish(struct blkg_conf_ctx *ctx);
+
+/**
+ * blkcg_css - find the current css
+ *
+ * Find the css associated with either the kthread or the current task.
+ * This may return a dying css, so it is up to the caller to use tryget logic
+ * to confirm it is alive and well.
+ */
+static inline struct cgroup_subsys_state *blkcg_css(void)
+{
+ struct cgroup_subsys_state *css;
+
+ css = kthread_blkcg();
+ if (css)
+ return css;
+ return task_css(current, io_cgrp_id);
+}
+
+/**
+ * __bio_blkcg - internal, inconsistent version to get blkcg
+ *
+ * DO NOT USE.
+ * This function is inconsistent and consequently is dangerous to use. The
+ * first part of the function returns a blkcg where a reference is owned by the
+ * bio. This means it does not need to be rcu protected as it cannot go away
+ * bio. This means it does not need to be RCU protected, as it cannot go away
+ * while the bio owns a reference to it. However, the latter potentially gets
+ * it from task_css(). This can race against task migration and the cgroup
+ * dying. It is also semantically different, as it must be called RCU protected
+ * and is susceptible to failure when trying to get a reference to it.
+ * blkcg returned here.
+ */
+static inline struct blkcg *__bio_blkcg(struct bio *bio)
+{
+ if (bio && bio->bi_blkg)
+ return bio->bi_blkg->blkcg;
+ return css_to_blkcg(blkcg_css());
+}
+
+/**
+ * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
+ * @return: true if this bio needs to be submitted with the root blkg context.
+ *
+ * In order to avoid priority inversions we sometimes need to issue a bio as if
+ * it were attached to the root blkg, and then backcharge to the actual owning
+ * blkg. The idea is we do bio_blkcg() to look up the actual context for the
+ * bio and attach the appropriate blkg to the bio. Then we call this helper and
+ * if it is true run with the root blkg for that queue and then do any
+ * backcharging to the originating cgroup once the io is complete.
+ */
+static inline bool bio_issue_as_root_blkg(struct bio *bio)
+{
+ return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
+}
+
+/**
+ * __blkg_lookup - internal version of blkg_lookup()
+ * @blkcg: blkcg of interest
+ * @q: request_queue of interest
+ * @update_hint: whether to update lookup hint with the result or not
+ *
+ * This is internal version and shouldn't be used by policy
+ * implementations. Looks up blkgs for the @blkcg - @q pair regardless of
+ * @q's bypass state. If @update_hint is %true, the caller should be
+ * holding @q->queue_lock and lookup hint is updated on success.
+ */
+static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
+ struct request_queue *q,
+ bool update_hint)
+{
+ struct blkcg_gq *blkg;
+
+ if (blkcg == &blkcg_root)
+ return q->root_blkg;
+
+ blkg = rcu_dereference(blkcg->blkg_hint);
+ if (blkg && blkg->q == q)
+ return blkg;
+
+ return blkg_lookup_slowpath(blkcg, q, update_hint);
+}
+
+/**
+ * blkg_lookup - lookup blkg for the specified blkcg - q pair
+ * @blkcg: blkcg of interest
+ * @q: request_queue of interest
+ *
+ * Lookup blkg for the @blkcg - @q pair. This function should be called
+ * under RCU read lock.
+ */
+static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
+ struct request_queue *q)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ return __blkg_lookup(blkcg, q, false);
+}
+
+/**
+ * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
+ * @q: request_queue of interest
+ *
+ * Lookup blkg for @q at the root level. See also blkg_lookup().
+ */
+static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
+{
+ return q->root_blkg;
+}
+
+/**
+ * blkg_to_pd - get policy private data
+ * @blkg: blkg of interest
+ * @pol: policy of interest
+ *
+ * Return pointer to private data associated with the @blkg-@pol pair.
+ */
+static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
+ struct blkcg_policy *pol)
+{
+ return blkg ? blkg->pd[pol->plid] : NULL;
+}
+
+static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
+ struct blkcg_policy *pol)
+{
+ return blkcg ? blkcg->cpd[pol->plid] : NULL;
+}
+
+/**
+ * pd_to_blkg - get blkg associated with policy private data
+ * @pd: policy private data of interest
+ *
+ * @pd is policy private data. Determine the blkg it's associated with.
+ */
+static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
+{
+ return pd ? pd->blkg : NULL;
+}
+
+static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
+{
+ return cpd ? cpd->blkcg : NULL;
+}
+
+/**
+ * blkg_path - format cgroup path of blkg
+ * @blkg: blkg of interest
+ * @buf: target buffer
+ * @buflen: target buffer length
+ *
+ * Format the path of the cgroup of @blkg into @buf.
+ */
+static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
+{
+ return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
+}
+
+/**
+ * blkg_get - get a blkg reference
+ * @blkg: blkg to get
+ *
+ * The caller should be holding an existing reference.
+ */
+static inline void blkg_get(struct blkcg_gq *blkg)
+{
+ percpu_ref_get(&blkg->refcnt);
+}
+
+/**
+ * blkg_tryget - try and get a blkg reference
+ * @blkg: blkg to get
+ *
+ * This is for use when doing an RCU lookup of the blkg. We may be in the midst
+ * of freeing this blkg, so we can only use it if the refcnt is not zero.
+ */
+static inline bool blkg_tryget(struct blkcg_gq *blkg)
+{
+ return blkg && percpu_ref_tryget(&blkg->refcnt);
+}
+
+/**
+ * blkg_put - put a blkg reference
+ * @blkg: blkg to put
+ */
+static inline void blkg_put(struct blkcg_gq *blkg)
+{
+ percpu_ref_put(&blkg->refcnt);
+}
+
+/**
+ * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
+ * @d_blkg: loop cursor pointing to the current descendant
+ * @pos_css: used for iteration
+ * @p_blkg: target blkg to walk descendants of
+ *
+ * Walk @d_blkg through the descendants of @p_blkg. Must be used with RCU
+ * read locked. If called under either blkcg or queue lock, the iteration
+ * is guaranteed to include all and only online blkgs. The caller may
+ * update @pos_css by calling css_rightmost_descendant() to skip subtree.
+ * @p_blkg is included in the iteration and the first node to be visited.
+ */
+#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \
+ css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \
+ if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
+ (p_blkg)->q, false)))
+
+/**
+ * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
+ * @d_blkg: loop cursor pointing to the current descendant
+ * @pos_css: used for iteration
+ * @p_blkg: target blkg to walk descendants of
+ *
+ * Similar to blkg_for_each_descendant_pre() but performs post-order
+ * traversal instead. Synchronization rules are the same. @p_blkg is
+ * included in the iteration and the last node to be visited.
+ */
+#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \
+ css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \
+ if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
+ (p_blkg)->q, false)))
+
+bool __blkcg_punt_bio_submit(struct bio *bio);
+
+static inline bool blkcg_punt_bio_submit(struct bio *bio)
+{
+ if (bio->bi_opf & REQ_CGROUP_PUNT)
+ return __blkcg_punt_bio_submit(bio);
+ else
+ return false;
+}
+
+static inline void blkcg_bio_issue_init(struct bio *bio)
+{
+ bio_issue_init(&bio->bi_issue, bio_sectors(bio));
+}
+
+static inline void blkcg_use_delay(struct blkcg_gq *blkg)
+{
+ if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
+ return;
+ if (atomic_add_return(1, &blkg->use_delay) == 1)
+ atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
+}
+
+static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
+{
+ int old = atomic_read(&blkg->use_delay);
+
+ if (WARN_ON_ONCE(old < 0))
+ return 0;
+ if (old == 0)
+ return 0;
+
+ /*
+ * We do this song and dance because we can race with somebody else
+ * adding or removing delay. If we just did an atomic_dec we'd end up
+ * negative and we'd already be in trouble. We need to subtract 1 and
+ * then check to see if we were the last delay so we can drop the
+ * congestion count on the cgroup.
+ */
+ while (old) {
+ int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
+ if (cur == old)
+ break;
+ old = cur;
+ }
+
+ if (old == 0)
+ return 0;
+ if (old == 1)
+ atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
+ return 1;
+}
+
+/**
+ * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount
+ * @blkg: target blkg
+ * @delay: delay duration in nsecs
+ *
+ * When enabled with this function, the delay is not decayed and must be
+ * explicitly cleared with blkcg_clear_delay(). Must not be mixed with
+ * blkcg_[un]use_delay() and blkcg_add_delay() usages.
+ */
+static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay)
+{
+ int old = atomic_read(&blkg->use_delay);
+
+ /* We only want 1 person setting the congestion count for this blkg. */
+ if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old)
+ atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
+
+ atomic64_set(&blkg->delay_nsec, delay);
+}
+
+/**
+ * blkcg_clear_delay - Disable allocator delay mechanism
+ * @blkg: target blkg
+ *
+ * Disable use_delay mechanism. See blkcg_set_delay().
+ */
+static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
+{
+ int old = atomic_read(&blkg->use_delay);
+
+ /* We only want 1 person clearing the congestion count for this blkg. */
+ if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old)
+ atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
+}
+
+/**
+ * blk_cgroup_mergeable - Determine whether to allow or disallow merges
+ * @rq: request to merge into
+ * @bio: bio to merge
+ *
+ * @bio and @rq should belong to the same cgroup and their issue_as_root should
+ * match. The latter is necessary as we don't want to throttle e.g. a metadata
+ * update because it happens to be next to a regular IO.
+ */
+static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio)
+{
+ return rq->bio->bi_blkg == bio->bi_blkg &&
+ bio_issue_as_root_blkg(rq->bio) == bio_issue_as_root_blkg(bio);
+}
+
+void blk_cgroup_bio_start(struct bio *bio);
+void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
+#else /* CONFIG_BLK_CGROUP */
+
+struct blkg_policy_data {
+};
+
+struct blkcg_policy_data {
+};
+
+struct blkcg_policy {
+};
+
+#ifdef CONFIG_BLOCK
+
+static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
+static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
+{ return NULL; }
+static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
+static inline void blkcg_exit_queue(struct request_queue *q) { }
+static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
+static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
+static inline int blkcg_activate_policy(struct request_queue *q,
+ const struct blkcg_policy *pol) { return 0; }
+static inline void blkcg_deactivate_policy(struct request_queue *q,
+ const struct blkcg_policy *pol) { }
+
+static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
+
+static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
+ struct blkcg_policy *pol) { return NULL; }
+static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
+static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
+static inline void blkg_get(struct blkcg_gq *blkg) { }
+static inline void blkg_put(struct blkcg_gq *blkg) { }
+
+static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
+static inline void blkcg_bio_issue_init(struct bio *bio) { }
+static inline void blk_cgroup_bio_start(struct bio *bio) { }
+static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; }
+
+#define blk_queue_for_each_rl(rl, q) \
+ for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
+
+#endif /* CONFIG_BLOCK */
+#endif /* CONFIG_BLK_CGROUP */
+
+#endif /* _BLK_CGROUP_PRIVATE_H */
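The blkcg_set_delay()/blkcg_clear_delay() pair above relies on a compare-and-swap guard so that, however many callers race, the cgroup-wide congestion_count is bumped exactly once per blkg while the delay is armed and dropped exactly once when it is disarmed. Below is a minimal standalone sketch of that pattern using C11 atomics; set_delay(), clear_delay() and congestion_count here are illustrative stand-ins, not the kernel API.

#include <stdatomic.h>
#include <stdio.h>

/* Illustrative analogues of blkg->use_delay and the cgroup-wide count. */
static atomic_int use_delay;
static atomic_int congestion_count;

static void set_delay(void)
{
	int old = atomic_load(&use_delay);

	/* Only the caller that wins the 0 -> -1 transition bumps the count. */
	if (old == 0 && atomic_compare_exchange_strong(&use_delay, &old, -1))
		atomic_fetch_add(&congestion_count, 1);
}

static void clear_delay(void)
{
	int old = atomic_load(&use_delay);

	/* Likewise, only the winner of the nonzero -> 0 transition drops it. */
	if (old != 0 && atomic_compare_exchange_strong(&use_delay, &old, 0))
		atomic_fetch_sub(&congestion_count, 1);
}

int main(void)
{
	set_delay();
	set_delay();		/* no-op: the count stays at 1 */
	clear_delay();
	printf("congestion_count = %d\n", atomic_load(&congestion_count));
	return 0;
}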
diff --git a/block/blk-core.c b/block/blk-core.c
index 97f8bc8d3a79..9c14deab3af4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -34,7 +34,6 @@
#include <linux/delay.h>
#include <linux/ratelimit.h>
#include <linux/pm_runtime.h>
-#include <linux/blk-cgroup.h>
#include <linux/t10-pi.h>
#include <linux/debugfs.h>
#include <linux/bpf.h>
@@ -49,7 +48,9 @@
#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-pm.h"
+#include "blk-cgroup.h"
#include "blk-throttle.h"
+#include "blk-rq-qos.h"
struct dentry *blk_debugfs_root;
@@ -164,6 +165,7 @@ static const struct {
[BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" },
[BLK_STS_DEV_RESOURCE] = { -EBUSY, "device resource" },
[BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" },
+ [BLK_STS_OFFLINE] = { -ENODEV, "device offline" },
/* device mapper special case, should not leak out: */
[BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" },
@@ -284,13 +286,6 @@ void blk_queue_start_drain(struct request_queue *q)
wake_up_all(&q->mq_freeze_wq);
}
-void blk_set_queue_dying(struct request_queue *q)
-{
- blk_queue_flag_set(QUEUE_FLAG_DYING, q);
- blk_queue_start_drain(q);
-}
-EXPORT_SYMBOL_GPL(blk_set_queue_dying);
-
/**
* blk_cleanup_queue - shutdown a request queue
* @q: request queue to shutdown
@@ -308,7 +303,8 @@ void blk_cleanup_queue(struct request_queue *q)
WARN_ON_ONCE(blk_queue_registered(q));
/* mark @q DYING, no new request or merges will be allowed afterwards */
- blk_set_queue_dying(q);
+ blk_queue_flag_set(QUEUE_FLAG_DYING, q);
+ blk_queue_start_drain(q);
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
@@ -320,6 +316,9 @@ void blk_cleanup_queue(struct request_queue *q)
*/
blk_freeze_queue(q);
+ /* cleanup rq qos structures for queue without disk */
+ rq_qos_exit(q);
+
blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
blk_sync_queue(q);
@@ -341,8 +340,6 @@ void blk_cleanup_queue(struct request_queue *q)
blk_mq_sched_free_rqs(q);
mutex_unlock(&q->sysfs_lock);
- percpu_ref_exit(&q->q_usage_counter);
-
/* @q is and will stay empty, shutdown and put */
blk_put_queue(q);
}
@@ -475,9 +472,6 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
INIT_WORK(&q->timeout_work, blk_timeout_work);
INIT_LIST_HEAD(&q->icq_list);
-#ifdef CONFIG_BLK_CGROUP
- INIT_LIST_HEAD(&q->blkg_list);
-#endif
kobject_init(&q->kobj, &blk_queue_ktype);
@@ -498,17 +492,12 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
goto fail_stats;
- if (blkcg_init_queue(q))
- goto fail_ref;
-
blk_queue_dma_alignment(q, 511);
blk_set_default_limits(&q->limits);
q->nr_requests = BLKDEV_DEFAULT_RQ;
return q;
-fail_ref:
- percpu_ref_exit(&q->q_usage_counter);
fail_stats:
blk_free_queue_stats(q->stats);
fail_split:
@@ -542,17 +531,6 @@ bool blk_get_queue(struct request_queue *q)
}
EXPORT_SYMBOL(blk_get_queue);
-static void handle_bad_sector(struct bio *bio, sector_t maxsector)
-{
- char b[BDEVNAME_SIZE];
-
- pr_info_ratelimited("%s: attempt to access beyond end of device\n"
- "%s: rw=%d, want=%llu, limit=%llu\n",
- current->comm,
- bio_devname(bio, b), bio->bi_opf,
- bio_end_sector(bio), maxsector);
-}
-
#ifdef CONFIG_FAIL_MAKE_REQUEST
static DECLARE_FAULT_ATTR(fail_make_request);
@@ -582,14 +560,10 @@ late_initcall(fail_make_request_debugfs);
static inline bool bio_check_ro(struct bio *bio)
{
if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) {
- char b[BDEVNAME_SIZE];
-
if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
return false;
-
- WARN_ONCE(1,
- "Trying to write to read-only block-device %s (partno %d)\n",
- bio_devname(bio, b), bio->bi_bdev->bd_partno);
+ pr_warn("Trying to write to read-only block-device %pg\n",
+ bio->bi_bdev);
/* Older lvm-tools actually trigger this */
return false;
}
@@ -618,7 +592,11 @@ static inline int bio_check_eod(struct bio *bio)
if (nr_sectors && maxsector &&
(nr_sectors > maxsector ||
bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
- handle_bad_sector(bio, maxsector);
+ pr_info_ratelimited("%s: attempt to access beyond end of device\n"
+ "%pg: rw=%d, want=%llu, limit=%llu\n",
+ current->comm,
+ bio->bi_bdev, bio->bi_opf,
+ bio_end_sector(bio), maxsector);
return -EIO;
}
return 0;
@@ -678,7 +656,123 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
return BLK_STS_OK;
}
-noinline_for_stack bool submit_bio_checks(struct bio *bio)
+static void __submit_bio(struct bio *bio)
+{
+ struct gendisk *disk = bio->bi_bdev->bd_disk;
+
+ if (unlikely(!blk_crypto_bio_prep(&bio)))
+ return;
+
+ if (!disk->fops->submit_bio) {
+ blk_mq_submit_bio(bio);
+ } else if (likely(bio_queue_enter(bio) == 0)) {
+ disk->fops->submit_bio(bio);
+ blk_queue_exit(disk->queue);
+ }
+}
+
+/*
+ * The loop in this function may be a bit non-obvious, and so deserves some
+ * explanation:
+ *
+ * - Before entering the loop, bio->bi_next is NULL (as all callers ensure
+ * that), so we have a list with a single bio.
+ * - We pretend that we have just taken it off a longer list, so we assign
+ * bio_list to a pointer to the bio_list_on_stack, thus initialising the
+ * bio_list of new bios to be added. ->submit_bio() may indeed add some more
+ * bios through a recursive call to submit_bio_noacct. If it did, we find a
+ * non-NULL value in bio_list and re-enter the loop from the top.
+ * - In this case we really did just take the bio off the top of the list (no
+ * pretending) and so remove it from bio_list, and call into ->submit_bio()
+ * again.
+ *
+ * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
+ * bio_list_on_stack[1] contains bios that were submitted before the current
+ * ->submit_bio, but that haven't been processed yet.
+ */
+static void __submit_bio_noacct(struct bio *bio)
+{
+ struct bio_list bio_list_on_stack[2];
+
+ BUG_ON(bio->bi_next);
+
+ bio_list_init(&bio_list_on_stack[0]);
+ current->bio_list = bio_list_on_stack;
+
+ do {
+ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+ struct bio_list lower, same;
+
+ /*
+ * Create a fresh bio_list for all subordinate requests.
+ */
+ bio_list_on_stack[1] = bio_list_on_stack[0];
+ bio_list_init(&bio_list_on_stack[0]);
+
+ __submit_bio(bio);
+
+ /*
+ * Sort new bios into those for a lower level and those for the
+ * same level.
+ */
+ bio_list_init(&lower);
+ bio_list_init(&same);
+ while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
+ if (q == bdev_get_queue(bio->bi_bdev))
+ bio_list_add(&same, bio);
+ else
+ bio_list_add(&lower, bio);
+
+ /*
+ * Now assemble so we handle the lowest level first.
+ */
+ bio_list_merge(&bio_list_on_stack[0], &lower);
+ bio_list_merge(&bio_list_on_stack[0], &same);
+ bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
+ } while ((bio = bio_list_pop(&bio_list_on_stack[0])));
+
+ current->bio_list = NULL;
+}
+
+static void __submit_bio_noacct_mq(struct bio *bio)
+{
+ struct bio_list bio_list[2] = { };
+
+ current->bio_list = bio_list;
+
+ do {
+ __submit_bio(bio);
+ } while ((bio = bio_list_pop(&bio_list[0])));
+
+ current->bio_list = NULL;
+}
+
+void submit_bio_noacct_nocheck(struct bio *bio)
+{
+ /*
+ * We only want one ->submit_bio to be active at a time, else stack
+ * usage with stacked devices could be a problem. Use current->bio_list
+ * to collect a list of requests submitted by a ->submit_bio method while
+ * it is active, and then process them after it has returned.
+ */
+ if (current->bio_list)
+ bio_list_add(&current->bio_list[0], bio);
+ else if (!bio->bi_bdev->bd_disk->fops->submit_bio)
+ __submit_bio_noacct_mq(bio);
+ else
+ __submit_bio_noacct(bio);
+}
+
+/**
+ * submit_bio_noacct - re-submit a bio to the block device layer for I/O
+ * @bio: The bio describing the location in memory and on the device.
+ *
+ * This is a version of submit_bio() that shall only be used for I/O that is
+ * resubmitted to lower level drivers by stacking block drivers. All file
+ * systems and other upper level users of the block layer should use
+ * submit_bio() instead.
+ */
+void submit_bio_noacct(struct bio *bio)
{
struct block_device *bdev = bio->bi_bdev;
struct request_queue *q = bdev_get_queue(bdev);
@@ -763,7 +857,7 @@ noinline_for_stack bool submit_bio_checks(struct bio *bio)
}
if (blk_throtl_bio(bio))
- return false;
+ return;
blk_cgroup_bio_start(bio);
blkcg_bio_issue_init(bio);
@@ -775,138 +869,14 @@ noinline_for_stack bool submit_bio_checks(struct bio *bio)
*/
bio_set_flag(bio, BIO_TRACE_COMPLETION);
}
- return true;
+ submit_bio_noacct_nocheck(bio);
+ return;
not_supported:
status = BLK_STS_NOTSUPP;
end_io:
bio->bi_status = status;
bio_endio(bio);
- return false;
-}
-
-static void __submit_bio_fops(struct gendisk *disk, struct bio *bio)
-{
- if (blk_crypto_bio_prep(&bio)) {
- if (likely(bio_queue_enter(bio) == 0)) {
- disk->fops->submit_bio(bio);
- blk_queue_exit(disk->queue);
- }
- }
-}
-
-static void __submit_bio(struct bio *bio)
-{
- struct gendisk *disk = bio->bi_bdev->bd_disk;
-
- if (unlikely(!submit_bio_checks(bio)))
- return;
-
- if (!disk->fops->submit_bio)
- blk_mq_submit_bio(bio);
- else
- __submit_bio_fops(disk, bio);
-}
-
-/*
- * The loop in this function may be a bit non-obvious, and so deserves some
- * explanation:
- *
- * - Before entering the loop, bio->bi_next is NULL (as all callers ensure
- * that), so we have a list with a single bio.
- * - We pretend that we have just taken it off a longer list, so we assign
- * bio_list to a pointer to the bio_list_on_stack, thus initialising the
- * bio_list of new bios to be added. ->submit_bio() may indeed add some more
- * bios through a recursive call to submit_bio_noacct. If it did, we find a
- * non-NULL value in bio_list and re-enter the loop from the top.
- * - In this case we really did just take the bio of the top of the list (no
- * pretending) and so remove it from bio_list, and call into ->submit_bio()
- * again.
- *
- * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
- * bio_list_on_stack[1] contains bios that were submitted before the current
- * ->submit_bio_bio, but that haven't been processed yet.
- */
-static void __submit_bio_noacct(struct bio *bio)
-{
- struct bio_list bio_list_on_stack[2];
-
- BUG_ON(bio->bi_next);
-
- bio_list_init(&bio_list_on_stack[0]);
- current->bio_list = bio_list_on_stack;
-
- do {
- struct request_queue *q = bdev_get_queue(bio->bi_bdev);
- struct bio_list lower, same;
-
- /*
- * Create a fresh bio_list for all subordinate requests.
- */
- bio_list_on_stack[1] = bio_list_on_stack[0];
- bio_list_init(&bio_list_on_stack[0]);
-
- __submit_bio(bio);
-
- /*
- * Sort new bios into those for a lower level and those for the
- * same level.
- */
- bio_list_init(&lower);
- bio_list_init(&same);
- while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
- if (q == bdev_get_queue(bio->bi_bdev))
- bio_list_add(&same, bio);
- else
- bio_list_add(&lower, bio);
-
- /*
- * Now assemble so we handle the lowest level first.
- */
- bio_list_merge(&bio_list_on_stack[0], &lower);
- bio_list_merge(&bio_list_on_stack[0], &same);
- bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
- } while ((bio = bio_list_pop(&bio_list_on_stack[0])));
-
- current->bio_list = NULL;
-}
-
-static void __submit_bio_noacct_mq(struct bio *bio)
-{
- struct bio_list bio_list[2] = { };
-
- current->bio_list = bio_list;
-
- do {
- __submit_bio(bio);
- } while ((bio = bio_list_pop(&bio_list[0])));
-
- current->bio_list = NULL;
-}
-
-/**
- * submit_bio_noacct - re-submit a bio to the block device layer for I/O
- * @bio: The bio describing the location in memory and on the device.
- *
- * This is a version of submit_bio() that shall only be used for I/O that is
- * resubmitted to lower level drivers by stacking block drivers. All file
- * systems and other upper level users of the block layer should use
- * submit_bio() instead.
- */
-void submit_bio_noacct(struct bio *bio)
-{
- /*
- * We only want one ->submit_bio to be active at a time, else stack
- * usage with stacked devices could be a problem. Use current->bio_list
- * to collect a list of requests submited by a ->submit_bio method while
- * it is active, and then process them after it returned.
- */
- if (current->bio_list)
- bio_list_add(&current->bio_list[0], bio);
- else if (!bio->bi_bdev->bd_disk->fops->submit_bio)
- __submit_bio_noacct_mq(bio);
- else
- __submit_bio_noacct(bio);
}
EXPORT_SYMBOL(submit_bio_noacct);
@@ -991,8 +961,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
return 0;
- if (current->plug)
- blk_flush_plug(current->plug, false);
+ blk_flush_plug(current->plug, false);
if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT))
return 0;
@@ -1061,20 +1030,32 @@ again:
}
static unsigned long __part_start_io_acct(struct block_device *part,
- unsigned int sectors, unsigned int op)
+ unsigned int sectors, unsigned int op,
+ unsigned long start_time)
{
const int sgrp = op_stat_group(op);
- unsigned long now = READ_ONCE(jiffies);
part_stat_lock();
- update_io_ticks(part, now, false);
+ update_io_ticks(part, start_time, false);
part_stat_inc(part, ios[sgrp]);
part_stat_add(part, sectors[sgrp], sectors);
part_stat_local_inc(part, in_flight[op_is_write(op)]);
part_stat_unlock();
- return now;
+ return start_time;
+}
+
+/**
+ * bio_start_io_acct_time - start I/O accounting for bio based drivers
+ * @bio: bio to start accounting for
+ * @start_time: start time that should be passed back to bio_end_io_acct().
+ */
+void bio_start_io_acct_time(struct bio *bio, unsigned long start_time)
+{
+ __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
+ bio_op(bio), start_time);
}
+EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
/**
* bio_start_io_acct - start I/O accounting for bio based drivers
@@ -1084,14 +1065,15 @@ static unsigned long __part_start_io_acct(struct block_device *part,
*/
unsigned long bio_start_io_acct(struct bio *bio)
{
- return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), bio_op(bio));
+ return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
+ bio_op(bio), jiffies);
}
EXPORT_SYMBOL_GPL(bio_start_io_acct);
unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
unsigned int op)
{
- return __part_start_io_acct(disk->part0, sectors, op);
+ return __part_start_io_acct(disk->part0, sectors, op, jiffies);
}
EXPORT_SYMBOL(disk_start_io_acct);
@@ -1261,7 +1243,7 @@ struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
}
EXPORT_SYMBOL(blk_check_plugged);
-void blk_flush_plug(struct blk_plug *plug, bool from_schedule)
+void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
{
if (!list_empty(&plug->cb_list))
flush_plug_callbacks(plug, from_schedule);
@@ -1290,7 +1272,7 @@ void blk_flush_plug(struct blk_plug *plug, bool from_schedule)
void blk_finish_plug(struct blk_plug *plug)
{
if (plug == current->plug) {
- blk_flush_plug(plug, false);
+ __blk_flush_plug(plug, false);
current->plug = NULL;
}
}
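A side note on the current->bio_list scheme that submit_bio_noacct_nocheck() and __submit_bio_noacct() above depend on: the outermost submission drains a per-task pending list iteratively, so a stacked driver's ->submit_bio can resubmit child bios without growing the kernel stack. The standalone sketch below reproduces just that control flow; struct node, submit() and driver_submit() are made-up names, not block-layer API.

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for struct bio: a unit of work that may spawn children. */
struct node {
	int depth;
	struct node *next;
};

struct list {
	struct node *head, *tail;
};

static void list_add(struct list *l, struct node *n)
{
	n->next = NULL;
	if (l->tail)
		l->tail->next = n;
	else
		l->head = n;
	l->tail = n;
}

static struct node *list_pop(struct list *l)
{
	struct node *n = l->head;

	if (n && !(l->head = n->next))
		l->tail = NULL;
	return n;
}

/* Per-task pending list, playing the role of current->bio_list. */
static struct list *pending;

static void submit(struct node *n);

/*
 * "Driver" that resubmits two children per node, the way a stacking
 * block driver splits a bio.
 */
static void driver_submit(struct node *n)
{
	if (n->depth < 3) {
		for (int i = 0; i < 2; i++) {
			struct node *c = malloc(sizeof(*c));

			c->depth = n->depth + 1;
			submit(c);	/* would recurse without the list */
		}
	}
	printf("handled node at depth %d\n", n->depth);
	free(n);
}

static void submit(struct node *n)
{
	struct list stack = { NULL, NULL };

	if (pending) {			/* nested call: just queue it */
		list_add(pending, n);
		return;
	}
	pending = &stack;		/* outermost call drains the list */
	do {
		driver_submit(n);
	} while ((n = list_pop(&stack)) != NULL);
	pending = NULL;
}

int main(void)
{
	struct node *root = malloc(sizeof(*root));

	root->depth = 0;
	submit(root);
	return 0;
}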
diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index c87aba8584c6..18c8eafe20b9 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -10,7 +10,6 @@
#define pr_fmt(fmt) "blk-crypto-fallback: " fmt
#include <crypto/skcipher.h>
-#include <linux/blk-cgroup.h>
#include <linux/blk-crypto.h>
#include <linux/blk-crypto-profile.h>
#include <linux/blkdev.h>
@@ -20,6 +19,7 @@
#include <linux/random.h>
#include <linux/scatterlist.h>
+#include "blk-cgroup.h"
#include "blk-crypto-internal.h"
static unsigned int num_prealloc_bounce_pg = 32;
diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h
index 2fb0d65a464c..e6818ffaddbf 100644
--- a/block/blk-crypto-internal.h
+++ b/block/blk-crypto-internal.h
@@ -11,6 +11,7 @@
/* Represents a crypto mode supported by blk-crypto */
struct blk_crypto_mode {
+ const char *name; /* name of this mode, shown in sysfs */
const char *cipher_str; /* crypto API name (for fallback case) */
unsigned int keysize; /* key size in bytes */
unsigned int ivsize; /* iv size in bytes */
@@ -20,6 +21,10 @@ extern const struct blk_crypto_mode blk_crypto_modes[];
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+int blk_crypto_sysfs_register(struct request_queue *q);
+
+void blk_crypto_sysfs_unregister(struct request_queue *q);
+
void bio_crypt_dun_increment(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
unsigned int inc);
@@ -62,6 +67,13 @@ static inline bool blk_crypto_rq_is_encrypted(struct request *rq)
#else /* CONFIG_BLK_INLINE_ENCRYPTION */
+static inline int blk_crypto_sysfs_register(struct request_queue *q)
+{
+ return 0;
+}
+
+static inline void blk_crypto_sysfs_unregister(struct request_queue *q) { }
+
static inline bool bio_crypt_rq_ctx_compatible(struct request *rq,
struct bio *bio)
{
diff --git a/block/blk-crypto-sysfs.c b/block/blk-crypto-sysfs.c
new file mode 100644
index 000000000000..fd93bd2f33b7
--- /dev/null
+++ b/block/blk-crypto-sysfs.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2021 Google LLC
+ *
+ * sysfs support for blk-crypto. This file contains the code which exports the
+ * crypto capabilities of devices via /sys/block/$disk/queue/crypto/.
+ */
+
+#include <linux/blk-crypto-profile.h>
+
+#include "blk-crypto-internal.h"
+
+struct blk_crypto_kobj {
+ struct kobject kobj;
+ struct blk_crypto_profile *profile;
+};
+
+struct blk_crypto_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct blk_crypto_profile *profile,
+ struct blk_crypto_attr *attr, char *page);
+};
+
+static struct blk_crypto_profile *kobj_to_crypto_profile(struct kobject *kobj)
+{
+ return container_of(kobj, struct blk_crypto_kobj, kobj)->profile;
+}
+
+static struct blk_crypto_attr *attr_to_crypto_attr(struct attribute *attr)
+{
+ return container_of(attr, struct blk_crypto_attr, attr);
+}
+
+static ssize_t max_dun_bits_show(struct blk_crypto_profile *profile,
+ struct blk_crypto_attr *attr, char *page)
+{
+ return sysfs_emit(page, "%u\n", 8 * profile->max_dun_bytes_supported);
+}
+
+static ssize_t num_keyslots_show(struct blk_crypto_profile *profile,
+ struct blk_crypto_attr *attr, char *page)
+{
+ return sysfs_emit(page, "%u\n", profile->num_slots);
+}
+
+#define BLK_CRYPTO_RO_ATTR(_name) \
+ static struct blk_crypto_attr _name##_attr = __ATTR_RO(_name)
+
+BLK_CRYPTO_RO_ATTR(max_dun_bits);
+BLK_CRYPTO_RO_ATTR(num_keyslots);
+
+static struct attribute *blk_crypto_attrs[] = {
+ &max_dun_bits_attr.attr,
+ &num_keyslots_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group blk_crypto_attr_group = {
+ .attrs = blk_crypto_attrs,
+};
+
+/*
+ * The encryption mode attributes. To avoid hard-coding the list of encryption
+ * modes, these are initialized at boot time by blk_crypto_sysfs_init().
+ */
+static struct blk_crypto_attr __blk_crypto_mode_attrs[BLK_ENCRYPTION_MODE_MAX];
+static struct attribute *blk_crypto_mode_attrs[BLK_ENCRYPTION_MODE_MAX + 1];
+
+static umode_t blk_crypto_mode_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ struct blk_crypto_profile *profile = kobj_to_crypto_profile(kobj);
+ struct blk_crypto_attr *a = attr_to_crypto_attr(attr);
+ int mode_num = a - __blk_crypto_mode_attrs;
+
+ if (profile->modes_supported[mode_num])
+ return 0444;
+ return 0;
+}
+
+static ssize_t blk_crypto_mode_show(struct blk_crypto_profile *profile,
+ struct blk_crypto_attr *attr, char *page)
+{
+ int mode_num = attr - __blk_crypto_mode_attrs;
+
+ return sysfs_emit(page, "0x%x\n", profile->modes_supported[mode_num]);
+}
+
+static const struct attribute_group blk_crypto_modes_attr_group = {
+ .name = "modes",
+ .attrs = blk_crypto_mode_attrs,
+ .is_visible = blk_crypto_mode_is_visible,
+};
+
+static const struct attribute_group *blk_crypto_attr_groups[] = {
+ &blk_crypto_attr_group,
+ &blk_crypto_modes_attr_group,
+ NULL,
+};
+
+static ssize_t blk_crypto_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *page)
+{
+ struct blk_crypto_profile *profile = kobj_to_crypto_profile(kobj);
+ struct blk_crypto_attr *a = attr_to_crypto_attr(attr);
+
+ return a->show(profile, a, page);
+}
+
+static const struct sysfs_ops blk_crypto_attr_ops = {
+ .show = blk_crypto_attr_show,
+};
+
+static void blk_crypto_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct blk_crypto_kobj, kobj));
+}
+
+static struct kobj_type blk_crypto_ktype = {
+ .default_groups = blk_crypto_attr_groups,
+ .sysfs_ops = &blk_crypto_attr_ops,
+ .release = blk_crypto_release,
+};
+
+/*
+ * If the request_queue has a blk_crypto_profile, create the "crypto"
+ * subdirectory in sysfs (/sys/block/$disk/queue/crypto/).
+ */
+int blk_crypto_sysfs_register(struct request_queue *q)
+{
+ struct blk_crypto_kobj *obj;
+ int err;
+
+ if (!q->crypto_profile)
+ return 0;
+
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+ obj->profile = q->crypto_profile;
+
+ err = kobject_init_and_add(&obj->kobj, &blk_crypto_ktype, &q->kobj,
+ "crypto");
+ if (err) {
+ kobject_put(&obj->kobj);
+ return err;
+ }
+ q->crypto_kobject = &obj->kobj;
+ return 0;
+}
+
+void blk_crypto_sysfs_unregister(struct request_queue *q)
+{
+ kobject_put(q->crypto_kobject);
+}
+
+static int __init blk_crypto_sysfs_init(void)
+{
+ int i;
+
+ BUILD_BUG_ON(BLK_ENCRYPTION_MODE_INVALID != 0);
+ for (i = 1; i < BLK_ENCRYPTION_MODE_MAX; i++) {
+ struct blk_crypto_attr *attr = &__blk_crypto_mode_attrs[i];
+
+ attr->attr.name = blk_crypto_modes[i].name;
+ attr->attr.mode = 0444;
+ attr->show = blk_crypto_mode_show;
+ blk_crypto_mode_attrs[i - 1] = &attr->attr;
+ }
+ return 0;
+}
+subsys_initcall(blk_crypto_sysfs_init);
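blk_crypto_attr_show() above recovers the typed attribute from the bare struct attribute that sysfs hands it, via container_of() on the embedded member. The standalone sketch below shows the same dispatch pattern outside the kernel; my_attr, generic_show and hello_show are invented names for illustration.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct attribute {
	const char *name;
};

/* Typed attribute embedding the generic one, like struct blk_crypto_attr. */
struct my_attr {
	struct attribute attr;
	int (*show)(char *buf, size_t len);
};

static int hello_show(char *buf, size_t len)
{
	return snprintf(buf, len, "hello\n");
}

static struct my_attr hello_attr = {
	.attr = { .name = "hello" },
	.show = hello_show,
};

/* Generic entry point that only sees the embedded struct attribute. */
static int generic_show(struct attribute *attr, char *buf, size_t len)
{
	struct my_attr *a = container_of(attr, struct my_attr, attr);

	return a->show(buf, len);
}

int main(void)
{
	char buf[16];

	generic_show(&hello_attr.attr, buf, sizeof(buf));
	fputs(buf, stdout);
	return 0;
}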
diff --git a/block/blk-crypto.c b/block/blk-crypto.c
index ec9efeeeca91..a496aaef85ba 100644
--- a/block/blk-crypto.c
+++ b/block/blk-crypto.c
@@ -19,16 +19,19 @@
const struct blk_crypto_mode blk_crypto_modes[] = {
[BLK_ENCRYPTION_MODE_AES_256_XTS] = {
+ .name = "AES-256-XTS",
.cipher_str = "xts(aes)",
.keysize = 64,
.ivsize = 16,
},
[BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV] = {
+ .name = "AES-128-CBC-ESSIV",
.cipher_str = "essiv(cbc(aes),sha256)",
.keysize = 16,
.ivsize = 16,
},
[BLK_ENCRYPTION_MODE_ADIANTUM] = {
+ .name = "Adiantum",
.cipher_str = "adiantum(xchacha12,aes)",
.keysize = 32,
.ivsize = 32,
@@ -111,7 +114,6 @@ int __bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask)
*dst->bi_crypt_context = *src->bi_crypt_context;
return 0;
}
-EXPORT_SYMBOL_GPL(__bio_crypt_clone);
/* Increments @dun by @inc, treating @dun as a multi-limb integer. */
void bio_crypt_dun_increment(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
diff --git a/block/blk-flush.c b/block/blk-flush.c
index e4df894189ce..c68968724870 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -460,9 +460,7 @@ int blkdev_issue_flush(struct block_device *bdev)
{
struct bio bio;
- bio_init(&bio, NULL, 0);
- bio_set_dev(&bio, bdev);
- bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+ bio_init(&bio, bdev, NULL, 0, REQ_OP_WRITE | REQ_PREFLUSH);
return submit_bio_wait(&bio);
}
EXPORT_SYMBOL(blkdev_issue_flush);
diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c
index b925f3db3ab7..18c68d8b9138 100644
--- a/block/blk-ia-ranges.c
+++ b/block/blk-ia-ranges.c
@@ -144,7 +144,7 @@ int disk_register_independent_access_ranges(struct gendisk *disk,
&q->kobj, "%s", "independent_access_ranges");
if (ret) {
q->ia_ranges = NULL;
- kfree(iars);
+ kobject_put(&iars->kobj);
return ret;
}
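The one-line change above applies the standard kobject ownership rule: once kobject_init_and_add() has been called, the refcount owns the allocation, so an error path must drop it with kobject_put(), which ends up in the ktype's ->release(), rather than kfree() the object directly (a bare kfree() leaks the kobject name and bypasses the refcount machinery). A hypothetical kernel-style sketch of the rule; struct foo and foo_ktype are made up:

struct foo {
	struct kobject kobj;
};

static void foo_release(struct kobject *kobj)
{
	kfree(container_of(kobj, struct foo, kobj));	/* the real free */
}

static struct kobj_type foo_ktype = {
	.release = foo_release,
};

static int foo_register(struct foo *f, struct kobject *parent)
{
	int ret = kobject_init_and_add(&f->kobj, &foo_ktype, parent, "foo");

	if (ret)
		kobject_put(&f->kobj);	/* not kfree(f): ->release() runs */
	return ret;
}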
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 769b64394298..70a0a3d680a3 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -178,12 +178,12 @@
#include <linux/time64.h>
#include <linux/parser.h>
#include <linux/sched/signal.h>
-#include <linux/blk-cgroup.h>
#include <asm/local.h>
#include <asm/local64.h>
#include "blk-rq-qos.h"
#include "blk-stat.h"
#include "blk-wbt.h"
+#include "blk-cgroup.h"
#ifdef CONFIG_TRACEPOINTS
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 6593c7123b97..2f33932e72e3 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -74,9 +74,9 @@
#include <linux/sched/signal.h>
#include <trace/events/block.h>
#include <linux/blk-mq.h>
-#include <linux/blk-cgroup.h>
#include "blk-rq-qos.h"
#include "blk-stat.h"
+#include "blk-cgroup.h"
#include "blk.h"
#define DEFAULT_SCALE_COOKIE 1000000U
@@ -598,7 +598,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
int inflight = 0;
blkg = bio->bi_blkg;
- if (!blkg || !bio_flagged(bio, BIO_TRACKED))
+ if (!blkg || !bio_flagged(bio, BIO_QOS_THROTTLED))
return;
iolat = blkg_to_lat(bio->bi_blkg);
diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
index 2e7f10e1c03f..79e797f5d194 100644
--- a/block/blk-ioprio.c
+++ b/block/blk-ioprio.c
@@ -12,11 +12,11 @@
* Documentation/admin-guide/cgroup-v2.rst.
*/
-#include <linux/blk-cgroup.h>
#include <linux/blk-mq.h>
#include <linux/blk_types.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include "blk-cgroup.h"
#include "blk-ioprio.h"
#include "blk-rq-qos.h"
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9f09beadcbe3..fc6ea52e7482 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -10,19 +10,6 @@
#include "blk.h"
-struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp)
-{
- struct bio *new = bio_alloc(gfp, nr_pages);
-
- if (bio) {
- bio_chain(bio, new);
- submit_bio(bio);
- }
-
- return new;
-}
-EXPORT_SYMBOL_GPL(blk_next_bio);
-
int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, int flags,
struct bio **biop)
@@ -32,9 +19,6 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
unsigned int op;
sector_t bs_mask, part_offset = 0;
- if (!q)
- return -ENXIO;
-
if (bdev_read_only(bdev))
return -EPERM;
@@ -95,11 +79,8 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
WARN_ON_ONCE((req_sects << 9) > UINT_MAX);
- bio = blk_next_bio(bio, 0, gfp_mask);
+ bio = blk_next_bio(bio, bdev, 0, op, gfp_mask);
bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, bdev);
- bio_set_op_attrs(bio, op, 0);
-
bio->bi_iter.bi_size = req_sects << 9;
sector += req_sects;
nr_sects -= req_sects;
@@ -172,9 +153,6 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
struct bio *bio = *biop;
sector_t bs_mask;
- if (!q)
- return -ENXIO;
-
if (bdev_read_only(bdev))
return -EPERM;
@@ -189,14 +167,12 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
max_write_same_sectors = bio_allowed_max_sectors(q);
while (nr_sects) {
- bio = blk_next_bio(bio, 1, gfp_mask);
+ bio = blk_next_bio(bio, bdev, 1, REQ_OP_WRITE_SAME, gfp_mask);
bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, bdev);
bio->bi_vcnt = 1;
bio->bi_io_vec->bv_page = page;
bio->bi_io_vec->bv_offset = 0;
bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
- bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);
if (nr_sects > max_write_same_sectors) {
bio->bi_iter.bi_size = max_write_same_sectors << 9;
@@ -250,10 +226,6 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
{
struct bio *bio = *biop;
unsigned int max_write_zeroes_sectors;
- struct request_queue *q = bdev_get_queue(bdev);
-
- if (!q)
- return -ENXIO;
if (bdev_read_only(bdev))
return -EPERM;
@@ -265,10 +237,8 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
return -EOPNOTSUPP;
while (nr_sects) {
- bio = blk_next_bio(bio, 0, gfp_mask);
+ bio = blk_next_bio(bio, bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, bdev);
- bio->bi_opf = REQ_OP_WRITE_ZEROES;
if (flags & BLKDEV_ZERO_NOUNMAP)
bio->bi_opf |= REQ_NOUNMAP;
@@ -304,23 +274,17 @@ static int __blkdev_issue_zero_pages(struct block_device *bdev,
sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
struct bio **biop)
{
- struct request_queue *q = bdev_get_queue(bdev);
struct bio *bio = *biop;
int bi_size = 0;
unsigned int sz;
- if (!q)
- return -ENXIO;
-
if (bdev_read_only(bdev))
return -EPERM;
while (nr_sects != 0) {
- bio = blk_next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
- gfp_mask);
+ bio = blk_next_bio(bio, bdev, __blkdev_sectors_to_bio_pages(nr_sects),
+ REQ_OP_WRITE, gfp_mask);
bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, bdev);
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
while (nr_sects != 0) {
sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
diff --git a/block/blk-map.c b/block/blk-map.c
index 4526adde0156..c7f71d83eff1 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -446,7 +446,7 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
if (bytes > len)
bytes = len;
- page = alloc_page(GFP_NOIO | gfp_mask);
+ page = alloc_page(GFP_NOIO | __GFP_ZERO | gfp_mask);
if (!page)
goto cleanup;
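The __GFP_ZERO added above makes bio_copy_kern()'s bounce pages start out zeroed, so any bytes read back beyond what was actually copied in cannot leak stale kernel memory. A small userspace analogue of the same idea, with invented names:

#include <stdlib.h>
#include <string.h>

/*
 * Allocate a bounce buffer that may be read past the bytes actually
 * filled: calloc() plays the role of adding __GFP_ZERO.
 */
static char *make_bounce_buffer(const char *src, size_t filled, size_t bufsz)
{
	char *buf = calloc(1, bufsz);

	if (buf)
		memcpy(buf, src, filled < bufsz ? filled : bufsz);
	return buf;	/* bytes past 'filled' read back as zeroes */
}

int main(void)
{
	char *b = make_bounce_buffer("abc", 3, 16);

	free(b);
	return 0;
}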
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 4de34a332c9f..ea6968313b4a 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -9,6 +9,7 @@
#include <linux/blk-integrity.h>
#include <linux/scatterlist.h>
#include <linux/part_stat.h>
+#include <linux/blk-cgroup.h>
#include <trace/events/block.h>
@@ -368,8 +369,6 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
trace_block_split(split, (*bio)->bi_iter.bi_sector);
submit_bio_noacct(*bio);
*bio = split;
-
- blk_throtl_charge_bio_split(*bio);
}
}
@@ -600,6 +599,9 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
unsigned int nr_phys_segs)
{
+ if (!blk_cgroup_mergeable(req, bio))
+ goto no_merge;
+
if (blk_integrity_merge_bio(req->q, req, bio) == false)
goto no_merge;
@@ -696,6 +698,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
if (total_phys_segments > blk_rq_get_max_segments(req))
return 0;
+ if (!blk_cgroup_mergeable(req, next->bio))
+ return 0;
+
if (blk_integrity_merge_rq(q, req, next) == false)
return 0;
@@ -904,6 +909,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (bio_data_dir(bio) != rq_data_dir(rq))
return false;
+ /* don't merge across cgroup boundaries */
+ if (!blk_cgroup_mergeable(rq, bio))
+ return false;
+
/* only merge integrity protected bio into ditto rq */
if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
return false;
@@ -1089,12 +1098,20 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
if (!plug || rq_list_empty(plug->mq_list))
return false;
- /* check the previously added entry for a quick merge attempt */
- rq = rq_list_peek(&plug->mq_list);
- if (rq->q == q) {
- if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
- BIO_MERGE_OK)
- return true;
+ rq_list_for_each(&plug->mq_list, rq) {
+ if (rq->q == q) {
+ if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
+ BIO_MERGE_OK)
+ return true;
+ break;
+ }
+
+ /*
+ * Only keep iterating plug list for merges if we have multiple
+ * queues
+ */
+ if (!plug->multiple_queues)
+ break;
}
return false;
}
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 3a790eb4995c..e2880f6deb34 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -707,7 +707,7 @@ static void debugfs_create_files(struct dentry *parent, void *data,
void blk_mq_debugfs_register(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
- int i;
+ unsigned long i;
debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
@@ -780,7 +780,7 @@ void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx)
void blk_mq_debugfs_register_hctxs(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
- int i;
+ unsigned long i;
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_debugfs_register_hctx(q, hctx);
@@ -789,7 +789,7 @@ void blk_mq_debugfs_register_hctxs(struct request_queue *q)
void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
- int i;
+ unsigned long i;
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_debugfs_unregister_hctx(hctx);
diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h
index a68aa6041a10..69918f4170d6 100644
--- a/block/blk-mq-debugfs.h
+++ b/block/blk-mq-debugfs.h
@@ -6,6 +6,8 @@
#include <linux/seq_file.h>
+struct blk_mq_hw_ctx;
+
struct blk_mq_debugfs_attr {
const char *name;
umode_t mode;
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 55488ba97823..9e56a69422b6 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -180,11 +180,18 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
+ unsigned long end = jiffies + HZ;
int ret;
do {
ret = __blk_mq_do_dispatch_sched(hctx);
- } while (ret == 1);
+ if (ret != 1)
+ break;
+ if (need_resched() || time_is_before_jiffies(end)) {
+ blk_mq_delay_run_hw_queue(hctx, 0);
+ break;
+ }
+ } while (1);
return ret;
}
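The hunk above bounds blk_mq_do_dispatch_sched()'s retry loop: after roughly a second of continuous dispatch, or whenever rescheduling is due, it punts the remaining work to a delayed run of the hardware queue instead of monopolizing the CPU. A standalone sketch of that shape, with do_one_unit() and reschedule_later() as illustrative stand-ins:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static int work_left = 1000000;

static bool do_one_unit(void)	/* true while more work remains */
{
	return --work_left > 0;
}

static void reschedule_later(void)	/* ~ blk_mq_delay_run_hw_queue() */
{
	printf("yielding with %d units left\n", work_left);
}

/*
 * Keep dispatching while progress is made, but give up the CPU once
 * the time budget is spent and requeue the remainder.
 */
static void dispatch_bounded(void)
{
	time_t end = time(NULL) + 1;

	while (do_one_unit()) {
		if (time(NULL) >= end) {
			reschedule_later();
			break;
		}
	}
}

int main(void)
{
	dispatch_bounded();
	return 0;
}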
@@ -515,7 +522,7 @@ static void blk_mq_exit_sched_shared_tags(struct request_queue *queue)
static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int flags)
{
struct blk_mq_hw_ctx *hctx;
- int i;
+ unsigned long i;
queue_for_each_hw_ctx(q, hctx, i) {
if (hctx->sched_tags) {
@@ -550,9 +557,10 @@ static int blk_mq_init_sched_shared_tags(struct request_queue *queue)
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
- unsigned int i, flags = q->tag_set->flags;
+ unsigned int flags = q->tag_set->flags;
struct blk_mq_hw_ctx *hctx;
struct elevator_queue *eq;
+ unsigned long i;
int ret;
if (!e) {
@@ -618,7 +626,7 @@ err_free_map_and_rqs:
void blk_mq_sched_free_rqs(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
- int i;
+ unsigned long i;
if (blk_mq_is_shared_tags(q->tag_set->flags)) {
blk_mq_free_rqs(q->tag_set, q->sched_shared_tags,
@@ -635,7 +643,7 @@ void blk_mq_sched_free_rqs(struct request_queue *q)
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
{
struct blk_mq_hw_ctx *hctx;
- unsigned int i;
+ unsigned long i;
unsigned int flags = 0;
queue_for_each_hw_ctx(q, hctx, i) {
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 674786574075..c08426975856 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -206,7 +206,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
void blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
- int i;
+ unsigned long i;
lockdep_assert_held(&q->sysfs_dir_lock);
@@ -255,7 +255,8 @@ void blk_mq_sysfs_init(struct request_queue *q)
int __blk_mq_register_dev(struct device *dev, struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
- int ret, i;
+ unsigned long i, j;
+ int ret;
WARN_ON_ONCE(!q->kobj.parent);
lockdep_assert_held(&q->sysfs_dir_lock);
@@ -278,8 +279,10 @@ out:
return ret;
unreg:
- while (--i >= 0)
- blk_mq_unregister_hctx(q->queue_hw_ctx[i]);
+ queue_for_each_hw_ctx(q, hctx, j) {
+ if (j < i)
+ blk_mq_unregister_hctx(hctx);
+ }
kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
kobject_del(q->mq_kobj);
@@ -290,7 +293,7 @@ unreg:
void blk_mq_sysfs_unregister(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
- int i;
+ unsigned long i;
mutex_lock(&q->sysfs_dir_lock);
if (!q->mq_sysfs_init_done)
@@ -306,7 +309,8 @@ unlock:
int blk_mq_sysfs_register(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
- int i, ret = 0;
+ unsigned long i;
+ int ret = 0;
mutex_lock(&q->sysfs_dir_lock);
if (!q->mq_sysfs_init_done)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 845f74e8dd7b..68ac23d0b640 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -107,7 +107,7 @@ static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
return BLK_MQ_NO_TAG;
if (data->shallow_depth)
- return __sbitmap_queue_get_shallow(bt, data->shallow_depth);
+ return sbitmap_queue_get_shallow(bt, data->shallow_depth);
else
return __sbitmap_queue_get(bt);
}
@@ -498,7 +498,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
void *priv)
{
/*
- * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx
+ * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and hctx_table
* while the queue is frozen. So we can use q_usage_counter to avoid
* racing with it.
*/
@@ -515,7 +515,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
bt_for_each(NULL, q, btags, fn, priv, false);
} else {
struct blk_mq_hw_ctx *hctx;
- int i;
+ unsigned long i;
queue_for_each_hw_ctx(q, hctx, i) {
struct blk_mq_tags *tags = hctx->tags;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f3bf3358a3bb..8e659dc5fcf3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -71,7 +71,8 @@ static int blk_mq_poll_stats_bkt(const struct request *rq)
static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q,
blk_qc_t qc)
{
- return q->queue_hw_ctx[(qc & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT];
+ return xa_load(&q->hctx_table,
+ (qc & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT);
}
static inline struct request *blk_qc_to_rq(struct blk_mq_hw_ctx *hctx,
@@ -312,7 +313,7 @@ EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
void blk_mq_wake_waiters(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
- unsigned int i;
+ unsigned long i;
queue_for_each_hw_ctx(q, hctx, i)
if (blk_mq_hw_queue_mapped(hctx))
@@ -573,7 +574,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
* If not tell the caller that it should skip this queue.
*/
ret = -EXDEV;
- data.hctx = q->queue_hw_ctx[hctx_idx];
+ data.hctx = xa_load(&q->hctx_table, hctx_idx);
if (!blk_mq_hw_queue_mapped(data.hctx))
goto out_queue_exit;
cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
@@ -736,6 +737,10 @@ static void blk_complete_request(struct request *req)
/* Completion has already been traced */
bio_clear_flag(bio, BIO_TRACE_COMPLETION);
+
+ if (req_op(req) == REQ_OP_ZONE_APPEND)
+ bio->bi_iter.bi_sector = req->__sector;
+
if (!is_flush)
bio_endio(bio);
bio = next;
@@ -789,8 +794,10 @@ bool blk_update_request(struct request *req, blk_status_t error,
#endif
if (unlikely(error && !blk_rq_is_passthrough(req) &&
- !(req->rq_flags & RQF_QUIET)))
+ !(req->rq_flags & RQF_QUIET))) {
blk_print_req_error(req, error);
+ trace_block_rq_error(req, error, nr_bytes);
+ }
blk_account_io_completion(req, nr_bytes);
@@ -881,10 +888,15 @@ static inline void blk_account_io_done(struct request *req, u64 now)
static void __blk_account_io_start(struct request *rq)
{
- /* passthrough requests can hold bios that do not have ->bi_bdev set */
- if (rq->bio && rq->bio->bi_bdev)
+ /*
+ * All non-passthrough requests are created from a bio with one
+ * exception: when a flush command that is part of a flush sequence
+ * generated by the state machine in blk-flush.c is cloned onto the
+ * lower device by dm-multipath we can get here without a bio.
+ */
+ if (rq->bio)
rq->part = rq->bio->bi_bdev;
- else if (rq->q->disk)
+ else
rq->part = rq->q->disk->part0;
part_stat_lock();
@@ -1440,7 +1452,7 @@ static void blk_mq_timeout_work(struct work_struct *work)
container_of(work, struct request_queue, timeout_work);
unsigned long next = 0;
struct blk_mq_hw_ctx *hctx;
- int i;
+ unsigned long i;
/* A deadlock might occur if a request is stuck requiring a
* timeout at the same time a queue freeze is waiting
@@ -2141,7 +2153,7 @@ static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
void blk_mq_run_hw_queues(struct request_queue *q, bool async)
{
struct blk_mq_hw_ctx *hctx, *sq_hctx;
- int i;
+ unsigned long i;
sq_hctx = NULL;
if (blk_mq_has_sqsched(q))
@@ -2169,7 +2181,7 @@ EXPORT_SYMBOL(blk_mq_run_hw_queues);
void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs)
{
struct blk_mq_hw_ctx *hctx, *sq_hctx;
- int i;
+ unsigned long i;
sq_hctx = NULL;
if (blk_mq_has_sqsched(q))
@@ -2178,6 +2190,14 @@ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs)
if (blk_mq_hctx_stopped(hctx))
continue;
/*
+ * If there is already a run_work pending, leave the
+ * pending delay untouched. Otherwise, a hctx can stall
+ * if another hctx is re-delaying the other's work
+ * before the work executes.
+ */
+ if (delayed_work_pending(&hctx->run_work))
+ continue;
+ /*
* Dispatch from this hctx either if there's no hctx preferred
* by IO scheduler or if it has requests that bypass the
* scheduler.
@@ -2199,7 +2219,7 @@ EXPORT_SYMBOL(blk_mq_delay_run_hw_queues);
bool blk_mq_queue_stopped(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
- int i;
+ unsigned long i;
queue_for_each_hw_ctx(q, hctx, i)
if (blk_mq_hctx_stopped(hctx))
@@ -2238,7 +2258,7 @@ EXPORT_SYMBOL(blk_mq_stop_hw_queue);
void blk_mq_stop_hw_queues(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
- int i;
+ unsigned long i;
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_stop_hw_queue(hctx);
@@ -2256,7 +2276,7 @@ EXPORT_SYMBOL(blk_mq_start_hw_queue);
void blk_mq_start_hw_queues(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
- int i;
+ unsigned long i;
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_start_hw_queue(hctx);
@@ -2276,7 +2296,7 @@ EXPORT_SYMBOL_GPL(blk_mq_start_stopped_hw_queue);
void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
{
struct blk_mq_hw_ctx *hctx;
- int i;
+ unsigned long i;
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_start_stopped_hw_queue(hctx, async);
@@ -2557,13 +2577,36 @@ static void __blk_mq_flush_plug_list(struct request_queue *q,
q->mq_ops->queue_rqs(&plug->mq_list);
}
+static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
+{
+ struct blk_mq_hw_ctx *this_hctx = NULL;
+ struct blk_mq_ctx *this_ctx = NULL;
+ struct request *requeue_list = NULL;
+ unsigned int depth = 0;
+ LIST_HEAD(list);
+
+ do {
+ struct request *rq = rq_list_pop(&plug->mq_list);
+
+ if (!this_hctx) {
+ this_hctx = rq->mq_hctx;
+ this_ctx = rq->mq_ctx;
+ } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) {
+ rq_list_add(&requeue_list, rq);
+ continue;
+ }
+ list_add_tail(&rq->queuelist, &list);
+ depth++;
+ } while (!rq_list_empty(plug->mq_list));
+
+ plug->mq_list = requeue_list;
+ trace_block_unplug(this_hctx->queue, depth, !from_sched);
+ blk_mq_sched_insert_requests(this_hctx, this_ctx, &list, from_sched);
+}
+
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
- struct blk_mq_hw_ctx *this_hctx;
- struct blk_mq_ctx *this_ctx;
struct request *rq;
- unsigned int depth;
- LIST_HEAD(list);
if (rq_list_empty(plug->mq_list))
return;
@@ -2599,35 +2642,9 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
return;
}
- this_hctx = NULL;
- this_ctx = NULL;
- depth = 0;
do {
- rq = rq_list_pop(&plug->mq_list);
-
- if (!this_hctx) {
- this_hctx = rq->mq_hctx;
- this_ctx = rq->mq_ctx;
- } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) {
- trace_block_unplug(this_hctx->queue, depth,
- !from_schedule);
- blk_mq_sched_insert_requests(this_hctx, this_ctx,
- &list, from_schedule);
- depth = 0;
- this_hctx = rq->mq_hctx;
- this_ctx = rq->mq_ctx;
-
- }
-
- list_add(&rq->queuelist, &list);
- depth++;
+ blk_mq_dispatch_plug_list(plug, from_schedule);
} while (!rq_list_empty(plug->mq_list));
-
- if (!list_empty(&list)) {
- trace_block_unplug(this_hctx->queue, depth, !from_schedule);
- blk_mq_sched_insert_requests(this_hctx, this_ctx, &list,
- from_schedule);
- }
}
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
@@ -2714,7 +2731,8 @@ static bool blk_mq_attempt_bio_merge(struct request_queue *q,
static struct request *blk_mq_get_new_requests(struct request_queue *q,
struct blk_plug *plug,
- struct bio *bio)
+ struct bio *bio,
+ unsigned int nsegs)
{
struct blk_mq_alloc_data data = {
.q = q,
@@ -2726,6 +2744,11 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
if (unlikely(bio_queue_enter(bio)))
return NULL;
+ if (blk_mq_attempt_bio_merge(q, bio, nsegs))
+ goto queue_exit;
+
+ rq_qos_throttle(q, bio);
+
if (plug) {
data.nr_tags = plug->nr_ios;
plug->nr_ios = 1;
@@ -2738,12 +2761,13 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
rq_qos_cleanup(q, bio);
if (bio->bi_opf & REQ_NOWAIT)
bio_wouldblock_error(bio);
+queue_exit:
blk_queue_exit(q);
return NULL;
}
static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
- struct blk_plug *plug, struct bio *bio)
+ struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
{
struct request *rq;
@@ -2753,12 +2777,19 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
if (!rq || rq->q != q)
return NULL;
- if (blk_mq_get_hctx_type(bio->bi_opf) != rq->mq_hctx->type)
+ if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) {
+ *bio = NULL;
+ return NULL;
+ }
+
+ rq_qos_throttle(q, *bio);
+
+ if (blk_mq_get_hctx_type((*bio)->bi_opf) != rq->mq_hctx->type)
return NULL;
- if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
+ if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
return NULL;
- rq->cmd_flags = bio->bi_opf;
+ rq->cmd_flags = (*bio)->bi_opf;
plug->cached_rq = rq_list_next(rq);
INIT_LIST_HEAD(&rq->queuelist);
return rq;
@@ -2786,9 +2817,6 @@ void blk_mq_submit_bio(struct bio *bio)
unsigned int nr_segs = 1;
blk_status_t ret;
- if (unlikely(!blk_crypto_bio_prep(&bio)))
- return;
-
blk_queue_bounce(q, &bio);
if (blk_may_split(q, bio))
__blk_queue_split(q, &bio, &nr_segs);
@@ -2796,14 +2824,11 @@ void blk_mq_submit_bio(struct bio *bio)
if (!bio_integrity_prep(bio))
return;
- if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
- return;
-
- rq_qos_throttle(q, bio);
-
- rq = blk_mq_get_cached_request(q, plug, bio);
+ rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs);
if (!rq) {
- rq = blk_mq_get_new_requests(q, plug, bio);
+ if (!bio)
+ return;
+ rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
if (unlikely(!rq))
return;
}
@@ -2838,27 +2863,16 @@ void blk_mq_submit_bio(struct bio *bio)
blk_mq_try_issue_directly(rq->mq_hctx, rq));
}
+#ifdef CONFIG_BLK_MQ_STACKING
/**
- * blk_cloned_rq_check_limits - Helper function to check a cloned request
- * for the new queue limits
- * @q: the queue
- * @rq: the request being checked
- *
- * Description:
- * @rq may have been made based on weaker limitations of upper-level queues
- * in request stacking drivers, and it may violate the limitation of @q.
- * Since the block layer and the underlying device driver trust @rq
- * after it is inserted to @q, it should be checked against @q before
- * the insertion using this generic function.
- *
- * Request stacking drivers like request-based dm may change the queue
- * limits when retrying requests on other queues. Those requests need
- * to be checked against the new queue limits again during dispatch.
+ * blk_insert_cloned_request - Helper for stacking drivers to submit a request
+ * @rq: the request being queued
*/
-static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q,
- struct request *rq)
+blk_status_t blk_insert_cloned_request(struct request *rq)
{
+ struct request_queue *q = rq->q;
unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
+ blk_status_t ret;
if (blk_rq_sectors(rq) > max_sectors) {
/*
@@ -2890,24 +2904,7 @@ static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q,
return BLK_STS_IOERR;
}
- return BLK_STS_OK;
-}
-
-/**
- * blk_insert_cloned_request - Helper for stacking drivers to submit a request
- * @q: the queue to submit the request
- * @rq: the request being queued
- */
-blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
-{
- blk_status_t ret;
-
- ret = blk_cloned_rq_check_limits(q, rq);
- if (ret != BLK_STS_OK)
- return ret;
-
- if (rq->q->disk &&
- should_fail_request(rq->q->disk->part0, blk_rq_bytes(rq)))
+ if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq)))
return BLK_STS_IOERR;
if (blk_crypto_insert_cloned_request(rq))
@@ -2920,8 +2917,10 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
* bypass a potential scheduler on the bottom device for
* insert.
*/
- blk_mq_run_dispatch_ops(rq->q,
+ blk_mq_run_dispatch_ops(q,
ret = blk_mq_request_issue_directly(rq, true));
+ if (ret)
+ blk_account_io_done(rq, ktime_get_ns());
return ret;
}
EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
@@ -2973,10 +2972,10 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
bs = &fs_bio_set;
__rq_for_each_bio(bio_src, rq_src) {
- bio = bio_clone_fast(bio_src, gfp_mask, bs);
+ bio = bio_alloc_clone(rq->q->disk->part0, bio_src, gfp_mask,
+ bs);
if (!bio)
goto free_and_out;
- bio->bi_bdev = rq->q->disk->part0;
if (bio_ctr && bio_ctr(bio, bio_src, data))
goto free_and_out;
@@ -3013,6 +3012,7 @@ free_and_out:
return -ENOMEM;
}
EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
+#endif /* CONFIG_BLK_MQ_STACKING */
/*
* Steal bios from a request and add them to a bio list.
@@ -3083,6 +3083,9 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
struct blk_mq_tags *drv_tags;
struct page *page;
+ if (list_empty(&tags->page_list))
+ return;
+
if (blk_mq_is_shared_tags(set->flags))
drv_tags = set->shared_tags;
else
@@ -3125,15 +3128,41 @@ void blk_mq_free_rq_map(struct blk_mq_tags *tags)
blk_mq_free_tags(tags);
}
+static enum hctx_type hctx_idx_to_type(struct blk_mq_tag_set *set,
+ unsigned int hctx_idx)
+{
+ int i;
+
+ for (i = 0; i < set->nr_maps; i++) {
+ unsigned int start = set->map[i].queue_offset;
+ unsigned int end = start + set->map[i].nr_queues;
+
+ if (hctx_idx >= start && hctx_idx < end)
+ break;
+ }
+
+ if (i >= set->nr_maps)
+ i = HCTX_TYPE_DEFAULT;
+
+ return i;
+}
+
+static int blk_mq_get_hctx_node(struct blk_mq_tag_set *set,
+ unsigned int hctx_idx)
+{
+ enum hctx_type type = hctx_idx_to_type(set, hctx_idx);
+
+ return blk_mq_hw_queue_to_node(&set->map[type], hctx_idx);
+}
+
static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
unsigned int hctx_idx,
unsigned int nr_tags,
unsigned int reserved_tags)
{
+ int node = blk_mq_get_hctx_node(set, hctx_idx);
struct blk_mq_tags *tags;
- int node;
- node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx);
if (node == NUMA_NO_NODE)
node = set->numa_node;
@@ -3182,10 +3211,9 @@ static int blk_mq_alloc_rqs(struct blk_mq_tag_set *set,
unsigned int hctx_idx, unsigned int depth)
{
unsigned int i, j, entries_per_page, max_order = 4;
+ int node = blk_mq_get_hctx_node(set, hctx_idx);
size_t rq_size, left;
- int node;
- node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx);
if (node == NUMA_NO_NODE)
node = set->numa_node;
@@ -3430,6 +3458,8 @@ static void blk_mq_exit_hctx(struct request_queue *q,
blk_mq_remove_cpuhp(hctx);
+ xa_erase(&q->hctx_table, hctx_idx);
+
spin_lock(&q->unused_hctx_lock);
list_add(&hctx->hctx_list, &q->unused_hctx_list);
spin_unlock(&q->unused_hctx_lock);
@@ -3439,12 +3469,11 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
struct blk_mq_tag_set *set, int nr_queue)
{
struct blk_mq_hw_ctx *hctx;
- unsigned int i;
+ unsigned long i;
queue_for_each_hw_ctx(q, hctx, i) {
if (i == nr_queue)
break;
- blk_mq_debugfs_unregister_hctx(hctx);
blk_mq_exit_hctx(q, set, hctx, i);
}
}
@@ -3469,8 +3498,15 @@ static int blk_mq_init_hctx(struct request_queue *q,
if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx,
hctx->numa_node))
goto exit_hctx;
+
+ if (xa_insert(&q->hctx_table, hctx_idx, hctx, GFP_KERNEL))
+ goto exit_flush_rq;
+
return 0;
+ exit_flush_rq:
+ if (set->ops->exit_request)
+ set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);
exit_hctx:
if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx);
@@ -3630,7 +3666,8 @@ static void __blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
static void blk_mq_map_swqueue(struct request_queue *q)
{
- unsigned int i, j, hctx_idx;
+ unsigned int j, hctx_idx;
+ unsigned long i;
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
struct blk_mq_tag_set *set = q->tag_set;
@@ -3737,7 +3774,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
static void queue_set_hctx_shared(struct request_queue *q, bool shared)
{
struct blk_mq_hw_ctx *hctx;
- int i;
+ unsigned long i;
queue_for_each_hw_ctx(q, hctx, i) {
if (shared) {
@@ -3837,7 +3874,7 @@ static int blk_mq_alloc_ctxs(struct request_queue *q)
void blk_mq_release(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx, *next;
- int i;
+ unsigned long i;
queue_for_each_hw_ctx(q, hctx, i)
WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list));
@@ -3848,7 +3885,7 @@ void blk_mq_release(struct request_queue *q)
kobject_put(&hctx->kobj);
}
- kfree(q->queue_hw_ctx);
+ xa_destroy(&q->hctx_table);
/*
* release .mq_kobj and sw queue's kobject now because
@@ -3937,52 +3974,28 @@ static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
struct request_queue *q)
{
- int i, j, end;
- struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
-
- if (q->nr_hw_queues < set->nr_hw_queues) {
- struct blk_mq_hw_ctx **new_hctxs;
-
- new_hctxs = kcalloc_node(set->nr_hw_queues,
- sizeof(*new_hctxs), GFP_KERNEL,
- set->numa_node);
- if (!new_hctxs)
- return;
- if (hctxs)
- memcpy(new_hctxs, hctxs, q->nr_hw_queues *
- sizeof(*hctxs));
- q->queue_hw_ctx = new_hctxs;
- kfree(hctxs);
- hctxs = new_hctxs;
- }
+ struct blk_mq_hw_ctx *hctx;
+ unsigned long i, j;
/* protect against switching io scheduler */
mutex_lock(&q->sysfs_lock);
for (i = 0; i < set->nr_hw_queues; i++) {
- int node;
- struct blk_mq_hw_ctx *hctx;
+ int old_node;
+ int node = blk_mq_get_hctx_node(set, i);
+ struct blk_mq_hw_ctx *old_hctx = xa_load(&q->hctx_table, i);
- node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], i);
- /*
- * If the hw queue has been mapped to another numa node,
- * we need to realloc the hctx. If allocation fails, fallback
- * to use the previous one.
- */
- if (hctxs[i] && (hctxs[i]->numa_node == node))
- continue;
+ if (old_hctx) {
+ old_node = old_hctx->numa_node;
+ blk_mq_exit_hctx(q, set, old_hctx, i);
+ }
- hctx = blk_mq_alloc_and_init_hctx(set, q, i, node);
- if (hctx) {
- if (hctxs[i])
- blk_mq_exit_hctx(q, set, hctxs[i], i);
- hctxs[i] = hctx;
- } else {
- if (hctxs[i])
- pr_warn("Allocate new hctx on node %d fails,\
- fallback to previous one on node %d\n",
- node, hctxs[i]->numa_node);
- else
+ if (!blk_mq_alloc_and_init_hctx(set, q, i, node)) {
+ if (!old_hctx)
break;
+ pr_warn("Allocate new hctx on node %d fails, fallback to previous one on node %d\n",
+ node, old_node);
+ hctx = blk_mq_alloc_and_init_hctx(set, q, i, old_node);
+ WARN_ON_ONCE(!hctx);
}
}
/*
@@ -3991,24 +4004,27 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
*/
if (i != set->nr_hw_queues) {
j = q->nr_hw_queues;
- end = i;
} else {
j = i;
- end = q->nr_hw_queues;
q->nr_hw_queues = set->nr_hw_queues;
}
- for (; j < end; j++) {
- struct blk_mq_hw_ctx *hctx = hctxs[j];
-
- if (hctx) {
- blk_mq_exit_hctx(q, set, hctx, j);
- hctxs[j] = NULL;
- }
- }
+ xa_for_each_start(&q->hctx_table, j, hctx, j)
+ blk_mq_exit_hctx(q, set, hctx, j);
mutex_unlock(&q->sysfs_lock);
}
+static void blk_mq_update_poll_flag(struct request_queue *q)
+{
+ struct blk_mq_tag_set *set = q->tag_set;
+
+ if (set->nr_maps > HCTX_TYPE_POLL &&
+ set->map[HCTX_TYPE_POLL].nr_queues)
+ blk_queue_flag_set(QUEUE_FLAG_POLL, q);
+ else
+ blk_queue_flag_clear(QUEUE_FLAG_POLL, q);
+}
+
int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
struct request_queue *q)
{
@@ -4033,6 +4049,8 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
INIT_LIST_HEAD(&q->unused_hctx_list);
spin_lock_init(&q->unused_hctx_lock);
+ xa_init(&q->hctx_table);
+
blk_mq_realloc_hw_ctxs(set, q);
if (!q->nr_hw_queues)
goto err_hctxs;
@@ -4043,9 +4061,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
q->tag_set = set;
q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
- if (set->nr_maps > HCTX_TYPE_POLL &&
- set->map[HCTX_TYPE_POLL].nr_queues)
- blk_queue_flag_set(QUEUE_FLAG_POLL, q);
+ blk_mq_update_poll_flag(q);
INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
INIT_LIST_HEAD(&q->requeue_list);
@@ -4064,7 +4080,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
return 0;
err_hctxs:
- kfree(q->queue_hw_ctx);
+ xa_destroy(&q->hctx_table);
q->nr_hw_queues = 0;
blk_mq_sysfs_deinit(q);
err_poll:
@@ -4352,7 +4368,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
{
struct blk_mq_tag_set *set = q->tag_set;
struct blk_mq_hw_ctx *hctx;
- int i, ret;
+ int ret;
+ unsigned long i;
if (!set)
return -EINVAL;
@@ -4511,6 +4528,7 @@ fallback:
blk_mq_update_queue_map(set);
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_realloc_hw_ctxs(set, q);
+ blk_mq_update_poll_flag(q);
if (q->nr_hw_queues != set->nr_hw_queues) {
int i = prev_nr_hw_queues;
@@ -4727,7 +4745,7 @@ void blk_mq_cancel_work_sync(struct request_queue *q)
{
if (queue_is_mq(q)) {
struct blk_mq_hw_ctx *hctx;
- int i;
+ unsigned long i;
cancel_delayed_work_sync(&q->requeue_work);
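Much of the churn in this file comes from replacing the flat queue_hw_ctx pointer array, and its int loop counters, with an xarray keyed by hctx index, which is why the iterators become unsigned long and the kcalloc/memcpy/kfree resize dance in blk_mq_realloc_hw_ctxs() disappears. A kernel-style sketch of the xarray calls involved; struct item and the helper names are illustrative, only the xa_*() API itself is real:

#include <linux/xarray.h>

struct item;

static DEFINE_XARRAY(table);

static int table_store(unsigned long idx, struct item *it)
{
	return xa_insert(&table, idx, it, GFP_KERNEL);	/* -EBUSY if occupied */
}

static struct item *table_get(unsigned long idx)
{
	return xa_load(&table, idx);	/* NULL for empty slots */
}

static void table_teardown(void)
{
	struct item *it;
	unsigned long idx;

	xa_for_each(&table, idx, it)	/* visits only occupied indices */
		xa_erase(&table, idx);
	xa_destroy(&table);
}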
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 948791ea2a3e..2615bd58bad3 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -83,7 +83,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *
enum hctx_type type,
unsigned int cpu)
{
- return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]];
+ return xa_load(&q->hctx_table, q->tag_set->map[type].mq_map[cpu]);
}
static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags)
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 3cfbc8668cba..68267007da1c 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -177,20 +177,20 @@ static inline void rq_qos_requeue(struct request_queue *q, struct request *rq)
__rq_qos_requeue(q->rq_qos, rq);
}
-static inline void rq_qos_done_bio(struct request_queue *q, struct bio *bio)
+static inline void rq_qos_done_bio(struct bio *bio)
{
- if (q->rq_qos)
- __rq_qos_done_bio(q->rq_qos, bio);
+ if (bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) ||
+ bio_flagged(bio, BIO_QOS_MERGED))) {
+ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+ if (q->rq_qos)
+ __rq_qos_done_bio(q->rq_qos, bio);
+ }
}
static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio)
{
- /*
- * BIO_TRACKED lets controllers know that a bio went through the
- * normal rq_qos path.
- */
if (q->rq_qos) {
- bio_set_flag(bio, BIO_TRACKED);
+ bio_set_flag(bio, BIO_QOS_THROTTLED);
__rq_qos_throttle(q->rq_qos, bio);
}
}
@@ -205,8 +205,10 @@ static inline void rq_qos_track(struct request_queue *q, struct request *rq,
static inline void rq_qos_merge(struct request_queue *q, struct request *rq,
struct bio *bio)
{
- if (q->rq_qos)
+ if (q->rq_qos) {
+ bio_set_flag(bio, BIO_QOS_MERGED);
__rq_qos_merge(q->rq_qos, rq, bio);
+ }
}
static inline void rq_qos_queue_depth_changed(struct request_queue *q)
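
The blk-rq-qos.h change decouples completion-side accounting from a request_queue argument: the queue is only looked up from bio->bi_bdev when the bio carries one of the new BIO_QOS_THROTTLED/BIO_QOS_MERGED marks set on the submission side. A small sketch of that flag-gated pattern, with illustrative names:

#include <stdio.h>

enum { QOS_THROTTLED = 1 << 0, QOS_MERGED = 1 << 1 };

struct fake_bio { unsigned int flags; };

static void qos_throttle(struct fake_bio *b)
{
	b->flags |= QOS_THROTTLED;	/* submission side leaves a mark */
}

static void qos_done(struct fake_bio *b)
{
	/* completion side: bios that never entered qos are skipped,
	 * so no queue lookup is needed for them */
	if (!(b->flags & (QOS_THROTTLED | QOS_MERGED)))
		return;
	printf("accounting completion\n");
}

int main(void)
{
	struct fake_bio tracked = { 0 }, untracked = { 0 };

	qos_throttle(&tracked);
	qos_done(&tracked);	/* prints */
	qos_done(&untracked);	/* silently skipped */
	return 0;
}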
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 9f32882ceb2f..85c4ba006671 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -10,7 +10,6 @@
#include <linux/backing-dev.h>
#include <linux/blktrace_api.h>
#include <linux/blk-mq.h>
-#include <linux/blk-cgroup.h>
#include <linux/debugfs.h>
#include "blk.h"
@@ -18,6 +17,7 @@
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-wbt.h"
+#include "blk-cgroup.h"
#include "blk-throttle.h"
struct queue_sysfs_entry {
@@ -739,27 +739,6 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
kmem_cache_free(blk_get_queue_kmem_cache(blk_queue_has_srcu(q)), q);
}
-/* Unconfigure the I/O scheduler and dissociate from the cgroup controller. */
-static void blk_exit_queue(struct request_queue *q)
-{
- /*
- * Since the I/O scheduler exit code may access cgroup information,
- * perform I/O scheduler exit before disassociating from the block
- * cgroup controller.
- */
- if (q->elevator) {
- ioc_clear_queue(q);
- elevator_exit(q);
- }
-
- /*
- * Remove all references to @q from the block cgroup controller before
- * restoring @q->queue_lock to avoid that restoring this pointer causes
- * e.g. blkcg_print_blkgs() to crash.
- */
- blkcg_exit_queue(q);
-}
-
/**
* blk_release_queue - releases all allocated resources of the request_queue
* @kobj: pointer to a kobject, whose container is a request_queue
@@ -787,12 +766,12 @@ static void blk_release_queue(struct kobject *kobj)
might_sleep();
+ percpu_ref_exit(&q->q_usage_counter);
+
if (q->poll_stat)
blk_stat_remove_callback(q, q->poll_cb);
blk_stat_free_callback(q->poll_cb);
- blk_exit_queue(q);
-
blk_free_queue_stats(q->stats);
kfree(q->poll_stat);
@@ -880,6 +859,10 @@ int blk_register_queue(struct gendisk *disk)
goto put_dev;
}
+ ret = blk_crypto_sysfs_register(q);
+ if (ret)
+ goto put_dev;
+
blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
wbt_enable_default(q);
blk_throtl_register_queue(q);
@@ -910,6 +893,7 @@ unlock:
return ret;
put_dev:
+ elv_unregister_queue(q);
disk_unregister_independent_access_ranges(disk);
mutex_unlock(&q->sysfs_lock);
mutex_unlock(&q->sysfs_dir_lock);
@@ -954,16 +938,18 @@ void blk_unregister_queue(struct gendisk *disk)
*/
if (queue_is_mq(q))
blk_mq_unregister_dev(disk_to_dev(disk), q);
-
- kobject_uevent(&q->kobj, KOBJ_REMOVE);
- kobject_del(&q->kobj);
+ blk_crypto_sysfs_unregister(q);
blk_trace_remove_sysfs(disk_to_dev(disk));
mutex_lock(&q->sysfs_lock);
- if (q->elevator)
- elv_unregister_queue(q);
+ elv_unregister_queue(q);
disk_unregister_independent_access_ranges(disk);
mutex_unlock(&q->sysfs_lock);
+
+ /* Now that we've deleted all child objects, we can delete the queue. */
+ kobject_uevent(&q->kobj, KOBJ_REMOVE);
+ kobject_del(&q->kobj);
+
mutex_unlock(&q->sysfs_dir_lock);
kobject_put(&disk_to_dev(disk)->kobj);
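
The blk-sysfs.c hunks reorder teardown so that child objects (crypto sysfs, elevator, independent access ranges) are deleted before the queue kobject itself, and the new put_dev: error path also unwinds any elevator registration left behind. A compact sketch of the goto-unwind convention those paths rely on, with hypothetical step names:

#include <stdio.h>

static int register_a(void) { puts("register a"); return 0; }
static void unregister_a(void) { puts("unregister a"); }
static int register_b(void) { puts("register b"); return -1; } /* fails */

static int register_all(void)
{
	int ret;

	ret = register_a();
	if (ret)
		return ret;

	ret = register_b();
	if (ret)
		goto put_a;	/* unwind in reverse order, like put_dev: */

	return 0;

put_a:
	unregister_a();
	return ret;
}

int main(void)
{
	return register_all() ? 1 : 0;
}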
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 7c462c006b26..469c483719be 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -10,7 +10,6 @@
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/blktrace_api.h>
-#include <linux/blk-cgroup.h>
#include "blk.h"
#include "blk-cgroup-rwstat.h"
#include "blk-stat.h"
@@ -42,11 +41,6 @@
/* A workqueue to queue throttle related work */
static struct workqueue_struct *kthrotld_workqueue;
-enum tg_state_flags {
- THROTL_TG_PENDING = 1 << 0, /* on parent's pending tree */
- THROTL_TG_WAS_EMPTY = 1 << 1, /* bio_lists[] became non-empty */
-};
-
#define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node)
/* We measure latency for request size from <= 4k to >= 1M */
@@ -426,12 +420,24 @@ static void tg_update_has_rules(struct throtl_grp *tg)
struct throtl_grp *parent_tg = sq_to_tg(tg->service_queue.parent_sq);
struct throtl_data *td = tg->td;
int rw;
+ int has_iops_limit = 0;
+
+ for (rw = READ; rw <= WRITE; rw++) {
+ unsigned int iops_limit = tg_iops_limit(tg, rw);
- for (rw = READ; rw <= WRITE; rw++)
tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) ||
(td->limit_valid[td->limit_index] &&
(tg_bps_limit(tg, rw) != U64_MAX ||
- tg_iops_limit(tg, rw) != UINT_MAX));
+ iops_limit != UINT_MAX));
+
+ if (iops_limit != UINT_MAX)
+ has_iops_limit = 1;
+ }
+
+ if (has_iops_limit)
+ tg->flags |= THROTL_TG_HAS_IOPS_LIMIT;
+ else
+ tg->flags &= ~THROTL_TG_HAS_IOPS_LIMIT;
}
static void throtl_pd_online(struct blkg_policy_data *pd)
@@ -634,8 +640,6 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
tg->bytes_disp[rw] = 0;
tg->io_disp[rw] = 0;
- atomic_set(&tg->io_split_cnt[rw], 0);
-
/*
* Previous slice has expired. We must have trimmed it after last
* bio dispatch. That means since start of last slice, we never used
@@ -659,8 +663,6 @@ static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
tg->slice_start[rw] = jiffies;
tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
- atomic_set(&tg->io_split_cnt[rw], 0);
-
throtl_log(&tg->service_queue,
"[%c] new slice start=%lu end=%lu jiffies=%lu",
rw == READ ? 'R' : 'W', tg->slice_start[rw],
@@ -808,7 +810,8 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
unsigned int bio_size = throtl_bio_data_size(bio);
- if (bps_limit == U64_MAX) {
+ /* no need to throttle if this bio's bytes have been accounted */
+ if (bps_limit == U64_MAX || bio_flagged(bio, BIO_THROTTLED)) {
if (wait)
*wait = 0;
return true;
@@ -871,7 +874,8 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
bio != throtl_peek_queued(&tg->service_queue.queued[rw]));
/* If tg->bps = -1, then BW is unlimited */
- if (bps_limit == U64_MAX && iops_limit == UINT_MAX) {
+ if ((bps_limit == U64_MAX && iops_limit == UINT_MAX) ||
+ tg->flags & THROTL_TG_CANCELING) {
if (wait)
*wait = 0;
return true;
@@ -893,9 +897,6 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
jiffies + tg->td->throtl_slice);
}
- if (iops_limit != UINT_MAX)
- tg->io_disp[rw] += atomic_xchg(&tg->io_split_cnt[rw], 0);
-
if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) &&
tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) {
if (wait)
@@ -920,9 +921,12 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
unsigned int bio_size = throtl_bio_data_size(bio);
/* Charge the bio to the group */
- tg->bytes_disp[rw] += bio_size;
+ if (!bio_flagged(bio, BIO_THROTTLED)) {
+ tg->bytes_disp[rw] += bio_size;
+ tg->last_bytes_disp[rw] += bio_size;
+ }
+
tg->io_disp[rw]++;
- tg->last_bytes_disp[rw] += bio_size;
tg->last_io_disp[rw]++;
/*
@@ -1134,12 +1138,22 @@ static void throtl_pending_timer_fn(struct timer_list *t)
struct throtl_service_queue *sq = from_timer(sq, t, pending_timer);
struct throtl_grp *tg = sq_to_tg(sq);
struct throtl_data *td = sq_to_td(sq);
- struct request_queue *q = td->queue;
struct throtl_service_queue *parent_sq;
+ struct request_queue *q;
bool dispatched;
int ret;
+	/* throtl_data may be gone, so figure out the request queue from the blkg */
+ if (tg)
+ q = tg->pd.blkg->q;
+ else
+ q = td->queue;
+
spin_lock_irq(&q->queue_lock);
+
+ if (!q->root_blkg)
+ goto out_unlock;
+
if (throtl_can_upgrade(td, NULL))
throtl_upgrade_state(td);
@@ -1219,7 +1233,7 @@ static void blk_throtl_dispatch_work_fn(struct work_struct *work)
if (!bio_list_empty(&bio_list_on_stack)) {
blk_start_plug(&plug);
while ((bio = bio_list_pop(&bio_list_on_stack)))
- submit_bio_noacct(bio);
+ submit_bio_noacct_nocheck(bio);
blk_finish_plug(&plug);
}
}
@@ -1763,6 +1777,39 @@ static bool throtl_hierarchy_can_upgrade(struct throtl_grp *tg)
return false;
}
+void blk_throtl_cancel_bios(struct request_queue *q)
+{
+ struct cgroup_subsys_state *pos_css;
+ struct blkcg_gq *blkg;
+
+ spin_lock_irq(&q->queue_lock);
+ /*
+	 * queue_lock is held, so the rcu lock is technically not needed
+	 * here. However, it is still taken to emphasize that the
+	 * following path needs RCU protection and to avoid a lockdep
+	 * warning.
+ */
+ rcu_read_lock();
+ blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
+ struct throtl_grp *tg = blkg_to_tg(blkg);
+ struct throtl_service_queue *sq = &tg->service_queue;
+
+ /*
+ * Set the flag to make sure throtl_pending_timer_fn() won't
+ * stop until all throttled bios are dispatched.
+ */
+ blkg_to_tg(blkg)->flags |= THROTL_TG_CANCELING;
+ /*
+ * Update disptime after setting the above flag to make sure
+ * throtl_select_dispatch() won't exit without dispatching.
+ */
+ tg_update_disptime(tg);
+
+ throtl_schedule_pending_timer(sq, jiffies + 1);
+ }
+ rcu_read_unlock();
+ spin_unlock_irq(&q->queue_lock);
+}
+
static bool throtl_can_upgrade(struct throtl_data *td,
struct throtl_grp *this_tg)
{
@@ -1917,14 +1964,12 @@ static void throtl_downgrade_check(struct throtl_grp *tg)
}
if (tg->iops[READ][LIMIT_LOW]) {
- tg->last_io_disp[READ] += atomic_xchg(&tg->last_io_split_cnt[READ], 0);
iops = tg->last_io_disp[READ] * HZ / elapsed_time;
if (iops >= tg->iops[READ][LIMIT_LOW])
tg->last_low_overflow_time[READ] = now;
}
if (tg->iops[WRITE][LIMIT_LOW]) {
- tg->last_io_disp[WRITE] += atomic_xchg(&tg->last_io_split_cnt[WRITE], 0);
iops = tg->last_io_disp[WRITE] * HZ / elapsed_time;
if (iops >= tg->iops[WRITE][LIMIT_LOW])
tg->last_low_overflow_time[WRITE] = now;
@@ -2043,25 +2088,6 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
}
#endif
-void blk_throtl_charge_bio_split(struct bio *bio)
-{
- struct blkcg_gq *blkg = bio->bi_blkg;
- struct throtl_grp *parent = blkg_to_tg(blkg);
- struct throtl_service_queue *parent_sq;
- bool rw = bio_data_dir(bio);
-
- do {
- if (!parent->has_rules[rw])
- break;
-
- atomic_inc(&parent->io_split_cnt[rw]);
- atomic_inc(&parent->last_io_split_cnt[rw]);
-
- parent_sq = parent->service_queue.parent_sq;
- parent = sq_to_tg(parent_sq);
- } while (parent);
-}
-
bool __blk_throtl_bio(struct bio *bio)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
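
blk_throtl_cancel_bios() implements cancellation by flagging every group THROTL_TG_CANCELING and then kicking the pending timer; the tg_may_dispatch() hunk above treats a flagged group as unlimited, so its queued bios drain on the next dispatch round. A userspace sketch of the flag-then-drain idea, using a simple per-round budget in place of the real bps/iops math:

#include <stdio.h>

#define TG_CANCELING (1u << 0)

struct group { unsigned int flags; int queued; };

static void dispatch_round(struct group *g)
{
	/* a canceling group ignores its budget, as tg_may_dispatch()
	 * now returns true immediately for THROTL_TG_CANCELING */
	int budget = (g->flags & TG_CANCELING) ? g->queued : 2;

	while (g->queued && budget--) {
		g->queued--;
		printf("dispatched, %d left\n", g->queued);
	}
}

int main(void)
{
	struct group g = { .flags = 0, .queued = 5 };

	dispatch_round(&g);	/* throttled: moves only 2 bios */
	g.flags |= TG_CANCELING;
	dispatch_round(&g);	/* canceling: drains the rest */
	return 0;
}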
diff --git a/block/blk-throttle.h b/block/blk-throttle.h
index 175f03abd9e4..c1b602996127 100644
--- a/block/blk-throttle.h
+++ b/block/blk-throttle.h
@@ -52,6 +52,13 @@ struct throtl_service_queue {
struct timer_list pending_timer; /* fires on first_pending_disptime */
};
+enum tg_state_flags {
+ THROTL_TG_PENDING = 1 << 0, /* on parent's pending tree */
+ THROTL_TG_WAS_EMPTY = 1 << 1, /* bio_lists[] became non-empty */
+ THROTL_TG_HAS_IOPS_LIMIT = 1 << 2, /* tg has iops limit */
+ THROTL_TG_CANCELING = 1 << 3, /* starts to cancel bio */
+};
+
enum {
LIMIT_LOW,
LIMIT_MAX,
@@ -132,9 +139,6 @@ struct throtl_grp {
unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
unsigned long bio_cnt_reset_time;
- atomic_t io_split_cnt[2];
- atomic_t last_io_split_cnt[2];
-
struct blkg_rwstat stat_bytes;
struct blkg_rwstat stat_ios;
};
@@ -158,20 +162,23 @@ static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
static inline void blk_throtl_exit(struct request_queue *q) { }
static inline void blk_throtl_register_queue(struct request_queue *q) { }
-static inline void blk_throtl_charge_bio_split(struct bio *bio) { }
static inline bool blk_throtl_bio(struct bio *bio) { return false; }
+static inline void blk_throtl_cancel_bios(struct request_queue *q) { }
#else /* CONFIG_BLK_DEV_THROTTLING */
int blk_throtl_init(struct request_queue *q);
void blk_throtl_exit(struct request_queue *q);
void blk_throtl_register_queue(struct request_queue *q);
-void blk_throtl_charge_bio_split(struct bio *bio);
bool __blk_throtl_bio(struct bio *bio);
+void blk_throtl_cancel_bios(struct request_queue *q);
static inline bool blk_throtl_bio(struct bio *bio)
{
struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
- if (bio_flagged(bio, BIO_THROTTLED))
+ /* no need to throttle bps any more if the bio has been throttled */
+ if (bio_flagged(bio, BIO_THROTTLED) &&
+ !(tg->flags & THROTL_TG_HAS_IOPS_LIMIT))
return false;
+
if (!tg->has_rules[bio_data_dir(bio)])
return false;
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 774ecc598bee..602bef54c813 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -215,9 +215,8 @@ static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
continue;
}
- bio = blk_next_bio(bio, 0, gfp_mask);
- bio_set_dev(bio, bdev);
- bio->bi_opf = REQ_OP_ZONE_RESET | REQ_SYNC;
+ bio = blk_next_bio(bio, bdev, 0, REQ_OP_ZONE_RESET | REQ_SYNC,
+ gfp_mask);
bio->bi_iter.bi_sector = sector;
sector += zone_sectors;
@@ -239,10 +238,7 @@ static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
{
struct bio bio;
- bio_init(&bio, NULL, 0);
- bio_set_dev(&bio, bdev);
- bio.bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC;
-
+ bio_init(&bio, bdev, NULL, 0, REQ_OP_ZONE_RESET_ALL | REQ_SYNC);
return submit_bio_wait(&bio);
}
@@ -306,9 +302,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
}
while (sector < end_sector) {
- bio = blk_next_bio(bio, 0, gfp_mask);
- bio_set_dev(bio, bdev);
- bio->bi_opf = op | REQ_SYNC;
+ bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, gfp_mask);
bio->bi_iter.bi_sector = sector;
sector += zone_sectors;
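
The blk-zoned.c hunks follow the bio API change running through this series: the block device and the operation are passed to blk_next_bio()/bio_init() at construction time rather than being patched in afterwards. A userspace sketch of the constructor-takes-identity style, with stand-in types and an illustrative op value:

#include <stdio.h>

struct fake_bdev { const char *name; };
struct fake_bio {
	struct fake_bdev *bdev;
	unsigned int opf;
	unsigned long sector;
};

/* all identity fields are parameters, mirroring the new
 * bio_init(bio, bdev, table, max_vecs, opf) shape */
static void fake_bio_init(struct fake_bio *b, struct fake_bdev *d,
			  unsigned int opf)
{
	b->bdev = d;
	b->opf = opf;
	b->sector = 0;
}

int main(void)
{
	struct fake_bdev dev = { "zoned0" };
	struct fake_bio bio;

	fake_bio_init(&bio, &dev, 0x11 /* illustrative op code */);
	bio.sector = 4096;
	printf("%s op=%#x sector=%lu\n", bio.bdev->name, bio.opf, bio.sector);
	return 0;
}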
diff --git a/block/blk.h b/block/blk.h
index 8bd43b3ad33d..6f21859c7f0f 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -46,7 +46,7 @@ void blk_freeze_queue(struct request_queue *q);
void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
void blk_queue_start_drain(struct request_queue *q);
int __bio_queue_enter(struct request_queue *q, struct bio *bio);
-bool submit_bio_checks(struct bio *bio);
+void submit_bio_noacct_nocheck(struct bio *bio);
static inline bool blk_try_enter_queue(struct request_queue *q, bool pm)
{
@@ -325,7 +325,7 @@ int blk_dev_init(void);
*/
static inline bool blk_do_io_stat(struct request *rq)
{
- return (rq->rq_flags & RQF_IO_STAT) && rq->q->disk;
+ return (rq->rq_flags & RQF_IO_STAT) && !blk_rq_is_passthrough(rq);
}
void update_io_ticks(struct block_device *part, unsigned long now, bool end);
@@ -406,8 +406,6 @@ extern int blk_iolatency_init(struct request_queue *q);
static inline int blk_iolatency_init(struct request_queue *q) { return 0; }
#endif
-struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp);
-
#ifdef CONFIG_BLK_DEV_ZONED
void blk_queue_free_zone_bitmaps(struct request_queue *q);
void blk_queue_clear_zone_settings(struct request_queue *q);
@@ -426,6 +424,7 @@ int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
int bdev_del_partition(struct gendisk *disk, int partno);
int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start,
sector_t length);
+void blk_drop_partitions(struct gendisk *disk);
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
@@ -445,6 +444,9 @@ int disk_alloc_events(struct gendisk *disk);
void disk_add_events(struct gendisk *disk);
void disk_del_events(struct gendisk *disk);
void disk_release_events(struct gendisk *disk);
+void disk_block_events(struct gendisk *disk);
+void disk_unblock_events(struct gendisk *disk);
+void disk_flush_events(struct gendisk *disk, unsigned int mask);
extern struct device_attribute dev_attr_events;
extern struct device_attribute dev_attr_events_async;
extern struct device_attribute dev_attr_events_poll_msecs;
diff --git a/block/bounce.c b/block/bounce.c
index 7af1a72835b9..3d50d19cde72 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -14,7 +14,6 @@
#include <linux/pagemap.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
-#include <linux/blk-cgroup.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
#include <linux/hash.h>
@@ -24,6 +23,7 @@
#include <trace/events/block.h>
#include "blk.h"
+#include "blk-cgroup.h"
#define POOL_SIZE 64
#define ISA_POOL_SIZE 16
@@ -162,15 +162,12 @@ static struct bio *bounce_clone_bio(struct bio *bio_src)
* that does not own the bio - reason being drivers don't use it for
* iterating over the biovec anymore, so expecting it to be kept up
* to date (i.e. for clones that share the parent biovec) is just
- * asking for trouble and would force extra work on
- * __bio_clone_fast() anyways.
+ * asking for trouble and would force extra work.
*/
- bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src),
- &bounce_bio_set);
- bio->bi_bdev = bio_src->bi_bdev;
+ bio = bio_alloc_bioset(bio_src->bi_bdev, bio_segments(bio_src),
+ bio_src->bi_opf, GFP_NOIO, &bounce_bio_set);
if (bio_flagged(bio_src, BIO_REMAPPED))
bio_set_flag(bio, BIO_REMAPPED);
- bio->bi_opf = bio_src->bi_opf;
bio->bi_ioprio = bio_src->bi_ioprio;
bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
diff --git a/block/disk-events.c b/block/disk-events.c
index 8d5496e7592a..aee25a7e1ab7 100644
--- a/block/disk-events.c
+++ b/block/disk-events.c
@@ -4,7 +4,7 @@
*/
#include <linux/export.h>
#include <linux/moduleparam.h>
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
#include "blk.h"
struct disk_events {
diff --git a/block/elevator.c b/block/elevator.c
index ec98aed39c4f..c319765892bb 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -35,7 +35,6 @@
#include <linux/hash.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>
-#include <linux/blk-cgroup.h>
#include <trace/events/block.h>
@@ -44,6 +43,7 @@
#include "blk-mq-sched.h"
#include "blk-pm.h"
#include "blk-wbt.h"
+#include "blk-cgroup.h"
static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);
@@ -192,6 +192,9 @@ void elevator_exit(struct request_queue *q)
{
struct elevator_queue *e = q->elevator;
+ ioc_clear_queue(q);
+ blk_mq_sched_free_rqs(q);
+
mutex_lock(&e->sysfs_lock);
blk_mq_exit_sched(q, e);
mutex_unlock(&e->sysfs_lock);
@@ -516,17 +519,17 @@ int elv_register_queue(struct request_queue *q, bool uevent)
void elv_unregister_queue(struct request_queue *q)
{
+ struct elevator_queue *e = q->elevator;
+
lockdep_assert_held(&q->sysfs_lock);
- if (q) {
+ if (e && e->registered) {
struct elevator_queue *e = q->elevator;
kobject_uevent(&e->kobj, KOBJ_REMOVE);
kobject_del(&e->kobj);
e->registered = 0;
- /* Re-enable throttling in case elevator disabled it */
- wbt_enable_default(q);
}
}
@@ -593,11 +596,7 @@ int elevator_switch_mq(struct request_queue *q,
lockdep_assert_held(&q->sysfs_lock);
if (q->elevator) {
- if (q->elevator->registered)
- elv_unregister_queue(q);
-
- ioc_clear_queue(q);
- blk_mq_sched_free_rqs(q);
+ elv_unregister_queue(q);
elevator_exit(q);
}
@@ -608,7 +607,6 @@ int elevator_switch_mq(struct request_queue *q,
if (new_e) {
ret = elv_register_queue(q, true);
if (ret) {
- blk_mq_sched_free_rqs(q);
elevator_exit(q);
goto out;
}
diff --git a/block/fops.c b/block/fops.c
index 26bf15c770d2..e49096354dcd 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -75,8 +75,13 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
return -ENOMEM;
}
- bio_init(&bio, vecs, nr_pages);
- bio_set_dev(&bio, bdev);
+ if (iov_iter_rw(iter) == READ) {
+ bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ);
+ if (iter_is_iovec(iter))
+ should_dirty = true;
+ } else {
+ bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
+ }
bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio.bi_write_hint = iocb->ki_hint;
bio.bi_private = current;
@@ -88,14 +93,9 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
goto out;
ret = bio.bi_iter.bi_size;
- if (iov_iter_rw(iter) == READ) {
- bio.bi_opf = REQ_OP_READ;
- if (iter_is_iovec(iter))
- should_dirty = true;
- } else {
- bio.bi_opf = dio_bio_write_op(iocb);
+ if (iov_iter_rw(iter) == WRITE)
task_io_account_write(ret);
- }
+
if (iocb->ki_flags & IOCB_NOWAIT)
bio.bi_opf |= REQ_NOWAIT;
if (iocb->ki_flags & IOCB_HIPRI)
@@ -190,6 +190,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
struct blkdev_dio *dio;
struct bio *bio;
bool is_read = (iov_iter_rw(iter) == READ), is_sync;
+ unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
loff_t pos = iocb->ki_pos;
int ret = 0;
@@ -197,7 +198,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
(bdev_logical_block_size(bdev) - 1))
return -EINVAL;
- bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
+ bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
dio = container_of(bio, struct blkdev_dio, bio);
atomic_set(&dio->ref, 1);
@@ -223,7 +224,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
blk_start_plug(&plug);
for (;;) {
- bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio->bi_write_hint = iocb->ki_hint;
bio->bi_private = dio;
@@ -238,11 +238,9 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
}
if (is_read) {
- bio->bi_opf = REQ_OP_READ;
if (dio->flags & DIO_SHOULD_DIRTY)
bio_set_pages_dirty(bio);
} else {
- bio->bi_opf = dio_bio_write_op(iocb);
task_io_account_write(bio->bi_iter.bi_size);
}
if (iocb->ki_flags & IOCB_NOWAIT)
@@ -258,7 +256,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
}
atomic_inc(&dio->ref);
submit_bio(bio);
- bio = bio_alloc(GFP_KERNEL, nr_pages);
+ bio = bio_alloc(bdev, nr_pages, opf, GFP_KERNEL);
}
blk_finish_plug(&plug);
@@ -289,6 +287,8 @@ static void blkdev_bio_end_io_async(struct bio *bio)
struct kiocb *iocb = dio->iocb;
ssize_t ret;
+ WRITE_ONCE(iocb->private, NULL);
+
if (likely(!bio->bi_status)) {
ret = dio->size;
iocb->ki_pos += ret;
@@ -311,6 +311,8 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
unsigned int nr_pages)
{
struct block_device *bdev = iocb->ki_filp->private_data;
+ bool is_read = iov_iter_rw(iter) == READ;
+ unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
struct blkdev_dio *dio;
struct bio *bio;
loff_t pos = iocb->ki_pos;
@@ -320,11 +322,10 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
(bdev_logical_block_size(bdev) - 1))
return -EINVAL;
- bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
+ bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
dio = container_of(bio, struct blkdev_dio, bio);
dio->flags = 0;
dio->iocb = iocb;
- bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio->bi_write_hint = iocb->ki_hint;
bio->bi_end_io = blkdev_bio_end_io_async;
@@ -347,14 +348,12 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
}
dio->size = bio->bi_iter.bi_size;
- if (iov_iter_rw(iter) == READ) {
- bio->bi_opf = REQ_OP_READ;
+ if (is_read) {
if (iter_is_iovec(iter)) {
dio->flags |= DIO_SHOULD_DIRTY;
bio_set_pages_dirty(bio);
}
} else {
- bio->bi_opf = dio_bio_write_op(iocb);
task_io_account_write(bio->bi_iter.bi_size);
}
@@ -429,7 +428,8 @@ static int blkdev_writepages(struct address_space *mapping,
}
const struct address_space_operations def_blk_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = blkdev_readpage,
.readahead = blkdev_readahead,
.writepage = blkdev_writepage,
@@ -566,34 +566,37 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct block_device *bdev = iocb->ki_filp->private_data;
loff_t size = bdev_nr_bytes(bdev);
- size_t count = iov_iter_count(to);
loff_t pos = iocb->ki_pos;
size_t shorted = 0;
ssize_t ret = 0;
+ size_t count;
- if (unlikely(pos + count > size)) {
+ if (unlikely(pos + iov_iter_count(to) > size)) {
if (pos >= size)
return 0;
size -= pos;
- if (count > size) {
- shorted = count - size;
- iov_iter_truncate(to, size);
- }
+ shorted = iov_iter_count(to) - size;
+ iov_iter_truncate(to, size);
}
+ count = iov_iter_count(to);
+ if (!count)
+ goto reexpand; /* skip atime */
+
if (iocb->ki_flags & IOCB_DIRECT) {
struct address_space *mapping = iocb->ki_filp->f_mapping;
if (iocb->ki_flags & IOCB_NOWAIT) {
- if (filemap_range_needs_writeback(mapping, iocb->ki_pos,
- iocb->ki_pos + count - 1))
- return -EAGAIN;
+ if (filemap_range_needs_writeback(mapping, pos,
+ pos + count - 1)) {
+ ret = -EAGAIN;
+ goto reexpand;
+ }
} else {
- ret = filemap_write_and_wait_range(mapping,
- iocb->ki_pos,
- iocb->ki_pos + count - 1);
+ ret = filemap_write_and_wait_range(mapping, pos,
+ pos + count - 1);
if (ret < 0)
- return ret;
+ goto reexpand;
}
file_accessed(iocb->ki_filp);
@@ -603,12 +606,14 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
iocb->ki_pos += ret;
count -= ret;
}
+ iov_iter_revert(to, count - iov_iter_count(to));
if (ret < 0 || !count)
- return ret;
+ goto reexpand;
}
ret = filemap_read(iocb, to, ret);
+reexpand:
if (unlikely(shorted))
iov_iter_reexpand(to, iov_iter_count(to) + shorted);
return ret;
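
blkdev_read_iter() now clamps the iterator to the device size up front, remembers how much was shaved off, and restores it on every exit path via the new reexpand: label. The arithmetic, modeled with plain integers standing in for the iov_iter:

#include <stdio.h>

int main(void)
{
	long dev_size = 100, pos = 90;
	long count = 32, shorted = 0;

	if (pos + count > dev_size) {
		shorted = count - (dev_size - pos);	/* bytes clamped */
		count = dev_size - pos;			/* iov_iter_truncate */
	}

	printf("read %ld bytes at %ld\n", count, pos);

	count += shorted;	/* iov_iter_reexpand on the way out */
	printf("iterator restored to %ld bytes\n", count);
	return 0;
}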
diff --git a/block/genhd.c b/block/genhd.c
index 626c8406f21a..37eb41ee4086 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -8,7 +8,6 @@
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/fs.h>
-#include <linux/genhd.h>
#include <linux/kdev_t.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
@@ -26,10 +25,12 @@
#include <linux/pm_runtime.h>
#include <linux/badblocks.h>
#include <linux/part_stat.h>
+#include "blk-throttle.h"
#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
+#include "blk-cgroup.h"
static struct kobject *block_depr;
@@ -185,7 +186,9 @@ static struct blk_major_name {
struct blk_major_name *next;
int major;
char name[16];
+#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
void (*probe)(dev_t devt);
+#endif
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
static DEFINE_MUTEX(major_names_lock);
static DEFINE_SPINLOCK(major_names_spinlock);
@@ -275,7 +278,9 @@ int __register_blkdev(unsigned int major, const char *name,
}
p->major = major;
+#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
p->probe = probe;
+#endif
strlcpy(p->name, name, sizeof(p->name));
p->next = NULL;
index = major_to_index(major);
@@ -523,6 +528,7 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
disk_update_readahead(disk);
disk_add_events(disk);
+ set_bit(GD_ADDED, &disk->state);
return 0;
out_unregister_bdi:
@@ -549,6 +555,20 @@ out_free_ext_minor:
EXPORT_SYMBOL(device_add_disk);
/**
+ * blk_mark_disk_dead - mark a disk as dead
+ * @disk: disk to mark as dead
+ *
+ * Mark the disk as dead (e.g. surprise removed) and don't accept any new I/O
+ * to this disk.
+ */
+void blk_mark_disk_dead(struct gendisk *disk)
+{
+ set_bit(GD_DEAD, &disk->state);
+ blk_queue_start_drain(disk->queue);
+}
+EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
+
+/**
* del_gendisk - remove the gendisk
* @disk: the struct gendisk to remove
*
@@ -622,7 +642,8 @@ void del_gendisk(struct gendisk *disk)
blk_mq_freeze_queue_wait(q);
- rq_qos_exit(q);
+ blk_throtl_cancel_bios(disk->queue);
+
blk_sync_queue(q);
blk_flush_integrity();
/*
@@ -679,6 +700,7 @@ static ssize_t disk_badblocks_store(struct device *dev,
return badblocks_store(disk->bb, page, len, 0);
}
+#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
void blk_request_module(dev_t devt)
{
unsigned int major = MAJOR(devt);
@@ -698,6 +720,7 @@ void blk_request_module(dev_t devt)
/* Make old-style 2.4 aliases work */
request_module("block-major-%d", MAJOR(devt));
}
+#endif /* CONFIG_BLOCK_LEGACY_AUTOLOAD */
/*
* print a full list of all partitions - intended for places where the root
@@ -913,12 +936,17 @@ ssize_t part_stat_show(struct device *dev,
struct disk_stats stat;
unsigned int inflight;
- part_stat_read_all(bdev, &stat);
if (queue_is_mq(q))
inflight = blk_mq_in_flight(q, bdev);
else
inflight = part_in_flight(bdev);
+ if (inflight) {
+ part_stat_lock();
+ update_io_ticks(bdev, jiffies, true);
+ part_stat_unlock();
+ }
+ part_stat_read_all(bdev, &stat);
return sprintf(buf,
"%8lu %8lu %8llu %8u "
"%8lu %8lu %8llu %8u "
@@ -1086,6 +1114,31 @@ static const struct attribute_group *disk_attr_groups[] = {
NULL
};
+static void disk_release_mq(struct request_queue *q)
+{
+ blk_mq_cancel_work_sync(q);
+
+ /*
+	 * There can't be any non-passthrough bios in flight here, but
+	 * requests stay around longer, including passthrough ones, so we
+	 * still need to freeze the queue here.
+ */
+ blk_mq_freeze_queue(q);
+
+ /*
+ * Since the I/O scheduler exit code may access cgroup information,
+ * perform I/O scheduler exit before disassociating from the block
+ * cgroup controller.
+ */
+ if (q->elevator) {
+ mutex_lock(&q->sysfs_lock);
+ elevator_exit(q);
+ mutex_unlock(&q->sysfs_lock);
+ }
+ rq_qos_exit(q);
+ __blk_mq_unfreeze_queue(q, true);
+}
+
/**
* disk_release - releases all allocated resources of the gendisk
* @dev: the device representing this disk
@@ -1107,13 +1160,21 @@ static void disk_release(struct device *dev)
might_sleep();
WARN_ON_ONCE(disk_live(disk));
- blk_mq_cancel_work_sync(disk->queue);
+ if (queue_is_mq(disk->queue))
+ disk_release_mq(disk->queue);
+
+ blkcg_exit_queue(disk->queue);
disk_release_events(disk);
kfree(disk->random);
xa_destroy(&disk->part_tbl);
+
disk->queue->disk = NULL;
blk_put_queue(disk->queue);
+
+ if (test_bit(GD_ADDED, &disk->state) && disk->fops->free_disk)
+ disk->fops->free_disk(disk);
+
iput(disk->part0->bd_inode); /* frees the disk */
}
@@ -1174,12 +1235,17 @@ static int diskstats_show(struct seq_file *seqf, void *v)
xa_for_each(&gp->part_tbl, idx, hd) {
if (bdev_is_partition(hd) && !bdev_nr_sectors(hd))
continue;
- part_stat_read_all(hd, &stat);
if (queue_is_mq(gp->queue))
inflight = blk_mq_in_flight(gp->queue, hd);
else
inflight = part_in_flight(hd);
+ if (inflight) {
+ part_stat_lock();
+ update_io_ticks(hd, jiffies, true);
+ part_stat_unlock();
+ }
+ part_stat_read_all(hd, &stat);
seq_printf(seqf, "%4d %7d %pg "
"%lu %lu %lu %u "
"%lu %lu %lu %u "
@@ -1308,6 +1374,9 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
goto out_destroy_part_tbl;
+ if (blkcg_init_queue(q))
+ goto out_erase_part0;
+
rand_initialize_disk(disk);
disk_to_dev(disk)->class = &block_class;
disk_to_dev(disk)->type = &disk_type;
@@ -1320,6 +1389,8 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
#endif
return disk;
+out_erase_part0:
+ xa_erase(&disk->part_tbl, 0);
out_destroy_part_tbl:
xa_destroy(&disk->part_tbl);
disk->part0->bd_disk = NULL;
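
Both part_stat_show() and diskstats_show() gain the same ordering fix above: when requests are in flight, update_io_ticks() runs before the stats snapshot, so the reported io_ticks reflects time up to the moment of the read. A toy model of that ordering:

#include <stdio.h>

struct stats { unsigned long io_ticks; };

static void update_io_ticks(struct stats *s, unsigned long now)
{
	s->io_ticks = now;	/* stand-in for the real delta logic */
}

int main(void)
{
	struct stats s = { .io_ticks = 100 };
	unsigned long now = 150;
	int inflight = 1;

	if (inflight)
		update_io_ticks(&s, now);	/* before the snapshot */

	struct stats snap = s;			/* part_stat_read_all() */
	printf("io_ticks=%lu\n", snap.io_ticks);
	return 0;
}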
diff --git a/block/holder.c b/block/holder.c
index 27cddce1b446..8d750281a1cd 100644
--- a/block/holder.c
+++ b/block/holder.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
#include <linux/slab.h>
struct bd_holder_disk {
diff --git a/block/partitions/check.h b/block/partitions/check.h
index d5b28e309d64..4ffa2359b1a3 100644
--- a/block/partitions/check.h
+++ b/block/partitions/check.h
@@ -1,7 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/pagemap.h>
#include <linux/blkdev.h>
-#include <linux/genhd.h>
#include "../blk.h"
/*
diff --git a/block/partitions/core.c b/block/partitions/core.c
index c2a1635922b1..2ef8dfa1e5c8 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -8,7 +8,6 @@
#include <linux/major.h>
#include <linux/slab.h>
#include <linux/ctype.h>
-#include <linux/genhd.h>
#include <linux/vmalloc.h>
#include <linux/blktrace_api.h>
#include <linux/raid/detect.h>
diff --git a/block/partitions/efi.h b/block/partitions/efi.h
index 8cc2b88d0aa8..84b9f36b9e47 100644
--- a/block/partitions/efi.h
+++ b/block/partitions/efi.h
@@ -13,7 +13,6 @@
#include <linux/types.h>
#include <linux/fs.h>
-#include <linux/genhd.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/string.h>
diff --git a/block/partitions/ldm.h b/block/partitions/ldm.h
index 8693704dcf5e..0a747a0c782d 100644
--- a/block/partitions/ldm.h
+++ b/block/partitions/ldm.h
@@ -14,7 +14,6 @@
#include <linux/types.h>
#include <linux/list.h>
-#include <linux/genhd.h>
#include <linux/fs.h>
#include <asm/unaligned.h>
#include <asm/byteorder.h>
diff --git a/block/sed-opal.c b/block/sed-opal.c
index daafadbb88ca..9700197000f2 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -13,7 +13,7 @@
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/list.h>
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <uapi/linux/sed-opal.h>
diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index 692365dee2bd..05b66ce9d1c9 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -22,6 +22,9 @@ static struct key *builtin_trusted_keys;
#ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
static struct key *secondary_trusted_keys;
#endif
+#ifdef CONFIG_INTEGRITY_MACHINE_KEYRING
+static struct key *machine_trusted_keys;
+#endif
#ifdef CONFIG_INTEGRITY_PLATFORM_KEYRING
static struct key *platform_trusted_keys;
#endif
@@ -86,11 +89,50 @@ static __init struct key_restriction *get_builtin_and_secondary_restriction(void
if (!restriction)
panic("Can't allocate secondary trusted keyring restriction\n");
- restriction->check = restrict_link_by_builtin_and_secondary_trusted;
+ if (IS_ENABLED(CONFIG_INTEGRITY_MACHINE_KEYRING))
+ restriction->check = restrict_link_by_builtin_secondary_and_machine;
+ else
+ restriction->check = restrict_link_by_builtin_and_secondary_trusted;
return restriction;
}
#endif
+#ifdef CONFIG_INTEGRITY_MACHINE_KEYRING
+void __init set_machine_trusted_keys(struct key *keyring)
+{
+ machine_trusted_keys = keyring;
+
+ if (key_link(secondary_trusted_keys, machine_trusted_keys) < 0)
+ panic("Can't link (machine) trusted keyrings\n");
+}
+
+/**
+ * restrict_link_by_builtin_secondary_and_machine - Restrict keyring addition.
+ * @dest_keyring: Keyring being linked to.
+ * @type: The type of key being added.
+ * @payload: The payload of the new key.
+ * @restrict_key: A ring of keys that can be used to vouch for the new cert.
+ *
+ * Restrict the addition of keys into a keyring based on the key-to-be-added
+ * being vouched for by a key in either the built-in, the secondary, or
+ * the machine keyrings.
+ */
+int restrict_link_by_builtin_secondary_and_machine(
+ struct key *dest_keyring,
+ const struct key_type *type,
+ const union key_payload *payload,
+ struct key *restrict_key)
+{
+ if (machine_trusted_keys && type == &key_type_keyring &&
+ dest_keyring == secondary_trusted_keys &&
+ payload == &machine_trusted_keys->payload)
+ /* Allow the machine keyring to be added to the secondary */
+ return 0;
+
+ return restrict_link_by_builtin_and_secondary_trusted(dest_keyring, type,
+ payload, restrict_key);
+}
+#endif
/*
* Create the trusted keyrings
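
restrict_link_by_builtin_secondary_and_machine() whitelists exactly one extra link, the machine keyring into the secondary keyring, and delegates everything else to the pre-existing builtin-and-secondary check. A sketch of that delegation pattern, with stand-in predicates:

#include <stdbool.h>
#include <stdio.h>

static bool base_restriction(int key_id)
{
	return key_id == 1;	/* stand-in: only key 1 is trusted */
}

static bool machine_restriction(int key_id, bool is_machine_keyring)
{
	if (is_machine_keyring)
		return true;	/* the one allowed extra link */
	return base_restriction(key_id);
}

int main(void)
{
	printf("machine keyring: %d\n", machine_restriction(99, true));
	printf("random key:      %d\n", machine_restriction(99, false));
	return 0;
}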
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 442765219c37..d6d7e84bb7f8 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -231,6 +231,13 @@ config CRYPTO_DH
help
Generic implementation of the Diffie-Hellman algorithm.
+config CRYPTO_DH_RFC7919_GROUPS
+ bool "Support for RFC 7919 FFDHE group parameters"
+ depends on CRYPTO_DH
+ select CRYPTO_RNG_DEFAULT
+ help
+ Provide support for RFC 7919 FFDHE group parameters. If unsure, say N.
+
config CRYPTO_ECC
tristate
select CRYPTO_RNG_DEFAULT
@@ -267,7 +274,7 @@ config CRYPTO_ECRDSA
config CRYPTO_SM2
tristate "SM2 algorithm"
- select CRYPTO_SM3
+ select CRYPTO_LIB_SM3
select CRYPTO_AKCIPHER
select CRYPTO_MANAGER
select MPILIB
@@ -425,6 +432,7 @@ config CRYPTO_LRW
select CRYPTO_SKCIPHER
select CRYPTO_MANAGER
select CRYPTO_GF128MUL
+ select CRYPTO_ECB
help
LRW: Liskov Rivest Wagner, a tweakable, non malleable, non movable
narrow block cipher mode for dm-crypt. Use it with cipher
@@ -999,6 +1007,7 @@ config CRYPTO_SHA3
config CRYPTO_SM3
tristate "SM3 digest algorithm"
select CRYPTO_HASH
+ select CRYPTO_LIB_SM3
help
SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3).
It is part of the Chinese Commercial Cryptography suite.
@@ -1007,6 +1016,19 @@ config CRYPTO_SM3
http://www.oscca.gov.cn/UpFile/20101222141857786.pdf
https://datatracker.ietf.org/doc/html/draft-shen-sm3-hash
+config CRYPTO_SM3_AVX_X86_64
+ tristate "SM3 digest algorithm (x86_64/AVX)"
+ depends on X86 && 64BIT
+ select CRYPTO_HASH
+ select CRYPTO_LIB_SM3
+ help
+	  SM3 secure hash function as defined by OSCCA GM/T 0004-2012 (SM3).
+	  It is part of the Chinese Commercial Cryptography suite. This is
+	  an SM3 implementation optimized with Advanced Vector Extensions
+	  (AVX) when available.
+
+ If unsure, say N.
+
config CRYPTO_STREEBOG
tristate "Streebog Hash Function"
select CRYPTO_HASH
@@ -1847,6 +1869,7 @@ config CRYPTO_JITTERENTROPY
config CRYPTO_KDF800108_CTR
tristate
+ select CRYPTO_HMAC
select CRYPTO_SHA256
config CRYPTO_USER_API
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index e1ea18536a5f..c8289b7a85ba 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -25,12 +25,9 @@ struct alg_type_list {
struct list_head list;
};
-static atomic_long_t alg_memory_allocated;
-
static struct proto alg_proto = {
.name = "ALG",
.owner = THIS_MODULE,
- .memory_allocated = &alg_memory_allocated,
.obj_size = sizeof(struct alg_sock),
};
diff --git a/crypto/algapi.c b/crypto/algapi.c
index a366cb3e8aa1..d1c99288af3e 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -6,6 +6,7 @@
*/
#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/fips.h>
@@ -21,6 +22,11 @@
static LIST_HEAD(crypto_template_list);
+#ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS
+DEFINE_PER_CPU(bool, crypto_simd_disabled_for_test);
+EXPORT_PER_CPU_SYMBOL_GPL(crypto_simd_disabled_for_test);
+#endif
+
static inline void crypto_check_module_sig(struct module *mod)
{
if (fips_enabled && mod && !module_sig_ok(mod))
@@ -322,9 +328,17 @@ void crypto_alg_tested(const char *name, int err)
found:
q->cra_flags |= CRYPTO_ALG_DEAD;
alg = test->adult;
- if (err || list_empty(&alg->cra_list))
+
+ if (list_empty(&alg->cra_list))
goto complete;
+ if (err == -ECANCELED)
+ alg->cra_flags |= CRYPTO_ALG_FIPS_INTERNAL;
+ else if (err)
+ goto complete;
+ else
+ alg->cra_flags &= ~CRYPTO_ALG_FIPS_INTERNAL;
+
alg->cra_flags |= CRYPTO_ALG_TESTED;
/* Only satisfy larval waiters if we are the best. */
@@ -604,6 +618,7 @@ int crypto_register_instance(struct crypto_template *tmpl,
{
struct crypto_larval *larval;
struct crypto_spawn *spawn;
+ u32 fips_internal = 0;
int err;
err = crypto_check_alg(&inst->alg);
@@ -626,11 +641,15 @@ int crypto_register_instance(struct crypto_template *tmpl,
spawn->inst = inst;
spawn->registered = true;
+ fips_internal |= spawn->alg->cra_flags;
+
crypto_mod_put(spawn->alg);
spawn = next;
}
+ inst->alg.cra_flags |= (fips_internal & CRYPTO_ALG_FIPS_INTERNAL);
+
larval = __crypto_register_alg(&inst->alg);
if (IS_ERR(larval))
goto unlock;
@@ -683,7 +702,8 @@ int crypto_grab_spawn(struct crypto_spawn *spawn, struct crypto_instance *inst,
if (IS_ERR(name))
return PTR_ERR(name);
- alg = crypto_find_alg(name, spawn->frontend, type, mask);
+ alg = crypto_find_alg(name, spawn->frontend,
+ type | CRYPTO_ALG_FIPS_INTERNAL, mask);
if (IS_ERR(alg))
return PTR_ERR(alg);
@@ -1002,7 +1022,13 @@ void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len)
}
while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) {
- *(u64 *)dst = *(u64 *)src1 ^ *(u64 *)src2;
+ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+ u64 l = get_unaligned((u64 *)src1) ^
+ get_unaligned((u64 *)src2);
+ put_unaligned(l, (u64 *)dst);
+ } else {
+ *(u64 *)dst = *(u64 *)src1 ^ *(u64 *)src2;
+ }
dst += 8;
src1 += 8;
src2 += 8;
@@ -1010,7 +1036,13 @@ void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len)
}
while (len >= 4 && !(relalign & 3)) {
- *(u32 *)dst = *(u32 *)src1 ^ *(u32 *)src2;
+ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+ u32 l = get_unaligned((u32 *)src1) ^
+ get_unaligned((u32 *)src2);
+ put_unaligned(l, (u32 *)dst);
+ } else {
+ *(u32 *)dst = *(u32 *)src1 ^ *(u32 *)src2;
+ }
dst += 4;
src1 += 4;
src2 += 4;
@@ -1018,7 +1050,13 @@ void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len)
}
while (len >= 2 && !(relalign & 1)) {
- *(u16 *)dst = *(u16 *)src1 ^ *(u16 *)src2;
+ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+ u16 l = get_unaligned((u16 *)src1) ^
+ get_unaligned((u16 *)src2);
+ put_unaligned(l, (u16 *)dst);
+ } else {
+ *(u16 *)dst = *(u16 *)src1 ^ *(u16 *)src2;
+ }
dst += 2;
src1 += 2;
src2 += 2;
@@ -1324,3 +1362,4 @@ module_exit(crypto_algapi_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Cryptographic algorithms API");
+MODULE_SOFTDEP("pre: cryptomgr");
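
The __crypto_xor() hunks switch the word-at-a-time path to get_unaligned()/put_unaligned() on architectures with efficient unaligned access, so misaligned buffers no longer force the byte-by-byte fallback. A portable userspace model, using memcpy() as the standard stand-in for the kernel's unaligned accessors:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t get_unaligned_u64(const void *p)
{
	uint64_t v;

	memcpy(&v, p, sizeof(v));	/* safe at any alignment */
	return v;
}

static void put_unaligned_u64(uint64_t v, void *p)
{
	memcpy(p, &v, sizeof(v));
}

static void xor_blocks(uint8_t *dst, const uint8_t *a, const uint8_t *b,
		       size_t len)
{
	while (len >= 8) {
		put_unaligned_u64(get_unaligned_u64(a) ^ get_unaligned_u64(b),
				  dst);
		dst += 8; a += 8; b += 8; len -= 8;
	}
	while (len--)			/* tail bytes */
		*dst++ = *a++ ^ *b++;
}

int main(void)
{
	uint8_t a[12] = "hello world", b[12] = { 0 }, out[12];

	xor_blocks(out, a, b, sizeof(a));
	printf("%s\n", out);	/* XOR with zeros is the identity */
	return 0;
}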
diff --git a/crypto/api.c b/crypto/api.c
index cf0869dd130b..69508ae9345e 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -223,6 +223,8 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
else if (crypto_is_test_larval(larval) &&
!(alg->cra_flags & CRYPTO_ALG_TESTED))
alg = ERR_PTR(-EAGAIN);
+ else if (alg->cra_flags & CRYPTO_ALG_FIPS_INTERNAL)
+ alg = ERR_PTR(-EAGAIN);
else if (!crypto_mod_get(alg))
alg = ERR_PTR(-EAGAIN);
crypto_mod_put(&larval->alg);
@@ -233,6 +235,7 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type,
u32 mask)
{
+ const u32 fips = CRYPTO_ALG_FIPS_INTERNAL;
struct crypto_alg *alg;
u32 test = 0;
@@ -240,8 +243,20 @@ static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type,
test |= CRYPTO_ALG_TESTED;
down_read(&crypto_alg_sem);
- alg = __crypto_alg_lookup(name, type | test, mask | test);
- if (!alg && test) {
+ alg = __crypto_alg_lookup(name, (type | test) & ~fips,
+ (mask | test) & ~fips);
+ if (alg) {
+ if (((type | mask) ^ fips) & fips)
+ mask |= fips;
+ mask &= fips;
+
+ if (!crypto_is_larval(alg) &&
+ ((type ^ alg->cra_flags) & mask)) {
+ /* Algorithm is disallowed in FIPS mode. */
+ crypto_mod_put(alg);
+ alg = ERR_PTR(-ENOENT);
+ }
+ } else if (test) {
alg = __crypto_alg_lookup(name, type, mask);
if (alg && !crypto_is_larval(alg)) {
/* Test failed */
@@ -643,4 +658,3 @@ EXPORT_SYMBOL_GPL(crypto_req_done);
MODULE_DESCRIPTION("Cryptographic core API");
MODULE_LICENSE("GPL");
-MODULE_SOFTDEP("pre: cryptomgr");
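
crypto_alg_lookup() folds CRYPTO_ALG_FIPS_INTERNAL into the usual type/mask matching: an internal-only algorithm stays invisible unless the caller's type or mask explicitly names the bit. The core predicate is the masked-XOR test, sketched here with an illustrative flag value:

#include <stdbool.h>
#include <stdio.h>

#define ALG_FIPS_INTERNAL (1u << 4)	/* illustrative bit position */

static bool flags_match(unsigned int alg_flags, unsigned int type,
			unsigned int mask)
{
	/* the masked bits of the algorithm's flags must equal the
	 * masked bits of the requested type */
	return ((type ^ alg_flags) & mask) == 0;
}

int main(void)
{
	unsigned int internal = ALG_FIPS_INTERNAL, normal = 0;

	/* an ordinary lookup masks internal algorithms out */
	printf("%d\n", flags_match(internal, 0, ALG_FIPS_INTERNAL)); /* 0 */
	printf("%d\n", flags_match(normal, 0, ALG_FIPS_INTERNAL));   /* 1 */
	return 0;
}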
diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig
index 1f1f004dc757..460bc5d0a828 100644
--- a/crypto/asymmetric_keys/Kconfig
+++ b/crypto/asymmetric_keys/Kconfig
@@ -22,18 +22,6 @@ config ASYMMETRIC_PUBLIC_KEY_SUBTYPE
appropriate hash algorithms (such as SHA-1) must be available.
ENOPKG will be reported if the requisite algorithm is unavailable.
-config ASYMMETRIC_TPM_KEY_SUBTYPE
- tristate "Asymmetric TPM backed private key subtype"
- depends on TCG_TPM
- depends on TRUSTED_KEYS
- select CRYPTO_HMAC
- select CRYPTO_SHA1
- select CRYPTO_HASH_INFO
- help
- This option provides support for TPM backed private key type handling.
- Operations such as sign, verify, encrypt, decrypt are performed by
- the TPM after the private key is loaded.
-
config X509_CERTIFICATE_PARSER
tristate "X.509 certificate parser"
depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE
@@ -54,15 +42,6 @@ config PKCS8_PRIVATE_KEY_PARSER
private key data and provides the ability to instantiate a crypto key
from that data.
-config TPM_KEY_PARSER
- tristate "TPM private key parser"
- depends on ASYMMETRIC_TPM_KEY_SUBTYPE
- select ASN1
- help
- This option provides support for parsing TPM format blobs for
- private key data and provides the ability to instantiate a crypto key
- from that data.
-
config PKCS7_MESSAGE_PARSER
tristate "PKCS#7 message parser"
depends on X509_CERTIFICATE_PARSER
diff --git a/crypto/asymmetric_keys/Makefile b/crypto/asymmetric_keys/Makefile
index 28b91adba2ae..c38424f55b08 100644
--- a/crypto/asymmetric_keys/Makefile
+++ b/crypto/asymmetric_keys/Makefile
@@ -11,7 +11,6 @@ asymmetric_keys-y := \
signature.o
obj-$(CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE) += public_key.o
-obj-$(CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE) += asym_tpm.o
#
# X.509 Certificate handling
@@ -75,14 +74,3 @@ verify_signed_pefile-y := \
$(obj)/mscode_parser.o: $(obj)/mscode.asn1.h $(obj)/mscode.asn1.h
$(obj)/mscode.asn1.o: $(obj)/mscode.asn1.c $(obj)/mscode.asn1.h
-
-#
-# TPM private key parsing
-#
-obj-$(CONFIG_TPM_KEY_PARSER) += tpm_key_parser.o
-tpm_key_parser-y := \
- tpm.asn1.o \
- tpm_parser.o
-
-$(obj)/tpm_parser.o: $(obj)/tpm.asn1.h
-$(obj)/tpm.asn1.o: $(obj)/tpm.asn1.c $(obj)/tpm.asn1.h
diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
deleted file mode 100644
index 0959613560b9..000000000000
--- a/crypto/asymmetric_keys/asym_tpm.c
+++ /dev/null
@@ -1,957 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define pr_fmt(fmt) "ASYM-TPM: "fmt
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/export.h>
-#include <linux/kernel.h>
-#include <linux/seq_file.h>
-#include <linux/scatterlist.h>
-#include <linux/tpm.h>
-#include <linux/tpm_command.h>
-#include <crypto/akcipher.h>
-#include <crypto/hash.h>
-#include <crypto/sha1.h>
-#include <asm/unaligned.h>
-#include <keys/asymmetric-subtype.h>
-#include <keys/trusted_tpm.h>
-#include <crypto/asym_tpm_subtype.h>
-#include <crypto/public_key.h>
-
-#define TPM_ORD_FLUSHSPECIFIC 186
-#define TPM_ORD_LOADKEY2 65
-#define TPM_ORD_UNBIND 30
-#define TPM_ORD_SIGN 60
-
-#define TPM_RT_KEY 0x00000001
-
-/*
- * Load a TPM key from the blob provided by userspace
- */
-static int tpm_loadkey2(struct tpm_buf *tb,
- uint32_t keyhandle, unsigned char *keyauth,
- const unsigned char *keyblob, int keybloblen,
- uint32_t *newhandle)
-{
- unsigned char nonceodd[TPM_NONCE_SIZE];
- unsigned char enonce[TPM_NONCE_SIZE];
- unsigned char authdata[SHA1_DIGEST_SIZE];
- uint32_t authhandle = 0;
- unsigned char cont = 0;
- uint32_t ordinal;
- int ret;
-
- ordinal = htonl(TPM_ORD_LOADKEY2);
-
- /* session for loading the key */
- ret = oiap(tb, &authhandle, enonce);
- if (ret < 0) {
- pr_info("oiap failed (%d)\n", ret);
- return ret;
- }
-
- /* generate odd nonce */
- ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE);
- if (ret < 0) {
- pr_info("tpm_get_random failed (%d)\n", ret);
- return ret;
- }
-
- /* calculate authorization HMAC value */
- ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce,
- nonceodd, cont, sizeof(uint32_t), &ordinal,
- keybloblen, keyblob, 0, 0);
- if (ret < 0)
- return ret;
-
- /* build the request buffer */
- tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_LOADKEY2);
- tpm_buf_append_u32(tb, keyhandle);
- tpm_buf_append(tb, keyblob, keybloblen);
- tpm_buf_append_u32(tb, authhandle);
- tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE);
- tpm_buf_append_u8(tb, cont);
- tpm_buf_append(tb, authdata, SHA1_DIGEST_SIZE);
-
- ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
- if (ret < 0) {
- pr_info("authhmac failed (%d)\n", ret);
- return ret;
- }
-
- ret = TSS_checkhmac1(tb->data, ordinal, nonceodd, keyauth,
- SHA1_DIGEST_SIZE, 0, 0);
- if (ret < 0) {
- pr_info("TSS_checkhmac1 failed (%d)\n", ret);
- return ret;
- }
-
- *newhandle = LOAD32(tb->data, TPM_DATA_OFFSET);
- return 0;
-}
-
-/*
- * Execute the FlushSpecific TPM command
- */
-static int tpm_flushspecific(struct tpm_buf *tb, uint32_t handle)
-{
- tpm_buf_reset(tb, TPM_TAG_RQU_COMMAND, TPM_ORD_FLUSHSPECIFIC);
- tpm_buf_append_u32(tb, handle);
- tpm_buf_append_u32(tb, TPM_RT_KEY);
-
- return trusted_tpm_send(tb->data, MAX_BUF_SIZE);
-}
-
-/*
- * Decrypt a blob provided by userspace using a specific key handle.
- * The handle is a well known handle or previously loaded by e.g. LoadKey2
- */
-static int tpm_unbind(struct tpm_buf *tb,
- uint32_t keyhandle, unsigned char *keyauth,
- const unsigned char *blob, uint32_t bloblen,
- void *out, uint32_t outlen)
-{
- unsigned char nonceodd[TPM_NONCE_SIZE];
- unsigned char enonce[TPM_NONCE_SIZE];
- unsigned char authdata[SHA1_DIGEST_SIZE];
- uint32_t authhandle = 0;
- unsigned char cont = 0;
- uint32_t ordinal;
- uint32_t datalen;
- int ret;
-
- ordinal = htonl(TPM_ORD_UNBIND);
- datalen = htonl(bloblen);
-
- /* session for loading the key */
- ret = oiap(tb, &authhandle, enonce);
- if (ret < 0) {
- pr_info("oiap failed (%d)\n", ret);
- return ret;
- }
-
- /* generate odd nonce */
- ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE);
- if (ret < 0) {
- pr_info("tpm_get_random failed (%d)\n", ret);
- return ret;
- }
-
- /* calculate authorization HMAC value */
- ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce,
- nonceodd, cont, sizeof(uint32_t), &ordinal,
- sizeof(uint32_t), &datalen,
- bloblen, blob, 0, 0);
- if (ret < 0)
- return ret;
-
- /* build the request buffer */
- tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_UNBIND);
- tpm_buf_append_u32(tb, keyhandle);
- tpm_buf_append_u32(tb, bloblen);
- tpm_buf_append(tb, blob, bloblen);
- tpm_buf_append_u32(tb, authhandle);
- tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE);
- tpm_buf_append_u8(tb, cont);
- tpm_buf_append(tb, authdata, SHA1_DIGEST_SIZE);
-
- ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
- if (ret < 0) {
- pr_info("authhmac failed (%d)\n", ret);
- return ret;
- }
-
- datalen = LOAD32(tb->data, TPM_DATA_OFFSET);
-
- ret = TSS_checkhmac1(tb->data, ordinal, nonceodd,
- keyauth, SHA1_DIGEST_SIZE,
- sizeof(uint32_t), TPM_DATA_OFFSET,
- datalen, TPM_DATA_OFFSET + sizeof(uint32_t),
- 0, 0);
- if (ret < 0) {
- pr_info("TSS_checkhmac1 failed (%d)\n", ret);
- return ret;
- }
-
- memcpy(out, tb->data + TPM_DATA_OFFSET + sizeof(uint32_t),
- min(outlen, datalen));
-
- return datalen;
-}
-
-/*
- * Sign a blob provided by userspace (that has had the hash function applied)
- * using a specific key handle. The handle is assumed to have been previously
- * loaded by e.g. LoadKey2.
- *
- * Note that the key signature scheme of the used key should be set to
- * TPM_SS_RSASSAPKCS1v15_DER. This allows the hashed input to be of any size
- * up to key_length_in_bytes - 11 and not be limited to size 20 like the
- * TPM_SS_RSASSAPKCS1v15_SHA1 signature scheme.
- */
-static int tpm_sign(struct tpm_buf *tb,
- uint32_t keyhandle, unsigned char *keyauth,
- const unsigned char *blob, uint32_t bloblen,
- void *out, uint32_t outlen)
-{
- unsigned char nonceodd[TPM_NONCE_SIZE];
- unsigned char enonce[TPM_NONCE_SIZE];
- unsigned char authdata[SHA1_DIGEST_SIZE];
- uint32_t authhandle = 0;
- unsigned char cont = 0;
- uint32_t ordinal;
- uint32_t datalen;
- int ret;
-
- ordinal = htonl(TPM_ORD_SIGN);
- datalen = htonl(bloblen);
-
- /* session for loading the key */
- ret = oiap(tb, &authhandle, enonce);
- if (ret < 0) {
- pr_info("oiap failed (%d)\n", ret);
- return ret;
- }
-
- /* generate odd nonce */
- ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE);
- if (ret < 0) {
- pr_info("tpm_get_random failed (%d)\n", ret);
- return ret;
- }
-
- /* calculate authorization HMAC value */
- ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce,
- nonceodd, cont, sizeof(uint32_t), &ordinal,
- sizeof(uint32_t), &datalen,
- bloblen, blob, 0, 0);
- if (ret < 0)
- return ret;
-
- /* build the request buffer */
- tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_SIGN);
- tpm_buf_append_u32(tb, keyhandle);
- tpm_buf_append_u32(tb, bloblen);
- tpm_buf_append(tb, blob, bloblen);
- tpm_buf_append_u32(tb, authhandle);
- tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE);
- tpm_buf_append_u8(tb, cont);
- tpm_buf_append(tb, authdata, SHA1_DIGEST_SIZE);
-
- ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
- if (ret < 0) {
- pr_info("authhmac failed (%d)\n", ret);
- return ret;
- }
-
- datalen = LOAD32(tb->data, TPM_DATA_OFFSET);
-
- ret = TSS_checkhmac1(tb->data, ordinal, nonceodd,
- keyauth, SHA1_DIGEST_SIZE,
- sizeof(uint32_t), TPM_DATA_OFFSET,
- datalen, TPM_DATA_OFFSET + sizeof(uint32_t),
- 0, 0);
- if (ret < 0) {
- pr_info("TSS_checkhmac1 failed (%d)\n", ret);
- return ret;
- }
-
- memcpy(out, tb->data + TPM_DATA_OFFSET + sizeof(uint32_t),
- min(datalen, outlen));
-
- return datalen;
-}
-
-/* Room to fit two u32 zeros for algo id and parameters length. */
-#define SETKEY_PARAMS_SIZE (sizeof(u32) * 2)
-
-/*
- * Maximum buffer size for the BER/DER encoded public key. The public key
- * is of the form SEQUENCE { INTEGER n, INTEGER e } where n is a maximum 2048
- * bit key and e is usually 65537
- * The encoding overhead is:
- * - max 4 bytes for SEQUENCE
- * - max 4 bytes for INTEGER n type/length
- * - 257 bytes of n
- * - max 2 bytes for INTEGER e type/length
- * - 3 bytes of e
- * - 4+4 of zeros for set_pub_key parameters (SETKEY_PARAMS_SIZE)
- */
-#define PUB_KEY_BUF_SIZE (4 + 4 + 257 + 2 + 3 + SETKEY_PARAMS_SIZE)
-
-/*
- * Provide a part of a description of the key for /proc/keys.
- */
-static void asym_tpm_describe(const struct key *asymmetric_key,
- struct seq_file *m)
-{
- struct tpm_key *tk = asymmetric_key->payload.data[asym_crypto];
-
- if (!tk)
- return;
-
- seq_printf(m, "TPM1.2/Blob");
-}
-
-static void asym_tpm_destroy(void *payload0, void *payload3)
-{
- struct tpm_key *tk = payload0;
-
- if (!tk)
- return;
-
- kfree(tk->blob);
- tk->blob_len = 0;
-
- kfree(tk);
-}
-
-/* How many bytes will it take to encode the length */
-static inline uint32_t definite_length(uint32_t len)
-{
- if (len <= 127)
- return 1;
- if (len <= 255)
- return 2;
- return 3;
-}
-
-static inline uint8_t *encode_tag_length(uint8_t *buf, uint8_t tag,
- uint32_t len)
-{
- *buf++ = tag;
-
- if (len <= 127) {
- buf[0] = len;
- return buf + 1;
- }
-
- if (len <= 255) {
- buf[0] = 0x81;
- buf[1] = len;
- return buf + 2;
- }
-
- buf[0] = 0x82;
- put_unaligned_be16(len, buf + 1);
- return buf + 3;
-}
-
-static uint32_t derive_pub_key(const void *pub_key, uint32_t len, uint8_t *buf)
-{
- uint8_t *cur = buf;
- uint32_t n_len = definite_length(len) + 1 + len + 1;
- uint32_t e_len = definite_length(3) + 1 + 3;
- uint8_t e[3] = { 0x01, 0x00, 0x01 };
-
- /* SEQUENCE */
- cur = encode_tag_length(cur, 0x30, n_len + e_len);
- /* INTEGER n */
- cur = encode_tag_length(cur, 0x02, len + 1);
- cur[0] = 0x00;
- memcpy(cur + 1, pub_key, len);
- cur += len + 1;
- cur = encode_tag_length(cur, 0x02, sizeof(e));
- memcpy(cur, e, sizeof(e));
- cur += sizeof(e);
- /* Zero parameters to satisfy set_pub_key ABI. */
- memzero_explicit(cur, SETKEY_PARAMS_SIZE);
-
- return cur - buf;
-}
-
-/*
- * Determine the crypto algorithm name.
- */
-static int determine_akcipher(const char *encoding, const char *hash_algo,
- char alg_name[CRYPTO_MAX_ALG_NAME])
-{
- if (strcmp(encoding, "pkcs1") == 0) {
- if (!hash_algo) {
- strcpy(alg_name, "pkcs1pad(rsa)");
- return 0;
- }
-
- if (snprintf(alg_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(rsa,%s)",
- hash_algo) >= CRYPTO_MAX_ALG_NAME)
- return -EINVAL;
-
- return 0;
- }
-
- if (strcmp(encoding, "raw") == 0) {
- strcpy(alg_name, "rsa");
- return 0;
- }
-
- return -ENOPKG;
-}
-
-/*
- * Query information about a key.
- */
-static int tpm_key_query(const struct kernel_pkey_params *params,
- struct kernel_pkey_query *info)
-{
- struct tpm_key *tk = params->key->payload.data[asym_crypto];
- int ret;
- char alg_name[CRYPTO_MAX_ALG_NAME];
- struct crypto_akcipher *tfm;
- uint8_t der_pub_key[PUB_KEY_BUF_SIZE];
- uint32_t der_pub_key_len;
- int len;
-
- /* TPM only works on private keys, public keys still done in software */
- ret = determine_akcipher(params->encoding, params->hash_algo, alg_name);
- if (ret < 0)
- return ret;
-
- tfm = crypto_alloc_akcipher(alg_name, 0, 0);
- if (IS_ERR(tfm))
- return PTR_ERR(tfm);
-
- der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len,
- der_pub_key);
-
- ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len);
- if (ret < 0)
- goto error_free_tfm;
-
- len = crypto_akcipher_maxsize(tfm);
-
- info->key_size = tk->key_len;
- info->max_data_size = tk->key_len / 8;
- info->max_sig_size = len;
- info->max_enc_size = len;
- info->max_dec_size = tk->key_len / 8;
-
- info->supported_ops = KEYCTL_SUPPORTS_ENCRYPT |
- KEYCTL_SUPPORTS_DECRYPT |
- KEYCTL_SUPPORTS_VERIFY |
- KEYCTL_SUPPORTS_SIGN;
-
- ret = 0;
-error_free_tfm:
- crypto_free_akcipher(tfm);
- pr_devel("<==%s() = %d\n", __func__, ret);
- return ret;
-}
-
-/*
- * The encryption operation is performed with the public key and is hence
- * done in software.
- */
-static int tpm_key_encrypt(struct tpm_key *tk,
- struct kernel_pkey_params *params,
- const void *in, void *out)
-{
- char alg_name[CRYPTO_MAX_ALG_NAME];
- struct crypto_akcipher *tfm;
- struct akcipher_request *req;
- struct crypto_wait cwait;
- struct scatterlist in_sg, out_sg;
- uint8_t der_pub_key[PUB_KEY_BUF_SIZE];
- uint32_t der_pub_key_len;
- int ret;
-
- pr_devel("==>%s()\n", __func__);
-
- ret = determine_akcipher(params->encoding, params->hash_algo, alg_name);
- if (ret < 0)
- return ret;
-
- tfm = crypto_alloc_akcipher(alg_name, 0, 0);
- if (IS_ERR(tfm))
- return PTR_ERR(tfm);
-
- der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len,
- der_pub_key);
-
- ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len);
- if (ret < 0)
- goto error_free_tfm;
-
- ret = -ENOMEM;
- req = akcipher_request_alloc(tfm, GFP_KERNEL);
- if (!req)
- goto error_free_tfm;
-
- sg_init_one(&in_sg, in, params->in_len);
- sg_init_one(&out_sg, out, params->out_len);
- akcipher_request_set_crypt(req, &in_sg, &out_sg, params->in_len,
- params->out_len);
- crypto_init_wait(&cwait);
- akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
- CRYPTO_TFM_REQ_MAY_SLEEP,
- crypto_req_done, &cwait);
-
- ret = crypto_akcipher_encrypt(req);
- ret = crypto_wait_req(ret, &cwait);
-
- if (ret == 0)
- ret = req->dst_len;
-
- akcipher_request_free(req);
-error_free_tfm:
- crypto_free_akcipher(tfm);
- pr_devel("<==%s() = %d\n", __func__, ret);
- return ret;
-}
-
-/*
- * Decryption operation is performed with the private key in the TPM.
- */
-static int tpm_key_decrypt(struct tpm_key *tk,
- struct kernel_pkey_params *params,
- const void *in, void *out)
-{
- struct tpm_buf tb;
- uint32_t keyhandle;
- uint8_t srkauth[SHA1_DIGEST_SIZE];
- uint8_t keyauth[SHA1_DIGEST_SIZE];
- int r;
-
- pr_devel("==>%s()\n", __func__);
-
- if (params->hash_algo)
- return -ENOPKG;
-
- if (strcmp(params->encoding, "pkcs1"))
- return -ENOPKG;
-
- r = tpm_buf_init(&tb, 0, 0);
- if (r)
- return r;
-
- /* TODO: Handle a non-all zero SRK authorization */
- memset(srkauth, 0, sizeof(srkauth));
-
- r = tpm_loadkey2(&tb, SRKHANDLE, srkauth,
- tk->blob, tk->blob_len, &keyhandle);
- if (r < 0) {
- pr_devel("loadkey2 failed (%d)\n", r);
- goto error;
- }
-
- /* TODO: Handle a non-all zero key authorization */
- memset(keyauth, 0, sizeof(keyauth));
-
- r = tpm_unbind(&tb, keyhandle, keyauth,
- in, params->in_len, out, params->out_len);
- if (r < 0)
- pr_devel("tpm_unbind failed (%d)\n", r);
-
- if (tpm_flushspecific(&tb, keyhandle) < 0)
- pr_devel("flushspecific failed (%d)\n", r);
-
-error:
- tpm_buf_destroy(&tb);
- pr_devel("<==%s() = %d\n", __func__, r);
- return r;
-}
-
-/*
- * Hash algorithm OIDs plus their ASN.1 DER wrappings [RFC4880 sec 5.2.2].
- */
-static const u8 digest_info_md5[] = {
- 0x30, 0x20, 0x30, 0x0c, 0x06, 0x08,
- 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x02, 0x05, /* OID */
- 0x05, 0x00, 0x04, 0x10
-};
-
-static const u8 digest_info_sha1[] = {
- 0x30, 0x21, 0x30, 0x09, 0x06, 0x05,
- 0x2b, 0x0e, 0x03, 0x02, 0x1a,
- 0x05, 0x00, 0x04, 0x14
-};
-
-static const u8 digest_info_rmd160[] = {
- 0x30, 0x21, 0x30, 0x09, 0x06, 0x05,
- 0x2b, 0x24, 0x03, 0x02, 0x01,
- 0x05, 0x00, 0x04, 0x14
-};
-
-static const u8 digest_info_sha224[] = {
- 0x30, 0x2d, 0x30, 0x0d, 0x06, 0x09,
- 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04,
- 0x05, 0x00, 0x04, 0x1c
-};
-
-static const u8 digest_info_sha256[] = {
- 0x30, 0x31, 0x30, 0x0d, 0x06, 0x09,
- 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01,
- 0x05, 0x00, 0x04, 0x20
-};
-
-static const u8 digest_info_sha384[] = {
- 0x30, 0x41, 0x30, 0x0d, 0x06, 0x09,
- 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02,
- 0x05, 0x00, 0x04, 0x30
-};
-
-static const u8 digest_info_sha512[] = {
- 0x30, 0x51, 0x30, 0x0d, 0x06, 0x09,
- 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03,
- 0x05, 0x00, 0x04, 0x40
-};
-
-static const struct asn1_template {
- const char *name;
- const u8 *data;
- size_t size;
-} asn1_templates[] = {
-#define _(X) { #X, digest_info_##X, sizeof(digest_info_##X) }
- _(md5),
- _(sha1),
- _(rmd160),
- _(sha256),
- _(sha384),
- _(sha512),
- _(sha224),
- { NULL }
-#undef _
-};
-
-static const struct asn1_template *lookup_asn1(const char *name)
-{
- const struct asn1_template *p;
-
- for (p = asn1_templates; p->name; p++)
- if (strcmp(name, p->name) == 0)
- return p;
- return NULL;
-}
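
Each digest_info_* table above is the DER-encoded DigestInfo prefix that EMSA-PKCS1-v1_5 places in front of the raw hash; for sha256 the 19-byte prefix plus the 32-byte digest yields the 51-byte buffer that tpm_key_sign() below hands to TPM_Sign. A standalone consistency check of the sha256 template, for illustration:

#include <stdio.h>
#include <stdint.h>

/* Sanity-check the sha256 DigestInfo prefix above: the outer SEQUENCE
 * length (byte 1) must equal prefix - 2 + digest size, and the trailing
 * OCTET STRING length octet must equal the digest size. */
static const uint8_t digest_info_sha256[] = {
	0x30, 0x31, 0x30, 0x0d, 0x06, 0x09,
	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01,
	0x05, 0x00, 0x04, 0x20
};

int main(void)
{
	size_t prefix = sizeof(digest_info_sha256);		/* 19 */
	size_t digest = digest_info_sha256[prefix - 1];		/* 0x20 = 32 */

	printf("wrapped signing input: %zu bytes, outer length ok: %d\n",
	       prefix + digest,
	       digest_info_sha256[1] == prefix - 2 + digest);
	return 0;
}
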
-
-/*
- * Sign operation is performed with the private key in the TPM.
- */
-static int tpm_key_sign(struct tpm_key *tk,
- struct kernel_pkey_params *params,
- const void *in, void *out)
-{
- struct tpm_buf tb;
- uint32_t keyhandle;
- uint8_t srkauth[SHA1_DIGEST_SIZE];
- uint8_t keyauth[SHA1_DIGEST_SIZE];
- void *asn1_wrapped = NULL;
- uint32_t in_len = params->in_len;
- int r;
-
- pr_devel("==>%s()\n", __func__);
-
- if (strcmp(params->encoding, "pkcs1"))
- return -ENOPKG;
-
- if (params->hash_algo) {
- const struct asn1_template *asn1 =
- lookup_asn1(params->hash_algo);
-
- if (!asn1)
- return -ENOPKG;
-
- /* request enough space for the ASN.1 template + input hash */
- asn1_wrapped = kzalloc(in_len + asn1->size, GFP_KERNEL);
- if (!asn1_wrapped)
- return -ENOMEM;
-
- /* Copy ASN.1 template, then the input */
- memcpy(asn1_wrapped, asn1->data, asn1->size);
- memcpy(asn1_wrapped + asn1->size, in, in_len);
-
- in = asn1_wrapped;
- in_len += asn1->size;
- }
-
- if (in_len > tk->key_len / 8 - 11) {
- r = -EOVERFLOW;
- goto error_free_asn1_wrapped;
- }
-
- r = tpm_buf_init(&tb, 0, 0);
- if (r)
- goto error_free_asn1_wrapped;
-
- /* TODO: Handle a non-all zero SRK authorization */
- memset(srkauth, 0, sizeof(srkauth));
-
- r = tpm_loadkey2(&tb, SRKHANDLE, srkauth,
- tk->blob, tk->blob_len, &keyhandle);
- if (r < 0) {
- pr_devel("loadkey2 failed (%d)\n", r);
- goto error_free_tb;
- }
-
- /* TODO: Handle a non-all zero key authorization */
- memset(keyauth, 0, sizeof(keyauth));
-
- r = tpm_sign(&tb, keyhandle, keyauth, in, in_len, out, params->out_len);
- if (r < 0)
- pr_devel("tpm_sign failed (%d)\n", r);
-
- if (tpm_flushspecific(&tb, keyhandle) < 0)
- pr_devel("flushspecific failed (%d)\n", r);
-
-error_free_tb:
- tpm_buf_destroy(&tb);
-error_free_asn1_wrapped:
- kfree(asn1_wrapped);
- pr_devel("<==%s() = %d\n", __func__, r);
- return r;
-}
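
The in_len > tk->key_len / 8 - 11 test above is the EMSA-PKCS1-v1_5 capacity bound: the 0x00 0x01 PS 0x00 padding needs at least 11 octets (3 fixed bytes plus a minimum 8-byte PS). Worked numbers as a standalone sketch:

#include <stdio.h>

/* PKCS#1 v1.5 capacity check mirrored from tpm_key_sign() above. */
int main(void)
{
	unsigned int key_len = 2048;		/* bits */
	unsigned int max_in = key_len / 8 - 11;	/* 245 bytes of payload */
	unsigned int sha256_in = 19 + 32;	/* DigestInfo + digest = 51 */

	printf("max payload %u, sha256 signing input %u -> %s\n",
	       max_in, sha256_in, sha256_in <= max_in ? "fits" : "-EOVERFLOW");
	return 0;
}
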
-
-/*
- * Do encryption, decryption and signing ops.
- */
-static int tpm_key_eds_op(struct kernel_pkey_params *params,
- const void *in, void *out)
-{
- struct tpm_key *tk = params->key->payload.data[asym_crypto];
- int ret = -EOPNOTSUPP;
-
- /* Perform the encryption calculation. */
- switch (params->op) {
- case kernel_pkey_encrypt:
- ret = tpm_key_encrypt(tk, params, in, out);
- break;
- case kernel_pkey_decrypt:
- ret = tpm_key_decrypt(tk, params, in, out);
- break;
- case kernel_pkey_sign:
- ret = tpm_key_sign(tk, params, in, out);
- break;
- default:
- BUG();
- }
-
- return ret;
-}
-
-/*
- * Verify a signature using a public key.
- */
-static int tpm_key_verify_signature(const struct key *key,
- const struct public_key_signature *sig)
-{
- const struct tpm_key *tk = key->payload.data[asym_crypto];
- struct crypto_wait cwait;
- struct crypto_akcipher *tfm;
- struct akcipher_request *req;
- struct scatterlist src_sg[2];
- char alg_name[CRYPTO_MAX_ALG_NAME];
- uint8_t der_pub_key[PUB_KEY_BUF_SIZE];
- uint32_t der_pub_key_len;
- int ret;
-
- pr_devel("==>%s()\n", __func__);
-
- BUG_ON(!tk);
- BUG_ON(!sig);
- BUG_ON(!sig->s);
-
- if (!sig->digest)
- return -ENOPKG;
-
- ret = determine_akcipher(sig->encoding, sig->hash_algo, alg_name);
- if (ret < 0)
- return ret;
-
- tfm = crypto_alloc_akcipher(alg_name, 0, 0);
- if (IS_ERR(tfm))
- return PTR_ERR(tfm);
-
- der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len,
- der_pub_key);
-
- ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len);
- if (ret < 0)
- goto error_free_tfm;
-
- ret = -ENOMEM;
- req = akcipher_request_alloc(tfm, GFP_KERNEL);
- if (!req)
- goto error_free_tfm;
-
- sg_init_table(src_sg, 2);
- sg_set_buf(&src_sg[0], sig->s, sig->s_size);
- sg_set_buf(&src_sg[1], sig->digest, sig->digest_size);
- akcipher_request_set_crypt(req, src_sg, NULL, sig->s_size,
- sig->digest_size);
- crypto_init_wait(&cwait);
- akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
- CRYPTO_TFM_REQ_MAY_SLEEP,
- crypto_req_done, &cwait);
- ret = crypto_wait_req(crypto_akcipher_verify(req), &cwait);
-
- akcipher_request_free(req);
-error_free_tfm:
- crypto_free_akcipher(tfm);
- pr_devel("<==%s() = %d\n", __func__, ret);
- if (WARN_ON_ONCE(ret > 0))
- ret = -EINVAL;
- return ret;
-}
-
-/*
- * Parse enough information out of the TPM_KEY structure:
- * TPM_STRUCT_VER -> 4 bytes
- * TPM_KEY_USAGE -> 2 bytes
- * TPM_KEY_FLAGS -> 4 bytes
- * TPM_AUTH_DATA_USAGE -> 1 byte
- * TPM_KEY_PARMS -> variable
- * UINT32 PCRInfoSize -> 4 bytes
- * BYTE* -> PCRInfoSize bytes
- * TPM_STORE_PUBKEY
- * UINT32 encDataSize;
- * BYTE* -> encDataSize;
- *
- * TPM_KEY_PARMS:
- * TPM_ALGORITHM_ID -> 4 bytes
- * TPM_ENC_SCHEME -> 2 bytes
- * TPM_SIG_SCHEME -> 2 bytes
- * UINT32 parmSize -> 4 bytes
- * BYTE* -> variable
- */
-static int extract_key_parameters(struct tpm_key *tk)
-{
- const void *cur = tk->blob;
- uint32_t len = tk->blob_len;
- const void *pub_key;
- uint32_t sz;
- uint32_t key_len;
-
- if (len < 11)
- return -EBADMSG;
-
- /* Ensure this is a legacy key */
- if (get_unaligned_be16(cur + 4) != 0x0015)
- return -EBADMSG;
-
- /* Skip to TPM_KEY_PARMS */
- cur += 11;
- len -= 11;
-
- if (len < 12)
- return -EBADMSG;
-
- /* Make sure this is an RSA key */
- if (get_unaligned_be32(cur) != 0x00000001)
- return -EBADMSG;
-
- /* Make sure this is TPM_ES_RSAESPKCSv15 encoding scheme */
- if (get_unaligned_be16(cur + 4) != 0x0002)
- return -EBADMSG;
-
- /* Make sure this is TPM_SS_RSASSAPKCS1v15_DER signature scheme */
- if (get_unaligned_be16(cur + 6) != 0x0003)
- return -EBADMSG;
-
- sz = get_unaligned_be32(cur + 8);
- if (len < sz + 12)
- return -EBADMSG;
-
- /* Move to TPM_RSA_KEY_PARMS */
- len -= 12;
- cur += 12;
-
- /* Grab the RSA key length */
- key_len = get_unaligned_be32(cur);
-
- switch (key_len) {
- case 512:
- case 1024:
- case 1536:
- case 2048:
- break;
- default:
- return -EINVAL;
- }
-
- /* Move just past TPM_KEY_PARMS */
- cur += sz;
- len -= sz;
-
- if (len < 4)
- return -EBADMSG;
-
- sz = get_unaligned_be32(cur);
- if (len < 4 + sz)
- return -EBADMSG;
-
- /* Move to TPM_STORE_PUBKEY */
- cur += 4 + sz;
- len -= 4 + sz;
-
- /* Grab the size of the public key; it should match the key size */
- sz = get_unaligned_be32(cur);
- if (sz > 256)
- return -EINVAL;
-
- pub_key = cur + 4;
-
- tk->key_len = key_len;
- tk->pub_key = pub_key;
- tk->pub_key_len = sz;
-
- return 0;
-}
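
A userspace sketch of the fixed-offset checks above, for illustration (all multi-byte fields are big-endian; offsets follow from the layout comment before extract_key_parameters()):

#include <stdint.h>

static uint32_t get_be32(const uint8_t *p)
{
	return ((uint32_t)p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
}

static uint16_t get_be16(const uint8_t *p)
{
	return (p[0] << 8) | p[1];
}

/* Hypothetical standalone version of the header validation:
 * TPM_KEY_USAGE == 0x0015 (TPM_KEY_LEGACY) at offset 4,
 * TPM_ALGORITHM_ID == 0x00000001 (TPM_ALG_RSA) at offset 11,
 * encoding and signature scheme words at offsets 15 and 17. */
static int check_blob_header(const uint8_t *blob, uint32_t len)
{
	if (len < 11 + 12)
		return -1;
	if (get_be16(blob + 4) != 0x0015)	/* not a legacy key */
		return -1;
	if (get_be32(blob + 11) != 0x00000001)	/* not RSA */
		return -1;
	if (get_be16(blob + 15) != 0x0002)	/* not TPM_ES_RSAESPKCSv15 */
		return -1;
	if (get_be16(blob + 17) != 0x0003)	/* not RSASSAPKCS1v15_DER */
		return -1;
	return 0;
}
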
-
-/* Given the blob, parse it and load it into the TPM */
-struct tpm_key *tpm_key_create(const void *blob, uint32_t blob_len)
-{
- int r;
- struct tpm_key *tk;
-
- r = tpm_is_tpm2(NULL);
- if (r < 0)
- goto error;
-
- /* We don't support TPM2 yet */
- if (r > 0) {
- r = -ENODEV;
- goto error;
- }
-
- r = -ENOMEM;
- tk = kzalloc(sizeof(struct tpm_key), GFP_KERNEL);
- if (!tk)
- goto error;
-
- tk->blob = kmemdup(blob, blob_len, GFP_KERNEL);
- if (!tk->blob)
- goto error_memdup;
-
- tk->blob_len = blob_len;
-
- r = extract_key_parameters(tk);
- if (r < 0)
- goto error_extract;
-
- return tk;
-
-error_extract:
- kfree(tk->blob);
- tk->blob_len = 0;
-error_memdup:
- kfree(tk);
-error:
- return ERR_PTR(r);
-}
-EXPORT_SYMBOL_GPL(tpm_key_create);
-
-/*
- * TPM-based asymmetric key subtype
- */
-struct asymmetric_key_subtype asym_tpm_subtype = {
- .owner = THIS_MODULE,
- .name = "asym_tpm",
- .name_len = sizeof("asym_tpm") - 1,
- .describe = asym_tpm_describe,
- .destroy = asym_tpm_destroy,
- .query = tpm_key_query,
- .eds_op = tpm_key_eds_op,
- .verify_signature = tpm_key_verify_signature,
-};
-EXPORT_SYMBOL_GPL(asym_tpm_subtype);
-
-MODULE_DESCRIPTION("TPM based asymmetric key subtype");
-MODULE_AUTHOR("Intel Corporation");
-MODULE_LICENSE("GPL v2");
diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c
index 0b4d07aa8811..f6321c785714 100644
--- a/crypto/asymmetric_keys/pkcs7_verify.c
+++ b/crypto/asymmetric_keys/pkcs7_verify.c
@@ -174,12 +174,6 @@ static int pkcs7_find_key(struct pkcs7_message *pkcs7,
pr_devel("Sig %u: Found cert serial match X.509[%u]\n",
sinfo->index, certix);
- if (strcmp(x509->pub->pkey_algo, sinfo->sig->pkey_algo) != 0) {
- pr_warn("Sig %u: X.509 algo and PKCS#7 sig algo don't match\n",
- sinfo->index);
- continue;
- }
-
sinfo->signer = x509;
return 0;
}
@@ -226,9 +220,6 @@ static int pkcs7_verify_sig_chain(struct pkcs7_message *pkcs7,
return 0;
}
- if (x509->unsupported_key)
- goto unsupported_crypto_in_x509;
-
pr_debug("- issuer %s\n", x509->issuer);
sig = x509->sig;
if (sig->auth_ids[0])
@@ -245,7 +236,7 @@ static int pkcs7_verify_sig_chain(struct pkcs7_message *pkcs7,
* authority.
*/
if (x509->unsupported_sig)
- goto unsupported_crypto_in_x509;
+ goto unsupported_sig_in_x509;
x509->signer = x509;
pr_debug("- self-signed\n");
return 0;
@@ -309,7 +300,7 @@ static int pkcs7_verify_sig_chain(struct pkcs7_message *pkcs7,
might_sleep();
}
-unsupported_crypto_in_x509:
+unsupported_sig_in_x509:
/* Just prune the certificate chain at this point if we lack some
* crypto module to go further. Note, however, we don't want to set
* sinfo->unsupported_crypto as the signed info block may still be
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index 4fefb219bfdc..7c9e6be35c30 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -60,39 +60,83 @@ static void public_key_destroy(void *payload0, void *payload3)
}
/*
- * Determine the crypto algorithm name.
+ * Given a public_key, and an encoding and hash_algo to be used for signing
+ * and/or verification with that key, determine the name of the corresponding
+ * akcipher algorithm. Also check that encoding and hash_algo are allowed.
*/
-static
-int software_key_determine_akcipher(const char *encoding,
- const char *hash_algo,
- const struct public_key *pkey,
- char alg_name[CRYPTO_MAX_ALG_NAME])
+static int
+software_key_determine_akcipher(const struct public_key *pkey,
+ const char *encoding, const char *hash_algo,
+ char alg_name[CRYPTO_MAX_ALG_NAME])
{
int n;
- if (strcmp(encoding, "pkcs1") == 0) {
- /* The data wangled by the RSA algorithm is typically padded
- * and encoded in some manner, such as EMSA-PKCS1-1_5 [RFC3447
- * sec 8.2].
+ if (!encoding)
+ return -EINVAL;
+
+ if (strcmp(pkey->pkey_algo, "rsa") == 0) {
+ /*
+ * RSA signatures usually use EMSA-PKCS1-1_5 [RFC3447 sec 8.2].
+ */
+ if (strcmp(encoding, "pkcs1") == 0) {
+ if (!hash_algo)
+ n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
+ "pkcs1pad(%s)",
+ pkey->pkey_algo);
+ else
+ n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
+ "pkcs1pad(%s,%s)",
+ pkey->pkey_algo, hash_algo);
+ return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0;
+ }
+ if (strcmp(encoding, "raw") != 0)
+ return -EINVAL;
+ /*
+ * Raw RSA cannot differentiate between different hash
+ * algorithms.
+ */
+ if (hash_algo)
+ return -EINVAL;
+ } else if (strncmp(pkey->pkey_algo, "ecdsa", 5) == 0) {
+ if (strcmp(encoding, "x962") != 0)
+ return -EINVAL;
+ /*
+ * ECDSA signatures are taken over a raw hash, so they don't
+ * differentiate between different hash algorithms. That means
+ * that the verifier should hard-code a specific hash algorithm.
+ * Unfortunately, in practice ECDSA is used with multiple SHAs,
+ * so we have to allow all of them and not just one.
*/
if (!hash_algo)
- n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
- "pkcs1pad(%s)",
- pkey->pkey_algo);
- else
- n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
- "pkcs1pad(%s,%s)",
- pkey->pkey_algo, hash_algo);
- return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0;
- }
-
- if (strcmp(encoding, "raw") == 0 ||
- strcmp(encoding, "x962") == 0) {
- strcpy(alg_name, pkey->pkey_algo);
- return 0;
+ return -EINVAL;
+ if (strcmp(hash_algo, "sha1") != 0 &&
+ strcmp(hash_algo, "sha224") != 0 &&
+ strcmp(hash_algo, "sha256") != 0 &&
+ strcmp(hash_algo, "sha384") != 0 &&
+ strcmp(hash_algo, "sha512") != 0)
+ return -EINVAL;
+ } else if (strcmp(pkey->pkey_algo, "sm2") == 0) {
+ if (strcmp(encoding, "raw") != 0)
+ return -EINVAL;
+ if (!hash_algo)
+ return -EINVAL;
+ if (strcmp(hash_algo, "sm3") != 0)
+ return -EINVAL;
+ } else if (strcmp(pkey->pkey_algo, "ecrdsa") == 0) {
+ if (strcmp(encoding, "raw") != 0)
+ return -EINVAL;
+ if (!hash_algo)
+ return -EINVAL;
+ if (strcmp(hash_algo, "streebog256") != 0 &&
+ strcmp(hash_algo, "streebog512") != 0)
+ return -EINVAL;
+ } else {
+ /* Unknown public key algorithm */
+ return -ENOPKG;
}
-
- return -ENOPKG;
+ if (strscpy(alg_name, pkey->pkey_algo, CRYPTO_MAX_ALG_NAME) < 0)
+ return -EINVAL;
+ return 0;
}
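
The triples accepted by the rewritten helper are easiest to see as a table. Expected results below are inferred from the logic above, not taken from test output:

#include <stddef.h>

/* Illustrative (pkey_algo, encoding, hash_algo) -> alg_name examples
 * for software_key_determine_akcipher() as rewritten above: */
static const struct {
	const char *pkey_algo, *encoding, *hash_algo, *result;
} akcipher_examples[] = {
	{ "rsa",	"pkcs1",	"sha256",	"pkcs1pad(rsa,sha256)" },
	{ "rsa",	"pkcs1",	NULL,		"pkcs1pad(rsa)" },
	{ "rsa",	"raw",		NULL,		"rsa" },
	{ "rsa",	"raw",		"sha256",	"-EINVAL (raw RSA takes no hash)" },
	{ "ecdsa-nist-p256", "x962",	"sha256",	"ecdsa-nist-p256" },
	{ "ecdsa-nist-p256", "x962",	"md5",		"-EINVAL (hash not allowed)" },
	{ "sm2",	"raw",		"sm3",		"sm2" },
	{ "ecrdsa",	"raw",		"streebog256",	"ecrdsa" },
	{ "dsa",	"raw",		NULL,		"-ENOPKG (unknown algorithm)" },
};
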
static u8 *pkey_pack_u32(u8 *dst, u32 val)
@@ -113,9 +157,8 @@ static int software_key_query(const struct kernel_pkey_params *params,
u8 *key, *ptr;
int ret, len;
- ret = software_key_determine_akcipher(params->encoding,
- params->hash_algo,
- pkey, alg_name);
+ ret = software_key_determine_akcipher(pkey, params->encoding,
+ params->hash_algo, alg_name);
if (ret < 0)
return ret;
@@ -179,9 +222,8 @@ static int software_key_eds_op(struct kernel_pkey_params *params,
pr_devel("==>%s()\n", __func__);
- ret = software_key_determine_akcipher(params->encoding,
- params->hash_algo,
- pkey, alg_name);
+ ret = software_key_determine_akcipher(pkey, params->encoding,
+ params->hash_algo, alg_name);
if (ret < 0)
return ret;
@@ -325,9 +367,23 @@ int public_key_verify_signature(const struct public_key *pkey,
BUG_ON(!sig);
BUG_ON(!sig->s);
- ret = software_key_determine_akcipher(sig->encoding,
- sig->hash_algo,
- pkey, alg_name);
+ /*
+ * If the signature specifies a public key algorithm, it *must* match
+ * the key's actual public key algorithm.
+ *
+ * Small exception: ECDSA signatures don't specify the curve, but ECDSA
+ * keys do. So the strings can mismatch slightly in that case:
+ * "ecdsa-nist-*" for the key, but "ecdsa" for the signature.
+ */
+ if (sig->pkey_algo) {
+ if (strcmp(pkey->pkey_algo, sig->pkey_algo) != 0 &&
+ (strncmp(pkey->pkey_algo, "ecdsa-", 6) != 0 ||
+ strcmp(sig->pkey_algo, "ecdsa") != 0))
+ return -EKEYREJECTED;
+ }
+
+ ret = software_key_determine_akcipher(pkey, sig->encoding,
+ sig->hash_algo, alg_name);
if (ret < 0)
return ret;
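
The match rule added above can be read as a standalone predicate; a hedged userspace sketch:

#include <stdbool.h>
#include <string.h>

/* Sketch of the signature/key algorithm match rule added above: exact
 * match, or an "ecdsa-nist-*" key paired with a bare "ecdsa" signature
 * (ECDSA signatures don't name the curve, keys do). */
static bool sig_matches_key(const char *key_algo, const char *sig_algo)
{
	if (!sig_algo)
		return true;	/* nothing to check against */
	if (strcmp(key_algo, sig_algo) == 0)
		return true;
	return strncmp(key_algo, "ecdsa-", 6) == 0 &&
	       strcmp(sig_algo, "ecdsa") == 0;
}

/* sig_matches_key("ecdsa-nist-p256", "ecdsa") -> true
 * sig_matches_key("ecdsa-nist-p256", "rsa")   -> false -> -EKEYREJECTED */
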
diff --git a/crypto/asymmetric_keys/signature.c b/crypto/asymmetric_keys/signature.c
index 4aff3eebec17..2deff81f8af5 100644
--- a/crypto/asymmetric_keys/signature.c
+++ b/crypto/asymmetric_keys/signature.c
@@ -35,7 +35,7 @@ void public_key_signature_free(struct public_key_signature *sig)
EXPORT_SYMBOL_GPL(public_key_signature_free);
/**
- * query_asymmetric_key - Get information about an aymmetric key.
+ * query_asymmetric_key - Get information about an asymmetric key.
* @params: Various parameters.
* @info: Where to put the information.
*/
diff --git a/crypto/asymmetric_keys/tpm.asn1 b/crypto/asymmetric_keys/tpm.asn1
deleted file mode 100644
index d7f194232f30..000000000000
--- a/crypto/asymmetric_keys/tpm.asn1
+++ /dev/null
@@ -1,5 +0,0 @@
---
--- Unencrypted TPM Blob. For details of the format, see:
--- http://david.woodhou.se/draft-woodhouse-cert-best-practice.html#I-D.mavrogiannopoulos-tpmuri
---
-PrivateKeyInfo ::= OCTET STRING ({ tpm_note_key })
diff --git a/crypto/asymmetric_keys/tpm_parser.c b/crypto/asymmetric_keys/tpm_parser.c
deleted file mode 100644
index 96405d8dcd98..000000000000
--- a/crypto/asymmetric_keys/tpm_parser.c
+++ /dev/null
@@ -1,102 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define pr_fmt(fmt) "TPM-PARSER: "fmt
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/err.h>
-#include <keys/asymmetric-subtype.h>
-#include <keys/asymmetric-parser.h>
-#include <crypto/asym_tpm_subtype.h>
-#include "tpm.asn1.h"
-
-struct tpm_parse_context {
- const void *blob;
- u32 blob_len;
-};
-
-/*
- * Note the key data of the ASN.1 blob.
- */
-int tpm_note_key(void *context, size_t hdrlen,
- unsigned char tag,
- const void *value, size_t vlen)
-{
- struct tpm_parse_context *ctx = context;
-
- ctx->blob = value;
- ctx->blob_len = vlen;
-
- return 0;
-}
-
-/*
- * Parse a TPM-encrypted private key blob.
- */
-static struct tpm_key *tpm_parse(const void *data, size_t datalen)
-{
- struct tpm_parse_context ctx;
- long ret;
-
- memset(&ctx, 0, sizeof(ctx));
-
- /* Attempt to decode the private key */
- ret = asn1_ber_decoder(&tpm_decoder, &ctx, data, datalen);
- if (ret < 0)
- goto error;
-
- return tpm_key_create(ctx.blob, ctx.blob_len);
-
-error:
- return ERR_PTR(ret);
-}
-/*
- * Attempt to parse a data blob for a key as a TPM private key blob.
- */
-static int tpm_key_preparse(struct key_preparsed_payload *prep)
-{
- struct tpm_key *tk;
-
- /*
- * TPM 1.2 keys are at most 2048 bits long, so assume the blob is no
- * more than 4x that.
- */
- if (prep->datalen > 256 * 4)
- return -EMSGSIZE;
-
- tk = tpm_parse(prep->data, prep->datalen);
-
- if (IS_ERR(tk))
- return PTR_ERR(tk);
-
- /* We're pinning the module by being linked against it */
- __module_get(asym_tpm_subtype.owner);
- prep->payload.data[asym_subtype] = &asym_tpm_subtype;
- prep->payload.data[asym_key_ids] = NULL;
- prep->payload.data[asym_crypto] = tk;
- prep->payload.data[asym_auth] = NULL;
- prep->quotalen = 100;
- return 0;
-}
-
-static struct asymmetric_key_parser tpm_key_parser = {
- .owner = THIS_MODULE,
- .name = "tpm_parser",
- .parse = tpm_key_preparse,
-};
-
-static int __init tpm_key_init(void)
-{
- return register_asymmetric_key_parser(&tpm_key_parser);
-}
-
-static void __exit tpm_key_exit(void)
-{
- unregister_asymmetric_key_parser(&tpm_key_parser);
-}
-
-module_init(tpm_key_init);
-module_exit(tpm_key_exit);
-
-MODULE_DESCRIPTION("TPM private key-blob parser");
-MODULE_LICENSE("GPL v2");
diff --git a/crypto/asymmetric_keys/x509.asn1 b/crypto/asymmetric_keys/x509.asn1
index 5c9f4e4a5231..92d59c32f96a 100644
--- a/crypto/asymmetric_keys/x509.asn1
+++ b/crypto/asymmetric_keys/x509.asn1
@@ -7,7 +7,7 @@ Certificate ::= SEQUENCE {
TBSCertificate ::= SEQUENCE {
version [ 0 ] Version DEFAULT,
serialNumber CertificateSerialNumber ({ x509_note_serial }),
- signature AlgorithmIdentifier ({ x509_note_pkey_algo }),
+ signature AlgorithmIdentifier ({ x509_note_sig_algo }),
issuer Name ({ x509_note_issuer }),
validity Validity,
subject Name ({ x509_note_subject }),
diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index 083405eb80c3..2899ed80bb18 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -19,15 +19,13 @@
struct x509_parse_context {
struct x509_certificate *cert; /* Certificate being constructed */
unsigned long data; /* Start of data */
- const void *cert_start; /* Start of cert content */
const void *key; /* Key data */
size_t key_size; /* Size of key data */
const void *params; /* Key parameters */
size_t params_size; /* Size of key parameters */
- enum OID key_algo; /* Public key algorithm */
+ enum OID key_algo; /* Algorithm used by the cert's key */
enum OID last_oid; /* Last OID encountered */
- enum OID algo_oid; /* Algorithm OID */
- unsigned char nr_mpi; /* Number of MPIs stored */
+ enum OID sig_algo; /* Algorithm used to sign the cert */
u8 o_size; /* Size of organizationName (O) */
u8 cn_size; /* Size of commonName (CN) */
u8 email_size; /* Size of emailAddress */
@@ -187,11 +185,10 @@ int x509_note_tbs_certificate(void *context, size_t hdrlen,
}
/*
- * Record the public key algorithm
+ * Record the algorithm that was used to sign this certificate.
*/
-int x509_note_pkey_algo(void *context, size_t hdrlen,
- unsigned char tag,
- const void *value, size_t vlen)
+int x509_note_sig_algo(void *context, size_t hdrlen, unsigned char tag,
+ const void *value, size_t vlen)
{
struct x509_parse_context *ctx = context;
@@ -263,22 +260,22 @@ int x509_note_pkey_algo(void *context, size_t hdrlen,
rsa_pkcs1:
ctx->cert->sig->pkey_algo = "rsa";
ctx->cert->sig->encoding = "pkcs1";
- ctx->algo_oid = ctx->last_oid;
+ ctx->sig_algo = ctx->last_oid;
return 0;
ecrdsa:
ctx->cert->sig->pkey_algo = "ecrdsa";
ctx->cert->sig->encoding = "raw";
- ctx->algo_oid = ctx->last_oid;
+ ctx->sig_algo = ctx->last_oid;
return 0;
sm2:
ctx->cert->sig->pkey_algo = "sm2";
ctx->cert->sig->encoding = "raw";
- ctx->algo_oid = ctx->last_oid;
+ ctx->sig_algo = ctx->last_oid;
return 0;
ecdsa:
ctx->cert->sig->pkey_algo = "ecdsa";
ctx->cert->sig->encoding = "x962";
- ctx->algo_oid = ctx->last_oid;
+ ctx->sig_algo = ctx->last_oid;
return 0;
}
@@ -291,11 +288,16 @@ int x509_note_signature(void *context, size_t hdrlen,
{
struct x509_parse_context *ctx = context;
- pr_debug("Signature type: %u size %zu\n", ctx->last_oid, vlen);
+ pr_debug("Signature: alg=%u, size=%zu\n", ctx->last_oid, vlen);
- if (ctx->last_oid != ctx->algo_oid) {
- pr_warn("Got cert with pkey (%u) and sig (%u) algorithm OIDs\n",
- ctx->algo_oid, ctx->last_oid);
+ /*
+ * In X.509 certificates, the signature's algorithm is stored in two
+ * places: inside the TBSCertificate (the data that is signed), and
+ * alongside the signature. These *must* match.
+ */
+ if (ctx->last_oid != ctx->sig_algo) {
+ pr_warn("signatureAlgorithm (%u) differs from tbsCertificate.signature (%u)\n",
+ ctx->last_oid, ctx->sig_algo);
return -EINVAL;
}
diff --git a/crypto/asymmetric_keys/x509_parser.h b/crypto/asymmetric_keys/x509_parser.h
index c233f136fb35..97a886cbe01c 100644
--- a/crypto/asymmetric_keys/x509_parser.h
+++ b/crypto/asymmetric_keys/x509_parser.h
@@ -22,7 +22,7 @@ struct x509_certificate {
time64_t valid_to;
const void *tbs; /* Signed data */
unsigned tbs_size; /* Size of signed data */
- unsigned raw_sig_size; /* Size of sigature */
+ unsigned raw_sig_size; /* Size of signature */
const void *raw_sig; /* Signature data */
const void *raw_serial; /* Raw serial number in ASN.1 */
unsigned raw_serial_size;
@@ -36,7 +36,6 @@ struct x509_certificate {
bool seen; /* Infinite recursion prevention */
bool verified;
bool self_signed; /* T if self-signed (check unsupported_sig too) */
- bool unsupported_key; /* T if key uses unsupported crypto */
bool unsupported_sig; /* T if signature uses unsupported crypto */
bool blacklisted;
};
diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c
index fe14cae115b5..91a4ad50dea2 100644
--- a/crypto/asymmetric_keys/x509_public_key.c
+++ b/crypto/asymmetric_keys/x509_public_key.c
@@ -33,18 +33,6 @@ int x509_get_sig_params(struct x509_certificate *cert)
sig->data = cert->tbs;
sig->data_size = cert->tbs_size;
- if (!cert->pub->pkey_algo)
- cert->unsupported_key = true;
-
- if (!sig->pkey_algo)
- cert->unsupported_sig = true;
-
- /* We check the hash if we can - even if we can't then verify it */
- if (!sig->hash_algo) {
- cert->unsupported_sig = true;
- return 0;
- }
-
sig->s = kmemdup(cert->raw_sig, cert->raw_sig_size, GFP_KERNEL);
if (!sig->s)
return -ENOMEM;
@@ -128,12 +116,6 @@ int x509_check_for_self_signed(struct x509_certificate *cert)
goto out;
}
- ret = -EKEYREJECTED;
- if (strcmp(cert->pub->pkey_algo, cert->sig->pkey_algo) != 0 &&
- (strncmp(cert->pub->pkey_algo, "ecdsa-", 6) != 0 ||
- strcmp(cert->sig->pkey_algo, "ecdsa") != 0))
- goto out;
-
ret = public_key_verify_signature(cert->pub, cert->sig);
if (ret < 0) {
if (ret == -ENOPKG) {
@@ -173,12 +155,6 @@ static int x509_key_preparse(struct key_preparsed_payload *prep)
pr_devel("Cert Issuer: %s\n", cert->issuer);
pr_devel("Cert Subject: %s\n", cert->subject);
-
- if (cert->unsupported_key) {
- ret = -ENOPKG;
- goto error_free_cert;
- }
-
pr_devel("Cert Key Algo: %s\n", cert->pub->pkey_algo);
pr_devel("Cert Valid period: %lld-%lld\n", cert->valid_from, cert->valid_to);
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index d8a91521144e..1a3855284091 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -170,8 +170,8 @@ dma_xor_aligned_offsets(struct dma_device *device, unsigned int offset,
*
* xor_blocks always uses the dest as a source so the
* ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
- * the calculation. The assumption with dma eninges is that they only
- * use the destination buffer as a source when it is explicity specified
+ * the calculation. The assumption with dma engines is that they only
+ * use the destination buffer as a source when it is explicitly specified
* in the source list.
*
* src_list note: if the dest is also a source it must be at index zero.
@@ -261,8 +261,8 @@ EXPORT_SYMBOL_GPL(async_xor_offs);
*
* xor_blocks always uses the dest as a source so the
* ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
- * the calculation. The assumption with dma eninges is that they only
- * use the destination buffer as a source when it is explicity specified
+ * the calculation. The assumption with dma engines is that they only
+ * use the destination buffer as a source when it is explicitly specified
* in the source list.
*
* src_list note: if the dest is also a source it must be at index zero.
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c
index 66db82e5a3b1..c9d218e53bcb 100644
--- a/crypto/async_tx/raid6test.c
+++ b/crypto/async_tx/raid6test.c
@@ -217,7 +217,7 @@ static int raid6_test(void)
err += test(12, &tests);
}
- /* the 24 disk case is special for ioatdma as it is the boudary point
+ /* the 24 disk case is special for ioatdma as it is the boundary point
* at which it needs to switch from 8-source ops to 16-source
* ops for continuation (assumes DMA_HAS_PQ_CONTINUE is not set)
*/
@@ -241,7 +241,7 @@ static void raid6_test_exit(void)
}
/* when compiled-in wait for drivers to load first (assumes dma drivers
- * are also compliled-in)
+ * are also compiled-in)
*/
late_initcall(raid6_test);
module_exit(raid6_test_exit);
diff --git a/crypto/authenc.c b/crypto/authenc.c
index 670bf1a01d00..17f674a7cdff 100644
--- a/crypto/authenc.c
+++ b/crypto/authenc.c
@@ -253,7 +253,7 @@ static int crypto_authenc_decrypt_tail(struct aead_request *req,
dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen);
skcipher_request_set_tfm(skreq, ctx->enc);
- skcipher_request_set_callback(skreq, aead_request_flags(req),
+ skcipher_request_set_callback(skreq, flags,
req->base.complete, req->base.data);
skcipher_request_set_crypt(skreq, src, dst,
req->cryptlen - authsize, req->iv);
diff --git a/crypto/blake2s_generic.c b/crypto/blake2s_generic.c
index 72fe480f9bd6..5f96a21f8788 100644
--- a/crypto/blake2s_generic.c
+++ b/crypto/blake2s_generic.c
@@ -15,12 +15,12 @@
static int crypto_blake2s_update_generic(struct shash_desc *desc,
const u8 *in, unsigned int inlen)
{
- return crypto_blake2s_update(desc, in, inlen, blake2s_compress_generic);
+ return crypto_blake2s_update(desc, in, inlen, true);
}
static int crypto_blake2s_final_generic(struct shash_desc *desc, u8 *out)
{
- return crypto_blake2s_final(desc, out, blake2s_compress_generic);
+ return crypto_blake2s_final(desc, out, true);
}
#define BLAKE2S_ALG(name, driver_name, digest_size) \
diff --git a/crypto/cfb.c b/crypto/cfb.c
index 0d664dfb47bc..5c36b7b65e2a 100644
--- a/crypto/cfb.c
+++ b/crypto/cfb.c
@@ -1,4 +1,4 @@
-//SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0
/*
* CFB: Cipher FeedBack mode
*
diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
index fb07da9920ee..6056a990c9f2 100644
--- a/crypto/crypto_engine.c
+++ b/crypto/crypto_engine.c
@@ -53,6 +53,7 @@ static void crypto_finalize_request(struct crypto_engine *engine,
dev_err(engine->dev, "failed to unprepare request\n");
}
}
+ lockdep_assert_in_softirq();
req->complete(req, err);
kthread_queue_work(engine->kworker, &engine->pump_requests);
diff --git a/crypto/dh.c b/crypto/dh.c
index 27e62a2a8027..4406aeb1ff61 100644
--- a/crypto/dh.c
+++ b/crypto/dh.c
@@ -10,11 +10,11 @@
#include <crypto/internal/kpp.h>
#include <crypto/kpp.h>
#include <crypto/dh.h>
+#include <crypto/rng.h>
#include <linux/mpi.h>
struct dh_ctx {
MPI p; /* Value is guaranteed to be set. */
- MPI q; /* Value is optional. */
MPI g; /* Value is guaranteed to be set. */
MPI xa; /* Value is guaranteed to be set. */
};
@@ -22,7 +22,6 @@ struct dh_ctx {
static void dh_clear_ctx(struct dh_ctx *ctx)
{
mpi_free(ctx->p);
- mpi_free(ctx->q);
mpi_free(ctx->g);
mpi_free(ctx->xa);
memset(ctx, 0, sizeof(*ctx));
@@ -62,12 +61,6 @@ static int dh_set_params(struct dh_ctx *ctx, struct dh *params)
if (!ctx->p)
return -EINVAL;
- if (params->q && params->q_size) {
- ctx->q = mpi_read_raw_data(params->q, params->q_size);
- if (!ctx->q)
- return -EINVAL;
- }
-
ctx->g = mpi_read_raw_data(params->g, params->g_size);
if (!ctx->g)
return -EINVAL;
@@ -104,11 +97,12 @@ err_clear_ctx:
/*
* SP800-56A public key verification:
*
- * * If Q is provided as part of the domain paramenters, a full validation
- * according to SP800-56A section 5.6.2.3.1 is performed.
+ * * For the safe-prime groups in FIPS mode, Q can be computed
+ * trivially from P and a full validation according to SP800-56A
+ * section 5.6.2.3.1 is performed.
*
- * * If Q is not provided, a partial validation according to SP800-56A section
- * 5.6.2.3.2 is performed.
+ * * For all other sets of group parameters, only a partial validation
+ * according to SP800-56A section 5.6.2.3.2 is performed.
*/
static int dh_is_pubkey_valid(struct dh_ctx *ctx, MPI y)
{
@@ -119,21 +113,40 @@ static int dh_is_pubkey_valid(struct dh_ctx *ctx, MPI y)
* Step 1: Verify that 2 <= y <= p - 2.
*
* The upper limit check is actually y < p instead of y < p - 1
- * as the mpi_sub_ui function is yet missing.
+ * in order to save one mpi_sub_ui() invocation here. Note that
+ * p - 1 is the non-trivial element of the subgroup of order 2 and
+ * thus, the check on y^q below would fail if y == p - 1.
*/
if (mpi_cmp_ui(y, 1) < 1 || mpi_cmp(y, ctx->p) >= 0)
return -EINVAL;
- /* Step 2: Verify that 1 = y^q mod p */
- if (ctx->q) {
- MPI val = mpi_alloc(0);
+ /*
+ * Step 2: Verify that 1 = y^q mod p
+ *
+ * For the safe-prime groups q = (p - 1)/2.
+ */
+ if (fips_enabled) {
+ MPI val, q;
int ret;
+ val = mpi_alloc(0);
if (!val)
return -ENOMEM;
- ret = mpi_powm(val, y, ctx->q, ctx->p);
+ q = mpi_alloc(mpi_get_nlimbs(ctx->p));
+ if (!q) {
+ mpi_free(val);
+ return -ENOMEM;
+ }
+
+ /*
+ * ->p is odd, so no need to explicitly subtract one
+ * from it before shifting to the right.
+ */
+ mpi_rshift(q, ctx->p, 1);
+ ret = mpi_powm(val, y, q, ctx->p);
+ mpi_free(q);
if (ret) {
mpi_free(val);
return ret;
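
A toy-sized illustration of this check: for the safe prime p = 23, q = (p - 1) / 2 = p >> 1 = 11, and exactly the elements of the order-q subgroup satisfy y^q == 1 (mod p). In particular y = p - 1 = 22, the order-2 element mentioned in the comment above, gives 22^11 == -1 == 22 (mod 23) and is correctly rejected. A self-contained demo:

#include <stdint.h>
#include <stdio.h>

static uint64_t powmod(uint64_t b, uint64_t e, uint64_t m)
{
	uint64_t r = 1;

	for (b %= m; e; e >>= 1, b = b * b % m)
		if (e & 1)
			r = r * b % m;
	return r;
}

int main(void)
{
	const uint64_t p = 23, q = p >> 1;	/* safe prime, q = (p-1)/2 */

	for (uint64_t y = 2; y <= p - 1; y++)
		printf("y=%2llu  y^q mod p = %llu\n",
		       (unsigned long long)y,
		       (unsigned long long)powmod(y, q, p));
	return 0;
}
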
@@ -263,13 +276,645 @@ static struct kpp_alg dh = {
},
};
+
+struct dh_safe_prime {
+ unsigned int max_strength;
+ unsigned int p_size;
+ const char *p;
+};
+
+static const char safe_prime_g[] = { 2 };
+
+struct dh_safe_prime_instance_ctx {
+ struct crypto_kpp_spawn dh_spawn;
+ const struct dh_safe_prime *safe_prime;
+};
+
+struct dh_safe_prime_tfm_ctx {
+ struct crypto_kpp *dh_tfm;
+};
+
+static void dh_safe_prime_free_instance(struct kpp_instance *inst)
+{
+ struct dh_safe_prime_instance_ctx *ctx = kpp_instance_ctx(inst);
+
+ crypto_drop_kpp(&ctx->dh_spawn);
+ kfree(inst);
+}
+
+static inline struct dh_safe_prime_instance_ctx *dh_safe_prime_instance_ctx(
+ struct crypto_kpp *tfm)
+{
+ return kpp_instance_ctx(kpp_alg_instance(tfm));
+}
+
+static int dh_safe_prime_init_tfm(struct crypto_kpp *tfm)
+{
+ struct dh_safe_prime_instance_ctx *inst_ctx =
+ dh_safe_prime_instance_ctx(tfm);
+ struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(tfm);
+
+ tfm_ctx->dh_tfm = crypto_spawn_kpp(&inst_ctx->dh_spawn);
+ if (IS_ERR(tfm_ctx->dh_tfm))
+ return PTR_ERR(tfm_ctx->dh_tfm);
+
+ return 0;
+}
+
+static void dh_safe_prime_exit_tfm(struct crypto_kpp *tfm)
+{
+ struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(tfm);
+
+ crypto_free_kpp(tfm_ctx->dh_tfm);
+}
+
+static u64 __add_u64_to_be(__be64 *dst, unsigned int n, u64 val)
+{
+ unsigned int i;
+
+ for (i = n; val && i > 0; --i) {
+ u64 tmp = be64_to_cpu(dst[i - 1]);
+
+ tmp += val;
+ val = tmp >= val ? 0 : 1;
+ dst[i - 1] = cpu_to_be64(tmp);
+ }
+
+ return val;
+}
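
__add_u64_to_be() is a most-significant-limb-first multiprecision add with carry propagation; the carry test relies on the fact that an unsigned sum overflowed iff the result is smaller than the addend. A host-endian analogue with two quick checks (the kernel version additionally byte-swaps each limb):

#include <stdint.h>
#include <stdio.h>

/* dst[0] is the most significant limb; returns the carry out of the
 * whole array. */
static uint64_t add_u64(uint64_t *dst, unsigned int n, uint64_t val)
{
	for (unsigned int i = n; val && i > 0; --i) {
		uint64_t tmp = dst[i - 1] + val;

		val = tmp >= val ? 0 : 1;	/* unsigned overflow = carry */
		dst[i - 1] = tmp;
	}
	return val;
}

int main(void)
{
	uint64_t a[2] = { 0, ~0ULL };		/* 0x0000...0000ffff...ffff */
	uint64_t b[2] = { ~0ULL, ~0ULL };	/* all ones */

	printf("carry %llu -> { %llx, %llx }\n",
	       (unsigned long long)add_u64(a, 2, 1),
	       (unsigned long long)a[0],
	       (unsigned long long)a[1]);	/* carry 0 -> { 1, 0 } */
	printf("carry %llu -> { %llx, %llx }\n",
	       (unsigned long long)add_u64(b, 2, 1),
	       (unsigned long long)b[0],
	       (unsigned long long)b[1]);	/* carry 1 -> { 0, 0 } */
	return 0;
}
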
+
+static void *dh_safe_prime_gen_privkey(const struct dh_safe_prime *safe_prime,
+ unsigned int *key_size)
+{
+ unsigned int n, oversampling_size;
+ __be64 *key;
+ int err;
+ u64 h, o;
+
+ /*
+ * Generate a private key following NIST SP800-56Ar3,
+ * sec. 5.6.1.1.1 and 5.6.1.1.3, respectively.
+ *
+ * 5.6.1.1.1: choose key length N such that
+ * 2 * ->max_strength <= N <= log2(q) + 1 = ->p_size * 8 - 1
+ * with q = (p - 1) / 2 for the safe-prime groups.
+ * Choose the lower bound's next power of two for N in order to
+ * avoid excessively large private keys while still
+ * maintaining some extra reserve beyond the bare minimum in
+ * most cases. Note that for each entry in safe_prime_groups[],
+ * the following holds for such N:
+ * - N >= 256, in particular it is a multiple of 2^6 = 64
+ * bits and
+ * - N < log2(q) + 1, i.e. N respects the upper bound.
+ */
+ n = roundup_pow_of_two(2 * safe_prime->max_strength);
+ WARN_ON_ONCE(n & ((1u << 6) - 1));
+ n >>= 6; /* Convert N into units of u64. */
+
+ /*
+ * Reserve one extra u64 to hold the extra random bits
+ * required as per 5.6.1.1.3.
+ */
+ oversampling_size = (n + 1) * sizeof(__be64);
+ key = kmalloc(oversampling_size, GFP_KERNEL);
+ if (!key)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * 5.6.1.1.3, step 3 (and implicitly step 4): obtain N + 64
+ * random bits and interpret them as a big endian integer.
+ */
+ err = -EFAULT;
+ if (crypto_get_default_rng())
+ goto out_err;
+
+ err = crypto_rng_get_bytes(crypto_default_rng, (u8 *)key,
+ oversampling_size);
+ crypto_put_default_rng();
+ if (err)
+ goto out_err;
+
+ /*
+ * 5.6.1.1.3, step 5 is implicit: 2^N < q and thus,
+ * M = min(2^N, q) = 2^N.
+ *
+ * For step 6, calculate
+ * key = (key[] mod (M - 1)) + 1 = (key[] mod (2^N - 1)) + 1.
+ *
+ * In order to avoid expensive divisions, note that
+ * 2^N mod (2^N - 1) = 1 and thus, for any integer h,
+ * 2^N * h mod (2^N - 1) = h mod (2^N - 1) always holds.
+ * The big endian integer key[] composed of n + 1 64bit words
+ * may be written as key[] = h * 2^N + l, with h = key[0]
+ * representing the 64 most significant bits and l
+ * corresponding to the remaining N bits. With the remark
+ * from above,
+ * h * 2^N + l mod (2^N - 1) = l + h mod (2^N - 1).
+ * As both, l and h are less than 2^N, their sum after
+ * this first reduction is guaranteed to be <= 2^(N + 1) - 2.
+ * Or equivalently, that their sum can again be written as
+ * h' * 2^N + l' with h' now either zero or one and if one,
+ * then l' <= 2^N - 2. Thus, all bits at positions >= N will
+ * be zero after a second reduction:
+ * h' * 2^N + l' mod (2^N - 1) = l' + h' mod (2^N - 1).
+ * At this point, it is still possible that
+ * l' + h' = 2^N - 1, i.e. that l' + h' mod (2^N - 1)
+ * is zero. This condition will be detected below by means of
+ * the final increment overflowing in this case.
+ */
+ h = be64_to_cpu(key[0]);
+ h = __add_u64_to_be(key + 1, n, h);
+ h = __add_u64_to_be(key + 1, n, h);
+ WARN_ON_ONCE(h);
+
+ /* Increment to obtain the final result. */
+ o = __add_u64_to_be(key + 1, n, 1);
+ /*
+ * The overflow bit o from the increment is either zero or
+ * one. If zero, key[1:n] holds the final result in big-endian
+ * order. If one, key[1:n] is zero now, but needs to be set to
+ * one, c.f. above.
+ */
+ if (o)
+ key[n] = cpu_to_be64(1);
+
+ /* n is in units of u64, convert to bytes. */
+ *key_size = n << 3;
+ /* Strip the leading extra __be64, which is (virtually) zero by now. */
+ memmove(key, &key[1], *key_size);
+
+ return key;
+
+out_err:
+ kfree_sensitive(key);
+ return ERR_PTR(err);
+}
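
The division-free reduction in the comment above can be sanity-checked at toy scale. With N = 8, any 16-bit x = h*2^8 + l satisfies x mod (2^8 - 1) == (h + l) mod (2^8 - 1), so two folding rounds plus the final increment produce (x mod 255) + 1 in [1, 255]; the sketch below verifies this exhaustively:

#include <stdio.h>

int main(void)
{
	for (unsigned int x = 0; x < 0x10000; x++) {
		unsigned int s = (x >> 8) + (x & 0xff);	/* first fold */

		s = (s >> 8) + (s & 0xff);	/* second fold, now <= 255 */
		s += 1;				/* final increment */
		if (s == 256)			/* "overflow": result is 1 */
			s = 1;
		if (s != x % 255 + 1) {
			printf("mismatch at %u\n", x);
			return 1;
		}
	}
	printf("all 65536 values reduce correctly\n");
	return 0;
}
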
+
+static int dh_safe_prime_set_secret(struct crypto_kpp *tfm, const void *buffer,
+ unsigned int len)
+{
+ struct dh_safe_prime_instance_ctx *inst_ctx =
+ dh_safe_prime_instance_ctx(tfm);
+ struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(tfm);
+ struct dh params = {};
+ void *buf = NULL, *key = NULL;
+ unsigned int buf_size;
+ int err;
+
+ if (buffer) {
+ err = __crypto_dh_decode_key(buffer, len, &params);
+ if (err)
+ return err;
+ if (params.p_size || params.g_size)
+ return -EINVAL;
+ }
+
+ params.p = inst_ctx->safe_prime->p;
+ params.p_size = inst_ctx->safe_prime->p_size;
+ params.g = safe_prime_g;
+ params.g_size = sizeof(safe_prime_g);
+
+ if (!params.key_size) {
+ key = dh_safe_prime_gen_privkey(inst_ctx->safe_prime,
+ &params.key_size);
+ if (IS_ERR(key))
+ return PTR_ERR(key);
+ params.key = key;
+ }
+
+ buf_size = crypto_dh_key_len(&params);
+ buf = kmalloc(buf_size, GFP_KERNEL);
+ if (!buf) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = crypto_dh_encode_key(buf, buf_size, &params);
+ if (err)
+ goto out;
+
+ err = crypto_kpp_set_secret(tfm_ctx->dh_tfm, buf, buf_size);
+out:
+ kfree_sensitive(buf);
+ kfree_sensitive(key);
+ return err;
+}
+
+static void dh_safe_prime_complete_req(struct crypto_async_request *dh_req,
+ int err)
+{
+ struct kpp_request *req = dh_req->data;
+
+ kpp_request_complete(req, err);
+}
+
+static struct kpp_request *dh_safe_prime_prepare_dh_req(struct kpp_request *req)
+{
+ struct dh_safe_prime_tfm_ctx *tfm_ctx =
+ kpp_tfm_ctx(crypto_kpp_reqtfm(req));
+ struct kpp_request *dh_req = kpp_request_ctx(req);
+
+ kpp_request_set_tfm(dh_req, tfm_ctx->dh_tfm);
+ kpp_request_set_callback(dh_req, req->base.flags,
+ dh_safe_prime_complete_req, req);
+
+ kpp_request_set_input(dh_req, req->src, req->src_len);
+ kpp_request_set_output(dh_req, req->dst, req->dst_len);
+
+ return dh_req;
+}
+
+static int dh_safe_prime_generate_public_key(struct kpp_request *req)
+{
+ struct kpp_request *dh_req = dh_safe_prime_prepare_dh_req(req);
+
+ return crypto_kpp_generate_public_key(dh_req);
+}
+
+static int dh_safe_prime_compute_shared_secret(struct kpp_request *req)
+{
+ struct kpp_request *dh_req = dh_safe_prime_prepare_dh_req(req);
+
+ return crypto_kpp_compute_shared_secret(dh_req);
+}
+
+static unsigned int dh_safe_prime_max_size(struct crypto_kpp *tfm)
+{
+ struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(tfm);
+
+ return crypto_kpp_maxsize(tfm_ctx->dh_tfm);
+}
+
+static int __maybe_unused __dh_safe_prime_create(
+ struct crypto_template *tmpl, struct rtattr **tb,
+ const struct dh_safe_prime *safe_prime)
+{
+ struct kpp_instance *inst;
+ struct dh_safe_prime_instance_ctx *ctx;
+ const char *dh_name;
+ struct kpp_alg *dh_alg;
+ u32 mask;
+ int err;
+
+ err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_KPP, &mask);
+ if (err)
+ return err;
+
+ dh_name = crypto_attr_alg_name(tb[1]);
+ if (IS_ERR(dh_name))
+ return PTR_ERR(dh_name);
+
+ inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
+ if (!inst)
+ return -ENOMEM;
+
+ ctx = kpp_instance_ctx(inst);
+
+ err = crypto_grab_kpp(&ctx->dh_spawn, kpp_crypto_instance(inst),
+ dh_name, 0, mask);
+ if (err)
+ goto err_free_inst;
+
+ err = -EINVAL;
+ dh_alg = crypto_spawn_kpp_alg(&ctx->dh_spawn);
+ if (strcmp(dh_alg->base.cra_name, "dh"))
+ goto err_free_inst;
+
+ ctx->safe_prime = safe_prime;
+
+ err = crypto_inst_setname(kpp_crypto_instance(inst),
+ tmpl->name, &dh_alg->base);
+ if (err)
+ goto err_free_inst;
+
+ inst->alg.set_secret = dh_safe_prime_set_secret;
+ inst->alg.generate_public_key = dh_safe_prime_generate_public_key;
+ inst->alg.compute_shared_secret = dh_safe_prime_compute_shared_secret;
+ inst->alg.max_size = dh_safe_prime_max_size;
+ inst->alg.init = dh_safe_prime_init_tfm;
+ inst->alg.exit = dh_safe_prime_exit_tfm;
+ inst->alg.reqsize = sizeof(struct kpp_request) + dh_alg->reqsize;
+ inst->alg.base.cra_priority = dh_alg->base.cra_priority;
+ inst->alg.base.cra_module = THIS_MODULE;
+ inst->alg.base.cra_ctxsize = sizeof(struct dh_safe_prime_tfm_ctx);
+
+ inst->free = dh_safe_prime_free_instance;
+
+ err = kpp_register_instance(tmpl, inst);
+ if (err)
+ goto err_free_inst;
+
+ return 0;
+
+err_free_inst:
+ dh_safe_prime_free_instance(inst);
+
+ return err;
+}
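
A hypothetical kernel-side use of one of these templates, as a sketch only (flow inferred from the code above, not taken from the patch): requesting "ffdhe2048(dh)" instantiates __dh_safe_prime_create() with ffdhe2048_prime around the generic "dh" implementation, and encoding an empty struct dh asks dh_safe_prime_set_secret() to generate the private key itself.

#include <crypto/dh.h>
#include <crypto/kpp.h>
#include <linux/slab.h>

static int ffdhe2048_example(void)
{
	struct crypto_kpp *tfm;
	struct dh params = {};	/* no p/g, no key: group and key are implied */
	unsigned int len;
	void *buf;
	int err;

	tfm = crypto_alloc_kpp("ffdhe2048(dh)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	len = crypto_dh_key_len(&params);
	buf = kmalloc(len, GFP_KERNEL);
	if (!buf) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	err = crypto_dh_encode_key(buf, len, &params);
	if (!err)
		err = crypto_kpp_set_secret(tfm, buf, len);

	kfree_sensitive(buf);
out_free_tfm:
	crypto_free_kpp(tfm);
	return err;
}
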
+
+#ifdef CONFIG_CRYPTO_DH_RFC7919_GROUPS
+
+static const struct dh_safe_prime ffdhe2048_prime = {
+ .max_strength = 112,
+ .p_size = 256,
+ .p =
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a"
+ "\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95"
+ "\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9"
+ "\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a"
+ "\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0"
+ "\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35"
+ "\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72"
+ "\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a"
+ "\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb"
+ "\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4"
+ "\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70"
+ "\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61"
+ "\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83"
+ "\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05"
+ "\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa"
+ "\x88\x6b\x42\x38\x61\x28\x5c\x97\xff\xff\xff\xff\xff\xff\xff\xff",
+};
+
+static const struct dh_safe_prime ffdhe3072_prime = {
+ .max_strength = 128,
+ .p_size = 384,
+ .p =
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a"
+ "\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95"
+ "\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9"
+ "\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a"
+ "\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0"
+ "\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35"
+ "\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72"
+ "\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a"
+ "\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb"
+ "\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4"
+ "\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70"
+ "\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61"
+ "\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83"
+ "\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05"
+ "\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa"
+ "\x88\x6b\x42\x38\x61\x1f\xcf\xdc\xde\x35\x5b\x3b\x65\x19\x03\x5b"
+ "\xbc\x34\xf4\xde\xf9\x9c\x02\x38\x61\xb4\x6f\xc9\xd6\xe6\xc9\x07"
+ "\x7a\xd9\x1d\x26\x91\xf7\xf7\xee\x59\x8c\xb0\xfa\xc1\x86\xd9\x1c"
+ "\xae\xfe\x13\x09\x85\x13\x92\x70\xb4\x13\x0c\x93\xbc\x43\x79\x44"
+ "\xf4\xfd\x44\x52\xe2\xd7\x4d\xd3\x64\xf2\xe2\x1e\x71\xf5\x4b\xff"
+ "\x5c\xae\x82\xab\x9c\x9d\xf6\x9e\xe8\x6d\x2b\xc5\x22\x36\x3a\x0d"
+ "\xab\xc5\x21\x97\x9b\x0d\xea\xda\x1d\xbf\x9a\x42\xd5\xc4\x48\x4e"
+ "\x0a\xbc\xd0\x6b\xfa\x53\xdd\xef\x3c\x1b\x20\xee\x3f\xd5\x9d\x7c"
+ "\x25\xe4\x1d\x2b\x66\xc6\x2e\x37\xff\xff\xff\xff\xff\xff\xff\xff",
+};
+
+static const struct dh_safe_prime ffdhe4096_prime = {
+ .max_strength = 152,
+ .p_size = 512,
+ .p =
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a"
+ "\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95"
+ "\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9"
+ "\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a"
+ "\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0"
+ "\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35"
+ "\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72"
+ "\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a"
+ "\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb"
+ "\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4"
+ "\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70"
+ "\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61"
+ "\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83"
+ "\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05"
+ "\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa"
+ "\x88\x6b\x42\x38\x61\x1f\xcf\xdc\xde\x35\x5b\x3b\x65\x19\x03\x5b"
+ "\xbc\x34\xf4\xde\xf9\x9c\x02\x38\x61\xb4\x6f\xc9\xd6\xe6\xc9\x07"
+ "\x7a\xd9\x1d\x26\x91\xf7\xf7\xee\x59\x8c\xb0\xfa\xc1\x86\xd9\x1c"
+ "\xae\xfe\x13\x09\x85\x13\x92\x70\xb4\x13\x0c\x93\xbc\x43\x79\x44"
+ "\xf4\xfd\x44\x52\xe2\xd7\x4d\xd3\x64\xf2\xe2\x1e\x71\xf5\x4b\xff"
+ "\x5c\xae\x82\xab\x9c\x9d\xf6\x9e\xe8\x6d\x2b\xc5\x22\x36\x3a\x0d"
+ "\xab\xc5\x21\x97\x9b\x0d\xea\xda\x1d\xbf\x9a\x42\xd5\xc4\x48\x4e"
+ "\x0a\xbc\xd0\x6b\xfa\x53\xdd\xef\x3c\x1b\x20\xee\x3f\xd5\x9d\x7c"
+ "\x25\xe4\x1d\x2b\x66\x9e\x1e\xf1\x6e\x6f\x52\xc3\x16\x4d\xf4\xfb"
+ "\x79\x30\xe9\xe4\xe5\x88\x57\xb6\xac\x7d\x5f\x42\xd6\x9f\x6d\x18"
+ "\x77\x63\xcf\x1d\x55\x03\x40\x04\x87\xf5\x5b\xa5\x7e\x31\xcc\x7a"
+ "\x71\x35\xc8\x86\xef\xb4\x31\x8a\xed\x6a\x1e\x01\x2d\x9e\x68\x32"
+ "\xa9\x07\x60\x0a\x91\x81\x30\xc4\x6d\xc7\x78\xf9\x71\xad\x00\x38"
+ "\x09\x29\x99\xa3\x33\xcb\x8b\x7a\x1a\x1d\xb9\x3d\x71\x40\x00\x3c"
+ "\x2a\x4e\xce\xa9\xf9\x8d\x0a\xcc\x0a\x82\x91\xcd\xce\xc9\x7d\xcf"
+ "\x8e\xc9\xb5\x5a\x7f\x88\xa4\x6b\x4d\xb5\xa8\x51\xf4\x41\x82\xe1"
+ "\xc6\x8a\x00\x7e\x5e\x65\x5f\x6a\xff\xff\xff\xff\xff\xff\xff\xff",
+};
+
+static const struct dh_safe_prime ffdhe6144_prime = {
+ .max_strength = 176,
+ .p_size = 768,
+ .p =
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a"
+ "\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95"
+ "\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9"
+ "\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a"
+ "\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0"
+ "\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35"
+ "\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72"
+ "\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a"
+ "\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb"
+ "\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4"
+ "\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70"
+ "\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61"
+ "\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83"
+ "\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05"
+ "\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa"
+ "\x88\x6b\x42\x38\x61\x1f\xcf\xdc\xde\x35\x5b\x3b\x65\x19\x03\x5b"
+ "\xbc\x34\xf4\xde\xf9\x9c\x02\x38\x61\xb4\x6f\xc9\xd6\xe6\xc9\x07"
+ "\x7a\xd9\x1d\x26\x91\xf7\xf7\xee\x59\x8c\xb0\xfa\xc1\x86\xd9\x1c"
+ "\xae\xfe\x13\x09\x85\x13\x92\x70\xb4\x13\x0c\x93\xbc\x43\x79\x44"
+ "\xf4\xfd\x44\x52\xe2\xd7\x4d\xd3\x64\xf2\xe2\x1e\x71\xf5\x4b\xff"
+ "\x5c\xae\x82\xab\x9c\x9d\xf6\x9e\xe8\x6d\x2b\xc5\x22\x36\x3a\x0d"
+ "\xab\xc5\x21\x97\x9b\x0d\xea\xda\x1d\xbf\x9a\x42\xd5\xc4\x48\x4e"
+ "\x0a\xbc\xd0\x6b\xfa\x53\xdd\xef\x3c\x1b\x20\xee\x3f\xd5\x9d\x7c"
+ "\x25\xe4\x1d\x2b\x66\x9e\x1e\xf1\x6e\x6f\x52\xc3\x16\x4d\xf4\xfb"
+ "\x79\x30\xe9\xe4\xe5\x88\x57\xb6\xac\x7d\x5f\x42\xd6\x9f\x6d\x18"
+ "\x77\x63\xcf\x1d\x55\x03\x40\x04\x87\xf5\x5b\xa5\x7e\x31\xcc\x7a"
+ "\x71\x35\xc8\x86\xef\xb4\x31\x8a\xed\x6a\x1e\x01\x2d\x9e\x68\x32"
+ "\xa9\x07\x60\x0a\x91\x81\x30\xc4\x6d\xc7\x78\xf9\x71\xad\x00\x38"
+ "\x09\x29\x99\xa3\x33\xcb\x8b\x7a\x1a\x1d\xb9\x3d\x71\x40\x00\x3c"
+ "\x2a\x4e\xce\xa9\xf9\x8d\x0a\xcc\x0a\x82\x91\xcd\xce\xc9\x7d\xcf"
+ "\x8e\xc9\xb5\x5a\x7f\x88\xa4\x6b\x4d\xb5\xa8\x51\xf4\x41\x82\xe1"
+ "\xc6\x8a\x00\x7e\x5e\x0d\xd9\x02\x0b\xfd\x64\xb6\x45\x03\x6c\x7a"
+ "\x4e\x67\x7d\x2c\x38\x53\x2a\x3a\x23\xba\x44\x42\xca\xf5\x3e\xa6"
+ "\x3b\xb4\x54\x32\x9b\x76\x24\xc8\x91\x7b\xdd\x64\xb1\xc0\xfd\x4c"
+ "\xb3\x8e\x8c\x33\x4c\x70\x1c\x3a\xcd\xad\x06\x57\xfc\xcf\xec\x71"
+ "\x9b\x1f\x5c\x3e\x4e\x46\x04\x1f\x38\x81\x47\xfb\x4c\xfd\xb4\x77"
+ "\xa5\x24\x71\xf7\xa9\xa9\x69\x10\xb8\x55\x32\x2e\xdb\x63\x40\xd8"
+ "\xa0\x0e\xf0\x92\x35\x05\x11\xe3\x0a\xbe\xc1\xff\xf9\xe3\xa2\x6e"
+ "\x7f\xb2\x9f\x8c\x18\x30\x23\xc3\x58\x7e\x38\xda\x00\x77\xd9\xb4"
+ "\x76\x3e\x4e\x4b\x94\xb2\xbb\xc1\x94\xc6\x65\x1e\x77\xca\xf9\x92"
+ "\xee\xaa\xc0\x23\x2a\x28\x1b\xf6\xb3\xa7\x39\xc1\x22\x61\x16\x82"
+ "\x0a\xe8\xdb\x58\x47\xa6\x7c\xbe\xf9\xc9\x09\x1b\x46\x2d\x53\x8c"
+ "\xd7\x2b\x03\x74\x6a\xe7\x7f\x5e\x62\x29\x2c\x31\x15\x62\xa8\x46"
+ "\x50\x5d\xc8\x2d\xb8\x54\x33\x8a\xe4\x9f\x52\x35\xc9\x5b\x91\x17"
+ "\x8c\xcf\x2d\xd5\xca\xce\xf4\x03\xec\x9d\x18\x10\xc6\x27\x2b\x04"
+ "\x5b\x3b\x71\xf9\xdc\x6b\x80\xd6\x3f\xdd\x4a\x8e\x9a\xdb\x1e\x69"
+ "\x62\xa6\x95\x26\xd4\x31\x61\xc1\xa4\x1d\x57\x0d\x79\x38\xda\xd4"
+ "\xa4\x0e\x32\x9c\xd0\xe4\x0e\x65\xff\xff\xff\xff\xff\xff\xff\xff",
+};
+
+static const struct dh_safe_prime ffdhe8192_prime = {
+ .max_strength = 200,
+ .p_size = 1024,
+ .p =
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a"
+ "\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95"
+ "\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9"
+ "\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a"
+ "\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0"
+ "\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35"
+ "\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72"
+ "\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a"
+ "\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb"
+ "\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4"
+ "\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70"
+ "\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61"
+ "\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83"
+ "\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05"
+ "\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa"
+ "\x88\x6b\x42\x38\x61\x1f\xcf\xdc\xde\x35\x5b\x3b\x65\x19\x03\x5b"
+ "\xbc\x34\xf4\xde\xf9\x9c\x02\x38\x61\xb4\x6f\xc9\xd6\xe6\xc9\x07"
+ "\x7a\xd9\x1d\x26\x91\xf7\xf7\xee\x59\x8c\xb0\xfa\xc1\x86\xd9\x1c"
+ "\xae\xfe\x13\x09\x85\x13\x92\x70\xb4\x13\x0c\x93\xbc\x43\x79\x44"
+ "\xf4\xfd\x44\x52\xe2\xd7\x4d\xd3\x64\xf2\xe2\x1e\x71\xf5\x4b\xff"
+ "\x5c\xae\x82\xab\x9c\x9d\xf6\x9e\xe8\x6d\x2b\xc5\x22\x36\x3a\x0d"
+ "\xab\xc5\x21\x97\x9b\x0d\xea\xda\x1d\xbf\x9a\x42\xd5\xc4\x48\x4e"
+ "\x0a\xbc\xd0\x6b\xfa\x53\xdd\xef\x3c\x1b\x20\xee\x3f\xd5\x9d\x7c"
+ "\x25\xe4\x1d\x2b\x66\x9e\x1e\xf1\x6e\x6f\x52\xc3\x16\x4d\xf4\xfb"
+ "\x79\x30\xe9\xe4\xe5\x88\x57\xb6\xac\x7d\x5f\x42\xd6\x9f\x6d\x18"
+ "\x77\x63\xcf\x1d\x55\x03\x40\x04\x87\xf5\x5b\xa5\x7e\x31\xcc\x7a"
+ "\x71\x35\xc8\x86\xef\xb4\x31\x8a\xed\x6a\x1e\x01\x2d\x9e\x68\x32"
+ "\xa9\x07\x60\x0a\x91\x81\x30\xc4\x6d\xc7\x78\xf9\x71\xad\x00\x38"
+ "\x09\x29\x99\xa3\x33\xcb\x8b\x7a\x1a\x1d\xb9\x3d\x71\x40\x00\x3c"
+ "\x2a\x4e\xce\xa9\xf9\x8d\x0a\xcc\x0a\x82\x91\xcd\xce\xc9\x7d\xcf"
+ "\x8e\xc9\xb5\x5a\x7f\x88\xa4\x6b\x4d\xb5\xa8\x51\xf4\x41\x82\xe1"
+ "\xc6\x8a\x00\x7e\x5e\x0d\xd9\x02\x0b\xfd\x64\xb6\x45\x03\x6c\x7a"
+ "\x4e\x67\x7d\x2c\x38\x53\x2a\x3a\x23\xba\x44\x42\xca\xf5\x3e\xa6"
+ "\x3b\xb4\x54\x32\x9b\x76\x24\xc8\x91\x7b\xdd\x64\xb1\xc0\xfd\x4c"
+ "\xb3\x8e\x8c\x33\x4c\x70\x1c\x3a\xcd\xad\x06\x57\xfc\xcf\xec\x71"
+ "\x9b\x1f\x5c\x3e\x4e\x46\x04\x1f\x38\x81\x47\xfb\x4c\xfd\xb4\x77"
+ "\xa5\x24\x71\xf7\xa9\xa9\x69\x10\xb8\x55\x32\x2e\xdb\x63\x40\xd8"
+ "\xa0\x0e\xf0\x92\x35\x05\x11\xe3\x0a\xbe\xc1\xff\xf9\xe3\xa2\x6e"
+ "\x7f\xb2\x9f\x8c\x18\x30\x23\xc3\x58\x7e\x38\xda\x00\x77\xd9\xb4"
+ "\x76\x3e\x4e\x4b\x94\xb2\xbb\xc1\x94\xc6\x65\x1e\x77\xca\xf9\x92"
+ "\xee\xaa\xc0\x23\x2a\x28\x1b\xf6\xb3\xa7\x39\xc1\x22\x61\x16\x82"
+ "\x0a\xe8\xdb\x58\x47\xa6\x7c\xbe\xf9\xc9\x09\x1b\x46\x2d\x53\x8c"
+ "\xd7\x2b\x03\x74\x6a\xe7\x7f\x5e\x62\x29\x2c\x31\x15\x62\xa8\x46"
+ "\x50\x5d\xc8\x2d\xb8\x54\x33\x8a\xe4\x9f\x52\x35\xc9\x5b\x91\x17"
+ "\x8c\xcf\x2d\xd5\xca\xce\xf4\x03\xec\x9d\x18\x10\xc6\x27\x2b\x04"
+ "\x5b\x3b\x71\xf9\xdc\x6b\x80\xd6\x3f\xdd\x4a\x8e\x9a\xdb\x1e\x69"
+ "\x62\xa6\x95\x26\xd4\x31\x61\xc1\xa4\x1d\x57\x0d\x79\x38\xda\xd4"
+ "\xa4\x0e\x32\x9c\xcf\xf4\x6a\xaa\x36\xad\x00\x4c\xf6\x00\xc8\x38"
+ "\x1e\x42\x5a\x31\xd9\x51\xae\x64\xfd\xb2\x3f\xce\xc9\x50\x9d\x43"
+ "\x68\x7f\xeb\x69\xed\xd1\xcc\x5e\x0b\x8c\xc3\xbd\xf6\x4b\x10\xef"
+ "\x86\xb6\x31\x42\xa3\xab\x88\x29\x55\x5b\x2f\x74\x7c\x93\x26\x65"
+ "\xcb\x2c\x0f\x1c\xc0\x1b\xd7\x02\x29\x38\x88\x39\xd2\xaf\x05\xe4"
+ "\x54\x50\x4a\xc7\x8b\x75\x82\x82\x28\x46\xc0\xba\x35\xc3\x5f\x5c"
+ "\x59\x16\x0c\xc0\x46\xfd\x82\x51\x54\x1f\xc6\x8c\x9c\x86\xb0\x22"
+ "\xbb\x70\x99\x87\x6a\x46\x0e\x74\x51\xa8\xa9\x31\x09\x70\x3f\xee"
+ "\x1c\x21\x7e\x6c\x38\x26\xe5\x2c\x51\xaa\x69\x1e\x0e\x42\x3c\xfc"
+ "\x99\xe9\xe3\x16\x50\xc1\x21\x7b\x62\x48\x16\xcd\xad\x9a\x95\xf9"
+ "\xd5\xb8\x01\x94\x88\xd9\xc0\xa0\xa1\xfe\x30\x75\xa5\x77\xe2\x31"
+ "\x83\xf8\x1d\x4a\x3f\x2f\xa4\x57\x1e\xfc\x8c\xe0\xba\x8a\x4f\xe8"
+ "\xb6\x85\x5d\xfe\x72\xb0\xa6\x6e\xde\xd2\xfb\xab\xfb\xe5\x8a\x30"
+ "\xfa\xfa\xbe\x1c\x5d\x71\xa8\x7e\x2f\x74\x1e\xf8\xc1\xfe\x86\xfe"
+ "\xa6\xbb\xfd\xe5\x30\x67\x7f\x0d\x97\xd1\x1d\x49\xf7\xa8\x44\x3d"
+ "\x08\x22\xe5\x06\xa9\xf4\x61\x4e\x01\x1e\x2a\x94\x83\x8f\xf8\x8c"
+ "\xd6\x8c\x8b\xb7\xc5\xc6\x42\x4c\xff\xff\xff\xff\xff\xff\xff\xff",
+};
+
+static int dh_ffdhe2048_create(struct crypto_template *tmpl,
+ struct rtattr **tb)
+{
+ return __dh_safe_prime_create(tmpl, tb, &ffdhe2048_prime);
+}
+
+static int dh_ffdhe3072_create(struct crypto_template *tmpl,
+ struct rtattr **tb)
+{
+ return __dh_safe_prime_create(tmpl, tb, &ffdhe3072_prime);
+}
+
+static int dh_ffdhe4096_create(struct crypto_template *tmpl,
+ struct rtattr **tb)
+{
+ return __dh_safe_prime_create(tmpl, tb, &ffdhe4096_prime);
+}
+
+static int dh_ffdhe6144_create(struct crypto_template *tmpl,
+ struct rtattr **tb)
+{
+ return __dh_safe_prime_create(tmpl, tb, &ffdhe6144_prime);
+}
+
+static int dh_ffdhe8192_create(struct crypto_template *tmpl,
+ struct rtattr **tb)
+{
+ return __dh_safe_prime_create(tmpl, tb, &ffdhe8192_prime);
+}
+
+static struct crypto_template crypto_ffdhe_templates[] = {
+ {
+ .name = "ffdhe2048",
+ .create = dh_ffdhe2048_create,
+ .module = THIS_MODULE,
+ },
+ {
+ .name = "ffdhe3072",
+ .create = dh_ffdhe3072_create,
+ .module = THIS_MODULE,
+ },
+ {
+ .name = "ffdhe4096",
+ .create = dh_ffdhe4096_create,
+ .module = THIS_MODULE,
+ },
+ {
+ .name = "ffdhe6144",
+ .create = dh_ffdhe6144_create,
+ .module = THIS_MODULE,
+ },
+ {
+ .name = "ffdhe8192",
+ .create = dh_ffdhe8192_create,
+ .module = THIS_MODULE,
+ },
+};
+
+#else /* ! CONFIG_CRYPTO_DH_RFC7919_GROUPS */
+
+static struct crypto_template crypto_ffdhe_templates[] = {};
+
+#endif /* CONFIG_CRYPTO_DH_RFC7919_GROUPS */
+
static int dh_init(void)
{
- return crypto_register_kpp(&dh);
+ int err;
+
+ err = crypto_register_kpp(&dh);
+ if (err)
+ return err;
+
+ err = crypto_register_templates(crypto_ffdhe_templates,
+ ARRAY_SIZE(crypto_ffdhe_templates));
+ if (err) {
+ crypto_unregister_kpp(&dh);
+ return err;
+ }
+
+ return 0;
}
static void dh_exit(void)
{
+ crypto_unregister_templates(crypto_ffdhe_templates,
+ ARRAY_SIZE(crypto_ffdhe_templates));
crypto_unregister_kpp(&dh);
}
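The five templates give callers named access to the RFC 7919 groups: instantiating "ffdhe2048(dh)" (or any of the other four) yields a kpp transform whose domain parameters are fixed by the kernel rather than supplied with the key. A minimal sketch of requesting one, assuming nothing beyond the crypto_alloc_kpp()/crypto_free_kpp() API; the demo function itself is illustrative, not part of the patch:

/* Hedged sketch: allocate an RFC 7919 DH transform by template name. */
#include <crypto/kpp.h>
#include <linux/err.h>

static int ffdhe_demo(void)
{
	struct crypto_kpp *tfm = crypto_alloc_kpp("ffdhe2048(dh)", 0, 0);

	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	/* set_secret()/generate_public_key() calls would follow here */
	crypto_free_kpp(tfm);
	return 0;
}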
diff --git a/crypto/dh_helper.c b/crypto/dh_helper.c
index 9fd5a42eea15..2d499879328b 100644
--- a/crypto/dh_helper.c
+++ b/crypto/dh_helper.c
@@ -10,7 +10,7 @@
#include <crypto/dh.h>
#include <crypto/kpp.h>
-#define DH_KPP_SECRET_MIN_SIZE (sizeof(struct kpp_secret) + 4 * sizeof(int))
+#define DH_KPP_SECRET_MIN_SIZE (sizeof(struct kpp_secret) + 3 * sizeof(int))
static inline u8 *dh_pack_data(u8 *dst, u8 *end, const void *src, size_t size)
{
@@ -28,7 +28,7 @@ static inline const u8 *dh_unpack_data(void *dst, const void *src, size_t size)
static inline unsigned int dh_data_size(const struct dh *p)
{
- return p->key_size + p->p_size + p->q_size + p->g_size;
+ return p->key_size + p->p_size + p->g_size;
}
unsigned int crypto_dh_key_len(const struct dh *p)
@@ -53,11 +53,9 @@ int crypto_dh_encode_key(char *buf, unsigned int len, const struct dh *params)
ptr = dh_pack_data(ptr, end, &params->key_size,
sizeof(params->key_size));
ptr = dh_pack_data(ptr, end, &params->p_size, sizeof(params->p_size));
- ptr = dh_pack_data(ptr, end, &params->q_size, sizeof(params->q_size));
ptr = dh_pack_data(ptr, end, &params->g_size, sizeof(params->g_size));
ptr = dh_pack_data(ptr, end, params->key, params->key_size);
ptr = dh_pack_data(ptr, end, params->p, params->p_size);
- ptr = dh_pack_data(ptr, end, params->q, params->q_size);
ptr = dh_pack_data(ptr, end, params->g, params->g_size);
if (ptr != end)
return -EINVAL;
@@ -65,7 +63,7 @@ int crypto_dh_encode_key(char *buf, unsigned int len, const struct dh *params)
}
EXPORT_SYMBOL_GPL(crypto_dh_encode_key);
-int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
+int __crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
{
const u8 *ptr = buf;
struct kpp_secret secret;
@@ -79,28 +77,36 @@ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
ptr = dh_unpack_data(&params->key_size, ptr, sizeof(params->key_size));
ptr = dh_unpack_data(&params->p_size, ptr, sizeof(params->p_size));
- ptr = dh_unpack_data(&params->q_size, ptr, sizeof(params->q_size));
ptr = dh_unpack_data(&params->g_size, ptr, sizeof(params->g_size));
if (secret.len != crypto_dh_key_len(params))
return -EINVAL;
+ /* Don't allocate memory. Set pointers to data within
+ * the given buffer
+ */
+ params->key = (void *)ptr;
+ params->p = (void *)(ptr + params->key_size);
+ params->g = (void *)(ptr + params->key_size + params->p_size);
+
+ return 0;
+}
+
+int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
+{
+ int err;
+
+ err = __crypto_dh_decode_key(buf, len, params);
+ if (err)
+ return err;
+
/*
* Don't permit the buffer for 'key' or 'g' to be larger than 'p', since
* some drivers assume otherwise.
*/
if (params->key_size > params->p_size ||
- params->g_size > params->p_size || params->q_size > params->p_size)
+ params->g_size > params->p_size)
return -EINVAL;
- /* Don't allocate memory. Set pointers to data within
- * the given buffer
- */
- params->key = (void *)ptr;
- params->p = (void *)(ptr + params->key_size);
- params->q = (void *)(ptr + params->key_size + params->p_size);
- params->g = (void *)(ptr + params->key_size + params->p_size +
- params->q_size);
-
/*
* Don't permit 'p' to be 0. It's not a prime number, and it's subject
* to corner cases such as 'mod 0' being undefined or
@@ -109,10 +115,6 @@ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
if (memchr_inv(params->p, 0, params->p_size) == NULL)
return -EINVAL;
- /* It is permissible to not provide Q. */
- if (params->q_size == 0)
- params->q = NULL;
-
return 0;
}
EXPORT_SYMBOL_GPL(crypto_dh_decode_key);
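Dropping q changes the wire format that crypto_dh_encode_key() writes and __crypto_dh_decode_key() parses: a kpp_secret header, three lengths, then the key, p and g blobs back to back. A hedged sketch of encoding under that layout (the buffer and parameter names are illustrative, and the const-qualified struct dh pointers are an assumption from the surrounding series):

/* Hedged sketch of the post-q encode path. */
#include <crypto/dh.h>
#include <linux/errno.h>

static int encode_demo(char *buf, unsigned int buflen,
		       const void *xa, unsigned int xa_len,
		       const void *p, unsigned int p_len,
		       const void *g, unsigned int g_len)
{
	struct dh params = {
		.key = xa, .key_size = xa_len,
		.p = p,    .p_size = p_len,
		.g = g,    .g_size = g_len,
	};
	/* len == DH_KPP_SECRET_MIN_SIZE + xa_len + p_len + g_len */
	unsigned int len = crypto_dh_key_len(&params);

	if (buflen < len)
		return -EINVAL;
	return crypto_dh_encode_key(buf, len, &params);
}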
diff --git a/crypto/hmac.c b/crypto/hmac.c
index 25856aa7ccbf..3610ff0b6739 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -15,6 +15,7 @@
#include <crypto/internal/hash.h>
#include <crypto/scatterwalk.h>
#include <linux/err.h>
+#include <linux/fips.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -51,6 +52,9 @@ static int hmac_setkey(struct crypto_shash *parent,
SHASH_DESC_ON_STACK(shash, hash);
unsigned int i;
+ if (fips_enabled && (keylen < 112 / 8))
+ return -EINVAL;
+
shash->tfm = hash;
if (keylen > bs) {
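The added check enforces the FIPS 140-3 / SP 800-131A floor of 112 bits of security for HMAC keys: 112 / 8 = 14 bytes, so a shorter key now fails setkey() with -EINVAL when fips_enabled is set. Restated as a hedged helper (the function name is illustrative):

/* Hedged restatement of the new FIPS key-length floor (14 bytes). */
#include <linux/fips.h>
#include <linux/types.h>

static inline bool hmac_keylen_ok_fips(unsigned int keylen)
{
	return !fips_enabled || keylen >= 112 / 8;
}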
diff --git a/crypto/kpp.c b/crypto/kpp.c
index 313b2c699963..7aa6ba4b60a4 100644
--- a/crypto/kpp.c
+++ b/crypto/kpp.c
@@ -68,9 +68,17 @@ static int crypto_kpp_init_tfm(struct crypto_tfm *tfm)
return 0;
}
+static void crypto_kpp_free_instance(struct crypto_instance *inst)
+{
+ struct kpp_instance *kpp = kpp_instance(inst);
+
+ kpp->free(kpp);
+}
+
static const struct crypto_type crypto_kpp_type = {
.extsize = crypto_alg_extsize,
.init_tfm = crypto_kpp_init_tfm,
+ .free = crypto_kpp_free_instance,
#ifdef CONFIG_PROC_FS
.show = crypto_kpp_show,
#endif
@@ -87,6 +95,15 @@ struct crypto_kpp *crypto_alloc_kpp(const char *alg_name, u32 type, u32 mask)
}
EXPORT_SYMBOL_GPL(crypto_alloc_kpp);
+int crypto_grab_kpp(struct crypto_kpp_spawn *spawn,
+ struct crypto_instance *inst,
+ const char *name, u32 type, u32 mask)
+{
+ spawn->base.frontend = &crypto_kpp_type;
+ return crypto_grab_spawn(&spawn->base, inst, name, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_grab_kpp);
+
static void kpp_prepare_alg(struct kpp_alg *alg)
{
struct crypto_alg *base = &alg->base;
@@ -111,5 +128,17 @@ void crypto_unregister_kpp(struct kpp_alg *alg)
}
EXPORT_SYMBOL_GPL(crypto_unregister_kpp);
+int kpp_register_instance(struct crypto_template *tmpl,
+ struct kpp_instance *inst)
+{
+ if (WARN_ON(!inst->free))
+ return -EINVAL;
+
+ kpp_prepare_alg(&inst->alg);
+
+ return crypto_register_instance(tmpl, kpp_crypto_instance(inst));
+}
+EXPORT_SYMBOL_GPL(kpp_register_instance);
+
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Key-agreement Protocol Primitives");
diff --git a/crypto/lrw.c b/crypto/lrw.c
index bcf09fbc750a..8d59a66b6525 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -428,3 +428,4 @@ module_exit(lrw_module_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("LRW block cipher mode");
MODULE_ALIAS_CRYPTO("lrw");
+MODULE_SOFTDEP("pre: ecb");
diff --git a/crypto/memneq.c b/crypto/memneq.c
index afed1bd16aee..fb11608b1ec1 100644
--- a/crypto/memneq.c
+++ b/crypto/memneq.c
@@ -60,6 +60,7 @@
*/
#include <crypto/algapi.h>
+#include <asm/unaligned.h>
#ifndef __HAVE_ARCH_CRYPTO_MEMNEQ
@@ -71,7 +72,8 @@ __crypto_memneq_generic(const void *a, const void *b, size_t size)
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
while (size >= sizeof(unsigned long)) {
- neq |= *(unsigned long *)a ^ *(unsigned long *)b;
+ neq |= get_unaligned((unsigned long *)a) ^
+ get_unaligned((unsigned long *)b);
OPTIMIZER_HIDE_VAR(neq);
a += sizeof(unsigned long);
b += sizeof(unsigned long);
@@ -95,18 +97,24 @@ static inline unsigned long __crypto_memneq_16(const void *a, const void *b)
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
if (sizeof(unsigned long) == 8) {
- neq |= *(unsigned long *)(a) ^ *(unsigned long *)(b);
+ neq |= get_unaligned((unsigned long *)a) ^
+ get_unaligned((unsigned long *)b);
OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned long *)(a+8) ^ *(unsigned long *)(b+8);
+ neq |= get_unaligned((unsigned long *)(a + 8)) ^
+ get_unaligned((unsigned long *)(b + 8));
OPTIMIZER_HIDE_VAR(neq);
} else if (sizeof(unsigned int) == 4) {
- neq |= *(unsigned int *)(a) ^ *(unsigned int *)(b);
+ neq |= get_unaligned((unsigned int *)a) ^
+ get_unaligned((unsigned int *)b);
OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned int *)(a+4) ^ *(unsigned int *)(b+4);
+ neq |= get_unaligned((unsigned int *)(a + 4)) ^
+ get_unaligned((unsigned int *)(b + 4));
OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned int *)(a+8) ^ *(unsigned int *)(b+8);
+ neq |= get_unaligned((unsigned int *)(a + 8)) ^
+ get_unaligned((unsigned int *)(b + 8));
OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned int *)(a+12) ^ *(unsigned int *)(b+12);
+ neq |= get_unaligned((unsigned int *)(a + 12)) ^
+ get_unaligned((unsigned int *)(b + 12));
OPTIMIZER_HIDE_VAR(neq);
} else
#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
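Replacing the raw dereferences with get_unaligned() keeps the word-at-a-time path well-defined even when CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is set but the incoming pointers are not actually word-aligned. The underlying accumulate-XOR idea, as a hedged userspace sketch with memcpy() standing in for get_unaligned() and the OPTIMIZER_HIDE_VAR() barrier omitted:

/* Hedged userspace sketch of the accumulate-XOR constant-time compare. */
#include <stddef.h>
#include <string.h>

static unsigned long memneq_sketch(const void *a, const void *b, size_t n)
{
	const unsigned char *pa = a, *pb = b;
	unsigned long neq = 0;

	while (n >= sizeof(unsigned long)) {
		unsigned long wa, wb;

		/* memcpy is the portable stand-in for get_unaligned() */
		memcpy(&wa, pa, sizeof(wa));
		memcpy(&wb, pb, sizeof(wb));
		neq |= wa ^ wb;
		pa += sizeof(wa);
		pb += sizeof(wb);
		n -= sizeof(wa);
	}
	while (n--)
		neq |= *pa++ ^ *pb++;
	return neq;	/* zero iff equal; no data-dependent branches */
}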
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 8ac3e73e8ea6..3285e3af43e1 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -385,15 +385,15 @@ static int pkcs1pad_sign(struct akcipher_request *req)
struct pkcs1pad_inst_ctx *ictx = akcipher_instance_ctx(inst);
const struct rsa_asn1_template *digest_info = ictx->digest_info;
int err;
- unsigned int ps_end, digest_size = 0;
+ unsigned int ps_end, digest_info_size = 0;
if (!ctx->key_size)
return -EINVAL;
if (digest_info)
- digest_size = digest_info->size;
+ digest_info_size = digest_info->size;
- if (req->src_len + digest_size > ctx->key_size - 11)
+ if (req->src_len + digest_info_size > ctx->key_size - 11)
return -EOVERFLOW;
if (req->dst_len < ctx->key_size) {
@@ -406,7 +406,7 @@ static int pkcs1pad_sign(struct akcipher_request *req)
if (!req_ctx->in_buf)
return -ENOMEM;
- ps_end = ctx->key_size - digest_size - req->src_len - 2;
+ ps_end = ctx->key_size - digest_info_size - req->src_len - 2;
req_ctx->in_buf[0] = 0x01;
memset(req_ctx->in_buf + 1, 0xff, ps_end - 1);
req_ctx->in_buf[ps_end] = 0x00;
@@ -441,6 +441,8 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
struct akcipher_instance *inst = akcipher_alg_instance(tfm);
struct pkcs1pad_inst_ctx *ictx = akcipher_instance_ctx(inst);
const struct rsa_asn1_template *digest_info = ictx->digest_info;
+ const unsigned int sig_size = req->src_len;
+ const unsigned int digest_size = req->dst_len;
unsigned int dst_len;
unsigned int pos;
u8 *out_buf;
@@ -476,6 +478,8 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
pos++;
if (digest_info) {
+ if (digest_info->size > dst_len - pos)
+ goto done;
if (crypto_memneq(out_buf + pos, digest_info->data,
digest_info->size))
goto done;
@@ -485,20 +489,19 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
err = 0;
- if (req->dst_len != dst_len - pos) {
+ if (digest_size != dst_len - pos) {
err = -EKEYREJECTED;
req->dst_len = dst_len - pos;
goto done;
}
/* Extract appended digest. */
sg_pcopy_to_buffer(req->src,
- sg_nents_for_len(req->src,
- req->src_len + req->dst_len),
+ sg_nents_for_len(req->src, sig_size + digest_size),
req_ctx->out_buf + ctx->key_size,
- req->dst_len, ctx->key_size);
+ digest_size, sig_size);
/* Do the actual verification step. */
if (memcmp(req_ctx->out_buf + ctx->key_size, out_buf + pos,
- req->dst_len) != 0)
+ digest_size) != 0)
err = -EKEYREJECTED;
done:
kfree_sensitive(req_ctx->out_buf);
@@ -534,14 +537,15 @@ static int pkcs1pad_verify(struct akcipher_request *req)
struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm);
struct pkcs1pad_request *req_ctx = akcipher_request_ctx(req);
+ const unsigned int sig_size = req->src_len;
+ const unsigned int digest_size = req->dst_len;
int err;
- if (WARN_ON(req->dst) ||
- WARN_ON(!req->dst_len) ||
- !ctx->key_size || req->src_len < ctx->key_size)
+ if (WARN_ON(req->dst) || WARN_ON(!digest_size) ||
+ !ctx->key_size || sig_size != ctx->key_size)
return -EINVAL;
- req_ctx->out_buf = kmalloc(ctx->key_size + req->dst_len, GFP_KERNEL);
+ req_ctx->out_buf = kmalloc(ctx->key_size + digest_size, GFP_KERNEL);
if (!req_ctx->out_buf)
return -ENOMEM;
@@ -554,8 +558,7 @@ static int pkcs1pad_verify(struct akcipher_request *req)
/* Reuse input buffer, output to a new buffer */
akcipher_request_set_crypt(&req_ctx->child_req, req->src,
- req_ctx->out_sg, req->src_len,
- ctx->key_size);
+ req_ctx->out_sg, sig_size, ctx->key_size);
err = crypto_akcipher_encrypt(&req_ctx->child_req);
if (err != -EINPROGRESS && err != -EBUSY)
@@ -621,6 +624,11 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
rsa_alg = crypto_spawn_akcipher_alg(&ctx->spawn);
+ if (strcmp(rsa_alg->base.cra_name, "rsa") != 0) {
+ err = -EINVAL;
+ goto err_free_inst;
+ }
+
err = -ENAMETOOLONG;
hash_name = crypto_attr_alg_name(tb[2]);
if (IS_ERR(hash_name)) {
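The renames make the two lengths in play explicit: digest_info_size is the DER DigestInfo prefix, while sig_size/digest_size are the signature and hash supplied by the caller (and the new sig_size != ctx->key_size check rejects mis-sized signatures outright). For reference, the EMSA-PKCS1-v1_5 block these functions build and re-parse has the following shape, per RFC 8017 (the leading 0x00 octet is implicit in the RSA input):

/*
 *   0x01 | 0xff ... 0xff | 0x00 | DigestInfo | digest
 *
 * pkcs1pad_sign() writes this into key_size - 1 bytes, so
 * req->src_len + digest_info_size must not exceed key_size - 11:
 * that guarantees at least 8 bytes of 0xff padding in addition
 * to the three framing bytes (0x00, 0x01, 0x00).
 */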
diff --git a/crypto/sm2.c b/crypto/sm2.c
index db8a4a265669..f3e1592965c0 100644
--- a/crypto/sm2.c
+++ b/crypto/sm2.c
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* SM2 asymmetric public-key algorithm
* as specified by OSCCA GM/T 0003.1-2012 -- 0003.5-2012 SM2 and
@@ -13,7 +13,7 @@
#include <crypto/internal/akcipher.h>
#include <crypto/akcipher.h>
#include <crypto/hash.h>
-#include <crypto/sm3_base.h>
+#include <crypto/sm3.h>
#include <crypto/rng.h>
#include <crypto/sm2.h>
#include "sm2signature.asn1.h"
@@ -213,7 +213,7 @@ int sm2_get_signature_s(void *context, size_t hdrlen, unsigned char tag,
return 0;
}
-static int sm2_z_digest_update(struct shash_desc *desc,
+static int sm2_z_digest_update(struct sm3_state *sctx,
MPI m, unsigned int pbytes)
{
static const unsigned char zero[32];
@@ -226,20 +226,20 @@ static int sm2_z_digest_update(struct shash_desc *desc,
if (inlen < pbytes) {
/* padding with zero */
- crypto_sm3_update(desc, zero, pbytes - inlen);
- crypto_sm3_update(desc, in, inlen);
+ sm3_update(sctx, zero, pbytes - inlen);
+ sm3_update(sctx, in, inlen);
} else if (inlen > pbytes) {
/* skip the starting zero */
- crypto_sm3_update(desc, in + inlen - pbytes, pbytes);
+ sm3_update(sctx, in + inlen - pbytes, pbytes);
} else {
- crypto_sm3_update(desc, in, inlen);
+ sm3_update(sctx, in, inlen);
}
kfree(in);
return 0;
}
-static int sm2_z_digest_update_point(struct shash_desc *desc,
+static int sm2_z_digest_update_point(struct sm3_state *sctx,
MPI_POINT point, struct mpi_ec_ctx *ec, unsigned int pbytes)
{
MPI x, y;
@@ -249,8 +249,8 @@ static int sm2_z_digest_update_point(struct shash_desc *desc,
y = mpi_new(0);
if (!mpi_ec_get_affine(x, y, point, ec) &&
- !sm2_z_digest_update(desc, x, pbytes) &&
- !sm2_z_digest_update(desc, y, pbytes))
+ !sm2_z_digest_update(sctx, x, pbytes) &&
+ !sm2_z_digest_update(sctx, y, pbytes))
ret = 0;
mpi_free(x);
@@ -265,7 +265,7 @@ int sm2_compute_z_digest(struct crypto_akcipher *tfm,
struct mpi_ec_ctx *ec = akcipher_tfm_ctx(tfm);
uint16_t bits_len;
unsigned char entl[2];
- SHASH_DESC_ON_STACK(desc, NULL);
+ struct sm3_state sctx;
unsigned int pbytes;
if (id_len > (USHRT_MAX / 8) || !ec->Q)
@@ -278,17 +278,17 @@ int sm2_compute_z_digest(struct crypto_akcipher *tfm,
pbytes = MPI_NBYTES(ec->p);
/* ZA = H256(ENTLA | IDA | a | b | xG | yG | xA | yA) */
- sm3_base_init(desc);
- crypto_sm3_update(desc, entl, 2);
- crypto_sm3_update(desc, id, id_len);
-
- if (sm2_z_digest_update(desc, ec->a, pbytes) ||
- sm2_z_digest_update(desc, ec->b, pbytes) ||
- sm2_z_digest_update_point(desc, ec->G, ec, pbytes) ||
- sm2_z_digest_update_point(desc, ec->Q, ec, pbytes))
+ sm3_init(&sctx);
+ sm3_update(&sctx, entl, 2);
+ sm3_update(&sctx, id, id_len);
+
+ if (sm2_z_digest_update(&sctx, ec->a, pbytes) ||
+ sm2_z_digest_update(&sctx, ec->b, pbytes) ||
+ sm2_z_digest_update_point(&sctx, ec->G, ec, pbytes) ||
+ sm2_z_digest_update_point(&sctx, ec->Q, ec, pbytes))
return -EINVAL;
- crypto_sm3_final(desc, dgst);
+ sm3_final(&sctx, dgst);
return 0;
}
EXPORT_SYMBOL(sm2_compute_z_digest);
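The conversion away from shash means sm2's Z-digest now drives the exported SM3 library interface directly on a stack-allocated sm3_state; any kernel caller can use the same three-call pattern. A minimal hedged example (the demo function is illustrative):

/* Hedged sketch of the sm3 library calls used above. */
#include <crypto/sm3.h>

static void sm3_lib_demo(const u8 *msg, unsigned int len,
			 u8 out[SM3_DIGEST_SIZE])
{
	struct sm3_state sctx;

	sm3_init(&sctx);		/* load the SM3 IV */
	sm3_update(&sctx, msg, len);	/* absorb; may be called repeatedly */
	sm3_final(&sctx, out);		/* pad, finalize, write 32 bytes */
}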
diff --git a/crypto/sm3_generic.c b/crypto/sm3_generic.c
index 193c4584bd00..a215c1c37e73 100644
--- a/crypto/sm3_generic.c
+++ b/crypto/sm3_generic.c
@@ -5,6 +5,7 @@
*
* Copyright (C) 2017 ARM Limited or its affiliates.
* Written by Gilad Ben-Yossef <gilad@benyossef.com>
+ * Copyright (C) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <crypto/internal/hash.h>
@@ -26,143 +27,29 @@ const u8 sm3_zero_message_hash[SM3_DIGEST_SIZE] = {
};
EXPORT_SYMBOL_GPL(sm3_zero_message_hash);
-static inline u32 p0(u32 x)
-{
- return x ^ rol32(x, 9) ^ rol32(x, 17);
-}
-
-static inline u32 p1(u32 x)
-{
- return x ^ rol32(x, 15) ^ rol32(x, 23);
-}
-
-static inline u32 ff(unsigned int n, u32 a, u32 b, u32 c)
-{
- return (n < 16) ? (a ^ b ^ c) : ((a & b) | (a & c) | (b & c));
-}
-
-static inline u32 gg(unsigned int n, u32 e, u32 f, u32 g)
-{
- return (n < 16) ? (e ^ f ^ g) : ((e & f) | ((~e) & g));
-}
-
-static inline u32 t(unsigned int n)
-{
- return (n < 16) ? SM3_T1 : SM3_T2;
-}
-
-static void sm3_expand(u32 *t, u32 *w, u32 *wt)
-{
- int i;
- unsigned int tmp;
-
- /* load the input */
- for (i = 0; i <= 15; i++)
- w[i] = get_unaligned_be32((__u32 *)t + i);
-
- for (i = 16; i <= 67; i++) {
- tmp = w[i - 16] ^ w[i - 9] ^ rol32(w[i - 3], 15);
- w[i] = p1(tmp) ^ (rol32(w[i - 13], 7)) ^ w[i - 6];
- }
-
- for (i = 0; i <= 63; i++)
- wt[i] = w[i] ^ w[i + 4];
-}
-
-static void sm3_compress(u32 *w, u32 *wt, u32 *m)
-{
- u32 ss1;
- u32 ss2;
- u32 tt1;
- u32 tt2;
- u32 a, b, c, d, e, f, g, h;
- int i;
-
- a = m[0];
- b = m[1];
- c = m[2];
- d = m[3];
- e = m[4];
- f = m[5];
- g = m[6];
- h = m[7];
-
- for (i = 0; i <= 63; i++) {
-
- ss1 = rol32((rol32(a, 12) + e + rol32(t(i), i & 31)), 7);
-
- ss2 = ss1 ^ rol32(a, 12);
-
- tt1 = ff(i, a, b, c) + d + ss2 + *wt;
- wt++;
-
- tt2 = gg(i, e, f, g) + h + ss1 + *w;
- w++;
-
- d = c;
- c = rol32(b, 9);
- b = a;
- a = tt1;
- h = g;
- g = rol32(f, 19);
- f = e;
- e = p0(tt2);
- }
-
- m[0] = a ^ m[0];
- m[1] = b ^ m[1];
- m[2] = c ^ m[2];
- m[3] = d ^ m[3];
- m[4] = e ^ m[4];
- m[5] = f ^ m[5];
- m[6] = g ^ m[6];
- m[7] = h ^ m[7];
-
- a = b = c = d = e = f = g = h = ss1 = ss2 = tt1 = tt2 = 0;
-}
-
-static void sm3_transform(struct sm3_state *sst, u8 const *src)
-{
- unsigned int w[68];
- unsigned int wt[64];
-
- sm3_expand((u32 *)src, w, wt);
- sm3_compress(w, wt, sst->state);
-
- memzero_explicit(w, sizeof(w));
- memzero_explicit(wt, sizeof(wt));
-}
-
-static void sm3_generic_block_fn(struct sm3_state *sst, u8 const *src,
- int blocks)
-{
- while (blocks--) {
- sm3_transform(sst, src);
- src += SM3_BLOCK_SIZE;
- }
-}
-
-int crypto_sm3_update(struct shash_desc *desc, const u8 *data,
+static int crypto_sm3_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- return sm3_base_do_update(desc, data, len, sm3_generic_block_fn);
+ sm3_update(shash_desc_ctx(desc), data, len);
+ return 0;
}
-EXPORT_SYMBOL(crypto_sm3_update);
-int crypto_sm3_final(struct shash_desc *desc, u8 *out)
+static int crypto_sm3_final(struct shash_desc *desc, u8 *out)
{
- sm3_base_do_finalize(desc, sm3_generic_block_fn);
- return sm3_base_finish(desc, out);
+ sm3_final(shash_desc_ctx(desc), out);
+ return 0;
}
-EXPORT_SYMBOL(crypto_sm3_final);
-int crypto_sm3_finup(struct shash_desc *desc, const u8 *data,
+static int crypto_sm3_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *hash)
{
- sm3_base_do_update(desc, data, len, sm3_generic_block_fn);
- return crypto_sm3_final(desc, hash);
+ struct sm3_state *sctx = shash_desc_ctx(desc);
+
+ if (len)
+ sm3_update(sctx, data, len);
+ sm3_final(sctx, hash);
+ return 0;
}
-EXPORT_SYMBOL(crypto_sm3_finup);
static struct shash_alg sm3_alg = {
.digestsize = SM3_DIGEST_SIZE,
@@ -174,6 +61,7 @@ static struct shash_alg sm3_alg = {
.base = {
.cra_name = "sm3",
.cra_driver_name = "sm3-generic",
+ .cra_priority = 100,
.cra_blocksize = SM3_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
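Giving sm3-generic cra_priority = 100 follows the usual convention for C reference implementations: when several drivers register the same cra_name, lookup by "sm3" picks the highest priority, so an accelerated driver (arm64's sm3-ce, for instance, registering with a larger value) silently takes over. A hedged illustration:

/* Hedged sketch: name-based lookup selects the highest cra_priority. */
#include <crypto/hash.h>

static struct crypto_shash *sm3_pick(void)
{
	/* resolves to sm3-generic only if no higher-priority driver loaded */
	return crypto_alloc_shash("sm3", 0, 0);
}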
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 00149657a4bc..2bacf8384f59 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -724,200 +724,6 @@ static inline int do_one_ahash_op(struct ahash_request *req, int ret)
return crypto_wait_req(ret, wait);
}
-struct test_mb_ahash_data {
- struct scatterlist sg[XBUFSIZE];
- char result[64];
- struct ahash_request *req;
- struct crypto_wait wait;
- char *xbuf[XBUFSIZE];
-};
-
-static inline int do_mult_ahash_op(struct test_mb_ahash_data *data, u32 num_mb,
- int *rc)
-{
- int i, err = 0;
-
- /* Fire up a bunch of concurrent requests */
- for (i = 0; i < num_mb; i++)
- rc[i] = crypto_ahash_digest(data[i].req);
-
- /* Wait for all requests to finish */
- for (i = 0; i < num_mb; i++) {
- rc[i] = crypto_wait_req(rc[i], &data[i].wait);
-
- if (rc[i]) {
- pr_info("concurrent request %d error %d\n", i, rc[i]);
- err = rc[i];
- }
- }
-
- return err;
-}
-
-static int test_mb_ahash_jiffies(struct test_mb_ahash_data *data, int blen,
- int secs, u32 num_mb)
-{
- unsigned long start, end;
- int bcount;
- int ret = 0;
- int *rc;
-
- rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
- if (!rc)
- return -ENOMEM;
-
- for (start = jiffies, end = start + secs * HZ, bcount = 0;
- time_before(jiffies, end); bcount++) {
- ret = do_mult_ahash_op(data, num_mb, rc);
- if (ret)
- goto out;
- }
-
- pr_cont("%d operations in %d seconds (%llu bytes)\n",
- bcount * num_mb, secs, (u64)bcount * blen * num_mb);
-
-out:
- kfree(rc);
- return ret;
-}
-
-static int test_mb_ahash_cycles(struct test_mb_ahash_data *data, int blen,
- u32 num_mb)
-{
- unsigned long cycles = 0;
- int ret = 0;
- int i;
- int *rc;
-
- rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
- if (!rc)
- return -ENOMEM;
-
- /* Warm-up run. */
- for (i = 0; i < 4; i++) {
- ret = do_mult_ahash_op(data, num_mb, rc);
- if (ret)
- goto out;
- }
-
- /* The real thing. */
- for (i = 0; i < 8; i++) {
- cycles_t start, end;
-
- start = get_cycles();
- ret = do_mult_ahash_op(data, num_mb, rc);
- end = get_cycles();
-
- if (ret)
- goto out;
-
- cycles += end - start;
- }
-
- pr_cont("1 operation in %lu cycles (%d bytes)\n",
- (cycles + 4) / (8 * num_mb), blen);
-
-out:
- kfree(rc);
- return ret;
-}
-
-static void test_mb_ahash_speed(const char *algo, unsigned int secs,
- struct hash_speed *speed, u32 num_mb)
-{
- struct test_mb_ahash_data *data;
- struct crypto_ahash *tfm;
- unsigned int i, j, k;
- int ret;
-
- data = kcalloc(num_mb, sizeof(*data), GFP_KERNEL);
- if (!data)
- return;
-
- tfm = crypto_alloc_ahash(algo, 0, 0);
- if (IS_ERR(tfm)) {
- pr_err("failed to load transform for %s: %ld\n",
- algo, PTR_ERR(tfm));
- goto free_data;
- }
-
- for (i = 0; i < num_mb; ++i) {
- if (testmgr_alloc_buf(data[i].xbuf))
- goto out;
-
- crypto_init_wait(&data[i].wait);
-
- data[i].req = ahash_request_alloc(tfm, GFP_KERNEL);
- if (!data[i].req) {
- pr_err("alg: hash: Failed to allocate request for %s\n",
- algo);
- goto out;
- }
-
- ahash_request_set_callback(data[i].req, 0, crypto_req_done,
- &data[i].wait);
-
- sg_init_table(data[i].sg, XBUFSIZE);
- for (j = 0; j < XBUFSIZE; j++) {
- sg_set_buf(data[i].sg + j, data[i].xbuf[j], PAGE_SIZE);
- memset(data[i].xbuf[j], 0xff, PAGE_SIZE);
- }
- }
-
- pr_info("\ntesting speed of multibuffer %s (%s)\n", algo,
- get_driver_name(crypto_ahash, tfm));
-
- for (i = 0; speed[i].blen != 0; i++) {
- /* For some reason this only tests digests. */
- if (speed[i].blen != speed[i].plen)
- continue;
-
- if (speed[i].blen > XBUFSIZE * PAGE_SIZE) {
- pr_err("template (%u) too big for tvmem (%lu)\n",
- speed[i].blen, XBUFSIZE * PAGE_SIZE);
- goto out;
- }
-
- if (klen)
- crypto_ahash_setkey(tfm, tvmem[0], klen);
-
- for (k = 0; k < num_mb; k++)
- ahash_request_set_crypt(data[k].req, data[k].sg,
- data[k].result, speed[i].blen);
-
- pr_info("test%3u "
- "(%5u byte blocks,%5u bytes per update,%4u updates): ",
- i, speed[i].blen, speed[i].plen,
- speed[i].blen / speed[i].plen);
-
- if (secs) {
- ret = test_mb_ahash_jiffies(data, speed[i].blen, secs,
- num_mb);
- cond_resched();
- } else {
- ret = test_mb_ahash_cycles(data, speed[i].blen, num_mb);
- }
-
-
- if (ret) {
- pr_err("At least one hashing failed ret=%d\n", ret);
- break;
- }
- }
-
-out:
- for (k = 0; k < num_mb; ++k)
- ahash_request_free(data[k].req);
-
- for (k = 0; k < num_mb; ++k)
- testmgr_free_buf(data[k].xbuf);
-
- crypto_free_ahash(tfm);
-
-free_data:
- kfree(data);
-}
-
static int test_ahash_jiffies_digest(struct ahash_request *req, int blen,
char *out, int secs)
{
@@ -1667,8 +1473,8 @@ static inline int tcrypt_test(const char *alg)
pr_debug("testing %s\n", alg);
ret = alg_test(alg, alg, 0, 0);
- /* non-fips algs return -EINVAL in fips mode */
- if (fips_enabled && ret == -EINVAL)
+ /* non-fips algs return -EINVAL or -ECANCELED in fips mode */
+ if (fips_enabled && (ret == -EINVAL || ret == -ECANCELED))
ret = 0;
return ret;
}
@@ -2571,33 +2377,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
if (mode > 400 && mode < 500) break;
fallthrough;
case 422:
- test_mb_ahash_speed("sha1", sec, generic_hash_speed_template,
- num_mb);
- if (mode > 400 && mode < 500) break;
- fallthrough;
- case 423:
- test_mb_ahash_speed("sha256", sec, generic_hash_speed_template,
- num_mb);
- if (mode > 400 && mode < 500) break;
- fallthrough;
- case 424:
- test_mb_ahash_speed("sha512", sec, generic_hash_speed_template,
- num_mb);
- if (mode > 400 && mode < 500) break;
- fallthrough;
- case 425:
- test_mb_ahash_speed("sm3", sec, generic_hash_speed_template,
- num_mb);
- if (mode > 400 && mode < 500) break;
- fallthrough;
- case 426:
- test_mb_ahash_speed("streebog256", sec,
- generic_hash_speed_template, num_mb);
- if (mode > 400 && mode < 500) break;
- fallthrough;
- case 427:
- test_mb_ahash_speed("streebog512", sec,
- generic_hash_speed_template, num_mb);
+ test_ahash_speed("sm3", sec, generic_hash_speed_template);
if (mode > 400 && mode < 500) break;
fallthrough;
case 499:
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 5831d4bbc64f..2d632a285869 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -55,9 +55,6 @@ MODULE_PARM_DESC(noextratests, "disable expensive crypto self-tests");
static unsigned int fuzz_iterations = 100;
module_param(fuzz_iterations, uint, 0644);
MODULE_PARM_DESC(fuzz_iterations, "number of fuzz test iterations");
-
-DEFINE_PER_CPU(bool, crypto_simd_disabled_for_test);
-EXPORT_PER_CPU_SYMBOL_GPL(crypto_simd_disabled_for_test);
#endif
#ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
@@ -1854,6 +1851,9 @@ static int __alg_test_hash(const struct hash_testvec *vecs,
}
for (i = 0; i < num_vecs; i++) {
+ if (fips_enabled && vecs[i].fips_skip)
+ continue;
+
err = test_hash_vec(&vecs[i], i, req, desc, tsgl, hashstate);
if (err)
goto out;
@@ -4650,7 +4650,6 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "dh",
.test = alg_test_kpp,
- .fips_allowed = 1,
.suite = {
.kpp = __VECS(dh_tv_template)
}
@@ -4973,6 +4972,43 @@ static const struct alg_test_desc alg_test_descs[] = {
.cipher = __VECS(essiv_aes_cbc_tv_template)
}
}, {
+#if IS_ENABLED(CONFIG_CRYPTO_DH_RFC7919_GROUPS)
+ .alg = "ffdhe2048(dh)",
+ .test = alg_test_kpp,
+ .fips_allowed = 1,
+ .suite = {
+ .kpp = __VECS(ffdhe2048_dh_tv_template)
+ }
+ }, {
+ .alg = "ffdhe3072(dh)",
+ .test = alg_test_kpp,
+ .fips_allowed = 1,
+ .suite = {
+ .kpp = __VECS(ffdhe3072_dh_tv_template)
+ }
+ }, {
+ .alg = "ffdhe4096(dh)",
+ .test = alg_test_kpp,
+ .fips_allowed = 1,
+ .suite = {
+ .kpp = __VECS(ffdhe4096_dh_tv_template)
+ }
+ }, {
+ .alg = "ffdhe6144(dh)",
+ .test = alg_test_kpp,
+ .fips_allowed = 1,
+ .suite = {
+ .kpp = __VECS(ffdhe6144_dh_tv_template)
+ }
+ }, {
+ .alg = "ffdhe8192(dh)",
+ .test = alg_test_kpp,
+ .fips_allowed = 1,
+ .suite = {
+ .kpp = __VECS(ffdhe8192_dh_tv_template)
+ }
+ }, {
+#endif /* CONFIG_CRYPTO_DH_RFC7919_GROUPS */
.alg = "gcm(aes)",
.generic_driver = "gcm_base(ctr(aes-generic),ghash-generic)",
.test = alg_test_aead,
@@ -5613,6 +5649,13 @@ static int alg_find_test(const char *alg)
return -1;
}
+static int alg_fips_disabled(const char *driver, const char *alg)
+{
+ pr_info("alg: %s (%s) is disabled due to FIPS\n", alg, driver);
+
+ return -ECANCELED;
+}
+
int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
{
int i;
@@ -5649,9 +5692,13 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
if (i < 0 && j < 0)
goto notest;
- if (fips_enabled && ((i >= 0 && !alg_test_descs[i].fips_allowed) ||
- (j >= 0 && !alg_test_descs[j].fips_allowed)))
- goto non_fips_alg;
+ if (fips_enabled) {
+ if (j >= 0 && !alg_test_descs[j].fips_allowed)
+ return -EINVAL;
+
+ if (i >= 0 && !alg_test_descs[i].fips_allowed)
+ goto non_fips_alg;
+ }
rc = 0;
if (i >= 0)
@@ -5681,9 +5728,13 @@ test_done:
notest:
printk(KERN_INFO "alg: No test for %s (%s)\n", alg, driver);
+
+ if (type & CRYPTO_ALG_FIPS_INTERNAL)
+ return alg_fips_disabled(driver, alg);
+
return 0;
non_fips_alg:
- return -EINVAL;
+ return alg_fips_disabled(driver, alg);
}
#endif /* CONFIG_CRYPTO_MANAGER_DISABLE_TESTS */
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index a253d66ba1c1..d1aa90993bbd 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -33,6 +33,7 @@
* @ksize: Length of @key in bytes (0 if no key)
* @setkey_error: Expected error from setkey()
* @digest_error: Expected error from digest()
+ * @fips_skip: Skip the test vector in FIPS mode
*/
struct hash_testvec {
const char *key;
@@ -42,6 +43,7 @@ struct hash_testvec {
unsigned short ksize;
int setkey_error;
int digest_error;
+ bool fips_skip;
};
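Paired with the __alg_test_hash() change above, any vector setting this flag is silently skipped when fips_enabled. A hedged example entry (all values illustrative; the standard plaintext/psize/digest fields fall in the elided part of the struct, and the digest is deliberately left as a placeholder):

/* Hedged example: a vector skipped in FIPS mode (fields illustrative). */
static const struct hash_testvec example_fips_skipped_tv[] = {
	{
		.key		= "\x00\x01",	/* below the FIPS HMAC minimum */
		.ksize		= 2,
		.plaintext	= "abc",
		.psize		= 3,
		.digest		= "...",	/* elided placeholder */
		.fips_skip	= true,		/* ignored unless fips_enabled */
	},
};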
/*
@@ -1244,17 +1246,15 @@ static const struct kpp_testvec dh_tv_template[] = {
.secret =
#ifdef __LITTLE_ENDIAN
"\x01\x00" /* type */
- "\x15\x02" /* len */
+ "\x11\x02" /* len */
"\x00\x01\x00\x00" /* key_size */
"\x00\x01\x00\x00" /* p_size */
- "\x00\x00\x00\x00" /* q_size */
"\x01\x00\x00\x00" /* g_size */
#else
"\x00\x01" /* type */
- "\x02\x15" /* len */
+ "\x02\x11" /* len */
"\x00\x00\x01\x00" /* key_size */
"\x00\x00\x01\x00" /* p_size */
- "\x00\x00\x00\x00" /* q_size */
"\x00\x00\x00\x01" /* g_size */
#endif
/* xa */
@@ -1344,7 +1344,7 @@ static const struct kpp_testvec dh_tv_template[] = {
"\xd3\x34\x49\xad\x64\xa6\xb1\xc0\x59\x28\x75\x60\xa7\x8a\xb0\x11"
"\x56\x89\x42\x74\x11\xf5\xf6\x5e\x6f\x16\x54\x6a\xb1\x76\x4d\x50"
"\x8a\x68\xc1\x5b\x82\xb9\x0d\x00\x32\x50\xed\x88\x87\x48\x92\x17",
- .secret_size = 533,
+ .secret_size = 529,
.b_public_size = 256,
.expected_a_public_size = 256,
.expected_ss_size = 256,
@@ -1353,17 +1353,15 @@ static const struct kpp_testvec dh_tv_template[] = {
.secret =
#ifdef __LITTLE_ENDIAN
"\x01\x00" /* type */
- "\x15\x02" /* len */
+ "\x11\x02" /* len */
"\x00\x01\x00\x00" /* key_size */
"\x00\x01\x00\x00" /* p_size */
- "\x00\x00\x00\x00" /* q_size */
"\x01\x00\x00\x00" /* g_size */
#else
"\x00\x01" /* type */
- "\x02\x15" /* len */
+ "\x02\x11" /* len */
"\x00\x00\x01\x00" /* key_size */
"\x00\x00\x01\x00" /* p_size */
- "\x00\x00\x00\x00" /* q_size */
"\x00\x00\x00\x01" /* g_size */
#endif
/* xa */
@@ -1453,13 +1451,1442 @@ static const struct kpp_testvec dh_tv_template[] = {
"\x5e\x5a\x64\xbd\xf6\x85\x04\xe8\x28\x6a\xac\xef\xce\x19\x8e\x9a"
"\xfe\x75\xc0\x27\x69\xe3\xb3\x7b\x21\xa7\xb1\x16\xa4\x85\x23\xee"
"\xb0\x1b\x04\x6e\xbd\xab\x16\xde\xfd\x86\x6b\xa9\x95\xd7\x0b\xfd",
- .secret_size = 533,
+ .secret_size = 529,
.b_public_size = 256,
.expected_a_public_size = 256,
.expected_ss_size = 256,
}
};
+static const struct kpp_testvec ffdhe2048_dh_tv_template[] __maybe_unused = {
+ {
+ .secret =
+#ifdef __LITTLE_ENDIAN
+ "\x01\x00" /* type */
+ "\x10\x01" /* len */
+ "\x00\x01\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#else
+ "\x00\x01" /* type */
+ "\x01\x10" /* len */
+ "\x00\x00\x01\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#endif
+ /* xa */
+ "\x23\x7d\xd0\x06\xfd\x7a\xe5\x7a\x08\xda\x98\x31\xc0\xb3\xd5\x85"
+ "\xe2\x0d\x2a\x91\x5f\x78\x4b\xa6\x62\xd0\xa6\x35\xd4\xef\x86\x39"
+ "\xf1\xdb\x71\x5e\xb0\x11\x2e\xee\x91\x3a\xaa\xf9\xe3\xdf\x8d\x8b"
+ "\x48\x41\xde\xe8\x78\x53\xc5\x5f\x93\xd2\x79\x0d\xbe\x8d\x83\xe8"
+ "\x8f\x00\xd2\xde\x13\x18\x04\x05\x20\x6d\xda\xfa\x1d\x0b\x24\x52"
+ "\x3a\x18\x2b\xe1\x1e\xae\x15\x3b\x0f\xaa\x09\x09\xf6\x01\x98\xe9"
+ "\x81\x5d\x6b\x83\x6e\x55\xf1\x5d\x6f\x6f\x0d\x9d\xa8\x72\x32\x63"
+ "\x60\xe6\x0b\xc5\x22\xe2\xf9\x46\x58\xa2\x1c\x2a\xb0\xd5\xaf\xe3"
+ "\x5b\x03\xb7\x36\xb7\xba\x55\x20\x08\x7c\x51\xd4\x89\x42\x9c\x14"
+ "\x23\xe2\x71\x3e\x15\x2a\x0d\x34\x8a\xde\xad\x84\x11\x15\x72\x18"
+ "\x42\x43\x0a\xe2\x58\x29\xb3\x90\x0f\x56\xd8\x8a\x0f\x0e\xbc\x0e"
+ "\x9c\xe7\xd5\xe6\x5b\xbf\x06\x64\x38\x12\xa5\x8d\x5b\x68\x34\xdd"
+ "\x75\x48\xc9\xa7\xa3\x58\x5a\x1c\xe1\xb2\xc5\xe3\x39\x03\xcf\xab"
+ "\xc2\x14\x07\xaf\x55\x80\xc7\x63\xe4\x03\xeb\xe9\x0a\x25\x61\x85"
+ "\x1d\x0e\x81\x52\x7b\xbc\x4a\x0c\xc8\x59\x6a\xac\x18\xfb\x8c\x0c"
+ "\xb4\x79\xbd\xa1\x4c\xbb\x02\xc9\xd5\x13\x88\x3d\x25\xaa\x77\x49",
+ .b_public =
+ "\x5c\x00\x6f\xda\xfe\x4c\x0c\xc2\x18\xff\xa9\xec\x7a\xbe\x8a\x51"
+ "\x64\x6b\x57\xf8\xed\xe2\x36\x77\xc1\x23\xbf\x56\xa6\x48\x76\x34"
+ "\x0e\xf3\x68\x05\x45\x6a\x98\x5b\x9e\x8b\xc0\x11\x29\xcb\x5b\x66"
+ "\x2d\xc2\xeb\x4c\xf1\x7d\x85\x30\xaa\xd5\xf5\xb8\xd3\x62\x1e\x97"
+ "\x1e\x34\x18\xf8\x76\x8c\x10\xca\x1f\xe4\x5d\x62\xe1\xbe\x61\xef"
+ "\xaf\x2c\x8d\x97\x15\xa5\x86\xd5\xd3\x12\x6f\xec\xe2\xa4\xb2\x5a"
+ "\x35\x1d\xd4\x91\xa6\xef\x13\x09\x65\x9c\x45\xc0\x12\xad\x7f\xee"
+ "\x93\x5d\xfa\x89\x26\x7d\xae\xee\xea\x8c\xa3\xcf\x04\x2d\xa0\xc7"
+ "\xd9\x14\x62\xaf\xdf\xa0\x33\xd7\x5e\x83\xa2\xe6\x0e\x0e\x5d\x77"
+ "\xce\xe6\x72\xe4\xec\x9d\xff\x72\x9f\x38\x95\x19\x96\xba\x4c\xe3"
+ "\x5f\xb8\x46\x4a\x1d\xe9\x62\x7b\xa8\xdc\xe7\x61\x90\x6b\xb9\xd4"
+ "\xad\x0b\xa3\x06\xb3\x70\xfa\xea\x2b\xc4\x2c\xde\x43\x37\xf6\x8d"
+ "\x72\xf0\x86\x9a\xbb\x3b\x8e\x7a\x71\x03\x30\x30\x2a\x5d\xcd\x1e"
+ "\xe4\xd3\x08\x07\x75\x17\x17\x72\x1e\x77\x6c\x98\x0d\x29\x7f\xac"
+ "\xe7\xb2\xee\xa9\x1c\x33\x9d\x08\x39\xe1\xd8\x5b\xe5\xbc\x48\xb2"
+ "\xb6\xdf\xcd\xa0\x42\x06\xcc\xfb\xed\x60\x6f\xbc\x57\xac\x09\x45",
+ .expected_a_public =
+ "\x8b\xdb\xc1\xf7\xc6\xba\xa1\x38\x95\x6a\xa1\xb6\x04\x5e\xae\x52"
+ "\x72\xfc\xef\x2d\x9d\x71\x05\x9c\xd3\x02\xa9\xfb\x55\x0f\xfa\xc9"
+ "\xb4\x34\x51\xa3\x28\x89\x8d\x93\x92\xcb\xd9\xb5\xb9\x66\xfc\x67"
+ "\x15\x92\x6f\x73\x85\x15\xe2\xfc\x11\x6b\x97\x8c\x4b\x0f\x12\xfa"
+ "\x8d\x72\x76\x9b\x8f\x3b\xfe\x31\xbe\x42\x88\x4c\xd2\xb2\x70\xa6"
+ "\xa5\xe3\x7e\x73\x07\x12\x36\xaa\xc9\x5c\x83\xe1\xf1\x46\x41\x4f"
+ "\x7c\x52\xaf\xdc\xa4\xe6\x82\xa3\x86\x83\x47\x5a\x12\x3a\x0c\xe3"
+ "\xdd\xdb\x94\x03\x2a\x59\x91\xa0\x19\xe5\xf8\x07\xdd\x54\x6a\x22"
+ "\x43\xb7\xf3\x74\xd7\xb9\x30\xfe\x9c\xe8\xd1\xcf\x06\x43\x68\xb9"
+ "\x54\x8f\x54\xa2\xe5\x3c\xf2\xc3\x4c\xee\xd4\x7c\x5d\x0e\xb1\x7b"
+ "\x16\x68\xb5\xb3\x7d\xd4\x11\x83\x5c\x77\x17\xc4\xf0\x59\x76\x7a"
+ "\x83\x40\xe5\xd9\x4c\x76\x23\x5b\x17\x6d\xee\x4a\x92\x68\x4b\x89"
+ "\xa0\x6d\x23\x8c\x80\x31\x33\x3a\x12\xf4\x50\xa6\xcb\x13\x97\x01"
+ "\xb8\x2c\xe6\xd2\x38\xdf\xd0\x7f\xc6\x27\x19\x0e\xb2\x07\xfd\x1f"
+ "\x1b\x9c\x1b\x87\xf9\x73\x6a\x3f\x7f\xb0\xf9\x2f\x3c\x19\x9f\xc9"
+ "\x8f\x97\x21\x0e\x8e\xbb\x1a\x17\x20\x15\xdd\xc6\x42\x60\xae\x4d",
+ .expected_ss =
+ "\xf3\x0e\x64\x7b\x66\xd7\x82\x7e\xab\x7e\x4a\xbe\x13\x6f\x43\x3d"
+ "\xea\x4f\x1f\x8b\x9d\x41\x56\x71\xe1\x06\x96\x02\x68\xfa\x44\x6e"
+ "\xe7\xf2\x26\xd4\x01\x4a\xf0\x28\x25\x76\xad\xd7\xe0\x17\x74\xfe"
+ "\xf9\xe1\x6d\xd3\xf7\xc7\xdf\xc0\x62\xa5\xf3\x4e\x1b\x5c\x77\x2a"
+ "\xfb\x0b\x87\xc3\xde\x1e\xc1\xe0\xd3\x7a\xb8\x02\x02\xec\x9c\x97"
+ "\xfb\x34\xa0\x20\x10\x23\x87\xb2\x9a\x72\xe3\x3d\xb2\x18\x50\xf3"
+ "\x6a\xd3\xd3\x19\xc4\x36\xd5\x59\xd6\xd6\xa7\x5c\xc3\xf9\x09\x33"
+ "\xa1\xf5\xb9\x4b\xf3\x0b\xe1\x4f\x79\x6b\x45\xf2\xec\x8b\xe5\x69"
+ "\x9f\xc6\x05\x01\xfe\x3a\x13\xfd\x6d\xea\x03\x83\x29\x7c\x7f\xf5"
+ "\x41\x55\x95\xde\x7e\x62\xae\xaf\x28\xdb\x7c\xa9\x90\x1e\xb2\xb1"
+ "\x1b\xef\xf1\x2e\xde\x47\xaa\xa8\x92\x9a\x49\x3d\xc0\xe0\x8d\xbb"
+ "\x0c\x42\x86\xaf\x00\xce\xb0\xab\x22\x7c\xe9\xbe\xb9\x72\x2f\xcf"
+ "\x5e\x5d\x62\x52\x2a\xd1\xfe\xcc\xa2\xf3\x40\xfd\x01\xa7\x54\x0a"
+ "\xa1\xfb\x1c\xf2\x44\xa6\x47\x30\x5a\xba\x2a\x05\xff\xd0\x6c\xab"
+ "\xeb\xe6\x8f\xf6\xd7\x73\xa3\x0e\x6c\x0e\xcf\xfd\x8e\x16\x5d\xe0"
+ "\x2c\x11\x05\x82\x3c\x22\x16\x6c\x52\x61\xcf\xbb\xff\xf8\x06\xd0",
+ .secret_size = 272,
+ .b_public_size = 256,
+ .expected_a_public_size = 256,
+ .expected_ss_size = 256,
+ },
+ {
+ .secret =
+#ifdef __LITTLE_ENDIAN
+ "\x01\x00" /* type */
+ "\x10\x00" /* len */
+ "\x00\x00\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00", /* g_size */
+#else
+ "\x00\x01" /* type */
+ "\x00\x10" /* len */
+ "\x00\x00\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00", /* g_size */
+#endif
+ .b_secret =
+#ifdef __LITTLE_ENDIAN
+ "\x01\x00" /* type */
+ "\x10\x01" /* len */
+ "\x00\x01\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#else
+ "\x00\x01" /* type */
+ "\x01\x10" /* len */
+ "\x00\x00\x01\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#endif
+ /* xa */
+ "\x23\x7d\xd0\x06\xfd\x7a\xe5\x7a\x08\xda\x98\x31\xc0\xb3\xd5\x85"
+ "\xe2\x0d\x2a\x91\x5f\x78\x4b\xa6\x62\xd0\xa6\x35\xd4\xef\x86\x39"
+ "\xf1\xdb\x71\x5e\xb0\x11\x2e\xee\x91\x3a\xaa\xf9\xe3\xdf\x8d\x8b"
+ "\x48\x41\xde\xe8\x78\x53\xc5\x5f\x93\xd2\x79\x0d\xbe\x8d\x83\xe8"
+ "\x8f\x00\xd2\xde\x13\x18\x04\x05\x20\x6d\xda\xfa\x1d\x0b\x24\x52"
+ "\x3a\x18\x2b\xe1\x1e\xae\x15\x3b\x0f\xaa\x09\x09\xf6\x01\x98\xe9"
+ "\x81\x5d\x6b\x83\x6e\x55\xf1\x5d\x6f\x6f\x0d\x9d\xa8\x72\x32\x63"
+ "\x60\xe6\x0b\xc5\x22\xe2\xf9\x46\x58\xa2\x1c\x2a\xb0\xd5\xaf\xe3"
+ "\x5b\x03\xb7\x36\xb7\xba\x55\x20\x08\x7c\x51\xd4\x89\x42\x9c\x14"
+ "\x23\xe2\x71\x3e\x15\x2a\x0d\x34\x8a\xde\xad\x84\x11\x15\x72\x18"
+ "\x42\x43\x0a\xe2\x58\x29\xb3\x90\x0f\x56\xd8\x8a\x0f\x0e\xbc\x0e"
+ "\x9c\xe7\xd5\xe6\x5b\xbf\x06\x64\x38\x12\xa5\x8d\x5b\x68\x34\xdd"
+ "\x75\x48\xc9\xa7\xa3\x58\x5a\x1c\xe1\xb2\xc5\xe3\x39\x03\xcf\xab"
+ "\xc2\x14\x07\xaf\x55\x80\xc7\x63\xe4\x03\xeb\xe9\x0a\x25\x61\x85"
+ "\x1d\x0e\x81\x52\x7b\xbc\x4a\x0c\xc8\x59\x6a\xac\x18\xfb\x8c\x0c"
+ "\xb4\x79\xbd\xa1\x4c\xbb\x02\xc9\xd5\x13\x88\x3d\x25\xaa\x77\x49",
+ .b_public =
+ "\x8b\xdb\xc1\xf7\xc6\xba\xa1\x38\x95\x6a\xa1\xb6\x04\x5e\xae\x52"
+ "\x72\xfc\xef\x2d\x9d\x71\x05\x9c\xd3\x02\xa9\xfb\x55\x0f\xfa\xc9"
+ "\xb4\x34\x51\xa3\x28\x89\x8d\x93\x92\xcb\xd9\xb5\xb9\x66\xfc\x67"
+ "\x15\x92\x6f\x73\x85\x15\xe2\xfc\x11\x6b\x97\x8c\x4b\x0f\x12\xfa"
+ "\x8d\x72\x76\x9b\x8f\x3b\xfe\x31\xbe\x42\x88\x4c\xd2\xb2\x70\xa6"
+ "\xa5\xe3\x7e\x73\x07\x12\x36\xaa\xc9\x5c\x83\xe1\xf1\x46\x41\x4f"
+ "\x7c\x52\xaf\xdc\xa4\xe6\x82\xa3\x86\x83\x47\x5a\x12\x3a\x0c\xe3"
+ "\xdd\xdb\x94\x03\x2a\x59\x91\xa0\x19\xe5\xf8\x07\xdd\x54\x6a\x22"
+ "\x43\xb7\xf3\x74\xd7\xb9\x30\xfe\x9c\xe8\xd1\xcf\x06\x43\x68\xb9"
+ "\x54\x8f\x54\xa2\xe5\x3c\xf2\xc3\x4c\xee\xd4\x7c\x5d\x0e\xb1\x7b"
+ "\x16\x68\xb5\xb3\x7d\xd4\x11\x83\x5c\x77\x17\xc4\xf0\x59\x76\x7a"
+ "\x83\x40\xe5\xd9\x4c\x76\x23\x5b\x17\x6d\xee\x4a\x92\x68\x4b\x89"
+ "\xa0\x6d\x23\x8c\x80\x31\x33\x3a\x12\xf4\x50\xa6\xcb\x13\x97\x01"
+ "\xb8\x2c\xe6\xd2\x38\xdf\xd0\x7f\xc6\x27\x19\x0e\xb2\x07\xfd\x1f"
+ "\x1b\x9c\x1b\x87\xf9\x73\x6a\x3f\x7f\xb0\xf9\x2f\x3c\x19\x9f\xc9"
+ "\x8f\x97\x21\x0e\x8e\xbb\x1a\x17\x20\x15\xdd\xc6\x42\x60\xae\x4d",
+ .secret_size = 16,
+ .b_secret_size = 272,
+ .b_public_size = 256,
+ .expected_a_public_size = 256,
+ .expected_ss_size = 256,
+ .genkey = true,
+ },
+};
+
+static const struct kpp_testvec ffdhe3072_dh_tv_template[] __maybe_unused = {
+ {
+ .secret =
+#ifdef __LITTLE_ENDIAN
+ "\x01\x00" /* type */
+ "\x90\x01" /* len */
+ "\x80\x01\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#else
+ "\x00\x01" /* type */
+ "\x01\x90" /* len */
+ "\x00\x00\x01\x80" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#endif
+ /* xa */
+ "\x6b\xb4\x97\x23\xfa\xc8\x5e\xa9\x7b\x63\xe7\x3e\x0e\x99\xc3\xb9"
+ "\xda\xb7\x48\x0d\xc3\xb1\xbf\x4f\x17\xc7\xa9\x51\xf6\x64\xff\xc4"
+ "\x31\x58\x87\x25\x83\x2c\x00\xf0\x41\x29\xf7\xee\xf9\xe6\x36\x76"
+ "\xd6\x3a\x24\xbe\xa7\x07\x0b\x93\xc7\x9f\x6c\x75\x0a\x26\x75\x76"
+ "\xe3\x0c\x42\xe0\x00\x04\x69\xd9\xec\x0b\x59\x54\x28\x8f\xd7\x9a"
+ "\x63\xf4\x5b\xdf\x85\x65\xc4\xe1\x95\x27\x4a\x42\xad\x36\x47\xa9"
+ "\x0a\xf8\x14\x1c\xf3\x94\x3b\x7e\x47\x99\x35\xa8\x18\xec\x70\x10"
+ "\xdf\xcb\xd2\x78\x88\xc1\x2d\x59\x93\xc1\xa4\x6d\xd7\x1d\xb9\xd5"
+ "\xf8\x30\x06\x7f\x98\x90\x0c\x74\x5e\x89\x2f\x64\x5a\xad\x5f\x53"
+ "\xb2\xa3\xa8\x83\xbf\xfc\x37\xef\xb8\x36\x0a\x5c\x62\x81\x64\x74"
+ "\x16\x2f\x45\x39\x2a\x91\x26\x87\xc0\x12\xcc\x75\x11\xa3\xa1\xc5"
+ "\xae\x20\xcf\xcb\x20\x25\x6b\x7a\x31\x93\x9d\x38\xb9\x57\x72\x46"
+ "\xd4\x84\x65\x87\xf1\xb5\xd3\xab\xfc\xc3\x4d\x40\x92\x94\x1e\xcd"
+ "\x1c\x87\xec\x3f\xcd\xbe\xd0\x95\x6b\x40\x02\xdd\x62\xeb\x0a\xda"
+ "\x4f\xbe\x8e\x32\x48\x8b\x6d\x83\xa0\x96\x62\x23\xec\x83\x91\x44"
+ "\xf9\x72\x01\xac\xa0\xe4\x72\x1d\x5a\x75\x05\x57\x90\xae\x7e\xb4"
+ "\x71\x39\x01\x05\xdc\xe9\xee\xcb\xf0\x61\x28\x91\x69\x8c\x31\x03"
+ "\x7a\x92\x15\xa1\x58\x67\x3d\x70\x82\xa6\x2c\xfe\x10\x56\x58\xd3"
+ "\x94\x67\xe1\xbe\xee\xc1\x64\x5c\x4b\xc8\x28\x3d\xc5\x66\x3a\xab"
+ "\x22\xc1\x7e\xa1\xbb\xf3\x19\x3b\xda\x46\x82\x45\xd4\x3c\x7c\xc6"
+ "\xce\x1f\x7f\x95\xa2\x17\xff\x88\xba\xd6\x4d\xdb\xd2\xea\xde\x39"
+ "\xd6\xa5\x18\x73\xbb\x64\x6e\x79\xe9\xdc\x3f\x92\x7f\xda\x1f\x49"
+ "\x33\x70\x65\x73\xa2\xd9\x06\xb8\x1b\x29\x29\x1a\xe0\xa3\xe6\x05"
+ "\x9a\xa8\xc2\x4e\x7a\x78\x1d\x22\x57\x21\xc8\xa3\x8d\x66\x3e\x23",
+ .b_public =
+ "\x73\x40\x8b\xce\xe8\x6a\x1c\x03\x50\x54\x42\x36\x22\xc6\x1d\xe8"
+ "\xe1\xef\x5c\x89\xa5\x55\xc1\xc4\x1c\xd7\x4f\xee\x5d\xba\x62\x60"
+ "\xfe\x93\x2f\xfd\x93\x2c\x8f\x70\xc6\x47\x17\x25\xb2\x95\xd7\x7d"
+ "\x41\x81\x4d\x52\x1c\xbe\x4d\x57\x3e\x26\x51\x28\x03\x8f\x67\xf5"
+ "\x22\x16\x1c\x67\xf7\x62\xcb\xfd\xa3\xee\x8d\xe0\xfa\x15\x9a\x53"
+ "\xbe\x7b\x9f\xc0\x12\x7a\xfc\x5e\x77\x2d\x60\x06\xba\x71\xc5\xca"
+ "\xd7\x26\xaf\x3b\xba\x6f\xd3\xc4\x82\x57\x19\x26\xb0\x16\x7b\xbd"
+ "\x83\xf2\x21\x03\x79\xff\x0a\x6f\xc5\x7b\x00\x15\xad\x5b\xf4\x42"
+ "\x1f\xcb\x7f\x3d\x34\x77\x3c\xc3\xe0\x38\xa5\x40\x51\xbe\x6f\xd9"
+ "\xc9\x77\x9c\xfc\x0d\xc1\x8e\xef\x0f\xaa\x5e\xa8\xbb\x16\x4a\x3e"
+ "\x26\x55\xae\xc1\xb6\x3e\xfd\x73\xf7\x59\xd2\xe5\x4b\x91\x8e\x28"
+ "\x77\x1e\x5a\xe2\xcd\xce\x92\x35\xbb\x1e\xbb\xcf\x79\x94\xdf\x31"
+ "\xde\x31\xa8\x75\xf6\xe0\xaa\x2e\xe9\x4f\x44\xc8\xba\xb9\xab\x80"
+ "\x29\xa1\xea\x58\x2e\x40\x96\xa0\x1a\xf5\x2c\x38\x47\x43\x5d\x26"
+ "\x2c\xd8\xad\xea\xd3\xad\xe8\x51\x49\xad\x45\x2b\x25\x7c\xde\xe4"
+ "\xaf\x03\x2a\x39\x26\x86\x66\x10\xbc\xa8\x71\xda\xe0\xe8\xf1\xdd"
+ "\x50\xff\x44\xb2\xd3\xc7\xff\x66\x63\xf6\x42\xe3\x97\x9d\x9e\xf4"
+ "\xa6\x89\xb9\xab\x12\x17\xf2\x85\x56\x9c\x6b\x24\x71\x83\x57\x7d"
+ "\x3c\x7b\x2b\x88\x92\x19\xd7\x1a\x00\xd5\x38\x94\x43\x60\x4d\xa7"
+ "\x12\x9e\x0d\xf6\x5c\x9a\xd3\xe2\x9e\xb1\x21\xe8\xe2\x9e\xe9\x1e"
+ "\x9d\xa5\x94\x95\xa6\x3d\x12\x15\xd8\x8b\xac\xe0\x8c\xde\xe6\x40"
+ "\x98\xaa\x5e\x55\x4f\x3d\x86\x87\x0d\xe3\xc6\x68\x15\xe6\xde\x17"
+ "\x78\x21\xc8\x6c\x06\xc7\x94\x56\xb4\xaf\xa2\x35\x0b\x0c\x97\xd7"
+ "\xa4\x12\xee\xf4\xd2\xef\x80\x28\xb3\xee\xe9\x15\x8b\x01\x32\x79",
+ .expected_a_public =
+ "\x1b\x6a\xba\xea\xa3\xcc\x50\x69\xa9\x41\x89\xaf\x04\xe1\x44\x22"
+ "\x97\x20\xd1\xf6\x1e\xcb\x64\x36\x6f\xee\x0b\x16\xc1\xd9\x91\xbe"
+ "\x57\xc8\xd9\xf2\xa1\x96\x91\xec\x41\xc7\x79\x00\x1a\x48\x25\x55"
+ "\xbe\xf3\x20\x8c\x38\xc6\x7b\xf2\x8b\x5a\xc3\xb5\x87\x0a\x86\x3d"
+ "\xb7\xd6\xce\xb0\x96\x2e\x5d\xc4\x00\x5e\x42\xe4\xe5\x50\x4f\xb8"
+ "\x6f\x18\xa4\xe1\xd3\x20\xfc\x3c\xf5\x0a\xff\x23\xa6\x5b\xb4\x17"
+ "\x3e\x7b\xdf\xb9\xb5\x3c\x1b\x76\x29\xcd\xb4\x46\x4f\x27\x8f\xd2"
+ "\xe8\x27\x66\xdb\xe8\xb3\xf5\xe1\xd0\x04\xcd\x89\xff\xba\x76\x67"
+ "\xe8\x4d\xcf\x86\x1c\x8a\xd1\xcf\x99\x27\xfb\xa9\x78\xcc\x94\xaf"
+ "\x3d\x04\xfd\x25\xc0\x47\xfa\x29\x80\x05\xf4\xde\xad\xdb\xab\x12"
+ "\xb0\x2b\x8e\xca\x02\x06\x6d\xad\x3e\x09\xb1\x22\xa3\xf5\x4c\x6d"
+ "\x69\x99\x58\x8b\xd8\x45\x2e\xe0\xc9\x3c\xf7\x92\xce\x21\x90\x6b"
+ "\x3b\x65\x9f\x64\x79\x8d\x67\x22\x1a\x37\xd3\xee\x51\xe2\xe7\x5a"
+ "\x93\x51\xaa\x3c\x4b\x04\x16\x32\xef\xe3\x66\xbe\x18\x94\x88\x64"
+ "\x79\xce\x06\x3f\xb8\xd6\xee\xdc\x13\x79\x6f\x20\x14\xc2\x6b\xce"
+ "\xc8\xda\x42\xa5\x93\x5b\xe4\x7f\x1a\xe6\xda\x0f\xb3\xc1\x5f\x30"
+ "\x50\x76\xe8\x37\x3d\xca\x77\x2c\xa8\xe4\x3b\xf9\x6f\xe0\x17\xed"
+ "\x0e\xef\xb7\x31\x14\xb5\xea\xd9\x39\x22\x89\xb6\x40\x57\xcc\x84"
+ "\xef\x73\xa7\xe9\x27\x21\x85\x89\xfa\xaf\x03\xda\x9c\x8b\xfd\x52"
+ "\x7d\xb0\xa4\xe4\xf9\xd8\x90\x55\xc4\x39\xd6\x9d\xaf\x3b\xce\xac"
+ "\xaa\x36\x14\x7a\x9b\x8b\x12\x43\xe1\xca\x61\xae\x46\x5b\xe7\xe5"
+ "\x88\x32\x80\xa0\x2d\x51\xbb\x2f\xea\xeb\x3c\x71\xb2\xae\xce\xca"
+ "\x61\xd2\x76\xe0\x45\x46\x78\x4e\x09\x2d\xc2\x54\xc2\xa9\xc7\xa8"
+ "\x55\x8e\x72\xa4\x8b\x8a\xc9\x01\xdb\xe9\x58\x11\xa1\xc4\xe7\x12",
+ .expected_ss =
+ "\x47\x8e\xb2\x19\x09\xf0\x46\x99\x6b\x41\x86\xf7\x34\xad\xbf\x2a"
+ "\x18\x1b\x7d\xec\xa9\xb2\x47\x2f\x40\xfb\x9a\x64\x30\x44\xf3\x4c"
+ "\x01\x67\xad\x57\x5a\xbc\xd4\xc8\xef\x7e\x8a\x14\x74\x1d\x6d\x8c"
+ "\x7b\xce\xc5\x57\x5f\x95\xe8\x72\xba\xdf\xa3\xcd\x00\xbe\x09\x4c"
+ "\x06\x72\xe7\x17\xb0\xe5\xe5\xb7\x20\xa5\xcb\xd9\x68\x99\xad\x3f"
+ "\xde\xf3\xde\x1d\x1c\x00\x74\xd2\xd1\x57\x55\x5d\xce\x76\x0c\xc4"
+ "\x7a\xc4\x65\x7c\x19\x17\x0a\x09\x66\x7d\x3a\xab\xf7\x61\x3a\xe3"
+ "\x5b\xac\xcf\x69\xb0\x8b\xee\x5d\x28\x36\xbb\x3f\x74\xce\x6e\x38"
+ "\x1e\x39\xab\x26\xca\x89\xdc\x58\x59\xcb\x95\xe4\xbc\xd6\x19\x48"
+ "\xd0\x55\x68\x7b\xb4\x27\x95\x3c\xd9\x58\x10\x4f\x8f\x55\x1c\x3f"
+ "\x04\xce\x89\x1f\x82\x28\xe9\x48\x17\x47\x8f\xee\xb7\x8f\xeb\xb1"
+ "\x29\xa8\x23\x18\x73\x33\x9f\x83\x08\xca\xcd\x54\x6e\xca\xec\x78"
+ "\x7b\x16\x83\x3f\xdb\x0a\xef\xfd\x87\x94\x19\x08\x6e\x6e\x22\x57"
+ "\xd7\xd2\x79\xf9\xf6\xeb\xe0\x6c\x93\x9d\x95\xfa\x41\x7a\xa9\xd6"
+ "\x2a\xa3\x26\x9b\x24\x1b\x8b\xa0\xed\x04\xb2\xe4\x6c\x4e\xc4\x3f"
+ "\x61\xe5\xe0\x4d\x09\x28\xaf\x58\x35\x25\x0b\xd5\x38\x18\x69\x51"
+ "\x18\x51\x73\x7b\x28\x19\x9f\xe4\x69\xfc\x2c\x25\x08\x99\x8f\x62"
+ "\x65\x62\xa5\x28\xf1\xf4\xfb\x02\x29\x27\xb0\x5e\xbb\x4f\xf9\x1a"
+ "\xa7\xc4\x38\x63\x5b\x01\xfe\x00\x66\xe3\x47\x77\x21\x85\x17\xd5"
+ "\x34\x19\xd3\x87\xab\x44\x62\x08\x59\xb2\x6b\x1f\x21\x0c\x23\x84"
+ "\xf7\xba\x92\x67\xf9\x16\x85\x6a\xe0\xeb\xe7\x4f\x06\x80\x81\x81"
+ "\x28\x9c\xe8\x2e\x71\x97\x48\xe0\xd1\xbc\xce\xe9\x42\x2c\x89\xdf"
+ "\x0b\xa9\xa1\x07\x84\x33\x78\x7f\x49\x2f\x1c\x55\xc3\x7f\xc3\x37"
+ "\x40\xdf\x13\xf4\xa0\x21\x79\x6e\x3a\xe3\xb8\x23\x9e\x8a\x6e\x9c",
+ .secret_size = 400,
+ .b_public_size = 384,
+ .expected_a_public_size = 384,
+ .expected_ss_size = 384,
+ },
+ {
+ .secret =
+#ifdef __LITTLE_ENDIAN
+ "\x01\x00" /* type */
+ "\x10\x00" /* len */
+ "\x00\x00\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00", /* g_size */
+#else
+ "\x00\x01" /* type */
+ "\x00\x10" /* len */
+ "\x00\x00\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00", /* g_size */
+#endif
+ .b_secret =
+#ifdef __LITTLE_ENDIAN
+ "\x01\x00" /* type */
+ "\x90\x01" /* len */
+ "\x80\x01\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#else
+ "\x00\x01" /* type */
+ "\x01\x90" /* len */
+ "\x00\x00\x01\x80" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#endif
+ /* xa */
+ "\x6b\xb4\x97\x23\xfa\xc8\x5e\xa9\x7b\x63\xe7\x3e\x0e\x99\xc3\xb9"
+ "\xda\xb7\x48\x0d\xc3\xb1\xbf\x4f\x17\xc7\xa9\x51\xf6\x64\xff\xc4"
+ "\x31\x58\x87\x25\x83\x2c\x00\xf0\x41\x29\xf7\xee\xf9\xe6\x36\x76"
+ "\xd6\x3a\x24\xbe\xa7\x07\x0b\x93\xc7\x9f\x6c\x75\x0a\x26\x75\x76"
+ "\xe3\x0c\x42\xe0\x00\x04\x69\xd9\xec\x0b\x59\x54\x28\x8f\xd7\x9a"
+ "\x63\xf4\x5b\xdf\x85\x65\xc4\xe1\x95\x27\x4a\x42\xad\x36\x47\xa9"
+ "\x0a\xf8\x14\x1c\xf3\x94\x3b\x7e\x47\x99\x35\xa8\x18\xec\x70\x10"
+ "\xdf\xcb\xd2\x78\x88\xc1\x2d\x59\x93\xc1\xa4\x6d\xd7\x1d\xb9\xd5"
+ "\xf8\x30\x06\x7f\x98\x90\x0c\x74\x5e\x89\x2f\x64\x5a\xad\x5f\x53"
+ "\xb2\xa3\xa8\x83\xbf\xfc\x37\xef\xb8\x36\x0a\x5c\x62\x81\x64\x74"
+ "\x16\x2f\x45\x39\x2a\x91\x26\x87\xc0\x12\xcc\x75\x11\xa3\xa1\xc5"
+ "\xae\x20\xcf\xcb\x20\x25\x6b\x7a\x31\x93\x9d\x38\xb9\x57\x72\x46"
+ "\xd4\x84\x65\x87\xf1\xb5\xd3\xab\xfc\xc3\x4d\x40\x92\x94\x1e\xcd"
+ "\x1c\x87\xec\x3f\xcd\xbe\xd0\x95\x6b\x40\x02\xdd\x62\xeb\x0a\xda"
+ "\x4f\xbe\x8e\x32\x48\x8b\x6d\x83\xa0\x96\x62\x23\xec\x83\x91\x44"
+ "\xf9\x72\x01\xac\xa0\xe4\x72\x1d\x5a\x75\x05\x57\x90\xae\x7e\xb4"
+ "\x71\x39\x01\x05\xdc\xe9\xee\xcb\xf0\x61\x28\x91\x69\x8c\x31\x03"
+ "\x7a\x92\x15\xa1\x58\x67\x3d\x70\x82\xa6\x2c\xfe\x10\x56\x58\xd3"
+ "\x94\x67\xe1\xbe\xee\xc1\x64\x5c\x4b\xc8\x28\x3d\xc5\x66\x3a\xab"
+ "\x22\xc1\x7e\xa1\xbb\xf3\x19\x3b\xda\x46\x82\x45\xd4\x3c\x7c\xc6"
+ "\xce\x1f\x7f\x95\xa2\x17\xff\x88\xba\xd6\x4d\xdb\xd2\xea\xde\x39"
+ "\xd6\xa5\x18\x73\xbb\x64\x6e\x79\xe9\xdc\x3f\x92\x7f\xda\x1f\x49"
+ "\x33\x70\x65\x73\xa2\xd9\x06\xb8\x1b\x29\x29\x1a\xe0\xa3\xe6\x05"
+ "\x9a\xa8\xc2\x4e\x7a\x78\x1d\x22\x57\x21\xc8\xa3\x8d\x66\x3e\x23",
+ .b_public =
+ "\x1b\x6a\xba\xea\xa3\xcc\x50\x69\xa9\x41\x89\xaf\x04\xe1\x44\x22"
+ "\x97\x20\xd1\xf6\x1e\xcb\x64\x36\x6f\xee\x0b\x16\xc1\xd9\x91\xbe"
+ "\x57\xc8\xd9\xf2\xa1\x96\x91\xec\x41\xc7\x79\x00\x1a\x48\x25\x55"
+ "\xbe\xf3\x20\x8c\x38\xc6\x7b\xf2\x8b\x5a\xc3\xb5\x87\x0a\x86\x3d"
+ "\xb7\xd6\xce\xb0\x96\x2e\x5d\xc4\x00\x5e\x42\xe4\xe5\x50\x4f\xb8"
+ "\x6f\x18\xa4\xe1\xd3\x20\xfc\x3c\xf5\x0a\xff\x23\xa6\x5b\xb4\x17"
+ "\x3e\x7b\xdf\xb9\xb5\x3c\x1b\x76\x29\xcd\xb4\x46\x4f\x27\x8f\xd2"
+ "\xe8\x27\x66\xdb\xe8\xb3\xf5\xe1\xd0\x04\xcd\x89\xff\xba\x76\x67"
+ "\xe8\x4d\xcf\x86\x1c\x8a\xd1\xcf\x99\x27\xfb\xa9\x78\xcc\x94\xaf"
+ "\x3d\x04\xfd\x25\xc0\x47\xfa\x29\x80\x05\xf4\xde\xad\xdb\xab\x12"
+ "\xb0\x2b\x8e\xca\x02\x06\x6d\xad\x3e\x09\xb1\x22\xa3\xf5\x4c\x6d"
+ "\x69\x99\x58\x8b\xd8\x45\x2e\xe0\xc9\x3c\xf7\x92\xce\x21\x90\x6b"
+ "\x3b\x65\x9f\x64\x79\x8d\x67\x22\x1a\x37\xd3\xee\x51\xe2\xe7\x5a"
+ "\x93\x51\xaa\x3c\x4b\x04\x16\x32\xef\xe3\x66\xbe\x18\x94\x88\x64"
+ "\x79\xce\x06\x3f\xb8\xd6\xee\xdc\x13\x79\x6f\x20\x14\xc2\x6b\xce"
+ "\xc8\xda\x42\xa5\x93\x5b\xe4\x7f\x1a\xe6\xda\x0f\xb3\xc1\x5f\x30"
+ "\x50\x76\xe8\x37\x3d\xca\x77\x2c\xa8\xe4\x3b\xf9\x6f\xe0\x17\xed"
+ "\x0e\xef\xb7\x31\x14\xb5\xea\xd9\x39\x22\x89\xb6\x40\x57\xcc\x84"
+ "\xef\x73\xa7\xe9\x27\x21\x85\x89\xfa\xaf\x03\xda\x9c\x8b\xfd\x52"
+ "\x7d\xb0\xa4\xe4\xf9\xd8\x90\x55\xc4\x39\xd6\x9d\xaf\x3b\xce\xac"
+ "\xaa\x36\x14\x7a\x9b\x8b\x12\x43\xe1\xca\x61\xae\x46\x5b\xe7\xe5"
+ "\x88\x32\x80\xa0\x2d\x51\xbb\x2f\xea\xeb\x3c\x71\xb2\xae\xce\xca"
+ "\x61\xd2\x76\xe0\x45\x46\x78\x4e\x09\x2d\xc2\x54\xc2\xa9\xc7\xa8"
+ "\x55\x8e\x72\xa4\x8b\x8a\xc9\x01\xdb\xe9\x58\x11\xa1\xc4\xe7\x12",
+ .secret_size = 16,
+ .b_secret_size = 400,
+ .b_public_size = 384,
+ .expected_a_public_size = 384,
+ .expected_ss_size = 384,
+ .genkey = true,
+ },
+};
+
+static const struct kpp_testvec ffdhe4096_dh_tv_template[] __maybe_unused = {
+ {
+ .secret =
+#ifdef __LITTLE_ENDIAN
+ "\x01\x00" /* type */
+ "\x10\x02" /* len */
+ "\x00\x02\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#else
+ "\x00\x01" /* type */
+ "\x02\x10" /* len */
+ "\x00\x00\x02\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#endif
+ /* xa */
+ "\x1a\x48\xf3\x6c\x61\x03\x42\x43\xd7\x42\x3b\xfa\xdb\x55\x6f\xa2"
+ "\xe1\x79\x52\x0b\x47\xc5\x03\x60\x2f\x26\xb9\x1a\x14\x15\x1a\xd9"
+ "\xe0\xbb\xa7\x82\x63\x41\xec\x26\x55\x00\xab\xe5\x21\x9d\x31\x14"
+ "\x0e\xe2\xc2\xb2\xb8\x37\xe6\xc3\x5a\xab\xae\x25\xdb\x71\x1e\xed"
+ "\xe8\x75\x9a\x04\xa7\x92\x2a\x99\x7e\xc0\x5b\x64\x75\x7f\xe5\xb5"
+ "\xdb\x6c\x95\x4f\xe9\xdc\x39\x76\x79\xb0\xf7\x00\x30\x8e\x86\xe7"
+ "\x36\xd1\xd2\x0c\x68\x7b\x94\xe9\x91\x85\x08\x86\xbc\x64\x87\xd2"
+ "\xf5\x5b\xaf\x03\xf6\x5f\x28\x25\xf1\xa3\x20\x5c\x1b\xb5\x26\x45"
+ "\x9a\x47\xab\xd6\xad\x49\xab\x92\x8e\x62\x6f\x48\x31\xea\xf6\x76"
+ "\xff\xa2\xb6\x28\x78\xef\x59\xc3\x71\x5d\xa8\xd9\x70\x89\xcc\xe2"
+ "\x63\x58\x5e\x3a\xa2\xa2\x88\xbf\x77\x20\x84\x33\x65\x64\x4e\x73"
+ "\xe5\x08\xd5\x89\x23\xd6\x07\xac\x29\x65\x2e\x02\xa8\x35\x96\x48"
+ "\xe7\x5d\x43\x6a\x42\xcc\xda\x98\xc4\x75\x90\x2e\xf6\xc4\xbf\xd4"
+ "\xbc\x31\x14\x0d\x54\x30\x11\xb2\xc9\xcf\xbb\xba\xbc\xc6\xf2\xcf"
+ "\xfe\x4a\x9d\xf3\xec\x78\x5d\x5d\xb4\x99\xd0\x67\x0f\x5a\x21\x1c"
+ "\x7b\x95\x2b\xcf\x49\x44\x94\x05\x1a\x21\x81\x25\x7f\xe3\x8a\x2a"
+ "\xdd\x88\xac\x44\x94\x23\x20\x3b\x75\xf6\x2a\x8a\x45\xf8\xb5\x1f"
+ "\xb9\x8b\xeb\xab\x9b\x38\x23\x26\xf1\x0f\x34\x47\x4f\x7f\xe1\x9e"
+ "\x84\x84\x78\xe5\xe3\x49\xeb\xcc\x2f\x02\x85\xa4\x18\x91\xde\x1a"
+ "\x60\x54\x33\x81\xd5\xae\xdb\x23\x9c\x4d\xa4\xdb\x22\x5b\xdf\xf4"
+ "\x8e\x05\x2b\x60\xba\xe8\x75\xfc\x34\x99\xcf\x35\xe1\x06\xba\xdc"
+ "\x79\x2a\x5e\xec\x1c\xbe\x79\x33\x63\x1c\xe7\x5f\x1e\x30\xd6\x1b"
+ "\xdb\x11\xb8\xea\x63\xff\xfe\x1a\x3c\x24\xf4\x78\x9c\xcc\x5d\x9a"
+ "\xc9\x2d\xc4\x9a\xd4\xa7\x65\x84\x98\xdb\x66\x76\xf0\x34\x31\x9f"
+ "\xce\xb5\xfb\x28\x07\xde\x1e\x0d\x9b\x01\x64\xeb\x2a\x37\x2f\x20"
+ "\xa5\x95\x72\x2b\x54\x51\x59\x91\xea\x50\x54\x0f\x2e\xb0\x1d\xf6"
+ "\xb9\x46\x43\xf9\xd0\x13\x21\x20\x47\x61\x1a\x1c\x30\xc6\x9e\x75"
+ "\x22\xe4\xf2\xb1\xab\x01\xdc\x5b\x3c\x1e\xa2\x6d\xc0\xb9\x9a\x2a"
+ "\x84\x61\xea\x85\x63\xa0\x77\xd0\xeb\x20\x68\xd5\x95\x6a\x1b\x8f"
+ "\x1f\x9a\xba\x44\x49\x8c\x77\xa6\xd9\xa0\x14\xf8\x7d\x9b\x4e\xfa"
+ "\xdc\x4f\x1c\x4d\x60\x50\x26\x7f\xd6\xc1\x91\x2b\xa6\x37\x5d\x94"
+ "\x69\xb2\x47\x59\xd6\xc3\x59\xbb\xd6\x9b\x71\x52\x85\x7a\xcb\x2d",
+ .b_public =
+ "\x24\x38\x02\x02\x2f\xeb\x54\xdd\x73\x21\x91\x4a\xd8\xa4\x0a\xbf"
+ "\xf4\xf5\x9a\x45\xb5\xcd\x42\xa3\x57\xcc\x65\x4a\x23\x2e\xee\x59"
+ "\xba\x6f\x14\x89\xae\x2e\x14\x0a\x72\x77\x23\x7f\x6c\x2e\xba\x52"
+ "\x3f\x71\xbf\xe4\x60\x03\x16\xaa\x61\xf5\x80\x1d\x8a\x45\x9e\x53"
+ "\x7b\x07\xd9\x7e\xfe\xaf\xcb\xda\xff\x20\x71\xba\x89\x39\x75\xc3"
+ "\xb3\x65\x0c\xb1\xa7\xfa\x4a\xe7\xe0\x85\xc5\x4e\x91\x47\x41\xf4"
+ "\xdd\xcd\xc5\x3d\x17\x12\xed\xee\xc0\x31\xb1\xaf\xc1\xd5\x3c\x07"
+ "\xa1\x5a\xc4\x05\x45\xe3\x10\x0c\xc3\x14\xae\x65\xca\x40\xae\x31"
+ "\x5c\x13\x0d\x32\x85\xa7\x6e\xf4\x5e\x29\x3d\x4e\xd3\xd7\x49\x58"
+ "\xe1\x73\xbb\x0a\x7b\xd6\x13\xea\x49\xd7\x20\x3d\x31\xaa\x77\xab"
+ "\x21\x74\xe9\x2f\xe9\x5e\xbe\x2f\xb4\xa2\x79\xf2\xbc\xcc\x51\x94"
+ "\xd2\x1d\xb2\xe6\xc5\x39\x66\xd7\xe5\x46\x75\x53\x76\xed\x49\xea"
+ "\x3b\xdd\x01\x27\xdb\x83\xa5\x9f\xd2\xee\xc8\xde\x9e\xde\xd2\xe7"
+ "\x99\xad\x9c\xe0\x71\x66\x29\xd8\x0d\xfe\xdc\xd1\xbc\xc7\x9a\xbe"
+ "\x8b\x26\x46\x57\xb6\x79\xfa\xad\x8b\x45\x2e\xb5\xe5\x89\x34\x01"
+ "\x93\x00\x9d\xe9\x58\x74\x8b\xda\x07\x92\xb5\x01\x4a\xe1\x44\x36"
+ "\xc7\x6c\xde\xc8\x7a\x17\xd0\xde\xee\x68\x92\xb5\xde\x21\x2b\x1c"
+ "\xbc\x65\x30\x1e\xae\x15\x3d\x9a\xaf\x20\xa3\xc4\x21\x70\xfb\x2f"
+ "\x36\x72\x31\xc0\xe8\x85\xdf\xc5\x50\x4c\x90\x10\x32\xa4\xc7\xee"
+ "\x59\x5a\x21\xf4\xf1\x33\xcf\xbe\xac\x67\xb1\x40\x7c\x0b\x3f\x64"
+ "\xe5\xd2\x2d\xb7\x7d\x0f\xce\xf7\x9b\x05\xee\x37\x61\xd2\x61\x9e"
+ "\x1a\x80\x2e\x79\xe6\x1b\x25\xb3\x61\x3d\x53\xe7\xe5\x97\x9a\xc2"
+ "\x39\xb1\xe3\x91\xc6\xee\x96\x2e\xa9\xb4\xb8\xad\xd8\x04\x3e\x11"
+ "\x31\x67\xb8\x6a\xcb\x6e\x1a\x4c\x7f\x74\xc7\x1f\x09\xd1\xd0\x6b"
+ "\x17\xde\xea\xe8\x0b\xe6\x6a\xee\x2f\xe3\x5b\x9c\x59\x5d\x00\x57"
+ "\xbf\x24\x25\xba\x22\x34\xb9\xc5\x3c\xc4\x57\x26\xd0\x6d\x89\xee"
+ "\x67\x79\x3c\x70\xf9\xc3\xb4\x30\xf0\x2e\xca\xfa\x74\x00\xd1\x00"
+ "\x6d\x03\x97\xd5\x08\x3f\x0b\x8e\xb8\x1d\xa3\x91\x7f\xa9\x3a\xf0"
+ "\x37\x57\x46\x87\x82\xa3\xb5\x8f\x51\xaa\xc7\x7b\xfe\x86\x26\xb9"
+ "\xfa\xe6\x1e\xee\x92\x9d\x3a\xed\x5b\x5e\x3f\xe5\xca\x5e\x13\x01"
+ "\xdd\x4c\x8d\x85\xf0\x60\x61\xb7\x60\x24\x83\x9f\xbe\x72\x21\x81"
+ "\x55\x7e\x7e\x6d\xf3\x28\xc8\x77\x5a\xae\x5a\x32\x86\xd5\x61\xad",
+ .expected_a_public =
+ "\x1f\xff\xd6\xc4\x59\xf3\x4a\x9e\x81\x74\x4d\x27\xa7\xc6\x6b\x35"
+ "\xd8\xf5\xb3\x24\x97\x82\xe7\x2e\xf3\x21\x91\x23\x2f\x3d\x57\x7f"
+ "\x15\x8c\x84\x71\xe7\x25\x35\xe8\x07\x14\x06\x4c\x83\xdc\x55\x4a"
+ "\xf8\x45\xc5\xe9\xfa\x6e\xae\x6e\xcf\x4d\x11\x91\x26\x16\x6f\x86"
+ "\x89\x78\xaa\xb4\x25\x54\xb2\x74\x07\xe5\x26\x26\x0c\xad\xa4\x57"
+ "\x59\x61\x66\x71\x43\x22\xff\x49\x51\xa4\x76\x0e\x55\x7b\x60\x45"
+ "\x4f\xaf\xbd\x9c\xec\x64\x3f\x80\x0b\x0c\x31\x41\xf0\xfe\x2c\xb7"
+ "\x0a\xbe\xa5\x71\x08\x0d\x8d\x1e\x8a\x77\x9a\xd2\x90\x31\x96\xd0"
+ "\x3b\x31\xdc\xc6\x18\x59\x43\xa1\x19\x5a\x84\x68\x29\xad\x5e\x58"
+ "\xa2\x50\x3e\x83\xf5\x7a\xbd\x88\x17\x60\x89\x98\x9c\x19\x89\x27"
+ "\x89\xfc\x33\x87\x42\xd5\xde\x19\x14\xf2\x95\x82\x10\x87\xad\x82"
+ "\xdd\x6b\x51\x2d\x8d\x0e\x81\x4b\xde\xb3\x35\x6c\x0f\x4b\x56\x45"
+ "\x48\x87\xe9\x5a\xf9\x70\x10\x30\x8e\xa1\xbb\xa4\x70\xbf\xa0\xab"
+ "\x10\x31\x3c\x2c\xdc\xc4\xed\xe3\x51\xdc\xee\xd2\xa5\x5c\x4e\x6e"
+ "\xf6\xed\x60\x5a\xeb\xf3\x02\x19\x2a\x95\xe9\x46\xff\x37\x1b\xf0"
+ "\x1d\x10\x4a\x8f\x4f\x3a\x6e\xf5\xfc\x02\x6d\x09\x7d\xea\x69\x7b"
+ "\x13\xb0\xb6\x80\x5c\x15\x20\xa8\x4d\x15\x56\x11\x72\x49\xdb\x48"
+ "\x54\x40\x66\xd5\xcd\x17\x3a\x26\x95\xf6\xd7\xf2\x59\xa3\xda\xbb"
+ "\x26\xd0\xe5\x46\xbf\xee\x0e\x7d\xf1\xe0\x11\x02\x4d\xd3\xdc\xe2"
+ "\x3f\xc2\x51\x7e\xc7\x90\x33\x3c\x1c\xa0\x4c\x69\xcc\x1e\xc7\xac"
+ "\x17\xe0\xe5\xf4\x8c\x05\x64\x34\xfe\x84\x70\xd7\x6b\xed\xab\xf5"
+ "\x88\x9d\x3e\x4c\x5a\x9e\xd4\x74\xfd\xdd\x91\xd5\xd4\xcb\xbf\xf8"
+ "\xb7\x56\xb5\xe9\x22\xa6\x6d\x7a\x44\x05\x41\xbf\xdb\x61\x28\xc6"
+ "\x99\x49\x87\x3d\x28\x77\xf8\x83\x23\x7e\xa9\xa7\xee\x20\xdb\x6d"
+ "\x21\x50\xb7\xc9\x52\x57\x53\xa3\xcf\xdf\xd0\xf9\xb9\x62\x96\x89"
+ "\xf5\x5c\xa9\x8a\x11\x95\x01\x25\xc9\x81\x15\x76\xae\xf0\xc7\xc5"
+ "\x50\xae\x6f\xb5\xd2\x8a\x8e\x9a\xd4\x30\x55\xc6\xe9\x2c\x81\x6e"
+ "\x95\xf6\x45\x89\x55\x28\x34\x7b\xe5\x72\x9a\x2a\xe2\x98\x09\x35"
+ "\xe0\xe9\x75\x94\xe9\x34\x95\xb9\x13\x6e\xd5\xa1\x62\x5a\x1c\x94"
+ "\x28\xed\x84\x46\x76\x6d\x10\x37\x71\xa3\x31\x46\x64\xe4\x59\x44"
+ "\x17\x70\x1c\x23\xc9\x7e\xf6\xab\x8a\x24\xae\x25\xe2\xb2\x5f\x33"
+ "\xe4\xd7\xd3\x34\x2a\x49\x22\x16\x15\x9b\x90\x40\xda\x99\xd5\xaf",
+ .expected_ss =
+ "\xe2\xce\x0e\x4b\x64\xf3\x84\x62\x38\xfd\xe3\x6f\x69\x40\x22\xb0"
+ "\x73\x27\x03\x12\x82\xa4\x6e\x03\x57\xec\x3d\xa0\xc1\x4f\x4b\x09"
+ "\xa1\xd4\xe0\x1a\x5d\x91\x2e\x08\xad\x57\xfa\xcc\x55\x90\x5f\xa0"
+ "\x52\x27\x62\x8d\xe5\x2d\xa1\x5f\xf0\x30\x43\x77\x4e\x3f\x02\x58"
+ "\xcb\xa0\x51\xae\x1d\x24\xf9\x0a\xd1\x36\x0b\x95\x0f\x07\xd9\xf7"
+ "\xe2\x36\x14\x2f\xf0\x11\xc2\xc9\xaf\x66\x4e\x0d\xb4\x60\x01\x4e"
+ "\xa8\x49\xc6\xec\x5f\xb2\xbc\x05\x48\x91\x4e\xe1\xc3\x99\x9f\xeb"
+ "\x4a\xc1\xde\x05\x9a\x65\x39\x7d\x2f\x89\x85\xb2\xcf\xec\x25\x27"
+ "\x5f\x1c\x11\x63\xcf\x7b\x86\x98\x39\xae\xc2\x16\x8f\x79\xd1\x20"
+ "\xd0\xb4\xa0\xba\x44\xd8\xf5\x3a\x0a\x08\x4c\xd1\xb9\xdd\x0a\x5b"
+ "\x9e\x62\xf3\x52\x0c\x84\x12\x43\x9b\xd7\xdf\x86\x71\x03\xdd\x04"
+ "\x98\x55\x0c\x7b\xe2\xe8\x03\x17\x25\x84\xd9\xbd\xe1\xce\x64\xbe"
+ "\xca\x55\xd4\x5b\xef\x61\x5b\x68\x4b\x80\x37\x40\xae\x28\x87\x81"
+ "\x55\x34\x96\x50\x21\x47\x49\xc0\xda\x26\x46\xb8\xe8\xcc\x5a\x27"
+ "\x9c\x9d\x0a\x3d\xcc\x4c\x63\x27\x81\x82\x2e\xf4\xa8\x91\x37\x3e"
+ "\xa7\x34\x6a\x0f\x60\x44\xdd\x2e\xdc\xf9\x19\xf2\x2e\x81\x05\x51"
+ "\x16\xbc\xc0\x85\xa5\xd5\x08\x09\x1f\xcd\xed\xa4\xc5\xdb\x16\x43"
+ "\xb5\x7a\x71\x66\x19\x2e\xef\x13\xbc\x40\x39\x0a\x00\x45\x7e\x61"
+ "\xe9\x68\x60\x83\x00\x70\xd1\x71\xd3\xa2\x61\x3e\x00\x46\x93\x0d"
+ "\xbf\xe6\xa2\x07\xe6\x40\x1a\xf4\x57\xc6\x67\x39\xd8\xd7\x6b\xc5"
+ "\xa5\xd8\x38\x78\x12\xb4\x97\x12\xbe\x97\x13\xef\xe4\x74\x0c\xe0"
+ "\x75\x89\x64\xf4\xe8\x85\xda\x84\x7b\x1d\xfe\xdd\x21\xba\xda\x01"
+ "\x52\xdc\x59\xe5\x47\x50\x7e\x15\x20\xd0\x43\x37\x6e\x48\x39\x00"
+ "\xee\xd9\x54\x6d\x00\x65\xc9\x4b\x85\xa2\x8a\x40\x55\xd0\x63\x0c"
+ "\xb5\x7a\x0d\x37\x67\x27\x73\x18\x7f\x5a\xf5\x0e\x22\xb9\xb0\x3f"
+ "\xda\xf1\xec\x7c\x24\x01\x49\xa9\x09\x0e\x0f\xc4\xa9\xef\xc8\x2b"
+ "\x13\xd1\x0a\x6f\xf8\x92\x4b\x1d\xdd\x6c\x9c\x35\xde\x75\x46\x32"
+ "\xe6\xfb\xda\x58\xba\x81\x08\xca\xa9\xb6\x69\x71\x96\x2a\x1f\x2e"
+ "\x25\xe0\x37\xfe\xee\x4d\x27\xaa\x04\xda\x95\xbb\x93\xcf\x8f\xa2"
+ "\x1d\x67\x35\xe3\x51\x8f\x87\x3b\xa9\x62\x05\xee\x44\xb7\x2e\xd0"
+ "\x07\x63\x32\xf5\xcd\x64\x18\x20\xcf\x22\x42\x28\x22\x1a\xa8\xbb"
+ "\x74\x8a\x6f\x2a\xea\x8a\x48\x0a\xad\xd7\xed\xba\xa3\x89\x37\x01",
+ .secret_size = 528,
+ .b_public_size = 512,
+ .expected_a_public_size = 512,
+ .expected_ss_size = 512,
+ },
+ {
+ .secret =
+#ifdef __LITTLE_ENDIAN
+ "\x01\x00" /* type */
+ "\x10\x00" /* len */
+ "\x00\x00\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00", /* g_size */
+#else
+ "\x00\x01" /* type */
+ "\x00\x10" /* len */
+ "\x00\x00\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00", /* g_size */
+#endif
+ .b_secret =
+#ifdef __LITTLE_ENDIAN
+ "\x01\x00" /* type */
+ "\x10\x02" /* len */
+ "\x00\x02\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#else
+ "\x00\x01" /* type */
+ "\x02\x10" /* len */
+ "\x00\x00\x02\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#endif
+ /* xa */
+ "\x1a\x48\xf3\x6c\x61\x03\x42\x43\xd7\x42\x3b\xfa\xdb\x55\x6f\xa2"
+ "\xe1\x79\x52\x0b\x47\xc5\x03\x60\x2f\x26\xb9\x1a\x14\x15\x1a\xd9"
+ "\xe0\xbb\xa7\x82\x63\x41\xec\x26\x55\x00\xab\xe5\x21\x9d\x31\x14"
+ "\x0e\xe2\xc2\xb2\xb8\x37\xe6\xc3\x5a\xab\xae\x25\xdb\x71\x1e\xed"
+ "\xe8\x75\x9a\x04\xa7\x92\x2a\x99\x7e\xc0\x5b\x64\x75\x7f\xe5\xb5"
+ "\xdb\x6c\x95\x4f\xe9\xdc\x39\x76\x79\xb0\xf7\x00\x30\x8e\x86\xe7"
+ "\x36\xd1\xd2\x0c\x68\x7b\x94\xe9\x91\x85\x08\x86\xbc\x64\x87\xd2"
+ "\xf5\x5b\xaf\x03\xf6\x5f\x28\x25\xf1\xa3\x20\x5c\x1b\xb5\x26\x45"
+ "\x9a\x47\xab\xd6\xad\x49\xab\x92\x8e\x62\x6f\x48\x31\xea\xf6\x76"
+ "\xff\xa2\xb6\x28\x78\xef\x59\xc3\x71\x5d\xa8\xd9\x70\x89\xcc\xe2"
+ "\x63\x58\x5e\x3a\xa2\xa2\x88\xbf\x77\x20\x84\x33\x65\x64\x4e\x73"
+ "\xe5\x08\xd5\x89\x23\xd6\x07\xac\x29\x65\x2e\x02\xa8\x35\x96\x48"
+ "\xe7\x5d\x43\x6a\x42\xcc\xda\x98\xc4\x75\x90\x2e\xf6\xc4\xbf\xd4"
+ "\xbc\x31\x14\x0d\x54\x30\x11\xb2\xc9\xcf\xbb\xba\xbc\xc6\xf2\xcf"
+ "\xfe\x4a\x9d\xf3\xec\x78\x5d\x5d\xb4\x99\xd0\x67\x0f\x5a\x21\x1c"
+ "\x7b\x95\x2b\xcf\x49\x44\x94\x05\x1a\x21\x81\x25\x7f\xe3\x8a\x2a"
+ "\xdd\x88\xac\x44\x94\x23\x20\x3b\x75\xf6\x2a\x8a\x45\xf8\xb5\x1f"
+ "\xb9\x8b\xeb\xab\x9b\x38\x23\x26\xf1\x0f\x34\x47\x4f\x7f\xe1\x9e"
+ "\x84\x84\x78\xe5\xe3\x49\xeb\xcc\x2f\x02\x85\xa4\x18\x91\xde\x1a"
+ "\x60\x54\x33\x81\xd5\xae\xdb\x23\x9c\x4d\xa4\xdb\x22\x5b\xdf\xf4"
+ "\x8e\x05\x2b\x60\xba\xe8\x75\xfc\x34\x99\xcf\x35\xe1\x06\xba\xdc"
+ "\x79\x2a\x5e\xec\x1c\xbe\x79\x33\x63\x1c\xe7\x5f\x1e\x30\xd6\x1b"
+ "\xdb\x11\xb8\xea\x63\xff\xfe\x1a\x3c\x24\xf4\x78\x9c\xcc\x5d\x9a"
+ "\xc9\x2d\xc4\x9a\xd4\xa7\x65\x84\x98\xdb\x66\x76\xf0\x34\x31\x9f"
+ "\xce\xb5\xfb\x28\x07\xde\x1e\x0d\x9b\x01\x64\xeb\x2a\x37\x2f\x20"
+ "\xa5\x95\x72\x2b\x54\x51\x59\x91\xea\x50\x54\x0f\x2e\xb0\x1d\xf6"
+ "\xb9\x46\x43\xf9\xd0\x13\x21\x20\x47\x61\x1a\x1c\x30\xc6\x9e\x75"
+ "\x22\xe4\xf2\xb1\xab\x01\xdc\x5b\x3c\x1e\xa2\x6d\xc0\xb9\x9a\x2a"
+ "\x84\x61\xea\x85\x63\xa0\x77\xd0\xeb\x20\x68\xd5\x95\x6a\x1b\x8f"
+ "\x1f\x9a\xba\x44\x49\x8c\x77\xa6\xd9\xa0\x14\xf8\x7d\x9b\x4e\xfa"
+ "\xdc\x4f\x1c\x4d\x60\x50\x26\x7f\xd6\xc1\x91\x2b\xa6\x37\x5d\x94"
+ "\x69\xb2\x47\x59\xd6\xc3\x59\xbb\xd6\x9b\x71\x52\x85\x7a\xcb\x2d",
+ .b_public =
+ "\x1f\xff\xd6\xc4\x59\xf3\x4a\x9e\x81\x74\x4d\x27\xa7\xc6\x6b\x35"
+ "\xd8\xf5\xb3\x24\x97\x82\xe7\x2e\xf3\x21\x91\x23\x2f\x3d\x57\x7f"
+ "\x15\x8c\x84\x71\xe7\x25\x35\xe8\x07\x14\x06\x4c\x83\xdc\x55\x4a"
+ "\xf8\x45\xc5\xe9\xfa\x6e\xae\x6e\xcf\x4d\x11\x91\x26\x16\x6f\x86"
+ "\x89\x78\xaa\xb4\x25\x54\xb2\x74\x07\xe5\x26\x26\x0c\xad\xa4\x57"
+ "\x59\x61\x66\x71\x43\x22\xff\x49\x51\xa4\x76\x0e\x55\x7b\x60\x45"
+ "\x4f\xaf\xbd\x9c\xec\x64\x3f\x80\x0b\x0c\x31\x41\xf0\xfe\x2c\xb7"
+ "\x0a\xbe\xa5\x71\x08\x0d\x8d\x1e\x8a\x77\x9a\xd2\x90\x31\x96\xd0"
+ "\x3b\x31\xdc\xc6\x18\x59\x43\xa1\x19\x5a\x84\x68\x29\xad\x5e\x58"
+ "\xa2\x50\x3e\x83\xf5\x7a\xbd\x88\x17\x60\x89\x98\x9c\x19\x89\x27"
+ "\x89\xfc\x33\x87\x42\xd5\xde\x19\x14\xf2\x95\x82\x10\x87\xad\x82"
+ "\xdd\x6b\x51\x2d\x8d\x0e\x81\x4b\xde\xb3\x35\x6c\x0f\x4b\x56\x45"
+ "\x48\x87\xe9\x5a\xf9\x70\x10\x30\x8e\xa1\xbb\xa4\x70\xbf\xa0\xab"
+ "\x10\x31\x3c\x2c\xdc\xc4\xed\xe3\x51\xdc\xee\xd2\xa5\x5c\x4e\x6e"
+ "\xf6\xed\x60\x5a\xeb\xf3\x02\x19\x2a\x95\xe9\x46\xff\x37\x1b\xf0"
+ "\x1d\x10\x4a\x8f\x4f\x3a\x6e\xf5\xfc\x02\x6d\x09\x7d\xea\x69\x7b"
+ "\x13\xb0\xb6\x80\x5c\x15\x20\xa8\x4d\x15\x56\x11\x72\x49\xdb\x48"
+ "\x54\x40\x66\xd5\xcd\x17\x3a\x26\x95\xf6\xd7\xf2\x59\xa3\xda\xbb"
+ "\x26\xd0\xe5\x46\xbf\xee\x0e\x7d\xf1\xe0\x11\x02\x4d\xd3\xdc\xe2"
+ "\x3f\xc2\x51\x7e\xc7\x90\x33\x3c\x1c\xa0\x4c\x69\xcc\x1e\xc7\xac"
+ "\x17\xe0\xe5\xf4\x8c\x05\x64\x34\xfe\x84\x70\xd7\x6b\xed\xab\xf5"
+ "\x88\x9d\x3e\x4c\x5a\x9e\xd4\x74\xfd\xdd\x91\xd5\xd4\xcb\xbf\xf8"
+ "\xb7\x56\xb5\xe9\x22\xa6\x6d\x7a\x44\x05\x41\xbf\xdb\x61\x28\xc6"
+ "\x99\x49\x87\x3d\x28\x77\xf8\x83\x23\x7e\xa9\xa7\xee\x20\xdb\x6d"
+ "\x21\x50\xb7\xc9\x52\x57\x53\xa3\xcf\xdf\xd0\xf9\xb9\x62\x96\x89"
+ "\xf5\x5c\xa9\x8a\x11\x95\x01\x25\xc9\x81\x15\x76\xae\xf0\xc7\xc5"
+ "\x50\xae\x6f\xb5\xd2\x8a\x8e\x9a\xd4\x30\x55\xc6\xe9\x2c\x81\x6e"
+ "\x95\xf6\x45\x89\x55\x28\x34\x7b\xe5\x72\x9a\x2a\xe2\x98\x09\x35"
+ "\xe0\xe9\x75\x94\xe9\x34\x95\xb9\x13\x6e\xd5\xa1\x62\x5a\x1c\x94"
+ "\x28\xed\x84\x46\x76\x6d\x10\x37\x71\xa3\x31\x46\x64\xe4\x59\x44"
+ "\x17\x70\x1c\x23\xc9\x7e\xf6\xab\x8a\x24\xae\x25\xe2\xb2\x5f\x33"
+ "\xe4\xd7\xd3\x34\x2a\x49\x22\x16\x15\x9b\x90\x40\xda\x99\xd5\xaf",
+ .secret_size = 16,
+ .b_secret_size = 528,
+ .b_public_size = 512,
+ .expected_a_public_size = 512,
+ .expected_ss_size = 512,
+ .genkey = true,
+ },
+};
+
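/*
 * Editorial sketch, not part of the patch: the 16-byte preamble on each
 * .secret/.b_secret blob above follows the kernel's packed kpp key format —
 * a native-endian struct kpp_secret { u16 type; u16 len; } followed by
 * key_size, p_size and g_size as native-endian u32s, which is why the
 * vectors carry both __LITTLE_ENDIAN and big-endian spellings.  For the
 * pre-defined ffdheXXXX(dh) groups, p_size and g_size stay zero because the
 * group parameters are implied by the algorithm name.  One plausible way to
 * produce such a blob with the existing helpers in include/crypto/dh.h
 * (buffer names here are illustrative only):
 */
	struct dh params = {
		.key = xa,		/* raw private key from the vector */
		.key_size = 512,	/* 4096-bit group */
		/* .p/.g left NULL/0: implied by the "ffdhe4096(dh)" name */
	};
	unsigned int len = crypto_dh_key_len(&params);	/* 16 + key_size */
	u8 *buf = kmalloc(len, GFP_KERNEL);

	if (buf && !crypto_dh_encode_key(buf, len, &params))
		/* buf now matches the .secret layout in the vectors above */;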
+static const struct kpp_testvec ffdhe6144_dh_tv_template[] __maybe_unused = {
+ {
+ .secret =
+#ifdef __LITTLE_ENDIAN
+ "\x01\x00" /* type */
+ "\x10\x03" /* len */
+ "\x00\x03\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#else
+ "\x00\x01" /* type */
+ "\x03\x10" /* len */
+ "\x00\x00\x03\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#endif
+ /* xa */
+ "\x63\x3e\x6f\xe0\xfe\x9f\x4a\x01\x62\x77\xce\xf1\xc7\xcc\x49\x4d"
+ "\x92\x53\x56\xe3\x39\x15\x81\xb2\xcd\xdc\xaf\x5e\xbf\x31\x1f\x69"
+ "\xce\x41\x35\x24\xaa\x46\x53\xb5\xb7\x3f\x2b\xad\x95\x14\xfb\xe4"
+ "\x9a\x61\xcd\x0f\x1f\x02\xee\xa4\x79\x2c\x9d\x1a\x7c\x62\x82\x39"
+ "\xdd\x43\xcc\x58\x9f\x62\x47\x56\x1d\x0f\xc2\x67\xbc\x24\xd0\xf9"
+ "\x0a\x50\x1b\x10\xe7\xbb\xd1\xc2\x01\xbb\xc4\x4c\xda\x12\x60\x0e"
+ "\x95\x2b\xde\x09\xd6\x67\xe1\xbc\x4c\xb9\x67\xdf\xd0\x1f\x97\xb4"
+ "\xde\xcb\x6b\x78\x83\x51\x74\x33\x01\x7f\xf6\x0a\x95\x69\x93\x00"
+ "\x2a\xc3\x75\x8e\xef\xbe\x53\x11\x6d\xc4\xd0\x9f\x6d\x63\x48\xc1"
+ "\x91\x1f\x7d\x88\xa7\x90\x78\xd1\x7e\x52\x42\x10\x01\xb4\x27\x95"
+ "\x91\x43\xcc\x82\x91\x86\x62\xa0\x9d\xef\x65\x6e\x67\xcf\x19\x11"
+ "\x35\x37\x5e\x94\x97\x83\xa6\x83\x1c\x7e\x8a\x3e\x32\xb0\xce\xff"
+ "\x20\xdc\x7b\x6e\x18\xd9\x6b\x27\x31\xfc\xc3\xef\x47\x8d\xbe\x34"
+ "\x2b\xc7\x60\x74\x3c\x93\xb3\x8e\x54\x77\x4e\x73\xe6\x40\x72\x35"
+ "\xb0\xf0\x06\x53\x43\xbe\xd0\xc3\x87\xcc\x38\x96\xa9\x10\xa0\xd6"
+ "\x17\xed\xa5\x6a\xf4\xf6\xaa\x77\x40\xed\x7d\x2e\x58\x0f\x5b\x04"
+ "\x5a\x41\x12\x95\x22\xcb\xa3\xce\x8b\x6d\x6d\x89\xec\x7c\x1d\x25"
+ "\x27\x52\x50\xa0\x5b\x93\x8c\x5d\x3f\x56\xb9\xa6\x5e\xe5\xf7\x9b"
+ "\xc7\x9a\x4a\x2e\x79\xb5\xca\x29\x58\x52\xa0\x63\xe4\x9d\xeb\x4c"
+ "\x4c\xa8\x37\x0b\xe9\xa0\x18\xf1\x86\xf6\x4d\x32\xfb\x9e\x4f\xb3"
+ "\x7b\x5d\x58\x78\x70\xbd\x56\xac\x99\x75\x25\x71\x66\x76\x4e\x5e"
+ "\x67\x4f\xb1\x17\xa7\x8b\x55\x12\x87\x01\x4e\xd1\x66\xef\xd0\x70"
+ "\xaf\x14\x34\xee\x2a\x76\x49\x25\xa6\x2e\x43\x37\x75\x7d\x1a\xad"
+ "\x08\xd5\x01\x85\x9c\xe1\x20\xd8\x38\x5c\x57\xa5\xed\x9d\x46\x3a"
+ "\xb7\x46\x60\x29\x8b\xc4\x21\x50\x0a\x30\x9c\x57\x42\xe4\x35\xf8"
+ "\x12\x5c\x4f\xa2\x20\xc2\xc9\x43\xe3\x6d\x20\xbc\xdf\xb8\x37\x33"
+ "\x45\x43\x06\x4e\x08\x6f\x8a\xcd\x61\xc3\x1b\x05\x28\x82\xbe\xf0"
+ "\x48\x33\xe5\x93\xc9\x1a\x61\x16\x67\x03\x9d\x47\x9d\x74\xeb\xae"
+ "\x13\xf2\xb4\x1b\x09\x11\xf5\x15\xcb\x28\xfd\x50\xe0\xbc\x58\x36"
+ "\x38\x91\x2c\x07\x27\x1f\x49\x68\xf4\xce\xad\xf7\xba\xec\x5d\x3d"
+ "\xfd\x27\xe2\xcf\xf4\x56\xfe\x08\xa6\x11\x61\xcb\x6c\x9f\xf9\x3c"
+ "\x57\x0b\x8b\xaa\x00\x16\x18\xba\x1f\xe8\x4f\x01\xe2\x79\x2a\x0b"
+ "\xc1\xbd\x52\xef\xe6\xf7\x5a\x66\xfe\x07\x3b\x50\x6b\xbb\xcb\x39"
+ "\x3c\x94\xf6\x21\x0d\x68\x69\xa4\xed\x2e\xb5\x85\x03\x11\x38\x79"
+ "\xec\xb5\x22\x23\xdf\x9e\xad\xb4\xbe\xd7\xc7\xdf\xea\x30\x23\x8a"
+ "\xb7\x21\x0a\x9d\xbd\x99\x13\x7d\x5f\x7e\xaf\x28\x54\x3f\xca\x5e"
+ "\xf4\xfc\x05\x0d\x65\x67\xd8\xf6\x8e\x90\x9d\x0d\xcf\x62\x82\xd6"
+ "\x9f\x02\xf8\xca\xfa\x42\x24\x7f\x4d\xb7\xfc\x92\xa6\x4a\x51\xc4"
+ "\xd8\xae\x19\x87\xc6\xa3\x83\xbe\x7b\x6d\xc3\xf5\xb8\xad\x4a\x05"
+ "\x78\x84\x3a\x15\x2e\x40\xbe\x79\xa9\xc0\x12\xa1\x48\x39\xc3\xdb"
+ "\x47\x4f\x7d\xea\x6d\xc7\xfa\x2c\x4e\xe9\xa5\x85\x81\xea\x6c\xcd"
+ "\x8a\xe5\x74\x17\x76\x31\x31\x75\x96\x83\xca\x81\xbb\x5c\xa9\x79"
+ "\x2c\xbd\x09\xfe\xe4\x86\x0d\x8c\x76\x9c\xbc\xe8\x93\xe4\xd0\xe4"
+ "\x0f\xf8\xff\x24\x7e\x66\x61\x69\xfb\xe4\x46\x08\x94\x99\xa5\x53"
+ "\xd7\xe4\x29\x72\x86\x86\xe8\x1d\x37\xfa\xcb\xd0\x8d\x51\xd0\xbf"
+ "\x81\xcf\x55\xb9\xc5\x78\x8c\x74\xa0\x16\x3a\xd2\x19\x94\x29\x6a"
+ "\x5e\xec\xd3\x20\xa0\xb2\xfd\xce\xd4\x14\xa3\x39\x10\xa9\xf4\x4e"
+ "\xba\x21\x09\x5c\xe6\x61\x43\x51\xae\xc4\x71\xd7\x21\xef\x98\x39",
+ .b_public =
+ "\x30\x31\xbe\x43\xd0\x14\x22\x6b\x4b\x8c\x9a\xca\xc6\xdd\xe5\x99"
+ "\xce\xb8\x30\x23\xb6\xa8\x8c\x4d\xfa\xef\xad\xa6\x6a\x21\x50\xa6"
+ "\x45\x2d\x19\x2a\x29\x81\xc5\xac\xb4\xa8\x5f\x6d\x5b\xc8\x5f\x12"
+ "\x35\x21\xfb\x37\xaa\x0c\x79\xeb\xd4\x83\x01\xda\xa3\xf3\x51\x6e"
+ "\x17\xf9\xef\x3f\xbd\x2f\xd2\x43\x82\x12\x48\xeb\x61\x4c\x8e\xf2"
+ "\x6c\x76\xf9\x6d\x42\x2a\xcb\x10\x13\x3b\xf6\x9b\xcd\x46\x1e\xa2"
+ "\xa7\x2c\x08\x56\xd2\x42\xf5\x03\xf0\x3e\xef\xa2\xa2\xf2\x4c\xf2"
+ "\xdb\x4f\xeb\x40\x15\x53\x27\xf7\xd4\x8e\x58\x23\xf5\x2c\x88\x04"
+ "\x1e\xb1\xb6\xe3\xd6\x9c\x49\x08\xa1\x4b\xb8\x33\xe4\x75\x85\xa1"
+ "\x86\x97\xce\x1d\xe9\x9f\xe2\xd8\xf2\x7e\xad\xdc\x8a\x4d\xbd\x06"
+ "\x52\x00\x9a\x2c\x69\xdd\x02\x0c\x69\x5a\xf9\x1d\xfd\xdc\xfb\x82"
+ "\xb2\xe5\xf3\x24\xba\xd1\x09\x76\x90\xb5\x7a\x92\xa6\x6b\x97\xc0"
+ "\xce\x13\x9b\x4b\xbc\x30\x91\xb2\x13\x8b\x57\x6c\x8b\x66\x6e\x58"
+ "\x3e\x91\x50\xc7\x6c\xe1\x18\xec\xbf\x69\xcd\xcb\xa0\xbc\x0d\x05"
+ "\xc4\xf8\x45\x92\xe0\x05\xd3\x08\xb3\x30\x19\xc8\x80\xf8\x17\x9f"
+ "\x1e\x6a\x49\x8e\x43\xef\x7a\x49\xa5\x93\xd9\xed\xd1\x07\x03\xe4"
+ "\xa3\x55\xeb\x1e\x2f\x69\xd7\x40\x8f\x6e\x1c\xb6\x94\xfb\xba\x4e"
+ "\x46\xd0\x38\x71\x00\x88\x93\x6a\x55\xfc\x16\x95\x1f\xb1\xf6\x2f"
+ "\x26\x45\x50\x54\x30\x62\x62\xe8\x80\xe5\x24\x0b\xe4\x15\x6b\x32"
+ "\x16\xc2\x30\x9b\x56\xb4\xc9\x5e\x50\xb4\x27\x82\x86\x01\xda\x68"
+ "\x44\x4b\x15\x81\x31\x13\x52\xd8\x08\xbc\xae\xf3\xa5\x94\x1c\x81"
+ "\xe8\x42\xd6\x42\xd6\xff\x99\x58\x0f\x61\x3e\x82\x9e\x2d\x13\x03"
+ "\x54\x02\x74\xf4\x6b\x43\x43\xce\x54\x44\x36\x3f\x55\xfa\xb2\x56"
+ "\xdc\xac\xb5\x65\x89\xbe\x36\xd2\x58\x65\x79\x4c\xf3\xe2\x01\xf1"
+ "\x69\x96\x29\x20\x5d\xee\xf5\x8a\x8b\x9f\x72\xf7\x27\x02\xde\x3b"
+ "\xc7\x52\x19\xdc\x8e\x22\x36\x09\x14\x59\x07\xbb\x1e\x49\x69\x4f"
+ "\x00\x7b\x9a\x5d\x23\xe9\xbe\x0d\x52\x90\xa3\x0d\xde\xe7\x80\x57"
+ "\x53\x69\x39\xe6\xf8\x33\xeb\x92\x0d\x9e\x04\x8b\x16\x16\x16\x1c"
+ "\xa9\xe6\xe3\x0e\x0a\xc6\xf6\x61\xd1\x44\x2b\x3e\x5e\x02\xfe\xaa"
+ "\xe3\xf3\x8f\xf9\xc8\x20\x37\xad\xbc\x95\xb8\xc5\xe7\x95\xda\xfb"
+ "\x80\x5b\xf6\x40\x28\xae\xc1\x4c\x09\xde\xff\x1e\xbf\x51\xd2\xfe"
+ "\x08\xdc\xb0\x48\x21\xf5\x4c\x43\xdc\x7b\x69\x83\xc8\x69\x5c\xc4"
+ "\xa9\x98\x76\x4b\xc4\x4a\xac\x1d\xa5\x52\xe3\x35\x43\xdd\x30\xd4"
+ "\xa0\x51\x9c\xc2\x62\x4c\x7e\xa5\xfb\xd3\x2c\x8a\x09\x7f\x53\xa3"
+ "\xcd\xca\x58\x1b\x4c\xaf\xba\x21\x8b\x88\x1d\xc0\xe9\x0a\x17\x30"
+ "\x33\xd6\xa2\xa5\x49\x50\x61\x3b\xff\x37\x71\x66\xef\x61\xbc\xb2"
+ "\x53\x82\xe5\x70\xef\x32\xff\x9d\x97\xe0\x82\xe0\xbb\x49\xc2\x29"
+ "\x58\x89\xdd\xe9\x62\x52\xfb\xba\x22\xa6\xd9\x16\xfa\x55\xb3\x06"
+ "\xed\x6d\x70\x6e\xdc\x47\x7c\x67\x1a\xcc\x27\x98\xd4\xd7\xe6\xf0"
+ "\xf8\x9f\x51\x3e\xf0\xee\xad\xb6\x78\x69\x71\xb5\xcb\x09\xa3\xa6"
+ "\x3f\x29\x24\x46\xe0\x65\xbc\x9f\x6c\xe9\xf9\x49\x49\x96\x75\xe5"
+ "\xe1\xff\x82\x70\xf4\x7e\xff\x8f\xec\x47\x98\x6d\x5b\x88\x60\xee"
+ "\x43\xb1\xe2\x14\xc1\x49\x95\x74\x46\xd3\x3f\x73\xb2\xe9\x88\xe0"
+ "\xd3\xb1\xc4\x2c\xef\xee\xdd\x6c\xc5\xa1\x29\xef\x86\xd2\x36\x8a"
+ "\x2f\x7c\x9d\x28\x0a\x6d\xc9\x5a\xdb\xd4\x04\x06\x36\x96\x09\x03"
+ "\x71\x5d\x38\x67\xa2\x08\x2a\x04\xe7\xd6\x51\x5a\x19\x9d\xe7\xf1"
+ "\x5d\x6f\xe2\xff\x48\x37\xb7\x8b\xb1\x14\xb4\x96\xcd\xf0\xa7\xbd"
+ "\xef\x20\xff\x0a\x8d\x08\xb7\x15\x98\x5a\x13\xd2\xda\x2a\x27\x75",
+ .expected_a_public =
+ "\x45\x96\x5a\xb7\x78\x5c\xa4\x4d\x39\xb2\x5f\xc8\xc2\xaa\x1a\xf4"
+ "\xa6\x68\xf6\x6f\x7e\xa8\x4a\x5b\x0e\xba\x0a\x99\x85\xf9\x63\xd4"
+ "\x58\x21\x6d\xa8\x3c\xf4\x05\x10\xb0\x0d\x6f\x1c\xa0\x17\x85\xae"
+ "\x68\xbf\xcc\x00\xc8\x86\x1b\x24\x31\xc9\x49\x23\x91\xe0\x71\x29"
+ "\x06\x39\x39\x93\x49\x9c\x75\x18\x1a\x8b\x61\x73\x1c\x7f\x37\xd5"
+ "\xf1\xab\x20\x5e\x62\x25\xeb\x58\xd5\xfa\xc9\x7f\xad\x57\xd5\xcc"
+ "\x0d\xc1\x7a\x2b\x33\x2a\x76\x84\x33\x26\x97\xcf\x47\x9d\x72\x2a"
+ "\xc9\x39\xde\xa8\x42\x27\x2d\xdc\xee\x00\x60\xd2\x4f\x13\xe0\xde"
+ "\xd5\xc7\xf6\x7d\x8b\x2a\x43\x49\x40\x99\xc2\x61\x84\x8e\x57\x09"
+ "\x7c\xcc\x19\x46\xbd\x4c\xd2\x7c\x7d\x02\x4d\x88\xdf\x58\x24\x80"
+ "\xeb\x19\x3b\x2a\x13\x2b\x19\x85\x3c\xd8\x31\x03\x00\xa4\xd4\x57"
+ "\x23\x2c\x24\x37\xb3\x62\xea\x35\x29\xd0\x2c\xac\xfd\xbd\xdf\x3d"
+ "\xa6\xce\xfa\x0d\x5b\xb6\x15\x8b\xe3\x58\xe9\xad\x99\x87\x29\x51"
+ "\x8d\x97\xd7\xa9\x55\xf0\x72\x6e\x4e\x58\xcb\x2b\x4d\xbd\xd0\x48"
+ "\x7d\x14\x86\xdb\x3f\xa2\x5f\x6e\x35\x4a\xe1\x70\xb1\x53\x72\xb7"
+ "\xbc\xe9\x3d\x1b\x33\xc0\x54\x6f\x43\x55\x76\x85\x7f\x9b\xa5\xb3"
+ "\xc1\x1d\xd3\xfe\xe2\xd5\x96\x3d\xdd\x92\x04\xb1\xad\x75\xdb\x13"
+ "\x4e\x49\xfc\x35\x34\xc5\xda\x13\x98\xb8\x12\xbe\xda\x90\x55\x7c"
+ "\x11\x6c\xbe\x2b\x8c\x51\x29\x23\xc1\x51\xbc\x0c\x1c\xe2\x20\xfc"
+ "\xfe\xf2\xaa\x71\x9b\x21\xdf\x25\x1f\x68\x21\x7e\xe1\xc9\x87\xa0"
+ "\x20\xf6\x8d\x4f\x27\x8c\x3c\x0f\x9d\xf4\x69\x25\xaa\x49\xab\x94"
+ "\x22\x5a\x92\x3a\xba\xb4\xc2\x8c\x5a\xaa\x04\xbf\x46\xc5\xaa\x93"
+ "\xab\x0d\xe9\x54\x6c\x3a\x64\xa6\xa2\x21\x66\xee\x1c\x10\x21\x84"
+ "\xf2\x9e\xcc\x57\xac\xc2\x25\x62\xad\xbb\x59\xef\x25\x61\x6c\x81"
+ "\x38\x8a\xdc\x8c\xeb\x7b\x18\x1d\xaf\xa9\xc5\x9a\xf4\x49\x26\x8a"
+ "\x25\xc4\x3e\x31\x95\x28\xef\xf7\x72\xe9\xc5\xaa\x59\x72\x2b\x67"
+ "\x47\xe8\x6b\x51\x05\x24\xb8\x18\xb3\x34\x0f\x8c\x2b\x80\xba\x61"
+ "\x1c\xbe\x9e\x9a\x7c\xe3\x60\x5e\x49\x02\xff\x50\x8a\x64\x28\x64"
+ "\x46\x7b\x83\x14\x72\x6e\x59\x9b\x56\x09\xb4\xf0\xde\x52\xc3\xf3"
+ "\x58\x17\x6a\xae\xb1\x0f\xf4\x39\xcc\xd8\xce\x4d\xe1\x51\x17\x88"
+ "\xe4\x98\xd9\xd1\xa9\x55\xbc\xbf\x7e\xc4\x51\x96\xdb\x44\x1d\xcd"
+ "\x8d\x74\xad\xa7\x8f\x87\x83\x75\xfc\x36\xb7\xd2\xd4\x89\x16\x97"
+ "\xe4\xc6\x2a\xe9\x65\xc8\xca\x1c\xbd\x86\xaf\x57\x80\xf7\xdd\x42"
+ "\xc0\x3b\x3f\x87\x51\x02\x2f\xf8\xd8\x68\x0f\x3d\x95\x2d\xf1\x67"
+ "\x09\xa6\x5d\x0b\x7e\x01\xb4\xb2\x32\x01\xa8\xd0\x58\x0d\xe6\xa2"
+ "\xd8\x4b\x22\x10\x7d\x11\xf3\xc2\x4e\xb8\x43\x8e\x31\x79\x59\xe2"
+ "\xc4\x96\x29\x17\x40\x06\x0d\xdf\xdf\xc3\x02\x30\x2a\xd1\x8e\xf2"
+ "\xee\x2d\xd2\x12\x63\x5a\x1d\x3c\xba\x4a\xc4\x56\x90\xc6\x12\x0b"
+ "\xe0\x04\x3f\x35\x59\x8e\x40\x75\xf4\x4c\x10\x61\xb9\x30\x89\x7c"
+ "\x8d\x0e\x25\xb7\x5a\x6b\x97\x05\xc6\x37\x80\x6e\x94\x56\xa8\x5f"
+ "\x03\x94\x59\xc8\xc5\x3e\xdc\x23\xe5\x68\x4f\xd7\xbb\x6d\x7e\xc1"
+ "\x8d\xf9\xcc\x3f\x38\xad\x77\xb3\x18\x61\xed\x04\xc0\x71\xa7\x96"
+ "\xb1\xaf\x1d\x69\x78\xda\x6d\x89\x8b\x50\x75\x99\x44\xb3\xb2\x75"
+ "\xd1\xc8\x14\x40\xa1\x0a\xbf\xc4\x45\xc4\xee\x12\x90\x76\x26\x64"
+ "\xb7\x73\x2e\x0b\x0c\xfa\xc3\x55\x29\x24\x1b\x7a\x00\x27\x07\x26"
+ "\x36\xf0\x38\x1a\xe3\xb7\xc4\x8d\x1c\x9c\xa9\xc0\xc1\x45\x91\x9e"
+ "\x86\xdd\x82\x94\x45\xfa\xcd\x5a\x19\x12\x7d\xef\xda\x17\xad\x21"
+ "\x17\x89\x8b\x45\xa7\xf5\xed\x51\x9e\x58\x13\xdc\x84\xa4\xe6\x37",
+ .expected_ss =
+ "\x9a\x9c\x1c\xb7\x73\x2f\xf2\x12\xed\x59\x01\xbb\x75\xf7\xf5\xe4"
+ "\xa0\xa8\xbc\x3f\x3f\xb6\xf7\x74\x6e\xc4\xba\x6d\x6c\x4d\x93\x31"
+ "\x2b\xa7\xa4\xb3\x47\x8f\x77\x04\xb5\xa5\xab\xca\x6b\x5a\xe2\x86"
+ "\x02\x60\xca\xb4\xd7\x5e\xe0\x0f\x73\xdd\xa2\x38\x7c\xae\x0f\x5a"
+ "\x1a\xd7\xfd\xb6\xc8\x6f\xdd\xe0\x98\xd5\x07\xea\x1f\x2a\xbb\x9e"
+ "\xef\x01\x24\x04\xee\xf5\x89\xb1\x12\x26\x54\x95\xef\xcb\x84\xe9"
+ "\xae\x05\xef\x63\x25\x15\x65\x79\x79\x79\x91\xc3\x76\x72\xb4\x85"
+ "\x86\xd9\xd3\x03\xb0\xff\x04\x96\x05\x3c\xde\xbf\x47\x34\x76\x70"
+ "\x17\xd2\x24\x83\xb9\xbb\xcf\x70\x7c\xb8\xc6\x7b\x4e\x01\x86\x36"
+ "\xc7\xc5\xe5\x8b\x7c\x69\x74\x9a\xfe\x1f\x58\x85\x0f\x00\xf8\x4e"
+ "\xf1\x56\xdc\xd1\x11\x28\x2c\xcf\x6c\xb9\xc9\x57\x17\x2e\x19\x19"
+ "\x55\xb3\x4c\xd8\xfb\xe7\x6f\x70\x63\xf9\x53\x45\xdd\xd5\x62\x95"
+ "\xd3\x7d\x7e\xa0\x00\x1a\x62\x9f\x96\x0a\x5d\x0a\x25\x02\xbb\xff"
+ "\x5a\xe8\x9e\x5a\x66\x08\x93\xbc\x92\xaf\xd2\x28\x04\x97\xc1\x54"
+ "\xfe\xcc\x0a\x25\xa2\xf4\x1d\x5a\x9a\xb1\x3e\x9c\xba\x78\xe2\xcf"
+ "\x71\x70\xe3\x40\xea\xba\x69\x9b\x03\xdd\x99\x26\x09\x84\x9d\x69"
+ "\x4d\x3d\x0b\xe9\x3f\x51\xcd\x05\xe5\x00\xaf\x2c\xd3\xf6\xc0\x68"
+ "\xb5\x23\x53\x33\x14\xbd\x39\x1c\xbd\x1b\xe6\x72\x90\xcc\xc2\x86"
+ "\x1a\x42\x83\x55\xb3\xed\x0b\x62\x6d\x0e\xbb\x9e\x2a\x42\x32\x05"
+ "\x3f\xf2\x2c\xc8\x9f\x3c\xd2\xb1\x0b\xb6\x4c\xa0\x22\x36\xee\xb9"
+ "\x55\x23\x3e\x80\xc7\x28\x7c\x39\x11\xd3\x4a\x96\x2e\xef\x52\x34"
+ "\xf2\xda\xb1\xc6\xf5\x02\x10\xbf\x56\x6b\x50\x56\xcd\x2c\xfe\xe1"
+ "\x94\x14\x19\x24\x6e\x9a\xdf\x0c\xb8\xe2\xb8\xd5\xa3\xc1\x22\x8e"
+ "\x84\x92\x00\x16\xf1\x3f\x83\xf6\x36\x31\xa5\x38\xc6\xcf\xf8\x9b"
+ "\x03\xc7\x6f\xb9\xa1\x04\xdf\x20\x0f\x0b\x0f\x70\xff\x57\x36\x7f"
+ "\xb3\x6b\xcb\x8f\x48\xf7\xb2\xdb\x85\x05\xd1\xfe\x34\x05\xf6\x57"
+ "\xb4\x5b\xcc\x3f\x0e\xba\x36\x59\xb0\xfd\x4d\xf6\xf4\x5e\xd2\x65"
+ "\x1d\x98\x87\xb4\x5e\xff\x29\xaa\x84\x9b\x44\x0f\x06\x36\x61\xbd"
+ "\xdb\x51\xda\x56\xc2\xd6\x19\xe2\x57\x4f\xd0\x29\x71\xc8\xe4\xd6"
+ "\xfb\x8c\xd0\xfc\x4f\x25\x09\xa6\xfc\x67\xe2\xb8\xac\xd3\x88\x8f"
+ "\x1f\xf6\xa1\xe3\x45\xa6\x34\xe3\xb1\x6b\xb7\x37\x0e\x06\xc7\x63"
+ "\xde\xac\x3b\xac\x07\x91\x64\xcc\x12\x10\x46\x85\x14\x0b\x6b\x03"
+ "\xba\x4a\x85\xae\xc5\x8c\xa5\x9d\x36\x38\x33\xca\x42\x9c\x4b\x0c"
+ "\x46\xe1\x77\xe9\x1f\x80\xfe\xb7\x1d\x5a\xf4\xc6\x11\x26\x78\xea"
+ "\x81\x25\x77\x47\xed\x8b\x59\xc2\x6b\x49\xff\x83\x56\xec\xa5\xf0"
+ "\xe0\x8b\x15\xd4\x99\x40\x2a\x65\x2a\x98\xf4\x71\x35\x63\x84\x08"
+ "\x4d\xcd\x71\x85\x55\xbc\xa4\x1c\x90\x93\x03\x41\xde\xed\x78\x62"
+ "\x07\x30\x50\xac\x60\x21\x06\xc3\xab\xa4\x04\xc0\xc2\x32\x07\xc4"
+ "\x1f\x2f\xec\xe2\x32\xbf\xbe\x5e\x50\x5b\x2a\x19\x71\x44\x37\x76"
+ "\x8b\xbc\xdb\x73\x98\x65\x78\xc9\x33\x97\x7e\xdc\x60\xa8\x87\xf2"
+ "\xb5\x96\x55\x7f\x44\x07\xcb\x3b\xf3\xd7\x82\xfd\x77\x21\x82\x21"
+ "\x1a\x8b\xa2\xf5\x1f\x66\xd0\x57\x00\x4f\xa9\xa5\x33\xb8\x69\x91"
+ "\xe8\x2e\xf7\x73\x47\x89\x30\x9b\xb1\xfd\xe1\x5d\x11\xfd\x84\xd9"
+ "\xa2\x91\x1f\x8a\xa7\x7a\x77\x8e\x3b\x10\x1d\x0a\x59\x50\x34\xb0"
+ "\xc3\x90\x9f\x56\xb7\x43\xeb\x51\x99\x2b\x8e\x6d\x7b\x58\xe7\xc0"
+ "\x7f\x3d\xa0\x27\x50\xf2\x6e\xc8\x1e\x7f\x84\xb3\xe1\xf7\x09\x85"
+ "\xd2\x9b\x56\x6b\xba\xa5\x19\x2e\xec\xd8\x5c\xf5\x4e\x43\x36\x2e"
+ "\x89\x85\x41\x7f\x9c\x91\x2e\x62\xc3\x41\xcf\x0e\xa1\x7f\xeb\x50",
+ .secret_size = 784,
+ .b_public_size = 768,
+ .expected_a_public_size = 768,
+ .expected_ss_size = 768,
+ },
+ {
+ .secret =
+#ifdef __LITTLE_ENDIAN
+ "\x01\x00" /* type */
+ "\x10\x00" /* len */
+ "\x00\x00\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00", /* g_size */
+#else
+ "\x00\x01" /* type */
+ "\x00\x10" /* len */
+ "\x00\x00\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00", /* g_size */
+#endif
+ .b_secret =
+#ifdef __LITTLE_ENDIAN
+ "\x01\x00" /* type */
+ "\x10\x03" /* len */
+ "\x00\x03\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#else
+ "\x00\x01" /* type */
+ "\x03\x10" /* len */
+ "\x00\x00\x03\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#endif
+ /* xa */
+ "\x63\x3e\x6f\xe0\xfe\x9f\x4a\x01\x62\x77\xce\xf1\xc7\xcc\x49\x4d"
+ "\x92\x53\x56\xe3\x39\x15\x81\xb2\xcd\xdc\xaf\x5e\xbf\x31\x1f\x69"
+ "\xce\x41\x35\x24\xaa\x46\x53\xb5\xb7\x3f\x2b\xad\x95\x14\xfb\xe4"
+ "\x9a\x61\xcd\x0f\x1f\x02\xee\xa4\x79\x2c\x9d\x1a\x7c\x62\x82\x39"
+ "\xdd\x43\xcc\x58\x9f\x62\x47\x56\x1d\x0f\xc2\x67\xbc\x24\xd0\xf9"
+ "\x0a\x50\x1b\x10\xe7\xbb\xd1\xc2\x01\xbb\xc4\x4c\xda\x12\x60\x0e"
+ "\x95\x2b\xde\x09\xd6\x67\xe1\xbc\x4c\xb9\x67\xdf\xd0\x1f\x97\xb4"
+ "\xde\xcb\x6b\x78\x83\x51\x74\x33\x01\x7f\xf6\x0a\x95\x69\x93\x00"
+ "\x2a\xc3\x75\x8e\xef\xbe\x53\x11\x6d\xc4\xd0\x9f\x6d\x63\x48\xc1"
+ "\x91\x1f\x7d\x88\xa7\x90\x78\xd1\x7e\x52\x42\x10\x01\xb4\x27\x95"
+ "\x91\x43\xcc\x82\x91\x86\x62\xa0\x9d\xef\x65\x6e\x67\xcf\x19\x11"
+ "\x35\x37\x5e\x94\x97\x83\xa6\x83\x1c\x7e\x8a\x3e\x32\xb0\xce\xff"
+ "\x20\xdc\x7b\x6e\x18\xd9\x6b\x27\x31\xfc\xc3\xef\x47\x8d\xbe\x34"
+ "\x2b\xc7\x60\x74\x3c\x93\xb3\x8e\x54\x77\x4e\x73\xe6\x40\x72\x35"
+ "\xb0\xf0\x06\x53\x43\xbe\xd0\xc3\x87\xcc\x38\x96\xa9\x10\xa0\xd6"
+ "\x17\xed\xa5\x6a\xf4\xf6\xaa\x77\x40\xed\x7d\x2e\x58\x0f\x5b\x04"
+ "\x5a\x41\x12\x95\x22\xcb\xa3\xce\x8b\x6d\x6d\x89\xec\x7c\x1d\x25"
+ "\x27\x52\x50\xa0\x5b\x93\x8c\x5d\x3f\x56\xb9\xa6\x5e\xe5\xf7\x9b"
+ "\xc7\x9a\x4a\x2e\x79\xb5\xca\x29\x58\x52\xa0\x63\xe4\x9d\xeb\x4c"
+ "\x4c\xa8\x37\x0b\xe9\xa0\x18\xf1\x86\xf6\x4d\x32\xfb\x9e\x4f\xb3"
+ "\x7b\x5d\x58\x78\x70\xbd\x56\xac\x99\x75\x25\x71\x66\x76\x4e\x5e"
+ "\x67\x4f\xb1\x17\xa7\x8b\x55\x12\x87\x01\x4e\xd1\x66\xef\xd0\x70"
+ "\xaf\x14\x34\xee\x2a\x76\x49\x25\xa6\x2e\x43\x37\x75\x7d\x1a\xad"
+ "\x08\xd5\x01\x85\x9c\xe1\x20\xd8\x38\x5c\x57\xa5\xed\x9d\x46\x3a"
+ "\xb7\x46\x60\x29\x8b\xc4\x21\x50\x0a\x30\x9c\x57\x42\xe4\x35\xf8"
+ "\x12\x5c\x4f\xa2\x20\xc2\xc9\x43\xe3\x6d\x20\xbc\xdf\xb8\x37\x33"
+ "\x45\x43\x06\x4e\x08\x6f\x8a\xcd\x61\xc3\x1b\x05\x28\x82\xbe\xf0"
+ "\x48\x33\xe5\x93\xc9\x1a\x61\x16\x67\x03\x9d\x47\x9d\x74\xeb\xae"
+ "\x13\xf2\xb4\x1b\x09\x11\xf5\x15\xcb\x28\xfd\x50\xe0\xbc\x58\x36"
+ "\x38\x91\x2c\x07\x27\x1f\x49\x68\xf4\xce\xad\xf7\xba\xec\x5d\x3d"
+ "\xfd\x27\xe2\xcf\xf4\x56\xfe\x08\xa6\x11\x61\xcb\x6c\x9f\xf9\x3c"
+ "\x57\x0b\x8b\xaa\x00\x16\x18\xba\x1f\xe8\x4f\x01\xe2\x79\x2a\x0b"
+ "\xc1\xbd\x52\xef\xe6\xf7\x5a\x66\xfe\x07\x3b\x50\x6b\xbb\xcb\x39"
+ "\x3c\x94\xf6\x21\x0d\x68\x69\xa4\xed\x2e\xb5\x85\x03\x11\x38\x79"
+ "\xec\xb5\x22\x23\xdf\x9e\xad\xb4\xbe\xd7\xc7\xdf\xea\x30\x23\x8a"
+ "\xb7\x21\x0a\x9d\xbd\x99\x13\x7d\x5f\x7e\xaf\x28\x54\x3f\xca\x5e"
+ "\xf4\xfc\x05\x0d\x65\x67\xd8\xf6\x8e\x90\x9d\x0d\xcf\x62\x82\xd6"
+ "\x9f\x02\xf8\xca\xfa\x42\x24\x7f\x4d\xb7\xfc\x92\xa6\x4a\x51\xc4"
+ "\xd8\xae\x19\x87\xc6\xa3\x83\xbe\x7b\x6d\xc3\xf5\xb8\xad\x4a\x05"
+ "\x78\x84\x3a\x15\x2e\x40\xbe\x79\xa9\xc0\x12\xa1\x48\x39\xc3\xdb"
+ "\x47\x4f\x7d\xea\x6d\xc7\xfa\x2c\x4e\xe9\xa5\x85\x81\xea\x6c\xcd"
+ "\x8a\xe5\x74\x17\x76\x31\x31\x75\x96\x83\xca\x81\xbb\x5c\xa9\x79"
+ "\x2c\xbd\x09\xfe\xe4\x86\x0d\x8c\x76\x9c\xbc\xe8\x93\xe4\xd0\xe4"
+ "\x0f\xf8\xff\x24\x7e\x66\x61\x69\xfb\xe4\x46\x08\x94\x99\xa5\x53"
+ "\xd7\xe4\x29\x72\x86\x86\xe8\x1d\x37\xfa\xcb\xd0\x8d\x51\xd0\xbf"
+ "\x81\xcf\x55\xb9\xc5\x78\x8c\x74\xa0\x16\x3a\xd2\x19\x94\x29\x6a"
+ "\x5e\xec\xd3\x20\xa0\xb2\xfd\xce\xd4\x14\xa3\x39\x10\xa9\xf4\x4e"
+ "\xba\x21\x09\x5c\xe6\x61\x43\x51\xae\xc4\x71\xd7\x21\xef\x98\x39",
+ .b_public =
+ "\x45\x96\x5a\xb7\x78\x5c\xa4\x4d\x39\xb2\x5f\xc8\xc2\xaa\x1a\xf4"
+ "\xa6\x68\xf6\x6f\x7e\xa8\x4a\x5b\x0e\xba\x0a\x99\x85\xf9\x63\xd4"
+ "\x58\x21\x6d\xa8\x3c\xf4\x05\x10\xb0\x0d\x6f\x1c\xa0\x17\x85\xae"
+ "\x68\xbf\xcc\x00\xc8\x86\x1b\x24\x31\xc9\x49\x23\x91\xe0\x71\x29"
+ "\x06\x39\x39\x93\x49\x9c\x75\x18\x1a\x8b\x61\x73\x1c\x7f\x37\xd5"
+ "\xf1\xab\x20\x5e\x62\x25\xeb\x58\xd5\xfa\xc9\x7f\xad\x57\xd5\xcc"
+ "\x0d\xc1\x7a\x2b\x33\x2a\x76\x84\x33\x26\x97\xcf\x47\x9d\x72\x2a"
+ "\xc9\x39\xde\xa8\x42\x27\x2d\xdc\xee\x00\x60\xd2\x4f\x13\xe0\xde"
+ "\xd5\xc7\xf6\x7d\x8b\x2a\x43\x49\x40\x99\xc2\x61\x84\x8e\x57\x09"
+ "\x7c\xcc\x19\x46\xbd\x4c\xd2\x7c\x7d\x02\x4d\x88\xdf\x58\x24\x80"
+ "\xeb\x19\x3b\x2a\x13\x2b\x19\x85\x3c\xd8\x31\x03\x00\xa4\xd4\x57"
+ "\x23\x2c\x24\x37\xb3\x62\xea\x35\x29\xd0\x2c\xac\xfd\xbd\xdf\x3d"
+ "\xa6\xce\xfa\x0d\x5b\xb6\x15\x8b\xe3\x58\xe9\xad\x99\x87\x29\x51"
+ "\x8d\x97\xd7\xa9\x55\xf0\x72\x6e\x4e\x58\xcb\x2b\x4d\xbd\xd0\x48"
+ "\x7d\x14\x86\xdb\x3f\xa2\x5f\x6e\x35\x4a\xe1\x70\xb1\x53\x72\xb7"
+ "\xbc\xe9\x3d\x1b\x33\xc0\x54\x6f\x43\x55\x76\x85\x7f\x9b\xa5\xb3"
+ "\xc1\x1d\xd3\xfe\xe2\xd5\x96\x3d\xdd\x92\x04\xb1\xad\x75\xdb\x13"
+ "\x4e\x49\xfc\x35\x34\xc5\xda\x13\x98\xb8\x12\xbe\xda\x90\x55\x7c"
+ "\x11\x6c\xbe\x2b\x8c\x51\x29\x23\xc1\x51\xbc\x0c\x1c\xe2\x20\xfc"
+ "\xfe\xf2\xaa\x71\x9b\x21\xdf\x25\x1f\x68\x21\x7e\xe1\xc9\x87\xa0"
+ "\x20\xf6\x8d\x4f\x27\x8c\x3c\x0f\x9d\xf4\x69\x25\xaa\x49\xab\x94"
+ "\x22\x5a\x92\x3a\xba\xb4\xc2\x8c\x5a\xaa\x04\xbf\x46\xc5\xaa\x93"
+ "\xab\x0d\xe9\x54\x6c\x3a\x64\xa6\xa2\x21\x66\xee\x1c\x10\x21\x84"
+ "\xf2\x9e\xcc\x57\xac\xc2\x25\x62\xad\xbb\x59\xef\x25\x61\x6c\x81"
+ "\x38\x8a\xdc\x8c\xeb\x7b\x18\x1d\xaf\xa9\xc5\x9a\xf4\x49\x26\x8a"
+ "\x25\xc4\x3e\x31\x95\x28\xef\xf7\x72\xe9\xc5\xaa\x59\x72\x2b\x67"
+ "\x47\xe8\x6b\x51\x05\x24\xb8\x18\xb3\x34\x0f\x8c\x2b\x80\xba\x61"
+ "\x1c\xbe\x9e\x9a\x7c\xe3\x60\x5e\x49\x02\xff\x50\x8a\x64\x28\x64"
+ "\x46\x7b\x83\x14\x72\x6e\x59\x9b\x56\x09\xb4\xf0\xde\x52\xc3\xf3"
+ "\x58\x17\x6a\xae\xb1\x0f\xf4\x39\xcc\xd8\xce\x4d\xe1\x51\x17\x88"
+ "\xe4\x98\xd9\xd1\xa9\x55\xbc\xbf\x7e\xc4\x51\x96\xdb\x44\x1d\xcd"
+ "\x8d\x74\xad\xa7\x8f\x87\x83\x75\xfc\x36\xb7\xd2\xd4\x89\x16\x97"
+ "\xe4\xc6\x2a\xe9\x65\xc8\xca\x1c\xbd\x86\xaf\x57\x80\xf7\xdd\x42"
+ "\xc0\x3b\x3f\x87\x51\x02\x2f\xf8\xd8\x68\x0f\x3d\x95\x2d\xf1\x67"
+ "\x09\xa6\x5d\x0b\x7e\x01\xb4\xb2\x32\x01\xa8\xd0\x58\x0d\xe6\xa2"
+ "\xd8\x4b\x22\x10\x7d\x11\xf3\xc2\x4e\xb8\x43\x8e\x31\x79\x59\xe2"
+ "\xc4\x96\x29\x17\x40\x06\x0d\xdf\xdf\xc3\x02\x30\x2a\xd1\x8e\xf2"
+ "\xee\x2d\xd2\x12\x63\x5a\x1d\x3c\xba\x4a\xc4\x56\x90\xc6\x12\x0b"
+ "\xe0\x04\x3f\x35\x59\x8e\x40\x75\xf4\x4c\x10\x61\xb9\x30\x89\x7c"
+ "\x8d\x0e\x25\xb7\x5a\x6b\x97\x05\xc6\x37\x80\x6e\x94\x56\xa8\x5f"
+ "\x03\x94\x59\xc8\xc5\x3e\xdc\x23\xe5\x68\x4f\xd7\xbb\x6d\x7e\xc1"
+ "\x8d\xf9\xcc\x3f\x38\xad\x77\xb3\x18\x61\xed\x04\xc0\x71\xa7\x96"
+ "\xb1\xaf\x1d\x69\x78\xda\x6d\x89\x8b\x50\x75\x99\x44\xb3\xb2\x75"
+ "\xd1\xc8\x14\x40\xa1\x0a\xbf\xc4\x45\xc4\xee\x12\x90\x76\x26\x64"
+ "\xb7\x73\x2e\x0b\x0c\xfa\xc3\x55\x29\x24\x1b\x7a\x00\x27\x07\x26"
+ "\x36\xf0\x38\x1a\xe3\xb7\xc4\x8d\x1c\x9c\xa9\xc0\xc1\x45\x91\x9e"
+ "\x86\xdd\x82\x94\x45\xfa\xcd\x5a\x19\x12\x7d\xef\xda\x17\xad\x21"
+ "\x17\x89\x8b\x45\xa7\xf5\xed\x51\x9e\x58\x13\xdc\x84\xa4\xe6\x37",
+ .secret_size = 16,
+ .b_secret_size = 784,
+ .b_public_size = 768,
+ .expected_a_public_size = 768,
+ .expected_ss_size = 768,
+ .genkey = true,
+ },
+};
+
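/*
 * Editorial sketch, not part of the patch: entries with .genkey = true pass
 * a header-only secret (len 16, key_size 0), which asks the dh
 * implementation to generate an ephemeral private key; the test harness then
 * runs a full exchange against the fixed b_secret/b_public side, so no
 * expected_a_public blob is needed for that direction.  Roughly, that flow
 * through the kpp API looks like the following (error handling and most
 * scatterlist setup trimmed; names are illustrative):
 */
	struct crypto_kpp *tfm = crypto_alloc_kpp("ffdhe6144(dh)", 0, 0);
	struct kpp_request *req;
	struct scatterlist dst;

	crypto_kpp_set_secret(tfm, secret, 16);	/* key_size 0 => genkey */
	req = kpp_request_alloc(tfm, GFP_KERNEL);
	sg_init_one(&dst, pub, crypto_kpp_maxsize(tfm));
	kpp_request_set_input(req, NULL, 0);
	kpp_request_set_output(req, &dst, crypto_kpp_maxsize(tfm));
	crypto_kpp_generate_public_key(req);	/* fresh public key for A */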
+static const struct kpp_testvec ffdhe8192_dh_tv_template[] __maybe_unused = {
+ {
+ .secret =
+#ifdef __LITTLE_ENDIAN
+ "\x01\x00" /* type */
+ "\x10\x04" /* len */
+ "\x00\x04\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#else
+ "\x00\x01" /* type */
+ "\x04\x10" /* len */
+ "\x00\x00\x04\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#endif
+ /* xa */
+ "\x76\x6e\xeb\xf9\xeb\x76\xae\x37\xcb\x19\x49\x8b\xeb\xaf\xb0\x4b"
+ "\x6d\xe9\x15\xad\xda\xf2\xef\x58\xe9\xd6\xdd\x4c\xb3\x56\xd0\x3b"
+ "\x00\xb0\x65\xed\xae\xe0\x2e\xdf\x8f\x45\x3f\x3c\x5d\x2f\xfa\x96"
+ "\x36\x33\xb2\x01\x8b\x0f\xe8\x46\x15\x6d\x60\x5b\xec\x32\xc3\x3b"
+ "\x06\xf3\xb4\x1b\x9a\xef\x3c\x03\x0e\xcc\xce\x1d\x24\xa0\xc9\x08"
+ "\x65\xf9\x45\xe5\xd2\x43\x08\x88\x58\xd6\x46\xe7\xbb\x25\xac\xed"
+ "\x3b\xac\x6f\x5e\xfb\xd6\x19\xa6\x20\x3a\x1d\x0c\xe8\x00\x72\x54"
+ "\xd7\xd9\xc9\x26\x49\x18\xc6\xb8\xbc\xdd\xf3\xce\xf3\x7b\x69\x04"
+ "\x5c\x6f\x11\xdb\x44\x42\x72\xb6\xb7\x84\x17\x86\x47\x3f\xc5\xa1"
+ "\xd8\x86\xef\xe2\x27\x49\x2b\x8f\x3e\x91\x12\xd9\x45\x96\xf7\xe6"
+ "\x77\x76\x36\x58\x71\x9a\xb1\xdb\xcf\x24\x9e\x7e\xad\xce\x45\xba"
+ "\xb5\xec\x8e\xb9\xd6\x7b\x3d\x76\xa4\x85\xad\xd8\x49\x9b\x80\x9d"
+ "\x7f\x9f\x85\x09\x9e\x86\x5b\x6b\xf3\x8d\x39\x5e\x6f\xe4\x30\xc8"
+ "\xa5\xf3\xdf\x68\x73\x6b\x2e\x9a\xcb\xac\x0a\x0d\x44\xc1\xaf\xb2"
+ "\x11\x1b\x7c\x43\x08\x44\x43\xe2\x4e\xfd\x93\x30\x99\x09\x12\xbb"
+ "\xf6\x31\x34\xa5\x3d\x45\x98\xee\xd7\x2a\x1a\x89\xf5\x37\x92\x33"
+ "\xa0\xdd\xf5\xfb\x1f\x90\x42\x55\x5a\x0b\x82\xff\xf0\x96\x92\x15"
+ "\x65\x5a\x55\x96\xca\x1b\xd5\xe5\xb5\x94\xde\x2e\xa6\x03\x57\x9e"
+ "\x15\xe4\x32\x2b\x1f\xb2\x22\x21\xe9\xa0\x05\xd3\x65\x6c\x11\x66"
+ "\x25\x38\xbb\xa3\x6c\xc2\x0b\x2b\xd0\x7a\x20\x26\x29\x37\x5d\x5f"
+ "\xd8\xff\x2a\xcd\x46\x6c\xd6\x6e\xe5\x77\x1a\xe6\x33\xf1\x8e\xc8"
+ "\x10\x30\x11\x00\x27\xf9\x7d\x0e\x28\x43\xa7\x67\x38\x7f\x16\xda"
+ "\xd0\x01\x8e\xa4\xe8\x6f\xcd\x23\xaf\x77\x52\x34\xad\x7e\xc3\xed"
+ "\x2d\x10\x0a\x33\xdc\xcf\x1b\x88\x0f\xcc\x48\x7f\x42\xf0\x9e\x13"
+ "\x1f\xf5\xd1\xe9\x90\x87\xbd\xfa\x5f\x1d\x77\x55\xcb\xc3\x05\xaf"
+ "\x71\xd0\xe0\xab\x46\x31\xd7\xea\x89\x54\x2d\x39\xaf\xf6\x4f\x74"
+ "\xaf\x46\x58\x89\x78\x95\x2e\xe6\x90\xb7\xaa\x00\x73\x9f\xed\xb9"
+ "\x00\xd6\xf6\x6d\x26\x59\xcd\x56\xdb\xf7\x3d\x5f\xeb\x6e\x46\x33"
+ "\xb1\x23\xed\x9f\x8d\x58\xdc\xb4\x28\x3b\x90\x09\xc4\x61\x02\x1f"
+ "\xf8\x62\xf2\x6e\xc1\x94\x71\x66\x93\x11\xdf\xaa\x3e\xd7\xb5\xe5"
+ "\xc1\x78\xe9\x14\xcd\x55\x16\x51\xdf\x8d\xd0\x94\x8c\x43\xe9\xb8"
+ "\x1d\x42\x7f\x76\xbc\x6f\x87\x42\x88\xde\xd7\x52\x78\x00\x4f\x18"
+ "\x02\xe7\x7b\xe2\x8a\xc3\xd1\x43\xa5\xac\xda\xb0\x8d\x19\x96\xd4"
+ "\x81\xe0\x75\xe9\xca\x41\x7e\x1f\x93\x0b\x26\x24\xb3\xaa\xdd\x10"
+ "\x20\xd3\xf2\x9f\x3f\xdf\x65\xde\x67\x79\xdc\x76\x9f\x3c\x72\x75"
+ "\x65\x8a\x30\xcc\xd2\xcc\x06\xb1\xab\x62\x86\x78\x5d\xb8\xce\x72"
+ "\xb3\x12\xc7\x9f\x07\xd0\x6b\x98\x82\x9b\x6c\xbb\x15\xe5\xcc\xf4"
+ "\xc8\xf4\x60\x81\xdc\xd3\x09\x1b\x5e\xd4\xf3\x55\xcf\x1c\x16\x83"
+ "\x61\xb4\x2e\xcc\x08\x67\x58\xfd\x46\x64\xbc\x29\x4b\xdd\xda\xec"
+ "\xdc\xc6\xa9\xa5\x73\xfb\xf8\xf3\xaf\x89\xa8\x9e\x25\x14\xfa\xac"
+ "\xeb\x1c\x7c\x80\x96\x66\x4d\x41\x67\x9b\x07\x4f\x0a\x97\x17\x1c"
+ "\x4d\x61\xc7\x2e\x6f\x36\x98\x29\x50\x39\x6d\xe7\x70\xda\xf0\xc8"
+ "\x05\x80\x7b\x32\xff\xfd\x12\xde\x61\x0d\xf9\x4c\x21\xf1\x56\x72"
+ "\x3d\x61\x46\xc0\x2d\x07\xd1\x6c\xd3\xbe\x9a\x21\x83\x85\xf7\xed"
+ "\x53\x95\x44\x40\x8f\x75\x12\x18\xc2\x9a\xfd\x5e\xce\x66\xa6\x7f"
+ "\x57\xc0\xd7\x73\x76\xb3\x13\xda\x2e\x58\xc6\x27\x40\xb2\x2d\xef"
+ "\x7d\x72\xb4\xa8\x75\x6f\xcc\x5f\x42\x3e\x2c\x90\x36\x59\xa0\x34"
+ "\xaa\xce\xbc\x04\x4c\xe6\x56\xc2\xcd\xa6\x1c\x59\x04\x56\x53\xcf"
+ "\x6d\xd7\xf0\xb1\x4f\x91\xfa\x84\xcf\x4b\x8d\x50\x4c\xf8\x2a\x31"
+ "\x5f\xe3\xba\x79\xb4\xcc\x59\x64\xe3\x7a\xfa\xf6\x06\x9d\x04\xbb"
+ "\xce\x61\xbf\x9e\x59\x0a\x09\x51\x6a\xbb\x0b\x80\xe0\x91\xc1\x51"
+ "\x04\x58\x67\x67\x4b\x42\x4f\x95\x68\x75\xe2\x1f\x9c\x14\x70\xfd"
+ "\x3a\x8a\xce\x8b\x04\xa1\x89\xe7\xb4\xbf\x70\xfe\xf3\x0c\x48\x04"
+ "\x3a\xd2\x85\x68\x03\xe7\xfa\xec\x5b\x55\xb7\x95\xfd\x5b\x19\x35"
+ "\xad\xcb\x4a\x63\x03\x44\x64\x2a\x48\x59\x9a\x26\x43\x96\x8c\xe6"
+ "\xbd\xb7\x90\xd4\x5f\x8d\x08\x28\xa8\xc5\x89\x70\xb9\x6e\xd3\x3b"
+ "\x76\x0e\x37\x98\x15\x27\xca\xc9\xb0\xe0\xfd\xf3\xc6\xdf\x69\xce"
+ "\xe1\x5f\x6a\x3e\x5c\x86\xe2\x58\x41\x11\xf0\x7e\x56\xec\xe4\xc9"
+ "\x0d\x87\x91\xfb\xb9\xc8\x0d\x34\xab\xb0\xc6\xf2\xa6\x00\x7b\x18"
+ "\x92\xf4\x43\x7f\x01\x85\x2e\xef\x8c\x72\x50\x10\xdb\xf1\x37\x62"
+ "\x16\x85\x71\x01\xa8\x2b\xf0\x13\xd3\x7c\x0b\xaf\xf1\xf3\xd1\xee"
+ "\x90\x41\x5f\x7d\x5b\xa9\x83\x4b\xfa\x80\x59\x50\x73\xe1\xc4\xf9"
+ "\x5e\x4b\xde\xd9\xf5\x22\x68\x5e\x65\xd9\x37\xe4\x1a\x08\x0e\xb1"
+ "\x28\x2f\x40\x9e\x37\xa8\x12\x56\xb7\xb8\x64\x94\x68\x94\xff\x9f",
+ .b_public =
+ "\x26\xa8\x3a\x97\xe0\x52\x76\x07\x26\xa7\xbb\x21\xfd\xe5\x69\xde"
+ "\xe6\xe0\xb5\xa0\xf1\xaa\x51\x2b\x56\x1c\x3c\x6c\xe5\x9f\x8f\x75"
+ "\x71\x04\x86\xf6\x43\x2f\x20\x7f\x45\x4f\x5c\xb9\xf3\x90\xbe\xa9"
+ "\xa0\xd7\xe8\x03\x0e\xfe\x99\x9b\x8a\x1c\xbe\xa7\x63\xe8\x2b\x45"
+ "\xd4\x2c\x65\x25\x4c\x33\xda\xc5\x85\x77\x5d\x62\xea\x93\xe4\x45"
+ "\x59\xff\xa1\xd2\xf1\x73\x11\xed\x02\x64\x8a\x1a\xfb\xe1\x88\xa6"
+ "\x50\x6f\xff\x87\x12\xbb\xfc\x10\xcf\x19\x41\xb0\x35\x44\x7d\x51"
+ "\xe9\xc0\x77\xf2\x73\x21\x2e\x62\xbf\x65\xa5\xd1\x3b\xb1\x3e\x19"
+ "\x75\x4b\xb7\x8e\x03\xc3\xdf\xc8\xb2\xe6\xec\x2d\x7d\xa5\x6a\xba"
+ "\x93\x47\x50\xeb\x6e\xdb\x88\x05\x45\xad\x03\x8c\xf7\x9a\xe1\xc9"
+ "\x1e\x16\x96\x37\xa5\x3e\xe9\xb9\xa8\xdc\xb9\xa9\xf6\xa1\x3d\xed"
+ "\xbe\x12\x29\x8a\x3d\x3d\x90\xfc\x94\xfe\x66\x28\x1c\x1b\xa4\x89"
+ "\x47\x66\x4f\xac\x14\x00\x22\x2d\x5c\x03\xea\x71\x4d\x19\x7d\xd6"
+ "\x58\x39\x4c\x3d\x06\x2b\x30\xa6\xdc\x2c\x8d\xd1\xde\x79\x77\xfa"
+ "\x9c\x6b\x72\x11\x8a\x7f\x7d\x37\x28\x2a\x88\xbf\x0a\xdb\xac\x3b"
+ "\xc5\xa5\xd5\x7e\x25\xec\xa6\x7f\x5b\x53\x75\x83\x49\xd4\x77\xcc"
+ "\x7d\x7e\xd3\x3d\x30\x2c\x98\x3f\x18\x9a\x11\x8a\x37\xda\x99\x0f"
+ "\x3b\x06\xe1\x87\xd5\xe9\x4e\xe0\x9c\x0e\x39\x34\xe2\xdd\xf6\x58"
+ "\x60\x63\xa6\xea\xe8\xc0\xb4\xde\xdf\xa0\xbc\x21\xc3\x2d\xf4\xa4"
+ "\xc8\x6f\x62\x6c\x0f\x71\x88\xf9\xda\x2d\x30\xd5\x95\xe1\xfc\x6d"
+ "\x88\xc5\xc3\x95\x51\x83\xde\x41\x46\x6f\x7e\x1b\x10\x48\xad\x2b"
+ "\x82\x88\xa2\x6f\x57\x4d\x4a\xbd\x90\xc8\x06\x8f\x52\x5d\x6e\xee"
+ "\x09\xe6\xa3\xcb\x30\x9c\x14\xf6\xac\x66\x9b\x81\x0a\x75\x42\x6b"
+ "\xab\x27\xec\x76\xfb\x8d\xc5\xbf\x0e\x93\x81\x7b\x81\xd4\x85\xa6"
+ "\x90\x5a\xa6\xa2\x8b\xa9\xb7\x34\xe6\x15\x36\x93\x8b\xe2\x99\xc7"
+ "\xad\x66\x7e\xd6\x89\xa9\xc8\x15\xcb\xc5\xeb\x06\x85\xd4\x2f\x6e"
+ "\x9b\x95\x7a\x06\x6c\xfa\x31\x1d\xc4\xe5\x7d\xfb\x10\x35\x88\xc2"
+ "\xbe\x1c\x16\x5d\xc2\xf4\x0d\xf3\xc9\x94\xb2\x7e\xa7\xbd\x9c\x03"
+ "\x32\xaf\x8b\x1a\xc8\xcc\x82\xd8\x87\x96\x6e\x3d\xcc\x93\xd2\x43"
+ "\x73\xf9\xde\xec\x49\x49\xf4\x56\x2a\xc8\x6e\x32\x70\x48\xf8\x70"
+ "\xa3\x96\x31\xf4\xf2\x08\xc5\x12\xd2\xeb\xb6\xea\xa3\x07\x05\x61"
+ "\x74\xa3\x04\x2f\x17\x82\x40\x5e\x4c\xd1\x51\xb8\x10\x5b\xc8\x9f"
+ "\x87\x73\x80\x0d\x6f\xc6\xb9\xf6\x7c\x31\x0a\xcc\xd9\x03\x0f\x7a"
+ "\x47\x69\xb1\x55\xab\xe9\xb5\x75\x62\x9e\x95\xbe\x7b\xa9\x53\x6e"
+ "\x28\x73\xdc\xb3\xa4\x8a\x1c\x91\xf5\x8a\xf9\x32\x2b\xbd\xa5\xdc"
+ "\x07\xb5\xaf\x49\xdb\x9c\x35\xc9\x69\xde\xac\xb1\xd0\x86\xcb\x31"
+ "\x0b\xc4\x4f\x63\x4e\x70\xa7\x80\xe3\xbc\x0b\x73\x0e\xf2\x8c\x87"
+ "\x88\x7b\xa9\x6d\xde\x8a\x73\x14\xb9\x80\x55\x03\x2b\x29\x64\x6a"
+ "\xda\x48\x0e\x78\x07\x40\x48\x46\x58\xa9\x4e\x68\x1d\xd1\xc1\xc8"
+ "\x3b\x35\x53\x61\xd5\xe3\x0d\x4c\x42\x74\x10\x67\x85\x9f\x66\x2a"
+ "\xf7\x2b\x7b\x77\x8b\x6e\xda\x2c\xc1\x5a\x20\x34\x3f\xf5\x8b\x6f"
+ "\xe4\x61\xf5\x58\xab\x72\x1a\xf1\x8d\x28\xcc\xa5\x30\x68\xb5\x50"
+ "\x7b\x81\x43\x89\x8e\xa9\xac\x63\x3a\x4a\x78\x7b\xd2\x45\xe6\xe0"
+ "\xdc\x5d\xf2\x1a\x2b\x54\x50\xa5\x9d\xf6\xe7\x9f\x25\xaf\x56\x6a"
+ "\x84\x2a\x75\xa3\x9a\xc7\xfa\x94\xec\x83\xab\xa5\xaa\xe1\xf9\x89"
+ "\x29\xa9\xf6\x53\x24\x24\xae\x4a\xe8\xbc\xe8\x9e\x5c\xd7\x54\x7c"
+ "\x65\x20\x97\x28\x94\x76\xf9\x9e\x81\xcf\x98\x6a\x3a\x7b\xec\xf3"
+ "\x09\x60\x2e\x43\x18\xb5\xf6\x8c\x44\x0f\xf2\x0a\x17\x5b\xac\x98"
+ "\x30\xab\x6e\xd5\xb3\xef\x25\x68\x50\xb6\xe1\xc0\xe4\x5a\x63\x43"
+ "\xea\xca\xda\x23\xc1\xc2\xe9\x30\xec\xb3\x9f\xbf\x1f\x09\x76\xaf"
+ "\x65\xbc\xb5\xab\x30\xac\x0b\x05\xef\x5c\xa3\x65\x77\x33\x1c\xc5"
+ "\xdf\xc9\x39\xab\xca\xf4\x3b\x88\x25\x6d\x50\x87\xb1\x79\xc2\x23"
+ "\x9d\xb5\x21\x01\xaa\xa3\xb7\x61\xa3\x48\x91\x72\x3d\x54\x85\x86"
+ "\x91\x81\x35\x78\xbf\x8f\x27\x57\xcb\x9b\x34\xab\x63\x40\xf1\xbc"
+ "\x23\x5a\x26\x6a\xba\x57\xe2\x8f\x2a\xdc\x82\xe0\x3b\x7f\xec\xd3"
+ "\xd8\x9d\xd3\x13\x54\x70\x64\xc3\xfd\xbf\xa3\x46\xa7\x53\x42\x7f"
+ "\xc1\xbd\x7b\xb3\x13\x47\x2a\x45\x1e\x76\x2c\x0d\x6d\x46\x26\x24"
+ "\xa8\xc7\x00\x2b\x10\x7f\x2a\x6c\xfc\x68\x4e\x6e\x85\x53\x00\xaf"
+ "\xd5\xfb\x59\x64\xc7\x9b\x24\xd1\x05\xdc\x34\x53\x6d\x27\xa9\x79"
+ "\xff\xd7\x5e\x7a\x40\x81\x8e\xc3\xf2\x38\xc9\x8d\x87\xb5\x38\xda"
+ "\x43\x64\x1b\x59\x62\x88\xc1\x6e\x85\x84\x33\xcd\x6d\x7b\x62\x1d"
+ "\x60\xf9\x98\xf7\xd1\xb1\xd4\xbe\x56\x6e\xa8\x6f\xff\xe7\x8b\x60"
+ "\x53\x80\xc7\x7c\xe0\x78\x89\xa9\xab\x42\x8f\x8e\x4d\x92\xac\xa7"
+ "\xfd\x47\x11\xc7\xdb\x7c\x77\xfb\xa4\x1d\x70\xaf\x56\x14\x52\xb0",
+ .expected_a_public =
+ "\xa1\x6c\x9e\xda\x45\x4d\xf6\x59\x04\x00\xc1\xc6\x8b\x12\x3b\xcd"
+ "\x07\xe4\x3e\xec\xac\x9b\xfc\xf7\x6d\x73\x39\x9e\x52\xf8\xbe\x33"
+ "\xe2\xca\xea\x99\x76\xc7\xc9\x94\x5c\xf3\x1b\xea\x6b\x66\x4b\x51"
+ "\x90\xf6\x4f\x75\xd5\x85\xf4\x28\xfd\x74\xa5\x57\xb1\x71\x0c\xb6"
+ "\xb6\x95\x70\x2d\xfa\x4b\x56\xe0\x56\x10\x21\xe5\x60\xa6\x18\xa4"
+ "\x78\x8c\x07\xc0\x2b\x59\x9c\x84\x5b\xe9\xb9\x74\xbf\xbc\x65\x48"
+ "\x27\x82\x40\x53\x46\x32\xa2\x92\x91\x9d\xf6\xd1\x07\x0e\x1d\x07"
+ "\x1b\x41\x04\xb1\xd4\xce\xae\x6e\x46\xf1\x72\x50\x7f\xff\xa8\xa2"
+ "\xbc\x3a\xc1\xbb\x28\xd7\x7d\xcd\x7a\x22\x01\xaf\x57\xb0\xa9\x02"
+ "\xd4\x8a\x92\xd5\xe6\x8e\x6f\x11\x39\xfe\x36\x87\x89\x42\x25\x42"
+ "\xd9\xbe\x67\x15\xe1\x82\x8a\x5e\x98\xc2\xd5\xde\x9e\x13\x1a\xe7"
+ "\xf9\x9f\x8e\x2d\x49\xdc\x4d\x98\x8c\xdd\xfd\x24\x7c\x46\xa9\x69"
+ "\x3b\x31\xb3\x12\xce\x54\xf6\x65\x75\x40\xc2\xf1\x04\x92\xe3\x83"
+ "\xeb\x02\x3d\x79\xc0\xf9\x7c\x28\xb3\x97\x03\xf7\x61\x1c\xce\x95"
+ "\x1a\xa0\xb3\x77\x1b\xc1\x9f\xf8\xf6\x3f\x4d\x0a\xfb\xfa\x64\x1c"
+ "\xcb\x37\x5b\xc3\x28\x60\x9f\xd1\xf2\xc4\xee\x77\xaa\x1f\xe9\xa2"
+ "\x89\x4c\xc6\xb7\xb3\xe4\xa5\xed\xa7\xe8\xac\x90\xdc\xc3\xfb\x56"
+ "\x9c\xda\x2c\x1d\x1a\x9a\x8c\x82\x92\xee\xdc\xa0\xa4\x01\x6e\x7f"
+ "\xc7\x0e\xc2\x73\x7d\xa6\xac\x12\x01\xc0\xc0\xc8\x7c\x84\x86\xc7"
+ "\xa5\x94\xe5\x33\x84\x71\x6e\x36\xe3\x3b\x81\x30\xe0\xc8\x51\x52"
+ "\x2b\x9e\x68\xa2\x6e\x09\x95\x8c\x7f\x78\x82\xbd\x53\x26\xe7\x95"
+ "\xe0\x03\xda\xc0\xc3\x6e\xcf\xdc\xb3\x14\xfc\xe9\x5b\x9b\x70\x6c"
+ "\x93\x04\xab\x13\xf7\x17\x6d\xee\xad\x32\x48\xe9\xa0\x94\x1b\x14"
+ "\x64\x4f\xa1\xb3\x8d\x6a\xca\x28\xfe\x4a\xf4\xf0\xc5\xb7\xf9\x8a"
+ "\x8e\xff\xfe\x57\x6f\x20\xdb\x04\xab\x02\x31\x22\x42\xfd\xbd\x77"
+ "\xea\xce\xe8\xc7\x5d\xe0\x8e\xd6\x66\xd0\xe4\x04\x2f\x5f\x71\xc7"
+ "\x61\x2d\xa5\x3f\x2f\x46\xf2\xd8\x5b\x25\x82\xf0\x52\x88\xc0\x59"
+ "\xd3\xa3\x90\x17\xc2\x04\x13\xc3\x13\x69\x4f\x17\xb1\xb3\x46\x4f"
+ "\xa7\xe6\x8b\x5e\x3e\x95\x0e\xf5\x42\x17\x7f\x4d\x1f\x1b\x7d\x65"
+ "\x86\xc5\xc8\xae\xae\xd8\x4f\xe7\x89\x41\x69\xfd\x06\xce\x5d\xed"
+ "\x44\x55\xad\x51\x98\x15\x78\x8d\x68\xfc\x93\x72\x9d\x22\xe5\x1d"
+ "\x21\xc3\xbe\x3a\x44\x34\xc0\xa3\x1f\xca\xdf\x45\xd0\x5c\xcd\xb7"
+ "\x72\xeb\xae\x7a\xad\x3f\x05\xa0\xe3\x6e\x5a\xd8\x52\xa7\xf1\x1e"
+ "\xb4\xf2\xcf\xe7\xdf\xa7\xf2\x22\x00\xb2\xc4\x17\x3d\x2c\x15\x04"
+ "\x71\x28\x69\x5c\x69\x21\xc8\xf1\x9b\xd8\xc7\xbc\x27\xa3\x85\xe9"
+ "\x53\x77\xd3\x65\xc3\x86\xdd\xb3\x76\x13\xfb\xa1\xd4\xee\x9d\xe4"
+ "\x51\x3f\x83\x59\xe4\x47\xa8\xa6\x0d\x68\xd5\xf6\xf4\xca\x31\xcd"
+ "\x30\x48\x34\x90\x11\x8e\x87\xe9\xea\xc9\xd0\xc3\xba\x28\xf9\xc0"
+ "\xc9\x8e\x23\xe5\xc2\xee\xf2\x47\x9c\x41\x1c\x10\x33\x27\x23\x49"
+ "\xe5\x0d\x18\xbe\x19\xc1\xba\x6c\xdc\xb7\xa1\xe7\xc5\x0d\x6f\xf0"
+ "\x8c\x62\x6e\x0d\x14\xef\xef\xf2\x8e\x01\xd2\x76\xf5\xc1\xe1\x92"
+ "\x3c\xb3\x76\xcd\xd8\xdd\x9b\xe0\x8e\xdc\x24\x34\x13\x65\x0f\x11"
+ "\xaf\x99\x7a\x2f\xe6\x1f\x7d\x17\x3e\x8a\x68\x9a\x37\xc8\x8d\x3e"
+ "\xa3\xfe\xfe\x57\x22\xe6\x0e\x50\xb5\x98\x0b\x71\xd8\x01\xa2\x8d"
+ "\x51\x96\x50\xc2\x41\x31\xd8\x23\x98\xfc\xd1\x9d\x7e\x27\xbb\x69"
+ "\x78\xe0\x87\xf7\xe4\xdd\x58\x13\x9d\xec\x00\xe4\xb9\x70\xa2\x94"
+ "\x5d\x52\x4e\xf2\x5c\xd1\xbc\xfd\xee\x9b\xb9\xe5\xc4\xc0\xa8\x77"
+ "\x67\xa4\xd1\x95\x34\xe4\x6d\x5f\x25\x02\x8d\x65\xdd\x11\x63\x55"
+ "\x04\x01\x21\x60\xc1\x5c\xef\x77\x33\x01\x1c\xa2\x11\x2b\xdd\x2b"
+ "\x74\x99\x23\x38\x05\x1b\x7e\x2e\x01\x52\xfe\x9c\x23\xde\x3e\x1a"
+ "\x72\xf4\xff\x7b\x02\xaa\x08\xcf\xe0\x5b\x83\xbe\x85\x5a\xe8\x9d"
+ "\x11\x3e\xff\x2f\xc6\x97\x67\x36\x6c\x0f\x81\x9c\x26\x29\xb1\x0f"
+ "\xbb\x53\xbd\xf4\xec\x2a\x84\x41\x28\x3b\x86\x40\x95\x69\x55\x5f"
+ "\x30\xee\xda\x1e\x6c\x4b\x25\xd6\x2f\x2c\x0e\x3c\x1a\x26\xa0\x3e"
+ "\xef\x09\xc6\x2b\xe5\xa1\x0c\x03\xa8\xf5\x39\x70\x31\xc4\x32\x79"
+ "\xd1\xd9\xc2\xcc\x32\x4a\xf1\x2f\x57\x5a\xcc\xe5\xc3\xc5\xd5\x4e"
+ "\x86\x56\xca\x64\xdb\xab\x61\x85\x8f\xf9\x20\x02\x40\x66\x76\x9e"
+ "\x5e\xd4\xac\xf0\x47\xa6\x50\x5f\xc2\xaf\x55\x9b\xa3\xc9\x8b\xf8"
+ "\x42\xd5\xcf\x1a\x95\x22\xd9\xd1\x0b\x92\x51\xca\xde\x46\x02\x0d"
+ "\x8b\xee\xd9\xa0\x04\x74\xf5\x0e\xb0\x3a\x62\xec\x3c\x91\x29\x33"
+ "\xa7\x78\x22\x92\xac\x27\xe6\x2d\x6f\x56\x8a\x5d\x72\xc2\xf1\x5c"
+ "\x54\x11\x97\x24\x61\xcb\x0c\x52\xd4\x57\x56\x22\x86\xf0\x19\x27"
+ "\x76\x30\x04\xf4\x39\x7b\x1a\x5a\x04\x0d\xec\x59\x9a\x31\x4c\x40"
+ "\x19\x6d\x3c\x41\x1b\x0c\xca\xeb\x25\x39\x6c\x96\xf8\x55\xd0\xec",
+ .expected_ss =
+ "\xf9\x55\x4f\x48\x38\x74\xb7\x46\xa3\xc4\x2e\x88\xf0\x34\xab\x1d"
+ "\xcd\xa5\x58\xa7\x95\x88\x36\x62\x6f\x8a\xbd\xf2\xfb\x6f\x3e\xb9"
+ "\x91\x65\x58\xef\x70\x2f\xd5\xc2\x97\x70\xcb\xce\x8b\x78\x1c\xe0"
+ "\xb9\xfa\x77\x34\xd2\x4a\x19\x58\x11\xfd\x93\x84\x40\xc0\x8c\x19"
+ "\x8b\x98\x50\x83\xba\xfb\xe2\xad\x8b\x81\x84\x63\x90\x41\x4b\xf8"
+ "\xe8\x78\x86\x04\x09\x8d\x84\xd1\x43\xfd\xa3\x58\x21\x2a\x3b\xb1"
+ "\xa2\x5b\x48\x74\x3c\xa9\x16\x34\x28\xf0\x8e\xde\xe2\xcf\x8e\x68"
+ "\x53\xab\x65\x06\xb7\x86\xb1\x08\x4f\x73\x97\x00\x10\x95\xd1\x84"
+ "\x72\xcf\x14\xdb\xff\xa7\x80\xd8\xe5\xf2\x2c\x89\x37\xb0\x81\x2c"
+ "\xf5\xd6\x7d\x1b\xb0\xe2\x8e\x87\x32\x3d\x37\x6a\x79\xaa\xe7\x08"
+ "\xc9\x67\x55\x5f\x1c\xae\xa6\xf5\xef\x79\x3a\xaf\x3f\x82\x14\xe2"
+ "\xf3\x69\x91\xed\xb7\x9e\xc9\xde\xd0\x29\x70\xd9\xeb\x0f\xf5\xc7"
+ "\xf6\x7c\xa7\x7f\xec\xed\xe1\xbd\x13\xe1\x43\xe4\x42\x30\xe3\x5f"
+ "\xe0\xf3\x15\x55\x2f\x7a\x42\x17\x67\xcb\xc2\x4f\xd0\x85\xfc\x6c"
+ "\xec\xe8\xfc\x25\x78\x4b\xe4\x0f\xd4\x3d\x78\x28\xd3\x53\x79\xcb"
+ "\x2c\x82\x67\x9a\xdc\x32\x55\xd2\xda\xae\xd8\x61\xce\xd6\x59\x0b"
+ "\xc5\x44\xeb\x08\x81\x8c\x65\xb2\xb7\xa6\xff\xf7\xbf\x99\xc6\x8a"
+ "\xbe\xde\xc2\x17\x56\x05\x6e\xd2\xf1\x1e\xa2\x04\xeb\x02\x74\xaa"
+ "\x04\xfc\xf0\x6b\xd4\xfc\xf0\x7a\x5f\xfe\xe2\x74\x7f\xeb\x9b\x6a"
+ "\x8a\x09\x96\x5d\xe1\x91\xb6\x9e\x37\xd7\x63\xd7\xb3\x5c\xb5\xa3"
+ "\x5f\x62\x00\xdf\xc5\xbf\x85\xba\xa7\xa9\xb6\x1f\x76\x78\x65\x01"
+ "\xfe\x1d\x6c\xfe\x15\x9e\xf4\xb1\xbc\x8d\xad\x3c\xec\x69\x27\x57"
+ "\xa4\x89\x77\x46\xe1\x49\xc7\x22\xde\x79\xe0\xf7\x3a\xa1\x59\x8b"
+ "\x59\x71\xcc\xd6\x18\x24\xc1\x8a\x2f\xe3\xdf\xdd\x6c\xf7\x62\xaa"
+ "\x15\xaa\x39\x37\x3b\xaf\x7d\x6e\x88\xeb\x19\xa8\xa0\x26\xd3\xaa"
+ "\x2d\xcc\x5f\x56\x99\x86\xa9\xed\x4d\x02\x31\x40\x97\x70\x83\xa7"
+ "\x08\x98\x7e\x49\x46\xd9\x75\xb5\x7a\x6a\x40\x69\xa0\x6d\xb2\x18"
+ "\xc0\xad\x88\x05\x02\x95\x6f\xf7\x8f\xcb\xa2\xe4\x7b\xab\x4a\x0f"
+ "\x9a\x1b\xef\xcc\xd1\x6a\x5d\x1e\x6a\x2a\x8b\x5b\x80\xbc\x5f\x38"
+ "\xdd\xaf\xad\x44\x15\xb4\xaf\x26\x1c\x1a\x4d\xa7\x4b\xec\x88\x33"
+ "\x24\x42\xb5\x0c\x9c\x56\xd4\xba\xa7\xb9\x65\xd5\x76\xb2\xbc\x16"
+ "\x8e\xfa\x0c\x7a\xc0\xa2\x2c\x5a\x39\x56\x7d\xe6\xf8\xa9\xf4\x49"
+ "\xd0\x50\xf2\x5e\x4b\x0a\x43\xe4\x9a\xbb\xea\x35\x28\x99\x84\x83"
+ "\xec\xc1\xa0\x68\x15\x9a\x2b\x01\x04\x48\x09\x11\x1b\xb6\xa4\xd8"
+ "\x03\xad\xb6\x4c\x9e\x1d\x90\xae\x88\x0f\x75\x95\x25\xa0\x27\x13"
+ "\xb7\x4f\xe2\x3e\xd5\x59\x1a\x7c\xde\x95\x14\x28\xd1\xde\x84\xe4"
+ "\x07\x7c\x5b\x06\xd6\xe6\x9c\x8a\xbe\xd2\xb4\x62\xd1\x67\x8a\x9c"
+ "\xac\x4f\xfa\x70\xd6\xc8\xc0\xeb\x5e\xf6\x3e\xdc\x48\x8e\xce\x3f"
+ "\x92\x3e\x60\x77\x63\x60\x6b\x76\x04\xa5\xba\xc9\xab\x92\x4e\x0d"
+ "\xdc\xca\x82\x44\x5f\x3a\x42\xeb\x01\xe7\xe0\x33\xb3\x32\xaf\x4b"
+ "\x81\x35\x2d\xb6\x57\x15\xfe\x52\xc7\x54\x2e\x41\x3b\x22\x6b\x12"
+ "\x72\xdb\x5c\x66\xd0\xb6\xb4\xfe\x90\xc0\x20\x34\x95\xf9\xe4\xc7"
+ "\x7e\x71\x89\x4f\x6f\xfb\x2a\xf3\xdf\x3f\xe3\xcf\x0e\x1a\xd9\xf2"
+ "\xc1\x02\x67\x5d\xdc\xf1\x7d\xe8\xcf\x64\x77\x4d\x12\x03\x77\x2c"
+ "\xfb\xe1\x59\xf7\x2c\x96\x9c\xaf\x46\x9c\xc7\x67\xcf\xee\x94\x50"
+ "\xc7\xa1\x23\xe6\x9f\x4d\x73\x92\xad\xf9\x4a\xce\xdb\x44\xd5\xe3"
+ "\x17\x05\x37\xdb\x9c\x6c\xc5\x7e\xb7\xd4\x11\x4a\x8c\x51\x03\xaa"
+ "\x73\x4b\x16\xd9\x79\xf5\xf1\x67\x20\x9b\x25\xe5\x41\x52\x59\x06"
+ "\x8b\xf2\x23\x2f\x6e\xea\xf3\x24\x0a\x94\xbb\xb8\x7e\xd9\x23\x4a"
+ "\x9f\x1f\xe1\x13\xb5\xfe\x85\x2f\x4c\xbe\x6a\x66\x02\x1d\x90\xd2"
+ "\x01\x25\x8a\xfd\x78\x3a\x28\xb8\x18\xc1\x38\x16\x21\x6b\xb4\xf9"
+ "\x64\x0f\xf1\x73\xc4\x5c\xd1\x41\xf2\xfe\xe7\x26\xad\x79\x12\x75"
+ "\x49\x48\xdb\x21\x71\x35\xf7\xb7\x46\x5a\xa1\x81\x25\x47\x31\xea"
+ "\x1d\x76\xbb\x32\x5a\x90\xb0\x42\x1a\x47\xe8\x0c\x82\x92\x43\x1c"
+ "\x0b\xdd\xe5\x25\xce\xd3\x06\xcc\x59\x5a\xc9\xa0\x01\xac\x29\x12"
+ "\x31\x2e\x3d\x1a\xed\x3b\xf3\xa7\xef\x52\xc2\x0d\x18\x1f\x03\x28"
+ "\xc9\x2b\x38\x61\xa4\x01\xc9\x3c\x11\x08\x14\xd4\xe5\x31\xe9\x3c"
+ "\x1d\xad\xf8\x76\xc4\x84\x9f\xea\x16\x61\x3d\x6d\xa3\x32\x31\xcd"
+ "\x1c\xca\xb8\x74\xc2\x45\xf3\x01\x9c\x7a\xaf\xfd\xe7\x1e\x5a\x18"
+ "\xb1\x9d\xbb\x7a\x2d\x34\x40\x17\x49\xad\x1f\xeb\x2d\xa2\x26\xb8"
+ "\x16\x28\x4b\x72\xdd\xd0\x8d\x85\x4c\xdd\xf8\x57\x48\xd5\x1d\xfb"
+ "\xbd\xec\x11\x5d\x1e\x9c\x26\x81\xbf\xf1\x16\x12\x32\xc3\xf3\x07"
+ "\x0e\x6e\x7f\x17\xec\xfb\xf4\x5d\xe2\xb1\xca\x97\xca\x46\x20\x2d"
+ "\x09\x85\x19\x25\x89\xa8\x9b\x51\x74\xae\xc9\x1b\x4c\xb6\x80\x62",
+ .secret_size = 1040,
+ .b_public_size = 1024,
+ .expected_a_public_size = 1024,
+ .expected_ss_size = 1024,
+ },
+ {
+ .secret =
+#ifdef __LITTLE_ENDIAN
+ "\x01\x00" /* type */
+ "\x10\x00" /* len */
+ "\x00\x00\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00", /* g_size */
+#else
+ "\x00\x01" /* type */
+ "\x00\x10" /* len */
+ "\x00\x00\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00", /* g_size */
+#endif
+ .b_secret =
+#ifdef __LITTLE_ENDIAN
+ "\x01\x00" /* type */
+ "\x10\x04" /* len */
+ "\x00\x04\x00\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#else
+ "\x00\x01" /* type */
+ "\x04\x10" /* len */
+ "\x00\x00\x04\x00" /* key_size */
+ "\x00\x00\x00\x00" /* p_size */
+ "\x00\x00\x00\x00" /* g_size */
+#endif
+ /* xa */
+ "\x76\x6e\xeb\xf9\xeb\x76\xae\x37\xcb\x19\x49\x8b\xeb\xaf\xb0\x4b"
+ "\x6d\xe9\x15\xad\xda\xf2\xef\x58\xe9\xd6\xdd\x4c\xb3\x56\xd0\x3b"
+ "\x00\xb0\x65\xed\xae\xe0\x2e\xdf\x8f\x45\x3f\x3c\x5d\x2f\xfa\x96"
+ "\x36\x33\xb2\x01\x8b\x0f\xe8\x46\x15\x6d\x60\x5b\xec\x32\xc3\x3b"
+ "\x06\xf3\xb4\x1b\x9a\xef\x3c\x03\x0e\xcc\xce\x1d\x24\xa0\xc9\x08"
+ "\x65\xf9\x45\xe5\xd2\x43\x08\x88\x58\xd6\x46\xe7\xbb\x25\xac\xed"
+ "\x3b\xac\x6f\x5e\xfb\xd6\x19\xa6\x20\x3a\x1d\x0c\xe8\x00\x72\x54"
+ "\xd7\xd9\xc9\x26\x49\x18\xc6\xb8\xbc\xdd\xf3\xce\xf3\x7b\x69\x04"
+ "\x5c\x6f\x11\xdb\x44\x42\x72\xb6\xb7\x84\x17\x86\x47\x3f\xc5\xa1"
+ "\xd8\x86\xef\xe2\x27\x49\x2b\x8f\x3e\x91\x12\xd9\x45\x96\xf7\xe6"
+ "\x77\x76\x36\x58\x71\x9a\xb1\xdb\xcf\x24\x9e\x7e\xad\xce\x45\xba"
+ "\xb5\xec\x8e\xb9\xd6\x7b\x3d\x76\xa4\x85\xad\xd8\x49\x9b\x80\x9d"
+ "\x7f\x9f\x85\x09\x9e\x86\x5b\x6b\xf3\x8d\x39\x5e\x6f\xe4\x30\xc8"
+ "\xa5\xf3\xdf\x68\x73\x6b\x2e\x9a\xcb\xac\x0a\x0d\x44\xc1\xaf\xb2"
+ "\x11\x1b\x7c\x43\x08\x44\x43\xe2\x4e\xfd\x93\x30\x99\x09\x12\xbb"
+ "\xf6\x31\x34\xa5\x3d\x45\x98\xee\xd7\x2a\x1a\x89\xf5\x37\x92\x33"
+ "\xa0\xdd\xf5\xfb\x1f\x90\x42\x55\x5a\x0b\x82\xff\xf0\x96\x92\x15"
+ "\x65\x5a\x55\x96\xca\x1b\xd5\xe5\xb5\x94\xde\x2e\xa6\x03\x57\x9e"
+ "\x15\xe4\x32\x2b\x1f\xb2\x22\x21\xe9\xa0\x05\xd3\x65\x6c\x11\x66"
+ "\x25\x38\xbb\xa3\x6c\xc2\x0b\x2b\xd0\x7a\x20\x26\x29\x37\x5d\x5f"
+ "\xd8\xff\x2a\xcd\x46\x6c\xd6\x6e\xe5\x77\x1a\xe6\x33\xf1\x8e\xc8"
+ "\x10\x30\x11\x00\x27\xf9\x7d\x0e\x28\x43\xa7\x67\x38\x7f\x16\xda"
+ "\xd0\x01\x8e\xa4\xe8\x6f\xcd\x23\xaf\x77\x52\x34\xad\x7e\xc3\xed"
+ "\x2d\x10\x0a\x33\xdc\xcf\x1b\x88\x0f\xcc\x48\x7f\x42\xf0\x9e\x13"
+ "\x1f\xf5\xd1\xe9\x90\x87\xbd\xfa\x5f\x1d\x77\x55\xcb\xc3\x05\xaf"
+ "\x71\xd0\xe0\xab\x46\x31\xd7\xea\x89\x54\x2d\x39\xaf\xf6\x4f\x74"
+ "\xaf\x46\x58\x89\x78\x95\x2e\xe6\x90\xb7\xaa\x00\x73\x9f\xed\xb9"
+ "\x00\xd6\xf6\x6d\x26\x59\xcd\x56\xdb\xf7\x3d\x5f\xeb\x6e\x46\x33"
+ "\xb1\x23\xed\x9f\x8d\x58\xdc\xb4\x28\x3b\x90\x09\xc4\x61\x02\x1f"
+ "\xf8\x62\xf2\x6e\xc1\x94\x71\x66\x93\x11\xdf\xaa\x3e\xd7\xb5\xe5"
+ "\xc1\x78\xe9\x14\xcd\x55\x16\x51\xdf\x8d\xd0\x94\x8c\x43\xe9\xb8"
+ "\x1d\x42\x7f\x76\xbc\x6f\x87\x42\x88\xde\xd7\x52\x78\x00\x4f\x18"
+ "\x02\xe7\x7b\xe2\x8a\xc3\xd1\x43\xa5\xac\xda\xb0\x8d\x19\x96\xd4"
+ "\x81\xe0\x75\xe9\xca\x41\x7e\x1f\x93\x0b\x26\x24\xb3\xaa\xdd\x10"
+ "\x20\xd3\xf2\x9f\x3f\xdf\x65\xde\x67\x79\xdc\x76\x9f\x3c\x72\x75"
+ "\x65\x8a\x30\xcc\xd2\xcc\x06\xb1\xab\x62\x86\x78\x5d\xb8\xce\x72"
+ "\xb3\x12\xc7\x9f\x07\xd0\x6b\x98\x82\x9b\x6c\xbb\x15\xe5\xcc\xf4"
+ "\xc8\xf4\x60\x81\xdc\xd3\x09\x1b\x5e\xd4\xf3\x55\xcf\x1c\x16\x83"
+ "\x61\xb4\x2e\xcc\x08\x67\x58\xfd\x46\x64\xbc\x29\x4b\xdd\xda\xec"
+ "\xdc\xc6\xa9\xa5\x73\xfb\xf8\xf3\xaf\x89\xa8\x9e\x25\x14\xfa\xac"
+ "\xeb\x1c\x7c\x80\x96\x66\x4d\x41\x67\x9b\x07\x4f\x0a\x97\x17\x1c"
+ "\x4d\x61\xc7\x2e\x6f\x36\x98\x29\x50\x39\x6d\xe7\x70\xda\xf0\xc8"
+ "\x05\x80\x7b\x32\xff\xfd\x12\xde\x61\x0d\xf9\x4c\x21\xf1\x56\x72"
+ "\x3d\x61\x46\xc0\x2d\x07\xd1\x6c\xd3\xbe\x9a\x21\x83\x85\xf7\xed"
+ "\x53\x95\x44\x40\x8f\x75\x12\x18\xc2\x9a\xfd\x5e\xce\x66\xa6\x7f"
+ "\x57\xc0\xd7\x73\x76\xb3\x13\xda\x2e\x58\xc6\x27\x40\xb2\x2d\xef"
+ "\x7d\x72\xb4\xa8\x75\x6f\xcc\x5f\x42\x3e\x2c\x90\x36\x59\xa0\x34"
+ "\xaa\xce\xbc\x04\x4c\xe6\x56\xc2\xcd\xa6\x1c\x59\x04\x56\x53\xcf"
+ "\x6d\xd7\xf0\xb1\x4f\x91\xfa\x84\xcf\x4b\x8d\x50\x4c\xf8\x2a\x31"
+ "\x5f\xe3\xba\x79\xb4\xcc\x59\x64\xe3\x7a\xfa\xf6\x06\x9d\x04\xbb"
+ "\xce\x61\xbf\x9e\x59\x0a\x09\x51\x6a\xbb\x0b\x80\xe0\x91\xc1\x51"
+ "\x04\x58\x67\x67\x4b\x42\x4f\x95\x68\x75\xe2\x1f\x9c\x14\x70\xfd"
+ "\x3a\x8a\xce\x8b\x04\xa1\x89\xe7\xb4\xbf\x70\xfe\xf3\x0c\x48\x04"
+ "\x3a\xd2\x85\x68\x03\xe7\xfa\xec\x5b\x55\xb7\x95\xfd\x5b\x19\x35"
+ "\xad\xcb\x4a\x63\x03\x44\x64\x2a\x48\x59\x9a\x26\x43\x96\x8c\xe6"
+ "\xbd\xb7\x90\xd4\x5f\x8d\x08\x28\xa8\xc5\x89\x70\xb9\x6e\xd3\x3b"
+ "\x76\x0e\x37\x98\x15\x27\xca\xc9\xb0\xe0\xfd\xf3\xc6\xdf\x69\xce"
+ "\xe1\x5f\x6a\x3e\x5c\x86\xe2\x58\x41\x11\xf0\x7e\x56\xec\xe4\xc9"
+ "\x0d\x87\x91\xfb\xb9\xc8\x0d\x34\xab\xb0\xc6\xf2\xa6\x00\x7b\x18"
+ "\x92\xf4\x43\x7f\x01\x85\x2e\xef\x8c\x72\x50\x10\xdb\xf1\x37\x62"
+ "\x16\x85\x71\x01\xa8\x2b\xf0\x13\xd3\x7c\x0b\xaf\xf1\xf3\xd1\xee"
+ "\x90\x41\x5f\x7d\x5b\xa9\x83\x4b\xfa\x80\x59\x50\x73\xe1\xc4\xf9"
+ "\x5e\x4b\xde\xd9\xf5\x22\x68\x5e\x65\xd9\x37\xe4\x1a\x08\x0e\xb1"
+ "\x28\x2f\x40\x9e\x37\xa8\x12\x56\xb7\xb8\x64\x94\x68\x94\xff\x9f",
+ .b_public =
+ "\xa1\x6c\x9e\xda\x45\x4d\xf6\x59\x04\x00\xc1\xc6\x8b\x12\x3b\xcd"
+ "\x07\xe4\x3e\xec\xac\x9b\xfc\xf7\x6d\x73\x39\x9e\x52\xf8\xbe\x33"
+ "\xe2\xca\xea\x99\x76\xc7\xc9\x94\x5c\xf3\x1b\xea\x6b\x66\x4b\x51"
+ "\x90\xf6\x4f\x75\xd5\x85\xf4\x28\xfd\x74\xa5\x57\xb1\x71\x0c\xb6"
+ "\xb6\x95\x70\x2d\xfa\x4b\x56\xe0\x56\x10\x21\xe5\x60\xa6\x18\xa4"
+ "\x78\x8c\x07\xc0\x2b\x59\x9c\x84\x5b\xe9\xb9\x74\xbf\xbc\x65\x48"
+ "\x27\x82\x40\x53\x46\x32\xa2\x92\x91\x9d\xf6\xd1\x07\x0e\x1d\x07"
+ "\x1b\x41\x04\xb1\xd4\xce\xae\x6e\x46\xf1\x72\x50\x7f\xff\xa8\xa2"
+ "\xbc\x3a\xc1\xbb\x28\xd7\x7d\xcd\x7a\x22\x01\xaf\x57\xb0\xa9\x02"
+ "\xd4\x8a\x92\xd5\xe6\x8e\x6f\x11\x39\xfe\x36\x87\x89\x42\x25\x42"
+ "\xd9\xbe\x67\x15\xe1\x82\x8a\x5e\x98\xc2\xd5\xde\x9e\x13\x1a\xe7"
+ "\xf9\x9f\x8e\x2d\x49\xdc\x4d\x98\x8c\xdd\xfd\x24\x7c\x46\xa9\x69"
+ "\x3b\x31\xb3\x12\xce\x54\xf6\x65\x75\x40\xc2\xf1\x04\x92\xe3\x83"
+ "\xeb\x02\x3d\x79\xc0\xf9\x7c\x28\xb3\x97\x03\xf7\x61\x1c\xce\x95"
+ "\x1a\xa0\xb3\x77\x1b\xc1\x9f\xf8\xf6\x3f\x4d\x0a\xfb\xfa\x64\x1c"
+ "\xcb\x37\x5b\xc3\x28\x60\x9f\xd1\xf2\xc4\xee\x77\xaa\x1f\xe9\xa2"
+ "\x89\x4c\xc6\xb7\xb3\xe4\xa5\xed\xa7\xe8\xac\x90\xdc\xc3\xfb\x56"
+ "\x9c\xda\x2c\x1d\x1a\x9a\x8c\x82\x92\xee\xdc\xa0\xa4\x01\x6e\x7f"
+ "\xc7\x0e\xc2\x73\x7d\xa6\xac\x12\x01\xc0\xc0\xc8\x7c\x84\x86\xc7"
+ "\xa5\x94\xe5\x33\x84\x71\x6e\x36\xe3\x3b\x81\x30\xe0\xc8\x51\x52"
+ "\x2b\x9e\x68\xa2\x6e\x09\x95\x8c\x7f\x78\x82\xbd\x53\x26\xe7\x95"
+ "\xe0\x03\xda\xc0\xc3\x6e\xcf\xdc\xb3\x14\xfc\xe9\x5b\x9b\x70\x6c"
+ "\x93\x04\xab\x13\xf7\x17\x6d\xee\xad\x32\x48\xe9\xa0\x94\x1b\x14"
+ "\x64\x4f\xa1\xb3\x8d\x6a\xca\x28\xfe\x4a\xf4\xf0\xc5\xb7\xf9\x8a"
+ "\x8e\xff\xfe\x57\x6f\x20\xdb\x04\xab\x02\x31\x22\x42\xfd\xbd\x77"
+ "\xea\xce\xe8\xc7\x5d\xe0\x8e\xd6\x66\xd0\xe4\x04\x2f\x5f\x71\xc7"
+ "\x61\x2d\xa5\x3f\x2f\x46\xf2\xd8\x5b\x25\x82\xf0\x52\x88\xc0\x59"
+ "\xd3\xa3\x90\x17\xc2\x04\x13\xc3\x13\x69\x4f\x17\xb1\xb3\x46\x4f"
+ "\xa7\xe6\x8b\x5e\x3e\x95\x0e\xf5\x42\x17\x7f\x4d\x1f\x1b\x7d\x65"
+ "\x86\xc5\xc8\xae\xae\xd8\x4f\xe7\x89\x41\x69\xfd\x06\xce\x5d\xed"
+ "\x44\x55\xad\x51\x98\x15\x78\x8d\x68\xfc\x93\x72\x9d\x22\xe5\x1d"
+ "\x21\xc3\xbe\x3a\x44\x34\xc0\xa3\x1f\xca\xdf\x45\xd0\x5c\xcd\xb7"
+ "\x72\xeb\xae\x7a\xad\x3f\x05\xa0\xe3\x6e\x5a\xd8\x52\xa7\xf1\x1e"
+ "\xb4\xf2\xcf\xe7\xdf\xa7\xf2\x22\x00\xb2\xc4\x17\x3d\x2c\x15\x04"
+ "\x71\x28\x69\x5c\x69\x21\xc8\xf1\x9b\xd8\xc7\xbc\x27\xa3\x85\xe9"
+ "\x53\x77\xd3\x65\xc3\x86\xdd\xb3\x76\x13\xfb\xa1\xd4\xee\x9d\xe4"
+ "\x51\x3f\x83\x59\xe4\x47\xa8\xa6\x0d\x68\xd5\xf6\xf4\xca\x31\xcd"
+ "\x30\x48\x34\x90\x11\x8e\x87\xe9\xea\xc9\xd0\xc3\xba\x28\xf9\xc0"
+ "\xc9\x8e\x23\xe5\xc2\xee\xf2\x47\x9c\x41\x1c\x10\x33\x27\x23\x49"
+ "\xe5\x0d\x18\xbe\x19\xc1\xba\x6c\xdc\xb7\xa1\xe7\xc5\x0d\x6f\xf0"
+ "\x8c\x62\x6e\x0d\x14\xef\xef\xf2\x8e\x01\xd2\x76\xf5\xc1\xe1\x92"
+ "\x3c\xb3\x76\xcd\xd8\xdd\x9b\xe0\x8e\xdc\x24\x34\x13\x65\x0f\x11"
+ "\xaf\x99\x7a\x2f\xe6\x1f\x7d\x17\x3e\x8a\x68\x9a\x37\xc8\x8d\x3e"
+ "\xa3\xfe\xfe\x57\x22\xe6\x0e\x50\xb5\x98\x0b\x71\xd8\x01\xa2\x8d"
+ "\x51\x96\x50\xc2\x41\x31\xd8\x23\x98\xfc\xd1\x9d\x7e\x27\xbb\x69"
+ "\x78\xe0\x87\xf7\xe4\xdd\x58\x13\x9d\xec\x00\xe4\xb9\x70\xa2\x94"
+ "\x5d\x52\x4e\xf2\x5c\xd1\xbc\xfd\xee\x9b\xb9\xe5\xc4\xc0\xa8\x77"
+ "\x67\xa4\xd1\x95\x34\xe4\x6d\x5f\x25\x02\x8d\x65\xdd\x11\x63\x55"
+ "\x04\x01\x21\x60\xc1\x5c\xef\x77\x33\x01\x1c\xa2\x11\x2b\xdd\x2b"
+ "\x74\x99\x23\x38\x05\x1b\x7e\x2e\x01\x52\xfe\x9c\x23\xde\x3e\x1a"
+ "\x72\xf4\xff\x7b\x02\xaa\x08\xcf\xe0\x5b\x83\xbe\x85\x5a\xe8\x9d"
+ "\x11\x3e\xff\x2f\xc6\x97\x67\x36\x6c\x0f\x81\x9c\x26\x29\xb1\x0f"
+ "\xbb\x53\xbd\xf4\xec\x2a\x84\x41\x28\x3b\x86\x40\x95\x69\x55\x5f"
+ "\x30\xee\xda\x1e\x6c\x4b\x25\xd6\x2f\x2c\x0e\x3c\x1a\x26\xa0\x3e"
+ "\xef\x09\xc6\x2b\xe5\xa1\x0c\x03\xa8\xf5\x39\x70\x31\xc4\x32\x79"
+ "\xd1\xd9\xc2\xcc\x32\x4a\xf1\x2f\x57\x5a\xcc\xe5\xc3\xc5\xd5\x4e"
+ "\x86\x56\xca\x64\xdb\xab\x61\x85\x8f\xf9\x20\x02\x40\x66\x76\x9e"
+ "\x5e\xd4\xac\xf0\x47\xa6\x50\x5f\xc2\xaf\x55\x9b\xa3\xc9\x8b\xf8"
+ "\x42\xd5\xcf\x1a\x95\x22\xd9\xd1\x0b\x92\x51\xca\xde\x46\x02\x0d"
+ "\x8b\xee\xd9\xa0\x04\x74\xf5\x0e\xb0\x3a\x62\xec\x3c\x91\x29\x33"
+ "\xa7\x78\x22\x92\xac\x27\xe6\x2d\x6f\x56\x8a\x5d\x72\xc2\xf1\x5c"
+ "\x54\x11\x97\x24\x61\xcb\x0c\x52\xd4\x57\x56\x22\x86\xf0\x19\x27"
+ "\x76\x30\x04\xf4\x39\x7b\x1a\x5a\x04\x0d\xec\x59\x9a\x31\x4c\x40"
+ "\x19\x6d\x3c\x41\x1b\x0c\xca\xeb\x25\x39\x6c\x96\xf8\x55\xd0\xec",
+ .secret_size = 16,
+ .b_secret_size = 1040,
+ .b_public_size = 1024,
+ .expected_a_public_size = 1024,
+ .expected_ss_size = 1024,
+ .genkey = true,
+ },
+};
+
static const struct kpp_testvec curve25519_tv_template[] = {
{
.secret = (u8[32]){ 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d,
@@ -5713,6 +7140,7 @@ static const struct hash_testvec hmac_sha1_tv_template[] = {
.psize = 28,
.digest = "\xef\xfc\xdf\x6a\xe5\xeb\x2f\xa2\xd2\x74"
"\x16\xd5\xf1\x84\xdf\x9c\x25\x9a\x7c\x79",
+ .fips_skip = 1,
}, {
.key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa",
.ksize = 20,
@@ -5802,6 +7230,7 @@ static const struct hash_testvec hmac_sha224_tv_template[] = {
"\x45\x69\x0f\x3a\x7e\x9e\x6d\x0f"
"\x8b\xbe\xa2\xa3\x9e\x61\x48\x00"
"\x8f\xd0\x5e\x44",
+ .fips_skip = 1,
}, {
.key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
"\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -5945,6 +7374,7 @@ static const struct hash_testvec hmac_sha256_tv_template[] = {
"\x6a\x04\x24\x26\x08\x95\x75\xc7"
"\x5a\x00\x3f\x08\x9d\x27\x39\x83"
"\x9d\xec\x58\xb9\x64\xec\x38\x43",
+ .fips_skip = 1,
}, {
.key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
"\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -6443,6 +7873,7 @@ static const struct hash_testvec hmac_sha384_tv_template[] = {
"\xe4\x2e\xc3\x73\x63\x22\x44\x5e"
"\x8e\x22\x40\xca\x5e\x69\xe2\xc7"
"\x8b\x32\x39\xec\xfa\xb2\x16\x49",
+ .fips_skip = 1,
}, {
.key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
"\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -6543,6 +7974,7 @@ static const struct hash_testvec hmac_sha512_tv_template[] = {
"\x6d\x03\x4f\x65\xf8\xf0\xe6\xfd"
"\xca\xea\xb1\xa3\x4d\x4a\x6b\x4b"
"\x63\x6e\x07\x0a\x38\xbc\xe7\x37",
+ .fips_skip = 1,
}, {
.key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
"\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -6638,6 +8070,7 @@ static const struct hash_testvec hmac_sha3_224_tv_template[] = {
"\x1b\x79\x86\x34\xad\x38\x68\x11"
"\xc2\xcf\xc8\x5b\xfa\xf5\xd5\x2b"
"\xba\xce\x5e\x66",
+ .fips_skip = 1,
}, {
.key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
"\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -6725,6 +8158,7 @@ static const struct hash_testvec hmac_sha3_256_tv_template[] = {
"\x35\x96\xbb\xb0\xda\x73\xb8\x87"
"\xc9\x17\x1f\x93\x09\x5b\x29\x4a"
"\xe8\x57\xfb\xe2\x64\x5e\x1b\xa5",
+ .fips_skip = 1,
}, {
.key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
"\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -6816,6 +8250,7 @@ static const struct hash_testvec hmac_sha3_384_tv_template[] = {
"\x3c\xa1\x35\x08\xa9\x32\x43\xce"
"\x48\xc0\x45\xdc\x00\x7f\x26\xa2"
"\x1b\x3f\x5e\x0e\x9d\xf4\xc2\x0a",
+ .fips_skip = 1,
}, {
.key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
"\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -6915,6 +8350,7 @@ static const struct hash_testvec hmac_sha3_512_tv_template[] = {
"\xee\x7a\x0c\x31\xd0\x22\xa9\x5e"
"\x1f\xc9\x2b\xa9\xd7\x7d\xf8\x83"
"\x96\x02\x75\xbe\xb4\xe6\x20\x24",
+ .fips_skip = 1,
}, {
.key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
"\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
diff --git a/crypto/xts.c b/crypto/xts.c
index 6c12f30dbdd6..63c85b9e64e0 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -466,3 +466,4 @@ MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("XTS block cipher mode");
MODULE_ALIAS_CRYPTO("xts");
MODULE_IMPORT_NS(CRYPTO_INTERNAL);
+MODULE_SOFTDEP("pre: ecb");
diff --git a/drivers/accessibility/speakup/speakup_dectlk.c b/drivers/accessibility/speakup/speakup_dectlk.c
index 580ec796816b..78ca4987e619 100644
--- a/drivers/accessibility/speakup/speakup_dectlk.c
+++ b/drivers/accessibility/speakup/speakup_dectlk.c
@@ -44,6 +44,7 @@ static struct var_t vars[] = {
{ CAPS_START, .u.s = {"[:dv ap 160] " } },
{ CAPS_STOP, .u.s = {"[:dv ap 100 ] " } },
{ RATE, .u.n = {"[:ra %d] ", 180, 75, 650, 0, 0, NULL } },
+ { PITCH, .u.n = {"[:dv ap %d] ", 122, 50, 350, 0, 0, NULL } },
{ INFLECTION, .u.n = {"[:dv pr %d] ", 100, 0, 10000, 0, 0, NULL } },
{ VOL, .u.n = {"[:dv g5 %d] ", 86, 60, 86, 0, 0, NULL } },
{ PUNCT, .u.n = {"[:pu %c] ", 0, 0, 2, 0, 0, "nsa" } },
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index ba45541b1f1f..1e34f846508f 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -11,6 +11,7 @@ menuconfig ACPI
depends on ARCH_SUPPORTS_ACPI
select PNP
select NLS
+ select CRC32
default y if X86
help
Advanced Configuration and Power Interface (ACPI) support for
@@ -301,7 +302,7 @@ config ACPI_IPMI
help
This driver enables the ACPI to access the BMC controller. And it
uses the IPMI request/response message to communicate with BMC
- controller, which can be found on on the server.
+ controller, which can be found on the server.
To compile this driver as a module, choose M here:
the module will be called as acpi_ipmi.
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index bb757148e7ba..b5a8d3e00a52 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -81,6 +81,9 @@ obj-$(CONFIG_ACPI_AC) += ac.o
obj-$(CONFIG_ACPI_BUTTON) += button.o
obj-$(CONFIG_ACPI_TINY_POWER_BUTTON) += tiny-power-button.o
obj-$(CONFIG_ACPI_FAN) += fan.o
+fan-objs := fan_core.o
+fan-objs += fan_attr.o
+
obj-$(CONFIG_ACPI_VIDEO) += video.o
obj-$(CONFIG_ACPI_TAD) += acpi_tad.o
obj-$(CONFIG_ACPI_PCI_SLOT) += pci_slot.o
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index bcae0f03572b..fbe0756259c5 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -21,6 +21,7 @@
#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>
#include <linux/pwm.h>
+#include <linux/pxa2xx_ssp.h>
#include <linux/suspend.h>
#include <linux/delay.h>
@@ -82,7 +83,7 @@ struct lpss_device_desc {
const char *clk_con_id;
unsigned int prv_offset;
size_t prv_size_override;
- struct property_entry *properties;
+ const struct property_entry *properties;
void (*setup)(struct lpss_private_data *pdata);
bool resume_from_noirq;
};
@@ -219,10 +220,16 @@ static void bsw_pwm_setup(struct lpss_private_data *pdata)
pwm_add_table(bsw_pwm_lookup, ARRAY_SIZE(bsw_pwm_lookup));
}
-static const struct lpss_device_desc lpt_dev_desc = {
+static const struct property_entry lpt_spi_properties[] = {
+ PROPERTY_ENTRY_U32("intel,spi-pxa2xx-type", LPSS_LPT_SSP),
+ { }
+};
+
+static const struct lpss_device_desc lpt_spi_dev_desc = {
.flags = LPSS_CLK | LPSS_CLK_GATE | LPSS_CLK_DIVIDER | LPSS_LTR
| LPSS_SAVE_CTX,
.prv_offset = 0x800,
+ .properties = lpt_spi_properties,
};
static const struct lpss_device_desc lpt_i2c_dev_desc = {
@@ -282,9 +289,15 @@ static const struct lpss_device_desc bsw_uart_dev_desc = {
.properties = uart_properties,
};
+static const struct property_entry byt_spi_properties[] = {
+ PROPERTY_ENTRY_U32("intel,spi-pxa2xx-type", LPSS_BYT_SSP),
+ { }
+};
+
static const struct lpss_device_desc byt_spi_dev_desc = {
.flags = LPSS_CLK | LPSS_CLK_GATE | LPSS_CLK_DIVIDER | LPSS_SAVE_CTX,
.prv_offset = 0x400,
+ .properties = byt_spi_properties,
};
static const struct lpss_device_desc byt_sdio_dev_desc = {
@@ -305,11 +318,17 @@ static const struct lpss_device_desc bsw_i2c_dev_desc = {
.resume_from_noirq = true,
};
+static const struct property_entry bsw_spi_properties[] = {
+ PROPERTY_ENTRY_U32("intel,spi-pxa2xx-type", LPSS_BSW_SSP),
+ { }
+};
+
static const struct lpss_device_desc bsw_spi_dev_desc = {
.flags = LPSS_CLK | LPSS_CLK_GATE | LPSS_CLK_DIVIDER | LPSS_SAVE_CTX
| LPSS_NO_D3_DELAY,
.prv_offset = 0x400,
.setup = lpss_deassert_reset,
+ .properties = bsw_spi_properties,
};
static const struct x86_cpu_id lpss_cpu_ids[] = {
@@ -329,8 +348,8 @@ static const struct acpi_device_id acpi_lpss_device_ids[] = {
{ "INTL9C60", LPSS_ADDR(lpss_dma_desc) },
/* Lynxpoint LPSS devices */
- { "INT33C0", LPSS_ADDR(lpt_dev_desc) },
- { "INT33C1", LPSS_ADDR(lpt_dev_desc) },
+ { "INT33C0", LPSS_ADDR(lpt_spi_dev_desc) },
+ { "INT33C1", LPSS_ADDR(lpt_spi_dev_desc) },
{ "INT33C2", LPSS_ADDR(lpt_i2c_dev_desc) },
{ "INT33C3", LPSS_ADDR(lpt_i2c_dev_desc) },
{ "INT33C4", LPSS_ADDR(lpt_uart_dev_desc) },
@@ -356,8 +375,8 @@ static const struct acpi_device_id acpi_lpss_device_ids[] = {
{ "808622C1", LPSS_ADDR(bsw_i2c_dev_desc) },
/* Broadwell LPSS devices */
- { "INT3430", LPSS_ADDR(lpt_dev_desc) },
- { "INT3431", LPSS_ADDR(lpt_dev_desc) },
+ { "INT3430", LPSS_ADDR(lpt_spi_dev_desc) },
+ { "INT3431", LPSS_ADDR(lpt_spi_dev_desc) },
{ "INT3432", LPSS_ADDR(lpt_i2c_dev_desc) },
{ "INT3433", LPSS_ADDR(lpt_i2c_dev_desc) },
{ "INT3434", LPSS_ADDR(lpt_uart_dev_desc) },
@@ -366,7 +385,7 @@ static const struct acpi_device_id acpi_lpss_device_ids[] = {
{ "INT3437", },
/* Wildcat Point LPSS devices */
- { "INT3438", LPSS_ADDR(lpt_dev_desc) },
+ { "INT3438", LPSS_ADDR(lpt_spi_dev_desc) },
{ }
};
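With the SSP type attached as a device property above, a consumer such as the pxa2xx SPI driver can recover it through the generic property API. A sketch, assuming a bound struct device *dev:

	u32 ssp_type;

	/* Returns 0 on success; the property comes from the
	 * lpt/byt/bsw_spi_properties tables added above. */
	if (!device_property_read_u32(dev, "intel,spi-pxa2xx-type", &ssp_type))
		dev_info(dev, "SSP type %u\n", ssp_type);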
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index 78d621290a35..de3cbf152dee 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -95,7 +95,7 @@ static void acpi_platform_fill_resource(struct acpi_device *adev,
* Name of the platform device will be the same as @adev's.
*/
struct platform_device *acpi_create_platform_device(struct acpi_device *adev,
- struct property_entry *properties)
+ const struct property_entry *properties)
{
struct platform_device *pdev = NULL;
struct platform_device_info pdevinfo;
diff --git a/drivers/acpi/acpica/nswalk.c b/drivers/acpi/acpica/nswalk.c
index 915c2433463d..e7c30ce06e18 100644
--- a/drivers/acpi/acpica/nswalk.c
+++ b/drivers/acpi/acpica/nswalk.c
@@ -169,6 +169,9 @@ acpi_ns_walk_namespace(acpi_object_type type,
if (start_node == ACPI_ROOT_OBJECT) {
start_node = acpi_gbl_root_node;
+ if (!start_node) {
+ return_ACPI_STATUS(AE_NO_NAMESPACE);
+ }
}
/* Null child means "get first node" */
diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c
index 19e50fcbf4d6..598fd19b65fa 100644
--- a/drivers/acpi/apei/bert.c
+++ b/drivers/acpi/apei/bert.c
@@ -29,6 +29,7 @@
#undef pr_fmt
#define pr_fmt(fmt) "BERT: " fmt
+#define ACPI_BERT_PRINT_MAX_LEN 1024
static int bert_disable;
@@ -58,8 +59,11 @@ static void __init bert_print_all(struct acpi_bert_region *region,
}
pr_info_once("Error records from previous boot:\n");
-
- cper_estatus_print(KERN_INFO HW_ERR, estatus);
+ if (region_len < ACPI_BERT_PRINT_MAX_LEN)
+ cper_estatus_print(KERN_INFO HW_ERR, estatus);
+ else
+ pr_info_once("Max print length exceeded, table data is available at:\n"
+ "/sys/firmware/acpi/tables/data/BERT");
/*
* Because the boot error source is "one-time polled" type,
@@ -77,7 +81,7 @@ static int __init setup_bert_disable(char *str)
{
bert_disable = 1;
- return 0;
+ return 1;
}
__setup("bert_disable", setup_bert_disable);
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 242f3c2d5533..698d67cee052 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -891,7 +891,7 @@ EXPORT_SYMBOL_GPL(erst_clear);
static int __init setup_erst_disable(char *str)
{
erst_disable = 1;
- return 0;
+ return 1;
}
__setup("erst_disable", setup_erst_disable);
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 0c5c9acc6254..d91ad378c00d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -1457,33 +1457,35 @@ static struct platform_driver ghes_platform_driver = {
.remove = ghes_remove,
};
-static int __init ghes_init(void)
+void __init acpi_ghes_init(void)
{
int rc;
+ sdei_init();
+
if (acpi_disabled)
- return -ENODEV;
+ return;
switch (hest_disable) {
case HEST_NOT_FOUND:
- return -ENODEV;
+ return;
case HEST_DISABLED:
pr_info(GHES_PFX "HEST is not enabled!\n");
- return -EINVAL;
+ return;
default:
break;
}
if (ghes_disable) {
pr_info(GHES_PFX "GHES is not enabled!\n");
- return -EINVAL;
+ return;
}
ghes_nmi_init_cxt();
rc = platform_driver_register(&ghes_platform_driver);
if (rc)
- goto err;
+ return;
rc = apei_osc_setup();
if (rc == 0 && osc_sb_apei_support_acked)
@@ -1494,9 +1496,4 @@ static int __init ghes_init(void)
pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
else
pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
-
- return 0;
-err:
- return rc;
}
-device_initcall(ghes_init);
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 0edc1ed47673..6aef1ee5e1bd 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -224,7 +224,7 @@ err:
static int __init setup_hest_disable(char *str)
{
hest_disable = HEST_DISABLED;
- return 0;
+ return 1;
}
__setup("hest_disable", setup_hest_disable);
diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig
index 6dba187f4f2e..d4a72835f328 100644
--- a/drivers/acpi/arm64/Kconfig
+++ b/drivers/acpi/arm64/Kconfig
@@ -8,3 +8,13 @@ config ACPI_IORT
config ACPI_GTDT
bool
+
+config ACPI_AGDI
+ bool "Arm Generic Diagnostic Dump and Reset Device Interface"
+ depends on ARM_SDE_INTERFACE
+ help
+ Arm Generic Diagnostic Dump and Reset Device Interface (AGDI) is
+ a standard that enables issuing a non-maskable diagnostic dump and
+ reset command.
+
+	  If set, the kernel parses the AGDI table and listens for the command.
diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
index 66acbe77f46e..7b9e4045659d 100644
--- a/drivers/acpi/arm64/Makefile
+++ b/drivers/acpi/arm64/Makefile
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_ACPI_AGDI) += agdi.o
obj-$(CONFIG_ACPI_IORT) += iort.o
obj-$(CONFIG_ACPI_GTDT) += gtdt.o
obj-y += dma.o
diff --git a/drivers/acpi/arm64/agdi.c b/drivers/acpi/arm64/agdi.c
new file mode 100644
index 000000000000..4df337d545b7
--- /dev/null
+++ b/drivers/acpi/arm64/agdi.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This file implements handling of
+ * Arm Generic Diagnostic Dump and Reset Interface table (AGDI)
+ *
+ * Copyright (c) 2022, Ampere Computing LLC
+ */
+
+#define pr_fmt(fmt) "ACPI: AGDI: " fmt
+
+#include <linux/acpi.h>
+#include <linux/arm_sdei.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+
+struct agdi_data {
+ int sdei_event;
+};
+
+static int agdi_sdei_handler(u32 sdei_event, struct pt_regs *regs, void *arg)
+{
+ nmi_panic(regs, "Arm Generic Diagnostic Dump and Reset SDEI event issued");
+ return 0;
+}
+
+static int agdi_sdei_probe(struct platform_device *pdev,
+ struct agdi_data *adata)
+{
+ int err;
+
+ err = sdei_event_register(adata->sdei_event, agdi_sdei_handler, pdev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to register for SDEI event %d",
+ adata->sdei_event);
+ return err;
+ }
+
+ err = sdei_event_enable(adata->sdei_event);
+ if (err) {
+ sdei_event_unregister(adata->sdei_event);
+ dev_err(&pdev->dev, "Failed to enable event %d\n",
+ adata->sdei_event);
+ return err;
+ }
+
+ return 0;
+}
+
+static int agdi_probe(struct platform_device *pdev)
+{
+ struct agdi_data *adata = dev_get_platdata(&pdev->dev);
+
+ if (!adata)
+ return -EINVAL;
+
+ return agdi_sdei_probe(pdev, adata);
+}
+
+static int agdi_remove(struct platform_device *pdev)
+{
+ struct agdi_data *adata = dev_get_platdata(&pdev->dev);
+ int err, i;
+
+ err = sdei_event_disable(adata->sdei_event);
+ if (err)
+ return err;
+
+ for (i = 0; i < 3; i++) {
+ err = sdei_event_unregister(adata->sdei_event);
+ if (err != -EINPROGRESS)
+ break;
+
+ schedule();
+ }
+
+ return err;
+}
+
+static struct platform_driver agdi_driver = {
+ .driver = {
+ .name = "agdi",
+ },
+ .probe = agdi_probe,
+ .remove = agdi_remove,
+};
+
+void __init acpi_agdi_init(void)
+{
+ struct acpi_table_agdi *agdi_table;
+ struct agdi_data pdata;
+ struct platform_device *pdev;
+ acpi_status status;
+
+ status = acpi_get_table(ACPI_SIG_AGDI, 0,
+ (struct acpi_table_header **) &agdi_table);
+ if (ACPI_FAILURE(status))
+ return;
+
+ if (agdi_table->flags & ACPI_AGDI_SIGNALING_MODE) {
+ pr_warn("Interrupt signaling is not supported");
+ goto err_put_table;
+ }
+
+ pdata.sdei_event = agdi_table->sdei_event;
+
+ pdev = platform_device_register_data(NULL, "agdi", 0, &pdata, sizeof(pdata));
+ if (IS_ERR(pdev))
+ goto err_put_table;
+
+ if (platform_driver_register(&agdi_driver))
+ platform_device_unregister(pdev);
+
+err_put_table:
+ acpi_put_table((struct acpi_table_header *)agdi_table);
+}
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 3b23fb775ac4..f2f8f05662de 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -1361,9 +1361,17 @@ static void __init arm_smmu_v3_pmcg_init_resources(struct resource *res,
res[0].start = pmcg->page0_base_address;
res[0].end = pmcg->page0_base_address + SZ_4K - 1;
res[0].flags = IORESOURCE_MEM;
- res[1].start = pmcg->page1_base_address;
- res[1].end = pmcg->page1_base_address + SZ_4K - 1;
- res[1].flags = IORESOURCE_MEM;
+ /*
+ * The initial version in DEN0049C lacked a way to describe register
+ * page 1, which makes it broken for most PMCG implementations; in
+ * that case, just let the driver fail gracefully if it expects to
+ * find a second memory resource.
+ */
+ if (node->revision > 0) {
+ res[1].start = pmcg->page1_base_address;
+ res[1].end = pmcg->page1_base_address + SZ_4K - 1;
+ res[1].flags = IORESOURCE_MEM;
+ }
if (pmcg->overflow_gsiv)
acpi_iort_register_irq(pmcg->overflow_gsiv, "overflow",
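On the consumer side this means a PMCG driver can no longer assume a second memory resource is present. A defensive sketch (hypothetical driver code, assuming a struct platform_device *pdev):

	struct resource *res;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
	if (!res)
		dev_warn(&pdev->dev, "no page 1 resource; IORT from a revision 0 table?\n");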
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index ea31ae01458b..dc208f5f5a1f 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -59,6 +59,10 @@ MODULE_PARM_DESC(cache_time, "cache time in milliseconds");
static const struct acpi_device_id battery_device_ids[] = {
{"PNP0C0A", 0},
+
+ /* Microsoft Surface Go 3 */
+ {"MSHW0146", 0},
+
{"", 0},
};
@@ -1148,6 +1152,14 @@ static const struct dmi_system_id bat_dmi_table[] __initconst = {
DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad"),
},
},
+ {
+ /* Microsoft Surface Go 3 */
+ .callback = battery_notification_delay_quirk,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 3"),
+ },
+ },
{},
};
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 07f604832fd6..3e58b613a2c4 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -26,6 +26,7 @@
#include <asm/mpspec.h>
#include <linux/dmi.h>
#endif
+#include <linux/acpi_agdi.h>
#include <linux/acpi_iort.h>
#include <linux/acpi_viot.h>
#include <linux/pci.h>
@@ -283,6 +284,8 @@ EXPORT_SYMBOL_GPL(osc_pc_lpi_support_confirmed);
bool osc_sb_native_usb4_support_confirmed;
EXPORT_SYMBOL_GPL(osc_sb_native_usb4_support_confirmed);
+bool osc_sb_cppc_not_supported;
+
static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48";
static void acpi_bus_osc_negotiate_platform_control(void)
{
@@ -332,21 +335,38 @@ static void acpi_bus_osc_negotiate_platform_control(void)
if (ACPI_FAILURE(acpi_run_osc(handle, &context)))
return;
- kfree(context.ret.pointer);
+ capbuf_ret = context.ret.pointer;
+ if (context.ret.length <= OSC_SUPPORT_DWORD) {
+ kfree(context.ret.pointer);
+ return;
+ }
+
+#ifdef CONFIG_X86
+ if (boot_cpu_has(X86_FEATURE_HWP))
+ osc_sb_cppc_not_supported = !(capbuf_ret[OSC_SUPPORT_DWORD] &
+ (OSC_SB_CPC_SUPPORT | OSC_SB_CPCV2_SUPPORT));
+#endif
- /* Now run _OSC again with query flag clear */
+ /*
+ * Now run _OSC again with query flag clear and with the caps
+ * supported by both the OS and the platform.
+ */
capbuf[OSC_QUERY_DWORD] = 0;
+ capbuf[OSC_SUPPORT_DWORD] = capbuf_ret[OSC_SUPPORT_DWORD];
+ kfree(context.ret.pointer);
if (ACPI_FAILURE(acpi_run_osc(handle, &context)))
return;
capbuf_ret = context.ret.pointer;
- osc_sb_apei_support_acked =
- capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT;
- osc_pc_lpi_support_confirmed =
- capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT;
- osc_sb_native_usb4_support_confirmed =
- capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT;
+ if (context.ret.length > OSC_SUPPORT_DWORD) {
+ osc_sb_apei_support_acked =
+ capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT;
+ osc_pc_lpi_support_confirmed =
+ capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT;
+ osc_sb_native_usb4_support_confirmed =
+ capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT;
+ }
kfree(context.ret.pointer);
}
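The rework turns _OSC into a proper two-pass handshake: the first pass queries what the platform supports, and the second pass commits only the intersection of the OS-requested and platform-reported capabilities. In outline (simplified; names as in the code above):

	capbuf[OSC_QUERY_DWORD] = OSC_QUERY_ENABLE;		/* pass 1: query */
	/* ... run _OSC, keep capbuf_ret ... */
	capbuf[OSC_QUERY_DWORD] = 0;				/* pass 2: commit */
	capbuf[OSC_SUPPORT_DWORD] = capbuf_ret[OSC_SUPPORT_DWORD];
	/* ... run _OSC again with the agreed capability set ... */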
@@ -1043,7 +1063,12 @@ struct bus_type acpi_bus_type = {
.remove = acpi_device_remove,
.uevent = acpi_device_uevent,
};
-EXPORT_SYMBOL_GPL(acpi_bus_type);
+
+int acpi_bus_for_each_dev(int (*fn)(struct device *, void *), void *data)
+{
+ return bus_for_each_dev(&acpi_bus_type, NULL, data, fn);
+}
+EXPORT_SYMBOL_GPL(acpi_bus_for_each_dev);
/* --------------------------------------------------------------------------
Initialization/Cleanup
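Since acpi_bus_type is no longer exported, external code iterates over ACPI devices through the new wrapper instead. A usage sketch with a hypothetical callback; returning non-zero from the callback stops the iteration:

	static int count_acpi_devs(struct device *dev, void *data)
	{
		(*(int *)data)++;
		return 0;
	}

	int n = 0;
	acpi_bus_for_each_dev(count_acpi_devs, &n);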
@@ -1331,6 +1356,8 @@ static int __init acpi_init(void)
pci_mmcfg_late_init();
acpi_iort_init();
+ acpi_hest_init();
+ acpi_ghes_init();
acpi_scan_init();
acpi_ec_init();
acpi_debugfs_init();
@@ -1339,6 +1366,7 @@ static int __init acpi_init(void)
acpi_debugger_init();
acpi_setup_sb_notify_handler();
acpi_viot_init();
+ acpi_agdi_init();
return 0;
}
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 866560cbb082..d418449194ee 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -633,8 +633,8 @@ static bool is_cppc_supported(int revision, int num_ent)
* )
*/
-#ifndef init_freq_invariance_cppc
-static inline void init_freq_invariance_cppc(void) { }
+#ifndef arch_init_invariance_cppc
+static inline void arch_init_invariance_cppc(void) { }
#endif
/**
@@ -656,6 +656,9 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
acpi_status status;
int ret = -EFAULT;
+ if (osc_sb_cppc_not_supported)
+ return -ENODEV;
+
/* Parse the ACPI _CPC table for this CPU. */
status = acpi_evaluate_object_typed(handle, "_CPC", NULL, &output,
ACPI_TYPE_PACKAGE);
@@ -816,7 +819,7 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
goto out_free;
}
- init_freq_invariance_cppc();
+ arch_init_invariance_cppc();
kfree(output.pointer);
return 0;
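The rename makes the hook architecture-neutral: an architecture supplies its own implementation by defining the macro before this fallback is seen, and the empty static inline applies otherwise. On x86 the mapping is presumably along these lines (a sketch, not a quote of the actual header):

	/* asm/topology.h (sketch): route the generic hook to the x86
	 * frequency-invariance setup. */
	#define arch_init_invariance_cppc init_freq_invariance_cppc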
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 0077d2c85df8..a1b871a418f8 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -168,7 +168,7 @@ struct acpi_ec_query {
};
static int acpi_ec_submit_query(struct acpi_ec *ec);
-static bool advance_transaction(struct acpi_ec *ec, bool interrupt);
+static void advance_transaction(struct acpi_ec *ec, bool interrupt);
static void acpi_ec_event_handler(struct work_struct *work);
struct acpi_ec *first_ec;
@@ -441,36 +441,35 @@ static bool acpi_ec_submit_flushable_request(struct acpi_ec *ec)
return true;
}
-static bool acpi_ec_submit_event(struct acpi_ec *ec)
+static void acpi_ec_submit_event(struct acpi_ec *ec)
{
+ /*
+ * It is safe to mask the events here, because acpi_ec_close_event()
+ * will run at least once after this.
+ */
acpi_ec_mask_events(ec);
if (!acpi_ec_event_enabled(ec))
- return false;
+ return;
- if (ec->event_state == EC_EVENT_READY) {
- ec_dbg_evt("Command(%s) submitted/blocked",
- acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY));
+ if (ec->event_state != EC_EVENT_READY)
+ return;
- ec->event_state = EC_EVENT_IN_PROGRESS;
- /*
- * If events_to_process is greqter than 0 at this point, the
- * while () loop in acpi_ec_event_handler() is still running
- * and incrementing events_to_process will cause it to invoke
- * acpi_ec_submit_query() once more, so it is not necessary to
- * queue up the event work to start the same loop again.
- */
- if (ec->events_to_process++ > 0)
- return true;
-
- ec->events_in_progress++;
- return queue_work(ec_wq, &ec->work);
- }
+ ec_dbg_evt("Command(%s) submitted/blocked",
+ acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY));
+ ec->event_state = EC_EVENT_IN_PROGRESS;
/*
- * The event handling work has not been completed yet, so it needs to be
- * flushed.
+ * If events_to_process is greater than 0 at this point, the while ()
+ * loop in acpi_ec_event_handler() is still running and incrementing
+ * events_to_process will cause it to invoke acpi_ec_submit_query() once
+ * more, so it is not necessary to queue up the event work to start the
+ * same loop again.
*/
- return true;
+ if (ec->events_to_process++ > 0)
+ return;
+
+ ec->events_in_progress++;
+ queue_work(ec_wq, &ec->work);
}
static void acpi_ec_complete_event(struct acpi_ec *ec)
@@ -655,11 +654,10 @@ static void acpi_ec_spurious_interrupt(struct acpi_ec *ec, struct transaction *t
acpi_ec_mask_events(ec);
}
-static bool advance_transaction(struct acpi_ec *ec, bool interrupt)
+static void advance_transaction(struct acpi_ec *ec, bool interrupt)
{
struct transaction *t = ec->curr;
bool wakeup = false;
- bool ret = false;
u8 status;
ec_dbg_stm("%s (%d)", interrupt ? "IRQ" : "TASK", smp_processor_id());
@@ -724,12 +722,10 @@ static bool advance_transaction(struct acpi_ec *ec, bool interrupt)
out:
if (status & ACPI_EC_FLAG_SCI)
- ret = acpi_ec_submit_event(ec);
+ acpi_ec_submit_event(ec);
if (wakeup && interrupt)
wake_up(&ec->wait);
-
- return ret;
}
static void start_transaction(struct acpi_ec *ec)
@@ -1242,6 +1238,7 @@ static void acpi_ec_event_handler(struct work_struct *work)
acpi_ec_submit_query(ec);
spin_lock_irq(&ec->lock);
+
ec->events_to_process--;
}
@@ -1250,27 +1247,30 @@ static void acpi_ec_event_handler(struct work_struct *work)
* event handling work again regardless of whether or not the query
* queued up above is processed successfully.
*/
- if (ec_event_clearing == ACPI_EC_EVT_TIMING_EVENT)
+ if (ec_event_clearing == ACPI_EC_EVT_TIMING_EVENT) {
+ bool guard_timeout;
+
acpi_ec_complete_event(ec);
- else
- acpi_ec_close_event(ec);
- spin_unlock_irq(&ec->lock);
+ ec_dbg_evt("Event stopped");
- ec_dbg_evt("Event stopped");
+ spin_unlock_irq(&ec->lock);
+
+ guard_timeout = !!ec_guard(ec);
- if (ec_event_clearing == ACPI_EC_EVT_TIMING_EVENT && ec_guard(ec)) {
spin_lock_irq(&ec->lock);
/* Take care of SCI_EVT unless someone else is doing that. */
- if (!ec->curr)
+ if (guard_timeout && !ec->curr)
advance_transaction(ec, false);
+ } else {
+ acpi_ec_close_event(ec);
- spin_unlock_irq(&ec->lock);
+ ec_dbg_evt("Event stopped");
}
- spin_lock_irq(&ec->lock);
ec->events_in_progress--;
+
spin_unlock_irq(&ec->lock);
}
@@ -2051,6 +2051,11 @@ void acpi_ec_set_gpe_wake_mask(u8 action)
acpi_set_gpe_wake_mask(NULL, first_ec->gpe, action);
}
+static bool acpi_ec_work_in_progress(struct acpi_ec *ec)
+{
+ return ec->events_in_progress + ec->queries_in_progress > 0;
+}
+
bool acpi_ec_dispatch_gpe(void)
{
bool work_in_progress = false;
@@ -2066,13 +2071,27 @@ bool acpi_ec_dispatch_gpe(void)
return true;
/*
+ * Cancel the SCI wakeup and process all pending events in case there
+ * are any wakeup ones in there.
+ *
+ * Note that if any non-EC GPEs are active at this point, the SCI will
+ * retrigger after the rearming in acpi_s2idle_wake(), so no events
+ * should be missed by canceling the wakeup here.
+ */
+ pm_system_cancel_wakeup();
+
+ /*
* Dispatch the EC GPE in-band, but do not report wakeup in any case
* to allow the caller to process events properly after that.
*/
spin_lock_irq(&first_ec->lock);
- if (acpi_ec_gpe_status_set(first_ec))
- work_in_progress = advance_transaction(first_ec, false);
+ if (acpi_ec_gpe_status_set(first_ec)) {
+ pm_pr_dbg("ACPI EC GPE status set\n");
+
+ advance_transaction(first_ec, false);
+ work_in_progress = acpi_ec_work_in_progress(first_ec);
+ }
spin_unlock_irq(&first_ec->lock);
@@ -2089,8 +2108,7 @@ bool acpi_ec_dispatch_gpe(void)
spin_lock_irq(&first_ec->lock);
- work_in_progress = first_ec->events_in_progress +
- first_ec->queries_in_progress > 0;
+ work_in_progress = acpi_ec_work_in_progress(first_ec);
spin_unlock_irq(&first_ec->lock);
} while (work_in_progress && !pm_wakeup_pending());
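The event-submission rework boils down to a counted-work pattern: the work item is queued only on the 0 -> 1 transition of events_to_process, and the already-running handler loop absorbs any later increments on its own. Isolated, with the caller holding ec->lock (a sketch of the logic above):

	if (ec->events_to_process++ > 0)
		return;			/* handler loop is already running */

	ec->events_in_progress++;
	queue_work(ec_wq, &ec->work);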
diff --git a/drivers/acpi/fan.h b/drivers/acpi/fan.h
index dd9bb8ca2244..44728529a5b6 100644
--- a/drivers/acpi/fan.h
+++ b/drivers/acpi/fan.h
@@ -6,9 +6,53 @@
*
* Add new device IDs before the generic ACPI fan one.
*/
+
+#ifndef _ACPI_FAN_H_
+#define _ACPI_FAN_H_
+
#define ACPI_FAN_DEVICE_IDS \
{"INT3404", }, /* Fan */ \
{"INTC1044", }, /* Fan for Tiger Lake generation */ \
{"INTC1048", }, /* Fan for Alder Lake generation */ \
{"INTC10A2", }, /* Fan for Raptor Lake generation */ \
{"PNP0C0B", } /* Generic ACPI fan */
+
+#define ACPI_FPS_NAME_LEN 20
+
+struct acpi_fan_fps {
+ u64 control;
+ u64 trip_point;
+ u64 speed;
+ u64 noise_level;
+ u64 power;
+ char name[ACPI_FPS_NAME_LEN];
+ struct device_attribute dev_attr;
+};
+
+struct acpi_fan_fif {
+ u8 revision;
+ u8 fine_grain_ctrl;
+ u8 step_size;
+ u8 low_speed_notification;
+};
+
+struct acpi_fan_fst {
+ u64 revision;
+ u64 control;
+ u64 speed;
+};
+
+struct acpi_fan {
+ bool acpi4;
+ struct acpi_fan_fif fif;
+ struct acpi_fan_fps *fps;
+ int fps_count;
+ struct thermal_cooling_device *cdev;
+ struct device_attribute fst_speed;
+ struct device_attribute fine_grain_control;
+};
+
+int acpi_fan_get_fst(struct acpi_device *device, struct acpi_fan_fst *fst);
+int acpi_fan_create_attributes(struct acpi_device *device);
+void acpi_fan_delete_attributes(struct acpi_device *device);
+#endif
diff --git a/drivers/acpi/fan_attr.c b/drivers/acpi/fan_attr.c
new file mode 100644
index 000000000000..f15157d40713
--- /dev/null
+++ b/drivers/acpi/fan_attr.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * fan_attr.c - Create extra attributes for ACPI Fan driver
+ *
+ * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
+ * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ * Copyright (C) 2022 Intel Corporation. All rights reserved.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+
+#include "fan.h"
+
+MODULE_LICENSE("GPL");
+
+static ssize_t show_state(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct acpi_fan_fps *fps = container_of(attr, struct acpi_fan_fps, dev_attr);
+ int count;
+
+ if (fps->control == 0xFFFFFFFF || fps->control > 100)
+ count = scnprintf(buf, PAGE_SIZE, "not-defined:");
+ else
+ count = scnprintf(buf, PAGE_SIZE, "%lld:", fps->control);
+
+ if (fps->trip_point == 0xFFFFFFFF || fps->trip_point > 9)
+ count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:");
+ else
+ count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->trip_point);
+
+ if (fps->speed == 0xFFFFFFFF)
+ count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:");
+ else
+ count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->speed);
+
+ if (fps->noise_level == 0xFFFFFFFF)
+ count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:");
+ else
+ count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->noise_level * 100);
+
+ if (fps->power == 0xFFFFFFFF)
+ count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined\n");
+ else
+ count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld\n", fps->power);
+
+ return count;
+}
+
+static ssize_t show_fan_speed(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct acpi_device *acpi_dev = container_of(dev, struct acpi_device, dev);
+ struct acpi_fan_fst fst;
+ int status;
+
+ status = acpi_fan_get_fst(acpi_dev, &fst);
+ if (status)
+ return status;
+
+ return sprintf(buf, "%lld\n", fst.speed);
+}
+
+static ssize_t show_fine_grain_control(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct acpi_device *acpi_dev = container_of(dev, struct acpi_device, dev);
+ struct acpi_fan *fan = acpi_driver_data(acpi_dev);
+
+ return sprintf(buf, "%d\n", fan->fif.fine_grain_ctrl);
+}
+
+int acpi_fan_create_attributes(struct acpi_device *device)
+{
+ struct acpi_fan *fan = acpi_driver_data(device);
+ int i, status;
+
+ sysfs_attr_init(&fan->fine_grain_control.attr);
+ fan->fine_grain_control.show = show_fine_grain_control;
+ fan->fine_grain_control.store = NULL;
+ fan->fine_grain_control.attr.name = "fine_grain_control";
+ fan->fine_grain_control.attr.mode = 0444;
+ status = sysfs_create_file(&device->dev.kobj, &fan->fine_grain_control.attr);
+ if (status)
+ return status;
+
+ /* _FST is present if we are here */
+ sysfs_attr_init(&fan->fst_speed.attr);
+ fan->fst_speed.show = show_fan_speed;
+ fan->fst_speed.store = NULL;
+ fan->fst_speed.attr.name = "fan_speed_rpm";
+ fan->fst_speed.attr.mode = 0444;
+ status = sysfs_create_file(&device->dev.kobj, &fan->fst_speed.attr);
+ if (status)
+ goto rem_fine_grain_attr;
+
+ for (i = 0; i < fan->fps_count; ++i) {
+ struct acpi_fan_fps *fps = &fan->fps[i];
+
+ snprintf(fps->name, ACPI_FPS_NAME_LEN, "state%d", i);
+ sysfs_attr_init(&fps->dev_attr.attr);
+ fps->dev_attr.show = show_state;
+ fps->dev_attr.store = NULL;
+ fps->dev_attr.attr.name = fps->name;
+ fps->dev_attr.attr.mode = 0444;
+ status = sysfs_create_file(&device->dev.kobj, &fps->dev_attr.attr);
+ if (status) {
+ int j;
+
+ for (j = 0; j < i; ++j)
+ sysfs_remove_file(&device->dev.kobj, &fan->fps[j].dev_attr.attr);
+ goto rem_fst_attr;
+ }
+ }
+
+ return 0;
+
+rem_fst_attr:
+ sysfs_remove_file(&device->dev.kobj, &fan->fst_speed.attr);
+
+rem_fine_grain_attr:
+ sysfs_remove_file(&device->dev.kobj, &fan->fine_grain_control.attr);
+
+ return status;
+}
+
+void acpi_fan_delete_attributes(struct acpi_device *device)
+{
+ struct acpi_fan *fan = acpi_driver_data(device);
+ int i;
+
+ for (i = 0; i < fan->fps_count; ++i)
+ sysfs_remove_file(&device->dev.kobj, &fan->fps[i].dev_attr.attr);
+
+ sysfs_remove_file(&device->dev.kobj, &fan->fst_speed.attr);
+ sysfs_remove_file(&device->dev.kobj, &fan->fine_grain_control.attr);
+}
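Each stateN attribute emits one colon-separated record of control:trip_point:speed:noise_level*100:power, with "not-defined" standing in wherever the firmware reports 0xFFFFFFFF. A userspace sketch for reading one record; the sysfs path is illustrative and depends on the fan's ACPI HID and instance:

	#include <stdio.h>

	int main(void)
	{
		char buf[128];
		FILE *f = fopen("/sys/bus/acpi/devices/PNP0C0B:00/state0", "r");

		if (f && fgets(buf, sizeof(buf), f))
			printf("state0: %s", buf);
		if (f)
			fclose(f);
		return 0;
	}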
diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan_core.c
index 5cd0ceb50bc8..b9a9a59ddcc1 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan_core.c
@@ -1,9 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * acpi_fan.c - ACPI Fan Driver ($Revision: 29 $)
+ * fan_core.c - ACPI Fan core Driver
*
* Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
* Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ * Copyright (C) 2022 Intel Corporation. All rights reserved.
*/
#include <linux/kernel.h>
@@ -45,33 +46,6 @@ static const struct dev_pm_ops acpi_fan_pm = {
#define FAN_PM_OPS_PTR NULL
#endif
-#define ACPI_FPS_NAME_LEN 20
-
-struct acpi_fan_fps {
- u64 control;
- u64 trip_point;
- u64 speed;
- u64 noise_level;
- u64 power;
- char name[ACPI_FPS_NAME_LEN];
- struct device_attribute dev_attr;
-};
-
-struct acpi_fan_fif {
- u64 revision;
- u64 fine_grain_ctrl;
- u64 step_size;
- u64 low_speed_notification;
-};
-
-struct acpi_fan {
- bool acpi4;
- struct acpi_fan_fif fif;
- struct acpi_fan_fps *fps;
- int fps_count;
- struct thermal_cooling_device *cdev;
-};
-
static struct platform_driver acpi_fan_driver = {
.probe = acpi_fan_probe,
.remove = acpi_fan_remove,
@@ -89,25 +63,29 @@ static int fan_get_max_state(struct thermal_cooling_device *cdev, unsigned long
struct acpi_device *device = cdev->devdata;
struct acpi_fan *fan = acpi_driver_data(device);
- if (fan->acpi4)
- *state = fan->fps_count - 1;
- else
+ if (fan->acpi4) {
+ if (fan->fif.fine_grain_ctrl)
+ *state = 100 / fan->fif.step_size;
+ else
+ *state = fan->fps_count - 1;
+ } else {
*state = 1;
+ }
+
return 0;
}
-static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state)
+int acpi_fan_get_fst(struct acpi_device *device, struct acpi_fan_fst *fst)
{
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
- struct acpi_fan *fan = acpi_driver_data(device);
union acpi_object *obj;
acpi_status status;
- int control, i;
+ int ret = 0;
status = acpi_evaluate_object(device->handle, "_FST", NULL, &buffer);
if (ACPI_FAILURE(status)) {
dev_err(&device->dev, "Get fan state failed\n");
- return status;
+ return -ENODEV;
}
obj = buffer.pointer;
@@ -115,35 +93,52 @@ static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state)
obj->package.count != 3 ||
obj->package.elements[1].type != ACPI_TYPE_INTEGER) {
dev_err(&device->dev, "Invalid _FST data\n");
- status = -EINVAL;
+ ret = -EINVAL;
goto err;
}
- control = obj->package.elements[1].integer.value;
+ fst->revision = obj->package.elements[0].integer.value;
+ fst->control = obj->package.elements[1].integer.value;
+ fst->speed = obj->package.elements[2].integer.value;
+
+err:
+ kfree(obj);
+ return ret;
+}
+
+static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state)
+{
+ struct acpi_fan *fan = acpi_driver_data(device);
+ struct acpi_fan_fst fst;
+ int status, i;
+
+ status = acpi_fan_get_fst(device, &fst);
+ if (status)
+ return status;
+
+ if (fan->fif.fine_grain_ctrl) {
+		/* Per the spec, this control value should match what we set via _FSL */
+ if (fst.control > 100) {
+ dev_dbg(&device->dev, "Invalid control value returned\n");
+ goto match_fps;
+ }
+
+ *state = (int) fst.control / fan->fif.step_size;
+ return 0;
+ }
+
+match_fps:
for (i = 0; i < fan->fps_count; i++) {
- /*
- * When Fine Grain Control is set, return the state
- * corresponding to maximum fan->fps[i].control
- * value compared to the current speed. Here the
- * fan->fps[] is sorted array with increasing speed.
- */
- if (fan->fif.fine_grain_ctrl && control < fan->fps[i].control) {
- i = (i > 0) ? i - 1 : 0;
+ if (fst.control == fan->fps[i].control)
break;
- } else if (control == fan->fps[i].control) {
- break;
- }
}
if (i == fan->fps_count) {
dev_dbg(&device->dev, "Invalid control value returned\n");
- status = -EINVAL;
- goto err;
+ return -EINVAL;
}
*state = i;
-err:
- kfree(obj);
return status;
}
@@ -187,15 +182,30 @@ static int fan_set_state_acpi4(struct acpi_device *device, unsigned long state)
{
struct acpi_fan *fan = acpi_driver_data(device);
acpi_status status;
+ u64 value = state;
+ int max_state;
- if (state >= fan->fps_count)
+ if (fan->fif.fine_grain_ctrl)
+ max_state = 100 / fan->fif.step_size;
+ else
+ max_state = fan->fps_count - 1;
+
+ if (state > max_state)
return -EINVAL;
- status = acpi_execute_simple_method(device->handle, "_FSL",
- fan->fps[state].control);
+ if (fan->fif.fine_grain_ctrl) {
+ value *= fan->fif.step_size;
+		/* The spec allows compensating for rounding on the last step only */
+ if (value + fan->fif.step_size > 100)
+ value = 100;
+ } else {
+ value = fan->fps[state].control;
+ }
+
+ status = acpi_execute_simple_method(device->handle, "_FSL", value);
if (ACPI_FAILURE(status)) {
dev_dbg(&device->dev, "Failed to set state by _FSL\n");
- return status;
+ return -ENODEV;
}
return 0;
@@ -237,7 +247,8 @@ static int acpi_fan_get_fif(struct acpi_device *device)
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
struct acpi_fan *fan = acpi_driver_data(device);
struct acpi_buffer format = { sizeof("NNNN"), "NNNN" };
- struct acpi_buffer fif = { sizeof(fan->fif), &fan->fif };
+ u64 fields[4];
+ struct acpi_buffer fif = { sizeof(fields), fields };
union acpi_object *obj;
acpi_status status;
@@ -258,6 +269,17 @@ static int acpi_fan_get_fif(struct acpi_device *device)
status = -EINVAL;
}
+ fan->fif.revision = fields[0];
+ fan->fif.fine_grain_ctrl = fields[1];
+ fan->fif.step_size = fields[2];
+ fan->fif.low_speed_notification = fields[3];
+
+	/* If a buggy firmware reports a step size of 0, clamp it to 1 */
+ if (!fan->fif.step_size)
+ fan->fif.step_size = 1;
+	/* If step size > 9, clamp it to 9 (the spec allows values 1-9) */
+ else if (fan->fif.step_size > 9)
+ fan->fif.step_size = 9;
err:
kfree(obj);
return status;
@@ -270,39 +292,6 @@ static int acpi_fan_speed_cmp(const void *a, const void *b)
return fps1->speed - fps2->speed;
}
-static ssize_t show_state(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct acpi_fan_fps *fps = container_of(attr, struct acpi_fan_fps, dev_attr);
- int count;
-
- if (fps->control == 0xFFFFFFFF || fps->control > 100)
- count = scnprintf(buf, PAGE_SIZE, "not-defined:");
- else
- count = scnprintf(buf, PAGE_SIZE, "%lld:", fps->control);
-
- if (fps->trip_point == 0xFFFFFFFF || fps->trip_point > 9)
- count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:");
- else
- count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->trip_point);
-
- if (fps->speed == 0xFFFFFFFF)
- count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:");
- else
- count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->speed);
-
- if (fps->noise_level == 0xFFFFFFFF)
- count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:");
- else
- count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->noise_level * 100);
-
- if (fps->power == 0xFFFFFFFF)
- count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined\n");
- else
- count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld\n", fps->power);
-
- return count;
-}
-
static int acpi_fan_get_fps(struct acpi_device *device)
{
struct acpi_fan *fan = acpi_driver_data(device);
@@ -347,25 +336,6 @@ static int acpi_fan_get_fps(struct acpi_device *device)
sort(fan->fps, fan->fps_count, sizeof(*fan->fps),
acpi_fan_speed_cmp, NULL);
- for (i = 0; i < fan->fps_count; ++i) {
- struct acpi_fan_fps *fps = &fan->fps[i];
-
- snprintf(fps->name, ACPI_FPS_NAME_LEN, "state%d", i);
- sysfs_attr_init(&fps->dev_attr.attr);
- fps->dev_attr.show = show_state;
- fps->dev_attr.store = NULL;
- fps->dev_attr.attr.name = fps->name;
- fps->dev_attr.attr.mode = 0444;
- status = sysfs_create_file(&device->dev.kobj, &fps->dev_attr.attr);
- if (status) {
- int j;
-
- for (j = 0; j < i; ++j)
- sysfs_remove_file(&device->dev.kobj, &fan->fps[j].dev_attr.attr);
- break;
- }
- }
-
err:
kfree(obj);
return status;
@@ -396,6 +366,10 @@ static int acpi_fan_probe(struct platform_device *pdev)
if (result)
return result;
+ result = acpi_fan_create_attributes(device);
+ if (result)
+ return result;
+
fan->acpi4 = true;
} else {
result = acpi_device_update_power(device, NULL);
@@ -437,12 +411,8 @@ static int acpi_fan_probe(struct platform_device *pdev)
return 0;
err_end:
- if (fan->acpi4) {
- int i;
-
- for (i = 0; i < fan->fps_count; ++i)
- sysfs_remove_file(&device->dev.kobj, &fan->fps[i].dev_attr.attr);
- }
+ if (fan->acpi4)
+ acpi_fan_delete_attributes(device);
return result;
}
@@ -453,10 +423,8 @@ static int acpi_fan_remove(struct platform_device *pdev)
if (fan->acpi4) {
struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
- int i;
- for (i = 0; i < fan->fps_count; ++i)
- sysfs_remove_file(&device->dev.kobj, &fan->fps[i].dev_attr.attr);
+ acpi_fan_delete_attributes(device);
}
sysfs_remove_link(&pdev->dev.kobj, "thermal_cooling");
sysfs_remove_link(&fan->cdev->device.kobj, "device");
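A worked example of the fine-grain mapping introduced above: with step_size = 3 the cooling device exposes max_state = 100 / 3 = 33; setting state 33 yields value = 99, and since 99 + 3 > 100 the final step is rounded up to 100, which the spec permits for the last step only. The core of the conversion:

	u64 value = state * fan->fif.step_size;

	if (value + fan->fif.step_size > 100)
		value = 100;	/* compensate for rounding on the last step */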
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 457e11d851b8..628bf8f18130 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -96,8 +96,6 @@ void acpi_scan_table_notify(void);
extern struct list_head acpi_bus_id_list;
-#define ACPI_MAX_DEVICE_INSTANCES 4096
-
struct acpi_device_bus_id {
const char *bus_id;
struct ida instance_ida;
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 45c5c0e45e33..7a70c4bfc23c 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -642,22 +642,24 @@ u64 acpi_os_get_timer(void)
(ACPI_100NSEC_PER_SEC / HZ);
}
-acpi_status acpi_os_read_port(acpi_io_address port, u32 * value, u32 width)
+acpi_status acpi_os_read_port(acpi_io_address port, u32 *value, u32 width)
{
u32 dummy;
- if (!value)
+ if (value)
+ *value = 0;
+ else
value = &dummy;
- *value = 0;
if (width <= 8) {
- *(u8 *) value = inb(port);
+ *value = inb(port);
} else if (width <= 16) {
- *(u16 *) value = inw(port);
+ *value = inw(port);
} else if (width <= 32) {
- *(u32 *) value = inl(port);
+ *value = inl(port);
} else {
- BUG();
+ pr_debug("%s: Access width %d not supported\n", __func__, width);
+ return AE_BAD_PARAMETER;
}
return AE_OK;
@@ -674,7 +676,8 @@ acpi_status acpi_os_write_port(acpi_io_address port, u32 value, u32 width)
} else if (width <= 32) {
outl(value, port);
} else {
- BUG();
+ pr_debug("%s: Access width %d not supported\n", __func__, width);
+ return AE_BAD_PARAMETER;
}
return AE_OK;
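Because unsupported widths now fail with AE_BAD_PARAMETER instead of taking the machine down with BUG(), callers are expected to check the status. A sketch (port and width are purely illustrative):

	u32 value;

	if (ACPI_FAILURE(acpi_os_read_port(0x80, &value, 64)))
		pr_warn("unsupported I/O port access width\n");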
diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index d54fb8e54671..58647051c948 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -185,7 +185,7 @@ static acpi_status acpi_pci_link_check_current(struct acpi_resource *resource,
if (!p || !p->interrupt_count) {
/*
* IRQ descriptors may have no IRQ# bits set,
- * particularly those those w/ _STA disabled
+ * particularly those w/ _STA disabled
*/
pr_debug("Blank _CRS IRQ resource\n");
return AE_OK;
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index b76db99cced3..6f9e75d14808 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -22,8 +22,6 @@
#include <linux/slab.h>
#include <linux/dmi.h>
#include <linux/platform_data/x86/apple.h>
-#include <acpi/apei.h> /* for acpi_hest_init() */
-
#include "internal.h"
#define ACPI_PCI_ROOT_CLASS "pci_bridge"
@@ -943,7 +941,6 @@ out_release_info:
void __init acpi_pci_root_init(void)
{
- acpi_hest_init();
if (acpi_pci_disabled)
return;
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 86560a28751b..32b20efff5f8 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -96,6 +96,11 @@ static const struct dmi_system_id processor_power_dmi_table[] = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
DMI_MATCH(DMI_PRODUCT_NAME,"L8400B series Notebook PC")},
(void *)1},
+ /* T40 can not handle C3 idle state */
+ { set_max_cstate, "IBM ThinkPad T40", {
+ DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "23737CU")},
+ (void *)2},
{},
};
@@ -1075,6 +1080,11 @@ static int flatten_lpi_states(struct acpi_processor *pr,
return 0;
}
+int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu)
+{
+ return -EOPNOTSUPP;
+}
+
static int acpi_processor_get_lpi_info(struct acpi_processor *pr)
{
int ret, i;
@@ -1083,6 +1093,11 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr)
struct acpi_device *d = NULL;
struct acpi_lpi_states_array info[2], *tmp, *prev, *curr;
+ /* make sure our architecture has support */
+ ret = acpi_processor_ffh_lpi_probe(pr->id);
+ if (ret == -EOPNOTSUPP)
+ return ret;
+
if (!osc_pc_lpi_support_confirmed)
return -EOPNOTSUPP;
@@ -1134,11 +1149,6 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr)
return 0;
}
-int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu)
-{
- return -ENODEV;
-}
-
int __weak acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
{
return -ENODEV;
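Moving the weak default to -EOPNOTSUPP lets the generic code bail out of LPI parsing early on architectures without an FFH implementation; an architecture opts in by providing a strong symbol. Sketch of an arch-side override (illustrative only):

	int acpi_processor_ffh_lpi_probe(unsigned int cpu)
	{
		return 0;	/* success: generic LPI parsing proceeds */
	}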
diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index d0986bda2964..12bbfe833609 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -541,7 +541,8 @@ acpi_device_data_of_node(const struct fwnode_handle *fwnode)
if (is_acpi_device_node(fwnode)) {
const struct acpi_device *adev = to_acpi_device_node(fwnode);
return &adev->data;
- } else if (is_acpi_data_node(fwnode)) {
+ }
+ if (is_acpi_data_node(fwnode)) {
const struct acpi_data_node *dn = to_acpi_data_node(fwnode);
return &dn->data;
}
@@ -685,7 +686,7 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
*/
if (obj->type == ACPI_TYPE_LOCAL_REFERENCE) {
if (index)
- return -EINVAL;
+ return -ENOENT;
device = acpi_fetch_acpi_dev(obj->reference.handle);
if (!device)
@@ -739,14 +740,19 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
return -EINVAL;
}
- /* assume following integer elements are all args */
+ /*
+ * Assume the following integer elements are all args.
+ * Stop counting on the first reference or end of the
+ * package arguments. In case of neither reference,
+ * nor integer, return an error, we can't parse it.
+ */
for (i = 0; element + i < end && i < num_args; i++) {
int type = element[i].type;
+ if (type == ACPI_TYPE_LOCAL_REFERENCE)
+ break;
if (type == ACPI_TYPE_INTEGER)
nargs++;
- else if (type == ACPI_TYPE_LOCAL_REFERENCE)
- break;
else
return -EINVAL;
}
@@ -950,7 +956,7 @@ static int acpi_data_prop_read(const struct acpi_device_data *data,
if (proptype != DEV_PROP_STRING && nval > obj->package.count)
return -EOVERFLOW;
- else if (nval <= 0)
+ if (nval == 0)
return -EINVAL;
items = obj->package.elements;
@@ -1012,14 +1018,10 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
const struct list_head *head;
struct list_head *next;
- if (!child || is_acpi_device_node(child)) {
+ if ((!child || is_acpi_device_node(child)) && adev) {
struct acpi_device *child_adev;
- if (adev)
- head = &adev->children;
- else
- goto nondev;
-
+ head = &adev->children;
if (list_empty(head))
goto nondev;
@@ -1089,7 +1091,8 @@ acpi_node_get_parent(const struct fwnode_handle *fwnode)
if (is_acpi_data_node(fwnode)) {
/* All data nodes have parent pointer so just return that */
return to_acpi_data_node(fwnode)->parent;
- } else if (is_acpi_device_node(fwnode)) {
+ }
+ if (is_acpi_device_node(fwnode)) {
struct device *dev = to_acpi_device_node(fwnode)->dev.parent;
if (dev)
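The -EINVAL to -ENOENT change matters to callers that walk references by index through the fwnode API: -ENOENT is the conventional "no more entries" result, so loops terminate cleanly even when a property holds a single bare reference. A usage sketch (the property name is hypothetical):

	struct fwnode_reference_args args;
	int i, ret;

	for (i = 0; ; i++) {
		ret = fwnode_property_get_reference_args(fwnode, "sensors",
							 NULL, 0, i, &args);
		if (ret == -ENOENT)
			break;		/* ran out of references */
		if (ret)
			return ret;	/* a real parse error */
		/* ... use args.fwnode ... */
		fwnode_handle_put(args.fwnode);
	}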
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 1331756d4cfc..5ffd87ac42b3 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -477,7 +477,8 @@ static void acpi_device_del(struct acpi_device *device)
list_for_each_entry(acpi_device_bus_id, &acpi_bus_id_list, node)
if (!strcmp(acpi_device_bus_id->bus_id,
acpi_device_hid(device))) {
- ida_simple_remove(&acpi_device_bus_id->instance_ida, device->pnp.instance_no);
+ ida_free(&acpi_device_bus_id->instance_ida,
+ device->pnp.instance_no);
if (ida_is_empty(&acpi_device_bus_id->instance_ida)) {
list_del(&acpi_device_bus_id->node);
kfree_const(acpi_device_bus_id->bus_id);
@@ -642,7 +643,7 @@ static int acpi_device_set_name(struct acpi_device *device,
struct ida *instance_ida = &acpi_device_bus_id->instance_ida;
int result;
- result = ida_simple_get(instance_ida, 0, ACPI_MAX_DEVICE_INSTANCES, GFP_KERNEL);
+ result = ida_alloc(instance_ida, GFP_KERNEL);
if (result < 0)
return result;
@@ -1377,11 +1378,11 @@ static void acpi_set_pnp_ids(acpi_handle handle, struct acpi_device_pnp *pnp,
if (info->valid & ACPI_VALID_HID) {
acpi_add_id(pnp, info->hardware_id.string);
pnp->type.platform_id = 1;
- if (info->valid & ACPI_VALID_CID) {
- cid_list = &info->compatible_id_list;
- for (i = 0; i < cid_list->count; i++)
- acpi_add_id(pnp, cid_list->ids[i].string);
- }
+ }
+ if (info->valid & ACPI_VALID_CID) {
+ cid_list = &info->compatible_id_list;
+ for (i = 0; i < cid_list->count; i++)
+ acpi_add_id(pnp, cid_list->ids[i].string);
}
if (info->valid & ACPI_VALID_ADR) {
pnp->bus_address = info->address;
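The ida_simple_*() calls give way to the plain IDA API, which also drops the artificial ACPI_MAX_DEVICE_INSTANCES cap (removed from internal.h above): ida_alloc() simply returns the smallest free ID. The pattern, as used here:

	int id = ida_alloc(&acpi_device_bus_id->instance_ida, GFP_KERNEL);

	if (id < 0)
		return id;		/* -ENOMEM and friends */
	/* ... id serves as the device instance number ... */
	ida_free(&acpi_device_bus_id->instance_ida, id);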
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index a60ff5dfed3a..c992e57b2c79 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -736,21 +736,15 @@ bool acpi_s2idle_wake(void)
return true;
}
- /* Check non-EC GPE wakeups and dispatch the EC GPE. */
+ /*
+ * Check non-EC GPE wakeups and if there are none, cancel the
+ * SCI-related wakeup and dispatch the EC GPE.
+ */
if (acpi_ec_dispatch_gpe()) {
pm_pr_dbg("ACPI non-EC GPE wakeup\n");
return true;
}
- /*
- * Cancel the SCI wakeup and process all pending events in case
- * there are any wakeup ones in there.
- *
- * Note that if any non-EC GPEs are active at this point, the
- * SCI will retrigger after the rearming below, so no events
- * should be missed by canceling the wakeup here.
- */
- pm_system_cancel_wakeup();
acpi_os_wait_events_complete();
/*
@@ -764,6 +758,9 @@ bool acpi_s2idle_wake(void)
return true;
}
+ pm_pr_dbg("Rearming ACPI SCI for wakeup\n");
+
+ pm_wakeup_clear(acpi_sci_irq);
rearm_wake_irq(acpi_sci_irq);
}
@@ -874,12 +871,7 @@ static inline void acpi_sleep_syscore_init(void) {}
#ifdef CONFIG_HIBERNATION
static unsigned long s4_hardware_signature;
static struct acpi_table_facs *facs;
-static int sigcheck = -1; /* Default behaviour is just to warn */
-
-void __init acpi_check_s4_hw_signature(int check)
-{
- sigcheck = check;
-}
+int acpi_check_s4_hw_signature = -1; /* Default behaviour is just to warn */
static int acpi_hibernation_begin(pm_message_t stage)
{
@@ -1004,7 +996,7 @@ static void acpi_sleep_hibernate_setup(void)
hibernation_set_ops(old_suspend_ordering ?
&acpi_hibernation_ops_old : &acpi_hibernation_ops);
sleep_states[ACPI_STATE_S4] = 1;
- if (!sigcheck)
+ if (!acpi_check_s4_hw_signature)
return;
acpi_get_table(ACPI_SIG_FACS, 1, (struct acpi_table_header **)&facs);
@@ -1016,7 +1008,7 @@ static void acpi_sleep_hibernate_setup(void)
*/
s4_hardware_signature = facs->hardware_signature;
- if (sigcheck > 0) {
+ if (acpi_check_s4_hw_signature > 0) {
/*
* If we're actually obeying the ACPI specification
* then the signature is written out as part of the
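With the setter function gone, code that used to call the old acpi_check_s4_hw_signature(int) helper now assigns the exported variable directly, e.g. from a kernel-parameter handler (a sketch; the exact mismatch handling follows the hibernation code above):

	/* -1: warn on mismatch only (default), 0: skip the check,
	 * >0: honor the hardware signature as the ACPI spec requires. */
	acpi_check_s4_hw_signature = 1;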
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 0741a4933f62..ceee808f7f2a 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -400,7 +400,7 @@ int __init_or_acpilib acpi_table_parse_entries_array(
acpi_get_table(id, instance, &table_header);
if (!table_header) {
- pr_warn("%4.4s not present\n", id);
+ pr_debug("%4.4s not present\n", id);
return -ENODEV;
}
@@ -545,7 +545,7 @@ static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst = {
ACPI_SIG_WDDT, ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT,
ACPI_SIG_PSDT, ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT,
ACPI_SIG_IORT, ACPI_SIG_NFIT, ACPI_SIG_HMAT, ACPI_SIG_PPTT,
- ACPI_SIG_NHLT, ACPI_SIG_AEST };
+ ACPI_SIG_NHLT, ACPI_SIG_AEST, ACPI_SIG_CEDT, ACPI_SIG_AGDI };
#define ACPI_HEADER_SIZE sizeof(struct acpi_table_header)
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 4f64713e9917..becc198e4c22 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -415,6 +415,81 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "GA503"),
},
},
+ /*
+ * Clevo NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2 have both a
+	 * working native and an ACPI video interface. However, the default detection
+ * mechanism first registers the video interface before unregistering
+ * it again and switching to the native interface during boot. This
+ * results in a dangling SBIOS request for backlight change for some
+ * reason, causing the backlight to switch to ~2% once per boot on the
+ * first power cord connect or disconnect event. Setting the native
+ * interface explicitly circumvents this buggy behaviour, by avoiding
+ * the unregistering process.
+ */
+ {
+ .callback = video_detect_force_native,
+ .ident = "Clevo NL5xRU",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+ DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+ .ident = "Clevo NL5xRU",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"),
+ DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+ .ident = "Clevo NL5xRU",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+ DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+ .ident = "Clevo NL5xRU",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+ DMI_MATCH(DMI_BOARD_NAME, "AURA1501"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+ .ident = "Clevo NL5xRU",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+ DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+ .ident = "Clevo NL5xNU",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+ DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+ .ident = "Clevo NL5xNU",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"),
+ DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+ .ident = "Clevo NL5xNU",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+ DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+ },
+ },
/*
* Desktops which falsely report a backlight and which our heuristics
diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index abc06e7f89d8..ed889f827f53 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -424,15 +424,11 @@ static int lps0_device_attach(struct acpi_device *adev,
mem_sleep_current = PM_SUSPEND_TO_IDLE;
/*
- * Some Intel based LPS0 systems, like ASUS Zenbook UX430UNR/i7-8550U don't
- * use intel-hid or intel-vbtn but require the EC GPE to be enabled while
- * suspended for certain wakeup devices to work, so mark it as wakeup-capable.
- *
- * Only enable on !AMD as enabling this universally causes problems for a number
- * of AMD based systems.
+ * Some LPS0 systems, like ASUS Zenbook UX430UNR/i7-8550U, require the
+ * EC GPE to be enabled while suspended for certain wakeup devices to
+ * work, so mark it as wakeup-capable.
*/
- if (!acpi_s2idle_vendor_amd())
- acpi_ec_mark_gpe_for_wake();
+ acpi_ec_mark_gpe_for_wake();
return 0;
}
diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index ffdeed5334d6..664070fc8349 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -285,6 +285,27 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
},
{
+ /* Lenovo Yoga Tablet 1050F/L */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Intel Corp."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "VALLEYVIEW C0 PLATFORM"),
+ DMI_MATCH(DMI_BOARD_NAME, "BYT-T FFD8"),
+ /* Partial match on beginning of BIOS version */
+ DMI_MATCH(DMI_BIOS_VERSION, "BLADE_21"),
+ },
+ .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
+ ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
+ },
+ {
+ /* Nextbook Ares 8 */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Insyde"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "M890BAP"),
+ },
+ .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
+ ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
+ },
+ {
/* Whitelabel (sold as various brands) TM800A550L */
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index e1a5eca3ae3c..d3bd14aaabf6 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -370,6 +370,7 @@ int amba_driver_register(struct amba_driver *drv)
return driver_register(&drv->drv);
}
+EXPORT_SYMBOL(amba_driver_register);
/**
* amba_driver_unregister - remove an AMBA device driver
@@ -383,7 +384,7 @@ void amba_driver_unregister(struct amba_driver *drv)
{
driver_unregister(&drv->drv);
}
-
+EXPORT_SYMBOL(amba_driver_unregister);
static void amba_device_release(struct device *dev)
{
@@ -642,6 +643,7 @@ int amba_device_register(struct amba_device *dev, struct resource *parent)
return amba_device_add(dev, parent);
}
+EXPORT_SYMBOL(amba_device_register);
/**
* amba_device_put - put an AMBA device
@@ -668,66 +670,7 @@ void amba_device_unregister(struct amba_device *dev)
{
device_unregister(&dev->dev);
}
-
-
-struct find_data {
- struct amba_device *dev;
- struct device *parent;
- const char *busid;
- unsigned int id;
- unsigned int mask;
-};
-
-static int amba_find_match(struct device *dev, void *data)
-{
- struct find_data *d = data;
- struct amba_device *pcdev = to_amba_device(dev);
- int r;
-
- r = (pcdev->periphid & d->mask) == d->id;
- if (d->parent)
- r &= d->parent == dev->parent;
- if (d->busid)
- r &= strcmp(dev_name(dev), d->busid) == 0;
-
- if (r) {
- get_device(dev);
- d->dev = pcdev;
- }
-
- return r;
-}
-
-/**
- * amba_find_device - locate an AMBA device given a bus id
- * @busid: bus id for device (or NULL)
- * @parent: parent device (or NULL)
- * @id: peripheral ID (or 0)
- * @mask: peripheral ID mask (or 0)
- *
- * Return the AMBA device corresponding to the supplied parameters.
- * If no device matches, returns NULL.
- *
- * NOTE: When a valid device is found, its refcount is
- * incremented, and must be decremented before the returned
- * reference.
- */
-struct amba_device *
-amba_find_device(const char *busid, struct device *parent, unsigned int id,
- unsigned int mask)
-{
- struct find_data data;
-
- data.dev = NULL;
- data.parent = parent;
- data.busid = busid;
- data.id = id;
- data.mask = mask;
-
- bus_for_each_dev(&amba_bustype, NULL, &data, amba_find_match);
-
- return data.dev;
-}
+EXPORT_SYMBOL(amba_device_unregister);
/**
* amba_request_regions - request all mem regions associated with device
@@ -749,6 +692,7 @@ int amba_request_regions(struct amba_device *dev, const char *name)
return ret;
}
+EXPORT_SYMBOL(amba_request_regions);
/**
* amba_release_regions - release mem regions associated with device
@@ -763,11 +707,4 @@ void amba_release_regions(struct amba_device *dev)
size = resource_size(&dev->res);
release_mem_region(dev->res.start, size);
}
-
-EXPORT_SYMBOL(amba_driver_register);
-EXPORT_SYMBOL(amba_driver_unregister);
-EXPORT_SYMBOL(amba_device_register);
-EXPORT_SYMBOL(amba_device_unregister);
-EXPORT_SYMBOL(amba_find_device);
-EXPORT_SYMBOL(amba_request_regions);
EXPORT_SYMBOL(amba_release_regions);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 67f88027680a..0c854aebfe0b 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2007,6 +2007,9 @@ static bool ata_log_supported(struct ata_device *dev, u8 log)
{
struct ata_port *ap = dev->link->ap;
+ if (dev->horkage & ATA_HORKAGE_NO_LOG_DIR)
+ return false;
+
if (ata_read_log_page(dev, ATA_LOG_DIRECTORY, 0, ap->sector_buf, 1))
return false;
return get_unaligned_le16(&ap->sector_buf[log * 2]) ? true : false;
@@ -2445,23 +2448,21 @@ static void ata_dev_config_cpr(struct ata_device *dev)
struct ata_cpr_log *cpr_log = NULL;
u8 *desc, *buf = NULL;
- if (!ata_identify_page_supported(dev,
- ATA_LOG_CONCURRENT_POSITIONING_RANGES))
+ if (ata_id_major_version(dev->id) < 11 ||
+ !ata_log_supported(dev, ATA_LOG_CONCURRENT_POSITIONING_RANGES))
goto out;
/*
- * Read IDENTIFY DEVICE data log, page 0x47
- * (concurrent positioning ranges). We can have at most 255 32B range
- * descriptors plus a 64B header.
+ * Read the concurrent positioning ranges log (0x47). We can have at
+ * most 255 32B range descriptors plus a 64B header.
*/
buf_len = (64 + 255 * 32 + 511) & ~511;
buf = kzalloc(buf_len, GFP_KERNEL);
if (!buf)
goto out;
- err_mask = ata_read_log_page(dev, ATA_LOG_IDENTIFY_DEVICE,
- ATA_LOG_CONCURRENT_POSITIONING_RANGES,
- buf, buf_len >> 9);
+ err_mask = ata_read_log_page(dev, ATA_LOG_CONCURRENT_POSITIONING_RANGES,
+ 0, buf, buf_len >> 9);
if (err_mask)
goto out;
@@ -4028,6 +4029,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
/* devices that don't properly handle TRIM commands */
{ "SuperSSpeed S238*", NULL, ATA_HORKAGE_NOTRIM, },
+ { "M88V29*", NULL, ATA_HORKAGE_NOTRIM, },
/*
* As defined, the DRAT (Deterministic Read After Trim) and RZAT
@@ -4073,6 +4075,13 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
{ "WDC WD3000JD-*", NULL, ATA_HORKAGE_WD_BROKEN_LPM },
{ "WDC WD3200JD-*", NULL, ATA_HORKAGE_WD_BROKEN_LPM },
+ /*
+ * This SATA DOM device goes on a walkabout when the ATA_LOG_DIRECTORY
+ * log page is accessed. Ensure we never ask for this log page with
+ * these devices.
+ */
+ { "SATADOM-ML 3ME", NULL, ATA_HORKAGE_NO_LOG_DIR },
+
/* End Marker */
{ }
};
diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c
index 7abc7e04f656..6fa4a2faf49c 100644
--- a/drivers/ata/pata_hpt37x.c
+++ b/drivers/ata/pata_hpt37x.c
@@ -920,6 +920,20 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
pci_write_config_byte(dev, 0x5a, irqmask);
/*
+ * HPT371 chips physically have only one channel, the secondary one,
+ * but the primary channel registers do exist! Go figure...
+ * So, we manually disable the non-existing channel here
+ * (if the BIOS hasn't done this already).
+ */
+ if (dev->device == PCI_DEVICE_ID_TTI_HPT371) {
+ u8 mcr1;
+
+ pci_read_config_byte(dev, 0x50, &mcr1);
+ mcr1 &= ~0x04;
+ pci_write_config_byte(dev, 0x50, mcr1);
+ }
+
+ /*
* default to pci clock. make sure MA15/16 are set to output
* to prevent drives having problems with 40-pin cables. Needed
* for some drives such as IBM-DTLA which will not enter ready
@@ -950,14 +964,14 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
if ((freq >> 12) != 0xABCDE) {
int i;
- u8 sr;
+ u16 sr;
u32 total = 0;
dev_warn(&dev->dev, "BIOS has not set timing clocks\n");
/* This is the process the HPT371 BIOS is reported to use */
for (i = 0; i < 128; i++) {
- pci_read_config_byte(dev, 0x78, &sr);
+ pci_read_config_word(dev, 0x78, &sr);
total += sr & 0x1FF;
udelay(15);
}
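The u8-to-u16 change above is not cosmetic: the clock count read back from config space is a 9-bit quantity, so masking with 0x1FF only helps if the variable can hold bit 8 in the first place; with a byte-sized read the measured total was systematically low. A standalone demonstration with an illustrative counter value:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint16_t raw = 0x1F0;          /* 9-bit count as the chip reports it */
        uint8_t sr8 = (uint8_t)raw;    /* old code: byte-sized read */
        uint16_t sr16 = raw;           /* fixed code: word-sized read */

        printf("8-bit read:  %u\n", sr8 & 0x1FF);  /* 240 - bit 8 lost */
        printf("16-bit read: %u\n", sr16 & 0x1FF); /* 496 - correct */
        return 0;
    }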
diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c
index 028329428b75..87c7c90676ca 100644
--- a/drivers/ata/pata_platform.c
+++ b/drivers/ata/pata_platform.c
@@ -128,6 +128,8 @@ int __pata_platform_probe(struct device *dev, struct resource *io_res,
ap = host->ports[0];
ap->ops = devm_kzalloc(dev, sizeof(*ap->ops), GFP_KERNEL);
+ if (!ap->ops)
+ return -ENOMEM;
ap->ops->inherits = &ata_sff_port_ops;
ap->ops->cable_detect = ata_cable_unknown;
ap->ops->set_mode = pata_platform_set_mode;
diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index da0152116d9f..556034a15430 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -322,7 +322,7 @@ static void fsl_sata_set_irq_coalescing(struct ata_host *host,
static ssize_t fsl_sata_intr_coalescing_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sysfs_emit(buf, "%d %d\n",
+ return sysfs_emit(buf, "%u %u\n",
intr_coalescing_count, intr_coalescing_ticks);
}
@@ -332,10 +332,8 @@ static ssize_t fsl_sata_intr_coalescing_store(struct device *dev,
{
unsigned int coalescing_count, coalescing_ticks;
- if (sscanf(buf, "%d%d",
- &coalescing_count,
- &coalescing_ticks) != 2) {
- printk(KERN_ERR "fsl-sata: wrong parameter format.\n");
+ if (sscanf(buf, "%u%u", &coalescing_count, &coalescing_ticks) != 2) {
+ dev_err(dev, "fsl-sata: wrong parameter format.\n");
return -EINVAL;
}
@@ -359,7 +357,7 @@ static ssize_t fsl_sata_rx_watermark_show(struct device *dev,
rx_watermark &= 0x1f;
spin_unlock_irqrestore(&host->lock, flags);
- return sysfs_emit(buf, "%d\n", rx_watermark);
+ return sysfs_emit(buf, "%u\n", rx_watermark);
}
static ssize_t fsl_sata_rx_watermark_store(struct device *dev,
@@ -373,8 +371,8 @@ static ssize_t fsl_sata_rx_watermark_store(struct device *dev,
void __iomem *csr_base = host_priv->csr_base;
u32 temp;
- if (sscanf(buf, "%d", &rx_watermark) != 1) {
- printk(KERN_ERR "fsl-sata: wrong parameter format.\n");
+ if (kstrtouint(buf, 10, &rx_watermark) < 0) {
+ dev_err(dev, "fsl-sata: wrong parameter format.\n");
return -EINVAL;
}
@@ -382,8 +380,8 @@ static ssize_t fsl_sata_rx_watermark_store(struct device *dev,
temp = ioread32(csr_base + TRANSCFG);
temp &= 0xffffffe0;
iowrite32(temp | rx_watermark, csr_base + TRANSCFG);
-
spin_unlock_irqrestore(&host->lock, flags);
+
return strlen(buf);
}
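The switch from sscanf("%d") to kstrtouint() above is the standard pattern for single-value sysfs stores: kstrtouint() rejects negative numbers, overflow, and trailing garbage that sscanf would silently tolerate, and dev_err() ties the complaint to the right device. A hedged sketch of the pattern; the function and attribute names are placeholders (note the driver above returns strlen(buf), while returning count is the more common convention):

    static ssize_t watermark_store(struct device *dev,
                                   struct device_attribute *attr,
                                   const char *buf, size_t count)
    {
        unsigned int val;

        /* Fails on sign, overflow, or trailing junk; a final newline is OK. */
        if (kstrtouint(buf, 10, &val) < 0) {
            dev_err(dev, "wrong parameter format\n");
            return -EINVAL;
        }

        /* ... program val into the hardware here ... */
        return count;
    }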
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index 422753d52244..a31ffe16e626 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -1112,6 +1112,8 @@ DPRINTK("iovcnt = %d\n",skb_shinfo(skb)->nr_frags);
skb_data3 = skb->data[3];
paddr = dma_map_single(&eni_dev->pci_dev->dev,skb->data,skb->len,
DMA_TO_DEVICE);
+ if (dma_mapping_error(&eni_dev->pci_dev->dev, paddr))
+ return enq_next;
ENI_PRV_PADDR(skb) = paddr;
/* prepare DMA queue entries */
j = 0;
diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c
index 3bc3c314a467..4f67404fe64c 100644
--- a/drivers/atm/firestream.c
+++ b/drivers/atm/firestream.c
@@ -1676,6 +1676,8 @@ static int fs_init(struct fs_dev *dev)
dev->hw_base = pci_resource_start(pci_dev, 0);
dev->base = ioremap(dev->hw_base, 0x1000);
+ if (!dev->base)
+ return 1;
reset_chip (dev);
diff --git a/drivers/auxdisplay/lcd2s.c b/drivers/auxdisplay/lcd2s.c
index 38ba08628ccb..2578b2d45439 100644
--- a/drivers/auxdisplay/lcd2s.c
+++ b/drivers/auxdisplay/lcd2s.c
@@ -238,7 +238,7 @@ static int lcd2s_redefine_char(struct charlcd *lcd, char *esc)
if (buf[1] > 7)
return 1;
- i = 0;
+ i = 2;
shift = 0;
value = 0;
while (*esc && i < LCD2S_CHARACTER_SIZE + 2) {
@@ -298,6 +298,10 @@ static int lcd2s_i2c_probe(struct i2c_client *i2c,
I2C_FUNC_SMBUS_WRITE_BLOCK_DATA))
return -EIO;
+ lcd2s = devm_kzalloc(&i2c->dev, sizeof(*lcd2s), GFP_KERNEL);
+ if (!lcd2s)
+ return -ENOMEM;
+
/* Test, if the display is responding */
err = lcd2s_i2c_smbus_write_byte(i2c, LCD2S_CMD_DISPLAY_OFF);
if (err < 0)
@@ -307,12 +311,6 @@ static int lcd2s_i2c_probe(struct i2c_client *i2c,
if (!lcd)
return -ENOMEM;
- lcd2s = kzalloc(sizeof(struct lcd2s_data), GFP_KERNEL);
- if (!lcd2s) {
- err = -ENOMEM;
- goto fail1;
- }
-
lcd->drvdata = lcd2s;
lcd2s->i2c = i2c;
lcd2s->charlcd = lcd;
@@ -321,26 +319,24 @@ static int lcd2s_i2c_probe(struct i2c_client *i2c,
err = device_property_read_u32(&i2c->dev, "display-height-chars",
&lcd->height);
if (err)
- goto fail2;
+ goto fail1;
err = device_property_read_u32(&i2c->dev, "display-width-chars",
&lcd->width);
if (err)
- goto fail2;
+ goto fail1;
lcd->ops = &lcd2s_ops;
err = charlcd_register(lcd2s->charlcd);
if (err)
- goto fail2;
+ goto fail1;
i2c_set_clientdata(i2c, lcd2s);
return 0;
-fail2:
- kfree(lcd2s);
fail1:
- kfree(lcd);
+ charlcd_free(lcd2s->charlcd);
return err;
}
@@ -349,7 +345,7 @@ static int lcd2s_i2c_remove(struct i2c_client *i2c)
struct lcd2s_data *lcd2s = i2c_get_clientdata(i2c);
charlcd_unregister(lcd2s->charlcd);
- kfree(lcd2s->charlcd);
+ charlcd_free(lcd2s->charlcd);
return 0;
}
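The lcd2s rework above is a devres conversion: because devm_kzalloc() ties the allocation's lifetime to the device, every error path that used to kfree(lcd2s) disappears, and only the charlcd object (not devres-managed) still needs charlcd_free(). The general shape, as a minimal sketch with a placeholder private struct:

    struct my_priv { int dummy; };    /* placeholder for driver state */

    static int example_probe(struct i2c_client *i2c)
    {
        struct my_priv *p;

        /* Freed automatically on unbind or probe failure - no kfree(). */
        p = devm_kzalloc(&i2c->dev, sizeof(*p), GFP_KERNEL);
        if (!p)
            return -ENOMEM;

        i2c_set_clientdata(i2c, p);
        return 0;
    }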
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 976154140f0b..1d6636ebaac5 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -339,6 +339,46 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
return !ret;
}
+#ifdef CONFIG_ACPI_CPPC_LIB
+#include <acpi/cppc_acpi.h>
+
+void topology_init_cpu_capacity_cppc(void)
+{
+ struct cppc_perf_caps perf_caps;
+ int cpu;
+
+ if (likely(acpi_disabled || !acpi_cpc_valid()))
+ return;
+
+ raw_capacity = kcalloc(num_possible_cpus(), sizeof(*raw_capacity),
+ GFP_KERNEL);
+ if (!raw_capacity)
+ return;
+
+ for_each_possible_cpu(cpu) {
+ if (!cppc_get_perf_caps(cpu, &perf_caps) &&
+ (perf_caps.highest_perf >= perf_caps.nominal_perf) &&
+ (perf_caps.highest_perf >= perf_caps.lowest_perf)) {
+ raw_capacity[cpu] = perf_caps.highest_perf;
+ pr_debug("cpu_capacity: CPU%d cpu_capacity=%u (raw).\n",
+ cpu, raw_capacity[cpu]);
+ continue;
+ }
+
+ pr_err("cpu_capacity: CPU%d missing/invalid highest performance.\n", cpu);
+ pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
+ goto exit;
+ }
+
+ topology_normalize_cpu_scale();
+ schedule_work(&update_topology_flags_work);
+ pr_debug("cpu_capacity: cpu_capacity initialization done\n");
+
+exit:
+ free_raw_capacity();
+}
+#endif
+
#ifdef CONFIG_CPU_FREQ
static cpumask_var_t cpus_to_visit;
static void parsing_done_workfn(struct work_struct *work);
@@ -387,9 +427,8 @@ static int __init register_cpufreq_notifier(void)
int ret;
/*
- * on ACPI-based systems we need to use the default cpu capacity
- * until we have the necessary code to parse the cpu capacity, so
- * skip registering cpufreq notifier.
+ * On ACPI-based systems skip registering cpufreq notifier as cpufreq
+ * information is not needed for cpu capacity initialization.
*/
if (!acpi_disabled || !raw_capacity)
return -EINVAL;
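topology_normalize_cpu_scale(), called at the end of the new CPPC path, rescales the raw highest_perf values so the fastest CPU lands at SCHED_CAPACITY_SCALE (1024). The arithmetic in isolation, with made-up perf numbers for a two-cluster system:

    #include <stdio.h>

    #define SCHED_CAPACITY_SCALE 1024

    int main(void)
    {
        unsigned long raw[4] = { 300, 300, 460, 460 }; /* CPPC highest_perf */
        unsigned long max = 460;                       /* largest raw value */

        for (int i = 0; i < 4; i++)
            printf("cpu%d capacity = %lu\n", i,
                   raw[i] * SCHED_CAPACITY_SCALE / max);
        /* -> 667 667 1024 1024: little cores scaled against the big ones */
        return 0;
    }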
diff --git a/drivers/base/class.c b/drivers/base/class.c
index 7476f393df97..8feb85e186e3 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -16,7 +16,7 @@
#include <linux/kdev_t.h>
#include <linux/err.h>
#include <linux/slab.h>
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
#include <linux/mutex.h>
#include "base.h"
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 7bb957b11861..3d6430eb0c6a 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -21,7 +21,7 @@
#include <linux/notifier.h>
#include <linux/of.h>
#include <linux/of_device.h>
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
#include <linux/mutex.h>
#include <linux/pm_runtime.h>
#include <linux/netdevice.h>
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 5fc258073bc7..2ef23fce0860 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -275,7 +275,7 @@ static ssize_t print_cpus_isolated(struct device *dev,
return -ENOMEM;
cpumask_andnot(isolated, cpu_possible_mask,
- housekeeping_cpumask(HK_FLAG_DOMAIN));
+ housekeeping_cpumask(HK_TYPE_DOMAIN));
len = sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(isolated));
free_cpumask_var(isolated);
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 9eaaff2f556c..f47cab21430f 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -629,6 +629,9 @@ re_probe:
drv->remove(dev);
devres_release_all(dev);
+ arch_teardown_dma_ops(dev);
+ kfree(dev->dma_range_map);
+ dev->dma_range_map = NULL;
driver_sysfs_remove(dev);
dev->driver = NULL;
dev_set_drvdata(dev, NULL);
@@ -1209,6 +1212,8 @@ static void __device_release_driver(struct device *dev, struct device *parent)
devres_release_all(dev);
arch_teardown_dma_ops(dev);
+ kfree(dev->dma_range_map);
+ dev->dma_range_map = NULL;
dev->driver = NULL;
dev_set_drvdata(dev, NULL);
if (dev->pm_domain && dev->pm_domain->dismiss)
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index f41063ac1aee..db5a03a0618e 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -17,7 +17,7 @@
#include <linux/syscalls.h>
#include <linux/mount.h>
#include <linux/device.h>
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include <linux/shmem_fs.h>
diff --git a/drivers/base/init.c b/drivers/base/init.c
index a9f57c22fb9e..d8d0fe687111 100644
--- a/drivers/base/init.c
+++ b/drivers/base/init.c
@@ -35,5 +35,6 @@ void __init driver_init(void)
auxiliary_bus_init();
cpu_dev_init();
memory_dev_init();
+ node_dev_init();
container_dev_init();
}
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 365cd4a7f239..7222ff9b5e05 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -215,6 +215,7 @@ static int memory_block_online(struct memory_block *mem)
adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
nr_vmemmap_pages);
+ mem->zone = zone;
return ret;
}
@@ -225,6 +226,9 @@ static int memory_block_offline(struct memory_block *mem)
unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
int ret;
+ if (!mem->zone)
+ return -EINVAL;
+
/*
* Unaccount before offlining, such that unpopulated zone and kthreads
* can properly be torn down in offline_pages().
@@ -234,7 +238,7 @@ static int memory_block_offline(struct memory_block *mem)
-nr_vmemmap_pages);
ret = offline_pages(start_pfn + nr_vmemmap_pages,
- nr_pages - nr_vmemmap_pages, mem->group);
+ nr_pages - nr_vmemmap_pages, mem->zone, mem->group);
if (ret) {
/* offline_pages() failed. Account back. */
if (nr_vmemmap_pages)
@@ -246,6 +250,7 @@ static int memory_block_offline(struct memory_block *mem)
if (nr_vmemmap_pages)
mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
+ mem->zone = NULL;
return ret;
}
@@ -411,11 +416,10 @@ static ssize_t valid_zones_show(struct device *dev,
*/
if (mem->state == MEM_ONLINE) {
/*
- * The block contains more than one zone can not be offlined.
- * This can happen e.g. for ZONE_DMA and ZONE_DMA32
+ * If !mem->zone, the memory block spans multiple zones and
+ * cannot get offlined.
*/
- default_zone = test_pages_in_a_zone(start_pfn,
- start_pfn + nr_pages);
+ default_zone = mem->zone;
if (!default_zone)
return sysfs_emit(buf, "%s\n", "none");
len += sysfs_emit_at(buf, len, "%s", default_zone->name);
@@ -555,6 +559,8 @@ static ssize_t hard_offline_page_store(struct device *dev,
return -EINVAL;
pfn >>= PAGE_SHIFT;
ret = memory_failure(pfn, 0);
+ if (ret == -EOPNOTSUPP)
+ ret = 0;
return ret ? ret : count;
}
@@ -613,11 +619,7 @@ static const struct attribute_group *memory_memblk_attr_groups[] = {
NULL,
};
-/*
- * register_memory - Setup a sysfs device for a memory block
- */
-static
-int register_memory(struct memory_block *memory)
+static int __add_memory_block(struct memory_block *memory)
{
int ret;
@@ -641,9 +643,85 @@ int register_memory(struct memory_block *memory)
return ret;
}
-static int init_memory_block(unsigned long block_id, unsigned long state,
- unsigned long nr_vmemmap_pages,
- struct memory_group *group)
+static struct zone *early_node_zone_for_memory_block(struct memory_block *mem,
+ int nid)
+{
+ const unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
+ const unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
+ struct zone *zone, *matching_zone = NULL;
+ pg_data_t *pgdat = NODE_DATA(nid);
+ int i;
+
+ /*
+ * This logic only works for early memory, when the applicable zones
+ * already span the memory block. We don't expect overlapping zones on
+ * a single node for early memory. So if we're told that some PFNs
+ * of a node fall into this memory block, we can assume that all node
+ * zones that intersect with the memory block are actually applicable.
+ * No need to look at the memmap.
+ */
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ zone = pgdat->node_zones + i;
+ if (!populated_zone(zone))
+ continue;
+ if (!zone_intersects(zone, start_pfn, nr_pages))
+ continue;
+ if (!matching_zone) {
+ matching_zone = zone;
+ continue;
+ }
+ /* Spans multiple zones ... */
+ matching_zone = NULL;
+ break;
+ }
+ return matching_zone;
+}
+
+#ifdef CONFIG_NUMA
+/**
+ * memory_block_add_nid() - Indicate that system RAM falling into this memory
+ * block device (partially) belongs to the given node.
+ * @mem: The memory block device.
+ * @nid: The node id.
+ * @context: The memory initialization context.
+ *
+ * Indicate that system RAM falling into this memory block (partially) belongs
+ * to the given node. If the context indicates ("early") that we are adding the
+ * node during node device subsystem initialization, this will also properly
+ * set/adjust mem->zone based on the zone ranges of the given node.
+ */
+void memory_block_add_nid(struct memory_block *mem, int nid,
+ enum meminit_context context)
+{
+ if (context == MEMINIT_EARLY && mem->nid != nid) {
+ /*
+ * For early memory we have to determine the zone when setting
+ * the node id and handle multiple nodes spanning a single
+ * memory block by indicating via zone == NULL that we're not
+ * dealing with a single zone. So if we're setting the node id
+ * the first time, determine if there is a single zone. If we're
+ * setting the node id a second time to a different node,
+ * invalidate the single detected zone.
+ */
+ if (mem->nid == NUMA_NO_NODE)
+ mem->zone = early_node_zone_for_memory_block(mem, nid);
+ else
+ mem->zone = NULL;
+ }
+
+ /*
+ * If this memory block spans multiple nodes, we only indicate
+ * the last processed node. If we span multiple nodes (not applicable
+ * to hotplugged memory), zone == NULL will prohibit memory offlining
+ * and, consequently, unplug.
+ */
+ mem->nid = nid;
+}
+#endif
+
+static int add_memory_block(unsigned long block_id, unsigned long state,
+ unsigned long nr_vmemmap_pages,
+ struct memory_group *group)
{
struct memory_block *mem;
int ret = 0;
@@ -663,17 +741,30 @@ static int init_memory_block(unsigned long block_id, unsigned long state,
mem->nr_vmemmap_pages = nr_vmemmap_pages;
INIT_LIST_HEAD(&mem->group_next);
+#ifndef CONFIG_NUMA
+ if (state == MEM_ONLINE)
+ /*
+ * MEM_ONLINE at this point implies early memory. With NUMA,
+ * we'll determine the zone when setting the node id via
+ * memory_block_add_nid(). Memory hotplug updates the zone
+ * manually when memory onlining/offlining succeeds.
+ */
+ mem->zone = early_node_zone_for_memory_block(mem, NUMA_NO_NODE);
+#endif /* CONFIG_NUMA */
+
+ ret = __add_memory_block(mem);
+ if (ret)
+ return ret;
+
if (group) {
mem->group = group;
list_add(&mem->group_next, &group->memory_blocks);
}
- ret = register_memory(mem);
-
- return ret;
+ return 0;
}
-static int add_memory_block(unsigned long base_section_nr)
+static int __init add_boot_memory_block(unsigned long base_section_nr)
{
int section_count = 0;
unsigned long nr;
@@ -685,11 +776,18 @@ static int add_memory_block(unsigned long base_section_nr)
if (section_count == 0)
return 0;
- return init_memory_block(memory_block_id(base_section_nr),
- MEM_ONLINE, 0, NULL);
+ return add_memory_block(memory_block_id(base_section_nr),
+ MEM_ONLINE, 0, NULL);
+}
+
+static int add_hotplug_memory_block(unsigned long block_id,
+ unsigned long nr_vmemmap_pages,
+ struct memory_group *group)
+{
+ return add_memory_block(block_id, MEM_OFFLINE, nr_vmemmap_pages, group);
}
-static void unregister_memory(struct memory_block *memory)
+static void remove_memory_block(struct memory_block *memory)
{
if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
return;
@@ -728,8 +826,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
return -EINVAL;
for (block_id = start_block_id; block_id != end_block_id; block_id++) {
- ret = init_memory_block(block_id, MEM_OFFLINE, vmemmap_pages,
- group);
+ ret = add_hotplug_memory_block(block_id, vmemmap_pages, group);
if (ret)
break;
}
@@ -740,7 +837,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
mem = find_memory_block_by_id(block_id);
if (WARN_ON_ONCE(!mem))
continue;
- unregister_memory(mem);
+ remove_memory_block(mem);
}
}
return ret;
@@ -769,7 +866,7 @@ void remove_memory_block_devices(unsigned long start, unsigned long size)
if (WARN_ON_ONCE(!mem))
continue;
unregister_memory_block_under_nodes(mem);
- unregister_memory(mem);
+ remove_memory_block(mem);
}
}
@@ -829,7 +926,7 @@ void __init memory_dev_init(void)
*/
for (nr = 0; nr <= __highest_present_section_nr;
nr += sections_per_block) {
- ret = add_memory_block(nr);
+ ret = add_boot_memory_block(nr);
if (ret)
panic("%s() failed to add memory block: %d\n", __func__,
ret);
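The zone-detection helper above boils down to a first-match/second-invalidates scan: remember the first populated zone that intersects the block, and drop back to NULL as soon as a second one does, since a block spanning zones cannot be offlined. The same pattern in miniature, with toy PFN ranges:

    #include <stdio.h>

    struct range { unsigned long start, end; };    /* [start, end) */

    static int intersects(const struct range *r, unsigned long s,
                          unsigned long n)
    {
        return r->start < s + n && s < r->end;
    }

    int main(void)
    {
        struct range zones[] = { { 0, 4096 }, { 4096, 16384 } };
        unsigned long blk_start = 2048, blk_pages = 4096; /* spans both */
        const struct range *match = NULL;

        for (int i = 0; i < 2; i++) {
            if (!intersects(&zones[i], blk_start, blk_pages))
                continue;
            if (!match) {
                match = &zones[i];
                continue;
            }
            match = NULL;    /* second hit: spans multiple zones */
            break;
        }
        printf("single zone: %s\n", match ? "yes" : "no");
        return 0;
    }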
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 87acc47e8951..ec8bb24a5a22 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -796,15 +796,12 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
}
static void do_register_memory_block_under_node(int nid,
- struct memory_block *mem_blk)
+ struct memory_block *mem_blk,
+ enum meminit_context context)
{
int ret;
- /*
- * If this memory block spans multiple nodes, we only indicate
- * the last processed node.
- */
- mem_blk->nid = nid;
+ memory_block_add_nid(mem_blk, nid, context);
ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
&mem_blk->dev.kobj,
@@ -857,7 +854,7 @@ static int register_mem_block_under_node_early(struct memory_block *mem_blk,
if (page_nid != nid)
continue;
- do_register_memory_block_under_node(nid, mem_blk);
+ do_register_memory_block_under_node(nid, mem_blk, MEMINIT_EARLY);
return 0;
}
/* mem section does not span the specified node */
@@ -873,7 +870,7 @@ static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk,
{
int nid = *(int *)arg;
- do_register_memory_block_under_node(nid, mem_blk);
+ do_register_memory_block_under_node(nid, mem_blk, MEMINIT_HOTPLUG);
return 0;
}
@@ -892,8 +889,9 @@ void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
kobject_name(&node_devices[mem_blk->nid]->dev.kobj));
}
-void link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
- enum meminit_context context)
+void register_memory_blocks_under_node(int nid, unsigned long start_pfn,
+ unsigned long end_pfn,
+ enum meminit_context context)
{
walk_memory_blocks_func_t func;
@@ -1065,26 +1063,30 @@ static const struct attribute_group *cpu_root_attr_groups[] = {
};
#define NODE_CALLBACK_PRI 2 /* lower than SLAB */
-static int __init register_node_type(void)
+void __init node_dev_init(void)
{
- int ret;
+ static struct notifier_block node_memory_callback_nb = {
+ .notifier_call = node_memory_callback,
+ .priority = NODE_CALLBACK_PRI,
+ };
+ int ret, i;
BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES);
BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES);
ret = subsys_system_register(&node_subsys, cpu_root_attr_groups);
- if (!ret) {
- static struct notifier_block node_memory_callback_nb = {
- .notifier_call = node_memory_callback,
- .priority = NODE_CALLBACK_PRI,
- };
- register_hotmemory_notifier(&node_memory_callback_nb);
- }
+ if (ret)
+ panic("%s() failed to register subsystem: %d\n", __func__, ret);
+
+ register_hotmemory_notifier(&node_memory_callback_nb);
/*
- * Note: we're not going to unregister the node class if we fail
- * to register the node state class attribute files.
+ * Create all node devices, which will properly link the node
+ * to applicable memory block devices and already created cpu devices.
*/
- return ret;
+ for_each_online_node(i) {
+ ret = register_one_node(i);
+ if (ret)
+ panic("%s() failed to add node: %d\n", __func__, ret);
+ }
}
-postcore_initcall(register_node_type);
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 5db704f02e71..1ee878d126fd 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -636,6 +636,18 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
atomic_read(&genpd->sd_count) > 0)
return -EBUSY;
+ /*
+ * The children must be in their deepest (powered-off) states to allow
+ * the parent to be powered off. Note that there's no need for
+ * additional locking, as powering on a child requires the parent's
+ * lock to be acquired first.
+ */
+ list_for_each_entry(link, &genpd->parent_links, parent_node) {
+ struct generic_pm_domain *child = link->child;
+ if (child->state_idx < child->state_count - 1)
+ return -EBUSY;
+ }
+
list_for_each_entry(pdd, &genpd->dev_list, list_node) {
enum pm_qos_flags_status stat;
@@ -1073,6 +1085,13 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock,
|| atomic_read(&genpd->sd_count) > 0)
return;
+ /* Check that the children are in their deepest (powered-off) state. */
+ list_for_each_entry(link, &genpd->parent_links, parent_node) {
+ struct generic_pm_domain *child = link->child;
+ if (child->state_idx < child->state_count - 1)
+ return;
+ }
+
/* Choose the deepest state when suspending */
genpd->state_idx = genpd->state_count - 1;
if (_genpd_power_off(genpd, false))
@@ -2058,9 +2077,9 @@ static int genpd_remove(struct generic_pm_domain *genpd)
kfree(link);
}
- genpd_debug_remove(genpd);
list_del(&genpd->gpd_list_node);
genpd_unlock(genpd);
+ genpd_debug_remove(genpd);
cancel_work_sync(&genpd->power_off_work);
if (genpd_is_cpu_domain(genpd))
free_cpumask_var(genpd->cpus);
@@ -2248,12 +2267,8 @@ int of_genpd_add_provider_simple(struct device_node *np,
/* Parse genpd OPP table */
if (genpd->set_performance_state) {
ret = dev_pm_opp_of_add_table(&genpd->dev);
- if (ret) {
- if (ret != -EPROBE_DEFER)
- dev_err(&genpd->dev, "Failed to add OPP table: %d\n",
- ret);
- return ret;
- }
+ if (ret)
+ return dev_err_probe(&genpd->dev, ret, "Failed to add OPP table\n");
/*
* Save table for faster processing while setting performance
@@ -2312,9 +2327,8 @@ int of_genpd_add_provider_onecell(struct device_node *np,
if (genpd->set_performance_state) {
ret = dev_pm_opp_of_add_table_indexed(&genpd->dev, i);
if (ret) {
- if (ret != -EPROBE_DEFER)
- dev_err(&genpd->dev, "Failed to add OPP table for index %d: %d\n",
- i, ret);
+ dev_err_probe(&genpd->dev, ret,
+ "Failed to add OPP table for index %d\n", i);
goto error;
}
@@ -2672,12 +2686,8 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev,
ret = genpd_add_device(pd, dev, base_dev);
mutex_unlock(&gpd_list_lock);
- if (ret < 0) {
- if (ret != -EPROBE_DEFER)
- dev_err(dev, "failed to add to PM domain %s: %d",
- pd->name, ret);
- return ret;
- }
+ if (ret < 0)
+ return dev_err_probe(dev, ret, "failed to add to PM domain %s\n", pd->name);
dev->pm_domain->detach = genpd_dev_pm_detach;
dev->pm_domain->sync = genpd_dev_pm_sync;
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 04ea92cbd9cf..c50139207794 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -485,7 +485,7 @@ static int dpm_run_callback(pm_callback_t cb, struct device *dev,
trace_device_pm_callback_start(dev, info, state.event);
error = cb(dev);
trace_device_pm_callback_end(dev, error);
- suspend_report_result(cb, error);
+ suspend_report_result(dev, cb, error);
initcall_debug_report(dev, calltime, cb, error);
@@ -1568,7 +1568,7 @@ static int legacy_suspend(struct device *dev, pm_message_t state,
trace_device_pm_callback_start(dev, info, state.event);
error = cb(dev, state);
trace_device_pm_callback_end(dev, error);
- suspend_report_result(cb, error);
+ suspend_report_result(dev, cb, error);
initcall_debug_report(dev, calltime, cb, error);
@@ -1855,7 +1855,7 @@ unlock:
device_unlock(dev);
if (ret < 0) {
- suspend_report_result(callback, ret);
+ suspend_report_result(dev, callback, ret);
pm_runtime_put(dev);
return ret;
}
@@ -1960,10 +1960,10 @@ int dpm_suspend_start(pm_message_t state)
}
EXPORT_SYMBOL_GPL(dpm_suspend_start);
-void __suspend_report_result(const char *function, void *fn, int ret)
+void __suspend_report_result(const char *function, struct device *dev, void *fn, int ret)
{
if (ret)
- pr_err("%s(): %pS returns %d\n", function, fn, ret);
+ dev_err(dev, "%s(): %pS returns %d\n", function, fn, ret);
}
EXPORT_SYMBOL_GPL(__suspend_report_result);
@@ -2018,7 +2018,9 @@ static bool pm_ops_is_empty(const struct dev_pm_ops *ops)
void device_pm_check_callbacks(struct device *dev)
{
- spin_lock_irq(&dev->power.lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->power.lock, flags);
dev->power.no_pm_callbacks =
(!dev->bus || (pm_ops_is_empty(dev->bus->pm) &&
!dev->bus->suspend && !dev->bus->resume)) &&
@@ -2027,7 +2029,7 @@ void device_pm_check_callbacks(struct device *dev)
(!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) &&
(!dev->driver || (pm_ops_is_empty(dev->driver->pm) &&
!dev->driver->suspend && !dev->driver->resume));
- spin_unlock_irq(&dev->power.lock);
+ spin_unlock_irqrestore(&dev->power.lock, flags);
}
bool dev_pm_skip_suspend(struct device *dev)
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 2f3cce17219b..d4059e6ffeae 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1476,11 +1476,16 @@ EXPORT_SYMBOL_GPL(pm_runtime_enable);
static void pm_runtime_disable_action(void *data)
{
+ pm_runtime_dont_use_autosuspend(data);
pm_runtime_disable(data);
}
/**
* devm_pm_runtime_enable - devres-enabled version of pm_runtime_enable.
+ *
+ * NOTE: this will also handle calling pm_runtime_dont_use_autosuspend() for
+ * you at driver exit time if needed.
+ *
* @dev: Device to handle.
*/
int devm_pm_runtime_enable(struct device *dev)
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index 0004db4a9d3b..d487a6bac630 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -289,7 +289,7 @@ EXPORT_SYMBOL_GPL(dev_pm_disable_wake_irq);
*
* Enables wakeirq conditionally. We need to enable wake-up interrupt
* lazily on the first rpm_suspend(). This is needed as the consumer device
- * starts in RPM_SUSPENDED state, and the the first pm_runtime_get() would
+ * starts in RPM_SUSPENDED state, and the first pm_runtime_get() would
* otherwise try to disable already disabled wakeirq. The wake-up interrupt
* starts disabled with IRQ_NOAUTOEN set.
*
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 99bda0da23a8..a57d469676ca 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -34,7 +34,8 @@ suspend_state_t pm_suspend_target_state;
bool events_check_enabled __read_mostly;
/* First wakeup IRQ seen by the kernel in the last cycle. */
-unsigned int pm_wakeup_irq __read_mostly;
+static unsigned int wakeup_irq[2] __read_mostly;
+static DEFINE_RAW_SPINLOCK(wakeup_irq_lock);
/* If greater than 0 and the system is suspending, terminate the suspend. */
static atomic_t pm_abort_suspend __read_mostly;
@@ -586,7 +587,7 @@ static bool wakeup_source_not_registered(struct wakeup_source *ws)
* @ws: Wakeup source to handle.
*
* Update the @ws' statistics and, if @ws has just been activated, notify the PM
- * core of the event by incrementing the counter of of wakeup events being
+ * core of the event by incrementing the counter of the wakeup events being
* processed.
*/
static void wakeup_source_activate(struct wakeup_source *ws)
@@ -732,7 +733,7 @@ static void wakeup_source_deactivate(struct wakeup_source *ws)
/*
* Increment the counter of registered wakeup events and decrement the
- * couter of wakeup events in progress simultaneously.
+ * counter of wakeup events in progress simultaneously.
*/
cec = atomic_add_return(MAX_IN_PROGRESS, &combined_event_count);
trace_wakeup_source_deactivate(ws->name, cec);
@@ -942,19 +943,45 @@ void pm_system_cancel_wakeup(void)
atomic_dec_if_positive(&pm_abort_suspend);
}
-void pm_wakeup_clear(bool reset)
+void pm_wakeup_clear(unsigned int irq_number)
{
- pm_wakeup_irq = 0;
- if (reset)
+ raw_spin_lock_irq(&wakeup_irq_lock);
+
+ if (irq_number && wakeup_irq[0] == irq_number)
+ wakeup_irq[0] = wakeup_irq[1];
+ else
+ wakeup_irq[0] = 0;
+
+ wakeup_irq[1] = 0;
+
+ raw_spin_unlock_irq(&wakeup_irq_lock);
+
+ if (!irq_number)
atomic_set(&pm_abort_suspend, 0);
}
void pm_system_irq_wakeup(unsigned int irq_number)
{
- if (pm_wakeup_irq == 0) {
- pm_wakeup_irq = irq_number;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&wakeup_irq_lock, flags);
+
+ if (wakeup_irq[0] == 0)
+ wakeup_irq[0] = irq_number;
+ else if (wakeup_irq[1] == 0)
+ wakeup_irq[1] = irq_number;
+ else
+ irq_number = 0;
+
+ raw_spin_unlock_irqrestore(&wakeup_irq_lock, flags);
+
+ if (irq_number)
pm_system_wakeup();
- }
+}
+
+unsigned int pm_wakeup_irq(void)
+{
+ return wakeup_irq[0];
}
/**
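The rework above turns the single pm_wakeup_irq variable into a two-slot queue: when the first recorded IRQ turns out to be the shared ACPI SCI, clearing it (as the s2idle hunk earlier does before rearming) promotes the runner-up instead of discarding it. The queue discipline in isolation, locking omitted:

    #include <stdio.h>

    static unsigned int wakeup_irq[2];

    static void irq_wakeup(unsigned int irq)
    {
        if (wakeup_irq[0] == 0)
            wakeup_irq[0] = irq;
        else if (wakeup_irq[1] == 0)
            wakeup_irq[1] = irq;
        /* later IRQs in the same cycle are not recorded */
    }

    static void wakeup_clear(unsigned int irq)
    {
        /* Drop @irq if it is the reported one, promoting the runner-up. */
        if (irq && wakeup_irq[0] == irq)
            wakeup_irq[0] = wakeup_irq[1];
        else
            wakeup_irq[0] = 0;
        wakeup_irq[1] = 0;
    }

    int main(void)
    {
        irq_wakeup(9);    /* ACPI SCI fires first */
        irq_wakeup(42);   /* the device that actually woke the system */
        wakeup_clear(9);  /* SCI handled and rearmed */
        printf("reported wakeup IRQ: %u\n", wakeup_irq[0]); /* 42 */
        return 0;
    }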
diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index b1905916f7af..b4df36c7b17d 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -31,6 +31,7 @@ struct regmap_format {
size_t buf_size;
size_t reg_bytes;
size_t pad_bytes;
+ size_t reg_downshift;
size_t val_bytes;
void (*format_write)(struct regmap *map,
unsigned int reg, unsigned int val);
@@ -62,6 +63,7 @@ struct regmap {
regmap_unlock unlock;
void *lock_arg; /* This is passed to lock/unlock functions */
gfp_t alloc_flags;
+ unsigned int reg_base;
struct device *dev; /* Device we do I/O on */
void *work_buf; /* Scratch buffer used to format I/O */
diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
index d2656581a608..400c7412a7dc 100644
--- a/drivers/base/regmap/regmap-irq.c
+++ b/drivers/base/regmap/regmap-irq.c
@@ -189,11 +189,9 @@ static void regmap_irq_sync_unlock(struct irq_data *data)
ret = regmap_write(map, reg, d->mask_buf[i]);
if (d->chip->clear_ack) {
if (d->chip->ack_invert && !ret)
- ret = regmap_write(map, reg,
- d->mask_buf[i]);
+ ret = regmap_write(map, reg, UINT_MAX);
else if (!ret)
- ret = regmap_write(map, reg,
- ~d->mask_buf[i]);
+ ret = regmap_write(map, reg, 0);
}
if (ret != 0)
dev_err(d->map->dev, "Failed to ack 0x%x: %d\n",
@@ -537,7 +535,7 @@ static irqreturn_t regmap_irq_thread(int irq, void *d)
/*
* Ignore masked IRQs and ack if we need to; we ack early so
- * there is no race between handling and acknowleding the
+ * there is no race between handling and acknowledging the
* interrupt. We assume that typically few of the interrupts
* will fire simultaneously so don't worry about overhead from
* doing a write per register.
@@ -556,11 +554,9 @@ static irqreturn_t regmap_irq_thread(int irq, void *d)
data->status_buf[i]);
if (chip->clear_ack) {
if (chip->ack_invert && !ret)
- ret = regmap_write(map, reg,
- data->status_buf[i]);
+ ret = regmap_write(map, reg, UINT_MAX);
else if (!ret)
- ret = regmap_write(map, reg,
- ~data->status_buf[i]);
+ ret = regmap_write(map, reg, 0);
}
if (ret != 0)
dev_err(map->dev, "Failed to ack 0x%x: %d\n",
@@ -817,13 +813,9 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
d->status_buf[i] & d->mask_buf[i]);
if (chip->clear_ack) {
if (chip->ack_invert && !ret)
- ret = regmap_write(map, reg,
- (d->status_buf[i] &
- d->mask_buf[i]));
+ ret = regmap_write(map, reg, UINT_MAX);
else if (!ret)
- ret = regmap_write(map, reg,
- ~(d->status_buf[i] &
- d->mask_buf[i]));
+ ret = regmap_write(map, reg, 0);
}
if (ret != 0) {
dev_err(map->dev, "Failed to ack 0x%x: %d\n",
@@ -1053,7 +1045,7 @@ int devm_regmap_add_irq_chip_fwnode(struct device *dev,
EXPORT_SYMBOL_GPL(devm_regmap_add_irq_chip_fwnode);
/**
- * devm_regmap_add_irq_chip() - Resource manager regmap_add_irq_chip()
+ * devm_regmap_add_irq_chip() - Resource managed regmap_add_irq_chip()
*
* @dev: The device pointer to which the irq_chip belongs.
* @map: The regmap for the device.
@@ -1082,7 +1074,7 @@ EXPORT_SYMBOL_GPL(devm_regmap_add_irq_chip);
/**
* devm_regmap_del_irq_chip() - Resource managed regmap_del_irq_chip()
*
- * @dev: Device for which which resource was allocated.
+ * @dev: Device for which the resource was allocated.
* @irq: Primary IRQ for the device.
* @data: &regmap_irq_chip_data allocated by regmap_add_irq_chip().
*
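The clear_ack simplifications above all make the same substitution: the follow-up write that returns the ack register to its idle state no longer reconstructs a mask-dependent value, it just writes all ones (UINT_MAX) on ack-inverted chips and all zeroes otherwise. On a write-one-to-clear style latch, any superset of the pending bits clears them just as well as the exact mask, which the following toy model illustrates (an assumption about the hardware this targets, not something the hunk itself states):

    #include <stdio.h>
    #include <limits.h>

    /* Toy model of a write-1-to-clear status latch. */
    static unsigned int w1c_write(unsigned int reg, unsigned int val)
    {
        return reg & ~val;
    }

    int main(void)
    {
        unsigned int status = 0x05;    /* two latched interrupt bits */

        printf("exact mask: 0x%x\n", w1c_write(status, 0x05));     /* 0x0 */
        printf("all ones:   0x%x\n", w1c_write(status, UINT_MAX)); /* 0x0 */
        return 0;
    }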
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 8f9fe5fd4707..5e12f7cb5147 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -821,8 +821,11 @@ struct regmap *__regmap_init(struct device *dev,
else
map->alloc_flags = GFP_KERNEL;
+ map->reg_base = config->reg_base;
+
map->format.reg_bytes = DIV_ROUND_UP(config->reg_bits, 8);
map->format.pad_bytes = config->pad_bits / 8;
+ map->format.reg_downshift = config->reg_downshift;
map->format.val_bytes = DIV_ROUND_UP(config->val_bits, 8);
map->format.buf_size = DIV_ROUND_UP(config->reg_bits +
config->val_bits + config->pad_bits, 8);
@@ -1735,6 +1738,8 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
return ret;
}
+ reg += map->reg_base;
+ reg >>= map->format.reg_downshift;
map->format.format_reg(map->work_buf, reg, map->reg_shift);
regmap_set_work_buf_flag_mask(map, map->format.reg_bytes,
map->write_flag_mask);
@@ -1905,6 +1910,8 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg,
return ret;
}
+ reg += map->reg_base;
+ reg >>= map->format.reg_downshift;
map->format.format_write(map, reg, val);
trace_regmap_hw_write_start(map, reg, 1);
@@ -2346,6 +2353,8 @@ static int _regmap_raw_multi_reg_write(struct regmap *map,
unsigned int reg = regs[i].reg;
unsigned int val = regs[i].def;
trace_regmap_hw_write_start(map, reg, 1);
+ reg += map->reg_base;
+ reg >>= map->format.reg_downshift;
map->format.format_reg(u8, reg, map->reg_shift);
u8 += reg_bytes + pad_bytes;
map->format.format_val(u8, val, 0);
@@ -2673,6 +2682,8 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
return ret;
}
+ reg += map->reg_base;
+ reg >>= map->format.reg_downshift;
map->format.format_reg(map->work_buf, reg, map->reg_shift);
regmap_set_work_buf_flag_mask(map, map->format.reg_bytes,
map->read_flag_mask);
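All three I/O paths patched above apply the same transform before the register is formatted: add the constant reg_base, then shift right by reg_downshift. That ordering matters; a typical use would be a register block at a fixed byte offset on a bus whose devices address registers in words. A small illustration:

    #include <stdio.h>

    /* Illustrative: regmap register address -> on-bus address. */
    static unsigned int map_reg(unsigned int reg, unsigned int reg_base,
                                unsigned int reg_downshift)
    {
        reg += reg_base;             /* constant offset first */
        return reg >> reg_downshift; /* then byte -> word addressing */
    }

    int main(void)
    {
        /* Block at byte offset 0x100, 4 bytes per register. */
        printf("0x%x\n", map_reg(0x10, 0x100, 2)); /* -> 0x44 */
        return 0;
    }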
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index fc24e89f9592..e9d1efcda89b 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -14,11 +14,11 @@
#include <linux/hardirq.h>
#include <linux/topology.h>
-#define define_id_show_func(name) \
+#define define_id_show_func(name, fmt) \
static ssize_t name##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
{ \
- return sysfs_emit(buf, "%d\n", topology_##name(dev->id)); \
+ return sysfs_emit(buf, fmt "\n", topology_##name(dev->id)); \
}
#define define_siblings_read_func(name, mask) \
@@ -42,22 +42,25 @@ static ssize_t name##_list_read(struct file *file, struct kobject *kobj, \
off, count); \
}
-define_id_show_func(physical_package_id);
+define_id_show_func(physical_package_id, "%d");
static DEVICE_ATTR_RO(physical_package_id);
#ifdef TOPOLOGY_DIE_SYSFS
-define_id_show_func(die_id);
+define_id_show_func(die_id, "%d");
static DEVICE_ATTR_RO(die_id);
#endif
#ifdef TOPOLOGY_CLUSTER_SYSFS
-define_id_show_func(cluster_id);
+define_id_show_func(cluster_id, "%d");
static DEVICE_ATTR_RO(cluster_id);
#endif
-define_id_show_func(core_id);
+define_id_show_func(core_id, "%d");
static DEVICE_ATTR_RO(core_id);
+define_id_show_func(ppin, "0x%llx");
+static DEVICE_ATTR_ADMIN_RO(ppin);
+
define_siblings_read_func(thread_siblings, sibling_cpumask);
static BIN_ATTR_RO(thread_siblings, 0);
static BIN_ATTR_RO(thread_siblings_list, 0);
@@ -87,7 +90,7 @@ static BIN_ATTR_RO(package_cpus, 0);
static BIN_ATTR_RO(package_cpus_list, 0);
#ifdef TOPOLOGY_BOOK_SYSFS
-define_id_show_func(book_id);
+define_id_show_func(book_id, "%d");
static DEVICE_ATTR_RO(book_id);
define_siblings_read_func(book_siblings, book_cpumask);
static BIN_ATTR_RO(book_siblings, 0);
@@ -95,7 +98,7 @@ static BIN_ATTR_RO(book_siblings_list, 0);
#endif
#ifdef TOPOLOGY_DRAWER_SYSFS
-define_id_show_func(drawer_id);
+define_id_show_func(drawer_id, "%d");
static DEVICE_ATTR_RO(drawer_id);
define_siblings_read_func(drawer_siblings, drawer_cpumask);
static BIN_ATTR_RO(drawer_siblings, 0);
@@ -145,6 +148,7 @@ static struct attribute *default_attrs[] = {
#ifdef TOPOLOGY_DRAWER_SYSFS
&dev_attr_drawer_id.attr,
#endif
+ &dev_attr_ppin.attr,
NULL
};
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 52484bcdedb9..8a91fcac6f82 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -12,7 +12,6 @@
#include <linux/ioctl.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
-#include <linux/genhd.h>
#include <linux/netdevice.h>
#include <linux/mutex.h>
#include <linux/export.h>
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 6af111f568e4..384073ef2323 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -10,7 +10,6 @@
#include <linux/blk-mq.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
-#include <linux/genhd.h>
#include <linux/moduleparam.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
@@ -1019,9 +1018,9 @@ bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt)
iter.bi_size = cnt;
__bio_for_each_segment(bv, bio, iter, iter) {
- char *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
+ char *p = bvec_kmap_local(&bv);
skb_copy_bits(skb, soff, p, bv.bv_len);
- kunmap_atomic(p);
+ kunmap_local(p);
soff += bv.bv_len;
}
}
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 72cf7603d51f..f5bcded3640d 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -138,15 +138,14 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
op_flags |= REQ_FUA | REQ_PREFLUSH;
op_flags |= REQ_SYNC;
- bio = bio_alloc_bioset(GFP_NOIO, 1, &drbd_md_io_bio_set);
- bio_set_dev(bio, bdev->md_bdev);
+ bio = bio_alloc_bioset(bdev->md_bdev, 1, op | op_flags, GFP_NOIO,
+ &drbd_md_io_bio_set);
bio->bi_iter.bi_sector = sector;
err = -EIO;
if (bio_add_page(bio, device->md_io.page, size, 0) != size)
goto out;
bio->bi_private = device;
bio->bi_end_io = drbd_md_endio;
- bio_set_op_attrs(bio, op, op_flags);
if (op != REQ_OP_WRITE && device->state.disk == D_DISKLESS && device->ldev == NULL)
/* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index c1f816f896a8..df25eecf80af 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -976,12 +976,13 @@ static void drbd_bm_endio(struct bio *bio)
static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
{
- struct bio *bio = bio_alloc_bioset(GFP_NOIO, 1, &drbd_md_io_bio_set);
struct drbd_device *device = ctx->device;
+ unsigned int op = (ctx->flags & BM_AIO_READ) ? REQ_OP_READ : REQ_OP_WRITE;
+ struct bio *bio = bio_alloc_bioset(device->ldev->md_bdev, 1, op,
+ GFP_NOIO, &drbd_md_io_bio_set);
struct drbd_bitmap *b = device->bitmap;
struct page *page;
unsigned int len;
- unsigned int op = (ctx->flags & BM_AIO_READ) ? REQ_OP_READ : REQ_OP_WRITE;
sector_t on_disk_sector =
device->ldev->md.md_offset + device->ldev->md.bm_offset;
@@ -1006,14 +1007,12 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
bm_store_page_idx(page, page_nr);
} else
page = b->bm_pages[page_nr];
- bio_set_dev(bio, device->ldev->md_bdev);
bio->bi_iter.bi_sector = on_disk_sector;
/* bio_add_page of a single page to an empty bio will always succeed,
* according to api. Do we want to assert that? */
bio_add_page(bio, page, len, 0);
bio->bi_private = ctx;
bio->bi_end_io = drbd_bm_endio;
- bio_set_op_attrs(bio, op, 0);
if (drbd_insert_fault(device, (op == REQ_OP_WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
bio_io_error(bio);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index f27d5b0f9a0b..4b55e864a0a3 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -27,7 +27,6 @@
#include <linux/major.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
-#include <linux/genhd.h>
#include <linux/idr.h>
#include <linux/dynamic_debug.h>
#include <net/tcp.h>
@@ -638,9 +637,6 @@ enum {
STATE_SENT, /* Do not change state/UUIDs while this is set */
CALLBACK_PENDING, /* Whether we have a call_usermodehelper(, UMH_WAIT_PROC)
* pending, from drbd worker context.
- * If set, bdi_write_congested() returns true,
- * so shrink_page_list() would not recurse into,
- * and potentially deadlock on, this drbd worker.
*/
DISCONNECT_SENT,
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 6df2539e215b..fa00cf2ea952 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1279,16 +1279,16 @@ static void one_flush_endio(struct bio *bio)
static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
{
- struct bio *bio = bio_alloc(GFP_NOIO, 0);
+ struct bio *bio = bio_alloc(device->ldev->backing_bdev, 0,
+ REQ_OP_FLUSH | REQ_PREFLUSH, GFP_NOIO);
struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);
- if (!bio || !octx) {
- drbd_warn(device, "Could not allocate a bio, CANNOT ISSUE FLUSH\n");
+
+ if (!octx) {
+ drbd_warn(device, "Could not allocate a octx, CANNOT ISSUE FLUSH\n");
/* FIXME: what else can I do now? disconnecting or detaching
* really does not help to improve the state of the world, either.
*/
- kfree(octx);
- if (bio)
- bio_put(bio);
+ bio_put(bio);
ctx->error = -ENOMEM;
put_ldev(device);
@@ -1298,10 +1298,8 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont
octx->device = device;
octx->ctx = ctx;
- bio_set_dev(bio, device->ldev->backing_bdev);
bio->bi_private = octx;
bio->bi_end_io = one_flush_endio;
- bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH;
device->flush_jif = jiffies;
set_bit(FLUSH_PENDING, &device->flags);
@@ -1646,7 +1644,6 @@ int drbd_submit_peer_request(struct drbd_device *device,
unsigned data_size = peer_req->i.size;
unsigned n_bios = 0;
unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
- int err = -ENOMEM;
/* TRIM/DISCARD: for now, always use the helper function
* blkdev_issue_zeroout(..., discard=true).
@@ -1687,15 +1684,10 @@ int drbd_submit_peer_request(struct drbd_device *device,
* generated bio, but a bio allocated on behalf of the peer.
*/
next_bio:
- bio = bio_alloc(GFP_NOIO, nr_pages);
- if (!bio) {
- drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
- goto fail;
- }
+ bio = bio_alloc(device->ldev->backing_bdev, nr_pages, op | op_flags,
+ GFP_NOIO);
/* > peer_req->i.sector, unless this is the first bio */
bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, device->ldev->backing_bdev);
- bio_set_op_attrs(bio, op, op_flags);
bio->bi_private = peer_req;
bio->bi_end_io = drbd_peer_request_endio;
@@ -1726,14 +1718,6 @@ next_bio:
drbd_submit_bio_noacct(device, fault_type, bio);
} while (bios);
return 0;
-
-fail:
- while (bios) {
- bio = bios;
- bios = bios->bi_next;
- bio_put(bio);
- }
- return err;
}
static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
@@ -2033,10 +2017,10 @@ static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_req
D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
bio_for_each_segment(bvec, bio, iter) {
- void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
+ void *mapped = bvec_kmap_local(&bvec);
expect = min_t(int, data_size, bvec.bv_len);
err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
- kunmap(bvec.bv_page);
+ kunmap_local(mapped);
if (err)
return err;
data_size -= expect;
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 3235532ae077..2f608525a879 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -30,7 +30,8 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio
return NULL;
memset(req, 0, sizeof(*req));
- req->private_bio = bio_clone_fast(bio_src, GFP_NOIO, &drbd_io_bio_set);
+ req->private_bio = bio_alloc_clone(device->ldev->backing_bdev, bio_src,
+ GFP_NOIO, &drbd_io_bio_set);
req->private_bio->bi_private = req;
req->private_bio->bi_end_io = drbd_request_endio;
@@ -909,8 +910,7 @@ static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t se
switch (rbm) {
case RB_CONGESTED_REMOTE:
- return bdi_read_congested(
- device->ldev->backing_bdev->bd_disk->bdi);
+ return 0;
case RB_LEAST_PENDING:
return atomic_read(&device->local_cnt) >
atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt);
@@ -1151,8 +1151,6 @@ drbd_submit_req_private_bio(struct drbd_request *req)
else
type = DRBD_FAULT_DT_RD;
- bio_set_dev(bio, device->ldev->backing_bdev);
-
/* State may have changed since we grabbed our reference on the
* ->ldev member. Double check, and short-circuit to endio.
* In case the last activity log transaction failed to get on
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 64563bfdf0da..1b48c8172a07 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -326,9 +326,9 @@ void drbd_csum_bio(struct crypto_shash *tfm, struct bio *bio, void *digest)
bio_for_each_segment(bvec, bio, iter) {
u8 *src;
- src = kmap_atomic(bvec.bv_page);
- crypto_shash_update(desc, src + bvec.bv_offset, bvec.bv_len);
- kunmap_atomic(src);
+ src = bvec_kmap_local(&bvec);
+ crypto_shash_update(desc, src, bvec.bv_len);
+ kunmap_local(src);
/* REQ_OP_WRITE_SAME has only one segment,
* checksum the payload only once. */
@@ -1523,9 +1523,9 @@ int w_restart_disk_io(struct drbd_work *w, int cancel)
if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
drbd_al_begin_io(device, &req->i);
- req->private_bio = bio_clone_fast(req->master_bio, GFP_NOIO,
+ req->private_bio = bio_alloc_clone(device->ldev->backing_bdev,
+ req->master_bio, GFP_NOIO,
&drbd_io_bio_set);
- bio_set_dev(req->private_bio, device->ldev->backing_bdev);
req->private_bio->bi_private = req;
req->private_bio->bi_end_io = drbd_request_endio;
submit_bio_noacct(req->private_bio);
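Every drbd hunk above is an instance of one tree-wide conversion: the target block device and the operation flags move into the bio allocation call, and since a blocking GFP makes these mempool-backed allocations reliable, the NULL checks and failure paths go away too. A hedged sketch of the new-style call; the helper and its completion handler are placeholders:

    static void submit_one_page_read(struct block_device *bdev,
                                     struct page *page, sector_t sector,
                                     bio_end_io_t *end_io)
    {
        /* Device and op are allocation arguments now; with GFP_NOIO
         * (which can block) bio_alloc() does not return NULL. */
        struct bio *bio = bio_alloc(bdev, 1, REQ_OP_READ, GFP_NOIO);

        bio->bi_iter.bi_sector = sector;
        bio_add_page(bio, page, PAGE_SIZE, 0);
        bio->bi_end_io = end_io;
        submit_bio(bio);
    }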
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index e611411a934c..8c647532e3ce 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2485,11 +2485,9 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
}
if (CT(raw_cmd->cmd[COMMAND]) == FD_READ)
- memcpy_to_page(bv.bv_page, bv.bv_offset, dma_buffer,
- size);
+ memcpy_to_bvec(&bv, dma_buffer);
else
- memcpy_from_page(dma_buffer, bv.bv_page, bv.bv_offset,
- size);
+ memcpy_from_bvec(dma_buffer, &bv);
remaining -= size;
dma_buffer += size;
@@ -4129,15 +4127,13 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive)
cbdata.drive = drive;
- bio_init(&bio, &bio_vec, 1);
- bio_set_dev(&bio, bdev);
+ bio_init(&bio, bdev, &bio_vec, 1, REQ_OP_READ);
bio_add_page(&bio, page, block_size(bdev), 0);
bio.bi_iter.bi_sector = 0;
bio.bi_flags |= (1 << BIO_QUIET);
bio.bi_private = &cbdata;
bio.bi_end_io = floppy_rb0_cb;
- bio_set_op_attrs(&bio, REQ_OP_READ, 0);
init_completion(&cbdata.complete);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 01cbbfc4e9e2..3e636a75c83a 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -79,12 +79,14 @@
#include <linux/ioprio.h>
#include <linux/blk-cgroup.h>
#include <linux/sched/mm.h>
+#include <linux/statfs.h>
#include "loop.h"
#include <linux/uaccess.h>
#define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ)
+#define LOOP_DEFAULT_HW_Q_DEPTH (128)
static DEFINE_IDR(loop_index_idr);
static DEFINE_MUTEX(loop_ctl_mutex);
@@ -308,12 +310,11 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos,
* a.k.a. discard/zerorange.
*/
struct file *file = lo->lo_backing_file;
- struct request_queue *q = lo->lo_queue;
int ret;
mode |= FALLOC_FL_KEEP_SIZE;
- if (!blk_queue_discard(q)) {
+ if (!blk_queue_discard(lo->lo_queue)) {
ret = -EOPNOTSUPP;
goto out;
}
@@ -327,8 +328,7 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos,
static int lo_req_flush(struct loop_device *lo, struct request *rq)
{
- struct file *file = lo->lo_backing_file;
- int ret = vfs_fsync(file, 0);
+ int ret = vfs_fsync(lo->lo_backing_file, 0);
if (unlikely(ret && ret != -EINVAL))
ret = -EIO;
@@ -680,33 +680,33 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf)
{
- return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset);
+ return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_offset);
}
static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf)
{
- return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
+ return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
}
static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
{
int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR);
- return sprintf(buf, "%s\n", autoclear ? "1" : "0");
+ return sysfs_emit(buf, "%s\n", autoclear ? "1" : "0");
}
static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf)
{
int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN);
- return sprintf(buf, "%s\n", partscan ? "1" : "0");
+ return sysfs_emit(buf, "%s\n", partscan ? "1" : "0");
}
static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf)
{
int dio = (lo->lo_flags & LO_FLAGS_DIRECT_IO);
- return sprintf(buf, "%s\n", dio ? "1" : "0");
+ return sysfs_emit(buf, "%s\n", dio ? "1" : "0");
}
LOOP_ATTR_RO(backing_file);
@@ -774,8 +774,13 @@ static void loop_config_discard(struct loop_device *lo)
granularity = 0;
} else {
+ struct kstatfs sbuf;
+
max_discard_sectors = UINT_MAX >> 9;
- granularity = inode->i_sb->s_blocksize;
+ if (!vfs_statfs(&file->f_path, &sbuf))
+ granularity = sbuf.f_bsize;
+ else
+ max_discard_sectors = 0;
}
if (max_discard_sectors) {
@@ -1082,7 +1087,7 @@ out_putf:
return error;
}
-static void __loop_clr_fd(struct loop_device *lo)
+static void __loop_clr_fd(struct loop_device *lo, bool release)
{
struct file *filp;
gfp_t gfp = lo->old_gfp_mask;
@@ -1144,6 +1149,8 @@ static void __loop_clr_fd(struct loop_device *lo)
/* let user-space know about this change */
kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
mapping_set_gfp_mask(filp->f_mapping, gfp);
+ /* This is safe: open() is still holding a reference. */
+ module_put(THIS_MODULE);
blk_mq_unfreeze_queue(lo->lo_queue);
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
@@ -1151,52 +1158,44 @@ static void __loop_clr_fd(struct loop_device *lo)
if (lo->lo_flags & LO_FLAGS_PARTSCAN) {
int err;
- mutex_lock(&lo->lo_disk->open_mutex);
+ /*
+ * open_mutex is already held in the release path, so don't
+ * acquire it again when this function is called from there.
+ *
+ * If the partition reread isn't from the release path, lo_refcnt
+ * must be at least one and can only drop to zero when the
+ * current holder is released.
+ */
+ if (!release)
+ mutex_lock(&lo->lo_disk->open_mutex);
err = bdev_disk_changed(lo->lo_disk, false);
- mutex_unlock(&lo->lo_disk->open_mutex);
+ if (!release)
+ mutex_unlock(&lo->lo_disk->open_mutex);
if (err)
pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
__func__, lo->lo_number, err);
/* Device is gone, no point in returning error */
}
+ /*
+ * lo->lo_state is set to Lo_unbound here only after the partscan
+ * above has finished. Nobody else can enter __loop_clr_fd(), as the
+ * Lo_rundown state protects us from all the other places trying to
+ * change the 'lo' device.
+ */
lo->lo_flags = 0;
if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART;
-
- fput(filp);
-}
-
-static void loop_rundown_completed(struct loop_device *lo)
-{
mutex_lock(&lo->lo_mutex);
lo->lo_state = Lo_unbound;
mutex_unlock(&lo->lo_mutex);
- module_put(THIS_MODULE);
-}
-static void loop_rundown_workfn(struct work_struct *work)
-{
- struct loop_device *lo = container_of(work, struct loop_device,
- rundown_work);
- struct block_device *bdev = lo->lo_device;
- struct gendisk *disk = lo->lo_disk;
-
- __loop_clr_fd(lo);
- kobject_put(&bdev->bd_device.kobj);
- module_put(disk->fops->owner);
- loop_rundown_completed(lo);
-}
-
-static void loop_schedule_rundown(struct loop_device *lo)
-{
- struct block_device *bdev = lo->lo_device;
- struct gendisk *disk = lo->lo_disk;
-
- __module_get(disk->fops->owner);
- kobject_get(&bdev->bd_device.kobj);
- INIT_WORK(&lo->rundown_work, loop_rundown_workfn);
- queue_work(system_long_wq, &lo->rundown_work);
+ /*
+ * There is no need to hold lo_mutex while calling fput() on the
+ * backing file. Doing so triggers a possible circular-lock-dependency
+ * warning, as fput() can take open_mutex, which is usually taken
+ * before lo_mutex.
+ */
+ fput(filp);
}
static int loop_clr_fd(struct loop_device *lo)
@@ -1228,8 +1227,7 @@ static int loop_clr_fd(struct loop_device *lo)
lo->lo_state = Lo_rundown;
mutex_unlock(&lo->lo_mutex);
- __loop_clr_fd(lo);
- loop_rundown_completed(lo);
+ __loop_clr_fd(lo, false);
return 0;
}
@@ -1262,7 +1260,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
/* If any pages were dirtied after invalidate_bdev(), try again */
err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
+ pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n",
__func__, lo->lo_number, lo->lo_file_name,
lo->lo_device->bd_inode->i_mapping->nrpages);
goto out_unfreeze;
@@ -1482,7 +1480,7 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
/* invalidate_bdev should have truncated all the pages */
if (lo->lo_device->bd_inode->i_mapping->nrpages) {
err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
+ pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n",
__func__, lo->lo_number, lo->lo_file_name,
lo->lo_device->bd_inode->i_mapping->nrpages);
goto out_unfreeze;
@@ -1754,7 +1752,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
* In autoclear mode, stop the loop thread
* and remove configuration after last close.
*/
- loop_schedule_rundown(lo);
+ __loop_clr_fd(lo, true);
return;
} else if (lo->lo_state == Lo_bound) {
/*
@@ -1787,6 +1785,24 @@ module_param(max_loop, int, 0444);
MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
+
+static int hw_queue_depth = LOOP_DEFAULT_HW_Q_DEPTH;
+
+static int loop_set_hw_queue_depth(const char *s, const struct kernel_param *p)
+{
+ int ret = kstrtoint(s, 10, &hw_queue_depth);
+
+ return (ret || (hw_queue_depth < 1)) ? -EINVAL : 0;
+}
+
+static const struct kernel_param_ops loop_hw_qdepth_param_ops = {
+ .set = loop_set_hw_queue_depth,
+ .get = param_get_int,
+};
+
+device_param_cb(hw_queue_depth, &loop_hw_qdepth_param_ops, &hw_queue_depth, 0444);
+MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 128");
+
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
@@ -1981,7 +1997,7 @@ static int loop_add(int i)
lo->tag_set.ops = &loop_mq_ops;
lo->tag_set.nr_hw_queues = 1;
- lo->tag_set.queue_depth = 128;
+ lo->tag_set.queue_depth = hw_queue_depth;
lo->tag_set.numa_node = NUMA_NO_NODE;
lo->tag_set.cmd_size = sizeof(struct loop_cmd);
lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING |
@@ -2075,6 +2091,7 @@ static void loop_remove(struct loop_device *lo)
del_gendisk(lo->lo_disk);
blk_cleanup_disk(lo->lo_disk);
blk_mq_free_tag_set(&lo->tag_set);
+
mutex_lock(&loop_ctl_mutex);
idr_remove(&loop_index_idr, lo->lo_number);
mutex_unlock(&loop_ctl_mutex);
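One caveat in the hw_queue_depth hunk above: kstrtoint() parses directly into the live hw_queue_depth, so a written value below 1 draws -EINVAL only after the global has already been overwritten. A hedged alternative that validates into a local first (a sketch, not what the patch does):

static int loop_set_hw_queue_depth(const char *s, const struct kernel_param *p)
{
	int qd, ret;

	/* parse into a local so a rejected value never reaches the global */
	ret = kstrtoint(s, 10, &qd);
	if (ret < 0 || qd < 1)
		return -EINVAL;
	hw_queue_depth = qd;
	return 0;
}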
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 918a7a2dc025..082d4b6bfc6a 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -56,7 +56,6 @@ struct loop_device {
struct gendisk *lo_disk;
struct mutex lo_mutex;
bool idr_visible;
- struct work_struct rundown_work;
};
struct loop_cmd {
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index e6005c232328..4fbaf0b4958b 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -19,7 +19,6 @@
#include <linux/compat.h>
#include <linux/fs.h>
#include <linux/module.h>
-#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/bio.h>
@@ -161,9 +160,7 @@ static bool mtip_check_surprise_removal(struct driver_data *dd)
static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd,
unsigned int tag)
{
- struct blk_mq_hw_ctx *hctx = dd->queue->queue_hw_ctx[0];
-
- return blk_mq_rq_to_pdu(blk_mq_tag_to_rq(hctx->tags, tag));
+ return blk_mq_rq_to_pdu(blk_mq_tag_to_rq(dd->tags.tags[0], tag));
}
/*
@@ -4112,7 +4109,7 @@ static void mtip_pci_remove(struct pci_dev *pdev)
"Completion workers still active!\n");
}
- blk_set_queue_dying(dd->queue);
+ blk_mark_disk_dead(dd->disk);
set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
/* Clean up the block layer. */
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 88f4206310e4..6816beb45352 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -15,7 +15,6 @@
#include <linux/rwsem.h>
#include <linux/ata.h>
#include <linux/interrupt.h>
-#include <linux/genhd.h>
/* Offset of Subsystem Device ID in pci configuration space */
#define PCI_SUBSYSTEM_DEVICEID 0x2E
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 13004beb48ca..05b1120e6623 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -431,9 +431,10 @@ static ssize_t nullb_device_power_store(struct config_item *item,
if (!dev->power && newp) {
if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags))
return count;
- if (null_add_dev(dev)) {
+ ret = null_add_dev(dev);
+ if (ret) {
clear_bit(NULLB_DEV_FL_UP, &dev->flags);
- return -ENOMEM;
+ return ret;
}
set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
@@ -719,26 +720,25 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
return NULL;
}
-static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
+static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, struct bio *bio)
{
struct nullb_cmd *cmd;
DEFINE_WAIT(wait);
- cmd = __alloc_cmd(nq);
- if (cmd || !can_wait)
- return cmd;
-
do {
- prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
+ /*
+ * This avoids multiple return statements, multiple calls to
+ * __alloc_cmd() and a fast path call to prepare_to_wait().
+ */
cmd = __alloc_cmd(nq);
- if (cmd)
- break;
-
+ if (cmd) {
+ cmd->bio = bio;
+ return cmd;
+ }
+ prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
io_schedule();
+ finish_wait(&nq->wait, &wait);
} while (1);
-
- finish_wait(&nq->wait, &wait);
- return cmd;
}
static void end_cmd(struct nullb_cmd *cmd)
@@ -777,24 +777,22 @@ static void null_complete_rq(struct request *rq)
end_cmd(blk_mq_rq_to_pdu(rq));
}
-static struct nullb_page *null_alloc_page(gfp_t gfp_flags)
+static struct nullb_page *null_alloc_page(void)
{
struct nullb_page *t_page;
- t_page = kmalloc(sizeof(struct nullb_page), gfp_flags);
+ t_page = kmalloc(sizeof(struct nullb_page), GFP_NOIO);
if (!t_page)
- goto out;
+ return NULL;
- t_page->page = alloc_pages(gfp_flags, 0);
- if (!t_page->page)
- goto out_freepage;
+ t_page->page = alloc_pages(GFP_NOIO, 0);
+ if (!t_page->page) {
+ kfree(t_page);
+ return NULL;
+ }
memset(t_page->bitmap, 0, sizeof(t_page->bitmap));
return t_page;
-out_freepage:
- kfree(t_page);
-out:
- return NULL;
}
static void null_free_page(struct nullb_page *t_page)
@@ -932,7 +930,7 @@ static struct nullb_page *null_insert_page(struct nullb *nullb,
spin_unlock_irq(&nullb->lock);
- t_page = null_alloc_page(GFP_NOIO);
+ t_page = null_alloc_page();
if (!t_page)
goto out_lock;
@@ -1476,12 +1474,8 @@ static void null_submit_bio(struct bio *bio)
sector_t nr_sectors = bio_sectors(bio);
struct nullb *nullb = bio->bi_bdev->bd_disk->private_data;
struct nullb_queue *nq = nullb_to_queue(nullb);
- struct nullb_cmd *cmd;
-
- cmd = alloc_cmd(nq, 1);
- cmd->bio = bio;
- null_handle_cmd(cmd, sector, nr_sectors, bio_op(bio));
+ null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio));
}
static bool should_timeout_request(struct request *rq)
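The alloc_cmd() rework above settles on the canonical waitqueue loop: try the allocation, and only when it fails register on the waitqueue, sleep, and retry after a completion frees a command. Roughly, reusing the null_blk names for illustration:

/* sketch of the loop shape; nq->wait is woken from the completion path */
static struct nullb_cmd *alloc_cmd_sketch(struct nullb_queue *nq)
{
	struct nullb_cmd *cmd;
	DEFINE_WAIT(wait);

	for (;;) {
		cmd = __alloc_cmd(nq);
		if (cmd)
			return cmd;
		prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
		io_schedule();
		finish_wait(&nq->wait, &wait);
	}
}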
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 2b6b70a39e76..e745fc29e55d 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -1020,9 +1020,8 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
continue;
bio = pkt->r_bios[f];
- bio_reset(bio);
+ bio_reset(bio, pd->bdev, REQ_OP_READ);
bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
- bio_set_dev(bio, pd->bdev);
bio->bi_end_io = pkt_end_io_read;
bio->bi_private = pkt;
@@ -1034,7 +1033,6 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
BUG();
atomic_inc(&pkt->io_wait);
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
pkt_queue_bio(pd, bio);
frames_read++;
}
@@ -1235,9 +1233,8 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
{
int f;
- bio_reset(pkt->w_bio);
+ bio_reset(pkt->w_bio, pd->bdev, REQ_OP_WRITE);
pkt->w_bio->bi_iter.bi_sector = pkt->sector;
- bio_set_dev(pkt->w_bio, pd->bdev);
pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
pkt->w_bio->bi_private = pkt;
@@ -1270,7 +1267,6 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
/* Start the write request */
atomic_set(&pkt->io_wait, 1);
- bio_set_op_attrs(pkt->w_bio, REQ_OP_WRITE, 0);
pkt_queue_bio(pd, pkt->w_bio);
}
@@ -2298,12 +2294,12 @@ static void pkt_end_io_read_cloned(struct bio *bio)
static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
{
- struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, &pkt_bio_set);
+ struct bio *cloned_bio =
+ bio_alloc_clone(pd->bdev, bio, GFP_NOIO, &pkt_bio_set);
struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO);
psd->pd = pd;
psd->bio = bio;
- bio_set_dev(cloned_bio, pd->bdev);
cloned_bio->bi_private = psd;
cloned_bio->bi_end_io = pkt_end_io_read_cloned;
pd->stats.secs_r += bio_sectors(bio);
@@ -2404,18 +2400,11 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
static void pkt_submit_bio(struct bio *bio)
{
- struct pktcdvd_device *pd;
- char b[BDEVNAME_SIZE];
+ struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata;
struct bio *split;
blk_queue_split(&bio);
- pd = bio->bi_bdev->bd_disk->queue->queuedata;
- if (!pd) {
- pr_err("%s incorrect request queue\n", bio_devname(bio, b));
- goto end_io;
- }
-
pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
(unsigned long long)bio->bi_iter.bi_sector,
(unsigned long long)bio_end_sector(bio));
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 4203cdab8abf..b844432bad20 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -7185,7 +7185,7 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
* IO to complete/fail.
*/
blk_mq_freeze_queue(rbd_dev->disk->queue);
- blk_set_queue_dying(rbd_dev->disk->queue);
+ blk_mark_disk_dead(rbd_dev->disk);
}
del_gendisk(rbd_dev->disk);
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index c08971de369f..2f378684b735 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -23,7 +23,6 @@ MODULE_LICENSE("GPL");
static int rnbd_client_major;
static DEFINE_IDA(index_ida);
-static DEFINE_MUTEX(ida_lock);
static DEFINE_MUTEX(sess_lock);
static LIST_HEAD(sess_list);
@@ -55,9 +54,7 @@ static void rnbd_clt_put_dev(struct rnbd_clt_dev *dev)
if (!refcount_dec_and_test(&dev->refcount))
return;
- mutex_lock(&ida_lock);
- ida_simple_remove(&index_ida, dev->clt_device_id);
- mutex_unlock(&ida_lock);
+ ida_free(&index_ida, dev->clt_device_id);
kfree(dev->hw_queues);
kfree(dev->pathname);
rnbd_clt_put_sess(dev->sess);
@@ -87,7 +84,6 @@ static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev,
dev->discard_granularity = le32_to_cpu(rsp->discard_granularity);
dev->discard_alignment = le32_to_cpu(rsp->discard_alignment);
dev->secure_discard = le16_to_cpu(rsp->secure_discard);
- dev->rotational = rsp->rotational;
dev->wc = !!(rsp->cache_policy & RNBD_WRITEBACK);
dev->fua = !!(rsp->cache_policy & RNBD_FUA);
@@ -1262,9 +1258,9 @@ find_and_get_or_create_sess(const char *sessname,
struct rtrs_clt_ops rtrs_ops;
sess = find_or_create_sess(sessname, &first);
- if (sess == ERR_PTR(-ENOMEM))
+ if (sess == ERR_PTR(-ENOMEM)) {
return ERR_PTR(-ENOMEM);
- else if ((nr_poll_queues && !first) || (!nr_poll_queues && sess->nr_poll_queues)) {
+ } else if ((nr_poll_queues && !first) || (!nr_poll_queues && sess->nr_poll_queues)) {
/*
* A device MUST have its own session to use polling mode.
* Mapping a new device onto the same session must therefore fail.
@@ -1343,7 +1339,7 @@ static inline void rnbd_init_hw_queue(struct rnbd_clt_dev *dev,
static void rnbd_init_mq_hw_queues(struct rnbd_clt_dev *dev)
{
- int i;
+ unsigned long i;
struct blk_mq_hw_ctx *hctx;
struct rnbd_queue *q;
@@ -1410,8 +1406,10 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
dev->read_only = false;
}
- if (!dev->rotational)
- blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue);
+ /*
+ * Network device does not need rotational
+ */
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue);
err = add_disk(dev->gd);
if (err)
blk_cleanup_disk(dev->gd);
@@ -1459,10 +1457,8 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
goto out_alloc;
}
- mutex_lock(&ida_lock);
- ret = ida_simple_get(&index_ida, 0, 1 << (MINORBITS - RNBD_PART_BITS),
- GFP_KERNEL);
- mutex_unlock(&ida_lock);
+ ret = ida_alloc_max(&index_ida, 1 << (MINORBITS - RNBD_PART_BITS),
+ GFP_KERNEL);
if (ret < 0) {
pr_err("Failed to initialize device '%s' from session %s, allocating idr failed, err: %d\n",
pathname, sess->sessname, ret);
@@ -1610,13 +1606,13 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
}
rnbd_clt_info(dev,
- "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_write_same_sectors: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, rotational: %d, wc: %d, fua: %d)\n",
+ "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_write_same_sectors: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, wc: %d, fua: %d)\n",
dev->gd->disk_name, dev->nsectors,
dev->logical_block_size, dev->physical_block_size,
dev->max_write_same_sectors, dev->max_discard_sectors,
dev->discard_granularity, dev->discard_alignment,
dev->secure_discard, dev->max_segments,
- dev->max_hw_sectors, dev->rotational, dev->wc, dev->fua);
+ dev->max_hw_sectors, dev->wc, dev->fua);
mutex_unlock(&dev->lock);
rnbd_clt_put_sess(sess);
diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h
index 0c2cae7f39b9..62bf7c3fa63c 100644
--- a/drivers/block/rnbd/rnbd-clt.h
+++ b/drivers/block/rnbd/rnbd-clt.h
@@ -118,7 +118,6 @@ struct rnbd_clt_dev {
enum rnbd_access_mode access_mode;
u32 nr_poll_queues;
bool read_only;
- bool rotational;
bool wc;
bool fua;
u32 max_hw_sectors;
diff --git a/drivers/block/rnbd/rnbd-proto.h b/drivers/block/rnbd/rnbd-proto.h
index de5d5a8df81d..c4a68b3a1cbe 100644
--- a/drivers/block/rnbd/rnbd-proto.h
+++ b/drivers/block/rnbd/rnbd-proto.h
@@ -128,7 +128,7 @@ enum rnbd_cache_policy {
* @logical_block_size: logical block size device supports in bytes
* @max_segments: max segments hardware support in one transfer
* @secure_discard: supports secure discard
- * @rotation: is a rotational disc?
+ * @obsolete_rotational: obsolete, no longer used.
* @cache_policy: support write-back caching or FUA?
*/
struct rnbd_msg_open_rsp {
@@ -144,7 +144,7 @@ struct rnbd_msg_open_rsp {
__le16 logical_block_size;
__le16 max_segments;
__le16 secure_discard;
- u8 rotational;
+ u8 obsolete_rotational;
u8 cache_policy;
u8 reserved[10];
};
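Renaming the byte to obsolete_rotational rather than deleting it keeps every later field of the on-wire message at its old offset, so old and new peers still agree on the layout. A compile-time check along these lines could pin that down (hypothetical, not part of the patch; static_assert comes from <linux/build_bug.h>):

static_assert(offsetof(struct rnbd_msg_open_rsp, cache_policy) ==
	      offsetof(struct rnbd_msg_open_rsp, obsolete_rotational) + 1,
	      "rnbd_msg_open_rsp wire layout changed");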
diff --git a/drivers/block/rnbd/rnbd-srv-dev.c b/drivers/block/rnbd/rnbd-srv-dev.c
index b241a099aeae..c5d0a0391165 100644
--- a/drivers/block/rnbd/rnbd-srv-dev.c
+++ b/drivers/block/rnbd/rnbd-srv-dev.c
@@ -12,8 +12,7 @@
#include "rnbd-srv-dev.h"
#include "rnbd-log.h"
-struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags,
- struct bio_set *bs)
+struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags)
{
struct rnbd_dev *dev;
int ret;
@@ -30,7 +29,6 @@ struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags,
dev->blk_open_flags = flags;
bdevname(dev->bdev, dev->name);
- dev->ibd_bio_set = bs;
return dev;
@@ -44,60 +42,3 @@ void rnbd_dev_close(struct rnbd_dev *dev)
blkdev_put(dev->bdev, dev->blk_open_flags);
kfree(dev);
}
-
-void rnbd_dev_bi_end_io(struct bio *bio)
-{
- struct rnbd_dev_blk_io *io = bio->bi_private;
-
- rnbd_endio(io->priv, blk_status_to_errno(bio->bi_status));
- bio_put(bio);
-}
-
-/**
- * rnbd_bio_map_kern - map kernel address into bio
- * @data: pointer to buffer to map
- * @bs: bio_set to use.
- * @len: length in bytes
- * @gfp_mask: allocation flags for bio allocation
- *
- * Map the kernel address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *rnbd_bio_map_kern(void *data, struct bio_set *bs,
- unsigned int len, gfp_t gfp_mask)
-{
- unsigned long kaddr = (unsigned long)data;
- unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long start = kaddr >> PAGE_SHIFT;
- const int nr_pages = end - start;
- int offset, i;
- struct bio *bio;
-
- bio = bio_alloc_bioset(gfp_mask, nr_pages, bs);
- if (!bio)
- return ERR_PTR(-ENOMEM);
-
- offset = offset_in_page(kaddr);
- for (i = 0; i < nr_pages; i++) {
- unsigned int bytes = PAGE_SIZE - offset;
-
- if (len <= 0)
- break;
-
- if (bytes > len)
- bytes = len;
-
- if (bio_add_page(bio, virt_to_page(data), bytes,
- offset) < bytes) {
- /* we don't support partial mappings */
- bio_put(bio);
- return ERR_PTR(-EINVAL);
- }
-
- data += bytes;
- len -= bytes;
- offset = 0;
- }
-
- return bio;
-}
diff --git a/drivers/block/rnbd/rnbd-srv-dev.h b/drivers/block/rnbd/rnbd-srv-dev.h
index 0eb23850afb9..2c3df02b5e8e 100644
--- a/drivers/block/rnbd/rnbd-srv-dev.h
+++ b/drivers/block/rnbd/rnbd-srv-dev.h
@@ -14,25 +14,16 @@
struct rnbd_dev {
struct block_device *bdev;
- struct bio_set *ibd_bio_set;
fmode_t blk_open_flags;
char name[BDEVNAME_SIZE];
};
-struct rnbd_dev_blk_io {
- struct rnbd_dev *dev;
- void *priv;
- /* have to be last member for front_pad usage of bioset_init */
- struct bio bio;
-};
-
/**
* rnbd_dev_open() - Open a device
+ * @path: path to open
* @flags: open flags
- * @bs: bio_set to use during block io,
*/
-struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags,
- struct bio_set *bs);
+struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags);
/**
* rnbd_dev_close() - Close a device
@@ -41,11 +32,6 @@ void rnbd_dev_close(struct rnbd_dev *dev);
void rnbd_endio(void *priv, int error);
-void rnbd_dev_bi_end_io(struct bio *bio);
-
-struct bio *rnbd_bio_map_kern(void *data, struct bio_set *bs,
- unsigned int len, gfp_t gfp_mask);
-
static inline int rnbd_dev_get_max_segs(const struct rnbd_dev *dev)
{
return queue_max_segments(bdev_get_queue(dev->bdev));
diff --git a/drivers/block/rnbd/rnbd-srv-sysfs.c b/drivers/block/rnbd/rnbd-srv-sysfs.c
index 4db98e0e76f0..feaa76c5a342 100644
--- a/drivers/block/rnbd/rnbd-srv-sysfs.c
+++ b/drivers/block/rnbd/rnbd-srv-sysfs.c
@@ -13,7 +13,6 @@
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <linux/stat.h>
-#include <linux/genhd.h>
#include <linux/list.h>
#include <linux/moduleparam.h>
#include <linux/device.h>
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index 1ee808fc600c..6499efae5c43 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -114,6 +114,12 @@ rnbd_get_sess_dev(int dev_id, struct rnbd_srv_session *srv_sess)
return sess_dev;
}
+static void rnbd_dev_bi_end_io(struct bio *bio)
+{
+ rnbd_endio(bio->bi_private, blk_status_to_errno(bio->bi_status));
+ bio_put(bio);
+}
+
static int process_rdma(struct rnbd_srv_session *srv_sess,
struct rtrs_srv_op *id, void *data, u32 datalen,
const void *usr, size_t usrlen)
@@ -123,7 +129,6 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
struct rnbd_srv_sess_dev *sess_dev;
u32 dev_id;
int err;
- struct rnbd_dev_blk_io *io;
struct bio *bio;
short prio;
@@ -144,33 +149,29 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
priv->sess_dev = sess_dev;
priv->id = id;
- /* Generate bio with pages pointing to the rdma buffer */
- bio = rnbd_bio_map_kern(data, sess_dev->rnbd_dev->ibd_bio_set, datalen, GFP_KERNEL);
- if (IS_ERR(bio)) {
- err = PTR_ERR(bio);
- rnbd_srv_err(sess_dev, "Failed to generate bio, err: %d\n", err);
- goto sess_dev_put;
+ bio = bio_alloc(sess_dev->rnbd_dev->bdev, 1,
+ rnbd_to_bio_flags(le32_to_cpu(msg->rw)), GFP_KERNEL);
+ if (bio_add_page(bio, virt_to_page(data), datalen,
+ offset_in_page(data)) != datalen) {
+ rnbd_srv_err(sess_dev, "Failed to map data to bio\n");
+ err = -EINVAL;
+ goto bio_put;
}
- io = container_of(bio, struct rnbd_dev_blk_io, bio);
- io->dev = sess_dev->rnbd_dev;
- io->priv = priv;
-
bio->bi_end_io = rnbd_dev_bi_end_io;
- bio->bi_private = io;
- bio->bi_opf = rnbd_to_bio_flags(le32_to_cpu(msg->rw));
+ bio->bi_private = priv;
bio->bi_iter.bi_sector = le64_to_cpu(msg->sector);
bio->bi_iter.bi_size = le32_to_cpu(msg->bi_size);
prio = srv_sess->ver < RNBD_PROTO_VER_MAJOR ||
usrlen < sizeof(*msg) ? 0 : le16_to_cpu(msg->prio);
bio_set_prio(bio, prio);
- bio_set_dev(bio, sess_dev->rnbd_dev->bdev);
submit_bio(bio);
return 0;
-sess_dev_put:
+bio_put:
+ bio_put(bio);
rnbd_put_sess_dev(sess_dev);
err:
kfree(priv);
@@ -251,7 +252,6 @@ static void destroy_sess(struct rnbd_srv_session *srv_sess)
out:
xa_destroy(&srv_sess->index_idr);
- bioset_exit(&srv_sess->sess_bio_set);
pr_info("RTRS Session %s disconnected\n", srv_sess->sessname);
@@ -280,16 +280,6 @@ static int create_sess(struct rtrs_srv_sess *rtrs)
return -ENOMEM;
srv_sess->queue_depth = rtrs_srv_get_queue_depth(rtrs);
- err = bioset_init(&srv_sess->sess_bio_set, srv_sess->queue_depth,
- offsetof(struct rnbd_dev_blk_io, bio),
- BIOSET_NEED_BVECS);
- if (err) {
- pr_err("Allocating srv_session for path %s failed\n",
- pathname);
- kfree(srv_sess);
- return err;
- }
-
xa_init_flags(&srv_sess->index_idr, XA_FLAGS_ALLOC);
INIT_LIST_HEAD(&srv_sess->sess_dev_list);
mutex_init(&srv_sess->lock);
@@ -568,7 +558,6 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
cpu_to_le32(rnbd_dev_get_discard_alignment(rnbd_dev));
rsp->secure_discard =
cpu_to_le16(rnbd_dev_get_secure_discard(rnbd_dev));
- rsp->rotational = !blk_queue_nonrot(q);
rsp->cache_policy = 0;
if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
rsp->cache_policy |= RNBD_WRITEBACK;
@@ -738,8 +727,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
goto reject;
}
- rnbd_dev = rnbd_dev_open(full_path, open_flags,
- &srv_sess->sess_bio_set);
+ rnbd_dev = rnbd_dev_open(full_path, open_flags);
if (IS_ERR(rnbd_dev)) {
pr_err("Opening device '%s' on session %s failed, failed to open the block device, err: %ld\n",
full_path, srv_sess->sessname, PTR_ERR(rnbd_dev));
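process_rdma() now builds its bio directly: a single bio_vec covering the whole buffer, which works because the RTRS RDMA buffer handed in is physically contiguous. Condensed to a sketch (map_rdma_buf() is an illustrative name):

/* sketch: map one physically contiguous kernel buffer into a bio */
static struct bio *map_rdma_buf(struct block_device *bdev, void *data,
				u32 datalen, unsigned int opf)
{
	struct bio *bio = bio_alloc(bdev, 1, opf, GFP_KERNEL);

	if (!bio)
		return ERR_PTR(-ENOMEM);
	if (bio_add_page(bio, virt_to_page(data), datalen,
			 offset_in_page(data)) != datalen) {
		bio_put(bio);
		return ERR_PTR(-EINVAL);
	}
	return bio;
}

That single contiguous region is also why the per-session bio_set and the page-walking loop in rnbd_bio_map_kern() could go: there is nothing left to iterate over.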
diff --git a/drivers/block/rnbd/rnbd-srv.h b/drivers/block/rnbd/rnbd-srv.h
index e5604bce123a..be2ae486d407 100644
--- a/drivers/block/rnbd/rnbd-srv.h
+++ b/drivers/block/rnbd/rnbd-srv.h
@@ -23,7 +23,6 @@ struct rnbd_srv_session {
struct rtrs_srv_sess *rtrs;
char sessname[NAME_MAX];
int queue_depth;
- struct bio_set sess_bio_set;
struct xarray index_idr;
/* List of struct rnbd_srv_sess_dev */
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 146d85d80e0e..dd0a1a6fed29 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -9,7 +9,6 @@
#include <linux/types.h>
#include <linux/blk-mq.h>
#include <linux/hdreg.h>
-#include <linux/genhd.h>
#include <linux/cdrom.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c443cd64fc9b..a8bcf3f664af 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -69,16 +69,6 @@ struct virtio_blk {
/* Process context for config space updates */
struct work_struct config_work;
- /*
- * Tracks references from block_device_operations open/release and
- * virtio_driver probe/remove so this object can be freed once no
- * longer in use.
- */
- refcount_t refs;
-
- /* What host tells us, plus 2 for header & tailer. */
- unsigned int sg_elems;
-
/* Ida index - used to track minor number allocations. */
int index;
@@ -322,8 +312,6 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_status_t status;
int err;
- BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
-
status = virtblk_setup_cmd(vblk->vdev, req, vbr);
if (unlikely(status))
return status;
@@ -391,43 +379,6 @@ out:
return err;
}
-static void virtblk_get(struct virtio_blk *vblk)
-{
- refcount_inc(&vblk->refs);
-}
-
-static void virtblk_put(struct virtio_blk *vblk)
-{
- if (refcount_dec_and_test(&vblk->refs)) {
- ida_simple_remove(&vd_index_ida, vblk->index);
- mutex_destroy(&vblk->vdev_mutex);
- kfree(vblk);
- }
-}
-
-static int virtblk_open(struct block_device *bd, fmode_t mode)
-{
- struct virtio_blk *vblk = bd->bd_disk->private_data;
- int ret = 0;
-
- mutex_lock(&vblk->vdev_mutex);
-
- if (vblk->vdev)
- virtblk_get(vblk);
- else
- ret = -ENXIO;
-
- mutex_unlock(&vblk->vdev_mutex);
- return ret;
-}
-
-static void virtblk_release(struct gendisk *disk, fmode_t mode)
-{
- struct virtio_blk *vblk = disk->private_data;
-
- virtblk_put(vblk);
-}
-
/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
@@ -460,11 +411,19 @@ out:
return ret;
}
+static void virtblk_free_disk(struct gendisk *disk)
+{
+ struct virtio_blk *vblk = disk->private_data;
+
+ ida_simple_remove(&vd_index_ida, vblk->index);
+ mutex_destroy(&vblk->vdev_mutex);
+ kfree(vblk);
+}
+
static const struct block_device_operations virtblk_fops = {
- .owner = THIS_MODULE,
- .open = virtblk_open,
- .release = virtblk_release,
- .getgeo = virtblk_getgeo,
+ .owner = THIS_MODULE,
+ .getgeo = virtblk_getgeo,
+ .free_disk = virtblk_free_disk,
};
static int index_to_minor(int index)
@@ -783,20 +742,15 @@ static int virtblk_probe(struct virtio_device *vdev)
/* Prevent integer overflows and honor max vq size */
sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2);
- /* We need extra sg elements at head and tail. */
- sg_elems += 2;
vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
if (!vblk) {
err = -ENOMEM;
goto out_free_index;
}
- /* This reference is dropped in virtblk_remove(). */
- refcount_set(&vblk->refs, 1);
mutex_init(&vblk->vdev_mutex);
vblk->vdev = vdev;
- vblk->sg_elems = sg_elems;
INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
@@ -853,7 +807,7 @@ static int virtblk_probe(struct virtio_device *vdev)
set_disk_ro(vblk->disk, 1);
/* We can handle whatever the host told us to handle. */
- blk_queue_max_segments(q, vblk->sg_elems-2);
+ blk_queue_max_segments(q, sg_elems);
/* No real sector limit. */
blk_queue_max_hw_sectors(q, -1U);
@@ -925,9 +879,15 @@ static int virtblk_probe(struct virtio_device *vdev)
virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
&v);
+
+ /*
+ * max_discard_seg == 0 is out of spec, but it has always
+ * been handled here.
+ */
+ if (!v)
+ v = sg_elems;
blk_queue_max_discard_segments(q,
- min_not_zero(v,
- MAX_DISCARD_SEGMENTS));
+ min(v, MAX_DISCARD_SEGMENTS));
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
}
@@ -970,7 +930,7 @@ static void virtblk_remove(struct virtio_device *vdev)
flush_work(&vblk->config_work);
del_gendisk(vblk->disk);
- blk_cleanup_disk(vblk->disk);
+ blk_cleanup_queue(vblk->disk->queue);
blk_mq_free_tag_set(&vblk->tag_set);
mutex_lock(&vblk->vdev_mutex);
@@ -986,7 +946,7 @@ static void virtblk_remove(struct virtio_device *vdev)
mutex_unlock(&vblk->vdev_mutex);
- virtblk_put(vblk);
+ put_disk(vblk->disk);
}
#ifdef CONFIG_PM_SLEEP
@@ -1060,7 +1020,7 @@ static struct virtio_driver virtio_blk = {
#endif
};
-static int __init init(void)
+static int __init virtio_blk_init(void)
{
int error;
@@ -1086,14 +1046,14 @@ out_destroy_workqueue:
return error;
}
-static void __exit fini(void)
+static void __exit virtio_blk_fini(void)
{
unregister_virtio_driver(&virtio_blk);
unregister_blkdev(major, "virtblk");
destroy_workqueue(virtblk_wq);
}
-module_init(init);
-module_exit(fini);
+module_init(virtio_blk_init);
+module_exit(virtio_blk_fini);
MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 14e452896d04..d1e26461a64e 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1326,16 +1326,13 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
pages[i]->page,
seg[i].nsec << 9,
seg[i].offset) == 0)) {
- bio = bio_alloc(GFP_KERNEL, bio_max_segs(nseg - i));
- if (unlikely(bio == NULL))
- goto fail_put_bio;
-
+ bio = bio_alloc(preq.bdev, bio_max_segs(nseg - i),
+ operation | operation_flags,
+ GFP_KERNEL);
biolist[nbio++] = bio;
- bio_set_dev(bio, preq.bdev);
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
bio->bi_iter.bi_sector = preq.sector_number;
- bio_set_op_attrs(bio, operation, operation_flags);
}
preq.sector_number += seg[i].nsec;
@@ -1345,15 +1342,11 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
if (!bio) {
BUG_ON(operation_flags != REQ_PREFLUSH);
- bio = bio_alloc(GFP_KERNEL, 0);
- if (unlikely(bio == NULL))
- goto fail_put_bio;
-
+ bio = bio_alloc(preq.bdev, 0, operation | operation_flags,
+ GFP_KERNEL);
biolist[nbio++] = bio;
- bio_set_dev(bio, preq.bdev);
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
- bio_set_op_attrs(bio, operation, operation_flags);
}
atomic_set(&pending_req->pendcnt, nbio);
@@ -1381,14 +1374,6 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
free_req(ring, pending_req);
msleep(1); /* back off a bit */
return -EIO;
-
- fail_put_bio:
- for (i = 0; i < nbio; i++)
- bio_put(biolist[i]);
- atomic_set(&pending_req->pendcnt, 1);
- __end_block_io_op(pending_req, BLK_STS_RESOURCE);
- msleep(1); /* back off a bit */
- return -EIO;
}
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 62125fd4af4a..f09040435e2e 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/kthread.h>
+#include <linux/pagemap.h>
#include <xen/events.h>
#include <xen/grant_table.h>
#include "common.h"
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index ccd0dd0c6b83..85fc550508cc 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1288,7 +1288,8 @@ free_shadow:
rinfo->ring_ref[i] = GRANT_INVALID_REF;
}
}
- free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
+ free_pages_exact(rinfo->ring.sring,
+ info->nr_ring_pages * XEN_PAGE_SIZE);
rinfo->ring.sring = NULL;
if (rinfo->irq)
@@ -1372,9 +1373,15 @@ static int blkif_get_final_status(enum blk_req_status s1,
return BLKIF_RSP_OKAY;
}
-static bool blkif_completion(unsigned long *id,
- struct blkfront_ring_info *rinfo,
- struct blkif_response *bret)
+/*
+ * Return values:
+ * 1 response processed.
+ * 0 waiting for further responses.
+ * -1 error while processing.
+ */
+static int blkif_completion(unsigned long *id,
+ struct blkfront_ring_info *rinfo,
+ struct blkif_response *bret)
{
int i = 0;
struct scatterlist *sg;
@@ -1397,7 +1404,7 @@ static bool blkif_completion(unsigned long *id,
/* Wait the second response if not yet here. */
if (s2->status < REQ_DONE)
- return false;
+ return 0;
bret->status = blkif_get_final_status(s->status,
s2->status);
@@ -1448,42 +1455,43 @@ static bool blkif_completion(unsigned long *id,
}
/* Add the persistent grant into the list of free grants */
for (i = 0; i < num_grant; i++) {
- if (gnttab_query_foreign_access(s->grants_used[i]->gref)) {
+ if (!gnttab_try_end_foreign_access(s->grants_used[i]->gref)) {
/*
* If the grant is still mapped by the backend (the
* backend has chosen to make this grant persistent)
* we add it at the head of the list, so it will be
* reused first.
*/
- if (!info->feature_persistent)
- pr_alert_ratelimited("backed has not unmapped grant: %u\n",
- s->grants_used[i]->gref);
+ if (!info->feature_persistent) {
+ pr_alert("backed has not unmapped grant: %u\n",
+ s->grants_used[i]->gref);
+ return -1;
+ }
list_add(&s->grants_used[i]->node, &rinfo->grants);
rinfo->persistent_gnts_c++;
} else {
/*
- * If the grant is not mapped by the backend we end the
- * foreign access and add it to the tail of the list,
- * so it will not be picked again unless we run out of
- * persistent grants.
+ * If the grant is not mapped by the backend we add it
+ * to the tail of the list, so it will not be picked
+ * again unless we run out of persistent grants.
*/
- gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL);
s->grants_used[i]->gref = GRANT_INVALID_REF;
list_add_tail(&s->grants_used[i]->node, &rinfo->grants);
}
}
if (s->req.operation == BLKIF_OP_INDIRECT) {
for (i = 0; i < INDIRECT_GREFS(num_grant); i++) {
- if (gnttab_query_foreign_access(s->indirect_grants[i]->gref)) {
- if (!info->feature_persistent)
- pr_alert_ratelimited("backed has not unmapped grant: %u\n",
- s->indirect_grants[i]->gref);
+ if (!gnttab_try_end_foreign_access(s->indirect_grants[i]->gref)) {
+ if (!info->feature_persistent) {
+ pr_alert("backed has not unmapped grant: %u\n",
+ s->indirect_grants[i]->gref);
+ return -1;
+ }
list_add(&s->indirect_grants[i]->node, &rinfo->grants);
rinfo->persistent_gnts_c++;
} else {
struct page *indirect_page;
- gnttab_end_foreign_access(s->indirect_grants[i]->gref, 0, 0UL);
/*
* Add the used indirect page back to the list of
* available pages for indirect grefs.
@@ -1498,7 +1506,7 @@ static bool blkif_completion(unsigned long *id,
}
}
- return true;
+ return 1;
}
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -1564,12 +1572,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
}
if (bret.operation != BLKIF_OP_DISCARD) {
+ int ret;
+
/*
* We may need to wait for an extra response if the
* I/O request is split in 2
*/
- if (!blkif_completion(&id, rinfo, &bret))
+ ret = blkif_completion(&id, rinfo, &bret);
+ if (!ret)
continue;
+ if (unlikely(ret < 0))
+ goto err;
}
if (add_id_to_freelist(rinfo, id)) {
@@ -1676,8 +1689,7 @@ static int setup_blkring(struct xenbus_device *dev,
for (i = 0; i < info->nr_ring_pages; i++)
rinfo->ring_ref[i] = GRANT_INVALID_REF;
- sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
- get_order(ring_size));
+ sring = alloc_pages_exact(ring_size, GFP_NOIO);
if (!sring) {
xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
return -ENOMEM;
@@ -1687,7 +1699,7 @@ static int setup_blkring(struct xenbus_device *dev,
err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref);
if (err < 0) {
- free_pages((unsigned long)sring, get_order(ring_size));
+ free_pages_exact(sring, ring_size);
rinfo->ring.sring = NULL;
goto fail;
}
@@ -2126,7 +2138,7 @@ static void blkfront_closing(struct blkfront_info *info)
/* No more blkif_request(). */
blk_mq_stop_hw_queues(info->rq);
- blk_set_queue_dying(info->rq);
+ blk_mark_disk_dead(info->gd);
set_capacity(info->gd, 0);
for_each_rinfo(info, rinfo, i) {
@@ -2521,6 +2533,7 @@ static void purge_persistent_grants(struct blkfront_info *info)
for_each_rinfo(info, rinfo, i) {
struct grant *gnt_list_entry, *tmp;
+ LIST_HEAD(grants);
spin_lock_irqsave(&rinfo->ring_lock, flags);
@@ -2532,16 +2545,17 @@ static void purge_persistent_grants(struct blkfront_info *info)
list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants,
node) {
if (gnt_list_entry->gref == GRANT_INVALID_REF ||
- gnttab_query_foreign_access(gnt_list_entry->gref))
+ !gnttab_try_end_foreign_access(gnt_list_entry->gref))
continue;
list_del(&gnt_list_entry->node);
- gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL);
rinfo->persistent_gnts_c--;
gnt_list_entry->gref = GRANT_INVALID_REF;
- list_add_tail(&gnt_list_entry->node, &rinfo->grants);
+ list_add_tail(&gnt_list_entry->node, &grants);
}
+ list_splice_tail(&grants, &rinfo->grants);
+
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
}
}
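Both blkfront hunks above replace the racy two-step (gnttab_query_foreign_access(), then gnttab_end_foreign_access()) with gnttab_try_end_foreign_access(), which checks and revokes in a single atomic step. The per-grant decision it feeds reduces to roughly this sketch, reusing blkfront's struct grant; it returns 0 when the grant was safely recycled and -1 when the backend violated the protocol:

static int recycle_grant(struct grant *gnt, bool persistent,
			 struct list_head *free_list)
{
	if (gnttab_try_end_foreign_access(gnt->gref)) {
		/* unmapped: park at the tail, reuse only as a last resort */
		gnt->gref = GRANT_INVALID_REF;
		list_add_tail(&gnt->node, free_list);
		return 0;
	}
	if (!persistent)
		return -1;	/* backend kept a grant it never should have */
	/* persistent grant still mapped: put it at the head, reuse first */
	list_add(&gnt->node, free_list);
	return 0;
}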
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index cb253d80d72b..e9474b02012d 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -22,7 +22,6 @@
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
-#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
@@ -617,24 +616,21 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
{
struct bio *bio;
- bio = bio_alloc(GFP_NOIO, 1);
+ bio = bio_alloc(zram->bdev, 1, parent ? parent->bi_opf : REQ_OP_READ,
+ GFP_NOIO);
if (!bio)
return -ENOMEM;
bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
- bio_set_dev(bio, zram->bdev);
if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
bio_put(bio);
return -EIO;
}
- if (!parent) {
- bio->bi_opf = REQ_OP_READ;
+ if (!parent)
bio->bi_end_io = zram_page_end_io;
- } else {
- bio->bi_opf = parent->bi_opf;
+ else
bio_chain(bio, parent);
- }
submit_bio(bio);
return 1;
@@ -747,10 +743,9 @@ static ssize_t writeback_store(struct device *dev,
continue;
}
- bio_init(&bio, &bio_vec, 1);
- bio_set_dev(&bio, zram->bdev);
+ bio_init(&bio, zram->bdev, &bio_vec, 1,
+ REQ_OP_WRITE | REQ_SYNC);
bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
- bio.bi_opf = REQ_OP_WRITE | REQ_SYNC;
bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
bvec.bv_offset);
@@ -1336,12 +1331,10 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
goto out;
if (is_partial_io(bvec)) {
- void *dst = kmap_atomic(bvec->bv_page);
void *src = kmap_atomic(page);
- memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
+ memcpy_to_bvec(bvec, src + offset);
kunmap_atomic(src);
- kunmap_atomic(dst);
}
out:
if (is_partial_io(bvec))
@@ -1472,7 +1465,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
{
int ret;
struct page *page = NULL;
- void *src;
struct bio_vec vec;
vec = *bvec;
@@ -1490,11 +1482,9 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
if (ret)
goto out;
- src = kmap_atomic(bvec->bv_page);
dst = kmap_atomic(page);
- memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
+ memcpy_from_bvec(dst + offset, bvec);
kunmap_atomic(dst);
- kunmap_atomic(src);
vec.bv_page = page;
vec.bv_len = PAGE_SIZE;
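The two zram copy hunks are pure substitutions: memcpy_to_bvec() and memcpy_from_bvec() fold away the open-coded kmap/memcpy/kunmap triple around a single bio_vec. Approximately what the helper expands to (a sketch, not the exact <linux/bvec.h> body):

static inline void copy_to_bvec_sketch(struct bio_vec *bv, const void *src)
{
	void *dst = kmap_local_page(bv->bv_page);

	memcpy(dst + bv->bv_offset, src, bv->bv_len);
	kunmap_local(dst);
}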
diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c
index 3a258a677df8..b79895810c52 100644
--- a/drivers/bus/mhi/pci_generic.c
+++ b/drivers/bus/mhi/pci_generic.c
@@ -366,6 +366,7 @@ static const struct mhi_pci_dev_info mhi_foxconn_sdx55_info = {
.config = &modem_foxconn_sdx55_config,
.bar_num = MHI_PCI_DEFAULT_BAR_NUM,
.dma_data_width = 32,
+ .mru_default = 32768,
.sideband_wake = false,
};
@@ -401,6 +402,7 @@ static const struct mhi_pci_dev_info mhi_mv31_info = {
.config = &modem_mv31_config,
.bar_num = MHI_PCI_DEFAULT_BAR_NUM,
.dma_data_width = 32,
+ .mru_default = 32768,
};
static const struct mhi_channel_config mhi_sierra_em919x_channels[] = {
diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c
index fd87a59837fa..5eb0fe73ddc4 100644
--- a/drivers/bus/moxtet.c
+++ b/drivers/bus/moxtet.c
@@ -815,7 +815,7 @@ static int moxtet_probe(struct spi_device *spi)
return 0;
}
-static int moxtet_remove(struct spi_device *spi)
+static void moxtet_remove(struct spi_device *spi)
{
struct moxtet *moxtet = spi_get_drvdata(spi);
@@ -828,8 +828,6 @@ static int moxtet_remove(struct spi_device *spi)
device_for_each_child(moxtet->dev, NULL, __unregister);
mutex_destroy(&moxtet->lock);
-
- return 0;
}
static const struct of_device_id moxtet_dt_ids[] = {
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index faead41709bc..8e78b37d0f6a 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -15,7 +15,6 @@
#include <linux/slab.h>
#include <linux/dma-mapping.h>
#include <linux/cdrom.h>
-#include <linux/genhd.h>
#include <linux/bio.h>
#include <linux/blk-mq.h>
#include <linux/interrupt.h>
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 9704963f9d50..a087156a5818 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -401,7 +401,7 @@ config HW_RANDOM_MESON
config HW_RANDOM_CAVIUM
tristate "Cavium ThunderX Random Number Generator support"
- depends on HW_RANDOM && PCI && ARM64
+ depends on HW_RANDOM && PCI && ARCH_THUNDER
default HW_RANDOM
help
This driver provides kernel-side support for the Random Number
diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c
index ecb71c4317a5..b8effe77d80f 100644
--- a/drivers/char/hw_random/atmel-rng.c
+++ b/drivers/char/hw_random/atmel-rng.c
@@ -13,13 +13,16 @@
#include <linux/err.h>
#include <linux/clk.h>
#include <linux/io.h>
+#include <linux/iopoll.h>
#include <linux/hw_random.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#define TRNG_CR 0x00
#define TRNG_MR 0x04
#define TRNG_ISR 0x1c
+#define TRNG_ISR_DATRDY BIT(0)
#define TRNG_ODATA 0x50
#define TRNG_KEY 0x524e4700 /* RNG */
@@ -34,37 +37,79 @@ struct atmel_trng {
struct clk *clk;
void __iomem *base;
struct hwrng rng;
+ bool has_half_rate;
};
+static bool atmel_trng_wait_ready(struct atmel_trng *trng, bool wait)
+{
+ int ready;
+
+ ready = readl(trng->base + TRNG_ISR) & TRNG_ISR_DATRDY;
+ if (!ready && wait)
+ readl_poll_timeout(trng->base + TRNG_ISR, ready,
+ ready & TRNG_ISR_DATRDY, 1000, 20000);
+
+ return !!ready;
+}
+
static int atmel_trng_read(struct hwrng *rng, void *buf, size_t max,
bool wait)
{
struct atmel_trng *trng = container_of(rng, struct atmel_trng, rng);
u32 *data = buf;
+ int ret;
- /* data ready? */
- if (readl(trng->base + TRNG_ISR) & 1) {
- *data = readl(trng->base + TRNG_ODATA);
- /*
- ensure data ready is only set again AFTER the next data
- word is ready in case it got set between checking ISR
- and reading ODATA, so we don't risk re-reading the
- same word
- */
- readl(trng->base + TRNG_ISR);
- return 4;
- } else
- return 0;
+ ret = pm_runtime_get_sync((struct device *)trng->rng.priv);
+ if (ret < 0) {
+ pm_runtime_put_sync((struct device *)trng->rng.priv);
+ return ret;
+ }
+
+ ret = atmel_trng_wait_ready(trng, wait);
+ if (!ret)
+ goto out;
+
+ *data = readl(trng->base + TRNG_ODATA);
+ /*
+ * ensure data ready is only set again AFTER the next data word is ready
+ * in case it got set between checking ISR and reading ODATA, so we
+ * don't risk re-reading the same word
+ */
+ readl(trng->base + TRNG_ISR);
+ ret = 4;
+
+out:
+ pm_runtime_mark_last_busy((struct device *)trng->rng.priv);
+ pm_runtime_put_sync_autosuspend((struct device *)trng->rng.priv);
+ return ret;
}
-static void atmel_trng_enable(struct atmel_trng *trng)
+static int atmel_trng_init(struct atmel_trng *trng)
{
+ unsigned long rate;
+ int ret;
+
+ ret = clk_prepare_enable(trng->clk);
+ if (ret)
+ return ret;
+
+ if (trng->has_half_rate) {
+ rate = clk_get_rate(trng->clk);
+
+ /* if peripheral clk is above 100MHz, set HALFR */
+ if (rate > 100000000)
+ writel(TRNG_HALFR, trng->base + TRNG_MR);
+ }
+
writel(TRNG_KEY | 1, trng->base + TRNG_CR);
+
+ return 0;
}
-static void atmel_trng_disable(struct atmel_trng *trng)
+static void atmel_trng_cleanup(struct atmel_trng *trng)
{
writel(TRNG_KEY, trng->base + TRNG_CR);
+ clk_disable_unprepare(trng->clk);
}
static int atmel_trng_probe(struct platform_device *pdev)
@@ -88,32 +133,31 @@ static int atmel_trng_probe(struct platform_device *pdev)
if (!data)
return -ENODEV;
- if (data->has_half_rate) {
- unsigned long rate = clk_get_rate(trng->clk);
-
- /* if peripheral clk is above 100MHz, set HALFR */
- if (rate > 100000000)
- writel(TRNG_HALFR, trng->base + TRNG_MR);
- }
-
- ret = clk_prepare_enable(trng->clk);
- if (ret)
- return ret;
-
- atmel_trng_enable(trng);
+ trng->has_half_rate = data->has_half_rate;
trng->rng.name = pdev->name;
trng->rng.read = atmel_trng_read;
+ trng->rng.priv = (unsigned long)&pdev->dev;
+ platform_set_drvdata(pdev, trng);
- ret = devm_hwrng_register(&pdev->dev, &trng->rng);
+#ifndef CONFIG_PM
+ ret = atmel_trng_init(trng);
if (ret)
- goto err_register;
+ return ret;
+#endif
- platform_set_drvdata(pdev, trng);
+ pm_runtime_set_autosuspend_delay(&pdev->dev, 100);
+ pm_runtime_use_autosuspend(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
- return 0;
+ ret = devm_hwrng_register(&pdev->dev, &trng->rng);
+ if (ret) {
+ pm_runtime_disable(&pdev->dev);
+ pm_runtime_set_suspended(&pdev->dev);
+#ifndef CONFIG_PM
+ atmel_trng_cleanup(trng);
+#endif
+ }
-err_register:
- clk_disable_unprepare(trng->clk);
return ret;
}
@@ -121,43 +165,35 @@ static int atmel_trng_remove(struct platform_device *pdev)
{
struct atmel_trng *trng = platform_get_drvdata(pdev);
-
- atmel_trng_disable(trng);
- clk_disable_unprepare(trng->clk);
+ atmel_trng_cleanup(trng);
+ pm_runtime_disable(&pdev->dev);
+ pm_runtime_set_suspended(&pdev->dev);
return 0;
}
-#ifdef CONFIG_PM
-static int atmel_trng_suspend(struct device *dev)
+static int __maybe_unused atmel_trng_runtime_suspend(struct device *dev)
{
struct atmel_trng *trng = dev_get_drvdata(dev);
- atmel_trng_disable(trng);
- clk_disable_unprepare(trng->clk);
+ atmel_trng_cleanup(trng);
return 0;
}
-static int atmel_trng_resume(struct device *dev)
+static int __maybe_unused atmel_trng_runtime_resume(struct device *dev)
{
struct atmel_trng *trng = dev_get_drvdata(dev);
- int ret;
- ret = clk_prepare_enable(trng->clk);
- if (ret)
- return ret;
-
- atmel_trng_enable(trng);
-
- return 0;
+ return atmel_trng_init(trng);
}
-static const struct dev_pm_ops atmel_trng_pm_ops = {
- .suspend = atmel_trng_suspend,
- .resume = atmel_trng_resume,
+static const struct dev_pm_ops __maybe_unused atmel_trng_pm_ops = {
+ SET_RUNTIME_PM_OPS(atmel_trng_runtime_suspend,
+ atmel_trng_runtime_resume, NULL)
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
};
-#endif /* CONFIG_PM */
static const struct atmel_trng_data at91sam9g45_config = {
.has_half_rate = false,
@@ -185,9 +221,7 @@ static struct platform_driver atmel_trng_driver = {
.remove = atmel_trng_remove,
.driver = {
.name = "atmel-trng",
-#ifdef CONFIG_PM
- .pm = &atmel_trng_pm_ops,
-#endif /* CONFIG_PM */
+ .pm = pm_ptr(&atmel_trng_pm_ops),
.of_match_table = atmel_trng_dt_ids,
},
};
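The atmel read path now brackets every hardware access with runtime PM. One subtlety the hunk encodes: pm_runtime_get_sync() can fail with the usage count already incremented, so the error branch must still put the reference. Reduced to its skeleton (read_with_runtime_pm() is an illustrative name):

static int read_with_runtime_pm(struct device *dev)
{
	int ret = pm_runtime_get_sync(dev);

	if (ret < 0) {
		pm_runtime_put_sync(dev);	/* balance the count on error */
		return ret;
	}
	/* ... touch the hardware ... */
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_sync_autosuspend(dev);
	return 0;
}

pm_runtime_resume_and_get() packages the same get-or-put dance, though the patch keeps the explicit form.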
diff --git a/drivers/char/hw_random/cavium-rng-vf.c b/drivers/char/hw_random/cavium-rng-vf.c
index 6f66919652bf..7c55f4cf4a8b 100644
--- a/drivers/char/hw_random/cavium-rng-vf.c
+++ b/drivers/char/hw_random/cavium-rng-vf.c
@@ -179,7 +179,7 @@ static int cavium_map_pf_regs(struct cavium_rng *rng)
pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
PCI_DEVID_CAVIUM_RNG_PF, NULL);
if (!pdev) {
- dev_err(&pdev->dev, "Cannot find RNG PF device\n");
+ pr_err("Cannot find RNG PF device\n");
return -EIO;
}
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index a3db27916256..16f227b995e8 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -15,6 +15,7 @@
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hw_random.h>
+#include <linux/random.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/sched/signal.h>
@@ -31,7 +32,7 @@ static struct hwrng *current_rng;
/* the current rng has been explicitly chosen by user via sysfs */
static int cur_rng_set_by_user;
static struct task_struct *hwrng_fill;
-/* list of registered rngs, sorted decending by quality */
+/* list of registered rngs */
static LIST_HEAD(rng_list);
/* Protects rng_list and current_rng */
static DEFINE_MUTEX(rng_mutex);
@@ -44,14 +45,14 @@ static unsigned short default_quality; /* = 0; default to "off" */
module_param(current_quality, ushort, 0644);
MODULE_PARM_DESC(current_quality,
- "current hwrng entropy estimation per 1024 bits of input");
+ "current hwrng entropy estimation per 1024 bits of input -- obsolete, use rng_quality instead");
module_param(default_quality, ushort, 0644);
MODULE_PARM_DESC(default_quality,
"default entropy content of hwrng per 1024 bits of input");
static void drop_current_rng(void);
static int hwrng_init(struct hwrng *rng);
-static void start_khwrngd(void);
+static void hwrng_manage_rngd(struct hwrng *rng);
static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size,
int wait);
@@ -64,13 +65,12 @@ static size_t rng_buffer_size(void)
static void add_early_randomness(struct hwrng *rng)
{
int bytes_read;
- size_t size = min_t(size_t, 16, rng_buffer_size());
mutex_lock(&reading_mutex);
- bytes_read = rng_get_data(rng, rng_buffer, size, 0);
+ bytes_read = rng_get_data(rng, rng_fillbuf, 32, 0);
mutex_unlock(&reading_mutex);
if (bytes_read > 0)
- add_device_randomness(rng_buffer, bytes_read);
+ add_device_randomness(rng_fillbuf, bytes_read);
}
static inline void cleanup_rng(struct kref *kref)
@@ -161,14 +161,13 @@ static int hwrng_init(struct hwrng *rng)
reinit_completion(&rng->cleanup_done);
skip_init:
- current_quality = rng->quality ? : default_quality;
- if (current_quality > 1024)
- current_quality = 1024;
+ if (!rng->quality)
+ rng->quality = default_quality;
+ if (rng->quality > 1024)
+ rng->quality = 1024;
+ current_quality = rng->quality; /* obsolete */
- if (current_quality == 0 && hwrng_fill)
- kthread_stop(hwrng_fill);
- if (current_quality > 0 && !hwrng_fill)
- start_khwrngd();
+ hwrng_manage_rngd(rng);
return 0;
}
@@ -298,24 +297,28 @@ static struct miscdevice rng_miscdev = {
static int enable_best_rng(void)
{
+ struct hwrng *rng, *new_rng = NULL;
int ret = -ENODEV;
BUG_ON(!mutex_is_locked(&rng_mutex));
- /* rng_list is sorted by quality, use the best (=first) one */
- if (!list_empty(&rng_list)) {
- struct hwrng *new_rng;
-
- new_rng = list_entry(rng_list.next, struct hwrng, list);
- ret = ((new_rng == current_rng) ? 0 : set_current_rng(new_rng));
- if (!ret)
- cur_rng_set_by_user = 0;
- } else {
+ /* no rng to use? */
+ if (list_empty(&rng_list)) {
drop_current_rng();
cur_rng_set_by_user = 0;
- ret = 0;
+ return 0;
+ }
+
+ /* use the rng which offers the best quality */
+ list_for_each_entry(rng, &rng_list, list) {
+ if (!new_rng || rng->quality > new_rng->quality)
+ new_rng = rng;
}
+ ret = ((new_rng == current_rng) ? 0 : set_current_rng(new_rng));
+ if (!ret)
+ cur_rng_set_by_user = 0;
+
return ret;
}
@@ -336,8 +339,9 @@ static ssize_t rng_current_store(struct device *dev,
} else {
list_for_each_entry(rng, &rng_list, list) {
if (sysfs_streq(rng->name, buf)) {
- cur_rng_set_by_user = 1;
err = set_current_rng(rng);
+ if (!err)
+ cur_rng_set_by_user = 1;
break;
}
}
@@ -399,14 +403,76 @@ static ssize_t rng_selected_show(struct device *dev,
return sysfs_emit(buf, "%d\n", cur_rng_set_by_user);
}
+static ssize_t rng_quality_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t ret;
+ struct hwrng *rng;
+
+ rng = get_current_rng();
+ if (IS_ERR(rng))
+ return PTR_ERR(rng);
+
+ if (!rng) /* no need to put_rng */
+ return -ENODEV;
+
+ ret = sysfs_emit(buf, "%hu\n", rng->quality);
+ put_rng(rng);
+
+ return ret;
+}
+
+static ssize_t rng_quality_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ u16 quality;
+ int ret = -EINVAL;
+
+ if (len < 2)
+ return -EINVAL;
+
+ ret = mutex_lock_interruptible(&rng_mutex);
+ if (ret)
+ return -ERESTARTSYS;
+
+ ret = kstrtou16(buf, 0, &quality);
+ if (ret || quality > 1024) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (!current_rng) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ current_rng->quality = quality;
+ current_quality = quality; /* obsolete */
+
+ /* the best available RNG may have changed */
+ ret = enable_best_rng();
+
+ /* start/stop rngd if necessary */
+ if (current_rng)
+ hwrng_manage_rngd(current_rng);
+
+out:
+ mutex_unlock(&rng_mutex);
+ return ret ? ret : len;
+}
+
static DEVICE_ATTR_RW(rng_current);
static DEVICE_ATTR_RO(rng_available);
static DEVICE_ATTR_RO(rng_selected);
+static DEVICE_ATTR_RW(rng_quality);
static struct attribute *rng_dev_attrs[] = {
&dev_attr_rng_current.attr,
&dev_attr_rng_available.attr,
&dev_attr_rng_selected.attr,
+ &dev_attr_rng_quality.attr,
NULL
};
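For reference, the attribute group above hangs off the hw_random misc device, so the new knob should surface next to rng_current; the exact sysfs path below is an assumption for illustration, not part of the diff. A minimal userspace sketch that caps the current device's quality at 512/1024 bits of entropy per input bit:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical example: the sysfs path is assumed; rng_quality accepts
 * values 0..1024, in 1/1024ths of a bit of entropy per bit of input.
 */
int main(void)
{
	const char *path = "/sys/class/misc/hw_random/rng_quality";
	const char *val = "512\n";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, val, strlen(val)) < 0)
		perror("write");
	close(fd);
	return 0;
}

As the store handler above shows, writing 0 stops the hwrng kthread via hwrng_manage_rngd(), and any change may also cause enable_best_rng() to switch devices.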
@@ -424,9 +490,11 @@ static int __init register_miscdev(void)
static int hwrng_fillfn(void *unused)
{
+ size_t entropy, entropy_credit = 0; /* in 1/1024 of a bit */
long rc;
while (!kthread_should_stop()) {
+ unsigned short quality;
struct hwrng *rng;
rng = get_current_rng();
@@ -435,27 +503,49 @@ static int hwrng_fillfn(void *unused)
mutex_lock(&reading_mutex);
rc = rng_get_data(rng, rng_fillbuf,
rng_buffer_size(), 1);
+ if (current_quality != rng->quality)
+ rng->quality = current_quality; /* obsolete */
+ quality = rng->quality;
mutex_unlock(&reading_mutex);
put_rng(rng);
+
+ if (!quality)
+ break;
+
if (rc <= 0) {
pr_warn("hwrng: no data available\n");
msleep_interruptible(10000);
continue;
}
+
+ /* If we cannot credit at least one bit of entropy,
+ * keep track of the remainder for the next iteration
+ */
+ entropy = rc * quality * 8 + entropy_credit;
+ if ((entropy >> 10) == 0)
+ entropy_credit = entropy;
+
/* Outside lock, sure, but y'know: randomness. */
add_hwgenerator_randomness((void *)rng_fillbuf, rc,
- rc * current_quality * 8 >> 10);
+ entropy >> 10);
}
hwrng_fill = NULL;
return 0;
}
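The sub-bit bookkeeping above is easier to see with numbers. Below is a standalone, simplified sketch, not the kernel flow: it clears the carry whenever a whole bit is credited, and the quality and read size are made-up values chosen so the carry actually matters.

#include <stdio.h>
#include <stddef.h>

/* Simplified sketch of the sub-bit entropy accounting. quality is in
 * 1/1024ths of a bit of entropy per bit of input, so a read of rc bytes
 * is worth rc * quality * 8 units of 1/1024 bit.
 */
int main(void)
{
	size_t entropy_credit = 0;          /* in 1/1024 of a bit */
	const unsigned short quality = 2;   /* a very weak source */
	const size_t rc = 32;               /* bytes per read */

	for (int i = 0; i < 4; i++) {
		size_t entropy = rc * quality * 8 + entropy_credit;
		size_t credit_bits = entropy >> 10;

		entropy_credit = credit_bits ? 0 : entropy;
		printf("read %d: credit %zu bit(s), carry %zu/1024\n",
		       i, credit_bits, entropy_credit);
	}
	return 0;
}

With quality = 2, each 32-byte read is worth 512/1024 of a bit, so every second read credits one whole bit and the rest rides along in entropy_credit.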
-static void start_khwrngd(void)
+static void hwrng_manage_rngd(struct hwrng *rng)
{
- hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng");
- if (IS_ERR(hwrng_fill)) {
- pr_err("hwrng_fill thread creation failed\n");
- hwrng_fill = NULL;
+ if (WARN_ON(!mutex_is_locked(&rng_mutex)))
+ return;
+
+ if (rng->quality == 0 && hwrng_fill)
+ kthread_stop(hwrng_fill);
+ if (rng->quality > 0 && !hwrng_fill) {
+ hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng");
+ if (IS_ERR(hwrng_fill)) {
+ pr_err("hwrng_fill thread creation failed\n");
+ hwrng_fill = NULL;
+ }
}
}
@@ -463,7 +553,6 @@ int hwrng_register(struct hwrng *rng)
{
int err = -EINVAL;
struct hwrng *tmp;
- struct list_head *rng_list_ptr;
bool is_new_current = false;
if (!rng->name || (!rng->data_read && !rng->read))
@@ -477,18 +566,11 @@ int hwrng_register(struct hwrng *rng)
if (strcmp(tmp->name, rng->name) == 0)
goto out_unlock;
}
+ list_add_tail(&rng->list, &rng_list);
init_completion(&rng->cleanup_done);
complete(&rng->cleanup_done);
- /* rng_list is sorted by decreasing quality */
- list_for_each(rng_list_ptr, &rng_list) {
- tmp = list_entry(rng_list_ptr, struct hwrng, list);
- if (tmp->quality < rng->quality)
- break;
- }
- list_add_tail(&rng->list, rng_list_ptr);
-
if (!current_rng ||
(!cur_rng_set_by_user && rng->quality > current_rng->quality)) {
/*
@@ -638,7 +720,7 @@ static void __exit hwrng_modexit(void)
unregister_miscdev();
}
-module_init(hwrng_modinit);
+fs_initcall(hwrng_modinit); /* depends on misc_register() */
module_exit(hwrng_modexit);
MODULE_DESCRIPTION("H/W Random Number Generator (RNG) driver");
diff --git a/drivers/char/hw_random/nomadik-rng.c b/drivers/char/hw_random/nomadik-rng.c
index 67947a19aa22..e8f9621e7954 100644
--- a/drivers/char/hw_random/nomadik-rng.c
+++ b/drivers/char/hw_random/nomadik-rng.c
@@ -65,14 +65,14 @@ static int nmk_rng_probe(struct amba_device *dev, const struct amba_id *id)
out_release:
amba_release_regions(dev);
out_clk:
- clk_disable(rng_clk);
+ clk_disable_unprepare(rng_clk);
return ret;
}
static void nmk_rng_remove(struct amba_device *dev)
{
amba_release_regions(dev);
- clk_disable(rng_clk);
+ clk_disable_unprepare(rng_clk);
}
static const struct amba_id nmk_rng_ids[] = {
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 68613f0b6887..66ce7c03a142 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1,320 +1,28 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
- * random.c -- A strong random number generator
- *
* Copyright (C) 2017-2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
- *
* Copyright Matt Mackall <mpm@selenic.com>, 2003, 2004, 2005
- *
- * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All
- * rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * ALTERNATIVELY, this product may be distributed under the terms of
- * the GNU General Public License, in which case the provisions of the GPL are
- * required INSTEAD OF the above restrictions. (This clause is
- * necessary due to a potential bad interaction between the GPL and
- * the restrictions contained in a BSD-style copyright.)
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- */
-
-/*
- * (now, with legal B.S. out of the way.....)
- *
- * This routine gathers environmental noise from device drivers, etc.,
- * and returns good random numbers, suitable for cryptographic use.
- * Besides the obvious cryptographic uses, these numbers are also good
- * for seeding TCP sequence numbers, and other places where it is
- * desirable to have numbers which are not only random, but hard to
- * predict by an attacker.
- *
- * Theory of operation
- * ===================
- *
- * Computers are very predictable devices. Hence it is extremely hard
- * to produce truly random numbers on a computer --- as opposed to
- * pseudo-random numbers, which can easily generated by using a
- * algorithm. Unfortunately, it is very easy for attackers to guess
- * the sequence of pseudo-random number generators, and for some
- * applications this is not acceptable. So instead, we must try to
- * gather "environmental noise" from the computer's environment, which
- * must be hard for outside attackers to observe, and use that to
- * generate random numbers. In a Unix environment, this is best done
- * from inside the kernel.
- *
- * Sources of randomness from the environment include inter-keyboard
- * timings, inter-interrupt timings from some interrupts, and other
- * events which are both (a) non-deterministic and (b) hard for an
- * outside observer to measure. Randomness from these sources are
- * added to an "entropy pool", which is mixed using a CRC-like function.
- * This is not cryptographically strong, but it is adequate assuming
- * the randomness is not chosen maliciously, and it is fast enough that
- * the overhead of doing it on every interrupt is very reasonable.
- * As random bytes are mixed into the entropy pool, the routines keep
- * an *estimate* of how many bits of randomness have been stored into
- * the random number generator's internal state.
- *
- * When random bytes are desired, they are obtained by taking the BLAKE2s
- * hash of the contents of the "entropy pool". The BLAKE2s hash avoids
- * exposing the internal state of the entropy pool. It is believed to
- * be computationally infeasible to derive any useful information
- * about the input of BLAKE2s from its output. Even if it is possible to
- * analyze BLAKE2s in some clever way, as long as the amount of data
- * returned from the generator is less than the inherent entropy in
- * the pool, the output data is totally unpredictable. For this
- * reason, the routine decreases its internal estimate of how many
- * bits of "true randomness" are contained in the entropy pool as it
- * outputs random numbers.
- *
- * If this estimate goes to zero, the routine can still generate
- * random numbers; however, an attacker may (at least in theory) be
- * able to infer the future output of the generator from prior
- * outputs. This requires successful cryptanalysis of BLAKE2s, which is
- * not believed to be feasible, but there is a remote possibility.
- * Nonetheless, these numbers should be useful for the vast majority
- * of purposes.
- *
- * Exported interfaces ---- output
- * ===============================
- *
- * There are four exported interfaces; two for use within the kernel,
- * and two for use from userspace.
- *
- * Exported interfaces ---- userspace output
- * -----------------------------------------
- *
- * The userspace interfaces are two character devices /dev/random and
- * /dev/urandom. /dev/random is suitable for use when very high
- * quality randomness is desired (for example, for key generation or
- * one-time pads), as it will only return a maximum of the number of
- * bits of randomness (as estimated by the random number generator)
- * contained in the entropy pool.
- *
- * The /dev/urandom device does not have this limit, and will return
- * as many bytes as are requested. As more and more random bytes are
- * requested without giving time for the entropy pool to recharge,
- * this will result in random numbers that are merely cryptographically
- * strong. For many applications, however, this is acceptable.
- *
- * Exported interfaces ---- kernel output
- * --------------------------------------
- *
- * The primary kernel interface is
- *
- * void get_random_bytes(void *buf, int nbytes);
- *
- * This interface will return the requested number of random bytes,
- * and place it in the requested buffer. This is equivalent to a
- * read from /dev/urandom.
- *
- * For less critical applications, there are the functions:
- *
- * u32 get_random_u32()
- * u64 get_random_u64()
- * unsigned int get_random_int()
- * unsigned long get_random_long()
- *
- * These are produced by a cryptographic RNG seeded from get_random_bytes,
- * and so do not deplete the entropy pool as much. These are recommended
- * for most in-kernel operations *if the result is going to be stored in
- * the kernel*.
- *
- * Specifically, the get_random_int() family do not attempt to do
- * "anti-backtracking". If you capture the state of the kernel (e.g.
- * by snapshotting the VM), you can figure out previous get_random_int()
- * return values. But if the value is stored in the kernel anyway,
- * this is not a problem.
- *
- * It *is* safe to expose get_random_int() output to attackers (e.g. as
- * network cookies); given outputs 1..n, it's not feasible to predict
- * outputs 0 or n+1. The only concern is an attacker who breaks into
- * the kernel later; the get_random_int() engine is not reseeded as
- * often as the get_random_bytes() one.
- *
- * get_random_bytes() is needed for keys that need to stay secret after
- * they are erased from the kernel. For example, any key that will
- * be wrapped and stored encrypted. And session encryption keys: we'd
- * like to know that after the session is closed and the keys erased,
- * the plaintext is unrecoverable to someone who recorded the ciphertext.
- *
- * But for network ports/cookies, stack canaries, PRNG seeds, address
- * space layout randomization, session *authentication* keys, or other
- * applications where the sensitive data is stored in the kernel in
- * plaintext for as long as it's sensitive, the get_random_int() family
- * is just fine.
- *
- * Consider ASLR. We want to keep the address space secret from an
- * outside attacker while the process is running, but once the address
- * space is torn down, it's of no use to an attacker any more. And it's
- * stored in kernel data structures as long as it's alive, so worrying
- * about an attacker's ability to extrapolate it from the get_random_int()
- * CRNG is silly.
- *
- * Even some cryptographic keys are safe to generate with get_random_int().
- * In particular, keys for SipHash are generally fine. Here, knowledge
- * of the key authorizes you to do something to a kernel object (inject
- * packets to a network connection, or flood a hash table), and the
- * key is stored with the object being protected. Once it goes away,
- * we no longer care if anyone knows the key.
- *
- * prandom_u32()
- * -------------
- *
- * For even weaker applications, see the pseudorandom generator
- * prandom_u32(), prandom_max(), and prandom_bytes(). If the random
- * numbers aren't security-critical at all, these are *far* cheaper.
- * Useful for self-tests, random error simulation, randomized backoffs,
- * and any other application where you trust that nobody is trying to
- * maliciously mess with you by guessing the "random" numbers.
- *
- * Exported interfaces ---- input
- * ==============================
- *
- * The current exported interfaces for gathering environmental noise
- * from the devices are:
- *
- * void add_device_randomness(const void *buf, unsigned int size);
- * void add_input_randomness(unsigned int type, unsigned int code,
- * unsigned int value);
- * void add_interrupt_randomness(int irq);
- * void add_disk_randomness(struct gendisk *disk);
- * void add_hwgenerator_randomness(const char *buffer, size_t count,
- * size_t entropy);
- * void add_bootloader_randomness(const void *buf, unsigned int size);
- *
- * add_device_randomness() is for adding data to the random pool that
- * is likely to differ between two devices (or possibly even per boot).
- * This would be things like MAC addresses or serial numbers, or the
- * read-out of the RTC. This does *not* add any actual entropy to the
- * pool, but it initializes the pool to different values for devices
- * that might otherwise be identical and have very little entropy
- * available to them (particularly common in the embedded world).
- *
- * add_input_randomness() uses the input layer interrupt timing, as well as
- * the event type information from the hardware.
- *
- * add_interrupt_randomness() uses the interrupt timing as random
- * inputs to the entropy pool. Using the cycle counters and the irq source
- * as inputs, it feeds the randomness roughly once a second.
- *
- * add_disk_randomness() uses what amounts to the seek time of block
- * layer request events, on a per-disk_devt basis, as input to the
- * entropy pool. Note that high-speed solid state drives with very low
- * seek times do not make for good sources of entropy, as their seek
- * times are usually fairly consistent.
- *
- * All of these routines try to estimate how many bits of randomness a
- * particular randomness source. They do this by keeping track of the
- * first and second order deltas of the event timings.
- *
- * add_hwgenerator_randomness() is for true hardware RNGs, and will credit
- * entropy as specified by the caller. If the entropy pool is full it will
- * block until more entropy is needed.
- *
- * add_bootloader_randomness() is the same as add_hwgenerator_randomness() or
- * add_device_randomness(), depending on whether or not the configuration
- * option CONFIG_RANDOM_TRUST_BOOTLOADER is set.
- *
- * Ensuring unpredictability at system startup
- * ============================================
- *
- * When any operating system starts up, it will go through a sequence
- * of actions that are fairly predictable by an adversary, especially
- * if the start-up does not involve interaction with a human operator.
- * This reduces the actual number of bits of unpredictability in the
- * entropy pool below the value in entropy_count. In order to
- * counteract this effect, it helps to carry information in the
- * entropy pool across shut-downs and start-ups. To do this, put the
- * following lines an appropriate script which is run during the boot
- * sequence:
- *
- * echo "Initializing random number generator..."
- * random_seed=/var/run/random-seed
- * # Carry a random seed from start-up to start-up
- * # Load and then save the whole entropy pool
- * if [ -f $random_seed ]; then
- * cat $random_seed >/dev/urandom
- * else
- * touch $random_seed
- * fi
- * chmod 600 $random_seed
- * dd if=/dev/urandom of=$random_seed count=1 bs=512
- *
- * and the following lines in an appropriate script which is run as
- * the system is shutdown:
- *
- * # Carry a random seed from shut-down to start-up
- * # Save the whole entropy pool
- * echo "Saving random seed..."
- * random_seed=/var/run/random-seed
- * touch $random_seed
- * chmod 600 $random_seed
- * dd if=/dev/urandom of=$random_seed count=1 bs=512
- *
- * For example, on most modern systems using the System V init
- * scripts, such code fragments would be found in
- * /etc/rc.d/init.d/random. On older Linux systems, the correct script
- * location might be in /etc/rcb.d/rc.local or /etc/rc.d/rc.0.
- *
- * Effectively, these commands cause the contents of the entropy pool
- * to be saved at shut-down time and reloaded into the entropy pool at
- * start-up. (The 'dd' in the addition to the bootup script is to
- * make sure that /etc/random-seed is different for every start-up,
- * even if the system crashes without executing rc.0.) Even with
- * complete knowledge of the start-up activities, predicting the state
- * of the entropy pool requires knowledge of the previous history of
- * the system.
- *
- * Configuring the /dev/random driver under Linux
- * ==============================================
- *
- * The /dev/random driver under Linux uses minor numbers 8 and 9 of
- * the /dev/mem major number (#1). So if your system does not have
- * /dev/random and /dev/urandom created already, they can be created
- * by using the commands:
- *
- * mknod /dev/random c 1 8
- * mknod /dev/urandom c 1 9
- *
- * Acknowledgements:
- * =================
- *
- * Ideas for constructing this random number generator were derived
- * from Pretty Good Privacy's random number generator, and from private
- * discussions with Phil Karn. Colin Plumb provided a faster random
- * number generator, which speed up the mixing function of the entropy
- * pool, taken from PGPfone. Dale Worley has also contributed many
- * useful ideas and suggestions to improve this driver.
- *
- * Any flaws in the design are solely my responsibility, and should
- * not be attributed to the Phil, Colin, or any of authors of PGP.
- *
- * Further background information on this topic may be obtained from
- * RFC 1750, "Randomness Recommendations for Security", by Donald
- * Eastlake, Steve Crocker, and Jeff Schiller.
+ * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All rights reserved.
+ *
+ * This driver produces cryptographically secure pseudorandom data. It is divided
+ * into roughly six sections, each with a section header:
+ *
+ * - Initialization and readiness waiting.
+ * - Fast key erasure RNG, the "crng".
+ * - Entropy accumulation and extraction routines.
+ * - Entropy collection routines.
+ * - Userspace reader/writer interfaces.
+ * - Sysctl interface.
+ *
+ * The high level overview is that there is one input pool, into which
+ * various pieces of data are hashed. Some of that data is then "credited" as
+ * having a certain number of bits of entropy. When enough bits of entropy are
+ * available, the hash is finalized and handed as a key to a stream cipher that
+ * expands it indefinitely for various consumers. This key is periodically
+ * refreshed as the various entropy collectors, described below, add data to the
+ * input pool and credit it. There is currently no Fortuna-like scheduler
+ * involved, which can lead to malicious entropy sources causing a premature
+ * reseed, and the entropy estimates are, at best, conservative guesses.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -330,7 +38,7 @@
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/nodemask.h>
@@ -344,744 +52,947 @@
#include <linux/syscalls.h>
#include <linux/completion.h>
#include <linux/uuid.h>
+#include <linux/uaccess.h>
#include <crypto/chacha.h>
#include <crypto/blake2s.h>
-
#include <asm/processor.h>
-#include <linux/uaccess.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/io.h>
-#define CREATE_TRACE_POINTS
-#include <trace/events/random.h>
-
-/* #define ADD_INTERRUPT_BENCH */
-
-/*
- * If the entropy count falls under this number of bits, then we
- * should wake up processes which are selecting or polling on write
- * access to /dev/random.
- */
-static int random_write_wakeup_bits = 28 * (1 << 5);
-
-/*
- * Originally, we used a primitive polynomial of degree .poolwords
- * over GF(2). The taps for various sizes are defined below. They
- * were chosen to be evenly spaced except for the last tap, which is 1
- * to get the twisting happening as fast as possible.
- *
- * For the purposes of better mixing, we use the CRC-32 polynomial as
- * well to make a (modified) twisted Generalized Feedback Shift
- * Register. (See M. Matsumoto & Y. Kurita, 1992. Twisted GFSR
- * generators. ACM Transactions on Modeling and Computer Simulation
- * 2(3):179-194. Also see M. Matsumoto & Y. Kurita, 1994. Twisted
- * GFSR generators II. ACM Transactions on Modeling and Computer
- * Simulation 4:254-266)
+/*********************************************************************
*
- * Thanks to Colin Plumb for suggesting this.
+ * Initialization and readiness waiting.
*
- * The mixing operation is much less sensitive than the output hash,
- * where we use BLAKE2s. All that we want of mixing operation is that
- * it be a good non-cryptographic hash; i.e. it not produce collisions
- * when fed "random" data of the sort we expect to see. As long as
- * the pool state differs for different inputs, we have preserved the
- * input entropy and done a good job. The fact that an intelligent
- * attacker can construct inputs that will produce controlled
- * alterations to the pool's state is not important because we don't
- * consider such inputs to contribute any randomness. The only
- * property we need with respect to them is that the attacker can't
- * increase his/her knowledge of the pool's state. Since all
- * additions are reversible (knowing the final state and the input,
- * you can reconstruct the initial state), if an attacker has any
- * uncertainty about the initial state, he/she can only shuffle that
- * uncertainty about, but never cause any collisions (which would
- * decrease the uncertainty).
+ * Much of the RNG infrastructure is devoted to various dependencies
+ * being able to wait until the RNG has collected enough entropy and
+ * is ready for safe consumption.
*
- * Our mixing functions were analyzed by Lacharme, Roeck, Strubel, and
- * Videau in their paper, "The Linux Pseudorandom Number Generator
- * Revisited" (see: http://eprint.iacr.org/2012/251.pdf). In their
- * paper, they point out that we are not using a true Twisted GFSR,
- * since Matsumoto & Kurita used a trinomial feedback polynomial (that
- * is, with only three taps, instead of the six that we are using).
- * As a result, the resulting polynomial is neither primitive nor
- * irreducible, and hence does not have a maximal period over
- * GF(2**32). They suggest a slight change to the generator
- * polynomial which improves the resulting TGFSR polynomial to be
- * irreducible, which we have made here.
- */
-enum poolinfo {
- POOL_WORDS = 128,
- POOL_WORDMASK = POOL_WORDS - 1,
- POOL_BYTES = POOL_WORDS * sizeof(u32),
- POOL_BITS = POOL_BYTES * 8,
- POOL_BITSHIFT = ilog2(POOL_BITS),
-
- /* To allow fractional bits to be tracked, the entropy_count field is
- * denominated in units of 1/8th bits. */
- POOL_ENTROPY_SHIFT = 3,
-#define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT)
- POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT,
-
- /* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */
- POOL_TAP1 = 104,
- POOL_TAP2 = 76,
- POOL_TAP3 = 51,
- POOL_TAP4 = 25,
- POOL_TAP5 = 1,
-
- EXTRACT_SIZE = BLAKE2S_HASH_SIZE / 2
-};
-
-/*
- * Static global variables
- */
-static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
-static struct fasync_struct *fasync;
-
-static DEFINE_SPINLOCK(random_ready_list_lock);
-static LIST_HEAD(random_ready_list);
-
-struct crng_state {
- u32 state[16];
- unsigned long init_time;
- spinlock_t lock;
-};
-
-static struct crng_state primary_crng = {
- .lock = __SPIN_LOCK_UNLOCKED(primary_crng.lock),
- .state[0] = CHACHA_CONSTANT_EXPA,
- .state[1] = CHACHA_CONSTANT_ND_3,
- .state[2] = CHACHA_CONSTANT_2_BY,
- .state[3] = CHACHA_CONSTANT_TE_K,
-};
+ *********************************************************************/
/*
* crng_init = 0 --> Uninitialized
* 1 --> Initialized
* 2 --> Initialized from input_pool
*
- * crng_init is protected by primary_crng->lock, and only increases
+ * crng_init is protected by base_crng->lock, and only increases
* its value (from 0->1->2).
*/
static int crng_init = 0;
-static bool crng_need_final_init = false;
#define crng_ready() (likely(crng_init > 1))
-static int crng_init_cnt = 0;
-static unsigned long crng_global_init_time = 0;
-#define CRNG_INIT_CNT_THRESH (2 * CHACHA_KEY_SIZE)
-static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]);
-static void _crng_backtrack_protect(struct crng_state *crng,
- u8 tmp[CHACHA_BLOCK_SIZE], int used);
-static void process_random_ready_list(void);
-static void _get_random_bytes(void *buf, int nbytes);
+/* Various types of waiters for crng_init->2 transition. */
+static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
+static struct fasync_struct *fasync;
+static DEFINE_SPINLOCK(random_ready_chain_lock);
+static RAW_NOTIFIER_HEAD(random_ready_chain);
+/* Control how we warn userspace. */
static struct ratelimit_state unseeded_warning =
RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3);
static struct ratelimit_state urandom_warning =
RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3);
-
static int ratelimit_disable __read_mostly;
-
module_param_named(ratelimit_disable, ratelimit_disable, int, 0644);
MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression");
-/**********************************************************************
- *
- * OS independent entropy store. Here are the functions which handle
- * storing entropy in an entropy pool.
+/*
+ * Returns whether or not the input pool has been seeded and thus guaranteed
+ * to supply cryptographically secure random numbers. This applies to: the
+ * /dev/urandom device, the get_random_bytes function, and the get_random_{u32,
+ * u64,int,long} family of functions.
*
- **********************************************************************/
-
-static u32 input_pool_data[POOL_WORDS] __latent_entropy;
-
-static struct {
- spinlock_t lock;
- u16 add_ptr;
- u16 input_rotate;
- int entropy_count;
-} input_pool = {
- .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
-};
+ * Returns: true if the input pool has been seeded.
+ * false if the input pool has not been seeded.
+ */
+bool rng_is_initialized(void)
+{
+ return crng_ready();
+}
+EXPORT_SYMBOL(rng_is_initialized);
-static ssize_t extract_entropy(void *buf, size_t nbytes, int min);
-static ssize_t _extract_entropy(void *buf, size_t nbytes);
+/* Used by wait_for_random_bytes(), and considered an entropy collector, below. */
+static void try_to_generate_entropy(void);
-static void crng_reseed(struct crng_state *crng, bool use_input_pool);
+/*
+ * Wait for the input pool to be seeded and thus guaranteed to supply
+ * cryptographically secure random numbers. This applies to: the /dev/urandom
+ * device, the get_random_bytes function, and the get_random_{u32,u64,int,long}
+ * family of functions. Using any of these functions without first calling
+ * this function forfeits the guarantee of security.
+ *
+ * Returns: 0 if the input pool has been seeded.
+ * -ERESTARTSYS if the function was interrupted by a signal.
+ */
+int wait_for_random_bytes(void)
+{
+ while (!crng_ready()) {
+ int ret;
-static const u32 twist_table[8] = {
- 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158,
- 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 };
+ try_to_generate_entropy();
+ ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ);
+ if (ret)
+ return ret > 0 ? 0 : ret;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(wait_for_random_bytes);
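A hypothetical caller sketch of the contract described above: wait first, then draw bytes, and propagate the error if a signal arrives while waiting. The helper name is made up for illustration.

/* Hypothetical helper, not from the patch. */
static int example_generate_key(u8 *key, size_t len)
{
	int ret = wait_for_random_bytes();

	if (ret)
		return ret;	/* interrupted by a signal */
	get_random_bytes(key, len);
	return 0;
}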
/*
- * This function adds bytes into the entropy "pool". It does not
- * update the entropy estimate. The caller should call
- * credit_entropy_bits if this is appropriate.
+ * Add a callback function that will be invoked when the input
+ * pool is initialised.
*
- * The pool is stirred with a primitive polynomial of the appropriate
- * degree, and then twisted. We twist by three bits at a time because
- * it's cheap to do so and helps slightly in the expected case where
- * the entropy is concentrated in the low-order bits.
+ * returns: 0 if callback is successfully added
+ * -EALREADY if pool is already initialised (callback not called)
*/
-static void _mix_pool_bytes(const void *in, int nbytes)
+int register_random_ready_notifier(struct notifier_block *nb)
{
- unsigned long i;
- int input_rotate;
- const u8 *bytes = in;
- u32 w;
-
- input_rotate = input_pool.input_rotate;
- i = input_pool.add_ptr;
-
- /* mix one byte at a time to simplify size handling and churn faster */
- while (nbytes--) {
- w = rol32(*bytes++, input_rotate);
- i = (i - 1) & POOL_WORDMASK;
-
- /* XOR in the various taps */
- w ^= input_pool_data[i];
- w ^= input_pool_data[(i + POOL_TAP1) & POOL_WORDMASK];
- w ^= input_pool_data[(i + POOL_TAP2) & POOL_WORDMASK];
- w ^= input_pool_data[(i + POOL_TAP3) & POOL_WORDMASK];
- w ^= input_pool_data[(i + POOL_TAP4) & POOL_WORDMASK];
- w ^= input_pool_data[(i + POOL_TAP5) & POOL_WORDMASK];
-
- /* Mix the result back in with a twist */
- input_pool_data[i] = (w >> 3) ^ twist_table[w & 7];
+ unsigned long flags;
+ int ret = -EALREADY;
- /*
- * Normally, we add 7 bits of rotation to the pool.
- * At the beginning of the pool, add an extra 7 bits
- * rotation, so that successive passes spread the
- * input bits across the pool evenly.
- */
- input_rotate = (input_rotate + (i ? 7 : 14)) & 31;
- }
+ if (crng_ready())
+ return ret;
- input_pool.input_rotate = input_rotate;
- input_pool.add_ptr = i;
+ spin_lock_irqsave(&random_ready_chain_lock, flags);
+ if (!crng_ready())
+ ret = raw_notifier_chain_register(&random_ready_chain, nb);
+ spin_unlock_irqrestore(&random_ready_chain_lock, flags);
+ return ret;
}
-static void __mix_pool_bytes(const void *in, int nbytes)
+/*
+ * Delete a previously registered readiness callback function.
+ */
+int unregister_random_ready_notifier(struct notifier_block *nb)
{
- trace_mix_pool_bytes_nolock(nbytes, _RET_IP_);
- _mix_pool_bytes(in, nbytes);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&random_ready_chain_lock, flags);
+ ret = raw_notifier_chain_unregister(&random_ready_chain, nb);
+ spin_unlock_irqrestore(&random_ready_chain_lock, flags);
+ return ret;
}
-static void mix_pool_bytes(const void *in, int nbytes)
+static void process_random_ready_list(void)
{
unsigned long flags;
- trace_mix_pool_bytes(nbytes, _RET_IP_);
- spin_lock_irqsave(&input_pool.lock, flags);
- _mix_pool_bytes(in, nbytes);
- spin_unlock_irqrestore(&input_pool.lock, flags);
+ spin_lock_irqsave(&random_ready_chain_lock, flags);
+ raw_notifier_call_chain(&random_ready_chain, 0, NULL);
+ spin_unlock_irqrestore(&random_ready_chain_lock, flags);
}
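A hypothetical registration sketch for the notifier API above; the -EALREADY fallback matters because the pool may already be seeded by the time a late-loading module registers. All example_* names are made up.

/* Hypothetical module-side usage, not from the patch. */
static int example_seeded(struct notifier_block *nb,
			  unsigned long action, void *data)
{
	pr_info("example: input pool is seeded\n");
	return NOTIFY_DONE;
}

static struct notifier_block example_nb = {
	.notifier_call = example_seeded,
};

static int __init example_init(void)
{
	int ret = register_random_ready_notifier(&example_nb);

	if (ret == -EALREADY) {
		example_seeded(&example_nb, 0, NULL);	/* run it now */
		return 0;
	}
	return ret;
}

A real module would also call unregister_random_ready_notifier() on exit if the callback has not fired by then.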
-struct fast_pool {
- u32 pool[4];
- unsigned long last;
- u16 reg_idx;
- u8 count;
-};
+#define warn_unseeded_randomness(previous) \
+ _warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous))
-/*
- * This is a fast mixing routine used by the interrupt randomness
- * collector. It's hardcoded for an 128 bit pool and assumes that any
- * locks that might be needed are taken by the caller.
- */
-static void fast_mix(struct fast_pool *f)
+static void _warn_unseeded_randomness(const char *func_name, void *caller, void **previous)
{
- u32 a = f->pool[0], b = f->pool[1];
- u32 c = f->pool[2], d = f->pool[3];
+#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM
+ const bool print_once = false;
+#else
+ static bool print_once __read_mostly;
+#endif
- a += b; c += d;
- b = rol32(b, 6); d = rol32(d, 27);
- d ^= a; b ^= c;
+ if (print_once || crng_ready() ||
+ (previous && (caller == READ_ONCE(*previous))))
+ return;
+ WRITE_ONCE(*previous, caller);
+#ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM
+ print_once = true;
+#endif
+ if (__ratelimit(&unseeded_warning))
+ printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n",
+ func_name, caller, crng_init);
+}
- a += b; c += d;
- b = rol32(b, 16); d = rol32(d, 14);
- d ^= a; b ^= c;
- a += b; c += d;
- b = rol32(b, 6); d = rol32(d, 27);
- d ^= a; b ^= c;
+/*********************************************************************
+ *
+ * Fast key erasure RNG, the "crng".
+ *
+ * These functions expand entropy from the entropy extractor into
+ * long streams for external consumption using the "fast key erasure"
+ * RNG described at <https://blog.cr.yp.to/20170723-random.html>.
+ *
+ * There are a few exported interfaces for use by other drivers:
+ *
+ * void get_random_bytes(void *buf, size_t nbytes)
+ * u32 get_random_u32()
+ * u64 get_random_u64()
+ * unsigned int get_random_int()
+ * unsigned long get_random_long()
+ *
+ * These interfaces will return the requested number of random bytes
+ * into the given buffer or as a return value. This is equivalent to
+ * a read from /dev/urandom. The integer family of functions may be
+ * higher performance for one-off random integers, because they do a
+ * bit of buffering.
+ *
+ *********************************************************************/
- a += b; c += d;
- b = rol32(b, 16); d = rol32(d, 14);
- d ^= a; b ^= c;
+enum {
+ CRNG_RESEED_INTERVAL = 300 * HZ,
+ CRNG_INIT_CNT_THRESH = 2 * CHACHA_KEY_SIZE
+};
- f->pool[0] = a; f->pool[1] = b;
- f->pool[2] = c; f->pool[3] = d;
- f->count++;
-}
+static struct {
+ u8 key[CHACHA_KEY_SIZE] __aligned(__alignof__(long));
+ unsigned long birth;
+ unsigned long generation;
+ spinlock_t lock;
+} base_crng = {
+ .lock = __SPIN_LOCK_UNLOCKED(base_crng.lock)
+};
-static void process_random_ready_list(void)
+struct crng {
+ u8 key[CHACHA_KEY_SIZE];
+ unsigned long generation;
+ local_lock_t lock;
+};
+
+static DEFINE_PER_CPU(struct crng, crngs) = {
+ .generation = ULONG_MAX,
+ .lock = INIT_LOCAL_LOCK(crngs.lock),
+};
+
+/* Used by crng_reseed() to extract a new seed from the input pool. */
+static bool drain_entropy(void *buf, size_t nbytes, bool force);
+
+/*
+ * This extracts a new crng key from the input pool, but only if there is a
+ * sufficient amount of entropy available or force is true, in order to
+ * mitigate bruteforcing of newly added bits.
+ */
+static void crng_reseed(bool force)
{
unsigned long flags;
- struct random_ready_callback *rdy, *tmp;
+ unsigned long next_gen;
+ u8 key[CHACHA_KEY_SIZE];
+ bool finalize_init = false;
- spin_lock_irqsave(&random_ready_list_lock, flags);
- list_for_each_entry_safe(rdy, tmp, &random_ready_list, list) {
- struct module *owner = rdy->owner;
+ /* Only reseed if we can, to prevent brute forcing a small amount of new bits. */
+ if (!drain_entropy(key, sizeof(key), force))
+ return;
- list_del_init(&rdy->list);
- rdy->func(rdy);
- module_put(owner);
+ /*
+ * We copy the new key into the base_crng, overwriting the old one,
+ * and update the generation counter. We avoid hitting ULONG_MAX,
+ * because the per-cpu crngs are initialized to ULONG_MAX, so this
+ * forces new CPUs that come online to always initialize.
+ */
+ spin_lock_irqsave(&base_crng.lock, flags);
+ memcpy(base_crng.key, key, sizeof(base_crng.key));
+ next_gen = base_crng.generation + 1;
+ if (next_gen == ULONG_MAX)
+ ++next_gen;
+ WRITE_ONCE(base_crng.generation, next_gen);
+ WRITE_ONCE(base_crng.birth, jiffies);
+ if (!crng_ready()) {
+ crng_init = 2;
+ finalize_init = true;
+ }
+ spin_unlock_irqrestore(&base_crng.lock, flags);
+ memzero_explicit(key, sizeof(key));
+ if (finalize_init) {
+ process_random_ready_list();
+ wake_up_interruptible(&crng_init_wait);
+ kill_fasync(&fasync, SIGIO, POLL_IN);
+ pr_notice("crng init done\n");
+ if (unseeded_warning.missed) {
+ pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n",
+ unseeded_warning.missed);
+ unseeded_warning.missed = 0;
+ }
+ if (urandom_warning.missed) {
+ pr_notice("%d urandom warning(s) missed due to ratelimiting\n",
+ urandom_warning.missed);
+ urandom_warning.missed = 0;
+ }
}
- spin_unlock_irqrestore(&random_ready_list_lock, flags);
}
/*
- * Credit (or debit) the entropy store with n bits of entropy.
- * Use credit_entropy_bits_safe() if the value comes from userspace
- * or otherwise should be checked for extreme values.
+ * This generates a ChaCha block using the provided key, and then
+ * immediately overwrites that key with half the block. It returns
+ * the resultant ChaCha state to the user, along with the second
+ * half of the block containing 32 bytes of random data that may
+ * be used; random_data_len may not be greater than 32.
*/
-static void credit_entropy_bits(int nbits)
+static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE],
+ u32 chacha_state[CHACHA_STATE_WORDS],
+ u8 *random_data, size_t random_data_len)
{
- int entropy_count, entropy_bits, orig;
- int nfrac = nbits << POOL_ENTROPY_SHIFT;
-
- /* Ensure that the multiplication can avoid being 64 bits wide. */
- BUILD_BUG_ON(2 * (POOL_ENTROPY_SHIFT + POOL_BITSHIFT) > 31);
+ u8 first_block[CHACHA_BLOCK_SIZE];
- if (!nbits)
- return;
+ BUG_ON(random_data_len > 32);
-retry:
- entropy_count = orig = READ_ONCE(input_pool.entropy_count);
- if (nfrac < 0) {
- /* Debit */
- entropy_count += nfrac;
- } else {
- /*
- * Credit: we have to account for the possibility of
- * overwriting already present entropy. Even in the
- * ideal case of pure Shannon entropy, new contributions
- * approach the full value asymptotically:
- *
- * entropy <- entropy + (pool_size - entropy) *
- * (1 - exp(-add_entropy/pool_size))
- *
- * For add_entropy <= pool_size/2 then
- * (1 - exp(-add_entropy/pool_size)) >=
- * (add_entropy/pool_size)*0.7869...
- * so we can approximate the exponential with
- * 3/4*add_entropy/pool_size and still be on the
- * safe side by adding at most pool_size/2 at a time.
- *
- * The use of pool_size-2 in the while statement is to
- * prevent rounding artifacts from making the loop
- * arbitrarily long; this limits the loop to log2(pool_size)*2
- * turns no matter how large nbits is.
- */
- int pnfrac = nfrac;
- const int s = POOL_BITSHIFT + POOL_ENTROPY_SHIFT + 2;
- /* The +2 corresponds to the /4 in the denominator */
-
- do {
- unsigned int anfrac = min(pnfrac, POOL_FRACBITS / 2);
- unsigned int add =
- ((POOL_FRACBITS - entropy_count) * anfrac * 3) >> s;
-
- entropy_count += add;
- pnfrac -= anfrac;
- } while (unlikely(entropy_count < POOL_FRACBITS - 2 && pnfrac));
- }
+ chacha_init_consts(chacha_state);
+ memcpy(&chacha_state[4], key, CHACHA_KEY_SIZE);
+ memset(&chacha_state[12], 0, sizeof(u32) * 4);
+ chacha20_block(chacha_state, first_block);
- if (WARN_ON(entropy_count < 0)) {
- pr_warn("negative entropy/overflow: count %d\n", entropy_count);
- entropy_count = 0;
- } else if (entropy_count > POOL_FRACBITS)
- entropy_count = POOL_FRACBITS;
- if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig)
- goto retry;
-
- trace_credit_entropy_bits(nbits, entropy_count >> POOL_ENTROPY_SHIFT, _RET_IP_);
+ memcpy(key, first_block, CHACHA_KEY_SIZE);
+ memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len);
+ memzero_explicit(first_block, sizeof(first_block));
+}
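To make the erasure pattern concrete outside of kernel context, here is a toy standalone sketch. The stand-in PRF is deliberately trivial and is not ChaCha, and all names (toy_prf, fast_key_erasure) are made up; only the split of one output block into next-key and output halves is the point.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define KEY_LEN 32
#define BLOCK_LEN 64

/* Trivial stand-in for the ChaCha block function; NOT cryptographic. */
static void toy_prf(const uint8_t key[KEY_LEN], uint8_t block[BLOCK_LEN])
{
	for (int i = 0; i < BLOCK_LEN; i++)
		block[i] = (uint8_t)(key[i % KEY_LEN] * 31 + i);
}

static void fast_key_erasure(uint8_t key[KEY_LEN],
			     uint8_t *out, size_t out_len)
{
	uint8_t block[BLOCK_LEN];

	toy_prf(key, block);
	memcpy(key, block, KEY_LEN);		/* first half: next key */
	memcpy(out, block + KEY_LEN, out_len);	/* second half: output */
	memset(block, 0, sizeof(block));	/* wipe the scratch block */
}

int main(void)
{
	uint8_t key[KEY_LEN] = { 1 }, out[32];

	fast_key_erasure(key, out, sizeof(out));
	printf("first output byte: %u\n", out[0]);
	return 0;
}

The security property comes from the real construction: recovering the overwritten key would require inverting ChaCha20, so a later state compromise does not expose earlier outputs.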
- entropy_bits = entropy_count >> POOL_ENTROPY_SHIFT;
- if (crng_init < 2 && entropy_bits >= 128)
- crng_reseed(&primary_crng, true);
+/*
+ * Return whether the crng seed is considered to be sufficiently
+ * old that a reseeding might be attempted. This happens if the last
+ * reseeding was CRNG_RESEED_INTERVAL ago, or during early boot, at
+ * an interval proportional to the uptime.
+ */
+static bool crng_has_old_seed(void)
+{
+ static bool early_boot = true;
+ unsigned long interval = CRNG_RESEED_INTERVAL;
+
+ if (unlikely(READ_ONCE(early_boot))) {
+ time64_t uptime = ktime_get_seconds();
+ if (uptime >= CRNG_RESEED_INTERVAL / HZ * 2)
+ WRITE_ONCE(early_boot, false);
+ else
+ interval = max_t(unsigned int, 5 * HZ,
+ (unsigned int)uptime / 2 * HZ);
+ }
+ return time_after(jiffies, READ_ONCE(base_crng.birth) + interval);
}
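Concretely, with CRNG_RESEED_INTERVAL at 300 seconds: 4 seconds of uptime yields an interval clamped up to 5 seconds, 60 seconds of uptime yields a 30 second interval, and once uptime reaches 600 seconds (twice the interval) the early-boot path switches itself off and the fixed 300 second cadence applies.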
-static int credit_entropy_bits_safe(int nbits)
+/*
+ * This function returns a ChaCha state that you may use for generating
+ * random data. It also returns up to 32 bytes on its own of random data
+ * that may be used; random_data_len may not be greater than 32.
+ */
+static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
+ u8 *random_data, size_t random_data_len)
{
- if (nbits < 0)
- return -EINVAL;
+ unsigned long flags;
+ struct crng *crng;
- /* Cap the value to avoid overflows */
- nbits = min(nbits, POOL_BITS);
+ BUG_ON(random_data_len > 32);
- credit_entropy_bits(nbits);
- return 0;
-}
+ /*
+ * For the fast path, we check whether we're ready without taking the
+ * lock, and then re-check once locked. In the case where we're really not
+ * ready, we do fast key erasure with the base_crng directly, because
+ * this is what crng_pre_init_inject() mutates during early init.
+ */
+ if (!crng_ready()) {
+ bool ready;
+
+ spin_lock_irqsave(&base_crng.lock, flags);
+ ready = crng_ready();
+ if (!ready)
+ crng_fast_key_erasure(base_crng.key, chacha_state,
+ random_data, random_data_len);
+ spin_unlock_irqrestore(&base_crng.lock, flags);
+ if (!ready)
+ return;
+ }
-/*********************************************************************
- *
- * CRNG using CHACHA20
- *
- *********************************************************************/
+ /*
+ * If the base_crng is old enough, we try to reseed, which in turn
+ * bumps the generation counter that we check below.
+ */
+ if (unlikely(crng_has_old_seed()))
+ crng_reseed(false);
-#define CRNG_RESEED_INTERVAL (300 * HZ)
+ local_lock_irqsave(&crngs.lock, flags);
+ crng = raw_cpu_ptr(&crngs);
-static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
+ /*
+ * If our per-cpu crng is older than the base_crng, then it means
+ * somebody reseeded the base_crng. In that case, we do fast key
+ * erasure on the base_crng, and use its output as the new key
+ * for our per-cpu crng. This brings us up to date with base_crng.
+ */
+ if (unlikely(crng->generation != READ_ONCE(base_crng.generation))) {
+ spin_lock(&base_crng.lock);
+ crng_fast_key_erasure(base_crng.key, chacha_state,
+ crng->key, sizeof(crng->key));
+ crng->generation = base_crng.generation;
+ spin_unlock(&base_crng.lock);
+ }
+
+ /*
+ * Finally, when we've made it this far, our per-cpu crng has an up
+ * to date key, and we can do fast key erasure with it to produce
+ * some random data and a ChaCha state for the caller. All other
+ * branches of this function are "unlikely", so most of the time we
+ * should wind up here immediately.
+ */
+ crng_fast_key_erasure(crng->key, chacha_state, random_data, random_data_len);
+ local_unlock_irqrestore(&crngs.lock, flags);
+}
/*
- * Hack to deal with crazy userspace progams when they are all trying
- * to access /dev/urandom in parallel. The programs are almost
- * certainly doing something terribly wrong, but we'll work around
- * their brain damage.
+ * This function is for crng_init == 0 only. It loads entropy directly
+ * into the crng's key, without going through the input pool. It is,
+ * generally speaking, not very safe, but we use this only at early
+ * boot time when it's better to have something there rather than
+ * nothing.
+ *
+ * If account is set, then the crng_init_cnt counter is incremented.
+ * This shouldn't be set by functions like add_device_randomness(),
+ * where we can't trust that the buffer passed to it is unpredictable
+ * (so it might not have any entropy at all).
+ *
+ * Returns the number of bytes processed from input, which is bounded
+ * by CRNG_INIT_CNT_THRESH if account is true.
*/
-static struct crng_state **crng_node_pool __read_mostly;
+static size_t crng_pre_init_inject(const void *input, size_t len, bool account)
+{
+ static int crng_init_cnt = 0;
+ struct blake2s_state hash;
+ unsigned long flags;
-static void invalidate_batched_entropy(void);
-static void numa_crng_init(void);
+ blake2s_init(&hash, sizeof(base_crng.key));
-static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
-static int __init parse_trust_cpu(char *arg)
-{
- return kstrtobool(arg, &trust_cpu);
-}
-early_param("random.trust_cpu", parse_trust_cpu);
+ spin_lock_irqsave(&base_crng.lock, flags);
+ if (crng_init != 0) {
+ spin_unlock_irqrestore(&base_crng.lock, flags);
+ return 0;
+ }
-static bool crng_init_try_arch(struct crng_state *crng)
-{
- int i;
- bool arch_init = true;
- unsigned long rv;
+ if (account)
+ len = min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt);
- for (i = 4; i < 16; i++) {
- if (!arch_get_random_seed_long(&rv) &&
- !arch_get_random_long(&rv)) {
- rv = random_get_entropy();
- arch_init = false;
+ blake2s_update(&hash, base_crng.key, sizeof(base_crng.key));
+ blake2s_update(&hash, input, len);
+ blake2s_final(&hash, base_crng.key);
+
+ if (account) {
+ crng_init_cnt += len;
+ if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
+ ++base_crng.generation;
+ crng_init = 1;
}
- crng->state[i] ^= rv;
}
- return arch_init;
+ spin_unlock_irqrestore(&base_crng.lock, flags);
+
+ if (crng_init == 1)
+ pr_notice("fast init done\n");
+
+ return len;
}
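As a worked example of the accounting: two accounted 32-byte injections suffice, since the second brings crng_init_cnt to CRNG_INIT_CNT_THRESH (2 * CHACHA_KEY_SIZE = 64 bytes), at which point the base_crng generation is bumped, crng_init moves to 1, and "fast init done" is printed.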
-static bool __init crng_init_try_arch_early(struct crng_state *crng)
+static void _get_random_bytes(void *buf, size_t nbytes)
{
- int i;
- bool arch_init = true;
- unsigned long rv;
+ u32 chacha_state[CHACHA_STATE_WORDS];
+ u8 tmp[CHACHA_BLOCK_SIZE];
+ size_t len;
- for (i = 4; i < 16; i++) {
- if (!arch_get_random_seed_long_early(&rv) &&
- !arch_get_random_long_early(&rv)) {
- rv = random_get_entropy();
- arch_init = false;
+ if (!nbytes)
+ return;
+
+ len = min_t(size_t, 32, nbytes);
+ crng_make_state(chacha_state, buf, len);
+ nbytes -= len;
+ buf += len;
+
+ while (nbytes) {
+ if (nbytes < CHACHA_BLOCK_SIZE) {
+ chacha20_block(chacha_state, tmp);
+ memcpy(buf, tmp, nbytes);
+ memzero_explicit(tmp, sizeof(tmp));
+ break;
}
- crng->state[i] ^= rv;
+
+ chacha20_block(chacha_state, buf);
+ if (unlikely(chacha_state[12] == 0))
+ ++chacha_state[13];
+ nbytes -= CHACHA_BLOCK_SIZE;
+ buf += CHACHA_BLOCK_SIZE;
}
- return arch_init;
+ memzero_explicit(chacha_state, sizeof(chacha_state));
}
-static void crng_initialize_secondary(struct crng_state *crng)
+/*
+ * This function is the exported kernel interface. It returns some
+ * number of good random numbers, suitable for key generation, seeding
+ * TCP sequence numbers, etc. It does not rely on the hardware random
+ * number generator. For random bytes direct from the hardware RNG
+ * (when available), use get_random_bytes_arch(). In order to ensure
+ * that the randomness provided by this function is okay, the function
+ * wait_for_random_bytes() should be called and return 0 at least once
+ * at any point prior.
+ */
+void get_random_bytes(void *buf, size_t nbytes)
{
- chacha_init_consts(crng->state);
- _get_random_bytes(&crng->state[4], sizeof(u32) * 12);
- crng_init_try_arch(crng);
- crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1;
-}
+ static void *previous;
-static void __init crng_initialize_primary(struct crng_state *crng)
-{
- _extract_entropy(&crng->state[4], sizeof(u32) * 12);
- if (crng_init_try_arch_early(crng) && trust_cpu && crng_init < 2) {
- invalidate_batched_entropy();
- numa_crng_init();
- crng_init = 2;
- pr_notice("crng init done (trusting CPU's manufacturer)\n");
- }
- crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1;
+ warn_unseeded_randomness(&previous);
+ _get_random_bytes(buf, nbytes);
}
+EXPORT_SYMBOL(get_random_bytes);
-static void crng_finalize_init(struct crng_state *crng)
+static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes)
{
- if (crng != &primary_crng || crng_init >= 2)
- return;
- if (!system_wq) {
- /* We can't call numa_crng_init until we have workqueues,
- * so mark this for processing later. */
- crng_need_final_init = true;
- return;
- }
+ bool large_request = nbytes > 256;
+ ssize_t ret = 0;
+ size_t len;
+ u32 chacha_state[CHACHA_STATE_WORDS];
+ u8 output[CHACHA_BLOCK_SIZE];
- invalidate_batched_entropy();
- numa_crng_init();
- crng_init = 2;
- process_random_ready_list();
- wake_up_interruptible(&crng_init_wait);
- kill_fasync(&fasync, SIGIO, POLL_IN);
- pr_notice("crng init done\n");
- if (unseeded_warning.missed) {
- pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n",
- unseeded_warning.missed);
- unseeded_warning.missed = 0;
- }
- if (urandom_warning.missed) {
- pr_notice("%d urandom warning(s) missed due to ratelimiting\n",
- urandom_warning.missed);
- urandom_warning.missed = 0;
- }
-}
+ if (!nbytes)
+ return 0;
-static void do_numa_crng_init(struct work_struct *work)
-{
- int i;
- struct crng_state *crng;
- struct crng_state **pool;
-
- pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL | __GFP_NOFAIL);
- for_each_online_node(i) {
- crng = kmalloc_node(sizeof(struct crng_state),
- GFP_KERNEL | __GFP_NOFAIL, i);
- spin_lock_init(&crng->lock);
- crng_initialize_secondary(crng);
- pool[i] = crng;
- }
- /* pairs with READ_ONCE() in select_crng() */
- if (cmpxchg_release(&crng_node_pool, NULL, pool) != NULL) {
- for_each_node(i)
- kfree(pool[i]);
- kfree(pool);
- }
-}
+ len = min_t(size_t, 32, nbytes);
+ crng_make_state(chacha_state, output, len);
-static DECLARE_WORK(numa_crng_init_work, do_numa_crng_init);
+ if (copy_to_user(buf, output, len))
+ return -EFAULT;
+ nbytes -= len;
+ buf += len;
+ ret += len;
-static void numa_crng_init(void)
-{
- if (IS_ENABLED(CONFIG_NUMA))
- schedule_work(&numa_crng_init_work);
-}
+ while (nbytes) {
+ if (large_request && need_resched()) {
+ if (signal_pending(current))
+ break;
+ schedule();
+ }
-static struct crng_state *select_crng(void)
-{
- if (IS_ENABLED(CONFIG_NUMA)) {
- struct crng_state **pool;
- int nid = numa_node_id();
-
- /* pairs with cmpxchg_release() in do_numa_crng_init() */
- pool = READ_ONCE(crng_node_pool);
- if (pool && pool[nid])
- return pool[nid];
+ chacha20_block(chacha_state, output);
+ if (unlikely(chacha_state[12] == 0))
+ ++chacha_state[13];
+
+ len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE);
+ if (copy_to_user(buf, output, len)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ nbytes -= len;
+ buf += len;
+ ret += len;
}
- return &primary_crng;
+ memzero_explicit(chacha_state, sizeof(chacha_state));
+ memzero_explicit(output, sizeof(output));
+ return ret;
}
/*
- * crng_fast_load() can be called by code in the interrupt service
- * path. So we can't afford to dilly-dally. Returns the number of
- * bytes processed from cp.
+ * Batched entropy returns random integers. The quality of the random
+ * number is as good as /dev/urandom's. In order to ensure that the randomness
+ * provided by this function is okay, the function wait_for_random_bytes()
+ * should be called and return 0 at least once at any point prior.
*/
-static size_t crng_fast_load(const u8 *cp, size_t len)
+struct batched_entropy {
+ union {
+ /*
+ * We make this 1.5x a ChaCha block, so that we get the
+ * remaining 32 bytes from fast key erasure, plus one full
+ * block from the detached ChaCha state. We can increase
+ * the size of this later if needed so long as we keep the
+ * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE.
+ */
+ u64 entropy_u64[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u64))];
+ u32 entropy_u32[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u32))];
+ };
+ local_lock_t lock;
+ unsigned long generation;
+ unsigned int position;
+};
+
+
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = {
+ .lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock),
+ .position = UINT_MAX
+};
+
+u64 get_random_u64(void)
{
+ u64 ret;
unsigned long flags;
- u8 *p;
- size_t ret = 0;
+ struct batched_entropy *batch;
+ static void *previous;
+ unsigned long next_gen;
- if (!spin_trylock_irqsave(&primary_crng.lock, flags))
- return 0;
- if (crng_init != 0) {
- spin_unlock_irqrestore(&primary_crng.lock, flags);
- return 0;
- }
- p = (u8 *)&primary_crng.state[4];
- while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) {
- p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp;
- cp++; crng_init_cnt++; len--; ret++;
- }
- spin_unlock_irqrestore(&primary_crng.lock, flags);
- if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
- invalidate_batched_entropy();
- crng_init = 1;
- pr_notice("fast init done\n");
+ warn_unseeded_randomness(&previous);
+
+ local_lock_irqsave(&batched_entropy_u64.lock, flags);
+ batch = raw_cpu_ptr(&batched_entropy_u64);
+
+ next_gen = READ_ONCE(base_crng.generation);
+ if (batch->position >= ARRAY_SIZE(batch->entropy_u64) ||
+ next_gen != batch->generation) {
+ _get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64));
+ batch->position = 0;
+ batch->generation = next_gen;
}
+
+ ret = batch->entropy_u64[batch->position];
+ batch->entropy_u64[batch->position] = 0;
+ ++batch->position;
+ local_unlock_irqrestore(&batched_entropy_u64.lock, flags);
return ret;
}
+EXPORT_SYMBOL(get_random_u64);
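With the usual 64-byte CHACHA_BLOCK_SIZE, the 1.5x buffer above is 96 bytes, i.e. twelve u64 values (or twenty-four u32 values in the u32 variant) per refill, and position = UINT_MAX marks a batch as needing a refill on first use. A hypothetical caller sketch (the helper name is made up):

/* Hypothetical usage, not from the patch: one-off integers are what the
 * batched API is for; most calls are served straight from the per-cpu
 * buffer, which is cheaper than a full get_random_bytes() call.
 */
static unsigned long example_backoff_jitter_ms(unsigned long max_ms)
{
	return get_random_u64() % max_ms;	/* modulo bias is fine for jitter */
}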
-/*
- * crng_slow_load() is called by add_device_randomness, which has two
- * attributes. (1) We can't trust the buffer passed to it is
- * guaranteed to be unpredictable (so it might not have any entropy at
- * all), and (2) it doesn't have the performance constraints of
- * crng_fast_load().
- *
- * So we do something more comprehensive which is guaranteed to touch
- * all of the primary_crng's state, and which uses a LFSR with a
- * period of 255 as part of the mixing algorithm. Finally, we do
- * *not* advance crng_init_cnt since buffer we may get may be something
- * like a fixed DMI table (for example), which might very well be
- * unique to the machine, but is otherwise unvarying.
- */
-static int crng_slow_load(const u8 *cp, size_t len)
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = {
+ .lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock),
+ .position = UINT_MAX
+};
+
+u32 get_random_u32(void)
{
+ u32 ret;
unsigned long flags;
- static u8 lfsr = 1;
- u8 tmp;
- unsigned int i, max = CHACHA_KEY_SIZE;
- const u8 *src_buf = cp;
- u8 *dest_buf = (u8 *)&primary_crng.state[4];
+ struct batched_entropy *batch;
+ static void *previous;
+ unsigned long next_gen;
- if (!spin_trylock_irqsave(&primary_crng.lock, flags))
- return 0;
- if (crng_init != 0) {
- spin_unlock_irqrestore(&primary_crng.lock, flags);
- return 0;
- }
- if (len > max)
- max = len;
-
- for (i = 0; i < max; i++) {
- tmp = lfsr;
- lfsr >>= 1;
- if (tmp & 1)
- lfsr ^= 0xE1;
- tmp = dest_buf[i % CHACHA_KEY_SIZE];
- dest_buf[i % CHACHA_KEY_SIZE] ^= src_buf[i % len] ^ lfsr;
- lfsr += (tmp << 3) | (tmp >> 5);
+ warn_unseeded_randomness(&previous);
+
+ local_lock_irqsave(&batched_entropy_u32.lock, flags);
+ batch = raw_cpu_ptr(&batched_entropy_u32);
+
+ next_gen = READ_ONCE(base_crng.generation);
+ if (batch->position >= ARRAY_SIZE(batch->entropy_u32) ||
+ next_gen != batch->generation) {
+ _get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32));
+ batch->position = 0;
+ batch->generation = next_gen;
}
- spin_unlock_irqrestore(&primary_crng.lock, flags);
- return 1;
+
+ ret = batch->entropy_u32[batch->position];
+ batch->entropy_u32[batch->position] = 0;
+ ++batch->position;
+ local_unlock_irqrestore(&batched_entropy_u32.lock, flags);
+ return ret;
}
+EXPORT_SYMBOL(get_random_u32);
-static void crng_reseed(struct crng_state *crng, bool use_input_pool)
+#ifdef CONFIG_SMP
+/*
+ * This function is called when the CPU is coming up, with entry
+ * CPUHP_RANDOM_PREPARE, which comes before CPUHP_WORKQUEUE_PREP.
+ */
+int random_prepare_cpu(unsigned int cpu)
{
- unsigned long flags;
- int i, num;
- union {
- u8 block[CHACHA_BLOCK_SIZE];
- u32 key[8];
- } buf;
+ /*
+	 * When the CPU comes back online, immediately invalidate both
+ * the per-cpu crng and all batches, so that we serve fresh
+ * randomness.
+ */
+ per_cpu_ptr(&crngs, cpu)->generation = ULONG_MAX;
+ per_cpu_ptr(&batched_entropy_u32, cpu)->position = UINT_MAX;
+ per_cpu_ptr(&batched_entropy_u64, cpu)->position = UINT_MAX;
+ return 0;
+}
+#endif
- if (use_input_pool) {
- num = extract_entropy(&buf, 32, 16);
- if (num == 0)
- return;
- } else {
- _extract_crng(&primary_crng, buf.block);
- _crng_backtrack_protect(&primary_crng, buf.block,
- CHACHA_KEY_SIZE);
- }
- spin_lock_irqsave(&crng->lock, flags);
- for (i = 0; i < 8; i++) {
- unsigned long rv;
- if (!arch_get_random_seed_long(&rv) &&
- !arch_get_random_long(&rv))
- rv = random_get_entropy();
- crng->state[i + 4] ^= buf.key[i] ^ rv;
+/**
+ * randomize_page - Generate a random, page aligned address
+ * @start: The smallest acceptable address the caller will take.
+ * @range: The size of the area, starting at @start, within which the
+ * random address must fall.
+ *
+ * If @start + @range would overflow, @range is capped.
+ *
+ * NOTE: Historical use of randomize_range, which this replaces, presumed that
+ * @start was already page aligned. We now align it regardless.
+ *
+ * Return: A page aligned address within [start, start + range). On error,
+ * @start is returned.
+ */
+unsigned long randomize_page(unsigned long start, unsigned long range)
+{
+ if (!PAGE_ALIGNED(start)) {
+ range -= PAGE_ALIGN(start) - start;
+ start = PAGE_ALIGN(start);
}
- memzero_explicit(&buf, sizeof(buf));
- WRITE_ONCE(crng->init_time, jiffies);
- spin_unlock_irqrestore(&crng->lock, flags);
- crng_finalize_init(crng);
+
+ if (start > ULONG_MAX - range)
+ range = ULONG_MAX - start;
+
+ range >>= PAGE_SHIFT;
+
+ if (range == 0)
+ return start;
+
+ return start + (get_random_long() % range << PAGE_SHIFT);
}
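
The arithmetic above is easy to misread: an unaligned @start first shrinks @range by however many bytes alignment consumes, and only then is a whole-page offset drawn. A standalone restatement with PAGE_SHIFT assumed to be 12 and a stub get_random_long(), for illustration only:

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define PAGE_ALIGNED(x)	(((x) & (PAGE_SIZE - 1)) == 0)

static unsigned long get_random_long(void)	/* stub; the kernel's is properly seeded */
{
	return ((unsigned long)rand() << 16) ^ (unsigned long)rand();
}

static unsigned long randomize_page(unsigned long start, unsigned long range)
{
	if (!PAGE_ALIGNED(start)) {
		range -= PAGE_ALIGN(start) - start;	/* bytes eaten by alignment */
		start = PAGE_ALIGN(start);
	}
	if (start > ULONG_MAX - range)
		range = ULONG_MAX - start;
	range >>= PAGE_SHIFT;	/* count in whole pages */
	if (range == 0)
		return start;
	return start + (get_random_long() % range << PAGE_SHIFT);
}

int main(void)
{
	/* Unaligned start: loses 0xdcc bytes of range, then rounds up to 0x...2000. */
	printf("%#lx\n", randomize_page(0x7f0000001234UL, 1UL << 20));
	return 0;
}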
-static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE])
+/*
+ * This function will use the architecture-specific hardware random
+ * number generator if it is available. Its use is not recommended;
+ * use get_random_bytes() instead. This function returns the number
+ * of bytes filled in.
+ */
+size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes)
{
- unsigned long flags, init_time;
+ size_t left = nbytes;
+ u8 *p = buf;
- if (crng_ready()) {
- init_time = READ_ONCE(crng->init_time);
- if (time_after(READ_ONCE(crng_global_init_time), init_time) ||
- time_after(jiffies, init_time + CRNG_RESEED_INTERVAL))
- crng_reseed(crng, crng == &primary_crng);
+ while (left) {
+ unsigned long v;
+ size_t chunk = min_t(size_t, left, sizeof(unsigned long));
+
+ if (!arch_get_random_long(&v))
+ break;
+
+ memcpy(p, &v, chunk);
+ p += chunk;
+ left -= chunk;
}
- spin_lock_irqsave(&crng->lock, flags);
- chacha20_block(&crng->state[0], out);
- if (crng->state[12] == 0)
- crng->state[13]++;
- spin_unlock_irqrestore(&crng->lock, flags);
+
+ return nbytes - left;
}
+EXPORT_SYMBOL(get_random_bytes_arch);
-static void extract_crng(u8 out[CHACHA_BLOCK_SIZE])
+
+/**********************************************************************
+ *
+ * Entropy accumulation and extraction routines.
+ *
+ * Callers may add entropy via:
+ *
+ * static void mix_pool_bytes(const void *in, size_t nbytes)
+ *
+ * After which, if added entropy should be credited:
+ *
+ * static void credit_entropy_bits(size_t nbits)
+ *
+ * Finally, extract entropy via these two, with the latter one
+ * setting the entropy count to zero and extracting only if at
+ * least POOL_MIN_BITS of entropy has been credited beforehand or
+ * force is true:
+ *
+ * static void extract_entropy(void *buf, size_t nbytes)
+ * static bool drain_entropy(void *buf, size_t nbytes, bool force)
+ *
+ **********************************************************************/
+
+enum {
+ POOL_BITS = BLAKE2S_HASH_SIZE * 8,
+ POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */
+};
+
+/* For notifying userspace that it should write into /dev/random. */
+static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
+
+static struct {
+ struct blake2s_state hash;
+ spinlock_t lock;
+ unsigned int entropy_count;
+} input_pool = {
+ .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE),
+ BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4,
+ BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 },
+ .hash.outlen = BLAKE2S_HASH_SIZE,
+ .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
+};
+
+static void _mix_pool_bytes(const void *in, size_t nbytes)
{
- _extract_crng(select_crng(), out);
+ blake2s_update(&input_pool.hash, in, nbytes);
}
/*
- * Use the leftover bytes from the CRNG block output (if there is
- * enough) to mutate the CRNG key to provide backtracking protection.
+ * This function adds bytes into the entropy "pool". It does not
+ * update the entropy estimate. The caller should call
+ * credit_entropy_bits if this is appropriate.
*/
-static void _crng_backtrack_protect(struct crng_state *crng,
- u8 tmp[CHACHA_BLOCK_SIZE], int used)
+static void mix_pool_bytes(const void *in, size_t nbytes)
{
unsigned long flags;
- u32 *s, *d;
- int i;
- used = round_up(used, sizeof(u32));
- if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) {
- extract_crng(tmp);
- used = 0;
- }
- spin_lock_irqsave(&crng->lock, flags);
- s = (u32 *)&tmp[used];
- d = &crng->state[4];
- for (i = 0; i < 8; i++)
- *d++ ^= *s++;
- spin_unlock_irqrestore(&crng->lock, flags);
+ spin_lock_irqsave(&input_pool.lock, flags);
+ _mix_pool_bytes(in, nbytes);
+ spin_unlock_irqrestore(&input_pool.lock, flags);
}
-static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used)
+static void credit_entropy_bits(size_t nbits)
{
- _crng_backtrack_protect(select_crng(), tmp, used);
+ unsigned int entropy_count, orig, add;
+
+ if (!nbits)
+ return;
+
+ add = min_t(size_t, nbits, POOL_BITS);
+
+ do {
+ orig = READ_ONCE(input_pool.entropy_count);
+ entropy_count = min_t(unsigned int, POOL_BITS, orig + add);
+ } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig);
+
+ if (!crng_ready() && entropy_count >= POOL_MIN_BITS)
+ crng_reseed(false);
}
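
credit_entropy_bits() is a lock-free saturating add: read the count, clamp orig + add to POOL_BITS, and retry if another CPU won the race. The same shape in portable C11, with atomic_compare_exchange_weak standing in for the kernel's cmpxchg (an assumption of this sketch, not the kernel API):

#include <stdatomic.h>
#include <stdio.h>

#define POOL_BITS 256	/* BLAKE2S_HASH_SIZE * 8 */

static _Atomic unsigned int entropy_count;

static void credit(unsigned int nbits)
{
	unsigned int orig, next;

	if (!nbits)
		return;
	if (nbits > POOL_BITS)
		nbits = POOL_BITS;
	do {
		orig = atomic_load(&entropy_count);
		next = orig + nbits;
		if (next > POOL_BITS)
			next = POOL_BITS;	/* saturate; never exceed the pool size */
	} while (!atomic_compare_exchange_weak(&entropy_count, &orig, next));
}

int main(void)
{
	credit(200);
	credit(200);	/* second credit saturates at POOL_BITS */
	printf("%u\n", atomic_load(&entropy_count));	/* prints 256 */
	return 0;
}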
-static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
+/*
+ * This is an HKDF-like construction for using the hashed collected entropy
+ * as a PRF key, that's then expanded block-by-block.
+ */
+static void extract_entropy(void *buf, size_t nbytes)
{
- ssize_t ret = 0, i = CHACHA_BLOCK_SIZE;
- u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4);
- int large_request = (nbytes > 256);
+ unsigned long flags;
+ u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE];
+ struct {
+ unsigned long rdseed[32 / sizeof(long)];
+ size_t counter;
+ } block;
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) {
+ if (!arch_get_random_seed_long(&block.rdseed[i]) &&
+ !arch_get_random_long(&block.rdseed[i]))
+ block.rdseed[i] = random_get_entropy();
+ }
- while (nbytes) {
- if (large_request && need_resched()) {
- if (signal_pending(current)) {
- if (ret == 0)
- ret = -ERESTARTSYS;
- break;
- }
- schedule();
- }
+ spin_lock_irqsave(&input_pool.lock, flags);
- extract_crng(tmp);
- i = min_t(int, nbytes, CHACHA_BLOCK_SIZE);
- if (copy_to_user(buf, tmp, i)) {
- ret = -EFAULT;
- break;
- }
+ /* seed = HASHPRF(last_key, entropy_input) */
+ blake2s_final(&input_pool.hash, seed);
+
+ /* next_key = HASHPRF(seed, RDSEED || 0) */
+ block.counter = 0;
+ blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed));
+ blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key));
+
+ spin_unlock_irqrestore(&input_pool.lock, flags);
+ memzero_explicit(next_key, sizeof(next_key));
+ while (nbytes) {
+ i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE);
+ /* output = HASHPRF(seed, RDSEED || ++counter) */
+ ++block.counter;
+ blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed));
nbytes -= i;
buf += i;
- ret += i;
}
- crng_backtrack_protect(tmp, i);
- /* Wipe data just written to memory */
- memzero_explicit(tmp, sizeof(tmp));
+ memzero_explicit(seed, sizeof(seed));
+ memzero_explicit(&block, sizeof(block));
+}
- return ret;
+/*
+ * First we make sure we have POOL_MIN_BITS of entropy in the pool unless force
+ * is true, and then we set the entropy count to zero (but don't actually touch
+ * any data). Only then can we extract a new key with extract_entropy().
+ */
+static bool drain_entropy(void *buf, size_t nbytes, bool force)
+{
+ unsigned int entropy_count;
+ do {
+ entropy_count = READ_ONCE(input_pool.entropy_count);
+ if (!force && entropy_count < POOL_MIN_BITS)
+ return false;
+ } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count);
+ extract_entropy(buf, nbytes);
+ wake_up_interruptible(&random_write_wait);
+ kill_fasync(&fasync, SIGIO, POLL_OUT);
+ return true;
}
-/*********************************************************************
+
+/**********************************************************************
*
- * Entropy input management
+ * Entropy collection routines.
*
- *********************************************************************/
+ * The following exported functions are used for pushing entropy into
+ * the above entropy accumulation routines:
+ *
+ * void add_device_randomness(const void *buf, size_t size);
+ * void add_input_randomness(unsigned int type, unsigned int code,
+ * unsigned int value);
+ * void add_disk_randomness(struct gendisk *disk);
+ * void add_hwgenerator_randomness(const void *buffer, size_t count,
+ * size_t entropy);
+ * void add_bootloader_randomness(const void *buf, size_t size);
+ * void add_vmfork_randomness(const void *unique_vm_id, size_t size);
+ * void add_interrupt_randomness(int irq);
+ *
+ * add_device_randomness() adds data to the input pool that
+ * is likely to differ between two devices (or possibly even per boot).
+ * This would be things like MAC addresses or serial numbers, or the
+ * read-out of the RTC. This does *not* credit any actual entropy to
+ * the pool, but it initializes the pool to different values for devices
+ * that might otherwise be identical and have very little entropy
+ * available to them (particularly common in the embedded world).
+ *
+ * add_input_randomness() uses the input layer interrupt timing, as well
+ * as the event type information from the hardware.
+ *
+ * add_disk_randomness() uses what amounts to the seek time of block
+ * layer request events, on a per-disk_devt basis, as input to the
+ * entropy pool. Note that high-speed solid state drives with very low
+ * seek times do not make for good sources of entropy, as their seek
+ * times are usually fairly consistent.
+ *
+ * The above two routines try to estimate how many bits of entropy
+ * to credit. They do this by keeping track of the first, second,
+ * and third-order deltas of the event timings.
+ *
+ * add_hwgenerator_randomness() is for true hardware RNGs, and will credit
+ * entropy as specified by the caller. If the entropy pool is full it will
+ * block until more entropy is needed.
+ *
+ * add_bootloader_randomness() is the same as add_hwgenerator_randomness() or
+ * add_device_randomness(), depending on whether or not the configuration
+ * option CONFIG_RANDOM_TRUST_BOOTLOADER is set.
+ *
+ * add_vmfork_randomness() adds a unique (but not necessarily secret) ID
+ * representing the current instance of a VM to the pool, without crediting,
+ * and then force-reseeds the crng so that it takes effect immediately.
+ *
+ * add_interrupt_randomness() uses the interrupt timing as random
+ * inputs to the entropy pool. Using the cycle counters and the irq source
+ * as inputs, it feeds the input pool roughly once a second or after 64
+ * interrupts, crediting 1 bit of entropy for whichever comes first.
+ *
+ **********************************************************************/
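
To make the first of these concrete, a kernel-style fragment showing how a driver might feed identity data at probe time; example_probe_seed() and its MAC-address argument are hypothetical, and only add_device_randomness() is the real interface:

#include <linux/random.h>

/*
 * Hypothetical helper: mix a NIC's MAC address at probe time. The data is
 * device-unique but possibly predictable, so it is mixed without credit.
 */
static void example_probe_seed(const u8 *mac, size_t len)
{
	add_device_randomness(mac, len);
}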
-/* There is one of these per entropy source */
-struct timer_rand_state {
- cycles_t last_time;
- long last_delta, last_delta2;
-};
+static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
+static int __init parse_trust_cpu(char *arg)
+{
+ return kstrtobool(arg, &trust_cpu);
+}
+early_param("random.trust_cpu", parse_trust_cpu);
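
Since parse_trust_cpu() hands the argument to kstrtobool(), the knob accepts the usual boolean spellings on the kernel command line, overriding the CONFIG_RANDOM_TRUST_CPU default, e.g.:

	random.trust_cpu=off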
-#define INIT_TIMER_RAND_STATE { INITIAL_JIFFIES, };
+/*
+ * The first collection of entropy occurs at system boot while interrupts
+ * are still turned off. Here we push in RDSEED, a timestamp, and utsname().
+ * Depending on the above configuration knob, RDSEED may be considered
+ * sufficient for initialization. Note that much earlier setup may already
+ * have pushed entropy into the input pool by the time we get here.
+ */
+int __init rand_initialize(void)
+{
+ size_t i;
+ ktime_t now = ktime_get_real();
+ bool arch_init = true;
+ unsigned long rv;
+
+ for (i = 0; i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) {
+ if (!arch_get_random_seed_long_early(&rv) &&
+ !arch_get_random_long_early(&rv)) {
+ rv = random_get_entropy();
+ arch_init = false;
+ }
+ _mix_pool_bytes(&rv, sizeof(rv));
+ }
+ _mix_pool_bytes(&now, sizeof(now));
+ _mix_pool_bytes(utsname(), sizeof(*(utsname())));
+
+ extract_entropy(base_crng.key, sizeof(base_crng.key));
+ ++base_crng.generation;
+
+ if (arch_init && trust_cpu && !crng_ready()) {
+ crng_init = 2;
+ pr_notice("crng init done (trusting CPU's manufacturer)\n");
+ }
+
+ if (ratelimit_disable) {
+ urandom_warning.interval = 0;
+ unseeded_warning.interval = 0;
+ }
+ return 0;
+}
/*
* Add device- or boot-specific data to the input pool to help
@@ -1091,23 +1002,27 @@ struct timer_rand_state {
* the entropy pool having similar initial state across largely
* identical devices.
*/
-void add_device_randomness(const void *buf, unsigned int size)
+void add_device_randomness(const void *buf, size_t size)
{
- unsigned long time = random_get_entropy() ^ jiffies;
- unsigned long flags;
+ cycles_t cycles = random_get_entropy();
+ unsigned long flags, now = jiffies;
- if (!crng_ready() && size)
- crng_slow_load(buf, size);
+ if (crng_init == 0 && size)
+ crng_pre_init_inject(buf, size, false);
- trace_add_device_randomness(size, _RET_IP_);
spin_lock_irqsave(&input_pool.lock, flags);
+ _mix_pool_bytes(&cycles, sizeof(cycles));
+ _mix_pool_bytes(&now, sizeof(now));
_mix_pool_bytes(buf, size);
- _mix_pool_bytes(&time, sizeof(time));
spin_unlock_irqrestore(&input_pool.lock, flags);
}
EXPORT_SYMBOL(add_device_randomness);
-static struct timer_rand_state input_timer_state = INIT_TIMER_RAND_STATE;
+/* There is one of these per entropy source */
+struct timer_rand_state {
+ unsigned long last_time;
+ long last_delta, last_delta2;
+};
/*
* This function adds entropy to the entropy "pool" by using timing
@@ -1117,29 +1032,26 @@ static struct timer_rand_state input_timer_state = INIT_TIMER_RAND_STATE;
* The number "num" is also added to the pool - it should somehow describe
* the type of event which just happened. This is currently 0-255 for
* keyboard scan codes, and 256 upwards for interrupts.
- *
*/
-static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
+static void add_timer_randomness(struct timer_rand_state *state, unsigned int num)
{
- struct {
- long jiffies;
- unsigned int cycles;
- unsigned int num;
- } sample;
+ cycles_t cycles = random_get_entropy();
+ unsigned long flags, now = jiffies;
long delta, delta2, delta3;
- sample.jiffies = jiffies;
- sample.cycles = random_get_entropy();
- sample.num = num;
- mix_pool_bytes(&sample, sizeof(sample));
+ spin_lock_irqsave(&input_pool.lock, flags);
+ _mix_pool_bytes(&cycles, sizeof(cycles));
+ _mix_pool_bytes(&now, sizeof(now));
+ _mix_pool_bytes(&num, sizeof(num));
+ spin_unlock_irqrestore(&input_pool.lock, flags);
/*
* Calculate number of bits of randomness we probably added.
* We take into account the first, second and third-order deltas
* in order to make our estimate.
*/
- delta = sample.jiffies - READ_ONCE(state->last_time);
- WRITE_ONCE(state->last_time, sample.jiffies);
+ delta = now - READ_ONCE(state->last_time);
+ WRITE_ONCE(state->last_time, now);
delta2 = delta - READ_ONCE(state->last_delta);
WRITE_ONCE(state->last_delta, delta);
@@ -1163,318 +1075,303 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
* Round down by 1 bit on general principles,
* and limit entropy estimate to 12 bits.
*/
- credit_entropy_bits(min_t(int, fls(delta >> 1), 11));
+ credit_entropy_bits(min_t(unsigned int, fls(delta >> 1), 11));
}
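
The diff elides the middle of this function, but the visible pieces plus the comment imply the classic estimator: the smallest of |delta|, |delta2|, |delta3| bounds the credit, which is then fls(delta >> 1) capped at 11 bits. A standalone approximation under that assumption:

#include <stdio.h>
#include <stdlib.h>

struct timer_rand_state {
	long last_time, last_delta, last_delta2;
};

static int fls_long(unsigned long x)	/* position of the highest set bit, 1-based */
{
	int r = 0;

	while (x) {
		r++;
		x >>= 1;
	}
	return r;
}

static unsigned int estimate_bits(struct timer_rand_state *s, long now)
{
	long delta = now - s->last_time;
	long delta2, delta3;
	int bits;

	s->last_time = now;
	delta2 = delta - s->last_delta;
	s->last_delta = delta;
	delta3 = delta2 - s->last_delta2;
	s->last_delta2 = delta2;

	delta = labs(delta);
	delta2 = labs(delta2);
	delta3 = labs(delta3);

	/* The least surprising delta bounds the estimate. */
	if (delta > delta2)
		delta = delta2;
	if (delta > delta3)
		delta = delta3;

	/* Round down by 1 bit; the min_t() above caps the result at 11. */
	bits = fls_long((unsigned long)delta >> 1);
	return bits > 11 ? 11 : (unsigned int)bits;
}

int main(void)
{
	struct timer_rand_state s = { 0 };
	long t[] = { 100, 137, 192, 305 };

	for (int i = 0; i < 4; i++)
		printf("credit %u bits\n", estimate_bits(&s, t[i]));
	return 0;
}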
void add_input_randomness(unsigned int type, unsigned int code,
unsigned int value)
{
static unsigned char last_value;
+ static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES };
- /* ignore autorepeat and the like */
+ /* Ignore autorepeat and the like. */
if (value == last_value)
return;
last_value = value;
add_timer_randomness(&input_timer_state,
(type << 4) ^ code ^ (code >> 4) ^ value);
- trace_add_input_randomness(POOL_ENTROPY_BITS());
}
EXPORT_SYMBOL_GPL(add_input_randomness);
-static DEFINE_PER_CPU(struct fast_pool, irq_randomness);
-
-#ifdef ADD_INTERRUPT_BENCH
-static unsigned long avg_cycles, avg_deviation;
-
-#define AVG_SHIFT 8 /* Exponential average factor k=1/256 */
-#define FIXED_1_2 (1 << (AVG_SHIFT - 1))
-
-static void add_interrupt_bench(cycles_t start)
+#ifdef CONFIG_BLOCK
+void add_disk_randomness(struct gendisk *disk)
{
- long delta = random_get_entropy() - start;
-
- /* Use a weighted moving average */
- delta = delta - ((avg_cycles + FIXED_1_2) >> AVG_SHIFT);
- avg_cycles += delta;
- /* And average deviation */
- delta = abs(delta) - ((avg_deviation + FIXED_1_2) >> AVG_SHIFT);
- avg_deviation += delta;
+ if (!disk || !disk->random)
+ return;
+ /* First major is 1, so we get >= 0x200 here. */
+ add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
}
-#else
-#define add_interrupt_bench(x)
-#endif
+EXPORT_SYMBOL_GPL(add_disk_randomness);
-static u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
+void rand_initialize_disk(struct gendisk *disk)
{
- u32 *ptr = (u32 *)regs;
- unsigned int idx;
+ struct timer_rand_state *state;
- if (regs == NULL)
- return 0;
- idx = READ_ONCE(f->reg_idx);
- if (idx >= sizeof(struct pt_regs) / sizeof(u32))
- idx = 0;
- ptr += idx++;
- WRITE_ONCE(f->reg_idx, idx);
- return *ptr;
+ /*
+ * If kzalloc returns null, we just won't use that entropy
+ * source.
+ */
+ state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
+ if (state) {
+ state->last_time = INITIAL_JIFFIES;
+ disk->random = state;
+ }
}
+#endif
-void add_interrupt_randomness(int irq)
+/*
+ * Interface for in-kernel drivers of true hardware RNGs.
+ * Those devices may produce endless random bits and will be throttled
+ * when our pool is full.
+ */
+void add_hwgenerator_randomness(const void *buffer, size_t count,
+ size_t entropy)
{
- struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
- struct pt_regs *regs = get_irq_regs();
- unsigned long now = jiffies;
- cycles_t cycles = random_get_entropy();
- u32 c_high, j_high;
- u64 ip;
-
- if (cycles == 0)
- cycles = get_reg(fast_pool, regs);
- c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0;
- j_high = (sizeof(now) > 4) ? now >> 32 : 0;
- fast_pool->pool[0] ^= cycles ^ j_high ^ irq;
- fast_pool->pool[1] ^= now ^ c_high;
- ip = regs ? instruction_pointer(regs) : _RET_IP_;
- fast_pool->pool[2] ^= ip;
- fast_pool->pool[3] ^=
- (sizeof(ip) > 4) ? ip >> 32 : get_reg(fast_pool, regs);
-
- fast_mix(fast_pool);
- add_interrupt_bench(cycles);
-
if (unlikely(crng_init == 0)) {
- if ((fast_pool->count >= 64) &&
- crng_fast_load((u8 *)fast_pool->pool, sizeof(fast_pool->pool)) > 0) {
- fast_pool->count = 0;
- fast_pool->last = now;
- }
- return;
+ size_t ret = crng_pre_init_inject(buffer, count, true);
+ mix_pool_bytes(buffer, ret);
+ count -= ret;
+ buffer += ret;
+ if (!count || crng_init == 0)
+ return;
}
- if ((fast_pool->count < 64) && !time_after(now, fast_pool->last + HZ))
- return;
+ /*
+ * Throttle writing if we're above the trickle threshold.
+	 * We'll be woken up again once the entropy count drops below
+	 * POOL_MIN_BITS, when the calling thread is about to terminate,
+	 * or once CRNG_RESEED_INTERVAL has elapsed.
+ */
+ wait_event_interruptible_timeout(random_write_wait,
+ !system_wq || kthread_should_stop() ||
+ input_pool.entropy_count < POOL_MIN_BITS,
+ CRNG_RESEED_INTERVAL);
+ mix_pool_bytes(buffer, count);
+ credit_entropy_bits(entropy);
+}
+EXPORT_SYMBOL_GPL(add_hwgenerator_randomness);
- if (!spin_trylock(&input_pool.lock))
- return;
+/*
+ * Handle random seed passed by bootloader.
+ * If the seed is trustworthy, it is treated as input from a hardware
+ * RNG; otherwise it is treated as device data.
+ * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER.
+ */
+void add_bootloader_randomness(const void *buf, size_t size)
+{
+ if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER))
+ add_hwgenerator_randomness(buf, size, size * 8);
+ else
+ add_device_randomness(buf, size);
+}
+EXPORT_SYMBOL_GPL(add_bootloader_randomness);
- fast_pool->last = now;
- __mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool));
- spin_unlock(&input_pool.lock);
+#if IS_ENABLED(CONFIG_VMGENID)
+static BLOCKING_NOTIFIER_HEAD(vmfork_chain);
- fast_pool->count = 0;
+/*
+ * Handle a new unique VM ID, which is unique but not secret, so we
+ * don't credit it, but we do immediately force a reseed afterward so
+ * that it's used by the crng posthaste.
+ */
+void add_vmfork_randomness(const void *unique_vm_id, size_t size)
+{
+ add_device_randomness(unique_vm_id, size);
+ if (crng_ready()) {
+ crng_reseed(true);
+ pr_notice("crng reseeded due to virtual machine fork\n");
+ }
+ blocking_notifier_call_chain(&vmfork_chain, 0, NULL);
+}
+#if IS_MODULE(CONFIG_VMGENID)
+EXPORT_SYMBOL_GPL(add_vmfork_randomness);
+#endif
- /* award one bit for the contents of the fast pool */
- credit_entropy_bits(1);
+int register_random_vmfork_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&vmfork_chain, nb);
}
-EXPORT_SYMBOL_GPL(add_interrupt_randomness);
+EXPORT_SYMBOL_GPL(register_random_vmfork_notifier);
-#ifdef CONFIG_BLOCK
-void add_disk_randomness(struct gendisk *disk)
+int unregister_random_vmfork_notifier(struct notifier_block *nb)
{
- if (!disk || !disk->random)
- return;
- /* first major is 1, so we get >= 0x200 here */
- add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
- trace_add_disk_randomness(disk_devt(disk), POOL_ENTROPY_BITS());
+ return blocking_notifier_chain_unregister(&vmfork_chain, nb);
}
-EXPORT_SYMBOL_GPL(add_disk_randomness);
+EXPORT_SYMBOL_GPL(unregister_random_vmfork_notifier);
#endif
-/*********************************************************************
- *
- * Entropy extraction routines
- *
- *********************************************************************/
+struct fast_pool {
+ struct work_struct mix;
+ unsigned long pool[4];
+ unsigned long last;
+ unsigned int count;
+ u16 reg_idx;
+};
+
+static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = {
+#ifdef CONFIG_64BIT
+ /* SipHash constants */
+ .pool = { 0x736f6d6570736575UL, 0x646f72616e646f6dUL,
+ 0x6c7967656e657261UL, 0x7465646279746573UL }
+#else
+ /* HalfSipHash constants */
+ .pool = { 0, 0, 0x6c796765U, 0x74656462U }
+#endif
+};
/*
- * This function decides how many bytes to actually take from the
- * given pool, and also debits the entropy count accordingly.
+ * This is [Half]SipHash-1-x, starting from an empty key. Because
+ * the key is fixed, it assumes that its inputs are non-malicious,
+ * and therefore this has no security on its own. s represents the
+ * 128 or 256-bit SipHash state, while v represents a 128-bit input.
*/
-static size_t account(size_t nbytes, int min)
+static void fast_mix(unsigned long s[4], const unsigned long *v)
{
- int entropy_count, orig;
- size_t ibytes, nfrac;
-
- BUG_ON(input_pool.entropy_count > POOL_FRACBITS);
-
- /* Can we pull enough? */
-retry:
- entropy_count = orig = READ_ONCE(input_pool.entropy_count);
- if (WARN_ON(entropy_count < 0)) {
- pr_warn("negative entropy count: count %d\n", entropy_count);
- entropy_count = 0;
- }
-
- /* never pull more than available */
- ibytes = min_t(size_t, nbytes, entropy_count >> (POOL_ENTROPY_SHIFT + 3));
- if (ibytes < min)
- ibytes = 0;
- nfrac = ibytes << (POOL_ENTROPY_SHIFT + 3);
- if ((size_t)entropy_count > nfrac)
- entropy_count -= nfrac;
- else
- entropy_count = 0;
+ size_t i;
- if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig)
- goto retry;
-
- trace_debit_entropy(8 * ibytes);
- if (ibytes && POOL_ENTROPY_BITS() < random_write_wakeup_bits) {
- wake_up_interruptible(&random_write_wait);
- kill_fasync(&fasync, SIGIO, POLL_OUT);
+ for (i = 0; i < 16 / sizeof(long); ++i) {
+ s[3] ^= v[i];
+#ifdef CONFIG_64BIT
+ s[0] += s[1]; s[1] = rol64(s[1], 13); s[1] ^= s[0]; s[0] = rol64(s[0], 32);
+ s[2] += s[3]; s[3] = rol64(s[3], 16); s[3] ^= s[2];
+ s[0] += s[3]; s[3] = rol64(s[3], 21); s[3] ^= s[0];
+ s[2] += s[1]; s[1] = rol64(s[1], 17); s[1] ^= s[2]; s[2] = rol64(s[2], 32);
+#else
+ s[0] += s[1]; s[1] = rol32(s[1], 5); s[1] ^= s[0]; s[0] = rol32(s[0], 16);
+ s[2] += s[3]; s[3] = rol32(s[3], 8); s[3] ^= s[2];
+ s[0] += s[3]; s[3] = rol32(s[3], 7); s[3] ^= s[0];
+ s[2] += s[1]; s[1] = rol32(s[1], 13); s[1] ^= s[2]; s[2] = rol32(s[2], 16);
+#endif
+ s[0] ^= v[i];
}
-
- return ibytes;
}
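
For reference, the 64-bit variant of the round above compiles standalone; the SipHash initial-state constants match the per-CPU initializer earlier, and the two input words are arbitrary test values:

#include <stdint.h>
#include <stdio.h>

static inline uint64_t rol64(uint64_t v, int s)
{
	return (v << s) | (v >> (64 - s));
}

static void fast_mix(uint64_t s[4], const uint64_t v[2])
{
	for (int i = 0; i < 2; i++) {
		s[3] ^= v[i];
		/* One SipHash round per input word, hence "-1-x". */
		s[0] += s[1]; s[1] = rol64(s[1], 13); s[1] ^= s[0]; s[0] = rol64(s[0], 32);
		s[2] += s[3]; s[3] = rol64(s[3], 16); s[3] ^= s[2];
		s[0] += s[3]; s[3] = rol64(s[3], 21); s[3] ^= s[0];
		s[2] += s[1]; s[1] = rol64(s[1], 17); s[1] ^= s[2]; s[2] = rol64(s[2], 32);
		s[0] ^= v[i];
	}
}

int main(void)
{
	uint64_t s[4] = { 0x736f6d6570736575ULL, 0x646f72616e646f6dULL,
			  0x6c7967656e657261ULL, 0x7465646279746573ULL };
	uint64_t in[2] = { 0x1234, 0x5678 };

	fast_mix(s, in);
	printf("%016llx\n", (unsigned long long)s[0]);
	return 0;
}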
+#ifdef CONFIG_SMP
/*
- * This function does the actual extraction for extract_entropy.
- *
- * Note: we assume that .poolwords is a multiple of 16 words.
+ * This function is called when the CPU has just come online, with
+ * entry CPUHP_AP_RANDOM_ONLINE, just after CPUHP_AP_WORKQUEUE_ONLINE.
*/
-static void extract_buf(u8 *out)
+int random_online_cpu(unsigned int cpu)
{
- struct blake2s_state state __aligned(__alignof__(unsigned long));
- u8 hash[BLAKE2S_HASH_SIZE];
- unsigned long *salt;
- unsigned long flags;
-
- blake2s_init(&state, sizeof(hash));
-
/*
- * If we have an architectural hardware random number
- * generator, use it for BLAKE2's salt & personal fields.
+	 * During CPU shutdown and before CPU onlining,
+	 * add_interrupt_randomness() may schedule mix_interrupt_randomness()
+	 * and set the MIX_INFLIGHT flag. However, because the worker can
+ * be scheduled on a different CPU during this period, that
+ * flag will never be cleared. For that reason, we zero out
+ * the flag here, which runs just after workqueues are onlined
+ * for the CPU again. This also has the effect of setting the
+ * irq randomness count to zero so that new accumulated irqs
+ * are fresh.
*/
- for (salt = (unsigned long *)&state.h[4];
- salt < (unsigned long *)&state.h[8]; ++salt) {
- unsigned long v;
- if (!arch_get_random_long(&v))
- break;
- *salt ^= v;
- }
-
- /* Generate a hash across the pool */
- spin_lock_irqsave(&input_pool.lock, flags);
- blake2s_update(&state, (const u8 *)input_pool_data, POOL_BYTES);
- blake2s_final(&state, hash); /* final zeros out state */
+ per_cpu_ptr(&irq_randomness, cpu)->count = 0;
+ return 0;
+}
+#endif
- /*
- * We mix the hash back into the pool to prevent backtracking
- * attacks (where the attacker knows the state of the pool
- * plus the current outputs, and attempts to find previous
- * outputs), unless the hash function can be inverted. By
- * mixing at least a hash worth of hash data back, we make
- * brute-forcing the feedback as hard as brute-forcing the
- * hash.
- */
- __mix_pool_bytes(hash, sizeof(hash));
- spin_unlock_irqrestore(&input_pool.lock, flags);
+static unsigned long get_reg(struct fast_pool *f, struct pt_regs *regs)
+{
+ unsigned long *ptr = (unsigned long *)regs;
+ unsigned int idx;
- /* Note that EXTRACT_SIZE is half of hash size here, because above
- * we've dumped the full length back into mixer. By reducing the
- * amount that we emit, we retain a level of forward secrecy.
- */
- memcpy(out, hash, EXTRACT_SIZE);
- memzero_explicit(hash, sizeof(hash));
+ if (regs == NULL)
+ return 0;
+ idx = READ_ONCE(f->reg_idx);
+ if (idx >= sizeof(struct pt_regs) / sizeof(unsigned long))
+ idx = 0;
+ ptr += idx++;
+ WRITE_ONCE(f->reg_idx, idx);
+ return *ptr;
}
-static ssize_t _extract_entropy(void *buf, size_t nbytes)
+static void mix_interrupt_randomness(struct work_struct *work)
{
- ssize_t ret = 0, i;
- u8 tmp[EXTRACT_SIZE];
+ struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix);
+ /*
+ * The size of the copied stack pool is explicitly 16 bytes so that we
+	 * tax mix_pool_bytes()'s compression function the same amount on all
+	 * platforms. This means on 64-bit we copy half the pool into this,
+	 * while on 32-bit we copy all of it. The entropy is supposed to be
+	 * sufficiently dispersed between bits that, in the sponge-like
+	 * half-copy case, on average we don't wind up "losing" any.
+ */
+ u8 pool[16];
- while (nbytes) {
- extract_buf(tmp);
- i = min_t(int, nbytes, EXTRACT_SIZE);
- memcpy(buf, tmp, i);
- nbytes -= i;
- buf += i;
- ret += i;
+ /* Check to see if we're running on the wrong CPU due to hotplug. */
+ local_irq_disable();
+ if (fast_pool != this_cpu_ptr(&irq_randomness)) {
+ local_irq_enable();
+ return;
}
- /* Wipe data just returned from memory */
- memzero_explicit(tmp, sizeof(tmp));
+ /*
+ * Copy the pool to the stack so that the mixer always has a
+ * consistent view, before we reenable irqs again.
+ */
+ memcpy(pool, fast_pool->pool, sizeof(pool));
+ fast_pool->count = 0;
+ fast_pool->last = jiffies;
+ local_irq_enable();
- return ret;
-}
+ if (unlikely(crng_init == 0)) {
+ crng_pre_init_inject(pool, sizeof(pool), true);
+ mix_pool_bytes(pool, sizeof(pool));
+ } else {
+ mix_pool_bytes(pool, sizeof(pool));
+ credit_entropy_bits(1);
+ }
-/*
- * This function extracts randomness from the "entropy pool", and
- * returns it in a buffer.
- *
- * The min parameter specifies the minimum amount we can pull before
- * failing to avoid races that defeat catastrophic reseeding.
- */
-static ssize_t extract_entropy(void *buf, size_t nbytes, int min)
-{
- trace_extract_entropy(nbytes, POOL_ENTROPY_BITS(), _RET_IP_);
- nbytes = account(nbytes, min);
- return _extract_entropy(buf, nbytes);
+ memzero_explicit(pool, sizeof(pool));
}
-#define warn_unseeded_randomness(previous) \
- _warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous))
-
-static void _warn_unseeded_randomness(const char *func_name, void *caller, void **previous)
+void add_interrupt_randomness(int irq)
{
-#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM
- const bool print_once = false;
-#else
- static bool print_once __read_mostly;
-#endif
-
- if (print_once || crng_ready() ||
- (previous && (caller == READ_ONCE(*previous))))
- return;
- WRITE_ONCE(*previous, caller);
-#ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM
- print_once = true;
-#endif
- if (__ratelimit(&unseeded_warning))
- printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n",
- func_name, caller, crng_init);
-}
+ enum { MIX_INFLIGHT = 1U << 31 };
+ cycles_t cycles = random_get_entropy();
+ unsigned long now = jiffies;
+ struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
+ struct pt_regs *regs = get_irq_regs();
+ unsigned int new_count;
+ union {
+ u32 u32[4];
+ u64 u64[2];
+ unsigned long longs[16 / sizeof(long)];
+ } irq_data;
-/*
- * This function is the exported kernel interface. It returns some
- * number of good random numbers, suitable for key generation, seeding
- * TCP sequence numbers, etc. It does not rely on the hardware random
- * number generator. For random bytes direct from the hardware RNG
- * (when available), use get_random_bytes_arch(). In order to ensure
- * that the randomness provided by this function is okay, the function
- * wait_for_random_bytes() should be called and return 0 at least once
- * at any point prior.
- */
-static void _get_random_bytes(void *buf, int nbytes)
-{
- u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4);
+ if (cycles == 0)
+ cycles = get_reg(fast_pool, regs);
- trace_get_random_bytes(nbytes, _RET_IP_);
+ if (sizeof(cycles) == 8)
+ irq_data.u64[0] = cycles ^ rol64(now, 32) ^ irq;
+ else {
+ irq_data.u32[0] = cycles ^ irq;
+ irq_data.u32[1] = now;
+ }
- while (nbytes >= CHACHA_BLOCK_SIZE) {
- extract_crng(buf);
- buf += CHACHA_BLOCK_SIZE;
- nbytes -= CHACHA_BLOCK_SIZE;
+ if (sizeof(unsigned long) == 8)
+ irq_data.u64[1] = regs ? instruction_pointer(regs) : _RET_IP_;
+ else {
+ irq_data.u32[2] = regs ? instruction_pointer(regs) : _RET_IP_;
+ irq_data.u32[3] = get_reg(fast_pool, regs);
}
- if (nbytes > 0) {
- extract_crng(tmp);
- memcpy(buf, tmp, nbytes);
- crng_backtrack_protect(tmp, nbytes);
- } else
- crng_backtrack_protect(tmp, CHACHA_BLOCK_SIZE);
- memzero_explicit(tmp, sizeof(tmp));
-}
+ fast_mix(fast_pool->pool, irq_data.longs);
+ new_count = ++fast_pool->count;
-void get_random_bytes(void *buf, int nbytes)
-{
- static void *previous;
+ if (new_count & MIX_INFLIGHT)
+ return;
- warn_unseeded_randomness(&previous);
- _get_random_bytes(buf, nbytes);
+ if (new_count < 64 && (!time_after(now, fast_pool->last + HZ) ||
+ unlikely(crng_init == 0)))
+ return;
+
+ if (unlikely(!fast_pool->mix.func))
+ INIT_WORK(&fast_pool->mix, mix_interrupt_randomness);
+ fast_pool->count |= MIX_INFLIGHT;
+ queue_work_on(raw_smp_processor_id(), system_highpri_wq, &fast_pool->mix);
}
-EXPORT_SYMBOL(get_random_bytes);
+EXPORT_SYMBOL_GPL(add_interrupt_randomness);
/*
* Each time the timer fires, we expect that we got an unpredictable
@@ -1501,238 +1398,134 @@ static void entropy_timer(struct timer_list *t)
static void try_to_generate_entropy(void)
{
struct {
- unsigned long now;
+ cycles_t cycles;
struct timer_list timer;
} stack;
- stack.now = random_get_entropy();
+ stack.cycles = random_get_entropy();
/* Slow counter - or none. Don't even bother */
- if (stack.now == random_get_entropy())
+ if (stack.cycles == random_get_entropy())
return;
timer_setup_on_stack(&stack.timer, entropy_timer, 0);
- while (!crng_ready()) {
+ while (!crng_ready() && !signal_pending(current)) {
if (!timer_pending(&stack.timer))
mod_timer(&stack.timer, jiffies + 1);
- mix_pool_bytes(&stack.now, sizeof(stack.now));
+ mix_pool_bytes(&stack.cycles, sizeof(stack.cycles));
schedule();
- stack.now = random_get_entropy();
+ stack.cycles = random_get_entropy();
}
del_timer_sync(&stack.timer);
destroy_timer_on_stack(&stack.timer);
- mix_pool_bytes(&stack.now, sizeof(stack.now));
+ mix_pool_bytes(&stack.cycles, sizeof(stack.cycles));
}
-/*
- * Wait for the urandom pool to be seeded and thus guaranteed to supply
- * cryptographically secure random numbers. This applies to: the /dev/urandom
- * device, the get_random_bytes function, and the get_random_{u32,u64,int,long}
- * family of functions. Using any of these functions without first calling
- * this function forfeits the guarantee of security.
- *
- * Returns: 0 if the urandom pool has been seeded.
- * -ERESTARTSYS if the function was interrupted by a signal.
- */
-int wait_for_random_bytes(void)
-{
- if (likely(crng_ready()))
- return 0;
-
- do {
- int ret;
- ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ);
- if (ret)
- return ret > 0 ? 0 : ret;
-
- try_to_generate_entropy();
- } while (!crng_ready());
- return 0;
-}
-EXPORT_SYMBOL(wait_for_random_bytes);
-
-/*
- * Returns whether or not the urandom pool has been seeded and thus guaranteed
- * to supply cryptographically secure random numbers. This applies to: the
- * /dev/urandom device, the get_random_bytes function, and the get_random_{u32,
- * ,u64,int,long} family of functions.
+/**********************************************************************
*
- * Returns: true if the urandom pool has been seeded.
- * false if the urandom pool has not been seeded.
- */
-bool rng_is_initialized(void)
-{
- return crng_ready();
-}
-EXPORT_SYMBOL(rng_is_initialized);
-
-/*
- * Add a callback function that will be invoked when the nonblocking
- * pool is initialised.
+ * Userspace reader/writer interfaces.
*
- * returns: 0 if callback is successfully added
- * -EALREADY if pool is already initialised (callback not called)
- * -ENOENT if module for callback is not alive
- */
-int add_random_ready_callback(struct random_ready_callback *rdy)
-{
- struct module *owner;
- unsigned long flags;
- int err = -EALREADY;
-
- if (crng_ready())
- return err;
-
- owner = rdy->owner;
- if (!try_module_get(owner))
- return -ENOENT;
-
- spin_lock_irqsave(&random_ready_list_lock, flags);
- if (crng_ready())
- goto out;
-
- owner = NULL;
-
- list_add(&rdy->list, &random_ready_list);
- err = 0;
-
-out:
- spin_unlock_irqrestore(&random_ready_list_lock, flags);
-
- module_put(owner);
-
- return err;
-}
-EXPORT_SYMBOL(add_random_ready_callback);
+ * getrandom(2) is the primary modern interface into the RNG and should
+ * be used in preference to anything else.
+ *
+ * Reading from /dev/random has the same functionality as calling
+ * getrandom(2) with flags=0. In earlier versions, however, it had
+ * vastly different semantics and should therefore be avoided, to
+ * prevent backwards compatibility issues.
+ *
+ * Reading from /dev/urandom has the same functionality as calling
+ * getrandom(2) with flags=GRND_INSECURE. Because it does not block
+ * waiting for the RNG to be ready, it should not be used.
+ *
+ * Writing to either /dev/random or /dev/urandom adds entropy to
+ * the input pool but does not credit it.
+ *
+ * Polling on /dev/random indicates when the RNG is initialized, on
+ * the read side, and when it wants new entropy, on the write side.
+ *
+ * Both /dev/random and /dev/urandom have the same set of ioctls for
+ * adding entropy, getting the entropy count, zeroing the count, and
+ * reseeding the crng.
+ *
+ **********************************************************************/
-/*
- * Delete a previously registered readiness callback function.
- */
-void del_random_ready_callback(struct random_ready_callback *rdy)
+SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int,
+ flags)
{
- unsigned long flags;
- struct module *owner = NULL;
-
- spin_lock_irqsave(&random_ready_list_lock, flags);
- if (!list_empty(&rdy->list)) {
- list_del_init(&rdy->list);
- owner = rdy->owner;
- }
- spin_unlock_irqrestore(&random_ready_list_lock, flags);
-
- module_put(owner);
-}
-EXPORT_SYMBOL(del_random_ready_callback);
+ if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE))
+ return -EINVAL;
-/*
- * This function will use the architecture-specific hardware random
- * number generator if it is available. The arch-specific hw RNG will
- * almost certainly be faster than what we can do in software, but it
- * is impossible to verify that it is implemented securely (as
- * opposed, to, say, the AES encryption of a sequence number using a
- * key known by the NSA). So it's useful if we need the speed, but
- * only if we're willing to trust the hardware manufacturer not to
- * have put in a back door.
- *
- * Return number of bytes filled in.
- */
-int __must_check get_random_bytes_arch(void *buf, int nbytes)
-{
- int left = nbytes;
- u8 *p = buf;
+ /*
+ * Requesting insecure and blocking randomness at the same time makes
+ * no sense.
+ */
+ if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM))
+ return -EINVAL;
- trace_get_random_bytes_arch(left, _RET_IP_);
- while (left) {
- unsigned long v;
- int chunk = min_t(int, left, sizeof(unsigned long));
+ if (count > INT_MAX)
+ count = INT_MAX;
- if (!arch_get_random_long(&v))
- break;
+ if (!(flags & GRND_INSECURE) && !crng_ready()) {
+ int ret;
- memcpy(p, &v, chunk);
- p += chunk;
- left -= chunk;
+ if (flags & GRND_NONBLOCK)
+ return -EAGAIN;
+ ret = wait_for_random_bytes();
+ if (unlikely(ret))
+ return ret;
}
-
- return nbytes - left;
+ return get_random_bytes_user(buf, count);
}
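
From userspace, the flag handling above reduces to simple behaviors. A minimal caller through the glibc wrapper (available since glibc 2.25; older systems can use syscall(SYS_getrandom, ...) identically):

#include <stdio.h>
#include <sys/types.h>
#include <sys/random.h>

int main(void)
{
	unsigned char buf[16];
	ssize_t n = getrandom(buf, sizeof(buf), 0);	/* flags=0: block until seeded */

	if (n < 0) {
		perror("getrandom");
		return 1;
	}
	for (ssize_t i = 0; i < n; i++)
		printf("%02x", buf[i]);
	printf("\n");
	return 0;
}

Passing GRND_NONBLOCK instead of 0 turns the pre-seed wait into -EAGAIN, and GRND_INSECURE skips the wait entirely, matching the branches in the syscall body.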
-EXPORT_SYMBOL(get_random_bytes_arch);
-/*
- * init_std_data - initialize pool with system data
- *
- * This function clears the pool's entropy count and mixes some system
- * data into the pool to prepare it for use. The pool is not cleared
- * as that can only decrease the entropy in the pool.
- */
-static void __init init_std_data(void)
+static __poll_t random_poll(struct file *file, poll_table *wait)
{
- int i;
- ktime_t now = ktime_get_real();
- unsigned long rv;
-
- mix_pool_bytes(&now, sizeof(now));
- for (i = POOL_BYTES; i > 0; i -= sizeof(rv)) {
- if (!arch_get_random_seed_long(&rv) &&
- !arch_get_random_long(&rv))
- rv = random_get_entropy();
- mix_pool_bytes(&rv, sizeof(rv));
- }
- mix_pool_bytes(utsname(), sizeof(*(utsname())));
-}
+ __poll_t mask;
-/*
- * Note that setup_arch() may call add_device_randomness()
- * long before we get here. This allows seeding of the pools
- * with some platform dependent data very early in the boot
- * process. But it limits our options here. We must use
- * statically allocated structures that already have all
- * initializations complete at compile time. We should also
- * take care not to overwrite the precious per platform data
- * we were given.
- */
-int __init rand_initialize(void)
-{
- init_std_data();
- if (crng_need_final_init)
- crng_finalize_init(&primary_crng);
- crng_initialize_primary(&primary_crng);
- crng_global_init_time = jiffies;
- if (ratelimit_disable) {
- urandom_warning.interval = 0;
- unseeded_warning.interval = 0;
- }
- return 0;
+ poll_wait(file, &crng_init_wait, wait);
+ poll_wait(file, &random_write_wait, wait);
+ mask = 0;
+ if (crng_ready())
+ mask |= EPOLLIN | EPOLLRDNORM;
+ if (input_pool.entropy_count < POOL_MIN_BITS)
+ mask |= EPOLLOUT | EPOLLWRNORM;
+ return mask;
}
-#ifdef CONFIG_BLOCK
-void rand_initialize_disk(struct gendisk *disk)
+static int write_pool(const char __user *ubuf, size_t count)
{
- struct timer_rand_state *state;
+ size_t len;
+ int ret = 0;
+ u8 block[BLAKE2S_BLOCK_SIZE];
- /*
- * If kzalloc returns null, we just won't use that entropy
- * source.
- */
- state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
- if (state) {
- state->last_time = INITIAL_JIFFIES;
- disk->random = state;
+ while (count) {
+ len = min(count, sizeof(block));
+ if (copy_from_user(block, ubuf, len)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ count -= len;
+ ubuf += len;
+ mix_pool_bytes(block, len);
+ cond_resched();
}
+
+out:
+ memzero_explicit(block, sizeof(block));
+ return ret;
}
-#endif
-static ssize_t urandom_read_nowarn(struct file *file, char __user *buf,
- size_t nbytes, loff_t *ppos)
+static ssize_t random_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
{
int ret;
- nbytes = min_t(size_t, nbytes, INT_MAX >> (POOL_ENTROPY_SHIFT + 3));
- ret = extract_crng_user(buf, nbytes);
- trace_urandom_read(8 * nbytes, 0, POOL_ENTROPY_BITS());
- return ret;
+ ret = write_pool(buffer, count);
+ if (ret)
+ return ret;
+
+ return (ssize_t)count;
}
static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes,
@@ -1747,7 +1540,7 @@ static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes,
current->comm, nbytes);
}
- return urandom_read_nowarn(file, buf, nbytes, ppos);
+ return get_random_bytes_user(buf, nbytes);
}
static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes,
@@ -1758,62 +1551,7 @@ static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes,
ret = wait_for_random_bytes();
if (ret != 0)
return ret;
- return urandom_read_nowarn(file, buf, nbytes, ppos);
-}
-
-static __poll_t random_poll(struct file *file, poll_table *wait)
-{
- __poll_t mask;
-
- poll_wait(file, &crng_init_wait, wait);
- poll_wait(file, &random_write_wait, wait);
- mask = 0;
- if (crng_ready())
- mask |= EPOLLIN | EPOLLRDNORM;
- if (POOL_ENTROPY_BITS() < random_write_wakeup_bits)
- mask |= EPOLLOUT | EPOLLWRNORM;
- return mask;
-}
-
-static int write_pool(const char __user *buffer, size_t count)
-{
- size_t bytes;
- u32 t, buf[16];
- const char __user *p = buffer;
-
- while (count > 0) {
- int b, i = 0;
-
- bytes = min(count, sizeof(buf));
- if (copy_from_user(&buf, p, bytes))
- return -EFAULT;
-
- for (b = bytes; b > 0; b -= sizeof(u32), i++) {
- if (!arch_get_random_int(&t))
- break;
- buf[i] ^= t;
- }
-
- count -= bytes;
- p += bytes;
-
- mix_pool_bytes(buf, bytes);
- cond_resched();
- }
-
- return 0;
-}
-
-static ssize_t random_write(struct file *file, const char __user *buffer,
- size_t count, loff_t *ppos)
-{
- size_t ret;
-
- ret = write_pool(buffer, count);
- if (ret)
- return ret;
-
- return (ssize_t)count;
+ return get_random_bytes_user(buf, nbytes);
}
static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
@@ -1824,9 +1562,8 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
switch (cmd) {
case RNDGETENTCNT:
- /* inherently racy, no point locking */
- ent_count = POOL_ENTROPY_BITS();
- if (put_user(ent_count, p))
+ /* Inherently racy, no point locking. */
+ if (put_user(input_pool.entropy_count, p))
return -EFAULT;
return 0;
case RNDADDTOENTCNT:
@@ -1834,7 +1571,10 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
return -EPERM;
if (get_user(ent_count, p))
return -EFAULT;
- return credit_entropy_bits_safe(ent_count);
+ if (ent_count < 0)
+ return -EINVAL;
+ credit_entropy_bits(ent_count);
+ return 0;
case RNDADDENTROPY:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -1847,7 +1587,8 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
retval = write_pool((const char __user *)p, size);
if (retval < 0)
return retval;
- return credit_entropy_bits_safe(ent_count);
+ credit_entropy_bits(ent_count);
+ return 0;
case RNDZAPENTCNT:
case RNDCLEARPOOL:
/*
@@ -1856,15 +1597,17 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
*/
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- input_pool.entropy_count = 0;
+ if (xchg(&input_pool.entropy_count, 0) >= POOL_MIN_BITS) {
+ wake_up_interruptible(&random_write_wait);
+ kill_fasync(&fasync, SIGIO, POLL_OUT);
+ }
return 0;
case RNDRESEEDCRNG:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (crng_init < 2)
+ if (!crng_ready())
return -ENODATA;
- crng_reseed(&primary_crng, true);
- WRITE_ONCE(crng_global_init_time, jiffies - 1);
+ crng_reseed(false);
return 0;
default:
return -EINVAL;
@@ -1895,37 +1638,34 @@ const struct file_operations urandom_fops = {
.llseek = noop_llseek,
};
-SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int,
- flags)
-{
- int ret;
-
- if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE))
- return -EINVAL;
-
- /*
- * Requesting insecure and blocking randomness at the same time makes
- * no sense.
- */
- if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM))
- return -EINVAL;
-
- if (count > INT_MAX)
- count = INT_MAX;
-
- if (!(flags & GRND_INSECURE) && !crng_ready()) {
- if (flags & GRND_NONBLOCK)
- return -EAGAIN;
- ret = wait_for_random_bytes();
- if (unlikely(ret))
- return ret;
- }
- return urandom_read_nowarn(NULL, buf, count, NULL);
-}
/********************************************************************
*
- * Sysctl interface
+ * Sysctl interface.
+ *
+ * These are partly unused legacy knobs with dummy values kept so as not
+ * to break userspace, and partly still-useful knobs. They are usually accessible
+ * in /proc/sys/kernel/random/ and are as follows:
+ *
+ * - boot_id - a UUID representing the current boot.
+ *
+ * - uuid - a random UUID, different each time the file is read.
+ *
+ * - poolsize - the number of bits of entropy that the input pool can
+ * hold, tied to the POOL_BITS constant.
+ *
+ * - entropy_avail - the number of bits of entropy currently in the
+ * input pool. Always <= poolsize.
+ *
+ * - write_wakeup_threshold - the amount of entropy in the input pool
+ * below which write polls to /dev/random will unblock, requesting
+ * more entropy, tied to the POOL_MIN_BITS constant. It is writable
+ * to avoid breaking old userspaces, but writing to it does not
+ * change any behavior of the RNG.
+ *
+ * - urandom_min_reseed_secs - fixed to the value CRNG_RESEED_INTERVAL.
+ * It is writable to avoid breaking old userspaces, but writing
+ * to it does not change any behavior of the RNG.
*
********************************************************************/
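
entropy_avail is a plain integer readout, so checking it from userspace needs nothing beyond procfs; a small sketch (the 256 in the message mirrors the POOL_BITS value above):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/random/entropy_avail", "r");
	int bits;

	if (!f)
		return 1;
	if (fscanf(f, "%d", &bits) == 1)
		printf("entropy_avail: %d bits (poolsize 256)\n", bits);
	fclose(f);
	return 0;
}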
@@ -1933,25 +1673,28 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int,
#include <linux/sysctl.h>
-static int min_write_thresh;
-static int max_write_thresh = POOL_BITS;
-static int random_min_urandom_seed = 60;
-static char sysctl_bootid[16];
+static int sysctl_random_min_urandom_seed = CRNG_RESEED_INTERVAL / HZ;
+static int sysctl_random_write_wakeup_bits = POOL_MIN_BITS;
+static int sysctl_poolsize = POOL_BITS;
+static u8 sysctl_bootid[UUID_SIZE];
/*
* This function is used to return both the bootid UUID, and random
- * UUID. The difference is in whether table->data is NULL; if it is,
+ * UUID. The difference is in whether table->data is NULL; if it is,
* then a new UUID is generated and returned to the user.
- *
- * If the user accesses this via the proc interface, the UUID will be
- * returned as an ASCII string in the standard UUID format; if via the
- * sysctl system call, as 16 bytes of binary data.
*/
static int proc_do_uuid(struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
- struct ctl_table fake_table;
- unsigned char buf[64], tmp_uuid[16], *uuid;
+ u8 tmp_uuid[UUID_SIZE], *uuid;
+ char uuid_string[UUID_STRING_LEN + 1];
+ struct ctl_table fake_table = {
+ .data = uuid_string,
+ .maxlen = UUID_STRING_LEN
+ };
+
+ if (write)
+ return -EPERM;
uuid = table->data;
if (!uuid) {
@@ -1966,32 +1709,17 @@ static int proc_do_uuid(struct ctl_table *table, int write, void *buffer,
spin_unlock(&bootid_spinlock);
}
- sprintf(buf, "%pU", uuid);
-
- fake_table.data = buf;
- fake_table.maxlen = sizeof(buf);
-
- return proc_dostring(&fake_table, write, buffer, lenp, ppos);
+ snprintf(uuid_string, sizeof(uuid_string), "%pU", uuid);
+ return proc_dostring(&fake_table, 0, buffer, lenp, ppos);
}
-/*
- * Return entropy available scaled to integral bits
- */
-static int proc_do_entropy(struct ctl_table *table, int write, void *buffer,
- size_t *lenp, loff_t *ppos)
+/* The same as proc_dointvec, but writes don't change anything. */
+static int proc_do_rointvec(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
{
- struct ctl_table fake_table;
- int entropy_count;
-
- entropy_count = *(int *)table->data >> POOL_ENTROPY_SHIFT;
-
- fake_table.data = &entropy_count;
- fake_table.maxlen = sizeof(entropy_count);
-
- return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
+ return write ? 0 : proc_dointvec(table, 0, buffer, lenp, ppos);
}
-static int sysctl_poolsize = POOL_BITS;
static struct ctl_table random_table[] = {
{
.procname = "poolsize",
@@ -2002,56 +1730,36 @@ static struct ctl_table random_table[] = {
},
{
.procname = "entropy_avail",
+ .data = &input_pool.entropy_count,
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_do_entropy,
- .data = &input_pool.entropy_count,
+ .proc_handler = proc_dointvec,
},
{
.procname = "write_wakeup_threshold",
- .data = &random_write_wakeup_bits,
+ .data = &sysctl_random_write_wakeup_bits,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_write_thresh,
- .extra2 = &max_write_thresh,
+ .proc_handler = proc_do_rointvec,
},
{
.procname = "urandom_min_reseed_secs",
- .data = &random_min_urandom_seed,
+ .data = &sysctl_random_min_urandom_seed,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_do_rointvec,
},
{
.procname = "boot_id",
.data = &sysctl_bootid,
- .maxlen = 16,
.mode = 0444,
.proc_handler = proc_do_uuid,
},
{
.procname = "uuid",
- .maxlen = 16,
.mode = 0444,
.proc_handler = proc_do_uuid,
},
-#ifdef ADD_INTERRUPT_BENCH
- {
- .procname = "add_interrupt_avg_cycles",
- .data = &avg_cycles,
- .maxlen = sizeof(avg_cycles),
- .mode = 0444,
- .proc_handler = proc_doulongvec_minmax,
- },
- {
- .procname = "add_interrupt_avg_deviation",
- .data = &avg_deviation,
- .maxlen = sizeof(avg_deviation),
- .mode = 0444,
- .proc_handler = proc_doulongvec_minmax,
- },
-#endif
{ }
};
@@ -2065,168 +1773,4 @@ static int __init random_sysctls_init(void)
return 0;
}
device_initcall(random_sysctls_init);
-#endif /* CONFIG_SYSCTL */
-
-struct batched_entropy {
- union {
- u64 entropy_u64[CHACHA_BLOCK_SIZE / sizeof(u64)];
- u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)];
- };
- unsigned int position;
- spinlock_t batch_lock;
-};
-
-/*
- * Get a random word for internal kernel use only. The quality of the random
- * number is good as /dev/urandom, but there is no backtrack protection, with
- * the goal of being quite fast and not depleting entropy. In order to ensure
- * that the randomness provided by this function is okay, the function
- * wait_for_random_bytes() should be called and return 0 at least once at any
- * point prior.
- */
-static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = {
- .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock),
-};
-
-u64 get_random_u64(void)
-{
- u64 ret;
- unsigned long flags;
- struct batched_entropy *batch;
- static void *previous;
-
- warn_unseeded_randomness(&previous);
-
- batch = raw_cpu_ptr(&batched_entropy_u64);
- spin_lock_irqsave(&batch->batch_lock, flags);
- if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
- extract_crng((u8 *)batch->entropy_u64);
- batch->position = 0;
- }
- ret = batch->entropy_u64[batch->position++];
- spin_unlock_irqrestore(&batch->batch_lock, flags);
- return ret;
-}
-EXPORT_SYMBOL(get_random_u64);
-
-static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = {
- .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock),
-};
-u32 get_random_u32(void)
-{
- u32 ret;
- unsigned long flags;
- struct batched_entropy *batch;
- static void *previous;
-
- warn_unseeded_randomness(&previous);
-
- batch = raw_cpu_ptr(&batched_entropy_u32);
- spin_lock_irqsave(&batch->batch_lock, flags);
- if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
- extract_crng((u8 *)batch->entropy_u32);
- batch->position = 0;
- }
- ret = batch->entropy_u32[batch->position++];
- spin_unlock_irqrestore(&batch->batch_lock, flags);
- return ret;
-}
-EXPORT_SYMBOL(get_random_u32);
-
-/* It's important to invalidate all potential batched entropy that might
- * be stored before the crng is initialized, which we can do lazily by
- * simply resetting the counter to zero so that it's re-extracted on the
- * next usage. */
-static void invalidate_batched_entropy(void)
-{
- int cpu;
- unsigned long flags;
-
- for_each_possible_cpu(cpu) {
- struct batched_entropy *batched_entropy;
-
- batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu);
- spin_lock_irqsave(&batched_entropy->batch_lock, flags);
- batched_entropy->position = 0;
- spin_unlock(&batched_entropy->batch_lock);
-
- batched_entropy = per_cpu_ptr(&batched_entropy_u64, cpu);
- spin_lock(&batched_entropy->batch_lock);
- batched_entropy->position = 0;
- spin_unlock_irqrestore(&batched_entropy->batch_lock, flags);
- }
-}
-
-/**
- * randomize_page - Generate a random, page aligned address
- * @start: The smallest acceptable address the caller will take.
- * @range: The size of the area, starting at @start, within which the
- * random address must fall.
- *
- * If @start + @range would overflow, @range is capped.
- *
- * NOTE: Historical use of randomize_range, which this replaces, presumed that
- * @start was already page aligned. We now align it regardless.
- *
- * Return: A page aligned address within [start, start + range). On error,
- * @start is returned.
- */
-unsigned long randomize_page(unsigned long start, unsigned long range)
-{
- if (!PAGE_ALIGNED(start)) {
- range -= PAGE_ALIGN(start) - start;
- start = PAGE_ALIGN(start);
- }
-
- if (start > ULONG_MAX - range)
- range = ULONG_MAX - start;
-
- range >>= PAGE_SHIFT;
-
- if (range == 0)
- return start;
-
- return start + (get_random_long() % range << PAGE_SHIFT);
-}
-
-/* Interface for in-kernel drivers of true hardware RNGs.
- * Those devices may produce endless random bits and will be throttled
- * when our pool is full.
- */
-void add_hwgenerator_randomness(const char *buffer, size_t count,
- size_t entropy)
-{
- if (unlikely(crng_init == 0)) {
- size_t ret = crng_fast_load(buffer, count);
- mix_pool_bytes(buffer, ret);
- count -= ret;
- buffer += ret;
- if (!count || crng_init == 0)
- return;
- }
-
- /* Suspend writing if we're above the trickle threshold.
- * We'll be woken up again once below random_write_wakeup_thresh,
- * or when the calling thread is about to terminate.
- */
- wait_event_interruptible(random_write_wait,
- !system_wq || kthread_should_stop() ||
- POOL_ENTROPY_BITS() <= random_write_wakeup_bits);
- mix_pool_bytes(buffer, count);
- credit_entropy_bits(entropy);
-}
-EXPORT_SYMBOL_GPL(add_hwgenerator_randomness);
-
-/* Handle random seed passed by bootloader.
- * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise
- * it would be regarded as device data.
- * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER.
- */
-void add_bootloader_randomness(const void *buf, unsigned int size)
-{
- if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER))
- add_hwgenerator_randomness(buf, size, size * 8);
- else
- add_device_randomness(buf, size);
-}
-EXPORT_SYMBOL_GPL(add_bootloader_randomness);
+#endif
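
The randomize_page() helper removed from random.c above packs several subtleties into a few lines: the start address is aligned up (shrinking the range accordingly), the range is capped against address-space wraparound, and the random pick is made in whole pages. A standalone sketch of the same arithmetic, illustrative only, assuming a 4 KiB page size and with rnd() standing in for get_random_long():

	#include <limits.h>

	#define PAGE_SHIFT	12
	#define PAGE_SIZE	(1UL << PAGE_SHIFT)
	#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

	/* rnd() stands in for get_random_long(); any uniform source works. */
	extern unsigned long rnd(void);

	unsigned long randomize_page_sketch(unsigned long start, unsigned long range)
	{
		/* Align start up, shrinking the range by the bytes skipped. */
		if (start & (PAGE_SIZE - 1)) {
			range -= PAGE_ALIGN(start) - start;
			start = PAGE_ALIGN(start);
		}

		/* Cap the range so start + range cannot wrap around. */
		if (start > ULONG_MAX - range)
			range = ULONG_MAX - start;

		/* Work in whole pages; zero pages leaves nothing to randomize. */
		range >>= PAGE_SHIFT;
		if (range == 0)
			return start;

		/* Pick one of the 'range' pages in [start, start + range). */
		return start + ((rnd() % range) << PAGE_SHIFT);
	}
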
diff --git a/drivers/char/tpm/st33zp24/i2c.c b/drivers/char/tpm/st33zp24/i2c.c
index 7c617edff4ca..3170d59d660c 100644
--- a/drivers/char/tpm/st33zp24/i2c.c
+++ b/drivers/char/tpm/st33zp24/i2c.c
@@ -267,11 +267,8 @@ static int st33zp24_i2c_probe(struct i2c_client *client,
static int st33zp24_i2c_remove(struct i2c_client *client)
{
struct tpm_chip *chip = i2c_get_clientdata(client);
- int ret;
- ret = st33zp24_remove(chip);
- if (ret)
- return ret;
+ st33zp24_remove(chip);
return 0;
}
diff --git a/drivers/char/tpm/st33zp24/spi.c b/drivers/char/tpm/st33zp24/spi.c
index a75dafd39445..22d184884694 100644
--- a/drivers/char/tpm/st33zp24/spi.c
+++ b/drivers/char/tpm/st33zp24/spi.c
@@ -381,16 +381,11 @@ static int st33zp24_spi_probe(struct spi_device *dev)
* @param: client, the spi_device description (TPM SPI description).
* @return: 0 in case of success.
*/
-static int st33zp24_spi_remove(struct spi_device *dev)
+static void st33zp24_spi_remove(struct spi_device *dev)
{
struct tpm_chip *chip = spi_get_drvdata(dev);
- int ret;
- ret = st33zp24_remove(chip);
- if (ret)
- return ret;
-
- return 0;
+ st33zp24_remove(chip);
}
static const struct spi_device_id st33zp24_spi_id[] = {
diff --git a/drivers/char/tpm/st33zp24/st33zp24.c b/drivers/char/tpm/st33zp24/st33zp24.c
index ce9efb73c144..15b393e92c8e 100644
--- a/drivers/char/tpm/st33zp24/st33zp24.c
+++ b/drivers/char/tpm/st33zp24/st33zp24.c
@@ -511,10 +511,9 @@ _tpm_clean_answer:
}
EXPORT_SYMBOL(st33zp24_probe);
-int st33zp24_remove(struct tpm_chip *chip)
+void st33zp24_remove(struct tpm_chip *chip)
{
tpm_chip_unregister(chip);
- return 0;
}
EXPORT_SYMBOL(st33zp24_remove);
diff --git a/drivers/char/tpm/st33zp24/st33zp24.h b/drivers/char/tpm/st33zp24/st33zp24.h
index 6747be1e2502..b387a476c555 100644
--- a/drivers/char/tpm/st33zp24/st33zp24.h
+++ b/drivers/char/tpm/st33zp24/st33zp24.h
@@ -34,5 +34,5 @@ int st33zp24_pm_resume(struct device *dev);
int st33zp24_probe(void *phy_id, const struct st33zp24_phy_ops *ops,
struct device *dev, int irq, int io_lpcpd);
-int st33zp24_remove(struct tpm_chip *chip);
+void st33zp24_remove(struct tpm_chip *chip);
#endif /* __LOCAL_ST33ZP24_H__ */
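
The int-to-void conversions in this series follow the tree-wide change to the SPI remove callback: its return value was ignored by the bus core, so drivers are expected to clean up unconditionally. A minimal sketch of the resulting driver shape, with hypothetical names:

	#include <linux/module.h>
	#include <linux/spi/spi.h>

	static int example_spi_probe(struct spi_device *spi)
	{
		/* allocate state, register with the subsystem, ... */
		return 0;
	}

	static void example_spi_remove(struct spi_device *spi)
	{
		/* Cleanup must not fail: there is nobody left to report to,
		 * and the device is going away regardless. */
	}

	static struct spi_driver example_spi_driver = {
		.driver = { .name = "example-spi" },
		.probe	= example_spi_probe,
		.remove	= example_spi_remove,
	};
	module_spi_driver(example_spi_driver);
	MODULE_LICENSE("GPL");
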
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index b009e7479b70..783d65fc71f0 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -274,14 +274,6 @@ static void tpm_dev_release(struct device *dev)
kfree(chip);
}
-static void tpm_devs_release(struct device *dev)
-{
- struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs);
-
- /* release the master device reference */
- put_device(&chip->dev);
-}
-
/**
* tpm_class_shutdown() - prepare the TPM device for loss of power.
* @dev: device to which the chip is associated.
@@ -344,7 +336,6 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
chip->dev_num = rc;
device_initialize(&chip->dev);
- device_initialize(&chip->devs);
chip->dev.class = tpm_class;
chip->dev.class->shutdown_pre = tpm_class_shutdown;
@@ -352,39 +343,20 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
chip->dev.parent = pdev;
chip->dev.groups = chip->groups;
- chip->devs.parent = pdev;
- chip->devs.class = tpmrm_class;
- chip->devs.release = tpm_devs_release;
- /* get extra reference on main device to hold on
- * behalf of devs. This holds the chip structure
- * while cdevs is in use. The corresponding put
- * is in the tpm_devs_release (TPM2 only)
- */
- if (chip->flags & TPM_CHIP_FLAG_TPM2)
- get_device(&chip->dev);
-
if (chip->dev_num == 0)
chip->dev.devt = MKDEV(MISC_MAJOR, TPM_MINOR);
else
chip->dev.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num);
- chip->devs.devt =
- MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES);
-
rc = dev_set_name(&chip->dev, "tpm%d", chip->dev_num);
if (rc)
goto out;
- rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num);
- if (rc)
- goto out;
if (!pdev)
chip->flags |= TPM_CHIP_FLAG_VIRTUAL;
cdev_init(&chip->cdev, &tpm_fops);
- cdev_init(&chip->cdevs, &tpmrm_fops);
chip->cdev.owner = THIS_MODULE;
- chip->cdevs.owner = THIS_MODULE;
rc = tpm2_init_space(&chip->work_space, TPM2_SPACE_BUFFER_SIZE);
if (rc) {
@@ -396,7 +368,6 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
return chip;
out:
- put_device(&chip->devs);
put_device(&chip->dev);
return ERR_PTR(rc);
}
@@ -445,14 +416,9 @@ static int tpm_add_char_device(struct tpm_chip *chip)
}
if (chip->flags & TPM_CHIP_FLAG_TPM2 && !tpm_is_firmware_upgrade(chip)) {
- rc = cdev_device_add(&chip->cdevs, &chip->devs);
- if (rc) {
- dev_err(&chip->devs,
- "unable to cdev_device_add() %s, major %d, minor %d, err=%d\n",
- dev_name(&chip->devs), MAJOR(chip->devs.devt),
- MINOR(chip->devs.devt), rc);
- return rc;
- }
+ rc = tpm_devs_add(chip);
+ if (rc)
+ goto err_del_cdev;
}
/* Make the chip available. */
@@ -460,6 +426,10 @@ static int tpm_add_char_device(struct tpm_chip *chip)
idr_replace(&dev_nums_idr, chip, chip->dev_num);
mutex_unlock(&idr_lock);
+ return 0;
+
+err_del_cdev:
+ cdev_device_del(&chip->cdev, &chip->dev);
return rc;
}
@@ -654,7 +624,7 @@ void tpm_chip_unregister(struct tpm_chip *chip)
hwrng_unregister(&chip->hwrng);
tpm_bios_log_teardown(chip);
if (chip->flags & TPM_CHIP_FLAG_TPM2 && !tpm_is_firmware_upgrade(chip))
- cdev_device_del(&chip->cdevs, &chip->devs);
+ tpm_devs_remove(chip);
tpm_del_char_device(chip);
}
EXPORT_SYMBOL_GPL(tpm_chip_unregister);
diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c
index c08cbb306636..dc4c0a0a5129 100644
--- a/drivers/char/tpm/tpm-dev-common.c
+++ b/drivers/char/tpm/tpm-dev-common.c
@@ -69,7 +69,13 @@ static void tpm_dev_async_work(struct work_struct *work)
ret = tpm_dev_transmit(priv->chip, priv->space, priv->data_buffer,
sizeof(priv->data_buffer));
tpm_put_ops(priv->chip);
- if (ret > 0) {
+
+ /*
+ * If ret is > 0 then tpm_dev_transmit returned the size of the
+ * response. If ret is < 0 then tpm_dev_transmit failed and
+ * returned an error code.
+ */
+ if (ret != 0) {
priv->response_length = ret;
mod_timer(&priv->user_read_timer, jiffies + (120 * HZ));
}
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 283f78211c3a..2163c6ee0d36 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -234,6 +234,8 @@ int tpm2_prepare_space(struct tpm_chip *chip, struct tpm_space *space, u8 *cmd,
size_t cmdsiz);
int tpm2_commit_space(struct tpm_chip *chip, struct tpm_space *space, void *buf,
size_t *bufsiz);
+int tpm_devs_add(struct tpm_chip *chip);
+void tpm_devs_remove(struct tpm_chip *chip);
void tpm_bios_log_setup(struct tpm_chip *chip);
void tpm_bios_log_teardown(struct tpm_chip *chip);
diff --git a/drivers/char/tpm/tpm2-space.c b/drivers/char/tpm/tpm2-space.c
index 97e916856cf3..ffb35f0154c1 100644
--- a/drivers/char/tpm/tpm2-space.c
+++ b/drivers/char/tpm/tpm2-space.c
@@ -58,12 +58,12 @@ int tpm2_init_space(struct tpm_space *space, unsigned int buf_size)
void tpm2_del_space(struct tpm_chip *chip, struct tpm_space *space)
{
- mutex_lock(&chip->tpm_mutex);
- if (!tpm_chip_start(chip)) {
+
+ if (tpm_try_get_ops(chip) == 0) {
tpm2_flush_sessions(chip, space);
- tpm_chip_stop(chip);
+ tpm_put_ops(chip);
}
- mutex_unlock(&chip->tpm_mutex);
+
kfree(space->context_buf);
kfree(space->session_buf);
}
@@ -574,3 +574,68 @@ out:
dev_err(&chip->dev, "%s: error %d\n", __func__, rc);
return rc;
}
+
+/*
+ * Put the reference to the main device.
+ */
+static void tpm_devs_release(struct device *dev)
+{
+ struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs);
+
+ /* release the master device reference */
+ put_device(&chip->dev);
+}
+
+/*
+ * Remove the device file for exposed TPM spaces and release the device
+ * reference. This may also release the reference to the master device.
+ */
+void tpm_devs_remove(struct tpm_chip *chip)
+{
+ cdev_device_del(&chip->cdevs, &chip->devs);
+ put_device(&chip->devs);
+}
+
+/*
+ * Add a device file to expose TPM spaces. Also take a reference to the
+ * main device.
+ */
+int tpm_devs_add(struct tpm_chip *chip)
+{
+ int rc;
+
+ device_initialize(&chip->devs);
+ chip->devs.parent = chip->dev.parent;
+ chip->devs.class = tpmrm_class;
+
+ /*
+	 * Take an extra reference on the main device to hold on behalf of
+	 * devs. This keeps the chip structure alive while cdevs is in use.
+	 * The corresponding put is in tpm_devs_release().
+ */
+ get_device(&chip->dev);
+ chip->devs.release = tpm_devs_release;
+ chip->devs.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES);
+ cdev_init(&chip->cdevs, &tpmrm_fops);
+ chip->cdevs.owner = THIS_MODULE;
+
+ rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num);
+ if (rc)
+ goto err_put_devs;
+
+ rc = cdev_device_add(&chip->cdevs, &chip->devs);
+ if (rc) {
+ dev_err(&chip->devs,
+ "unable to cdev_device_add() %s, major %d, minor %d, err=%d\n",
+ dev_name(&chip->devs), MAJOR(chip->devs.devt),
+ MINOR(chip->devs.devt), rc);
+ goto err_put_devs;
+ }
+
+ return 0;
+
+err_put_devs:
+ put_device(&chip->devs);
+
+ return rc;
+}
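
The error path in tpm_devs_add() above illustrates the general rule for an embedded struct device: once device_initialize() has run, the object is refcounted and must be released with put_device(), which invokes the ->release() callback, never freed directly. A generic sketch of that pattern (struct foo and its helpers are hypothetical):

	#include <linux/device.h>
	#include <linux/slab.h>

	struct foo {
		struct device dev;
		/* ... driver state ... */
	};

	static void foo_release(struct device *dev)
	{
		/* Runs when the last reference is dropped. */
		kfree(container_of(dev, struct foo, dev));
	}

	static struct foo *foo_create(struct device *parent)
	{
		struct foo *f;

		f = kzalloc(sizeof(*f), GFP_KERNEL);
		if (!f)
			return NULL;

		device_initialize(&f->dev);		/* refcount is now live */
		f->dev.parent = parent;
		f->dev.release = foo_release;

		if (dev_set_name(&f->dev, "foo0")) {
			put_device(&f->dev);		/* frees via foo_release() */
			return NULL;
		}
		return f;
	}
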
diff --git a/drivers/char/tpm/tpm_tis_spi_main.c b/drivers/char/tpm/tpm_tis_spi_main.c
index aaa59a00eeae..184396b3af50 100644
--- a/drivers/char/tpm/tpm_tis_spi_main.c
+++ b/drivers/char/tpm/tpm_tis_spi_main.c
@@ -254,13 +254,12 @@ static int tpm_tis_spi_driver_probe(struct spi_device *spi)
static SIMPLE_DEV_PM_OPS(tpm_tis_pm, tpm_pm_suspend, tpm_tis_spi_resume);
-static int tpm_tis_spi_remove(struct spi_device *dev)
+static void tpm_tis_spi_remove(struct spi_device *dev)
{
struct tpm_chip *chip = spi_get_drvdata(dev);
tpm_chip_unregister(chip);
tpm_tis_remove(chip);
- return 0;
}
static const struct spi_device_id tpm_tis_spi_id[] = {
diff --git a/drivers/char/tpm/tpm_vtpm_proxy.c b/drivers/char/tpm/tpm_vtpm_proxy.c
index 91c772e38bb5..5c865987ba5c 100644
--- a/drivers/char/tpm/tpm_vtpm_proxy.c
+++ b/drivers/char/tpm/tpm_vtpm_proxy.c
@@ -91,7 +91,7 @@ static ssize_t vtpm_proxy_fops_read(struct file *filp, char __user *buf,
len = proxy_dev->req_len;
- if (count < len) {
+ if (count < len || len > sizeof(proxy_dev->buffer)) {
mutex_unlock(&proxy_dev->buf_lock);
pr_debug("Invalid size in recv: count=%zd, req_len=%zd\n",
count, len);
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
index da5b30771418..f53e0cf1ec7e 100644
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -126,16 +126,16 @@ static void vtpm_cancel(struct tpm_chip *chip)
notify_remote_via_evtchn(priv->evtchn);
}
-static unsigned int shr_data_offset(struct vtpm_shared_page *shr)
+static size_t shr_data_offset(struct vtpm_shared_page *shr)
{
- return sizeof(*shr) + sizeof(u32) * shr->nr_extra_pages;
+ return struct_size(shr, extra_pages, shr->nr_extra_pages);
}
static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
{
struct tpm_private *priv = dev_get_drvdata(&chip->dev);
struct vtpm_shared_page *shr = priv->shr;
- unsigned int offset = shr_data_offset(shr);
+ size_t offset = shr_data_offset(shr);
u32 ordinal;
unsigned long duration;
@@ -177,7 +177,7 @@ static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
{
struct tpm_private *priv = dev_get_drvdata(&chip->dev);
struct vtpm_shared_page *shr = priv->shr;
- unsigned int offset = shr_data_offset(shr);
+ size_t offset = shr_data_offset(shr);
size_t length = shr->length;
if (shr->state == VTPM_STATE_IDLE)
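
struct_size() from <linux/overflow.h> computes the size of a structure plus n elements of its flexible array member, saturating at SIZE_MAX on overflow instead of silently wrapping, which is what makes it preferable to the open-coded sizeof arithmetic replaced above. A sketch of typical usage with hypothetical names:

	#include <linux/overflow.h>
	#include <linux/slab.h>
	#include <linux/types.h>

	struct shared_page_like {
		u32 length;
		u32 nr_extra_pages;
		u32 extra_pages[];		/* flexible array member */
	};

	static struct shared_page_like *alloc_shared(u32 n)
	{
		struct shared_page_like *shr;

		/* Equivalent to sizeof(*shr) + n * sizeof(shr->extra_pages[0]),
		 * but saturates instead of wrapping for huge n. */
		shr = kzalloc(struct_size(shr, extra_pages, n), GFP_KERNEL);
		if (shr)
			shr->nr_extra_pages = n;
		return shr;
	}
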
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 2359889a35a0..e3c430539a17 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1957,6 +1957,13 @@ static void virtcons_remove(struct virtio_device *vdev)
list_del(&portdev->list);
spin_unlock_irq(&pdrvdata_lock);
+ /* Device is going away, exit any polling for buffers */
+ virtio_break_device(vdev);
+ if (use_multiport(portdev))
+ flush_work(&portdev->control_work);
+ else
+ flush_work(&portdev->config_work);
+
/* Disable interrupts for vqs */
virtio_reset_device(vdev);
/* Finish up work that's lined up */
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index ad4256d54361..d4d67fbae869 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -231,6 +231,8 @@ config COMMON_CLK_GEMINI
config COMMON_CLK_LAN966X
bool "Generic Clock Controller driver for LAN966X SoC"
+ depends on HAS_IOMEM
+ depends on OF
help
This driver provides support for Generic Clock Controller(GCK) on
LAN966X SoC. GCK generates and supplies clock to various peripherals
diff --git a/drivers/clk/clk-lmk04832.c b/drivers/clk/clk-lmk04832.c
index 8f02c0b88000..f416f8bc2898 100644
--- a/drivers/clk/clk-lmk04832.c
+++ b/drivers/clk/clk-lmk04832.c
@@ -1544,14 +1544,12 @@ err_disable_oscin:
return ret;
}
-static int lmk04832_remove(struct spi_device *spi)
+static void lmk04832_remove(struct spi_device *spi)
{
struct lmk04832 *lmk = spi_get_drvdata(spi);
clk_disable_unprepare(lmk->oscin);
of_clk_del_provider(spi->dev.of_node);
-
- return 0;
}
static const struct spi_device_id lmk04832_id[] = {
{ "lmk04832", LMK04832 },
diff --git a/drivers/clk/ingenic/jz4725b-cgu.c b/drivers/clk/ingenic/jz4725b-cgu.c
index 744d136b721b..15d61793f53b 100644
--- a/drivers/clk/ingenic/jz4725b-cgu.c
+++ b/drivers/clk/ingenic/jz4725b-cgu.c
@@ -139,11 +139,10 @@ static const struct ingenic_cgu_clk_info jz4725b_cgu_clocks[] = {
},
[JZ4725B_CLK_I2S] = {
- "i2s", CGU_CLK_MUX | CGU_CLK_DIV | CGU_CLK_GATE,
+ "i2s", CGU_CLK_MUX | CGU_CLK_DIV,
.parents = { JZ4725B_CLK_EXT, JZ4725B_CLK_PLL_HALF, -1, -1 },
.mux = { CGU_REG_CPCCR, 31, 1 },
.div = { CGU_REG_I2SCDR, 0, 1, 9, -1, -1, -1 },
- .gate = { CGU_REG_CLKGR, 6 },
},
[JZ4725B_CLK_SPI] = {
diff --git a/drivers/clk/qcom/dispcc-sc7180.c b/drivers/clk/qcom/dispcc-sc7180.c
index 538e4963c915..5d2ae297e741 100644
--- a/drivers/clk/qcom/dispcc-sc7180.c
+++ b/drivers/clk/qcom/dispcc-sc7180.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (c) 2019, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2019, 2022, The Linux Foundation. All rights reserved.
*/
#include <linux/clk-provider.h>
@@ -625,6 +625,9 @@ static struct clk_branch disp_cc_mdss_vsync_clk = {
static struct gdsc mdss_gdsc = {
.gdscr = 0x3000,
+ .en_rest_wait_val = 0x2,
+ .en_few_wait_val = 0x2,
+ .clk_dis_wait_val = 0xf,
.pd = {
.name = "mdss_gdsc",
},
diff --git a/drivers/clk/qcom/dispcc-sc7280.c b/drivers/clk/qcom/dispcc-sc7280.c
index 4ef4ae231794..ad596d567f6a 100644
--- a/drivers/clk/qcom/dispcc-sc7280.c
+++ b/drivers/clk/qcom/dispcc-sc7280.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021-2022, The Linux Foundation. All rights reserved.
*/
#include <linux/clk-provider.h>
@@ -787,6 +787,9 @@ static struct clk_branch disp_cc_sleep_clk = {
static struct gdsc disp_cc_mdss_core_gdsc = {
.gdscr = 0x1004,
+ .en_rest_wait_val = 0x2,
+ .en_few_wait_val = 0x2,
+ .clk_dis_wait_val = 0xf,
.pd = {
.name = "disp_cc_mdss_core_gdsc",
},
diff --git a/drivers/clk/qcom/dispcc-sm8250.c b/drivers/clk/qcom/dispcc-sm8250.c
index 566fdfa0a15b..db9379634fb2 100644
--- a/drivers/clk/qcom/dispcc-sm8250.c
+++ b/drivers/clk/qcom/dispcc-sm8250.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2018-2020, 2022, The Linux Foundation. All rights reserved.
*/
#include <linux/clk-provider.h>
@@ -1126,6 +1126,9 @@ static struct clk_branch disp_cc_mdss_vsync_clk = {
static struct gdsc mdss_gdsc = {
.gdscr = 0x3000,
+ .en_rest_wait_val = 0x2,
+ .en_few_wait_val = 0x2,
+ .clk_dis_wait_val = 0xf,
.pd = {
.name = "mdss_gdsc",
},
diff --git a/drivers/clk/qcom/gcc-msm8994.c b/drivers/clk/qcom/gcc-msm8994.c
index 71aa630fa4bd..f09499999eb3 100644
--- a/drivers/clk/qcom/gcc-msm8994.c
+++ b/drivers/clk/qcom/gcc-msm8994.c
@@ -108,42 +108,6 @@ static const struct clk_parent_data gcc_xo_gpll0_gpll4[] = {
{ .hw = &gpll4.clkr.hw },
};
-static struct clk_rcg2 system_noc_clk_src = {
- .cmd_rcgr = 0x0120,
- .hid_width = 5,
- .parent_map = gcc_xo_gpll0_map,
- .clkr.hw.init = &(struct clk_init_data){
- .name = "system_noc_clk_src",
- .parent_data = gcc_xo_gpll0,
- .num_parents = ARRAY_SIZE(gcc_xo_gpll0),
- .ops = &clk_rcg2_ops,
- },
-};
-
-static struct clk_rcg2 config_noc_clk_src = {
- .cmd_rcgr = 0x0150,
- .hid_width = 5,
- .parent_map = gcc_xo_gpll0_map,
- .clkr.hw.init = &(struct clk_init_data){
- .name = "config_noc_clk_src",
- .parent_data = gcc_xo_gpll0,
- .num_parents = ARRAY_SIZE(gcc_xo_gpll0),
- .ops = &clk_rcg2_ops,
- },
-};
-
-static struct clk_rcg2 periph_noc_clk_src = {
- .cmd_rcgr = 0x0190,
- .hid_width = 5,
- .parent_map = gcc_xo_gpll0_map,
- .clkr.hw.init = &(struct clk_init_data){
- .name = "periph_noc_clk_src",
- .parent_data = gcc_xo_gpll0,
- .num_parents = ARRAY_SIZE(gcc_xo_gpll0),
- .ops = &clk_rcg2_ops,
- },
-};
-
static struct freq_tbl ftbl_ufs_axi_clk_src[] = {
F(50000000, P_GPLL0, 12, 0, 0),
F(100000000, P_GPLL0, 6, 0, 0),
@@ -1150,8 +1114,6 @@ static struct clk_branch gcc_blsp1_ahb_clk = {
.enable_mask = BIT(17),
.hw.init = &(struct clk_init_data){
.name = "gcc_blsp1_ahb_clk",
- .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw },
- .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
@@ -1435,8 +1397,6 @@ static struct clk_branch gcc_blsp2_ahb_clk = {
.enable_mask = BIT(15),
.hw.init = &(struct clk_init_data){
.name = "gcc_blsp2_ahb_clk",
- .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw },
- .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
@@ -1764,8 +1724,6 @@ static struct clk_branch gcc_lpass_q6_axi_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_lpass_q6_axi_clk",
- .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw },
- .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
@@ -1778,8 +1736,6 @@ static struct clk_branch gcc_mss_q6_bimc_axi_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_mss_q6_bimc_axi_clk",
- .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw },
- .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
@@ -1807,9 +1763,6 @@ static struct clk_branch gcc_pcie_0_cfg_ahb_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_pcie_0_cfg_ahb_clk",
- .parent_hws = (const struct clk_hw *[]){ &config_noc_clk_src.clkr.hw },
- .num_parents = 1,
- .flags = CLK_SET_RATE_PARENT,
.ops = &clk_branch2_ops,
},
},
@@ -1822,9 +1775,6 @@ static struct clk_branch gcc_pcie_0_mstr_axi_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_pcie_0_mstr_axi_clk",
- .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw },
- .num_parents = 1,
- .flags = CLK_SET_RATE_PARENT,
.ops = &clk_branch2_ops,
},
},
@@ -1854,9 +1804,6 @@ static struct clk_branch gcc_pcie_0_slv_axi_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_pcie_0_slv_axi_clk",
- .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw },
- .num_parents = 1,
- .flags = CLK_SET_RATE_PARENT,
.ops = &clk_branch2_ops,
},
},
@@ -1884,9 +1831,6 @@ static struct clk_branch gcc_pcie_1_cfg_ahb_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_pcie_1_cfg_ahb_clk",
- .parent_hws = (const struct clk_hw *[]){ &config_noc_clk_src.clkr.hw },
- .num_parents = 1,
- .flags = CLK_SET_RATE_PARENT,
.ops = &clk_branch2_ops,
},
},
@@ -1899,9 +1843,6 @@ static struct clk_branch gcc_pcie_1_mstr_axi_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_pcie_1_mstr_axi_clk",
- .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw },
- .num_parents = 1,
- .flags = CLK_SET_RATE_PARENT,
.ops = &clk_branch2_ops,
},
},
@@ -1930,9 +1871,6 @@ static struct clk_branch gcc_pcie_1_slv_axi_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_pcie_1_slv_axi_clk",
- .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw },
- .num_parents = 1,
- .flags = CLK_SET_RATE_PARENT,
.ops = &clk_branch2_ops,
},
},
@@ -1960,8 +1898,6 @@ static struct clk_branch gcc_pdm_ahb_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_pdm_ahb_clk",
- .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw },
- .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
@@ -1989,9 +1925,6 @@ static struct clk_branch gcc_sdcc1_ahb_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_sdcc1_ahb_clk",
- .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw },
- .num_parents = 1,
- .flags = CLK_SET_RATE_PARENT,
.ops = &clk_branch2_ops,
},
},
@@ -2004,9 +1937,6 @@ static struct clk_branch gcc_sdcc2_ahb_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_sdcc2_ahb_clk",
- .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw },
- .num_parents = 1,
- .flags = CLK_SET_RATE_PARENT,
.ops = &clk_branch2_ops,
},
},
@@ -2034,9 +1964,6 @@ static struct clk_branch gcc_sdcc3_ahb_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_sdcc3_ahb_clk",
- .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw },
- .num_parents = 1,
- .flags = CLK_SET_RATE_PARENT,
.ops = &clk_branch2_ops,
},
},
@@ -2064,9 +1991,6 @@ static struct clk_branch gcc_sdcc4_ahb_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_sdcc4_ahb_clk",
- .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw },
- .num_parents = 1,
- .flags = CLK_SET_RATE_PARENT,
.ops = &clk_branch2_ops,
},
},
@@ -2124,8 +2048,6 @@ static struct clk_branch gcc_tsif_ahb_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_tsif_ahb_clk",
- .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw },
- .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
@@ -2153,8 +2075,6 @@ static struct clk_branch gcc_ufs_ahb_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_ufs_ahb_clk",
- .parent_hws = (const struct clk_hw *[]){ &config_noc_clk_src.clkr.hw },
- .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
@@ -2198,8 +2118,6 @@ static struct clk_branch gcc_ufs_rx_symbol_0_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_ufs_rx_symbol_0_clk",
- .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw },
- .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
@@ -2213,8 +2131,6 @@ static struct clk_branch gcc_ufs_rx_symbol_1_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_ufs_rx_symbol_1_clk",
- .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw },
- .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
@@ -2243,8 +2159,6 @@ static struct clk_branch gcc_ufs_tx_symbol_0_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_ufs_tx_symbol_0_clk",
- .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw },
- .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
@@ -2258,8 +2172,6 @@ static struct clk_branch gcc_ufs_tx_symbol_1_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_ufs_tx_symbol_1_clk",
- .parent_hws = (const struct clk_hw *[]){ &system_noc_clk_src.clkr.hw },
- .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
@@ -2364,8 +2276,6 @@ static struct clk_branch gcc_usb_hs_ahb_clk = {
.enable_mask = BIT(0),
.hw.init = &(struct clk_init_data){
.name = "gcc_usb_hs_ahb_clk",
- .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw },
- .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
@@ -2488,8 +2398,6 @@ static struct clk_branch gcc_boot_rom_ahb_clk = {
.enable_mask = BIT(10),
.hw.init = &(struct clk_init_data){
.name = "gcc_boot_rom_ahb_clk",
- .parent_hws = (const struct clk_hw *[]){ &config_noc_clk_src.clkr.hw },
- .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
@@ -2503,8 +2411,6 @@ static struct clk_branch gcc_prng_ahb_clk = {
.enable_mask = BIT(13),
.hw.init = &(struct clk_init_data){
.name = "gcc_prng_ahb_clk",
- .parent_hws = (const struct clk_hw *[]){ &periph_noc_clk_src.clkr.hw },
- .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
@@ -2547,9 +2453,6 @@ static struct clk_regmap *gcc_msm8994_clocks[] = {
[GPLL0] = &gpll0.clkr,
[GPLL4_EARLY] = &gpll4_early.clkr,
[GPLL4] = &gpll4.clkr,
- [CONFIG_NOC_CLK_SRC] = &config_noc_clk_src.clkr,
- [PERIPH_NOC_CLK_SRC] = &periph_noc_clk_src.clkr,
- [SYSTEM_NOC_CLK_SRC] = &system_noc_clk_src.clkr,
[UFS_AXI_CLK_SRC] = &ufs_axi_clk_src.clkr,
[USB30_MASTER_CLK_SRC] = &usb30_master_clk_src.clkr,
[BLSP1_QUP1_I2C_APPS_CLK_SRC] = &blsp1_qup1_i2c_apps_clk_src.clkr,
@@ -2696,6 +2599,15 @@ static struct clk_regmap *gcc_msm8994_clocks[] = {
[USB_SS_PHY_LDO] = &usb_ss_phy_ldo.clkr,
[GCC_BOOT_ROM_AHB_CLK] = &gcc_boot_rom_ahb_clk.clkr,
[GCC_PRNG_AHB_CLK] = &gcc_prng_ahb_clk.clkr,
+
+ /*
+	 * The following clocks should NOT be managed by this driver, but they were once
+	 * mistakenly added. They remain here only to make clear that they are intentionally
+	 * left undefined, even though the names stay in the header file (for ABI sanity).
+ */
+ [CONFIG_NOC_CLK_SRC] = NULL,
+ [PERIPH_NOC_CLK_SRC] = NULL,
+ [SYSTEM_NOC_CLK_SRC] = NULL,
};
static struct gdsc *gcc_msm8994_gdscs[] = {
diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c
index 7e1dd8ccfa38..44520efc6c72 100644
--- a/drivers/clk/qcom/gdsc.c
+++ b/drivers/clk/qcom/gdsc.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (c) 2015, 2017-2018, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2015, 2017-2018, 2022, The Linux Foundation. All rights reserved.
*/
#include <linux/bitops.h>
@@ -35,9 +35,14 @@
#define CFG_GDSCR_OFFSET 0x4
/* Wait 2^n CXO cycles between all states. Here, n=2 (4 cycles). */
-#define EN_REST_WAIT_VAL (0x2 << 20)
-#define EN_FEW_WAIT_VAL (0x8 << 16)
-#define CLK_DIS_WAIT_VAL (0x2 << 12)
+#define EN_REST_WAIT_VAL 0x2
+#define EN_FEW_WAIT_VAL 0x8
+#define CLK_DIS_WAIT_VAL 0x2
+
+/* Transition delay shifts */
+#define EN_REST_WAIT_SHIFT 20
+#define EN_FEW_WAIT_SHIFT 16
+#define CLK_DIS_WAIT_SHIFT 12
#define RETAIN_MEM BIT(14)
#define RETAIN_PERIPH BIT(13)
@@ -380,7 +385,18 @@ static int gdsc_init(struct gdsc *sc)
*/
mask = HW_CONTROL_MASK | SW_OVERRIDE_MASK |
EN_REST_WAIT_MASK | EN_FEW_WAIT_MASK | CLK_DIS_WAIT_MASK;
- val = EN_REST_WAIT_VAL | EN_FEW_WAIT_VAL | CLK_DIS_WAIT_VAL;
+
+ if (!sc->en_rest_wait_val)
+ sc->en_rest_wait_val = EN_REST_WAIT_VAL;
+ if (!sc->en_few_wait_val)
+ sc->en_few_wait_val = EN_FEW_WAIT_VAL;
+ if (!sc->clk_dis_wait_val)
+ sc->clk_dis_wait_val = CLK_DIS_WAIT_VAL;
+
+ val = sc->en_rest_wait_val << EN_REST_WAIT_SHIFT |
+ sc->en_few_wait_val << EN_FEW_WAIT_SHIFT |
+ sc->clk_dis_wait_val << CLK_DIS_WAIT_SHIFT;
+
ret = regmap_update_bits(sc->regmap, sc->gdscr, mask, val);
if (ret)
return ret;
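
As a worked example of the new composition in gdsc_init(): with the values the dispcc patches in this series set (en_rest_wait_val = 0x2, en_few_wait_val = 0x2, clk_dis_wait_val = 0xf), the bits written to the GDSCR come out as:

	val = 0x2 << 20 | 0x2 << 16 | 0xf << 12;
	/*  = 0x00200000 | 0x00020000 | 0x0000f000 = 0x0022f000 */

GDSCs that leave the fields zero keep the historical defaults (0x2, 0x8, 0x2), so existing platforms are unaffected.
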
diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h
index d7cc4c21a9d4..ad313d7210bd 100644
--- a/drivers/clk/qcom/gdsc.h
+++ b/drivers/clk/qcom/gdsc.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (c) 2015, 2017-2018, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2015, 2017-2018, 2022, The Linux Foundation. All rights reserved.
*/
#ifndef __QCOM_GDSC_H__
@@ -22,6 +22,9 @@ struct reset_controller_dev;
* @cxcs: offsets of branch registers to toggle mem/periph bits in
* @cxc_count: number of @cxcs
* @pwrsts: Possible powerdomain power states
+ * @en_rest_wait_val: transition delay value for receiving the enr ack signal
+ * @en_few_wait_val: transition delay value for receiving the enf ack signal
+ * @clk_dis_wait_val: transition delay value for halting the clock
* @resets: ids of resets associated with this gdsc
* @reset_count: number of @resets
* @rcdev: reset controller
@@ -36,6 +39,9 @@ struct gdsc {
unsigned int clamp_io_ctrl;
unsigned int *cxcs;
unsigned int cxc_count;
+ unsigned int en_rest_wait_val;
+ unsigned int en_few_wait_val;
+ unsigned int clk_dis_wait_val;
const u8 pwrsts;
/* Powerdomain allowable state bitfields */
#define PWRSTS_OFF BIT(0)
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index cfb8ea0df3b1..1ea556e75494 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -713,7 +713,6 @@ config INGENIC_OST
config MICROCHIP_PIT64B
bool "Microchip PIT64B support"
depends on OF || COMPILE_TEST
- select CLKSRC_MMIO
select TIMER_OF
help
This option enables Microchip PIT64B timer for Atmel
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index eb596ff9e7bb..279ddff81ab4 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -229,8 +229,10 @@ static int __init parse_pmtmr(char *arg)
int ret;
ret = kstrtouint(arg, 16, &base);
- if (ret)
- return ret;
+ if (ret) {
+ pr_warn("PMTMR: invalid 'pmtmr=' value: '%s'\n", arg);
+ return 1;
+ }
pr_info("PMTMR IOPort override: 0x%04x -> 0x%04x\n", pmtmr_ioport,
base);
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 1ecd52f903b8..9ab8221ee3c6 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -880,10 +880,19 @@ static void __arch_timer_setup(unsigned type,
clockevents_config_and_register(clk, arch_timer_rate, 0xf, max_delta);
}
-static void arch_timer_evtstrm_enable(int divider)
+static void arch_timer_evtstrm_enable(unsigned int divider)
{
u32 cntkctl = arch_timer_get_cntkctl();
+#ifdef CONFIG_ARM64
+ /* ECV is likely to require a large divider. Use the EVNTIS flag. */
+ if (cpus_have_const_cap(ARM64_HAS_ECV) && divider > 15) {
+ cntkctl |= ARCH_TIMER_EVT_INTERVAL_SCALE;
+ divider -= 8;
+ }
+#endif
+
+ divider = min(divider, 15U);
cntkctl &= ~ARCH_TIMER_EVT_TRIGGER_MASK;
/* Set the divider and enable virtual event stream */
cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT)
@@ -912,7 +921,7 @@ static void arch_timer_configure_evtstream(void)
lsb++;
/* enable event stream */
- arch_timer_evtstrm_enable(max(0, min(lsb, 15)));
+ arch_timer_evtstrm_enable(max(0, lsb));
}
static void arch_counter_set_user_access(void)
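
The divider handling above condenses to: the CNTKCTL trigger field selects which counter bit drives the event stream but is only 4 bits wide, and with ECV the EVNTIS scale bit multiplies the selected interval by 2^8, so a divider of, say, 18 becomes field 10 plus the scale bit. A hedged sketch of that selection (helper name hypothetical):

	/* Illustrative only: returns the 4-bit trigger field and sets *scale
	 * when the EVNTIS interval-scale bit is needed; with the scale bit,
	 * the field selects counter bit (field + 8). */
	static unsigned int evtstrm_field(unsigned int divider, bool has_ecv,
					  bool *scale)
	{
		*scale = false;
		if (has_ecv && divider > 15) {
			*scale = true;
			divider -= 8;
		}
		return divider < 15 ? divider : 15;	/* clamp to field width */
	}
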
diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
index 6db3d5511b0f..f29c812b70c9 100644
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c
@@ -60,27 +60,18 @@
#define MCT_CLKEVENTS_RATING 350
#endif
+/* There are four global timers, numbered from offset 0 */
+#define MCT_G0_IRQ 0
+/* Local timer numbering starts after the global timers */
+#define MCT_L0_IRQ 4
+/* Max number of IRQ as per DT binding document */
+#define MCT_NR_IRQS 20
+
enum {
MCT_INT_SPI,
MCT_INT_PPI
};
-enum {
- MCT_G0_IRQ,
- MCT_G1_IRQ,
- MCT_G2_IRQ,
- MCT_G3_IRQ,
- MCT_L0_IRQ,
- MCT_L1_IRQ,
- MCT_L2_IRQ,
- MCT_L3_IRQ,
- MCT_L4_IRQ,
- MCT_L5_IRQ,
- MCT_L6_IRQ,
- MCT_L7_IRQ,
- MCT_NR_IRQS,
-};
-
static void __iomem *reg_base;
static unsigned long clk_rate;
static unsigned int mct_int_type;
@@ -89,7 +80,11 @@ static int mct_irqs[MCT_NR_IRQS];
struct mct_clock_event_device {
struct clock_event_device evt;
unsigned long base;
- char name[10];
+	/*
+	 * The length of the name must be adjusted if the number of
+	 * local timer interrupts grows beyond two digits.
+	 */
+ char name[11];
};
static void exynos4_mct_write(unsigned int value, unsigned long offset)
@@ -541,6 +536,11 @@ static int __init exynos4_timer_interrupts(struct device_node *np,
* irqs are specified.
*/
nr_irqs = of_irq_count(np);
+ if (nr_irqs > ARRAY_SIZE(mct_irqs)) {
+ pr_err("exynos-mct: too many (%d) interrupts configured in DT\n",
+ nr_irqs);
+ nr_irqs = ARRAY_SIZE(mct_irqs);
+ }
for (i = MCT_L0_IRQ; i < nr_irqs; i++)
mct_irqs[i] = irq_of_parse_and_map(np, i);
@@ -553,11 +553,14 @@ static int __init exynos4_timer_interrupts(struct device_node *np,
mct_irqs[MCT_L0_IRQ], err);
} else {
for_each_possible_cpu(cpu) {
- int mct_irq = mct_irqs[MCT_L0_IRQ + cpu];
+ int mct_irq;
struct mct_clock_event_device *pcpu_mevt =
per_cpu_ptr(&percpu_mct_tick, cpu);
pcpu_mevt->evt.irq = -1;
+ if (MCT_L0_IRQ + cpu >= ARRAY_SIZE(mct_irqs))
+ break;
+ mct_irq = mct_irqs[MCT_L0_IRQ + cpu];
irq_set_status_flags(mct_irq, IRQ_NOAUTOEN);
if (request_irq(mct_irq,
diff --git a/drivers/clocksource/timer-imx-sysctr.c b/drivers/clocksource/timer-imx-sysctr.c
index 55a8e198d2a1..523e37662a6e 100644
--- a/drivers/clocksource/timer-imx-sysctr.c
+++ b/drivers/clocksource/timer-imx-sysctr.c
@@ -110,7 +110,7 @@ static struct timer_of to_sysctr = {
},
.of_irq = {
.handler = sysctr_timer_interrupt,
- .flags = IRQF_TIMER | IRQF_IRQPOLL,
+ .flags = IRQF_TIMER,
},
.of_clk = {
.name = "per",
diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c
index 2cdc077a39f5..bd64a8a8427f 100644
--- a/drivers/clocksource/timer-imx-tpm.c
+++ b/drivers/clocksource/timer-imx-tpm.c
@@ -32,8 +32,8 @@
#define TPM_C0SC_CHF_MASK (0x1 << 7)
#define TPM_C0V 0x24
-static int counter_width;
-static void __iomem *timer_base;
+static int counter_width __ro_after_init;
+static void __iomem *timer_base __ro_after_init;
static inline void tpm_timer_disable(void)
{
@@ -73,12 +73,12 @@ static unsigned long tpm_read_current_timer(void)
{
return tpm_read_counter();
}
-#endif
static u64 notrace tpm_read_sched_clock(void)
{
return tpm_read_counter();
}
+#endif
static int tpm_set_next_event(unsigned long delta,
struct clock_event_device *evt)
@@ -127,9 +127,9 @@ static irqreturn_t tpm_timer_interrupt(int irq, void *dev_id)
static struct timer_of to_tpm = {
.flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK,
.clkevt = {
- .name = "i.MX7ULP TPM Timer",
+ .name = "i.MX TPM Timer",
.rating = 200,
- .features = CLOCK_EVT_FEAT_ONESHOT,
+ .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ,
.set_state_shutdown = tpm_set_state_shutdown,
.set_state_oneshot = tpm_set_state_oneshot,
.set_next_event = tpm_set_next_event,
@@ -137,7 +137,7 @@ static struct timer_of to_tpm = {
},
.of_irq = {
.handler = tpm_timer_interrupt,
- .flags = IRQF_TIMER | IRQF_IRQPOLL,
+ .flags = IRQF_TIMER,
},
.of_clk = {
.name = "per",
@@ -150,10 +150,10 @@ static int __init tpm_clocksource_init(void)
tpm_delay_timer.read_current_timer = &tpm_read_current_timer;
tpm_delay_timer.freq = timer_of_rate(&to_tpm) >> 3;
register_current_timer_delay(&tpm_delay_timer);
-#endif
sched_clock_register(tpm_read_sched_clock, counter_width,
timer_of_rate(&to_tpm) >> 3);
+#endif
return clocksource_mmio_init(timer_base + TPM_CNT,
"imx-tpm",
diff --git a/drivers/clocksource/timer-microchip-pit64b.c b/drivers/clocksource/timer-microchip-pit64b.c
index cfa4ec7ef396..abce83d2f00b 100644
--- a/drivers/clocksource/timer-microchip-pit64b.c
+++ b/drivers/clocksource/timer-microchip-pit64b.c
@@ -42,8 +42,7 @@
#define MCHP_PIT64B_LSBMASK GENMASK_ULL(31, 0)
#define MCHP_PIT64B_PRES_TO_MODE(p) (MCHP_PIT64B_MR_PRES & ((p) << 8))
#define MCHP_PIT64B_MODE_TO_PRES(m) ((MCHP_PIT64B_MR_PRES & (m)) >> 8)
-#define MCHP_PIT64B_DEF_CS_FREQ 5000000UL /* 5 MHz */
-#define MCHP_PIT64B_DEF_CE_FREQ 32768 /* 32 KHz */
+#define MCHP_PIT64B_DEF_FREQ 5000000UL /* 5 MHz */
#define MCHP_PIT64B_NAME "pit64b"
@@ -165,7 +164,7 @@ static u64 mchp_pit64b_clksrc_read(struct clocksource *cs)
return mchp_pit64b_cnt_read(mchp_pit64b_cs_base);
}
-static u64 mchp_pit64b_sched_read_clk(void)
+static u64 notrace mchp_pit64b_sched_read_clk(void)
{
return mchp_pit64b_cnt_read(mchp_pit64b_cs_base);
}
@@ -418,7 +417,6 @@ static int __init mchp_pit64b_init_clkevt(struct mchp_pit64b_timer *timer,
static int __init mchp_pit64b_dt_init_timer(struct device_node *node,
bool clkevt)
{
- u32 freq = clkevt ? MCHP_PIT64B_DEF_CE_FREQ : MCHP_PIT64B_DEF_CS_FREQ;
struct mchp_pit64b_timer timer;
unsigned long clk_rate;
u32 irq = 0;
@@ -446,7 +444,7 @@ static int __init mchp_pit64b_dt_init_timer(struct device_node *node,
}
/* Initialize mode (prescaler + SGCK bit). To be used at runtime. */
- ret = mchp_pit64b_init_mode(&timer, freq);
+ ret = mchp_pit64b_init_mode(&timer, MCHP_PIT64B_DEF_FREQ);
if (ret)
goto irq_unmap;
diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c
index 529cc6a51cdb..c3f54d9912be 100644
--- a/drivers/clocksource/timer-of.c
+++ b/drivers/clocksource/timer-of.c
@@ -157,9 +157,9 @@ static __init int timer_of_base_init(struct device_node *np,
of_base->base = of_base->name ?
of_io_request_and_map(np, of_base->index, of_base->name) :
of_iomap(np, of_base->index);
- if (IS_ERR(of_base->base)) {
- pr_err("Failed to iomap (%s)\n", of_base->name);
- return PTR_ERR(of_base->base);
+ if (IS_ERR_OR_NULL(of_base->base)) {
+ pr_err("Failed to iomap (%s:%s)\n", np->name, of_base->name);
+ return of_base->base ? PTR_ERR(of_base->base) : -ENOMEM;
}
return 0;
diff --git a/drivers/clocksource/timer-ti-dm-systimer.c b/drivers/clocksource/timer-ti-dm-systimer.c
index b6f97960d8ee..2737407ff069 100644
--- a/drivers/clocksource/timer-ti-dm-systimer.c
+++ b/drivers/clocksource/timer-ti-dm-systimer.c
@@ -241,8 +241,7 @@ static void __init dmtimer_systimer_assign_alwon(void)
bool quirk_unreliable_oscillator = false;
/* Quirk unreliable 32 KiHz oscillator with incomplete dts */
- if (of_machine_is_compatible("ti,omap3-beagle") ||
- of_machine_is_compatible("timll,omap3-devkit8000")) {
+ if (of_machine_is_compatible("ti,omap3-beagle-ab4")) {
quirk_unreliable_oscillator = true;
counter_32k = -ENODEV;
}
@@ -695,9 +694,9 @@ static int __init dmtimer_percpu_quirk_init(struct device_node *np, u32 pa)
return 0;
}
- if (pa == 0x48034000) /* dra7 dmtimer3 */
+ if (pa == 0x4882c000) /* dra7 dmtimer15 */
return dmtimer_percpu_timer_init(np, 0);
- else if (pa == 0x48036000) /* dra7 dmtimer4 */
+ else if (pa == 0x4882e000) /* dra7 dmtimer16 */
return dmtimer_percpu_timer_init(np, 1);
return 0;
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 646ad385e490..ccac1c453080 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -358,7 +358,7 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg,
* other namespaces.
*/
if ((current_user_ns() != &init_user_ns) ||
- (task_active_pid_ns(current) != &init_pid_ns))
+ !task_is_in_init_pid_ns(current))
return;
/* Can only change if privileged. */
diff --git a/drivers/counter/counter-core.c b/drivers/counter/counter-core.c
index 7e0957eea094..869894b74741 100644
--- a/drivers/counter/counter-core.c
+++ b/drivers/counter/counter-core.c
@@ -90,10 +90,8 @@ struct counter_device *counter_alloc(size_t sizeof_priv)
int err;
ch = kzalloc(sizeof(*ch) + sizeof_priv, GFP_KERNEL);
- if (!ch) {
- err = -ENOMEM;
- goto err_alloc_ch;
- }
+ if (!ch)
+ return NULL;
counter = &ch->counter;
dev = &counter->dev;
@@ -123,9 +121,8 @@ err_chrdev_add:
err_ida_alloc:
kfree(ch);
-err_alloc_ch:
- return ERR_PTR(err);
+ return NULL;
}
EXPORT_SYMBOL_GPL(counter_alloc);
@@ -208,12 +205,12 @@ struct counter_device *devm_counter_alloc(struct device *dev, size_t sizeof_priv
int err;
counter = counter_alloc(sizeof_priv);
- if (IS_ERR(counter))
- return counter;
+ if (!counter)
+ return NULL;
err = devm_add_action_or_reset(dev, devm_counter_put, counter);
if (err < 0)
- return ERR_PTR(err);
+ return NULL;
return counter;
}
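
With counter_alloc() and devm_counter_alloc() now returning NULL on every failure instead of mixing NULL and ERR_PTR() encodings, callers treat them like any other allocator. A sketch of the calling convention (struct my_priv is hypothetical):

	struct my_priv {
		int channel;
	};

	struct counter_device *counter;

	counter = devm_counter_alloc(dev, sizeof(struct my_priv));
	if (!counter)
		return -ENOMEM;		/* the only failure mode callers see */
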
diff --git a/drivers/counter/counter-sysfs.c b/drivers/counter/counter-sysfs.c
index 7cc4d1d523ea..04eac41dad33 100644
--- a/drivers/counter/counter-sysfs.c
+++ b/drivers/counter/counter-sysfs.c
@@ -19,6 +19,11 @@
#include "counter-sysfs.h"
+static inline struct counter_device *counter_from_dev(struct device *dev)
+{
+ return container_of(dev, struct counter_device, dev);
+}
+
/**
* struct counter_attribute - Counter sysfs attribute
* @dev_attr: device attribute for sysfs
@@ -90,7 +95,7 @@ static ssize_t counter_comp_u8_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
const struct counter_attribute *const a = to_counter_attribute(attr);
- struct counter_device *const counter = dev_get_drvdata(dev);
+ struct counter_device *const counter = counter_from_dev(dev);
int err;
u8 data = 0;
@@ -122,7 +127,7 @@ static ssize_t counter_comp_u8_store(struct device *dev,
const char *buf, size_t len)
{
const struct counter_attribute *const a = to_counter_attribute(attr);
- struct counter_device *const counter = dev_get_drvdata(dev);
+ struct counter_device *const counter = counter_from_dev(dev);
int err;
bool bool_data = 0;
u8 data = 0;
@@ -158,7 +163,7 @@ static ssize_t counter_comp_u32_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
const struct counter_attribute *const a = to_counter_attribute(attr);
- struct counter_device *const counter = dev_get_drvdata(dev);
+ struct counter_device *const counter = counter_from_dev(dev);
const struct counter_available *const avail = a->comp.priv;
int err;
u32 data = 0;
@@ -221,7 +226,7 @@ static ssize_t counter_comp_u32_store(struct device *dev,
const char *buf, size_t len)
{
const struct counter_attribute *const a = to_counter_attribute(attr);
- struct counter_device *const counter = dev_get_drvdata(dev);
+ struct counter_device *const counter = counter_from_dev(dev);
struct counter_count *const count = a->parent;
struct counter_synapse *const synapse = a->comp.priv;
const struct counter_available *const avail = a->comp.priv;
@@ -281,7 +286,7 @@ static ssize_t counter_comp_u64_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
const struct counter_attribute *const a = to_counter_attribute(attr);
- struct counter_device *const counter = dev_get_drvdata(dev);
+ struct counter_device *const counter = counter_from_dev(dev);
int err;
u64 data = 0;
@@ -309,7 +314,7 @@ static ssize_t counter_comp_u64_store(struct device *dev,
const char *buf, size_t len)
{
const struct counter_attribute *const a = to_counter_attribute(attr);
- struct counter_device *const counter = dev_get_drvdata(dev);
+ struct counter_device *const counter = counter_from_dev(dev);
int err;
u64 data = 0;
diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h
index 647505957d4f..35f38ae67fb1 100644
--- a/drivers/cpufreq/amd-pstate-trace.h
+++ b/drivers/cpufreq/amd-pstate-trace.h
@@ -27,6 +27,10 @@ TRACE_EVENT(amd_pstate_perf,
TP_PROTO(unsigned long min_perf,
unsigned long target_perf,
unsigned long capacity,
+ u64 freq,
+ u64 mperf,
+ u64 aperf,
+ u64 tsc,
unsigned int cpu_id,
bool changed,
bool fast_switch
@@ -35,6 +39,10 @@ TRACE_EVENT(amd_pstate_perf,
TP_ARGS(min_perf,
target_perf,
capacity,
+ freq,
+ mperf,
+ aperf,
+ tsc,
cpu_id,
changed,
fast_switch
@@ -44,6 +52,10 @@ TRACE_EVENT(amd_pstate_perf,
__field(unsigned long, min_perf)
__field(unsigned long, target_perf)
__field(unsigned long, capacity)
+ __field(unsigned long long, freq)
+ __field(unsigned long long, mperf)
+ __field(unsigned long long, aperf)
+ __field(unsigned long long, tsc)
__field(unsigned int, cpu_id)
__field(bool, changed)
__field(bool, fast_switch)
@@ -53,15 +65,23 @@ TRACE_EVENT(amd_pstate_perf,
__entry->min_perf = min_perf;
__entry->target_perf = target_perf;
__entry->capacity = capacity;
+ __entry->freq = freq;
+ __entry->mperf = mperf;
+ __entry->aperf = aperf;
+ __entry->tsc = tsc;
__entry->cpu_id = cpu_id;
__entry->changed = changed;
__entry->fast_switch = fast_switch;
),
- TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu cpu_id=%u changed=%s fast_switch=%s",
+ TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u changed=%s fast_switch=%s",
(unsigned long)__entry->min_perf,
(unsigned long)__entry->target_perf,
(unsigned long)__entry->capacity,
+ (unsigned long long)__entry->freq,
+ (unsigned long long)__entry->mperf,
+ (unsigned long long)__entry->aperf,
+ (unsigned long long)__entry->tsc,
(unsigned int)__entry->cpu_id,
(__entry->changed) ? "true" : "false",
(__entry->fast_switch) ? "true" : "false"
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 9ce75ed11f8e..7be38bc6a673 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -66,6 +66,18 @@ MODULE_PARM_DESC(shared_mem,
static struct cpufreq_driver amd_pstate_driver;
/**
+ * struct amd_aperf_mperf
+ * @aperf: actual performance frequency clock count
+ * @mperf: maximum performance frequency clock count
+ * @tsc: time stamp counter
+ */
+struct amd_aperf_mperf {
+ u64 aperf;
+ u64 mperf;
+ u64 tsc;
+};
+
+/**
* struct amd_cpudata - private CPU data for AMD P-State
* @cpu: CPU number
* @req: constraint request to apply
@@ -81,6 +93,9 @@ static struct cpufreq_driver amd_pstate_driver;
* @min_freq: the frequency that mapped to lowest_perf
* @nominal_freq: the frequency that mapped to nominal_perf
* @lowest_nonlinear_freq: the frequency that mapped to lowest_nonlinear_perf
+ * @cur: Difference of Aperf/Mperf/tsc count between last and current sample
+ * @prev: Last Aperf/Mperf/tsc count value read from register
+ * @freq: current cpu frequency value
* @boost_supported: check whether the Processor or SBIOS supports boost mode
*
* The amd_cpudata is key private data for each CPU thread in AMD P-State, and
@@ -102,6 +117,10 @@ struct amd_cpudata {
u32 nominal_freq;
u32 lowest_nonlinear_freq;
+ struct amd_aperf_mperf cur;
+ struct amd_aperf_mperf prev;
+
+ u64 freq;
bool boost_supported;
};
@@ -211,6 +230,39 @@ static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
max_perf, fast_switch);
}
+static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
+{
+ u64 aperf, mperf, tsc;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ rdmsrl(MSR_IA32_APERF, aperf);
+ rdmsrl(MSR_IA32_MPERF, mperf);
+ tsc = rdtsc();
+
+ if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
+ local_irq_restore(flags);
+ return false;
+ }
+
+ local_irq_restore(flags);
+
+ cpudata->cur.aperf = aperf;
+ cpudata->cur.mperf = mperf;
+ cpudata->cur.tsc = tsc;
+ cpudata->cur.aperf -= cpudata->prev.aperf;
+ cpudata->cur.mperf -= cpudata->prev.mperf;
+ cpudata->cur.tsc -= cpudata->prev.tsc;
+
+ cpudata->prev.aperf = aperf;
+ cpudata->prev.mperf = mperf;
+ cpudata->prev.tsc = tsc;
+
+ cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf);
+
+ return true;
+}
+
static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
u32 des_perf, u32 max_perf, bool fast_switch)
{
@@ -226,8 +278,11 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
value &= ~AMD_CPPC_MAX_PERF(~0L);
value |= AMD_CPPC_MAX_PERF(max_perf);
- trace_amd_pstate_perf(min_perf, des_perf, max_perf,
- cpudata->cpu, (value != prev), fast_switch);
+ if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
+ trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
+ cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
+ cpudata->cpu, (value != prev), fast_switch);
+ }
if (value == prev)
return;
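
The frequency value fed to the tracepoint comes from the classic APERF/MPERF estimate: both MSRs count only while the CPU is in C0, APERF at the actual clock and MPERF at the reference clock, so over any busy window freq ≈ base_freq * dAPERF / dMPERF. A standalone sketch (helper name hypothetical; deltas are assumed small enough that the multiply does not overflow u64):

	#include <linux/math64.h>

	/* Example: base 2,000,000 kHz with daperf = 3,000,000 and
	 * dmperf = 2,000,000 yields 3,000,000 kHz -- the CPU was boosting. */
	static u64 aperf_mperf_khz(u64 daperf, u64 dmperf, u64 base_khz)
	{
		if (!dmperf)
			return 0;	/* no busy cycles sampled; avoid div by 0 */
		return div64_u64(daperf * base_khz, dmperf);
	}
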
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index b8d95536ee22..80f535cc8a75 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1518,6 +1518,10 @@ static int cpufreq_online(unsigned int cpu)
kobject_uevent(&policy->kobj, KOBJ_ADD);
+ /* Callback for handling stuff after policy is ready */
+ if (cpufreq_driver->ready)
+ cpufreq_driver->ready(policy);
+
if (cpufreq_thermal_control_enabled(cpufreq_driver))
policy->cdev = of_cpufreq_cooling_register(policy);
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 08515f7e515f..b6bd0ff35323 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -146,7 +146,7 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy)
/************************** sysfs interface ************************/
-static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
+static ssize_t sampling_down_factor_store(struct gov_attr_set *attr_set,
const char *buf, size_t count)
{
struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -161,7 +161,7 @@ static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
return count;
}
-static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
+static ssize_t up_threshold_store(struct gov_attr_set *attr_set,
const char *buf, size_t count)
{
struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -177,7 +177,7 @@ static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
return count;
}
-static ssize_t store_down_threshold(struct gov_attr_set *attr_set,
+static ssize_t down_threshold_store(struct gov_attr_set *attr_set,
const char *buf, size_t count)
{
struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -195,7 +195,7 @@ static ssize_t store_down_threshold(struct gov_attr_set *attr_set,
return count;
}
-static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
+static ssize_t ignore_nice_load_store(struct gov_attr_set *attr_set,
const char *buf, size_t count)
{
struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -220,7 +220,7 @@ static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
return count;
}
-static ssize_t store_freq_step(struct gov_attr_set *attr_set, const char *buf,
+static ssize_t freq_step_store(struct gov_attr_set *attr_set, const char *buf,
size_t count)
{
struct dbs_data *dbs_data = to_dbs_data(attr_set);
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 63f7c219062b..0d42cf8b88d8 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -27,7 +27,7 @@ static DEFINE_MUTEX(gov_dbs_data_mutex);
/* Common sysfs tunables */
/*
- * store_sampling_rate - update sampling rate effective immediately if needed.
+ * sampling_rate_store - update sampling rate effective immediately if needed.
*
* If new rate is smaller than the old, simply updating
* dbs.sampling_rate might not be appropriate. For example, if the
@@ -41,7 +41,7 @@ static DEFINE_MUTEX(gov_dbs_data_mutex);
* This must be called with dbs_data->mutex held, otherwise traversing
* policy_dbs_list isn't safe.
*/
-ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
+ssize_t sampling_rate_store(struct gov_attr_set *attr_set, const char *buf,
size_t count)
{
struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -80,7 +80,7 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
return count;
}
-EXPORT_SYMBOL_GPL(store_sampling_rate);
+EXPORT_SYMBOL_GPL(sampling_rate_store);
/**
* gov_update_cpu_data - Update CPU load data.
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index bab8e6140377..a5a0bc3cc23e 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -51,7 +51,7 @@ static inline struct dbs_data *to_dbs_data(struct gov_attr_set *attr_set)
}
#define gov_show_one(_gov, file_name) \
-static ssize_t show_##file_name \
+static ssize_t file_name##_show \
(struct gov_attr_set *attr_set, char *buf) \
{ \
struct dbs_data *dbs_data = to_dbs_data(attr_set); \
@@ -60,7 +60,7 @@ static ssize_t show_##file_name \
}
#define gov_show_one_common(file_name) \
-static ssize_t show_##file_name \
+static ssize_t file_name##_show \
(struct gov_attr_set *attr_set, char *buf) \
{ \
struct dbs_data *dbs_data = to_dbs_data(attr_set); \
@@ -68,12 +68,10 @@ static ssize_t show_##file_name \
}
#define gov_attr_ro(_name) \
-static struct governor_attr _name = \
-__ATTR(_name, 0444, show_##_name, NULL)
+static struct governor_attr _name = __ATTR_RO(_name)
#define gov_attr_rw(_name) \
-static struct governor_attr _name = \
-__ATTR(_name, 0644, show_##_name, store_##_name)
+static struct governor_attr _name = __ATTR_RW(_name)
/* Common to all CPUs of a policy */
struct policy_dbs_info {
@@ -176,7 +174,7 @@ void od_register_powersave_bias_handler(unsigned int (*f)
(struct cpufreq_policy *, unsigned int, unsigned int),
unsigned int powersave_bias);
void od_unregister_powersave_bias_handler(void);
-ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
+ssize_t sampling_rate_store(struct gov_attr_set *attr_set, const char *buf,
size_t count);
void gov_update_cpu_data(struct dbs_data *dbs_data);
#endif /* _CPUFREQ_GOVERNOR_H */
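
The renames in this series (store_up_threshold() becoming up_threshold_store(), and so on) exist because __ATTR_RO()/__ATTR_RW() locate the handlers by token pasting. Roughly:

	/* __ATTR_RW(sampling_rate) expands approximately to:
	 *	{ .attr = { .name = "sampling_rate", .mode = 0644 },
	 *	  .show  = sampling_rate_show,
	 *	  .store = sampling_rate_store }
	 * so the handlers must follow the <name>_show/<name>_store convention.
	 */
	gov_attr_rw(sampling_rate);	/* binds sampling_rate_show/_store */
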
diff --git a/drivers/cpufreq/cpufreq_governor_attr_set.c b/drivers/cpufreq/cpufreq_governor_attr_set.c
index a6f365b9cc1a..771770ea0ed0 100644
--- a/drivers/cpufreq/cpufreq_governor_attr_set.c
+++ b/drivers/cpufreq/cpufreq_governor_attr_set.c
@@ -8,11 +8,6 @@
#include "cpufreq_governor.h"
-static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj)
-{
- return container_of(kobj, struct gov_attr_set, kobj);
-}
-
static inline struct governor_attr *to_gov_attr(struct attribute *attr)
{
return container_of(attr, struct governor_attr, attr);
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 6a41ea4729b8..e8fbf970ff07 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -202,7 +202,7 @@ static unsigned int od_dbs_update(struct cpufreq_policy *policy)
/************************** sysfs interface ************************/
static struct dbs_governor od_dbs_gov;
-static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf,
+static ssize_t io_is_busy_store(struct gov_attr_set *attr_set, const char *buf,
size_t count)
{
struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -220,7 +220,7 @@ static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf,
return count;
}
-static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
+static ssize_t up_threshold_store(struct gov_attr_set *attr_set,
const char *buf, size_t count)
{
struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -237,7 +237,7 @@ static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
return count;
}
-static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
+static ssize_t sampling_down_factor_store(struct gov_attr_set *attr_set,
const char *buf, size_t count)
{
struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -265,7 +265,7 @@ static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
return count;
}
-static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
+static ssize_t ignore_nice_load_store(struct gov_attr_set *attr_set,
const char *buf, size_t count)
{
struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -290,7 +290,7 @@ static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
return count;
}
-static ssize_t store_powersave_bias(struct gov_attr_set *attr_set,
+static ssize_t powersave_bias_store(struct gov_attr_set *attr_set,
const char *buf, size_t count)
{
struct dbs_data *dbs_data = to_dbs_data(attr_set);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index bc7f7e6759bd..846bb3a78788 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1692,6 +1692,37 @@ static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
}
}
+static void intel_pstate_update_epp_defaults(struct cpudata *cpudata)
+{
+ cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
+
+ /*
+	 * If this CPU generation doesn't require a change to the
+	 * balance_perf EPP, return.
+ */
+ if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE)
+ return;
+
+ /*
+	 * If the powerup EPP is something other than the chipset default 0x80 and
+	 * - is more performance oriented than 0x80 (the default balance_perf EPP)
+	 * - but less performance oriented than the performance EPP
+	 * then use it as the new balance_perf EPP.
+ */
+ if (cpudata->epp_default < HWP_EPP_BALANCE_PERFORMANCE &&
+ cpudata->epp_default > HWP_EPP_PERFORMANCE) {
+ epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = cpudata->epp_default;
+ return;
+ }
+
+ /*
+ * Use hard coded value per gen to update the balance_perf
+ * and default EPP.
+ */
+ cpudata->epp_default = epp_values[EPP_INDEX_BALANCE_PERFORMANCE];
+ intel_pstate_set_epp(cpudata, cpudata->epp_default);
+}
+
static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{
/* First disable HWP notification interrupt till we activate again */
@@ -1705,12 +1736,7 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
if (cpudata->epp_default >= 0)
return;
- if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) {
- cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
- } else {
- cpudata->epp_default = epp_values[EPP_INDEX_BALANCE_PERFORMANCE];
- intel_pstate_set_epp(cpudata, cpudata->epp_default);
- }
+ intel_pstate_update_epp_defaults(cpudata);
}
static int atom_get_min_pstate(void)
diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
index c538a153ee82..3e000e1a75c6 100644
--- a/drivers/cpufreq/longhaul.c
+++ b/drivers/cpufreq/longhaul.c
@@ -668,9 +668,9 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
u32 nesting_level,
void *context, void **return_value)
{
- struct acpi_device *d;
+ struct acpi_device *d = acpi_fetch_acpi_dev(obj_handle);
- if (acpi_bus_get_device(obj_handle, &d))
+ if (!d)
return 0;
*return_value = acpi_driver_data(d);
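
acpi_fetch_acpi_dev() returns the struct acpi_device pointer directly (NULL when there is none), replacing the older status-plus-out-parameter style of acpi_bus_get_device(). A sketch of the two styles side by side (handle name assumed):

	struct acpi_device *d;

	/* old: status return plus out parameter */
	if (acpi_bus_get_device(handle, &d))
		return 0;

	/* new: direct return, NULL on failure */
	d = acpi_fetch_acpi_dev(handle);
	if (!d)
		return 0;
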
diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
index 12ab4014af71..d289036beff2 100644
--- a/drivers/cpufreq/powernow-k8.c
+++ b/drivers/cpufreq/powernow-k8.c
@@ -1172,14 +1172,14 @@ static int powernowk8_init(void)
unsigned int i, supported_cpus = 0;
int ret;
+ if (!x86_match_cpu(powernow_k8_ids))
+ return -ENODEV;
+
if (boot_cpu_has(X86_FEATURE_HW_PSTATE)) {
__request_acpi_cpufreq();
return -ENODEV;
}
- if (!x86_match_cpu(powernow_k8_ids))
- return -ENODEV;
-
cpus_read_lock();
for_each_online_cpu(i) {
smp_call_function_single(i, check_supported_cpu, &ret, 1);
diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
index 05f3d7876e44..effbb680b453 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -388,7 +388,7 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index)
snprintf(data->irq_name, sizeof(data->irq_name), "dcvsh-irq-%u", policy->cpu);
ret = request_threaded_irq(data->throttle_irq, NULL, qcom_lmh_dcvs_handle_irq,
- IRQF_ONESHOT, data->irq_name, data);
+ IRQF_ONESHOT | IRQF_NO_AUTOEN, data->irq_name, data);
if (ret) {
dev_err(&pdev->dev, "Error registering %s: %d\n", data->irq_name, ret);
return 0;
@@ -542,6 +542,14 @@ static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy)
return 0;
}
+static void qcom_cpufreq_ready(struct cpufreq_policy *policy)
+{
+ struct qcom_cpufreq_data *data = policy->driver_data;
+
+ if (data->throttle_irq >= 0)
+ enable_irq(data->throttle_irq);
+}
+
static struct freq_attr *qcom_cpufreq_hw_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
&cpufreq_freq_attr_scaling_boost_freqs,
@@ -561,6 +569,7 @@ static struct cpufreq_driver cpufreq_qcom_hw_driver = {
.fast_switch = qcom_cpufreq_hw_fast_switch,
.name = "qcom-cpufreq-hw",
.attr = qcom_cpufreq_hw_attr,
+ .ready = qcom_cpufreq_ready,
};
static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev)
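
IRQF_NO_AUTOEN keeps the interrupt disabled when it is requested, and the new .ready() callback enables it only once the policy is fully initialized, closing the window in which the throttle handler could run against a half-constructed policy. A sketch of the pattern with assumed names:

	/* probe path: request the IRQ but leave it disabled */
	ret = request_threaded_irq(data->throttle_irq, NULL, throttle_handler,
				   IRQF_ONESHOT | IRQF_NO_AUTOEN, name, data);

	/* the cpufreq core calls .ready() after the policy is fully set up */
	static void foo_cpufreq_ready(struct cpufreq_policy *policy)
	{
		struct foo_data *data = policy->driver_data; /* assumed type */

		enable_irq(data->throttle_irq);
	}
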
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c
index fcc53215bac8..3a39a7f48b77 100644
--- a/drivers/cpuidle/cpuidle-haltpoll.c
+++ b/drivers/cpuidle/cpuidle-haltpoll.c
@@ -108,11 +108,11 @@ static int __init haltpoll_init(void)
if (boot_option_idle_override != IDLE_NO_OVERRIDE)
return -ENODEV;
- cpuidle_poll_state_init(drv);
-
if (!kvm_para_available() || !haltpoll_want())
return -ENODEV;
+ cpuidle_poll_state_init(drv);
+
ret = cpuidle_register_driver(drv);
if (ret < 0)
return ret;
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 4f705674f94f..7b2d138bc83e 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -808,6 +808,16 @@ config CRYPTO_DEV_ZYNQMP_AES
accelerator. Select this if you want to use the ZynqMP module
for AES algorithms.
+config CRYPTO_DEV_ZYNQMP_SHA3
+ tristate "Support for Xilinx ZynqMP SHA3 hardware accelerator"
+ depends on ZYNQMP_FIRMWARE || COMPILE_TEST
+ select CRYPTO_SHA3
+ help
+	  Xilinx ZynqMP has a SHA3 engine used for secure hash calculation.
+	  This driver interfaces with the SHA3 hardware engine.
+	  Select this if you want to use the ZynqMP module
+	  for SHA3 hash computation.
+
source "drivers/crypto/chelsio/Kconfig"
source "drivers/crypto/virtio/Kconfig"
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 1fe5120eb966..0a4fff23d272 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -47,7 +47,7 @@ obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/
obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/
obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/
-obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_AES) += xilinx/
+obj-y += xilinx/
obj-y += hisilicon/
obj-$(CONFIG_CRYPTO_DEV_AMLOGIC_GXL) += amlogic/
obj-y += keembay/
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
index 54ae8d16e493..35e3cadccac2 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
@@ -11,6 +11,7 @@
* You could find a link for the datasheet in Documentation/arm/sunxi.rst
*/
+#include <linux/bottom_half.h>
#include <linux/crypto.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
@@ -283,7 +284,9 @@ static int sun8i_ce_cipher_run(struct crypto_engine *engine, void *areq)
flow = rctx->flow;
err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(breq->base.tfm));
+ local_bh_disable();
crypto_finalize_skcipher_request(engine, breq, err);
+ local_bh_enable();
return 0;
}
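
crypto_finalize_skcipher_request() invokes the request's completion callback, which callers expect to run with bottom halves disabled (softirq context), while the crypto-engine worker that calls into this function runs in a kthread. Bracketing the finalize call with local_bh_disable()/local_bh_enable() restores that expectation; the same fix is repeated for the sun8i-ss, amlogic-gxl, and sl3516 drivers below. The pattern, as a sketch:

	#include <linux/bottom_half.h>

	/* sketch: complete the request from kthread context with BHs off */
	err = do_hw_operation(breq);		/* assumed driver helper */
	local_bh_disable();
	crypto_finalize_skcipher_request(engine, breq, err);
	local_bh_enable();
	return 0;
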
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
index 88194718a806..859b7522faaa 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
@@ -9,6 +9,7 @@
*
* You could find the datasheet in Documentation/arm/sunxi.rst
*/
+#include <linux/bottom_half.h>
#include <linux/dma-mapping.h>
#include <linux/pm_runtime.h>
#include <linux/scatterlist.h>
@@ -414,6 +415,8 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
theend:
kfree(buf);
kfree(result);
+ local_bh_disable();
crypto_finalize_hash_request(engine, breq, err);
+ local_bh_enable();
return 0;
}
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
index 9ef1c85c4aaa..554e400d41ca 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
@@ -11,6 +11,7 @@
* You could find a link for the datasheet in Documentation/arm/sunxi.rst
*/
+#include <linux/bottom_half.h>
#include <linux/crypto.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
@@ -274,7 +275,9 @@ static int sun8i_ss_handle_cipher_request(struct crypto_engine *engine, void *ar
struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
err = sun8i_ss_cipher(breq);
+ local_bh_disable();
crypto_finalize_skcipher_request(engine, breq, err);
+ local_bh_enable();
return 0;
}
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
index 80e89066dbd1..319fe3279a71 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
@@ -30,6 +30,8 @@
static const struct ss_variant ss_a80_variant = {
.alg_cipher = { SS_ALG_AES, SS_ALG_DES, SS_ALG_3DES,
},
+ .alg_hash = { SS_ID_NOTSUPP, SS_ID_NOTSUPP, SS_ID_NOTSUPP, SS_ID_NOTSUPP,
+ },
.op_mode = { SS_OP_ECB, SS_OP_CBC,
},
.ss_clks = {
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
index 3c073eb3db03..1a71ed49d233 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
@@ -9,6 +9,7 @@
*
* You could find the datasheet in Documentation/arm/sunxi.rst
*/
+#include <linux/bottom_half.h>
#include <linux/dma-mapping.h>
#include <linux/pm_runtime.h>
#include <linux/scatterlist.h>
@@ -442,6 +443,8 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
theend:
kfree(pad);
kfree(result);
+ local_bh_disable();
crypto_finalize_hash_request(engine, breq, err);
+ local_bh_enable();
return 0;
}
diff --git a/drivers/crypto/amlogic/amlogic-gxl-cipher.c b/drivers/crypto/amlogic/amlogic-gxl-cipher.c
index c6865cbd334b..e79514fce731 100644
--- a/drivers/crypto/amlogic/amlogic-gxl-cipher.c
+++ b/drivers/crypto/amlogic/amlogic-gxl-cipher.c
@@ -265,7 +265,9 @@ static int meson_handle_cipher_request(struct crypto_engine *engine,
struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
err = meson_cipher(breq);
+ local_bh_disable();
crypto_finalize_skcipher_request(engine, breq, err);
+ local_bh_enable();
return 0;
}
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index fe0558403191..f72c6b3e4ad8 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -2509,6 +2509,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
/* keep only major version number */
switch (dd->hw_version & 0xff0) {
+ case 0x700:
case 0x500:
dd->caps.has_dualbuff = 1;
dd->caps.has_cfb64 = 1;
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 1b13f601fd95..d1628112dacc 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -2508,6 +2508,7 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd)
/* keep only major version number */
switch (dd->hw_version & 0xff0) {
+ case 0x700:
case 0x510:
dd->caps.has_dma = 1;
dd->caps.has_dualbuff = 1;
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index e30786ec9f2d..9fd7b8e439d2 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -1130,6 +1130,7 @@ static void atmel_tdes_get_cap(struct atmel_tdes_dev *dd)
/* keep only major version number */
switch (dd->hw_version & 0xf00) {
+ case 0x800:
case 0x700:
dd->caps.has_dma = 1;
dd->caps.has_cfb_3keys = 1;
diff --git a/drivers/crypto/cavium/nitrox/nitrox_mbx.c b/drivers/crypto/cavium/nitrox/nitrox_mbx.c
index 2e9c0d214363..9e7308e39b30 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_mbx.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_mbx.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/bitmap.h>
#include <linux/workqueue.h>
#include "nitrox_csr.h"
@@ -120,6 +121,7 @@ static void pf2vf_resp_handler(struct work_struct *work)
void nitrox_pf2vf_mbox_handler(struct nitrox_device *ndev)
{
+ DECLARE_BITMAP(csr, BITS_PER_TYPE(u64));
struct nitrox_vfdev *vfdev;
struct pf2vf_work *pfwork;
u64 value, reg_addr;
@@ -129,7 +131,8 @@ void nitrox_pf2vf_mbox_handler(struct nitrox_device *ndev)
/* loop for VF(0..63) */
reg_addr = NPS_PKT_MBOX_INT_LO;
value = nitrox_read_csr(ndev, reg_addr);
- for_each_set_bit(i, (const unsigned long *)&value, BITS_PER_LONG) {
+ bitmap_from_u64(csr, value);
+ for_each_set_bit(i, csr, BITS_PER_TYPE(csr)) {
/* get the vfno from ring */
vfno = RING_TO_VFNO(i, ndev->iov.max_vf_queues);
vfdev = ndev->iov.vfdev + vfno;
@@ -151,7 +154,8 @@ void nitrox_pf2vf_mbox_handler(struct nitrox_device *ndev)
/* loop for VF(64..127) */
reg_addr = NPS_PKT_MBOX_INT_HI;
value = nitrox_read_csr(ndev, reg_addr);
- for_each_set_bit(i, (const unsigned long *)&value, BITS_PER_LONG) {
+ bitmap_from_u64(csr, value);
+ for_each_set_bit(i, csr, BITS_PER_TYPE(csr)) {
/* get the vfno from ring */
vfno = RING_TO_VFNO(i + 64, ndev->iov.max_vf_queues);
vfdev = ndev->iov.vfdev + vfno;
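
Casting &value (a u64) to unsigned long * and scanning BITS_PER_LONG bits only works on 64-bit builds; on a 32-bit kernel, BITS_PER_LONG is 32, so bits 32-63 of the register would be silently skipped. bitmap_from_u64() expands the value into a correctly sized bitmap first. A sketch of the portable idiom:

	#include <linux/bitmap.h>
	#include <linux/bits.h>

	/* sketch: iterate the set bits of a 64-bit CSR value portably */
	DECLARE_BITMAP(bits, BITS_PER_TYPE(u64));
	unsigned int i;

	bitmap_from_u64(bits, csr_value);	/* csr_value: assumed u64 */
	for_each_set_bit(i, bits, BITS_PER_TYPE(u64))
		handle_ring(i);			/* assumed per-bit handler */
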
diff --git a/drivers/crypto/cavium/nitrox/nitrox_req.h b/drivers/crypto/cavium/nitrox/nitrox_req.h
index ed174883c8e3..6bf088bcdd11 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_req.h
+++ b/drivers/crypto/cavium/nitrox/nitrox_req.h
@@ -440,7 +440,7 @@ struct aqmq_command_s {
/**
* struct ctx_hdr - Book keeping data about the crypto context
* @pool: Pool used to allocate crypto context
- * @dma: Base DMA address of the cypto context
+ * @dma: Base DMA address of the crypto context
* @ctx_dma: Actual usable crypto context for NITROX
*/
struct ctx_hdr {
diff --git a/drivers/crypto/cavium/zip/zip_main.c b/drivers/crypto/cavium/zip/zip_main.c
index 812b4ac9afd6..dc5b7bf7e1fd 100644
--- a/drivers/crypto/cavium/zip/zip_main.c
+++ b/drivers/crypto/cavium/zip/zip_main.c
@@ -55,6 +55,11 @@ static const struct pci_device_id zip_id_table[] = {
{ 0, }
};
+static void zip_debugfs_init(void);
+static void zip_debugfs_exit(void);
+static int zip_register_compression_device(void);
+static void zip_unregister_compression_device(void);
+
void zip_reg_write(u64 val, u64 __iomem *addr)
{
writeq(val, addr);
@@ -235,6 +240,15 @@ static int zip_init_hw(struct zip_device *zip)
return 0;
}
+static void zip_reset(struct zip_device *zip)
+{
+ union zip_cmd_ctl cmd_ctl;
+
+ cmd_ctl.u_reg64 = 0x0ull;
+ cmd_ctl.s.reset = 1; /* Forces ZIP cores to do reset */
+ zip_reg_write(cmd_ctl.u_reg64, (zip->reg_base + ZIP_CMD_CTL));
+}
+
static int zip_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct device *dev = &pdev->dev;
@@ -282,8 +296,21 @@ static int zip_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto err_release_regions;
+ /* Register with the Kernel Crypto Interface */
+ err = zip_register_compression_device();
+ if (err < 0) {
+ zip_err("ZIP: Kernel Crypto Registration failed\n");
+ goto err_register;
+ }
+
+ /* comp-decomp statistics are handled with debugfs interface */
+ zip_debugfs_init();
+
return 0;
+err_register:
+ zip_reset(zip);
+
err_release_regions:
if (zip->reg_base)
iounmap(zip->reg_base);
@@ -305,16 +332,17 @@ err_free_device:
static void zip_remove(struct pci_dev *pdev)
{
struct zip_device *zip = pci_get_drvdata(pdev);
- union zip_cmd_ctl cmd_ctl;
int q = 0;
if (!zip)
return;
+ zip_debugfs_exit();
+
+ zip_unregister_compression_device();
+
if (zip->reg_base) {
- cmd_ctl.u_reg64 = 0x0ull;
- cmd_ctl.s.reset = 1; /* Forces ZIP cores to do reset */
- zip_reg_write(cmd_ctl.u_reg64, (zip->reg_base + ZIP_CMD_CTL));
+ zip_reset(zip);
iounmap(zip->reg_base);
}
@@ -585,7 +613,7 @@ DEFINE_SHOW_ATTRIBUTE(zip_regs);
/* Root directory for thunderx_zip debugfs entry */
static struct dentry *zip_debugfs_root;
-static void __init zip_debugfs_init(void)
+static void zip_debugfs_init(void)
{
if (!debugfs_initialized())
return;
@@ -604,7 +632,7 @@ static void __init zip_debugfs_init(void)
}
-static void __exit zip_debugfs_exit(void)
+static void zip_debugfs_exit(void)
{
debugfs_remove_recursive(zip_debugfs_root);
}
@@ -615,48 +643,7 @@ static void __exit zip_debugfs_exit(void) { }
#endif
/* debugfs - end */
-static int __init zip_init_module(void)
-{
- int ret;
-
- zip_msg("%s\n", DRV_NAME);
-
- ret = pci_register_driver(&zip_driver);
- if (ret < 0) {
- zip_err("ZIP: pci_register_driver() failed\n");
- return ret;
- }
-
- /* Register with the Kernel Crypto Interface */
- ret = zip_register_compression_device();
- if (ret < 0) {
- zip_err("ZIP: Kernel Crypto Registration failed\n");
- goto err_pci_unregister;
- }
-
- /* comp-decomp statistics are handled with debugfs interface */
- zip_debugfs_init();
-
- return ret;
-
-err_pci_unregister:
- pci_unregister_driver(&zip_driver);
- return ret;
-}
-
-static void __exit zip_cleanup_module(void)
-{
- zip_debugfs_exit();
-
- /* Unregister from the kernel crypto interface */
- zip_unregister_compression_device();
-
- /* Unregister this driver for pci zip devices */
- pci_unregister_driver(&zip_driver);
-}
-
-module_init(zip_init_module);
-module_exit(zip_cleanup_module);
+module_pci_driver(zip_driver);
MODULE_AUTHOR("Cavium Inc");
MODULE_DESCRIPTION("Cavium Inc ThunderX ZIP Driver");
diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c
index e6dcd8cedd53..bed331953ff9 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes.c
@@ -69,7 +69,6 @@ static int ccp_aes_crypt(struct skcipher_request *req, bool encrypt)
struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req);
struct scatterlist *iv_sg = NULL;
unsigned int iv_len = 0;
- int ret;
if (!ctx->u.aes.key_len)
return -EINVAL;
@@ -104,9 +103,7 @@ static int ccp_aes_crypt(struct skcipher_request *req, bool encrypt)
rctx->cmd.u.aes.src_len = req->cryptlen;
rctx->cmd.u.aes.dst = req->dst;
- ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
-
- return ret;
+ return ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
}
static int ccp_aes_encrypt(struct skcipher_request *req)
diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c
index d718db224be4..7d4b4ad1db1f 100644
--- a/drivers/crypto/ccp/ccp-dmaengine.c
+++ b/drivers/crypto/ccp/ccp-dmaengine.c
@@ -632,6 +632,20 @@ static int ccp_terminate_all(struct dma_chan *dma_chan)
return 0;
}
+static void ccp_dma_release(struct ccp_device *ccp)
+{
+ struct ccp_dma_chan *chan;
+ struct dma_chan *dma_chan;
+ unsigned int i;
+
+ for (i = 0; i < ccp->cmd_q_count; i++) {
+ chan = ccp->ccp_dma_chan + i;
+ dma_chan = &chan->dma_chan;
+ tasklet_kill(&chan->cleanup_tasklet);
+ list_del_rcu(&dma_chan->device_node);
+ }
+}
+
int ccp_dmaengine_register(struct ccp_device *ccp)
{
struct ccp_dma_chan *chan;
@@ -736,6 +750,7 @@ int ccp_dmaengine_register(struct ccp_device *ccp)
return 0;
err_reg:
+ ccp_dma_release(ccp);
kmem_cache_destroy(ccp->dma_desc_cache);
err_cache:
@@ -752,6 +767,7 @@ void ccp_dmaengine_unregister(struct ccp_device *ccp)
return;
dma_async_device_unregister(dma_dev);
+ ccp_dma_release(ccp);
kmem_cache_destroy(ccp->dma_desc_cache);
kmem_cache_destroy(ccp->dma_cmd_cache);
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 8fd774a10edc..6ab93dfd478a 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -413,7 +413,7 @@ static int __sev_platform_init_locked(int *error)
{
struct psp_device *psp = psp_master;
struct sev_device *sev;
- int rc, psp_ret;
+ int rc, psp_ret = -1;
int (*init_function)(int *error);
if (!psp || !psp->sev_data)
diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c
index a5e041d9d2cf..11e0278c8631 100644
--- a/drivers/crypto/ccree/cc_buffer_mgr.c
+++ b/drivers/crypto/ccree/cc_buffer_mgr.c
@@ -258,6 +258,13 @@ static int cc_map_sg(struct device *dev, struct scatterlist *sg,
{
int ret = 0;
+ if (!nbytes) {
+ *mapped_nents = 0;
+ *lbytes = 0;
+ *nents = 0;
+ return 0;
+ }
+
*nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes);
if (*nents > max_sg_nents) {
*nents = 0;
diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c
index 78833491f534..309da6334a0a 100644
--- a/drivers/crypto/ccree/cc_cipher.c
+++ b/drivers/crypto/ccree/cc_cipher.c
@@ -257,8 +257,8 @@ static void cc_cipher_exit(struct crypto_tfm *tfm)
&ctx_p->user.key_dma_addr);
/* Free key buffer in context */
- kfree_sensitive(ctx_p->user.key);
dev_dbg(dev, "Free key buffer in context. key=@%p\n", ctx_p->user.key);
+ kfree_sensitive(ctx_p->user.key);
}
struct tdes_keys {
diff --git a/drivers/crypto/gemini/sl3516-ce-cipher.c b/drivers/crypto/gemini/sl3516-ce-cipher.c
index c1c2b1d86663..14d0d83d388d 100644
--- a/drivers/crypto/gemini/sl3516-ce-cipher.c
+++ b/drivers/crypto/gemini/sl3516-ce-cipher.c
@@ -23,8 +23,8 @@ static bool sl3516_ce_need_fallback(struct skcipher_request *areq)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
struct sl3516_ce_dev *ce = op->ce;
- struct scatterlist *in_sg = areq->src;
- struct scatterlist *out_sg = areq->dst;
+ struct scatterlist *in_sg;
+ struct scatterlist *out_sg;
struct scatterlist *sg;
if (areq->cryptlen == 0 || areq->cryptlen % 16) {
@@ -264,7 +264,9 @@ static int sl3516_ce_handle_cipher_request(struct crypto_engine *engine, void *a
struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
err = sl3516_ce_cipher(breq);
+ local_bh_disable();
crypto_finalize_skcipher_request(engine, breq, err);
+ local_bh_enable();
return 0;
}
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index c5b84a5ea350..453390044181 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -3840,7 +3840,7 @@ static void qm_clear_queues(struct hisi_qm *qm)
for (i = 0; i < qm->qp_num; i++) {
qp = &qm->qp_array[i];
- if (qp->is_resetting)
+ if (qp->is_in_kernel && qp->is_resetting)
memset(qp->qdma.va, 0, qp->qdma.size);
}
@@ -4295,7 +4295,7 @@ static void qm_vf_get_qos(struct hisi_qm *qm, u32 fun_num)
static int qm_vf_read_qos(struct hisi_qm *qm)
{
int cnt = 0;
- int ret;
+ int ret = -EINVAL;
/* reset mailbox qos val */
qm->mb_qos = 0;
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 6a45bd23b363..a91635c348b5 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -42,6 +42,8 @@
#define SEC_DE_OFFSET_V3 9
#define SEC_SCENE_OFFSET_V3 5
#define SEC_CKEY_OFFSET_V3 13
+#define SEC_CTR_CNT_OFFSET 25
+#define SEC_CTR_CNT_ROLLOVER 2
#define SEC_SRC_SGL_OFFSET_V3 11
#define SEC_DST_SGL_OFFSET_V3 14
#define SEC_CALG_OFFSET_V3 4
@@ -63,6 +65,7 @@
#define SEC_AUTH_CIPHER 0x1
#define SEC_MAX_MAC_LEN 64
#define SEC_MAX_AAD_LEN 65535
+#define SEC_MAX_CCM_AAD_LEN 65279
#define SEC_TOTAL_MAC_SZ (SEC_MAX_MAC_LEN * QM_Q_DEPTH)
#define SEC_PBUF_SZ 512
@@ -237,7 +240,7 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp)
if (unlikely(type != type_supported)) {
atomic64_inc(&dfx->err_bd_cnt);
- pr_err("err bd type [%d]\n", type);
+ pr_err("err bd type [%u]\n", type);
return;
}
@@ -641,13 +644,15 @@ static int sec_skcipher_fbtfm_init(struct crypto_skcipher *tfm)
struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
c_ctx->fallback = false;
+
+	/* Currently, only XTS mode needs a fallback tfm when using a 192-bit key */
if (likely(strncmp(alg, "xts", SEC_XTS_NAME_SZ)))
return 0;
c_ctx->fbtfm = crypto_alloc_sync_skcipher(alg, 0,
CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(c_ctx->fbtfm)) {
- pr_err("failed to alloc fallback tfm!\n");
+ pr_err("failed to alloc xts mode fallback tfm!\n");
return PTR_ERR(c_ctx->fbtfm);
}
@@ -808,7 +813,7 @@ static int sec_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
}
memcpy(c_ctx->c_key, key, keylen);
- if (c_ctx->fallback) {
+ if (c_ctx->fallback && c_ctx->fbtfm) {
ret = crypto_sync_skcipher_setkey(c_ctx->fbtfm, key, keylen);
if (ret) {
dev_err(dev, "failed to set fallback skcipher key!\n");
@@ -1300,6 +1305,10 @@ static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req)
cipher = SEC_CIPHER_DEC;
sec_sqe3->c_icv_key |= cpu_to_le16(cipher);
+	/* Set the CTR counter mode to 128-bit rollover */
+ sec_sqe3->auth_mac_key = cpu_to_le32((u32)SEC_CTR_CNT_ROLLOVER <<
+ SEC_CTR_CNT_OFFSET);
+
if (req->use_pbuf) {
bd_param |= SEC_PBUF << SEC_SRC_SGL_OFFSET_V3;
bd_param |= SEC_PBUF << SEC_DST_SGL_OFFSET_V3;
@@ -1614,7 +1623,7 @@ static void sec_auth_bd_fill_ex_v3(struct sec_auth_ctx *ctx, int dir,
sqe3->auth_mac_key |= cpu_to_le32((u32)SEC_AUTH_TYPE1);
sqe3->huk_iv_seq &= SEC_CIPHER_AUTH_V3;
} else {
- sqe3->auth_mac_key |= cpu_to_le32((u32)SEC_AUTH_TYPE1);
+ sqe3->auth_mac_key |= cpu_to_le32((u32)SEC_AUTH_TYPE2);
sqe3->huk_iv_seq |= SEC_AUTH_CIPHER_V3;
}
sqe3->a_len_key = cpu_to_le32(c_req->c_len + aq->assoclen);
@@ -2032,13 +2041,12 @@ static int sec_skcipher_soft_crypto(struct sec_ctx *ctx,
struct skcipher_request *sreq, bool encrypt)
{
struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
+ SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, c_ctx->fbtfm);
struct device *dev = ctx->dev;
int ret;
- SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, c_ctx->fbtfm);
-
if (!c_ctx->fbtfm) {
- dev_err(dev, "failed to check fallback tfm\n");
+ dev_err_ratelimited(dev, "the soft tfm isn't supported in the current system.\n");
return -EINVAL;
}
@@ -2219,6 +2227,10 @@ static int sec_aead_spec_check(struct sec_ctx *ctx, struct sec_req *sreq)
}
if (c_mode == SEC_CMODE_CCM) {
+ if (unlikely(req->assoclen > SEC_MAX_CCM_AAD_LEN)) {
+ dev_err_ratelimited(dev, "CCM input aad parameter is too long!\n");
+ return -EINVAL;
+ }
ret = aead_iv_demension_check(req);
if (ret) {
dev_err(dev, "aead input iv param error!\n");
@@ -2256,7 +2268,6 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
if (ctx->sec->qm.ver == QM_HW_V2) {
if (unlikely(!req->cryptlen || (!sreq->c_req.encrypt &&
req->cryptlen <= authsize))) {
- dev_err(dev, "Kunpeng920 not support 0 length!\n");
ctx->a_ctx.fallback = true;
return -EINVAL;
}
@@ -2284,9 +2295,10 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx,
struct aead_request *aead_req,
bool encrypt)
{
- struct aead_request *subreq = aead_request_ctx(aead_req);
struct sec_auth_ctx *a_ctx = &ctx->a_ctx;
struct device *dev = ctx->dev;
+ struct aead_request *subreq;
+ int ret;
/* Kunpeng920 aead mode not support input 0 size */
if (!a_ctx->fallback_aead_tfm) {
@@ -2294,6 +2306,10 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx,
return -EINVAL;
}
+ subreq = aead_request_alloc(a_ctx->fallback_aead_tfm, GFP_KERNEL);
+ if (!subreq)
+ return -ENOMEM;
+
aead_request_set_tfm(subreq, a_ctx->fallback_aead_tfm);
aead_request_set_callback(subreq, aead_req->base.flags,
aead_req->base.complete, aead_req->base.data);
@@ -2301,8 +2317,13 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx,
aead_req->cryptlen, aead_req->iv);
aead_request_set_ad(subreq, aead_req->assoclen);
- return encrypt ? crypto_aead_encrypt(subreq) :
- crypto_aead_decrypt(subreq);
+ if (encrypt)
+ ret = crypto_aead_encrypt(subreq);
+ else
+ ret = crypto_aead_decrypt(subreq);
+ aead_request_free(subreq);
+
+ return ret;
}
static int sec_aead_crypto(struct aead_request *a_req, bool encrypt)
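
The previous code reused the driver request's context area (aead_request_ctx()) as the fallback request, which is only safe when the fallback tfm's request size fits inside it; allocating a dedicated request removes that assumption at the cost of a GFP_KERNEL allocation per fallback operation. The resulting pattern, sketched with assumed names:

	/* sketch: heap-allocated fallback AEAD request */
	struct aead_request *subreq;
	int ret;

	subreq = aead_request_alloc(fallback_tfm, GFP_KERNEL);
	if (!subreq)
		return -ENOMEM;

	aead_request_set_callback(subreq, req->base.flags,
				  req->base.complete, req->base.data);
	aead_request_set_crypt(subreq, req->src, req->dst,
			       req->cryptlen, req->iv);
	aead_request_set_ad(subreq, req->assoclen);

	ret = encrypt ? crypto_aead_encrypt(subreq) : crypto_aead_decrypt(subreq);
	aead_request_free(subreq);
	return ret;
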
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.h b/drivers/crypto/hisilicon/sec2/sec_crypto.h
index 9f71c358a6d3..5e039b50e9d4 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.h
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.h
@@ -354,8 +354,10 @@ struct sec_sqe3 {
* akey_len: 9~14 bits
* a_alg: 15~20 bits
* key_sel: 21~24 bits
- * updata_key: 25 bits
- * reserved: 26~31 bits
+ * ctr_count_mode/sm4_xts: 25~26 bits
+ * sva_prefetch: 27 bits
+ * key_wrap_num: 28~30 bits
+ * update_key: 31 bits
*/
__le32 auth_mac_key;
__le32 salt;
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 26d3ab1d308b..0b9906ff69e3 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -90,6 +90,10 @@
SEC_USER1_WB_DATA_SSV)
#define SEC_USER1_SMMU_SVA (SEC_USER1_SMMU_NORMAL | SEC_USER1_SVA_SET)
#define SEC_USER1_SMMU_MASK (~SEC_USER1_SVA_SET)
+#define SEC_INTERFACE_USER_CTRL0_REG_V3 0x302220
+#define SEC_INTERFACE_USER_CTRL1_REG_V3 0x302224
+#define SEC_USER1_SMMU_NORMAL_V3 (BIT(23) | BIT(17) | BIT(11) | BIT(5))
+#define SEC_USER1_SMMU_MASK_V3 0xFF79E79E
#define SEC_CORE_INT_STATUS_M_ECC BIT(2)
#define SEC_PREFETCH_CFG 0x301130
@@ -335,6 +339,41 @@ static void sec_set_endian(struct hisi_qm *qm)
writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
}
+static void sec_engine_sva_config(struct hisi_qm *qm)
+{
+ u32 reg;
+
+ if (qm->ver > QM_HW_V2) {
+ reg = readl_relaxed(qm->io_base +
+ SEC_INTERFACE_USER_CTRL0_REG_V3);
+ reg |= SEC_USER0_SMMU_NORMAL;
+ writel_relaxed(reg, qm->io_base +
+ SEC_INTERFACE_USER_CTRL0_REG_V3);
+
+ reg = readl_relaxed(qm->io_base +
+ SEC_INTERFACE_USER_CTRL1_REG_V3);
+ reg &= SEC_USER1_SMMU_MASK_V3;
+ reg |= SEC_USER1_SMMU_NORMAL_V3;
+ writel_relaxed(reg, qm->io_base +
+ SEC_INTERFACE_USER_CTRL1_REG_V3);
+ } else {
+ reg = readl_relaxed(qm->io_base +
+ SEC_INTERFACE_USER_CTRL0_REG);
+ reg |= SEC_USER0_SMMU_NORMAL;
+ writel_relaxed(reg, qm->io_base +
+ SEC_INTERFACE_USER_CTRL0_REG);
+ reg = readl_relaxed(qm->io_base +
+ SEC_INTERFACE_USER_CTRL1_REG);
+ reg &= SEC_USER1_SMMU_MASK;
+ if (qm->use_sva)
+ reg |= SEC_USER1_SMMU_SVA;
+ else
+ reg |= SEC_USER1_SMMU_NORMAL;
+ writel_relaxed(reg, qm->io_base +
+ SEC_INTERFACE_USER_CTRL1_REG);
+ }
+}
+
static void sec_open_sva_prefetch(struct hisi_qm *qm)
{
u32 val;
@@ -426,26 +465,18 @@ static int sec_engine_init(struct hisi_qm *qm)
reg |= (0x1 << SEC_TRNG_EN_SHIFT);
writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
- reg = readl_relaxed(qm->io_base + SEC_INTERFACE_USER_CTRL0_REG);
- reg |= SEC_USER0_SMMU_NORMAL;
- writel_relaxed(reg, qm->io_base + SEC_INTERFACE_USER_CTRL0_REG);
-
- reg = readl_relaxed(qm->io_base + SEC_INTERFACE_USER_CTRL1_REG);
- reg &= SEC_USER1_SMMU_MASK;
- if (qm->use_sva && qm->ver == QM_HW_V2)
- reg |= SEC_USER1_SMMU_SVA;
- else
- reg |= SEC_USER1_SMMU_NORMAL;
- writel_relaxed(reg, qm->io_base + SEC_INTERFACE_USER_CTRL1_REG);
+ sec_engine_sva_config(qm);
writel(SEC_SINGLE_PORT_MAX_TRANS,
qm->io_base + AM_CFG_SINGLE_PORT_MAX_TRANS);
writel(SEC_SAA_ENABLE, qm->io_base + SEC_SAA_EN_REG);
- /* Enable sm4 extra mode, as ctr/ecb */
- writel_relaxed(SEC_BD_ERR_CHK_EN0,
- qm->io_base + SEC_BD_ERR_CHK_EN_REG0);
+ /* HW V2 enable sm4 extra mode, as ctr/ecb */
+ if (qm->ver < QM_HW_V3)
+ writel_relaxed(SEC_BD_ERR_CHK_EN0,
+ qm->io_base + SEC_BD_ERR_CHK_EN_REG0);
+
/* Enable sm4 xts mode multiple iv */
writel_relaxed(SEC_BD_ERR_CHK_EN1,
qm->io_base + SEC_BD_ERR_CHK_EN_REG1);
diff --git a/drivers/crypto/marvell/Kconfig b/drivers/crypto/marvell/Kconfig
index 9125199f1702..a48591af12d0 100644
--- a/drivers/crypto/marvell/Kconfig
+++ b/drivers/crypto/marvell/Kconfig
@@ -47,6 +47,7 @@ config CRYPTO_DEV_OCTEONTX2_CPT
select CRYPTO_SKCIPHER
select CRYPTO_HASH
select CRYPTO_AEAD
+ select NET_DEVLINK
help
This driver allows you to utilize the Marvell Cryptographic
Accelerator Unit(CPT) found in OcteonTX2 series of processors.
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
index ccbef01888d4..01c48ddc4eeb 100644
--- a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
@@ -1639,11 +1639,8 @@ static void swap_func(void *lptr, void *rptr, int size)
{
struct cpt_device_desc *ldesc = (struct cpt_device_desc *) lptr;
struct cpt_device_desc *rdesc = (struct cpt_device_desc *) rptr;
- struct cpt_device_desc desc;
- desc = *ldesc;
- *ldesc = *rdesc;
- *rdesc = desc;
+ swap(*ldesc, *rdesc);
}
int otx_cpt_crypto_init(struct pci_dev *pdev, struct module *mod,
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
index b681bd2dc6ad..36d72e35ebeb 100644
--- a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
@@ -204,7 +204,6 @@ static int alloc_command_queues(struct otx_cptvf *cptvf,
/* per queue initialization */
for (i = 0; i < cptvf->num_queues; i++) {
- c_size = 0;
rem_q_size = q_size;
first = NULL;
last = NULL;
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
index fb56824cb0a6..5012b7e669f0 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
@@ -157,5 +157,6 @@ struct otx2_cptlfs_info;
int otx2_cpt_attach_rscrs_msg(struct otx2_cptlfs_info *lfs);
int otx2_cpt_detach_rsrcs_msg(struct otx2_cptlfs_info *lfs);
int otx2_cpt_msix_offset_msg(struct otx2_cptlfs_info *lfs);
+int otx2_cpt_sync_mbox_msg(struct otx2_mbox *mbox);
#endif /* __OTX2_CPT_COMMON_H */
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c
index 9074876d38e5..a317319696ef 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c
@@ -202,3 +202,17 @@ int otx2_cpt_msix_offset_msg(struct otx2_cptlfs_info *lfs)
}
return ret;
}
+
+int otx2_cpt_sync_mbox_msg(struct otx2_mbox *mbox)
+{
+ int err;
+
+ if (!otx2_mbox_nonempty(mbox, 0))
+ return 0;
+ otx2_mbox_msg_send(mbox, 0);
+ err = otx2_mbox_wait_for_rsp(mbox, 0);
+ if (err)
+ return err;
+
+ return otx2_mbox_check_rsp_msgs(mbox, 0);
+}
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
index b691b6c1d5c4..4fcaf61a70e3 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
@@ -26,12 +26,22 @@
*/
#define OTX2_CPT_INST_QLEN_MSGS ((OTX2_CPT_SIZE_DIV40 - 1) * 40)
+/*
+ * LDWB is used incorrectly when IQB_LDWB = 1 and the CPT instruction
+ * queue has fewer than 320 free entries. As a workaround, increase the HW
+ * instruction queue size by 320 and expose 320 fewer entries to SW/NIX RX.
+ */
+#define OTX2_CPT_INST_QLEN_EXTRA_BYTES (320 * OTX2_CPT_INST_SIZE)
+#define OTX2_CPT_EXTRA_SIZE_DIV40 (320/40)
+
/* CPT instruction queue length in bytes */
-#define OTX2_CPT_INST_QLEN_BYTES (OTX2_CPT_SIZE_DIV40 * 40 * \
- OTX2_CPT_INST_SIZE)
+#define OTX2_CPT_INST_QLEN_BYTES \
+ ((OTX2_CPT_SIZE_DIV40 * 40 * OTX2_CPT_INST_SIZE) + \
+ OTX2_CPT_INST_QLEN_EXTRA_BYTES)
/* CPT instruction group queue length in bytes */
-#define OTX2_CPT_INST_GRP_QLEN_BYTES (OTX2_CPT_SIZE_DIV40 * 16)
+#define OTX2_CPT_INST_GRP_QLEN_BYTES \
+ ((OTX2_CPT_SIZE_DIV40 + OTX2_CPT_EXTRA_SIZE_DIV40) * 16)
/* CPT FC length in bytes */
#define OTX2_CPT_Q_FC_LEN 128
@@ -179,7 +189,8 @@ static inline void otx2_cptlf_do_set_iqueue_size(struct otx2_cptlf_info *lf)
{
union otx2_cptx_lf_q_size lf_q_size = { .u = 0x0 };
- lf_q_size.s.size_div40 = OTX2_CPT_SIZE_DIV40;
+ lf_q_size.s.size_div40 = OTX2_CPT_SIZE_DIV40 +
+ OTX2_CPT_EXTRA_SIZE_DIV40;
otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot,
OTX2_CPT_LF_Q_SIZE, lf_q_size.u);
}
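
The queue is sized in units of 40 instructions ("DIV40"), so the 320 reserved entries add 320 / 40 = 8 units to the HW queue size while the software-visible OTX2_CPT_INST_QLEN_MSGS stays unchanged. A compile-time sanity sketch, assuming an instruction size of 64 bytes (an assumption for illustration; the real value comes from the driver headers):

	/* illustrative only; OTX2_CPT_INST_SIZE is an assumed value here */
	#define OTX2_CPT_INST_SIZE 64

	_Static_assert(320 / 40 == 8,
		       "LDWB workaround adds 8 DIV40 units to the HW queue");
	_Static_assert(320 * OTX2_CPT_INST_SIZE == 20480,
		       "20 KiB of extra instruction-queue memory is reserved");
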
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h
index 05b2d9c650e1..936174b012e8 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h
@@ -46,6 +46,7 @@ struct otx2_cptpf_dev {
struct workqueue_struct *flr_wq;
struct cptpf_flr_work *flr_work;
+ struct mutex lock; /* serialize mailbox access */
unsigned long cap_flag;
u8 pf_id; /* RVU PF number */
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
index 1720a5bb7016..a402ccfac557 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
@@ -140,10 +140,13 @@ static void cptpf_flr_wq_handler(struct work_struct *work)
vf = flr_work - pf->flr_work;
+ mutex_lock(&pf->lock);
req = otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req),
sizeof(struct msg_rsp));
- if (!req)
+ if (!req) {
+ mutex_unlock(&pf->lock);
return;
+ }
req->sig = OTX2_MBOX_REQ_SIG;
req->id = MBOX_MSG_VF_FLR;
@@ -151,16 +154,19 @@ static void cptpf_flr_wq_handler(struct work_struct *work)
req->pcifunc |= (vf + 1) & RVU_PFVF_FUNC_MASK;
otx2_cpt_send_mbox_msg(mbox, pf->pdev);
+ if (!otx2_cpt_sync_mbox_msg(&pf->afpf_mbox)) {
- if (vf >= 64) {
- reg = 1;
- vf = vf - 64;
+ if (vf >= 64) {
+ reg = 1;
+ vf = vf - 64;
+ }
+ /* Clear transaction pending register */
+ otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0,
+ RVU_PF_VFTRPENDX(reg), BIT_ULL(vf));
+ otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0,
+ RVU_PF_VFFLR_INT_ENA_W1SX(reg), BIT_ULL(vf));
}
- /* Clear transaction pending register */
- otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0,
- RVU_PF_VFTRPENDX(reg), BIT_ULL(vf));
- otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0,
- RVU_PF_VFFLR_INT_ENA_W1SX(reg), BIT_ULL(vf));
+ mutex_unlock(&pf->lock);
}
static irqreturn_t cptpf_vf_flr_intr(int __always_unused irq, void *arg)
@@ -468,6 +474,7 @@ static int cptpf_afpf_mbox_init(struct otx2_cptpf_dev *cptpf)
goto error;
INIT_WORK(&cptpf->afpf_mbox_work, otx2_cptpf_afpf_mbox_handler);
+ mutex_init(&cptpf->lock);
return 0;
error:
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c
index 186f1c1190c1..dee0aa60b698 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c
@@ -18,9 +18,12 @@ static int forward_to_af(struct otx2_cptpf_dev *cptpf,
struct mbox_msghdr *msg;
int ret;
+ mutex_lock(&cptpf->lock);
msg = otx2_mbox_alloc_msg(&cptpf->afpf_mbox, 0, size);
- if (msg == NULL)
+ if (msg == NULL) {
+ mutex_unlock(&cptpf->lock);
return -ENOMEM;
+ }
memcpy((uint8_t *)msg + sizeof(struct mbox_msghdr),
(uint8_t *)req + sizeof(struct mbox_msghdr), size);
@@ -29,15 +32,19 @@ static int forward_to_af(struct otx2_cptpf_dev *cptpf,
msg->sig = req->sig;
msg->ver = req->ver;
- otx2_mbox_msg_send(&cptpf->afpf_mbox, 0);
- ret = otx2_mbox_wait_for_rsp(&cptpf->afpf_mbox, 0);
+ ret = otx2_cpt_sync_mbox_msg(&cptpf->afpf_mbox);
+	/* Error code -EIO indicates a communication failure with the AF.
+	 * The rest of the error codes indicate that the AF processed the
+	 * VF messages and set the error codes in the response messages
+	 * (if any), so simply forward the responses to the VF.
+ */
if (ret == -EIO) {
- dev_err(&cptpf->pdev->dev, "RVU MBOX timeout.\n");
+ dev_warn(&cptpf->pdev->dev,
+ "AF not responding to VF%d messages\n", vf->vf_id);
+ mutex_unlock(&cptpf->lock);
return ret;
- } else if (ret) {
- dev_err(&cptpf->pdev->dev, "RVU MBOX error: %d.\n", ret);
- return -EFAULT;
}
+ mutex_unlock(&cptpf->lock);
return 0;
}
@@ -204,6 +211,10 @@ void otx2_cptpf_vfpf_mbox_handler(struct work_struct *work)
if (err == -ENOMEM || err == -EIO)
break;
offset = msg->next_msgoff;
+		/* Write barrier required for VF responses which are handled by
+		 * the PF driver and not forwarded to the AF.
+ */
+ smp_wmb();
}
/* Send mbox responses to VF */
if (mdev->num_msgs)
@@ -350,6 +361,8 @@ void otx2_cptpf_afpf_mbox_handler(struct work_struct *work)
process_afpf_mbox_msg(cptpf, msg);
offset = msg->next_msgoff;
+		/* Make sure the VF response is visible before it is sent */
+ smp_wmb();
mdev->msgs_acked++;
}
otx2_mbox_reset(afpf_mbox, 0);
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
index 4c8ebdf671ca..9cba2f714c7e 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
@@ -1076,6 +1076,39 @@ static void delete_engine_grps(struct pci_dev *pdev,
delete_engine_group(&pdev->dev, &eng_grps->grp[i]);
}
+#define PCI_DEVID_CN10K_RNM 0xA098
+#define RNM_ENTROPY_STATUS 0x8
+
+static void rnm_to_cpt_errata_fixup(struct device *dev)
+{
+ struct pci_dev *pdev;
+ void __iomem *base;
+ int timeout = 5000;
+
+ pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN10K_RNM, NULL);
+ if (!pdev)
+ return;
+
+ base = pci_ioremap_bar(pdev, 0);
+ if (!base)
+ goto put_pdev;
+
+ while ((readq(base + RNM_ENTROPY_STATUS) & 0x7F) != 0x40) {
+ cpu_relax();
+ udelay(1);
+ timeout--;
+ if (!timeout) {
+ dev_warn(dev, "RNM is not producing entropy\n");
+ break;
+ }
+ }
+
+ iounmap(base);
+
+put_pdev:
+ pci_dev_put(pdev);
+}
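
The open-coded entropy poll waits up to ~5 ms (5000 iterations with a 1 us delay) for RNM_ENTROPY_STATUS[6:0] to read 0x40. An equivalent sketch using the readq_poll_timeout() helper from linux/iopoll.h:

	#include <linux/iopoll.h>

	u64 status;
	int ret;

	/* poll every 1 us, give up after 5000 us (sketch of the same wait) */
	ret = readq_poll_timeout(base + RNM_ENTROPY_STATUS, status,
				 (status & 0x7F) == 0x40, 1, 5000);
	if (ret)
		dev_warn(dev, "RNM is not producing entropy\n");
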
+
int otx2_cpt_get_eng_grp(struct otx2_cpt_eng_grps *eng_grps, int eng_type)
{
@@ -1111,6 +1144,7 @@ int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf,
struct otx2_cpt_engines engs[OTX2_CPT_MAX_ETYPES_PER_GRP] = { {0} };
struct pci_dev *pdev = cptpf->pdev;
struct fw_info_t fw_info;
+ u64 reg_val;
int ret = 0;
mutex_lock(&eng_grps->lock);
@@ -1189,9 +1223,17 @@ int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf,
if (is_dev_otx2(pdev))
goto unlock;
+
+ /*
+ * Ensure RNM_ENTROPY_STATUS[NORMAL_CNT] = 0x40 before writing
+ * CPT_AF_CTL[RNM_REQ_EN] = 1 as a workaround for HW errata.
+ */
+ rnm_to_cpt_errata_fixup(&pdev->dev);
+
/*
* Configure engine group mask to allow context prefetching
- * for the groups.
+	 * for the groups, and enable the random number request so that
+	 * CPT can request random numbers from the RNM.
*/
otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_CTL,
OTX2_CPT_ALL_ENG_GRPS_MASK << 3 | BIT_ULL(16),
@@ -1203,6 +1245,18 @@ int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf,
*/
otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_CTX_FLUSH_TIMER,
CTX_FLUSH_TIMER_CNT, BLKADDR_CPT0);
+
+ /*
+	 * Set CPT_AF_DIAG[FLT_DIS] as a workaround for a HW errata:
+	 * when CPT_AF_DIAG[FLT_DIS] = 0 and a CPT engine access to
+	 * LLC/DRAM encounters a fault/poison, a rare case may result in
+	 * unpredictable data being delivered to a CPT engine.
+ */
+ otx2_cpt_read_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_DIAG, &reg_val,
+ BLKADDR_CPT0);
+ otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_DIAG,
+ reg_val | BIT_ULL(24), BLKADDR_CPT0);
+
mutex_unlock(&eng_grps->lock);
return 0;
@@ -1753,7 +1807,6 @@ void otx2_cpt_print_uc_dbg_info(struct otx2_cptpf_dev *cptpf)
char engs_info[2 * OTX2_CPT_NAME_LENGTH];
struct otx2_cpt_eng_grp_info *grp;
struct otx2_cpt_engs_rsvd *engs;
- u32 mask[4];
int i, j;
pr_debug("Engine groups global info");
@@ -1785,6 +1838,8 @@ void otx2_cpt_print_uc_dbg_info(struct otx2_cptpf_dev *cptpf)
for (j = 0; j < OTX2_CPT_MAX_ETYPES_PER_GRP; j++) {
engs = &grp->engs[j];
if (engs->type) {
+ u32 mask[5] = { };
+
get_engs_info(grp, engs_info,
2 * OTX2_CPT_NAME_LENGTH, j);
pr_debug("Slot%d: %s", j, engs_info);
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
index 2748a3327e39..f8f8542ce3e4 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
@@ -1634,16 +1634,13 @@ static inline int cpt_register_algs(void)
{
int i, err = 0;
- if (!IS_ENABLED(CONFIG_DM_CRYPT)) {
- for (i = 0; i < ARRAY_SIZE(otx2_cpt_skciphers); i++)
- otx2_cpt_skciphers[i].base.cra_flags &=
- ~CRYPTO_ALG_DEAD;
-
- err = crypto_register_skciphers(otx2_cpt_skciphers,
- ARRAY_SIZE(otx2_cpt_skciphers));
- if (err)
- return err;
- }
+ for (i = 0; i < ARRAY_SIZE(otx2_cpt_skciphers); i++)
+ otx2_cpt_skciphers[i].base.cra_flags &= ~CRYPTO_ALG_DEAD;
+
+ err = crypto_register_skciphers(otx2_cpt_skciphers,
+ ARRAY_SIZE(otx2_cpt_skciphers));
+ if (err)
+ return err;
for (i = 0; i < ARRAY_SIZE(otx2_cpt_aeads); i++)
otx2_cpt_aeads[i].base.cra_flags &= ~CRYPTO_ALG_DEAD;
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
index d19e5ffb5104..d6f9e2fe863d 100644
--- a/drivers/crypto/mxs-dcp.c
+++ b/drivers/crypto/mxs-dcp.c
@@ -331,7 +331,7 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
memset(key + AES_KEYSIZE_128, 0, AES_KEYSIZE_128);
}
- for_each_sg(req->src, src, sg_nents(src), i) {
+ for_each_sg(req->src, src, sg_nents(req->src), i) {
src_buf = sg_virt(src);
len = sg_dma_len(src);
tlen += len;
diff --git a/drivers/crypto/nx/nx-common-pseries.c b/drivers/crypto/nx/nx-common-pseries.c
index 4e304f6081e4..7584a34ba88c 100644
--- a/drivers/crypto/nx/nx-common-pseries.c
+++ b/drivers/crypto/nx/nx-common-pseries.c
@@ -962,7 +962,7 @@ static struct attribute *nx842_sysfs_entries[] = {
NULL,
};
-static struct attribute_group nx842_attribute_group = {
+static const struct attribute_group nx842_attribute_group = {
.name = NULL, /* put in device directory */
.attrs = nx842_sysfs_entries,
};
@@ -992,7 +992,7 @@ static struct attribute *nxcop_caps_sysfs_entries[] = {
NULL,
};
-static struct attribute_group nxcop_caps_attr_group = {
+static const struct attribute_group nxcop_caps_attr_group = {
.name = "nx_gzip_caps",
.attrs = nxcop_caps_sysfs_entries,
};
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index a196bb8b1701..581211a92628 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -1093,7 +1093,7 @@ static struct attribute *omap_aes_attrs[] = {
NULL,
};
-static struct attribute_group omap_aes_attr_group = {
+static const struct attribute_group omap_aes_attr_group = {
.attrs = omap_aes_attrs,
};
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index f6bf53c00b61..4b37dc69a50c 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -2045,7 +2045,7 @@ static struct attribute *omap_sham_attrs[] = {
NULL,
};
-static struct attribute_group omap_sham_attr_group = {
+static const struct attribute_group omap_sham_attr_group = {
.attrs = omap_sham_attrs,
};
diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
index 6d10edc40aca..fb5970a68484 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -6,6 +6,7 @@
#include <adf_common_drv.h>
#include <adf_gen4_hw_data.h>
#include <adf_gen4_pfvf.h>
+#include <adf_gen4_pm.h>
#include "adf_4xxx_hw_data.h"
#include "icp_qat_hw.h"
@@ -52,7 +53,7 @@ static const char *const dev_cfg_services[] = {
static int get_service_enabled(struct adf_accel_dev *accel_dev)
{
char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0};
- u32 ret;
+ int ret;
ret = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC,
ADF_SERVICES_ENABLED, services);
@@ -229,7 +230,7 @@ static void adf_enable_error_correction(struct adf_accel_dev *accel_dev)
void __iomem *csr = misc_bar->virt_addr;
/* Enable all in errsou3 except VFLR notification on host */
- ADF_CSR_WR(csr, ADF_4XXX_ERRMSK3, ADF_4XXX_VFLNOTIFY);
+ ADF_CSR_WR(csr, ADF_GEN4_ERRMSK3, ADF_GEN4_VFLNOTIFY);
}
static void adf_enable_ints(struct adf_accel_dev *accel_dev)
@@ -256,19 +257,19 @@ static int adf_init_device(struct adf_accel_dev *accel_dev)
addr = (&GET_BARS(accel_dev)[ADF_4XXX_PMISC_BAR])->virt_addr;
/* Temporarily mask PM interrupt */
- csr = ADF_CSR_RD(addr, ADF_4XXX_ERRMSK2);
- csr |= ADF_4XXX_PM_SOU;
- ADF_CSR_WR(addr, ADF_4XXX_ERRMSK2, csr);
+ csr = ADF_CSR_RD(addr, ADF_GEN4_ERRMSK2);
+ csr |= ADF_GEN4_PM_SOU;
+ ADF_CSR_WR(addr, ADF_GEN4_ERRMSK2, csr);
/* Set DRV_ACTIVE bit to power up the device */
- ADF_CSR_WR(addr, ADF_4XXX_PM_INTERRUPT, ADF_4XXX_PM_DRV_ACTIVE);
+ ADF_CSR_WR(addr, ADF_GEN4_PM_INTERRUPT, ADF_GEN4_PM_DRV_ACTIVE);
/* Poll status register to make sure the device is powered up */
ret = read_poll_timeout(ADF_CSR_RD, status,
- status & ADF_4XXX_PM_INIT_STATE,
- ADF_4XXX_PM_POLL_DELAY_US,
- ADF_4XXX_PM_POLL_TIMEOUT_US, true, addr,
- ADF_4XXX_PM_STATUS);
+ status & ADF_GEN4_PM_INIT_STATE,
+ ADF_GEN4_PM_POLL_DELAY_US,
+ ADF_GEN4_PM_POLL_TIMEOUT_US, true, addr,
+ ADF_GEN4_PM_STATUS);
if (ret)
dev_err(&GET_DEV(accel_dev), "Failed to power up the device\n");
@@ -354,6 +355,8 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data)
hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer;
hw_data->disable_iov = adf_disable_sriov;
hw_data->ring_pair_reset = adf_gen4_ring_pair_reset;
+ hw_data->enable_pm = adf_gen4_enable_pm;
+ hw_data->handle_pm_interrupt = adf_gen4_handle_pm_interrupt;
adf_gen4_init_hw_csr_ops(&hw_data->csr_ops);
adf_gen4_init_pf_pfvf_ops(&hw_data->pfvf_ops);
diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h
index 12e4fb9b40ce..1034752845ca 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h
+++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h
@@ -39,20 +39,6 @@
#define ADF_4XXX_NUM_RINGS_PER_BANK 2
#define ADF_4XXX_NUM_BANKS_PER_VF 4
-/* Error source registers */
-#define ADF_4XXX_ERRSOU0 (0x41A200)
-#define ADF_4XXX_ERRSOU1 (0x41A204)
-#define ADF_4XXX_ERRSOU2 (0x41A208)
-#define ADF_4XXX_ERRSOU3 (0x41A20C)
-
-/* Error source mask registers */
-#define ADF_4XXX_ERRMSK0 (0x41A210)
-#define ADF_4XXX_ERRMSK1 (0x41A214)
-#define ADF_4XXX_ERRMSK2 (0x41A218)
-#define ADF_4XXX_ERRMSK3 (0x41A21C)
-
-#define ADF_4XXX_VFLNOTIFY BIT(7)
-
/* Arbiter configuration */
#define ADF_4XXX_ARB_CONFIG (BIT(31) | BIT(6) | BIT(0))
#define ADF_4XXX_ARB_OFFSET (0x0)
@@ -63,16 +49,6 @@
#define ADF_4XXX_ADMINMSGLR_OFFSET (0x500578)
#define ADF_4XXX_MAILBOX_BASE_OFFSET (0x600970)
-/* Power management */
-#define ADF_4XXX_PM_POLL_DELAY_US 20
-#define ADF_4XXX_PM_POLL_TIMEOUT_US USEC_PER_SEC
-#define ADF_4XXX_PM_STATUS (0x50A00C)
-#define ADF_4XXX_PM_INTERRUPT (0x50A028)
-#define ADF_4XXX_PM_DRV_ACTIVE BIT(20)
-#define ADF_4XXX_PM_INIT_STATE BIT(21)
-/* Power management source in ERRSOU2 and ERRMSK2 */
-#define ADF_4XXX_PM_SOU BIT(18)
-
/* Firmware Binaries */
#define ADF_4XXX_FW "qat_4xxx.bin"
#define ADF_4XXX_MMP "qat_4xxx_mmp.bin"
diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c
index a6c78b9c730b..fa4c350c1bf9 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_drv.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c
@@ -75,6 +75,13 @@ static int adf_crypto_dev_config(struct adf_accel_dev *accel_dev)
if (ret)
goto err;
+ /* Temporarily set the number of crypto instances to zero to avoid
+ * registering the crypto algorithms.
+	 * This will be removed once the algorithms support the
+	 * CRYPTO_TFM_REQ_MAY_BACKLOG flag.
+ */
+ instances = 0;
+
for (i = 0; i < instances; i++) {
val = i;
bank = i * 2;
diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile
index 7e191a42a5c7..f25a6c8edfc7 100644
--- a/drivers/crypto/qat/qat_common/Makefile
+++ b/drivers/crypto/qat/qat_common/Makefile
@@ -12,6 +12,7 @@ intel_qat-objs := adf_cfg.o \
adf_hw_arbiter.o \
adf_gen2_hw_data.o \
adf_gen4_hw_data.o \
+ adf_gen4_pm.o \
qat_crypto.o \
qat_algs.o \
qat_asym_algs.o \
diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h
index 2d4cd7c7cf33..a03c6cf72331 100644
--- a/drivers/crypto/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h
@@ -184,6 +184,8 @@ struct adf_hw_device_data {
void (*exit_arb)(struct adf_accel_dev *accel_dev);
const u32 *(*get_arb_mapping)(void);
int (*init_device)(struct adf_accel_dev *accel_dev);
+ int (*enable_pm)(struct adf_accel_dev *accel_dev);
+ bool (*handle_pm_interrupt)(struct adf_accel_dev *accel_dev);
void (*disable_iov)(struct adf_accel_dev *accel_dev);
void (*configure_iov_threads)(struct adf_accel_dev *accel_dev,
bool enable);
diff --git a/drivers/crypto/qat/qat_common/adf_admin.c b/drivers/crypto/qat/qat_common/adf_admin.c
index 498eb6f690e3..3b6184c35081 100644
--- a/drivers/crypto/qat/qat_common/adf_admin.c
+++ b/drivers/crypto/qat/qat_common/adf_admin.c
@@ -251,6 +251,43 @@ int adf_send_admin_init(struct adf_accel_dev *accel_dev)
}
EXPORT_SYMBOL_GPL(adf_send_admin_init);
+/**
+ * adf_init_admin_pm() - Function sends PM init message to FW
+ * @accel_dev: Pointer to acceleration device.
+ * @idle_delay: QAT HW idle time before power gating is initiated.
+ * 000 - 64us
+ * 001 - 128us
+ * 010 - 256us
+ * 011 - 512us
+ * 100 - 1ms
+ * 101 - 2ms
+ * 110 - 4ms
+ * 111 - 8ms
+ *
+ * Function sends to the FW the admin init message for the PM state
+ * configuration.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+int adf_init_admin_pm(struct adf_accel_dev *accel_dev, u32 idle_delay)
+{
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ struct icp_qat_fw_init_admin_resp resp = {0};
+ struct icp_qat_fw_init_admin_req req = {0};
+ u32 ae_mask = hw_data->admin_ae_mask;
+
+ if (!accel_dev->admin) {
+ dev_err(&GET_DEV(accel_dev), "adf_admin is not available\n");
+ return -EFAULT;
+ }
+
+ req.cmd_id = ICP_QAT_FW_PM_STATE_CONFIG;
+ req.idle_filter = idle_delay;
+
+ return adf_send_admin(accel_dev, &req, &resp, ae_mask);
+}
+EXPORT_SYMBOL_GPL(adf_init_admin_pm);
+
int adf_init_admin_comms(struct adf_accel_dev *accel_dev)
{
struct adf_admin_comms *admin;
diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h
index 76f4f96ec5eb..e8c9b77c0d66 100644
--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
@@ -102,6 +102,7 @@ void adf_exit_aer(void);
int adf_init_admin_comms(struct adf_accel_dev *accel_dev);
void adf_exit_admin_comms(struct adf_accel_dev *accel_dev);
int adf_send_admin_init(struct adf_accel_dev *accel_dev);
+int adf_init_admin_pm(struct adf_accel_dev *accel_dev, u32 idle_delay);
int adf_init_arb(struct adf_accel_dev *accel_dev);
void adf_exit_arb(struct adf_accel_dev *accel_dev);
void adf_update_ring_arb(struct adf_etr_ring_data *ring);
@@ -188,6 +189,9 @@ int qat_uclo_map_obj(struct icp_qat_fw_loader_handle *handle,
void *addr_ptr, u32 mem_size, char *obj_name);
int qat_uclo_set_cfg_ae_mask(struct icp_qat_fw_loader_handle *handle,
unsigned int cfg_ae_mask);
+int adf_init_misc_wq(void);
+void adf_exit_misc_wq(void);
+bool adf_misc_wq_queue_work(struct work_struct *work);
#if defined(CONFIG_PCI_IOV)
int adf_sriov_configure(struct pci_dev *pdev, int numvfs);
void adf_disable_sriov(struct adf_accel_dev *accel_dev);
diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
index 6f64aa693146..e8ac932bbaab 100644
--- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c
+++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
@@ -419,6 +419,9 @@ static int __init adf_register_ctl_device_driver(void)
if (adf_chr_drv_create())
goto err_chr_dev;
+ if (adf_init_misc_wq())
+ goto err_misc_wq;
+
if (adf_init_aer())
goto err_aer;
@@ -440,6 +443,8 @@ err_vf_wq:
err_pf_wq:
adf_exit_aer();
err_aer:
+ adf_exit_misc_wq();
+err_misc_wq:
adf_chr_drv_destroy();
err_chr_dev:
mutex_destroy(&adf_ctl_lock);
@@ -449,6 +454,7 @@ err_chr_dev:
static void __exit adf_unregister_ctl_device_driver(void)
{
adf_chr_drv_destroy();
+ adf_exit_misc_wq();
adf_exit_aer();
adf_exit_vf_wq();
adf_exit_pf_wq();
diff --git a/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h
index f0f71ca44ca3..43b8f864806b 100644
--- a/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h
+++ b/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h
@@ -122,6 +122,20 @@ do { \
#define ADF_WQM_CSR_RPRESETSTS_STATUS BIT(0)
#define ADF_WQM_CSR_RPRESETSTS(bank) (ADF_WQM_CSR_RPRESETCTL(bank) + 4)
+/* Error source registers */
+#define ADF_GEN4_ERRSOU0 (0x41A200)
+#define ADF_GEN4_ERRSOU1 (0x41A204)
+#define ADF_GEN4_ERRSOU2 (0x41A208)
+#define ADF_GEN4_ERRSOU3 (0x41A20C)
+
+/* Error source mask registers */
+#define ADF_GEN4_ERRMSK0 (0x41A210)
+#define ADF_GEN4_ERRMSK1 (0x41A214)
+#define ADF_GEN4_ERRMSK2 (0x41A218)
+#define ADF_GEN4_ERRMSK3 (0x41A21C)
+
+#define ADF_GEN4_VFLNOTIFY BIT(7)
+
void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev);
void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops);
int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number);
diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c
index 8efbedf63bc8..d80d493a7756 100644
--- a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c
+++ b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c
@@ -9,15 +9,12 @@
#include "adf_pfvf_pf_proto.h"
#include "adf_pfvf_utils.h"
-#define ADF_4XXX_MAX_NUM_VFS 16
-
#define ADF_4XXX_PF2VM_OFFSET(i) (0x40B010 + ((i) * 0x20))
#define ADF_4XXX_VM2PF_OFFSET(i) (0x40B014 + ((i) * 0x20))
/* VF2PF interrupt source registers */
-#define ADF_4XXX_VM2PF_SOU(i) (0x41A180 + ((i) * 4))
-#define ADF_4XXX_VM2PF_MSK(i) (0x41A1C0 + ((i) * 4))
-#define ADF_4XXX_VM2PF_INT_EN_MSK BIT(0)
+#define ADF_4XXX_VM2PF_SOU 0x41A180
+#define ADF_4XXX_VM2PF_MSK 0x41A1C0
#define ADF_PFVF_GEN4_MSGTYPE_SHIFT 2
#define ADF_PFVF_GEN4_MSGTYPE_MASK 0x3F
@@ -41,51 +38,30 @@ static u32 adf_gen4_pf_get_vf2pf_offset(u32 i)
static u32 adf_gen4_get_vf2pf_sources(void __iomem *pmisc_addr)
{
- int i;
u32 sou, mask;
- int num_csrs = ADF_4XXX_MAX_NUM_VFS;
- u32 vf_mask = 0;
- for (i = 0; i < num_csrs; i++) {
- sou = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU(i));
- mask = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK(i));
- sou &= ~mask;
- vf_mask |= sou << i;
- }
+ sou = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU);
+ mask = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK);
- return vf_mask;
+ return sou & ~mask;
}
static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr,
u32 vf_mask)
{
- int num_csrs = ADF_4XXX_MAX_NUM_VFS;
- unsigned long mask = vf_mask;
unsigned int val;
- int i;
-
- for_each_set_bit(i, &mask, num_csrs) {
- unsigned int offset = ADF_4XXX_VM2PF_MSK(i);
- val = ADF_CSR_RD(pmisc_addr, offset) & ~ADF_4XXX_VM2PF_INT_EN_MSK;
- ADF_CSR_WR(pmisc_addr, offset, val);
- }
+ val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) & ~vf_mask;
+ ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val);
}
static void adf_gen4_disable_vf2pf_interrupts(void __iomem *pmisc_addr,
u32 vf_mask)
{
- int num_csrs = ADF_4XXX_MAX_NUM_VFS;
- unsigned long mask = vf_mask;
unsigned int val;
- int i;
-
- for_each_set_bit(i, &mask, num_csrs) {
- unsigned int offset = ADF_4XXX_VM2PF_MSK(i);
- val = ADF_CSR_RD(pmisc_addr, offset) | ADF_4XXX_VM2PF_INT_EN_MSK;
- ADF_CSR_WR(pmisc_addr, offset, val);
- }
+ val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) | vf_mask;
+ ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val);
}
static int adf_gen4_pfvf_send(struct adf_accel_dev *accel_dev,
diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pm.c b/drivers/crypto/qat/qat_common/adf_gen4_pm.c
new file mode 100644
index 000000000000..7037c0892a8a
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/adf_gen4_pm.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2022 Intel Corporation */
+#include <linux/bitfield.h>
+#include <linux/iopoll.h>
+#include "adf_accel_devices.h"
+#include "adf_common_drv.h"
+#include "adf_gen4_pm.h"
+#include "adf_cfg_strings.h"
+#include "icp_qat_fw_init_admin.h"
+#include "adf_gen4_hw_data.h"
+#include "adf_cfg.h"
+
+enum qat_pm_host_msg {
+ PM_NO_CHANGE = 0,
+ PM_SET_MIN,
+};
+
+struct adf_gen4_pm_data {
+ struct work_struct pm_irq_work;
+ struct adf_accel_dev *accel_dev;
+ u32 pm_int_sts;
+};
+
+static int send_host_msg(struct adf_accel_dev *accel_dev)
+{
+ void __iomem *pmisc = adf_get_pmisc_base(accel_dev);
+ u32 msg;
+
+ msg = ADF_CSR_RD(pmisc, ADF_GEN4_PM_HOST_MSG);
+ if (msg & ADF_GEN4_PM_MSG_PENDING)
+ return -EBUSY;
+
+ /* Send HOST_MSG */
+ msg = FIELD_PREP(ADF_GEN4_PM_MSG_PAYLOAD_BIT_MASK, PM_SET_MIN);
+ msg |= ADF_GEN4_PM_MSG_PENDING;
+ ADF_CSR_WR(pmisc, ADF_GEN4_PM_HOST_MSG, msg);
+
+ /* Poll status register to make sure the HOST_MSG has been processed */
+ return read_poll_timeout(ADF_CSR_RD, msg,
+ !(msg & ADF_GEN4_PM_MSG_PENDING),
+ ADF_GEN4_PM_MSG_POLL_DELAY_US,
+ ADF_GEN4_PM_POLL_TIMEOUT_US, true, pmisc,
+ ADF_GEN4_PM_HOST_MSG);
+}
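For readers unfamiliar with the helper: read_poll_timeout() (from <linux/iopoll.h>) re-reads a register through the given accessor until the condition holds or the timeout expires. A minimal sketch of the pattern under assumed names (a readl() accessor and hypothetical REG_STATUS/STATUS_BUSY, not QAT definitions):

#include <linux/bits.h>
#include <linux/io.h>
#include <linux/iopoll.h>

#define REG_STATUS	0x1c	/* hypothetical status register offset */
#define STATUS_BUSY	BIT(0)	/* hypothetical busy flag */

static int wait_until_idle(void __iomem *base)
{
	u32 val;

	/* Re-read every 20 us; returns -ETIMEDOUT after one second. */
	return read_poll_timeout(readl, val, !(val & STATUS_BUSY),
				 20, USEC_PER_SEC, false, base + REG_STATUS);
}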
+
+static void pm_bh_handler(struct work_struct *work)
+{
+ struct adf_gen4_pm_data *pm_data =
+ container_of(work, struct adf_gen4_pm_data, pm_irq_work);
+ struct adf_accel_dev *accel_dev = pm_data->accel_dev;
+ void __iomem *pmisc = adf_get_pmisc_base(accel_dev);
+ u32 pm_int_sts = pm_data->pm_int_sts;
+ u32 val;
+
+ /* PM Idle interrupt */
+ if (pm_int_sts & ADF_GEN4_PM_IDLE_STS) {
+ /* Issue host message to FW */
+ if (send_host_msg(accel_dev))
+ dev_warn_ratelimited(&GET_DEV(accel_dev),
+ "Failed to send host msg to FW\n");
+ }
+
+ /* Clear interrupt status */
+ ADF_CSR_WR(pmisc, ADF_GEN4_PM_INTERRUPT, pm_int_sts);
+
+ /* Reenable PM interrupt */
+ val = ADF_CSR_RD(pmisc, ADF_GEN4_ERRMSK2);
+ val &= ~ADF_GEN4_PM_SOU;
+ ADF_CSR_WR(pmisc, ADF_GEN4_ERRMSK2, val);
+
+ kfree(pm_data);
+}
+
+bool adf_gen4_handle_pm_interrupt(struct adf_accel_dev *accel_dev)
+{
+ void __iomem *pmisc = adf_get_pmisc_base(accel_dev);
+ struct adf_gen4_pm_data *pm_data = NULL;
+ u32 errsou2;
+ u32 errmsk2;
+ u32 val;
+
+ /* Only handle the interrupt triggered by PM */
+ errmsk2 = ADF_CSR_RD(pmisc, ADF_GEN4_ERRMSK2);
+ if (errmsk2 & ADF_GEN4_PM_SOU)
+ return false;
+
+ errsou2 = ADF_CSR_RD(pmisc, ADF_GEN4_ERRSOU2);
+ if (!(errsou2 & ADF_GEN4_PM_SOU))
+ return false;
+
+ /* Disable interrupt */
+ val = ADF_CSR_RD(pmisc, ADF_GEN4_ERRMSK2);
+ val |= ADF_GEN4_PM_SOU;
+ ADF_CSR_WR(pmisc, ADF_GEN4_ERRMSK2, val);
+
+ val = ADF_CSR_RD(pmisc, ADF_GEN4_PM_INTERRUPT);
+
+ pm_data = kzalloc(sizeof(*pm_data), GFP_ATOMIC);
+ if (!pm_data)
+ return false;
+
+ pm_data->pm_int_sts = val;
+ pm_data->accel_dev = accel_dev;
+
+ INIT_WORK(&pm_data->pm_irq_work, pm_bh_handler);
+ adf_misc_wq_queue_work(&pm_data->pm_irq_work);
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_handle_pm_interrupt);
+
+int adf_gen4_enable_pm(struct adf_accel_dev *accel_dev)
+{
+ void __iomem *pmisc = adf_get_pmisc_base(accel_dev);
+ int ret;
+ u32 val;
+
+ ret = adf_init_admin_pm(accel_dev, ADF_GEN4_PM_DEFAULT_IDLE_FILTER);
+ if (ret)
+ return ret;
+
+ /* Enable default PM interrupts: IDLE, THROTTLE */
+ val = ADF_CSR_RD(pmisc, ADF_GEN4_PM_INTERRUPT);
+ val |= ADF_GEN4_PM_INT_EN_DEFAULT;
+
+ /* Clear interrupt status */
+ val |= ADF_GEN4_PM_INT_STS_MASK;
+ ADF_CSR_WR(pmisc, ADF_GEN4_PM_INTERRUPT, val);
+
+ /* Unmask PM Interrupt */
+ val = ADF_CSR_RD(pmisc, ADF_GEN4_ERRMSK2);
+ val &= ~ADF_GEN4_PM_SOU;
+ ADF_CSR_WR(pmisc, ADF_GEN4_ERRMSK2, val);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_enable_pm);
diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pm.h b/drivers/crypto/qat/qat_common/adf_gen4_pm.h
new file mode 100644
index 000000000000..f8f8a9ee29e5
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/adf_gen4_pm.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2022 Intel Corporation */
+#ifndef ADF_GEN4_PM_H
+#define ADF_GEN4_PM_H
+
+#include "adf_accel_devices.h"
+
+/* Power management registers */
+#define ADF_GEN4_PM_HOST_MSG (0x50A01C)
+
+/* Power management */
+#define ADF_GEN4_PM_POLL_DELAY_US 20
+#define ADF_GEN4_PM_POLL_TIMEOUT_US USEC_PER_SEC
+#define ADF_GEN4_PM_MSG_POLL_DELAY_US (10 * USEC_PER_MSEC)
+#define ADF_GEN4_PM_STATUS (0x50A00C)
+#define ADF_GEN4_PM_INTERRUPT (0x50A028)
+
+/* Power management source in ERRSOU2 and ERRMSK2 */
+#define ADF_GEN4_PM_SOU BIT(18)
+
+#define ADF_GEN4_PM_IDLE_INT_EN BIT(18)
+#define ADF_GEN4_PM_THROTTLE_INT_EN BIT(19)
+#define ADF_GEN4_PM_DRV_ACTIVE BIT(20)
+#define ADF_GEN4_PM_INIT_STATE BIT(21)
+#define ADF_GEN4_PM_INT_EN_DEFAULT (ADF_GEN4_PM_IDLE_INT_EN | \
+ ADF_GEN4_PM_THROTTLE_INT_EN)
+
+#define ADF_GEN4_PM_THR_STS BIT(0)
+#define ADF_GEN4_PM_IDLE_STS BIT(1)
+#define ADF_GEN4_PM_FW_INT_STS BIT(2)
+#define ADF_GEN4_PM_INT_STS_MASK (ADF_GEN4_PM_THR_STS | \
+ ADF_GEN4_PM_IDLE_STS | \
+ ADF_GEN4_PM_FW_INT_STS)
+
+#define ADF_GEN4_PM_MSG_PENDING BIT(0)
+#define ADF_GEN4_PM_MSG_PAYLOAD_BIT_MASK GENMASK(28, 1)
+
+#define ADF_GEN4_PM_DEFAULT_IDLE_FILTER (0x0)
+#define ADF_GEN4_PM_MAX_IDLE_FILTER (0x7)
+
+int adf_gen4_enable_pm(struct adf_accel_dev *accel_dev);
+bool adf_gen4_handle_pm_interrupt(struct adf_accel_dev *accel_dev);
+
+#endif
diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c
index 2edc63c6b6ca..c2c718f1b489 100644
--- a/drivers/crypto/qat/qat_common/adf_init.c
+++ b/drivers/crypto/qat/qat_common/adf_init.c
@@ -181,6 +181,12 @@ int adf_dev_start(struct adf_accel_dev *accel_dev)
if (hw_data->set_ssm_wdtimer)
hw_data->set_ssm_wdtimer(accel_dev);
+ /* Enable Power Management */
+ if (hw_data->enable_pm && hw_data->enable_pm(accel_dev)) {
+ dev_err(&GET_DEV(accel_dev), "Failed to configure Power Management\n");
+ return -EFAULT;
+ }
+
list_for_each(list_itr, &service_table) {
service = list_entry(list_itr, struct service_hndl, list);
if (service->event_hld(accel_dev, ADF_EVENT_START)) {
diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c
index 4ca482aa69f7..a35149f8bf1e 100644
--- a/drivers/crypto/qat/qat_common/adf_isr.c
+++ b/drivers/crypto/qat/qat_common/adf_isr.c
@@ -16,6 +16,7 @@
#include "adf_transport_internal.h"
#define ADF_MAX_NUM_VFS 32
+static struct workqueue_struct *adf_misc_wq;
static int adf_enable_msix(struct adf_accel_dev *accel_dev)
{
@@ -123,6 +124,17 @@ static bool adf_handle_vf2pf_int(struct adf_accel_dev *accel_dev)
}
#endif /* CONFIG_PCI_IOV */
+static bool adf_handle_pm_int(struct adf_accel_dev *accel_dev)
+{
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+
+ if (hw_data->handle_pm_interrupt &&
+ hw_data->handle_pm_interrupt(accel_dev))
+ return true;
+
+ return false;
+}
+
static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
{
struct adf_accel_dev *accel_dev = dev_ptr;
@@ -133,6 +145,9 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
return IRQ_HANDLED;
#endif /* CONFIG_PCI_IOV */
+ if (adf_handle_pm_int(accel_dev))
+ return IRQ_HANDLED;
+
dev_dbg(&GET_DEV(accel_dev), "qat_dev%d spurious AE interrupt\n",
accel_dev->accel_id);
@@ -341,3 +356,30 @@ err_out:
return ret;
}
EXPORT_SYMBOL_GPL(adf_isr_resource_alloc);
+
+/**
+ * adf_init_misc_wq() - Init misc workqueue
+ *
+ * This function initializes the general-purpose workqueue 'qat_misc_wq'.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+int __init adf_init_misc_wq(void)
+{
+ adf_misc_wq = alloc_workqueue("qat_misc_wq", WQ_MEM_RECLAIM, 0);
+
+ return !adf_misc_wq ? -ENOMEM : 0;
+}
+
+void adf_exit_misc_wq(void)
+{
+ if (adf_misc_wq)
+ destroy_workqueue(adf_misc_wq);
+
+ adf_misc_wq = NULL;
+}
+
+bool adf_misc_wq_queue_work(struct work_struct *work)
+{
+ return queue_work(adf_misc_wq, work);
+}
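A usage sketch of the top-half/bottom-half split this queue enables, mirroring the PM interrupt handler earlier in this patch (the my_* names are placeholders, not driver code):

#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_evt {
	struct work_struct work;
	u32 status;
};

static void my_bh(struct work_struct *work)
{
	struct my_evt *evt = container_of(work, struct my_evt, work);

	/* Sleepable context: talk to firmware using evt->status, then free. */
	kfree(evt);
}

static bool my_top_half(u32 status)
{
	/* Hard-IRQ context: atomic allocation only, defer the real work. */
	struct my_evt *evt = kzalloc(sizeof(*evt), GFP_ATOMIC);

	if (!evt)
		return false;

	evt->status = status;
	INIT_WORK(&evt->work, my_bh);
	return adf_misc_wq_queue_work(&evt->work);
}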
diff --git a/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c b/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c
index 14b222691c9c..1141258db4b6 100644
--- a/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c
+++ b/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c
@@ -96,7 +96,7 @@ int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev)
int adf_vf2pf_get_capabilities(struct adf_accel_dev *accel_dev)
{
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
- struct capabilities_v3 cap_msg = { { 0 }, };
+ struct capabilities_v3 cap_msg = { 0 };
unsigned int len = sizeof(cap_msg);
if (accel_dev->vf.pf_compat_ver < ADF_PFVF_COMPAT_CAPABILITIES)
@@ -141,7 +141,7 @@ int adf_vf2pf_get_capabilities(struct adf_accel_dev *accel_dev)
int adf_vf2pf_get_ring_to_svc(struct adf_accel_dev *accel_dev)
{
- struct ring_to_svc_map_v1 rts_map_msg = { { 0 }, };
+ struct ring_to_svc_map_v1 rts_map_msg = { 0 };
unsigned int len = sizeof(rts_map_msg);
if (accel_dev->vf.pf_compat_ver < ADF_PFVF_COMPAT_RING_TO_SVC_MAP)
diff --git a/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h b/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h
index afe59a7684ac..56cb827f93ea 100644
--- a/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h
+++ b/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h
@@ -16,6 +16,7 @@ enum icp_qat_fw_init_admin_cmd_id {
ICP_QAT_FW_HEARTBEAT_SYNC = 7,
ICP_QAT_FW_HEARTBEAT_GET = 8,
ICP_QAT_FW_COMP_CAPABILITY_GET = 9,
+ ICP_QAT_FW_PM_STATE_CONFIG = 128,
};
enum icp_qat_fw_init_admin_resp_status {
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c
index 7234c4940fae..67c9588e89df 100644
--- a/drivers/crypto/qat/qat_common/qat_crypto.c
+++ b/drivers/crypto/qat/qat_common/qat_crypto.c
@@ -161,6 +161,13 @@ int qat_crypto_dev_config(struct adf_accel_dev *accel_dev)
if (ret)
goto err;
+ /* Temporarily set the number of crypto instances to zero to avoid
+ * registering the crypto algorithms.
+ * This will be removed once the algorithms support the
+ * CRYPTO_TFM_REQ_MAY_BACKLOG flag.
+ */
+ instances = 0;
+
for (i = 0; i < instances; i++) {
val = i;
snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i);
diff --git a/drivers/crypto/qat/qat_common/qat_uclo.c b/drivers/crypto/qat/qat_common/qat_uclo.c
index 2026cc6be8f0..6356402a2c9e 100644
--- a/drivers/crypto/qat/qat_common/qat_uclo.c
+++ b/drivers/crypto/qat/qat_common/qat_uclo.c
@@ -387,7 +387,9 @@ static int qat_uclo_init_ustore(struct icp_qat_fw_loader_handle *handle,
page = image->page;
for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) {
- if (!test_bit(ae, (unsigned long *)&uof_image->ae_assigned))
+ unsigned long ae_assigned = uof_image->ae_assigned;
+
+ if (!test_bit(ae, &ae_assigned))
continue;
if (!test_bit(ae, &cfg_ae_mask))
@@ -664,8 +666,9 @@ static int qat_uclo_map_ae(struct icp_qat_fw_loader_handle *handle, int max_ae)
continue;
for (i = 0; i < obj_handle->uimage_num; i++) {
- if (!test_bit(ae, (unsigned long *)
- &obj_handle->ae_uimage[i].img_ptr->ae_assigned))
+ unsigned long ae_assigned = obj_handle->ae_uimage[i].img_ptr->ae_assigned;
+
+ if (!test_bit(ae, &ae_assigned))
continue;
mflag = 1;
if (qat_uclo_init_ae_data(obj_handle, ae, i))
diff --git a/drivers/crypto/qcom-rng.c b/drivers/crypto/qcom-rng.c
index 99ba8d51d102..11f30fd48c14 100644
--- a/drivers/crypto/qcom-rng.c
+++ b/drivers/crypto/qcom-rng.c
@@ -8,6 +8,7 @@
#include <linux/clk.h>
#include <linux/crypto.h>
#include <linux/io.h>
+#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
@@ -43,16 +44,19 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max)
{
unsigned int currsize = 0;
u32 val;
+ int ret;
/* read random data from hardware */
do {
- val = readl_relaxed(rng->base + PRNG_STATUS);
- if (!(val & PRNG_STATUS_DATA_AVAIL))
- break;
+ ret = readl_poll_timeout(rng->base + PRNG_STATUS, val,
+ val & PRNG_STATUS_DATA_AVAIL,
+ 200, 10000);
+ if (ret)
+ return ret;
val = readl_relaxed(rng->base + PRNG_DATA_OUT);
if (!val)
- break;
+ return -EINVAL;
if ((max - currsize) >= WORD_SZ) {
memcpy(data, &val, WORD_SZ);
@@ -61,11 +65,10 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max)
} else {
/* copy only remaining bytes */
memcpy(data, &val, max - currsize);
- break;
}
} while (currsize < max);
- return currsize;
+ return 0;
}
static int qcom_rng_generate(struct crypto_rng *tfm,
@@ -87,7 +90,7 @@ static int qcom_rng_generate(struct crypto_rng *tfm,
mutex_unlock(&rng->lock);
clk_disable_unprepare(rng->clk);
- return 0;
+ return ret;
}
static int qcom_rng_seed(struct crypto_rng *tfm, const u8 *seed,
diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index 1cece1a7d3f0..5bbf0d2722e1 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -506,7 +506,6 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = {
.exit = rk_ablk_exit_tfm,
.min_keysize = DES3_EDE_KEY_SIZE,
.max_keysize = DES3_EDE_KEY_SIZE,
- .ivsize = DES_BLOCK_SIZE,
.setkey = rk_tdes_setkey,
.encrypt = rk_des3_ede_ecb_encrypt,
.decrypt = rk_des3_ede_ecb_decrypt,
diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c
index 97277b7150cb..5a57c9afd8c8 100644
--- a/drivers/crypto/ux500/cryp/cryp_core.c
+++ b/drivers/crypto/ux500/cryp/cryp_core.c
@@ -1264,7 +1264,7 @@ static int ux500_cryp_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
dev_dbg(dev, "[%s]", __func__);
- device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_ATOMIC);
+ device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_KERNEL);
if (!device_data) {
ret = -ENOMEM;
goto out;
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index 51a6e1a42434..5157c118d642 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -1658,7 +1658,7 @@ static int ux500_hash_probe(struct platform_device *pdev)
struct hash_device_data *device_data;
struct device *dev = &pdev->dev;
- device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_ATOMIC);
+ device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_KERNEL);
if (!device_data) {
ret = -ENOMEM;
goto out;
diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig
index c85fab7ef0bd..b2c28b87f14b 100644
--- a/drivers/crypto/vmx/Kconfig
+++ b/drivers/crypto/vmx/Kconfig
@@ -2,7 +2,11 @@
config CRYPTO_DEV_VMX_ENCRYPT
tristate "Encryption acceleration support on P8 CPU"
depends on CRYPTO_DEV_VMX
+ select CRYPTO_AES
+ select CRYPTO_CBC
+ select CRYPTO_CTR
select CRYPTO_GHASH
+ select CRYPTO_XTS
default m
help
Support for VMX cryptographic acceleration instructions on Power8 CPU.
diff --git a/drivers/crypto/xilinx/Makefile b/drivers/crypto/xilinx/Makefile
index 534e32daf76a..730feff5b5f2 100644
--- a/drivers/crypto/xilinx/Makefile
+++ b/drivers/crypto/xilinx/Makefile
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_AES) += zynqmp-aes-gcm.o
+obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_SHA3) += zynqmp-sha.o
diff --git a/drivers/crypto/xilinx/zynqmp-sha.c b/drivers/crypto/xilinx/zynqmp-sha.c
new file mode 100644
index 000000000000..43ff170ff1c2
--- /dev/null
+++ b/drivers/crypto/xilinx/zynqmp-sha.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xilinx ZynqMP SHA Driver.
+ * Copyright (c) 2022 Xilinx Inc.
+ */
+#include <linux/cacheflush.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha3.h>
+#include <linux/crypto.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/firmware/xlnx-zynqmp.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#define ZYNQMP_DMA_BIT_MASK 32U
+#define ZYNQMP_DMA_ALLOC_FIXED_SIZE 0x1000U
+
+enum zynqmp_sha_op {
+ ZYNQMP_SHA3_INIT = 1,
+ ZYNQMP_SHA3_UPDATE = 2,
+ ZYNQMP_SHA3_FINAL = 4,
+};
+
+struct zynqmp_sha_drv_ctx {
+ struct shash_alg sha3_384;
+ struct device *dev;
+};
+
+struct zynqmp_sha_tfm_ctx {
+ struct device *dev;
+ struct crypto_shash *fbk_tfm;
+};
+
+struct zynqmp_sha_desc_ctx {
+ struct shash_desc fbk_req;
+};
+
+static dma_addr_t update_dma_addr, final_dma_addr;
+static char *ubuf, *fbuf;
+
+static int zynqmp_sha_init_tfm(struct crypto_shash *hash)
+{
+ const char *fallback_driver_name = crypto_shash_alg_name(hash);
+ struct zynqmp_sha_tfm_ctx *tfm_ctx = crypto_shash_ctx(hash);
+ struct shash_alg *alg = crypto_shash_alg(hash);
+ struct crypto_shash *fallback_tfm;
+ struct zynqmp_sha_drv_ctx *drv_ctx;
+
+ drv_ctx = container_of(alg, struct zynqmp_sha_drv_ctx, sha3_384);
+ tfm_ctx->dev = drv_ctx->dev;
+
+ /* Allocate a fallback and abort if the allocation fails. */
+ fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
+ CRYPTO_ALG_NEED_FALLBACK);
+ if (IS_ERR(fallback_tfm))
+ return PTR_ERR(fallback_tfm);
+
+ tfm_ctx->fbk_tfm = fallback_tfm;
+ hash->descsize += crypto_shash_descsize(tfm_ctx->fbk_tfm);
+
+ return 0;
+}
+
+static void zynqmp_sha_exit_tfm(struct crypto_shash *hash)
+{
+ struct zynqmp_sha_tfm_ctx *tfm_ctx = crypto_shash_ctx(hash);
+
+ if (tfm_ctx->fbk_tfm) {
+ crypto_free_shash(tfm_ctx->fbk_tfm);
+ tfm_ctx->fbk_tfm = NULL;
+ }
+
+ memzero_explicit(tfm_ctx, sizeof(struct zynqmp_sha_tfm_ctx));
+}
+
+static int zynqmp_sha_init(struct shash_desc *desc)
+{
+ struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+ struct zynqmp_sha_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+
+ dctx->fbk_req.tfm = tctx->fbk_tfm;
+ return crypto_shash_init(&dctx->fbk_req);
+}
+
+static int zynqmp_sha_update(struct shash_desc *desc, const u8 *data, unsigned int length)
+{
+ struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ return crypto_shash_update(&dctx->fbk_req, data, length);
+}
+
+static int zynqmp_sha_final(struct shash_desc *desc, u8 *out)
+{
+ struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ return crypto_shash_final(&dctx->fbk_req, out);
+}
+
+static int zynqmp_sha_finup(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out)
+{
+ struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ return crypto_shash_finup(&dctx->fbk_req, data, length, out);
+}
+
+static int zynqmp_sha_import(struct shash_desc *desc, const void *in)
+{
+ struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+ struct zynqmp_sha_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+
+ dctx->fbk_req.tfm = tctx->fbk_tfm;
+ return crypto_shash_import(&dctx->fbk_req, in);
+}
+
+static int zynqmp_sha_export(struct shash_desc *desc, void *out)
+{
+ struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ return crypto_shash_export(&dctx->fbk_req, out);
+}
+
+static int zynqmp_sha_digest(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out)
+{
+ unsigned int remaining_len = len;
+ int update_size;
+ int ret;
+
+ ret = zynqmp_pm_sha_hash(0, 0, ZYNQMP_SHA3_INIT);
+ if (ret)
+ return ret;
+
+ while (remaining_len != 0) {
+ memzero_explicit(ubuf, ZYNQMP_DMA_ALLOC_FIXED_SIZE);
+ if (remaining_len >= ZYNQMP_DMA_ALLOC_FIXED_SIZE) {
+ update_size = ZYNQMP_DMA_ALLOC_FIXED_SIZE;
+ remaining_len -= ZYNQMP_DMA_ALLOC_FIXED_SIZE;
+ } else {
+ update_size = remaining_len;
+ remaining_len = 0;
+ }
+ memcpy(ubuf, data, update_size);
+ flush_icache_range((unsigned long)ubuf, (unsigned long)ubuf + update_size);
+ ret = zynqmp_pm_sha_hash(update_dma_addr, update_size, ZYNQMP_SHA3_UPDATE);
+ if (ret)
+ return ret;
+
+ data += update_size;
+ }
+
+ ret = zynqmp_pm_sha_hash(final_dma_addr, SHA3_384_DIGEST_SIZE, ZYNQMP_SHA3_FINAL);
+ memcpy(out, fbuf, SHA3_384_DIGEST_SIZE);
+ memzero_explicit(fbuf, SHA3_384_DIGEST_SIZE);
+
+ return ret;
+}
+
+static struct zynqmp_sha_drv_ctx sha3_drv_ctx = {
+ .sha3_384 = {
+ .init = zynqmp_sha_init,
+ .update = zynqmp_sha_update,
+ .final = zynqmp_sha_final,
+ .finup = zynqmp_sha_finup,
+ .digest = zynqmp_sha_digest,
+ .export = zynqmp_sha_export,
+ .import = zynqmp_sha_import,
+ .init_tfm = zynqmp_sha_init_tfm,
+ .exit_tfm = zynqmp_sha_exit_tfm,
+ .descsize = sizeof(struct zynqmp_sha_desc_ctx),
+ .statesize = sizeof(struct sha3_state),
+ .digestsize = SHA3_384_DIGEST_SIZE,
+ .base = {
+ .cra_name = "sha3-384",
+ .cra_driver_name = "zynqmp-sha3-384",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY |
+ CRYPTO_ALG_ALLOCATES_MEMORY |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = SHA3_384_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct zynqmp_sha_tfm_ctx),
+ .cra_alignmask = 3,
+ .cra_module = THIS_MODULE,
+ }
+ }
+};
+
+static int zynqmp_sha_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ int err;
+ u32 v;
+
+ /* Verify the hardware is present */
+ err = zynqmp_pm_get_api_version(&v);
+ if (err)
+ return err;
+
+ err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(ZYNQMP_DMA_BIT_MASK));
+ if (err < 0) {
+ dev_err(dev, "No usable DMA configuration\n");
+ return err;
+ }
+
+ err = crypto_register_shash(&sha3_drv_ctx.sha3_384);
+ if (err < 0) {
+ dev_err(dev, "Failed to register shash alg.\n");
+ return err;
+ }
+
+ sha3_drv_ctx.dev = dev;
+ platform_set_drvdata(pdev, &sha3_drv_ctx);
+
+ ubuf = dma_alloc_coherent(dev, ZYNQMP_DMA_ALLOC_FIXED_SIZE, &update_dma_addr, GFP_KERNEL);
+ if (!ubuf) {
+ err = -ENOMEM;
+ goto err_shash;
+ }
+
+ fbuf = dma_alloc_coherent(dev, SHA3_384_DIGEST_SIZE, &final_dma_addr, GFP_KERNEL);
+ if (!fbuf) {
+ err = -ENOMEM;
+ goto err_mem;
+ }
+
+ return 0;
+
+err_mem:
+ dma_free_coherent(sha3_drv_ctx.dev, ZYNQMP_DMA_ALLOC_FIXED_SIZE, ubuf, update_dma_addr);
+
+err_shash:
+ crypto_unregister_shash(&sha3_drv_ctx.sha3_384);
+
+ return err;
+}
+
+static int zynqmp_sha_remove(struct platform_device *pdev)
+{
+ sha3_drv_ctx.dev = platform_get_drvdata(pdev);
+
+ dma_free_coherent(sha3_drv_ctx.dev, ZYNQMP_DMA_ALLOC_FIXED_SIZE, ubuf, update_dma_addr);
+ dma_free_coherent(sha3_drv_ctx.dev, SHA3_384_DIGEST_SIZE, fbuf, final_dma_addr);
+ crypto_unregister_shash(&sha3_drv_ctx.sha3_384);
+
+ return 0;
+}
+
+static struct platform_driver zynqmp_sha_driver = {
+ .probe = zynqmp_sha_probe,
+ .remove = zynqmp_sha_remove,
+ .driver = {
+ .name = "zynqmp-sha3-384",
+ },
+};
+
+module_platform_driver(zynqmp_sha_driver);
+MODULE_DESCRIPTION("ZynqMP SHA3 hardware acceleration support.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Harsha <harsha.harsha@xilinx.com>");
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index d33a0613ed0c..5494d745ced5 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -346,8 +346,7 @@ static unsigned long dax_get_unmapped_area(struct file *filp,
}
static const struct address_space_operations dev_dax_aops = {
- .set_page_dirty = __set_page_dirty_no_writeback,
- .invalidatepage = noop_invalidatepage,
+ .dirty_folio = noop_dirty_folio,
};
static int dax_open(struct inode *inode, struct file *filp)
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index e3029389d809..aedc15eabeac 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -282,7 +282,7 @@ static struct inode *dax_alloc_inode(struct super_block *sb)
struct dax_device *dax_dev;
struct inode *inode;
- dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL);
+ dax_dev = alloc_inode_sb(sb, dax_cache, GFP_KERNEL);
if (!dax_dev)
return NULL;
diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c
index 56bf5ad01ad5..8f5848aa144f 100644
--- a/drivers/dma-buf/dma-heap.c
+++ b/drivers/dma-buf/dma-heap.c
@@ -14,6 +14,7 @@
#include <linux/xarray.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <linux/nospec.h>
#include <linux/uaccess.h>
#include <linux/syscalls.h>
#include <linux/dma-heap.h>
@@ -135,6 +136,7 @@ static long dma_heap_ioctl(struct file *file, unsigned int ucmd,
if (nr >= ARRAY_SIZE(dma_heap_ioctl_cmds))
return -EINVAL;
+ nr = array_index_nospec(nr, ARRAY_SIZE(dma_heap_ioctl_cmds));
/* Get the kernel ioctl cmd that matches */
kcmd = dma_heap_ioctl_cmds[nr];
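array_index_nospec() clamps the index under speculative execution so the preceding bounds check cannot be bypassed by a mispredicted branch. A minimal sketch of the hardening pattern (the table and names are illustrative, not from dma-heap):

#include <linux/nospec.h>

static long lookup_cmd(unsigned int nr, const unsigned int *table, size_t n)
{
	if (nr >= n)
		return -EINVAL;

	/* Clamp nr so speculation cannot index past the end of the table. */
	nr = array_index_nospec(nr, n);
	return table[nr];
}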
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index a1da2b4b6d73..1476156af74b 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -1681,8 +1681,10 @@ static void at_xdmac_tasklet(struct tasklet_struct *t)
__func__, atchan->irq_status);
if (!(atchan->irq_status & AT_XDMAC_CIS_LIS) &&
- !(atchan->irq_status & error_mask))
+ !(atchan->irq_status & error_mask)) {
+ spin_unlock_irq(&atchan->lock);
return;
+ }
if (atchan->irq_status & error_mask)
at_xdmac_handle_error(atchan);
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 110de8a60058..858400e42ec0 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2968,7 +2968,7 @@ static int __maybe_unused pl330_suspend(struct device *dev)
struct amba_device *pcdev = to_amba_device(dev);
pm_runtime_force_suspend(dev);
- amba_pclk_unprepare(pcdev);
+ clk_unprepare(pcdev->pclk);
return 0;
}
@@ -2978,7 +2978,7 @@ static int __maybe_unused pl330_resume(struct device *dev)
struct amba_device *pcdev = to_amba_device(dev);
int ret;
- ret = amba_pclk_prepare(pcdev);
+ ret = clk_prepare(pcdev->pclk);
if (ret)
return ret;
diff --git a/drivers/dma/ptdma/ptdma-dev.c b/drivers/dma/ptdma/ptdma-dev.c
index 8a6bf291a73f..daafea5bc35d 100644
--- a/drivers/dma/ptdma/ptdma-dev.c
+++ b/drivers/dma/ptdma/ptdma-dev.c
@@ -207,7 +207,7 @@ int pt_core_init(struct pt_device *pt)
if (!cmd_q->qbase) {
dev_err(dev, "unable to allocate command queue\n");
ret = -ENOMEM;
- goto e_dma_alloc;
+ goto e_destroy_pool;
}
cmd_q->qidx = 0;
@@ -229,8 +229,10 @@ int pt_core_init(struct pt_device *pt)
/* Request an irq */
ret = request_irq(pt->pt_irq, pt_core_irq_handler, 0, dev_name(pt->dev), pt);
- if (ret)
- goto e_pool;
+ if (ret) {
+ dev_err(dev, "unable to allocate an IRQ\n");
+ goto e_free_dma;
+ }
/* Update the device registers with queue information. */
cmd_q->qcontrol &= ~CMD_Q_SIZE;
@@ -250,21 +252,20 @@ int pt_core_init(struct pt_device *pt)
/* Register the DMA engine support */
ret = pt_dmaengine_register(pt);
if (ret)
- goto e_dmaengine;
+ goto e_free_irq;
/* Set up debugfs entries */
ptdma_debugfs_setup(pt);
return 0;
-e_dmaengine:
+e_free_irq:
free_irq(pt->pt_irq, pt);
-e_dma_alloc:
+e_free_dma:
dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase, cmd_q->qbase_dma);
-e_pool:
- dev_err(dev, "unable to allocate an IRQ\n");
+e_destroy_pool:
dma_pool_destroy(pt->cmd_q.dma_pool);
return ret;
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 481f45c77ce1..13d12d660cc2 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -1868,8 +1868,13 @@ static int rcar_dmac_probe(struct platform_device *pdev)
dmac->dev = &pdev->dev;
platform_set_drvdata(pdev, dmac);
- dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK);
- dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40));
+ ret = dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK);
+ if (ret)
+ return ret;
+
+ ret = dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40));
+ if (ret)
+ return ret;
ret = rcar_dmac_parse_of(&pdev->dev, dmac);
if (ret < 0)
diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c
index 158e5e7defae..b26ed690f03c 100644
--- a/drivers/dma/sh/shdma-base.c
+++ b/drivers/dma/sh/shdma-base.c
@@ -115,8 +115,10 @@ static dma_cookie_t shdma_tx_submit(struct dma_async_tx_descriptor *tx)
ret = pm_runtime_get(schan->dev);
spin_unlock_irq(&schan->chan_lock);
- if (ret < 0)
+ if (ret < 0) {
dev_err(schan->dev, "%s(): GET = %d\n", __func__, ret);
+ pm_runtime_put(schan->dev);
+ }
pm_runtime_barrier(schan->dev);
diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c
index a42164389ebc..d5d55732adba 100644
--- a/drivers/dma/stm32-dmamux.c
+++ b/drivers/dma/stm32-dmamux.c
@@ -292,10 +292,12 @@ static int stm32_dmamux_probe(struct platform_device *pdev)
ret = of_dma_router_register(node, stm32_dmamux_route_allocate,
&stm32_dmamux->dmarouter);
if (ret)
- goto err_clk;
+ goto pm_disable;
return 0;
+pm_disable:
+ pm_runtime_disable(&pdev->dev);
err_clk:
clk_disable_unprepare(stm32_dmamux->clk);
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 3a6d2416cb0f..e7e8e624a436 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -350,7 +350,7 @@ static int altr_sdram_probe(struct platform_device *pdev)
if (irq < 0) {
edac_printk(KERN_ERR, EDAC_MC,
"No irq %d in DT\n", irq);
- return -ENODEV;
+ return irq;
}
/* Arria10 has a 2nd IRQ */
@@ -1083,8 +1083,46 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat)
#ifdef CONFIG_EDAC_ALTERA_SDRAM
+/*
+ * A legacy U-Boot bug enabled memory-mapped access to the ECC Enable
+ * register only when ECC was enabled, but Linux checks the ECC Enable
+ * register to determine the ECC status. Use an SMC call (which always
+ * works) to determine ECC enablement instead.
+ */
+static int altr_s10_sdram_check_ecc_deps(struct altr_edac_device_dev *device)
+{
+ const struct edac_device_prv_data *prv = device->data;
+ unsigned long sdram_ecc_addr;
+ struct arm_smccc_res result;
+ struct device_node *np;
+ phys_addr_t sdram_addr;
+ u32 read_reg;
+ int ret;
+
+ np = of_find_compatible_node(NULL, NULL, "altr,sdr-ctl");
+ if (!np)
+ goto sdram_err;
+
+ sdram_addr = of_translate_address(np, of_get_address(np, 0,
+ NULL, NULL));
+ of_node_put(np);
+ sdram_ecc_addr = (unsigned long)sdram_addr + prv->ecc_en_ofst;
+ arm_smccc_smc(INTEL_SIP_SMC_REG_READ, sdram_ecc_addr,
+ 0, 0, 0, 0, 0, 0, &result);
+ read_reg = (unsigned int)result.a1;
+ ret = (int)result.a0;
+ if (!ret && (read_reg & prv->ecc_enable_mask))
+ return 0;
+
+sdram_err:
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "%s: No ECC present or ECC disabled.\n",
+ device->edac_dev_name);
+ return -ENODEV;
+}
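The SMC read boils down to a single firmware round trip; a minimal sketch (MY_SIP_SMC_REG_READ is a hypothetical SiP function ID, not the Intel one):

#include <linux/arm-smccc.h>

#define MY_SIP_SMC_REG_READ	0xC2000007	/* hypothetical function ID */

static int smc_read_reg(unsigned long addr, u32 *val)
{
	struct arm_smccc_res res;

	/* a0 carries the return code, a1 the value read by firmware. */
	arm_smccc_smc(MY_SIP_SMC_REG_READ, addr, 0, 0, 0, 0, 0, 0, &res);
	if ((int)res.a0)
		return -EIO;

	*val = (u32)res.a1;
	return 0;
}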
+
static const struct edac_device_prv_data s10_sdramecc_data = {
- .setup = altr_check_ecc_deps,
+ .setup = altr_s10_sdram_check_ecc_deps,
.ce_clear_mask = ALTR_S10_ECC_SERRPENA,
.ue_clear_mask = ALTR_S10_ECC_DERRPENA,
.ecc_enable_mask = ALTR_S10_ECC_EN,
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index fba609ada0e6..812baa48b290 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -15,6 +15,21 @@ static struct msr __percpu *msrs;
static struct amd64_family_type *fam_type;
+static inline u32 get_umc_reg(u32 reg)
+{
+ if (!fam_type->flags.zn_regs_v2)
+ return reg;
+
+ switch (reg) {
+ case UMCCH_ADDR_CFG: return UMCCH_ADDR_CFG_DDR5;
+ case UMCCH_ADDR_MASK_SEC: return UMCCH_ADDR_MASK_SEC_DDR5;
+ case UMCCH_DIMM_CFG: return UMCCH_DIMM_CFG_DDR5;
+ }
+
+ WARN_ONCE(1, "%s: unknown register 0x%x", __func__, reg);
+ return 0;
+}
+
/* Per-node stuff */
static struct ecc_settings **ecc_stngs;
@@ -1429,8 +1444,10 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt)
edac_dbg(1, "UMC%d x16 DIMMs present: %s\n",
i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no");
- if (pvt->dram_type == MEM_LRDDR4) {
- amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ADDR_CFG, &tmp);
+ if (umc->dram_type == MEM_LRDDR4 || umc->dram_type == MEM_LRDDR5) {
+ amd_smn_read(pvt->mc_node_id,
+ umc_base + get_umc_reg(UMCCH_ADDR_CFG),
+ &tmp);
edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n",
i, 1 << ((tmp >> 4) & 0x3));
}
@@ -1505,7 +1522,7 @@ static void prep_chip_selects(struct amd64_pvt *pvt)
for_each_umc(umc) {
pvt->csels[umc].b_cnt = 4;
- pvt->csels[umc].m_cnt = 2;
+ pvt->csels[umc].m_cnt = fam_type->flags.zn_regs_v2 ? 4 : 2;
}
} else {
@@ -1545,7 +1562,7 @@ static void read_umc_base_mask(struct amd64_pvt *pvt)
}
umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK;
- umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC;
+ umc_mask_reg_sec = get_umc_base(umc) + get_umc_reg(UMCCH_ADDR_MASK_SEC);
for_each_chip_select_mask(cs, umc, pvt) {
mask = &pvt->csels[umc].csmasks[cs];
@@ -1616,19 +1633,49 @@ static void read_dct_base_mask(struct amd64_pvt *pvt)
}
}
+static void determine_memory_type_df(struct amd64_pvt *pvt)
+{
+ struct amd64_umc *umc;
+ u32 i;
+
+ for_each_umc(i) {
+ umc = &pvt->umc[i];
+
+ if (!(umc->sdp_ctrl & UMC_SDP_INIT)) {
+ umc->dram_type = MEM_EMPTY;
+ continue;
+ }
+
+ /*
+ * Check if the system supports the "DDR Type" field in UMC Config
+ * and has DDR5 DIMMs in use.
+ */
+ if (fam_type->flags.zn_regs_v2 && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) {
+ if (umc->dimm_cfg & BIT(5))
+ umc->dram_type = MEM_LRDDR5;
+ else if (umc->dimm_cfg & BIT(4))
+ umc->dram_type = MEM_RDDR5;
+ else
+ umc->dram_type = MEM_DDR5;
+ } else {
+ if (umc->dimm_cfg & BIT(5))
+ umc->dram_type = MEM_LRDDR4;
+ else if (umc->dimm_cfg & BIT(4))
+ umc->dram_type = MEM_RDDR4;
+ else
+ umc->dram_type = MEM_DDR4;
+ }
+
+ edac_dbg(1, " UMC%d DIMM type: %s\n", i, edac_mem_types[umc->dram_type]);
+ }
+}
+
static void determine_memory_type(struct amd64_pvt *pvt)
{
u32 dram_ctrl, dcsm;
- if (pvt->umc) {
- if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
- pvt->dram_type = MEM_LRDDR4;
- else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
- pvt->dram_type = MEM_RDDR4;
- else
- pvt->dram_type = MEM_DDR4;
- return;
- }
+ if (pvt->umc)
+ return determine_memory_type_df(pvt);
switch (pvt->fam) {
case 0xf:
@@ -2149,6 +2196,7 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
{
u32 addr_mask_orig, addr_mask_deinterleaved;
u32 msb, weight, num_zero_bits;
+ int cs_mask_nr = csrow_nr;
int dimm, size = 0;
/* No Chip Selects are enabled. */
@@ -2164,17 +2212,33 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
return size;
/*
- * There is one mask per DIMM, and two Chip Selects per DIMM.
- * CS0 and CS1 -> DIMM0
- * CS2 and CS3 -> DIMM1
+ * Family 17h introduced systems with one mask per DIMM,
+ * and two Chip Selects per DIMM.
+ *
+ * CS0 and CS1 -> MASK0 / DIMM0
+ * CS2 and CS3 -> MASK1 / DIMM1
+ *
+ * Family 19h Model 10h introduced systems with one mask per Chip Select,
+ * and two Chip Selects per DIMM.
+ *
+ * CS0 -> MASK0 -> DIMM0
+ * CS1 -> MASK1 -> DIMM0
+ * CS2 -> MASK2 -> DIMM1
+ * CS3 -> MASK3 -> DIMM1
+ *
+ * Keep the mask number equal to the Chip Select number for newer systems,
+ * and shift the mask number for older systems.
*/
dimm = csrow_nr >> 1;
+ if (!fam_type->flags.zn_regs_v2)
+ cs_mask_nr >>= 1;
+
/* Asymmetric dual-rank DIMM support. */
if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY))
- addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm];
+ addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr];
else
- addr_mask_orig = pvt->csels[umc].csmasks[dimm];
+ addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr];
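/*
 * Illustration only, not driver code: the CS-to-mask mapping described
 * in the comment above, collapsed into one helper.
 */
static inline int cs_to_mask_nr(int csrow_nr, bool zn_regs_v2)
{
	/* One mask per Chip Select on newer parts, one per DIMM on older. */
	return zn_regs_v2 ? csrow_nr : csrow_nr >> 1;
}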
/*
* The number of zero bits in the mask is equal to the number of bits
@@ -2930,6 +2994,7 @@ static struct amd64_family_type family_types[] = {
.f0_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F6,
.max_mcs = 12,
+ .flags.zn_regs_v2 = 1,
.ops = {
.early_channel_count = f17_early_channel_count,
.dbam_to_cs = f17_addr_mask_to_cs_size,
@@ -3368,7 +3433,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
umc_base = get_umc_base(i);
umc = &pvt->umc[i];
- amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg);
+ amd_smn_read(nid, umc_base + get_umc_reg(UMCCH_DIMM_CFG), &umc->dimm_cfg);
amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl);
amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
@@ -3452,7 +3517,9 @@ skip:
read_dct_base_mask(pvt);
determine_memory_type(pvt);
- edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
+
+ if (!pvt->umc)
+ edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
determine_ecc_sym_sz(pvt);
}
@@ -3548,7 +3615,7 @@ static int init_csrows_df(struct mem_ctl_info *mci)
pvt->mc_node_id, cs);
dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs);
- dimm->mtype = pvt->dram_type;
+ dimm->mtype = pvt->umc[umc].dram_type;
dimm->edac_mode = edac_mode;
dimm->dtype = dev_type;
dimm->grain = 64;
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 352bda9803f6..38e5ad95d010 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -273,8 +273,11 @@
#define UMCCH_BASE_ADDR_SEC 0x10
#define UMCCH_ADDR_MASK 0x20
#define UMCCH_ADDR_MASK_SEC 0x28
+#define UMCCH_ADDR_MASK_SEC_DDR5 0x30
#define UMCCH_ADDR_CFG 0x30
+#define UMCCH_ADDR_CFG_DDR5 0x40
#define UMCCH_DIMM_CFG 0x80
+#define UMCCH_DIMM_CFG_DDR5 0x90
#define UMCCH_UMC_CFG 0x100
#define UMCCH_SDP_CTRL 0x104
#define UMCCH_ECC_CTRL 0x14C
@@ -344,6 +347,9 @@ struct amd64_umc {
u32 sdp_ctrl; /* SDP Control reg */
u32 ecc_ctrl; /* DRAM ECC Control reg */
u32 umc_cap_hi; /* Capabilities High reg */
+
+ /* cache the dram_type */
+ enum mem_type dram_type;
};
struct amd64_pvt {
@@ -391,7 +397,12 @@ struct amd64_pvt {
/* place to store error injection parameters prior to issue */
struct error_injection injection;
- /* cache the dram_type */
+ /*
+ * cache the dram_type
+ *
+ * NOTE: Don't use this for Family 17h and later.
+ * Use dram_type in struct amd64_umc instead.
+ */
enum mem_type dram_type;
struct amd64_umc *umc; /* UMC registers */
@@ -480,11 +491,22 @@ struct low_ops {
unsigned cs_mode, int cs_mask_nr);
};
+struct amd64_family_flags {
+ /*
+ * Indicates that the system supports the new register offsets, etc.
+ * first introduced with Family 19h Model 10h.
+ */
+ __u64 zn_regs_v2 : 1,
+
+ __reserved : 63;
+};
+
struct amd64_family_type {
const char *ctl_name;
u16 f0_id, f1_id, f2_id, f6_id;
/* Maximum number of memory controllers per die/node. */
u8 max_mcs;
+ struct amd64_family_flags flags;
struct low_ops ops;
};
diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c
index 5e7593753799..9a61d92bdf42 100644
--- a/drivers/edac/edac_device_sysfs.c
+++ b/drivers/edac/edac_device_sysfs.c
@@ -163,13 +163,14 @@ CTL_INFO_ATTR(poll_msec, S_IRUGO | S_IWUSR,
edac_device_ctl_poll_msec_show, edac_device_ctl_poll_msec_store);
/* Base Attributes of the EDAC_DEVICE ECC object */
-static struct ctl_info_attribute *device_ctrl_attr[] = {
- &attr_ctl_info_panic_on_ue,
- &attr_ctl_info_log_ue,
- &attr_ctl_info_log_ce,
- &attr_ctl_info_poll_msec,
+static struct attribute *device_ctrl_attrs[] = {
+ &attr_ctl_info_panic_on_ue.attr,
+ &attr_ctl_info_log_ue.attr,
+ &attr_ctl_info_log_ce.attr,
+ &attr_ctl_info_poll_msec.attr,
NULL,
};
+ATTRIBUTE_GROUPS(device_ctrl);
/*
* edac_device_ctrl_master_release
@@ -217,7 +218,7 @@ static void edac_device_ctrl_master_release(struct kobject *kobj)
static struct kobj_type ktype_device_ctrl = {
.release = edac_device_ctrl_master_release,
.sysfs_ops = &device_ctl_info_ops,
- .default_attrs = (struct attribute **)device_ctrl_attr,
+ .default_groups = device_ctrl_groups,
};
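The same conversion recurs throughout this file; in minimal self-contained form (a placeholder attribute and the generic kobj_attribute type rather than the driver's custom attribute structs):

#include <linux/kobject.h>
#include <linux/sysfs.h>

static ssize_t a_show(struct kobject *kobj, struct kobj_attribute *attr,
		      char *buf)
{
	return sysfs_emit(buf, "0\n");
}
static struct kobj_attribute my_attr_a = __ATTR_RO(a);

static struct attribute *my_attrs[] = {
	&my_attr_a.attr,
	NULL,
};
ATTRIBUTE_GROUPS(my);	/* generates my_group and my_groups[] from my_attrs */

static struct kobj_type my_ktype = {
	.default_groups = my_groups,	/* was: .default_attrs = my_attrs */
};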
/*
@@ -389,17 +390,18 @@ INSTANCE_ATTR(ce_count, S_IRUGO, instance_ce_count_show, NULL);
INSTANCE_ATTR(ue_count, S_IRUGO, instance_ue_count_show, NULL);
/* list of edac_dev 'instance' attributes */
-static struct instance_attribute *device_instance_attr[] = {
- &attr_instance_ce_count,
- &attr_instance_ue_count,
+static struct attribute *device_instance_attrs[] = {
+ &attr_instance_ce_count.attr,
+ &attr_instance_ue_count.attr,
NULL,
};
+ATTRIBUTE_GROUPS(device_instance);
/* The 'ktype' for each edac_dev 'instance' */
static struct kobj_type ktype_instance_ctrl = {
.release = edac_device_ctrl_instance_release,
.sysfs_ops = &device_instance_ops,
- .default_attrs = (struct attribute **)device_instance_attr,
+ .default_groups = device_instance_groups,
};
/* edac_dev -> instance -> block information */
@@ -487,17 +489,18 @@ BLOCK_ATTR(ce_count, S_IRUGO, block_ce_count_show, NULL);
BLOCK_ATTR(ue_count, S_IRUGO, block_ue_count_show, NULL);
/* list of edac_dev 'block' attributes */
-static struct edac_dev_sysfs_block_attribute *device_block_attr[] = {
- &attr_block_ce_count,
- &attr_block_ue_count,
+static struct attribute *device_block_attrs[] = {
+ &attr_block_ce_count.attr,
+ &attr_block_ue_count.attr,
NULL,
};
+ATTRIBUTE_GROUPS(device_block);
/* The 'ktype' for each edac_dev 'block' */
static struct kobj_type ktype_block_ctrl = {
.release = edac_device_ctrl_block_release,
.sysfs_ops = &device_block_ops,
- .default_attrs = (struct attribute **)device_block_attr,
+ .default_groups = device_block_groups,
};
/* block ctor/dtor code */
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 9d9aabdec96b..d2715774af6f 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -213,12 +213,12 @@ void *edac_align_ptr(void **p, unsigned int size, int n_elems)
else if (size > sizeof(char))
align = sizeof(short);
else
- return (char *)ptr;
+ return ptr;
- r = (unsigned long)p % align;
+ r = (unsigned long)ptr % align;
if (r == 0)
- return (char *)ptr;
+ return ptr;
*p += align - r;
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index 53042af7262e..888d5728ecef 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -135,17 +135,18 @@ INSTANCE_ATTR(pe_count, S_IRUGO, instance_pe_count_show, NULL);
INSTANCE_ATTR(npe_count, S_IRUGO, instance_npe_count_show, NULL);
/* pci instance attributes */
-static struct instance_attribute *pci_instance_attr[] = {
- &attr_instance_pe_count,
- &attr_instance_npe_count,
+static struct attribute *pci_instance_attrs[] = {
+ &attr_instance_pe_count.attr,
+ &attr_instance_npe_count.attr,
NULL
};
+ATTRIBUTE_GROUPS(pci_instance);
/* the ktype for a pci instance */
static struct kobj_type ktype_pci_instance = {
.release = edac_pci_instance_release,
.sysfs_ops = &pci_instance_ops,
- .default_attrs = (struct attribute **)pci_instance_attr,
+ .default_groups = pci_instance_groups,
};
/*
@@ -292,15 +293,16 @@ EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL);
EDAC_PCI_ATTR(pci_nonparity_count, S_IRUGO, edac_pci_int_show, NULL);
/* Base Attributes of the memory ECC object */
-static struct edac_pci_dev_attribute *edac_pci_attr[] = {
- &edac_pci_attr_check_pci_errors,
- &edac_pci_attr_edac_pci_log_pe,
- &edac_pci_attr_edac_pci_log_npe,
- &edac_pci_attr_edac_pci_panic_on_pe,
- &edac_pci_attr_pci_parity_count,
- &edac_pci_attr_pci_nonparity_count,
+static struct attribute *edac_pci_attrs[] = {
+ &edac_pci_attr_check_pci_errors.attr,
+ &edac_pci_attr_edac_pci_log_pe.attr,
+ &edac_pci_attr_edac_pci_log_npe.attr,
+ &edac_pci_attr_edac_pci_panic_on_pe.attr,
+ &edac_pci_attr_pci_parity_count.attr,
+ &edac_pci_attr_pci_nonparity_count.attr,
NULL,
};
+ATTRIBUTE_GROUPS(edac_pci);
/*
* edac_pci_release_main_kobj
@@ -327,7 +329,7 @@ static void edac_pci_release_main_kobj(struct kobject *kobj)
static struct kobj_type ktype_edac_pci_main_kobj = {
.release = edac_pci_release_main_kobj,
.sysfs_ops = &edac_pci_sysfs_ops,
- .default_attrs = (struct attribute **)edac_pci_attr,
+ .default_groups = edac_pci_groups,
};
/**
diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c
index 2ccd1db5e98f..7197f9fa0245 100644
--- a/drivers/edac/xgene_edac.c
+++ b/drivers/edac/xgene_edac.c
@@ -1919,7 +1919,7 @@ static int xgene_edac_probe(struct platform_device *pdev)
irq = platform_get_irq_optional(pdev, i);
if (irq < 0) {
dev_err(&pdev->dev, "No IRQ resource\n");
- rc = -EINVAL;
+ rc = irq;
goto out_err;
}
rc = devm_request_irq(&pdev->dev, irq,
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index 75cb91055c17..e5cfb01353d8 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -40,6 +40,7 @@ config ARM_SCPI_POWER_DOMAIN
config ARM_SDE_INTERFACE
bool "ARM Software Delegated Exception Interface (SDEI)"
depends on ARM64
+ depends on ACPI_APEI_GHES
help
The Software Delegated Exception Interface (SDEI) is an ARM
standard for registering callbacks from the platform firmware
diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c
index b406b3f78f46..d76bab3aaac4 100644
--- a/drivers/firmware/arm_scmi/driver.c
+++ b/drivers/firmware/arm_scmi/driver.c
@@ -2112,7 +2112,7 @@ static void __exit scmi_driver_exit(void)
}
module_exit(scmi_driver_exit);
-MODULE_ALIAS("platform: arm-scmi");
+MODULE_ALIAS("platform:arm-scmi");
MODULE_AUTHOR("Sudeep Holla <sudeep.holla@arm.com>");
MODULE_DESCRIPTION("ARM SCMI protocol driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index a7e762c352f9..1e1a51510e83 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
return true;
}
-static int __init sdei_init(void)
+void __init sdei_init(void)
{
struct platform_device *pdev;
int ret;
ret = platform_driver_register(&sdei_driver);
if (ret || !sdei_present_acpi())
- return ret;
+ return;
pdev = platform_device_register_simple(sdei_driver.driver.name,
0, NULL, 0);
@@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
pr_info("Failed to register ACPI:SDEI platform device %d\n",
ret);
}
-
- return ret;
}
-/*
- * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
- * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
- * by device_initcall(). We want to be called in the middle.
- */
-subsys_initcall_sync(sdei_init);
-
int sdei_event_handler(struct pt_regs *regs,
struct sdei_registered_event *arg)
{
diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c
index 4c3201e290e2..ea84108035eb 100644
--- a/drivers/firmware/efi/apple-properties.c
+++ b/drivers/firmware/efi/apple-properties.c
@@ -24,7 +24,7 @@ static bool dump_properties __initdata;
static int __init dump_properties_enable(char *arg)
{
dump_properties = true;
- return 0;
+ return 1;
}
__setup("dump_apple_properties", dump_properties_enable);
diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index 0ef086e43090..7e771c56c13c 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -266,7 +266,7 @@ static int efi_pstore_write(struct pstore_record *record)
efi_name[i] = name[i];
ret = efivar_entry_set_safe(efi_name, vendor, PSTORE_EFI_ATTRIBUTES,
- preemptible(), record->size, record->psi->buf);
+ false, record->size, record->psi->buf);
if (record->reason == KMSG_DUMP_OOPS && try_module_get(THIS_MODULE))
if (!schedule_work(&efivar_work))
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index ae79c3300129..5502e176d51b 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -212,7 +212,7 @@ static int __init efivar_ssdt_setup(char *str)
memcpy(efivar_ssdt, str, strlen(str));
else
pr_warn("efivar_ssdt: name too long: %s\n", str);
- return 0;
+ return 1;
}
__setup("efivar_ssdt=", efivar_ssdt_setup);
@@ -722,6 +722,13 @@ void __init efi_systab_report_header(const efi_table_hdr_t *systab_hdr,
systab_hdr->revision >> 16,
systab_hdr->revision & 0xffff,
vendor);
+
+ if (IS_ENABLED(CONFIG_X86_64) &&
+ systab_hdr->revision > EFI_1_10_SYSTEM_TABLE_REVISION &&
+ !strcmp(vendor, "Apple")) {
+ pr_info("Apple Mac detected, using EFI v1.10 runtime services only\n");
+ efi.runtime_version = EFI_1_10_SYSTEM_TABLE_REVISION;
+ }
}
static __initdata char memory_type_name[][13] = {
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index 2363fee9211c..9cc556013d08 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -119,9 +119,9 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
if (image->image_base != _text)
efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n");
- if (!IS_ALIGNED((u64)_text, EFI_KIMG_ALIGN))
- efi_err("FIRMWARE BUG: kernel image not aligned on %ldk boundary\n",
- EFI_KIMG_ALIGN >> 10);
+ if (!IS_ALIGNED((u64)_text, SEGMENT_ALIGN))
+ efi_err("FIRMWARE BUG: kernel image not aligned on %dk boundary\n",
+ SEGMENT_ALIGN >> 10);
kernel_size = _edata - _text;
kernel_memsize = kernel_size + (_end - _edata);
diff --git a/drivers/firmware/efi/libstub/riscv-stub.c b/drivers/firmware/efi/libstub/riscv-stub.c
index 380e4e251399..9c460843442f 100644
--- a/drivers/firmware/efi/libstub/riscv-stub.c
+++ b/drivers/firmware/efi/libstub/riscv-stub.c
@@ -25,7 +25,7 @@ typedef void __noreturn (*jump_kernel_func)(unsigned int, unsigned long);
static u32 hartid;
-static u32 get_boot_hartid_from_fdt(void)
+static int get_boot_hartid_from_fdt(void)
{
const void *fdt;
int chosen_node, len;
@@ -33,23 +33,26 @@ static u32 get_boot_hartid_from_fdt(void)
fdt = get_efi_config_table(DEVICE_TREE_GUID);
if (!fdt)
- return U32_MAX;
+ return -EINVAL;
chosen_node = fdt_path_offset(fdt, "/chosen");
if (chosen_node < 0)
- return U32_MAX;
+ return -EINVAL;
prop = fdt_getprop((void *)fdt, chosen_node, "boot-hartid", &len);
if (!prop || len != sizeof(u32))
- return U32_MAX;
+ return -EINVAL;
- return fdt32_to_cpu(*prop);
+ hartid = fdt32_to_cpu(*prop);
+ return 0;
}
efi_status_t check_platform_features(void)
{
- hartid = get_boot_hartid_from_fdt();
- if (hartid == U32_MAX) {
+ int ret;
+
+ ret = get_boot_hartid_from_fdt();
+ if (ret) {
efi_err("/chosen/boot-hartid missing or invalid!\n");
return EFI_UNSUPPORTED;
}
diff --git a/drivers/firmware/efi/mokvar-table.c b/drivers/firmware/efi/mokvar-table.c
index 38722d2009e2..5ed0602c2f75 100644
--- a/drivers/firmware/efi/mokvar-table.c
+++ b/drivers/firmware/efi/mokvar-table.c
@@ -359,4 +359,4 @@ static int __init efi_mokvar_sysfs_init(void)
}
return err;
}
-device_initcall(efi_mokvar_sysfs_init);
+fs_initcall(efi_mokvar_sysfs_init);
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index abdc8a6a3963..cae590bd08f2 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -742,6 +742,7 @@ int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes,
{
const struct efivar_operations *ops;
efi_status_t status;
+ unsigned long varsize;
if (!__efivars)
return -EINVAL;
@@ -764,15 +765,17 @@ int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes,
return efivar_entry_set_nonblocking(name, vendor, attributes,
size, data);
+ varsize = size + ucs2_strsize(name, 1024);
if (!block) {
if (down_trylock(&efivars_lock))
return -EBUSY;
+ status = check_var_size_nonblocking(attributes, varsize);
} else {
if (down_interruptible(&efivars_lock))
return -EINTR;
+ status = check_var_size(attributes, varsize);
}
- status = check_var_size(attributes, size + ucs2_strsize(name, 1024));
if (status != EFI_SUCCESS) {
up(&efivars_lock);
return -ENOSPC;
diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index 450c5f6a1cbf..5e5b0bb2e4e0 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -1121,6 +1121,32 @@ int zynqmp_pm_aes_engine(const u64 address, u32 *out)
EXPORT_SYMBOL_GPL(zynqmp_pm_aes_engine);
/**
+ * zynqmp_pm_sha_hash - Access the SHA engine to calculate the hash
+ * @address: Address of the input data, or of the output buffer where
+ * the hash should be stored.
+ * @size: Size of the data.
+ * @flags:
+ * BIT(0) - initialize the csudma driver and SHA3 (here the
+ * address and size inputs may be NULL).
+ * BIT(1) - call the Sha3_Update API; this may be issued
+ * multiple times when the data is not contiguous.
+ * BIT(2) - get the final hash of the whole updated data; the
+ * 48-byte hash is written to the provided address.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags)
+{
+ u32 lower_addr = lower_32_bits(address);
+ u32 upper_addr = upper_32_bits(address);
+
+ return zynqmp_pm_invoke_fn(PM_SECURE_SHA, upper_addr, lower_addr,
+ size, flags, NULL);
+}
+EXPORT_SYMBOL_GPL(zynqmp_pm_sha_hash);
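Putting the three flag phases together, a condensed caller might look as follows; data and digest are assumed to be DMA-able addresses, mirroring the zynqmp-sha driver's digest flow above rather than introducing new API:

#include <crypto/sha3.h>
#include <linux/bits.h>
#include <linux/firmware/xlnx-zynqmp.h>
#include <linux/types.h>

static int sha3_384_digest_once(dma_addr_t data, u32 len, dma_addr_t digest)
{
	int ret;

	ret = zynqmp_pm_sha_hash(0, 0, BIT(0));		/* init */
	if (ret)
		return ret;

	ret = zynqmp_pm_sha_hash(data, len, BIT(1));	/* update */
	if (ret)
		return ret;

	/* final: the 48-byte SHA3-384 digest is written at 'digest' */
	return zynqmp_pm_sha_hash(digest, SHA3_384_DIGEST_SIZE, BIT(2));
}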
+
+/**
* zynqmp_pm_register_notifier() - PM API for register a subsystem
* to be notified about specific
* event/error.
diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c
index 4a55cdf089d6..e00c33310517 100644
--- a/drivers/gpio/gpio-74x164.c
+++ b/drivers/gpio/gpio-74x164.c
@@ -163,15 +163,13 @@ exit_destroy:
return ret;
}
-static int gen_74x164_remove(struct spi_device *spi)
+static void gen_74x164_remove(struct spi_device *spi)
{
struct gen_74x164_chip *chip = spi_get_drvdata(spi);
gpiod_set_value_cansleep(chip->gpiod_oe, 0);
gpiochip_remove(&chip->gpio_chip);
mutex_destroy(&chip->lock);
-
- return 0;
}
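The same mechanical conversion repeats across the SPI drivers below; in minimal form (the my_* names are placeholders):

#include <linux/gpio/driver.h>
#include <linux/spi/spi.h>

struct my_chip {
	struct gpio_chip gpio_chip;
};

/* remove() now returns void: it can only tear down, never fail. */
static void my_spi_remove(struct spi_device *spi)
{
	struct my_chip *chip = spi_get_drvdata(spi);

	gpiochip_remove(&chip->gpio_chip);
}

static struct spi_driver my_driver = {
	.driver = { .name = "my-chip" },
	.remove = my_spi_remove,
};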
static const struct spi_device_id gen_74x164_spi_ids[] = {
diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c
index 869dc952cf45..0cb2664085cf 100644
--- a/drivers/gpio/gpio-aggregator.c
+++ b/drivers/gpio/gpio-aggregator.c
@@ -278,7 +278,8 @@ static int gpio_fwd_get(struct gpio_chip *chip, unsigned int offset)
{
struct gpiochip_fwd *fwd = gpiochip_get_data(chip);
- return gpiod_get_value(fwd->descs[offset]);
+ return chip->can_sleep ? gpiod_get_value_cansleep(fwd->descs[offset])
+ : gpiod_get_value(fwd->descs[offset]);
}
static int gpio_fwd_get_multiple(struct gpiochip_fwd *fwd, unsigned long *mask,
@@ -293,7 +294,10 @@ static int gpio_fwd_get_multiple(struct gpiochip_fwd *fwd, unsigned long *mask,
for_each_set_bit(i, mask, fwd->chip.ngpio)
descs[j++] = fwd->descs[i];
- error = gpiod_get_array_value(j, descs, NULL, values);
+ if (fwd->chip.can_sleep)
+ error = gpiod_get_array_value_cansleep(j, descs, NULL, values);
+ else
+ error = gpiod_get_array_value(j, descs, NULL, values);
if (error)
return error;
@@ -328,7 +332,10 @@ static void gpio_fwd_set(struct gpio_chip *chip, unsigned int offset, int value)
{
struct gpiochip_fwd *fwd = gpiochip_get_data(chip);
- gpiod_set_value(fwd->descs[offset], value);
+ if (chip->can_sleep)
+ gpiod_set_value_cansleep(fwd->descs[offset], value);
+ else
+ gpiod_set_value(fwd->descs[offset], value);
}
static void gpio_fwd_set_multiple(struct gpiochip_fwd *fwd, unsigned long *mask,
@@ -343,7 +350,10 @@ static void gpio_fwd_set_multiple(struct gpiochip_fwd *fwd, unsigned long *mask,
descs[j++] = fwd->descs[i];
}
- gpiod_set_array_value(j, descs, NULL, values);
+ if (fwd->chip.can_sleep)
+ gpiod_set_array_value_cansleep(j, descs, NULL, values);
+ else
+ gpiod_set_array_value(j, descs, NULL, values);
}
static void gpio_fwd_set_multiple_locked(struct gpio_chip *chip,
diff --git a/drivers/gpio/gpio-max3191x.c b/drivers/gpio/gpio-max3191x.c
index 51cd6f98d1c7..161c4751c5f7 100644
--- a/drivers/gpio/gpio-max3191x.c
+++ b/drivers/gpio/gpio-max3191x.c
@@ -443,14 +443,12 @@ static int max3191x_probe(struct spi_device *spi)
return 0;
}
-static int max3191x_remove(struct spi_device *spi)
+static void max3191x_remove(struct spi_device *spi)
{
struct max3191x_chip *max3191x = spi_get_drvdata(spi);
gpiochip_remove(&max3191x->gpio);
mutex_destroy(&max3191x->lock);
-
- return 0;
}
static int __init max3191x_register_driver(struct spi_driver *sdrv)
diff --git a/drivers/gpio/gpio-max7301.c b/drivers/gpio/gpio-max7301.c
index 5862d73bf325..11813f41d460 100644
--- a/drivers/gpio/gpio-max7301.c
+++ b/drivers/gpio/gpio-max7301.c
@@ -64,11 +64,9 @@ static int max7301_probe(struct spi_device *spi)
return ret;
}
-static int max7301_remove(struct spi_device *spi)
+static void max7301_remove(struct spi_device *spi)
{
__max730x_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id max7301_id[] = {
diff --git a/drivers/gpio/gpio-mc33880.c b/drivers/gpio/gpio-mc33880.c
index 31d2be1bebc8..cd9b16dbe1a9 100644
--- a/drivers/gpio/gpio-mc33880.c
+++ b/drivers/gpio/gpio-mc33880.c
@@ -134,7 +134,7 @@ exit_destroy:
return ret;
}
-static int mc33880_remove(struct spi_device *spi)
+static void mc33880_remove(struct spi_device *spi)
{
struct mc33880 *mc;
@@ -142,8 +142,6 @@ static int mc33880_remove(struct spi_device *spi)
gpiochip_remove(&mc->chip);
mutex_destroy(&mc->lock);
-
- return 0;
}
static struct spi_driver mc33880_driver = {
diff --git a/drivers/gpio/gpio-mt7621.c b/drivers/gpio/gpio-mt7621.c
index ccaad1cb3c2e..d8a26e503ca5 100644
--- a/drivers/gpio/gpio-mt7621.c
+++ b/drivers/gpio/gpio-mt7621.c
@@ -239,7 +239,6 @@ mediatek_gpio_bank_probe(struct device *dev, int bank)
rg->chip.offset = bank * MTK_BANK_WIDTH;
rg->irq_chip.name = dev_name(dev);
- rg->irq_chip.parent_device = dev;
rg->irq_chip.irq_unmask = mediatek_gpio_irq_unmask;
rg->irq_chip.irq_mask = mediatek_gpio_irq_mask;
rg->irq_chip.irq_mask_ack = mediatek_gpio_irq_mask;
diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index e099c39e0355..80ddc43fd875 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -986,7 +986,8 @@ static void omap_gpio_mod_init(struct gpio_bank *bank)
writel_relaxed(0, base + bank->regs->ctrl);
}
-static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
+static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc,
+ struct device *pm_dev)
{
struct gpio_irq_chip *irq;
static int gpio;
@@ -1052,6 +1053,7 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
if (ret)
return dev_err_probe(bank->chip.parent, ret, "Could not register gpio chip\n");
+ irq_domain_set_pm_device(bank->chip.irq.domain, pm_dev);
ret = devm_request_irq(bank->chip.parent, bank->irq,
omap_gpio_irq_handler,
0, dev_name(bank->chip.parent), bank);
@@ -1402,7 +1404,6 @@ static int omap_gpio_probe(struct platform_device *pdev)
irqc->irq_bus_sync_unlock = gpio_irq_bus_sync_unlock,
irqc->name = dev_name(&pdev->dev);
irqc->flags = IRQCHIP_MASK_ON_SUSPEND;
- irqc->parent_device = dev;
bank->irq = platform_get_irq(pdev, 0);
if (bank->irq <= 0) {
@@ -1466,7 +1467,7 @@ static int omap_gpio_probe(struct platform_device *pdev)
omap_gpio_mod_init(bank);
- ret = omap_gpio_chip_init(bank, irqc);
+ ret = omap_gpio_chip_init(bank, irqc, dev);
if (ret) {
pm_runtime_put_sync(dev);
pm_runtime_disable(dev);
diff --git a/drivers/gpio/gpio-pisosr.c b/drivers/gpio/gpio-pisosr.c
index 8e04054cf07e..81a47ae09ff8 100644
--- a/drivers/gpio/gpio-pisosr.c
+++ b/drivers/gpio/gpio-pisosr.c
@@ -163,15 +163,13 @@ static int pisosr_gpio_probe(struct spi_device *spi)
return 0;
}
-static int pisosr_gpio_remove(struct spi_device *spi)
+static void pisosr_gpio_remove(struct spi_device *spi)
{
struct pisosr_gpio *gpio = spi_get_drvdata(spi);
gpiochip_remove(&gpio->chip);
mutex_destroy(&gpio->lock);
-
- return 0;
}
static const struct spi_device_id pisosr_gpio_id_table[] = {
diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c
index bd2e16d6e21c..3a76538f27fa 100644
--- a/drivers/gpio/gpio-rcar.c
+++ b/drivers/gpio/gpio-rcar.c
@@ -530,7 +530,6 @@ static int gpio_rcar_probe(struct platform_device *pdev)
irq_chip = &p->irq_chip;
irq_chip->name = "gpio-rcar";
- irq_chip->parent_device = dev;
irq_chip->irq_mask = gpio_rcar_irq_disable;
irq_chip->irq_unmask = gpio_rcar_irq_enable;
irq_chip->irq_set_type = gpio_rcar_irq_set_type;
@@ -552,6 +551,7 @@ static int gpio_rcar_probe(struct platform_device *pdev)
goto err0;
}
+ irq_domain_set_pm_device(gpio_chip->irq.domain, dev);
ret = devm_request_irq(dev, p->irq_parent, gpio_rcar_irq_handler,
IRQF_SHARED, name, p);
if (ret) {
diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c
index a4c4e4584f5b..099e358d2491 100644
--- a/drivers/gpio/gpio-rockchip.c
+++ b/drivers/gpio/gpio-rockchip.c
@@ -410,10 +410,8 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type)
level = rockchip_gpio_readl(bank, bank->gpio_regs->int_type);
polarity = rockchip_gpio_readl(bank, bank->gpio_regs->int_polarity);
- switch (type) {
- case IRQ_TYPE_EDGE_BOTH:
+ if (type == IRQ_TYPE_EDGE_BOTH) {
if (bank->gpio_type == GPIO_TYPE_V2) {
- bank->toggle_edge_mode &= ~mask;
rockchip_gpio_writel_bit(bank, d->hwirq, 1,
bank->gpio_regs->int_bothedge);
goto out;
@@ -431,30 +429,34 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type)
else
polarity |= mask;
}
- break;
- case IRQ_TYPE_EDGE_RISING:
- bank->toggle_edge_mode &= ~mask;
- level |= mask;
- polarity |= mask;
- break;
- case IRQ_TYPE_EDGE_FALLING:
- bank->toggle_edge_mode &= ~mask;
- level |= mask;
- polarity &= ~mask;
- break;
- case IRQ_TYPE_LEVEL_HIGH:
- bank->toggle_edge_mode &= ~mask;
- level &= ~mask;
- polarity |= mask;
- break;
- case IRQ_TYPE_LEVEL_LOW:
- bank->toggle_edge_mode &= ~mask;
- level &= ~mask;
- polarity &= ~mask;
- break;
- default:
- ret = -EINVAL;
- goto out;
+ } else {
+ if (bank->gpio_type == GPIO_TYPE_V2) {
+ rockchip_gpio_writel_bit(bank, d->hwirq, 0,
+ bank->gpio_regs->int_bothedge);
+ } else {
+ bank->toggle_edge_mode &= ~mask;
+ }
+ switch (type) {
+ case IRQ_TYPE_EDGE_RISING:
+ level |= mask;
+ polarity |= mask;
+ break;
+ case IRQ_TYPE_EDGE_FALLING:
+ level |= mask;
+ polarity &= ~mask;
+ break;
+ case IRQ_TYPE_LEVEL_HIGH:
+ level &= ~mask;
+ polarity |= mask;
+ break;
+ case IRQ_TYPE_LEVEL_LOW:
+ level &= ~mask;
+ polarity &= ~mask;
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
}
rockchip_gpio_writel(bank, level, bank->gpio_regs->int_type);
diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c
index 403f9e833d6a..7d82388b4ab7 100644
--- a/drivers/gpio/gpio-sifive.c
+++ b/drivers/gpio/gpio-sifive.c
@@ -223,7 +223,7 @@ static int sifive_gpio_probe(struct platform_device *pdev)
NULL,
chip->base + SIFIVE_GPIO_OUTPUT_EN,
chip->base + SIFIVE_GPIO_INPUT_EN,
- 0);
+ BGPIOF_READ_OUTPUT_REG_SET);
if (ret) {
dev_err(dev, "unable to init generic GPIO\n");
return ret;
diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c
index 838bbfed11d3..8e5d87984a48 100644
--- a/drivers/gpio/gpio-sim.c
+++ b/drivers/gpio/gpio-sim.c
@@ -547,7 +547,7 @@ struct gpio_sim_bank {
*
* So we need to store the pointer to the parent struct here. We can
* dereference it anywhere we need with no checks and no locking as
- * it's guaranteed to survive the childred and protected by configfs
+ * it's guaranteed to survive the children and protected by configfs
* locks.
*
* Same for other structures.
@@ -570,6 +570,11 @@ static struct gpio_sim_bank *to_gpio_sim_bank(struct config_item *item)
return container_of(group, struct gpio_sim_bank, group);
}
+static bool gpio_sim_bank_has_label(struct gpio_sim_bank *bank)
+{
+ return bank->label && *bank->label;
+}
+
static struct gpio_sim_device *
gpio_sim_bank_get_device(struct gpio_sim_bank *bank)
{
@@ -770,9 +775,15 @@ static int gpio_sim_add_hogs(struct gpio_sim_device *dev)
* point the device doesn't exist yet and so dev_name()
* is not available.
*/
- hog->chip_label = kasprintf(GFP_KERNEL,
- "gpio-sim.%u-%s", dev->id,
- fwnode_get_name(bank->swnode));
+ if (gpio_sim_bank_has_label(bank))
+ hog->chip_label = kstrdup(bank->label,
+ GFP_KERNEL);
+ else
+ hog->chip_label = kasprintf(GFP_KERNEL,
+ "gpio-sim.%u-%s",
+ dev->id,
+ fwnode_get_name(
+ bank->swnode));
if (!hog->chip_label) {
gpio_sim_remove_hogs(dev);
return -ENOMEM;
@@ -816,7 +827,7 @@ gpio_sim_make_bank_swnode(struct gpio_sim_bank *bank,
properties[prop_idx++] = PROPERTY_ENTRY_U32("ngpios", bank->num_lines);
- if (bank->label)
+ if (gpio_sim_bank_has_label(bank))
properties[prop_idx++] = PROPERTY_ENTRY_STRING("gpio-sim,label",
bank->label);
@@ -1311,7 +1322,7 @@ static void gpio_sim_hog_config_item_release(struct config_item *item)
kfree(hog);
}
-struct configfs_item_operations gpio_sim_hog_config_item_ops = {
+static struct configfs_item_operations gpio_sim_hog_config_item_ops = {
.release = gpio_sim_hog_config_item_release,
};
diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c
index 34b36a8c035f..031fe105b58e 100644
--- a/drivers/gpio/gpio-tegra186.c
+++ b/drivers/gpio/gpio-tegra186.c
@@ -343,9 +343,12 @@ static int tegra186_gpio_of_xlate(struct gpio_chip *chip,
return offset + pin;
}
+#define to_tegra_gpio(x) container_of((x), struct tegra_gpio, gpio)
+
static void tegra186_irq_ack(struct irq_data *data)
{
- struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data);
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+ struct tegra_gpio *gpio = to_tegra_gpio(gc);
void __iomem *base;
base = tegra186_gpio_get_base(gpio, data->hwirq);
@@ -357,7 +360,8 @@ static void tegra186_irq_ack(struct irq_data *data)
static void tegra186_irq_mask(struct irq_data *data)
{
- struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data);
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+ struct tegra_gpio *gpio = to_tegra_gpio(gc);
void __iomem *base;
u32 value;
@@ -372,7 +376,8 @@ static void tegra186_irq_mask(struct irq_data *data)
static void tegra186_irq_unmask(struct irq_data *data)
{
- struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data);
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+ struct tegra_gpio *gpio = to_tegra_gpio(gc);
void __iomem *base;
u32 value;
@@ -387,7 +392,8 @@ static void tegra186_irq_unmask(struct irq_data *data)
static int tegra186_irq_set_type(struct irq_data *data, unsigned int type)
{
- struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data);
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+ struct tegra_gpio *gpio = to_tegra_gpio(gc);
void __iomem *base;
u32 value;
@@ -1069,6 +1075,7 @@ static const struct tegra_gpio_soc tegra241_main_soc = {
.ports = tegra241_main_ports,
.name = "tegra241-gpio",
.instance = 0,
+ .num_irqs_per_bank = 8,
};
#define TEGRA241_AON_GPIO_PORT(_name, _bank, _port, _pins) \
@@ -1089,6 +1096,7 @@ static const struct tegra_gpio_soc tegra241_aon_soc = {
.ports = tegra241_aon_ports,
.name = "tegra241-gpio-aon",
.instance = 1,
+ .num_irqs_per_bank = 8,
};
static const struct of_device_id tegra186_gpio_of_match[] = {
diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c
index 5b103221b58d..fa4bc7481f9a 100644
--- a/drivers/gpio/gpio-tqmx86.c
+++ b/drivers/gpio/gpio-tqmx86.c
@@ -281,7 +281,6 @@ static int tqmx86_gpio_probe(struct platform_device *pdev)
u8 irq_status;
irq_chip->name = chip->label;
- irq_chip->parent_device = &pdev->dev;
irq_chip->irq_mask = tqmx86_gpio_irq_mask;
irq_chip->irq_unmask = tqmx86_gpio_irq_unmask;
irq_chip->irq_set_type = tqmx86_gpio_irq_set_type;
@@ -316,6 +315,8 @@ static int tqmx86_gpio_probe(struct platform_device *pdev)
goto out_pm_dis;
}
+ irq_domain_set_pm_device(girq->domain, dev);
+
dev_info(dev, "GPIO functionality initialized with %d pins\n",
chip->ngpio);
diff --git a/drivers/gpio/gpio-ts4900.c b/drivers/gpio/gpio-ts4900.c
index d885032cf814..d918d2df4de2 100644
--- a/drivers/gpio/gpio-ts4900.c
+++ b/drivers/gpio/gpio-ts4900.c
@@ -1,7 +1,7 @@
/*
* Digital I/O driver for Technologic Systems I2C FPGA Core
*
- * Copyright (C) 2015 Technologic Systems
+ * Copyright (C) 2015, 2018 Technologic Systems
* Copyright (C) 2016 Savoir-Faire Linux
*
* This program is free software; you can redistribute it and/or
@@ -55,19 +55,33 @@ static int ts4900_gpio_direction_input(struct gpio_chip *chip,
{
struct ts4900_gpio_priv *priv = gpiochip_get_data(chip);
- /*
- * This will clear the output enable bit, the other bits are
- * dontcare when this is cleared
+ /* Only clear the OE bit here; this requires a read-modify-write but
+ * prevents OE and data from reaching the physical pin at different times.
*/
- return regmap_write(priv->regmap, offset, 0);
+ return regmap_update_bits(priv->regmap, offset, TS4900_GPIO_OE, 0);
}
static int ts4900_gpio_direction_output(struct gpio_chip *chip,
unsigned int offset, int value)
{
struct ts4900_gpio_priv *priv = gpiochip_get_data(chip);
+ unsigned int reg;
int ret;
+ /* If changing from an input to an output, first set the data bit to
+ * the requested value and only then set the OE bit. This prevents
+ * a glitch that can occur on the IO line.
+ */
+ regmap_read(priv->regmap, offset, &reg);
+ if (!(reg & TS4900_GPIO_OE)) {
+ if (value)
+ reg = TS4900_GPIO_OUT;
+ else
+ reg &= ~TS4900_GPIO_OUT;
+
+ regmap_write(priv->regmap, offset, reg);
+ }
+
if (value)
ret = regmap_write(priv->regmap, offset, TS4900_GPIO_OE |
TS4900_GPIO_OUT);
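/*
 * Editor's note -- illustrative sketch, not part of the patch above. It
 * models why the reworked ts4900 code latches the data bit before setting
 * the output-enable (OE) bit: writing both in one step can briefly drive
 * the stale data value onto the pin. Plain userspace C with a fake
 * register; all names below are hypothetical.
 */
#include <assert.h>
#include <stdint.h>

#define FAKE_GPIO_OE  (1u << 0)
#define FAKE_GPIO_OUT (1u << 1)

static uint32_t fake_reg;	/* stands in for the FPGA register */

static void set_output_glitch_free(int value)
{
	uint32_t reg = fake_reg;

	if (!(reg & FAKE_GPIO_OE)) {
		/* pin is currently an input: latch the data bit first... */
		if (value)
			reg |= FAKE_GPIO_OUT;
		else
			reg &= ~FAKE_GPIO_OUT;
		fake_reg = reg;
	}
	/* ...then enable the output driver with the data already correct */
	fake_reg = value ? (FAKE_GPIO_OE | FAKE_GPIO_OUT) : FAKE_GPIO_OE;
}

int main(void)
{
	set_output_glitch_free(1);
	assert(fake_reg == (FAKE_GPIO_OE | FAKE_GPIO_OUT));
	return 0;
}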
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index c0f6a25c3279..a5495ad31c9c 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -307,7 +307,8 @@ static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip,
if (IS_ERR(desc))
return desc;
- ret = gpio_set_debounce_timeout(desc, agpio->debounce_timeout);
+ /* ACPI expresses the debounce timeout in hundredths of a millisecond */
+ ret = gpio_set_debounce_timeout(desc, agpio->debounce_timeout * 10);
if (ret)
dev_warn(chip->parent,
"Failed to set debounce-timeout for pin 0x%04X, err %d\n",
@@ -1035,7 +1036,8 @@ int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int ind
if (ret < 0)
return ret;
- ret = gpio_set_debounce_timeout(desc, info.debounce);
+ /* ACPI expresses the debounce timeout in hundredths of a millisecond */
+ ret = gpio_set_debounce_timeout(desc, info.debounce * 10);
if (ret)
return ret;
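/*
 * Editor's note -- illustrative sketch, not part of the patch above. Both
 * gpiolib-acpi.c hunks multiply the ACPI debounce value by 10 because ACPI
 * reports it in hundredths of a millisecond while
 * gpio_set_debounce_timeout() expects microseconds: 1/100 ms == 10 us.
 * Plain userspace C; the helper name is hypothetical.
 */
#include <assert.h>
#include <stdint.h>

static uint32_t acpi_debounce_to_us(uint32_t hundredths_of_ms)
{
	return hundredths_of_ms * 10;	/* 0.01 ms per unit -> 10 us per unit */
}

int main(void)
{
	assert(acpi_debounce_to_us(1) == 10);      /* 0.01 ms -> 10 us */
	assert(acpi_debounce_to_us(100) == 1000);  /* 1 ms -> 1000 us */
	return 0;
}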
diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index c7b5446d01fd..ffa0256cad5a 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -330,7 +330,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip)
goto out_free_lh;
}
- ret = gpiod_request(desc, lh->label);
+ ret = gpiod_request_user(desc, lh->label);
if (ret)
goto out_free_lh;
lh->descs[i] = desc;
@@ -1378,7 +1378,7 @@ static int linereq_create(struct gpio_device *gdev, void __user *ip)
goto out_free_linereq;
}
- ret = gpiod_request(desc, lr->label);
+ ret = gpiod_request_user(desc, lr->label);
if (ret)
goto out_free_linereq;
@@ -1764,7 +1764,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
}
}
- ret = gpiod_request(desc, le->label);
+ ret = gpiod_request_user(desc, le->label);
if (ret)
goto out_free_le;
le->desc = desc;
diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c
index 4098bc7f88b7..44c1ad51b3fe 100644
--- a/drivers/gpio/gpiolib-sysfs.c
+++ b/drivers/gpio/gpiolib-sysfs.c
@@ -475,12 +475,9 @@ static ssize_t export_store(struct class *class,
* they may be undone on its behalf too.
*/
- status = gpiod_request(desc, "sysfs");
- if (status) {
- if (status == -EPROBE_DEFER)
- status = -ENODEV;
+ status = gpiod_request_user(desc, "sysfs");
+ if (status)
goto done;
- }
status = gpiod_set_transitory(desc, false);
if (!status) {
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 3859911b61e9..6630d92e30ad 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2227,6 +2227,16 @@ static int gpio_set_bias(struct gpio_desc *desc)
return gpio_set_config_with_argument_optional(desc, bias, arg);
}
+/**
+ * gpio_set_debounce_timeout() - Set debounce timeout
+ * @desc: GPIO descriptor to set the debounce timeout
+ * @debounce: Debounce timeout in microseconds
+ *
+ * The function calls into the underlying GPIO driver to set the
+ * debounce timeout in the hardware.
+ *
+ * Returns 0 on success, or negative error code otherwise.
+ */
int gpio_set_debounce_timeout(struct gpio_desc *desc, unsigned int debounce)
{
return gpio_set_config_with_argument_optional(desc,
@@ -3147,6 +3157,16 @@ int gpiod_to_irq(const struct gpio_desc *desc)
return retirq;
}
+#ifdef CONFIG_GPIOLIB_IRQCHIP
+ if (gc->irq.chip) {
+ /*
+ * Avoid a race with other code which tries to look up
+ * an IRQ before the irqchip has been properly registered,
+ * i.e. while gpiochip is still being brought up.
+ */
+ return -EPROBE_DEFER;
+ }
+#endif
return -ENXIO;
}
EXPORT_SYMBOL_GPL(gpiod_to_irq);
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index 30bc3f80f83e..c31f4626915d 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -135,6 +135,18 @@ struct gpio_desc {
int gpiod_request(struct gpio_desc *desc, const char *label);
void gpiod_free(struct gpio_desc *desc);
+
+static inline int gpiod_request_user(struct gpio_desc *desc, const char *label)
+{
+ int ret;
+
+ ret = gpiod_request(desc, label);
+ if (ret == -EPROBE_DEFER)
+ ret = -ENODEV;
+
+ return ret;
+}
+
int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id,
unsigned long lflags, enum gpiod_flags dflags);
int gpio_set_debounce_timeout(struct gpio_desc *desc, unsigned int debounce);
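/*
 * Editor's note -- illustrative sketch, not part of the patch above.
 * gpiod_request_user() exists so that the user-space facing paths (cdev,
 * sysfs) never leak -EPROBE_DEFER -- an in-kernel "try again later" code --
 * to userland; it is mapped to -ENODEV instead. A minimal userspace model
 * of that mapping, with hypothetical names:
 */
#include <assert.h>
#include <errno.h>

#define EPROBE_DEFER 517	/* kernel-internal code; not in userspace errno.h */

static int request_for_userspace(int kernel_ret)
{
	if (kernel_ret == -EPROBE_DEFER)
		return -ENODEV;	/* hide the transient kernel-only code */
	return kernel_ret;
}

int main(void)
{
	assert(request_for_userspace(-EPROBE_DEFER) == -ENODEV);
	assert(request_for_userspace(0) == 0);
	assert(request_for_userspace(-EBUSY) == -EBUSY);	/* passed through */
	return 0;
}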
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d8b854fcbffa..9a53a4de2bb7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1408,12 +1408,10 @@ int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_sta
int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps);
-bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
void amdgpu_acpi_detect(void);
#else
static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
-static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
static inline void amdgpu_acpi_detect(void) { }
static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; }
static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
@@ -1422,6 +1420,14 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev,
enum amdgpu_ss ss_state) { return 0; }
#endif
+#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
+bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
+bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
+#else
+static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
+static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
+#endif
+
int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
uint64_t addr, struct amdgpu_bo **bo,
struct amdgpu_bo_va_mapping **mapping);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 4811b0faafd9..0e12315fa0cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1031,6 +1031,20 @@ void amdgpu_acpi_detect(void)
}
}
+#if IS_ENABLED(CONFIG_SUSPEND)
+/**
+ * amdgpu_acpi_is_s3_active
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * returns true if the S3 suspend path will be used, false if not.
+ */
+bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev)
+{
+ return !(adev->flags & AMD_IS_APU) ||
+ (pm_suspend_target_state == PM_SUSPEND_MEM);
+}
+
/**
* amdgpu_acpi_is_s0ix_active
*
@@ -1040,11 +1054,24 @@ void amdgpu_acpi_detect(void)
*/
bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
{
-#if IS_ENABLED(CONFIG_AMD_PMC) && IS_ENABLED(CONFIG_SUSPEND)
- if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
- if (adev->flags & AMD_IS_APU)
- return pm_suspend_target_state == PM_SUSPEND_TO_IDLE;
+ if (!(adev->flags & AMD_IS_APU) ||
+ (pm_suspend_target_state != PM_SUSPEND_TO_IDLE))
+ return false;
+
+ if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) {
+ dev_warn_once(adev->dev,
+ "Power consumption will be higher as BIOS has not been configured for suspend-to-idle.\n"
+ "To use suspend-to-idle change the sleep mode in BIOS setup.\n");
+ return false;
}
-#endif
+
+#if !IS_ENABLED(CONFIG_AMD_PMC)
+ dev_warn_once(adev->dev,
+ "Power consumption will be higher as the kernel has not been compiled with CONFIG_AMD_PMC.\n");
return false;
+#else
+ return true;
+#endif /* CONFIG_AMD_PMC */
}
+
+#endif /* CONFIG_SUSPEND */
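/*
 * Editor's note -- illustrative sketch, not part of the patch above. The
 * reworked amdgpu_acpi_is_s0ix_active() boils down to: only an APU
 * targeting suspend-to-idle can use s0ix, and then only if the BIOS
 * advertises ACPI_FADT_LOW_POWER_S0 and the kernel was built with
 * CONFIG_AMD_PMC. A boolean model in plain C; parameter names are
 * hypothetical.
 */
#include <assert.h>
#include <stdbool.h>

static bool s0ix_usable(bool is_apu, bool target_is_s2idle,
			bool fadt_low_power_s0, bool have_amd_pmc)
{
	if (!is_apu || !target_is_s2idle)
		return false;
	if (!fadt_low_power_s0)	/* BIOS not configured for s2idle */
		return false;
	return have_amd_pmc;	/* without AMD_PMC, warn and bail out */
}

int main(void)
{
	assert(s0ix_usable(true, true, true, true));
	assert(!s0ix_usable(false, true, true, true));	/* dGPU */
	assert(!s0ix_usable(true, true, false, true));	/* BIOS sleep mode */
	assert(!s0ix_usable(true, true, true, false));	/* kernel config */
	return 0;
}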
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 82011e75ed85..c4387b38229c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -1141,7 +1141,7 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
if (ret)
return ret;
- if (!dev->mode_config.allow_fb_modifiers) {
+ if (!dev->mode_config.allow_fb_modifiers && !adev->enable_virtual_display) {
drm_WARN_ONCE(dev, adev->family >= AMDGPU_FAMILY_AI,
"GFX9+ requires FB check based on format modifier\n");
ret = check_tiling_flags_gfx6(rfb);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index b21bcdc97460..0ead08ba58c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1525,6 +1525,87 @@ static const u16 amdgpu_unsupported_pciidlist[] = {
0x99A0,
0x99A2,
0x99A4,
+ /* radeon secondary ids */
+ 0x3171,
+ 0x3e70,
+ 0x4164,
+ 0x4165,
+ 0x4166,
+ 0x4168,
+ 0x4170,
+ 0x4171,
+ 0x4172,
+ 0x4173,
+ 0x496e,
+ 0x4a69,
+ 0x4a6a,
+ 0x4a6b,
+ 0x4a70,
+ 0x4a74,
+ 0x4b69,
+ 0x4b6b,
+ 0x4b6c,
+ 0x4c6e,
+ 0x4e64,
+ 0x4e65,
+ 0x4e66,
+ 0x4e67,
+ 0x4e68,
+ 0x4e69,
+ 0x4e6a,
+ 0x4e71,
+ 0x4f73,
+ 0x5569,
+ 0x556b,
+ 0x556d,
+ 0x556f,
+ 0x5571,
+ 0x5854,
+ 0x5874,
+ 0x5940,
+ 0x5941,
+ 0x5b72,
+ 0x5b73,
+ 0x5b74,
+ 0x5b75,
+ 0x5d44,
+ 0x5d45,
+ 0x5d6d,
+ 0x5d6f,
+ 0x5d72,
+ 0x5d77,
+ 0x5e6b,
+ 0x5e6d,
+ 0x7120,
+ 0x7124,
+ 0x7129,
+ 0x712e,
+ 0x712f,
+ 0x7162,
+ 0x7163,
+ 0x7166,
+ 0x7167,
+ 0x7172,
+ 0x7173,
+ 0x71a0,
+ 0x71a1,
+ 0x71a3,
+ 0x71a7,
+ 0x71bb,
+ 0x71e0,
+ 0x71e1,
+ 0x71e2,
+ 0x71e6,
+ 0x71e7,
+ 0x71f2,
+ 0x7269,
+ 0x726b,
+ 0x726e,
+ 0x72a0,
+ 0x72a8,
+ 0x72b1,
+ 0x72b3,
+ 0x793f,
};
static const struct pci_device_id pciidlist[] = {
@@ -1930,6 +2011,9 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENODEV;
}
+ if (amdgpu_aspm == -1 && !pcie_aspm_enabled(pdev))
+ amdgpu_aspm = 0;
+
if (amdgpu_virtual_display ||
amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
supports_atomic = true;
@@ -2165,13 +2249,20 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
static int amdgpu_pmops_prepare(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
/* Return a positive number here so
* DPM_FLAG_SMART_SUSPEND works properly
*/
if (amdgpu_device_supports_boco(drm_dev))
- return pm_runtime_suspended(dev) &&
- pm_suspend_via_firmware();
+ return pm_runtime_suspended(dev);
+
+ /* if we will not support S3 or s2idle for the device,
+ * then skip suspend
+ */
+ if (!amdgpu_acpi_is_s0ix_active(adev) &&
+ !amdgpu_acpi_is_s3_active(adev))
+ return 1;
return 0;
}
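/*
 * Editor's note -- illustrative sketch, not part of the patch above.
 * amdgpu_pmops_prepare() relies on the PM core convention that a positive
 * return from .prepare() means "leave this device alone" (the
 * direct-complete path), 0 means "suspend normally", and a negative value
 * aborts the transition. A compact model in plain C; names are
 * hypothetical.
 */
#include <assert.h>

enum prepare_result { PREPARE_SUSPEND = 0, PREPARE_SKIP = 1 };

static enum prepare_result model_prepare(int supports_boco, int runtime_suspended,
					 int s0ix_ok, int s3_ok)
{
	if (supports_boco)
		return runtime_suspended ? PREPARE_SKIP : PREPARE_SUSPEND;
	if (!s0ix_ok && !s3_ok)
		return PREPARE_SKIP;	/* nothing useful to do in suspend */
	return PREPARE_SUSPEND;
}

int main(void)
{
	assert(model_prepare(1, 1, 0, 0) == PREPARE_SKIP);
	assert(model_prepare(0, 0, 0, 0) == PREPARE_SKIP);
	assert(model_prepare(0, 0, 1, 0) == PREPARE_SUSPEND);
	return 0;
}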
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 5c3f24069f2a..4655702a5e00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1904,7 +1904,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
unsigned i;
int r;
- if (direct_submit && !ring->sched.ready) {
+ if (!direct_submit && !ring->sched.ready) {
DRM_ERROR("Trying to move memory with ring turned off.\n");
return -EINVAL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index d99c8779b51e..5224d9a39737 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -391,7 +391,6 @@ static struct drm_plane *amdgpu_vkms_plane_init(struct drm_device *dev,
int index)
{
struct drm_plane *plane;
- uint64_t modifiers[] = {DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID};
int ret;
plane = kzalloc(sizeof(*plane), GFP_KERNEL);
@@ -402,7 +401,7 @@ static struct drm_plane *amdgpu_vkms_plane_init(struct drm_device *dev,
&amdgpu_vkms_plane_funcs,
amdgpu_vkms_formats,
ARRAY_SIZE(amdgpu_vkms_formats),
- modifiers, type, NULL);
+ NULL, type, NULL);
if (ret) {
kfree(plane);
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b37fc7d7d2c7..418341a67517 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -768,11 +768,17 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
* Check if all VM PDs/PTs are ready for updates
*
* Returns:
- * True if eviction list is empty.
+ * True if VM is not evicting.
*/
bool amdgpu_vm_ready(struct amdgpu_vm *vm)
{
- return list_empty(&vm->evicted);
+ bool ret;
+
+ amdgpu_vm_eviction_lock(vm);
+ ret = !vm->evicting;
+ amdgpu_vm_eviction_unlock(vm);
+
+ return ret && list_empty(&vm->evicted);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
index b4eddf6e98a6..ff738e9725ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
@@ -543,7 +543,9 @@ static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev)
adev->gfx.config.max_sh_per_se *
adev->gfx.config.max_shader_engines);
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3)) {
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
/* Get SA disabled bitmap from eFuse setting */
efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SA_UNIT_DISABLE);
efuse_setting &= CC_GC_SA_UNIT_DISABLE__SA_DISABLE_MASK;
@@ -566,6 +568,9 @@ static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev)
disabled_sa = tmp;
WREG32_SOC15(GC, 0, mmGCUTCL2_HARVEST_BYPASS_GROUPS_YELLOW_CARP, disabled_sa);
+ break;
+ default:
+ break;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 38bb42727715..a2f8ed0e6a64 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -1140,6 +1140,9 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 3))
+ return;
+
adev->mmhub.funcs->get_clockgating(adev, flags);
if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0))
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index e8e4749e9c79..f0638db57111 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -2057,6 +2057,10 @@ static int sdma_v4_0_suspend(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ /* SMU saves SDMA state for us */
+ if (adev->in_s0ix)
+ return 0;
+
return sdma_v4_0_hw_fini(adev);
}
@@ -2064,6 +2068,10 @@ static int sdma_v4_0_resume(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ /* SMU restores SDMA state for us */
+ if (adev->in_s0ix)
+ return 0;
+
return sdma_v4_0_hw_init(adev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 0fc1747e4a70..12f80fdc1fbc 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -619,8 +619,8 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
static int soc15_asic_reset(struct amdgpu_device *adev)
{
/* original raven doesn't have full asic reset */
- if ((adev->apu_flags & AMD_APU_IS_RAVEN) &&
- !(adev->apu_flags & AMD_APU_IS_RAVEN2))
+ if ((adev->apu_flags & AMD_APU_IS_RAVEN) ||
+ (adev->apu_flags & AMD_APU_IS_RAVEN2))
return 0;
switch (soc15_asic_reset_method(adev)) {
@@ -1114,8 +1114,11 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_SDMA_LS |
AMD_CG_SUPPORT_VCN_MGCG;
+ /*
+ * MMHUB PG needs to be disabled for Picasso for
+ * stability reasons.
+ */
adev->pg_flags = AMD_PG_SUPPORT_SDMA |
- AMD_PG_SUPPORT_MMHUB |
AMD_PG_SUPPORT_VCN;
} else {
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index ed5385137f48..e27ca3758762 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -24,6 +24,7 @@
#include <linux/hmm.h>
#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
+#include <linux/migrate.h>
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
@@ -224,7 +225,6 @@ svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
page = pfn_to_page(pfn);
svm_range_bo_ref(prange->svm_bo);
page->zone_device_data = prange->svm_bo;
- get_page(page);
lock_page(page);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index ea68f3b3a4e9..6d643b4b791d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -25,6 +25,7 @@
#include <linux/hashtable.h>
#include <linux/mmu_notifier.h>
+#include <linux/memremap.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/atomic.h>
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 7f9773f8dab6..075429bea427 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3653,7 +3653,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
/* Use GRPH_PFLIP interrupt */
for (i = DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT;
- i <= DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT + adev->mode_info.num_crtc - 1;
+ i <= DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT + dc->caps.max_otg_num - 1;
i++) {
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->pageflip_irq);
if (r) {
@@ -4256,6 +4256,9 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
}
#endif
+ /* Disable vblank IRQs aggressively for power-saving. */
+ adev_to_drm(adev)->vblank_disable_immediate = true;
+
/* loops over all connectors on the board */
for (i = 0; i < link_cnt; i++) {
struct dc_link *link = NULL;
@@ -4301,19 +4304,17 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
update_connector_ext_caps(aconnector);
if (psr_feature_enabled)
amdgpu_dm_set_psr_caps(link);
+
+ /* TODO: Fix vblank control helpers to delay PSR entry to allow this when
+ * PSR is also supported.
+ */
+ if (link->psr_settings.psr_feature_enabled)
+ adev_to_drm(adev)->vblank_disable_immediate = false;
}
}
- /*
- * Disable vblank IRQs aggressively for power-saving.
- *
- * TODO: Fix vblank control helpers to delay PSR entry to allow this when PSR
- * is also supported.
- */
- adev_to_drm(adev)->vblank_disable_immediate = !psr_feature_enabled;
-
/* Software is initialized. Now we can register interrupt handlers. */
switch (adev->asic_type) {
#if defined(CONFIG_DRM_AMD_DC_SI)
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
index ff5bb152ef49..e6ef36de0825 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
@@ -2033,10 +2033,10 @@ static void calculate_bandwidth(
kfree(surface_type);
free_tiling_mode:
kfree(tiling_mode);
-free_yclk:
- kfree(yclk);
free_sclk:
kfree(sclk);
+free_yclk:
+ kfree(yclk);
}
/*******************************************************************************
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
index ec19678a0702..e447c74be713 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
@@ -503,7 +503,6 @@ static void dcn_bw_calc_rq_dlg_ttu(
//input[in_idx].dout.output_standard;
/*todo: soc->sr_enter_plus_exit_time??*/
- dlg_sys_param->t_srx_delay_us = dc->dcn_ip->dcfclk_cstate_latency / v->dcf_clk_deep_sleep;
dml1_rq_dlg_get_rq_params(dml, rq_param, &input->pipe.src);
dml1_extract_rq_regs(dml, rq_regs, rq_param);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
index f977f29907df..10c7be40dfb0 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
@@ -473,8 +473,10 @@ static void dcn3_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base)
clk_mgr_base->bw_params->dc_mode_softmax_memclk = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_UCLK);
/* Refresh bounding box */
+ DC_FP_START();
clk_mgr_base->ctx->dc->res_pool->funcs->update_bw_bounding_box(
clk_mgr->base.ctx->dc, clk_mgr_base->bw_params);
+ DC_FP_END();
}
static bool dcn3_is_smu_present(struct clk_mgr *clk_mgr_base)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
index 48005def1164..bc4ddc36fe58 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
@@ -570,32 +570,32 @@ static struct wm_table lpddr5_wm_table = {
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
- .sr_exit_time_us = 7.95,
- .sr_enter_plus_exit_time_us = 9,
+ .sr_exit_time_us = 13.5,
+ .sr_enter_plus_exit_time_us = 16.5,
.valid = true,
},
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
- .sr_exit_time_us = 9.82,
- .sr_enter_plus_exit_time_us = 11.196,
+ .sr_exit_time_us = 13.5,
+ .sr_enter_plus_exit_time_us = 16.5,
.valid = true,
},
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
- .sr_exit_time_us = 9.89,
- .sr_enter_plus_exit_time_us = 11.24,
+ .sr_exit_time_us = 13.5,
+ .sr_enter_plus_exit_time_us = 16.5,
.valid = true,
},
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
- .sr_exit_time_us = 9.748,
- .sr_enter_plus_exit_time_us = 11.102,
+ .sr_exit_time_us = 13.5,
+ .sr_enter_plus_exit_time_us = 16.5,
.valid = true,
},
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
index 4162ce40089b..9d17c5a5ae01 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
@@ -329,38 +329,38 @@ static struct clk_bw_params dcn31_bw_params = {
};
-static struct wm_table ddr4_wm_table = {
+static struct wm_table ddr5_wm_table = {
.entries = {
{
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
- .sr_exit_time_us = 6.09,
- .sr_enter_plus_exit_time_us = 7.14,
+ .sr_exit_time_us = 9,
+ .sr_enter_plus_exit_time_us = 11,
.valid = true,
},
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
- .sr_exit_time_us = 10.12,
- .sr_enter_plus_exit_time_us = 11.48,
+ .sr_exit_time_us = 9,
+ .sr_enter_plus_exit_time_us = 11,
.valid = true,
},
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
- .sr_exit_time_us = 10.12,
- .sr_enter_plus_exit_time_us = 11.48,
+ .sr_exit_time_us = 9,
+ .sr_enter_plus_exit_time_us = 11,
.valid = true,
},
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
- .sr_exit_time_us = 10.12,
- .sr_enter_plus_exit_time_us = 11.48,
+ .sr_exit_time_us = 9,
+ .sr_enter_plus_exit_time_us = 11,
.valid = true,
},
}
@@ -687,7 +687,7 @@ void dcn31_clk_mgr_construct(
if (ctx->dc_bios->integrated_info->memory_type == LpDdr5MemType) {
dcn31_bw_params.wm_table = lpddr5_wm_table;
} else {
- dcn31_bw_params.wm_table = ddr4_wm_table;
+ dcn31_bw_params.wm_table = ddr5_wm_table;
}
/* Saved clocks configured at boot for debug purposes */
dcn31_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, &clk_mgr->base.base, &log_info);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
index a1011f3273f3..de3f4643eeef 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
@@ -120,7 +120,11 @@ static int dcn31_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
result = dcn31_smu_wait_for_response(clk_mgr, 10, 200000);
if (result == VBIOSSMC_Result_Failed) {
- ASSERT(0);
+ if (msg_id == VBIOSSMC_MSG_TransferTableDram2Smu &&
+ param == TABLE_WATERMARKS)
+ DC_LOG_WARNING("Watermarks table not configured properly by SMU");
+ else
+ ASSERT(0);
REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Result_OK);
return -1;
}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 01c8849b9db2..ba1aa994db4b 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -985,10 +985,13 @@ static bool dc_construct(struct dc *dc,
goto fail;
#ifdef CONFIG_DRM_AMD_DC_DCN
dc->clk_mgr->force_smu_not_present = init_params->force_smu_not_present;
-#endif
- if (dc->res_pool->funcs->update_bw_bounding_box)
+ if (dc->res_pool->funcs->update_bw_bounding_box) {
+ DC_FP_START();
dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params);
+ DC_FP_END();
+ }
+#endif
/* Creation of current_state must occur after dc->dml
* is initialized in dc_create_resource_pool because
@@ -1220,6 +1223,8 @@ struct dc *dc_create(const struct dc_init_data *init_params)
dc->caps.max_dp_protocol_version = DP_VERSION_1_4;
+ dc->caps.max_otg_num = dc->res_pool->res_cap->num_timing_generator;
+
if (dc->res_pool->dmcu != NULL)
dc->versions.dmcu_version = dc->res_pool->dmcu->dmcu_version;
}
@@ -1404,20 +1409,34 @@ static void program_timing_sync(
status->timing_sync_info.master = false;
}
- /* remove any other unblanked pipes as they have already been synced */
- for (j = j + 1; j < group_size; j++) {
- bool is_blanked;
- if (pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked)
- is_blanked =
- pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked(pipe_set[j]->stream_res.opp);
- else
- is_blanked =
- pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg);
- if (!is_blanked) {
- group_size--;
- pipe_set[j] = pipe_set[group_size];
- j--;
+ /* remove any other pipes that have already been synced */
+ if (dc->config.use_pipe_ctx_sync_logic) {
+ /* check pipe's syncd to decide which pipe to be removed */
+ for (j = 1; j < group_size; j++) {
+ if (pipe_set[j]->pipe_idx_syncd == pipe_set[0]->pipe_idx_syncd) {
+ group_size--;
+ pipe_set[j] = pipe_set[group_size];
+ j--;
+ } else
+ /* link slave pipe's syncd with master pipe */
+ pipe_set[j]->pipe_idx_syncd = pipe_set[0]->pipe_idx_syncd;
+ }
+ } else {
+ for (j = j + 1; j < group_size; j++) {
+ bool is_blanked;
+
+ if (pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked)
+ is_blanked =
+ pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked(pipe_set[j]->stream_res.opp);
+ else
+ is_blanked =
+ pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg);
+ if (!is_blanked) {
+ group_size--;
+ pipe_set[j] = pipe_set[group_size];
+ j--;
+ }
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 05e216524370..61b8f29a0c30 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -202,7 +202,7 @@ void dp_wait_for_training_aux_rd_interval(
uint32_t wait_in_micro_secs)
{
#if defined(CONFIG_DRM_AMD_DC_DCN)
- if (wait_in_micro_secs > 16000)
+ if (wait_in_micro_secs > 1000)
msleep(wait_in_micro_secs/1000);
else
udelay(wait_in_micro_secs);
@@ -5597,6 +5597,26 @@ static bool retrieve_link_cap(struct dc_link *link)
dp_hw_fw_revision.ieee_fw_rev,
sizeof(dp_hw_fw_revision.ieee_fw_rev));
+ /* Quirk for Apple MBP 2018 15" Retina panels: wrong DP_MAX_LINK_RATE */
+ {
+ uint8_t str_mbp_2018[] = { 101, 68, 21, 103, 98, 97 };
+ uint8_t fwrev_mbp_2018[] = { 7, 4 };
+ uint8_t fwrev_mbp_2018_vega[] = { 8, 4 };
+
+ /* We also check for the firmware revision as 16,1 models have an
+ * identical device id and are incorrectly quirked otherwise.
+ */
+ if ((link->dpcd_caps.sink_dev_id == 0x0010fa) &&
+ !memcmp(link->dpcd_caps.sink_dev_id_str, str_mbp_2018,
+ sizeof(str_mbp_2018)) &&
+ (!memcmp(link->dpcd_caps.sink_fw_revision, fwrev_mbp_2018,
+ sizeof(fwrev_mbp_2018)) ||
+ !memcmp(link->dpcd_caps.sink_fw_revision, fwrev_mbp_2018_vega,
+ sizeof(fwrev_mbp_2018_vega)))) {
+ link->reported_link_cap.link_rate = LINK_RATE_RBR2;
+ }
+ }
+
memset(&link->dpcd_caps.dsc_caps, '\0',
sizeof(link->dpcd_caps.dsc_caps));
memset(&link->dpcd_caps.fec_cap, '\0', sizeof(link->dpcd_caps.fec_cap));
@@ -6935,7 +6955,7 @@ bool dpcd_write_128b_132b_sst_payload_allocation_table(
}
}
retries++;
- udelay(5000);
+ msleep(5);
}
if (!result && retries == max_retries) {
@@ -6987,7 +7007,7 @@ bool dpcd_poll_for_allocation_change_trigger(struct dc_link *link)
break;
}
- udelay(5000);
+ msleep(5);
}
if (result == ACT_FAILED) {
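/*
 * Editor's note -- illustrative sketch, not part of the patch above.
 * Several hunks in dc_link_dp.c replace udelay() with msleep() for waits
 * of a millisecond or more: busy-waiting that long wastes CPU, so the
 * threshold for switching to a sleeping wait is lowered from 16 ms to
 * 1 ms. A userspace model of the selection logic, with hypothetical
 * names:
 */
#include <assert.h>
#include <stdint.h>

enum wait_kind { WAIT_BUSY_UDELAY, WAIT_SLEEP_MSLEEP };

static enum wait_kind pick_wait(uint32_t wait_us)
{
	/* sleep (and let the scheduler run) once the wait exceeds 1 ms */
	return wait_us > 1000 ? WAIT_SLEEP_MSLEEP : WAIT_BUSY_UDELAY;
}

int main(void)
{
	assert(pick_wait(400) == WAIT_BUSY_UDELAY);
	assert(pick_wait(5000) == WAIT_SLEEP_MSLEEP);	/* e.g. the old udelay(5000) */
	return 0;
}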
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index d4ff6cc6b8d9..18757c158523 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -1964,10 +1964,6 @@ enum dc_status dc_remove_stream_from_ctx(
dc->res_pool,
del_pipe->stream_res.stream_enc,
false);
- /* Release link encoder from stream in new dc_state. */
- if (dc->res_pool->funcs->link_enc_unassign)
- dc->res_pool->funcs->link_enc_unassign(new_ctx, del_pipe->stream);
-
#if defined(CONFIG_DRM_AMD_DC_DCN)
if (is_dp_128b_132b_signal(del_pipe)) {
update_hpo_dp_stream_engine_usage(
@@ -3217,6 +3213,60 @@ struct hpo_dp_link_encoder *resource_get_hpo_dp_link_enc_for_det_lt(
}
#endif
+void reset_syncd_pipes_from_disabled_pipes(struct dc *dc,
+ struct dc_state *context)
+{
+ int i, j;
+ struct pipe_ctx *pipe_ctx_old, *pipe_ctx, *pipe_ctx_syncd;
+
+ /* If pipe backend is reset, need to reset pipe syncd status */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe_ctx_old = &dc->current_state->res_ctx.pipe_ctx[i];
+ pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe_ctx_old->stream)
+ continue;
+
+ if (pipe_ctx_old->top_pipe || pipe_ctx_old->prev_odm_pipe)
+ continue;
+
+ if (!pipe_ctx->stream ||
+ pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) {
+
+ /* Reset all the syncd pipes from the disabled pipe */
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ pipe_ctx_syncd = &context->res_ctx.pipe_ctx[j];
+ if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx_syncd) == pipe_ctx_old->pipe_idx) ||
+ !IS_PIPE_SYNCD_VALID(pipe_ctx_syncd))
+ SET_PIPE_SYNCD_TO_PIPE(pipe_ctx_syncd, j);
+ }
+ }
+ }
+}
+
+void check_syncd_pipes_for_disabled_master_pipe(struct dc *dc,
+ struct dc_state *context,
+ uint8_t disabled_master_pipe_idx)
+{
+ int i;
+ struct pipe_ctx *pipe_ctx, *pipe_ctx_check;
+
+ pipe_ctx = &context->res_ctx.pipe_ctx[disabled_master_pipe_idx];
+ if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx) != disabled_master_pipe_idx) ||
+ !IS_PIPE_SYNCD_VALID(pipe_ctx))
+ SET_PIPE_SYNCD_TO_PIPE(pipe_ctx, disabled_master_pipe_idx);
+
+ /* for the disabled pipe, check if any slave pipe is still syncd to it and assert */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe_ctx_check = &context->res_ctx.pipe_ctx[i];
+
+ if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx_check) == disabled_master_pipe_idx) &&
+ IS_PIPE_SYNCD_VALID(pipe_ctx_check) && (i != disabled_master_pipe_idx))
+ DC_ERR("DC: Failure: pipe_idx[%d] syncd with disabled master pipe_idx[%d]\n",
+ i, disabled_master_pipe_idx);
+ }
+}
+
uint8_t resource_transmitter_to_phy_idx(const struct dc *dc, enum transmitter transmitter)
{
/* TODO - get transmitter to phy idx mapping from DMUB */
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index da2c78ce14d6..b51864890621 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -202,6 +202,7 @@ struct dc_caps {
bool edp_dsc_support;
bool vbios_lttpr_aware;
bool vbios_lttpr_enable;
+ uint32_t max_otg_num;
};
struct dc_bug_wa {
@@ -344,6 +345,7 @@ struct dc_config {
uint8_t vblank_alignment_max_frame_time_diff;
bool is_asymmetric_memory;
bool is_single_rank_dimm;
+ bool use_pipe_ctx_sync_logic;
};
enum visual_confirm {
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 78192ecba102..eb2755bdb30e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -1566,6 +1566,10 @@ static enum dc_status apply_single_controller_ctx_to_hw(
&pipe_ctx->stream->audio_info);
}
+ /* make sure no pipes syncd to the pipe being enabled */
+ if (!pipe_ctx->stream->apply_seamless_boot_optimization && dc->config.use_pipe_ctx_sync_logic)
+ check_syncd_pipes_for_disabled_master_pipe(dc, context, pipe_ctx->pipe_idx);
+
#if defined(CONFIG_DRM_AMD_DC_DCN)
/* DCN3.1 FPGA Workaround
* Need to enable HPO DP Stream Encoder before setting OTG master enable.
@@ -1604,11 +1608,6 @@ static enum dc_status apply_single_controller_ctx_to_hw(
pipe_ctx->stream_res.stream_enc,
pipe_ctx->stream_res.tg->inst);
- if (dc_is_dp_signal(pipe_ctx->stream->signal) &&
- pipe_ctx->stream_res.stream_enc->funcs->reset_fifo)
- pipe_ctx->stream_res.stream_enc->funcs->reset_fifo(
- pipe_ctx->stream_res.stream_enc);
-
if (dc_is_dp_signal(pipe_ctx->stream->signal))
dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_OTG);
@@ -1835,9 +1834,29 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
break;
}
}
- // We are trying to enable eDP, don't power down VDD
- if (can_apply_edp_fast_boot)
+
+ /*
+ * TO-DO: So far the logic below only addresses the single eDP case.
+ * For the dual eDP case, a few things need to be
+ * implemented first:
+ *
+ * 1. Change the fastboot logic above, so eDP link[0 or 1]'s
+ * stream[0 or 1] will all be checked.
+ *
+ * 2. Change keep_edp_vdd_on to an array, and maintain keep_edp_vdd_on
+ * for each eDP.
+ *
+ * Once the above two things are completed, we can change the logic below
+ * correspondingly, so dual eDP case will be fully covered.
+ */
+
+ // We are trying to enable eDP, so don't power down VDD if an eDP stream exists
+ if ((edp_stream_num == 1 && edp_streams[0] != NULL) || can_apply_edp_fast_boot) {
keep_edp_vdd_on = true;
+ DC_LOG_EVENT_LINK_TRAINING("Keep eDP Vdd on\n");
+ } else {
+ DC_LOG_EVENT_LINK_TRAINING("No eDP stream enabled, turn eDP Vdd off\n");
+ }
}
// Check seamless boot support
@@ -2297,6 +2316,10 @@ enum dc_status dce110_apply_ctx_to_hw(
enum dc_status status;
int i;
+ /* reset syncd pipes from disabled pipes */
+ if (dc->config.use_pipe_ctx_sync_logic)
+ reset_syncd_pipes_from_disabled_pipes(dc, context);
+
/* Reset old context */
/* look up the targets that have been removed since last commit */
hws->funcs.reset_hw_ctx_wrap(dc, context);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
index bf4436d7aaab..b0c08ee6bc2c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
@@ -902,19 +902,6 @@ void enc1_stream_encoder_stop_dp_info_packets(
}
-void enc1_stream_encoder_reset_fifo(
- struct stream_encoder *enc)
-{
- struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
-
- /* set DIG_START to 0x1 to reset FIFO */
- REG_UPDATE(DIG_FE_CNTL, DIG_START, 1);
- udelay(100);
-
- /* write 0 to take the FIFO out of reset */
- REG_UPDATE(DIG_FE_CNTL, DIG_START, 0);
-}
-
void enc1_stream_encoder_dp_blank(
struct dc_link *link,
struct stream_encoder *enc)
@@ -1600,8 +1587,6 @@ static const struct stream_encoder_funcs dcn10_str_enc_funcs = {
enc1_stream_encoder_send_immediate_sdp_message,
.stop_dp_info_packets =
enc1_stream_encoder_stop_dp_info_packets,
- .reset_fifo =
- enc1_stream_encoder_reset_fifo,
.dp_blank =
enc1_stream_encoder_dp_blank,
.dp_unblank =
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
index a146a41f68e9..687d7e4bf7ca 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
@@ -626,9 +626,6 @@ void enc1_stream_encoder_send_immediate_sdp_message(
void enc1_stream_encoder_stop_dp_info_packets(
struct stream_encoder *enc);
-void enc1_stream_encoder_reset_fifo(
- struct stream_encoder *enc);
-
void enc1_stream_encoder_dp_blank(
struct dc_link *link,
struct stream_encoder *enc);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 2bc93df023ad..2a72517e2b28 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -1069,7 +1069,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = true,
- .pipe_split_policy = MPC_SPLIT_DYNAMIC,
+ .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
.force_single_disp_pipe_split = false,
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c
index 8a70f92795c2..aab25ca8343a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c
@@ -593,8 +593,6 @@ static const struct stream_encoder_funcs dcn20_str_enc_funcs = {
enc1_stream_encoder_send_immediate_sdp_message,
.stop_dp_info_packets =
enc1_stream_encoder_stop_dp_info_packets,
- .reset_fifo =
- enc1_stream_encoder_reset_fifo,
.dp_blank =
enc1_stream_encoder_dp_blank,
.dp_unblank =
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c
index 8daa12730bc1..a04ca4a98392 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c
@@ -789,8 +789,6 @@ static const struct stream_encoder_funcs dcn30_str_enc_funcs = {
enc3_stream_encoder_update_dp_info_packets,
.stop_dp_info_packets =
enc1_stream_encoder_stop_dp_info_packets,
- .reset_fifo =
- enc1_stream_encoder_reset_fifo,
.dp_blank =
enc1_stream_encoder_dp_blank,
.dp_unblank =
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
index 602ec9a08549..8ca26383b568 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
@@ -1878,7 +1878,6 @@ noinline bool dcn30_internal_validate_bw(
dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
- DC_FP_START();
if (!pipe_cnt) {
out = true;
goto validate_out;
@@ -2104,7 +2103,6 @@ validate_fail:
out = false;
validate_out:
- DC_FP_END();
return out;
}
@@ -2306,7 +2304,9 @@ bool dcn30_validate_bandwidth(struct dc *dc,
BW_VAL_TRACE_COUNT();
+ DC_FP_START();
out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate);
+ DC_FP_END();
if (pipe_cnt == 0)
goto validate_out;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
index c1c6e602b06c..5d9637b07429 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
@@ -686,7 +686,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.disable_clock_gate = true,
.disable_pplib_clock_request = true,
.disable_pplib_wm_range = true,
- .pipe_split_policy = MPC_SPLIT_DYNAMIC,
+ .pipe_split_policy = MPC_SPLIT_AVOID,
.force_single_disp_pipe_split = false,
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
@@ -1380,6 +1380,17 @@ static void set_wm_ranges(
pp_smu->nv_funcs.set_wm_ranges(&pp_smu->nv_funcs.pp_smu, &ranges);
}
+static void dcn301_calculate_wm_and_dlg(
+ struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel)
+{
+ DC_FP_START();
+ dcn301_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel);
+ DC_FP_END();
+}
+
static struct resource_funcs dcn301_res_pool_funcs = {
.destroy = dcn301_destroy_resource_pool,
.link_enc_create = dcn301_link_encoder_create,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c
index 90c73a1cb986..5e3bcaf12cac 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c
@@ -138,8 +138,11 @@ static uint32_t convert_and_clamp(
ret_val = wm_ns * refclk_mhz;
ret_val /= 1000;
- if (ret_val > clamp_value)
+ if (ret_val > clamp_value) {
+ /* clamping WMs is abnormal, unexpected and may lead to underflow */
+ ASSERT(0);
ret_val = clamp_value;
+ }
return ret_val;
}
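/*
 * Editor's note -- illustrative sketch, not part of the patch above.
 * convert_and_clamp() turns a watermark in nanoseconds into refclk cycles
 * (wm_ns * refclk_mhz / 1000) and clamps the result to the register field
 * width; the patch shrinks the clamp values to the real field sizes and
 * asserts when clamping happens, since a clamped watermark hints at a
 * miscalculation that can underflow the display pipeline. A userspace
 * model, with hypothetical names:
 */
#include <assert.h>
#include <stdint.h>

static uint32_t wm_ns_to_refclk_cycles(uint32_t wm_ns, uint32_t refclk_mhz,
				       uint32_t field_max, int *clamped)
{
	uint64_t cycles = (uint64_t)wm_ns * refclk_mhz / 1000;

	*clamped = cycles > field_max;
	return *clamped ? field_max : (uint32_t)cycles;
}

int main(void)
{
	int clamped;

	/* 1000 ns at 100 MHz -> 100 cycles, well inside a 14-bit field */
	assert(wm_ns_to_refclk_cycles(1000, 100, 0x3fff, &clamped) == 100 && !clamped);
	/* an absurdly large watermark gets clamped (and would assert in DC) */
	assert(wm_ns_to_refclk_cycles(10000000, 100, 0x3fff, &clamped) == 0x3fff && clamped);
	return 0;
}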
@@ -159,7 +162,7 @@ static bool hubbub31_program_urgent_watermarks(
if (safe_to_lower || watermarks->a.urgent_ns > hubbub2->watermarks.a.urgent_ns) {
hubbub2->watermarks.a.urgent_ns = watermarks->a.urgent_ns;
prog_wm_value = convert_and_clamp(watermarks->a.urgent_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0x3fff);
REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, 0,
DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, prog_wm_value);
@@ -193,7 +196,7 @@ static bool hubbub31_program_urgent_watermarks(
if (safe_to_lower || watermarks->a.urgent_latency_ns > hubbub2->watermarks.a.urgent_latency_ns) {
hubbub2->watermarks.a.urgent_latency_ns = watermarks->a.urgent_latency_ns;
prog_wm_value = convert_and_clamp(watermarks->a.urgent_latency_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0x3fff);
REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, 0,
DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, prog_wm_value);
} else if (watermarks->a.urgent_latency_ns < hubbub2->watermarks.a.urgent_latency_ns)
@@ -203,7 +206,7 @@ static bool hubbub31_program_urgent_watermarks(
if (safe_to_lower || watermarks->b.urgent_ns > hubbub2->watermarks.b.urgent_ns) {
hubbub2->watermarks.b.urgent_ns = watermarks->b.urgent_ns;
prog_wm_value = convert_and_clamp(watermarks->b.urgent_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0x3fff);
REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, 0,
DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, prog_wm_value);
@@ -237,7 +240,7 @@ static bool hubbub31_program_urgent_watermarks(
if (safe_to_lower || watermarks->b.urgent_latency_ns > hubbub2->watermarks.b.urgent_latency_ns) {
hubbub2->watermarks.b.urgent_latency_ns = watermarks->b.urgent_latency_ns;
prog_wm_value = convert_and_clamp(watermarks->b.urgent_latency_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0x3fff);
REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, 0,
DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, prog_wm_value);
} else if (watermarks->b.urgent_latency_ns < hubbub2->watermarks.b.urgent_latency_ns)
@@ -247,7 +250,7 @@ static bool hubbub31_program_urgent_watermarks(
if (safe_to_lower || watermarks->c.urgent_ns > hubbub2->watermarks.c.urgent_ns) {
hubbub2->watermarks.c.urgent_ns = watermarks->c.urgent_ns;
prog_wm_value = convert_and_clamp(watermarks->c.urgent_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0x3fff);
REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, 0,
DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, prog_wm_value);
@@ -281,7 +284,7 @@ static bool hubbub31_program_urgent_watermarks(
if (safe_to_lower || watermarks->c.urgent_latency_ns > hubbub2->watermarks.c.urgent_latency_ns) {
hubbub2->watermarks.c.urgent_latency_ns = watermarks->c.urgent_latency_ns;
prog_wm_value = convert_and_clamp(watermarks->c.urgent_latency_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0x3fff);
REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C, 0,
DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C, prog_wm_value);
} else if (watermarks->c.urgent_latency_ns < hubbub2->watermarks.c.urgent_latency_ns)
@@ -291,7 +294,7 @@ static bool hubbub31_program_urgent_watermarks(
if (safe_to_lower || watermarks->d.urgent_ns > hubbub2->watermarks.d.urgent_ns) {
hubbub2->watermarks.d.urgent_ns = watermarks->d.urgent_ns;
prog_wm_value = convert_and_clamp(watermarks->d.urgent_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0x3fff);
REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, 0,
DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, prog_wm_value);
@@ -325,7 +328,7 @@ static bool hubbub31_program_urgent_watermarks(
if (safe_to_lower || watermarks->d.urgent_latency_ns > hubbub2->watermarks.d.urgent_latency_ns) {
hubbub2->watermarks.d.urgent_latency_ns = watermarks->d.urgent_latency_ns;
prog_wm_value = convert_and_clamp(watermarks->d.urgent_latency_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0x3fff);
REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D, 0,
DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D, prog_wm_value);
} else if (watermarks->d.urgent_latency_ns < hubbub2->watermarks.d.urgent_latency_ns)
@@ -351,7 +354,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns;
prog_wm_value = convert_and_clamp(
watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0,
DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n"
@@ -367,7 +370,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->a.cstate_pstate.cstate_exit_ns;
prog_wm_value = convert_and_clamp(
watermarks->a.cstate_pstate.cstate_exit_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n"
@@ -383,7 +386,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->a.cstate_pstate.cstate_enter_plus_exit_z8_ns;
prog_wm_value = convert_and_clamp(
watermarks->a.cstate_pstate.cstate_enter_plus_exit_z8_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A, 0,
DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_A calculated =%d\n"
@@ -399,7 +402,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->a.cstate_pstate.cstate_exit_z8_ns;
prog_wm_value = convert_and_clamp(
watermarks->a.cstate_pstate.cstate_exit_z8_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A, 0,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_A calculated =%d\n"
@@ -416,7 +419,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns;
prog_wm_value = convert_and_clamp(
watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0,
DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n"
@@ -432,7 +435,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->b.cstate_pstate.cstate_exit_ns;
prog_wm_value = convert_and_clamp(
watermarks->b.cstate_pstate.cstate_exit_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n"
@@ -448,7 +451,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->b.cstate_pstate.cstate_enter_plus_exit_z8_ns;
prog_wm_value = convert_and_clamp(
watermarks->b.cstate_pstate.cstate_enter_plus_exit_z8_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_B, 0,
DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_B, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_B calculated =%d\n"
@@ -464,7 +467,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->b.cstate_pstate.cstate_exit_z8_ns;
prog_wm_value = convert_and_clamp(
watermarks->b.cstate_pstate.cstate_exit_z8_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_B, 0,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_B, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_B calculated =%d\n"
@@ -481,7 +484,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns;
prog_wm_value = convert_and_clamp(
watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, 0,
DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_C calculated =%d\n"
@@ -497,7 +500,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->c.cstate_pstate.cstate_exit_ns;
prog_wm_value = convert_and_clamp(
watermarks->c.cstate_pstate.cstate_exit_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, 0,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_C calculated =%d\n"
@@ -513,7 +516,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->c.cstate_pstate.cstate_enter_plus_exit_z8_ns;
prog_wm_value = convert_and_clamp(
watermarks->c.cstate_pstate.cstate_enter_plus_exit_z8_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_C, 0,
DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_C, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_C calculated =%d\n"
@@ -529,7 +532,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->c.cstate_pstate.cstate_exit_z8_ns;
prog_wm_value = convert_and_clamp(
watermarks->c.cstate_pstate.cstate_exit_z8_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C, 0,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_C calculated =%d\n"
@@ -546,7 +549,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns;
prog_wm_value = convert_and_clamp(
watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, 0,
DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_D calculated =%d\n"
@@ -562,7 +565,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->d.cstate_pstate.cstate_exit_ns;
prog_wm_value = convert_and_clamp(
watermarks->d.cstate_pstate.cstate_exit_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, 0,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_D calculated =%d\n"
@@ -578,7 +581,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->d.cstate_pstate.cstate_enter_plus_exit_z8_ns;
prog_wm_value = convert_and_clamp(
watermarks->d.cstate_pstate.cstate_enter_plus_exit_z8_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D, 0,
DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_D calculated =%d\n"
@@ -594,7 +597,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->d.cstate_pstate.cstate_exit_z8_ns;
prog_wm_value = convert_and_clamp(
watermarks->d.cstate_pstate.cstate_exit_z8_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D, 0,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_D calculated =%d\n"
@@ -625,7 +628,7 @@ static bool hubbub31_program_pstate_watermarks(
watermarks->a.cstate_pstate.pstate_change_ns;
prog_wm_value = convert_and_clamp(
watermarks->a.cstate_pstate.pstate_change_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, 0,
DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n"
@@ -642,7 +645,7 @@ static bool hubbub31_program_pstate_watermarks(
watermarks->b.cstate_pstate.pstate_change_ns;
prog_wm_value = convert_and_clamp(
watermarks->b.cstate_pstate.pstate_change_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 0,
DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n"
@@ -659,7 +662,7 @@ static bool hubbub31_program_pstate_watermarks(
watermarks->c.cstate_pstate.pstate_change_ns;
prog_wm_value = convert_and_clamp(
watermarks->c.cstate_pstate.pstate_change_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, 0,
DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_C calculated =%d\n"
@@ -676,7 +679,7 @@ static bool hubbub31_program_pstate_watermarks(
watermarks->d.cstate_pstate.pstate_change_ns;
prog_wm_value = convert_and_clamp(
watermarks->d.cstate_pstate.pstate_change_ns,
- refclk_mhz, 0x1fffff);
+ refclk_mhz, 0xffff);
REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, 0,
DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_D calculated =%d\n"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index 42ed47e8133d..8d64187478e4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -2260,6 +2260,9 @@ static bool dcn31_resource_construct(
dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
dc->caps.color.mpc.ocsc = 1;
+ /* Use pipe-context-based OTG sync logic */
+ dc->config.use_pipe_ctx_sync_logic = true;
+
/* read VBIOS LTTPR caps */
{
if (ctx->dc_bios->funcs->get_lttpr_caps) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
index 246071c72f6b..548cdef8a8ad 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
@@ -1576,8 +1576,6 @@ void dml20_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib,
e2e_pipe_param,
num_pipes);
- dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
- / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated
print__dlg_sys_params_st(mode_lib, &dlg_sys_param);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
index 015e7f2c0b16..0fc9f3e3ffae 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
@@ -1577,8 +1577,6 @@ void dml20v2_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib,
e2e_pipe_param,
num_pipes);
- dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
- / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated
print__dlg_sys_params_st(mode_lib, &dlg_sys_param);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
index 8bc27de4c104..618f4b682ab1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
@@ -1688,8 +1688,6 @@ void dml21_rq_dlg_get_dlg_reg(
mode_lib,
e2e_pipe_param,
num_pipes);
- dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
- / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated
print__dlg_sys_params_st(mode_lib, &dlg_sys_param);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
index aef854270054..747167083dea 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
@@ -1858,8 +1858,6 @@ void dml30_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib,
e2e_pipe_param,
num_pipes);
- dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency
- / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated
print__dlg_sys_params_st(mode_lib, &dlg_sys_param);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
index 94c32832a0e7..0a7a33864973 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
@@ -327,7 +327,7 @@ void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info)
dcn3_01_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10;
}
-void dcn301_calculate_wm_and_dlg(struct dc *dc,
+void dcn301_calculate_wm_and_dlg_fp(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h
index fc7065d17842..774b0fdfc80b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h
@@ -34,7 +34,7 @@ void dcn301_fpu_set_wm_ranges(int i,
void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info);
-void dcn301_calculate_wm_and_dlg(struct dc *dc,
+void dcn301_calculate_wm_and_dlg_fp(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index d46a2733024c..8f9f1d607f7c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -546,7 +546,6 @@ struct _vcs_dpi_display_dlg_sys_params_st {
double t_sr_wm_us;
double t_extra_us;
double mem_trip_us;
- double t_srx_delay_us;
double deepsleep_dcfclk_mhz;
double total_flip_bw;
unsigned int total_flip_bytes;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c
index 71ea503cb32f..412e75eb4704 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c
@@ -142,9 +142,6 @@ void print__dlg_sys_params_st(struct display_mode_lib *mode_lib, const struct _v
dml_print("DML_RQ_DLG_CALC: t_sr_wm_us = %3.2f\n", dlg_sys_param->t_sr_wm_us);
dml_print("DML_RQ_DLG_CALC: t_extra_us = %3.2f\n", dlg_sys_param->t_extra_us);
dml_print(
- "DML_RQ_DLG_CALC: t_srx_delay_us = %3.2f\n",
- dlg_sys_param->t_srx_delay_us);
- dml_print(
"DML_RQ_DLG_CALC: deepsleep_dcfclk_mhz = %3.2f\n",
dlg_sys_param->deepsleep_dcfclk_mhz);
dml_print(
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
index 59dc2c5b58dd..3df559c591f8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
@@ -1331,10 +1331,6 @@ void dml1_rq_dlg_get_dlg_params(
if (dual_plane)
DTRACE("DLG: %s: swath_height_c = %d", __func__, swath_height_c);
- DTRACE(
- "DLG: %s: t_srx_delay_us = %3.2f",
- __func__,
- (double) dlg_sys_param->t_srx_delay_us);
DTRACE("DLG: %s: line_time_in_us = %3.2f", __func__, (double) line_time_in_us);
DTRACE("DLG: %s: vupdate_offset = %d", __func__, vupdate_offset);
DTRACE("DLG: %s: vupdate_width = %d", __func__, vupdate_width);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index 890280026e69..943240e2809e 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -382,6 +382,7 @@ struct pipe_ctx {
struct pll_settings pll_settings;
uint8_t pipe_idx;
+ uint8_t pipe_idx_syncd;
struct pipe_ctx *top_pipe;
struct pipe_ctx *bottom_pipe;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
index 073f8b667eff..c88e113b94d1 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
@@ -164,10 +164,6 @@ struct stream_encoder_funcs {
void (*stop_dp_info_packets)(
struct stream_encoder *enc);
- void (*reset_fifo)(
- struct stream_encoder *enc
- );
-
void (*dp_blank)(
struct dc_link *link,
struct stream_encoder *enc);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index 4249bf306e09..dbfe6690ded8 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -34,6 +34,10 @@
#define MEMORY_TYPE_HBM 2
+#define IS_PIPE_SYNCD_VALID(pipe) ((((pipe)->pipe_idx_syncd) & 0x80)?1:0)
+#define GET_PIPE_SYNCD_FROM_PIPE(pipe) ((pipe)->pipe_idx_syncd & 0x7F)
+#define SET_PIPE_SYNCD_TO_PIPE(pipe, pipe_syncd) ((pipe)->pipe_idx_syncd = (0x80 | pipe_syncd))
+
enum dce_version resource_parse_asic_id(
struct hw_asic_id asic_id);
@@ -208,6 +212,13 @@ struct hpo_dp_link_encoder *resource_get_hpo_dp_link_enc_for_det_lt(
const struct dc_link *link);
#endif
+void reset_syncd_pipes_from_disabled_pipes(struct dc *dc,
+ struct dc_state *context);
+
+void check_syncd_pipes_for_disabled_master_pipe(struct dc *dc,
+ struct dc_state *context,
+ uint8_t disabled_master_pipe_idx);
+
uint8_t resource_transmitter_to_phy_idx(const struct dc *dc, enum transmitter transmitter);
#endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */
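Note: the new pipe_idx_syncd field packs a validity flag into bit 7 and the index of the synced master pipe into the low 7 bits; the three macros above are the only intended accessors. A small walk-through with hypothetical values (not driver code):

	struct pipe_ctx pipe = { .pipe_idx = 2 };

	SET_PIPE_SYNCD_TO_PIPE(&pipe, 5);   /* pipe_idx_syncd == 0x85 */
	IS_PIPE_SYNCD_VALID(&pipe);         /* 1: bit 7 is set */
	GET_PIPE_SYNCD_FROM_PIPE(&pipe);    /* 5: low 7 bits */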
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index e2cae97f4ff1..48cc009d9bdf 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -3462,8 +3462,7 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
attr == &sensor_dev_attr_power2_cap_min.dev_attr.attr ||
attr == &sensor_dev_attr_power2_cap.dev_attr.attr ||
attr == &sensor_dev_attr_power2_cap_default.dev_attr.attr ||
- attr == &sensor_dev_attr_power2_label.dev_attr.attr ||
- attr == &sensor_dev_attr_power1_label.dev_attr.attr))
+ attr == &sensor_dev_attr_power2_label.dev_attr.attr))
return 0;
return effective_mode;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 777f717c37ae..5488a0edb942 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -421,6 +421,36 @@ static int sienna_cichlid_store_powerplay_table(struct smu_context *smu)
return 0;
}
+static int sienna_cichlid_patch_pptable_quirk(struct smu_context *smu)
+{
+ struct amdgpu_device *adev = smu->adev;
+ uint32_t *board_reserved;
+ uint16_t *freq_table_gfx;
+ uint32_t i;
+
+ /* Fix some OEM SKU-specific stability issues */
+ GET_PPTABLE_MEMBER(BoardReserved, &board_reserved);
+ if ((adev->pdev->device == 0x73DF) &&
+ (adev->pdev->revision == 0xC3) &&
+ (adev->pdev->subsystem_device == 0x16C2) &&
+ (adev->pdev->subsystem_vendor == 0x1043))
+ board_reserved[0] = 1387;
+
+ GET_PPTABLE_MEMBER(FreqTableGfx, &freq_table_gfx);
+ if ((adev->pdev->device == 0x73DF) &&
+ (adev->pdev->revision == 0xC3) &&
+ ((adev->pdev->subsystem_device == 0x16C2) ||
+ (adev->pdev->subsystem_device == 0x133C)) &&
+ (adev->pdev->subsystem_vendor == 0x1043)) {
+ for (i = 0; i < NUM_GFXCLK_DPM_LEVELS; i++) {
+ if (freq_table_gfx[i] > 2500)
+ freq_table_gfx[i] = 2500;
+ }
+ }
+
+ return 0;
+}
+
static int sienna_cichlid_setup_pptable(struct smu_context *smu)
{
int ret = 0;
@@ -441,7 +471,7 @@ static int sienna_cichlid_setup_pptable(struct smu_context *smu)
if (ret)
return ret;
- return ret;
+ return sienna_cichlid_patch_pptable_quirk(smu);
}
static int sienna_cichlid_tables_init(struct smu_context *smu)
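Note: sienna_cichlid_patch_pptable_quirk() open-codes the PCI ID matching for the two affected OEM boards. Were the list to grow, the usual kernel idiom is a quirk table; a sketch of that alternative, where struct pptable_quirk and matches_quirk() are hypothetical names rather than driver API:

static const struct pptable_quirk {
	u16 device;
	u8  revision;
	u16 subsys_vendor;
	u16 subsys_device;
} gfxclk_cap_quirks[] = {
	{ 0x73DF, 0xC3, 0x1043, 0x16C2 },
	{ 0x73DF, 0xC3, 0x1043, 0x133C },
};

static bool matches_quirk(const struct pci_dev *pdev,
			  const struct pptable_quirk *q)
{
	return pdev->device == q->device &&
	       pdev->revision == q->revision &&
	       pdev->subsystem_vendor == q->subsys_vendor &&
	       pdev->subsystem_device == q->subsys_device;
}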
@@ -1238,21 +1268,37 @@ static int sienna_cichlid_populate_umd_state_clk(struct smu_context *smu)
&dpm_context->dpm_tables.soc_table;
struct smu_umd_pstate_table *pstate_table =
&smu->pstate_table;
+ struct amdgpu_device *adev = smu->adev;
pstate_table->gfxclk_pstate.min = gfx_table->min;
pstate_table->gfxclk_pstate.peak = gfx_table->max;
- if (gfx_table->max >= SIENNA_CICHLID_UMD_PSTATE_PROFILING_GFXCLK)
- pstate_table->gfxclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_GFXCLK;
pstate_table->uclk_pstate.min = mem_table->min;
pstate_table->uclk_pstate.peak = mem_table->max;
- if (mem_table->max >= SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK)
- pstate_table->uclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK;
pstate_table->socclk_pstate.min = soc_table->min;
pstate_table->socclk_pstate.peak = soc_table->max;
- if (soc_table->max >= SIENNA_CICHLID_UMD_PSTATE_PROFILING_SOCCLK)
+
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ case CHIP_NAVY_FLOUNDER:
+ pstate_table->gfxclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_GFXCLK;
+ pstate_table->uclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK;
pstate_table->socclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_SOCCLK;
+ break;
+ case CHIP_DIMGREY_CAVEFISH:
+ pstate_table->gfxclk_pstate.standard = DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_GFXCLK;
+ pstate_table->uclk_pstate.standard = DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_MEMCLK;
+ pstate_table->socclk_pstate.standard = DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_SOCCLK;
+ break;
+ case CHIP_BEIGE_GOBY:
+ pstate_table->gfxclk_pstate.standard = BEIGE_GOBY_UMD_PSTATE_PROFILING_GFXCLK;
+ pstate_table->uclk_pstate.standard = BEIGE_GOBY_UMD_PSTATE_PROFILING_MEMCLK;
+ pstate_table->socclk_pstate.standard = BEIGE_GOBY_UMD_PSTATE_PROFILING_SOCCLK;
+ break;
+ default:
+ break;
+ }
return 0;
}
@@ -3696,14 +3742,14 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
static int sienna_cichlid_enable_mgpu_fan_boost(struct smu_context *smu)
{
- struct smu_table_context *table_context = &smu->smu_table;
- PPTable_t *smc_pptable = table_context->driver_pptable;
+ uint16_t *mgpu_fan_boost_limit_rpm;
+ GET_PPTABLE_MEMBER(MGpuFanBoostLimitRpm, &mgpu_fan_boost_limit_rpm);
/*
* Skip the MGpuFanBoost setting for those ASICs
* which do not support it
*/
- if (!smc_pptable->MGpuFanBoostLimitRpm)
+ if (*mgpu_fan_boost_limit_rpm == 0)
return 0;
return smu_cmn_send_smc_msg_with_param(smu,
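Note: both fan-boost hunks replace direct PPTable_t dereferences with GET_PPTABLE_MEMBER(), which resolves the field offset against whichever table layout the ASIC actually uses. Roughly (a sketch; see sienna_cichlid_ppt.c for the exact macro):

#define GET_PPTABLE_MEMBER(field, member) do {				\
	if (smu->adev->asic_type == CHIP_BEIGE_GOBY)			\
		(*member) = (smu->smu_table.driver_pptable +		\
			     offsetof(PPTable_beige_goby_t, field));	\
	else								\
		(*member) = (smu->smu_table.driver_pptable +		\
			     offsetof(PPTable_t, field));		\
} while (0)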
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h
index 38cd0ece24f6..42f705c7a36f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h
@@ -33,6 +33,14 @@ typedef enum {
#define SIENNA_CICHLID_UMD_PSTATE_PROFILING_SOCCLK 960
#define SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK 1000
+#define DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_GFXCLK 1950
+#define DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_SOCCLK 960
+#define DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_MEMCLK 676
+
+#define BEIGE_GOBY_UMD_PSTATE_PROFILING_GFXCLK 2200
+#define BEIGE_GOBY_UMD_PSTATE_PROFILING_SOCCLK 960
+#define BEIGE_GOBY_UMD_PSTATE_PROFILING_MEMCLK 1000
+
extern void sienna_cichlid_set_ppt_funcs(struct smu_context *smu);
#endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
index caf1775d48ef..0bc84b709a93 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
@@ -282,14 +282,9 @@ static int yellow_carp_post_smu_init(struct smu_context *smu)
static int yellow_carp_mode_reset(struct smu_context *smu, int type)
{
- int ret = 0, index = 0;
-
- index = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_MSG,
- SMU_MSG_GfxDeviceDriverReset);
- if (index < 0)
- return index == -EACCES ? 0 : index;
+ int ret = 0;
- ret = smu_cmn_send_smc_msg_with_param(smu, (uint16_t)index, type, NULL);
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, type, NULL);
if (ret)
dev_err(smu->adev->dev, "Failed to mode reset!\n");
diff --git a/drivers/gpu/drm/arm/Kconfig b/drivers/gpu/drm/arm/Kconfig
index 58a242871b28..6e3f1d600541 100644
--- a/drivers/gpu/drm/arm/Kconfig
+++ b/drivers/gpu/drm/arm/Kconfig
@@ -6,6 +6,7 @@ config DRM_HDLCD
depends on DRM && OF && (ARM || ARM64 || COMPILE_TEST)
depends on COMMON_CLK
select DRM_KMS_HELPER
+ select DRM_GEM_CMA_HELPER
help
Choose this option if you have an ARM High Definition Colour LCD
controller.
diff --git a/drivers/gpu/drm/ast/ast_tables.h b/drivers/gpu/drm/ast/ast_tables.h
index d9eb353a4bf0..dbe1cc620f6e 100644
--- a/drivers/gpu/drm/ast/ast_tables.h
+++ b/drivers/gpu/drm/ast/ast_tables.h
@@ -282,8 +282,6 @@ static const struct ast_vbios_enhtable res_1360x768[] = {
};
static const struct ast_vbios_enhtable res_1600x900[] = {
- {1800, 1600, 24, 80, 1000, 900, 1, 3, VCLK108, /* 60Hz */
- (SyncPP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 3, 0x3A },
{1760, 1600, 48, 32, 926, 900, 3, 5, VCLK97_75, /* 60Hz CVT RB */
(SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo |
AST2500PreCatchCRT), 60, 1, 0x3A },
diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig
index 61db5a66b493..44ad70939663 100644
--- a/drivers/gpu/drm/bridge/Kconfig
+++ b/drivers/gpu/drm/bridge/Kconfig
@@ -8,7 +8,6 @@ config DRM_BRIDGE
config DRM_PANEL_BRIDGE
def_bool y
depends on DRM_BRIDGE
- depends on DRM_KMS_HELPER
select DRM_PANEL
help
DRM bridge wrapper of DRM panels
@@ -30,6 +29,7 @@ config DRM_CDNS_DSI
config DRM_CHIPONE_ICN6211
tristate "Chipone ICN6211 MIPI-DSI/RGB Converter bridge"
depends on OF
+ select DRM_KMS_HELPER
select DRM_MIPI_DSI
select DRM_PANEL_BRIDGE
help
diff --git a/drivers/gpu/drm/bridge/nwl-dsi.c b/drivers/gpu/drm/bridge/nwl-dsi.c
index a7389a0facfb..af07eeb47ca0 100644
--- a/drivers/gpu/drm/bridge/nwl-dsi.c
+++ b/drivers/gpu/drm/bridge/nwl-dsi.c
@@ -7,6 +7,7 @@
*/
#include <linux/bitfield.h>
+#include <linux/bits.h>
#include <linux/clk.h>
#include <linux/irq.h>
#include <linux/math64.h>
@@ -196,12 +197,9 @@ static u32 ps2bc(struct nwl_dsi *dsi, unsigned long long ps)
/*
* ui2bc - UI time periods to byte clock cycles
*/
-static u32 ui2bc(struct nwl_dsi *dsi, unsigned long long ui)
+static u32 ui2bc(unsigned int ui)
{
- u32 bpp = mipi_dsi_pixel_format_to_bpp(dsi->format);
-
- return DIV64_U64_ROUND_UP(ui * dsi->lanes,
- dsi->mode.clock * 1000 * bpp);
+ return DIV_ROUND_UP(ui, BITS_PER_BYTE);
}
/*
@@ -232,12 +230,12 @@ static int nwl_dsi_config_host(struct nwl_dsi *dsi)
}
/* values in byte clock cycles */
- cycles = ui2bc(dsi, cfg->clk_pre);
+ cycles = ui2bc(cfg->clk_pre);
DRM_DEV_DEBUG_DRIVER(dsi->dev, "cfg_t_pre: 0x%x\n", cycles);
nwl_dsi_write(dsi, NWL_DSI_CFG_T_PRE, cycles);
cycles = ps2bc(dsi, cfg->lpx + cfg->clk_prepare + cfg->clk_zero);
DRM_DEV_DEBUG_DRIVER(dsi->dev, "cfg_tx_gap (pre): 0x%x\n", cycles);
- cycles += ui2bc(dsi, cfg->clk_pre);
+ cycles += ui2bc(cfg->clk_pre);
DRM_DEV_DEBUG_DRIVER(dsi->dev, "cfg_t_post: 0x%x\n", cycles);
nwl_dsi_write(dsi, NWL_DSI_CFG_T_POST, cycles);
cycles = ps2bc(dsi, cfg->hs_exit);
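Note: the rework encodes that cfg->clk_pre is specified in UI (unit intervals) by the D-PHY spec, and one byte clock cycle spans eight UI regardless of lane count or pixel format, hence the plain round-up division. Spot-checking the arithmetic:

	/* ui2bc(7) -> DIV_ROUND_UP(7, 8) == 1 byte clock cycle
	 * ui2bc(8) -> DIV_ROUND_UP(8, 8) == 1
	 * ui2bc(9) -> DIV_ROUND_UP(9, 8) == 2 (a partial cycle rounds up) */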
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index dab8f76618f3..68d8415e6c28 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -1802,6 +1802,7 @@ static inline void ti_sn_gpio_unregister(void) {}
static void ti_sn65dsi86_runtime_disable(void *data)
{
+ pm_runtime_dont_use_autosuspend(data);
pm_runtime_disable(data);
}
@@ -1861,11 +1862,11 @@ static int ti_sn65dsi86_probe(struct i2c_client *client,
"failed to get reference clock\n");
pm_runtime_enable(dev);
+ pm_runtime_set_autosuspend_delay(pdata->dev, 500);
+ pm_runtime_use_autosuspend(pdata->dev);
ret = devm_add_action_or_reset(dev, ti_sn65dsi86_runtime_disable, dev);
if (ret)
return ret;
- pm_runtime_set_autosuspend_delay(pdata->dev, 500);
- pm_runtime_use_autosuspend(pdata->dev);
ti_sn65dsi86_debugfs_init(pdata);
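Note: the reordering matters because devm actions run in reverse registration order on unbind: configuring autosuspend before registering ti_sn65dsi86_runtime_disable() guarantees the pm_runtime_dont_use_autosuspend() call added to that action always balances a pm_runtime_use_autosuspend() that actually ran. The resulting pairing, sketched:

	/* probe, in order:                     unbind, via the devm action:
	 * pm_runtime_enable(dev);              pm_runtime_dont_use_autosuspend(dev);
	 * pm_runtime_set_autosuspend_delay();  pm_runtime_disable(dev);
	 * pm_runtime_use_autosuspend(dev);
	 * devm_add_action_or_reset(...);
	 */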
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 21174efd91be..88cd992df356 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -1327,8 +1327,10 @@ int drm_atomic_check_only(struct drm_atomic_state *state)
drm_dbg_atomic(dev, "checking %p\n", state);
- for_each_new_crtc_in_state(state, crtc, new_crtc_state, i)
- requested_crtc |= drm_crtc_mask(crtc);
+ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
+ if (new_crtc_state->enable)
+ requested_crtc |= drm_crtc_mask(crtc);
+ }
for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) {
ret = drm_atomic_plane_check(old_plane_state, new_plane_state);
@@ -1377,8 +1379,10 @@ int drm_atomic_check_only(struct drm_atomic_state *state)
}
}
- for_each_new_crtc_in_state(state, crtc, new_crtc_state, i)
- affected_crtc |= drm_crtc_mask(crtc);
+ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
+ if (new_crtc_state->enable)
+ affected_crtc |= drm_crtc_mask(crtc);
+ }
/*
* For commits that allow modesets drivers can add other CRTCs to the
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index 9781722519c3..54d62fdb4ef9 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -76,15 +76,17 @@ int drm_atomic_set_mode_for_crtc(struct drm_crtc_state *state,
state->mode_blob = NULL;
if (mode) {
+ struct drm_property_blob *blob;
+
drm_mode_convert_to_umode(&umode, mode);
- state->mode_blob =
- drm_property_create_blob(state->crtc->dev,
- sizeof(umode),
- &umode);
- if (IS_ERR(state->mode_blob))
- return PTR_ERR(state->mode_blob);
+ blob = drm_property_create_blob(crtc->dev,
+ sizeof(umode), &umode);
+ if (IS_ERR(blob))
+ return PTR_ERR(blob);
drm_mode_copy(&state->mode, mode);
+
+ state->mode_blob = blob;
state->enable = true;
drm_dbg_atomic(crtc->dev,
"Set [MODE:%s] for [CRTC:%d:%s] state %p\n",
diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index f19d9acbe959..50b8a088f763 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -27,11 +27,11 @@
/*
* Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
*/
-
#include <linux/dma-buf-map.h>
#include <linux/export.h>
#include <linux/highmem.h>
#include <linux/cc_platform.h>
+#include <linux/ioport.h>
#include <xen/xen.h>
#include <drm/drm_cache.h>
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index a50c82bc2b2f..76a8c707c34b 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -2330,6 +2330,9 @@ EXPORT_SYMBOL(drm_connector_atomic_hdr_metadata_equal);
void drm_connector_set_vrr_capable_property(
struct drm_connector *connector, bool capable)
{
+ if (!connector->vrr_capable_property)
+ return;
+
drm_object_property_set_value(&connector->base,
connector->vrr_capable_property,
capable);
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 12893e7be89b..f5f5de362ff2 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -5345,6 +5345,7 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi
if (!(edid->input & DRM_EDID_INPUT_DIGITAL))
return quirks;
+ info->color_formats |= DRM_COLOR_FORMAT_RGB444;
drm_parse_cea_ext(connector, edid);
/*
@@ -5393,7 +5394,6 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi
DRM_DEBUG("%s: Assigning EDID-1.4 digital sink color depth as %d bpc.\n",
connector->name, info->bpc);
- info->color_formats |= DRM_COLOR_FORMAT_RGB444;
if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB444)
info->color_formats |= DRM_COLOR_FORMAT_YCRCB444;
if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB422)
diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
index cefd0cbf9deb..dc275c466c9c 100644
--- a/drivers/gpu/drm/drm_gem_cma_helper.c
+++ b/drivers/gpu/drm/drm_gem_cma_helper.c
@@ -512,6 +512,7 @@ int drm_gem_cma_mmap(struct drm_gem_cma_object *cma_obj, struct vm_area_struct *
*/
vma->vm_pgoff -= drm_vma_node_start(&obj->vma_node);
vma->vm_flags &= ~VM_PFNMAP;
+ vma->vm_flags |= VM_DONTEXPAND;
if (cma_obj->map_noncoherent) {
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index 042bb80383c9..b910978d3e48 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -115,6 +115,12 @@ static const struct drm_dmi_panel_orientation_data lcd1280x1920_rightside_up = {
.orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP,
};
+static const struct drm_dmi_panel_orientation_data lcd1600x2560_leftside_up = {
+ .width = 1600,
+ .height = 2560,
+ .orientation = DRM_MODE_PANEL_ORIENTATION_LEFT_UP,
+};
+
static const struct dmi_system_id orientation_data[] = {
{ /* Acer One 10 (S1003) */
.matches = {
@@ -275,6 +281,12 @@ static const struct dmi_system_id orientation_data[] = {
DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Default string"),
},
.driver_data = (void *)&onegx1_pro,
+ }, { /* OneXPlayer */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ONE-NETBOOK TECHNOLOGY CO., LTD."),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ONE XPLAYER"),
+ },
+ .driver_data = (void *)&lcd1600x2560_leftside_up,
}, { /* Samsung GalaxyBook 10.6 */
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
diff --git a/drivers/gpu/drm/drm_privacy_screen.c b/drivers/gpu/drm/drm_privacy_screen.c
index beaf99e9120a..b688841c18e4 100644
--- a/drivers/gpu/drm/drm_privacy_screen.c
+++ b/drivers/gpu/drm/drm_privacy_screen.c
@@ -269,7 +269,7 @@ EXPORT_SYMBOL(drm_privacy_screen_get_state);
*
* The notifier is called with no locks held. The new hw_state and sw_state
* can be retrieved using the drm_privacy_screen_get_state() function.
- * A pointer to the drm_privacy_screen's struct is passed as the void *data
+ * A pointer to the drm_privacy_screen's struct is passed as the ``void *data``
* argument of the notifier_block's notifier_call.
*
* The notifier will NOT be called when changes are made through
diff --git a/drivers/gpu/drm/drm_privacy_screen_x86.c b/drivers/gpu/drm/drm_privacy_screen_x86.c
index a2cafb294ca6..e7aa74ad0b24 100644
--- a/drivers/gpu/drm/drm_privacy_screen_x86.c
+++ b/drivers/gpu/drm/drm_privacy_screen_x86.c
@@ -33,6 +33,9 @@ static bool __init detect_thinkpad_privacy_screen(void)
unsigned long long output;
acpi_status status;
+ if (acpi_disabled)
+ return false;
+
/* Get embedded-controller handle */
status = acpi_get_devices("PNP0C09", acpi_set_handle, NULL, &ec_handle);
if (ACPI_FAILURE(status) || !ec_handle)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index b03c20c14ca1..a17313282e8b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -469,8 +469,8 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
return -EINVAL;
}
- if (args->stream_size > SZ_64K || args->nr_relocs > SZ_64K ||
- args->nr_bos > SZ_64K || args->nr_pmrs > 128) {
+ if (args->stream_size > SZ_128K || args->nr_relocs > SZ_128K ||
+ args->nr_bos > SZ_128K || args->nr_pmrs > 128) {
DRM_ERROR("submit arguments out of size limits\n");
return -EINVAL;
}
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index 12571ac45540..c04264f70ad1 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -678,7 +678,6 @@ static int decon_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct decon_context *ctx;
struct device_node *i80_if_timings;
- struct resource *res;
int ret;
if (!dev->of_node)
@@ -728,16 +727,11 @@ static int decon_probe(struct platform_device *pdev)
goto err_iounmap;
}
- res = platform_get_resource_byname(pdev, IORESOURCE_IRQ,
- ctx->i80_if ? "lcd_sys" : "vsync");
- if (!res) {
- dev_err(dev, "irq request failed.\n");
- ret = -ENXIO;
+ ret = platform_get_irq_byname(pdev, ctx->i80_if ? "lcd_sys" : "vsync");
+ if (ret < 0)
goto err_iounmap;
- }
- ret = devm_request_irq(dev, res->start, decon_irq_handler,
- 0, "drm_decon", ctx);
+ ret = devm_request_irq(dev, ret, decon_irq_handler, 0, "drm_decon", ctx);
if (ret) {
dev_err(dev, "irq request failed.\n");
goto err_iounmap;
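Note: this and the following Exynos hunks all apply the same conversion: platform_get_irq*() returns a usable IRQ number or a negative errno and already prints its own error, so the IORESOURCE_IRQ lookup and the local dev_err() become dead weight. The canonical shape (sketch; handler stands in for the driver's ISR):

	int irq = platform_get_irq(pdev, 0);

	if (irq < 0)
		return irq;     /* also propagates -EPROBE_DEFER correctly */

	ret = devm_request_irq(dev, irq, handler, 0, dev_name(dev), ctx);
	if (ret)
		return ret;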
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index 32a36572b894..d13f5e3a030d 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -1334,8 +1334,10 @@ static int exynos_dsi_register_te_irq(struct exynos_dsi *dsi,
int ret;
int te_gpio_irq;
- dsi->te_gpio = devm_gpiod_get_optional(dsi->dev, "te", GPIOD_IN);
- if (IS_ERR(dsi->te_gpio)) {
+ dsi->te_gpio = gpiod_get_optional(panel, "te", GPIOD_IN);
+ if (!dsi->te_gpio) {
+ return 0;
+ } else if (IS_ERR(dsi->te_gpio)) {
dev_err(dsi->dev, "gpio request failed with %ld\n",
PTR_ERR(dsi->te_gpio));
return PTR_ERR(dsi->te_gpio);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
index 023f54ee61a8..0ee32e4b1e43 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
@@ -1267,7 +1267,6 @@ static int fimc_probe(struct platform_device *pdev)
struct exynos_drm_ipp_formats *formats;
struct device *dev = &pdev->dev;
struct fimc_context *ctx;
- struct resource *res;
int ret;
int i, j, num_limits, num_formats;
@@ -1330,14 +1329,12 @@ static int fimc_probe(struct platform_device *pdev)
return PTR_ERR(ctx->regs);
/* resource irq */
- res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
- if (!res) {
- dev_err(dev, "failed to request irq resource.\n");
- return -ENOENT;
- }
+ ret = platform_get_irq(pdev, 0);
+ if (ret < 0)
+ return ret;
- ret = devm_request_irq(dev, res->start, fimc_irq_handler,
- 0, dev_name(dev), ctx);
+ ret = devm_request_irq(dev, ret, fimc_irq_handler,
+ 0, dev_name(dev), ctx);
if (ret < 0) {
dev_err(dev, "failed to request irq.\n");
return ret;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index c735e53939d8..7d5a483a54de 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -1133,7 +1133,6 @@ static int fimd_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct fimd_context *ctx;
struct device_node *i80_if_timings;
- struct resource *res;
int ret;
if (!dev->of_node)
@@ -1206,15 +1205,11 @@ static int fimd_probe(struct platform_device *pdev)
if (IS_ERR(ctx->regs))
return PTR_ERR(ctx->regs);
- res = platform_get_resource_byname(pdev, IORESOURCE_IRQ,
- ctx->i80_if ? "lcd_sys" : "vsync");
- if (!res) {
- dev_err(dev, "irq request failed.\n");
- return -ENXIO;
- }
+ ret = platform_get_irq_byname(pdev, ctx->i80_if ? "lcd_sys" : "vsync");
+ if (ret < 0)
+ return ret;
- ret = devm_request_irq(dev, res->start, fimd_irq_handler,
- 0, "drm_fimd", ctx);
+ ret = devm_request_irq(dev, ret, fimd_irq_handler, 0, "drm_fimd", ctx);
if (ret) {
dev_err(dev, "irq request failed.\n");
return ret;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 166a80262896..964dceb28c1e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -1220,7 +1220,6 @@ static int gsc_probe(struct platform_device *pdev)
struct gsc_driverdata *driver_data;
struct exynos_drm_ipp_formats *formats;
struct gsc_context *ctx;
- struct resource *res;
int num_formats, ret, i, j;
ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
@@ -1275,13 +1274,10 @@ static int gsc_probe(struct platform_device *pdev)
return PTR_ERR(ctx->regs);
/* resource irq */
- res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
- if (!res) {
- dev_err(dev, "failed to request irq resource.\n");
- return -ENOENT;
- }
+ ctx->irq = platform_get_irq(pdev, 0);
+ if (ctx->irq < 0)
+ return ctx->irq;
- ctx->irq = res->start;
ret = devm_request_irq(dev, ctx->irq, gsc_irq_handler, 0,
dev_name(dev), ctx);
if (ret < 0) {
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index 41c54f1f60bc..e5204be86093 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -809,19 +809,17 @@ static int mixer_resources_init(struct mixer_context *mixer_ctx)
return -ENXIO;
}
- res = platform_get_resource(mixer_ctx->pdev, IORESOURCE_IRQ, 0);
- if (res == NULL) {
- dev_err(dev, "get interrupt resource failed.\n");
- return -ENXIO;
- }
+ ret = platform_get_irq(mixer_ctx->pdev, 0);
+ if (ret < 0)
+ return ret;
+ mixer_ctx->irq = ret;
- ret = devm_request_irq(dev, res->start, mixer_irq_handler,
- 0, "drm_mixer", mixer_ctx);
+ ret = devm_request_irq(dev, mixer_ctx->irq, mixer_irq_handler,
+ 0, "drm_mixer", mixer_ctx);
if (ret) {
dev_err(dev, "request interrupt failed.\n");
return ret;
}
- mixer_ctx->irq = res->start;
return 0;
}
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index a4c94dc2e216..cfd932514da2 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -101,6 +101,7 @@ config DRM_I915_USERPTR
config DRM_I915_GVT
bool "Enable Intel GVT-g graphics virtualization host support"
depends on DRM_I915
+ depends on X86
depends on 64BIT
default n
help
diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
index 2da4aacc956b..8ac196e814d5 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_bw.c
@@ -825,6 +825,7 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
unsigned int max_bw_point = 0, max_bw = 0;
unsigned int num_qgv_points = dev_priv->max_bw[0].num_qgv_points;
unsigned int num_psf_gv_points = dev_priv->max_bw[0].num_psf_gv_points;
+ bool changed = false;
u32 mask = 0;
/* FIXME earlier gens need some checks too */
@@ -868,6 +869,8 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
new_bw_state->data_rate[crtc->pipe] = new_data_rate;
new_bw_state->num_active_planes[crtc->pipe] = new_active_planes;
+ changed = true;
+
drm_dbg_kms(&dev_priv->drm,
"pipe %c data rate %u num active planes %u\n",
pipe_name(crtc->pipe),
@@ -875,7 +878,19 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
new_bw_state->num_active_planes[crtc->pipe]);
}
- if (!new_bw_state)
+ old_bw_state = intel_atomic_get_old_bw_state(state);
+ new_bw_state = intel_atomic_get_new_bw_state(state);
+
+ if (new_bw_state &&
+ intel_can_enable_sagv(dev_priv, old_bw_state) !=
+ intel_can_enable_sagv(dev_priv, new_bw_state))
+ changed = true;
+
+ /*
+ * If none of our inputs (data rates, number of active
+ * planes, SAGV yes/no) changed then nothing to do here.
+ */
+ if (!changed)
return 0;
ret = intel_atomic_lock_global_state(&new_bw_state->base);
@@ -961,7 +976,6 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
*/
new_bw_state->qgv_points_mask = ~allowed_points & mask;
- old_bw_state = intel_atomic_get_old_bw_state(state);
/*
* If the actual mask had changed we need to make sure that
* the commits are serialized(in case this is a nomodeset, nonblocking)
diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h
index 46c6eecbd917..0ceaed1c9656 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.h
+++ b/drivers/gpu/drm/i915/display/intel_bw.h
@@ -30,19 +30,19 @@ struct intel_bw_state {
*/
u8 pipe_sagv_reject;
+ /* bitmask of active pipes */
+ u8 active_pipes;
+
/*
* Current QGV points mask, which restricts
* some particular SAGV states, not to confuse
* with pipe_sagv_mask.
*/
- u8 qgv_points_mask;
+ u16 qgv_points_mask;
unsigned int data_rate[I915_MAX_PIPES];
u8 num_active_planes[I915_MAX_PIPES];
- /* bitmask of active pipes */
- u8 active_pipes;
-
int min_cdclk;
};
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index bf7ce684dd8e..bb4a85445fc6 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -10673,6 +10673,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
vlv_wm_sanitize(dev_priv);
} else if (DISPLAY_VER(dev_priv) >= 9) {
skl_wm_get_hw_state(dev_priv);
+ skl_wm_sanitize(dev_priv);
} else if (HAS_PCH_SPLIT(dev_priv)) {
ilk_wm_get_hw_state(dev_priv);
}
diff --git a/drivers/gpu/drm/i915/display/intel_drrs.c b/drivers/gpu/drm/i915/display/intel_drrs.c
index c1439fcb5a95..3ff149df4a77 100644
--- a/drivers/gpu/drm/i915/display/intel_drrs.c
+++ b/drivers/gpu/drm/i915/display/intel_drrs.c
@@ -405,6 +405,7 @@ intel_drrs_init(struct intel_connector *connector,
struct drm_display_mode *fixed_mode)
{
struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+ struct intel_encoder *encoder = connector->encoder;
struct drm_display_mode *downclock_mode = NULL;
INIT_DELAYED_WORK(&dev_priv->drrs.work, intel_drrs_downclock_work);
@@ -416,6 +417,13 @@ intel_drrs_init(struct intel_connector *connector,
return NULL;
}
+ if ((DISPLAY_VER(dev_priv) < 8 && !HAS_GMCH(dev_priv)) &&
+ encoder->port != PORT_A) {
+ drm_dbg_kms(&dev_priv->drm,
+ "DRRS only supported on eDP port A\n");
+ return NULL;
+ }
+
if (dev_priv->vbt.drrs_type != SEAMLESS_DRRS_SUPPORT) {
drm_dbg_kms(&dev_priv->drm, "VBT doesn't support DRRS\n");
return NULL;
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 160fd2bdafe5..957feeccff3f 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -1115,7 +1115,8 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state,
/* Wa_22010751166: icl, ehl, tgl, dg1, rkl */
if (DISPLAY_VER(i915) >= 11 &&
- (plane_state->view.color_plane[0].y + drm_rect_height(&plane_state->uapi.src)) & 3) {
+ (plane_state->view.color_plane[0].y +
+ (drm_rect_height(&plane_state->uapi.src) >> 16)) & 3) {
plane_state->no_fbc_reason = "plane end Y offset misaligned";
return false;
}
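Note: plane_state->uapi.src is kept in 16.16 fixed point, so the old check summed a fixed-point height with an integer Y offset and tested alignment on the wrong bits. Shifting right by 16 first converts to whole pixels before the alignment test (illustrative restatement of the fixed hunk):

	/* uapi.src is 16.16 fixed point; >> 16 yields whole pixels */
	int height_px = drm_rect_height(&plane_state->uapi.src) >> 16;

	/* Wa_22010751166: reject FBC when the plane's end Y is not 4-aligned */
	bool misaligned = (plane_state->view.color_plane[0].y + height_px) & 3;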
diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c
index 0065111593a6..4a2662838cd8 100644
--- a/drivers/gpu/drm/i915/display/intel_opregion.c
+++ b/drivers/gpu/drm/i915/display/intel_opregion.c
@@ -360,6 +360,21 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder,
port++;
}
+ /*
+ * The port numbering and mapping here is bizarre. The now-obsolete
+ * swsci spec supports ports numbered [0..4]. Port E is handled as a
+ * special case, but port F and beyond are not. The functionality is
+ * supposed to be obsolete for new platforms. Just bail out if the port
+ * number is out of bounds after mapping.
+ */
+ if (port > 4) {
+ drm_dbg_kms(&dev_priv->drm,
+ "[ENCODER:%d:%s] port %c (index %u) out of bounds for display power state notification\n",
+ intel_encoder->base.base.id, intel_encoder->base.name,
+ port_name(intel_encoder->port), port);
+ return -EINVAL;
+ }
+
if (!enable)
parm |= 4 << 8;
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 1a376e9a1ff3..d610e48cab94 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -959,6 +959,9 @@ static int check_overlay_dst(struct intel_overlay *overlay,
const struct intel_crtc_state *pipe_config =
overlay->crtc->config;
+ if (rec->dst_height == 0 || rec->dst_width == 0)
+ return -EINVAL;
+
if (rec->dst_x < pipe_config->pipe_src_w &&
rec->dst_x + rec->dst_width <= pipe_config->pipe_src_w &&
rec->dst_y < pipe_config->pipe_src_h &&
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index a1a663f362e7..00279e8c2775 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -1406,6 +1406,13 @@ static inline u32 man_trk_ctl_single_full_frame_bit_get(struct drm_i915_private
PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME;
}
+static inline u32 man_trk_ctl_partial_frame_bit_get(struct drm_i915_private *dev_priv)
+{
+ return IS_ALDERLAKE_P(dev_priv) ?
+ ADLP_PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE :
+ PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE;
+}
+
static void psr_force_hw_tracking_exit(struct intel_dp *intel_dp)
{
struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
@@ -1510,7 +1517,13 @@ static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state,
{
struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
- u32 val = PSR2_MAN_TRK_CTL_ENABLE;
+ u32 val = 0;
+
+ if (!IS_ALDERLAKE_P(dev_priv))
+ val = PSR2_MAN_TRK_CTL_ENABLE;
+
+ /* SF partial frame enable has to be set even on full update */
+ val |= man_trk_ctl_partial_frame_bit_get(dev_priv);
if (full_update) {
/*
@@ -1530,7 +1543,6 @@ static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state,
} else {
drm_WARN_ON(crtc_state->uapi.crtc->dev, clip->y1 % 4 || clip->y2 % 4);
- val |= PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE;
val |= PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(clip->y1 / 4 + 1);
val |= PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(clip->y2 / 4 + 1);
}
diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c
index 09f405e4d363..92ff654f54f5 100644
--- a/drivers/gpu/drm/i915/display/intel_snps_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_snps_phy.c
@@ -34,7 +34,7 @@ void intel_snps_phy_wait_for_calibration(struct drm_i915_private *dev_priv)
if (intel_de_wait_for_clear(dev_priv, ICL_PHY_MISC(phy),
DG2_PHY_DP_TX_ACK_MASK, 25))
DRM_ERROR("SNPS PHY %c failed to calibrate after 25ms.\n",
- phy);
+ phy_name(phy));
}
}
diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c
index 40faa18947c9..7784c30fe893 100644
--- a/drivers/gpu/drm/i915/display/intel_tc.c
+++ b/drivers/gpu/drm/i915/display/intel_tc.c
@@ -345,10 +345,11 @@ static bool icl_tc_phy_status_complete(struct intel_digital_port *dig_port)
static bool adl_tc_phy_status_complete(struct intel_digital_port *dig_port)
{
struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
+ enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port);
struct intel_uncore *uncore = &i915->uncore;
u32 val;
- val = intel_uncore_read(uncore, TCSS_DDI_STATUS(dig_port->tc_phy_fia_idx));
+ val = intel_uncore_read(uncore, TCSS_DDI_STATUS(tc_port));
if (val == 0xffffffff) {
drm_dbg_kms(&i915->drm,
"Port %s: PHY in TCCOLD, assuming not complete\n",
@@ -690,6 +691,8 @@ void intel_tc_port_sanitize(struct intel_digital_port *dig_port)
{
struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
struct intel_encoder *encoder = &dig_port->base;
+ intel_wakeref_t tc_cold_wref;
+ enum intel_display_power_domain domain;
int active_links = 0;
mutex_lock(&dig_port->tc_lock);
@@ -701,12 +704,11 @@ void intel_tc_port_sanitize(struct intel_digital_port *dig_port)
drm_WARN_ON(&i915->drm, dig_port->tc_mode != TC_PORT_DISCONNECTED);
drm_WARN_ON(&i915->drm, dig_port->tc_lock_wakeref);
- if (active_links) {
- enum intel_display_power_domain domain;
- intel_wakeref_t tc_cold_wref = tc_cold_block(dig_port, &domain);
- dig_port->tc_mode = intel_tc_port_get_current_mode(dig_port);
+ tc_cold_wref = tc_cold_block(dig_port, &domain);
+ dig_port->tc_mode = intel_tc_port_get_current_mode(dig_port);
+ if (active_links) {
if (!icl_tc_phy_is_connected(dig_port))
drm_dbg_kms(&i915->drm,
"Port %s: PHY disconnected with %d active link(s)\n",
@@ -715,10 +717,23 @@ void intel_tc_port_sanitize(struct intel_digital_port *dig_port)
dig_port->tc_lock_wakeref = tc_cold_block(dig_port,
&dig_port->tc_lock_power_domain);
-
- tc_cold_unblock(dig_port, domain, tc_cold_wref);
+ } else {
+ /*
+ * TBT-alt is the default mode in any case where the PHY ownership is
+ * not held (regardless of the sink's connected live state), so we'll
+ * just switch to disconnected mode from it here without logging a
+ * note.
+ */
+ if (dig_port->tc_mode != TC_PORT_TBT_ALT)
+ drm_dbg_kms(&i915->drm,
+ "Port %s: PHY left in %s mode on disabled port, disconnecting it\n",
+ dig_port->tc_port_name,
+ tc_port_mode_name(dig_port->tc_mode));
+ icl_tc_phy_disconnect(dig_port);
}
+ tc_cold_unblock(dig_port, domain, tc_cold_wref);
+
drm_dbg_kms(&i915->drm, "Port %s: sanitize mode (%s)\n",
dig_port->tc_port_name,
tc_port_mode_name(dig_port->tc_mode));
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 3a5b247be738..1736efa43339 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2505,9 +2505,14 @@ static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce,
timeout) < 0) {
i915_request_put(rq);
- tl = intel_context_timeline_lock(ce);
+ /*
+ * Error path, cannot use intel_context_timeline_lock as
+ * that is user interruptible and this cleanup step
+ * must be done.
+ */
+ mutex_lock(&ce->timeline->mutex);
intel_context_exit(ce);
- intel_context_timeline_unlock(tl);
+ mutex_unlock(&ce->timeline->mutex);
if (nonblock)
return -EWOULDBLOCK;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 4b4829eb16c2..0dd107dcecc2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -311,6 +311,7 @@ struct drm_i915_gem_object {
#define I915_BO_READONLY BIT(6)
#define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release! */
#define I915_BO_PROTECTED BIT(8)
+#define I915_BO_WAS_BOUND_BIT 9
/**
* @mem_flags - Mutable placement-related flags
*
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 9f429ed6e78a..a50f884973bc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -10,6 +10,8 @@
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
+#include "gt/intel_gt.h"
+
void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages,
unsigned int sg_page_sizes)
@@ -221,6 +223,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
__i915_gem_object_reset_page_iter(obj);
obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+ if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ intel_wakeref_t wakeref;
+
+ with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
+ intel_gt_invalidate_tlbs(to_gt(i915));
+ }
+
return pages;
}
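Note: the flush above only fires when I915_BO_WAS_BOUND_BIT is set, so never-bound objects skip the TLB invalidation entirely. The companion hunk that sets the bit on the bind path is not part of this excerpt; presumably it amounts to something like:

	/* sketched from the commit's intent; the setter is not shown here */
	if (vma->obj)
		set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);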
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index de3fe79b665a..1f880c8c66e7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -842,11 +842,9 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
} else if (obj->mm.madv != I915_MADV_WILLNEED) {
bo->priority = I915_TTM_PRIO_PURGE;
} else if (!i915_gem_object_has_pages(obj)) {
- if (bo->priority < I915_TTM_PRIO_HAS_PAGES)
- bo->priority = I915_TTM_PRIO_HAS_PAGES;
+ bo->priority = I915_TTM_PRIO_NO_PAGES;
} else {
- if (bo->priority > I915_TTM_PRIO_NO_PAGES)
- bo->priority = I915_TTM_PRIO_NO_PAGES;
+ bo->priority = I915_TTM_PRIO_HAS_PAGES;
}
ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index ee9612a3ee5e..e130c820ae4e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -427,11 +427,17 @@ __i915_ttm_move(struct ttm_buffer_object *bo,
if (!IS_ERR(fence))
goto out;
- } else if (move_deps) {
- int err = i915_deps_sync(move_deps, ctx);
+ } else {
+ int err = PTR_ERR(fence);
+
+ if (err == -EINTR || err == -ERESTARTSYS || err == -EAGAIN)
+ return fence;
- if (err)
- return ERR_PTR(err);
+ if (move_deps) {
+ err = i915_deps_sync(move_deps, ctx);
+ if (err)
+ return ERR_PTR(err);
+ }
}
/* Error intercept failed or no accelerated migration to start with */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index f98f0fb21efb..35d0fcd3a86c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -29,6 +29,8 @@ void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
{
spin_lock_init(&gt->irq_lock);
+ mutex_init(&gt->tlb_invalidate_lock);
+
INIT_LIST_HEAD(&gt->closed_vma);
spin_lock_init(&gt->closed_lock);
@@ -912,3 +914,109 @@ void intel_gt_info_print(const struct intel_gt_info *info,
intel_sseu_dump(&info->sseu, p);
}
+
+struct reg_and_bit {
+ i915_reg_t reg;
+ u32 bit;
+};
+
+static struct reg_and_bit
+get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
+ const i915_reg_t *regs, const unsigned int num)
+{
+ const unsigned int class = engine->class;
+ struct reg_and_bit rb = { };
+
+ if (drm_WARN_ON_ONCE(&engine->i915->drm,
+ class >= num || !regs[class].reg))
+ return rb;
+
+ rb.reg = regs[class];
+ if (gen8 && class == VIDEO_DECODE_CLASS)
+ rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
+ else
+ rb.bit = engine->instance;
+
+ rb.bit = BIT(rb.bit);
+
+ return rb;
+}
+
+void intel_gt_invalidate_tlbs(struct intel_gt *gt)
+{
+ static const i915_reg_t gen8_regs[] = {
+ [RENDER_CLASS] = GEN8_RTCR,
+ [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */
+ [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR,
+ [COPY_ENGINE_CLASS] = GEN8_BTCR,
+ };
+ static const i915_reg_t gen12_regs[] = {
+ [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR,
+ [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR,
+ [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR,
+ [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
+ };
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ const i915_reg_t *regs;
+ unsigned int num = 0;
+
+ if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+ return;
+
+ if (GRAPHICS_VER(i915) == 12) {
+ regs = gen12_regs;
+ num = ARRAY_SIZE(gen12_regs);
+ } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
+ regs = gen8_regs;
+ num = ARRAY_SIZE(gen8_regs);
+ } else if (GRAPHICS_VER(i915) < 8) {
+ return;
+ }
+
+ if (drm_WARN_ONCE(&i915->drm, !num,
+ "Platform does not implement TLB invalidation!"))
+ return;
+
+ GEM_TRACE("\n");
+
+ assert_rpm_wakelock_held(&i915->runtime_pm);
+
+ mutex_lock(&gt->tlb_invalidate_lock);
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ for_each_engine(engine, gt, id) {
+ /*
+ * HW architecture suggests a typical invalidation time of 40us,
+ * with pessimistic cases up to 100us and a recommendation to
+ * cap at 1ms. We go a bit higher just in case.
+ */
+ const unsigned int timeout_us = 100;
+ const unsigned int timeout_ms = 4;
+ struct reg_and_bit rb;
+
+ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+ if (!i915_mmio_reg_offset(rb.reg))
+ continue;
+
+ intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ if (__intel_wait_for_register_fw(uncore,
+ rb.reg, rb.bit, 0,
+ timeout_us, timeout_ms,
+ NULL))
+ drm_err_ratelimited(&gt->i915->drm,
+ "%s TLB invalidation did not complete in %ums!\n",
+ engine->name, timeout_ms);
+ }
+
+ /*
+ * Use delayed put since a) we mostly expect a flurry of TLB
+ * invalidations so it is good to avoid paying the forcewake cost and
+ * b) it works around a bug in Icelake which cannot cope with too rapid
+ * transitions.
+ */
+ intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
+ mutex_unlock(&gt->tlb_invalidate_lock);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 3ace129eb2af..a913fb6ffec3 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -91,4 +91,6 @@ void intel_gt_info_print(const struct intel_gt_info *info,
void intel_gt_watchdog_work(struct work_struct *work);
+void intel_gt_invalidate_tlbs(struct intel_gt *gt);
+
#endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 14216cc471b1..f20687796490 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -73,6 +73,8 @@ struct intel_gt {
struct intel_uc uc;
+ struct mutex tlb_invalidate_lock;
+
struct i915_wa_list wa_list;
struct intel_gt_timelines {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index f9240d4baa69..3aabe164c329 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -206,6 +206,11 @@ struct intel_guc {
* context usage for overflows.
*/
struct delayed_work work;
+
+ /**
+ * @shift: Right shift value for the gpm timestamp
+ */
+ u32 shift;
} timestamp;
#ifdef CONFIG_DRM_I915_SELFTEST
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 13b27b8ff74e..ba21ace973da 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -110,7 +110,7 @@ static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id)
{
u32 request[] = {
GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
- SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 2),
+ SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1),
id,
};
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index e7517206af82..154ad726e266 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1113,6 +1113,19 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
if (new_start == lower_32_bits(*prev_start))
return;
+ /*
+ * When gt is unparked, we update the gt timestamp and start the ping
+ * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
+ * is unparked, all switched-in contexts will have a start time that is
+ * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
+ *
+ * If neither gt_stamp nor new_start has rolled over, then the
+ * gt_stamp_hi does not need to be adjusted, however if one of them has
+ * rolled over, we need to adjust gt_stamp_hi accordingly.
+ *
+ * The below conditions address the cases of new_start rollover and
+ * gt_stamp_last rollover respectively.
+ */
if (new_start < gt_stamp_last &&
(new_start - gt_stamp_last) <= POLL_TIME_CLKS)
gt_stamp_hi++;
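
The conditions above extend a 32-bit GuC timestamp into the driver's 64-bit view while tolerating wraparound in either sample. For the simpler monotonic case (the same idiom guc_update_pm_timestamp() uses below), the rule collapses to: if the new low word went backwards, the counter wrapped, so carry into the high word. A self-contained sketch:

#include <stdint.h>

/* Extend a monotonically increasing 32-bit counter to 64 bits,
 * assuming at most one wrap between consecutive samples. */
static uint64_t extend_u32(uint64_t prev, uint32_t now)
{
	uint32_t hi = prev >> 32;

	if (now < (uint32_t)prev)	/* low word went backwards: wrap */
		hi++;
	return ((uint64_t)hi << 32) | now;
}

For example, extending prev = 0x1fffffff0 with now = 0x10 yields 0x200000010: the low word shrank, so one wrap is credited to the high word.
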
@@ -1124,17 +1137,45 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
}
-static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
+/*
+ * GuC updates shared memory and KMD reads it. Since this is not synchronized,
+ * we run into a race where the value read is inconsistent. Sometimes the
+ * inconsistency is in reading the upper MSB bytes of the last_in value when
+ * this race occurs. Two kinds of corruption are seen: the upper 8 bits are
+ * zero, or the upper 24 bits are zero. Since the remaining bits still hold
+ * non-zero values, it is non-trivial to determine their validity, so instead
+ * we read the values multiple times until they are consistent. In test runs,
+ * 3 attempts result in consistent values. The upper bound is set to 6 attempts
+ * and may need to be tuned as per any new occurrences.
+ */
+static void __get_engine_usage_record(struct intel_engine_cs *engine,
+ u32 *last_in, u32 *id, u32 *total)
{
struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine);
+ int i = 0;
+
+ do {
+ *last_in = READ_ONCE(rec->last_switch_in_stamp);
+ *id = READ_ONCE(rec->current_context_index);
+ *total = READ_ONCE(rec->total_runtime);
+
+ if (READ_ONCE(rec->last_switch_in_stamp) == *last_in &&
+ READ_ONCE(rec->current_context_index) == *id &&
+ READ_ONCE(rec->total_runtime) == *total)
+ break;
+ } while (++i < 6);
+}
+
+static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
+{
struct intel_engine_guc_stats *stats = &engine->stats.guc;
struct intel_guc *guc = &engine->gt->uc.guc;
- u32 last_switch = rec->last_switch_in_stamp;
- u32 ctx_id = rec->current_context_index;
- u32 total = rec->total_runtime;
+ u32 last_switch, ctx_id, total;
lockdep_assert_held(&guc->timestamp.lock);
+ __get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
+
stats->running = ctx_id != ~0U && last_switch;
if (stats->running)
__extend_last_switch(guc, &stats->start_gt_clk, last_switch);
@@ -1149,23 +1190,51 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
}
}
-static void guc_update_pm_timestamp(struct intel_guc *guc,
- struct intel_engine_cs *engine,
- ktime_t *now)
+static u32 gpm_timestamp_shift(struct intel_gt *gt)
+{
+ intel_wakeref_t wakeref;
+ u32 reg, shift;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
+
+ shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
+ GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
+
+ return 3 - shift;
+}
+
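+/*
+ * The GPM timestamp is a 64-bit value exposed as two 32-bit registers.
+ * Read hi, then lo, then hi again: if the two hi reads disagree, a
+ * carry from lo into hi slipped in between, so resample (with a
+ * bounded number of retries).
+ */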
+static u64 gpm_timestamp(struct intel_gt *gt)
+{
+ u32 lo, hi, old_hi, loop = 0;
+
+ hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
+ do {
+ lo = intel_uncore_read(gt->uncore, MISC_STATUS0);
+ old_hi = hi;
+ hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
+ } while (old_hi != hi && loop++ < 2);
+
+ return ((u64)hi << 32) | lo;
+}
+
+static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
{
- u32 gt_stamp_now, gt_stamp_hi;
+ struct intel_gt *gt = guc_to_gt(guc);
+ u32 gt_stamp_lo, gt_stamp_hi;
+ u64 gpm_ts;
lockdep_assert_held(&guc->timestamp.lock);
gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
- gt_stamp_now = intel_uncore_read(engine->uncore,
- RING_TIMESTAMP(engine->mmio_base));
+ gpm_ts = gpm_timestamp(gt) >> guc->timestamp.shift;
+ gt_stamp_lo = lower_32_bits(gpm_ts);
*now = ktime_get();
- if (gt_stamp_now < lower_32_bits(guc->timestamp.gt_stamp))
+ if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
gt_stamp_hi++;
- guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_now;
+ guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
}
/*
@@ -1208,8 +1277,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
stats_saved = *stats;
gt_stamp_saved = guc->timestamp.gt_stamp;
+ /*
+ * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
+ * start_gt_clk' calculation below for active engines.
+ */
guc_update_engine_gt_clks(engine);
- guc_update_pm_timestamp(guc, engine, now);
+ guc_update_pm_timestamp(guc, now);
intel_gt_pm_put_async(gt);
if (i915_reset_count(gpu_error) != reset_count) {
*stats = stats_saved;
@@ -1241,8 +1314,8 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
spin_lock_irqsave(&guc->timestamp.lock, flags);
+ guc_update_pm_timestamp(guc, &unused);
for_each_engine(engine, gt, id) {
- guc_update_pm_timestamp(guc, engine, &unused);
guc_update_engine_gt_clks(engine);
engine->stats.guc.prev_total = 0;
}
@@ -1259,10 +1332,11 @@ static void __update_guc_busyness_stats(struct intel_guc *guc)
ktime_t unused;
spin_lock_irqsave(&guc->timestamp.lock, flags);
- for_each_engine(engine, gt, id) {
- guc_update_pm_timestamp(guc, engine, &unused);
+
+ guc_update_pm_timestamp(guc, &unused);
+ for_each_engine(engine, gt, id)
guc_update_engine_gt_clks(engine);
- }
+
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}
@@ -1335,10 +1409,15 @@ void intel_guc_busyness_park(struct intel_gt *gt)
void intel_guc_busyness_unpark(struct intel_gt *gt)
{
struct intel_guc *guc = &gt->uc.guc;
+ unsigned long flags;
+ ktime_t unused;
if (!guc_submission_initialized(guc))
return;
+ spin_lock_irqsave(&guc->timestamp.lock, flags);
+ guc_update_pm_timestamp(guc, &unused);
+ spin_unlock_irqrestore(&guc->timestamp.lock, flags);
mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
guc->timestamp.ping_delay);
}
@@ -1783,6 +1862,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
spin_lock_init(&guc->timestamp.lock);
INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
+ guc->timestamp.shift = gpm_timestamp_shift(gt);
return 0;
}
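
The ping_delay above exists because the GT timestamp is only 32 bits wide: the ping worker has to sample it at least once per wrap period so the high-word extension never misses a rollover. A rough worked example (the driver derives the real period from gt->clock_frequency; the 19.2 MHz here is purely illustrative):

#include <stdio.h>

int main(void)
{
	const double freq_hz = 19.2e6;	/* illustrative value only */
	const double wrap_s = 4294967296.0 / freq_hz;

	/* ~223.7 s: the ping worker must run well inside this window. */
	printf("32-bit counter wraps every %.1f seconds\n", wrap_s);
	return 0;
}
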
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 99d1781fa5f0..af79b39048f7 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1148,7 +1148,7 @@ static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
ops->set_pfn(se, s->shadow_page.mfn);
}
-/**
+/*
* Check if can do 2M page
* @vgpu: target vgpu
* @entry: target pfn's gtt entry
@@ -2193,7 +2193,7 @@ static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
}
/**
- * intel_vgpu_emulate_gtt_mmio_read - emulate GTT MMIO register read
+ * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read
* @vgpu: a vGPU
* @off: register offset
* @p_data: data will be returned to guest
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 5ae812d60abe..0633888a411e 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1522,7 +1522,7 @@ capture_engine(struct intel_engine_cs *engine,
struct i915_request *rq = NULL;
unsigned long flags;
- ee = intel_engine_coredump_alloc(engine, GFP_KERNEL);
+ ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL);
if (!ee)
return NULL;
diff --git a/drivers/gpu/drm/i915/i915_mm.h b/drivers/gpu/drm/i915/i915_mm.h
index 76f1d53bdf34..3ad22bbe80eb 100644
--- a/drivers/gpu/drm/i915/i915_mm.h
+++ b/drivers/gpu/drm/i915/i915_mm.h
@@ -6,6 +6,7 @@
#ifndef __I915_MM_H__
#define __I915_MM_H__
+#include <linux/bug.h>
#include <linux/types.h>
struct vm_area_struct;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 971d601fe751..902e4c802a12 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2684,7 +2684,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */
#define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */
-#define GUCPMTIMESTAMP _MMIO(0xC3E8)
+#define MISC_STATUS0 _MMIO(0xA500)
+#define MISC_STATUS1 _MMIO(0xA504)
/* There are 16 64-bit CS General Purpose Registers per-engine on Gen8+ */
#define GEN8_RING_CS_GPR(base, n) _MMIO((base) + 0x600 + (n) * 8)
@@ -2721,6 +2722,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1 << 28)
#define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1 << 24)
+#define GEN8_RTCR _MMIO(0x4260)
+#define GEN8_M1TCR _MMIO(0x4264)
+#define GEN8_M2TCR _MMIO(0x4268)
+#define GEN8_BTCR _MMIO(0x426c)
+#define GEN8_VTCR _MMIO(0x4270)
+
#if 0
#define PRB0_TAIL _MMIO(0x2030)
#define PRB0_HEAD _MMIO(0x2034)
@@ -2819,6 +2826,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define FAULT_VA_HIGH_BITS (0xf << 0)
#define FAULT_GTT_SEL (1 << 4)
+#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8)
+#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc)
+#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0)
+#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4)
+
#define GEN12_AUX_ERR_DBG _MMIO(0x43f4)
#define FPGA_DBG _MMIO(0x42300)
@@ -4817,6 +4829,7 @@ enum {
#define ADLP_PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(val) REG_FIELD_PREP(ADLP_PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR_MASK, val)
#define ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR_MASK REG_GENMASK(12, 0)
#define ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(val) REG_FIELD_PREP(ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR_MASK, val)
+#define ADLP_PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE REG_BIT(31)
#define ADLP_PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME REG_BIT(14)
#define ADLP_PSR2_MAN_TRK_CTL_SF_CONTINUOS_FULL_FRAME REG_BIT(13)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 29a858c53bdd..c0d6d5526abe 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -457,6 +457,9 @@ int i915_vma_bind(struct i915_vma *vma,
vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
}
+ if (vma->obj)
+ set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
+
atomic_or(bind_flags, &vma->flags);
return 0;
}
diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c
index da8f82c2342f..fc8a68f3a2ed 100644
--- a/drivers/gpu/drm/i915/intel_pch.c
+++ b/drivers/gpu/drm/i915/intel_pch.c
@@ -108,6 +108,7 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
/* Comet Lake V PCH is based on KBP, which is SPT compatible */
return PCH_SPT;
case INTEL_PCH_ICP_DEVICE_ID_TYPE:
+ case INTEL_PCH_ICP2_DEVICE_ID_TYPE:
drm_dbg_kms(&dev_priv->drm, "Found Ice Lake PCH\n");
drm_WARN_ON(&dev_priv->drm, !IS_ICELAKE(dev_priv));
return PCH_ICP;
@@ -123,7 +124,6 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
!IS_GEN9_BC(dev_priv));
return PCH_TGP;
case INTEL_PCH_JSP_DEVICE_ID_TYPE:
- case INTEL_PCH_JSP2_DEVICE_ID_TYPE:
drm_dbg_kms(&dev_priv->drm, "Found Jasper Lake PCH\n");
drm_WARN_ON(&dev_priv->drm, !IS_JSL_EHL(dev_priv));
return PCH_JSP;
diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h
index 6bff77521094..4ba0f1967cca 100644
--- a/drivers/gpu/drm/i915/intel_pch.h
+++ b/drivers/gpu/drm/i915/intel_pch.h
@@ -50,11 +50,11 @@ enum intel_pch {
#define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680
#define INTEL_PCH_CMP_V_DEVICE_ID_TYPE 0xA380
#define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480
+#define INTEL_PCH_ICP2_DEVICE_ID_TYPE 0x3880
#define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00
#define INTEL_PCH_TGP_DEVICE_ID_TYPE 0xA080
#define INTEL_PCH_TGP2_DEVICE_ID_TYPE 0x4380
#define INTEL_PCH_JSP_DEVICE_ID_TYPE 0x4D80
-#define INTEL_PCH_JSP2_DEVICE_ID_TYPE 0x3880
#define INTEL_PCH_ADP_DEVICE_ID_TYPE 0x7A80
#define INTEL_PCH_ADP2_DEVICE_ID_TYPE 0x5180
#define INTEL_PCH_ADP3_DEVICE_ID_TYPE 0x7A00
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 434b1f8b7fe3..fae4f7818d28 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4029,6 +4029,17 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state)
return ret;
}
+ if (intel_can_enable_sagv(dev_priv, new_bw_state) !=
+ intel_can_enable_sagv(dev_priv, old_bw_state)) {
+ ret = intel_atomic_serialize_global_state(&new_bw_state->base);
+ if (ret)
+ return ret;
+ } else if (new_bw_state->pipe_sagv_reject != old_bw_state->pipe_sagv_reject) {
+ ret = intel_atomic_lock_global_state(&new_bw_state->base);
+ if (ret)
+ return ret;
+ }
+
for_each_new_intel_crtc_in_state(state, crtc,
new_crtc_state, i) {
struct skl_pipe_wm *pipe_wm = &new_crtc_state->wm.skl.optimal;
@@ -4044,17 +4055,6 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state)
intel_can_enable_sagv(dev_priv, new_bw_state);
}
- if (intel_can_enable_sagv(dev_priv, new_bw_state) !=
- intel_can_enable_sagv(dev_priv, old_bw_state)) {
- ret = intel_atomic_serialize_global_state(&new_bw_state->base);
- if (ret)
- return ret;
- } else if (new_bw_state->pipe_sagv_reject != old_bw_state->pipe_sagv_reject) {
- ret = intel_atomic_lock_global_state(&new_bw_state->base);
- if (ret)
- return ret;
- }
-
return 0;
}
@@ -4717,6 +4717,10 @@ static const struct dbuf_slice_conf_entry dg2_allowed_dbufs[] = {
};
static const struct dbuf_slice_conf_entry adlp_allowed_dbufs[] = {
+ /*
+ * Keep the join_mbus cases first so check_mbus_joined()
+ * will prefer them over the !join_mbus cases.
+ */
{
.active_pipes = BIT(PIPE_A),
.dbuf_mask = {
@@ -4732,6 +4736,20 @@ static const struct dbuf_slice_conf_entry adlp_allowed_dbufs[] = {
.join_mbus = true,
},
{
+ .active_pipes = BIT(PIPE_A),
+ .dbuf_mask = {
+ [PIPE_A] = BIT(DBUF_S1) | BIT(DBUF_S2),
+ },
+ .join_mbus = false,
+ },
+ {
+ .active_pipes = BIT(PIPE_B),
+ .dbuf_mask = {
+ [PIPE_B] = BIT(DBUF_S3) | BIT(DBUF_S4),
+ },
+ .join_mbus = false,
+ },
+ {
.active_pipes = BIT(PIPE_A) | BIT(PIPE_B),
.dbuf_mask = {
[PIPE_A] = BIT(DBUF_S1) | BIT(DBUF_S2),
@@ -4835,7 +4853,7 @@ static bool check_mbus_joined(u8 active_pipes,
{
int i;
- for (i = 0; i < dbuf_slices[i].active_pipes; i++) {
+ for (i = 0; dbuf_slices[i].active_pipes != 0; i++) {
if (dbuf_slices[i].active_pipes == active_pipes)
return dbuf_slices[i].join_mbus;
}
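
The corrected loop condition matters because these tables end in a zeroed sentinel entry rather than carrying a length; the old test "i < dbuf_slices[i].active_pipes" only terminated by accident. The intended idiom, sketched with a hypothetical table type:

#include <stddef.h>

struct entry {
	unsigned int key;	/* 0 marks the sentinel */
	unsigned int value;
};

static int lookup(const struct entry *tbl, unsigned int key,
		  unsigned int *value)
{
	for (size_t i = 0; tbl[i].key != 0; i++) {
		if (tbl[i].key == key) {
			*value = tbl[i].value;
			return 0;
		}
	}
	return -1;	/* not found; the caller picks a fallback */
}
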
@@ -4847,13 +4865,14 @@ static bool adlp_check_mbus_joined(u8 active_pipes)
return check_mbus_joined(active_pipes, adlp_allowed_dbufs);
}
-static u8 compute_dbuf_slices(enum pipe pipe, u8 active_pipes,
+static u8 compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus,
const struct dbuf_slice_conf_entry *dbuf_slices)
{
int i;
- for (i = 0; i < dbuf_slices[i].active_pipes; i++) {
- if (dbuf_slices[i].active_pipes == active_pipes)
+ for (i = 0; dbuf_slices[i].active_pipes != 0; i++) {
+ if (dbuf_slices[i].active_pipes == active_pipes &&
+ dbuf_slices[i].join_mbus == join_mbus)
return dbuf_slices[i].dbuf_mask[pipe];
}
return 0;
@@ -4864,7 +4883,7 @@ static u8 compute_dbuf_slices(enum pipe pipe, u8 active_pipes,
* returns correspondent DBuf slice mask as stated in BSpec for particular
* platform.
*/
-static u8 icl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes)
+static u8 icl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus)
{
/*
* FIXME: For ICL this is still a bit unclear as prev BSpec revision
@@ -4878,37 +4897,41 @@ static u8 icl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes)
* still here - we will need it once those additional constraints
* pop up.
*/
- return compute_dbuf_slices(pipe, active_pipes, icl_allowed_dbufs);
+ return compute_dbuf_slices(pipe, active_pipes, join_mbus,
+ icl_allowed_dbufs);
}
-static u8 tgl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes)
+static u8 tgl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus)
{
- return compute_dbuf_slices(pipe, active_pipes, tgl_allowed_dbufs);
+ return compute_dbuf_slices(pipe, active_pipes, join_mbus,
+ tgl_allowed_dbufs);
}
-static u32 adlp_compute_dbuf_slices(enum pipe pipe, u32 active_pipes)
+static u8 adlp_compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus)
{
- return compute_dbuf_slices(pipe, active_pipes, adlp_allowed_dbufs);
+ return compute_dbuf_slices(pipe, active_pipes, join_mbus,
+ adlp_allowed_dbufs);
}
-static u32 dg2_compute_dbuf_slices(enum pipe pipe, u32 active_pipes)
+static u8 dg2_compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus)
{
- return compute_dbuf_slices(pipe, active_pipes, dg2_allowed_dbufs);
+ return compute_dbuf_slices(pipe, active_pipes, join_mbus,
+ dg2_allowed_dbufs);
}
-static u8 skl_compute_dbuf_slices(struct intel_crtc *crtc, u8 active_pipes)
+static u8 skl_compute_dbuf_slices(struct intel_crtc *crtc, u8 active_pipes, bool join_mbus)
{
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
enum pipe pipe = crtc->pipe;
if (IS_DG2(dev_priv))
- return dg2_compute_dbuf_slices(pipe, active_pipes);
+ return dg2_compute_dbuf_slices(pipe, active_pipes, join_mbus);
else if (IS_ALDERLAKE_P(dev_priv))
- return adlp_compute_dbuf_slices(pipe, active_pipes);
+ return adlp_compute_dbuf_slices(pipe, active_pipes, join_mbus);
else if (DISPLAY_VER(dev_priv) == 12)
- return tgl_compute_dbuf_slices(pipe, active_pipes);
+ return tgl_compute_dbuf_slices(pipe, active_pipes, join_mbus);
else if (DISPLAY_VER(dev_priv) == 11)
- return icl_compute_dbuf_slices(pipe, active_pipes);
+ return icl_compute_dbuf_slices(pipe, active_pipes, join_mbus);
/*
* For anything else just return one slice yet.
* Should be extended for other platforms.
@@ -6127,11 +6150,16 @@ skl_compute_ddb(struct intel_atomic_state *state)
return ret;
}
+ if (IS_ALDERLAKE_P(dev_priv))
+ new_dbuf_state->joined_mbus =
+ adlp_check_mbus_joined(new_dbuf_state->active_pipes);
+
for_each_intel_crtc(&dev_priv->drm, crtc) {
enum pipe pipe = crtc->pipe;
new_dbuf_state->slices[pipe] =
- skl_compute_dbuf_slices(crtc, new_dbuf_state->active_pipes);
+ skl_compute_dbuf_slices(crtc, new_dbuf_state->active_pipes,
+ new_dbuf_state->joined_mbus);
if (old_dbuf_state->slices[pipe] == new_dbuf_state->slices[pipe])
continue;
@@ -6143,9 +6171,6 @@ skl_compute_ddb(struct intel_atomic_state *state)
new_dbuf_state->enabled_slices = intel_dbuf_enabled_slices(new_dbuf_state);
- if (IS_ALDERLAKE_P(dev_priv))
- new_dbuf_state->joined_mbus = adlp_check_mbus_joined(new_dbuf_state->active_pipes);
-
if (old_dbuf_state->enabled_slices != new_dbuf_state->enabled_slices ||
old_dbuf_state->joined_mbus != new_dbuf_state->joined_mbus) {
ret = intel_atomic_serialize_global_state(&new_dbuf_state->base);
@@ -6626,6 +6651,7 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv)
enum pipe pipe = crtc->pipe;
unsigned int mbus_offset;
enum plane_id plane_id;
+ u8 slices;
skl_pipe_wm_get_hw_state(crtc, &crtc_state->wm.skl.optimal);
crtc_state->wm.skl.raw = crtc_state->wm.skl.optimal;
@@ -6645,19 +6671,22 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv)
skl_ddb_entry_union(&dbuf_state->ddb[pipe], ddb_uv);
}
- dbuf_state->slices[pipe] =
- skl_compute_dbuf_slices(crtc, dbuf_state->active_pipes);
-
dbuf_state->weight[pipe] = intel_crtc_ddb_weight(crtc_state);
/*
* Used for checking overlaps, so we need absolute
* offsets instead of MBUS relative offsets.
*/
- mbus_offset = mbus_ddb_offset(dev_priv, dbuf_state->slices[pipe]);
+ slices = skl_compute_dbuf_slices(crtc, dbuf_state->active_pipes,
+ dbuf_state->joined_mbus);
+ mbus_offset = mbus_ddb_offset(dev_priv, slices);
crtc_state->wm.skl.ddb.start = mbus_offset + dbuf_state->ddb[pipe].start;
crtc_state->wm.skl.ddb.end = mbus_offset + dbuf_state->ddb[pipe].end;
+ /* The slices actually used by the planes on the pipe */
+ dbuf_state->slices[pipe] =
+ skl_ddb_dbuf_slice_mask(dev_priv, &crtc_state->wm.skl.ddb);
+
drm_dbg_kms(&dev_priv->drm,
"[CRTC:%d:%s] dbuf slices 0x%x, ddb (%d - %d), active pipes 0x%x, mbus joined: %s\n",
crtc->base.base.id, crtc->base.name,
@@ -6669,6 +6698,74 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv)
dbuf_state->enabled_slices = dev_priv->dbuf.enabled_slices;
}
+static bool skl_dbuf_is_misconfigured(struct drm_i915_private *i915)
+{
+ const struct intel_dbuf_state *dbuf_state =
+ to_intel_dbuf_state(i915->dbuf.obj.state);
+ struct skl_ddb_entry entries[I915_MAX_PIPES] = {};
+ struct intel_crtc *crtc;
+
+ for_each_intel_crtc(&i915->drm, crtc) {
+ const struct intel_crtc_state *crtc_state =
+ to_intel_crtc_state(crtc->base.state);
+
+ entries[crtc->pipe] = crtc_state->wm.skl.ddb;
+ }
+
+ for_each_intel_crtc(&i915->drm, crtc) {
+ const struct intel_crtc_state *crtc_state =
+ to_intel_crtc_state(crtc->base.state);
+ u8 slices;
+
+ slices = skl_compute_dbuf_slices(crtc, dbuf_state->active_pipes,
+ dbuf_state->joined_mbus);
+ if (dbuf_state->slices[crtc->pipe] & ~slices)
+ return true;
+
+ if (skl_ddb_allocation_overlaps(&crtc_state->wm.skl.ddb, entries,
+ I915_MAX_PIPES, crtc->pipe))
+ return true;
+ }
+
+ return false;
+}
+
+void skl_wm_sanitize(struct drm_i915_private *i915)
+{
+ struct intel_crtc *crtc;
+
+ /*
+ * On TGL/RKL (at least) the BIOS likes to assign the planes
+ * to the wrong DBUF slices. This will cause an infinite loop
+ * in skl_commit_modeset_enables() as it can't find a way to
+ * transition from the old bogus DBUF layout to the new
+ * proper DBUF layout without DBUF allocation overlaps between
+ * the planes (which cannot be allowed or else the hardware
+ * may hang). If we detect a bogus DBUF layout just turn off
+ * all the planes so that skl_commit_modeset_enables() can
+ * simply ignore them.
+ */
+ if (!skl_dbuf_is_misconfigured(i915))
+ return;
+
+ drm_dbg_kms(&i915->drm, "BIOS has misprogrammed the DBUF, disabling all planes\n");
+
+ for_each_intel_crtc(&i915->drm, crtc) {
+ struct intel_plane *plane = to_intel_plane(crtc->base.primary);
+ const struct intel_plane_state *plane_state =
+ to_intel_plane_state(plane->base.state);
+ struct intel_crtc_state *crtc_state =
+ to_intel_crtc_state(crtc->base.state);
+
+ if (plane_state->uapi.visible)
+ intel_plane_disable_noatomic(crtc, plane);
+
+ drm_WARN_ON(&i915->drm, crtc_state->active_planes != 0);
+
+ memset(&crtc_state->wm.skl.ddb, 0, sizeof(crtc_state->wm.skl.ddb));
+ }
+}
+
static void ilk_pipe_wm_get_hw_state(struct intel_crtc *crtc)
{
struct drm_device *dev = crtc->base.dev;
diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h
index 990cdcaf85ce..d2243653a893 100644
--- a/drivers/gpu/drm/i915/intel_pm.h
+++ b/drivers/gpu/drm/i915/intel_pm.h
@@ -47,6 +47,7 @@ void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc,
struct skl_pipe_wm *out);
void g4x_wm_sanitize(struct drm_i915_private *dev_priv);
void vlv_wm_sanitize(struct drm_i915_private *dev_priv);
+void skl_wm_sanitize(struct drm_i915_private *dev_priv);
bool intel_can_enable_sagv(struct drm_i915_private *dev_priv,
const struct intel_bw_state *bw_state);
void intel_sagv_pre_plane_update(struct intel_atomic_state *state);
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 53f1ccb78849..64c2708efc9e 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -68,9 +68,7 @@ static noinline depot_stack_handle_t __save_depot_stack(void)
static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
{
spin_lock_init(&rpm->debug.lock);
-
- if (rpm->available)
- stack_depot_init();
+ stack_depot_init();
}
static noinline depot_stack_handle_t
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index fc25ebf1a593..778da3179b3c 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -724,7 +724,8 @@ void intel_uncore_forcewake_get__locked(struct intel_uncore *uncore,
}
static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
- enum forcewake_domains fw_domains)
+ enum forcewake_domains fw_domains,
+ bool delayed)
{
struct intel_uncore_forcewake_domain *domain;
unsigned int tmp;
@@ -739,7 +740,11 @@ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
continue;
}
- fw_domains_put(uncore, domain->mask);
+ if (delayed &&
+ !(domain->uncore->fw_domains_timer & domain->mask))
+ fw_domain_arm_timer(domain);
+ else
+ fw_domains_put(uncore, domain->mask);
}
}
@@ -760,7 +765,20 @@ void intel_uncore_forcewake_put(struct intel_uncore *uncore,
return;
spin_lock_irqsave(&uncore->lock, irqflags);
- __intel_uncore_forcewake_put(uncore, fw_domains);
+ __intel_uncore_forcewake_put(uncore, fw_domains, false);
+ spin_unlock_irqrestore(&uncore->lock, irqflags);
+}
+
+void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
+ enum forcewake_domains fw_domains)
+{
+ unsigned long irqflags;
+
+ if (!uncore->fw_get_funcs)
+ return;
+
+ spin_lock_irqsave(&uncore->lock, irqflags);
+ __intel_uncore_forcewake_put(uncore, fw_domains, true);
spin_unlock_irqrestore(&uncore->lock, irqflags);
}
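
intel_uncore_forcewake_put_delayed() leans on the existing per-domain timer: instead of dropping the wake reference immediately, it arms the timer so the release happens shortly afterwards, and a get() arriving in the meantime simply absorbs it. Schematically (a loose sketch, not the driver's real structures):

#include <stdbool.h>

struct wake_domain {
	unsigned int wake_count;
	bool timer_armed;	/* timer callback performs the real put */
};

/* Immediate release: may power the well down right away. */
static void put_now(struct wake_domain *d)
{
	if (d->wake_count > 0)
		d->wake_count--;
}

/* Delayed release: keep the reference and let the timer drop it, so
 * back-to-back put/get pairs never toggle the hardware state. */
static void put_delayed(struct wake_domain *d)
{
	if (!d->timer_armed)
		d->timer_armed = true;	/* timer expiry calls put_now() */
}
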
@@ -802,7 +820,7 @@ void intel_uncore_forcewake_put__locked(struct intel_uncore *uncore,
if (!uncore->fw_get_funcs)
return;
- __intel_uncore_forcewake_put(uncore, fw_domains);
+ __intel_uncore_forcewake_put(uncore, fw_domains, false);
}
void assert_forcewakes_inactive(struct intel_uncore *uncore)
diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
index 210fe2a71612..2a15b2b2e2fc 100644
--- a/drivers/gpu/drm/i915/intel_uncore.h
+++ b/drivers/gpu/drm/i915/intel_uncore.h
@@ -246,6 +246,8 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore,
enum forcewake_domains domains);
void intel_uncore_forcewake_put(struct intel_uncore *uncore,
enum forcewake_domains domains);
+void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
+ enum forcewake_domains domains);
void intel_uncore_forcewake_flush(struct intel_uncore *uncore,
enum forcewake_domains fw_domains);
diff --git a/drivers/gpu/drm/imx/dcss/Kconfig b/drivers/gpu/drm/imx/dcss/Kconfig
index 7374f1952762..5c2b2277afbf 100644
--- a/drivers/gpu/drm/imx/dcss/Kconfig
+++ b/drivers/gpu/drm/imx/dcss/Kconfig
@@ -2,6 +2,7 @@ config DRM_IMX_DCSS
tristate "i.MX8MQ DCSS"
select IMX_IRQSTEER
select DRM_KMS_HELPER
+ select DRM_GEM_CMA_HELPER
select VIDEOMODE_HELPERS
depends on DRM && ARCH_MXC && ARM64
help
diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c
index a8aba0141ce7..06cb1a59b9bc 100644
--- a/drivers/gpu/drm/imx/parallel-display.c
+++ b/drivers/gpu/drm/imx/parallel-display.c
@@ -217,14 +217,6 @@ static int imx_pd_bridge_atomic_check(struct drm_bridge *bridge,
if (!imx_pd_format_supported(bus_fmt))
return -EINVAL;
- if (bus_flags &
- ~(DRM_BUS_FLAG_DE_LOW | DRM_BUS_FLAG_DE_HIGH |
- DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE |
- DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE)) {
- dev_warn(imxpd->dev, "invalid bus_flags (%x)\n", bus_flags);
- return -EINVAL;
- }
-
bridge_state->output_bus_cfg.flags = bus_flags;
bridge_state->input_bus_cfg.flags = bus_flags;
imx_crtc_state->bus_flags = bus_flags;
diff --git a/drivers/gpu/drm/kmb/kmb_plane.c b/drivers/gpu/drm/kmb/kmb_plane.c
index 00404ba4126d..2735b8eb3537 100644
--- a/drivers/gpu/drm/kmb/kmb_plane.c
+++ b/drivers/gpu/drm/kmb/kmb_plane.c
@@ -158,12 +158,6 @@ static void kmb_plane_atomic_disable(struct drm_plane *plane,
case LAYER_1:
kmb->plane_status[plane_id].ctrl = LCD_CTRL_VL2_ENABLE;
break;
- case LAYER_2:
- kmb->plane_status[plane_id].ctrl = LCD_CTRL_GL1_ENABLE;
- break;
- case LAYER_3:
- kmb->plane_status[plane_id].ctrl = LCD_CTRL_GL2_ENABLE;
- break;
}
kmb->plane_status[plane_id].disable = true;
diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index 5d90d2eb0019..bced4c7d668e 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -786,18 +786,101 @@ void mtk_dsi_ddp_stop(struct device *dev)
mtk_dsi_poweroff(dsi);
}
+static int mtk_dsi_encoder_init(struct drm_device *drm, struct mtk_dsi *dsi)
+{
+ int ret;
+
+ ret = drm_simple_encoder_init(drm, &dsi->encoder,
+ DRM_MODE_ENCODER_DSI);
+ if (ret) {
+ DRM_ERROR("Failed to encoder init to drm\n");
+ return ret;
+ }
+
+ dsi->encoder.possible_crtcs = mtk_drm_find_possible_crtc_by_comp(drm, dsi->host.dev);
+
+ ret = drm_bridge_attach(&dsi->encoder, &dsi->bridge, NULL,
+ DRM_BRIDGE_ATTACH_NO_CONNECTOR);
+ if (ret)
+ goto err_cleanup_encoder;
+
+ dsi->connector = drm_bridge_connector_init(drm, &dsi->encoder);
+ if (IS_ERR(dsi->connector)) {
+ DRM_ERROR("Unable to create bridge connector\n");
+ ret = PTR_ERR(dsi->connector);
+ goto err_cleanup_encoder;
+ }
+ drm_connector_attach_encoder(dsi->connector, &dsi->encoder);
+
+ return 0;
+
+err_cleanup_encoder:
+ drm_encoder_cleanup(&dsi->encoder);
+ return ret;
+}
+
+static int mtk_dsi_bind(struct device *dev, struct device *master, void *data)
+{
+ int ret;
+ struct drm_device *drm = data;
+ struct mtk_dsi *dsi = dev_get_drvdata(dev);
+
+ ret = mtk_dsi_encoder_init(drm, dsi);
+ if (ret)
+ return ret;
+
+ return device_reset_optional(dev);
+}
+
+static void mtk_dsi_unbind(struct device *dev, struct device *master,
+ void *data)
+{
+ struct mtk_dsi *dsi = dev_get_drvdata(dev);
+
+ drm_encoder_cleanup(&dsi->encoder);
+}
+
+static const struct component_ops mtk_dsi_component_ops = {
+ .bind = mtk_dsi_bind,
+ .unbind = mtk_dsi_unbind,
+};
+
static int mtk_dsi_host_attach(struct mipi_dsi_host *host,
struct mipi_dsi_device *device)
{
struct mtk_dsi *dsi = host_to_dsi(host);
+ struct device *dev = host->dev;
+ int ret;
dsi->lanes = device->lanes;
dsi->format = device->format;
dsi->mode_flags = device->mode_flags;
+ dsi->next_bridge = devm_drm_of_get_bridge(dev, dev->of_node, 0, 0);
+ if (IS_ERR(dsi->next_bridge))
+ return PTR_ERR(dsi->next_bridge);
+
+ drm_bridge_add(&dsi->bridge);
+
+ ret = component_add(host->dev, &mtk_dsi_component_ops);
+ if (ret) {
+ DRM_ERROR("failed to add dsi_host component: %d\n", ret);
+ drm_bridge_remove(&dsi->bridge);
+ return ret;
+ }
return 0;
}
+static int mtk_dsi_host_detach(struct mipi_dsi_host *host,
+ struct mipi_dsi_device *device)
+{
+ struct mtk_dsi *dsi = host_to_dsi(host);
+
+ component_del(host->dev, &mtk_dsi_component_ops);
+ drm_bridge_remove(&dsi->bridge);
+ return 0;
+}
+
static void mtk_dsi_wait_for_idle(struct mtk_dsi *dsi)
{
int ret;
@@ -938,73 +1021,14 @@ static ssize_t mtk_dsi_host_transfer(struct mipi_dsi_host *host,
static const struct mipi_dsi_host_ops mtk_dsi_ops = {
.attach = mtk_dsi_host_attach,
+ .detach = mtk_dsi_host_detach,
.transfer = mtk_dsi_host_transfer,
};
-static int mtk_dsi_encoder_init(struct drm_device *drm, struct mtk_dsi *dsi)
-{
- int ret;
-
- ret = drm_simple_encoder_init(drm, &dsi->encoder,
- DRM_MODE_ENCODER_DSI);
- if (ret) {
- DRM_ERROR("Failed to encoder init to drm\n");
- return ret;
- }
-
- dsi->encoder.possible_crtcs = mtk_drm_find_possible_crtc_by_comp(drm, dsi->host.dev);
-
- ret = drm_bridge_attach(&dsi->encoder, &dsi->bridge, NULL,
- DRM_BRIDGE_ATTACH_NO_CONNECTOR);
- if (ret)
- goto err_cleanup_encoder;
-
- dsi->connector = drm_bridge_connector_init(drm, &dsi->encoder);
- if (IS_ERR(dsi->connector)) {
- DRM_ERROR("Unable to create bridge connector\n");
- ret = PTR_ERR(dsi->connector);
- goto err_cleanup_encoder;
- }
- drm_connector_attach_encoder(dsi->connector, &dsi->encoder);
-
- return 0;
-
-err_cleanup_encoder:
- drm_encoder_cleanup(&dsi->encoder);
- return ret;
-}
-
-static int mtk_dsi_bind(struct device *dev, struct device *master, void *data)
-{
- int ret;
- struct drm_device *drm = data;
- struct mtk_dsi *dsi = dev_get_drvdata(dev);
-
- ret = mtk_dsi_encoder_init(drm, dsi);
- if (ret)
- return ret;
-
- return device_reset_optional(dev);
-}
-
-static void mtk_dsi_unbind(struct device *dev, struct device *master,
- void *data)
-{
- struct mtk_dsi *dsi = dev_get_drvdata(dev);
-
- drm_encoder_cleanup(&dsi->encoder);
-}
-
-static const struct component_ops mtk_dsi_component_ops = {
- .bind = mtk_dsi_bind,
- .unbind = mtk_dsi_unbind,
-};
-
static int mtk_dsi_probe(struct platform_device *pdev)
{
struct mtk_dsi *dsi;
struct device *dev = &pdev->dev;
- struct drm_panel *panel;
struct resource *regs;
int irq_num;
int ret;
@@ -1021,19 +1045,6 @@ static int mtk_dsi_probe(struct platform_device *pdev)
return ret;
}
- ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0,
- &panel, &dsi->next_bridge);
- if (ret)
- goto err_unregister_host;
-
- if (panel) {
- dsi->next_bridge = devm_drm_panel_bridge_add(dev, panel);
- if (IS_ERR(dsi->next_bridge)) {
- ret = PTR_ERR(dsi->next_bridge);
- goto err_unregister_host;
- }
- }
-
dsi->driver_data = of_device_get_match_data(dev);
dsi->engine_clk = devm_clk_get(dev, "engine");
@@ -1098,14 +1109,6 @@ static int mtk_dsi_probe(struct platform_device *pdev)
dsi->bridge.of_node = dev->of_node;
dsi->bridge.type = DRM_MODE_CONNECTOR_DSI;
- drm_bridge_add(&dsi->bridge);
-
- ret = component_add(&pdev->dev, &mtk_dsi_component_ops);
- if (ret) {
- dev_err(&pdev->dev, "failed to add component: %d\n", ret);
- goto err_unregister_host;
- }
-
return 0;
err_unregister_host:
@@ -1118,8 +1121,6 @@ static int mtk_dsi_remove(struct platform_device *pdev)
struct mtk_dsi *dsi = platform_get_drvdata(pdev);
mtk_output_dsi_disable(dsi);
- drm_bridge_remove(&dsi->bridge);
- component_del(&pdev->dev, &mtk_dsi_component_ops);
mipi_dsi_host_unregister(&dsi->host);
return 0;
diff --git a/drivers/gpu/drm/mgag200/mgag200_pll.c b/drivers/gpu/drm/mgag200/mgag200_pll.c
index e9ae22b4f813..52be08b744ad 100644
--- a/drivers/gpu/drm/mgag200/mgag200_pll.c
+++ b/drivers/gpu/drm/mgag200/mgag200_pll.c
@@ -404,9 +404,9 @@ mgag200_pixpll_update_g200wb(struct mgag200_pll *pixpll, const struct mgag200_pl
udelay(50);
/* program pixel pll register */
- WREG_DAC(MGA1064_PIX_PLLC_N, xpixpllcn);
- WREG_DAC(MGA1064_PIX_PLLC_M, xpixpllcm);
- WREG_DAC(MGA1064_PIX_PLLC_P, xpixpllcp);
+ WREG_DAC(MGA1064_WB_PIX_PLLC_N, xpixpllcn);
+ WREG_DAC(MGA1064_WB_PIX_PLLC_M, xpixpllcm);
+ WREG_DAC(MGA1064_WB_PIX_PLLC_P, xpixpllcp);
udelay(50);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 51b83776951b..17cfad6424db 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1560,6 +1560,8 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
for (i = 0; i < gpu->nr_rings; i++)
a6xx_gpu->shadow[i] = 0;
+ gpu->suspend_count++;
+
return 0;
}
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 93005839b5da..fb261930ad1c 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -608,9 +608,27 @@ static int adreno_resume(struct device *dev)
return gpu->funcs->pm_resume(gpu);
}
+static int active_submits(struct msm_gpu *gpu)
+{
+ int active_submits;
+
+ mutex_lock(&gpu->active_lock);
+ active_submits = gpu->active_submits;
+ mutex_unlock(&gpu->active_lock);
+ return active_submits;
+}
+
static int adreno_suspend(struct device *dev)
{
struct msm_gpu *gpu = dev_to_gpu(dev);
+ int remaining;
+
+ remaining = wait_event_timeout(gpu->retire_event,
+ active_submits(gpu) == 0,
+ msecs_to_jiffies(1000));
+ if (remaining == 0) {
+ dev_err(dev, "Timeout waiting for GPU to suspend\n");
+ return -EBUSY;
+ }
return gpu->funcs->pm_suspend(gpu);
}
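
This waiter pairs with the wake_up_all() added to retire_submits() in msm_gpu.c further down: suspend sleeps on gpu->retire_event until the retire path observes zero active submits, giving up after a second. The pairing in miniature, with a pthread condition variable standing in for the kernel waitqueue (the timeout is left out for brevity):

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t retired = PTHREAD_COND_INITIALIZER;
static int active_submits;

/* Retire path: drop the count and wake any sleeping suspender. */
static void retire_one(void)
{
	pthread_mutex_lock(&lock);
	if (--active_submits == 0)
		pthread_cond_broadcast(&retired);
	pthread_mutex_unlock(&lock);
}

/* Suspend path: wait until the GPU is idle before powering down. */
static void wait_for_idle(void)
{
	pthread_mutex_lock(&lock);
	while (active_submits > 0)
		pthread_cond_wait(&retired, &lock);
	pthread_mutex_unlock(&lock);
}
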
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c
index a98e964c3b6f..355894a3b48c 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c
@@ -26,9 +26,16 @@ static void dpu_setup_dspp_pcc(struct dpu_hw_dspp *ctx,
struct dpu_hw_pcc_cfg *cfg)
{
- u32 base = ctx->cap->sblk->pcc.base;
+ u32 base;
- if (!ctx || !base) {
+ if (!ctx) {
+ DRM_ERROR("invalid ctx %pK\n", ctx);
+ return;
+ }
+
+ base = ctx->cap->sblk->pcc.base;
+
+ if (!base) {
DRM_ERROR("invalid ctx %pK pcc base 0x%x\n", ctx, base);
return;
}
diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c
index 052548883d27..0fe02529b5e7 100644
--- a/drivers/gpu/drm/msm/dsi/dsi.c
+++ b/drivers/gpu/drm/msm/dsi/dsi.c
@@ -40,7 +40,12 @@ static int dsi_get_phy(struct msm_dsi *msm_dsi)
of_node_put(phy_node);
- if (!phy_pdev || !msm_dsi->phy) {
+ if (!phy_pdev) {
+ DRM_DEV_ERROR(&pdev->dev, "%s: phy driver is not ready\n", __func__);
+ return -EPROBE_DEFER;
+ }
+ if (!msm_dsi->phy) {
+ put_device(&phy_pdev->dev);
DRM_DEV_ERROR(&pdev->dev, "%s: phy driver is not ready\n", __func__);
return -EPROBE_DEFER;
}
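
The split above fixes a reference leak: the platform-device lookup above this hunk hands back its result with a reference held, so the !msm_dsi->phy branch must drop it with put_device() before deferring, while the !phy_pdev branch has nothing to release. The HDMI PHY lookup below receives the same treatment. The shape of the pattern, with hypothetical helpers:

struct device;

extern struct device *find_device(void);		/* hypothetical */
extern void put_device_ref(struct device *dev);		/* hypothetical */
extern int device_is_ready(struct device *dev);		/* hypothetical */

static int get_phy(struct device **out)
{
	struct device *dev = find_device();

	if (!dev)
		return -1;		/* nothing acquired, nothing to put */

	if (!device_is_ready(dev)) {
		put_device_ref(dev);	/* acquired, so balance it */
		return -1;
	}

	*out = dev;
	return 0;
}
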
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index c2ed177717c7..2027b38617ab 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -808,12 +808,14 @@ int msm_dsi_phy_enable(struct msm_dsi_phy *phy,
struct msm_dsi_phy_clk_request *clk_req,
struct msm_dsi_phy_shared_timings *shared_timings)
{
- struct device *dev = &phy->pdev->dev;
+ struct device *dev;
int ret;
if (!phy || !phy->cfg->ops.enable)
return -EINVAL;
+ dev = &phy->pdev->dev;
+
ret = dsi_phy_enable_resource(phy);
if (ret) {
DRM_DEV_ERROR(dev, "%s: resource enable failed, %d\n",
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c
index 3acdeae25caf..719720709e9e 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.c
@@ -97,10 +97,15 @@ static int msm_hdmi_get_phy(struct hdmi *hdmi)
of_node_put(phy_node);
- if (!phy_pdev || !hdmi->phy) {
+ if (!phy_pdev) {
DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n");
return -EPROBE_DEFER;
}
+ if (!hdmi->phy) {
+ DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n");
+ put_device(&phy_pdev->dev);
+ return -EPROBE_DEFER;
+ }
hdmi->phy_dev = get_device(&phy_pdev->dev);
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index ad35a5d94053..555666e3f960 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -461,7 +461,7 @@ static int msm_init_vram(struct drm_device *dev)
of_node_put(node);
if (ret)
return ret;
- size = r.end - r.start;
+ size = r.end - r.start + 1;
DRM_INFO("using VRAM carveout: %lx@%pa\n", size, &r.start);
/* if we have no IOMMU, then we need to use carveout allocator.
@@ -510,7 +510,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
struct msm_drm_private *priv = dev_get_drvdata(dev);
struct drm_device *ddev;
struct msm_kms *kms;
- struct msm_mdss *mdss;
int ret, i;
ddev = drm_dev_alloc(drv, dev);
@@ -521,8 +520,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
ddev->dev_private = priv;
priv->dev = ddev;
- mdss = priv->mdss;
-
priv->wq = alloc_ordered_workqueue("msm", 0);
priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD;
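
The VRAM carveout fix above corrects an off-by-one: struct resource ranges are inclusive on both ends, so the region [start, end] spans end - start + 1 bytes, which is exactly what the kernel's resource_size() helper computes. In miniature:

#include <stdint.h>

struct res { uint64_t start, end; };	/* inclusive, like struct resource */

static uint64_t res_size(const struct res *r)
{
	/* e.g. [0x1000, 0x1fff] is 0x1000 bytes, not 0xfff */
	return r->end - r->start + 1;
}
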
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 0f78c2615272..2c1049c0ea14 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -703,6 +703,8 @@ static void retire_submits(struct msm_gpu *gpu)
}
}
}
+
+ wake_up_all(&gpu->retire_event);
}
static void retire_worker(struct kthread_work *work)
@@ -848,6 +850,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
INIT_LIST_HEAD(&gpu->active_list);
mutex_init(&gpu->active_lock);
mutex_init(&gpu->lock);
+ init_waitqueue_head(&gpu->retire_event);
kthread_init_work(&gpu->retire_work, retire_worker);
kthread_init_work(&gpu->recover_work, recover_worker);
kthread_init_work(&gpu->fault_work, fault_worker);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 445c6bfd4b6b..92aa1e9196c6 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -230,6 +230,9 @@ struct msm_gpu {
/* work for handling GPU recovery: */
struct kthread_work recover_work;
+ /** retire_event: notified when submits are retired: */
+ wait_queue_head_t retire_event;
+
/* work for handling active-list retiring: */
struct kthread_work retire_work;
diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
index 62405e980925..9bf319be11f6 100644
--- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c
+++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
@@ -133,6 +133,18 @@ void msm_devfreq_init(struct msm_gpu *gpu)
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
}
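+/*
+ * Order matters in the helpers below: cancel the hrtimer first so it
+ * cannot re-queue the work, then cancel the work synchronously. In the
+ * reverse order the still-armed timer could re-queue the work right
+ * after it was flushed.
+ */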
+static void cancel_idle_work(struct msm_gpu_devfreq *df)
+{
+ hrtimer_cancel(&df->idle_work.timer);
+ kthread_cancel_work_sync(&df->idle_work.work);
+}
+
+static void cancel_boost_work(struct msm_gpu_devfreq *df)
+{
+ hrtimer_cancel(&df->boost_work.timer);
+ kthread_cancel_work_sync(&df->boost_work.work);
+}
+
void msm_devfreq_cleanup(struct msm_gpu *gpu)
{
struct msm_gpu_devfreq *df = &gpu->devfreq;
@@ -152,7 +164,12 @@ void msm_devfreq_resume(struct msm_gpu *gpu)
void msm_devfreq_suspend(struct msm_gpu *gpu)
{
- devfreq_suspend_device(gpu->devfreq.devfreq);
+ struct msm_gpu_devfreq *df = &gpu->devfreq;
+
+ devfreq_suspend_device(df->devfreq);
+
+ cancel_idle_work(df);
+ cancel_boost_work(df);
}
static void msm_devfreq_boost_work(struct kthread_work *work)
@@ -196,7 +213,7 @@ void msm_devfreq_active(struct msm_gpu *gpu)
/*
* Cancel any pending transition to idle frequency:
*/
- hrtimer_cancel(&df->idle_work.timer);
+ cancel_idle_work(df);
idle_time = ktime_to_ms(ktime_sub(ktime_get(), df->idle_time));
diff --git a/drivers/gpu/drm/mxsfb/mxsfb_kms.c b/drivers/gpu/drm/mxsfb/mxsfb_kms.c
index 0655582ae8ed..4cfb6c001679 100644
--- a/drivers/gpu/drm/mxsfb/mxsfb_kms.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_kms.c
@@ -361,7 +361,11 @@ static void mxsfb_crtc_atomic_enable(struct drm_crtc *crtc,
bridge_state =
drm_atomic_get_new_bridge_state(state,
mxsfb->bridge);
- bus_format = bridge_state->input_bus_cfg.format;
+ if (!bridge_state)
+ bus_format = MEDIA_BUS_FMT_FIXED;
+ else
+ bus_format = bridge_state->input_bus_cfg.format;
+
if (bus_format == MEDIA_BUS_FMT_FIXED) {
dev_warn_once(drm->dev,
"Bridge does not provide bus format, assuming MEDIA_BUS_FMT_RGB888_1X24.\n"
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 3828aafd3ac4..7ba66ad68a8a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -39,6 +39,8 @@
#include <linux/sched/mm.h>
#include <linux/hmm.h>
+#include <linux/memremap.h>
+#include <linux/migrate.h>
/*
* FIXME: this is ugly right now we are using TTM to allocate vram and we pin
@@ -324,7 +326,6 @@ nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm)
return NULL;
}
- get_page(page);
lock_page(page);
return page;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 266809e511e2..090b9b47708c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -35,6 +35,7 @@
#include <linux/sched/mm.h>
#include <linux/sort.h>
#include <linux/hmm.h>
+#include <linux/memremap.h>
#include <linux/rmap.h>
struct nouveau_svm {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
index d0f52d59fc2f..64e423dddd9e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
@@ -38,7 +38,7 @@ nvbios_addr(struct nvkm_bios *bios, u32 *addr, u8 size)
*addr += bios->imaged_addr;
}
- if (unlikely(*addr + size >= bios->size)) {
+ if (unlikely(*addr + size > bios->size)) {
nvkm_error(&bios->subdev, "OOB %d %08x %08x\n", size, p, *addr);
return false;
}
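
The relaxed comparison is the usual inclusive/exclusive bounds fix: a read of size bytes starting at addr covers [addr, addr + size), so it fits exactly when addr + size <= total, and the old >= wrongly rejected a read ending flush with the image. As a sketch:

#include <stdbool.h>
#include <stdint.h>

/* True iff [addr, addr + size) lies within a buffer of 'total' bytes.
 * The widened sum guards against wraparound near UINT32_MAX. */
static bool in_bounds(uint32_t addr, uint8_t size, uint32_t total)
{
	return (uint64_t)addr + size <= total;
}
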
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index 434c2861bb40..9989a316fe88 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -106,6 +106,8 @@ config DRM_PANEL_EDP
depends on PM
select VIDEOMODE_HELPERS
select DRM_DP_AUX_BUS
+ select DRM_DP_HELPER
+ select DRM_KMS_HELPER
help
DRM panel driver for dumb eDP panels that need at most a regulator and
a GPIO to be powered up. Optionally a backlight can be attached so
diff --git a/drivers/gpu/drm/panel/panel-abt-y030xx067a.c b/drivers/gpu/drm/panel/panel-abt-y030xx067a.c
index f043b484055b..ed626fdc08e8 100644
--- a/drivers/gpu/drm/panel/panel-abt-y030xx067a.c
+++ b/drivers/gpu/drm/panel/panel-abt-y030xx067a.c
@@ -293,15 +293,13 @@ static int y030xx067a_probe(struct spi_device *spi)
return 0;
}
-static int y030xx067a_remove(struct spi_device *spi)
+static void y030xx067a_remove(struct spi_device *spi)
{
struct y030xx067a *priv = spi_get_drvdata(spi);
drm_panel_remove(&priv->panel);
drm_panel_disable(&priv->panel);
drm_panel_unprepare(&priv->panel);
-
- return 0;
}
static const struct drm_display_mode y030xx067a_modes[] = {
diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9322.c b/drivers/gpu/drm/panel/panel-ilitek-ili9322.c
index 8e84df9a0033..3dfafa585127 100644
--- a/drivers/gpu/drm/panel/panel-ilitek-ili9322.c
+++ b/drivers/gpu/drm/panel/panel-ilitek-ili9322.c
@@ -896,14 +896,12 @@ static int ili9322_probe(struct spi_device *spi)
return 0;
}
-static int ili9322_remove(struct spi_device *spi)
+static void ili9322_remove(struct spi_device *spi)
{
struct ili9322 *ili = spi_get_drvdata(spi);
ili9322_power_off(ili);
drm_panel_remove(&ili->panel);
-
- return 0;
}
/*
diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c
index 2c3378a259b1..a07ef26234e5 100644
--- a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c
+++ b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c
@@ -728,7 +728,7 @@ static int ili9341_probe(struct spi_device *spi)
return -1;
}
-static int ili9341_remove(struct spi_device *spi)
+static void ili9341_remove(struct spi_device *spi)
{
const struct spi_device_id *id = spi_get_device_id(spi);
struct ili9341 *ili = spi_get_drvdata(spi);
@@ -741,7 +741,6 @@ static int ili9341_remove(struct spi_device *spi)
drm_dev_unplug(drm);
drm_atomic_helper_shutdown(drm);
}
- return 0;
}
static void ili9341_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/panel/panel-innolux-ej030na.c b/drivers/gpu/drm/panel/panel-innolux-ej030na.c
index c558de3f99be..e3b1daa0cb72 100644
--- a/drivers/gpu/drm/panel/panel-innolux-ej030na.c
+++ b/drivers/gpu/drm/panel/panel-innolux-ej030na.c
@@ -219,15 +219,13 @@ static int ej030na_probe(struct spi_device *spi)
return 0;
}
-static int ej030na_remove(struct spi_device *spi)
+static void ej030na_remove(struct spi_device *spi)
{
struct ej030na *priv = spi_get_drvdata(spi);
drm_panel_remove(&priv->panel);
drm_panel_disable(&priv->panel);
drm_panel_unprepare(&priv->panel);
-
- return 0;
}
static const struct drm_display_mode ej030na_modes[] = {
diff --git a/drivers/gpu/drm/panel/panel-lg-lb035q02.c b/drivers/gpu/drm/panel/panel-lg-lb035q02.c
index f3183b68704f..9d0d4faa3f58 100644
--- a/drivers/gpu/drm/panel/panel-lg-lb035q02.c
+++ b/drivers/gpu/drm/panel/panel-lg-lb035q02.c
@@ -203,14 +203,12 @@ static int lb035q02_probe(struct spi_device *spi)
return 0;
}
-static int lb035q02_remove(struct spi_device *spi)
+static void lb035q02_remove(struct spi_device *spi)
{
struct lb035q02_device *lcd = spi_get_drvdata(spi);
drm_panel_remove(&lcd->panel);
drm_panel_disable(&lcd->panel);
-
- return 0;
}
static const struct of_device_id lb035q02_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-lg-lg4573.c b/drivers/gpu/drm/panel/panel-lg-lg4573.c
index 8e5160af1de5..cf246d15b7b6 100644
--- a/drivers/gpu/drm/panel/panel-lg-lg4573.c
+++ b/drivers/gpu/drm/panel/panel-lg-lg4573.c
@@ -266,14 +266,12 @@ static int lg4573_probe(struct spi_device *spi)
return 0;
}
-static int lg4573_remove(struct spi_device *spi)
+static void lg4573_remove(struct spi_device *spi)
{
struct lg4573 *ctx = spi_get_drvdata(spi);
lg4573_display_off(ctx);
drm_panel_remove(&ctx->panel);
-
- return 0;
}
static const struct of_device_id lg4573_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c b/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c
index 6e5ab1debc8b..81c5c541a351 100644
--- a/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c
+++ b/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c
@@ -212,15 +212,13 @@ static int nl8048_probe(struct spi_device *spi)
return 0;
}
-static int nl8048_remove(struct spi_device *spi)
+static void nl8048_remove(struct spi_device *spi)
{
struct nl8048_panel *lcd = spi_get_drvdata(spi);
drm_panel_remove(&lcd->panel);
drm_panel_disable(&lcd->panel);
drm_panel_unprepare(&lcd->panel);
-
- return 0;
}
static const struct of_device_id nl8048_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-novatek-nt39016.c b/drivers/gpu/drm/panel/panel-novatek-nt39016.c
index d036853db865..f58cfb10b58a 100644
--- a/drivers/gpu/drm/panel/panel-novatek-nt39016.c
+++ b/drivers/gpu/drm/panel/panel-novatek-nt39016.c
@@ -292,7 +292,7 @@ static int nt39016_probe(struct spi_device *spi)
return 0;
}
-static int nt39016_remove(struct spi_device *spi)
+static void nt39016_remove(struct spi_device *spi)
{
struct nt39016 *panel = spi_get_drvdata(spi);
@@ -300,8 +300,6 @@ static int nt39016_remove(struct spi_device *spi)
nt39016_disable(&panel->drm_panel);
nt39016_unprepare(&panel->drm_panel);
-
- return 0;
}
static const struct drm_display_mode kd035g6_display_modes[] = {
diff --git a/drivers/gpu/drm/panel/panel-samsung-db7430.c b/drivers/gpu/drm/panel/panel-samsung-db7430.c
index ead479719f00..04640c5256a8 100644
--- a/drivers/gpu/drm/panel/panel-samsung-db7430.c
+++ b/drivers/gpu/drm/panel/panel-samsung-db7430.c
@@ -314,12 +314,11 @@ static int db7430_probe(struct spi_device *spi)
return 0;
}
-static int db7430_remove(struct spi_device *spi)
+static void db7430_remove(struct spi_device *spi)
{
struct db7430 *db = spi_get_drvdata(spi);
drm_panel_remove(&db->panel);
- return 0;
}
/*
diff --git a/drivers/gpu/drm/panel/panel-samsung-ld9040.c b/drivers/gpu/drm/panel/panel-samsung-ld9040.c
index c4b388850a13..01eb211f32f7 100644
--- a/drivers/gpu/drm/panel/panel-samsung-ld9040.c
+++ b/drivers/gpu/drm/panel/panel-samsung-ld9040.c
@@ -358,14 +358,12 @@ static int ld9040_probe(struct spi_device *spi)
return 0;
}
-static int ld9040_remove(struct spi_device *spi)
+static void ld9040_remove(struct spi_device *spi)
{
struct ld9040 *ctx = spi_get_drvdata(spi);
ld9040_power_off(ctx);
drm_panel_remove(&ctx->panel);
-
- return 0;
}
static const struct of_device_id ld9040_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c b/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c
index 1696ceb36aa0..2adb223a895c 100644
--- a/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c
+++ b/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c
@@ -291,12 +291,11 @@ static int s6d27a1_probe(struct spi_device *spi)
return 0;
}
-static int s6d27a1_remove(struct spi_device *spi)
+static void s6d27a1_remove(struct spi_device *spi)
{
struct s6d27a1 *ctx = spi_get_drvdata(spi);
drm_panel_remove(&ctx->panel);
- return 0;
}
static const struct of_device_id s6d27a1_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c b/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c
index c178d962b0d5..d99afcc672ca 100644
--- a/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c
+++ b/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c
@@ -62,10 +62,9 @@ static int s6e63m0_spi_probe(struct spi_device *spi)
s6e63m0_spi_dcs_write, false);
}
-static int s6e63m0_spi_remove(struct spi_device *spi)
+static void s6e63m0_spi_remove(struct spi_device *spi)
{
s6e63m0_remove(&spi->dev);
- return 0;
}
static const struct of_device_id s6e63m0_spi_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 9e46db5e359c..b42c1d816e79 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -588,6 +588,7 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc)
err = panel_dpi_probe(dev, panel);
if (err)
goto free_ddc;
+ desc = panel->desc;
} else {
if (!of_get_display_timing(dev->of_node, "panel-timing", &dt))
panel_simple_parse_panel_timing_node(dev, panel, &dt);
@@ -2016,7 +2017,7 @@ static const struct display_timing innolux_g070y2_l01_timing = {
static const struct panel_desc innolux_g070y2_l01 = {
.timings = &innolux_g070y2_l01_timing,
.num_timings = 1,
- .bpc = 6,
+ .bpc = 8,
.size = {
.width = 152,
.height = 91,
diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
index 61e565524542..bbc4569cbcdc 100644
--- a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
+++ b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
@@ -387,13 +387,11 @@ static int st7789v_probe(struct spi_device *spi)
return 0;
}
-static int st7789v_remove(struct spi_device *spi)
+static void st7789v_remove(struct spi_device *spi)
{
struct st7789v *ctx = spi_get_drvdata(spi);
drm_panel_remove(&ctx->panel);
-
- return 0;
}
static const struct of_device_id st7789v_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-sony-acx565akm.c b/drivers/gpu/drm/panel/panel-sony-acx565akm.c
index ba0b3ead150f..0d7541a33f87 100644
--- a/drivers/gpu/drm/panel/panel-sony-acx565akm.c
+++ b/drivers/gpu/drm/panel/panel-sony-acx565akm.c
@@ -655,7 +655,7 @@ static int acx565akm_probe(struct spi_device *spi)
return 0;
}
-static int acx565akm_remove(struct spi_device *spi)
+static void acx565akm_remove(struct spi_device *spi)
{
struct acx565akm_panel *lcd = spi_get_drvdata(spi);
@@ -666,8 +666,6 @@ static int acx565akm_remove(struct spi_device *spi)
drm_panel_disable(&lcd->panel);
drm_panel_unprepare(&lcd->panel);
-
- return 0;
}
static const struct of_device_id acx565akm_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c b/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c
index ba0c00d1a001..4dbf8b88f264 100644
--- a/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c
+++ b/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c
@@ -350,15 +350,13 @@ static int td028ttec1_probe(struct spi_device *spi)
return 0;
}
-static int td028ttec1_remove(struct spi_device *spi)
+static void td028ttec1_remove(struct spi_device *spi)
{
struct td028ttec1_panel *lcd = spi_get_drvdata(spi);
drm_panel_remove(&lcd->panel);
drm_panel_disable(&lcd->panel);
drm_panel_unprepare(&lcd->panel);
-
- return 0;
}
static const struct of_device_id td028ttec1_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c b/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c
index 1866cdb8f9c1..cf4609bb9b1d 100644
--- a/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c
+++ b/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c
@@ -463,7 +463,7 @@ static int td043mtea1_probe(struct spi_device *spi)
return 0;
}
-static int td043mtea1_remove(struct spi_device *spi)
+static void td043mtea1_remove(struct spi_device *spi)
{
struct td043mtea1_panel *lcd = spi_get_drvdata(spi);
@@ -472,8 +472,6 @@ static int td043mtea1_remove(struct spi_device *spi)
drm_panel_unprepare(&lcd->panel);
sysfs_remove_group(&spi->dev.kobj, &td043mtea1_attr_group);
-
- return 0;
}
static const struct of_device_id td043mtea1_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-tpo-tpg110.c b/drivers/gpu/drm/panel/panel-tpo-tpg110.c
index e3791dad6830..0b1f5a11a055 100644
--- a/drivers/gpu/drm/panel/panel-tpo-tpg110.c
+++ b/drivers/gpu/drm/panel/panel-tpo-tpg110.c
@@ -450,12 +450,11 @@ static int tpg110_probe(struct spi_device *spi)
return 0;
}
-static int tpg110_remove(struct spi_device *spi)
+static void tpg110_remove(struct spi_device *spi)
{
struct tpg110 *tpg = spi_get_drvdata(spi);
drm_panel_remove(&tpg->panel);
- return 0;
}
static const struct of_device_id tpg110_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-widechips-ws2401.c b/drivers/gpu/drm/panel/panel-widechips-ws2401.c
index 8bc976f54b80..236f3cb2b594 100644
--- a/drivers/gpu/drm/panel/panel-widechips-ws2401.c
+++ b/drivers/gpu/drm/panel/panel-widechips-ws2401.c
@@ -407,12 +407,11 @@ static int ws2401_probe(struct spi_device *spi)
return 0;
}
-static int ws2401_remove(struct spi_device *spi)
+static void ws2401_remove(struct spi_device *spi)
{
struct ws2401 *ws = spi_get_drvdata(spi);
drm_panel_remove(&ws->panel);
- return 0;
}
/*
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index 0fce73b9a646..70bd84b7ef2b 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -198,7 +198,8 @@ void radeon_atom_backlight_init(struct radeon_encoder *radeon_encoder,
* so don't register a backlight device
*/
if ((rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) &&
- (rdev->pdev->device == 0x6741))
+ (rdev->pdev->device == 0x6741) &&
+ !dmi_match(DMI_PRODUCT_NAME, "iMac12,1"))
return;
if (!radeon_encoder->enc_priv)
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index 377f9cdb5b53..84013faa4756 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -470,8 +470,8 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
int32_t *msg, msg_type, handle;
unsigned img_size = 0;
void *ptr;
-
- int i, r;
+ long r;
+ int i;
if (offset & 0x3F) {
DRM_ERROR("UVD messages must be 64 byte aligned!\n");
@@ -481,13 +481,13 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false,
MAX_SCHEDULE_TIMEOUT);
if (r <= 0) {
- DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
+ DRM_ERROR("Failed waiting for UVD message (%ld)!\n", r);
return r ? r : -ETIME;
}
r = radeon_bo_kmap(bo, &ptr);
if (r) {
- DRM_ERROR("Failed mapping the UVD message (%d)!\n", r);
+ DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
return r;
}
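
[Editor's note] The radeon_uvd change above works because dma_resv_wait_timeout() returns a long (remaining jiffies on success, 0 on timeout, negative errno on error), so storing the result in an int could truncate it. A minimal sketch of the intended handling, using a hypothetical wrapper name:

	#include <linux/dma-resv.h>
	#include <linux/errno.h>
	#include <linux/sched.h>

	/* Hypothetical helper: wait for the fences on @resv and normalize
	 * the long result of dma_resv_wait_timeout() to 0 or -errno.
	 */
	static int example_wait_resv(struct dma_resv *resv)
	{
		long lret;

		lret = dma_resv_wait_timeout(resv, false, false,
					     MAX_SCHEDULE_TIMEOUT);
		if (lret <= 0)
			return lret ? lret : -ETIME;

		return 0;
	}
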
diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
index 830bdd5e9b7c..8677c8271678 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
@@ -529,13 +529,6 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master,
return ret;
}
- ret = clk_prepare_enable(hdmi->vpll_clk);
- if (ret) {
- DRM_DEV_ERROR(hdmi->dev, "Failed to enable HDMI vpll: %d\n",
- ret);
- return ret;
- }
-
hdmi->phy = devm_phy_optional_get(dev, "hdmi");
if (IS_ERR(hdmi->phy)) {
ret = PTR_ERR(hdmi->phy);
@@ -544,6 +537,13 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master,
return ret;
}
+ ret = clk_prepare_enable(hdmi->vpll_clk);
+ if (ret) {
+ DRM_DEV_ERROR(hdmi->dev, "Failed to enable HDMI vpll: %d\n",
+ ret);
+ return ret;
+ }
+
drm_encoder_helper_add(encoder, &dw_hdmi_rockchip_encoder_helper_funcs);
drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS);
diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
index 1f7353f0684a..798b542e5916 100644
--- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
@@ -902,6 +902,7 @@ static const struct vop_win_phy rk3399_win01_data = {
.enable = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 0),
.format = VOP_REG(RK3288_WIN0_CTRL0, 0x7, 1),
.rb_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 12),
+ .x_mir_en = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 21),
.y_mir_en = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 22),
.act_info = VOP_REG(RK3288_WIN0_ACT_INFO, 0x1fff1fff, 0),
.dsp_info = VOP_REG(RK3288_WIN0_DSP_INFO, 0x0fff0fff, 0),
@@ -912,6 +913,7 @@ static const struct vop_win_phy rk3399_win01_data = {
.uv_vir = VOP_REG(RK3288_WIN0_VIR, 0x3fff, 16),
.src_alpha_ctl = VOP_REG(RK3288_WIN0_SRC_ALPHA_CTRL, 0xff, 0),
.dst_alpha_ctl = VOP_REG(RK3288_WIN0_DST_ALPHA_CTRL, 0xff, 0),
+ .channel = VOP_REG(RK3288_WIN0_CTRL2, 0xff, 0),
};
/*
@@ -922,11 +924,11 @@ static const struct vop_win_phy rk3399_win01_data = {
static const struct vop_win_data rk3399_vop_win_data[] = {
{ .base = 0x00, .phy = &rk3399_win01_data,
.type = DRM_PLANE_TYPE_PRIMARY },
- { .base = 0x40, .phy = &rk3288_win01_data,
+ { .base = 0x40, .phy = &rk3368_win01_data,
.type = DRM_PLANE_TYPE_OVERLAY },
- { .base = 0x00, .phy = &rk3288_win23_data,
+ { .base = 0x00, .phy = &rk3368_win23_data,
.type = DRM_PLANE_TYPE_OVERLAY },
- { .base = 0x50, .phy = &rk3288_win23_data,
+ { .base = 0x50, .phy = &rk3368_win23_data,
.type = DRM_PLANE_TYPE_CURSOR },
};
diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.h b/drivers/gpu/drm/sun4i/sun8i_mixer.h
index 145833a9d82d..5b3fbee18671 100644
--- a/drivers/gpu/drm/sun4i/sun8i_mixer.h
+++ b/drivers/gpu/drm/sun4i/sun8i_mixer.h
@@ -111,10 +111,10 @@
/* format 13 is semi-planar YUV411 VUVU */
#define SUN8I_MIXER_FBFMT_YUV411 14
/* format 15 doesn't exist */
-/* format 16 is P010 YVU */
-#define SUN8I_MIXER_FBFMT_P010_YUV 17
-/* format 18 is P210 YVU */
-#define SUN8I_MIXER_FBFMT_P210_YUV 19
+#define SUN8I_MIXER_FBFMT_P010_YUV 16
+/* format 17 is P010 YVU */
+#define SUN8I_MIXER_FBFMT_P210_YUV 18
+/* format 19 is P210 YVU */
/* format 20 is packed YVU444 10-bit */
/* format 21 is packed YUV444 10-bit */
diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig
index 8cf5aeb9db6c..201f5175ecfe 100644
--- a/drivers/gpu/drm/tegra/Kconfig
+++ b/drivers/gpu/drm/tegra/Kconfig
@@ -5,6 +5,7 @@ config DRM_TEGRA
depends on COMMON_CLK
depends on DRM
depends on OF
+ select DRM_DP_AUX_BUS
select DRM_KMS_HELPER
select DRM_MIPI_DSI
select DRM_PANEL
diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c
index 1f96e416fa08..d7a731d287d2 100644
--- a/drivers/gpu/drm/tegra/dpaux.c
+++ b/drivers/gpu/drm/tegra/dpaux.c
@@ -19,6 +19,7 @@
#include <linux/workqueue.h>
#include <drm/drm_dp_helper.h>
+#include <drm/drm_dp_aux_bus.h>
#include <drm/drm_panel.h>
#include "dp.h"
@@ -570,6 +571,12 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
list_add_tail(&dpaux->list, &dpaux_list);
mutex_unlock(&dpaux_lock);
+ err = devm_of_dp_aux_populate_ep_devices(&dpaux->aux);
+ if (err < 0) {
+ dev_err(dpaux->dev, "failed to populate AUX bus: %d\n", err);
+ return err;
+ }
+
return 0;
}
diff --git a/drivers/gpu/drm/tegra/falcon.c b/drivers/gpu/drm/tegra/falcon.c
index 223ab2ceb7e6..3762d87759d9 100644
--- a/drivers/gpu/drm/tegra/falcon.c
+++ b/drivers/gpu/drm/tegra/falcon.c
@@ -63,7 +63,7 @@ static void falcon_copy_firmware_image(struct falcon *falcon,
/* copy the whole thing taking into account endianness */
for (i = 0; i < firmware->size / sizeof(u32); i++)
- virt[i] = le32_to_cpu(((u32 *)firmware->data)[i]);
+ virt[i] = le32_to_cpu(((__le32 *)firmware->data)[i]);
}
static int falcon_parse_firmware_image(struct falcon *falcon)
diff --git a/drivers/gpu/drm/tiny/hx8357d.c b/drivers/gpu/drm/tiny/hx8357d.c
index 9b33c05732aa..ebb025543f8d 100644
--- a/drivers/gpu/drm/tiny/hx8357d.c
+++ b/drivers/gpu/drm/tiny/hx8357d.c
@@ -263,14 +263,12 @@ static int hx8357d_probe(struct spi_device *spi)
return 0;
}
-static int hx8357d_remove(struct spi_device *spi)
+static void hx8357d_remove(struct spi_device *spi)
{
struct drm_device *drm = spi_get_drvdata(spi);
drm_dev_unplug(drm);
drm_atomic_helper_shutdown(drm);
-
- return 0;
}
static void hx8357d_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/tiny/ili9163.c b/drivers/gpu/drm/tiny/ili9163.c
index bcc181351236..fc8ed245b0bc 100644
--- a/drivers/gpu/drm/tiny/ili9163.c
+++ b/drivers/gpu/drm/tiny/ili9163.c
@@ -193,14 +193,12 @@ static int ili9163_probe(struct spi_device *spi)
return 0;
}
-static int ili9163_remove(struct spi_device *spi)
+static void ili9163_remove(struct spi_device *spi)
{
struct drm_device *drm = spi_get_drvdata(spi);
drm_dev_unplug(drm);
drm_atomic_helper_shutdown(drm);
-
- return 0;
}
static void ili9163_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/tiny/ili9225.c b/drivers/gpu/drm/tiny/ili9225.c
index 976d3209f164..cc92eb9f2a07 100644
--- a/drivers/gpu/drm/tiny/ili9225.c
+++ b/drivers/gpu/drm/tiny/ili9225.c
@@ -411,14 +411,12 @@ static int ili9225_probe(struct spi_device *spi)
return 0;
}
-static int ili9225_remove(struct spi_device *spi)
+static void ili9225_remove(struct spi_device *spi)
{
struct drm_device *drm = spi_get_drvdata(spi);
drm_dev_unplug(drm);
drm_atomic_helper_shutdown(drm);
-
- return 0;
}
static void ili9225_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/tiny/ili9341.c b/drivers/gpu/drm/tiny/ili9341.c
index 37e0c33399c8..5b8cc770ee7b 100644
--- a/drivers/gpu/drm/tiny/ili9341.c
+++ b/drivers/gpu/drm/tiny/ili9341.c
@@ -225,14 +225,12 @@ static int ili9341_probe(struct spi_device *spi)
return 0;
}
-static int ili9341_remove(struct spi_device *spi)
+static void ili9341_remove(struct spi_device *spi)
{
struct drm_device *drm = spi_get_drvdata(spi);
drm_dev_unplug(drm);
drm_atomic_helper_shutdown(drm);
-
- return 0;
}
static void ili9341_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/tiny/ili9486.c b/drivers/gpu/drm/tiny/ili9486.c
index e9a63f4b2993..6d655e18e0aa 100644
--- a/drivers/gpu/drm/tiny/ili9486.c
+++ b/drivers/gpu/drm/tiny/ili9486.c
@@ -243,14 +243,12 @@ static int ili9486_probe(struct spi_device *spi)
return 0;
}
-static int ili9486_remove(struct spi_device *spi)
+static void ili9486_remove(struct spi_device *spi)
{
struct drm_device *drm = spi_get_drvdata(spi);
drm_dev_unplug(drm);
drm_atomic_helper_shutdown(drm);
-
- return 0;
}
static void ili9486_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/tiny/mi0283qt.c b/drivers/gpu/drm/tiny/mi0283qt.c
index 023de49e7a8e..5e060f6910bb 100644
--- a/drivers/gpu/drm/tiny/mi0283qt.c
+++ b/drivers/gpu/drm/tiny/mi0283qt.c
@@ -233,14 +233,12 @@ static int mi0283qt_probe(struct spi_device *spi)
return 0;
}
-static int mi0283qt_remove(struct spi_device *spi)
+static void mi0283qt_remove(struct spi_device *spi)
{
struct drm_device *drm = spi_get_drvdata(spi);
drm_dev_unplug(drm);
drm_atomic_helper_shutdown(drm);
-
- return 0;
}
static void mi0283qt_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/tiny/repaper.c b/drivers/gpu/drm/tiny/repaper.c
index 97a775c48cea..beeeb170d0b1 100644
--- a/drivers/gpu/drm/tiny/repaper.c
+++ b/drivers/gpu/drm/tiny/repaper.c
@@ -1140,14 +1140,12 @@ static int repaper_probe(struct spi_device *spi)
return 0;
}
-static int repaper_remove(struct spi_device *spi)
+static void repaper_remove(struct spi_device *spi)
{
struct drm_device *drm = spi_get_drvdata(spi);
drm_dev_unplug(drm);
drm_atomic_helper_shutdown(drm);
-
- return 0;
}
static void repaper_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/tiny/st7586.c b/drivers/gpu/drm/tiny/st7586.c
index 51b9b9fb3ead..3f38faa1cd8c 100644
--- a/drivers/gpu/drm/tiny/st7586.c
+++ b/drivers/gpu/drm/tiny/st7586.c
@@ -360,14 +360,12 @@ static int st7586_probe(struct spi_device *spi)
return 0;
}
-static int st7586_remove(struct spi_device *spi)
+static void st7586_remove(struct spi_device *spi)
{
struct drm_device *drm = spi_get_drvdata(spi);
drm_dev_unplug(drm);
drm_atomic_helper_shutdown(drm);
-
- return 0;
}
static void st7586_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/tiny/st7735r.c b/drivers/gpu/drm/tiny/st7735r.c
index fc40dd10efa8..29d618093e94 100644
--- a/drivers/gpu/drm/tiny/st7735r.c
+++ b/drivers/gpu/drm/tiny/st7735r.c
@@ -247,14 +247,12 @@ static int st7735r_probe(struct spi_device *spi)
return 0;
}
-static int st7735r_remove(struct spi_device *spi)
+static void st7735r_remove(struct spi_device *spi)
{
struct drm_device *drm = spi_get_drvdata(spi);
drm_dev_unplug(drm);
drm_atomic_helper_shutdown(drm);
-
- return 0;
}
static void st7735r_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 287dbc89ad64..783890e8d43a 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -525,9 +525,11 @@ int vc4_crtc_disable_at_boot(struct drm_crtc *crtc)
if (ret)
return ret;
- ret = pm_runtime_put(&vc4_hdmi->pdev->dev);
- if (ret)
- return ret;
+ /*
+ * post_crtc_powerdown will have called pm_runtime_put, so we
+ * don't need it here; otherwise we'd get the reference counting
+ * wrong.
+ */
return 0;
}
@@ -671,7 +673,6 @@ static int vc4_crtc_atomic_check(struct drm_crtc *crtc,
const struct drm_display_mode *mode = &crtc_state->adjusted_mode;
struct vc4_encoder *vc4_encoder = to_vc4_encoder(encoder);
- mode = &crtc_state->adjusted_mode;
if (vc4_encoder->type == VC4_ENCODER_TYPE_HDMI0) {
vc4_state->hvs_load = max(mode->clock * mode->hdisplay / mode->htotal + 1000,
mode->clock * 9 / 10) * 1000;
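
[Editor's note] The comment added above hinges on runtime-PM reference counting staying balanced: a path that already drops the usage count (here via post_crtc_powerdown()) must not be followed by another pm_runtime_put(). A minimal sketch of a balanced get/put pair, with hypothetical names:

	#include <linux/pm_runtime.h>

	static int example_power_cycle(struct device *dev)
	{
		int ret;

		ret = pm_runtime_resume_and_get(dev);	/* usage count +1 */
		if (ret < 0)
			return ret;

		/* ... program the hardware while it is powered ... */

		pm_runtime_put(dev);			/* usage count -1 */
		return 0;
	}
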
diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c
index a229da58962a..9300d3354c51 100644
--- a/drivers/gpu/drm/vc4/vc4_dsi.c
+++ b/drivers/gpu/drm/vc4/vc4_dsi.c
@@ -1262,7 +1262,6 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host,
struct mipi_dsi_device *device)
{
struct vc4_dsi *dsi = host_to_dsi(host);
- int ret;
dsi->lanes = device->lanes;
dsi->channel = device->channel;
@@ -1297,18 +1296,15 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host,
return 0;
}
- ret = component_add(&dsi->pdev->dev, &vc4_dsi_ops);
- if (ret) {
- mipi_dsi_host_unregister(&dsi->dsi_host);
- return ret;
- }
-
- return 0;
+ return component_add(&dsi->pdev->dev, &vc4_dsi_ops);
}
static int vc4_dsi_host_detach(struct mipi_dsi_host *host,
struct mipi_dsi_device *device)
{
+ struct vc4_dsi *dsi = host_to_dsi(host);
+
+ component_del(&dsi->pdev->dev, &vc4_dsi_ops);
return 0;
}
@@ -1686,9 +1682,7 @@ static int vc4_dsi_dev_remove(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct vc4_dsi *dsi = dev_get_drvdata(dev);
- component_del(&pdev->dev, &vc4_dsi_ops);
mipi_dsi_host_unregister(&dsi->dsi_host);
-
return 0;
}
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 053fbaf765ca..3a1626f261e5 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -196,14 +196,8 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force)
if (gpiod_get_value_cansleep(vc4_hdmi->hpd_gpio))
connected = true;
} else {
- unsigned long flags;
- u32 hotplug;
-
- spin_lock_irqsave(&vc4_hdmi->hw_lock, flags);
- hotplug = HDMI_READ(HDMI_HOTPLUG);
- spin_unlock_irqrestore(&vc4_hdmi->hw_lock, flags);
-
- if (hotplug & VC4_HDMI_HOTPLUG_CONNECTED)
+ if (vc4_hdmi->variant->hp_detect &&
+ vc4_hdmi->variant->hp_detect(vc4_hdmi))
connected = true;
}
@@ -1251,6 +1245,7 @@ static int vc4_hdmi_encoder_atomic_check(struct drm_encoder *encoder,
unsigned long long tmds_rate;
if (vc4_hdmi->variant->unsupported_odd_h_timings &&
+ !(mode->flags & DRM_MODE_FLAG_DBLCLK) &&
((mode->hdisplay % 2) || (mode->hsync_start % 2) ||
(mode->hsync_end % 2) || (mode->htotal % 2)))
return -EINVAL;
@@ -1298,6 +1293,7 @@ vc4_hdmi_encoder_mode_valid(struct drm_encoder *encoder,
struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder);
if (vc4_hdmi->variant->unsupported_odd_h_timings &&
+ !(mode->flags & DRM_MODE_FLAG_DBLCLK) &&
((mode->hdisplay % 2) || (mode->hsync_start % 2) ||
(mode->hsync_end % 2) || (mode->htotal % 2)))
return MODE_H_ILLEGAL;
@@ -1343,6 +1339,18 @@ static u32 vc5_hdmi_channel_map(struct vc4_hdmi *vc4_hdmi, u32 channel_mask)
return channel_map;
}
+static bool vc5_hdmi_hp_detect(struct vc4_hdmi *vc4_hdmi)
+{
+ unsigned long flags;
+ u32 hotplug;
+
+ spin_lock_irqsave(&vc4_hdmi->hw_lock, flags);
+ hotplug = HDMI_READ(HDMI_HOTPLUG);
+ spin_unlock_irqrestore(&vc4_hdmi->hw_lock, flags);
+
+ return !!(hotplug & VC4_HDMI_HOTPLUG_CONNECTED);
+}
+
/* HDMI audio codec callbacks */
static void vc4_hdmi_audio_set_mai_clock(struct vc4_hdmi *vc4_hdmi,
unsigned int samplerate)
@@ -1741,6 +1749,7 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi)
dev_err(dev, "Couldn't register the HDMI codec: %ld\n", PTR_ERR(codec_pdev));
return PTR_ERR(codec_pdev);
}
+ vc4_hdmi->audio.codec_pdev = codec_pdev;
dai_link->cpus = &vc4_hdmi->audio.cpu;
dai_link->codecs = &vc4_hdmi->audio.codec;
@@ -1780,6 +1789,12 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi)
}
+static void vc4_hdmi_audio_exit(struct vc4_hdmi *vc4_hdmi)
+{
+ platform_device_unregister(vc4_hdmi->audio.codec_pdev);
+ vc4_hdmi->audio.codec_pdev = NULL;
+}
+
static irqreturn_t vc4_hdmi_hpd_irq_thread(int irq, void *priv)
{
struct vc4_hdmi *vc4_hdmi = priv;
@@ -2504,7 +2519,8 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
* vc4_hdmi_disable_scrambling() will thus run at boot, make
* sure it's disabled, and avoid any inconsistency.
*/
- vc4_hdmi->scdc_enabled = true;
+ if (variant->max_pixel_clock > HDMI_14_MAX_TMDS_CLK)
+ vc4_hdmi->scdc_enabled = true;
ret = variant->init_resources(vc4_hdmi);
if (ret)
@@ -2651,6 +2667,7 @@ static void vc4_hdmi_unbind(struct device *dev, struct device *master,
kfree(vc4_hdmi->hdmi_regset.regs);
kfree(vc4_hdmi->hd_regset.regs);
+ vc4_hdmi_audio_exit(vc4_hdmi);
vc4_hdmi_cec_exit(vc4_hdmi);
vc4_hdmi_hotplug_exit(vc4_hdmi);
vc4_hdmi_connector_destroy(&vc4_hdmi->connector);
@@ -2723,6 +2740,7 @@ static const struct vc4_hdmi_variant bcm2711_hdmi0_variant = {
.phy_rng_disable = vc5_hdmi_phy_rng_disable,
.channel_map = vc5_hdmi_channel_map,
.supports_hdr = true,
+ .hp_detect = vc5_hdmi_hp_detect,
};
static const struct vc4_hdmi_variant bcm2711_hdmi1_variant = {
@@ -2751,6 +2769,7 @@ static const struct vc4_hdmi_variant bcm2711_hdmi1_variant = {
.phy_rng_disable = vc5_hdmi_phy_rng_disable,
.channel_map = vc5_hdmi_channel_map,
.supports_hdr = true,
+ .hp_detect = vc5_hdmi_hp_detect,
};
static const struct of_device_id vc4_hdmi_dt_match[] = {
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.h b/drivers/gpu/drm/vc4/vc4_hdmi.h
index 36c0b082a43b..6ffdd4ec5fb6 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.h
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.h
@@ -102,6 +102,9 @@ struct vc4_hdmi_variant {
/* Enables HDR metadata */
bool supports_hdr;
+
+ /* Callback for hardware specific hotplug detect */
+ bool (*hp_detect)(struct vc4_hdmi *vc4_hdmi);
};
/* HDMI audio information */
@@ -113,6 +116,7 @@ struct vc4_hdmi_audio {
struct snd_soc_dai_link_component platform;
struct snd_dmaengine_dai_dma_data dma_data;
struct hdmi_audio_infoframe infoframe;
+ struct platform_device *codec_pdev;
bool streaming;
};
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index d6b66636a19b..ea3ecdda561d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -1140,15 +1140,14 @@ extern int vmw_execbuf_fence_commands(struct drm_file *file_priv,
struct vmw_private *dev_priv,
struct vmw_fence_obj **p_fence,
uint32_t *p_handle);
-extern void vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
+extern int vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
struct vmw_fpriv *vmw_fp,
int ret,
struct drm_vmw_fence_rep __user
*user_fence_rep,
struct vmw_fence_obj *fence,
uint32_t fence_handle,
- int32_t out_fence_fd,
- struct sync_file *sync_file);
+ int32_t out_fence_fd);
bool vmw_cmd_describe(const void *buf, u32 *size, char const **cmd);
/**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 44ca23b0ea4e..dd2ff441068e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -3879,17 +3879,17 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv,
* Also if copying fails, user-space will be unable to signal the fence object
* so we wait for it immediately, and then unreference the user-space reference.
*/
-void
+int
vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
struct vmw_fpriv *vmw_fp, int ret,
struct drm_vmw_fence_rep __user *user_fence_rep,
struct vmw_fence_obj *fence, uint32_t fence_handle,
- int32_t out_fence_fd, struct sync_file *sync_file)
+ int32_t out_fence_fd)
{
struct drm_vmw_fence_rep fence_rep;
if (user_fence_rep == NULL)
- return;
+ return 0;
memset(&fence_rep, 0, sizeof(fence_rep));
@@ -3917,19 +3917,13 @@ vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
* handle.
*/
if (unlikely(ret != 0) && (fence_rep.error == 0)) {
- if (sync_file)
- fput(sync_file->file);
-
- if (fence_rep.fd != -1) {
- put_unused_fd(fence_rep.fd);
- fence_rep.fd = -1;
- }
-
ttm_ref_object_base_unref(vmw_fp->tfile, fence_handle);
VMW_DEBUG_USER("Fence copy error. Syncing.\n");
(void) vmw_fence_obj_wait(fence, false, false,
VMW_FENCE_WAIT_TIMEOUT);
}
+
+ return ret ? -EFAULT : 0;
}
/**
@@ -4266,16 +4260,23 @@ int vmw_execbuf_process(struct drm_file *file_priv,
(void) vmw_fence_obj_wait(fence, false, false,
VMW_FENCE_WAIT_TIMEOUT);
+ }
+ }
+
+ ret = vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret,
+ user_fence_rep, fence, handle, out_fence_fd);
+
+ if (sync_file) {
+ if (ret) {
+ /* usercopy of fence failed, put the file object */
+ fput(sync_file->file);
+ put_unused_fd(out_fence_fd);
} else {
/* Link the fence with the FD created earlier */
fd_install(out_fence_fd, sync_file->file);
}
}
- vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret,
- user_fence_rep, fence, handle, out_fence_fd,
- sync_file);
-
/* Don't unreference when handing fence out */
if (unlikely(out_fence != NULL)) {
*out_fence = fence;
@@ -4293,7 +4294,7 @@ int vmw_execbuf_process(struct drm_file *file_priv,
*/
vmw_validation_unref_lists(&val_ctx);
- return 0;
+ return ret;
out_unlock_binding:
mutex_unlock(&dev_priv->binding_mutex);
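
[Editor's note] The vmwgfx rework above moves sync_file cleanup to the caller, which can now act on the -EFAULT returned by vmw_execbuf_copy_fence_user(). It follows the usual install-or-cleanup idiom for reserved file descriptors; a minimal sketch with hypothetical names:

	#include <linux/file.h>

	/* Publish @file on @fd only when @err is zero; otherwise release
	 * both the file reference and the fd reserved earlier with
	 * get_unused_fd_flags().
	 */
	static int example_publish_fd(int fd, struct file *file, int err)
	{
		if (err) {
			fput(file);
			put_unused_fd(fd);
			return err;
		}

		fd_install(fd, file);
		return 0;
	}
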
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index c60d395f9e2e..5001b87aebe8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -1128,7 +1128,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
}
vmw_execbuf_copy_fence_user(dev_priv, vmw_fp, 0, user_fence_rep, fence,
- handle, -1, NULL);
+ handle, -1);
vmw_fence_obj_unreference(&fence);
return 0;
out_no_create:
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 4e693e8de2c3..bbd2f4ec08ec 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -2501,7 +2501,7 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv,
if (file_priv)
vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv),
ret, user_fence_rep, fence,
- handle, -1, NULL);
+ handle, -1);
if (out_fence)
*out_fence = fence;
else
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index e08e331e46ae..f87a8705f518 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -137,8 +137,15 @@ void host1x_syncpt_restore(struct host1x *host)
struct host1x_syncpt *sp_base = host->syncpt;
unsigned int i;
- for (i = 0; i < host1x_syncpt_nb_pts(host); i++)
+ for (i = 0; i < host1x_syncpt_nb_pts(host); i++) {
+ /*
+ * Unassign syncpt from channels for purposes of Tegra186
+ * syncpoint protection. This prevents any channel from
+ * accessing it until it is reassigned.
+ */
+ host1x_hw_syncpt_assign_to_channel(host, sp_base + i, NULL);
host1x_hw_syncpt_restore(host, sp_base + i);
+ }
for (i = 0; i < host1x_syncpt_nb_bases(host); i++)
host1x_hw_syncpt_restore_wait_base(host, sp_base + i);
@@ -227,27 +234,12 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
void *ref;
struct host1x_waitlist *waiter;
int err = 0, check_count = 0;
- u32 val;
if (value)
- *value = 0;
-
- /* first check cache */
- if (host1x_syncpt_is_expired(sp, thresh)) {
- if (value)
- *value = host1x_syncpt_load(sp);
+ *value = host1x_syncpt_load(sp);
+ if (host1x_syncpt_is_expired(sp, thresh))
return 0;
- }
-
- /* try to read from register */
- val = host1x_hw_syncpt_load(sp->host, sp);
- if (host1x_syncpt_is_expired(sp, thresh)) {
- if (value)
- *value = val;
-
- goto done;
- }
if (!timeout) {
err = -EAGAIN;
@@ -352,13 +344,6 @@ int host1x_syncpt_init(struct host1x *host)
for (i = 0; i < host->info->nb_pts; i++) {
syncpt[i].id = i;
syncpt[i].host = host;
-
- /*
- * Unassign syncpt from channels for purposes of Tegra186
- * syncpoint protection. This prevents any channel from
- * accessing it until it is reassigned.
- */
- host1x_hw_syncpt_assign_to_channel(host, &syncpt[i], NULL);
}
for (i = 0; i < host->info->nb_bases; i++)
diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
index 2503be0253d3..19fa734a9a79 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
@@ -37,11 +37,11 @@ static int amd_sfh_wait_response_v2(struct amd_mp2_dev *mp2, u8 sid, u32 sensor_
{
union cmd_response cmd_resp;
- /* Get response with status within a max of 800 ms timeout */
+ /* Get response with status within a maximum timeout of 1600 ms */
if (!readl_poll_timeout(mp2->mmio + AMD_P2C_MSG(0), cmd_resp.resp,
(cmd_resp.response_v2.response == sensor_sts &&
cmd_resp.response_v2.status == 0 && (sid == 0xff ||
- cmd_resp.response_v2.sensor_id == sid)), 500, 800000))
+ cmd_resp.response_v2.sensor_id == sid)), 500, 1600000))
return cmd_resp.response_v2.response;
return SENSOR_DISABLED;
@@ -53,6 +53,7 @@ static void amd_start_sensor_v2(struct amd_mp2_dev *privdata, struct amd_mp2_sen
cmd_base.ul = 0;
cmd_base.cmd_v2.cmd_id = ENABLE_SENSOR;
+ cmd_base.cmd_v2.intr_disable = 1;
cmd_base.cmd_v2.period = info.period;
cmd_base.cmd_v2.sensor_id = info.sensor_idx;
cmd_base.cmd_v2.length = 16;
@@ -70,6 +71,7 @@ static void amd_stop_sensor_v2(struct amd_mp2_dev *privdata, u16 sensor_idx)
cmd_base.ul = 0;
cmd_base.cmd_v2.cmd_id = DISABLE_SENSOR;
+ cmd_base.cmd_v2.intr_disable = 1;
cmd_base.cmd_v2.period = 0;
cmd_base.cmd_v2.sensor_id = sensor_idx;
cmd_base.cmd_v2.length = 16;
@@ -83,12 +85,51 @@ static void amd_stop_all_sensor_v2(struct amd_mp2_dev *privdata)
union sfh_cmd_base cmd_base;
cmd_base.cmd_v2.cmd_id = STOP_ALL_SENSORS;
+ cmd_base.cmd_v2.intr_disable = 1;
cmd_base.cmd_v2.period = 0;
cmd_base.cmd_v2.sensor_id = 0;
writel(cmd_base.ul, privdata->mmio + AMD_C2P_MSG0);
}
+static void amd_sfh_clear_intr_v2(struct amd_mp2_dev *privdata)
+{
+ if (readl(privdata->mmio + AMD_P2C_MSG(4))) {
+ writel(0, privdata->mmio + AMD_P2C_MSG(4));
+ writel(0xf, privdata->mmio + AMD_P2C_MSG(5));
+ }
+}
+
+static void amd_sfh_clear_intr(struct amd_mp2_dev *privdata)
+{
+ if (privdata->mp2_ops->clear_intr)
+ privdata->mp2_ops->clear_intr(privdata);
+}
+
+static irqreturn_t amd_sfh_irq_handler(int irq, void *data)
+{
+ amd_sfh_clear_intr(data);
+
+ return IRQ_HANDLED;
+}
+
+static int amd_sfh_irq_init_v2(struct amd_mp2_dev *privdata)
+{
+ int rc;
+
+ pci_intx(privdata->pdev, true);
+
+ rc = devm_request_irq(&privdata->pdev->dev, privdata->pdev->irq,
+ amd_sfh_irq_handler, 0, DRIVER_NAME, privdata);
+ if (rc) {
+ dev_err(&privdata->pdev->dev, "failed to request irq %d err=%d\n",
+ privdata->pdev->irq, rc);
+ return rc;
+ }
+
+ return 0;
+}
+
void amd_start_sensor(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info info)
{
union sfh_cmd_param cmd_param;
@@ -193,6 +234,8 @@ static void amd_mp2_pci_remove(void *privdata)
struct amd_mp2_dev *mp2 = privdata;
amd_sfh_hid_client_deinit(privdata);
mp2->mp2_ops->stop_all(mp2);
+ pci_intx(mp2->pdev, false);
+ amd_sfh_clear_intr(mp2);
}
static const struct amd_mp2_ops amd_sfh_ops_v2 = {
@@ -200,6 +243,8 @@ static const struct amd_mp2_ops amd_sfh_ops_v2 = {
.stop = amd_stop_sensor_v2,
.stop_all = amd_stop_all_sensor_v2,
.response = amd_sfh_wait_response_v2,
+ .clear_intr = amd_sfh_clear_intr_v2,
+ .init_intr = amd_sfh_irq_init_v2,
};
static const struct amd_mp2_ops amd_sfh_ops = {
@@ -225,6 +270,14 @@ static void mp2_select_ops(struct amd_mp2_dev *privdata)
}
}
+static int amd_sfh_irq_init(struct amd_mp2_dev *privdata)
+{
+ if (privdata->mp2_ops->init_intr)
+ return privdata->mp2_ops->init_intr(privdata);
+
+ return 0;
+}
+
static int amd_mp2_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct amd_mp2_dev *privdata;
@@ -261,9 +314,20 @@ static int amd_mp2_pci_probe(struct pci_dev *pdev, const struct pci_device_id *i
mp2_select_ops(privdata);
+ rc = amd_sfh_irq_init(privdata);
+ if (rc) {
+ dev_err(&pdev->dev, "amd_sfh_irq_init failed\n");
+ return rc;
+ }
+
rc = amd_sfh_hid_client_init(privdata);
- if (rc)
+ if (rc) {
+ amd_sfh_clear_intr(privdata);
+ dev_err(&pdev->dev, "amd_sfh_hid_client_init failed\n");
return rc;
+ }
+
+ amd_sfh_clear_intr(privdata);
return devm_add_action_or_reset(&pdev->dev, amd_mp2_pci_remove, privdata);
}
@@ -290,6 +354,9 @@ static int __maybe_unused amd_mp2_pci_resume(struct device *dev)
}
}
+ schedule_delayed_work(&cl_data->work_buffer, msecs_to_jiffies(AMD_SFH_IDLE_LOOP));
+ amd_sfh_clear_intr(mp2);
+
return 0;
}
@@ -312,6 +379,9 @@ static int __maybe_unused amd_mp2_pci_suspend(struct device *dev)
}
}
+ cancel_delayed_work_sync(&cl_data->work_buffer);
+ amd_sfh_clear_intr(mp2);
+
return 0;
}
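
[Editor's note] The interrupt hooks above are wired through the driver's ops table and are only invoked when the variant provides them, so v1 hardware needs no stubs. A minimal sketch of that optional-callback pattern (illustrative struct, not the driver's real one):

	struct example_ops {
		void (*clear_intr)(void *priv);
		int (*init_intr)(void *priv);
	};

	static int example_init_intr(const struct example_ops *ops, void *priv)
	{
		if (ops->init_intr)
			return ops->init_intr(priv);

		return 0;	/* variant without interrupts: nothing to do */
	}
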
diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
index ae30e059f847..97b99861fae2 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
@@ -49,7 +49,7 @@ union sfh_cmd_base {
} s;
struct {
u32 cmd_id : 4;
- u32 intr_enable : 1;
+ u32 intr_disable : 1;
u32 rsvd1 : 3;
u32 length : 7;
u32 mem_type : 1;
@@ -141,5 +141,7 @@ struct amd_mp2_ops {
void (*stop)(struct amd_mp2_dev *privdata, u16 sensor_idx);
void (*stop_all)(struct amd_mp2_dev *privdata);
int (*response)(struct amd_mp2_dev *mp2, u8 sid, u32 sensor_sts);
+ void (*clear_intr)(struct amd_mp2_dev *privdata);
+ int (*init_intr)(struct amd_mp2_dev *privdata);
};
#endif
diff --git a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c
index be41f83b0289..76095bd53c65 100644
--- a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c
+++ b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c
@@ -27,6 +27,7 @@
#define HID_USAGE_SENSOR_STATE_READY_ENUM 0x02
#define HID_USAGE_SENSOR_STATE_INITIALIZING_ENUM 0x05
#define HID_USAGE_SENSOR_EVENT_DATA_UPDATED_ENUM 0x04
+#define ILLUMINANCE_MASK GENMASK(14, 0)
int get_report_descriptor(int sensor_idx, u8 *rep_desc)
{
@@ -246,7 +247,8 @@ u8 get_input_report(u8 current_index, int sensor_idx, int report_id, struct amd_
get_common_inputs(&als_input.common_property, report_id);
/* For ALS, V2 platforms use the C2P_MSG5 register instead of the DRAM access method */
if (supported_input == V2_STATUS)
- als_input.illuminance_value = (int)readl(privdata->mmio + AMD_C2P_MSG(5));
+ als_input.illuminance_value =
+ readl(privdata->mmio + AMD_C2P_MSG(5)) & ILLUMINANCE_MASK;
else
als_input.illuminance_value =
(int)sensor_virt_addr[0] / AMD_SFH_FW_MULTIPLIER;
diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c
index 24802a4a636e..7dc89dc6b0f0 100644
--- a/drivers/hid/hid-apple.c
+++ b/drivers/hid/hid-apple.c
@@ -691,49 +691,49 @@ static const struct hid_device_id apple_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI),
.driver_data = APPLE_HAS_FN },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ISO),
- .driver_data = APPLE_HAS_FN },
+ .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_JIS),
.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI),
.driver_data = APPLE_HAS_FN },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6_ISO),
- .driver_data = APPLE_HAS_FN },
+ .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6_JIS),
.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI),
.driver_data = APPLE_HAS_FN },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO),
- .driver_data = APPLE_HAS_FN },
+ .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS),
.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI),
.driver_data = APPLE_HAS_FN },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO),
- .driver_data = APPLE_HAS_FN },
+ .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS),
.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI),
.driver_data = APPLE_HAS_FN },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_ISO),
- .driver_data = APPLE_HAS_FN },
+ .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_JIS),
.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_ANSI),
.driver_data = APPLE_HAS_FN },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_ISO),
- .driver_data = APPLE_HAS_FN },
+ .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_JIS),
.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI),
.driver_data = APPLE_HAS_FN },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ISO),
- .driver_data = APPLE_HAS_FN },
+ .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_JIS),
.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI),
.driver_data = APPLE_HAS_FN },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ISO),
- .driver_data = APPLE_HAS_FN },
+ .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_JIS),
.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI),
diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index 26c31d759914..81e7e404a5fc 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -860,7 +860,9 @@ static const char *keys[KEY_MAX + 1] = {
[KEY_F22] = "F22", [KEY_F23] = "F23",
[KEY_F24] = "F24", [KEY_PLAYCD] = "PlayCD",
[KEY_PAUSECD] = "PauseCD", [KEY_PROG3] = "Prog3",
- [KEY_PROG4] = "Prog4", [KEY_SUSPEND] = "Suspend",
+ [KEY_PROG4] = "Prog4",
+ [KEY_ALL_APPLICATIONS] = "AllApplications",
+ [KEY_SUSPEND] = "Suspend",
[KEY_CLOSE] = "Close", [KEY_PLAY] = "Play",
[KEY_FASTFORWARD] = "FastForward", [KEY_BASSBOOST] = "BassBoost",
[KEY_PRINT] = "Print", [KEY_HP] = "HP",
@@ -969,6 +971,7 @@ static const char *keys[KEY_MAX + 1] = {
[KEY_ASSISTANT] = "Assistant",
[KEY_KBD_LAYOUT_NEXT] = "KbdLayoutNext",
[KEY_EMOJI_PICKER] = "EmojiPicker",
+ [KEY_DICTATE] = "Dictate",
[KEY_BRIGHTNESS_MIN] = "BrightnessMin",
[KEY_BRIGHTNESS_MAX] = "BrightnessMax",
[KEY_BRIGHTNESS_AUTO] = "BrightnessAuto",
diff --git a/drivers/hid/hid-elo.c b/drivers/hid/hid-elo.c
index 8e960d7b233b..2876cb6a7dca 100644
--- a/drivers/hid/hid-elo.c
+++ b/drivers/hid/hid-elo.c
@@ -228,7 +228,6 @@ static int elo_probe(struct hid_device *hdev, const struct hid_device_id *id)
{
struct elo_priv *priv;
int ret;
- struct usb_device *udev;
if (!hid_is_usb(hdev))
return -EINVAL;
@@ -238,8 +237,7 @@ static int elo_probe(struct hid_device *hdev, const struct hid_device_id *id)
return -ENOMEM;
INIT_DELAYED_WORK(&priv->work, elo_work);
- udev = interface_to_usbdev(to_usb_interface(hdev->dev.parent));
- priv->usbdev = usb_get_dev(udev);
+ priv->usbdev = interface_to_usbdev(to_usb_interface(hdev->dev.parent));
hid_set_drvdata(hdev, priv);
@@ -270,8 +268,6 @@ static void elo_remove(struct hid_device *hdev)
{
struct elo_priv *priv = hid_get_drvdata(hdev);
- usb_put_dev(priv->usbdev);
-
hid_hw_stop(hdev);
cancel_delayed_work_sync(&priv->work);
kfree(priv);
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 85975031389b..78bd3ddda442 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -1370,6 +1370,7 @@
#define USB_VENDOR_ID_UGTIZER 0x2179
#define USB_DEVICE_ID_UGTIZER_TABLET_GP0610 0x0053
#define USB_DEVICE_ID_UGTIZER_TABLET_GT5040 0x0077
+#define USB_DEVICE_ID_UGTIZER_TABLET_WP5540 0x0004
#define USB_VENDOR_ID_VIEWSONIC 0x0543
#define USB_DEVICE_ID_VIEWSONIC_PD1011 0xe621
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 112901d2d8d2..56ec27398a00 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -992,6 +992,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
case 0x0cd: map_key_clear(KEY_PLAYPAUSE); break;
case 0x0cf: map_key_clear(KEY_VOICECOMMAND); break;
+ case 0x0d8: map_key_clear(KEY_DICTATE); break;
case 0x0d9: map_key_clear(KEY_EMOJI_PICKER); break;
case 0x0e0: map_abs_clear(ABS_VOLUME); break;
@@ -1083,6 +1084,8 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
case 0x29d: map_key_clear(KEY_KBD_LAYOUT_NEXT); break;
+ case 0x2a2: map_key_clear(KEY_ALL_APPLICATIONS); break;
+
case 0x2c7: map_key_clear(KEY_KBDINPUTASSIST_PREV); break;
case 0x2c8: map_key_clear(KEY_KBDINPUTASSIST_NEXT); break;
case 0x2c9: map_key_clear(KEY_KBDINPUTASSIST_PREVGROUP); break;
diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
index 7106b921b53c..c358778e070b 100644
--- a/drivers/hid/hid-logitech-dj.c
+++ b/drivers/hid/hid-logitech-dj.c
@@ -1068,6 +1068,7 @@ static void logi_hidpp_recv_queue_notif(struct hid_device *hdev,
workitem.reports_supported |= STD_KEYBOARD;
break;
case 0x0f:
+ case 0x11:
device_type = "eQUAD Lightspeed 1.2";
logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem);
workitem.reports_supported |= STD_KEYBOARD;
diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c
index b6a9a0f3966e..2204de889739 100644
--- a/drivers/hid/hid-nintendo.c
+++ b/drivers/hid/hid-nintendo.c
@@ -2128,6 +2128,10 @@ static int nintendo_hid_probe(struct hid_device *hdev,
spin_lock_init(&ctlr->lock);
ctlr->rumble_queue = alloc_workqueue("hid-nintendo-rumble_wq",
WQ_FREEZABLE | WQ_MEM_RECLAIM, 0);
+ if (!ctlr->rumble_queue) {
+ ret = -ENOMEM;
+ goto err;
+ }
INIT_WORK(&ctlr->rumble_worker, joycon_rumble_worker);
ret = hid_parse(hdev);
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index 9af1dc8ae3a2..c066ba901867 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -187,6 +187,7 @@ static const struct hid_device_id hid_quirks[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_TURBOX_KEYBOARD), HID_QUIRK_NOGET },
{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_KNA5), HID_QUIRK_MULTI_INPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWA60), HID_QUIRK_MULTI_INPUT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_WP5540), HID_QUIRK_MULTI_INPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH), HID_QUIRK_MULTI_INPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH), HID_QUIRK_MULTI_INPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET), HID_QUIRK_MULTI_INPUT },
diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c
index 03b935ff02d5..c3e6d69fdfbd 100644
--- a/drivers/hid/hid-thrustmaster.c
+++ b/drivers/hid/hid-thrustmaster.c
@@ -64,7 +64,9 @@ struct tm_wheel_info {
*/
static const struct tm_wheel_info tm_wheels_infos[] = {
{0x0306, 0x0006, "Thrustmaster T150RS"},
+ {0x0200, 0x0005, "Thrustmaster T300RS (Missing Attachment)"},
{0x0206, 0x0005, "Thrustmaster T300RS"},
+ {0x0209, 0x0005, "Thrustmaster T300RS (Open Wheel Attachment)"},
{0x0204, 0x0005, "Thrustmaster T300 Ferrari Alcantara Edition"},
{0x0002, 0x0002, "Thrustmaster T500RS"}
//{0x0407, 0x0001, "Thrustmaster TMX"}
@@ -158,6 +160,12 @@ static void thrustmaster_interrupts(struct hid_device *hdev)
return;
}
+ if (usbif->cur_altsetting->desc.bNumEndpoints < 2) {
+ kfree(send_buf);
+ hid_err(hdev, "Wrong number of endpoints?\n");
+ return;
+ }
+
ep = &usbif->cur_altsetting->endpoint[1];
b_ep = ep->desc.bEndpointAddress;
diff --git a/drivers/hid/hid-vivaldi.c b/drivers/hid/hid-vivaldi.c
index efa6140915f4..42ceb2058a09 100644
--- a/drivers/hid/hid-vivaldi.c
+++ b/drivers/hid/hid-vivaldi.c
@@ -144,7 +144,7 @@ out:
static int vivaldi_input_configured(struct hid_device *hdev,
struct hid_input *hidinput)
{
- return sysfs_create_group(&hdev->dev.kobj, &input_attribute_group);
+ return devm_device_add_group(&hdev->dev, &input_attribute_group);
}
static const struct hid_device_id vivaldi_table[] = {
diff --git a/drivers/hid/i2c-hid/i2c-hid-of-goodix.c b/drivers/hid/i2c-hid/i2c-hid-of-goodix.c
index b4dad66fa954..ec6c73f75ffe 100644
--- a/drivers/hid/i2c-hid/i2c-hid-of-goodix.c
+++ b/drivers/hid/i2c-hid/i2c-hid-of-goodix.c
@@ -27,7 +27,6 @@ struct i2c_hid_of_goodix {
struct regulator *vdd;
struct notifier_block nb;
- struct mutex regulator_mutex;
struct gpio_desc *reset_gpio;
const struct goodix_i2c_hid_timing_data *timings;
};
@@ -67,8 +66,6 @@ static int ihid_goodix_vdd_notify(struct notifier_block *nb,
container_of(nb, struct i2c_hid_of_goodix, nb);
int ret = NOTIFY_OK;
- mutex_lock(&ihid_goodix->regulator_mutex);
-
switch (event) {
case REGULATOR_EVENT_PRE_DISABLE:
gpiod_set_value_cansleep(ihid_goodix->reset_gpio, 1);
@@ -87,8 +84,6 @@ static int ihid_goodix_vdd_notify(struct notifier_block *nb,
break;
}
- mutex_unlock(&ihid_goodix->regulator_mutex);
-
return ret;
}
@@ -102,8 +97,6 @@ static int i2c_hid_of_goodix_probe(struct i2c_client *client,
if (!ihid_goodix)
return -ENOMEM;
- mutex_init(&ihid_goodix->regulator_mutex);
-
ihid_goodix->ops.power_up = goodix_i2c_hid_power_up;
ihid_goodix->ops.power_down = goodix_i2c_hid_power_down;
@@ -130,25 +123,28 @@ static int i2c_hid_of_goodix_probe(struct i2c_client *client,
* long. Holding the controller in reset apparently draws extra
* power.
*/
- mutex_lock(&ihid_goodix->regulator_mutex);
ihid_goodix->nb.notifier_call = ihid_goodix_vdd_notify;
ret = devm_regulator_register_notifier(ihid_goodix->vdd, &ihid_goodix->nb);
- if (ret) {
- mutex_unlock(&ihid_goodix->regulator_mutex);
+ if (ret)
return dev_err_probe(&client->dev, ret,
"regulator notifier request failed\n");
- }
/*
* If someone else is holding the regulator on (or the regulator is
* an always-on one) we might never be told to deassert reset. Do it
- * now. Here we'll assume that someone else might have _just
- * barely_ turned the regulator on so we'll do the full
- * "post_power_delay" just in case.
+ * now... and temporarily bump the regulator reference count just to
+ * make sure it is impossible for this to race with our own notifier!
+ * We also assume that someone else might have _just barely_ turned
+ * the regulator on so we'll do the full "post_power_delay" just in
+ * case.
*/
- if (ihid_goodix->reset_gpio && regulator_is_enabled(ihid_goodix->vdd))
+ if (ihid_goodix->reset_gpio && regulator_is_enabled(ihid_goodix->vdd)) {
+ ret = regulator_enable(ihid_goodix->vdd);
+ if (ret)
+ return ret;
goodix_i2c_hid_deassert_reset(ihid_goodix, true);
- mutex_unlock(&ihid_goodix->regulator_mutex);
+ regulator_disable(ihid_goodix->vdd);
+ }
return i2c_hid_core_probe(client, &ihid_goodix->ops, 0x0001, 0);
}
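
[Editor's note] The goodix fix above replaces the mutex with a temporary regulator reference: while the driver holds its own enable count, the supply cannot be disabled underneath it, so the PRE_DISABLE notifier cannot race with the reset handling. A minimal sketch of the idiom, with a hypothetical function name:

	#include <linux/regulator/consumer.h>

	static int example_touch_hw(struct regulator *vdd)
	{
		int ret;

		ret = regulator_enable(vdd);	/* pin the supply on */
		if (ret)
			return ret;

		/* ... work that must not race with the supply going down ... */

		regulator_disable(vdd);		/* drop our reference */
		return 0;
	}
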
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index ca873a3b98db..f2d05bff4245 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -1660,6 +1660,13 @@ static int balloon_connect_vsp(struct hv_device *dev)
unsigned long t;
int ret;
+ /*
+ * max_pkt_size should be large enough for one vmbus packet header plus
+ * our receive buffer size. Hyper-V sends messages up to
+ * HV_HYP_PAGE_SIZE bytes long on balloon channel.
+ */
+ dev->channel->max_pkt_size = HV_HYP_PAGE_SIZE * 2;
+
ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0,
balloon_onchannelcallback, dev);
if (ret)
diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c
index eb2833d2b5d0..832885198643 100644
--- a/drivers/hv/hv_utils_transport.c
+++ b/drivers/hv/hv_utils_transport.c
@@ -13,7 +13,7 @@
#include "hv_utils_transport.h"
static DEFINE_SPINLOCK(hvt_list_lock);
-static struct list_head hvt_list = LIST_HEAD_INIT(hvt_list);
+static LIST_HEAD(hvt_list);
static void hvt_reset(struct hvutil_transport *hvt)
{
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 17bf55fe3169..12a2b37e87f3 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2028,8 +2028,10 @@ int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel)
kobj->kset = dev->channels_kset;
ret = kobject_init_and_add(kobj, &vmbus_chan_ktype, NULL,
"%u", relid);
- if (ret)
+ if (ret) {
+ kobject_put(kobj);
return ret;
+ }
ret = sysfs_create_group(kobj, &vmbus_chan_group);
@@ -2038,6 +2040,7 @@ int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel)
* The calling functions' error handling paths will cleanup the
* empty channel directory.
*/
+ kobject_put(kobj);
dev_err(device, "Unable to set up channel sysfs files\n");
return ret;
}
@@ -2079,7 +2082,6 @@ struct hv_device *vmbus_device_create(const guid_t *type,
return child_device_obj;
}
-static u64 vmbus_dma_mask = DMA_BIT_MASK(64);
/*
* vmbus_device_register - Register the child device
*/
@@ -2120,8 +2122,9 @@ int vmbus_device_register(struct hv_device *child_device_obj)
}
hv_debug_add_dev_dir(child_device_obj);
- child_device_obj->device.dma_mask = &vmbus_dma_mask;
child_device_obj->device.dma_parms = &child_device_obj->dma_parms;
+ child_device_obj->device.dma_mask = &child_device_obj->dma_mask;
+ dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64));
return 0;
err_kset_unregister:
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 8df25f1079ba..9ab4e9b3d27b 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -174,6 +174,7 @@ config SENSORS_ADM9240
config SENSORS_ADT7X10
tristate
+ select REGMAP
help
This module contains common code shared by the ADT7310/ADT7320 and
ADT7410/ADT7420 temperature monitoring chip drivers.
@@ -505,6 +506,21 @@ config SENSORS_DELL_SMM
When option I8K is also enabled this driver provides legacy /proc/i8k
userspace interface for i8kutils package.
+config I8K
+ bool "Legacy /proc/i8k interface of Dell laptop SMM BIOS hwmon driver"
+ depends on SENSORS_DELL_SMM
+ depends on PROC_FS
+ help
+ This option enables the legacy /proc/i8k userspace interface of the
+ dell-smm-hwmon driver. The character file /proc/i8k exposes the BIOS
+ version, temperatures and allows control of fan speeds of some Dell
+ laptops. Sometimes it also reports power and hotkey status.
+
+ This interface is required to run programs from the i8kutils package.
+
+ Say Y if you intend to run userspace programs that use this interface.
+ Say N otherwise.
+
config SENSORS_DA9052_ADC
tristate "Dialog DA9052/DA9053 ADC"
depends on PMIC_DA9052
@@ -1208,8 +1224,8 @@ config SENSORS_LM70
depends on SPI_MASTER
help
If you say yes here you get support for the National Semiconductor
- LM70, LM71, LM74 and Texas Instruments TMP121/TMP123 digital tempera-
- ture sensor chips.
+ LM70, LM71, LM74 and Texas Instruments TMP121/TMP123, TMP122/TMP124,
+ TMP125 digital temperature sensor chips.
This driver can also be built as a module. If so, the module
will be called lm70.
@@ -1288,6 +1304,7 @@ config SENSORS_LM80
config SENSORS_LM83
tristate "National Semiconductor LM83 and compatibles"
depends on I2C
+ select REGMAP
help
If you say yes here you get support for National Semiconductor
LM82 and LM83 sensor chips.
@@ -1979,6 +1996,17 @@ config SENSORS_TMP421
This driver can also be built as a module. If so, the module
will be called tmp421.
+config SENSORS_TMP464
+ tristate "Texas Instruments TMP464 and compatible"
+ depends on I2C
+ select REGMAP_I2C
+ help
+ If you say yes here you get support for Texas Instruments TMP464
+ and TMP468 temperature sensor chips.
+
+ This driver can also be built as a module. If so, the module
+ will be called tmp464.
+
config SENSORS_TMP513
tristate "Texas Instruments TMP513 and compatibles"
depends on I2C
@@ -2252,16 +2280,31 @@ config SENSORS_ASUS_WMI
config SENSORS_ASUS_WMI_EC
tristate "ASUS WMI B550/X570"
- depends on ACPI_WMI
+ depends on ACPI_WMI && SENSORS_ASUS_EC=n
help
If you say yes here you get support for the ACPI embedded controller
hardware monitoring interface found in B550/X570 ASUS motherboards.
This driver will provide readings of fans, voltages and temperatures
through the system firmware.
+ This driver is deprecated in favor of the ASUS EC Sensors driver
+ which provides fully compatible output.
+
This driver can also be built as a module. If so, the module
will be called asus_wmi_sensors_ec.
+config SENSORS_ASUS_EC
+ tristate "ASUS EC Sensors"
+ depends on X86
+ help
+ If you say yes here you get support for the ACPI embedded controller
+ hardware monitoring interface found in ASUS motherboards. The driver
+ currently supports B550/X570 boards, although other ASUS boards might
+ provide this monitoring interface as well.
+
+ This driver can also be built as a module. If so, the module
+ will be called asus_ec_sensors.
+
endif # ACPI
endif # HWMON
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 185f946d698b..4ed138d0621f 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_HWMON_VID) += hwmon-vid.o
# ACPI drivers
obj-$(CONFIG_SENSORS_ACPI_POWER) += acpi_power_meter.o
obj-$(CONFIG_SENSORS_ATK0110) += asus_atk0110.o
+obj-$(CONFIG_SENSORS_ASUS_EC) += asus-ec-sensors.o
obj-$(CONFIG_SENSORS_ASUS_WMI) += asus_wmi_sensors.o
obj-$(CONFIG_SENSORS_ASUS_WMI_EC) += asus_wmi_ec_sensors.o
@@ -194,6 +195,7 @@ obj-$(CONFIG_SENSORS_TMP103) += tmp103.o
obj-$(CONFIG_SENSORS_TMP108) += tmp108.o
obj-$(CONFIG_SENSORS_TMP401) += tmp401.o
obj-$(CONFIG_SENSORS_TMP421) += tmp421.o
+obj-$(CONFIG_SENSORS_TMP464) += tmp464.o
obj-$(CONFIG_SENSORS_TMP513) += tmp513.o
obj-$(CONFIG_SENSORS_VEXPRESS) += vexpress-hwmon.o
obj-$(CONFIG_SENSORS_VIA_CPUTEMP)+= via-cputemp.o
diff --git a/drivers/hwmon/adcxx.c b/drivers/hwmon/adcxx.c
index e5bc5ce09f4e..de37bce24fa6 100644
--- a/drivers/hwmon/adcxx.c
+++ b/drivers/hwmon/adcxx.c
@@ -194,7 +194,7 @@ out_err:
return status;
}
-static int adcxx_remove(struct spi_device *spi)
+static void adcxx_remove(struct spi_device *spi)
{
struct adcxx *adc = spi_get_drvdata(spi);
int i;
@@ -205,8 +205,6 @@ static int adcxx_remove(struct spi_device *spi)
device_remove_file(&spi->dev, &ad_input[i].dev_attr);
mutex_unlock(&adc->lock);
-
- return 0;
}
static const struct spi_device_id adcxx_ids[] = {
diff --git a/drivers/hwmon/adt7310.c b/drivers/hwmon/adt7310.c
index c40cac16af68..1efc0bdcceab 100644
--- a/drivers/hwmon/adt7310.c
+++ b/drivers/hwmon/adt7310.c
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/regmap.h>
#include <linux/spi/spi.h>
#include <asm/unaligned.h>
@@ -38,16 +39,13 @@ static const u8 adt7310_reg_table[] = {
#define AD7310_COMMAND(reg) (adt7310_reg_table[(reg)] << ADT7310_CMD_REG_OFFSET)
-static int adt7310_spi_read_word(struct device *dev, u8 reg)
+static int adt7310_spi_read_word(struct spi_device *spi, u8 reg)
{
- struct spi_device *spi = to_spi_device(dev);
-
return spi_w8r16be(spi, AD7310_COMMAND(reg) | ADT7310_CMD_READ);
}
-static int adt7310_spi_write_word(struct device *dev, u8 reg, u16 data)
+static int adt7310_spi_write_word(struct spi_device *spi, u8 reg, u16 data)
{
- struct spi_device *spi = to_spi_device(dev);
u8 buf[3];
buf[0] = AD7310_COMMAND(reg);
@@ -56,17 +54,13 @@ static int adt7310_spi_write_word(struct device *dev, u8 reg, u16 data)
return spi_write(spi, buf, sizeof(buf));
}
-static int adt7310_spi_read_byte(struct device *dev, u8 reg)
+static int adt7310_spi_read_byte(struct spi_device *spi, u8 reg)
{
- struct spi_device *spi = to_spi_device(dev);
-
return spi_w8r8(spi, AD7310_COMMAND(reg) | ADT7310_CMD_READ);
}
-static int adt7310_spi_write_byte(struct device *dev, u8 reg,
- u8 data)
+static int adt7310_spi_write_byte(struct spi_device *spi, u8 reg, u8 data)
{
- struct spi_device *spi = to_spi_device(dev);
u8 buf[2];
buf[0] = AD7310_COMMAND(reg);
@@ -75,25 +69,79 @@ static int adt7310_spi_write_byte(struct device *dev, u8 reg,
return spi_write(spi, buf, sizeof(buf));
}
-static const struct adt7x10_ops adt7310_spi_ops = {
- .read_word = adt7310_spi_read_word,
- .write_word = adt7310_spi_write_word,
- .read_byte = adt7310_spi_read_byte,
- .write_byte = adt7310_spi_write_byte,
-};
-
-static int adt7310_spi_probe(struct spi_device *spi)
+static bool adt7310_regmap_is_volatile(struct device *dev, unsigned int reg)
{
- return adt7x10_probe(&spi->dev, spi_get_device_id(spi)->name, spi->irq,
- &adt7310_spi_ops);
+ switch (reg) {
+ case ADT7X10_TEMPERATURE:
+ case ADT7X10_STATUS:
+ return true;
+ default:
+ return false;
+ }
}
-static int adt7310_spi_remove(struct spi_device *spi)
+static int adt7310_reg_read(void *context, unsigned int reg, unsigned int *val)
{
- adt7x10_remove(&spi->dev, spi->irq);
+ struct spi_device *spi = context;
+ int regval;
+
+ switch (reg) {
+ case ADT7X10_TEMPERATURE:
+ case ADT7X10_T_ALARM_HIGH:
+ case ADT7X10_T_ALARM_LOW:
+ case ADT7X10_T_CRIT:
+ regval = adt7310_spi_read_word(spi, reg);
+ break;
+ default:
+ regval = adt7310_spi_read_byte(spi, reg);
+ break;
+ }
+ if (regval < 0)
+ return regval;
+ *val = regval;
return 0;
}
+static int adt7310_reg_write(void *context, unsigned int reg, unsigned int val)
+{
+ struct spi_device *spi = context;
+ int ret;
+
+ switch (reg) {
+ case ADT7X10_TEMPERATURE:
+ case ADT7X10_T_ALARM_HIGH:
+ case ADT7X10_T_ALARM_LOW:
+ case ADT7X10_T_CRIT:
+ ret = adt7310_spi_write_word(spi, reg, val);
+ break;
+ default:
+ ret = adt7310_spi_write_byte(spi, reg, val);
+ break;
+ }
+ return ret;
+}
+
+static const struct regmap_config adt7310_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 16,
+ .cache_type = REGCACHE_RBTREE,
+ .volatile_reg = adt7310_regmap_is_volatile,
+ .reg_read = adt7310_reg_read,
+ .reg_write = adt7310_reg_write,
+};
+
+static int adt7310_spi_probe(struct spi_device *spi)
+{
+ struct regmap *regmap;
+
+ regmap = devm_regmap_init(&spi->dev, NULL, spi, &adt7310_regmap_config);
+ if (IS_ERR(regmap))
+ return PTR_ERR(regmap);
+
+ return adt7x10_probe(&spi->dev, spi_get_device_id(spi)->name, spi->irq,
+ regmap);
+}
+
static const struct spi_device_id adt7310_id[] = {
{ "adt7310", 0 },
{ "adt7320", 0 },
@@ -107,7 +155,6 @@ static struct spi_driver adt7310_driver = {
.pm = ADT7X10_DEV_PM_OPS,
},
.probe = adt7310_spi_probe,
- .remove = adt7310_spi_remove,
.id_table = adt7310_id,
};
module_spi_driver(adt7310_driver);
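
[Editor's note] The adt7310 conversion above cannot use regmap's stock SPI bus support because the chip mixes 8-bit and 16-bit registers, so it supplies reg_read()/reg_write() callbacks and still gets register caching for free. A minimal sketch of that shape, with hypothetical callbacks that a real driver would back with bus transfers:

	#include <linux/regmap.h>

	static int ex_reg_read(void *context, unsigned int reg, unsigned int *val)
	{
		*val = 0;	/* a real driver reads the bus here */
		return 0;
	}

	static int ex_reg_write(void *context, unsigned int reg, unsigned int val)
	{
		return 0;	/* a real driver writes the bus here */
	}

	static const struct regmap_config ex_config = {
		.reg_bits = 8,
		.val_bits = 16,
		.cache_type = REGCACHE_RBTREE,
		.reg_read = ex_reg_read,
		.reg_write = ex_reg_write,
	};
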
diff --git a/drivers/hwmon/adt7410.c b/drivers/hwmon/adt7410.c
index 973db057427b..aede5baca7b9 100644
--- a/drivers/hwmon/adt7410.c
+++ b/drivers/hwmon/adt7410.c
@@ -9,49 +9,82 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/i2c.h>
+#include <linux/regmap.h>
#include "adt7x10.h"
-static int adt7410_i2c_read_word(struct device *dev, u8 reg)
+static bool adt7410_regmap_is_volatile(struct device *dev, unsigned int reg)
{
- return i2c_smbus_read_word_swapped(to_i2c_client(dev), reg);
+ switch (reg) {
+ case ADT7X10_TEMPERATURE:
+ case ADT7X10_STATUS:
+ return true;
+ default:
+ return false;
+ }
}
-static int adt7410_i2c_write_word(struct device *dev, u8 reg, u16 data)
+static int adt7410_reg_read(void *context, unsigned int reg, unsigned int *val)
{
- return i2c_smbus_write_word_swapped(to_i2c_client(dev), reg, data);
-}
+ struct i2c_client *client = context;
+ int regval;
-static int adt7410_i2c_read_byte(struct device *dev, u8 reg)
-{
- return i2c_smbus_read_byte_data(to_i2c_client(dev), reg);
+ switch (reg) {
+ case ADT7X10_TEMPERATURE:
+ case ADT7X10_T_ALARM_HIGH:
+ case ADT7X10_T_ALARM_LOW:
+ case ADT7X10_T_CRIT:
+ regval = i2c_smbus_read_word_swapped(client, reg);
+ break;
+ default:
+ regval = i2c_smbus_read_byte_data(client, reg);
+ break;
+ }
+ if (regval < 0)
+ return regval;
+ *val = regval;
+ return 0;
}
-static int adt7410_i2c_write_byte(struct device *dev, u8 reg, u8 data)
+static int adt7410_reg_write(void *context, unsigned int reg, unsigned int val)
{
- return i2c_smbus_write_byte_data(to_i2c_client(dev), reg, data);
+ struct i2c_client *client = context;
+ int ret;
+
+ switch (reg) {
+ case ADT7X10_TEMPERATURE:
+ case ADT7X10_T_ALARM_HIGH:
+ case ADT7X10_T_ALARM_LOW:
+ case ADT7X10_T_CRIT:
+ ret = i2c_smbus_write_word_swapped(client, reg, val);
+ break;
+ default:
+ ret = i2c_smbus_write_byte_data(client, reg, val);
+ break;
+ }
+ return ret;
}
-static const struct adt7x10_ops adt7410_i2c_ops = {
- .read_word = adt7410_i2c_read_word,
- .write_word = adt7410_i2c_write_word,
- .read_byte = adt7410_i2c_read_byte,
- .write_byte = adt7410_i2c_write_byte,
+static const struct regmap_config adt7410_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 16,
+ .max_register = ADT7X10_ID,
+ .cache_type = REGCACHE_RBTREE,
+ .volatile_reg = adt7410_regmap_is_volatile,
+ .reg_read = adt7410_reg_read,
+ .reg_write = adt7410_reg_write,
};
static int adt7410_i2c_probe(struct i2c_client *client)
{
- if (!i2c_check_functionality(client->adapter,
- I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA))
- return -ENODEV;
+ struct regmap *regmap;
- return adt7x10_probe(&client->dev, NULL, client->irq, &adt7410_i2c_ops);
-}
+ regmap = devm_regmap_init(&client->dev, NULL, client,
+ &adt7410_regmap_config);
+ if (IS_ERR(regmap))
+ return PTR_ERR(regmap);
-static int adt7410_i2c_remove(struct i2c_client *client)
-{
- adt7x10_remove(&client->dev, client->irq);
- return 0;
+ return adt7x10_probe(&client->dev, client->name, client->irq, regmap);
}
static const struct i2c_device_id adt7410_ids[] = {
@@ -68,7 +101,6 @@ static struct i2c_driver adt7410_driver = {
.pm = ADT7X10_DEV_PM_OPS,
},
.probe_new = adt7410_i2c_probe,
- .remove = adt7410_i2c_remove,
.id_table = adt7410_ids,
.address_list = I2C_ADDRS(0x48, 0x49, 0x4a, 0x4b),
};
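
The I2C variant leans on i2c_smbus_read_word_swapped()/i2c_smbus_write_word_swapped() because SMBus word transfers put the low byte on the wire first while the sensor's 16-bit registers are MSB-first; the "swapped" helpers undo that mismatch. A tiny stand-alone demonstration of the byte swap they imply (the sample value is made up):

#include <stdio.h>
#include <stdint.h>

/* what the "_swapped" SMBus helpers add over the plain word accessors */
static uint16_t swab16(uint16_t x)
{
	return (uint16_t)((x << 8) | (x >> 8));
}

int main(void)
{
	uint16_t wire = 0x8019;	/* raw SMBus word: low byte first */

	/* device byte order restored, as read_word_swapped() returns it */
	printf("0x%04x on the wire -> 0x%04x\n", wire, swab16(wire));
	return 0;
}
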
diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c
index d519aca4a9d6..fb6d14d213a1 100644
--- a/drivers/hwmon/adt7470.c
+++ b/drivers/hwmon/adt7470.c
@@ -662,6 +662,9 @@ static int adt7470_fan_write(struct device *dev, u32 attr, int channel, long val
struct adt7470_data *data = dev_get_drvdata(dev);
int err;
+ if (val <= 0)
+ return -EINVAL;
+
val = FAN_RPM_TO_PERIOD(val);
val = clamp_val(val, 1, 65534);
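
The new val <= 0 check matters because FAN_RPM_TO_PERIOD() divides by the requested RPM, so a write of 0 would divide by zero before clamp_val() ever runs. A sketch of the failure mode, assuming the macro has the usual reciprocal period/RPM form (the constant below is an assumption, not the driver's actual definition):

#include <stdio.h>

/* assumed shape of the real macro: it divides by the requested RPM */
#define FAN_RPM_TO_PERIOD(x)	((90000 * 60) / (x))

static int fan_write(long val)
{
	if (val <= 0)		/* the added guard: reject 0 and negatives */
		return -22;	/* -EINVAL */

	val = FAN_RPM_TO_PERIOD(val);
	if (val < 1)		/* equivalent of clamp_val(val, 1, 65534) */
		val = 1;
	else if (val > 65534)
		val = 65534;
	printf("period = %ld\n", val);
	return 0;
}

int main(void)
{
	fan_write(1350);	/* ok: period = 4000 */
	fan_write(0);		/* rejected instead of dividing by zero */
	return 0;
}
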
diff --git a/drivers/hwmon/adt7x10.c b/drivers/hwmon/adt7x10.c
index e9d33aa78a19..ce54bffab2ec 100644
--- a/drivers/hwmon/adt7x10.c
+++ b/drivers/hwmon/adt7x10.c
@@ -8,16 +8,17 @@
* and adt7410.c from iio-staging by Sonic Zhang <sonic.zhang@analog.com>
*/
+#include <linux/device.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/hwmon.h>
-#include <linux/hwmon-sysfs.h>
#include <linux/err.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
+#include <linux/regmap.h>
#include "adt7x10.h"
@@ -53,80 +54,57 @@
/* Each client has this additional data */
struct adt7x10_data {
- const struct adt7x10_ops *ops;
- const char *name;
- struct device *hwmon_dev;
+ struct regmap *regmap;
struct mutex update_lock;
u8 config;
u8 oldconfig;
- bool valid; /* true if registers valid */
- unsigned long last_updated; /* In jiffies */
- s16 temp[4]; /* Register values,
- 0 = input
- 1 = high
- 2 = low
- 3 = critical */
- u8 hyst; /* hysteresis offset */
+ bool valid; /* true if temperature valid */
};
-static int adt7x10_read_byte(struct device *dev, u8 reg)
-{
- struct adt7x10_data *d = dev_get_drvdata(dev);
- return d->ops->read_byte(dev, reg);
-}
-
-static int adt7x10_write_byte(struct device *dev, u8 reg, u8 data)
-{
- struct adt7x10_data *d = dev_get_drvdata(dev);
- return d->ops->write_byte(dev, reg, data);
-}
-
-static int adt7x10_read_word(struct device *dev, u8 reg)
-{
- struct adt7x10_data *d = dev_get_drvdata(dev);
- return d->ops->read_word(dev, reg);
-}
-
-static int adt7x10_write_word(struct device *dev, u8 reg, u16 data)
-{
- struct adt7x10_data *d = dev_get_drvdata(dev);
- return d->ops->write_word(dev, reg, data);
-}
+enum {
+ adt7x10_temperature = 0,
+ adt7x10_t_alarm_high,
+ adt7x10_t_alarm_low,
+ adt7x10_t_crit,
+};
-static const u8 ADT7X10_REG_TEMP[4] = {
- ADT7X10_TEMPERATURE, /* input */
- ADT7X10_T_ALARM_HIGH, /* high */
- ADT7X10_T_ALARM_LOW, /* low */
- ADT7X10_T_CRIT, /* critical */
+static const u8 ADT7X10_REG_TEMP[] = {
+ [adt7x10_temperature] = ADT7X10_TEMPERATURE, /* input */
+ [adt7x10_t_alarm_high] = ADT7X10_T_ALARM_HIGH, /* high */
+ [adt7x10_t_alarm_low] = ADT7X10_T_ALARM_LOW, /* low */
+ [adt7x10_t_crit] = ADT7X10_T_CRIT, /* critical */
};
static irqreturn_t adt7x10_irq_handler(int irq, void *private)
{
struct device *dev = private;
- int status;
+ struct adt7x10_data *d = dev_get_drvdata(dev);
+ unsigned int status;
+ int ret;
- status = adt7x10_read_byte(dev, ADT7X10_STATUS);
- if (status < 0)
+ ret = regmap_read(d->regmap, ADT7X10_STATUS, &status);
+ if (ret < 0)
return IRQ_HANDLED;
if (status & ADT7X10_STAT_T_HIGH)
- sysfs_notify(&dev->kobj, NULL, "temp1_max_alarm");
+ hwmon_notify_event(dev, hwmon_temp, hwmon_temp_max_alarm, 0);
if (status & ADT7X10_STAT_T_LOW)
- sysfs_notify(&dev->kobj, NULL, "temp1_min_alarm");
+ hwmon_notify_event(dev, hwmon_temp, hwmon_temp_min_alarm, 0);
if (status & ADT7X10_STAT_T_CRIT)
- sysfs_notify(&dev->kobj, NULL, "temp1_crit_alarm");
+ hwmon_notify_event(dev, hwmon_temp, hwmon_temp_crit_alarm, 0);
return IRQ_HANDLED;
}
-static int adt7x10_temp_ready(struct device *dev)
+static int adt7x10_temp_ready(struct regmap *regmap)
{
- int i, status;
+ unsigned int status;
+ int i, ret;
for (i = 0; i < 6; i++) {
- status = adt7x10_read_byte(dev, ADT7X10_STATUS);
- if (status < 0)
- return status;
+ ret = regmap_read(regmap, ADT7X10_STATUS, &status);
+ if (ret < 0)
+ return ret;
if (!(status & ADT7X10_STAT_NOT_RDY))
return 0;
msleep(60);
@@ -134,71 +112,10 @@ static int adt7x10_temp_ready(struct device *dev)
return -ETIMEDOUT;
}
-static int adt7x10_update_temp(struct device *dev)
-{
- struct adt7x10_data *data = dev_get_drvdata(dev);
- int ret = 0;
-
- mutex_lock(&data->update_lock);
-
- if (time_after(jiffies, data->last_updated + HZ + HZ / 2)
- || !data->valid) {
- int temp;
-
- dev_dbg(dev, "Starting update\n");
-
- ret = adt7x10_temp_ready(dev); /* check for new value */
- if (ret)
- goto abort;
-
- temp = adt7x10_read_word(dev, ADT7X10_REG_TEMP[0]);
- if (temp < 0) {
- ret = temp;
- dev_dbg(dev, "Failed to read value: reg %d, error %d\n",
- ADT7X10_REG_TEMP[0], ret);
- goto abort;
- }
- data->temp[0] = temp;
- data->last_updated = jiffies;
- data->valid = true;
- }
-
-abort:
- mutex_unlock(&data->update_lock);
- return ret;
-}
-
-static int adt7x10_fill_cache(struct device *dev)
-{
- struct adt7x10_data *data = dev_get_drvdata(dev);
- int ret;
- int i;
-
- for (i = 1; i < ARRAY_SIZE(data->temp); i++) {
- ret = adt7x10_read_word(dev, ADT7X10_REG_TEMP[i]);
- if (ret < 0) {
- dev_dbg(dev, "Failed to read value: reg %d, error %d\n",
- ADT7X10_REG_TEMP[i], ret);
- return ret;
- }
- data->temp[i] = ret;
- }
-
- ret = adt7x10_read_byte(dev, ADT7X10_T_HYST);
- if (ret < 0) {
- dev_dbg(dev, "Failed to read value: reg %d, error %d\n",
- ADT7X10_T_HYST, ret);
- return ret;
- }
- data->hyst = ret;
-
- return 0;
-}
-
static s16 ADT7X10_TEMP_TO_REG(long temp)
{
return DIV_ROUND_CLOSEST(clamp_val(temp, ADT7X10_TEMP_MIN,
- ADT7X10_TEMP_MAX) * 128, 1000);
+ ADT7X10_TEMP_MAX) * 128, 1000);
}
static int ADT7X10_REG_TO_TEMP(struct adt7x10_data *data, s16 reg)
@@ -215,170 +132,233 @@ static int ADT7X10_REG_TO_TEMP(struct adt7x10_data *data, s16 reg)
/*-----------------------------------------------------------------------*/
-/* sysfs attributes for hwmon */
-
-static ssize_t adt7x10_temp_show(struct device *dev,
- struct device_attribute *da, char *buf)
+static int adt7x10_temp_read(struct adt7x10_data *data, int index, long *val)
{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
- struct adt7x10_data *data = dev_get_drvdata(dev);
-
-
- if (attr->index == 0) {
- int ret;
+ unsigned int regval;
+ int ret;
- ret = adt7x10_update_temp(dev);
- if (ret)
+ mutex_lock(&data->update_lock);
+ if (index == adt7x10_temperature && !data->valid) {
+ /* wait for valid temperature */
+ ret = adt7x10_temp_ready(data->regmap);
+ if (ret) {
+ mutex_unlock(&data->update_lock);
return ret;
+ }
+ data->valid = true;
}
+ mutex_unlock(&data->update_lock);
- return sprintf(buf, "%d\n", ADT7X10_REG_TO_TEMP(data,
- data->temp[attr->index]));
+ ret = regmap_read(data->regmap, ADT7X10_REG_TEMP[index], &regval);
+ if (ret)
+ return ret;
+
+ *val = ADT7X10_REG_TO_TEMP(data, regval);
+ return 0;
}
-static ssize_t adt7x10_temp_store(struct device *dev,
- struct device_attribute *da,
- const char *buf, size_t count)
+static int adt7x10_temp_write(struct adt7x10_data *data, int index, long temp)
{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
- struct adt7x10_data *data = dev_get_drvdata(dev);
- int nr = attr->index;
- long temp;
int ret;
- ret = kstrtol(buf, 10, &temp);
- if (ret)
- return ret;
-
mutex_lock(&data->update_lock);
- data->temp[nr] = ADT7X10_TEMP_TO_REG(temp);
- ret = adt7x10_write_word(dev, ADT7X10_REG_TEMP[nr], data->temp[nr]);
- if (ret)
- count = ret;
+ ret = regmap_write(data->regmap, ADT7X10_REG_TEMP[index],
+ ADT7X10_TEMP_TO_REG(temp));
mutex_unlock(&data->update_lock);
- return count;
+ return ret;
}
-static ssize_t adt7x10_t_hyst_show(struct device *dev,
- struct device_attribute *da, char *buf)
+static int adt7x10_hyst_read(struct adt7x10_data *data, int index, long *val)
{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
- struct adt7x10_data *data = dev_get_drvdata(dev);
- int nr = attr->index;
- int hyst;
+ int hyst, temp, ret;
+
+ mutex_lock(&data->update_lock);
+ ret = regmap_read(data->regmap, ADT7X10_T_HYST, &hyst);
+ if (ret) {
+ mutex_unlock(&data->update_lock);
+ return ret;
+ }
+
+ ret = regmap_read(data->regmap, ADT7X10_REG_TEMP[index], &temp);
+ mutex_unlock(&data->update_lock);
+ if (ret)
+ return ret;
- hyst = (data->hyst & ADT7X10_T_HYST_MASK) * 1000;
+ hyst = (hyst & ADT7X10_T_HYST_MASK) * 1000;
/*
* hysteresis is stored as a 4 bit offset in the device, convert it
* to an absolute value
*/
- if (nr == 2) /* min has positive offset, others have negative */
+ /* min has positive offset, others have negative */
+ if (index == adt7x10_t_alarm_low)
hyst = -hyst;
- return sprintf(buf, "%d\n",
- ADT7X10_REG_TO_TEMP(data, data->temp[nr]) - hyst);
+
+ *val = ADT7X10_REG_TO_TEMP(data, temp) - hyst;
+ return 0;
}
-static ssize_t adt7x10_t_hyst_store(struct device *dev,
- struct device_attribute *da,
- const char *buf, size_t count)
+static int adt7x10_hyst_write(struct adt7x10_data *data, long hyst)
{
- struct adt7x10_data *data = dev_get_drvdata(dev);
+ unsigned int regval;
int limit, ret;
- long hyst;
- ret = kstrtol(buf, 10, &hyst);
- if (ret)
- return ret;
+ mutex_lock(&data->update_lock);
+
/* convert absolute hysteresis value to a 4 bit delta value */
- limit = ADT7X10_REG_TO_TEMP(data, data->temp[1]);
- hyst = clamp_val(hyst, ADT7X10_TEMP_MIN, ADT7X10_TEMP_MAX);
- data->hyst = clamp_val(DIV_ROUND_CLOSEST(limit - hyst, 1000),
- 0, ADT7X10_T_HYST_MASK);
- ret = adt7x10_write_byte(dev, ADT7X10_T_HYST, data->hyst);
- if (ret)
- return ret;
+ ret = regmap_read(data->regmap, ADT7X10_T_ALARM_HIGH, &regval);
+ if (ret < 0)
+ goto abort;
+
+ limit = ADT7X10_REG_TO_TEMP(data, regval);
- return count;
+ hyst = clamp_val(hyst, ADT7X10_TEMP_MIN, ADT7X10_TEMP_MAX);
+ regval = clamp_val(DIV_ROUND_CLOSEST(limit - hyst, 1000), 0,
+ ADT7X10_T_HYST_MASK);
+ ret = regmap_write(data->regmap, ADT7X10_T_HYST, regval);
+abort:
+ mutex_unlock(&data->update_lock);
+ return ret;
}
-static ssize_t adt7x10_alarm_show(struct device *dev,
- struct device_attribute *da, char *buf)
+static int adt7x10_alarm_read(struct adt7x10_data *data, int index, long *val)
{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
+ unsigned int status;
int ret;
- ret = adt7x10_read_byte(dev, ADT7X10_STATUS);
+ ret = regmap_read(data->regmap, ADT7X10_STATUS, &status);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", !!(ret & attr->index));
+ *val = !!(status & index);
+
+ return 0;
+}
+
+static umode_t adt7x10_is_visible(const void *data,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ switch (attr) {
+ case hwmon_temp_max:
+ case hwmon_temp_min:
+ case hwmon_temp_crit:
+ case hwmon_temp_max_hyst:
+ return 0644;
+ case hwmon_temp_input:
+ case hwmon_temp_min_alarm:
+ case hwmon_temp_max_alarm:
+ case hwmon_temp_crit_alarm:
+ case hwmon_temp_min_hyst:
+ case hwmon_temp_crit_hyst:
+ return 0444;
+ default:
+ break;
+ }
+
+ return 0;
}
-static ssize_t name_show(struct device *dev, struct device_attribute *da,
- char *buf)
+static int adt7x10_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
{
struct adt7x10_data *data = dev_get_drvdata(dev);
- return sprintf(buf, "%s\n", data->name);
+ switch (attr) {
+ case hwmon_temp_input:
+ return adt7x10_temp_read(data, adt7x10_temperature, val);
+ case hwmon_temp_max:
+ return adt7x10_temp_read(data, adt7x10_t_alarm_high, val);
+ case hwmon_temp_min:
+ return adt7x10_temp_read(data, adt7x10_t_alarm_low, val);
+ case hwmon_temp_crit:
+ return adt7x10_temp_read(data, adt7x10_t_crit, val);
+ case hwmon_temp_max_hyst:
+ return adt7x10_hyst_read(data, adt7x10_t_alarm_high, val);
+ case hwmon_temp_min_hyst:
+ return adt7x10_hyst_read(data, adt7x10_t_alarm_low, val);
+ case hwmon_temp_crit_hyst:
+ return adt7x10_hyst_read(data, adt7x10_t_crit, val);
+ case hwmon_temp_min_alarm:
+ return adt7x10_alarm_read(data, ADT7X10_STAT_T_LOW, val);
+ case hwmon_temp_max_alarm:
+ return adt7x10_alarm_read(data, ADT7X10_STAT_T_HIGH, val);
+ case hwmon_temp_crit_alarm:
+ return adt7x10_alarm_read(data, ADT7X10_STAT_T_CRIT, val);
+ default:
+ return -EOPNOTSUPP;
+ }
}
-static SENSOR_DEVICE_ATTR_RO(temp1_input, adt7x10_temp, 0);
-static SENSOR_DEVICE_ATTR_RW(temp1_max, adt7x10_temp, 1);
-static SENSOR_DEVICE_ATTR_RW(temp1_min, adt7x10_temp, 2);
-static SENSOR_DEVICE_ATTR_RW(temp1_crit, adt7x10_temp, 3);
-static SENSOR_DEVICE_ATTR_RW(temp1_max_hyst, adt7x10_t_hyst, 1);
-static SENSOR_DEVICE_ATTR_RO(temp1_min_hyst, adt7x10_t_hyst, 2);
-static SENSOR_DEVICE_ATTR_RO(temp1_crit_hyst, adt7x10_t_hyst, 3);
-static SENSOR_DEVICE_ATTR_RO(temp1_min_alarm, adt7x10_alarm,
- ADT7X10_STAT_T_LOW);
-static SENSOR_DEVICE_ATTR_RO(temp1_max_alarm, adt7x10_alarm,
- ADT7X10_STAT_T_HIGH);
-static SENSOR_DEVICE_ATTR_RO(temp1_crit_alarm, adt7x10_alarm,
- ADT7X10_STAT_T_CRIT);
-static DEVICE_ATTR_RO(name);
-
-static struct attribute *adt7x10_attributes[] = {
- &sensor_dev_attr_temp1_input.dev_attr.attr,
- &sensor_dev_attr_temp1_max.dev_attr.attr,
- &sensor_dev_attr_temp1_min.dev_attr.attr,
- &sensor_dev_attr_temp1_crit.dev_attr.attr,
- &sensor_dev_attr_temp1_max_hyst.dev_attr.attr,
- &sensor_dev_attr_temp1_min_hyst.dev_attr.attr,
- &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
- &sensor_dev_attr_temp1_min_alarm.dev_attr.attr,
- &sensor_dev_attr_temp1_max_alarm.dev_attr.attr,
- &sensor_dev_attr_temp1_crit_alarm.dev_attr.attr,
- NULL
+static int adt7x10_write(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long val)
+{
+ struct adt7x10_data *data = dev_get_drvdata(dev);
+
+ switch (attr) {
+ case hwmon_temp_max:
+ return adt7x10_temp_write(data, adt7x10_t_alarm_high, val);
+ case hwmon_temp_min:
+ return adt7x10_temp_write(data, adt7x10_t_alarm_low, val);
+ case hwmon_temp_crit:
+ return adt7x10_temp_write(data, adt7x10_t_crit, val);
+ case hwmon_temp_max_hyst:
+ return adt7x10_hyst_write(data, val);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static const struct hwmon_channel_info *adt7x10_info[] = {
+ HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_CRIT | HWMON_T_MAX_HYST | HWMON_T_MIN_HYST |
+ HWMON_T_CRIT_HYST | HWMON_T_MIN_ALARM |
+ HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM),
+ NULL,
+};
+
+static const struct hwmon_ops adt7x10_hwmon_ops = {
+ .is_visible = adt7x10_is_visible,
+ .read = adt7x10_read,
+ .write = adt7x10_write,
};
-static const struct attribute_group adt7x10_group = {
- .attrs = adt7x10_attributes,
+static const struct hwmon_chip_info adt7x10_chip_info = {
+ .ops = &adt7x10_hwmon_ops,
+ .info = adt7x10_info,
};
+static void adt7x10_restore_config(void *private)
+{
+ struct adt7x10_data *data = private;
+
+ regmap_write(data->regmap, ADT7X10_CONFIG, data->oldconfig);
+}
+
int adt7x10_probe(struct device *dev, const char *name, int irq,
- const struct adt7x10_ops *ops)
+ struct regmap *regmap)
{
struct adt7x10_data *data;
+ unsigned int config;
+ struct device *hdev;
int ret;
data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
- data->ops = ops;
- data->name = name;
+ data->regmap = regmap;
dev_set_drvdata(dev, data);
mutex_init(&data->update_lock);
/* configure as specified */
- ret = adt7x10_read_byte(dev, ADT7X10_CONFIG);
+ ret = regmap_read(regmap, ADT7X10_CONFIG, &config);
if (ret < 0) {
dev_dbg(dev, "Can't read config? %d\n", ret);
return ret;
}
- data->oldconfig = ret;
+ data->oldconfig = config;
/*
 * Set to 16-bit resolution, continuous conversion and comparator mode.
@@ -389,92 +369,49 @@ int adt7x10_probe(struct device *dev, const char *name, int irq,
data->config |= ADT7X10_FULL | ADT7X10_RESOLUTION | ADT7X10_EVENT_MODE;
if (data->config != data->oldconfig) {
- ret = adt7x10_write_byte(dev, ADT7X10_CONFIG, data->config);
+ ret = regmap_write(regmap, ADT7X10_CONFIG, data->config);
if (ret)
return ret;
- }
- dev_dbg(dev, "Config %02x\n", data->config);
-
- ret = adt7x10_fill_cache(dev);
- if (ret)
- goto exit_restore;
-
- /* Register sysfs hooks */
- ret = sysfs_create_group(&dev->kobj, &adt7x10_group);
- if (ret)
- goto exit_restore;
-
- /*
- * The I2C device will already have it's own 'name' attribute, but for
- * the SPI device we need to register it. name will only be non NULL if
- * the device doesn't register the 'name' attribute on its own.
- */
- if (name) {
- ret = device_create_file(dev, &dev_attr_name);
+ ret = devm_add_action_or_reset(dev, adt7x10_restore_config, data);
if (ret)
- goto exit_remove;
+ return ret;
}
+ dev_dbg(dev, "Config %02x\n", data->config);
- data->hwmon_dev = hwmon_device_register(dev);
- if (IS_ERR(data->hwmon_dev)) {
- ret = PTR_ERR(data->hwmon_dev);
- goto exit_remove_name;
- }
+ hdev = devm_hwmon_device_register_with_info(dev, name, data,
+ &adt7x10_chip_info, NULL);
+ if (IS_ERR(hdev))
+ return PTR_ERR(hdev);
if (irq > 0) {
- ret = request_threaded_irq(irq, NULL, adt7x10_irq_handler,
- IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
- dev_name(dev), dev);
+ ret = devm_request_threaded_irq(dev, irq, NULL,
+ adt7x10_irq_handler,
+ IRQF_TRIGGER_FALLING |
+ IRQF_ONESHOT,
+ dev_name(dev), hdev);
if (ret)
- goto exit_hwmon_device_unregister;
+ return ret;
}
return 0;
-
-exit_hwmon_device_unregister:
- hwmon_device_unregister(data->hwmon_dev);
-exit_remove_name:
- if (name)
- device_remove_file(dev, &dev_attr_name);
-exit_remove:
- sysfs_remove_group(&dev->kobj, &adt7x10_group);
-exit_restore:
- adt7x10_write_byte(dev, ADT7X10_CONFIG, data->oldconfig);
- return ret;
}
EXPORT_SYMBOL_GPL(adt7x10_probe);
-void adt7x10_remove(struct device *dev, int irq)
-{
- struct adt7x10_data *data = dev_get_drvdata(dev);
-
- if (irq > 0)
- free_irq(irq, dev);
-
- hwmon_device_unregister(data->hwmon_dev);
- if (data->name)
- device_remove_file(dev, &dev_attr_name);
- sysfs_remove_group(&dev->kobj, &adt7x10_group);
- if (data->oldconfig != data->config)
- adt7x10_write_byte(dev, ADT7X10_CONFIG, data->oldconfig);
-}
-EXPORT_SYMBOL_GPL(adt7x10_remove);
-
#ifdef CONFIG_PM_SLEEP
static int adt7x10_suspend(struct device *dev)
{
struct adt7x10_data *data = dev_get_drvdata(dev);
- return adt7x10_write_byte(dev, ADT7X10_CONFIG,
- data->config | ADT7X10_PD);
+ return regmap_write(data->regmap, ADT7X10_CONFIG,
+ data->config | ADT7X10_PD);
}
static int adt7x10_resume(struct device *dev)
{
struct adt7x10_data *data = dev_get_drvdata(dev);
- return adt7x10_write_byte(dev, ADT7X10_CONFIG, data->config);
+ return regmap_write(data->regmap, ADT7X10_CONFIG, data->config);
}
SIMPLE_DEV_PM_OPS(adt7x10_dev_pm_ops, adt7x10_suspend, adt7x10_resume);
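
adt7x10_remove() could be deleted because every resource is now devm-managed; the one non-trivial teardown step, restoring the original CONFIG register, is queued with devm_add_action_or_reset(), and devres runs its release actions in reverse registration order, so the restore fires after the hwmon device and IRQ are gone. A user-space emulation of that LIFO teardown (all names here are illustrative):

#include <stdio.h>

#define MAX_ACTIONS 8

/* toy devres list: actions run last-in, first-out on release */
struct devres {
	void (*action[MAX_ACTIONS])(void *);
	void *data[MAX_ACTIONS];
	int n;
};

static void devm_add_action(struct devres *d, void (*fn)(void *), void *p)
{
	d->action[d->n] = fn;
	d->data[d->n] = p;
	d->n++;
}

static void device_release(struct devres *d)
{
	while (d->n--)
		d->action[d->n](d->data[d->n]);
}

static void restore_config(void *p)
{
	printf("restore CONFIG to 0x%02x\n", *(unsigned char *)p);
}

static void unregister_hwmon(void *p)
{
	printf("unregister %s\n", (char *)p);
}

int main(void)
{
	struct devres dev = { .n = 0 };
	unsigned char oldconfig = 0x40;

	devm_add_action(&dev, restore_config, &oldconfig);	/* queued first */
	devm_add_action(&dev, unregister_hwmon, "hwmon0");	/* queued second */
	device_release(&dev);	/* hwmon goes away first, restore runs last */
	return 0;
}
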
diff --git a/drivers/hwmon/adt7x10.h b/drivers/hwmon/adt7x10.h
index a1ae682eb32e..ba22c32c8355 100644
--- a/drivers/hwmon/adt7x10.h
+++ b/drivers/hwmon/adt7x10.h
@@ -17,16 +17,8 @@
struct device;
-struct adt7x10_ops {
- int (*read_byte)(struct device *, u8 reg);
- int (*write_byte)(struct device *, u8 reg, u8 data);
- int (*read_word)(struct device *, u8 reg);
- int (*write_word)(struct device *, u8 reg, u16 data);
-};
-
int adt7x10_probe(struct device *dev, const char *name, int irq,
- const struct adt7x10_ops *ops);
-void adt7x10_remove(struct device *dev, int irq);
+ struct regmap *regmap);
#ifdef CONFIG_PM_SLEEP
extern const struct dev_pm_ops adt7x10_dev_pm_ops;
diff --git a/drivers/hwmon/aquacomputer_d5next.c b/drivers/hwmon/aquacomputer_d5next.c
index fb9341a53051..525809cf7c95 100644
--- a/drivers/hwmon/aquacomputer_d5next.c
+++ b/drivers/hwmon/aquacomputer_d5next.c
@@ -1,32 +1,41 @@
// SPDX-License-Identifier: GPL-2.0+
/*
- * hwmon driver for Aquacomputer D5 Next watercooling pump
+ * hwmon driver for Aquacomputer devices (D5 Next, Farbwerk 360)
*
- * The D5 Next sends HID reports (with ID 0x01) every second to report sensor values
- * (coolant temperature, pump and fan speed, voltage, current and power). It responds to
- * Get_Report requests, but returns a dummy value of no use.
+ * Aquacomputer devices send HID reports (with ID 0x01) every second to report
+ * sensor values.
*
* Copyright 2021 Aleksa Savic <savicaleksa83@gmail.com>
*/
-#include <asm/unaligned.h>
#include <linux/debugfs.h>
#include <linux/hid.h>
#include <linux/hwmon.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/seq_file.h>
+#include <asm/unaligned.h>
-#define DRIVER_NAME "aquacomputer-d5next"
+#define USB_VENDOR_ID_AQUACOMPUTER 0x0c70
+#define USB_PRODUCT_ID_D5NEXT 0xf00e
+#define USB_PRODUCT_ID_FARBWERK360 0xf010
-#define D5NEXT_STATUS_REPORT_ID 0x01
-#define D5NEXT_STATUS_UPDATE_INTERVAL (2 * HZ) /* In seconds */
+enum kinds { d5next, farbwerk360 };
-/* Register offsets for the D5 Next pump */
+static const char *const aqc_device_names[] = {
+ [d5next] = "d5next",
+ [farbwerk360] = "farbwerk360"
+};
-#define D5NEXT_SERIAL_FIRST_PART 3
-#define D5NEXT_SERIAL_SECOND_PART 5
-#define D5NEXT_FIRMWARE_VERSION 13
+#define DRIVER_NAME "aquacomputer_d5next"
+
+#define STATUS_REPORT_ID 0x01
+#define STATUS_UPDATE_INTERVAL (2 * HZ) /* in jiffies (2 s) */
+#define SERIAL_FIRST_PART 3
+#define SERIAL_SECOND_PART 5
+#define FIRMWARE_VERSION 13
+
+/* Register offsets for the D5 Next pump */
#define D5NEXT_POWER_CYCLES 24
#define D5NEXT_COOLANT_TEMP 87
@@ -44,76 +53,118 @@
#define D5NEXT_PUMP_CURRENT 112
#define D5NEXT_FAN_CURRENT 99
-/* Labels for provided values */
+/* Register offsets for the Farbwerk 360 RGB controller */
+#define FARBWERK360_NUM_SENSORS 4
+#define FARBWERK360_SENSOR_START 0x32
+#define FARBWERK360_SENSOR_SIZE 0x02
+#define FARBWERK360_SENSOR_DISCONNECTED 0x7FFF
-#define L_COOLANT_TEMP "Coolant temp"
+/* Labels for D5 Next */
+#define L_D5NEXT_COOLANT_TEMP "Coolant temp"
-#define L_PUMP_SPEED "Pump speed"
-#define L_FAN_SPEED "Fan speed"
-
-#define L_PUMP_POWER "Pump power"
-#define L_FAN_POWER "Fan power"
-
-#define L_PUMP_VOLTAGE "Pump voltage"
-#define L_FAN_VOLTAGE "Fan voltage"
-#define L_5V_VOLTAGE "+5V voltage"
-
-#define L_PUMP_CURRENT "Pump current"
-#define L_FAN_CURRENT "Fan current"
+static const char *const label_d5next_speeds[] = {
+ "Pump speed",
+ "Fan speed"
+};
-static const char *const label_speeds[] = {
- L_PUMP_SPEED,
- L_FAN_SPEED,
+static const char *const label_d5next_power[] = {
+ "Pump power",
+ "Fan power"
};
-static const char *const label_power[] = {
- L_PUMP_POWER,
- L_FAN_POWER,
+static const char *const label_d5next_voltages[] = {
+ "Pump voltage",
+ "Fan voltage",
+ "+5V voltage"
};
-static const char *const label_voltages[] = {
- L_PUMP_VOLTAGE,
- L_FAN_VOLTAGE,
- L_5V_VOLTAGE,
+static const char *const label_d5next_current[] = {
+ "Pump current",
+ "Fan current"
};
-static const char *const label_current[] = {
- L_PUMP_CURRENT,
- L_FAN_CURRENT,
+/* Labels for Farbwerk 360 temperature sensors */
+static const char *const label_temp_sensors[] = {
+ "Sensor 1",
+ "Sensor 2",
+ "Sensor 3",
+ "Sensor 4"
};
-struct d5next_data {
+struct aqc_data {
struct hid_device *hdev;
struct device *hwmon_dev;
struct dentry *debugfs;
- s32 temp_input;
+ enum kinds kind;
+ const char *name;
+
+ /* General info, same across all devices */
+ u32 serial_number[2];
+ u16 firmware_version;
+
+ /* D5 Next specific - how many times the device was powered on */
+ u32 power_cycles;
+
+ /* Sensor values */
+ s32 temp_input[4];
u16 speed_input[2];
u32 power_input[2];
u16 voltage_input[3];
u16 current_input[2];
- u32 serial_number[2];
- u16 firmware_version;
- u32 power_cycles; /* How many times the device was powered on */
+
unsigned long updated;
};
-static umode_t d5next_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr,
- int channel)
+static umode_t aqc_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr,
+ int channel)
{
- return 0444;
+ const struct aqc_data *priv = data;
+
+ switch (type) {
+ case hwmon_temp:
+ switch (priv->kind) {
+ case d5next:
+ if (channel == 0)
+ return 0444;
+ break;
+ case farbwerk360:
+ return 0444;
+ default:
+ break;
+ }
+ break;
+ case hwmon_fan:
+ case hwmon_power:
+ case hwmon_in:
+ case hwmon_curr:
+ switch (priv->kind) {
+ case d5next:
+ return 0444;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
}
-static int d5next_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel,
- long *val)
+static int aqc_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+ int channel, long *val)
{
- struct d5next_data *priv = dev_get_drvdata(dev);
+ struct aqc_data *priv = dev_get_drvdata(dev);
- if (time_after(jiffies, priv->updated + D5NEXT_STATUS_UPDATE_INTERVAL))
+ if (time_after(jiffies, priv->updated + STATUS_UPDATE_INTERVAL))
return -ENODATA;
switch (type) {
case hwmon_temp:
- *val = priv->temp_input;
+ if (priv->temp_input[channel] == -ENODATA)
+ return -ENODATA;
+
+ *val = priv->temp_input[channel];
break;
case hwmon_fan:
*val = priv->speed_input[channel];
@@ -134,24 +185,59 @@ static int d5next_read(struct device *dev, enum hwmon_sensor_types type, u32 att
return 0;
}
-static int d5next_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr,
- int channel, const char **str)
+static int aqc_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+ int channel, const char **str)
{
+ struct aqc_data *priv = dev_get_drvdata(dev);
+
switch (type) {
case hwmon_temp:
- *str = L_COOLANT_TEMP;
+ switch (priv->kind) {
+ case d5next:
+ *str = L_D5NEXT_COOLANT_TEMP;
+ break;
+ case farbwerk360:
+ *str = label_temp_sensors[channel];
+ break;
+ default:
+ break;
+ }
break;
case hwmon_fan:
- *str = label_speeds[channel];
+ switch (priv->kind) {
+ case d5next:
+ *str = label_d5next_speeds[channel];
+ break;
+ default:
+ break;
+ }
break;
case hwmon_power:
- *str = label_power[channel];
+ switch (priv->kind) {
+ case d5next:
+ *str = label_d5next_power[channel];
+ break;
+ default:
+ break;
+ }
break;
case hwmon_in:
- *str = label_voltages[channel];
+ switch (priv->kind) {
+ case d5next:
+ *str = label_d5next_voltages[channel];
+ break;
+ default:
+ break;
+ }
break;
case hwmon_curr:
- *str = label_current[channel];
+ switch (priv->kind) {
+ case d5next:
+ *str = label_d5next_current[channel];
+ break;
+ default:
+ break;
+ }
break;
default:
return -EOPNOTSUPP;
@@ -160,60 +246,89 @@ static int d5next_read_string(struct device *dev, enum hwmon_sensor_types type,
return 0;
}
-static const struct hwmon_ops d5next_hwmon_ops = {
- .is_visible = d5next_is_visible,
- .read = d5next_read,
- .read_string = d5next_read_string,
+static const struct hwmon_ops aqc_hwmon_ops = {
+ .is_visible = aqc_is_visible,
+ .read = aqc_read,
+ .read_string = aqc_read_string,
};
-static const struct hwmon_channel_info *d5next_info[] = {
- HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_LABEL),
- HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT | HWMON_F_LABEL, HWMON_F_INPUT | HWMON_F_LABEL),
- HWMON_CHANNEL_INFO(power, HWMON_P_INPUT | HWMON_P_LABEL, HWMON_P_INPUT | HWMON_P_LABEL),
- HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL,
+static const struct hwmon_channel_info *aqc_info[] = {
+ HWMON_CHANNEL_INFO(temp,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_LABEL),
+ HWMON_CHANNEL_INFO(fan,
+ HWMON_F_INPUT | HWMON_F_LABEL,
+ HWMON_F_INPUT | HWMON_F_LABEL),
+ HWMON_CHANNEL_INFO(power,
+ HWMON_P_INPUT | HWMON_P_LABEL,
+ HWMON_P_INPUT | HWMON_P_LABEL),
+ HWMON_CHANNEL_INFO(in,
+ HWMON_I_INPUT | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_LABEL,
HWMON_I_INPUT | HWMON_I_LABEL),
- HWMON_CHANNEL_INFO(curr, HWMON_C_INPUT | HWMON_C_LABEL, HWMON_C_INPUT | HWMON_C_LABEL),
+ HWMON_CHANNEL_INFO(curr,
+ HWMON_C_INPUT | HWMON_C_LABEL,
+ HWMON_C_INPUT | HWMON_C_LABEL),
NULL
};
-static const struct hwmon_chip_info d5next_chip_info = {
- .ops = &d5next_hwmon_ops,
- .info = d5next_info,
+static const struct hwmon_chip_info aqc_chip_info = {
+ .ops = &aqc_hwmon_ops,
+ .info = aqc_info,
};
-static int d5next_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size)
+static int aqc_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data,
+ int size)
{
- struct d5next_data *priv;
+ int i, sensor_value;
+ struct aqc_data *priv;
- if (report->id != D5NEXT_STATUS_REPORT_ID)
+ if (report->id != STATUS_REPORT_ID)
return 0;
priv = hid_get_drvdata(hdev);
/* Info provided with every report */
-
- priv->serial_number[0] = get_unaligned_be16(data + D5NEXT_SERIAL_FIRST_PART);
- priv->serial_number[1] = get_unaligned_be16(data + D5NEXT_SERIAL_SECOND_PART);
-
- priv->firmware_version = get_unaligned_be16(data + D5NEXT_FIRMWARE_VERSION);
- priv->power_cycles = get_unaligned_be32(data + D5NEXT_POWER_CYCLES);
+ priv->serial_number[0] = get_unaligned_be16(data + SERIAL_FIRST_PART);
+ priv->serial_number[1] = get_unaligned_be16(data + SERIAL_SECOND_PART);
+ priv->firmware_version = get_unaligned_be16(data + FIRMWARE_VERSION);
/* Sensor readings */
+ switch (priv->kind) {
+ case d5next:
+ priv->power_cycles = get_unaligned_be32(data + D5NEXT_POWER_CYCLES);
- priv->temp_input = get_unaligned_be16(data + D5NEXT_COOLANT_TEMP) * 10;
+ priv->temp_input[0] = get_unaligned_be16(data + D5NEXT_COOLANT_TEMP) * 10;
- priv->speed_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_SPEED);
- priv->speed_input[1] = get_unaligned_be16(data + D5NEXT_FAN_SPEED);
+ priv->speed_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_SPEED);
+ priv->speed_input[1] = get_unaligned_be16(data + D5NEXT_FAN_SPEED);
- priv->power_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_POWER) * 10000;
- priv->power_input[1] = get_unaligned_be16(data + D5NEXT_FAN_POWER) * 10000;
+ priv->power_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_POWER) * 10000;
+ priv->power_input[1] = get_unaligned_be16(data + D5NEXT_FAN_POWER) * 10000;
- priv->voltage_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_VOLTAGE) * 10;
- priv->voltage_input[1] = get_unaligned_be16(data + D5NEXT_FAN_VOLTAGE) * 10;
- priv->voltage_input[2] = get_unaligned_be16(data + D5NEXT_5V_VOLTAGE) * 10;
+ priv->voltage_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_VOLTAGE) * 10;
+ priv->voltage_input[1] = get_unaligned_be16(data + D5NEXT_FAN_VOLTAGE) * 10;
+ priv->voltage_input[2] = get_unaligned_be16(data + D5NEXT_5V_VOLTAGE) * 10;
- priv->current_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_CURRENT);
- priv->current_input[1] = get_unaligned_be16(data + D5NEXT_FAN_CURRENT);
+ priv->current_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_CURRENT);
+ priv->current_input[1] = get_unaligned_be16(data + D5NEXT_FAN_CURRENT);
+ break;
+ case farbwerk360:
+ /* Temperature sensor readings */
+ for (i = 0; i < FARBWERK360_NUM_SENSORS; i++) {
+ sensor_value = get_unaligned_be16(data + FARBWERK360_SENSOR_START +
+ i * FARBWERK360_SENSOR_SIZE);
+ if (sensor_value == FARBWERK360_SENSOR_DISCONNECTED)
+ priv->temp_input[i] = -ENODATA;
+ else
+ priv->temp_input[i] = sensor_value * 10;
+ }
+ break;
+ default:
+ break;
+ }
priv->updated = jiffies;
@@ -224,7 +339,7 @@ static int d5next_raw_event(struct hid_device *hdev, struct hid_report *report,
static int serial_number_show(struct seq_file *seqf, void *unused)
{
- struct d5next_data *priv = seqf->private;
+ struct aqc_data *priv = seqf->private;
seq_printf(seqf, "%05u-%05u\n", priv->serial_number[0], priv->serial_number[1]);
@@ -234,7 +349,7 @@ DEFINE_SHOW_ATTRIBUTE(serial_number);
static int firmware_version_show(struct seq_file *seqf, void *unused)
{
- struct d5next_data *priv = seqf->private;
+ struct aqc_data *priv = seqf->private;
seq_printf(seqf, "%u\n", priv->firmware_version);
@@ -244,7 +359,7 @@ DEFINE_SHOW_ATTRIBUTE(firmware_version);
static int power_cycles_show(struct seq_file *seqf, void *unused)
{
- struct d5next_data *priv = seqf->private;
+ struct aqc_data *priv = seqf->private;
seq_printf(seqf, "%u\n", priv->power_cycles);
@@ -252,29 +367,32 @@ static int power_cycles_show(struct seq_file *seqf, void *unused)
}
DEFINE_SHOW_ATTRIBUTE(power_cycles);
-static void d5next_debugfs_init(struct d5next_data *priv)
+static void aqc_debugfs_init(struct aqc_data *priv)
{
- char name[32];
+ char name[64];
- scnprintf(name, sizeof(name), "%s-%s", DRIVER_NAME, dev_name(&priv->hdev->dev));
+ scnprintf(name, sizeof(name), "%s_%s-%s", "aquacomputer", priv->name,
+ dev_name(&priv->hdev->dev));
priv->debugfs = debugfs_create_dir(name, NULL);
debugfs_create_file("serial_number", 0444, priv->debugfs, priv, &serial_number_fops);
debugfs_create_file("firmware_version", 0444, priv->debugfs, priv, &firmware_version_fops);
- debugfs_create_file("power_cycles", 0444, priv->debugfs, priv, &power_cycles_fops);
+
+ if (priv->kind == d5next)
+ debugfs_create_file("power_cycles", 0444, priv->debugfs, priv, &power_cycles_fops);
}
#else
-static void d5next_debugfs_init(struct d5next_data *priv)
+static void aqc_debugfs_init(struct aqc_data *priv)
{
}
#endif
-static int d5next_probe(struct hid_device *hdev, const struct hid_device_id *id)
+static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
{
- struct d5next_data *priv;
+ struct aqc_data *priv;
int ret;
priv = devm_kzalloc(&hdev->dev, sizeof(*priv), GFP_KERNEL);
@@ -284,7 +402,7 @@ static int d5next_probe(struct hid_device *hdev, const struct hid_device_id *id)
priv->hdev = hdev;
hid_set_drvdata(hdev, priv);
- priv->updated = jiffies - D5NEXT_STATUS_UPDATE_INTERVAL;
+ priv->updated = jiffies - STATUS_UPDATE_INTERVAL;
ret = hid_parse(hdev);
if (ret)
@@ -298,15 +416,28 @@ static int d5next_probe(struct hid_device *hdev, const struct hid_device_id *id)
if (ret)
goto fail_and_stop;
- priv->hwmon_dev = hwmon_device_register_with_info(&hdev->dev, "d5next", priv,
- &d5next_chip_info, NULL);
+ switch (hdev->product) {
+ case USB_PRODUCT_ID_D5NEXT:
+ priv->kind = d5next;
+ break;
+ case USB_PRODUCT_ID_FARBWERK360:
+ priv->kind = farbwerk360;
+ break;
+ default:
+ break;
+ }
+
+ priv->name = aqc_device_names[priv->kind];
+
+ priv->hwmon_dev = hwmon_device_register_with_info(&hdev->dev, priv->name, priv,
+ &aqc_chip_info, NULL);
if (IS_ERR(priv->hwmon_dev)) {
ret = PTR_ERR(priv->hwmon_dev);
goto fail_and_close;
}
- d5next_debugfs_init(priv);
+ aqc_debugfs_init(priv);
return 0;
@@ -317,9 +448,9 @@ fail_and_stop:
return ret;
}
-static void d5next_remove(struct hid_device *hdev)
+static void aqc_remove(struct hid_device *hdev)
{
- struct d5next_data *priv = hid_get_drvdata(hdev);
+ struct aqc_data *priv = hid_get_drvdata(hdev);
debugfs_remove_recursive(priv->debugfs);
hwmon_device_unregister(priv->hwmon_dev);
@@ -328,36 +459,36 @@ static void d5next_remove(struct hid_device *hdev)
hid_hw_stop(hdev);
}
-static const struct hid_device_id d5next_table[] = {
- { HID_USB_DEVICE(0x0c70, 0xf00e) }, /* Aquacomputer D5 Next */
- {},
+static const struct hid_device_id aqc_table[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_AQUACOMPUTER, USB_PRODUCT_ID_D5NEXT) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_AQUACOMPUTER, USB_PRODUCT_ID_FARBWERK360) },
+ { }
};
-MODULE_DEVICE_TABLE(hid, d5next_table);
+MODULE_DEVICE_TABLE(hid, aqc_table);
-static struct hid_driver d5next_driver = {
+static struct hid_driver aqc_driver = {
.name = DRIVER_NAME,
- .id_table = d5next_table,
- .probe = d5next_probe,
- .remove = d5next_remove,
- .raw_event = d5next_raw_event,
+ .id_table = aqc_table,
+ .probe = aqc_probe,
+ .remove = aqc_remove,
+ .raw_event = aqc_raw_event,
};
-static int __init d5next_init(void)
+static int __init aqc_init(void)
{
- return hid_register_driver(&d5next_driver);
+ return hid_register_driver(&aqc_driver);
}
-static void __exit d5next_exit(void)
+static void __exit aqc_exit(void)
{
- hid_unregister_driver(&d5next_driver);
+ hid_unregister_driver(&aqc_driver);
}
/* Request to initialize after the HID bus to ensure the driver is not loaded before it */
-
-late_initcall(d5next_init);
-module_exit(d5next_exit);
+late_initcall(aqc_init);
+module_exit(aqc_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Aleksa Savic <savicaleksa83@gmail.com>");
-MODULE_DESCRIPTION("Hwmon driver for Aquacomputer D5 Next pump");
+MODULE_DESCRIPTION("Hwmon driver for Aquacomputer devices");
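
aqc_raw_event() treats each HID status report as a flat blob of big-endian fields at fixed per-device offsets. A self-contained sketch of that decoding step (the report bytes and offsets below are invented; only the get_unaligned_be16() shape matches the driver):

#include <stdio.h>
#include <stdint.h>

/* portable stand-in for the kernel's get_unaligned_be16() */
static uint16_t get_unaligned_be16(const uint8_t *p)
{
	return (uint16_t)((p[0] << 8) | p[1]);
}

#define OFFSET_TEMP	3	/* hypothetical report offsets */
#define OFFSET_SPEED	5

int main(void)
{
	const uint8_t report[] = { 0x01, 0x00, 0x00, 0x0b, 0x2b, 0x07, 0xd0 };

	/* centidegrees on the wire -> millidegrees, as the driver scales */
	printf("temp  = %d millidegrees\n",
	       get_unaligned_be16(report + OFFSET_TEMP) * 10);
	printf("speed = %u RPM\n", get_unaligned_be16(report + OFFSET_SPEED));
	return 0;
}
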
diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c
new file mode 100644
index 000000000000..b5cf0136360c
--- /dev/null
+++ b/drivers/hwmon/asus-ec-sensors.c
@@ -0,0 +1,716 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * HWMON driver for ASUS motherboards that publish some sensor values
+ * via the embedded controller registers.
+ *
+ * Copyright (C) 2021 Eugene Shalygin <eugene.shalygin@gmail.com>
+ *
+ * EC provides:
+ * - Chipset temperature
+ * - CPU temperature
+ * - Motherboard temperature
+ * - T_Sensor temperature
+ * - VRM temperature
+ * - Water In temperature
+ * - Water Out temperature
+ * - CPU Optional fan RPM
+ * - Chipset fan RPM
+ * - VRM Heat Sink fan RPM
+ * - Water Flow fan RPM
+ * - CPU current
+ * - CPU core voltage
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitops.h>
+#include <linux/dev_printk.h>
+#include <linux/dmi.h>
+#include <linux/hwmon.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/sort.h>
+#include <linux/units.h>
+
+#include <asm/unaligned.h>
+
+static char *mutex_path_override;
+
+/* Writing to this EC register switches EC bank */
+#define ASUS_EC_BANK_REGISTER 0xff
+#define SENSOR_LABEL_LEN 16
+
+/*
+ * Arbitrarily chosen maximum allowed bank number. Required for sorting
+ * banks; currently overkill with just two banks in use, but set higher
+ * for the sake of alignment.
+ */
+#define ASUS_EC_MAX_BANK 3
+
+#define ACPI_LOCK_DELAY_MS 500
+
+/* ACPI mutex used by the firmware to guard access to the EC */
+#define ASUS_HW_ACCESS_MUTEX_ASMX "\\AMW0.ASMX"
+
+/* There are two variants of the vendor spelling */
+#define VENDOR_ASUS_UPPER_CASE "ASUSTeK COMPUTER INC."
+
+typedef union {
+ u32 value;
+ struct {
+ u8 index;
+ u8 bank;
+ u8 size;
+ u8 dummy;
+ } components;
+} sensor_address;
+
+#define MAKE_SENSOR_ADDRESS(size, bank, index) { \
+ .value = (size << 16) + (bank << 8) + index \
+ }
+
+static u32 hwmon_attributes[hwmon_max] = {
+ [hwmon_chip] = HWMON_C_REGISTER_TZ,
+ [hwmon_temp] = HWMON_T_INPUT | HWMON_T_LABEL,
+ [hwmon_in] = HWMON_I_INPUT | HWMON_I_LABEL,
+ [hwmon_curr] = HWMON_C_INPUT | HWMON_C_LABEL,
+ [hwmon_fan] = HWMON_F_INPUT | HWMON_F_LABEL,
+};
+
+struct ec_sensor_info {
+ char label[SENSOR_LABEL_LEN];
+ enum hwmon_sensor_types type;
+ sensor_address addr;
+};
+
+#define EC_SENSOR(sensor_label, sensor_type, size, bank, index) { \
+ .label = sensor_label, .type = sensor_type, \
+ .addr = MAKE_SENSOR_ADDRESS(size, bank, index), \
+ }
+
+enum ec_sensors {
+ /* chipset temperature [℃] */
+ ec_sensor_temp_chipset,
+ /* CPU temperature [℃] */
+ ec_sensor_temp_cpu,
+ /* motherboard temperature [℃] */
+ ec_sensor_temp_mb,
+ /* "T_Sensor" temperature sensor reading [℃] */
+ ec_sensor_temp_t_sensor,
+ /* VRM temperature [℃] */
+ ec_sensor_temp_vrm,
+ /* CPU Core voltage [mV] */
+ ec_sensor_in_cpu_core,
+ /* CPU_Opt fan [RPM] */
+ ec_sensor_fan_cpu_opt,
+ /* VRM heat sink fan [RPM] */
+ ec_sensor_fan_vrm_hs,
+ /* Chipset fan [RPM] */
+ ec_sensor_fan_chipset,
+ /* Water flow sensor reading [RPM] */
+ ec_sensor_fan_water_flow,
+ /* CPU current [A] */
+ ec_sensor_curr_cpu,
+ /* "Water_In" temperature sensor reading [℃] */
+ ec_sensor_temp_water_in,
+ /* "Water_Out" temperature sensor reading [℃] */
+ ec_sensor_temp_water_out,
+};
+
+#define SENSOR_TEMP_CHIPSET BIT(ec_sensor_temp_chipset)
+#define SENSOR_TEMP_CPU BIT(ec_sensor_temp_cpu)
+#define SENSOR_TEMP_MB BIT(ec_sensor_temp_mb)
+#define SENSOR_TEMP_T_SENSOR BIT(ec_sensor_temp_t_sensor)
+#define SENSOR_TEMP_VRM BIT(ec_sensor_temp_vrm)
+#define SENSOR_IN_CPU_CORE BIT(ec_sensor_in_cpu_core)
+#define SENSOR_FAN_CPU_OPT BIT(ec_sensor_fan_cpu_opt)
+#define SENSOR_FAN_VRM_HS BIT(ec_sensor_fan_vrm_hs)
+#define SENSOR_FAN_CHIPSET BIT(ec_sensor_fan_chipset)
+#define SENSOR_FAN_WATER_FLOW BIT(ec_sensor_fan_water_flow)
+#define SENSOR_CURR_CPU BIT(ec_sensor_curr_cpu)
+#define SENSOR_TEMP_WATER_IN BIT(ec_sensor_temp_water_in)
+#define SENSOR_TEMP_WATER_OUT BIT(ec_sensor_temp_water_out)
+
+/* All the known sensors for ASUS EC controllers */
+static const struct ec_sensor_info known_ec_sensors[] = {
+ [ec_sensor_temp_chipset] =
+ EC_SENSOR("Chipset", hwmon_temp, 1, 0x00, 0x3a),
+ [ec_sensor_temp_cpu] = EC_SENSOR("CPU", hwmon_temp, 1, 0x00, 0x3b),
+ [ec_sensor_temp_mb] =
+ EC_SENSOR("Motherboard", hwmon_temp, 1, 0x00, 0x3c),
+ [ec_sensor_temp_t_sensor] =
+ EC_SENSOR("T_Sensor", hwmon_temp, 1, 0x00, 0x3d),
+ [ec_sensor_temp_vrm] = EC_SENSOR("VRM", hwmon_temp, 1, 0x00, 0x3e),
+ [ec_sensor_in_cpu_core] =
+ EC_SENSOR("CPU Core", hwmon_in, 2, 0x00, 0xa2),
+ [ec_sensor_fan_cpu_opt] =
+ EC_SENSOR("CPU_Opt", hwmon_fan, 2, 0x00, 0xb0),
+ [ec_sensor_fan_vrm_hs] = EC_SENSOR("VRM HS", hwmon_fan, 2, 0x00, 0xb2),
+ [ec_sensor_fan_chipset] =
+ EC_SENSOR("Chipset", hwmon_fan, 2, 0x00, 0xb4),
+ [ec_sensor_fan_water_flow] =
+ EC_SENSOR("Water_Flow", hwmon_fan, 2, 0x00, 0xbc),
+ [ec_sensor_curr_cpu] = EC_SENSOR("CPU", hwmon_curr, 1, 0x00, 0xf4),
+ [ec_sensor_temp_water_in] =
+ EC_SENSOR("Water_In", hwmon_temp, 1, 0x01, 0x00),
+ [ec_sensor_temp_water_out] =
+ EC_SENSOR("Water_Out", hwmon_temp, 1, 0x01, 0x01),
+};
+
+/* Shortcuts for common combinations */
+#define SENSOR_SET_TEMP_CHIPSET_CPU_MB \
+ (SENSOR_TEMP_CHIPSET | SENSOR_TEMP_CPU | SENSOR_TEMP_MB)
+#define SENSOR_SET_TEMP_WATER (SENSOR_TEMP_WATER_IN | SENSOR_TEMP_WATER_OUT)
+
+#define DMI_EXACT_MATCH_BOARD(vendor, name, sensors) { \
+ .matches = { \
+ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, vendor), \
+ DMI_EXACT_MATCH(DMI_BOARD_NAME, name), \
+ }, \
+ .driver_data = (void *)(sensors), \
+}
+
+static const struct dmi_system_id asus_ec_dmi_table[] __initconst = {
+ DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "PRIME X570-PRO",
+ SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM |
+ SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET),
+ DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "Pro WS X570-ACE",
+ SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM |
+ SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
+ DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE,
+ "ROG CROSSHAIR VIII DARK HERO",
+ SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
+ SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER |
+ SENSOR_FAN_CPU_OPT | SENSOR_FAN_WATER_FLOW |
+ SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
+ DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE,
+ "ROG CROSSHAIR VIII FORMULA",
+ SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
+ SENSOR_TEMP_VRM | SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET |
+ SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
+ DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG CROSSHAIR VIII HERO",
+ SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
+ SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER |
+ SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET |
+ SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
+ DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE,
+ "ROG CROSSHAIR VIII HERO (WI-FI)",
+ SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
+ SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER |
+ SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET |
+ SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
+ DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE,
+ "ROG CROSSHAIR VIII IMPACT",
+ SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
+ SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET |
+ SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
+ DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX B550-E GAMING",
+ SENSOR_SET_TEMP_CHIPSET_CPU_MB |
+ SENSOR_TEMP_T_SENSOR |
+ SENSOR_TEMP_VRM | SENSOR_FAN_CPU_OPT),
+ DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX B550-I GAMING",
+ SENSOR_SET_TEMP_CHIPSET_CPU_MB |
+ SENSOR_TEMP_T_SENSOR |
+ SENSOR_TEMP_VRM | SENSOR_FAN_VRM_HS |
+ SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
+ DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-E GAMING",
+ SENSOR_SET_TEMP_CHIPSET_CPU_MB |
+ SENSOR_TEMP_T_SENSOR |
+ SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET |
+ SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
+ DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-F GAMING",
+ SENSOR_SET_TEMP_CHIPSET_CPU_MB |
+ SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET),
+ DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-I GAMING",
+ SENSOR_TEMP_T_SENSOR | SENSOR_FAN_VRM_HS |
+ SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
+ {}
+};
+
+struct ec_sensor {
+ unsigned int info_index;
+ s32 cached_value;
+};
+
+struct ec_sensors_data {
+ unsigned long board_sensors;
+ struct ec_sensor *sensors;
+ /* EC registers to read from */
+ u16 *registers;
+ u8 *read_buffer;
+ /* sorted list of unique register banks */
+ u8 banks[ASUS_EC_MAX_BANK + 1];
+ /* in jiffies */
+ unsigned long last_updated;
+ acpi_handle aml_mutex;
+ /* number of board EC sensors */
+ u8 nr_sensors;
+ /*
+ * number of EC registers to read
+ * (sensor might span more than 1 register)
+ */
+ u8 nr_registers;
+ /* number of unique register banks */
+ u8 nr_banks;
+};
+
+static u8 register_bank(u16 reg)
+{
+ return reg >> 8;
+}
+
+static u8 register_index(u16 reg)
+{
+ return reg & 0x00ff;
+}
+
+static bool is_sensor_data_signed(const struct ec_sensor_info *si)
+{
+ /*
+ * guessed from WMI functions in DSDT code for boards
+ * of the X470 generation
+ */
+ return si->type == hwmon_temp;
+}
+
+static const struct ec_sensor_info *
+get_sensor_info(const struct ec_sensors_data *state, int index)
+{
+ return &known_ec_sensors[state->sensors[index].info_index];
+}
+
+static int find_ec_sensor_index(const struct ec_sensors_data *ec,
+ enum hwmon_sensor_types type, int channel)
+{
+ unsigned int i;
+
+ for (i = 0; i < ec->nr_sensors; i++) {
+ if (get_sensor_info(ec, i)->type == type) {
+ if (channel == 0)
+ return i;
+ channel--;
+ }
+ }
+ return -ENOENT;
+}
+
+static int __init bank_compare(const void *a, const void *b)
+{
+ return *((const s8 *)a) - *((const s8 *)b);
+}
+
+static int __init board_sensors_count(unsigned long sensors)
+{
+ return hweight_long(sensors);
+}
+
+static void __init setup_sensor_data(struct ec_sensors_data *ec)
+{
+ struct ec_sensor *s = ec->sensors;
+ bool bank_found;
+ int i, j;
+ u8 bank;
+
+ ec->nr_banks = 0;
+ ec->nr_registers = 0;
+
+ for_each_set_bit(i, &ec->board_sensors,
+ BITS_PER_TYPE(ec->board_sensors)) {
+ s->info_index = i;
+ s->cached_value = 0;
+ ec->nr_registers +=
+ known_ec_sensors[s->info_index].addr.components.size;
+ bank_found = false;
+ bank = known_ec_sensors[s->info_index].addr.components.bank;
+ for (j = 0; j < ec->nr_banks; j++) {
+ if (ec->banks[j] == bank) {
+ bank_found = true;
+ break;
+ }
+ }
+ if (!bank_found) {
+ ec->banks[ec->nr_banks++] = bank;
+ }
+ s++;
+ }
+ sort(ec->banks, ec->nr_banks, 1, bank_compare, NULL);
+}
+
+static void __init fill_ec_registers(struct ec_sensors_data *ec)
+{
+ const struct ec_sensor_info *si;
+ unsigned int i, j, register_idx = 0;
+
+ for (i = 0; i < ec->nr_sensors; ++i) {
+ si = get_sensor_info(ec, i);
+ for (j = 0; j < si->addr.components.size; ++j, ++register_idx) {
+ ec->registers[register_idx] =
+ (si->addr.components.bank << 8) +
+ si->addr.components.index + j;
+ }
+ }
+}
+
+static acpi_handle __init asus_hw_access_mutex(struct device *dev)
+{
+ const char *mutex_path;
+ acpi_handle res;
+ int status;
+
+ mutex_path = mutex_path_override ?
+ mutex_path_override : ASUS_HW_ACCESS_MUTEX_ASMX;
+
+ status = acpi_get_handle(NULL, (acpi_string)mutex_path, &res);
+ if (ACPI_FAILURE(status)) {
+ dev_err(dev,
+ "Could not get hardware access guard mutex '%s': error %d",
+ mutex_path, status);
+ return NULL;
+ }
+ return res;
+}
+
+static int asus_ec_bank_switch(u8 bank, u8 *old)
+{
+ int status = 0;
+
+ if (old) {
+ status = ec_read(ASUS_EC_BANK_REGISTER, old);
+ }
+ if (status || (old && (*old == bank)))
+ return status;
+ return ec_write(ASUS_EC_BANK_REGISTER, bank);
+}
+
+static int asus_ec_block_read(const struct device *dev,
+ struct ec_sensors_data *ec)
+{
+ int ireg, ibank, status;
+ u8 bank, reg_bank, prev_bank;
+
+ bank = 0;
+ status = asus_ec_bank_switch(bank, &prev_bank);
+ if (status) {
+ dev_warn(dev, "EC bank switch failed");
+ return status;
+ }
+
+ if (prev_bank) {
+ /* oops... somebody else is working with the EC too */
+ dev_warn(dev,
+ "Concurrent access to the ACPI EC detected.\nRace condition possible.");
+ }
+
+ /* read registers minimizing bank switches. */
+ for (ibank = 0; ibank < ec->nr_banks; ibank++) {
+ if (bank != ec->banks[ibank]) {
+ bank = ec->banks[ibank];
+ if (asus_ec_bank_switch(bank, NULL)) {
+ dev_warn(dev, "EC bank switch to %d failed",
+ bank);
+ break;
+ }
+ }
+ for (ireg = 0; ireg < ec->nr_registers; ireg++) {
+ reg_bank = register_bank(ec->registers[ireg]);
+ if (reg_bank < bank) {
+ continue;
+ }
+ ec_read(register_index(ec->registers[ireg]),
+ ec->read_buffer + ireg);
+ }
+ }
+
+ status = asus_ec_bank_switch(prev_bank, NULL);
+ return status;
+}
+
+static inline s32 get_sensor_value(const struct ec_sensor_info *si, u8 *data)
+{
+ if (is_sensor_data_signed(si)) {
+ switch (si->addr.components.size) {
+ case 1:
+ return (s8)*data;
+ case 2:
+ return (s16)get_unaligned_be16(data);
+ case 4:
+ return (s32)get_unaligned_be32(data);
+ default:
+ return 0;
+ }
+ } else {
+ switch (si->addr.components.size) {
+ case 1:
+ return *data;
+ case 2:
+ return get_unaligned_be16(data);
+ case 4:
+ return get_unaligned_be32(data);
+ default:
+ return 0;
+ }
+ }
+}
+
+static void update_sensor_values(struct ec_sensors_data *ec, u8 *data)
+{
+ const struct ec_sensor_info *si;
+ struct ec_sensor *s;
+
+ for (s = ec->sensors; s != ec->sensors + ec->nr_sensors; s++) {
+ si = &known_ec_sensors[s->info_index];
+ s->cached_value = get_sensor_value(si, data);
+ data += si->addr.components.size;
+ }
+}
+
+static int update_ec_sensors(const struct device *dev,
+ struct ec_sensors_data *ec)
+{
+ int status;
+
+ /*
+ * ASUS DSDT does not specify that access to the EC has to be guarded,
+ * but firmware does access it via ACPI
+ */
+ if (ACPI_FAILURE(acpi_acquire_mutex(ec->aml_mutex, NULL,
+ ACPI_LOCK_DELAY_MS))) {
+ dev_err(dev, "Failed to acquire AML mutex");
+ status = -EBUSY;
+ goto cleanup;
+ }
+
+ status = asus_ec_block_read(dev, ec);
+
+ if (!status) {
+ update_sensor_values(ec, ec->read_buffer);
+ }
+ if (ACPI_FAILURE(acpi_release_mutex(ec->aml_mutex, NULL))) {
+ dev_err(dev, "Failed to release AML mutex");
+ }
+cleanup:
+ return status;
+}
+
+static long scale_sensor_value(s32 value, int data_type)
+{
+ switch (data_type) {
+ case hwmon_curr:
+ case hwmon_temp:
+ return value * MILLI;
+ default:
+ return value;
+ }
+}
+
+static int get_cached_value_or_update(const struct device *dev,
+ int sensor_index,
+ struct ec_sensors_data *state, s32 *value)
+{
+ if (time_after(jiffies, state->last_updated + HZ)) {
+ if (update_ec_sensors(dev, state)) {
+ dev_err(dev, "update_ec_sensors() failure\n");
+ return -EIO;
+ }
+
+ state->last_updated = jiffies;
+ }
+
+ *value = state->sensors[sensor_index].cached_value;
+ return 0;
+}
+
+/*
+ * The functions below implement the hwmon interface
+ */
+
+static int asus_ec_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ int ret;
+ s32 value = 0;
+
+ struct ec_sensors_data *state = dev_get_drvdata(dev);
+ int sidx = find_ec_sensor_index(state, type, channel);
+
+ if (sidx < 0) {
+ return sidx;
+ }
+
+ ret = get_cached_value_or_update(dev, sidx, state, &value);
+ if (!ret) {
+ *val = scale_sensor_value(value,
+ get_sensor_info(state, sidx)->type);
+ }
+
+ return ret;
+}
+
+static int asus_ec_hwmon_read_string(struct device *dev,
+ enum hwmon_sensor_types type, u32 attr,
+ int channel, const char **str)
+{
+ struct ec_sensors_data *state = dev_get_drvdata(dev);
+ int sensor_index = find_ec_sensor_index(state, type, channel);
+ *str = get_sensor_info(state, sensor_index)->label;
+
+ return 0;
+}
+
+static umode_t asus_ec_hwmon_is_visible(const void *drvdata,
+ enum hwmon_sensor_types type, u32 attr,
+ int channel)
+{
+ const struct ec_sensors_data *state = drvdata;
+
+ return find_ec_sensor_index(state, type, channel) >= 0 ? S_IRUGO : 0;
+}
+
+static int __init
+asus_ec_hwmon_add_chan_info(struct hwmon_channel_info *asus_ec_hwmon_chan,
+ struct device *dev, int num,
+ enum hwmon_sensor_types type, u32 config)
+{
+ int i;
+ u32 *cfg = devm_kcalloc(dev, num + 1, sizeof(*cfg), GFP_KERNEL);
+
+ if (!cfg)
+ return -ENOMEM;
+
+ asus_ec_hwmon_chan->type = type;
+ asus_ec_hwmon_chan->config = cfg;
+ for (i = 0; i < num; i++, cfg++)
+ *cfg = config;
+
+ return 0;
+}
+
+static const struct hwmon_ops asus_ec_hwmon_ops = {
+ .is_visible = asus_ec_hwmon_is_visible,
+ .read = asus_ec_hwmon_read,
+ .read_string = asus_ec_hwmon_read_string,
+};
+
+static struct hwmon_chip_info asus_ec_chip_info = {
+ .ops = &asus_ec_hwmon_ops,
+};
+
+static unsigned long __init get_board_sensors(void)
+{
+ const struct dmi_system_id *dmi_entry =
+ dmi_first_match(asus_ec_dmi_table);
+
+ return dmi_entry ? (unsigned long)dmi_entry->driver_data : 0;
+}
+
+static int __init asus_ec_probe(struct platform_device *pdev)
+{
+ const struct hwmon_channel_info **ptr_asus_ec_ci;
+ int nr_count[hwmon_max] = { 0 }, nr_types = 0;
+ struct hwmon_channel_info *asus_ec_hwmon_chan;
+ const struct hwmon_chip_info *chip_info;
+ struct device *dev = &pdev->dev;
+ struct ec_sensors_data *ec_data;
+ const struct ec_sensor_info *si;
+ enum hwmon_sensor_types type;
+ unsigned long board_sensors;
+ struct device *hwdev;
+ unsigned int i;
+
+ board_sensors = get_board_sensors();
+ if (!board_sensors)
+ return -ENODEV;
+
+ ec_data = devm_kzalloc(dev, sizeof(struct ec_sensors_data),
+ GFP_KERNEL);
+ if (!ec_data)
+ return -ENOMEM;
+
+ dev_set_drvdata(dev, ec_data);
+ ec_data->board_sensors = board_sensors;
+ ec_data->nr_sensors = board_sensors_count(ec_data->board_sensors);
+ ec_data->sensors = devm_kcalloc(dev, ec_data->nr_sensors,
+ sizeof(struct ec_sensor), GFP_KERNEL);
+
+ setup_sensor_data(ec_data);
+ ec_data->registers = devm_kcalloc(dev, ec_data->nr_registers,
+ sizeof(u16), GFP_KERNEL);
+ ec_data->read_buffer = devm_kcalloc(dev, ec_data->nr_registers,
+ sizeof(u8), GFP_KERNEL);
+
+ if (!ec_data->registers || !ec_data->read_buffer)
+ return -ENOMEM;
+
+ fill_ec_registers(ec_data);
+
+ ec_data->aml_mutex = asus_hw_access_mutex(dev);
+
+ for (i = 0; i < ec_data->nr_sensors; ++i) {
+ si = get_sensor_info(ec_data, i);
+ if (!nr_count[si->type])
+ ++nr_types;
+ ++nr_count[si->type];
+ }
+
+ if (nr_count[hwmon_temp])
+ nr_count[hwmon_chip]++, nr_types++;
+
+ asus_ec_hwmon_chan = devm_kcalloc(
+ dev, nr_types, sizeof(*asus_ec_hwmon_chan), GFP_KERNEL);
+ if (!asus_ec_hwmon_chan)
+ return -ENOMEM;
+
+ ptr_asus_ec_ci = devm_kcalloc(dev, nr_types + 1,
+ sizeof(*ptr_asus_ec_ci), GFP_KERNEL);
+ if (!ptr_asus_ec_ci)
+ return -ENOMEM;
+
+ asus_ec_chip_info.info = ptr_asus_ec_ci;
+ chip_info = &asus_ec_chip_info;
+
+ for (type = 0; type < hwmon_max; ++type) {
+ if (!nr_count[type])
+ continue;
+
+ asus_ec_hwmon_add_chan_info(asus_ec_hwmon_chan, dev,
+ nr_count[type], type,
+ hwmon_attributes[type]);
+ *ptr_asus_ec_ci++ = asus_ec_hwmon_chan++;
+ }
+
+ dev_info(dev, "board has %d EC sensors that span %d registers",
+ ec_data->nr_sensors, ec_data->nr_registers);
+
+ hwdev = devm_hwmon_device_register_with_info(dev, "asusec",
+ ec_data, chip_info, NULL);
+
+ return PTR_ERR_OR_ZERO(hwdev);
+}
+
+static const struct acpi_device_id acpi_ec_ids[] = {
+ /* Embedded Controller Device */
+ { "PNP0C09", 0 },
+ {}
+};
+
+static struct platform_driver asus_ec_sensors_platform_driver = {
+ .driver = {
+ .name = "asus-ec-sensors",
+ .acpi_match_table = acpi_ec_ids,
+ },
+};
+
+MODULE_DEVICE_TABLE(dmi, asus_ec_dmi_table);
+module_platform_driver_probe(asus_ec_sensors_platform_driver, asus_ec_probe);
+
+module_param_named(mutex_path, mutex_path_override, charp, 0);
+MODULE_PARM_DESC(mutex_path,
+ "Override ACPI mutex path used to guard access to hardware");
+
+MODULE_AUTHOR("Eugene Shalygin <eugene.shalygin@gmail.com>");
+MODULE_DESCRIPTION(
+ "HWMON driver for sensors accessible via ACPI EC in ASUS motherboards");
+MODULE_LICENSE("GPL");
diff --git a/drivers/hwmon/asus_wmi_ec_sensors.c b/drivers/hwmon/asus_wmi_ec_sensors.c
index 22a1459305a7..a3a2f014dec0 100644
--- a/drivers/hwmon/asus_wmi_ec_sensors.c
+++ b/drivers/hwmon/asus_wmi_ec_sensors.c
@@ -112,7 +112,8 @@ struct asus_wmi_data {
/* boards with EC support */
static struct asus_wmi_data sensors_board_PW_X570_P = {
.known_board_sensors = {
- SENSOR_TEMP_CHIPSET, SENSOR_TEMP_CPU, SENSOR_TEMP_MB, SENSOR_TEMP_VRM,
+ SENSOR_TEMP_CHIPSET, SENSOR_TEMP_CPU, SENSOR_TEMP_MB,
+ SENSOR_TEMP_T_SENSOR, SENSOR_TEMP_VRM,
SENSOR_FAN_CHIPSET,
SENSOR_MAX
},
diff --git a/drivers/hwmon/asus_wmi_sensors.c b/drivers/hwmon/asus_wmi_sensors.c
index c80eee874b6c..8fdcb62ae52d 100644
--- a/drivers/hwmon/asus_wmi_sensors.c
+++ b/drivers/hwmon/asus_wmi_sensors.c
@@ -77,6 +77,7 @@ static const struct dmi_system_id asus_wmi_dmi_table[] = {
DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VII HERO (WI-FI)"),
DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-E GAMING"),
DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-F GAMING"),
+ DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-F GAMING II"),
DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-I GAMING"),
DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX X399-E GAMING"),
DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX X470-F GAMING"),
diff --git a/drivers/hwmon/axi-fan-control.c b/drivers/hwmon/axi-fan-control.c
index d2092c17d993..96c4a5c45291 100644
--- a/drivers/hwmon/axi-fan-control.c
+++ b/drivers/hwmon/axi-fan-control.c
@@ -339,7 +339,8 @@ static irqreturn_t axi_fan_control_irq_handler(int irq, void *data)
ctl->update_tacho_params = true;
} else {
ctl->hw_pwm_req = false;
- sysfs_notify(&ctl->hdev->kobj, NULL, "pwm1");
+ hwmon_notify_event(ctl->hdev, hwmon_pwm,
+ hwmon_pwm_input, 0);
}
}
diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index 9949eeb79378..84cb1ede7bc0 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -21,6 +21,7 @@
#include <linux/errno.h>
#include <linux/hwmon.h>
#include <linux/init.h>
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
@@ -86,8 +87,8 @@ MODULE_LICENSE("GPL");
MODULE_ALIAS("i8k");
static bool force;
-module_param(force, bool, 0);
-MODULE_PARM_DESC(force, "Force loading without checking for supported models");
+module_param_unsafe(force, bool, 0);
+MODULE_PARM_DESC(force, "Force loading without checking for supported models and features");
static bool ignore_dmi;
module_param(ignore_dmi, bool, 0);
@@ -250,46 +251,52 @@ static int i8k_smm(struct smm_regs *regs)
/*
* Read the fan status.
*/
-static int i8k_get_fan_status(const struct dell_smm_data *data, int fan)
+static int i8k_get_fan_status(const struct dell_smm_data *data, u8 fan)
{
- struct smm_regs regs = { .eax = I8K_SMM_GET_FAN, };
+ struct smm_regs regs = {
+ .eax = I8K_SMM_GET_FAN,
+ .ebx = fan,
+ };
if (data->disallow_fan_support)
return -EINVAL;
- regs.ebx = fan & 0xff;
return i8k_smm(&regs) ? : regs.eax & 0xff;
}
/*
* Read the fan speed in RPM.
*/
-static int i8k_get_fan_speed(const struct dell_smm_data *data, int fan)
+static int i8k_get_fan_speed(const struct dell_smm_data *data, u8 fan)
{
- struct smm_regs regs = { .eax = I8K_SMM_GET_SPEED, };
+ struct smm_regs regs = {
+ .eax = I8K_SMM_GET_SPEED,
+ .ebx = fan,
+ };
if (data->disallow_fan_support)
return -EINVAL;
- regs.ebx = fan & 0xff;
return i8k_smm(&regs) ? : (regs.eax & 0xffff) * data->i8k_fan_mult;
}
/*
* Read the fan type.
*/
-static int _i8k_get_fan_type(const struct dell_smm_data *data, int fan)
+static int _i8k_get_fan_type(const struct dell_smm_data *data, u8 fan)
{
- struct smm_regs regs = { .eax = I8K_SMM_GET_FAN_TYPE, };
+ struct smm_regs regs = {
+ .eax = I8K_SMM_GET_FAN_TYPE,
+ .ebx = fan,
+ };
if (data->disallow_fan_support || data->disallow_fan_type_call)
return -EINVAL;
- regs.ebx = fan & 0xff;
return i8k_smm(&regs) ? : regs.eax & 0xff;
}
-static int i8k_get_fan_type(struct dell_smm_data *data, int fan)
+static int i8k_get_fan_type(struct dell_smm_data *data, u8 fan)
{
/* I8K_SMM_GET_FAN_TYPE SMM call is expensive, so cache values */
if (data->fan_type[fan] == INT_MIN)
@@ -301,14 +308,16 @@ static int i8k_get_fan_type(struct dell_smm_data *data, int fan)
/*
* Read the fan nominal rpm for specific fan speed.
*/
-static int __init i8k_get_fan_nominal_speed(const struct dell_smm_data *data, int fan, int speed)
+static int __init i8k_get_fan_nominal_speed(const struct dell_smm_data *data, u8 fan, int speed)
{
- struct smm_regs regs = { .eax = I8K_SMM_GET_NOM_SPEED, };
+ struct smm_regs regs = {
+ .eax = I8K_SMM_GET_NOM_SPEED,
+ .ebx = fan | (speed << 8),
+ };
if (data->disallow_fan_support)
return -EINVAL;
- regs.ebx = (fan & 0xff) | (speed << 8);
return i8k_smm(&regs) ? : (regs.eax & 0xffff) * data->i8k_fan_mult;
}
@@ -329,7 +338,7 @@ static int i8k_enable_fan_auto_mode(const struct dell_smm_data *data, bool enabl
/*
* Set the fan speed (off, low, high, ...).
*/
-static int i8k_set_fan(const struct dell_smm_data *data, int fan, int speed)
+static int i8k_set_fan(const struct dell_smm_data *data, u8 fan, int speed)
{
struct smm_regs regs = { .eax = I8K_SMM_SET_FAN, };
@@ -337,33 +346,35 @@ static int i8k_set_fan(const struct dell_smm_data *data, int fan, int speed)
return -EINVAL;
speed = (speed < 0) ? 0 : ((speed > data->i8k_fan_max) ? data->i8k_fan_max : speed);
- regs.ebx = (fan & 0xff) | (speed << 8);
+ regs.ebx = fan | (speed << 8);
return i8k_smm(&regs);
}
-static int __init i8k_get_temp_type(int sensor)
+static int __init i8k_get_temp_type(u8 sensor)
{
- struct smm_regs regs = { .eax = I8K_SMM_GET_TEMP_TYPE, };
+ struct smm_regs regs = {
+ .eax = I8K_SMM_GET_TEMP_TYPE,
+ .ebx = sensor,
+ };
- regs.ebx = sensor & 0xff;
return i8k_smm(&regs) ? : regs.eax & 0xff;
}
/*
* Read the cpu temperature.
*/
-static int _i8k_get_temp(int sensor)
+static int _i8k_get_temp(u8 sensor)
{
struct smm_regs regs = {
.eax = I8K_SMM_GET_TEMP,
- .ebx = sensor & 0xff,
+ .ebx = sensor,
};
return i8k_smm(&regs) ? : regs.eax & 0xff;
}
-static int i8k_get_temp(int sensor)
+static int i8k_get_temp(u8 sensor)
{
int temp = _i8k_get_temp(sensor);
@@ -496,6 +507,9 @@ static long i8k_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
if (copy_from_user(&val, argp, sizeof(int)))
return -EFAULT;
+ if (val > U8_MAX || val < 0)
+ return -EINVAL;
+
val = i8k_get_fan_speed(data, val);
break;
@@ -503,6 +517,9 @@ static long i8k_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
if (copy_from_user(&val, argp, sizeof(int)))
return -EFAULT;
+ if (val > U8_MAX || val < 0)
+ return -EINVAL;
+
val = i8k_get_fan_status(data, val);
break;
@@ -513,6 +530,9 @@ static long i8k_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
if (copy_from_user(&val, argp, sizeof(int)))
return -EFAULT;
+ if (val > U8_MAX || val < 0)
+ return -EINVAL;
+
if (copy_from_user(&speed, argp + 1, sizeof(int)))
return -EFAULT;
@@ -631,6 +651,11 @@ static umode_t dell_smm_is_visible(const void *drvdata, enum hwmon_sensor_types
case hwmon_temp:
switch (attr) {
case hwmon_temp_input:
+ /* _i8k_get_temp() is fine since we do not care about the actual value */
+ if (data->temp_type[channel] >= 0 || _i8k_get_temp(channel) >= 0)
+ return 0444;
+
+ break;
case hwmon_temp_label:
if (data->temp_type[channel] >= 0)
return 0444;
@@ -920,7 +945,8 @@ static int __init dell_smm_init_hwmon(struct device *dev)
{
struct dell_smm_data *data = dev_get_drvdata(dev);
struct device *dell_smm_hwmon_dev;
- int i, state, err;
+ int state, err;
+ u8 i;
for (i = 0; i < DELL_SMM_NO_TEMP; i++) {
data->temp_type[i] = i8k_get_temp_type(i);
@@ -1131,6 +1157,13 @@ static const struct dmi_system_id i8k_blacklist_fan_type_dmi_table[] __initconst
DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Inspiron 580 "),
},
},
+ {
+ .ident = "Dell Inspiron 3505",
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Inspiron 3505"),
+ },
+ },
{ }
};
@@ -1236,7 +1269,8 @@ static int __init dell_smm_probe(struct platform_device *pdev)
{
struct dell_smm_data *data;
const struct dmi_system_id *id, *fan_control;
- int fan, ret;
+ int ret;
+ u8 fan;
data = devm_kzalloc(&pdev->dev, sizeof(struct dell_smm_data), GFP_KERNEL);
if (!data)
diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index 3501a3ead4ba..989e2c8496dd 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -18,6 +18,7 @@
#include <linux/list.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/property.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/thermal.h>
@@ -30,6 +31,7 @@
struct hwmon_device {
const char *name;
+ const char *label;
struct device dev;
const struct hwmon_chip_info *chip;
struct list_head tzdata;
@@ -71,17 +73,29 @@ name_show(struct device *dev, struct device_attribute *attr, char *buf)
}
static DEVICE_ATTR_RO(name);
+static ssize_t
+label_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%s\n", to_hwmon_device(dev)->label);
+}
+static DEVICE_ATTR_RO(label);
+
static struct attribute *hwmon_dev_attrs[] = {
&dev_attr_name.attr,
+ &dev_attr_label.attr,
NULL
};
-static umode_t hwmon_dev_name_is_visible(struct kobject *kobj,
+static umode_t hwmon_dev_attr_is_visible(struct kobject *kobj,
struct attribute *attr, int n)
{
struct device *dev = kobj_to_dev(kobj);
+ struct hwmon_device *hdev = to_hwmon_device(dev);
- if (to_hwmon_device(dev)->name == NULL)
+ if (attr == &dev_attr_name.attr && hdev->name == NULL)
+ return 0;
+
+ if (attr == &dev_attr_label.attr && hdev->label == NULL)
return 0;
return attr->mode;
@@ -89,7 +103,7 @@ static umode_t hwmon_dev_name_is_visible(struct kobject *kobj,
static const struct attribute_group hwmon_dev_attr_group = {
.attrs = hwmon_dev_attrs,
- .is_visible = hwmon_dev_name_is_visible,
+ .is_visible = hwmon_dev_attr_is_visible,
};
static const struct attribute_group *hwmon_dev_attr_groups[] = {
@@ -117,6 +131,7 @@ static void hwmon_dev_release(struct device *dev)
if (hwdev->group.attrs)
hwmon_free_attrs(hwdev->group.attrs);
kfree(hwdev->groups);
+ kfree(hwdev->label);
kfree(hwdev);
}
@@ -214,12 +229,14 @@ static int hwmon_thermal_add_sensor(struct device *dev, int index)
tzd = devm_thermal_zone_of_sensor_register(dev, index, tdata,
&hwmon_thermal_ops);
- /*
- * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV,
- * so ignore that error but forward any other error.
- */
- if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV))
- return PTR_ERR(tzd);
+ if (IS_ERR(tzd)) {
+ if (PTR_ERR(tzd) != -ENODEV)
+ return PTR_ERR(tzd);
+ dev_info(dev, "temp%d_input not attached to any thermal zone\n",
+ index + 1);
+ devm_kfree(dev, tdata);
+ return 0;
+ }
err = devm_add_action(dev, hwmon_thermal_remove_sensor, &tdata->node);
if (err)
@@ -587,6 +604,7 @@ static const char * const hwmon_pwm_attr_templates[] = {
[hwmon_pwm_enable] = "pwm%d_enable",
[hwmon_pwm_mode] = "pwm%d_mode",
[hwmon_pwm_freq] = "pwm%d_freq",
+ [hwmon_pwm_auto_channels_temp] = "pwm%d_auto_channels_temp",
};
static const char * const hwmon_intrusion_attr_templates[] = {
@@ -623,7 +641,9 @@ static const int __templates_size[] = {
int hwmon_notify_event(struct device *dev, enum hwmon_sensor_types type,
u32 attr, int channel)
{
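+ /* "NAME=" prefix (5 bytes) plus the sysfs attribute name */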
+ char event[MAX_SYSFS_ATTR_NAME_LENGTH + 5];
char sattr[MAX_SYSFS_ATTR_NAME_LENGTH];
+ char *envp[] = { event, NULL };
const char * const *templates;
const char *template;
int base;
@@ -639,8 +659,9 @@ int hwmon_notify_event(struct device *dev, enum hwmon_sensor_types type,
base = hwmon_attr_base(type);
scnprintf(sattr, MAX_SYSFS_ATTR_NAME_LENGTH, template, base + channel);
+ scnprintf(event, sizeof(event), "NAME=%s", sattr);
sysfs_notify(&dev->kobj, NULL, sattr);
- kobject_uevent(&dev->kobj, KOBJ_CHANGE);
+ kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp);
if (type == hwmon_temp)
hwmon_thermal_notify(dev, channel);
@@ -733,6 +754,7 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
const struct attribute_group **groups)
{
struct hwmon_device *hwdev;
+ const char *label;
struct device *hdev;
int i, err, id;
@@ -788,6 +810,18 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
hdev->groups = groups;
}
+ if (dev && device_property_present(dev, "label")) {
+ err = device_property_read_string(dev, "label", &label);
+ if (err < 0)
+ goto free_hwmon;
+
+ hwdev->label = kstrdup(label, GFP_KERNEL);
+ if (hwdev->label == NULL) {
+ err = -ENOMEM;
+ goto free_hwmon;
+ }
+ }
+
hwdev->name = name;
hdev->class = &hwmon_class;
hdev->parent = dev;
diff --git a/drivers/hwmon/lm70.c b/drivers/hwmon/lm70.c
index d2a60de5b8de..c20a749fc7f2 100644
--- a/drivers/hwmon/lm70.c
+++ b/drivers/hwmon/lm70.c
@@ -34,6 +34,7 @@
#define LM70_CHIP_LM71 2 /* NS LM71 */
#define LM70_CHIP_LM74 3 /* NS LM74 */
#define LM70_CHIP_TMP122 4 /* TI TMP122/TMP124 */
+#define LM70_CHIP_TMP125 5 /* TI TMP125 */
struct lm70 {
struct spi_device *spi;
@@ -87,6 +88,12 @@ static ssize_t temp1_input_show(struct device *dev,
* LM71:
* 14 bits of 2's complement data, discard LSB 2 bits,
* resolution 0.0312 degrees celsius.
+ *
+ * TMP125:
+ * MSB/D15 is a leading zero. D14 is the sign-bit. This is
+ * followed by 9 temperature bits (D13..D5) in 2's complement
+ * data format with a resolution of 0.25 degrees celsius per unit.
+ * LSB 5 bits (D4..D0) share the same value as D5 and get discarded.
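+ *
+ * For illustration (raw value assumed): raw = 0x1900 gives
+ * sign_extend32(0x1900, 14) = 6400; 6400 / 32 = 200 quarter-degree
+ * units, hence 200 * 250 = 50000 millidegrees celsius (50.0 degrees).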
*/
switch (p_lm70->chip) {
case LM70_CHIP_LM70:
@@ -102,6 +109,10 @@ static ssize_t temp1_input_show(struct device *dev,
case LM70_CHIP_LM71:
val = ((int)raw / 4) * 3125 / 100;
break;
+
+ case LM70_CHIP_TMP125:
+ val = (sign_extend32(raw, 14) / 32) * 250;
+ break;
}
status = sprintf(buf, "%d\n", val); /* millidegrees Celsius */
@@ -136,6 +147,10 @@ static const struct of_device_id lm70_of_ids[] = {
.data = (void *) LM70_CHIP_TMP122,
},
{
+ .compatible = "ti,tmp125",
+ .data = (void *) LM70_CHIP_TMP125,
+ },
+ {
.compatible = "ti,lm71",
.data = (void *) LM70_CHIP_LM71,
},
@@ -184,6 +199,7 @@ static const struct spi_device_id lm70_ids[] = {
{ "lm70", LM70_CHIP_LM70 },
{ "tmp121", LM70_CHIP_TMP121 },
{ "tmp122", LM70_CHIP_TMP122 },
+ { "tmp125", LM70_CHIP_TMP125 },
{ "lm71", LM70_CHIP_LM71 },
{ "lm74", LM70_CHIP_LM74 },
{ },
diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c
index 74fd7aa373a3..12370dcefa6a 100644
--- a/drivers/hwmon/lm83.c
+++ b/drivers/hwmon/lm83.c
@@ -18,15 +18,15 @@
* http://www.national.com/pf/LM/LM82.html
*/
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/jiffies.h>
+#include <linux/bits.h>
+#include <linux/err.h>
#include <linux/i2c.h>
-#include <linux/hwmon-sysfs.h>
+#include <linux/init.h>
#include <linux/hwmon.h>
-#include <linux/err.h>
+#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
#include <linux/sysfs.h>
/*
@@ -66,35 +66,35 @@ enum chips { lm83, lm82 };
#define LM83_REG_R_TCRIT 0x42
#define LM83_REG_W_TCRIT 0x5A
-/*
- * Conversions and various macros
- * The LM83 uses signed 8-bit values with LSB = 1 degree Celsius.
- */
-
-#define TEMP_FROM_REG(val) ((val) * 1000)
-#define TEMP_TO_REG(val) ((val) <= -128000 ? -128 : \
- (val) >= 127000 ? 127 : \
- (val) < 0 ? ((val) - 500) / 1000 : \
- ((val) + 500) / 1000)
-
-static const u8 LM83_REG_R_TEMP[] = {
+static const u8 LM83_REG_TEMP[] = {
LM83_REG_R_LOCAL_TEMP,
LM83_REG_R_REMOTE1_TEMP,
LM83_REG_R_REMOTE2_TEMP,
LM83_REG_R_REMOTE3_TEMP,
+};
+
+static const u8 LM83_REG_MAX[] = {
LM83_REG_R_LOCAL_HIGH,
LM83_REG_R_REMOTE1_HIGH,
LM83_REG_R_REMOTE2_HIGH,
LM83_REG_R_REMOTE3_HIGH,
- LM83_REG_R_TCRIT,
};
-static const u8 LM83_REG_W_HIGH[] = {
- LM83_REG_W_LOCAL_HIGH,
- LM83_REG_W_REMOTE1_HIGH,
- LM83_REG_W_REMOTE2_HIGH,
- LM83_REG_W_REMOTE3_HIGH,
- LM83_REG_W_TCRIT,
+/* alarm and fault registers and bits, indexed by channel */
+static const u8 LM83_ALARM_REG[] = {
+ LM83_REG_R_STATUS1, LM83_REG_R_STATUS2, LM83_REG_R_STATUS1, LM83_REG_R_STATUS2
+};
+
+static const u8 LM83_MAX_ALARM_BIT[] = {
+ BIT(6), BIT(7), BIT(4), BIT(4)
+};
+
+static const u8 LM83_CRIT_ALARM_BIT[] = {
+ BIT(0), BIT(0), BIT(1), BIT(1)
+};
+
+static const u8 LM83_FAULT_BIT[] = {
+ 0, BIT(5), BIT(2), BIT(2)
};
/*
@@ -102,180 +102,274 @@ static const u8 LM83_REG_W_HIGH[] = {
*/
struct lm83_data {
- struct i2c_client *client;
- const struct attribute_group *groups[3];
- struct mutex update_lock;
- bool valid; /* false until following fields are valid */
- unsigned long last_updated; /* in jiffies */
-
- /* registers values */
- s8 temp[9]; /* 0..3: input 1-4,
- 4..7: high limit 1-4,
- 8 : critical limit */
- u16 alarms; /* bitvector, combined */
+ struct regmap *regmap;
+ enum chips type;
};
-static struct lm83_data *lm83_update_device(struct device *dev)
+/* regmap code */
+
+static int lm83_regmap_reg_read(void *context, unsigned int reg, unsigned int *val)
{
- struct lm83_data *data = dev_get_drvdata(dev);
- struct i2c_client *client = data->client;
+ struct i2c_client *client = context;
+ int ret;
- mutex_lock(&data->update_lock);
+ ret = i2c_smbus_read_byte_data(client, reg);
+ if (ret < 0)
+ return ret;
- if (time_after(jiffies, data->last_updated + HZ * 2) || !data->valid) {
- int nr;
+ *val = ret;
+ return 0;
+}
- dev_dbg(&client->dev, "Updating lm83 data.\n");
- for (nr = 0; nr < 9; nr++) {
- data->temp[nr] =
- i2c_smbus_read_byte_data(client,
- LM83_REG_R_TEMP[nr]);
- }
- data->alarms =
- i2c_smbus_read_byte_data(client, LM83_REG_R_STATUS1)
- + (i2c_smbus_read_byte_data(client, LM83_REG_R_STATUS2)
- << 8);
+/*
+ * The regmap write function maps read register addresses to write register
+ * addresses. This is necessary for regmap register caching to work.
+ * An alternative would be to clear the regmap cache whenever a register is
+ * written, but that would be much more expensive.
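+ * For example, LM83_REG_R_TCRIT (0x42) + 0x18 yields LM83_REG_W_TCRIT (0x5A).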
+ */
+static int lm83_regmap_reg_write(void *context, unsigned int reg, unsigned int val)
+{
+ struct i2c_client *client = context;
- data->last_updated = jiffies;
- data->valid = true;
+ switch (reg) {
+ case LM83_REG_R_CONFIG:
+ case LM83_REG_R_LOCAL_HIGH:
+ case LM83_REG_R_REMOTE2_HIGH:
+ reg += 0x06;
+ break;
+ case LM83_REG_R_REMOTE1_HIGH:
+ case LM83_REG_R_REMOTE3_HIGH:
+ case LM83_REG_R_TCRIT:
+ reg += 0x18;
+ break;
+ default:
+ break;
}
- mutex_unlock(&data->update_lock);
+ return i2c_smbus_write_byte_data(client, reg, val);
+}
- return data;
+static bool lm83_regmap_is_volatile(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case LM83_REG_R_LOCAL_TEMP:
+ case LM83_REG_R_REMOTE1_TEMP:
+ case LM83_REG_R_REMOTE2_TEMP:
+ case LM83_REG_R_REMOTE3_TEMP:
+ case LM83_REG_R_STATUS1:
+ case LM83_REG_R_STATUS2:
+ return true;
+ default:
+ return false;
+ }
}
-/*
- * Sysfs stuff
- */
+static const struct regmap_config lm83_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .cache_type = REGCACHE_RBTREE,
+ .volatile_reg = lm83_regmap_is_volatile,
+ .reg_read = lm83_regmap_reg_read,
+ .reg_write = lm83_regmap_reg_write,
+};
-static ssize_t temp_show(struct device *dev, struct device_attribute *devattr,
- char *buf)
+/* hwmon API */
+
+static int lm83_temp_read(struct device *dev, u32 attr, int channel, long *val)
{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
- struct lm83_data *data = lm83_update_device(dev);
- return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp[attr->index]));
+ struct lm83_data *data = dev_get_drvdata(dev);
+ unsigned int regval;
+ int err;
+
+ switch (attr) {
+ case hwmon_temp_input:
+ err = regmap_read(data->regmap, LM83_REG_TEMP[channel], &regval);
+ if (err < 0)
+ return err;
+ *val = (s8)regval * 1000;
+ break;
+ case hwmon_temp_max:
+ err = regmap_read(data->regmap, LM83_REG_MAX[channel], &regval);
+ if (err < 0)
+ return err;
+ *val = (s8)regval * 1000;
+ break;
+ case hwmon_temp_crit:
+ err = regmap_read(data->regmap, LM83_REG_R_TCRIT, &regval);
+ if (err < 0)
+ return err;
+ *val = (s8)regval * 1000;
+ break;
+ case hwmon_temp_max_alarm:
+ err = regmap_read(data->regmap, LM83_ALARM_REG[channel], &regval);
+ if (err < 0)
+ return err;
+ *val = !!(regval & LM83_MAX_ALARM_BIT[channel]);
+ break;
+ case hwmon_temp_crit_alarm:
+ err = regmap_read(data->regmap, LM83_ALARM_REG[channel], &regval);
+ if (err < 0)
+ return err;
+ *val = !!(regval & LM83_CRIT_ALARM_BIT[channel]);
+ break;
+ case hwmon_temp_fault:
+ err = regmap_read(data->regmap, LM83_ALARM_REG[channel], &regval);
+ if (err < 0)
+ return err;
+ *val = !!(regval & LM83_FAULT_BIT[channel]);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
}
-static ssize_t temp_store(struct device *dev,
- struct device_attribute *devattr, const char *buf,
- size_t count)
+static int lm83_temp_write(struct device *dev, u32 attr, int channel, long val)
{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
struct lm83_data *data = dev_get_drvdata(dev);
- struct i2c_client *client = data->client;
- long val;
- int nr = attr->index;
+ unsigned int regval;
int err;
- err = kstrtol(buf, 10, &val);
- if (err < 0)
- return err;
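+ /* limit registers are signed 8-bit degrees C; round from millidegrees */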
+ regval = DIV_ROUND_CLOSEST(clamp_val(val, -128000, 127000), 1000);
- mutex_lock(&data->update_lock);
- data->temp[nr] = TEMP_TO_REG(val);
- i2c_smbus_write_byte_data(client, LM83_REG_W_HIGH[nr - 4],
- data->temp[nr]);
- mutex_unlock(&data->update_lock);
- return count;
+ switch (attr) {
+ case hwmon_temp_max:
+ err = regmap_write(data->regmap, LM83_REG_MAX[channel], regval);
+ if (err < 0)
+ return err;
+ break;
+ case hwmon_temp_crit:
+ err = regmap_write(data->regmap, LM83_REG_R_TCRIT, regval);
+ if (err < 0)
+ return err;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
}
-static ssize_t alarms_show(struct device *dev, struct device_attribute *dummy,
- char *buf)
+static int lm83_chip_read(struct device *dev, u32 attr, int channel, long *val)
{
- struct lm83_data *data = lm83_update_device(dev);
- return sprintf(buf, "%d\n", data->alarms);
+ struct lm83_data *data = dev_get_drvdata(dev);
+ unsigned int regval;
+ int err;
+
+ switch (attr) {
+ case hwmon_chip_alarms:
+ err = regmap_read(data->regmap, LM83_REG_R_STATUS1, &regval);
+ if (err < 0)
+ return err;
+ *val = regval;
+ err = regmap_read(data->regmap, LM83_REG_R_STATUS2, &regval);
+ if (err < 0)
+ return err;
+ *val |= regval << 8;
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
}
-static ssize_t alarm_show(struct device *dev,
- struct device_attribute *devattr, char *buf)
+static int lm83_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
- struct lm83_data *data = lm83_update_device(dev);
- int bitnr = attr->index;
+ switch (type) {
+ case hwmon_chip:
+ return lm83_chip_read(dev, attr, channel, val);
+ case hwmon_temp:
+ return lm83_temp_read(dev, attr, channel, val);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
- return sprintf(buf, "%d\n", (data->alarms >> bitnr) & 1);
+static int lm83_write(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long val)
+{
+ switch (type) {
+ case hwmon_temp:
+ return lm83_temp_write(dev, attr, channel, val);
+ default:
+ return -EOPNOTSUPP;
+ }
}
-static SENSOR_DEVICE_ATTR_RO(temp1_input, temp, 0);
-static SENSOR_DEVICE_ATTR_RO(temp2_input, temp, 1);
-static SENSOR_DEVICE_ATTR_RO(temp3_input, temp, 2);
-static SENSOR_DEVICE_ATTR_RO(temp4_input, temp, 3);
-static SENSOR_DEVICE_ATTR_RW(temp1_max, temp, 4);
-static SENSOR_DEVICE_ATTR_RW(temp2_max, temp, 5);
-static SENSOR_DEVICE_ATTR_RW(temp3_max, temp, 6);
-static SENSOR_DEVICE_ATTR_RW(temp4_max, temp, 7);
-static SENSOR_DEVICE_ATTR_RO(temp1_crit, temp, 8);
-static SENSOR_DEVICE_ATTR_RO(temp2_crit, temp, 8);
-static SENSOR_DEVICE_ATTR_RW(temp3_crit, temp, 8);
-static SENSOR_DEVICE_ATTR_RO(temp4_crit, temp, 8);
-
-/* Individual alarm files */
-static SENSOR_DEVICE_ATTR_RO(temp1_crit_alarm, alarm, 0);
-static SENSOR_DEVICE_ATTR_RO(temp3_crit_alarm, alarm, 1);
-static SENSOR_DEVICE_ATTR_RO(temp3_fault, alarm, 2);
-static SENSOR_DEVICE_ATTR_RO(temp3_max_alarm, alarm, 4);
-static SENSOR_DEVICE_ATTR_RO(temp1_max_alarm, alarm, 6);
-static SENSOR_DEVICE_ATTR_RO(temp2_crit_alarm, alarm, 8);
-static SENSOR_DEVICE_ATTR_RO(temp4_crit_alarm, alarm, 9);
-static SENSOR_DEVICE_ATTR_RO(temp4_fault, alarm, 10);
-static SENSOR_DEVICE_ATTR_RO(temp4_max_alarm, alarm, 12);
-static SENSOR_DEVICE_ATTR_RO(temp2_fault, alarm, 13);
-static SENSOR_DEVICE_ATTR_RO(temp2_max_alarm, alarm, 15);
-/* Raw alarm file for compatibility */
-static DEVICE_ATTR_RO(alarms);
-
-static struct attribute *lm83_attributes[] = {
- &sensor_dev_attr_temp1_input.dev_attr.attr,
- &sensor_dev_attr_temp3_input.dev_attr.attr,
- &sensor_dev_attr_temp1_max.dev_attr.attr,
- &sensor_dev_attr_temp3_max.dev_attr.attr,
- &sensor_dev_attr_temp1_crit.dev_attr.attr,
- &sensor_dev_attr_temp3_crit.dev_attr.attr,
-
- &sensor_dev_attr_temp1_crit_alarm.dev_attr.attr,
- &sensor_dev_attr_temp3_crit_alarm.dev_attr.attr,
- &sensor_dev_attr_temp3_fault.dev_attr.attr,
- &sensor_dev_attr_temp3_max_alarm.dev_attr.attr,
- &sensor_dev_attr_temp1_max_alarm.dev_attr.attr,
- &dev_attr_alarms.attr,
- NULL
-};
+static umode_t lm83_is_visible(const void *_data, enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ const struct lm83_data *data = _data;
-static const struct attribute_group lm83_group = {
- .attrs = lm83_attributes,
-};
+ /*
+ * LM82 only supports a single external channel, modeled as channel 2.
+ */
+ if (data->type == lm82 && (channel == 1 || channel == 3))
+ return 0;
-static struct attribute *lm83_attributes_opt[] = {
- &sensor_dev_attr_temp2_input.dev_attr.attr,
- &sensor_dev_attr_temp4_input.dev_attr.attr,
- &sensor_dev_attr_temp2_max.dev_attr.attr,
- &sensor_dev_attr_temp4_max.dev_attr.attr,
- &sensor_dev_attr_temp2_crit.dev_attr.attr,
- &sensor_dev_attr_temp4_crit.dev_attr.attr,
-
- &sensor_dev_attr_temp2_crit_alarm.dev_attr.attr,
- &sensor_dev_attr_temp4_crit_alarm.dev_attr.attr,
- &sensor_dev_attr_temp4_fault.dev_attr.attr,
- &sensor_dev_attr_temp4_max_alarm.dev_attr.attr,
- &sensor_dev_attr_temp2_fault.dev_attr.attr,
- &sensor_dev_attr_temp2_max_alarm.dev_attr.attr,
+ switch (type) {
+ case hwmon_chip:
+ if (attr == hwmon_chip_alarms)
+ return 0444;
+ break;
+ case hwmon_temp:
+ switch (attr) {
+ case hwmon_temp_input:
+ case hwmon_temp_max_alarm:
+ case hwmon_temp_crit_alarm:
+ return 0444;
+ case hwmon_temp_fault:
+ if (channel)
+ return 0444;
+ break;
+ case hwmon_temp_max:
+ return 0644;
+ case hwmon_temp_crit:
+ if (channel == 2)
+ return 0644;
+ return 0444;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static const struct hwmon_channel_info *lm83_info[] = {
+ HWMON_CHANNEL_INFO(chip, HWMON_C_ALARMS),
+ HWMON_CHANNEL_INFO(temp,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT |
+ HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT |
+ HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM | HWMON_T_FAULT,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT |
+ HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM | HWMON_T_FAULT,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT |
+ HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM | HWMON_T_FAULT
+ ),
NULL
};
-static const struct attribute_group lm83_group_opt = {
- .attrs = lm83_attributes_opt,
+static const struct hwmon_ops lm83_hwmon_ops = {
+ .is_visible = lm83_is_visible,
+ .read = lm83_read,
+ .write = lm83_write,
};
-/*
- * Real code
- */
+static const struct hwmon_chip_info lm83_chip_info = {
+ .ops = &lm83_hwmon_ops,
+ .info = lm83_info,
+};
/* Return 0 if detection is successful, -ENODEV otherwise */
-static int lm83_detect(struct i2c_client *new_client,
+static int lm83_detect(struct i2c_client *client,
struct i2c_board_info *info)
{
- struct i2c_adapter *adapter = new_client->adapter;
+ struct i2c_adapter *adapter = client->adapter;
const char *name;
u8 man_id, chip_id;
@@ -283,22 +377,30 @@ static int lm83_detect(struct i2c_client *new_client,
return -ENODEV;
/* Detection */
- if ((i2c_smbus_read_byte_data(new_client, LM83_REG_R_STATUS1) & 0xA8) ||
- (i2c_smbus_read_byte_data(new_client, LM83_REG_R_STATUS2) & 0x48) ||
- (i2c_smbus_read_byte_data(new_client, LM83_REG_R_CONFIG) & 0x41)) {
+ if ((i2c_smbus_read_byte_data(client, LM83_REG_R_STATUS1) & 0xA8) ||
+ (i2c_smbus_read_byte_data(client, LM83_REG_R_STATUS2) & 0x48) ||
+ (i2c_smbus_read_byte_data(client, LM83_REG_R_CONFIG) & 0x41)) {
dev_dbg(&adapter->dev, "LM83 detection failed at 0x%02x\n",
- new_client->addr);
+ client->addr);
return -ENODEV;
}
/* Identification */
- man_id = i2c_smbus_read_byte_data(new_client, LM83_REG_R_MAN_ID);
+ man_id = i2c_smbus_read_byte_data(client, LM83_REG_R_MAN_ID);
if (man_id != 0x01) /* National Semiconductor */
return -ENODEV;
- chip_id = i2c_smbus_read_byte_data(new_client, LM83_REG_R_CHIP_ID);
+ chip_id = i2c_smbus_read_byte_data(client, LM83_REG_R_CHIP_ID);
switch (chip_id) {
case 0x03:
+ /*
+ * According to the LM82 datasheet dated March 2013, recent
+ * revisions of LM82 have a die revision of 0x03. This was
+ * confirmed with a real chip. Further details in this revision
+ * of the LM82 datasheet strongly suggest that LM82 is just a
+ * repackaged LM83. It is therefore impossible to distinguish
+ * those chips from LM83, and they will be misdetected as LM83.
+ */
name = "lm83";
break;
case 0x01:
@@ -306,9 +408,9 @@ static int lm83_detect(struct i2c_client *new_client,
break;
default:
/* identification failed */
- dev_info(&adapter->dev,
- "Unsupported chip (man_id=0x%02X, chip_id=0x%02X)\n",
- man_id, chip_id);
+ dev_dbg(&adapter->dev,
+ "Unsupported chip (man_id=0x%02X, chip_id=0x%02X)\n",
+ man_id, chip_id);
return -ENODEV;
}
@@ -317,34 +419,31 @@ static int lm83_detect(struct i2c_client *new_client,
return 0;
}
-static const struct i2c_device_id lm83_id[];
+static const struct i2c_device_id lm83_id[] = {
+ { "lm83", lm83 },
+ { "lm82", lm82 },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, lm83_id);
-static int lm83_probe(struct i2c_client *new_client)
+static int lm83_probe(struct i2c_client *client)
{
+ struct device *dev = &client->dev;
struct device *hwmon_dev;
struct lm83_data *data;
- data = devm_kzalloc(&new_client->dev, sizeof(struct lm83_data),
- GFP_KERNEL);
+ data = devm_kzalloc(dev, sizeof(struct lm83_data), GFP_KERNEL);
if (!data)
return -ENOMEM;
- data->client = new_client;
- mutex_init(&data->update_lock);
+ data->regmap = devm_regmap_init(dev, NULL, client, &lm83_regmap_config);
+ if (IS_ERR(data->regmap))
+ return PTR_ERR(data->regmap);
- /*
- * Register sysfs hooks
- * The LM82 can only monitor one external diode which is
- * at the same register as the LM83 temp3 entry - so we
- * declare 1 and 3 common, and then 2 and 4 only for the LM83.
- */
- data->groups[0] = &lm83_group;
- if (i2c_match_id(lm83_id, new_client)->driver_data == lm83)
- data->groups[1] = &lm83_group_opt;
+ data->type = i2c_match_id(lm83_id, client)->driver_data;
- hwmon_dev = devm_hwmon_device_register_with_groups(&new_client->dev,
- new_client->name,
- data, data->groups);
+ hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name,
+ data, &lm83_chip_info, NULL);
return PTR_ERR_OR_ZERO(hwmon_dev);
}
@@ -352,13 +451,6 @@ static int lm83_probe(struct i2c_client *new_client)
* Driver data (common to all clients)
*/
-static const struct i2c_device_id lm83_id[] = {
- { "lm83", lm83 },
- { "lm82", lm82 },
- { }
-};
-MODULE_DEVICE_TABLE(i2c, lm83_id);
-
static struct i2c_driver lm83_driver = {
.class = I2C_CLASS_HWMON,
.driver = {
diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index 74019dff2550..1c9493c70813 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -373,7 +373,7 @@ static const struct lm90_params lm90_params[] = {
.flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT
| LM90_HAVE_BROKEN_ALERT | LM90_HAVE_CRIT,
.alert_alarms = 0x7c,
- .max_convrate = 8,
+ .max_convrate = 7,
},
[lm86] = {
.flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT
@@ -394,12 +394,13 @@ static const struct lm90_params lm90_params[] = {
.max_convrate = 9,
},
[max6646] = {
- .flags = LM90_HAVE_CRIT,
+ .flags = LM90_HAVE_CRIT | LM90_HAVE_BROKEN_ALERT,
.alert_alarms = 0x7c,
.max_convrate = 6,
.reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL,
},
[max6654] = {
+ .flags = LM90_HAVE_BROKEN_ALERT,
.alert_alarms = 0x7c,
.max_convrate = 7,
.reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL,
@@ -418,7 +419,7 @@ static const struct lm90_params lm90_params[] = {
},
[max6680] = {
.flags = LM90_HAVE_OFFSET | LM90_HAVE_CRIT
- | LM90_HAVE_CRIT_ALRM_SWP,
+ | LM90_HAVE_CRIT_ALRM_SWP | LM90_HAVE_BROKEN_ALERT,
.alert_alarms = 0x7c,
.max_convrate = 7,
},
@@ -848,7 +849,7 @@ static int lm90_update_device(struct device *dev)
* Re-enable ALERT# output if it was originally enabled and
* relevant alarms are all clear
*/
- if (!(data->config_orig & 0x80) &&
+ if ((client->irq || !(data->config_orig & 0x80)) &&
!(data->alarms & data->alert_alarms)) {
if (data->config & 0x80) {
dev_dbg(&client->dev, "Re-enabling ALERT#\n");
@@ -1807,22 +1808,22 @@ static bool lm90_is_tripped(struct i2c_client *client, u16 *status)
if (st & LM90_STATUS_LLOW)
hwmon_notify_event(data->hwmon_dev, hwmon_temp,
- hwmon_temp_min, 0);
+ hwmon_temp_min_alarm, 0);
if (st & LM90_STATUS_RLOW)
hwmon_notify_event(data->hwmon_dev, hwmon_temp,
- hwmon_temp_min, 1);
+ hwmon_temp_min_alarm, 1);
if (st2 & MAX6696_STATUS2_R2LOW)
hwmon_notify_event(data->hwmon_dev, hwmon_temp,
- hwmon_temp_min, 2);
+ hwmon_temp_min_alarm, 2);
if (st & LM90_STATUS_LHIGH)
hwmon_notify_event(data->hwmon_dev, hwmon_temp,
- hwmon_temp_max, 0);
+ hwmon_temp_max_alarm, 0);
if (st & LM90_STATUS_RHIGH)
hwmon_notify_event(data->hwmon_dev, hwmon_temp,
- hwmon_temp_max, 1);
+ hwmon_temp_max_alarm, 1);
if (st2 & MAX6696_STATUS2_R2HIGH)
hwmon_notify_event(data->hwmon_dev, hwmon_temp,
- hwmon_temp_max, 2);
+ hwmon_temp_max_alarm, 2);
return true;
}
diff --git a/drivers/hwmon/max1111.c b/drivers/hwmon/max1111.c
index 5fcfd57df61e..4c5487aeb3cf 100644
--- a/drivers/hwmon/max1111.c
+++ b/drivers/hwmon/max1111.c
@@ -254,7 +254,7 @@ err_remove:
return err;
}
-static int max1111_remove(struct spi_device *spi)
+static void max1111_remove(struct spi_device *spi)
{
struct max1111_data *data = spi_get_drvdata(spi);
@@ -265,7 +265,6 @@ static int max1111_remove(struct spi_device *spi)
sysfs_remove_group(&spi->dev.kobj, &max1110_attr_group);
sysfs_remove_group(&spi->dev.kobj, &max1111_attr_group);
mutex_destroy(&data->drvdata_lock);
- return 0;
}
static const struct spi_device_id max1111_ids[] = {
diff --git a/drivers/hwmon/max31722.c b/drivers/hwmon/max31722.c
index 4cf4fe6809a3..93e048ee4955 100644
--- a/drivers/hwmon/max31722.c
+++ b/drivers/hwmon/max31722.c
@@ -100,7 +100,7 @@ static int max31722_probe(struct spi_device *spi)
return 0;
}
-static int max31722_remove(struct spi_device *spi)
+static void max31722_remove(struct spi_device *spi)
{
struct max31722_data *data = spi_get_drvdata(spi);
int ret;
@@ -111,8 +111,6 @@ static int max31722_remove(struct spi_device *spi)
if (ret)
/* There is nothing we can do about this ... */
dev_warn(&spi->dev, "Failed to put device in stand-by mode\n");
-
- return 0;
}
static int __maybe_unused max31722_suspend(struct device *dev)
diff --git a/drivers/hwmon/max6639.c b/drivers/hwmon/max6639.c
index ccc0f047bd44..14bb7726f8d7 100644
--- a/drivers/hwmon/max6639.c
+++ b/drivers/hwmon/max6639.c
@@ -87,6 +87,9 @@ struct max6639_data {
/* Register values initialized only once */
u8 ppr; /* Pulses per rotation 0..3 for 1..4 ppr */
u8 rpm_range; /* Index in above rpm_ranges table */
+
+ /* Optional regulator for FAN supply */
+ struct regulator *reg;
};
static struct max6639_data *max6639_update_device(struct device *dev)
@@ -516,6 +519,11 @@ static int max6639_detect(struct i2c_client *client,
return 0;
}
+static void max6639_regulator_disable(void *data)
+{
+ regulator_disable(data);
+}
+
static int max6639_probe(struct i2c_client *client)
{
struct device *dev = &client->dev;
@@ -528,6 +536,28 @@ static int max6639_probe(struct i2c_client *client)
return -ENOMEM;
data->client = client;
+
+ data->reg = devm_regulator_get_optional(dev, "fan");
+ if (IS_ERR(data->reg)) {
+ if (PTR_ERR(data->reg) != -ENODEV)
+ return PTR_ERR(data->reg);
+
+ data->reg = NULL;
+ } else {
+ /* Spin up fans */
+ err = regulator_enable(data->reg);
+ if (err) {
+ dev_err(dev, "Failed to enable fan supply: %d\n", err);
+ return err;
+ }
+ err = devm_add_action_or_reset(dev, max6639_regulator_disable,
+ data->reg);
+ if (err) {
+ dev_err(dev, "Failed to register action: %d\n", err);
+ return err;
+ }
+ }
+
mutex_init(&data->update_lock);
/* Initialize the max6639 chip */
@@ -545,23 +575,39 @@ static int max6639_probe(struct i2c_client *client)
static int max6639_suspend(struct device *dev)
{
struct i2c_client *client = to_i2c_client(dev);
- int data = i2c_smbus_read_byte_data(client, MAX6639_REG_GCONFIG);
- if (data < 0)
- return data;
+ struct max6639_data *data = dev_get_drvdata(dev);
+ int ret = i2c_smbus_read_byte_data(client, MAX6639_REG_GCONFIG);
+
+ if (ret < 0)
+ return ret;
+
+ if (data->reg)
+ regulator_disable(data->reg);
return i2c_smbus_write_byte_data(client,
- MAX6639_REG_GCONFIG, data | MAX6639_GCONFIG_STANDBY);
+ MAX6639_REG_GCONFIG, ret | MAX6639_GCONFIG_STANDBY);
}
static int max6639_resume(struct device *dev)
{
struct i2c_client *client = to_i2c_client(dev);
- int data = i2c_smbus_read_byte_data(client, MAX6639_REG_GCONFIG);
- if (data < 0)
- return data;
+ struct max6639_data *data = dev_get_drvdata(dev);
+ int ret;
+
+ if (data->reg) {
+ ret = regulator_enable(data->reg);
+ if (ret) {
+ dev_err(dev, "Failed to enable fan supply: %d\n", ret);
+ return ret;
+ }
+ }
+
+ ret = i2c_smbus_read_byte_data(client, MAX6639_REG_GCONFIG);
+ if (ret < 0)
+ return ret;
return i2c_smbus_write_byte_data(client,
- MAX6639_REG_GCONFIG, data & ~MAX6639_GCONFIG_STANDBY);
+ MAX6639_REG_GCONFIG, ret & ~MAX6639_GCONFIG_STANDBY);
}
#endif /* CONFIG_PM_SLEEP */
diff --git a/drivers/hwmon/mlxreg-fan.c b/drivers/hwmon/mlxreg-fan.c
index 4a8becdb0d58..b48bd7c961d6 100644
--- a/drivers/hwmon/mlxreg-fan.c
+++ b/drivers/hwmon/mlxreg-fan.c
@@ -18,15 +18,6 @@
#define MLXREG_FAN_MAX_STATE 10
#define MLXREG_FAN_MIN_DUTY 51 /* 20% */
#define MLXREG_FAN_MAX_DUTY 255 /* 100% */
-/*
- * Minimum and maximum FAN allowed speed in percent: from 20% to 100%. Values
- * MLXREG_FAN_MAX_STATE + x, where x is between 2 and 10 are used for
- * setting FAN speed dynamic minimum. For example, if value is set to 14 (40%)
- * cooling levels vector will be set to 4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10 to
- * introduce PWM speed in percent: 40, 40, 40, 40, 40, 50, 60. 70, 80, 90, 100.
- */
-#define MLXREG_FAN_SPEED_MIN (MLXREG_FAN_MAX_STATE + 2)
-#define MLXREG_FAN_SPEED_MAX (MLXREG_FAN_MAX_STATE * 2)
#define MLXREG_FAN_SPEED_MIN_LEVEL 2 /* 20 percent */
#define MLXREG_FAN_TACHO_SAMPLES_PER_PULSE_DEF 44
#define MLXREG_FAN_TACHO_DIV_MIN 283
@@ -87,13 +78,16 @@ struct mlxreg_fan_tacho {
* @connected: indicates if PWM is connected;
* @reg: register offset;
* @cooling: cooling device levels;
+ * @last_hwmon_state: last cooling state set by hwmon subsystem;
+ * @last_thermal_state: last cooling state set by thermal subsystem;
* @cdev: cooling device;
*/
struct mlxreg_fan_pwm {
struct mlxreg_fan *fan;
bool connected;
u32 reg;
- u8 cooling_levels[MLXREG_FAN_MAX_STATE + 1];
+ unsigned long last_hwmon_state;
+ unsigned long last_thermal_state;
struct thermal_cooling_device *cdev;
};
@@ -119,6 +113,9 @@ struct mlxreg_fan {
int divider;
};
+static int mlxreg_fan_set_cur_state(struct thermal_cooling_device *cdev,
+ unsigned long state);
+
static int
mlxreg_fan_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
int channel, long *val)
@@ -213,6 +210,18 @@ mlxreg_fan_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
val > MLXREG_FAN_MAX_DUTY)
return -EINVAL;
pwm = &fan->pwm[channel];
+ /* If thermal is configured, handle PWM limit setting. */
+ if (IS_REACHABLE(CONFIG_THERMAL)) {
+ pwm->last_hwmon_state = MLXREG_FAN_PWM_DUTY2STATE(val);
+ /*
+ * Update the PWM only if the requested state is not below the
+ * last thermal state.
+ */
+ if (pwm->last_hwmon_state >= pwm->last_thermal_state)
+ return mlxreg_fan_set_cur_state(pwm->cdev,
+ pwm->last_hwmon_state);
+ return 0;
+ }
return regmap_write(fan->regmap, pwm->reg, val);
default:
return -EOPNOTSUPP;
@@ -338,58 +347,22 @@ static int mlxreg_fan_set_cur_state(struct thermal_cooling_device *cdev,
{
struct mlxreg_fan_pwm *pwm = cdev->devdata;
struct mlxreg_fan *fan = pwm->fan;
- unsigned long cur_state;
- int i, config = 0;
- u32 regval;
int err;
- /*
- * Verify if this request is for changing allowed FAN dynamical
- * minimum. If it is - update cooling levels accordingly and update
- * state, if current state is below the newly requested minimum state.
- * For example, if current state is 5, and minimal state is to be
- * changed from 4 to 6, fan->cooling_levels[0 to 5] will be changed all
- * from 4 to 6. And state 5 (fan->cooling_levels[4]) should be
- * overwritten.
- */
- if (state >= MLXREG_FAN_SPEED_MIN && state <= MLXREG_FAN_SPEED_MAX) {
- /*
- * This is configuration change, which is only supported through sysfs.
- * For configuration non-zero value is to be returned to avoid thermal
- * statistics update.
- */
- config = 1;
- state -= MLXREG_FAN_MAX_STATE;
- for (i = 0; i < state; i++)
- pwm->cooling_levels[i] = state;
- for (i = state; i <= MLXREG_FAN_MAX_STATE; i++)
- pwm->cooling_levels[i] = i;
-
- err = regmap_read(fan->regmap, pwm->reg, &regval);
- if (err) {
- dev_err(fan->dev, "Failed to query PWM duty\n");
- return err;
- }
-
- cur_state = MLXREG_FAN_PWM_DUTY2STATE(regval);
- if (state < cur_state)
- return config;
-
- state = cur_state;
- }
-
if (state > MLXREG_FAN_MAX_STATE)
return -EINVAL;
- /* Normalize the state to the valid speed range. */
- state = pwm->cooling_levels[state];
+ /* Save thermal state. */
+ pwm->last_thermal_state = state;
+
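+ /* never drive the fan below the last minimum requested via hwmon */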
+ state = max_t(unsigned long, state, pwm->last_hwmon_state);
err = regmap_write(fan->regmap, pwm->reg,
MLXREG_FAN_PWM_STATE2DUTY(state));
if (err) {
dev_err(fan->dev, "Failed to write PWM duty\n");
return err;
}
- return config;
+ return 0;
}
static const struct thermal_cooling_device_ops mlxreg_fan_cooling_ops = {
@@ -564,7 +537,7 @@ static int mlxreg_fan_config(struct mlxreg_fan *fan,
static int mlxreg_fan_cooling_config(struct device *dev, struct mlxreg_fan *fan)
{
- int i, j;
+ int i;
for (i = 0; i < MLXREG_FAN_MAX_PWM; i++) {
struct mlxreg_fan_pwm *pwm = &fan->pwm[i];
@@ -579,11 +552,8 @@ static int mlxreg_fan_cooling_config(struct device *dev, struct mlxreg_fan *fan)
return PTR_ERR(pwm->cdev);
}
- /* Init cooling levels per PWM state. */
- for (j = 0; j < MLXREG_FAN_SPEED_MIN_LEVEL; j++)
- pwm->cooling_levels[j] = MLXREG_FAN_SPEED_MIN_LEVEL;
- for (j = MLXREG_FAN_SPEED_MIN_LEVEL; j <= MLXREG_FAN_MAX_STATE; j++)
- pwm->cooling_levels[j] = j;
+ /* Set minimal PWM speed. */
+ pwm->last_hwmon_state = MLXREG_FAN_PWM_DUTY2STATE(MLXREG_FAN_MIN_DUTY);
}
return 0;
diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c
index fd3f91cb01c6..2b91f7e05126 100644
--- a/drivers/hwmon/nct6775.c
+++ b/drivers/hwmon/nct6775.c
@@ -308,6 +308,7 @@ static void superio_exit(struct nct6775_sio_data *sio_data)
#define NUM_TEMP 10 /* Max number of temp attribute sets w/ limits*/
#define NUM_TEMP_FIXED 6 /* Max number of fixed temp attribute sets */
+#define NUM_TSI_TEMP 8 /* Max number of TSI temp register pairs */
#define NUM_REG_ALARM 7 /* Max number of alarm registers */
#define NUM_REG_BEEP 5 /* Max number of beep registers */
@@ -498,6 +499,8 @@ static const u16 NCT6775_REG_TEMP_CRIT[32] = {
[11] = 0xa07
};
+static const u16 NCT6775_REG_TSI_TEMP[] = { 0x669 };
+
/* NCT6776 specific data */
/* STEP_UP_TIME and STEP_DOWN_TIME regs are swapped for all chips but NCT6775 */
@@ -581,6 +584,9 @@ static const u16 NCT6776_REG_TEMP_CRIT[32] = {
[12] = 0x70a,
};
+static const u16 NCT6776_REG_TSI_TEMP[] = {
+ 0x409, 0x40b, 0x40d, 0x40f, 0x411, 0x413, 0x415, 0x417 };
+
/* NCT6779 specific data */
static const u16 NCT6779_REG_IN[] = {
@@ -864,6 +870,8 @@ static const char *const nct6796_temp_label[] = {
#define NCT6796_TEMP_MASK 0xbfff0ffe
#define NCT6796_VIRT_TEMP_MASK 0x80000c00
+static const u16 NCT6796_REG_TSI_TEMP[] = { 0x409, 0x40b };
+
static const char *const nct6798_temp_label[] = {
"",
"SYSTIN",
@@ -1005,6 +1013,8 @@ static const u16 NCT6106_REG_TEMP_CRIT[32] = {
[12] = 0x205,
};
+static const u16 NCT6106_REG_TSI_TEMP[] = { 0x59, 0x5b, 0x5d, 0x5f, 0x61, 0x63, 0x65, 0x67 };
+
/* NCT6112D/NCT6114D/NCT6116D specific data */
static const u16 NCT6116_REG_FAN[] = { 0x20, 0x22, 0x24, 0x26, 0x28 };
@@ -1069,6 +1079,8 @@ static const s8 NCT6116_BEEP_BITS[] = {
34, -1 /* intrusion0, intrusion1 */
};
+static const u16 NCT6116_REG_TSI_TEMP[] = { 0x59, 0x5b };
+
static enum pwm_enable reg_to_pwm_enable(int pwm, int mode)
{
if (mode == 0 && pwm == 255)
@@ -1169,17 +1181,23 @@ static inline u8 in_to_reg(u32 val, u8 nr)
return clamp_val(DIV_ROUND_CLOSEST(val * 100, scale_in[nr]), 0, 255);
}
+/* TSI temperatures are in 8.3 fixed-point format */
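+/*
+ * For example (register value assumed): reg = 0x3200 -> reg >> 5 = 400
+ * eighths of a degree -> 400 * 125 = 50000 millidegrees C.
+ */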
+static inline unsigned int tsi_temp_from_reg(unsigned int reg)
+{
+ return (reg >> 5) * 125;
+}
+
/*
* Data structures and manipulation thereof
*/
struct nct6775_data {
int addr; /* IO base of hw monitor block */
- int sioreg; /* SIO register address */
+ struct nct6775_sio_data *sio_data;
enum kinds kind;
const char *name;
- const struct attribute_group *groups[6];
+ const struct attribute_group *groups[7];
u16 reg_temp[5][NUM_TEMP]; /* 0=temp, 1=temp_over, 2=temp_hyst,
* 3=temp_crit, 4=temp_lcrit
@@ -1240,6 +1258,8 @@ struct nct6775_data {
const u16 *REG_ALARM;
const u16 *REG_BEEP;
+ const u16 *REG_TSI_TEMP;
+
unsigned int (*fan_from_reg)(u16 reg, unsigned int divreg);
unsigned int (*fan_from_reg_min)(u16 reg, unsigned int divreg);
@@ -1267,6 +1287,7 @@ struct nct6775_data {
s8 temp_offset[NUM_TEMP_FIXED];
s16 temp[5][NUM_TEMP]; /* 0=temp, 1=temp_over, 2=temp_hyst,
* 3=temp_crit, 4=temp_lcrit */
+ s16 tsi_temp[NUM_TSI_TEMP];
u64 alarms;
u64 beeps;
@@ -1315,6 +1336,7 @@ struct nct6775_data {
u16 have_temp;
u16 have_temp_fixed;
+ u16 have_tsi_temp;
u16 have_in;
/* Remember extra register values over suspend/resume */
@@ -1464,13 +1486,15 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg)
switch (data->kind) {
case nct6106:
return reg == 0x20 || reg == 0x22 || reg == 0x24 ||
+ (reg >= 0x59 && reg < 0x69 && (reg & 1)) ||
reg == 0xe0 || reg == 0xe2 || reg == 0xe4 ||
reg == 0x111 || reg == 0x121 || reg == 0x131;
case nct6116:
return reg == 0x20 || reg == 0x22 || reg == 0x24 ||
- reg == 0x26 || reg == 0x28 || reg == 0xe0 || reg == 0xe2 ||
- reg == 0xe4 || reg == 0xe6 || reg == 0xe8 || reg == 0x111 ||
- reg == 0x121 || reg == 0x131 || reg == 0x191 || reg == 0x1a1;
+ reg == 0x26 || reg == 0x28 || reg == 0x59 || reg == 0x5b ||
+ reg == 0xe0 || reg == 0xe2 || reg == 0xe4 || reg == 0xe6 ||
+ reg == 0xe8 || reg == 0x111 || reg == 0x121 || reg == 0x131 ||
+ reg == 0x191 || reg == 0x1a1;
case nct6775:
return (((reg & 0xff00) == 0x100 ||
(reg & 0xff00) == 0x200) &&
@@ -1479,7 +1503,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg)
(reg & 0x00ff) == 0x55)) ||
(reg & 0xfff0) == 0x630 ||
reg == 0x640 || reg == 0x642 ||
- reg == 0x662 ||
+ reg == 0x662 || reg == 0x669 ||
((reg & 0xfff0) == 0x650 && (reg & 0x000f) >= 0x06) ||
reg == 0x73 || reg == 0x75 || reg == 0x77;
case nct6776:
@@ -1490,6 +1514,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg)
(reg & 0x00ff) == 0x55)) ||
(reg & 0xfff0) == 0x630 ||
reg == 0x402 ||
+ (reg >= 0x409 && reg < 0x419 && (reg & 1)) ||
reg == 0x640 || reg == 0x642 ||
((reg & 0xfff0) == 0x650 && (reg & 0x000f) >= 0x06) ||
reg == 0x73 || reg == 0x75 || reg == 0x77;
@@ -1504,6 +1529,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg)
return reg == 0x150 || reg == 0x153 || reg == 0x155 ||
(reg & 0xfff0) == 0x4c0 ||
reg == 0x402 ||
+ (reg >= 0x409 && reg < 0x419 && (reg & 1)) ||
reg == 0x63a || reg == 0x63c || reg == 0x63e ||
reg == 0x640 || reg == 0x642 || reg == 0x64a ||
reg == 0x64c ||
@@ -1987,6 +2013,12 @@ static struct nct6775_data *nct6775_update_device(struct device *dev)
data->REG_TEMP_OFFSET[i]);
}
+ for (i = 0; i < NUM_TSI_TEMP; i++) {
+ if (!(data->have_tsi_temp & BIT(i)))
+ continue;
+ data->tsi_temp[i] = data->read_value(data, data->REG_TSI_TEMP[i]);
+ }
+
data->alarms = 0;
for (i = 0; i < NUM_REG_ALARM; i++) {
u8 alarm;
@@ -2670,6 +2702,44 @@ static const struct sensor_template_group nct6775_temp_template_group = {
.base = 1,
};
+static ssize_t show_tsi_temp(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct nct6775_data *data = nct6775_update_device(dev);
+ struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
+
+ return sysfs_emit(buf, "%u\n", tsi_temp_from_reg(data->tsi_temp[sattr->index]));
+}
+
+static ssize_t show_tsi_temp_label(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
+
+ return sysfs_emit(buf, "TSI%d_TEMP\n", sattr->index);
+}
+
+SENSOR_TEMPLATE(tsi_temp_input, "temp%d_input", 0444, show_tsi_temp, NULL, 0);
+SENSOR_TEMPLATE(tsi_temp_label, "temp%d_label", 0444, show_tsi_temp_label, NULL, 0);
+
+static umode_t nct6775_tsi_temp_is_visible(struct kobject *kobj, struct attribute *attr,
+ int index)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct nct6775_data *data = dev_get_drvdata(dev);
+ int temp = index / 2;
+
+ return (data->have_tsi_temp & BIT(temp)) ? attr->mode : 0;
+}
+
+/*
+ * The index calculation in nct6775_tsi_temp_is_visible() must be kept in
+ * sync with the size of this array.
+ */
+static struct sensor_device_template *nct6775_tsi_temp_template[] = {
+ &sensor_dev_template_tsi_temp_input,
+ &sensor_dev_template_tsi_temp_label,
+ NULL
+};
+
static ssize_t
show_pwm_mode(struct device *dev, struct device_attribute *attr, char *buf)
{
@@ -3559,7 +3629,7 @@ clear_caseopen(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct nct6775_data *data = dev_get_drvdata(dev);
- struct nct6775_sio_data *sio_data = dev_get_platdata(dev);
+ struct nct6775_sio_data *sio_data = data->sio_data;
int nr = to_sensor_dev_attr(attr)->index - INTRUSION_ALARM_BASE;
unsigned long val;
u8 reg;
@@ -3948,10 +4018,11 @@ static int nct6775_probe(struct platform_device *pdev)
const u16 *reg_temp, *reg_temp_over, *reg_temp_hyst, *reg_temp_config;
const u16 *reg_temp_mon, *reg_temp_alternate, *reg_temp_crit;
const u16 *reg_temp_crit_l = NULL, *reg_temp_crit_h = NULL;
- int num_reg_temp, num_reg_temp_mon;
+ int num_reg_temp, num_reg_temp_mon, num_reg_tsi_temp;
u8 cr2a;
struct attribute_group *group;
struct device *hwmon_dev;
+ struct sensor_template_group tsi_temp_tg;
int num_attr_groups = 0;
if (sio_data->access == access_direct) {
@@ -3967,7 +4038,7 @@ static int nct6775_probe(struct platform_device *pdev)
return -ENOMEM;
data->kind = sio_data->kind;
- data->sioreg = sio_data->sioreg;
+ data->sio_data = sio_data;
if (sio_data->access == access_direct) {
data->addr = res->start;
@@ -4043,11 +4114,13 @@ static int nct6775_probe(struct platform_device *pdev)
data->ALARM_BITS = NCT6106_ALARM_BITS;
data->REG_BEEP = NCT6106_REG_BEEP;
data->BEEP_BITS = NCT6106_BEEP_BITS;
+ data->REG_TSI_TEMP = NCT6106_REG_TSI_TEMP;
reg_temp = NCT6106_REG_TEMP;
reg_temp_mon = NCT6106_REG_TEMP_MON;
num_reg_temp = ARRAY_SIZE(NCT6106_REG_TEMP);
num_reg_temp_mon = ARRAY_SIZE(NCT6106_REG_TEMP_MON);
+ num_reg_tsi_temp = ARRAY_SIZE(NCT6106_REG_TSI_TEMP);
reg_temp_over = NCT6106_REG_TEMP_OVER;
reg_temp_hyst = NCT6106_REG_TEMP_HYST;
reg_temp_config = NCT6106_REG_TEMP_CONFIG;
@@ -4116,11 +4189,13 @@ static int nct6775_probe(struct platform_device *pdev)
data->ALARM_BITS = NCT6116_ALARM_BITS;
data->REG_BEEP = NCT6106_REG_BEEP;
data->BEEP_BITS = NCT6116_BEEP_BITS;
+ data->REG_TSI_TEMP = NCT6116_REG_TSI_TEMP;
reg_temp = NCT6106_REG_TEMP;
reg_temp_mon = NCT6106_REG_TEMP_MON;
num_reg_temp = ARRAY_SIZE(NCT6106_REG_TEMP);
num_reg_temp_mon = ARRAY_SIZE(NCT6106_REG_TEMP_MON);
+ num_reg_tsi_temp = ARRAY_SIZE(NCT6116_REG_TSI_TEMP);
reg_temp_over = NCT6106_REG_TEMP_OVER;
reg_temp_hyst = NCT6106_REG_TEMP_HYST;
reg_temp_config = NCT6106_REG_TEMP_CONFIG;
@@ -4191,11 +4266,13 @@ static int nct6775_probe(struct platform_device *pdev)
data->REG_WEIGHT_TEMP[2] = NCT6775_REG_WEIGHT_TEMP_BASE;
data->REG_ALARM = NCT6775_REG_ALARM;
data->REG_BEEP = NCT6775_REG_BEEP;
+ data->REG_TSI_TEMP = NCT6775_REG_TSI_TEMP;
reg_temp = NCT6775_REG_TEMP;
reg_temp_mon = NCT6775_REG_TEMP_MON;
num_reg_temp = ARRAY_SIZE(NCT6775_REG_TEMP);
num_reg_temp_mon = ARRAY_SIZE(NCT6775_REG_TEMP_MON);
+ num_reg_tsi_temp = ARRAY_SIZE(NCT6775_REG_TSI_TEMP);
reg_temp_over = NCT6775_REG_TEMP_OVER;
reg_temp_hyst = NCT6775_REG_TEMP_HYST;
reg_temp_config = NCT6775_REG_TEMP_CONFIG;
@@ -4264,11 +4341,13 @@ static int nct6775_probe(struct platform_device *pdev)
data->REG_WEIGHT_TEMP[2] = NCT6775_REG_WEIGHT_TEMP_BASE;
data->REG_ALARM = NCT6775_REG_ALARM;
data->REG_BEEP = NCT6776_REG_BEEP;
+ data->REG_TSI_TEMP = NCT6776_REG_TSI_TEMP;
reg_temp = NCT6775_REG_TEMP;
reg_temp_mon = NCT6775_REG_TEMP_MON;
num_reg_temp = ARRAY_SIZE(NCT6775_REG_TEMP);
num_reg_temp_mon = ARRAY_SIZE(NCT6775_REG_TEMP_MON);
+ num_reg_tsi_temp = ARRAY_SIZE(NCT6776_REG_TSI_TEMP);
reg_temp_over = NCT6775_REG_TEMP_OVER;
reg_temp_hyst = NCT6775_REG_TEMP_HYST;
reg_temp_config = NCT6776_REG_TEMP_CONFIG;
@@ -4341,11 +4420,13 @@ static int nct6775_probe(struct platform_device *pdev)
data->REG_WEIGHT_TEMP[2] = NCT6775_REG_WEIGHT_TEMP_BASE;
data->REG_ALARM = NCT6779_REG_ALARM;
data->REG_BEEP = NCT6776_REG_BEEP;
+ data->REG_TSI_TEMP = NCT6776_REG_TSI_TEMP;
reg_temp = NCT6779_REG_TEMP;
reg_temp_mon = NCT6779_REG_TEMP_MON;
num_reg_temp = ARRAY_SIZE(NCT6779_REG_TEMP);
num_reg_temp_mon = ARRAY_SIZE(NCT6779_REG_TEMP_MON);
+ num_reg_tsi_temp = ARRAY_SIZE(NCT6776_REG_TSI_TEMP);
reg_temp_over = NCT6779_REG_TEMP_OVER;
reg_temp_hyst = NCT6779_REG_TEMP_HYST;
reg_temp_config = NCT6779_REG_TEMP_CONFIG;
@@ -4460,6 +4541,24 @@ static int nct6775_probe(struct platform_device *pdev)
data->REG_BEEP = NCT6776_REG_BEEP;
else
data->REG_BEEP = NCT6792_REG_BEEP;
+ switch (data->kind) {
+ case nct6791:
+ case nct6792:
+ case nct6793:
+ data->REG_TSI_TEMP = NCT6776_REG_TSI_TEMP;
+ num_reg_tsi_temp = ARRAY_SIZE(NCT6776_REG_TSI_TEMP);
+ break;
+ case nct6795:
+ case nct6796:
+ case nct6797:
+ case nct6798:
+ data->REG_TSI_TEMP = NCT6796_REG_TSI_TEMP;
+ num_reg_tsi_temp = ARRAY_SIZE(NCT6796_REG_TSI_TEMP);
+ break;
+ default:
+ num_reg_tsi_temp = 0;
+ break;
+ }
reg_temp = NCT6779_REG_TEMP;
num_reg_temp = ARRAY_SIZE(NCT6779_REG_TEMP);
@@ -4659,6 +4758,12 @@ static int nct6775_probe(struct platform_device *pdev)
}
#endif /* USE_ALTERNATE */
+ /* Check which TSIx_TEMP registers are active */
+ for (i = 0; i < num_reg_tsi_temp; i++) {
+ if (data->read_value(data, data->REG_TSI_TEMP[i]))
+ data->have_tsi_temp |= BIT(i);
+ }
+
/* Initialize the chip */
nct6775_init_device(data);
@@ -4766,6 +4871,18 @@ static int nct6775_probe(struct platform_device *pdev)
return PTR_ERR(group);
data->groups[num_attr_groups++] = group;
+
+ if (data->have_tsi_temp) {
+ tsi_temp_tg.templates = nct6775_tsi_temp_template;
+ tsi_temp_tg.is_visible = nct6775_tsi_temp_is_visible;
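+ /* number TSI temp attributes after the highest regular temp channel */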
+ tsi_temp_tg.base = fls(data->have_temp) + 1;
+ group = nct6775_create_attr_group(dev, &tsi_temp_tg, fls(data->have_tsi_temp));
+ if (IS_ERR(group))
+ return PTR_ERR(group);
+
+ data->groups[num_attr_groups++] = group;
+ }
+
data->groups[num_attr_groups++] = &nct6775_group_other;
hwmon_dev = devm_hwmon_device_register_with_groups(dev, data->name,
@@ -4985,9 +5102,14 @@ static struct platform_device *pdev[2];
static const char * const asus_wmi_boards[] = {
"ProArt X570-CREATOR WIFI",
+ "Pro B550M-C",
"Pro WS X570-ACE",
"PRIME B360-PLUS",
"PRIME B460-PLUS",
+ "PRIME B550-PLUS",
+ "PRIME B550M-A",
+ "PRIME B550M-A (WI-FI)",
+ "PRIME X570-P",
"PRIME X570-PRO",
"ROG CROSSHAIR VIII DARK HERO",
"ROG CROSSHAIR VIII FORMULA",
@@ -4997,10 +5119,22 @@ static const char * const asus_wmi_boards[] = {
"ROG STRIX B550-E GAMING",
"ROG STRIX B550-F GAMING",
"ROG STRIX B550-F GAMING (WI-FI)",
+ "ROG STRIX B550-F GAMING WIFI II",
"ROG STRIX B550-I GAMING",
+ "ROG STRIX B550-XE GAMING (WI-FI)",
+ "ROG STRIX X570-E GAMING",
"ROG STRIX X570-F GAMING",
"ROG STRIX X570-I GAMING",
"ROG STRIX Z390-E GAMING",
+ "ROG STRIX Z390-F GAMING",
+ "ROG STRIX Z390-H GAMING",
+ "ROG STRIX Z390-I GAMING",
+ "ROG STRIX Z490-A GAMING",
+ "ROG STRIX Z490-E GAMING",
+ "ROG STRIX Z490-F GAMING",
+ "ROG STRIX Z490-G GAMING",
+ "ROG STRIX Z490-G GAMING (WI-FI)",
+ "ROG STRIX Z490-H GAMING",
"ROG STRIX Z490-I GAMING",
"TUF GAMING B550M-PLUS",
"TUF GAMING B550M-PLUS (WI-FI)",
diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c
index 414204f5704c..9c9e9f4ccb9e 100644
--- a/drivers/hwmon/ntc_thermistor.c
+++ b/drivers/hwmon/ntc_thermistor.c
@@ -59,7 +59,7 @@ static const struct platform_device_id ntc_thermistor_id[] = {
[NTC_NCP15XH103] = { "ncp15xh103", TYPE_NCPXXXH103 },
[NTC_NCP18WB473] = { "ncp18wb473", TYPE_NCPXXWB473 },
[NTC_NCP21WB473] = { "ncp21wb473", TYPE_NCPXXWB473 },
- [NTC_SSG1404001221] = { "ssg1404-001221", TYPE_NCPXXWB473 },
+ [NTC_SSG1404001221] = { "ssg1404_001221", TYPE_NCPXXWB473 },
[NTC_LAST] = { },
};
diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c
index 0cb4a0a6cbc1..f00cd59f1d19 100644
--- a/drivers/hwmon/occ/common.c
+++ b/drivers/hwmon/occ/common.c
@@ -674,6 +674,9 @@ static ssize_t occ_show_caps_3(struct device *dev,
case 7:
val = caps->user_source;
break;
+ case 8:
+ val = get_unaligned_be16(&caps->soft_min) * 1000000ULL;
+ break;
default:
return -EINVAL;
}
@@ -835,12 +838,13 @@ static int occ_setup_sensor_attrs(struct occ *occ)
case 1:
num_attrs += (sensors->caps.num_sensors * 7);
break;
- case 3:
- show_caps = occ_show_caps_3;
- fallthrough;
case 2:
num_attrs += (sensors->caps.num_sensors * 8);
break;
+ case 3:
+ show_caps = occ_show_caps_3;
+ num_attrs += (sensors->caps.num_sensors * 9);
+ break;
default:
sensors->caps.num_sensors = 0;
}
@@ -1047,6 +1051,15 @@ static int occ_setup_sensor_attrs(struct occ *occ)
attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
show_caps, NULL, 7, 0);
attr++;
+
+ if (sensors->caps.version > 2) {
+ snprintf(attr->name, sizeof(attr->name),
+ "power%d_cap_min_soft", s);
+ attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
+ show_caps, NULL,
+ 8, 0);
+ attr++;
+ }
}
}
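
occ_show_caps_3() exposes the version-3 caps sensor's new soft_min field as attribute index 8, converting the big-endian 16-bit watts value into the microwatts hwmon expects. A standalone sketch of the same extraction and scaling, using an invented two-byte payload:

#include <stdio.h>
#include <stdint.h>

/* Big-endian 16-bit load from a possibly unaligned buffer,
 * equivalent in effect to the kernel's get_unaligned_be16() */
static uint16_t be16_load(const uint8_t *p)
{
	return (uint16_t)(p[0] << 8) | p[1];
}

int main(void)
{
	/* Hypothetical caps payload: soft_min = 0x00fa = 250 W */
	uint8_t payload[2] = { 0x00, 0xfa };
	unsigned long long val;

	/* hwmon power values are reported in microwatts */
	val = be16_load(payload) * 1000000ULL;
	printf("power cap_min_soft = %llu uW (%u W)\n", val, be16_load(payload));
	return 0;
}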
diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h
index 5020117be740..2dd4a4d240c0 100644
--- a/drivers/hwmon/occ/common.h
+++ b/drivers/hwmon/occ/common.h
@@ -119,6 +119,8 @@ struct occ {
u8 prev_stat;
u8 prev_ext_stat;
u8 prev_occs_present;
+ u8 prev_ips_status;
+ u8 prev_mode;
};
int occ_setup(struct occ *occ, const char *name);
diff --git a/drivers/hwmon/occ/sysfs.c b/drivers/hwmon/occ/sysfs.c
index 03b16abef67f..b2f788a77746 100644
--- a/drivers/hwmon/occ/sysfs.c
+++ b/drivers/hwmon/occ/sysfs.c
@@ -19,6 +19,8 @@
#define OCC_EXT_STAT_DVFS_POWER BIT(6)
#define OCC_EXT_STAT_MEM_THROTTLE BIT(5)
#define OCC_EXT_STAT_QUICK_DROP BIT(4)
+#define OCC_EXT_STAT_DVFS_VDD BIT(3)
+#define OCC_EXT_STAT_GPU_THROTTLE GENMASK(2, 0)
static ssize_t occ_sysfs_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -63,6 +65,18 @@ static ssize_t occ_sysfs_show(struct device *dev,
else
val = 1;
break;
+ case 8:
+ val = header->ips_status;
+ break;
+ case 9:
+ val = header->mode;
+ break;
+ case 10:
+ val = !!(header->ext_status & OCC_EXT_STAT_DVFS_VDD);
+ break;
+ case 11:
+ val = header->ext_status & OCC_EXT_STAT_GPU_THROTTLE;
+ break;
default:
return -EINVAL;
}
@@ -88,6 +102,10 @@ static SENSOR_DEVICE_ATTR(occ_mem_throttle, 0444, occ_sysfs_show, NULL, 4);
static SENSOR_DEVICE_ATTR(occ_quick_pwr_drop, 0444, occ_sysfs_show, NULL, 5);
static SENSOR_DEVICE_ATTR(occ_state, 0444, occ_sysfs_show, NULL, 6);
static SENSOR_DEVICE_ATTR(occs_present, 0444, occ_sysfs_show, NULL, 7);
+static SENSOR_DEVICE_ATTR(occ_ips_status, 0444, occ_sysfs_show, NULL, 8);
+static SENSOR_DEVICE_ATTR(occ_mode, 0444, occ_sysfs_show, NULL, 9);
+static SENSOR_DEVICE_ATTR(occ_dvfs_vdd, 0444, occ_sysfs_show, NULL, 10);
+static SENSOR_DEVICE_ATTR(occ_gpu_throttle, 0444, occ_sysfs_show, NULL, 11);
static DEVICE_ATTR_RO(occ_error);
static struct attribute *occ_attributes[] = {
@@ -99,6 +117,10 @@ static struct attribute *occ_attributes[] = {
&sensor_dev_attr_occ_quick_pwr_drop.dev_attr.attr,
&sensor_dev_attr_occ_state.dev_attr.attr,
&sensor_dev_attr_occs_present.dev_attr.attr,
+ &sensor_dev_attr_occ_ips_status.dev_attr.attr,
+ &sensor_dev_attr_occ_mode.dev_attr.attr,
+ &sensor_dev_attr_occ_dvfs_vdd.dev_attr.attr,
+ &sensor_dev_attr_occ_gpu_throttle.dev_attr.attr,
&dev_attr_occ_error.attr,
NULL
};
@@ -156,12 +178,34 @@ void occ_sysfs_poll_done(struct occ *occ)
sysfs_notify(&occ->bus_dev->kobj, NULL, name);
}
+ if ((header->ext_status & OCC_EXT_STAT_DVFS_VDD) !=
+ (occ->prev_ext_stat & OCC_EXT_STAT_DVFS_VDD)) {
+ name = sensor_dev_attr_occ_dvfs_vdd.dev_attr.attr.name;
+ sysfs_notify(&occ->bus_dev->kobj, NULL, name);
+ }
+
+ if ((header->ext_status & OCC_EXT_STAT_GPU_THROTTLE) !=
+ (occ->prev_ext_stat & OCC_EXT_STAT_GPU_THROTTLE)) {
+ name = sensor_dev_attr_occ_gpu_throttle.dev_attr.attr.name;
+ sysfs_notify(&occ->bus_dev->kobj, NULL, name);
+ }
+
if ((header->status & OCC_STAT_MASTER) &&
header->occs_present != occ->prev_occs_present) {
name = sensor_dev_attr_occs_present.dev_attr.attr.name;
sysfs_notify(&occ->bus_dev->kobj, NULL, name);
}
+ if (header->ips_status != occ->prev_ips_status) {
+ name = sensor_dev_attr_occ_ips_status.dev_attr.attr.name;
+ sysfs_notify(&occ->bus_dev->kobj, NULL, name);
+ }
+
+ if (header->mode != occ->prev_mode) {
+ name = sensor_dev_attr_occ_mode.dev_attr.attr.name;
+ sysfs_notify(&occ->bus_dev->kobj, NULL, name);
+ }
+
if (occ->error && occ->error != occ->prev_error) {
name = dev_attr_occ_error.attr.name;
sysfs_notify(&occ->bus_dev->kobj, NULL, name);
@@ -174,6 +218,8 @@ done:
occ->prev_stat = header->status;
occ->prev_ext_stat = header->ext_status;
occ->prev_occs_present = header->occs_present;
+ occ->prev_ips_status = header->ips_status;
+ occ->prev_mode = header->mode;
}
int occ_setup_sysfs(struct occ *occ)
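
Each new notification in occ_sysfs_poll_done() uses the same idiom: mask the bit of interest in both the fresh header and the cached previous value, and call sysfs_notify() only when they differ. A compact standalone rendering of that edge-detection pattern (the status values are illustrative):

#include <stdio.h>

#define DVFS_VDD (1u << 3)

static void notify(const char *attr)
{
	/* stands in for sysfs_notify(&occ->bus_dev->kobj, NULL, attr) */
	printf("notify: %s\n", attr);
}

int main(void)
{
	unsigned int prev_ext_stat = 0x00, ext_stat = DVFS_VDD; /* sample values */

	/* Only the masked bit is compared, so unrelated status bits
	 * cannot trigger a spurious notification. */
	if ((ext_stat & DVFS_VDD) != (prev_ext_stat & DVFS_VDD))
		notify("occ_dvfs_vdd");

	prev_ext_stat = ext_stat;	/* cache for the next poll */
	printf("cached ext_stat=0x%02x\n", prev_ext_stat);
	return 0;
}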
diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
index 41f6cbf96d3b..a2ea1d5a8765 100644
--- a/drivers/hwmon/pmbus/Kconfig
+++ b/drivers/hwmon/pmbus/Kconfig
@@ -174,6 +174,13 @@ config SENSORS_LM25066
This driver can also be built as a module. If so, the module will
be called lm25066.
+config SENSORS_LM25066_REGULATOR
+ bool "Regulator support for LM25066 and compatibles"
+ depends on SENSORS_LM25066 && REGULATOR
+ help
+ If you say yes here you get regulator support for National
+ Semiconductor LM25066, LM5064, and LM5066.
+
config SENSORS_LTC2978
tristate "Linear Technologies LTC2978 and compatibles"
help
@@ -189,8 +196,8 @@ config SENSORS_LTC2978_REGULATOR
depends on SENSORS_LTC2978 && REGULATOR
help
If you say yes here you get regulator support for Linear Technology
- LTC3880, LTC3883, LTC3884, LTC3886, LTC3887, LTC3889, LTC7880,
- LTM4644, LTM4675, LTM4676, LTM4677, LTM4678, LTM4680, LTM4686,
+ LTC3880, LTC3883, LTC3884, LTC3886, LTC3887, LTC3889, LTC7880,
+ LTM4644, LTM4675, LTM4676, LTM4677, LTM4678, LTM4680, LTM4686,
and LTM4700.
config SENSORS_LTC3815
@@ -310,6 +317,22 @@ config SENSORS_PIM4328
This driver can also be built as a module. If so, the module will
be called pim4328.
+config SENSORS_PLI1209BC
+ tristate "Vicor PLI1209BC"
+ help
+ If you say yes here you get hardware monitoring support for Vicor
+ PLI1209BC Digital Supervisor.
+
+ This driver can also be built as a module. If so, the module will
+ be called pli1209bc.
+
+config SENSORS_PLI1209BC_REGULATOR
+ bool "Regulator support for PLI1209BC"
+ depends on SENSORS_PLI1209BC && REGULATOR
+ help
+ If you say yes here you get regulator support for Vicor PLI1209BC
+ Digital Supervisor.
+
config SENSORS_PM6764TR
tristate "ST PM6764TR"
help
@@ -394,6 +417,12 @@ config SENSORS_XDPE122
This driver can also be built as a module. If so, the module will
be called xdpe12284.
+config SENSORS_XDPE122_REGULATOR
+ bool "Regulator support for XDPE122 and compatibles"
+ depends on SENSORS_XDPE122 && REGULATOR
+ help
+ If you say yes here you get regulator support for Infineon
+ XDPE122 and compatible digital voltage regulators.
+
config SENSORS_ZL6100
tristate "Intersil ZL6100 and compatibles"
help
diff --git a/drivers/hwmon/pmbus/Makefile b/drivers/hwmon/pmbus/Makefile
index e5935f70c9e0..a4a96ac71de7 100644
--- a/drivers/hwmon/pmbus/Makefile
+++ b/drivers/hwmon/pmbus/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_SENSORS_MAX8688) += max8688.o
obj-$(CONFIG_SENSORS_MP2888) += mp2888.o
obj-$(CONFIG_SENSORS_MP2975) += mp2975.o
obj-$(CONFIG_SENSORS_MP5023) += mp5023.o
+obj-$(CONFIG_SENSORS_PLI1209BC) += pli1209bc.o
obj-$(CONFIG_SENSORS_PM6764TR) += pm6764tr.o
obj-$(CONFIG_SENSORS_PXE1610) += pxe1610.o
obj-$(CONFIG_SENSORS_Q54SJ108A2) += q54sj108a2.o
diff --git a/drivers/hwmon/pmbus/adm1275.c b/drivers/hwmon/pmbus/adm1275.c
index d311e0557401..3b07bfb43e93 100644
--- a/drivers/hwmon/pmbus/adm1275.c
+++ b/drivers/hwmon/pmbus/adm1275.c
@@ -475,6 +475,7 @@ static int adm1275_probe(struct i2c_client *client)
int vindex = -1, voindex = -1, cindex = -1, pindex = -1;
int tindex = -1;
u32 shunt;
+ u32 avg;
if (!i2c_check_functionality(client->adapter,
I2C_FUNC_SMBUS_READ_BYTE_DATA
@@ -687,7 +688,7 @@ static int adm1275_probe(struct i2c_client *client)
if ((config & (ADM1278_VOUT_EN | ADM1278_TEMP1_EN)) !=
(ADM1278_VOUT_EN | ADM1278_TEMP1_EN)) {
config |= ADM1278_VOUT_EN | ADM1278_TEMP1_EN;
- ret = i2c_smbus_write_byte_data(client,
+ ret = i2c_smbus_write_word_data(client,
ADM1275_PMON_CONFIG,
config);
if (ret < 0) {
@@ -756,6 +757,43 @@ static int adm1275_probe(struct i2c_client *client)
return -ENODEV;
}
+ if (data->have_power_sampling &&
+ of_property_read_u32(client->dev.of_node,
+ "adi,power-sample-average", &avg) == 0) {
+ if (!avg || avg > ADM1275_SAMPLES_AVG_MAX ||
+ BIT(__fls(avg)) != avg) {
+ dev_err(&client->dev,
+ "Invalid number of power samples");
+ return -EINVAL;
+ }
+ ret = adm1275_write_pmon_config(data, client, true,
+ ilog2(avg));
+ if (ret < 0) {
+ dev_err(&client->dev,
+ "Setting power sample averaging failed with error %d",
+ ret);
+ return ret;
+ }
+ }
+
+ if (of_property_read_u32(client->dev.of_node,
+ "adi,volt-curr-sample-average", &avg) == 0) {
+ if (!avg || avg > ADM1275_SAMPLES_AVG_MAX ||
+ BIT(__fls(avg)) != avg) {
+ dev_err(&client->dev,
+ "Invalid number of voltage/current samples");
+ return -EINVAL;
+ }
+ ret = adm1275_write_pmon_config(data, client, false,
+ ilog2(avg));
+ if (ret < 0) {
+ dev_err(&client->dev,
+ "Setting voltage and current sample averaging failed with error %d",
+ ret);
+ return ret;
+ }
+ }
+
if (voindex < 0)
voindex = vindex;
if (vindex >= 0) {
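
The new adm1275 device-tree properties accept only power-of-two sample counts: BIT(__fls(avg)) != avg rejects anything else, and ilog2(avg) becomes the value written to PMON_CONFIG. The same validation as a small self-contained program (the 128-sample limit here is illustrative, standing in for ADM1275_SAMPLES_AVG_MAX):

#include <stdio.h>

#define SAMPLES_AVG_MAX 128	/* illustrative limit, not from the patch */

/* Position of the highest set bit, 0-based (kernel __fls) */
static unsigned int fls0(unsigned int x)
{
	unsigned int n = 0;

	while (x >>= 1)
		n++;
	return n;
}

static int encode_avg(unsigned int avg)
{
	/* Reject 0, out-of-range, and non-power-of-two values: for a
	 * power of two, 1 << __fls(avg) reproduces avg exactly. */
	if (!avg || avg > SAMPLES_AVG_MAX || (1u << fls0(avg)) != avg)
		return -1;
	return fls0(avg);	/* ilog2(avg): the register encoding */
}

int main(void)
{
	unsigned int tests[] = { 0, 1, 2, 3, 64, 100, 128, 256 };

	for (unsigned int i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
		printf("avg=%u -> %d\n", tests[i], encode_avg(tests[i]));
	return 0;
}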
diff --git a/drivers/hwmon/pmbus/ir38064.c b/drivers/hwmon/pmbus/ir38064.c
index 0ea7e1c18bdc..09276e397194 100644
--- a/drivers/hwmon/pmbus/ir38064.c
+++ b/drivers/hwmon/pmbus/ir38064.c
@@ -62,7 +62,7 @@ static const struct i2c_device_id ir38064_id[] = {
MODULE_DEVICE_TABLE(i2c, ir38064_id);
-static const struct of_device_id ir38064_of_match[] = {
+static const struct of_device_id __maybe_unused ir38064_of_match[] = {
{ .compatible = "infineon,ir38060" },
{ .compatible = "infineon,ir38064" },
{ .compatible = "infineon,ir38164" },
diff --git a/drivers/hwmon/pmbus/lm25066.c b/drivers/hwmon/pmbus/lm25066.c
index 8402b41520eb..09792cd03d9f 100644
--- a/drivers/hwmon/pmbus/lm25066.c
+++ b/drivers/hwmon/pmbus/lm25066.c
@@ -435,6 +435,12 @@ static int lm25066_write_word_data(struct i2c_client *client, int page, int reg,
return ret;
}
+#if IS_ENABLED(CONFIG_SENSORS_LM25066_REGULATOR)
+static const struct regulator_desc lm25066_reg_desc[] = {
+ PMBUS_REGULATOR("vout", 0),
+};
+#endif
+
static const struct i2c_device_id lm25066_id[] = {
{"lm25056", lm25056},
{"lm25066", lm25066},
@@ -545,6 +551,14 @@ static int lm25066_probe(struct i2c_client *client)
info->m[PSC_CURRENT_IN] = info->m[PSC_CURRENT_IN] * shunt / 1000;
info->m[PSC_POWER] = info->m[PSC_POWER] * shunt / 1000;
+#if IS_ENABLED(CONFIG_SENSORS_LM25066_REGULATOR)
+ /* LM25056 doesn't support OPERATION */
+ if (data->id != lm25056) {
+ info->num_regulators = ARRAY_SIZE(lm25066_reg_desc);
+ info->reg_desc = lm25066_reg_desc;
+ }
+#endif
+
return pmbus_do_probe(client, info);
}
diff --git a/drivers/hwmon/pmbus/pli1209bc.c b/drivers/hwmon/pmbus/pli1209bc.c
new file mode 100644
index 000000000000..05b4ee35ba27
--- /dev/null
+++ b/drivers/hwmon/pmbus/pli1209bc.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Hardware monitoring driver for Vicor PLI1209BC Digital Supervisor
+ *
+ * Copyright (c) 2022 9elements GmbH
+ */
+
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/pmbus.h>
+#include <linux/regulator/driver.h>
+#include "pmbus.h"
+
+/*
+ * The capability command is only supported at page 0. Probing the device while
+ * the page register is set to 1 will falsely enable PEC support. Disable
+ * capability probing accordingly, since the PLI1209BC does not have any
+ * additional capabilities.
+ */
+static struct pmbus_platform_data pli1209bc_plat_data = {
+ .flags = PMBUS_NO_CAPABILITY,
+};
+
+static int pli1209bc_read_word_data(struct i2c_client *client, int page,
+ int phase, int reg)
+{
+ int data;
+
+ switch (reg) {
+ /* PMBUS_READ_POUT uses a direct format with R=0 */
+ case PMBUS_READ_POUT:
+ data = pmbus_read_word_data(client, page, phase, reg);
+ if (data < 0)
+ return data;
+ data = sign_extend32(data, 15) * 10;
+ return clamp_val(data, -32768, 32767) & 0xffff;
+ /*
+ * PMBUS_READ_VOUT and PMBUS_READ_TEMPERATURE_1 return invalid data
+ * when the BCM is turned off. Since it is not possible to return
+ * ENODATA error, return zero instead.
+ */
+ case PMBUS_READ_VOUT:
+ case PMBUS_READ_TEMPERATURE_1:
+ data = pmbus_read_word_data(client, page, phase,
+ PMBUS_STATUS_WORD);
+ if (data < 0)
+ return data;
+ if (data & PB_STATUS_POWER_GOOD_N)
+ return 0;
+ return pmbus_read_word_data(client, page, phase, reg);
+ default:
+ return -ENODATA;
+ }
+}
+
+#if IS_ENABLED(CONFIG_SENSORS_PLI1209BC_REGULATOR)
+static const struct regulator_desc pli1209bc_reg_desc = {
+ .name = "vout2",
+ .id = 1,
+ .of_match = of_match_ptr("vout2"),
+ .regulators_node = of_match_ptr("regulators"),
+ .ops = &pmbus_regulator_ops,
+ .type = REGULATOR_VOLTAGE,
+ .owner = THIS_MODULE,
+};
+#endif
+
+static struct pmbus_driver_info pli1209bc_info = {
+ .pages = 2,
+ .format[PSC_VOLTAGE_IN] = direct,
+ .format[PSC_VOLTAGE_OUT] = direct,
+ .format[PSC_CURRENT_IN] = direct,
+ .format[PSC_CURRENT_OUT] = direct,
+ .format[PSC_POWER] = direct,
+ .format[PSC_TEMPERATURE] = direct,
+ .m[PSC_VOLTAGE_IN] = 1,
+ .b[PSC_VOLTAGE_IN] = 0,
+ .R[PSC_VOLTAGE_IN] = 1,
+ .m[PSC_VOLTAGE_OUT] = 1,
+ .b[PSC_VOLTAGE_OUT] = 0,
+ .R[PSC_VOLTAGE_OUT] = 1,
+ .m[PSC_CURRENT_IN] = 1,
+ .b[PSC_CURRENT_IN] = 0,
+ .R[PSC_CURRENT_IN] = 3,
+ .m[PSC_CURRENT_OUT] = 1,
+ .b[PSC_CURRENT_OUT] = 0,
+ .R[PSC_CURRENT_OUT] = 2,
+ .m[PSC_POWER] = 1,
+ .b[PSC_POWER] = 0,
+ .R[PSC_POWER] = 1,
+ .m[PSC_TEMPERATURE] = 1,
+ .b[PSC_TEMPERATURE] = 0,
+ .R[PSC_TEMPERATURE] = 0,
+ /*
+ * Page 0 sums up all attributes except voltage readings.
+ * The pli1209 digital supervisor only contains a single BCM, making
+ * page 0 redundant.
+ */
+ .func[1] = PMBUS_HAVE_VIN | PMBUS_HAVE_VOUT
+ | PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT
+ | PMBUS_HAVE_PIN | PMBUS_HAVE_POUT
+ | PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP
+ | PMBUS_HAVE_STATUS_IOUT | PMBUS_HAVE_STATUS_INPUT,
+ .read_word_data = pli1209bc_read_word_data,
+#if IS_ENABLED(CONFIG_SENSORS_PLI1209BC_REGULATOR)
+ .num_regulators = 1,
+ .reg_desc = &pli1209bc_reg_desc,
+#endif
+};
+
+static int pli1209bc_probe(struct i2c_client *client)
+{
+ client->dev.platform_data = &pli1209bc_plat_data;
+ return pmbus_do_probe(client, &pli1209bc_info);
+}
+
+static const struct i2c_device_id pli1209bc_id[] = {
+ {"pli1209bc", 0},
+ {}
+};
+
+MODULE_DEVICE_TABLE(i2c, pli1209bc_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id pli1209bc_of_match[] = {
+ { .compatible = "vicor,pli1209bc" },
+ { },
+};
+MODULE_DEVICE_TABLE(of, pli1209bc_of_match);
+#endif
+
+static struct i2c_driver pli1209bc_driver = {
+ .driver = {
+ .name = "pli1209bc",
+ .of_match_table = of_match_ptr(pli1209bc_of_match),
+ },
+ .probe_new = pli1209bc_probe,
+ .id_table = pli1209bc_id,
+};
+
+module_i2c_driver(pli1209bc_driver);
+
+MODULE_AUTHOR("Marcello Sylvester Bauer <sylv@sylv.io>");
+MODULE_DESCRIPTION("PMBus driver for Vicor PLI1209BC");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(PMBUS);
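
pli1209bc_read_word_data() compensates for POUT being reported in direct format with R=0 while the driver declares R=1: the raw word is sign-extended, scaled by 10, and clamped back into a signed 16-bit word. The same arithmetic, runnable on its own with a sample register value:

#include <stdio.h>
#include <stdint.h>

/* Sign-extend the low 16 bits, like sign_extend32(data, 15) */
static int32_t sext16(uint32_t v)
{
	return (int32_t)(int16_t)(v & 0xffff);
}

static int32_t clamp32(int32_t v, int32_t lo, int32_t hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
	uint32_t raw = 0xfff6;	/* sample register word: -10 in two's complement */
	int32_t data;

	data = sext16(raw) * 10;		/* adjust R=0 -> R=1 */
	data = clamp32(data, -32768, 32767);	/* keep it a valid s16 */
	printf("raw=0x%04x -> word=0x%04x (%d)\n",
	       raw, (uint32_t)data & 0xffff, data);
	return 0;
}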
diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h
index e0aa8aa46d8c..e74b6ef070f3 100644
--- a/drivers/hwmon/pmbus/pmbus.h
+++ b/drivers/hwmon/pmbus/pmbus.h
@@ -319,6 +319,7 @@ enum pmbus_fan_mode { percent = 0, rpm };
/*
* STATUS_VOUT, STATUS_INPUT
*/
+#define PB_VOLTAGE_VIN_OFF BIT(3)
#define PB_VOLTAGE_UV_FAULT BIT(4)
#define PB_VOLTAGE_UV_WARNING BIT(5)
#define PB_VOLTAGE_OV_WARNING BIT(6)
@@ -464,6 +465,7 @@ extern const struct regulator_ops pmbus_regulator_ops;
#define PMBUS_REGULATOR(_name, _id) \
[_id] = { \
.name = (_name # _id), \
+ .supply_name = "vin", \
.id = (_id), \
.of_match = of_match_ptr(_name # _id), \
.regulators_node = of_match_ptr("regulators"), \
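
The .supply_name addition above gives every regulator built from PMBUS_REGULATOR an implicit "vin" supply. A trimmed userspace stand-in for the macro (only the fields relevant here; the real regulator_desc carries more, as the open-coded pli1209bc descriptor earlier in this patch shows) makes the name stringification and the new field visible:

#include <stdio.h>

/* Minimal stand-in for the kernel regulator_desc fields used below */
struct reg_desc {
	const char *name;
	const char *supply_name;
	int id;
};

/* Same stringification trick as the kernel's PMBUS_REGULATOR macro */
#define PMBUS_REGULATOR(_name, _id) \
	[_id] = { \
		.name = (_name # _id), \
		.supply_name = "vin", \
		.id = (_id), \
	}

static const struct reg_desc descs[] = {
	PMBUS_REGULATOR("vout", 0),
	PMBUS_REGULATOR("vout", 1),
};

int main(void)
{
	for (int i = 0; i < 2; i++)
		printf("%s: id=%d supply=%s\n",
		       descs[i].name, descs[i].id, descs[i].supply_name);
	return 0;
}

"vout" # 0 pastes into "vout0", so each page's regulator is named after its index and now declares vin as its upstream supply.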
diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index 776ee2237be2..b2618b1d529e 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -911,6 +911,11 @@ static int pmbus_get_boolean(struct i2c_client *client, struct pmbus_boolean *b,
pmbus_update_sensor_data(client, s2);
regval = status & mask;
+ if (regval) {
+ ret = pmbus_write_byte_data(client, page, reg, regval);
+ if (ret)
+ goto unlock;
+ }
if (s1 && s2) {
s64 v1, v2;
@@ -1368,7 +1373,7 @@ static const struct pmbus_limit_attr vin_limit_attrs[] = {
.reg = PMBUS_VIN_UV_FAULT_LIMIT,
.attr = "lcrit",
.alarm = "lcrit_alarm",
- .sbit = PB_VOLTAGE_UV_FAULT,
+ .sbit = PB_VOLTAGE_UV_FAULT | PB_VOLTAGE_VIN_OFF,
}, {
.reg = PMBUS_VIN_OV_WARN_LIMIT,
.attr = "max",
@@ -2386,10 +2391,14 @@ static int pmbus_regulator_is_enabled(struct regulator_dev *rdev)
{
struct device *dev = rdev_get_dev(rdev);
struct i2c_client *client = to_i2c_client(dev->parent);
+ struct pmbus_data *data = i2c_get_clientdata(client);
u8 page = rdev_get_id(rdev);
int ret;
+ mutex_lock(&data->update_lock);
ret = pmbus_read_byte_data(client, page, PMBUS_OPERATION);
+ mutex_unlock(&data->update_lock);
+
if (ret < 0)
return ret;
@@ -2400,11 +2409,17 @@ static int _pmbus_regulator_on_off(struct regulator_dev *rdev, bool enable)
{
struct device *dev = rdev_get_dev(rdev);
struct i2c_client *client = to_i2c_client(dev->parent);
+ struct pmbus_data *data = i2c_get_clientdata(client);
u8 page = rdev_get_id(rdev);
+ int ret;
- return pmbus_update_byte_data(client, page, PMBUS_OPERATION,
- PB_OPERATION_CONTROL_ON,
- enable ? PB_OPERATION_CONTROL_ON : 0);
+ mutex_lock(&data->update_lock);
+ ret = pmbus_update_byte_data(client, page, PMBUS_OPERATION,
+ PB_OPERATION_CONTROL_ON,
+ enable ? PB_OPERATION_CONTROL_ON : 0);
+ mutex_unlock(&data->update_lock);
+
+ return ret;
}
static int pmbus_regulator_enable(struct regulator_dev *rdev)
@@ -2417,10 +2432,124 @@ static int pmbus_regulator_disable(struct regulator_dev *rdev)
return _pmbus_regulator_on_off(rdev, 0);
}
+/* A PMBus status flag and the corresponding REGULATOR_ERROR_* flag */
+struct pmbus_regulator_status_assoc {
+ int pflag, rflag;
+};
+
+/* PMBus->regulator bit mappings for a PMBus status register */
+struct pmbus_regulator_status_category {
+ int func;
+ int reg;
+ const struct pmbus_regulator_status_assoc *bits; /* zero-terminated */
+};
+
+static const struct pmbus_regulator_status_category pmbus_regulator_flag_map[] = {
+ {
+ .func = PMBUS_HAVE_STATUS_VOUT,
+ .reg = PMBUS_STATUS_VOUT,
+ .bits = (const struct pmbus_regulator_status_assoc[]) {
+ { PB_VOLTAGE_UV_WARNING, REGULATOR_ERROR_UNDER_VOLTAGE_WARN },
+ { PB_VOLTAGE_UV_FAULT, REGULATOR_ERROR_UNDER_VOLTAGE },
+ { PB_VOLTAGE_OV_WARNING, REGULATOR_ERROR_OVER_VOLTAGE_WARN },
+ { PB_VOLTAGE_OV_FAULT, REGULATOR_ERROR_REGULATION_OUT },
+ { },
+ },
+ }, {
+ .func = PMBUS_HAVE_STATUS_IOUT,
+ .reg = PMBUS_STATUS_IOUT,
+ .bits = (const struct pmbus_regulator_status_assoc[]) {
+ { PB_IOUT_OC_WARNING, REGULATOR_ERROR_OVER_CURRENT_WARN },
+ { PB_IOUT_OC_FAULT, REGULATOR_ERROR_OVER_CURRENT },
+ { PB_IOUT_OC_LV_FAULT, REGULATOR_ERROR_OVER_CURRENT },
+ { },
+ },
+ }, {
+ .func = PMBUS_HAVE_STATUS_TEMP,
+ .reg = PMBUS_STATUS_TEMPERATURE,
+ .bits = (const struct pmbus_regulator_status_assoc[]) {
+ { PB_TEMP_OT_WARNING, REGULATOR_ERROR_OVER_TEMP_WARN },
+ { PB_TEMP_OT_FAULT, REGULATOR_ERROR_OVER_TEMP },
+ { },
+ },
+ },
+};
+
+static int pmbus_regulator_get_error_flags(struct regulator_dev *rdev, unsigned int *flags)
+{
+ int i, status;
+ const struct pmbus_regulator_status_category *cat;
+ const struct pmbus_regulator_status_assoc *bit;
+ struct device *dev = rdev_get_dev(rdev);
+ struct i2c_client *client = to_i2c_client(dev->parent);
+ struct pmbus_data *data = i2c_get_clientdata(client);
+ u8 page = rdev_get_id(rdev);
+ int func = data->info->func[page];
+
+ *flags = 0;
+
+ mutex_lock(&data->update_lock);
+
+ for (i = 0; i < ARRAY_SIZE(pmbus_regulator_flag_map); i++) {
+ cat = &pmbus_regulator_flag_map[i];
+ if (!(func & cat->func))
+ continue;
+
+ status = pmbus_read_byte_data(client, page, cat->reg);
+ if (status < 0) {
+ mutex_unlock(&data->update_lock);
+ return status;
+ }
+
+ for (bit = cat->bits; bit->pflag; bit++) {
+ if (status & bit->pflag)
+ *flags |= bit->rflag;
+ }
+ }
+
+ /*
+ * Map what bits of STATUS_{WORD,BYTE} we can to REGULATOR_ERROR_*
+ * bits. Some of the other bits are tempting (especially for cases
+ * where we don't have the relevant PMBUS_HAVE_STATUS_*
+ * functionality), but there's an unfortunate ambiguity in that
+ * they're defined as indicating a fault *or* a warning, so we can't
+ * easily determine whether to report REGULATOR_ERROR_<foo> or
+ * REGULATOR_ERROR_<foo>_WARN.
+ */
+ status = pmbus_get_status(client, page, PMBUS_STATUS_WORD);
+ mutex_unlock(&data->update_lock);
+ if (status < 0)
+ return status;
+
+ if (pmbus_regulator_is_enabled(rdev) && (status & PB_STATUS_OFF))
+ *flags |= REGULATOR_ERROR_FAIL;
+
+ /*
+ * Unlike most other status bits, PB_STATUS_{IOUT_OC,VOUT_OV} are
+ * defined strictly as fault indicators (not warnings).
+ */
+ if (status & PB_STATUS_IOUT_OC)
+ *flags |= REGULATOR_ERROR_OVER_CURRENT;
+ if (status & PB_STATUS_VOUT_OV)
+ *flags |= REGULATOR_ERROR_REGULATION_OUT;
+
+ /*
+ * If we haven't discovered any thermal faults or warnings via
+ * PMBUS_STATUS_TEMPERATURE, map PB_STATUS_TEMPERATURE to a warning as
+ * a (conservative) best-effort interpretation.
+ */
+ if (!(*flags & (REGULATOR_ERROR_OVER_TEMP | REGULATOR_ERROR_OVER_TEMP_WARN)) &&
+ (status & PB_STATUS_TEMPERATURE))
+ *flags |= REGULATOR_ERROR_OVER_TEMP_WARN;
+
+ return 0;
+}
+
const struct regulator_ops pmbus_regulator_ops = {
.enable = pmbus_regulator_enable,
.disable = pmbus_regulator_disable,
.is_enabled = pmbus_regulator_is_enabled,
+ .get_error_flags = pmbus_regulator_get_error_flags,
};
EXPORT_SYMBOL_NS_GPL(pmbus_regulator_ops, PMBUS);
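
pmbus_regulator_get_error_flags() drives everything from pmbus_regulator_flag_map: for each status register the page supports, it walks a zero-terminated array of PMBus-bit to regulator-bit associations. A trimmed sketch of that table walk, with a stubbed register read standing in for the I2C access and sample flag values:

#include <stdio.h>

struct assoc { unsigned int pflag, rflag; };

#define PB_IOUT_OC_WARNING	(1u << 5)	/* sample PMBus status bits */
#define PB_IOUT_OC_FAULT	(1u << 7)
#define ERR_OVER_CURRENT_WARN	(1u << 0)	/* sample regulator flags */
#define ERR_OVER_CURRENT	(1u << 1)

static const struct assoc iout_bits[] = {
	{ PB_IOUT_OC_WARNING, ERR_OVER_CURRENT_WARN },
	{ PB_IOUT_OC_FAULT,   ERR_OVER_CURRENT },
	{ }					/* zero-terminated */
};

static int read_status_stub(void)
{
	return PB_IOUT_OC_FAULT;	/* pretend the fault bit is latched */
}

int main(void)
{
	unsigned int flags = 0;
	int status = read_status_stub();
	const struct assoc *bit;

	/* bit->pflag == 0 marks the end of the table */
	for (bit = iout_bits; bit->pflag; bit++)
		if (status & bit->pflag)
			flags |= bit->rflag;

	printf("regulator error flags: 0x%x\n", flags);
	return 0;
}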
diff --git a/drivers/hwmon/pmbus/xdpe12284.c b/drivers/hwmon/pmbus/xdpe12284.c
index b07da06a40c9..18fffc5d749b 100644
--- a/drivers/hwmon/pmbus/xdpe12284.c
+++ b/drivers/hwmon/pmbus/xdpe12284.c
@@ -10,6 +10,8 @@
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/regulator/driver.h>
+
#include "pmbus.h"
#define XDPE122_PROT_VR12_5MV 0x01 /* VR12.0 mode, 5-mV DAC */
@@ -76,7 +78,22 @@ static int xdpe122_identify(struct i2c_client *client,
struct pmbus_driver_info *info)
{
u8 vout_params;
- int i, ret;
+ int i, ret, vout_mode;
+
+ vout_mode = pmbus_read_byte_data(client, 0, PMBUS_VOUT_MODE);
+ if (vout_mode >= 0 && vout_mode != 0xff) {
+ switch (vout_mode >> 5) {
+ case 0:
+ info->format[PSC_VOLTAGE_OUT] = linear;
+ return 0;
+ case 1:
+ info->format[PSC_VOLTAGE_OUT] = vid;
+ info->read_word_data = xdpe122_read_word_data;
+ break;
+ default:
+ return -ENODEV;
+ }
+ }
for (i = 0; i < XDPE122_PAGE_NUM; i++) {
/* Read the register with VOUT scaling value.*/
@@ -107,10 +124,14 @@ static int xdpe122_identify(struct i2c_client *client,
return 0;
}
+static const struct regulator_desc xdpe122_reg_desc[] = {
+ PMBUS_REGULATOR("vout", 0),
+ PMBUS_REGULATOR("vout", 1),
+};
+
static struct pmbus_driver_info xdpe122_info = {
.pages = XDPE122_PAGE_NUM,
.format[PSC_VOLTAGE_IN] = linear,
- .format[PSC_VOLTAGE_OUT] = vid,
.format[PSC_TEMPERATURE] = linear,
.format[PSC_CURRENT_IN] = linear,
.format[PSC_CURRENT_OUT] = linear,
@@ -124,7 +145,10 @@ static struct pmbus_driver_info xdpe122_info = {
PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP |
PMBUS_HAVE_POUT | PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT,
.identify = xdpe122_identify,
- .read_word_data = xdpe122_read_word_data,
+#if IS_ENABLED(CONFIG_SENSORS_XDPE122_REGULATOR)
+ .num_regulators = 2,
+ .reg_desc = xdpe122_reg_desc,
+#endif
};
static int xdpe122_probe(struct i2c_client *client)
@@ -140,6 +164,7 @@ static int xdpe122_probe(struct i2c_client *client)
}
static const struct i2c_device_id xdpe122_id[] = {
+ {"xdpe11280", 0},
{"xdpe12254", 0},
{"xdpe12284", 0},
{}
@@ -148,6 +173,7 @@ static const struct i2c_device_id xdpe122_id[] = {
MODULE_DEVICE_TABLE(i2c, xdpe122_id);
static const struct of_device_id __maybe_unused xdpe122_of_match[] = {
+ {.compatible = "infineon,xdpe11280"},
{.compatible = "infineon,xdpe12254"},
{.compatible = "infineon,xdpe12284"},
{}
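
The new identify path keys off bits 7:5 of PMBUS_VOUT_MODE: 000 selects linear mode, 001 selects VID, and 0xff (or a read failure) falls through to the existing per-page detection. A tiny decoder for the same field, with example mode bytes:

#include <stdio.h>

enum vout_format { FMT_LINEAR, FMT_VID, FMT_UNKNOWN };

static enum vout_format decode_vout_mode(int vout_mode)
{
	/* 0xff conventionally means "not reported"; skip detection then */
	if (vout_mode < 0 || vout_mode == 0xff)
		return FMT_UNKNOWN;

	switch (vout_mode >> 5) {	/* top three bits select the format */
	case 0:
		return FMT_LINEAR;
	case 1:
		return FMT_VID;
	default:
		return FMT_UNKNOWN;
	}
}

int main(void)
{
	int samples[] = { 0x17, 0x27, 0xff };	/* linear, VID, not reported */

	for (int i = 0; i < 3; i++)
		printf("VOUT_MODE=0x%02x -> %d\n", samples[i],
		       (int)decode_vout_mode(samples[i]));
	return 0;
}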
diff --git a/drivers/hwmon/powr1220.c b/drivers/hwmon/powr1220.c
index 9e086338dcba..f77dc6db31ac 100644
--- a/drivers/hwmon/powr1220.c
+++ b/drivers/hwmon/powr1220.c
@@ -22,6 +22,8 @@
#define ADC_STEP_MV 2
#define ADC_MAX_LOW_MEASUREMENT_MV 2000
+enum powr1xxx_chips { powr1014, powr1220 };
+
enum powr1220_regs {
VMON_STATUS0,
VMON_STATUS1,
@@ -74,6 +76,7 @@ enum powr1220_adc_values {
struct powr1220_data {
struct i2c_client *client;
struct mutex update_lock;
+ u8 max_channels;
bool adc_valid[MAX_POWR1220_ADC_VALUES];
/* the next value is in jiffies */
unsigned long adc_last_updated[MAX_POWR1220_ADC_VALUES];
@@ -111,7 +114,7 @@ static int powr1220_read_adc(struct device *dev, int ch_num)
mutex_lock(&data->update_lock);
if (time_after(jiffies, data->adc_last_updated[ch_num] + HZ) ||
- !data->adc_valid[ch_num]) {
+ !data->adc_valid[ch_num]) {
/*
* figure out if we need to use the attenuator for
* high inputs or inputs that we don't yet have a measurement
* for, based on the channel's recorded max reading.
* max reading.
*/
if (data->adc_maxes[ch_num] > ADC_MAX_LOW_MEASUREMENT_MV ||
- data->adc_maxes[ch_num] == 0)
+ data->adc_maxes[ch_num] == 0)
adc_range = 1 << 4;
/* set the attenuator and mux */
result = i2c_smbus_write_byte_data(data->client, ADC_MUX,
- adc_range | ch_num);
+ adc_range | ch_num);
if (result)
goto exit;
@@ -167,135 +170,116 @@ exit:
return result;
}
-/* Shows the voltage associated with the specified ADC channel */
-static ssize_t powr1220_voltage_show(struct device *dev,
- struct device_attribute *dev_attr,
- char *buf)
+static umode_t
+powr1220_is_visible(const void *data, enum hwmon_sensor_types type, u32
+ attr, int channel)
{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(dev_attr);
- int adc_val = powr1220_read_adc(dev, attr->index);
-
- if (adc_val < 0)
- return adc_val;
+ struct powr1220_data *chip_data = (struct powr1220_data *)data;
+
+ if (channel >= chip_data->max_channels)
+ return 0;
+
+ switch (type) {
+ case hwmon_in:
+ switch (attr) {
+ case hwmon_in_input:
+ case hwmon_in_highest:
+ case hwmon_in_label:
+ return 0444;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
- return sprintf(buf, "%d\n", adc_val);
+ return 0;
}
-/* Shows the maximum setting associated with the specified ADC channel */
-static ssize_t powr1220_max_show(struct device *dev,
- struct device_attribute *dev_attr, char *buf)
+static int
+powr1220_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+ int channel, const char **str)
{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(dev_attr);
- struct powr1220_data *data = dev_get_drvdata(dev);
+ switch (type) {
+ case hwmon_in:
+ switch (attr) {
+ case hwmon_in_label:
+ *str = input_names[channel];
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
- return sprintf(buf, "%d\n", data->adc_maxes[attr->index]);
+ return -EOPNOTSUPP;
}
-/* Shows the label associated with the specified ADC channel */
-static ssize_t powr1220_label_show(struct device *dev,
- struct device_attribute *dev_attr,
- char *buf)
+static int
+powr1220_read(struct device *dev, enum hwmon_sensor_types type, u32
+ attr, int channel, long *val)
{
- struct sensor_device_attribute *attr = to_sensor_dev_attr(dev_attr);
+ struct powr1220_data *data = dev_get_drvdata(dev);
+ int ret;
+
+ switch (type) {
+ case hwmon_in:
+ switch (attr) {
+ case hwmon_in_input:
+ ret = powr1220_read_adc(dev, channel);
+ if (ret < 0)
+ return ret;
+ *val = ret;
+ break;
+ case hwmon_in_highest:
+ *val = data->adc_maxes[channel];
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ break;
+ default:
+ return -EOPNOTSUPP;
+}
- return sprintf(buf, "%s\n", input_names[attr->index]);
+ return 0;
}
-static SENSOR_DEVICE_ATTR_RO(in0_input, powr1220_voltage, VMON1);
-static SENSOR_DEVICE_ATTR_RO(in1_input, powr1220_voltage, VMON2);
-static SENSOR_DEVICE_ATTR_RO(in2_input, powr1220_voltage, VMON3);
-static SENSOR_DEVICE_ATTR_RO(in3_input, powr1220_voltage, VMON4);
-static SENSOR_DEVICE_ATTR_RO(in4_input, powr1220_voltage, VMON5);
-static SENSOR_DEVICE_ATTR_RO(in5_input, powr1220_voltage, VMON6);
-static SENSOR_DEVICE_ATTR_RO(in6_input, powr1220_voltage, VMON7);
-static SENSOR_DEVICE_ATTR_RO(in7_input, powr1220_voltage, VMON8);
-static SENSOR_DEVICE_ATTR_RO(in8_input, powr1220_voltage, VMON9);
-static SENSOR_DEVICE_ATTR_RO(in9_input, powr1220_voltage, VMON10);
-static SENSOR_DEVICE_ATTR_RO(in10_input, powr1220_voltage, VMON11);
-static SENSOR_DEVICE_ATTR_RO(in11_input, powr1220_voltage, VMON12);
-static SENSOR_DEVICE_ATTR_RO(in12_input, powr1220_voltage, VCCA);
-static SENSOR_DEVICE_ATTR_RO(in13_input, powr1220_voltage, VCCINP);
-
-static SENSOR_DEVICE_ATTR_RO(in0_highest, powr1220_max, VMON1);
-static SENSOR_DEVICE_ATTR_RO(in1_highest, powr1220_max, VMON2);
-static SENSOR_DEVICE_ATTR_RO(in2_highest, powr1220_max, VMON3);
-static SENSOR_DEVICE_ATTR_RO(in3_highest, powr1220_max, VMON4);
-static SENSOR_DEVICE_ATTR_RO(in4_highest, powr1220_max, VMON5);
-static SENSOR_DEVICE_ATTR_RO(in5_highest, powr1220_max, VMON6);
-static SENSOR_DEVICE_ATTR_RO(in6_highest, powr1220_max, VMON7);
-static SENSOR_DEVICE_ATTR_RO(in7_highest, powr1220_max, VMON8);
-static SENSOR_DEVICE_ATTR_RO(in8_highest, powr1220_max, VMON9);
-static SENSOR_DEVICE_ATTR_RO(in9_highest, powr1220_max, VMON10);
-static SENSOR_DEVICE_ATTR_RO(in10_highest, powr1220_max, VMON11);
-static SENSOR_DEVICE_ATTR_RO(in11_highest, powr1220_max, VMON12);
-static SENSOR_DEVICE_ATTR_RO(in12_highest, powr1220_max, VCCA);
-static SENSOR_DEVICE_ATTR_RO(in13_highest, powr1220_max, VCCINP);
-
-static SENSOR_DEVICE_ATTR_RO(in0_label, powr1220_label, VMON1);
-static SENSOR_DEVICE_ATTR_RO(in1_label, powr1220_label, VMON2);
-static SENSOR_DEVICE_ATTR_RO(in2_label, powr1220_label, VMON3);
-static SENSOR_DEVICE_ATTR_RO(in3_label, powr1220_label, VMON4);
-static SENSOR_DEVICE_ATTR_RO(in4_label, powr1220_label, VMON5);
-static SENSOR_DEVICE_ATTR_RO(in5_label, powr1220_label, VMON6);
-static SENSOR_DEVICE_ATTR_RO(in6_label, powr1220_label, VMON7);
-static SENSOR_DEVICE_ATTR_RO(in7_label, powr1220_label, VMON8);
-static SENSOR_DEVICE_ATTR_RO(in8_label, powr1220_label, VMON9);
-static SENSOR_DEVICE_ATTR_RO(in9_label, powr1220_label, VMON10);
-static SENSOR_DEVICE_ATTR_RO(in10_label, powr1220_label, VMON11);
-static SENSOR_DEVICE_ATTR_RO(in11_label, powr1220_label, VMON12);
-static SENSOR_DEVICE_ATTR_RO(in12_label, powr1220_label, VCCA);
-static SENSOR_DEVICE_ATTR_RO(in13_label, powr1220_label, VCCINP);
-
-static struct attribute *powr1220_attrs[] = {
- &sensor_dev_attr_in0_input.dev_attr.attr,
- &sensor_dev_attr_in1_input.dev_attr.attr,
- &sensor_dev_attr_in2_input.dev_attr.attr,
- &sensor_dev_attr_in3_input.dev_attr.attr,
- &sensor_dev_attr_in4_input.dev_attr.attr,
- &sensor_dev_attr_in5_input.dev_attr.attr,
- &sensor_dev_attr_in6_input.dev_attr.attr,
- &sensor_dev_attr_in7_input.dev_attr.attr,
- &sensor_dev_attr_in8_input.dev_attr.attr,
- &sensor_dev_attr_in9_input.dev_attr.attr,
- &sensor_dev_attr_in10_input.dev_attr.attr,
- &sensor_dev_attr_in11_input.dev_attr.attr,
- &sensor_dev_attr_in12_input.dev_attr.attr,
- &sensor_dev_attr_in13_input.dev_attr.attr,
-
- &sensor_dev_attr_in0_highest.dev_attr.attr,
- &sensor_dev_attr_in1_highest.dev_attr.attr,
- &sensor_dev_attr_in2_highest.dev_attr.attr,
- &sensor_dev_attr_in3_highest.dev_attr.attr,
- &sensor_dev_attr_in4_highest.dev_attr.attr,
- &sensor_dev_attr_in5_highest.dev_attr.attr,
- &sensor_dev_attr_in6_highest.dev_attr.attr,
- &sensor_dev_attr_in7_highest.dev_attr.attr,
- &sensor_dev_attr_in8_highest.dev_attr.attr,
- &sensor_dev_attr_in9_highest.dev_attr.attr,
- &sensor_dev_attr_in10_highest.dev_attr.attr,
- &sensor_dev_attr_in11_highest.dev_attr.attr,
- &sensor_dev_attr_in12_highest.dev_attr.attr,
- &sensor_dev_attr_in13_highest.dev_attr.attr,
-
- &sensor_dev_attr_in0_label.dev_attr.attr,
- &sensor_dev_attr_in1_label.dev_attr.attr,
- &sensor_dev_attr_in2_label.dev_attr.attr,
- &sensor_dev_attr_in3_label.dev_attr.attr,
- &sensor_dev_attr_in4_label.dev_attr.attr,
- &sensor_dev_attr_in5_label.dev_attr.attr,
- &sensor_dev_attr_in6_label.dev_attr.attr,
- &sensor_dev_attr_in7_label.dev_attr.attr,
- &sensor_dev_attr_in8_label.dev_attr.attr,
- &sensor_dev_attr_in9_label.dev_attr.attr,
- &sensor_dev_attr_in10_label.dev_attr.attr,
- &sensor_dev_attr_in11_label.dev_attr.attr,
- &sensor_dev_attr_in12_label.dev_attr.attr,
- &sensor_dev_attr_in13_label.dev_attr.attr,
+static const struct hwmon_channel_info *powr1220_info[] = {
+ HWMON_CHANNEL_INFO(in,
+ HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL),
NULL
};
-ATTRIBUTE_GROUPS(powr1220);
+static const struct hwmon_ops powr1220_hwmon_ops = {
+ .read = powr1220_read,
+ .read_string = powr1220_read_string,
+ .is_visible = powr1220_is_visible,
+};
+
+static const struct hwmon_chip_info powr1220_chip_info = {
+ .ops = &powr1220_hwmon_ops,
+ .info = powr1220_info,
+};
+
+static const struct i2c_device_id powr1220_ids[];
static int powr1220_probe(struct i2c_client *client)
{
@@ -309,17 +293,30 @@ static int powr1220_probe(struct i2c_client *client)
if (!data)
return -ENOMEM;
+ switch (i2c_match_id(powr1220_ids, client)->driver_data) {
+ case powr1014:
+ data->max_channels = 10;
+ break;
+ default:
+ data->max_channels = 12;
+ break;
+ }
+
mutex_init(&data->update_lock);
data->client = client;
- hwmon_dev = devm_hwmon_device_register_with_groups(&client->dev,
- client->name, data, powr1220_groups);
+ hwmon_dev = devm_hwmon_device_register_with_info(&client->dev,
+ client->name,
+ data,
+ &powr1220_chip_info,
+ NULL);
return PTR_ERR_OR_ZERO(hwmon_dev);
}
static const struct i2c_device_id powr1220_ids[] = {
- { "powr1220", 0, },
+ { "powr1014", powr1014, },
+ { "powr1220", powr1220, },
{ }
};
diff --git a/drivers/hwmon/sch5627.c b/drivers/hwmon/sch5627.c
index 8f1b569c69e7..25fbbd4c9a2b 100644
--- a/drivers/hwmon/sch5627.c
+++ b/drivers/hwmon/sch5627.c
@@ -7,6 +7,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
+#include <linux/mod_devicetable.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
@@ -51,6 +52,9 @@ static const u16 SCH5627_REG_FAN[SCH5627_NO_FANS] = {
static const u16 SCH5627_REG_FAN_MIN[SCH5627_NO_FANS] = {
0x62, 0x64, 0x66, 0x68 };
+static const u16 SCH5627_REG_PWM_MAP[SCH5627_NO_FANS] = {
+ 0xA0, 0xA1, 0xA2, 0xA3 };
+
static const u16 SCH5627_REG_IN_MSB[SCH5627_NO_IN] = {
0x22, 0x23, 0x24, 0x25, 0x189 };
static const u16 SCH5627_REG_IN_LSN[SCH5627_NO_IN] = {
@@ -222,6 +226,9 @@ static int reg_to_rpm(u16 reg)
static umode_t sch5627_is_visible(const void *drvdata, enum hwmon_sensor_types type, u32 attr,
int channel)
{
+ if (type == hwmon_pwm && attr == hwmon_pwm_auto_channels_temp)
+ return 0644;
+
return 0444;
}
@@ -277,6 +284,23 @@ static int sch5627_read(struct device *dev, enum hwmon_sensor_types type, u32 at
break;
}
break;
+ case hwmon_pwm:
+ switch (attr) {
+ case hwmon_pwm_auto_channels_temp:
+ mutex_lock(&data->update_lock);
+ ret = sch56xx_read_virtual_reg(data->addr, SCH5627_REG_PWM_MAP[channel]);
+ mutex_unlock(&data->update_lock);
+
+ if (ret < 0)
+ return ret;
+
+ *val = ret;
+
+ return 0;
+ default:
+ break;
+ }
+ break;
case hwmon_in:
ret = sch5627_update_in(data);
if (ret < 0)
@@ -317,10 +341,42 @@ static int sch5627_read_string(struct device *dev, enum hwmon_sensor_types type,
return -EOPNOTSUPP;
}
+static int sch5627_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel,
+ long val)
+{
+ struct sch5627_data *data = dev_get_drvdata(dev);
+ int ret;
+
+ switch (type) {
+ case hwmon_pwm:
+ switch (attr) {
+ case hwmon_pwm_auto_channels_temp:
+ /* registers are 8 bit wide */
+ if (val > U8_MAX || val < 0)
+ return -EINVAL;
+
+ mutex_lock(&data->update_lock);
+ ret = sch56xx_write_virtual_reg(data->addr, SCH5627_REG_PWM_MAP[channel],
+ val);
+ mutex_unlock(&data->update_lock);
+
+ return ret;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return -EOPNOTSUPP;
+}
+
static const struct hwmon_ops sch5627_ops = {
.is_visible = sch5627_is_visible,
.read = sch5627_read,
.read_string = sch5627_read_string,
+ .write = sch5627_write,
};
static const struct hwmon_channel_info *sch5627_info[] = {
@@ -341,6 +397,12 @@ static const struct hwmon_channel_info *sch5627_info[] = {
HWMON_F_INPUT | HWMON_F_MIN | HWMON_F_FAULT,
HWMON_F_INPUT | HWMON_F_MIN | HWMON_F_FAULT
),
+ HWMON_CHANNEL_INFO(pwm,
+ HWMON_PWM_AUTO_CHANNELS_TEMP,
+ HWMON_PWM_AUTO_CHANNELS_TEMP,
+ HWMON_PWM_AUTO_CHANNELS_TEMP,
+ HWMON_PWM_AUTO_CHANNELS_TEMP
+ ),
HWMON_CHANNEL_INFO(in,
HWMON_I_INPUT | HWMON_I_LABEL,
HWMON_I_INPUT | HWMON_I_LABEL,
@@ -456,11 +518,20 @@ static int sch5627_probe(struct platform_device *pdev)
return 0;
}
+static const struct platform_device_id sch5627_device_id[] = {
+ {
+ .name = "sch5627",
+ },
+ { }
+};
+MODULE_DEVICE_TABLE(platform, sch5627_device_id);
+
static struct platform_driver sch5627_driver = {
.driver = {
.name = DRVNAME,
},
.probe = sch5627_probe,
+ .id_table = sch5627_device_id,
};
module_platform_driver(sch5627_driver);
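
In the hwmon ABI, pwm[1-*]_auto_channels_temp is a bitmask of the temperature channels that drive a fan, which is why the write path above only bounds-checks against U8_MAX before forwarding the raw value to the PWM_MAP virtual register. A short decoder for such a mask (the value is an example):

#include <stdio.h>

int main(void)
{
	unsigned int map = 0x05;	/* example: fan follows temp1 and temp3 */

	printf("pwm_auto_channels_temp=0x%02x ->", map);
	for (int ch = 0; map; ch++, map >>= 1)
		if (map & 1)
			printf(" temp%d", ch + 1);
	printf("\n");
	return 0;
}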
diff --git a/drivers/hwmon/sch5636.c b/drivers/hwmon/sch5636.c
index 39ff1c9b1df5..269757bc3a9e 100644
--- a/drivers/hwmon/sch5636.c
+++ b/drivers/hwmon/sch5636.c
@@ -7,6 +7,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
+#include <linux/mod_devicetable.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
@@ -501,12 +502,21 @@ error:
return err;
}
+static const struct platform_device_id sch5636_device_id[] = {
+ {
+ .name = "sch5636",
+ },
+ { }
+};
+MODULE_DEVICE_TABLE(platform, sch5636_device_id);
+
static struct platform_driver sch5636_driver = {
.driver = {
.name = DRVNAME,
},
.probe = sch5636_probe,
.remove = sch5636_remove,
+ .id_table = sch5636_device_id,
};
module_platform_driver(sch5636_driver);
diff --git a/drivers/hwmon/sch56xx-common.c b/drivers/hwmon/sch56xx-common.c
index 40cdadad35e5..3ece53adabd6 100644
--- a/drivers/hwmon/sch56xx-common.c
+++ b/drivers/hwmon/sch56xx-common.c
@@ -7,8 +7,10 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
+#include <linux/mod_devicetable.h>
#include <linux/init.h>
#include <linux/platform_device.h>
+#include <linux/dmi.h>
#include <linux/err.h>
#include <linux/io.h>
#include <linux/acpi.h>
@@ -19,7 +21,10 @@
#include <linux/slab.h>
#include "sch56xx-common.h"
-/* Insmod parameters */
+static bool ignore_dmi;
+module_param(ignore_dmi, bool, 0);
+MODULE_PARM_DESC(ignore_dmi, "Omit DMI check for supported devices (default=0)");
+
static bool nowayout = WATCHDOG_NOWAYOUT;
module_param(nowayout, bool, 0);
MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
@@ -134,7 +139,7 @@ static int sch56xx_send_cmd(u16 addr, u8 cmd, u16 reg, u8 v)
/* EM Interface Polling "Algorithm" */
for (i = 0; i < max_busy_polls + max_lazy_polls; i++) {
if (i >= max_busy_polls)
- msleep(1);
+ usleep_range(1000, 2000);
/* Read Interrupt source Register */
val = inb(addr + 8);
/* Write Clear the interrupt source bits */
@@ -422,7 +427,7 @@ void sch56xx_watchdog_register(struct device *parent, u16 addr, u32 revision,
data->wddev.max_timeout = 255 * 60;
watchdog_set_nowayout(&data->wddev, nowayout);
if (output_enable & SCH56XX_WDOG_OUTPUT_ENABLE)
- set_bit(WDOG_ACTIVE, &data->wddev.status);
+ set_bit(WDOG_HW_RUNNING, &data->wddev.status);
/* Since the watchdog uses a downcounter there is no register to read
the BIOS set timeout from (if any was set at all) ->
@@ -518,11 +523,42 @@ static int __init sch56xx_device_add(int address, const char *name)
return PTR_ERR_OR_ZERO(sch56xx_pdev);
}
+/* For autoloading only */
+static const struct dmi_system_id sch56xx_dmi_table[] __initconst = {
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+ },
+ },
+ { }
+};
+MODULE_DEVICE_TABLE(dmi, sch56xx_dmi_table);
+
static int __init sch56xx_init(void)
{
- int address;
const char *name = NULL;
+ int address;
+ if (!ignore_dmi) {
+ if (!dmi_check_system(sch56xx_dmi_table))
+ return -ENODEV;
+
+ /*
+ * Some machines like the Esprimo P720 and Esprimo C700 have
+ * onboard devices named " Antiope"/" Theseus" instead of
+ * "Antiope"/"Theseus", so we need to check for both.
+ */
+ if (!dmi_find_device(DMI_DEV_TYPE_OTHER, "Antiope", NULL) &&
+ !dmi_find_device(DMI_DEV_TYPE_OTHER, " Antiope", NULL) &&
+ !dmi_find_device(DMI_DEV_TYPE_OTHER, "Theseus", NULL) &&
+ !dmi_find_device(DMI_DEV_TYPE_OTHER, " Theseus", NULL))
+ return -ENODEV;
+ }
+
+ /*
+ * Some devices like the Esprimo C700 have both onboard devices,
+ * so we still have to check manually
+ */
address = sch56xx_find(0x4e, &name);
if (address < 0)
address = sch56xx_find(0x2e, &name);
diff --git a/drivers/hwmon/scpi-hwmon.c b/drivers/hwmon/scpi-hwmon.c
index 919877970ae3..5187c6dd5a4f 100644
--- a/drivers/hwmon/scpi-hwmon.c
+++ b/drivers/hwmon/scpi-hwmon.c
@@ -141,7 +141,6 @@ static int scpi_hwmon_probe(struct platform_device *pdev)
struct scpi_ops *scpi_ops;
struct device *hwdev, *dev = &pdev->dev;
struct scpi_sensors *scpi_sensors;
- const struct of_device_id *of_id;
int idx, ret;
scpi_ops = get_scpi_ops();
@@ -171,12 +170,11 @@ static int scpi_hwmon_probe(struct platform_device *pdev)
scpi_sensors->scpi_ops = scpi_ops;
- of_id = of_match_device(scpi_of_match, &pdev->dev);
- if (!of_id) {
+ scale = of_device_get_match_data(&pdev->dev);
+ if (!scale) {
dev_err(&pdev->dev, "Unable to initialize scpi-hwmon data\n");
return -ENODEV;
}
- scale = of_id->data;
for (i = 0, idx = 0; i < nr_sensors; i++) {
struct sensor_data *sensor = &scpi_sensors->data[idx];
diff --git a/drivers/hwmon/tc654.c b/drivers/hwmon/tc654.c
index a52ca72af120..54cd33d09688 100644
--- a/drivers/hwmon/tc654.c
+++ b/drivers/hwmon/tc654.c
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
+#include <linux/thermal.h>
#include <linux/util_macros.h>
enum tc654_regs {
@@ -379,28 +380,20 @@ static ssize_t pwm_show(struct device *dev, struct device_attribute *da,
return sprintf(buf, "%d\n", pwm);
}
-static ssize_t pwm_store(struct device *dev, struct device_attribute *da,
- const char *buf, size_t count)
+static int _set_pwm(struct tc654_data *data, unsigned long val)
{
- struct tc654_data *data = dev_get_drvdata(dev);
struct i2c_client *client = data->client;
- unsigned long val;
int ret;
- if (kstrtoul(buf, 10, &val))
- return -EINVAL;
- if (val > 255)
- return -EINVAL;
-
mutex_lock(&data->update_lock);
- if (val == 0)
+ if (val == 0) {
data->config |= TC654_REG_CONFIG_SDM;
- else
+ data->duty_cycle = 0;
+ } else {
data->config &= ~TC654_REG_CONFIG_SDM;
-
- data->duty_cycle = find_closest(val, tc654_pwm_map,
- ARRAY_SIZE(tc654_pwm_map));
+ data->duty_cycle = val - 1;
+ }
ret = i2c_smbus_write_byte_data(client, TC654_REG_CONFIG, data->config);
if (ret < 0)
@@ -411,6 +404,24 @@ static ssize_t pwm_store(struct device *dev, struct device_attribute *da,
out:
mutex_unlock(&data->update_lock);
+ return ret;
+}
+
+static ssize_t pwm_store(struct device *dev, struct device_attribute *da,
+ const char *buf, size_t count)
+{
+ struct tc654_data *data = dev_get_drvdata(dev);
+ unsigned long val;
+ int ret;
+
+ if (kstrtoul(buf, 10, &val))
+ return -EINVAL;
+ if (val > 255)
+ return -EINVAL;
+ if (val > 0)
+ val = find_closest(val, tc654_pwm_map, ARRAY_SIZE(tc654_pwm_map)) + 1;
+
+ ret = _set_pwm(data, val);
return ret < 0 ? ret : count;
}
@@ -443,6 +454,58 @@ static struct attribute *tc654_attrs[] = {
ATTRIBUTE_GROUPS(tc654);
/*
+ * thermal cooling device functions
+ *
+ * Account for the "ShutDown Mode (SDM)" state by offsetting
+ * the 16 PWM duty cycle states by 1.
+ *
+ * State 0 = 0% PWM | Shutdown - Fan(s) are off
+ * State 1 = 30% PWM | duty_cycle = 0
+ * State 2 = ~35% PWM | duty_cycle = 1
+ * [...]
+ * State 15 = ~95% PWM | duty_cycle = 14
+ * State 16 = 100% PWM | duty_cycle = 15
+ */
+#define TC654_MAX_COOLING_STATE 16
+
+static int tc654_get_max_state(struct thermal_cooling_device *cdev, unsigned long *state)
+{
+ *state = TC654_MAX_COOLING_STATE;
+ return 0;
+}
+
+static int tc654_get_cur_state(struct thermal_cooling_device *cdev, unsigned long *state)
+{
+ struct tc654_data *data = tc654_update_client(cdev->devdata);
+
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ if (data->config & TC654_REG_CONFIG_SDM)
+ *state = 0; /* FAN is off */
+ else
+ *state = data->duty_cycle + 1; /* offset PWM States by 1 */
+
+ return 0;
+}
+
+static int tc654_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state)
+{
+ struct tc654_data *data = tc654_update_client(cdev->devdata);
+
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ return _set_pwm(data, clamp_val(state, 0, TC654_MAX_COOLING_STATE));
+}
+
+static const struct thermal_cooling_device_ops tc654_fan_cool_ops = {
+ .get_max_state = tc654_get_max_state,
+ .get_cur_state = tc654_get_cur_state,
+ .set_cur_state = tc654_set_cur_state,
+};
+
+/*
* device probe and removal
*/
@@ -472,7 +535,18 @@ static int tc654_probe(struct i2c_client *client)
hwmon_dev =
devm_hwmon_device_register_with_groups(dev, client->name, data,
tc654_groups);
- return PTR_ERR_OR_ZERO(hwmon_dev);
+ if (IS_ERR(hwmon_dev))
+ return PTR_ERR(hwmon_dev);
+
+ if (IS_ENABLED(CONFIG_THERMAL)) {
+ struct thermal_cooling_device *cdev;
+
+ cdev = devm_thermal_of_cooling_device_register(dev, dev->of_node, client->name,
+ hwmon_dev, &tc654_fan_cool_ops);
+ return PTR_ERR_OR_ZERO(cdev);
+ }
+
+ return 0;
}
static const struct i2c_device_id tc654_id[] = {
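
The cooling-device comment above pins the contract: state 0 is ShutDown Mode (fans off) and states 1..16 map onto duty_cycle 0..15. A standalone round-trip check of that offset-by-one mapping:

#include <stdio.h>
#include <stdbool.h>

#define MAX_STATE 16

struct fan { bool sdm; unsigned int duty_cycle; };

static void set_state(struct fan *f, unsigned long state)
{
	if (state == 0) {
		f->sdm = true;			/* ShutDown Mode: fan off */
		f->duty_cycle = 0;
	} else {
		f->sdm = false;
		f->duty_cycle = state - 1;	/* states 1..16 -> 0..15 */
	}
}

static unsigned long get_state(const struct fan *f)
{
	return f->sdm ? 0 : f->duty_cycle + 1;
}

int main(void)
{
	struct fan f;

	for (unsigned long s = 0; s <= MAX_STATE; s++) {
		set_state(&f, s);
		printf("state %2lu -> sdm=%d duty=%2u -> state %2lu\n",
		       s, f.sdm, f.duty_cycle, get_state(&f));
	}
	return 0;
}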
diff --git a/drivers/hwmon/tmp464.c b/drivers/hwmon/tmp464.c
new file mode 100644
index 000000000000..7814f39bd1a3
--- /dev/null
+++ b/drivers/hwmon/tmp464.c
@@ -0,0 +1,712 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* Driver for the Texas Instruments TMP464 SMBus temperature sensor IC.
+ * Supported models: TMP464, TMP468
+
+ * Copyright (C) 2022 Agathe Porte <agathe.porte@nokia.com>
+ * Preliminary support by:
+ * Lionel Pouliquen <lionel.lp.pouliquen@nokia.com>
+ */
+
+#include <linux/err.h>
+#include <linux/hwmon.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+/* Addresses to scan */
+static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b, I2C_CLIENT_END };
+
+#define TMP464_NUM_CHANNELS 5 /* chan 0 is internal, 1-4 are remote */
+#define TMP468_NUM_CHANNELS 9 /* chan 0 is internal, 1-8 are remote */
+
+#define MAX_CHANNELS 9
+
+#define TMP464_TEMP_REG(channel) (channel)
+#define TMP464_TEMP_OFFSET_REG(channel) (0x40 + ((channel) - 1) * 8)
+#define TMP464_N_FACTOR_REG(channel) (0x41 + ((channel) - 1) * 8)
+
+static const u8 TMP464_THERM_LIMIT[MAX_CHANNELS] = {
+ 0x39, 0x42, 0x4A, 0x52, 0x5A, 0x62, 0x6a, 0x72, 0x7a };
+static const u8 TMP464_THERM2_LIMIT[MAX_CHANNELS] = {
+ 0x3A, 0x43, 0x4B, 0x53, 0x5B, 0x63, 0x6b, 0x73, 0x7b };
+
+#define TMP464_THERM_STATUS_REG 0x21
+#define TMP464_THERM2_STATUS_REG 0x22
+#define TMP464_REMOTE_OPEN_REG 0x23
+#define TMP464_CONFIG_REG 0x30
+#define TMP464_TEMP_HYST_REG 0x38
+#define TMP464_LOCK_REG 0xc4
+
+/* Identification */
+#define TMP464_MANUFACTURER_ID_REG 0xFE
+#define TMP464_DEVICE_ID_REG 0xFF
+
+/* Flags */
+#define TMP464_CONFIG_SHUTDOWN BIT(5)
+#define TMP464_CONFIG_RANGE 0x04
+#define TMP464_CONFIG_REG_REN(x) (BIT(7 + (x)))
+#define TMP464_CONFIG_REG_REN_MASK GENMASK(15, 7)
+#define TMP464_CONFIG_CONVERSION_RATE_B0 2
+#define TMP464_CONFIG_CONVERSION_RATE_B2 4
+#define TMP464_CONFIG_CONVERSION_RATE_MASK GENMASK(TMP464_CONFIG_CONVERSION_RATE_B2, \
+ TMP464_CONFIG_CONVERSION_RATE_B0)
+
+#define TMP464_UNLOCK_VAL 0xeb19
+#define TMP464_LOCK_VAL 0x5ca6
+#define TMP464_LOCKED 0x8000
+
+/* Manufacturer / Device ID's */
+#define TMP464_MANUFACTURER_ID 0x5449
+#define TMP464_DEVICE_ID 0x1468
+#define TMP468_DEVICE_ID 0x0468
+
+static const struct i2c_device_id tmp464_id[] = {
+ { "tmp464", TMP464_NUM_CHANNELS },
+ { "tmp468", TMP468_NUM_CHANNELS },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, tmp464_id);
+
+static const struct of_device_id __maybe_unused tmp464_of_match[] = {
+ {
+ .compatible = "ti,tmp464",
+ .data = (void *)TMP464_NUM_CHANNELS
+ },
+ {
+ .compatible = "ti,tmp468",
+ .data = (void *)TMP468_NUM_CHANNELS
+ },
+ {},
+};
+MODULE_DEVICE_TABLE(of, tmp464_of_match);
+
+struct tmp464_channel {
+ const char *label;
+ bool enabled;
+};
+
+struct tmp464_data {
+ struct regmap *regmap;
+ struct mutex update_lock;
+ int channels;
+ s16 config_orig;
+ u16 open_reg;
+ unsigned long last_updated;
+ bool valid;
+ int update_interval;
+ struct tmp464_channel channel[MAX_CHANNELS];
+};
+
+static int temp_from_reg(s16 reg)
+{
+ return DIV_ROUND_CLOSEST((reg >> 3) * 625, 10);
+}
+
+static s16 temp_to_limit_reg(long temp)
+{
+ return DIV_ROUND_CLOSEST(temp, 500) << 6;
+}
+
+static s16 temp_to_offset_reg(long temp)
+{
+ return DIV_ROUND_CLOSEST(temp * 10, 625) << 3;
+}
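
A worked round-trip through the helpers above, runnable on its own: temperatures are handled as millidegrees Celsius, measurement registers carry 0.0625 degC per LSB in bits 15:3, and limit registers carry 0.5 degC steps in bits 15:6 (this sketch sticks to positive values, where the rounding helper below is valid):

#include <stdio.h>
#include <stdint.h>

/* Round-to-nearest division for non-negative operands
 * (kernel DIV_ROUND_CLOSEST, positive case) */
static long div_round_closest(long n, long d)
{
	return (n + d / 2) / d;
}

/* 0.0625 degC per LSB, value left-justified in bits 15:3 */
static long temp_from_reg(int16_t reg)
{
	return div_round_closest((reg >> 3) * 625L, 10);
}

/* Limit registers hold 0.5 degC steps in bits 15:6 */
static int16_t temp_to_limit_reg(long mdegc)
{
	return (int16_t)(div_round_closest(mdegc, 500) << 6);
}

int main(void)
{
	long t = 125000;	/* 125 degC in millidegrees */
	int16_t reg = temp_to_limit_reg(t);

	/* 125000 -> 0x3e80 -> 125000: the formats round-trip exactly */
	printf("%ld mdegC -> reg 0x%04x -> %ld mdegC\n",
	       t, (uint16_t)reg, temp_from_reg(reg));
	return 0;
}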
+
+static int tmp464_enable_channels(struct tmp464_data *data)
+{
+ struct regmap *regmap = data->regmap;
+ u16 enable = 0;
+ int i;
+
+ for (i = 0; i < data->channels; i++)
+ if (data->channel[i].enabled)
+ enable |= TMP464_CONFIG_REG_REN(i);
+
+ return regmap_update_bits(regmap, TMP464_CONFIG_REG, TMP464_CONFIG_REG_REN_MASK, enable);
+}
+
+static int tmp464_chip_read(struct device *dev, u32 attr, int channel, long *val)
+{
+ struct tmp464_data *data = dev_get_drvdata(dev);
+
+ switch (attr) {
+ case hwmon_chip_update_interval:
+ *val = data->update_interval;
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int tmp464_temp_read(struct device *dev, u32 attr, int channel, long *val)
+{
+ struct tmp464_data *data = dev_get_drvdata(dev);
+ struct regmap *regmap = data->regmap;
+ unsigned int regval, regval2;
+ int err = 0;
+
+ mutex_lock(&data->update_lock);
+
+ switch (attr) {
+ case hwmon_temp_max_alarm:
+ err = regmap_read(regmap, TMP464_THERM_STATUS_REG, &regval);
+ if (err < 0)
+ break;
+ *val = !!(regval & BIT(channel + 7));
+ break;
+ case hwmon_temp_crit_alarm:
+ err = regmap_read(regmap, TMP464_THERM2_STATUS_REG, &regval);
+ if (err < 0)
+ break;
+ *val = !!(regval & BIT(channel + 7));
+ break;
+ case hwmon_temp_fault:
+ /*
+ * The chip clears TMP464_REMOTE_OPEN_REG after it is read
+ * and only updates it after the next measurement cycle is
+ * complete. That means we have to cache the value internally
+ * for one measurement cycle and report the cached value.
+ */
+ if (!data->valid || time_after(jiffies, data->last_updated +
+ msecs_to_jiffies(data->update_interval))) {
+ err = regmap_read(regmap, TMP464_REMOTE_OPEN_REG, &regval);
+ if (err < 0)
+ break;
+ data->open_reg = regval;
+ data->last_updated = jiffies;
+ data->valid = true;
+ }
+ *val = !!(data->open_reg & BIT(channel + 7));
+ break;
+ case hwmon_temp_max_hyst:
+ err = regmap_read(regmap, TMP464_THERM_LIMIT[channel], &regval);
+ if (err < 0)
+ break;
+ err = regmap_read(regmap, TMP464_TEMP_HYST_REG, &regval2);
+ if (err < 0)
+ break;
+ regval -= regval2;
+ *val = temp_from_reg(regval);
+ break;
+ case hwmon_temp_max:
+ err = regmap_read(regmap, TMP464_THERM_LIMIT[channel], &regval);
+ if (err < 0)
+ break;
+ *val = temp_from_reg(regval);
+ break;
+ case hwmon_temp_crit_hyst:
+ err = regmap_read(regmap, TMP464_THERM2_LIMIT[channel], &regval);
+ if (err < 0)
+ break;
+ err = regmap_read(regmap, TMP464_TEMP_HYST_REG, &regval2);
+ if (err < 0)
+ break;
+ regval -= regval2;
+ *val = temp_from_reg(regval);
+ break;
+ case hwmon_temp_crit:
+ err = regmap_read(regmap, TMP464_THERM2_LIMIT[channel], &regval);
+ if (err < 0)
+ break;
+ *val = temp_from_reg(regval);
+ break;
+ case hwmon_temp_offset:
+ err = regmap_read(regmap, TMP464_TEMP_OFFSET_REG(channel), &regval);
+ if (err < 0)
+ break;
+ *val = temp_from_reg(regval);
+ break;
+ case hwmon_temp_input:
+ if (!data->channel[channel].enabled) {
+ err = -ENODATA;
+ break;
+ }
+ err = regmap_read(regmap, TMP464_TEMP_REG(channel), &regval);
+ if (err < 0)
+ break;
+ *val = temp_from_reg(regval);
+ break;
+ case hwmon_temp_enable:
+ *val = data->channel[channel].enabled;
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ mutex_unlock(&data->update_lock);
+
+ return err;
+}
+
+static int tmp464_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ switch (type) {
+ case hwmon_chip:
+ return tmp464_chip_read(dev, attr, channel, val);
+ case hwmon_temp:
+ return tmp464_temp_read(dev, attr, channel, val);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int tmp464_read_string(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, const char **str)
+{
+ struct tmp464_data *data = dev_get_drvdata(dev);
+
+ *str = data->channel[channel].label;
+
+ return 0;
+}
+
+static int tmp464_set_convrate(struct tmp464_data *data, long interval)
+{
+ int rate;
+
+ /*
+ * For valid rates, interval in milli-seconds can be calculated as
+ * interval = 125 << (7 - rate);
+ * or
+ * interval = (1 << (7 - rate)) * 125;
+ * The rate is therefore
+ * rate = 7 - __fls(interval / 125);
+ * and the rounded rate is
+ * rate = 7 - __fls(interval * 4 / (125 * 3));
+ * Use clamp_val() to avoid overflows, and to ensure valid input
+ * for __fls.
+ */
+ interval = clamp_val(interval, 125, 16000);
+ rate = 7 - __fls(interval * 4 / (125 * 3));
+ data->update_interval = 125 << (7 - rate);
+
+ return regmap_update_bits(data->regmap, TMP464_CONFIG_REG,
+ TMP464_CONFIG_CONVERSION_RATE_MASK,
+ rate << TMP464_CONFIG_CONVERSION_RATE_B0);
+}
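
Tabulating the rounding formula above for a few requested intervals (a quick standalone check using the same arithmetic as the kernel comment):

#include <stdio.h>

static unsigned int fls0(unsigned long x)	/* kernel __fls: 0-based MSB */
{
	unsigned int n = 0;

	while (x >>= 1)
		n++;
	return n;
}

static long clamp_long(long v, long lo, long hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
	long requests[] = { 100, 125, 180, 190, 500, 3000, 20000 };

	for (int i = 0; i < 7; i++) {
		long interval = clamp_long(requests[i], 125, 16000);
		int rate = 7 - fls0(interval * 4 / (125 * 3));

		printf("request %5ld ms -> rate %d -> actual %5d ms\n",
		       requests[i], rate, 125 << (7 - rate));
	}
	return 0;
}

Requests land on the nearest of the eight supported intervals (125 ms up to 16 s): 180 ms rounds down to 125 ms, 190 ms rounds up to 250 ms, and out-of-range values are clamped first.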
+
+static int tmp464_chip_write(struct tmp464_data *data, u32 attr, int channel, long val)
+{
+ switch (attr) {
+ case hwmon_chip_update_interval:
+ return tmp464_set_convrate(data, val);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int tmp464_temp_write(struct tmp464_data *data, u32 attr, int channel, long val)
+{
+ struct regmap *regmap = data->regmap;
+ unsigned int regval;
+ int err = 0;
+
+ switch (attr) {
+ case hwmon_temp_max_hyst:
+ err = regmap_read(regmap, TMP464_THERM_LIMIT[0], &regval);
+ if (err < 0)
+ break;
+ val = clamp_val(val, -256000, 256000); /* prevent overflow/underflow */
+ val = clamp_val(temp_from_reg(regval) - val, 0, 255000);
+ err = regmap_write(regmap, TMP464_TEMP_HYST_REG,
+ DIV_ROUND_CLOSEST(val, 1000) << 7);
+ break;
+ case hwmon_temp_max:
+ val = temp_to_limit_reg(clamp_val(val, -255000, 255500));
+ err = regmap_write(regmap, TMP464_THERM_LIMIT[channel], val);
+ break;
+ case hwmon_temp_crit:
+ val = temp_to_limit_reg(clamp_val(val, -255000, 255500));
+ err = regmap_write(regmap, TMP464_THERM2_LIMIT[channel], val);
+ break;
+ case hwmon_temp_offset:
+ val = temp_to_offset_reg(clamp_val(val, -128000, 127937));
+ err = regmap_write(regmap, TMP464_TEMP_OFFSET_REG(channel), val);
+ break;
+ case hwmon_temp_enable:
+ data->channel[channel].enabled = !!val;
+ err = tmp464_enable_channels(data);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ return err;
+}
+
+static int tmp464_write(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long val)
+{
+ struct tmp464_data *data = dev_get_drvdata(dev);
+ int err;
+
+ mutex_lock(&data->update_lock);
+
+ switch (type) {
+ case hwmon_chip:
+ err = tmp464_chip_write(data, attr, channel, val);
+ break;
+ case hwmon_temp:
+ err = tmp464_temp_write(data, attr, channel, val);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ mutex_unlock(&data->update_lock);
+
+ return err;
+}
+
+static umode_t tmp464_is_visible(const void *_data, enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ const struct tmp464_data *data = _data;
+
+ if (channel >= data->channels)
+ return 0;
+
+ if (type == hwmon_chip) {
+ if (attr == hwmon_chip_update_interval)
+ return 0644;
+ return 0;
+ }
+
+ switch (attr) {
+ case hwmon_temp_input:
+ case hwmon_temp_max_alarm:
+ case hwmon_temp_crit_alarm:
+ case hwmon_temp_crit_hyst:
+ return 0444;
+ case hwmon_temp_enable:
+ case hwmon_temp_max:
+ case hwmon_temp_crit:
+ return 0644;
+ case hwmon_temp_max_hyst:
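+ /* the single shared hysteresis register is writable through channel 0 only */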
+ if (!channel)
+ return 0644;
+ return 0444;
+ case hwmon_temp_label:
+ if (data->channel[channel].label)
+ return 0444;
+ return 0;
+ case hwmon_temp_fault:
+ if (channel)
+ return 0444;
+ return 0;
+ case hwmon_temp_offset:
+ if (channel)
+ return 0644;
+ return 0;
+ default:
+ return 0;
+ }
+}
+
+static void tmp464_restore_lock(void *regmap)
+{
+ regmap_write(regmap, TMP464_LOCK_REG, TMP464_LOCK_VAL);
+}
+
+static void tmp464_restore_config(void *_data)
+{
+ struct tmp464_data *data = _data;
+
+ regmap_write(data->regmap, TMP464_CONFIG_REG, data->config_orig);
+}
+
+static int tmp464_init_client(struct device *dev, struct tmp464_data *data)
+{
+ struct regmap *regmap = data->regmap;
+ unsigned int regval;
+ int err;
+
+ err = regmap_read(regmap, TMP464_LOCK_REG, &regval);
+ if (err)
+ return err;
+ if (regval == TMP464_LOCKED) {
+ /* Explicitly unlock chip if it is locked */
+ err = regmap_write(regmap, TMP464_LOCK_REG, TMP464_UNLOCK_VAL);
+ if (err)
+ return err;
+ /* and lock it again when unloading the driver */
+ err = devm_add_action_or_reset(dev, tmp464_restore_lock, regmap);
+ if (err)
+ return err;
+ }
+
+ err = regmap_read(regmap, TMP464_CONFIG_REG, &regval);
+ if (err)
+ return err;
+ data->config_orig = regval;
+ err = devm_add_action_or_reset(dev, tmp464_restore_config, data);
+ if (err)
+ return err;
+
+ /* Default to 500 ms update interval */
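+ /* Rate 5 (bits B0 | B2) gives 125 << 2 = 500 ms; the write also clears SHUTDOWN. */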
+ err = regmap_update_bits(regmap, TMP464_CONFIG_REG,
+ TMP464_CONFIG_CONVERSION_RATE_MASK | TMP464_CONFIG_SHUTDOWN,
+ BIT(TMP464_CONFIG_CONVERSION_RATE_B0) |
+ BIT(TMP464_CONFIG_CONVERSION_RATE_B2));
+ if (err)
+ return err;
+
+ data->update_interval = 500;
+
+ return tmp464_enable_channels(data);
+}
+
+static int tmp464_detect(struct i2c_client *client,
+ struct i2c_board_info *info)
+{
+ struct i2c_adapter *adapter = client->adapter;
+ char *name, *chip;
+ int reg;
+
+ if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WORD_DATA))
+ return -ENODEV;
+
+ reg = i2c_smbus_read_word_swapped(client, TMP464_MANUFACTURER_ID_REG);
+ if (reg < 0)
+ return reg;
+ if (reg != TMP464_MANUFACTURER_ID)
+ return -ENODEV;
+
+ /* Check for "always return zero" bits */
+ reg = i2c_smbus_read_word_swapped(client, TMP464_THERM_STATUS_REG);
+ if (reg < 0)
+ return reg;
+ if (reg & 0x1f)
+ return -ENODEV;
+ reg = i2c_smbus_read_word_swapped(client, TMP464_THERM2_STATUS_REG);
+ if (reg < 0)
+ return reg;
+ if (reg & 0x1f)
+ return -ENODEV;
+
+ reg = i2c_smbus_read_word_swapped(client, TMP464_DEVICE_ID_REG);
+ if (reg < 0)
+ return reg;
+ switch (reg) {
+ case TMP464_DEVICE_ID:
+ name = "tmp464";
+ chip = "TMP464";
+ break;
+ case TMP468_DEVICE_ID:
+ name = "tmp468";
+ chip = "TMP468";
+ break;
+ default:
+ return -ENODEV;
+ }
+
+ strscpy(info->type, name, I2C_NAME_SIZE);
+ dev_info(&adapter->dev, "Detected TI %s chip at 0x%02x\n", chip, client->addr);
+
+ return 0;
+}
+
+static int tmp464_probe_child_from_dt(struct device *dev,
+ struct device_node *child,
+ struct tmp464_data *data)
+{
+ struct regmap *regmap = data->regmap;
+ u32 channel;
+ s32 nfactor;
+ int err;
+
+ err = of_property_read_u32(child, "reg", &channel);
+ if (err) {
+ dev_err(dev, "missing reg property of %pOFn\n", child);
+ return err;
+ }
+
+ if (channel >= data->channels) {
+ dev_err(dev, "invalid reg %d of %pOFn\n", channel, child);
+ return -EINVAL;
+ }
+
+ of_property_read_string(child, "label", &data->channel[channel].label);
+
+ data->channel[channel].enabled = of_device_is_available(child);
+
+ err = of_property_read_s32(child, "ti,n-factor", &nfactor);
+ if (err && err != -EINVAL)
+ return err;
+ if (!err) {
+ if (channel == 0) {
+ dev_err(dev, "n-factor can't be set for internal channel\n");
+ return -EINVAL;
+ }
+ if (nfactor > 127 || nfactor < -128) {
+ dev_err(dev, "n-factor for channel %d invalid (%d)\n",
+ channel, nfactor);
+ return -EINVAL;
+ }
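+ /* The signed 8-bit n-factor is stored in the upper byte of the register */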
+ err = regmap_write(regmap, TMP464_N_FACTOR_REG(channel),
+ (nfactor << 8) & 0xff00);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int tmp464_probe_from_dt(struct device *dev, struct tmp464_data *data)
+{
+ const struct device_node *np = dev->of_node;
+ struct device_node *child;
+ int err;
+
+ for_each_child_of_node(np, child) {
+ if (strcmp(child->name, "channel"))
+ continue;
+
+ err = tmp464_probe_child_from_dt(dev, child, data);
+ if (err) {
+ of_node_put(child);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static const struct hwmon_ops tmp464_ops = {
+ .is_visible = tmp464_is_visible,
+ .read = tmp464_read,
+ .read_string = tmp464_read_string,
+ .write = tmp464_write,
+};
+
+static const struct hwmon_channel_info *tmp464_info[] = {
+ HWMON_CHANNEL_INFO(chip,
+ HWMON_C_UPDATE_INTERVAL),
+ HWMON_CHANNEL_INFO(temp,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MAX_HYST | HWMON_T_CRIT |
+ HWMON_T_CRIT_HYST | HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM |
+ HWMON_T_LABEL | HWMON_T_ENABLE,
+ HWMON_T_INPUT | HWMON_T_OFFSET | HWMON_T_MAX | HWMON_T_MAX_HYST |
+ HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MAX_ALARM |
+ HWMON_T_CRIT_ALARM | HWMON_T_FAULT | HWMON_T_LABEL | HWMON_T_ENABLE,
+ HWMON_T_INPUT | HWMON_T_OFFSET | HWMON_T_MAX | HWMON_T_MAX_HYST |
+ HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MAX_ALARM |
+ HWMON_T_CRIT_ALARM | HWMON_T_FAULT | HWMON_T_LABEL | HWMON_T_ENABLE,
+ HWMON_T_INPUT | HWMON_T_OFFSET | HWMON_T_MAX | HWMON_T_MAX_HYST |
+ HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MAX_ALARM |
+ HWMON_T_CRIT_ALARM | HWMON_T_FAULT | HWMON_T_LABEL | HWMON_T_ENABLE,
+ HWMON_T_INPUT | HWMON_T_OFFSET | HWMON_T_MAX | HWMON_T_MAX_HYST |
+ HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MAX_ALARM |
+ HWMON_T_CRIT_ALARM | HWMON_T_FAULT | HWMON_T_LABEL | HWMON_T_ENABLE,
+ HWMON_T_INPUT | HWMON_T_OFFSET | HWMON_T_MAX | HWMON_T_MAX_HYST |
+ HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MAX_ALARM |
+ HWMON_T_CRIT_ALARM | HWMON_T_FAULT | HWMON_T_LABEL | HWMON_T_ENABLE,
+ HWMON_T_INPUT | HWMON_T_OFFSET | HWMON_T_MAX | HWMON_T_MAX_HYST |
+ HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MAX_ALARM |
+ HWMON_T_CRIT_ALARM | HWMON_T_FAULT | HWMON_T_LABEL | HWMON_T_ENABLE,
+ HWMON_T_INPUT | HWMON_T_OFFSET | HWMON_T_MAX | HWMON_T_MAX_HYST |
+ HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MAX_ALARM |
+ HWMON_T_CRIT_ALARM | HWMON_T_FAULT | HWMON_T_LABEL | HWMON_T_ENABLE,
+ HWMON_T_INPUT | HWMON_T_OFFSET | HWMON_T_MAX | HWMON_T_MAX_HYST |
+ HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MAX_ALARM |
+ HWMON_T_CRIT_ALARM | HWMON_T_FAULT | HWMON_T_LABEL | HWMON_T_ENABLE),
+ NULL
+};
+
+static const struct hwmon_chip_info tmp464_chip_info = {
+ .ops = &tmp464_ops,
+ .info = tmp464_info,
+};
+
+/* regmap */
+
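+/*
+ * Temperature and status registers are updated by the chip itself and must
+ * bypass the regmap cache.
+ */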
+static bool tmp464_is_volatile_reg(struct device *dev, unsigned int reg)
+{
+ return (reg < TMP464_TEMP_REG(TMP468_NUM_CHANNELS) ||
+ reg == TMP464_THERM_STATUS_REG ||
+ reg == TMP464_THERM2_STATUS_REG ||
+ reg == TMP464_REMOTE_OPEN_REG);
+}
+
+static const struct regmap_config tmp464_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 16,
+ .max_register = TMP464_DEVICE_ID_REG,
+ .volatile_reg = tmp464_is_volatile_reg,
+ .val_format_endian = REGMAP_ENDIAN_BIG,
+ .cache_type = REGCACHE_RBTREE,
+ .use_single_read = true,
+ .use_single_write = true,
+};
+
+static int tmp464_probe(struct i2c_client *client)
+{
+ struct device *dev = &client->dev;
+ struct device *hwmon_dev;
+ struct tmp464_data *data;
+ int i, err;
+
+ if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_WORD_DATA)) {
+ dev_err(&client->dev, "i2c functionality check failed\n");
+ return -ENODEV;
+ }
+ data = devm_kzalloc(dev, sizeof(struct tmp464_data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ mutex_init(&data->update_lock);
+
+ if (dev->of_node)
+ data->channels = (int)(unsigned long)of_device_get_match_data(&client->dev);
+ else
+ data->channels = i2c_match_id(tmp464_id, client)->driver_data;
+
+ data->regmap = devm_regmap_init_i2c(client, &tmp464_regmap_config);
+ if (IS_ERR(data->regmap))
+ return PTR_ERR(data->regmap);
+
+ for (i = 0; i < data->channels; i++)
+ data->channel[i].enabled = true;
+
+ err = tmp464_init_client(dev, data);
+ if (err)
+ return err;
+
+ if (dev->of_node) {
+ err = tmp464_probe_from_dt(dev, data);
+ if (err)
+ return err;
+ }
+
+ hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name,
+ data, &tmp464_chip_info, NULL);
+ return PTR_ERR_OR_ZERO(hwmon_dev);
+}
+
+static struct i2c_driver tmp464_driver = {
+ .class = I2C_CLASS_HWMON,
+ .driver = {
+ .name = "tmp464",
+ .of_match_table = of_match_ptr(tmp464_of_match),
+ },
+ .probe_new = tmp464_probe,
+ .id_table = tmp464_id,
+ .detect = tmp464_detect,
+ .address_list = normal_i2c,
+};
+
+module_i2c_driver(tmp464_driver);
+
+MODULE_AUTHOR("Agathe Porte <agathe.porte@nokia.com>");
+MODULE_DESCRIPTION("Texas Instruments TMP464 temperature sensor driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/hwmon/vexpress-hwmon.c b/drivers/hwmon/vexpress-hwmon.c
index 44d798be3d59..2ac5fb96bba4 100644
--- a/drivers/hwmon/vexpress-hwmon.c
+++ b/drivers/hwmon/vexpress-hwmon.c
@@ -207,7 +207,6 @@ MODULE_DEVICE_TABLE(of, vexpress_hwmon_of_match);
static int vexpress_hwmon_probe(struct platform_device *pdev)
{
- const struct of_device_id *match;
struct vexpress_hwmon_data *data;
const struct vexpress_hwmon_type *type;
@@ -216,10 +215,9 @@ static int vexpress_hwmon_probe(struct platform_device *pdev)
return -ENOMEM;
platform_set_drvdata(pdev, data);
- match = of_match_device(vexpress_hwmon_of_match, &pdev->dev);
- if (!match)
+ type = of_device_get_match_data(&pdev->dev);
+ if (!type)
return -ENODEV;
- type = match->data;
data->reg = devm_regmap_init_vexpress_config(&pdev->dev);
if (IS_ERR(data->reg))
diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c
index 432ade0842f6..70a07b4e9967 100644
--- a/drivers/hwtracing/intel_th/msu.c
+++ b/drivers/hwtracing/intel_th/msu.c
@@ -658,13 +658,11 @@ static void msc_buffer_clear_hw_header(struct msc *msc)
list_for_each_entry(win, &msc->win_list, entry) {
unsigned int blk;
- size_t hw_sz = sizeof(struct msc_block_desc) -
- offsetof(struct msc_block_desc, hw_tag);
for_each_sg(win->sgt->sgl, sg, win->nr_segs, blk) {
struct msc_block_desc *bdesc = sg_virt(sg);
- memset(&bdesc->hw_tag, 0, hw_sz);
+ memset_startat(bdesc, 0, hw_tag);
}
}
}
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 42da31c1ab70..8a6c6ee28556 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -488,7 +488,7 @@ config I2C_BRCMSTB
config I2C_CADENCE
tristate "Cadence I2C Controller"
- depends on ARCH_ZYNQ || ARM64 || XTENSA
+ depends on ARCH_ZYNQ || ARM64 || XTENSA || COMPILE_TEST
help
Say yes here to select Cadence I2C Host Controller. This controller is
e.g. used by Xilinx Zynq.
@@ -680,7 +680,7 @@ config I2C_IMG
config I2C_IMX
tristate "IMX I2C interface"
- depends on ARCH_MXC || ARCH_LAYERSCAPE || COLDFIRE
+ depends on ARCH_MXC || ARCH_LAYERSCAPE || COLDFIRE || COMPILE_TEST
select I2C_SLAVE
help
Say Y here if you want to use the IIC bus controller on
@@ -935,7 +935,7 @@ config I2C_QCOM_GENI
config I2C_QUP
tristate "Qualcomm QUP based I2C controller"
- depends on ARCH_QCOM
+ depends on ARCH_QCOM || COMPILE_TEST
help
If you say yes to this option, support will be included for the
built-in I2C interface on the Qualcomm SoCs.
diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c
index dfc534065595..5149454eef4a 100644
--- a/drivers/i2c/busses/i2c-bcm2835.c
+++ b/drivers/i2c/busses/i2c-bcm2835.c
@@ -23,6 +23,11 @@
#define BCM2835_I2C_FIFO 0x10
#define BCM2835_I2C_DIV 0x14
#define BCM2835_I2C_DEL 0x18
+/*
+ * 16-bit field for the number of SCL cycles to wait after rising SCL
+ * before deciding the slave is not responding. 0 disables the
+ * timeout detection.
+ */
#define BCM2835_I2C_CLKT 0x1c
#define BCM2835_I2C_C_READ BIT(0)
@@ -474,6 +479,12 @@ static int bcm2835_i2c_probe(struct platform_device *pdev)
adap->dev.of_node = pdev->dev.of_node;
adap->quirks = of_device_get_match_data(&pdev->dev);
+ /*
+ * Disable the hardware clock stretching timeout. SMBus
+ * specifies a limit for how long the device can stretch the
+ * clock, but core I2C doesn't.
+ */
+ bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_CLKT, 0);
bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_C, 0);
ret = i2c_add_adapter(adap);
diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c
index 490ee3962645..b00f35c0b066 100644
--- a/drivers/i2c/busses/i2c-brcmstb.c
+++ b/drivers/i2c/busses/i2c-brcmstb.c
@@ -673,7 +673,7 @@ static int brcmstb_i2c_probe(struct platform_device *pdev)
/* set the data in/out register size for compatible SoCs */
if (of_device_is_compatible(dev->device->of_node,
- "brcmstb,brcmper-i2c"))
+ "brcm,brcmper-i2c"))
dev->data_regsz = sizeof(u8);
else
dev->data_regsz = sizeof(u32);
diff --git a/drivers/i2c/busses/i2c-qcom-cci.c b/drivers/i2c/busses/i2c-qcom-cci.c
index c1de8eb66169..cf54f1cb4c57 100644
--- a/drivers/i2c/busses/i2c-qcom-cci.c
+++ b/drivers/i2c/busses/i2c-qcom-cci.c
@@ -558,7 +558,7 @@ static int cci_probe(struct platform_device *pdev)
cci->master[idx].adap.quirks = &cci->data->quirks;
cci->master[idx].adap.algo = &cci_algo;
cci->master[idx].adap.dev.parent = dev;
- cci->master[idx].adap.dev.of_node = child;
+ cci->master[idx].adap.dev.of_node = of_node_get(child);
cci->master[idx].master = idx;
cci->master[idx].cci = cci;
@@ -643,8 +643,10 @@ static int cci_probe(struct platform_device *pdev)
continue;
ret = i2c_add_adapter(&cci->master[i].adap);
- if (ret < 0)
+ if (ret < 0) {
+ of_node_put(cci->master[i].adap.dev.of_node);
goto error_i2c;
+ }
}
pm_runtime_set_autosuspend_delay(dev, MSEC_PER_SEC);
@@ -655,9 +657,11 @@ static int cci_probe(struct platform_device *pdev)
return 0;
error_i2c:
- for (; i >= 0; i--) {
- if (cci->master[i].cci)
+ for (--i; i >= 0; i--) {
+ if (cci->master[i].cci) {
i2c_del_adapter(&cci->master[i].adap);
+ of_node_put(cci->master[i].adap.dev.of_node);
+ }
}
error:
disable_irq(cci->irq);
@@ -673,8 +677,10 @@ static int cci_remove(struct platform_device *pdev)
int i;
for (i = 0; i < cci->data->num_masters; i++) {
- if (cci->master[i].cci)
+ if (cci->master[i].cci) {
i2c_del_adapter(&cci->master[i].adap);
+ of_node_put(cci->master[i].adap.dev.of_node);
+ }
cci_halt(cci, i);
}
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 0b66e25c0e2d..b7640cfe0020 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -64,6 +64,7 @@ static struct cpuidle_driver intel_idle_driver = {
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;
static unsigned int disabled_states_mask;
+static unsigned int preferred_states_mask;
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
@@ -121,9 +122,6 @@ static unsigned int mwait_substates __initdata;
* If the local APIC timer is not known to be reliable in the target idle state,
* enable one-shot tick broadcasting for the target CPU before executing MWAIT.
*
- * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to
- * flushing user TLBs.
- *
* Must be called under local_irq_disable().
*/
static __cpuidle int intel_idle(struct cpuidle_device *dev,
@@ -761,6 +759,46 @@ static struct cpuidle_state icx_cstates[] __initdata = {
.enter = NULL }
};
+/*
+ * On Sapphire Rapids Xeon, C1 has to be disabled if C1E is enabled, and vice
+ * versa. On SPR, C1E is enabled only if the "C1E promotion" bit is set in
+ * MSR_IA32_POWER_CTL. But in that case there is effectively no C1, because
+ * C1 requests are promoted to C1E. If the "C1E promotion" bit is cleared,
+ * then both C1 and C1E requests end up in C1, so there is effectively no C1E.
+ *
+ * By default, we enable C1 and disable C1E by marking it with
+ * 'CPUIDLE_FLAG_UNUSABLE'.
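+ *
+ * The 'preferred_cstates' module parameter can be used to select C1E
+ * instead; see spr_idle_state_table_update().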
+ */
+static struct cpuidle_state spr_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00),
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C1E",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE |
+ CPUIDLE_FLAG_UNUSABLE,
+ .exit_latency = 2,
+ .target_residency = 4,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 290,
+ .target_residency = 800,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .enter = NULL }
+};
+
static struct cpuidle_state atom_cstates[] __initdata = {
{
.name = "C1E",
@@ -1104,6 +1142,12 @@ static const struct idle_cpu idle_cpu_icx __initconst = {
.use_acpi = true,
};
+static const struct idle_cpu idle_cpu_spr __initconst = {
+ .state_table = spr_cstates,
+ .disable_promotion_to_c1e = true,
+ .use_acpi = true,
+};
+
static const struct idle_cpu idle_cpu_avn __initconst = {
.state_table = avn_cstates,
.disable_promotion_to_c1e = true,
@@ -1166,6 +1210,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx),
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt),
@@ -1353,6 +1398,8 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
+static void c1e_promotion_enable(void);
+
/**
* ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
*
@@ -1523,6 +1570,41 @@ static void __init skx_idle_state_table_update(void)
}
}
+/**
+ * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
+ */
+static void __init spr_idle_state_table_update(void)
+{
+ unsigned long long msr;
+
+ /* Check if the user prefers C1E over C1 (BIT(1) selects C1, BIT(2) selects C1E). */
+ if (preferred_states_mask & BIT(2)) {
+ if (preferred_states_mask & BIT(1))
+ /* Both can't be enabled, stick to the defaults. */
+ return;
+
+ spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE;
+ spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE;
+
+ /* Enable C1E using the "C1E promotion" bit. */
+ c1e_promotion_enable();
+ disable_promotion_to_c1e = false;
+ }
+
+ /*
+ * By default, the C6 state assumes the worst-case scenario of package
+ * C6 (PC6). However, if PC6 is disabled, update the exit latency and
+ * target residency to match core C6.
+ */
+ rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
+
+ /* Limit value 2 and above allow for PC6. */
+ if ((msr & 0x7) < 2) {
+ spr_cstates[2].exit_latency = 190;
+ spr_cstates[2].target_residency = 600;
+ }
+}
+
static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
{
unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
@@ -1557,6 +1639,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
case INTEL_FAM6_SKYLAKE_X:
skx_idle_state_table_update();
break;
+ case INTEL_FAM6_SAPPHIRERAPIDS_X:
+ spr_idle_state_table_update();
+ break;
}
for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
@@ -1629,6 +1714,15 @@ static void auto_demotion_disable(void)
wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
}
+static void c1e_promotion_enable(void)
+{
+ unsigned long long msr_bits;
+
+ rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
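+ /* bit 1 of MSR_IA32_POWER_CTL is the C1E promotion enable bit */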
+ msr_bits |= 0x2;
+ wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
+}
+
static void c1e_promotion_disable(void)
{
unsigned long long msr_bits;
@@ -1798,3 +1892,14 @@ module_param(max_cstate, int, 0444);
*/
module_param_named(states_off, disabled_states_mask, uint, 0444);
MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
+/*
+ * Some platforms come with mutually exclusive C-states, so that if one is
+ * enabled, the others must not be used. Example: C1 and C1E on the
+ * Sapphire Rapids platform. This parameter selects the preferred C-states
+ * among such groups of mutually exclusive C-states: the selected C-states
+ * will be registered, while the other C-states from the same group won't be.
+ * If the platform has no mutually exclusive C-states, this parameter has no
+ * effect.
+ */
+module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
+MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");
diff --git a/drivers/iio/accel/bma400_spi.c b/drivers/iio/accel/bma400_spi.c
index 9f622e37477b..9040a717b247 100644
--- a/drivers/iio/accel/bma400_spi.c
+++ b/drivers/iio/accel/bma400_spi.c
@@ -87,11 +87,9 @@ static int bma400_spi_probe(struct spi_device *spi)
return bma400_probe(&spi->dev, regmap, id->name);
}
-static int bma400_spi_remove(struct spi_device *spi)
+static void bma400_spi_remove(struct spi_device *spi)
{
bma400_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id bma400_spi_ids[] = {
diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c
index e6081dd0a880..d11f668016a6 100644
--- a/drivers/iio/accel/bmc150-accel-core.c
+++ b/drivers/iio/accel/bmc150-accel-core.c
@@ -1783,11 +1783,14 @@ int bmc150_accel_core_probe(struct device *dev, struct regmap *regmap, int irq,
ret = iio_device_register(indio_dev);
if (ret < 0) {
dev_err(dev, "Unable to register iio device\n");
- goto err_trigger_unregister;
+ goto err_pm_cleanup;
}
return 0;
+err_pm_cleanup:
+ pm_runtime_dont_use_autosuspend(dev);
+ pm_runtime_disable(dev);
err_trigger_unregister:
bmc150_accel_unregister_triggers(data, BMC150_ACCEL_TRIGGERS - 1);
err_buffer_cleanup:
diff --git a/drivers/iio/accel/bmc150-accel-spi.c b/drivers/iio/accel/bmc150-accel-spi.c
index 11559567cb39..80007cc2d044 100644
--- a/drivers/iio/accel/bmc150-accel-spi.c
+++ b/drivers/iio/accel/bmc150-accel-spi.c
@@ -35,11 +35,9 @@ static int bmc150_accel_probe(struct spi_device *spi)
true);
}
-static int bmc150_accel_remove(struct spi_device *spi)
+static void bmc150_accel_remove(struct spi_device *spi)
{
bmc150_accel_core_remove(&spi->dev);
-
- return 0;
}
static const struct acpi_device_id bmc150_accel_acpi_match[] = {
diff --git a/drivers/iio/accel/bmi088-accel-spi.c b/drivers/iio/accel/bmi088-accel-spi.c
index 758ad2f12896..06d99d9949f3 100644
--- a/drivers/iio/accel/bmi088-accel-spi.c
+++ b/drivers/iio/accel/bmi088-accel-spi.c
@@ -56,11 +56,9 @@ static int bmi088_accel_probe(struct spi_device *spi)
true);
}
-static int bmi088_accel_remove(struct spi_device *spi)
+static void bmi088_accel_remove(struct spi_device *spi)
{
bmi088_accel_core_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id bmi088_accel_id[] = {
diff --git a/drivers/iio/accel/fxls8962af-core.c b/drivers/iio/accel/fxls8962af-core.c
index 32989d91b982..f7fd9e046588 100644
--- a/drivers/iio/accel/fxls8962af-core.c
+++ b/drivers/iio/accel/fxls8962af-core.c
@@ -173,12 +173,20 @@ struct fxls8962af_data {
u16 upper_thres;
};
-const struct regmap_config fxls8962af_regmap_conf = {
+const struct regmap_config fxls8962af_i2c_regmap_conf = {
.reg_bits = 8,
.val_bits = 8,
.max_register = FXLS8962AF_MAX_REG,
};
-EXPORT_SYMBOL_GPL(fxls8962af_regmap_conf);
+EXPORT_SYMBOL_GPL(fxls8962af_i2c_regmap_conf);
+
+const struct regmap_config fxls8962af_spi_regmap_conf = {
+ .reg_bits = 8,
+ .pad_bits = 8,
+ .val_bits = 8,
+ .max_register = FXLS8962AF_MAX_REG,
+};
+EXPORT_SYMBOL_GPL(fxls8962af_spi_regmap_conf);
enum {
fxls8962af_idx_x,
diff --git a/drivers/iio/accel/fxls8962af-i2c.c b/drivers/iio/accel/fxls8962af-i2c.c
index cfb004b20455..6bde9891effb 100644
--- a/drivers/iio/accel/fxls8962af-i2c.c
+++ b/drivers/iio/accel/fxls8962af-i2c.c
@@ -18,7 +18,7 @@ static int fxls8962af_probe(struct i2c_client *client)
{
struct regmap *regmap;
- regmap = devm_regmap_init_i2c(client, &fxls8962af_regmap_conf);
+ regmap = devm_regmap_init_i2c(client, &fxls8962af_i2c_regmap_conf);
if (IS_ERR(regmap)) {
dev_err(&client->dev, "Failed to initialize i2c regmap\n");
return PTR_ERR(regmap);
diff --git a/drivers/iio/accel/fxls8962af-spi.c b/drivers/iio/accel/fxls8962af-spi.c
index 57108d3d480b..6f4dff3238d3 100644
--- a/drivers/iio/accel/fxls8962af-spi.c
+++ b/drivers/iio/accel/fxls8962af-spi.c
@@ -18,7 +18,7 @@ static int fxls8962af_probe(struct spi_device *spi)
{
struct regmap *regmap;
- regmap = devm_regmap_init_spi(spi, &fxls8962af_regmap_conf);
+ regmap = devm_regmap_init_spi(spi, &fxls8962af_spi_regmap_conf);
if (IS_ERR(regmap)) {
dev_err(&spi->dev, "Failed to initialize spi regmap\n");
return PTR_ERR(regmap);
diff --git a/drivers/iio/accel/fxls8962af.h b/drivers/iio/accel/fxls8962af.h
index b67572c3ef06..9cbe98c3ba9a 100644
--- a/drivers/iio/accel/fxls8962af.h
+++ b/drivers/iio/accel/fxls8962af.h
@@ -17,6 +17,7 @@ int fxls8962af_core_probe(struct device *dev, struct regmap *regmap, int irq);
int fxls8962af_core_remove(struct device *dev);
extern const struct dev_pm_ops fxls8962af_pm_ops;
-extern const struct regmap_config fxls8962af_regmap_conf;
+extern const struct regmap_config fxls8962af_i2c_regmap_conf;
+extern const struct regmap_config fxls8962af_spi_regmap_conf;
#endif /* _FXLS8962AF_H_ */
diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c
index 0fe570316848..ac74cdcd2bc8 100644
--- a/drivers/iio/accel/kxcjk-1013.c
+++ b/drivers/iio/accel/kxcjk-1013.c
@@ -1590,11 +1590,14 @@ static int kxcjk1013_probe(struct i2c_client *client,
ret = iio_device_register(indio_dev);
if (ret < 0) {
dev_err(&client->dev, "unable to register iio device\n");
- goto err_buffer_cleanup;
+ goto err_pm_cleanup;
}
return 0;
+err_pm_cleanup:
+ pm_runtime_dont_use_autosuspend(&client->dev);
+ pm_runtime_disable(&client->dev);
err_buffer_cleanup:
iio_triggered_buffer_cleanup(indio_dev);
err_trigger_unregister:
diff --git a/drivers/iio/accel/kxsd9-spi.c b/drivers/iio/accel/kxsd9-spi.c
index 441e6b764281..57c451cfb9e5 100644
--- a/drivers/iio/accel/kxsd9-spi.c
+++ b/drivers/iio/accel/kxsd9-spi.c
@@ -32,11 +32,9 @@ static int kxsd9_spi_probe(struct spi_device *spi)
spi_get_device_id(spi)->name);
}
-static int kxsd9_spi_remove(struct spi_device *spi)
+static void kxsd9_spi_remove(struct spi_device *spi)
{
kxsd9_common_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id kxsd9_spi_id[] = {
diff --git a/drivers/iio/accel/mma7455_spi.c b/drivers/iio/accel/mma7455_spi.c
index ecf690692dcc..b746031551a3 100644
--- a/drivers/iio/accel/mma7455_spi.c
+++ b/drivers/iio/accel/mma7455_spi.c
@@ -22,11 +22,9 @@ static int mma7455_spi_probe(struct spi_device *spi)
return mma7455_core_probe(&spi->dev, regmap, id->name);
}
-static int mma7455_spi_remove(struct spi_device *spi)
+static void mma7455_spi_remove(struct spi_device *spi)
{
mma7455_core_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id mma7455_spi_ids[] = {
diff --git a/drivers/iio/accel/mma9551.c b/drivers/iio/accel/mma9551.c
index 4c359fb05480..c53a3398b14c 100644
--- a/drivers/iio/accel/mma9551.c
+++ b/drivers/iio/accel/mma9551.c
@@ -495,11 +495,14 @@ static int mma9551_probe(struct i2c_client *client,
ret = iio_device_register(indio_dev);
if (ret < 0) {
dev_err(&client->dev, "unable to register iio device\n");
- goto out_poweroff;
+ goto err_pm_cleanup;
}
return 0;
+err_pm_cleanup:
+ pm_runtime_dont_use_autosuspend(&client->dev);
+ pm_runtime_disable(&client->dev);
out_poweroff:
mma9551_set_device_state(client, false);
diff --git a/drivers/iio/accel/mma9553.c b/drivers/iio/accel/mma9553.c
index 0570ab1cc064..5ff6bc70708b 100644
--- a/drivers/iio/accel/mma9553.c
+++ b/drivers/iio/accel/mma9553.c
@@ -1134,12 +1134,15 @@ static int mma9553_probe(struct i2c_client *client,
ret = iio_device_register(indio_dev);
if (ret < 0) {
dev_err(&client->dev, "unable to register iio device\n");
- goto out_poweroff;
+ goto err_pm_cleanup;
}
dev_dbg(&indio_dev->dev, "Registered device %s\n", name);
return 0;
+err_pm_cleanup:
+ pm_runtime_dont_use_autosuspend(&client->dev);
+ pm_runtime_disable(&client->dev);
out_poweroff:
mma9551_set_device_state(client, false);
return ret;
diff --git a/drivers/iio/accel/sca3000.c b/drivers/iio/accel/sca3000.c
index 43ecacbdc95a..83c81072511e 100644
--- a/drivers/iio/accel/sca3000.c
+++ b/drivers/iio/accel/sca3000.c
@@ -1524,7 +1524,7 @@ error_ret:
return ret;
}
-static int sca3000_remove(struct spi_device *spi)
+static void sca3000_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct sca3000_state *st = iio_priv(indio_dev);
@@ -1535,8 +1535,6 @@ static int sca3000_remove(struct spi_device *spi)
sca3000_stop_all_interrupts(st);
if (spi->irq)
free_irq(spi->irq, indio_dev);
-
- return 0;
}
static const struct spi_device_id sca3000_id[] = {
diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c
index bc2cfa5f9592..b400bbe291aa 100644
--- a/drivers/iio/adc/ad7124.c
+++ b/drivers/iio/adc/ad7124.c
@@ -76,7 +76,7 @@
#define AD7124_CONFIG_REF_SEL(x) FIELD_PREP(AD7124_CONFIG_REF_SEL_MSK, x)
#define AD7124_CONFIG_PGA_MSK GENMASK(2, 0)
#define AD7124_CONFIG_PGA(x) FIELD_PREP(AD7124_CONFIG_PGA_MSK, x)
-#define AD7124_CONFIG_IN_BUFF_MSK GENMASK(7, 6)
+#define AD7124_CONFIG_IN_BUFF_MSK GENMASK(6, 5)
#define AD7124_CONFIG_IN_BUFF(x) FIELD_PREP(AD7124_CONFIG_IN_BUFF_MSK, x)
/* AD7124_FILTER_X */
diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c
index 1d345d66742d..c17d9b5fbaf6 100644
--- a/drivers/iio/adc/ad7266.c
+++ b/drivers/iio/adc/ad7266.c
@@ -479,7 +479,7 @@ error_disable_reg:
return ret;
}
-static int ad7266_remove(struct spi_device *spi)
+static void ad7266_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct ad7266_state *st = iio_priv(indio_dev);
@@ -488,8 +488,6 @@ static int ad7266_remove(struct spi_device *spi)
iio_triggered_buffer_cleanup(indio_dev);
if (!IS_ERR(st->reg))
regulator_disable(st->reg);
-
- return 0;
}
static const struct spi_device_id ad7266_id[] = {
diff --git a/drivers/iio/adc/ltc2496.c b/drivers/iio/adc/ltc2496.c
index dd956a7c216e..5a55f79f2574 100644
--- a/drivers/iio/adc/ltc2496.c
+++ b/drivers/iio/adc/ltc2496.c
@@ -78,13 +78,11 @@ static int ltc2496_probe(struct spi_device *spi)
return ltc2497core_probe(dev, indio_dev);
}
-static int ltc2496_remove(struct spi_device *spi)
+static void ltc2496_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
ltc2497core_remove(indio_dev);
-
- return 0;
}
static const struct of_device_id ltc2496_of_match[] = {
diff --git a/drivers/iio/adc/mcp320x.c b/drivers/iio/adc/mcp320x.c
index 8d1cff28cae0..b4c69acb33e3 100644
--- a/drivers/iio/adc/mcp320x.c
+++ b/drivers/iio/adc/mcp320x.c
@@ -459,15 +459,13 @@ reg_disable:
return ret;
}
-static int mcp320x_remove(struct spi_device *spi)
+static void mcp320x_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct mcp320x *adc = iio_priv(indio_dev);
iio_device_unregister(indio_dev);
regulator_disable(adc->reg);
-
- return 0;
}
static const struct of_device_id mcp320x_dt_ids[] = {
diff --git a/drivers/iio/adc/mcp3911.c b/drivers/iio/adc/mcp3911.c
index 13535f148c4c..1cb4590fe412 100644
--- a/drivers/iio/adc/mcp3911.c
+++ b/drivers/iio/adc/mcp3911.c
@@ -321,7 +321,7 @@ reg_disable:
return ret;
}
-static int mcp3911_remove(struct spi_device *spi)
+static void mcp3911_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct mcp3911 *adc = iio_priv(indio_dev);
@@ -331,8 +331,6 @@ static int mcp3911_remove(struct spi_device *spi)
clk_disable_unprepare(adc->clki);
if (adc->vref)
regulator_disable(adc->vref);
-
- return 0;
}
static const struct of_device_id mcp3911_dt_ids[] = {
diff --git a/drivers/iio/adc/men_z188_adc.c b/drivers/iio/adc/men_z188_adc.c
index 42ea8bc7e780..adc5ceaef8c9 100644
--- a/drivers/iio/adc/men_z188_adc.c
+++ b/drivers/iio/adc/men_z188_adc.c
@@ -103,6 +103,7 @@ static int men_z188_probe(struct mcb_device *dev,
struct z188_adc *adc;
struct iio_dev *indio_dev;
struct resource *mem;
+ int ret;
indio_dev = devm_iio_device_alloc(&dev->dev, sizeof(struct z188_adc));
if (!indio_dev)
@@ -128,8 +129,14 @@ static int men_z188_probe(struct mcb_device *dev,
adc->mem = mem;
mcb_set_drvdata(dev, indio_dev);
- return iio_device_register(indio_dev);
+ ret = iio_device_register(indio_dev);
+ if (ret)
+ goto err_unmap;
+
+ return 0;
+err_unmap:
+ iounmap(adc->base);
err:
mcb_release_mem(mem);
return -ENXIO;
diff --git a/drivers/iio/adc/ti-adc12138.c b/drivers/iio/adc/ti-adc12138.c
index 6eb62b564dae..59d75d09604f 100644
--- a/drivers/iio/adc/ti-adc12138.c
+++ b/drivers/iio/adc/ti-adc12138.c
@@ -503,7 +503,7 @@ err_clk_disable:
return ret;
}
-static int adc12138_remove(struct spi_device *spi)
+static void adc12138_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct adc12138 *adc = iio_priv(indio_dev);
@@ -514,8 +514,6 @@ static int adc12138_remove(struct spi_device *spi)
regulator_disable(adc->vref_n);
regulator_disable(adc->vref_p);
clk_disable_unprepare(adc->cclk);
-
- return 0;
}
static const struct of_device_id adc12138_dt_ids[] = {
diff --git a/drivers/iio/adc/ti-ads7950.c b/drivers/iio/adc/ti-ads7950.c
index a7efa3eada2c..e3658b969c5b 100644
--- a/drivers/iio/adc/ti-ads7950.c
+++ b/drivers/iio/adc/ti-ads7950.c
@@ -662,7 +662,7 @@ error_destroy_mutex:
return ret;
}
-static int ti_ads7950_remove(struct spi_device *spi)
+static void ti_ads7950_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct ti_ads7950_state *st = iio_priv(indio_dev);
@@ -672,8 +672,6 @@ static int ti_ads7950_remove(struct spi_device *spi)
iio_triggered_buffer_cleanup(indio_dev);
regulator_disable(st->reg);
mutex_destroy(&st->slock);
-
- return 0;
}
static const struct spi_device_id ti_ads7950_id[] = {
diff --git a/drivers/iio/adc/ti-ads8688.c b/drivers/iio/adc/ti-ads8688.c
index 2e24717d7f55..22c2583eedd0 100644
--- a/drivers/iio/adc/ti-ads8688.c
+++ b/drivers/iio/adc/ti-ads8688.c
@@ -479,7 +479,7 @@ err_regulator_disable:
return ret;
}
-static int ads8688_remove(struct spi_device *spi)
+static void ads8688_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct ads8688_state *st = iio_priv(indio_dev);
@@ -489,8 +489,6 @@ static int ads8688_remove(struct spi_device *spi)
if (!IS_ERR(st->reg))
regulator_disable(st->reg);
-
- return 0;
}
static const struct spi_device_id ads8688_id[] = {
diff --git a/drivers/iio/adc/ti-tlc4541.c b/drivers/iio/adc/ti-tlc4541.c
index 403b787f9f7e..2406eda9dfc6 100644
--- a/drivers/iio/adc/ti-tlc4541.c
+++ b/drivers/iio/adc/ti-tlc4541.c
@@ -224,7 +224,7 @@ error_disable_reg:
return ret;
}
-static int tlc4541_remove(struct spi_device *spi)
+static void tlc4541_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct tlc4541_state *st = iio_priv(indio_dev);
@@ -232,8 +232,6 @@ static int tlc4541_remove(struct spi_device *spi)
iio_device_unregister(indio_dev);
iio_triggered_buffer_cleanup(indio_dev);
regulator_disable(st->reg);
-
- return 0;
}
static const struct of_device_id tlc4541_dt_ids[] = {
diff --git a/drivers/iio/adc/ti-tsc2046.c b/drivers/iio/adc/ti-tsc2046.c
index d84ae6b008c1..e8fc4d01f30b 100644
--- a/drivers/iio/adc/ti-tsc2046.c
+++ b/drivers/iio/adc/ti-tsc2046.c
@@ -388,7 +388,7 @@ static int tsc2046_adc_update_scan_mode(struct iio_dev *indio_dev,
mutex_lock(&priv->slock);
size = 0;
- for_each_set_bit(ch_idx, active_scan_mask, indio_dev->num_channels) {
+ for_each_set_bit(ch_idx, active_scan_mask, ARRAY_SIZE(priv->l)) {
size += tsc2046_adc_group_set_layout(priv, group, ch_idx);
tsc2046_adc_group_set_cmd(priv, group, ch_idx);
group++;
@@ -548,7 +548,7 @@ static int tsc2046_adc_setup_spi_msg(struct tsc2046_adc_priv *priv)
* enabled.
*/
size = 0;
- for (ch_idx = 0; ch_idx < priv->dcfg->num_channels; ch_idx++)
+ for (ch_idx = 0; ch_idx < ARRAY_SIZE(priv->l); ch_idx++)
size += tsc2046_adc_group_set_layout(priv, ch_idx, ch_idx);
priv->tx = devm_kzalloc(&priv->spi->dev, size, GFP_KERNEL);
diff --git a/drivers/iio/addac/ad74413r.c b/drivers/iio/addac/ad74413r.c
index 5271073bb74e..acd230a6af35 100644
--- a/drivers/iio/addac/ad74413r.c
+++ b/drivers/iio/addac/ad74413r.c
@@ -134,7 +134,6 @@ struct ad74413r_state {
#define AD74413R_CH_EN_MASK(x) BIT(x)
#define AD74413R_REG_DIN_COMP_OUT 0x25
-#define AD74413R_DIN_COMP_OUT_SHIFT_X(x) x
#define AD74413R_REG_ADC_RESULT_X(x) (0x26 + (x))
#define AD74413R_ADC_RESULT_MAX GENMASK(15, 0)
@@ -288,7 +287,7 @@ static void ad74413r_gpio_set_multiple(struct gpio_chip *chip,
unsigned int offset = 0;
int ret;
- for_each_set_bit_from(offset, mask, AD74413R_CHANNEL_MAX) {
+ for_each_set_bit_from(offset, mask, chip->ngpio) {
unsigned int real_offset = st->gpo_gpio_offsets[offset];
ret = ad74413r_set_gpo_config(st, real_offset,
@@ -316,7 +315,7 @@ static int ad74413r_gpio_get(struct gpio_chip *chip, unsigned int offset)
if (ret)
return ret;
- status &= AD74413R_DIN_COMP_OUT_SHIFT_X(real_offset);
+ status &= BIT(real_offset);
return status ? 1 : 0;
}
@@ -334,11 +333,10 @@ static int ad74413r_gpio_get_multiple(struct gpio_chip *chip,
if (ret)
return ret;
- for_each_set_bit_from(offset, mask, AD74413R_CHANNEL_MAX) {
+ for_each_set_bit_from(offset, mask, chip->ngpio) {
unsigned int real_offset = st->comp_gpio_offsets[offset];
- if (val & BIT(real_offset))
- *bits |= offset;
+ __assign_bit(offset, bits, val & BIT(real_offset));
}
return ret;
@@ -840,7 +838,7 @@ static int ad74413r_update_scan_mode(struct iio_dev *indio_dev,
{
struct ad74413r_state *st = iio_priv(indio_dev);
struct spi_transfer *xfer = st->adc_samples_xfer;
- u8 *rx_buf = &st->adc_samples_buf.rx_buf[-1 * AD74413R_FRAME_SIZE];
+ u8 *rx_buf = st->adc_samples_buf.rx_buf;
u8 *tx_buf = st->adc_samples_tx_buf;
unsigned int channel;
int ret = -EINVAL;
@@ -894,9 +892,10 @@ static int ad74413r_update_scan_mode(struct iio_dev *indio_dev,
spi_message_add_tail(xfer, &st->adc_samples_msg);
- xfer++;
tx_buf += AD74413R_FRAME_SIZE;
- rx_buf += AD74413R_FRAME_SIZE;
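+ /*
+ * The response to a sample request is clocked out during the
+ * following SPI frame, so the receive buffers lag the transmit
+ * buffers by one transfer.
+ */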
+ if (xfer != st->adc_samples_xfer)
+ rx_buf += AD74413R_FRAME_SIZE;
+ xfer++;
}
xfer->rx_buf = rx_buf;
diff --git a/drivers/iio/amplifiers/ad8366.c b/drivers/iio/amplifiers/ad8366.c
index cfcf18a0bce8..1134ae12e531 100644
--- a/drivers/iio/amplifiers/ad8366.c
+++ b/drivers/iio/amplifiers/ad8366.c
@@ -298,7 +298,7 @@ error_disable_reg:
return ret;
}
-static int ad8366_remove(struct spi_device *spi)
+static void ad8366_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct ad8366_state *st = iio_priv(indio_dev);
@@ -308,8 +308,6 @@ static int ad8366_remove(struct spi_device *spi)
if (!IS_ERR(reg))
regulator_disable(reg);
-
- return 0;
}
static const struct spi_device_id ad8366_id[] = {
diff --git a/drivers/iio/common/ssp_sensors/ssp_dev.c b/drivers/iio/common/ssp_sensors/ssp_dev.c
index 1aee87100038..eafaf4529df5 100644
--- a/drivers/iio/common/ssp_sensors/ssp_dev.c
+++ b/drivers/iio/common/ssp_sensors/ssp_dev.c
@@ -586,7 +586,7 @@ err_setup_irq:
return ret;
}
-static int ssp_remove(struct spi_device *spi)
+static void ssp_remove(struct spi_device *spi)
{
struct ssp_data *data = spi_get_drvdata(spi);
@@ -608,8 +608,6 @@ static int ssp_remove(struct spi_device *spi)
mutex_destroy(&data->pending_lock);
mfd_remove_devices(&spi->dev);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/iio/dac/ad5360.c b/drivers/iio/dac/ad5360.c
index 2d3b14c407d8..ecbc6a51d60f 100644
--- a/drivers/iio/dac/ad5360.c
+++ b/drivers/iio/dac/ad5360.c
@@ -521,7 +521,7 @@ error_free_channels:
return ret;
}
-static int ad5360_remove(struct spi_device *spi)
+static void ad5360_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct ad5360_state *st = iio_priv(indio_dev);
@@ -531,8 +531,6 @@ static int ad5360_remove(struct spi_device *spi)
kfree(indio_dev->channels);
regulator_bulk_disable(st->chip_info->num_vrefs, st->vref_reg);
-
- return 0;
}
static const struct spi_device_id ad5360_ids[] = {
diff --git a/drivers/iio/dac/ad5380.c b/drivers/iio/dac/ad5380.c
index e38860a6a9f3..82e1d9bd773e 100644
--- a/drivers/iio/dac/ad5380.c
+++ b/drivers/iio/dac/ad5380.c
@@ -488,11 +488,9 @@ static int ad5380_spi_probe(struct spi_device *spi)
return ad5380_probe(&spi->dev, regmap, id->driver_data, id->name);
}
-static int ad5380_spi_remove(struct spi_device *spi)
+static void ad5380_spi_remove(struct spi_device *spi)
{
ad5380_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id ad5380_spi_ids[] = {
diff --git a/drivers/iio/dac/ad5446.c b/drivers/iio/dac/ad5446.c
index 1c9b54c012a7..14cfabacbea5 100644
--- a/drivers/iio/dac/ad5446.c
+++ b/drivers/iio/dac/ad5446.c
@@ -491,11 +491,9 @@ static int ad5446_spi_probe(struct spi_device *spi)
&ad5446_spi_chip_info[id->driver_data]);
}
-static int ad5446_spi_remove(struct spi_device *spi)
+static void ad5446_spi_remove(struct spi_device *spi)
{
ad5446_remove(&spi->dev);
-
- return 0;
}
static struct spi_driver ad5446_spi_driver = {
diff --git a/drivers/iio/dac/ad5449.c b/drivers/iio/dac/ad5449.c
index f5e93c6acc9d..bad9bdaafa94 100644
--- a/drivers/iio/dac/ad5449.c
+++ b/drivers/iio/dac/ad5449.c
@@ -330,7 +330,7 @@ error_disable_reg:
return ret;
}
-static int ad5449_spi_remove(struct spi_device *spi)
+static void ad5449_spi_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct ad5449 *st = iio_priv(indio_dev);
@@ -338,8 +338,6 @@ static int ad5449_spi_remove(struct spi_device *spi)
iio_device_unregister(indio_dev);
regulator_bulk_disable(st->chip_info->num_channels, st->vref_reg);
-
- return 0;
}
static const struct spi_device_id ad5449_spi_ids[] = {
diff --git a/drivers/iio/dac/ad5504.c b/drivers/iio/dac/ad5504.c
index b631261efa97..8507573aa13e 100644
--- a/drivers/iio/dac/ad5504.c
+++ b/drivers/iio/dac/ad5504.c
@@ -336,7 +336,7 @@ error_disable_reg:
return ret;
}
-static int ad5504_remove(struct spi_device *spi)
+static void ad5504_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct ad5504_state *st = iio_priv(indio_dev);
@@ -345,8 +345,6 @@ static int ad5504_remove(struct spi_device *spi)
if (!IS_ERR(st->reg))
regulator_disable(st->reg);
-
- return 0;
}
static const struct spi_device_id ad5504_id[] = {
diff --git a/drivers/iio/dac/ad5592r.c b/drivers/iio/dac/ad5592r.c
index 6bfd7951e18c..0f7abfa75bec 100644
--- a/drivers/iio/dac/ad5592r.c
+++ b/drivers/iio/dac/ad5592r.c
@@ -130,11 +130,9 @@ static int ad5592r_spi_probe(struct spi_device *spi)
return ad5592r_probe(&spi->dev, id->name, &ad5592r_rw_ops);
}
-static int ad5592r_spi_remove(struct spi_device *spi)
+static void ad5592r_spi_remove(struct spi_device *spi)
{
ad5592r_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id ad5592r_spi_ids[] = {
diff --git a/drivers/iio/dac/ad5624r_spi.c b/drivers/iio/dac/ad5624r_spi.c
index 3c98941b9f99..371e812850eb 100644
--- a/drivers/iio/dac/ad5624r_spi.c
+++ b/drivers/iio/dac/ad5624r_spi.c
@@ -293,7 +293,7 @@ error_disable_reg:
return ret;
}
-static int ad5624r_remove(struct spi_device *spi)
+static void ad5624r_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct ad5624r_state *st = iio_priv(indio_dev);
@@ -301,8 +301,6 @@ static int ad5624r_remove(struct spi_device *spi)
iio_device_unregister(indio_dev);
if (!IS_ERR(st->reg))
regulator_disable(st->reg);
-
- return 0;
}
static const struct spi_device_id ad5624r_id[] = {
diff --git a/drivers/iio/dac/ad5686-spi.c b/drivers/iio/dac/ad5686-spi.c
index 2628810fdbb1..d26fb29b6b04 100644
--- a/drivers/iio/dac/ad5686-spi.c
+++ b/drivers/iio/dac/ad5686-spi.c
@@ -95,11 +95,9 @@ static int ad5686_spi_probe(struct spi_device *spi)
ad5686_spi_write, ad5686_spi_read);
}
-static int ad5686_spi_remove(struct spi_device *spi)
+static void ad5686_spi_remove(struct spi_device *spi)
{
ad5686_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id ad5686_spi_id[] = {
diff --git a/drivers/iio/dac/ad5761.c b/drivers/iio/dac/ad5761.c
index e37e095e94fc..4cb8471db81e 100644
--- a/drivers/iio/dac/ad5761.c
+++ b/drivers/iio/dac/ad5761.c
@@ -394,7 +394,7 @@ disable_regulator_err:
return ret;
}
-static int ad5761_remove(struct spi_device *spi)
+static void ad5761_remove(struct spi_device *spi)
{
struct iio_dev *iio_dev = spi_get_drvdata(spi);
struct ad5761_state *st = iio_priv(iio_dev);
@@ -403,8 +403,6 @@ static int ad5761_remove(struct spi_device *spi)
if (!IS_ERR_OR_NULL(st->vref_reg))
regulator_disable(st->vref_reg);
-
- return 0;
}
static const struct spi_device_id ad5761_id[] = {
diff --git a/drivers/iio/dac/ad5764.c b/drivers/iio/dac/ad5764.c
index ae089b9145cb..d235a8047ba0 100644
--- a/drivers/iio/dac/ad5764.c
+++ b/drivers/iio/dac/ad5764.c
@@ -332,7 +332,7 @@ error_disable_reg:
return ret;
}
-static int ad5764_remove(struct spi_device *spi)
+static void ad5764_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct ad5764_state *st = iio_priv(indio_dev);
@@ -341,8 +341,6 @@ static int ad5764_remove(struct spi_device *spi)
if (st->chip_info->int_vref == 0)
regulator_bulk_disable(ARRAY_SIZE(st->vref_reg), st->vref_reg);
-
- return 0;
}
static const struct spi_device_id ad5764_ids[] = {
diff --git a/drivers/iio/dac/ad5791.c b/drivers/iio/dac/ad5791.c
index 7b4579d73d18..2b14914b4050 100644
--- a/drivers/iio/dac/ad5791.c
+++ b/drivers/iio/dac/ad5791.c
@@ -428,7 +428,7 @@ error_disable_reg_pos:
return ret;
}
-static int ad5791_remove(struct spi_device *spi)
+static void ad5791_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct ad5791_state *st = iio_priv(indio_dev);
@@ -439,8 +439,6 @@ static int ad5791_remove(struct spi_device *spi)
if (!IS_ERR(st->reg_vss))
regulator_disable(st->reg_vss);
-
- return 0;
}
static const struct spi_device_id ad5791_id[] = {
diff --git a/drivers/iio/dac/ad8801.c b/drivers/iio/dac/ad8801.c
index 5ecfdad54dec..6be35c92d435 100644
--- a/drivers/iio/dac/ad8801.c
+++ b/drivers/iio/dac/ad8801.c
@@ -193,7 +193,7 @@ error_disable_vrefh_reg:
return ret;
}
-static int ad8801_remove(struct spi_device *spi)
+static void ad8801_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct ad8801_state *state = iio_priv(indio_dev);
@@ -202,8 +202,6 @@ static int ad8801_remove(struct spi_device *spi)
if (state->vrefl_reg)
regulator_disable(state->vrefl_reg);
regulator_disable(state->vrefh_reg);
-
- return 0;
}
static const struct spi_device_id ad8801_ids[] = {
diff --git a/drivers/iio/dac/ltc1660.c b/drivers/iio/dac/ltc1660.c
index f6ec9bf5815e..c76233c9bb72 100644
--- a/drivers/iio/dac/ltc1660.c
+++ b/drivers/iio/dac/ltc1660.c
@@ -206,15 +206,13 @@ error_disable_reg:
return ret;
}
-static int ltc1660_remove(struct spi_device *spi)
+static void ltc1660_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct ltc1660_priv *priv = iio_priv(indio_dev);
iio_device_unregister(indio_dev);
regulator_disable(priv->vref_reg);
-
- return 0;
}
static const struct of_device_id ltc1660_dt_ids[] = {
diff --git a/drivers/iio/dac/ltc2632.c b/drivers/iio/dac/ltc2632.c
index 53e4b887d372..aed46c80757e 100644
--- a/drivers/iio/dac/ltc2632.c
+++ b/drivers/iio/dac/ltc2632.c
@@ -372,7 +372,7 @@ static int ltc2632_probe(struct spi_device *spi)
return iio_device_register(indio_dev);
}
-static int ltc2632_remove(struct spi_device *spi)
+static void ltc2632_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct ltc2632_state *st = iio_priv(indio_dev);
@@ -381,8 +381,6 @@ static int ltc2632_remove(struct spi_device *spi)
if (st->vref_reg)
regulator_disable(st->vref_reg);
-
- return 0;
}
static const struct spi_device_id ltc2632_id[] = {
diff --git a/drivers/iio/dac/mcp4922.c b/drivers/iio/dac/mcp4922.c
index 0ae414ee1716..cb9e60e71b91 100644
--- a/drivers/iio/dac/mcp4922.c
+++ b/drivers/iio/dac/mcp4922.c
@@ -172,7 +172,7 @@ error_disable_reg:
return ret;
}
-static int mcp4922_remove(struct spi_device *spi)
+static void mcp4922_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct mcp4922_state *state;
@@ -180,8 +180,6 @@ static int mcp4922_remove(struct spi_device *spi)
iio_device_unregister(indio_dev);
state = iio_priv(indio_dev);
regulator_disable(state->vref_reg);
-
- return 0;
}
static const struct spi_device_id mcp4922_id[] = {
diff --git a/drivers/iio/dac/ti-dac082s085.c b/drivers/iio/dac/ti-dac082s085.c
index 6beda2193683..4e1156e6deb2 100644
--- a/drivers/iio/dac/ti-dac082s085.c
+++ b/drivers/iio/dac/ti-dac082s085.c
@@ -313,7 +313,7 @@ err:
return ret;
}
-static int ti_dac_remove(struct spi_device *spi)
+static void ti_dac_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct ti_dac_chip *ti_dac = iio_priv(indio_dev);
@@ -321,8 +321,6 @@ static int ti_dac_remove(struct spi_device *spi)
iio_device_unregister(indio_dev);
mutex_destroy(&ti_dac->lock);
regulator_disable(ti_dac->vref);
-
- return 0;
}
static const struct of_device_id ti_dac_of_id[] = {
diff --git a/drivers/iio/dac/ti-dac7311.c b/drivers/iio/dac/ti-dac7311.c
index 99f275829ec2..e10d17e60ed3 100644
--- a/drivers/iio/dac/ti-dac7311.c
+++ b/drivers/iio/dac/ti-dac7311.c
@@ -292,7 +292,7 @@ err:
return ret;
}
-static int ti_dac_remove(struct spi_device *spi)
+static void ti_dac_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct ti_dac_chip *ti_dac = iio_priv(indio_dev);
@@ -300,7 +300,6 @@ static int ti_dac_remove(struct spi_device *spi)
iio_device_unregister(indio_dev);
mutex_destroy(&ti_dac->lock);
regulator_disable(ti_dac->vref);
- return 0;
}
static const struct of_device_id ti_dac_of_id[] = {
diff --git a/drivers/iio/frequency/adf4350.c b/drivers/iio/frequency/adf4350.c
index 3d9eba716b69..f3521330f6fb 100644
--- a/drivers/iio/frequency/adf4350.c
+++ b/drivers/iio/frequency/adf4350.c
@@ -589,7 +589,7 @@ error_disable_clk:
return ret;
}
-static int adf4350_remove(struct spi_device *spi)
+static void adf4350_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct adf4350_state *st = iio_priv(indio_dev);
@@ -604,8 +604,6 @@ static int adf4350_remove(struct spi_device *spi)
if (!IS_ERR(reg))
regulator_disable(reg);
-
- return 0;
}
static const struct of_device_id adf4350_of_match[] = {
diff --git a/drivers/iio/frequency/admv1013.c b/drivers/iio/frequency/admv1013.c
index 6cdeb50143af..3f3c478e9baa 100644
--- a/drivers/iio/frequency/admv1013.c
+++ b/drivers/iio/frequency/admv1013.c
@@ -348,7 +348,7 @@ static int admv1013_update_mixer_vgate(struct admv1013_state *st)
vcm = regulator_get_voltage(st->reg);
- if (vcm >= 0 && vcm < 1800000)
+ if (vcm < 1800000)
mixer_vgate = (2389 * vcm / 1000000 + 8100) / 100;
else if (vcm > 1800000 && vcm < 2600000)
mixer_vgate = (2375 * vcm / 1000000 + 125) / 100;
diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c
index 17b939a367ad..81a6d09788bd 100644
--- a/drivers/iio/gyro/bmg160_core.c
+++ b/drivers/iio/gyro/bmg160_core.c
@@ -1188,11 +1188,14 @@ int bmg160_core_probe(struct device *dev, struct regmap *regmap, int irq,
ret = iio_device_register(indio_dev);
if (ret < 0) {
dev_err(dev, "unable to register iio device\n");
- goto err_buffer_cleanup;
+ goto err_pm_cleanup;
}
return 0;
+err_pm_cleanup:
+ pm_runtime_dont_use_autosuspend(dev);
+ pm_runtime_disable(dev);
err_buffer_cleanup:
iio_triggered_buffer_cleanup(indio_dev);
err_trigger_unregister:
diff --git a/drivers/iio/gyro/bmg160_spi.c b/drivers/iio/gyro/bmg160_spi.c
index 745962e1e423..fc2e453527b9 100644
--- a/drivers/iio/gyro/bmg160_spi.c
+++ b/drivers/iio/gyro/bmg160_spi.c
@@ -27,11 +27,9 @@ static int bmg160_spi_probe(struct spi_device *spi)
return bmg160_core_probe(&spi->dev, regmap, spi->irq, id->name);
}
-static int bmg160_spi_remove(struct spi_device *spi)
+static void bmg160_spi_remove(struct spi_device *spi)
{
bmg160_core_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id bmg160_spi_id[] = {
diff --git a/drivers/iio/gyro/fxas21002c_spi.c b/drivers/iio/gyro/fxas21002c_spi.c
index 77ceebef4e34..c3ac169facf9 100644
--- a/drivers/iio/gyro/fxas21002c_spi.c
+++ b/drivers/iio/gyro/fxas21002c_spi.c
@@ -34,11 +34,9 @@ static int fxas21002c_spi_probe(struct spi_device *spi)
return fxas21002c_core_probe(&spi->dev, regmap, spi->irq, id->name);
}
-static int fxas21002c_spi_remove(struct spi_device *spi)
+static void fxas21002c_spi_remove(struct spi_device *spi)
{
fxas21002c_core_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id fxas21002c_spi_id[] = {
diff --git a/drivers/iio/health/afe4403.c b/drivers/iio/health/afe4403.c
index 273f16dcaff8..856ec901b091 100644
--- a/drivers/iio/health/afe4403.c
+++ b/drivers/iio/health/afe4403.c
@@ -570,7 +570,7 @@ err_disable_reg:
return ret;
}
-static int afe4403_remove(struct spi_device *spi)
+static void afe4403_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
struct afe4403_data *afe = iio_priv(indio_dev);
@@ -586,8 +586,6 @@ static int afe4403_remove(struct spi_device *spi)
ret = regulator_disable(afe->regulator);
if (ret)
dev_warn(afe->dev, "Unable to disable regulator\n");
-
- return 0;
}
static const struct spi_device_id afe4403_ids[] = {
diff --git a/drivers/iio/imu/adis16480.c b/drivers/iio/imu/adis16480.c
index ed129321a14d..f9b4540db1f4 100644
--- a/drivers/iio/imu/adis16480.c
+++ b/drivers/iio/imu/adis16480.c
@@ -1403,6 +1403,7 @@ static int adis16480_probe(struct spi_device *spi)
{
const struct spi_device_id *id = spi_get_device_id(spi);
const struct adis_data *adis16480_data;
+ irq_handler_t trigger_handler = NULL;
struct iio_dev *indio_dev;
struct adis16480 *st;
int ret;
@@ -1474,8 +1475,12 @@ static int adis16480_probe(struct spi_device *spi)
st->clk_freq = st->chip_info->int_clk;
}
+ /* Only use our trigger handler if burst mode is supported */
+ if (adis16480_data->burst_len)
+ trigger_handler = adis16480_trigger_handler;
+
ret = devm_adis_setup_buffer_and_trigger(&st->adis, indio_dev,
- adis16480_trigger_handler);
+ trigger_handler);
if (ret)
return ret;
diff --git a/drivers/iio/imu/kmx61.c b/drivers/iio/imu/kmx61.c
index 1dabfd615dab..f89724481df9 100644
--- a/drivers/iio/imu/kmx61.c
+++ b/drivers/iio/imu/kmx61.c
@@ -1385,7 +1385,7 @@ static int kmx61_probe(struct i2c_client *client,
ret = iio_device_register(data->acc_indio_dev);
if (ret < 0) {
dev_err(&client->dev, "Failed to register acc iio device\n");
- goto err_buffer_cleanup_mag;
+ goto err_pm_cleanup;
}
ret = iio_device_register(data->mag_indio_dev);
@@ -1398,6 +1398,9 @@ static int kmx61_probe(struct i2c_client *client,
err_iio_unregister_acc:
iio_device_unregister(data->acc_indio_dev);
+err_pm_cleanup:
+ pm_runtime_dont_use_autosuspend(&client->dev);
+ pm_runtime_disable(&client->dev);
err_buffer_cleanup_mag:
if (client->irq > 0)
iio_triggered_buffer_cleanup(data->mag_indio_dev);
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
index 727b4b6ac696..93f0c6bce502 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
@@ -1374,8 +1374,12 @@ static int st_lsm6dsx_read_oneshot(struct st_lsm6dsx_sensor *sensor,
if (err < 0)
return err;
+ /*
+ * We need to wait for the sensor settling time before reading
+ * data in order to avoid corrupted samples.
+ */
delay = 1000000000 / sensor->odr;
- usleep_range(delay, 2 * delay);
+ usleep_range(3 * delay, 4 * delay);
err = st_lsm6dsx_read_locked(hw, addr, &data, sizeof(data));
if (err < 0)
diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index 94eb9f6cf128..208b5193c621 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -1569,9 +1569,17 @@ static long iio_device_buffer_getfd(struct iio_dev *indio_dev, unsigned long arg
}
if (copy_to_user(ival, &fd, sizeof(fd))) {
- put_unused_fd(fd);
- ret = -EFAULT;
- goto error_free_ib;
+ /*
+ * "Leak" the fd, as there's not much we can do about this
+ * anyway. 'fd' might have been closed already, as
+ * anon_inode_getfd() called fd_install() on it, which made
+ * it reachable by userland.
+ *
+ * Instead of allowing a malicious user to play tricks with
+ * us, rely on the process exit path to do any necessary
+ * cleanup, as in releasing the file, if still needed.
+ */
+ return -EFAULT;
}
return 0;
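[Editor's note] The long comment above encodes the invariant behind this fix: once anon_inode_getfd() has called fd_install(), the fd number is visible to userspace and put_unused_fd() must not be used on it. Where a driver controls the sequence itself, the leak can be avoided outright by publishing the fd last; a hedged sketch of that ordering, in which uptr and file are placeholders:

    int fd = get_unused_fd_flags(O_CLOEXEC);

    if (fd < 0)
            return fd;
    if (copy_to_user(uptr, &fd, sizeof(fd))) {
            put_unused_fd(fd);       /* safe: fd not installed yet */
            return -EFAULT;
    }
    fd_install(fd, file);            /* hand the fd to userspace last */
    return 0;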
diff --git a/drivers/iio/magnetometer/bmc150_magn.c b/drivers/iio/magnetometer/bmc150_magn.c
index f96f53175349..3d4d21f979fa 100644
--- a/drivers/iio/magnetometer/bmc150_magn.c
+++ b/drivers/iio/magnetometer/bmc150_magn.c
@@ -962,13 +962,14 @@ int bmc150_magn_probe(struct device *dev, struct regmap *regmap,
ret = iio_device_register(indio_dev);
if (ret < 0) {
dev_err(dev, "unable to register iio device\n");
- goto err_disable_runtime_pm;
+ goto err_pm_cleanup;
}
dev_dbg(dev, "Registered device %s\n", name);
return 0;
-err_disable_runtime_pm:
+err_pm_cleanup:
+ pm_runtime_dont_use_autosuspend(dev);
pm_runtime_disable(dev);
err_buffer_cleanup:
iio_triggered_buffer_cleanup(indio_dev);
diff --git a/drivers/iio/magnetometer/bmc150_magn_spi.c b/drivers/iio/magnetometer/bmc150_magn_spi.c
index c6ed3ea8460a..4c570412d65c 100644
--- a/drivers/iio/magnetometer/bmc150_magn_spi.c
+++ b/drivers/iio/magnetometer/bmc150_magn_spi.c
@@ -29,11 +29,9 @@ static int bmc150_magn_spi_probe(struct spi_device *spi)
return bmc150_magn_probe(&spi->dev, regmap, spi->irq, id->name);
}
-static int bmc150_magn_spi_remove(struct spi_device *spi)
+static void bmc150_magn_spi_remove(struct spi_device *spi)
{
bmc150_magn_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id bmc150_magn_spi_id[] = {
diff --git a/drivers/iio/magnetometer/hmc5843_spi.c b/drivers/iio/magnetometer/hmc5843_spi.c
index 89cf59a62c28..a99dd9b33e95 100644
--- a/drivers/iio/magnetometer/hmc5843_spi.c
+++ b/drivers/iio/magnetometer/hmc5843_spi.c
@@ -74,11 +74,9 @@ static int hmc5843_spi_probe(struct spi_device *spi)
id->driver_data, id->name);
}
-static int hmc5843_spi_remove(struct spi_device *spi)
+static void hmc5843_spi_remove(struct spi_device *spi)
{
hmc5843_common_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id hmc5843_id[] = {
diff --git a/drivers/iio/potentiometer/max5487.c b/drivers/iio/potentiometer/max5487.c
index 007c2bd324cb..42723c996c9f 100644
--- a/drivers/iio/potentiometer/max5487.c
+++ b/drivers/iio/potentiometer/max5487.c
@@ -112,7 +112,7 @@ static int max5487_spi_probe(struct spi_device *spi)
return iio_device_register(indio_dev);
}
-static int max5487_spi_remove(struct spi_device *spi)
+static void max5487_spi_remove(struct spi_device *spi)
{
struct iio_dev *indio_dev = spi_get_drvdata(spi);
int ret;
@@ -123,8 +123,6 @@ static int max5487_spi_remove(struct spi_device *spi)
ret = max5487_write_cmd(spi, MAX5487_COPY_AB_TO_NV);
if (ret)
dev_warn(&spi->dev, "Failed to save wiper regs to NV regs\n");
-
- return 0;
}
static const struct spi_device_id max5487_id[] = {
diff --git a/drivers/iio/pressure/ms5611_spi.c b/drivers/iio/pressure/ms5611_spi.c
index 9fa2dcd71760..7ccd960ced5d 100644
--- a/drivers/iio/pressure/ms5611_spi.c
+++ b/drivers/iio/pressure/ms5611_spi.c
@@ -107,11 +107,9 @@ static int ms5611_spi_probe(struct spi_device *spi)
spi_get_device_id(spi)->driver_data);
}
-static int ms5611_spi_remove(struct spi_device *spi)
+static void ms5611_spi_remove(struct spi_device *spi)
{
ms5611_remove(spi_get_drvdata(spi));
-
- return 0;
}
static const struct of_device_id ms5611_spi_matches[] = {
diff --git a/drivers/iio/pressure/zpa2326_spi.c b/drivers/iio/pressure/zpa2326_spi.c
index 85201a4bae44..ee8ed77536ca 100644
--- a/drivers/iio/pressure/zpa2326_spi.c
+++ b/drivers/iio/pressure/zpa2326_spi.c
@@ -57,11 +57,9 @@ static int zpa2326_probe_spi(struct spi_device *spi)
spi->irq, ZPA2326_DEVICE_ID, regmap);
}
-static int zpa2326_remove_spi(struct spi_device *spi)
+static void zpa2326_remove_spi(struct spi_device *spi)
{
zpa2326_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id zpa2326_spi_ids[] = {
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index c903b74f46a4..35f0d5e7533d 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3322,7 +3322,7 @@ static int cm_lap_handler(struct cm_work *work)
ret = cm_init_av_by_path(param->alternate_path, NULL, &alt_av);
if (ret) {
rdma_destroy_ah_attr(&ah_attr);
- return -EINVAL;
+ goto deref;
}
spin_lock_irq(&cm_id_priv->lock);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 27a00ce2e101..50c53409ceb6 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -67,8 +67,8 @@ static const char * const cma_events[] = {
[RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit",
};
-static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr,
- union ib_gid *mgid);
+static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
+ enum ib_gid_type gid_type);
const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
@@ -1846,17 +1846,19 @@ static void destroy_mc(struct rdma_id_private *id_priv,
if (dev_addr->bound_dev_if)
ndev = dev_get_by_index(dev_addr->net,
dev_addr->bound_dev_if);
- if (ndev) {
+ if (ndev && !send_only) {
+ enum ib_gid_type gid_type;
union ib_gid mgid;
- cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr,
- &mgid);
-
- if (!send_only)
- cma_igmp_send(ndev, &mgid, false);
-
- dev_put(ndev);
+ gid_type = id_priv->cma_dev->default_gid_type
+ [id_priv->id.port_num -
+ rdma_start_port(
+ id_priv->cma_dev->device)];
+ cma_iboe_set_mgid((struct sockaddr *)&mc->addr, &mgid,
+ gid_type);
+ cma_igmp_send(ndev, &mgid, false);
}
+ dev_put(ndev);
cancel_work_sync(&mc->iboe_join.work);
}
@@ -3368,22 +3370,30 @@ err:
static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
const struct sockaddr *dst_addr)
{
- if (!src_addr || !src_addr->sa_family) {
- src_addr = (struct sockaddr *) &id->route.addr.src_addr;
- src_addr->sa_family = dst_addr->sa_family;
- if (IS_ENABLED(CONFIG_IPV6) &&
- dst_addr->sa_family == AF_INET6) {
- struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
- struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
- src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
- if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
- } else if (dst_addr->sa_family == AF_IB) {
- ((struct sockaddr_ib *) src_addr)->sib_pkey =
- ((struct sockaddr_ib *) dst_addr)->sib_pkey;
- }
- }
- return rdma_bind_addr(id, src_addr);
+ struct sockaddr_storage zero_sock = {};
+
+ if (src_addr && src_addr->sa_family)
+ return rdma_bind_addr(id, src_addr);
+
+ /*
+ * When the src_addr is not specified, automatically supply a wildcard address
+ */
+ zero_sock.ss_family = dst_addr->sa_family;
+ if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *src_addr6 =
+ (struct sockaddr_in6 *)&zero_sock;
+ struct sockaddr_in6 *dst_addr6 =
+ (struct sockaddr_in6 *)dst_addr;
+
+ src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
+ if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ id->route.addr.dev_addr.bound_dev_if =
+ dst_addr6->sin6_scope_id;
+ } else if (dst_addr->sa_family == AF_IB) {
+ ((struct sockaddr_ib *)&zero_sock)->sib_pkey =
+ ((struct sockaddr_ib *)dst_addr)->sib_pkey;
+ }
+ return rdma_bind_addr(id, (struct sockaddr *)&zero_sock);
}
/*
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index 5a3bd41b331c..4d98f931a13d 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -2,6 +2,7 @@
/*
* Copyright (c) 2016 HGST, a Western Digital Company.
*/
+#include <linux/memremap.h>
#include <linux/moduleparam.h>
#include <linux/slab.h>
#include <linux/pci-p2pdma.h>
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 2b72c4fa9550..9d6ac9dff39a 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -95,6 +95,7 @@ struct ucma_context {
u64 uid;
struct list_head list;
+ struct list_head mc_list;
struct work_struct close_work;
};
@@ -105,6 +106,7 @@ struct ucma_multicast {
u64 uid;
u8 join_state;
+ struct list_head list;
struct sockaddr_storage addr;
};
@@ -198,6 +200,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
INIT_WORK(&ctx->close_work, ucma_close_id);
init_completion(&ctx->comp);
+ INIT_LIST_HEAD(&ctx->mc_list);
/* So list_del() will work if we don't do ucma_finish_ctx() */
INIT_LIST_HEAD(&ctx->list);
ctx->file = file;
@@ -484,19 +487,19 @@ err1:
static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
- struct ucma_multicast *mc;
- unsigned long index;
+ struct ucma_multicast *mc, *tmp;
- xa_for_each(&multicast_table, index, mc) {
- if (mc->ctx != ctx)
- continue;
+ xa_lock(&multicast_table);
+ list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
+ list_del(&mc->list);
/*
* At this point mc->ctx->ref is 0 so the mc cannot leave the
* lock on the reader and this is enough serialization
*/
- xa_erase(&multicast_table, index);
+ __xa_erase(&multicast_table, mc->id);
kfree(mc);
}
+ xa_unlock(&multicast_table);
}
static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
@@ -1469,12 +1472,16 @@ static ssize_t ucma_process_join(struct ucma_file *file,
mc->uid = cmd->uid;
memcpy(&mc->addr, addr, cmd->addr_size);
- if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
+ xa_lock(&multicast_table);
+ if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
GFP_KERNEL)) {
ret = -ENOMEM;
goto err_free_mc;
}
+ list_add_tail(&mc->list, &ctx->mc_list);
+ xa_unlock(&multicast_table);
+
mutex_lock(&ctx->mutex);
ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
join_state, mc);
@@ -1500,8 +1507,11 @@ err_leave_multicast:
mutex_unlock(&ctx->mutex);
ucma_cleanup_mc_events(mc);
err_xa_erase:
- xa_erase(&multicast_table, mc->id);
+ xa_lock(&multicast_table);
+ list_del(&mc->list);
+ __xa_erase(&multicast_table, mc->id);
err_free_mc:
+ xa_unlock(&multicast_table);
kfree(mc);
err_put_ctx:
ucma_put_ctx(ctx);
@@ -1569,15 +1579,17 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
mc = ERR_PTR(-EINVAL);
else if (!refcount_inc_not_zero(&mc->ctx->ref))
mc = ERR_PTR(-ENXIO);
- else
- __xa_erase(&multicast_table, mc->id);
- xa_unlock(&multicast_table);
if (IS_ERR(mc)) {
+ xa_unlock(&multicast_table);
ret = PTR_ERR(mc);
goto out;
}
+ list_del(&mc->list);
+ __xa_erase(&multicast_table, mc->id);
+ xa_unlock(&multicast_table);
+
mutex_lock(&mc->ctx->mutex);
rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
mutex_unlock(&mc->ctx->mutex);
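[Editor's note] Across these ucma hunks the new per-context mc_list is kept consistent with the multicast_table xarray by doing both updates inside a single xa_lock() section, using the __xa_alloc()/__xa_erase() variants that expect the caller to hold the lock. Per its documentation, __xa_alloc() may drop and retake the lock to allocate, which is why the list insertion happens only after it succeeds. The shape of the pattern, reduced to its essentials:

    xa_lock(&multicast_table);
    ret = __xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
                     GFP_KERNEL);
    if (!ret)
            list_add_tail(&mc->list, &ctx->mc_list); /* same critical section */
    xa_unlock(&multicast_table);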
diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h
index 909122934246..aec60d4888eb 100644
--- a/drivers/infiniband/hw/hfi1/ipoib.h
+++ b/drivers/infiniband/hw/hfi1/ipoib.h
@@ -55,7 +55,7 @@ union hfi1_ipoib_flow {
*/
struct ipoib_txreq {
struct sdma_txreq txreq;
- struct hfi1_sdma_header sdma_hdr;
+ struct hfi1_sdma_header *sdma_hdr;
int sdma_status;
int complete;
struct hfi1_ipoib_dev_priv *priv;
diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c
index e1a2b02bbd91..5d814afdf7f3 100644
--- a/drivers/infiniband/hw/hfi1/ipoib_main.c
+++ b/drivers/infiniband/hw/hfi1/ipoib_main.c
@@ -22,26 +22,35 @@ static int hfi1_ipoib_dev_init(struct net_device *dev)
int ret;
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!dev->tstats)
+ return -ENOMEM;
ret = priv->netdev_ops->ndo_init(dev);
if (ret)
- return ret;
+ goto out_ret;
ret = hfi1_netdev_add_data(priv->dd,
qpn_from_mac(priv->netdev->dev_addr),
dev);
if (ret < 0) {
priv->netdev_ops->ndo_uninit(dev);
- return ret;
+ goto out_ret;
}
return 0;
+out_ret:
+ free_percpu(dev->tstats);
+ dev->tstats = NULL;
+ return ret;
}
static void hfi1_ipoib_dev_uninit(struct net_device *dev)
{
struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ free_percpu(dev->tstats);
+ dev->tstats = NULL;
+
hfi1_netdev_remove_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr));
priv->netdev_ops->ndo_uninit(dev);
@@ -166,12 +175,7 @@ static void hfi1_ipoib_netdev_dtor(struct net_device *dev)
hfi1_ipoib_rxq_deinit(priv->netdev);
free_percpu(dev->tstats);
-}
-
-static void hfi1_ipoib_free_rdma_netdev(struct net_device *dev)
-{
- hfi1_ipoib_netdev_dtor(dev);
- free_netdev(dev);
+ dev->tstats = NULL;
}
static void hfi1_ipoib_set_id(struct net_device *dev, int id)
@@ -211,24 +215,23 @@ static int hfi1_ipoib_setup_rn(struct ib_device *device,
priv->port_num = port_num;
priv->netdev_ops = netdev->netdev_ops;
- netdev->netdev_ops = &hfi1_ipoib_netdev_ops;
-
ib_query_pkey(device, port_num, priv->pkey_index, &priv->pkey);
rc = hfi1_ipoib_txreq_init(priv);
if (rc) {
dd_dev_err(dd, "IPoIB netdev TX init - failed(%d)\n", rc);
- hfi1_ipoib_free_rdma_netdev(netdev);
return rc;
}
rc = hfi1_ipoib_rxq_init(netdev);
if (rc) {
dd_dev_err(dd, "IPoIB netdev RX init - failed(%d)\n", rc);
- hfi1_ipoib_free_rdma_netdev(netdev);
+ hfi1_ipoib_txreq_deinit(priv);
return rc;
}
+ netdev->netdev_ops = &hfi1_ipoib_netdev_ops;
+
netdev->priv_destructor = hfi1_ipoib_netdev_dtor;
netdev->needs_free_netdev = true;
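[Editor's note] A detail worth noting in the init/uninit rework above: every path that frees dev->tstats also sets the pointer to NULL, so the netdev destructor, which frees it as well, degrades to a harmless no-op if it runs afterwards:

    free_percpu(dev->tstats);
    dev->tstats = NULL;      /* makes a second teardown a no-op */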
diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c
index f4010890309f..d6bbdb8fcb50 100644
--- a/drivers/infiniband/hw/hfi1/ipoib_tx.c
+++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c
@@ -122,7 +122,7 @@ static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget)
dd_dev_warn(priv->dd,
"%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n",
__func__, tx->sdma_status,
- le64_to_cpu(tx->sdma_hdr.pbc), tx->txq->q_idx,
+ le64_to_cpu(tx->sdma_hdr->pbc), tx->txq->q_idx,
tx->txq->sde->this_idx);
}
@@ -231,7 +231,7 @@ static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx,
{
struct hfi1_devdata *dd = txp->dd;
struct sdma_txreq *txreq = &tx->txreq;
- struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr;
+ struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr;
u16 pkt_bytes =
sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len;
int ret;
@@ -256,7 +256,7 @@ static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx,
struct ipoib_txparms *txp)
{
struct hfi1_ipoib_dev_priv *priv = tx->txq->priv;
- struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr;
+ struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr;
struct sk_buff *skb = tx->skb;
struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp);
struct rdma_ah_attr *ah_attr = txp->ah_attr;
@@ -483,7 +483,7 @@ static int hfi1_ipoib_send_dma_single(struct net_device *dev,
if (likely(!ret)) {
tx_ok:
trace_sdma_output_ibhdr(txq->priv->dd,
- &tx->sdma_hdr.hdr,
+ &tx->sdma_hdr->hdr,
ib_is_sc5(txp->flow.sc5));
hfi1_ipoib_check_queue_depth(txq);
return NETDEV_TX_OK;
@@ -547,7 +547,7 @@ static int hfi1_ipoib_send_dma_list(struct net_device *dev,
hfi1_ipoib_check_queue_depth(txq);
trace_sdma_output_ibhdr(txq->priv->dd,
- &tx->sdma_hdr.hdr,
+ &tx->sdma_hdr->hdr,
ib_is_sc5(txp->flow.sc5));
if (!netdev_xmit_more())
@@ -683,7 +683,8 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
{
struct net_device *dev = priv->netdev;
u32 tx_ring_size, tx_item_size;
- int i;
+ struct hfi1_ipoib_circ_buf *tx_ring;
+ int i, j;
/*
* Ring holds 1 less than tx_ring_size
@@ -701,7 +702,9 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
for (i = 0; i < dev->num_tx_queues; i++) {
struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+ struct ipoib_txreq *tx;
+ tx_ring = &txq->tx_ring;
iowait_init(&txq->wait,
0,
hfi1_ipoib_flush_txq,
@@ -725,14 +728,19 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
priv->dd->node);
txq->tx_ring.items =
- kcalloc_node(tx_ring_size, tx_item_size,
- GFP_KERNEL, priv->dd->node);
+ kvzalloc_node(array_size(tx_ring_size, tx_item_size),
+ GFP_KERNEL, priv->dd->node);
if (!txq->tx_ring.items)
goto free_txqs;
txq->tx_ring.max_items = tx_ring_size;
- txq->tx_ring.shift = ilog2(tx_ring_size);
+ txq->tx_ring.shift = ilog2(tx_item_size);
txq->tx_ring.avail = hfi1_ipoib_ring_hwat(txq);
+ tx_ring = &txq->tx_ring;
+ for (j = 0; j < tx_ring_size; j++)
+ hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr =
+ kzalloc_node(sizeof(*tx->sdma_hdr),
+ GFP_KERNEL, priv->dd->node);
netif_tx_napi_add(dev, &txq->napi,
hfi1_ipoib_poll_tx_ring,
@@ -746,7 +754,10 @@ free_txqs:
struct hfi1_ipoib_txq *txq = &priv->txqs[i];
netif_napi_del(&txq->napi);
- kfree(txq->tx_ring.items);
+ tx_ring = &txq->tx_ring;
+ for (j = 0; j < tx_ring_size; j++)
+ kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr);
+ kvfree(tx_ring->items);
}
kfree(priv->txqs);
@@ -780,17 +791,20 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq)
void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv)
{
- int i;
+ int i, j;
for (i = 0; i < priv->netdev->num_tx_queues; i++) {
struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+ struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
iowait_cancel_work(&txq->wait);
iowait_sdma_drain(&txq->wait);
hfi1_ipoib_drain_tx_list(txq);
netif_napi_del(&txq->napi);
hfi1_ipoib_drain_tx_ring(txq);
- kfree(txq->tx_ring.items);
+ for (j = 0; j < tx_ring->max_items; j++)
+ kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr);
+ kvfree(tx_ring->items);
}
kfree(priv->txqs);
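[Editor's note] Two allocation changes ride along with the sdma_hdr pointer conversion: the ring itself moves from kcalloc_node() to kvzalloc_node(array_size(...)), letting large tx rings fall back to vmalloc when contiguous pages are scarce, and every exit path correspondingly switches to kvfree(). The idiom in isolation (array_size() is the overflow-saturating helper from <linux/overflow.h>):

    void *items = kvzalloc_node(array_size(ring_size, item_size),
                                GFP_KERNEL, node);
    if (!items)
            return -ENOMEM;
    /* ... populate and use the ring ... */
    kvfree(items);           /* correct for kmalloc- or vmalloc-backed memory */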
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 1c3d97229988..93b1650eacfa 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -3237,7 +3237,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
ew = kmalloc(sizeof *ew, GFP_ATOMIC);
if (!ew)
- break;
+ return;
INIT_WORK(&ew->work, handle_port_mgmt_change_event);
memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index 0a3b28142c05..41c272980f91 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -541,7 +541,7 @@ static struct attribute *port_diagc_attributes[] = {
};
static const struct attribute_group port_diagc_group = {
- .name = "linkcontrol",
+ .name = "diag_counters",
.attrs = port_diagc_attributes,
};
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 3305f2744bfa..ae50b56e8913 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -3073,6 +3073,8 @@ do_write:
case IB_WR_ATOMIC_FETCH_AND_ADD:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
goto inv_err;
+ if (unlikely(wqe->atomic_wr.remote_addr & (sizeof(u64) - 1)))
+ goto inv_err;
if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
wqe->atomic_wr.remote_addr,
wqe->atomic_wr.rkey,
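[Editor's note] The added test rejects atomics at addresses that are not 8-byte aligned: remote_addr & (sizeof(u64) - 1) is nonzero exactly when any of the low three bits are set. The kernel's IS_ALIGNED() macro expresses the same check, so an equivalent spelling would be:

    if (unlikely(!IS_ALIGNED(wqe->atomic_wr.remote_addr, sizeof(u64))))
            goto inv_err;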
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index 368959ae9a8c..df03d84c6868 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -644,14 +644,9 @@ static inline struct siw_sqe *orq_get_current(struct siw_qp *qp)
return &qp->orq[qp->orq_get % qp->attrs.orq_size];
}
-static inline struct siw_sqe *orq_get_tail(struct siw_qp *qp)
-{
- return &qp->orq[qp->orq_put % qp->attrs.orq_size];
-}
-
static inline struct siw_sqe *orq_get_free(struct siw_qp *qp)
{
- struct siw_sqe *orq_e = orq_get_tail(qp);
+ struct siw_sqe *orq_e = &qp->orq[qp->orq_put % qp->attrs.orq_size];
if (READ_ONCE(orq_e->flags) == 0)
return orq_e;
diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c
index 60116f20653c..875ea6f1b04a 100644
--- a/drivers/infiniband/sw/siw/siw_qp_rx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_rx.c
@@ -1153,11 +1153,12 @@ static int siw_check_tx_fence(struct siw_qp *qp)
spin_lock_irqsave(&qp->orq_lock, flags);
- rreq = orq_get_current(qp);
-
/* free current orq entry */
+ rreq = orq_get_current(qp);
WRITE_ONCE(rreq->flags, 0);
+ qp->orq_get++;
+
if (qp->tx_ctx.orq_fence) {
if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) {
pr_warn("siw: [QP %u]: fence resume: bad status %d\n",
@@ -1165,10 +1166,12 @@ static int siw_check_tx_fence(struct siw_qp *qp)
rv = -EPROTO;
goto out;
}
- /* resume SQ processing */
+ /* resume SQ processing, if possible */
if (tx_waiting->sqe.opcode == SIW_OP_READ ||
tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
- rreq = orq_get_tail(qp);
+
+ /* SQ processing was stopped because of a full ORQ */
+ rreq = orq_get_free(qp);
if (unlikely(!rreq)) {
pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp));
rv = -EPROTO;
@@ -1181,15 +1184,14 @@ static int siw_check_tx_fence(struct siw_qp *qp)
resume_tx = 1;
} else if (siw_orq_empty(qp)) {
+ /*
+ * SQ processing was stopped by fenced work request.
+ * Resume since all previous Read's are now completed.
+ */
qp->tx_ctx.orq_fence = 0;
resume_tx = 1;
- } else {
- pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n",
- qp_id(qp), qp->orq_get, qp->orq_put);
- rv = -EPROTO;
}
}
- qp->orq_get++;
out:
spin_unlock_irqrestore(&qp->orq_lock, flags);
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index a3dd2cb6d5c9..54ef367b074a 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -313,7 +313,8 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) {
siw_dbg(base_dev, "too many QP's\n");
- return -ENOMEM;
+ rv = -ENOMEM;
+ goto err_atomic;
}
if (attrs->qp_type != IB_QPT_RC) {
siw_dbg(base_dev, "only RC QP's supported\n");
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 7c3f98e57889..759b85f03331 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -2682,6 +2682,8 @@ static void rtrs_clt_dev_release(struct device *dev)
struct rtrs_clt_sess *clt = container_of(dev, struct rtrs_clt_sess,
dev);
+ mutex_destroy(&clt->paths_ev_mutex);
+ mutex_destroy(&clt->paths_mutex);
kfree(clt);
}
@@ -2711,6 +2713,8 @@ static struct rtrs_clt_sess *alloc_clt(const char *sessname, size_t paths_num,
return ERR_PTR(-ENOMEM);
}
+ clt->dev.class = rtrs_clt_dev_class;
+ clt->dev.release = rtrs_clt_dev_release;
uuid_gen(&clt->paths_uuid);
INIT_LIST_HEAD_RCU(&clt->paths_list);
clt->paths_num = paths_num;
@@ -2727,53 +2731,51 @@ static struct rtrs_clt_sess *alloc_clt(const char *sessname, size_t paths_num,
init_waitqueue_head(&clt->permits_wait);
mutex_init(&clt->paths_ev_mutex);
mutex_init(&clt->paths_mutex);
+ device_initialize(&clt->dev);
- clt->dev.class = rtrs_clt_dev_class;
- clt->dev.release = rtrs_clt_dev_release;
err = dev_set_name(&clt->dev, "%s", sessname);
if (err)
- goto err;
+ goto err_put;
+
/*
* Suppress user space notification until
* sysfs files are created
*/
dev_set_uevent_suppress(&clt->dev, true);
- err = device_register(&clt->dev);
- if (err) {
- put_device(&clt->dev);
- goto err;
- }
+ err = device_add(&clt->dev);
+ if (err)
+ goto err_put;
clt->kobj_paths = kobject_create_and_add("paths", &clt->dev.kobj);
if (!clt->kobj_paths) {
err = -ENOMEM;
- goto err_dev;
+ goto err_del;
}
err = rtrs_clt_create_sysfs_root_files(clt);
if (err) {
kobject_del(clt->kobj_paths);
kobject_put(clt->kobj_paths);
- goto err_dev;
+ goto err_del;
}
dev_set_uevent_suppress(&clt->dev, false);
kobject_uevent(&clt->dev.kobj, KOBJ_ADD);
return clt;
-err_dev:
- device_unregister(&clt->dev);
-err:
+err_del:
+ device_del(&clt->dev);
+err_put:
free_percpu(clt->pcpu_path);
- kfree(clt);
+ put_device(&clt->dev);
return ERR_PTR(err);
}
static void free_clt(struct rtrs_clt_sess *clt)
{
- free_permits(clt);
free_percpu(clt->pcpu_path);
- mutex_destroy(&clt->paths_ev_mutex);
- mutex_destroy(&clt->paths_mutex);
- /* release callback will free clt in last put */
+
+ /*
+ * release callback will free clt and destroy mutexes in last put
+ */
device_unregister(&clt->dev);
}
@@ -2890,6 +2892,7 @@ void rtrs_clt_close(struct rtrs_clt_sess *clt)
rtrs_clt_destroy_path_files(clt_path, NULL);
kobject_put(&clt_path->kobj);
}
+ free_permits(clt);
free_clt(clt);
}
EXPORT_SYMBOL(rtrs_clt_close);
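[Editor's note] The alloc_clt() rework follows the driver-core lifetime rule: after device_initialize(), the structure is owned by the device refcount, so every error path must end in put_device(), which runs the release callback (now also the place where the mutexes are destroyed), rather than kfree(). Splitting device_register() into device_initialize() + device_add() is what makes that unwind expressible. The canonical shape, with hypothetical obj/obj_release names:

    /* obj and obj_release are placeholders for this sketch */
    device_initialize(&obj->dev);        /* refcount owns obj from here on */
    obj->dev.release = obj_release;      /* release() does the kfree() */

    err = dev_set_name(&obj->dev, "%s", name);
    if (err)
            goto err_put;
    err = device_add(&obj->dev);
    if (err)
            goto err_put;
    return 0;

    err_put:
            put_device(&obj->dev);       /* frees obj via obj_release() */
            return err;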
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index e174e853f8a4..285b766e4e70 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -4047,9 +4047,11 @@ static void srp_remove_one(struct ib_device *device, void *client_data)
spin_unlock(&host->target_lock);
/*
- * Wait for tl_err and target port removal tasks.
+ * srp_queue_remove_work() queues a call to
+ * srp_remove_target(). The latter function cancels
+ * target->tl_err_work, so waiting for the remove work items to
+ * finish is sufficient.
*/
- flush_workqueue(system_long_wq);
flush_workqueue(srp_remove_wq);
kfree(host);
diff --git a/drivers/input/input.c b/drivers/input/input.c
index ccaeb2426385..c3139bc2aa0d 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -2285,6 +2285,12 @@ int input_register_device(struct input_dev *dev)
/* KEY_RESERVED is not supposed to be transmitted to userspace. */
__clear_bit(KEY_RESERVED, dev->keybit);
+ /* Buttonpads should not map BTN_RIGHT and/or BTN_MIDDLE. */
+ if (test_bit(INPUT_PROP_BUTTONPAD, dev->propbit)) {
+ __clear_bit(BTN_RIGHT, dev->keybit);
+ __clear_bit(BTN_MIDDLE, dev->keybit);
+ }
+
/* Make sure that bitmasks not mentioned in dev->evbit are clean. */
input_cleanse_bitmasks(dev);
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index 0c607da9ee10..9417ee0b1eff 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -556,7 +556,7 @@ config KEYBOARD_PMIC8XXX
config KEYBOARD_SAMSUNG
tristate "Samsung keypad support"
- depends on HAVE_CLK
+ depends on HAS_IOMEM && HAVE_CLK
select INPUT_MATRIXKMAP
help
Say Y here if you want to use the keypad on your Samsung mobile
diff --git a/drivers/input/keyboard/applespi.c b/drivers/input/keyboard/applespi.c
index eda1b23002b5..d1f5354d5ea2 100644
--- a/drivers/input/keyboard/applespi.c
+++ b/drivers/input/keyboard/applespi.c
@@ -1858,7 +1858,7 @@ static void applespi_drain_reads(struct applespi_data *applespi)
spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
}
-static int applespi_remove(struct spi_device *spi)
+static void applespi_remove(struct spi_device *spi)
{
struct applespi_data *applespi = spi_get_drvdata(spi);
@@ -1871,8 +1871,6 @@ static int applespi_remove(struct spi_device *spi)
applespi_drain_reads(applespi);
debugfs_remove_recursive(applespi->debugfs_root);
-
- return 0;
}
static void applespi_shutdown(struct spi_device *spi)
diff --git a/drivers/input/misc/adxl34x-spi.c b/drivers/input/misc/adxl34x-spi.c
index 6e51c9bc619f..91e44d4c66f7 100644
--- a/drivers/input/misc/adxl34x-spi.c
+++ b/drivers/input/misc/adxl34x-spi.c
@@ -87,13 +87,11 @@ static int adxl34x_spi_probe(struct spi_device *spi)
return 0;
}
-static int adxl34x_spi_remove(struct spi_device *spi)
+static void adxl34x_spi_remove(struct spi_device *spi)
{
struct adxl34x *ac = spi_get_drvdata(spi);
adxl34x_remove(ac);
-
- return 0;
}
static int __maybe_unused adxl34x_spi_suspend(struct device *dev)
diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index 47af62c12267..e1758d5ffe42 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -186,55 +186,21 @@ static int elan_get_fwinfo(u16 ic_type, u8 iap_version, u16 *validpage_count,
return 0;
}
-static int elan_enable_power(struct elan_tp_data *data)
+static int elan_set_power(struct elan_tp_data *data, bool on)
{
int repeat = ETP_RETRY_COUNT;
int error;
- error = regulator_enable(data->vcc);
- if (error) {
- dev_err(&data->client->dev,
- "failed to enable regulator: %d\n", error);
- return error;
- }
-
do {
- error = data->ops->power_control(data->client, true);
+ error = data->ops->power_control(data->client, on);
if (error >= 0)
return 0;
msleep(30);
} while (--repeat > 0);
- dev_err(&data->client->dev, "failed to enable power: %d\n", error);
- return error;
-}
-
-static int elan_disable_power(struct elan_tp_data *data)
-{
- int repeat = ETP_RETRY_COUNT;
- int error;
-
- do {
- error = data->ops->power_control(data->client, false);
- if (!error) {
- error = regulator_disable(data->vcc);
- if (error) {
- dev_err(&data->client->dev,
- "failed to disable regulator: %d\n",
- error);
- /* Attempt to power the chip back up */
- data->ops->power_control(data->client, true);
- break;
- }
-
- return 0;
- }
-
- msleep(30);
- } while (--repeat > 0);
-
- dev_err(&data->client->dev, "failed to disable power: %d\n", error);
+ dev_err(&data->client->dev, "failed to set power %s: %d\n",
+ on ? "on" : "off", error);
return error;
}
@@ -1399,9 +1365,19 @@ static int __maybe_unused elan_suspend(struct device *dev)
/* Enable wake from IRQ */
data->irq_wake = (enable_irq_wake(client->irq) == 0);
} else {
- ret = elan_disable_power(data);
+ ret = elan_set_power(data, false);
+ if (ret)
+ goto err;
+
+ ret = regulator_disable(data->vcc);
+ if (ret) {
+ dev_err(dev, "error %d disabling regulator\n", ret);
+ /* Attempt to power the chip back up */
+ elan_set_power(data, true);
+ }
}
+err:
mutex_unlock(&data->sysfs_mutex);
return ret;
}
@@ -1412,12 +1388,18 @@ static int __maybe_unused elan_resume(struct device *dev)
struct elan_tp_data *data = i2c_get_clientdata(client);
int error;
- if (device_may_wakeup(dev) && data->irq_wake) {
+ if (!device_may_wakeup(dev)) {
+ error = regulator_enable(data->vcc);
+ if (error) {
+ dev_err(dev, "error %d enabling regulator\n", error);
+ goto err;
+ }
+ } else if (data->irq_wake) {
disable_irq_wake(client->irq);
data->irq_wake = false;
}
- error = elan_enable_power(data);
+ error = elan_set_power(data, true);
if (error) {
dev_err(dev, "power up when resuming failed: %d\n", error);
goto err;
diff --git a/drivers/input/mouse/psmouse-smbus.c b/drivers/input/mouse/psmouse-smbus.c
index a472489ccbad..164f6c757f6b 100644
--- a/drivers/input/mouse/psmouse-smbus.c
+++ b/drivers/input/mouse/psmouse-smbus.c
@@ -75,6 +75,8 @@ static void psmouse_smbus_detach_i2c_client(struct i2c_client *client)
"Marking SMBus companion %s as gone\n",
dev_name(&smbdev->client->dev));
smbdev->dead = true;
+ device_link_remove(&smbdev->client->dev,
+ &smbdev->psmouse->ps2dev.serio->dev);
serio_rescan(smbdev->psmouse->ps2dev.serio);
} else {
list_del(&smbdev->node);
@@ -174,6 +176,8 @@ static void psmouse_smbus_disconnect(struct psmouse *psmouse)
kfree(smbdev);
} else {
smbdev->dead = true;
+ device_link_remove(&smbdev->client->dev,
+ &psmouse->ps2dev.serio->dev);
psmouse_dbg(smbdev->psmouse,
"posting removal request for SMBus companion %s\n",
dev_name(&smbdev->client->dev));
@@ -270,6 +274,12 @@ int psmouse_smbus_init(struct psmouse *psmouse,
if (smbdev->client) {
/* We have our companion device */
+ if (!device_link_add(&smbdev->client->dev,
+ &psmouse->ps2dev.serio->dev,
+ DL_FLAG_STATELESS))
+ psmouse_warn(psmouse,
+ "failed to set up link with iSMBus companion %s\n",
+ dev_name(&smbdev->client->dev));
return 0;
}
diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c
index fcb1b646436a..1581f6ef0927 100644
--- a/drivers/input/tablet/aiptek.c
+++ b/drivers/input/tablet/aiptek.c
@@ -1787,15 +1787,13 @@ aiptek_probe(struct usb_interface *intf, const struct usb_device_id *id)
input_set_abs_params(inputdev, ABS_TILT_Y, AIPTEK_TILT_MIN, AIPTEK_TILT_MAX, 0, 0);
input_set_abs_params(inputdev, ABS_WHEEL, AIPTEK_WHEEL_MIN, AIPTEK_WHEEL_MAX - 1, 0, 0);
- /* Verify that a device really has an endpoint */
- if (intf->cur_altsetting->desc.bNumEndpoints < 1) {
+ err = usb_find_common_endpoints(intf->cur_altsetting,
+ NULL, NULL, &endpoint, NULL);
+ if (err) {
dev_err(&intf->dev,
- "interface has %d endpoints, but must have minimum 1\n",
- intf->cur_altsetting->desc.bNumEndpoints);
- err = -EINVAL;
+ "interface has no int in endpoints, but must have minimum 1\n");
goto fail3;
}
- endpoint = &intf->cur_altsetting->endpoint[0].desc;
/* Go set up our URB, which is called when the tablet receives
* input.
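[Editor's note] usb_find_common_endpoints() replaces the open-coded bNumEndpoints check: it walks an altsetting's endpoint descriptors and fills in the first bulk-in/bulk-out/int-in/int-out pointers requested, failing when any requested kind is absent (-ENXIO in current kernels). Passing NULL for the unneeded slots asks only for the interrupt-in endpoint here. Usage in isolation:

    struct usb_endpoint_descriptor *int_in;
    int err;

    err = usb_find_common_endpoints(intf->cur_altsetting,
                                    NULL, NULL,      /* no bulk endpoints */
                                    &int_in, NULL);  /* interrupt-in only */
    if (err)
            return err;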
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index a25a77dd9a32..bed68a68f330 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -1411,13 +1411,11 @@ static int ads7846_probe(struct spi_device *spi)
return 0;
}
-static int ads7846_remove(struct spi_device *spi)
+static void ads7846_remove(struct spi_device *spi)
{
struct ads7846 *ts = spi_get_drvdata(spi);
ads7846_stop(ts);
-
- return 0;
}
static struct spi_driver ads7846_driver = {
diff --git a/drivers/input/touchscreen/cyttsp4_spi.c b/drivers/input/touchscreen/cyttsp4_spi.c
index 2aec41eb76b7..5d7db84f2749 100644
--- a/drivers/input/touchscreen/cyttsp4_spi.c
+++ b/drivers/input/touchscreen/cyttsp4_spi.c
@@ -164,12 +164,10 @@ static int cyttsp4_spi_probe(struct spi_device *spi)
return PTR_ERR_OR_ZERO(ts);
}
-static int cyttsp4_spi_remove(struct spi_device *spi)
+static void cyttsp4_spi_remove(struct spi_device *spi)
{
struct cyttsp4 *ts = spi_get_drvdata(spi);
cyttsp4_remove(ts);
-
- return 0;
}
static struct spi_driver cyttsp4_spi_driver = {
diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c
index a3bfc7a41679..752e8ba4fecb 100644
--- a/drivers/input/touchscreen/goodix.c
+++ b/drivers/input/touchscreen/goodix.c
@@ -18,6 +18,7 @@
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
+#include <linux/platform_data/x86/soc.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/of.h>
@@ -805,21 +806,6 @@ static int goodix_reset(struct goodix_ts_data *ts)
}
#ifdef ACPI_GPIO_SUPPORT
-#include <asm/cpu_device_id.h>
-#include <asm/intel-family.h>
-
-static const struct x86_cpu_id baytrail_cpu_ids[] = {
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT, X86_FEATURE_ANY, },
- {}
-};
-
-static inline bool is_byt(void)
-{
- const struct x86_cpu_id *id = x86_match_cpu(baytrail_cpu_ids);
-
- return !!id;
-}
-
static const struct acpi_gpio_params first_gpio = { 0, 0, false };
static const struct acpi_gpio_params second_gpio = { 1, 0, false };
@@ -878,7 +864,7 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts)
const struct acpi_gpio_mapping *gpio_mapping = NULL;
struct device *dev = &ts->client->dev;
LIST_HEAD(resources);
- int ret;
+ int irq, ret;
ts->gpio_count = 0;
ts->gpio_int_idx = -1;
@@ -891,6 +877,20 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts)
acpi_dev_free_resource_list(&resources);
+ /*
+ * CHT devices should have a GpioInt + a regular GPIO ACPI resource.
+ * Some CHT devices have a bug (where there also is a bogus Interrupt
+ * resource, copied from a previous BYT-based generation). i2c-core-acpi
+ * will use the non-working Interrupt resource, fix this up.
+ */
+ if (soc_intel_is_cht() && ts->gpio_count == 2 && ts->gpio_int_idx != -1) {
+ irq = acpi_dev_gpio_irq_get(ACPI_COMPANION(dev), 0);
+ if (irq > 0 && irq != ts->client->irq) {
+ dev_warn(dev, "Overriding IRQ %d -> %d\n", ts->client->irq, irq);
+ ts->client->irq = irq;
+ }
+ }
+
if (ts->gpio_count == 2 && ts->gpio_int_idx == 0) {
ts->irq_pin_access_method = IRQ_PIN_ACCESS_ACPI_GPIO;
gpio_mapping = acpi_goodix_int_first_gpios;
@@ -903,7 +903,7 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts)
dev_info(dev, "Using ACPI INTI and INTO methods for IRQ pin access\n");
ts->irq_pin_access_method = IRQ_PIN_ACCESS_ACPI_METHOD;
gpio_mapping = acpi_goodix_reset_only_gpios;
- } else if (is_byt() && ts->gpio_count == 2 && ts->gpio_int_idx == -1) {
+ } else if (soc_intel_is_byt() && ts->gpio_count == 2 && ts->gpio_int_idx == -1) {
dev_info(dev, "No ACPI GpioInt resource, assuming that the GPIO order is reset, int\n");
ts->irq_pin_access_method = IRQ_PIN_ACCESS_ACPI_GPIO;
gpio_mapping = acpi_goodix_int_last_gpios;
diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c
index a2f55920b9b2..555dfe98b3c4 100644
--- a/drivers/input/touchscreen/tsc2005.c
+++ b/drivers/input/touchscreen/tsc2005.c
@@ -64,11 +64,9 @@ static int tsc2005_probe(struct spi_device *spi)
tsc2005_cmd);
}
-static int tsc2005_remove(struct spi_device *spi)
+static void tsc2005_remove(struct spi_device *spi)
{
tsc200x_remove(&spi->dev);
-
- return 0;
}
#ifdef CONFIG_OF
diff --git a/drivers/input/touchscreen/wm97xx-core.c b/drivers/input/touchscreen/wm97xx-core.c
index 78d2ee99f37a..1b58611c8084 100644
--- a/drivers/input/touchscreen/wm97xx-core.c
+++ b/drivers/input/touchscreen/wm97xx-core.c
@@ -615,10 +615,9 @@ static int wm97xx_register_touch(struct wm97xx *wm)
* extensions)
*/
wm->touch_dev = platform_device_alloc("wm97xx-touch", -1);
- if (!wm->touch_dev) {
- ret = -ENOMEM;
- goto touch_err;
- }
+ if (!wm->touch_dev)
+ return -ENOMEM;
+
platform_set_drvdata(wm->touch_dev, wm);
wm->touch_dev->dev.parent = wm->dev;
wm->touch_dev->dev.platform_data = pdata;
@@ -629,9 +628,6 @@ static int wm97xx_register_touch(struct wm97xx *wm)
return 0;
touch_reg_err:
platform_device_put(wm->touch_dev);
-touch_err:
- input_unregister_device(wm->input_dev);
- wm->input_dev = NULL;
return ret;
}
@@ -639,8 +635,6 @@ touch_err:
static void wm97xx_unregister_touch(struct wm97xx *wm)
{
platform_device_unregister(wm->touch_dev);
- input_unregister_device(wm->input_dev);
- wm->input_dev = NULL;
}
static int _wm97xx_probe(struct wm97xx *wm)
diff --git a/drivers/input/touchscreen/zinitix.c b/drivers/input/touchscreen/zinitix.c
index 7c82c4f5fa6b..8bd03278ad9a 100644
--- a/drivers/input/touchscreen/zinitix.c
+++ b/drivers/input/touchscreen/zinitix.c
@@ -135,7 +135,7 @@ struct point_coord {
struct touch_event {
__le16 status;
- u8 finger_cnt;
+ u8 finger_mask;
u8 time_stamp;
struct point_coord point_coord[MAX_SUPPORTED_FINGER_NUM];
};
@@ -322,11 +322,32 @@ static int zinitix_send_power_on_sequence(struct bt541_ts_data *bt541)
static void zinitix_report_finger(struct bt541_ts_data *bt541, int slot,
const struct point_coord *p)
{
+ u16 x, y;
+
+ if (unlikely(!(p->sub_status &
+ (SUB_BIT_UP | SUB_BIT_DOWN | SUB_BIT_MOVE)))) {
+ dev_dbg(&bt541->client->dev, "unknown finger event %#02x\n",
+ p->sub_status);
+ return;
+ }
+
+ x = le16_to_cpu(p->x);
+ y = le16_to_cpu(p->y);
+
input_mt_slot(bt541->input_dev, slot);
- input_mt_report_slot_state(bt541->input_dev, MT_TOOL_FINGER, true);
- touchscreen_report_pos(bt541->input_dev, &bt541->prop,
- le16_to_cpu(p->x), le16_to_cpu(p->y), true);
- input_report_abs(bt541->input_dev, ABS_MT_TOUCH_MAJOR, p->width);
+ if (input_mt_report_slot_state(bt541->input_dev, MT_TOOL_FINGER,
+ !(p->sub_status & SUB_BIT_UP))) {
+ touchscreen_report_pos(bt541->input_dev,
+ &bt541->prop, x, y, true);
+ input_report_abs(bt541->input_dev,
+ ABS_MT_TOUCH_MAJOR, p->width);
+ dev_dbg(&bt541->client->dev, "finger %d %s (%u, %u)\n",
+ slot, p->sub_status & SUB_BIT_DOWN ? "down" : "move",
+ x, y);
+ } else {
+ dev_dbg(&bt541->client->dev, "finger %d up (%u, %u)\n",
+ slot, x, y);
+ }
}
static irqreturn_t zinitix_ts_irq_handler(int irq, void *bt541_handler)
@@ -334,6 +355,7 @@ static irqreturn_t zinitix_ts_irq_handler(int irq, void *bt541_handler)
struct bt541_ts_data *bt541 = bt541_handler;
struct i2c_client *client = bt541->client;
struct touch_event touch_event;
+ unsigned long finger_mask;
int error;
int i;
@@ -346,10 +368,14 @@ static irqreturn_t zinitix_ts_irq_handler(int irq, void *bt541_handler)
goto out;
}
- for (i = 0; i < MAX_SUPPORTED_FINGER_NUM; i++)
- if (touch_event.point_coord[i].sub_status & SUB_BIT_EXIST)
- zinitix_report_finger(bt541, i,
- &touch_event.point_coord[i]);
+ finger_mask = touch_event.finger_mask;
+ for_each_set_bit(i, &finger_mask, MAX_SUPPORTED_FINGER_NUM) {
+ const struct point_coord *p = &touch_event.point_coord[i];
+
+ /* Only process contacts that are actually reported */
+ if (p->sub_status & SUB_BIT_EXIST)
+ zinitix_report_finger(bt541, i, p);
+ }
input_mt_sync_frame(bt541->input_dev);
input_sync(bt541->input_dev);
@@ -571,8 +597,20 @@ static SIMPLE_DEV_PM_OPS(zinitix_pm_ops, zinitix_suspend, zinitix_resume);
#ifdef CONFIG_OF
static const struct of_device_id zinitix_of_match[] = {
+ { .compatible = "zinitix,bt402" },
+ { .compatible = "zinitix,bt403" },
+ { .compatible = "zinitix,bt404" },
+ { .compatible = "zinitix,bt412" },
+ { .compatible = "zinitix,bt413" },
+ { .compatible = "zinitix,bt431" },
+ { .compatible = "zinitix,bt432" },
+ { .compatible = "zinitix,bt531" },
{ .compatible = "zinitix,bt532" },
+ { .compatible = "zinitix,bt538" },
{ .compatible = "zinitix,bt541" },
+ { .compatible = "zinitix,bt548" },
+ { .compatible = "zinitix,bt554" },
+ { .compatible = "zinitix,at100" },
{ }
};
MODULE_DEVICE_TABLE(of, zinitix_of_match);
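[Editor's note] With the event structure's second byte reinterpreted as finger_mask, the zinitix handler walks only the reported contacts. for_each_set_bit() operates on an unsigned long bitmap, hence the widening copy of the u8 mask first; the iteration in isolation (process_finger() is a hypothetical stand-in):

    unsigned long mask = touch_event.finger_mask;    /* widen u8 to long */
    int i;

    for_each_set_bit(i, &mask, MAX_SUPPORTED_FINGER_NUM) {
            /* i visits only the set bit positions, lowest first */
            process_finger(i);                       /* hypothetical helper */
    }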
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 3eb68fa1b8cc..c79a0df090c0 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -144,8 +144,8 @@ config IOMMU_DMA
select IRQ_MSI_IOMMU
select NEED_SG_DMA_LENGTH
-# Shared Virtual Addressing library
-config IOMMU_SVA_LIB
+# Shared Virtual Addressing
+config IOMMU_SVA
bool
select IOASID
@@ -379,7 +379,7 @@ config ARM_SMMU_V3
config ARM_SMMU_V3_SVA
bool "Shared Virtual Addressing support for the ARM SMMUv3"
depends on ARM_SMMU_V3
- select IOMMU_SVA_LIB
+ select IOMMU_SVA
select MMU_NOTIFIER
help
Support for sharing process address spaces with devices using the
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index bc7f730edbb0..44475a9b3eea 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -27,6 +27,6 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
-obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o io-pgfault.o
+obj-$(CONFIG_IOMMU_SVA) += iommu-sva-lib.o io-pgfault.o
obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
obj-$(CONFIG_APPLE_DART) += apple-dart.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 416815a525d6..bb95edf74415 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -14,6 +14,7 @@
extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
extern void amd_iommu_apply_erratum_63(u16 devid);
+extern void amd_iommu_restart_event_logging(struct amd_iommu *iommu);
extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
extern int amd_iommu_init_devices(void);
extern void amd_iommu_uninit_devices(void);
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index ffc89c4fb120..47108ed44fbb 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -110,6 +110,7 @@
#define PASID_MASK 0x0000ffff
/* MMIO status bits */
+#define MMIO_STATUS_EVT_OVERFLOW_INT_MASK (1 << 0)
#define MMIO_STATUS_EVT_INT_MASK (1 << 1)
#define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2)
#define MMIO_STATUS_PPR_INT_MASK (1 << 6)
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index dc338acf3338..7bfe37e52e21 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -21,6 +21,7 @@
#include <linux/export.h>
#include <linux/kmemleak.h>
#include <linux/cc_platform.h>
+#include <linux/iopoll.h>
#include <asm/pci-direct.h>
#include <asm/iommu.h>
#include <asm/apic.h>
@@ -657,6 +658,16 @@ static int __init alloc_command_buffer(struct amd_iommu *iommu)
}
/*
+ * This function restarts event logging in case the IOMMU experienced
+ * an event log buffer overflow.
+ */
+void amd_iommu_restart_event_logging(struct amd_iommu *iommu)
+{
+ iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
+ iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
+}
+
+/*
* This function resets the command buffer if the IOMMU stopped fetching
* commands from it.
*/
@@ -834,6 +845,7 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu)
status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
if (status & (MMIO_STATUS_GALOG_RUN_MASK))
break;
+ udelay(10);
}
if (WARN_ON(i >= LOOP_TIMEOUT))
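[Editor's note] Besides the overflow-restart helper, this hunk adds a udelay(10) back-off to the GA log poll loop, which previously spun on readl() at full speed. Given the newly included <linux/iopoll.h>, the same loop could be written with the polling helper; the following is an alternative sketch, not what the patch does, with the timeout value chosen purely for illustration:

    u32 status;
    int ret;

    ret = readl_poll_timeout(iommu->mmio_base + MMIO_STATUS_OFFSET,
                             status,
                             status & MMIO_STATUS_GALOG_RUN_MASK,
                             10, 10 * LOOP_TIMEOUT);
    if (WARN_ON(ret))
            return ret;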
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index b1bf4125b0f7..6608d1717574 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -492,18 +492,18 @@ static void v1_free_pgtable(struct io_pgtable *iop)
dom = container_of(pgtable, struct protection_domain, iop);
- /* Update data structure */
- amd_iommu_domain_clr_pt_root(dom);
-
- /* Make changes visible to IOMMUs */
- amd_iommu_domain_update(dom);
-
/* Page-table is not visible to IOMMU anymore, so free it */
BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
pgtable->mode > PAGE_MODE_6_LEVEL);
free_sub_pt(pgtable->root, pgtable->mode, &freelist);
+ /* Update data structure */
+ amd_iommu_domain_clr_pt_root(dom);
+
+ /* Make changes visible to IOMMUs */
+ amd_iommu_domain_update(dom);
+
put_pages_list(&freelist);
}
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 461f1844ed1f..a18b549951bb 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -764,7 +764,8 @@ amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu) { }
#endif /* !CONFIG_IRQ_REMAP */
#define AMD_IOMMU_INT_MASK \
- (MMIO_STATUS_EVT_INT_MASK | \
+ (MMIO_STATUS_EVT_OVERFLOW_INT_MASK | \
+ MMIO_STATUS_EVT_INT_MASK | \
MMIO_STATUS_PPR_INT_MASK | \
MMIO_STATUS_GALOG_INT_MASK)
@@ -774,7 +775,7 @@ irqreturn_t amd_iommu_int_thread(int irq, void *data)
u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
while (status & AMD_IOMMU_INT_MASK) {
- /* Enable EVT and PPR and GA interrupts again */
+ /* Enable interrupt sources again */
writel(AMD_IOMMU_INT_MASK,
iommu->mmio_base + MMIO_STATUS_OFFSET);
@@ -795,6 +796,11 @@ irqreturn_t amd_iommu_int_thread(int irq, void *data)
}
#endif
+ if (status & MMIO_STATUS_EVT_OVERFLOW_INT_MASK) {
+ pr_info_ratelimited("IOMMU event log overflow\n");
+ amd_iommu_restart_event_logging(iommu);
+ }
+
/*
* Hardware bug: ERBT1312
* When re-enabling interrupt (by writing 1
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index a737ba5f727e..22ddd05bbdcd 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -340,14 +340,12 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm);
if (IS_ERR(bond->smmu_mn)) {
ret = PTR_ERR(bond->smmu_mn);
- goto err_free_pasid;
+ goto err_free_bond;
}
list_add(&bond->list, &master->bonds);
return &bond->sva;
-err_free_pasid:
- iommu_sva_free_pasid(mm);
err_free_bond:
kfree(bond);
return ERR_PTR(ret);
@@ -377,7 +375,6 @@ void arm_smmu_sva_unbind(struct iommu_sva *handle)
if (refcount_dec_and_test(&bond->refs)) {
list_del(&bond->list);
arm_smmu_mmu_notifier_put(bond->smmu_mn);
- iommu_sva_free_pasid(bond->mm);
kfree(bond);
}
mutex_unlock(&sva_lock);
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 247d0f2d5fdf..39a06d245f12 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -52,7 +52,7 @@ config INTEL_IOMMU_SVM
select PCI_PRI
select MMU_NOTIFIER
select IOASID
- select IOMMU_SVA_LIB
+ select IOMMU_SVA
help
Shared Virtual Memory (SVM) provides a facility for devices
to access DMA resources through process address space by
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 92fea3fbbb11..1ce1741a7fa4 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2738,7 +2738,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
spin_unlock_irqrestore(&device_domain_lock, flags);
/* PASID table is mandatory for a PCI device in scalable mode. */
- if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
+ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
ret = intel_pasid_alloc_table(dev);
if (ret) {
dev_err(dev, "PASID table allocation failed\n");
@@ -4781,7 +4781,7 @@ attach_failed:
link_failed:
spin_unlock_irqrestore(&device_domain_lock, flags);
if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
- ioasid_put(domain->default_pasid);
+ ioasid_free(domain->default_pasid);
return ret;
}
@@ -4811,7 +4811,7 @@ static void aux_domain_remove_dev(struct dmar_domain *domain,
spin_unlock_irqrestore(&device_domain_lock, flags);
if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
- ioasid_put(domain->default_pasid);
+ ioasid_free(domain->default_pasid);
}
static int prepare_domain_attach_device(struct iommu_domain *domain,
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index f912fe45bea2..a67319597884 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -569,9 +569,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
fn, &intel_ir_domain_ops,
iommu);
if (!iommu->ir_domain) {
- irq_domain_free_fwnode(fn);
pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id);
- goto out_free_bitmap;
+ goto out_free_fwnode;
}
iommu->ir_msi_domain =
arch_create_remap_msi_irq_domain(iommu->ir_domain,
@@ -595,7 +594,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
if (dmar_enable_qi(iommu)) {
pr_err("Failed to enable queued invalidation\n");
- goto out_free_bitmap;
+ goto out_free_ir_domain;
}
}
@@ -619,6 +618,14 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
return 0;
+out_free_ir_domain:
+ if (iommu->ir_msi_domain)
+ irq_domain_remove(iommu->ir_msi_domain);
+ iommu->ir_msi_domain = NULL;
+ irq_domain_remove(iommu->ir_domain);
+ iommu->ir_domain = NULL;
+out_free_fwnode:
+ irq_domain_free_fwnode(fn);
out_free_bitmap:
bitmap_free(bitmap);
out_free_pages:
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 5b5d69b04fcc..51ac2096b3da 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -514,11 +514,6 @@ static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1);
}
-static void intel_svm_free_pasid(struct mm_struct *mm)
-{
- iommu_sva_free_pasid(mm);
-}
-
static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
struct device *dev,
struct mm_struct *mm,
@@ -662,8 +657,6 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
kfree(svm);
}
}
- /* Drop a PASID reference and free it if no reference. */
- intel_svm_free_pasid(mm);
}
out:
return ret;
@@ -1047,8 +1040,6 @@ struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void
}
sva = intel_svm_bind_mm(iommu, dev, mm, flags);
- if (IS_ERR_OR_NULL(sva))
- intel_svm_free_pasid(mm);
mutex_unlock(&pasid_mutex);
return sva;
diff --git a/drivers/iommu/ioasid.c b/drivers/iommu/ioasid.c
index 50ee27bbd04e..a786c034907c 100644
--- a/drivers/iommu/ioasid.c
+++ b/drivers/iommu/ioasid.c
@@ -2,7 +2,7 @@
/*
* I/O Address Space ID allocator. There is one global IOASID space, split into
* subsets. Users create a subset with DECLARE_IOASID_SET, then allocate and
- * free IOASIDs with ioasid_alloc and ioasid_put.
+ * free IOASIDs with ioasid_alloc() and ioasid_free().
*/
#include <linux/ioasid.h>
#include <linux/module.h>
@@ -15,7 +15,6 @@ struct ioasid_data {
struct ioasid_set *set;
void *private;
struct rcu_head rcu;
- refcount_t refs;
};
/*
@@ -315,7 +314,6 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,
data->set = set;
data->private = private;
- refcount_set(&data->refs, 1);
/*
* Custom allocator needs allocator data to perform platform specific
@@ -348,34 +346,11 @@ exit_free:
EXPORT_SYMBOL_GPL(ioasid_alloc);
/**
- * ioasid_get - obtain a reference to the IOASID
- */
-void ioasid_get(ioasid_t ioasid)
-{
- struct ioasid_data *ioasid_data;
-
- spin_lock(&ioasid_allocator_lock);
- ioasid_data = xa_load(&active_allocator->xa, ioasid);
- if (ioasid_data)
- refcount_inc(&ioasid_data->refs);
- else
- WARN_ON(1);
- spin_unlock(&ioasid_allocator_lock);
-}
-EXPORT_SYMBOL_GPL(ioasid_get);
-
-/**
- * ioasid_put - Release a reference to an ioasid
+ * ioasid_free - Free an ioasid
* @ioasid: the ID to remove
- *
- * Put a reference to the IOASID, free it when the number of references drops to
- * zero.
- *
- * Return: %true if the IOASID was freed, %false otherwise.
*/
-bool ioasid_put(ioasid_t ioasid)
+void ioasid_free(ioasid_t ioasid)
{
- bool free = false;
struct ioasid_data *ioasid_data;
spin_lock(&ioasid_allocator_lock);
@@ -385,10 +360,6 @@ bool ioasid_put(ioasid_t ioasid)
goto exit_unlock;
}
- free = refcount_dec_and_test(&ioasid_data->refs);
- if (!free)
- goto exit_unlock;
-
active_allocator->ops->free(ioasid, active_allocator->ops->pdata);
/* Custom allocator needs additional steps to free the xa element */
if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) {
@@ -398,9 +369,8 @@ bool ioasid_put(ioasid_t ioasid)
exit_unlock:
spin_unlock(&ioasid_allocator_lock);
- return free;
}
-EXPORT_SYMBOL_GPL(ioasid_put);
+EXPORT_SYMBOL_GPL(ioasid_free);
/**
* ioasid_find - Find IOASID data
diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
index bd41405d34e9..106506143896 100644
--- a/drivers/iommu/iommu-sva-lib.c
+++ b/drivers/iommu/iommu-sva-lib.c
@@ -18,8 +18,7 @@ static DECLARE_IOASID_SET(iommu_sva_pasid);
*
* Try to allocate a PASID for this mm, or take a reference to the existing one
* provided it fits within the [@min, @max] range. On success the PASID is
- * available in mm->pasid, and must be released with iommu_sva_free_pasid().
- * @min must be greater than 0, because 0 indicates an unused mm->pasid.
+ * available in mm->pasid and will be available for the lifetime of the mm.
*
* Returns 0 on success and < 0 on error.
*/
@@ -33,38 +32,24 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
return -EINVAL;
mutex_lock(&iommu_sva_lock);
- if (mm->pasid) {
- if (mm->pasid >= min && mm->pasid <= max)
- ioasid_get(mm->pasid);
- else
+ /* Is a PASID already associated with this mm? */
+ if (pasid_valid(mm->pasid)) {
+ if (mm->pasid < min || mm->pasid >= max)
ret = -EOVERFLOW;
- } else {
- pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm);
- if (pasid == INVALID_IOASID)
- ret = -ENOMEM;
- else
- mm->pasid = pasid;
+ goto out;
}
+
+ pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm);
+ if (!pasid_valid(pasid))
+ ret = -ENOMEM;
+ else
+ mm_pasid_set(mm, pasid);
+out:
mutex_unlock(&iommu_sva_lock);
return ret;
}
EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid);
-/**
- * iommu_sva_free_pasid - Release the mm's PASID
- * @mm: the mm
- *
- * Drop one reference to a PASID allocated with iommu_sva_alloc_pasid()
- */
-void iommu_sva_free_pasid(struct mm_struct *mm)
-{
- mutex_lock(&iommu_sva_lock);
- if (ioasid_put(mm->pasid))
- mm->pasid = 0;
- mutex_unlock(&iommu_sva_lock);
-}
-EXPORT_SYMBOL_GPL(iommu_sva_free_pasid);
-
/* ioasid_find getter() requires a void * argument */
static bool __mmget_not_zero(void *mm)
{
diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h
index 031155010ca8..8909ea1094e3 100644
--- a/drivers/iommu/iommu-sva-lib.h
+++ b/drivers/iommu/iommu-sva-lib.h
@@ -9,7 +9,6 @@
#include <linux/mm_types.h>
int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max);
-void iommu_sva_free_pasid(struct mm_struct *mm);
struct mm_struct *iommu_sva_find(ioasid_t pasid);
/* I/O Page fault */
@@ -17,7 +16,7 @@ struct device;
struct iommu_fault;
struct iopf_queue;
-#ifdef CONFIG_IOMMU_SVA_LIB
+#ifdef CONFIG_IOMMU_SVA
int iommu_queue_iopf(struct iommu_fault *fault, void *cookie);
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev);
@@ -28,7 +27,7 @@ struct iopf_queue *iopf_queue_alloc(const char *name);
void iopf_queue_free(struct iopf_queue *queue);
int iopf_queue_discard_partial(struct iopf_queue *queue);
-#else /* CONFIG_IOMMU_SVA_LIB */
+#else /* CONFIG_IOMMU_SVA */
static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
{
return -ENODEV;
@@ -64,5 +63,5 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue)
{
return -ENODEV;
}
-#endif /* CONFIG_IOMMU_SVA_LIB */
+#endif /* CONFIG_IOMMU_SVA */
#endif /* _IOMMU_SVA_LIB_H */
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 8b86406b7162..107dcf5938d6 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -207,9 +207,14 @@ static struct dev_iommu *dev_iommu_get(struct device *dev)
static void dev_iommu_free(struct device *dev)
{
- iommu_fwspec_free(dev);
- kfree(dev->iommu);
+ struct dev_iommu *param = dev->iommu;
+
dev->iommu = NULL;
+ if (param->fwspec) {
+ fwnode_handle_put(param->fwspec->iommu_fwnode);
+ kfree(param->fwspec);
+ }
+ kfree(param);
}
static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
@@ -980,17 +985,6 @@ static int iommu_group_device_count(struct iommu_group *group)
return ret;
}
-/**
- * iommu_group_for_each_dev - iterate over each device in the group
- * @group: the group
- * @data: caller opaque data to be passed to callback function
- * @fn: caller supplied callback function
- *
- * This function is called by group users to iterate over group devices.
- * Callers should hold a reference count to the group during callback.
- * The group->mutex is held across callbacks, which will block calls to
- * iommu_group_add/remove_device.
- */
static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
int (*fn)(struct device *, void *))
{
@@ -1005,7 +999,17 @@ static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
return ret;
}
-
+/**
+ * iommu_group_for_each_dev - iterate over each device in the group
+ * @group: the group
+ * @data: caller opaque data to be passed to callback function
+ * @fn: caller supplied callback function
+ *
+ * This function is called by group users to iterate over group devices.
+ * Callers should hold a reference count to the group during callback.
+ * The group->mutex is held across callbacks, which will block calls to
+ * iommu_group_add/remove_device.
+ */
int iommu_group_for_each_dev(struct iommu_group *group, void *data,
int (*fn)(struct device *, void *))
{
@@ -3032,6 +3036,7 @@ EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
* iommu_sva_bind_device() - Bind a process address space to a device
* @dev: the device
* @mm: the mm to bind, caller must hold a reference to it
+ * @drvdata: opaque data pointer to pass to bind callback
*
* Create a bond between device and address space, allowing the device to access
* the mm using the returned PASID. If a bond already exists between @device and
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 91749654fd49..980e4af3f06b 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1085,7 +1085,7 @@ static __maybe_unused int omap_iommu_runtime_resume(struct device *dev)
}
/**
- * omap_iommu_suspend_prepare - prepare() dev_pm_ops implementation
+ * omap_iommu_prepare - prepare() dev_pm_ops implementation
* @dev: iommu device
*
* This function performs the necessary checks to determine if the IOMMU
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index e900e3c46903..2561ce8a2ce8 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -808,8 +808,10 @@ static struct tegra_smmu *tegra_smmu_find(struct device_node *np)
return NULL;
mc = platform_get_drvdata(pdev);
- if (!mc)
+ if (!mc) {
+ put_device(&pdev->dev);
return NULL;
+ }
return mc->smmu;
}
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 7038957f4a77..680d2fcf2686 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -430,6 +430,14 @@ config QCOM_PDC
Power Domain Controller driver to manage and configure wakeup
IRQs for Qualcomm Technologies Inc (QTI) mobile chips.
+config QCOM_MPM
+ tristate "QCOM MPM"
+ depends on ARCH_QCOM
+ select IRQ_DOMAIN_HIERARCHY
+ help
+ MSM Power Manager driver to manage and configure wakeup
+ IRQs for Qualcomm Technologies Inc (QTI) mobile chips.
+
config CSKY_MPINTC
bool
depends on CSKY
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index c1f611cbfbf8..1f8990f812f1 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -94,6 +94,7 @@ obj-$(CONFIG_MESON_IRQ_GPIO) += irq-meson-gpio.o
obj-$(CONFIG_GOLDFISH_PIC) += irq-goldfish-pic.o
obj-$(CONFIG_NDS32) += irq-ativic32.o
obj-$(CONFIG_QCOM_PDC) += qcom-pdc.o
+obj-$(CONFIG_QCOM_MPM) += irq-qcom-mpm.o
obj-$(CONFIG_CSKY_MPINTC) += irq-csky-mpintc.o
obj-$(CONFIG_CSKY_APB_INTC) += irq-csky-apb-intc.o
obj-$(CONFIG_RISCV_INTC) += irq-riscv-intc.o
diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index 2543ef65825b..12dd48727a15 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -24,7 +24,7 @@
* - Default "this CPU" register view and explicit per-CPU views
*
* In addition, this driver also handles FIQs, as these are routed to the same
- * IRQ vector. These are used for Fast IPIs (TODO), the ARMv8 timer IRQs, and
+ * IRQ vector. These are used for Fast IPIs, the ARMv8 timer IRQs, and
* performance counters (TODO).
*
* Implementation notes:
@@ -52,9 +52,12 @@
#include <linux/irqchip.h>
#include <linux/irqchip/arm-vgic-info.h>
#include <linux/irqdomain.h>
+#include <linux/jump_label.h>
#include <linux/limits.h>
#include <linux/of_address.h>
#include <linux/slab.h>
+#include <asm/apple_m1_pmu.h>
+#include <asm/cputype.h>
#include <asm/exception.h>
#include <asm/sysreg.h>
#include <asm/virt.h>
@@ -62,20 +65,22 @@
#include <dt-bindings/interrupt-controller/apple-aic.h>
/*
- * AIC registers (MMIO)
+ * AIC v1 registers (MMIO)
*/
#define AIC_INFO 0x0004
-#define AIC_INFO_NR_HW GENMASK(15, 0)
+#define AIC_INFO_NR_IRQ GENMASK(15, 0)
#define AIC_CONFIG 0x0010
#define AIC_WHOAMI 0x2000
#define AIC_EVENT 0x2004
-#define AIC_EVENT_TYPE GENMASK(31, 16)
+#define AIC_EVENT_DIE GENMASK(31, 24)
+#define AIC_EVENT_TYPE GENMASK(23, 16)
#define AIC_EVENT_NUM GENMASK(15, 0)
-#define AIC_EVENT_TYPE_HW 1
+#define AIC_EVENT_TYPE_FIQ 0 /* Software use */
+#define AIC_EVENT_TYPE_IRQ 1
#define AIC_EVENT_TYPE_IPI 4
#define AIC_EVENT_IPI_OTHER 1
#define AIC_EVENT_IPI_SELF 2
@@ -91,34 +96,73 @@
#define AIC_IPI_SELF BIT(31)
#define AIC_TARGET_CPU 0x3000
-#define AIC_SW_SET 0x4000
-#define AIC_SW_CLR 0x4080
-#define AIC_MASK_SET 0x4100
-#define AIC_MASK_CLR 0x4180
#define AIC_CPU_IPI_SET(cpu) (0x5008 + ((cpu) << 7))
#define AIC_CPU_IPI_CLR(cpu) (0x500c + ((cpu) << 7))
#define AIC_CPU_IPI_MASK_SET(cpu) (0x5024 + ((cpu) << 7))
#define AIC_CPU_IPI_MASK_CLR(cpu) (0x5028 + ((cpu) << 7))
+#define AIC_MAX_IRQ 0x400
+
+/*
+ * AIC v2 registers (MMIO)
+ */
+
+#define AIC2_VERSION 0x0000
+#define AIC2_VERSION_VER GENMASK(7, 0)
+
+#define AIC2_INFO1 0x0004
+#define AIC2_INFO1_NR_IRQ GENMASK(15, 0)
+#define AIC2_INFO1_LAST_DIE GENMASK(27, 24)
+
+#define AIC2_INFO2 0x0008
+
+#define AIC2_INFO3 0x000c
+#define AIC2_INFO3_MAX_IRQ GENMASK(15, 0)
+#define AIC2_INFO3_MAX_DIE GENMASK(27, 24)
+
+#define AIC2_RESET 0x0010
+#define AIC2_RESET_RESET BIT(0)
+
+#define AIC2_CONFIG 0x0014
+#define AIC2_CONFIG_ENABLE BIT(0)
+#define AIC2_CONFIG_PREFER_PCPU BIT(28)
+
+#define AIC2_TIMEOUT 0x0028
+#define AIC2_CLUSTER_PRIO 0x0030
+#define AIC2_DELAY_GROUPS 0x0100
+
+#define AIC2_IRQ_CFG 0x2000
+
+/*
+ * AIC2 registers are laid out like this, starting at AIC2_IRQ_CFG:
+ *
+ * Repeat for each die:
+ * IRQ_CFG: u32 * MAX_IRQS
+ * SW_SET: u32 * (MAX_IRQS / 32)
+ * SW_CLR: u32 * (MAX_IRQS / 32)
+ * MASK_SET: u32 * (MAX_IRQS / 32)
+ * MASK_CLR: u32 * (MAX_IRQS / 32)
+ * HW_STATE: u32 * (MAX_IRQS / 32)
+ *
+ * This is followed by a set of event registers, each 16K page aligned.
+ * The first one is the AP event register we will use. Unfortunately,
+ * the actual implemented die count is not specified anywhere in the
+ * capability registers, so we have to explicitly specify the event
+ * register as a second reg entry in the device tree to remain
+ * forward-compatible.
+ */
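
The layout described above is exactly what the probe path later walks to compute the per-die register offsets (see the aic_of_ic_init() changes further down). A condensed sketch of that arithmetic, with max_irq standing in for MAX_IRQS and the lvalue names chosen for illustration:

	/* Sketch: derive the AIC2 per-die offsets from the layout above. */
	u32 off = irq_cfg_base, start_off = irq_cfg_base;    /* AIC2_IRQ_CFG */

	off += sizeof(u32) * max_irq;           /* IRQ_CFG: one u32 per IRQ */
	sw_set   = off; off += sizeof(u32) * (max_irq >> 5); /* SW_SET   */
	sw_clr   = off; off += sizeof(u32) * (max_irq >> 5); /* SW_CLR   */
	mask_set = off; off += sizeof(u32) * (max_irq >> 5); /* MASK_SET */
	mask_clr = off; off += sizeof(u32) * (max_irq >> 5); /* MASK_CLR */
	off += sizeof(u32) * (max_irq >> 5);                 /* HW_STATE */

	die_stride = off - start_off;   /* the whole block repeats per die */
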
+
+#define AIC2_IRQ_CFG_TARGET GENMASK(3, 0)
+#define AIC2_IRQ_CFG_DELAY_IDX GENMASK(7, 5)
+
#define MASK_REG(x) (4 * ((x) >> 5))
#define MASK_BIT(x) BIT((x) & GENMASK(4, 0))
/*
* IMP-DEF sysregs that control FIQ sources
- * Note: sysreg-based IPIs are not supported yet.
*/
-/* Core PMC control register */
-#define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0)
-#define PMCR0_IMODE GENMASK(10, 8)
-#define PMCR0_IMODE_OFF 0
-#define PMCR0_IMODE_PMI 1
-#define PMCR0_IMODE_AIC 2
-#define PMCR0_IMODE_HALT 3
-#define PMCR0_IMODE_FIQ 4
-#define PMCR0_IACT BIT(11)
-
/* IPI request registers */
#define SYS_IMP_APL_IPI_RR_LOCAL_EL1 sys_reg(3, 5, 15, 0, 0)
#define SYS_IMP_APL_IPI_RR_GLOBAL_EL1 sys_reg(3, 5, 15, 0, 1)
@@ -155,7 +199,18 @@
#define SYS_IMP_APL_UPMSR_EL1 sys_reg(3, 7, 15, 6, 4)
#define UPMSR_IACT BIT(0)
-#define AIC_NR_FIQ 4
+/* MPIDR fields */
+#define MPIDR_CPU(x) MPIDR_AFFINITY_LEVEL(x, 0)
+#define MPIDR_CLUSTER(x) MPIDR_AFFINITY_LEVEL(x, 1)
+
+#define AIC_IRQ_HWIRQ(die, irq) (FIELD_PREP(AIC_EVENT_DIE, die) | \
+ FIELD_PREP(AIC_EVENT_TYPE, AIC_EVENT_TYPE_IRQ) | \
+ FIELD_PREP(AIC_EVENT_NUM, irq))
+#define AIC_FIQ_HWIRQ(x) (FIELD_PREP(AIC_EVENT_TYPE, AIC_EVENT_TYPE_FIQ) | \
+ FIELD_PREP(AIC_EVENT_NUM, x))
+#define AIC_HWIRQ_IRQ(x) FIELD_GET(AIC_EVENT_NUM, x)
+#define AIC_HWIRQ_DIE(x) FIELD_GET(AIC_EVENT_DIE, x)
+#define AIC_NR_FIQ 6
#define AIC_NR_SWIPI 32
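
With the die field added, a hwirq becomes a self-describing value: die in bits 31:24, event type in bits 23:16, and the IRQ/FIQ number in bits 15:0. A worked example, following directly from the field definitions above:

	/* AIC_IRQ_HWIRQ(1, 5) packs die 1, type AIC_EVENT_TYPE_IRQ (1), num 5: */
	irq_hw_number_t hw = AIC_IRQ_HWIRQ(1, 5);   /* == 0x01010005 */

	AIC_HWIRQ_DIE(hw);   /* -> 1 */
	AIC_HWIRQ_IRQ(hw);   /* -> 5 */
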
/*
@@ -173,12 +228,81 @@
#define AIC_TMR_EL02_PHYS AIC_TMR_GUEST_PHYS
#define AIC_TMR_EL02_VIRT AIC_TMR_GUEST_VIRT
+DEFINE_STATIC_KEY_TRUE(use_fast_ipi);
+
+struct aic_info {
+ int version;
+
+ /* Register offsets */
+ u32 event;
+ u32 target_cpu;
+ u32 irq_cfg;
+ u32 sw_set;
+ u32 sw_clr;
+ u32 mask_set;
+ u32 mask_clr;
+
+ u32 die_stride;
+
+ /* Features */
+ bool fast_ipi;
+};
+
+static const struct aic_info aic1_info = {
+ .version = 1,
+
+ .event = AIC_EVENT,
+ .target_cpu = AIC_TARGET_CPU,
+};
+
+static const struct aic_info aic1_fipi_info = {
+ .version = 1,
+
+ .event = AIC_EVENT,
+ .target_cpu = AIC_TARGET_CPU,
+
+ .fast_ipi = true,
+};
+
+static const struct aic_info aic2_info = {
+ .version = 2,
+
+ .irq_cfg = AIC2_IRQ_CFG,
+
+ .fast_ipi = true,
+};
+
+static const struct of_device_id aic_info_match[] = {
+ {
+ .compatible = "apple,t8103-aic",
+ .data = &aic1_fipi_info,
+ },
+ {
+ .compatible = "apple,aic",
+ .data = &aic1_info,
+ },
+ {
+ .compatible = "apple,aic2",
+ .data = &aic2_info,
+ },
+ {}
+};
+
struct aic_irq_chip {
void __iomem *base;
+ void __iomem *event;
struct irq_domain *hw_domain;
struct irq_domain *ipi_domain;
- int nr_hw;
- int ipi_hwirq;
+ struct {
+ cpumask_t aff;
+ } *fiq_aff[AIC_NR_FIQ];
+
+ int nr_irq;
+ int max_irq;
+ int nr_die;
+ int max_die;
+
+ struct aic_info info;
};
static DEFINE_PER_CPU(uint32_t, aic_fiq_unmasked);
@@ -206,18 +330,24 @@ static void aic_ic_write(struct aic_irq_chip *ic, u32 reg, u32 val)
static void aic_irq_mask(struct irq_data *d)
{
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
- aic_ic_write(ic, AIC_MASK_SET + MASK_REG(irqd_to_hwirq(d)),
- MASK_BIT(irqd_to_hwirq(d)));
+ u32 off = AIC_HWIRQ_DIE(hwirq) * ic->info.die_stride;
+ u32 irq = AIC_HWIRQ_IRQ(hwirq);
+
+ aic_ic_write(ic, ic->info.mask_set + off + MASK_REG(irq), MASK_BIT(irq));
}
static void aic_irq_unmask(struct irq_data *d)
{
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
- aic_ic_write(ic, AIC_MASK_CLR + MASK_REG(d->hwirq),
- MASK_BIT(irqd_to_hwirq(d)));
+ u32 off = AIC_HWIRQ_DIE(hwirq) * ic->info.die_stride;
+ u32 irq = AIC_HWIRQ_IRQ(hwirq);
+
+ aic_ic_write(ic, ic->info.mask_clr + off + MASK_REG(irq), MASK_BIT(irq));
}
static void aic_irq_eoi(struct irq_data *d)
@@ -240,12 +370,12 @@ static void __exception_irq_entry aic_handle_irq(struct pt_regs *regs)
* We cannot use a relaxed read here, as reads from DMA buffers
* need to be ordered after the IRQ fires.
*/
- event = readl(ic->base + AIC_EVENT);
+ event = readl(ic->event + ic->info.event);
type = FIELD_GET(AIC_EVENT_TYPE, event);
irq = FIELD_GET(AIC_EVENT_NUM, event);
- if (type == AIC_EVENT_TYPE_HW)
- generic_handle_domain_irq(aic_irqc->hw_domain, irq);
+ if (type == AIC_EVENT_TYPE_IRQ)
+ generic_handle_domain_irq(aic_irqc->hw_domain, event);
else if (type == AIC_EVENT_TYPE_IPI && irq == 1)
aic_handle_ipi(regs);
else if (event != 0)
@@ -272,12 +402,14 @@ static int aic_irq_set_affinity(struct irq_data *d,
struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
int cpu;
+ BUG_ON(!ic->info.target_cpu);
+
if (force)
cpu = cpumask_first(mask_val);
else
cpu = cpumask_any_and(mask_val, cpu_online_mask);
- aic_ic_write(ic, AIC_TARGET_CPU + hwirq * 4, BIT(cpu));
+ aic_ic_write(ic, ic->info.target_cpu + AIC_HWIRQ_IRQ(hwirq) * 4, BIT(cpu));
irq_data_update_effective_affinity(d, cpumask_of(cpu));
return IRQ_SET_MASK_OK;
@@ -301,15 +433,21 @@ static struct irq_chip aic_chip = {
.irq_set_type = aic_irq_set_type,
};
+static struct irq_chip aic2_chip = {
+ .name = "AIC2",
+ .irq_mask = aic_irq_mask,
+ .irq_unmask = aic_irq_unmask,
+ .irq_eoi = aic_irq_eoi,
+ .irq_set_type = aic_irq_set_type,
+};
+
/*
* FIQ irqchip
*/
static unsigned long aic_fiq_get_idx(struct irq_data *d)
{
- struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
-
- return irqd_to_hwirq(d) - ic->nr_hw;
+ return AIC_HWIRQ_IRQ(irqd_to_hwirq(d));
}
static void aic_fiq_set_mask(struct irq_data *d)
@@ -387,17 +525,21 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs)
*/
if (read_sysreg_s(SYS_IMP_APL_IPI_SR_EL1) & IPI_SR_PENDING) {
- pr_err_ratelimited("Fast IPI fired. Acking.\n");
- write_sysreg_s(IPI_SR_PENDING, SYS_IMP_APL_IPI_SR_EL1);
+ if (static_branch_likely(&use_fast_ipi)) {
+ aic_handle_ipi(regs);
+ } else {
+ pr_err_ratelimited("Fast IPI fired. Acking.\n");
+ write_sysreg_s(IPI_SR_PENDING, SYS_IMP_APL_IPI_SR_EL1);
+ }
}
if (TIMER_FIRING(read_sysreg(cntp_ctl_el0)))
generic_handle_domain_irq(aic_irqc->hw_domain,
- aic_irqc->nr_hw + AIC_TMR_EL0_PHYS);
+ AIC_FIQ_HWIRQ(AIC_TMR_EL0_PHYS));
if (TIMER_FIRING(read_sysreg(cntv_ctl_el0)))
generic_handle_domain_irq(aic_irqc->hw_domain,
- aic_irqc->nr_hw + AIC_TMR_EL0_VIRT);
+ AIC_FIQ_HWIRQ(AIC_TMR_EL0_VIRT));
if (is_kernel_in_hyp_mode()) {
uint64_t enabled = read_sysreg_s(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2);
@@ -405,24 +547,23 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs)
if ((enabled & VM_TMR_FIQ_ENABLE_P) &&
TIMER_FIRING(read_sysreg_s(SYS_CNTP_CTL_EL02)))
generic_handle_domain_irq(aic_irqc->hw_domain,
- aic_irqc->nr_hw + AIC_TMR_EL02_PHYS);
+ AIC_FIQ_HWIRQ(AIC_TMR_EL02_PHYS));
if ((enabled & VM_TMR_FIQ_ENABLE_V) &&
TIMER_FIRING(read_sysreg_s(SYS_CNTV_CTL_EL02)))
generic_handle_domain_irq(aic_irqc->hw_domain,
- aic_irqc->nr_hw + AIC_TMR_EL02_VIRT);
+ AIC_FIQ_HWIRQ(AIC_TMR_EL02_VIRT));
}
- if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) ==
- (FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) {
- /*
- * Not supported yet, let's figure out how to handle this when
- * we implement these proprietary performance counters. For now,
- * just mask it and move on.
- */
- pr_err_ratelimited("PMC FIQ fired. Masking.\n");
- sysreg_clear_set_s(SYS_IMP_APL_PMCR0_EL1, PMCR0_IMODE | PMCR0_IACT,
- FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_OFF));
+ if (read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & PMCR0_IACT) {
+ int irq;
+ if (cpumask_test_cpu(smp_processor_id(),
+ &aic_irqc->fiq_aff[AIC_CPU_PMU_P]->aff))
+ irq = AIC_CPU_PMU_P;
+ else
+ irq = AIC_CPU_PMU_E;
+ generic_handle_domain_irq(aic_irqc->hw_domain,
+ AIC_FIQ_HWIRQ(irq));
}
if (FIELD_GET(UPMCR0_IMODE, read_sysreg_s(SYS_IMP_APL_UPMCR0_EL1)) == UPMCR0_IMODE_FIQ &&
@@ -456,13 +597,29 @@ static int aic_irq_domain_map(struct irq_domain *id, unsigned int irq,
irq_hw_number_t hw)
{
struct aic_irq_chip *ic = id->host_data;
+ u32 type = FIELD_GET(AIC_EVENT_TYPE, hw);
+ struct irq_chip *chip = &aic_chip;
- if (hw < ic->nr_hw) {
- irq_domain_set_info(id, irq, hw, &aic_chip, id->host_data,
+ if (ic->info.version == 2)
+ chip = &aic2_chip;
+
+ if (type == AIC_EVENT_TYPE_IRQ) {
+ irq_domain_set_info(id, irq, hw, chip, id->host_data,
handle_fasteoi_irq, NULL, NULL);
irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq)));
} else {
- irq_set_percpu_devid(irq);
+ int fiq = FIELD_GET(AIC_EVENT_NUM, hw);
+
+ switch (fiq) {
+ case AIC_CPU_PMU_P:
+ case AIC_CPU_PMU_E:
+ irq_set_percpu_devid_partition(irq, &ic->fiq_aff[fiq]->aff);
+ break;
+ default:
+ irq_set_percpu_devid(irq);
+ break;
+ }
+
irq_domain_set_info(id, irq, hw, &fiq_chip, id->host_data,
handle_percpu_devid_irq, NULL, NULL);
}
@@ -476,32 +633,46 @@ static int aic_irq_domain_translate(struct irq_domain *id,
unsigned int *type)
{
struct aic_irq_chip *ic = id->host_data;
+ u32 *args;
+ u32 die = 0;
- if (fwspec->param_count != 3 || !is_of_node(fwspec->fwnode))
+ if (fwspec->param_count < 3 || fwspec->param_count > 4 ||
+ !is_of_node(fwspec->fwnode))
return -EINVAL;
+ args = &fwspec->param[1];
+
+ if (fwspec->param_count == 4) {
+ die = args[0];
+ args++;
+ }
+
switch (fwspec->param[0]) {
case AIC_IRQ:
- if (fwspec->param[1] >= ic->nr_hw)
+ if (die >= ic->nr_die)
return -EINVAL;
- *hwirq = fwspec->param[1];
+ if (args[0] >= ic->nr_irq)
+ return -EINVAL;
+ *hwirq = AIC_IRQ_HWIRQ(die, args[0]);
break;
case AIC_FIQ:
- if (fwspec->param[1] >= AIC_NR_FIQ)
+ if (die != 0)
+ return -EINVAL;
+ if (args[0] >= AIC_NR_FIQ)
return -EINVAL;
- *hwirq = ic->nr_hw + fwspec->param[1];
+ *hwirq = AIC_FIQ_HWIRQ(args[0]);
/*
* In EL1 the non-redirected registers are the guest's,
* not EL2's, so remap the hwirqs to match.
*/
if (!is_kernel_in_hyp_mode()) {
- switch (fwspec->param[1]) {
+ switch (args[0]) {
case AIC_TMR_GUEST_PHYS:
- *hwirq = ic->nr_hw + AIC_TMR_EL0_PHYS;
+ *hwirq = AIC_FIQ_HWIRQ(AIC_TMR_EL0_PHYS);
break;
case AIC_TMR_GUEST_VIRT:
- *hwirq = ic->nr_hw + AIC_TMR_EL0_VIRT;
+ *hwirq = AIC_FIQ_HWIRQ(AIC_TMR_EL0_VIRT);
break;
case AIC_TMR_HV_PHYS:
case AIC_TMR_HV_VIRT:
@@ -515,7 +686,7 @@ static int aic_irq_domain_translate(struct irq_domain *id,
return -EINVAL;
}
- *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
+ *type = args[1] & IRQ_TYPE_SENSE_MASK;
return 0;
}
@@ -564,6 +735,22 @@ static const struct irq_domain_ops aic_irq_domain_ops = {
* IPI irqchip
*/
+static void aic_ipi_send_fast(int cpu)
+{
+ u64 mpidr = cpu_logical_map(cpu);
+ u64 my_mpidr = read_cpuid_mpidr();
+ u64 cluster = MPIDR_CLUSTER(mpidr);
+ u64 idx = MPIDR_CPU(mpidr);
+
+ if (MPIDR_CLUSTER(my_mpidr) == cluster)
+ write_sysreg_s(FIELD_PREP(IPI_RR_CPU, idx),
+ SYS_IMP_APL_IPI_RR_LOCAL_EL1);
+ else
+ write_sysreg_s(FIELD_PREP(IPI_RR_CPU, idx) | FIELD_PREP(IPI_RR_CLUSTER, cluster),
+ SYS_IMP_APL_IPI_RR_GLOBAL_EL1);
+ isb();
+}
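
aic_ipi_send_fast() routes on two MPIDR affinity levels: level 1 picks the cluster (and with it the local vs. global request register), level 0 the CPU within that cluster. A small illustration of the decomposition (the example MPIDR value is hypothetical):

	/* For a target CPU with MPIDR Aff1=2, Aff0=3 (cpu 3 in cluster 2): */
	u64 mpidr   = cpu_logical_map(cpu);
	u64 cluster = MPIDR_CLUSTER(mpidr);   /* -> 2 (affinity level 1) */
	u64 idx     = MPIDR_CPU(mpidr);       /* -> 3 (affinity level 0) */
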
+
static void aic_ipi_mask(struct irq_data *d)
{
u32 irq_bit = BIT(irqd_to_hwirq(d));
@@ -589,8 +776,12 @@ static void aic_ipi_unmask(struct irq_data *d)
* If a pending vIPI was unmasked, raise a HW IPI to ourselves.
* No barriers needed here since this is a self-IPI.
*/
- if (atomic_read(this_cpu_ptr(&aic_vipi_flag)) & irq_bit)
- aic_ic_write(ic, AIC_IPI_SEND, AIC_IPI_SEND_CPU(smp_processor_id()));
+ if (atomic_read(this_cpu_ptr(&aic_vipi_flag)) & irq_bit) {
+ if (static_branch_likely(&use_fast_ipi))
+ aic_ipi_send_fast(smp_processor_id());
+ else
+ aic_ic_write(ic, AIC_IPI_SEND, AIC_IPI_SEND_CPU(smp_processor_id()));
+ }
}
static void aic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask)
@@ -618,8 +809,12 @@ static void aic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask)
smp_mb__after_atomic();
if (!(pending & irq_bit) &&
- (atomic_read(per_cpu_ptr(&aic_vipi_enable, cpu)) & irq_bit))
- send |= AIC_IPI_SEND_CPU(cpu);
+ (atomic_read(per_cpu_ptr(&aic_vipi_enable, cpu)) & irq_bit)) {
+ if (static_branch_likely(&use_fast_ipi))
+ aic_ipi_send_fast(cpu);
+ else
+ send |= AIC_IPI_SEND_CPU(cpu);
+ }
}
/*
@@ -651,8 +846,16 @@ static void aic_handle_ipi(struct pt_regs *regs)
/*
* Ack the IPI. We need to order this after the AIC event read, but
* that is enforced by normal MMIO ordering guarantees.
+ *
+ * For the Fast IPI case, this needs to be ordered before the vIPI
+ * handling below, so we need to isb();
*/
- aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_OTHER);
+ if (static_branch_likely(&use_fast_ipi)) {
+ write_sysreg_s(IPI_SR_PENDING, SYS_IMP_APL_IPI_SR_EL1);
+ isb();
+ } else {
+ aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_OTHER);
+ }
/*
* The mask read does not need to be ordered. Only we can change
@@ -680,7 +883,8 @@ static void aic_handle_ipi(struct pt_regs *regs)
* No ordering needed here; at worst this just changes the timing of
* when the next IPI will be delivered.
*/
- aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
+ if (!static_branch_likely(&use_fast_ipi))
+ aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
}
static int aic_ipi_alloc(struct irq_domain *d, unsigned int virq,
@@ -767,20 +971,27 @@ static int aic_init_cpu(unsigned int cpu)
/* Commit all of the above */
isb();
- /*
- * Make sure the kernel's idea of logical CPU order is the same as AIC's
- * If we ever end up with a mismatch here, we will have to introduce
- * a mapping table similar to what other irqchip drivers do.
- */
- WARN_ON(aic_ic_read(aic_irqc, AIC_WHOAMI) != smp_processor_id());
+ if (aic_irqc->info.version == 1) {
+ /*
+ * Make sure the kernel's idea of logical CPU order is the same as AIC's
+ * If we ever end up with a mismatch here, we will have to introduce
+ * a mapping table similar to what other irqchip drivers do.
+ */
+ WARN_ON(aic_ic_read(aic_irqc, AIC_WHOAMI) != smp_processor_id());
- /*
- * Always keep IPIs unmasked at the hardware level (except auto-masking
- * by AIC during processing). We manage masks at the vIPI level.
- */
- aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_SELF | AIC_IPI_OTHER);
- aic_ic_write(aic_irqc, AIC_IPI_MASK_SET, AIC_IPI_SELF);
- aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
+ /*
+ * Always keep IPIs unmasked at the hardware level (except auto-masking
+ * by AIC during processing). We manage masks at the vIPI level.
+ * These registers only exist on AICv1; AICv2 always uses fast IPIs.

+ */
+ aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_SELF | AIC_IPI_OTHER);
+ if (static_branch_likely(&use_fast_ipi)) {
+ aic_ic_write(aic_irqc, AIC_IPI_MASK_SET, AIC_IPI_SELF | AIC_IPI_OTHER);
+ } else {
+ aic_ic_write(aic_irqc, AIC_IPI_MASK_SET, AIC_IPI_SELF);
+ aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
+ }
+ }
/* Initialize the local mask state */
__this_cpu_write(aic_fiq_unmasked, 0);
@@ -794,68 +1005,193 @@ static struct gic_kvm_info vgic_info __initdata = {
.no_hw_deactivation = true,
};
+static void build_fiq_affinity(struct aic_irq_chip *ic, struct device_node *aff)
+{
+ int i, n;
+ u32 fiq;
+
+ if (of_property_read_u32(aff, "apple,fiq-index", &fiq) ||
+ WARN_ON(fiq >= AIC_NR_FIQ) || ic->fiq_aff[fiq])
+ return;
+
+ n = of_property_count_elems_of_size(aff, "cpus", sizeof(u32));
+ if (WARN_ON(n < 0))
+ return;
+
+ ic->fiq_aff[fiq] = kzalloc(sizeof(*ic->fiq_aff[fiq]), GFP_KERNEL);
+ if (!ic->fiq_aff[fiq])
+ return;
+
+ for (i = 0; i < n; i++) {
+ struct device_node *cpu_node;
+ u32 cpu_phandle;
+ int cpu;
+
+ if (of_property_read_u32_index(aff, "cpus", i, &cpu_phandle))
+ continue;
+
+ cpu_node = of_find_node_by_phandle(cpu_phandle);
+ if (WARN_ON(!cpu_node))
+ continue;
+
+ cpu = of_cpu_node_to_id(cpu_node);
+ if (WARN_ON(cpu < 0))
+ continue;
+
+ cpumask_set_cpu(cpu, &ic->fiq_aff[fiq]->aff);
+ }
+}
+
static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent)
{
- int i;
+ int i, die;
+ u32 off, start_off;
void __iomem *regs;
- u32 info;
struct aic_irq_chip *irqc;
+ struct device_node *affs;
+ const struct of_device_id *match;
regs = of_iomap(node, 0);
if (WARN_ON(!regs))
return -EIO;
irqc = kzalloc(sizeof(*irqc), GFP_KERNEL);
- if (!irqc)
+ if (!irqc) {
+ iounmap(regs);
return -ENOMEM;
+ }
- aic_irqc = irqc;
irqc->base = regs;
- info = aic_ic_read(irqc, AIC_INFO);
- irqc->nr_hw = FIELD_GET(AIC_INFO_NR_HW, info);
+ match = of_match_node(aic_info_match, node);
+ if (!match)
+ goto err_unmap;
- irqc->hw_domain = irq_domain_create_linear(of_node_to_fwnode(node),
- irqc->nr_hw + AIC_NR_FIQ,
- &aic_irq_domain_ops, irqc);
- if (WARN_ON(!irqc->hw_domain)) {
- iounmap(irqc->base);
- kfree(irqc);
- return -ENODEV;
+ irqc->info = *(struct aic_info *)match->data;
+
+ aic_irqc = irqc;
+
+ switch (irqc->info.version) {
+ case 1: {
+ u32 info;
+
+ info = aic_ic_read(irqc, AIC_INFO);
+ irqc->nr_irq = FIELD_GET(AIC_INFO_NR_IRQ, info);
+ irqc->max_irq = AIC_MAX_IRQ;
+ irqc->nr_die = irqc->max_die = 1;
+
+ off = start_off = irqc->info.target_cpu;
+ off += sizeof(u32) * irqc->max_irq; /* TARGET_CPU */
+
+ irqc->event = irqc->base;
+
+ break;
}
+ case 2: {
+ u32 info1, info3;
+
+ info1 = aic_ic_read(irqc, AIC2_INFO1);
+ info3 = aic_ic_read(irqc, AIC2_INFO3);
+
+ irqc->nr_irq = FIELD_GET(AIC2_INFO1_NR_IRQ, info1);
+ irqc->max_irq = FIELD_GET(AIC2_INFO3_MAX_IRQ, info3);
+ irqc->nr_die = FIELD_GET(AIC2_INFO1_LAST_DIE, info1) + 1;
+ irqc->max_die = FIELD_GET(AIC2_INFO3_MAX_DIE, info3);
+
+ off = start_off = irqc->info.irq_cfg;
+ off += sizeof(u32) * irqc->max_irq; /* IRQ_CFG */
+
+ irqc->event = of_iomap(node, 1);
+ if (WARN_ON(!irqc->event))
+ goto err_unmap;
+
+ break;
+ }
+ }
+
+ irqc->info.sw_set = off;
+ off += sizeof(u32) * (irqc->max_irq >> 5); /* SW_SET */
+ irqc->info.sw_clr = off;
+ off += sizeof(u32) * (irqc->max_irq >> 5); /* SW_CLR */
+ irqc->info.mask_set = off;
+ off += sizeof(u32) * (irqc->max_irq >> 5); /* MASK_SET */
+ irqc->info.mask_clr = off;
+ off += sizeof(u32) * (irqc->max_irq >> 5); /* MASK_CLR */
+ off += sizeof(u32) * (irqc->max_irq >> 5); /* HW_STATE */
+
+ if (irqc->info.fast_ipi)
+ static_branch_enable(&use_fast_ipi);
+ else
+ static_branch_disable(&use_fast_ipi);
+
+ irqc->info.die_stride = off - start_off;
+
+ irqc->hw_domain = irq_domain_create_tree(of_node_to_fwnode(node),
+ &aic_irq_domain_ops, irqc);
+ if (WARN_ON(!irqc->hw_domain))
+ goto err_unmap;
irq_domain_update_bus_token(irqc->hw_domain, DOMAIN_BUS_WIRED);
- if (aic_init_smp(irqc, node)) {
- irq_domain_remove(irqc->hw_domain);
- iounmap(irqc->base);
- kfree(irqc);
- return -ENODEV;
+ if (aic_init_smp(irqc, node))
+ goto err_remove_domain;
+
+ affs = of_get_child_by_name(node, "affinities");
+ if (affs) {
+ struct device_node *chld;
+
+ for_each_child_of_node(affs, chld)
+ build_fiq_affinity(irqc, chld);
}
set_handle_irq(aic_handle_irq);
set_handle_fiq(aic_handle_fiq);
- for (i = 0; i < BITS_TO_U32(irqc->nr_hw); i++)
- aic_ic_write(irqc, AIC_MASK_SET + i * 4, U32_MAX);
- for (i = 0; i < BITS_TO_U32(irqc->nr_hw); i++)
- aic_ic_write(irqc, AIC_SW_CLR + i * 4, U32_MAX);
- for (i = 0; i < irqc->nr_hw; i++)
- aic_ic_write(irqc, AIC_TARGET_CPU + i * 4, 1);
+ off = 0;
+ for (die = 0; die < irqc->nr_die; die++) {
+ for (i = 0; i < BITS_TO_U32(irqc->nr_irq); i++)
+ aic_ic_write(irqc, irqc->info.mask_set + off + i * 4, U32_MAX);
+ for (i = 0; i < BITS_TO_U32(irqc->nr_irq); i++)
+ aic_ic_write(irqc, irqc->info.sw_clr + off + i * 4, U32_MAX);
+ if (irqc->info.target_cpu)
+ for (i = 0; i < irqc->nr_irq; i++)
+ aic_ic_write(irqc, irqc->info.target_cpu + off + i * 4, 1);
+ off += irqc->info.die_stride;
+ }
+
+ if (irqc->info.version == 2) {
+ u32 config = aic_ic_read(irqc, AIC2_CONFIG);
+
+ config |= AIC2_CONFIG_ENABLE;
+ aic_ic_write(irqc, AIC2_CONFIG, config);
+ }
if (!is_kernel_in_hyp_mode())
pr_info("Kernel running in EL1, mapping interrupts");
+ if (static_branch_likely(&use_fast_ipi))
+ pr_info("Using Fast IPIs");
+
cpuhp_setup_state(CPUHP_AP_IRQ_APPLE_AIC_STARTING,
"irqchip/apple-aic/ipi:starting",
aic_init_cpu, NULL);
vgic_set_kvm_info(&vgic_info);
- pr_info("Initialized with %d IRQs, %d FIQs, %d vIPIs\n",
- irqc->nr_hw, AIC_NR_FIQ, AIC_NR_SWIPI);
+ pr_info("Initialized with %d/%d IRQs * %d/%d die(s), %d FIQs, %d vIPIs",
+ irqc->nr_irq, irqc->max_irq, irqc->nr_die, irqc->max_die, AIC_NR_FIQ, AIC_NR_SWIPI);
return 0;
+
+err_remove_domain:
+ irq_domain_remove(irqc->hw_domain);
+err_unmap:
+ if (irqc->event && irqc->event != irqc->base)
+ iounmap(irqc->event);
+ iounmap(irqc->base);
+ kfree(irqc);
+ return -ENODEV;
}
-IRQCHIP_DECLARE(apple_m1_aic, "apple,aic", aic_of_ic_init);
+IRQCHIP_DECLARE(apple_aic, "apple,aic", aic_of_ic_init);
+IRQCHIP_DECLARE(apple_aic2, "apple,aic2", aic_of_ic_init);
diff --git a/drivers/irqchip/irq-ftintc010.c b/drivers/irqchip/irq-ftintc010.c
index 5cc268880f8e..46a3aa60e50e 100644
--- a/drivers/irqchip/irq-ftintc010.c
+++ b/drivers/irqchip/irq-ftintc010.c
@@ -11,7 +11,6 @@
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/irqchip.h>
-#include <linux/irqchip/versatile-fpga.h>
#include <linux/irqdomain.h>
#include <linux/module.h>
#include <linux/of.h>
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index d25b7a864bbb..cd772973114a 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -4856,6 +4856,38 @@ static struct syscore_ops its_syscore_ops = {
.resume = its_restore_enable,
};
+static void __init __iomem *its_map_one(struct resource *res, int *err)
+{
+ void __iomem *its_base;
+ u32 val;
+
+ its_base = ioremap(res->start, SZ_64K);
+ if (!its_base) {
+ pr_warn("ITS@%pa: Unable to map ITS registers\n", &res->start);
+ *err = -ENOMEM;
+ return NULL;
+ }
+
+ val = readl_relaxed(its_base + GITS_PIDR2) & GIC_PIDR2_ARCH_MASK;
+ if (val != 0x30 && val != 0x40) {
+ pr_warn("ITS@%pa: No ITS detected, giving up\n", &res->start);
+ *err = -ENODEV;
+ goto out_unmap;
+ }
+
+ *err = its_force_quiescent(its_base);
+ if (*err) {
+ pr_warn("ITS@%pa: Failed to quiesce, giving up\n", &res->start);
+ goto out_unmap;
+ }
+
+ return its_base;
+
+out_unmap:
+ iounmap(its_base);
+ return NULL;
+}
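
its_map_one() centralizes the map/identify/quiesce sequence so that its_probe_one() and the new its_reset_one() (below) can share it; the contract is NULL on failure with the reason in the out-parameter:

	/* Caller contract, as used by its_probe_one()/its_reset_one(): */
	int err;
	void __iomem *its_base = its_map_one(res, &err);

	if (!its_base)
		return err;   /* -ENOMEM, -ENODEV, or the quiesce error */
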
+
static int its_init_domain(struct fwnode_handle *handle, struct its_node *its)
{
struct irq_domain *inner_domain;
@@ -4963,29 +4995,14 @@ static int __init its_probe_one(struct resource *res,
{
struct its_node *its;
void __iomem *its_base;
- u32 val, ctlr;
u64 baser, tmp, typer;
struct page *page;
+ u32 ctlr;
int err;
- its_base = ioremap(res->start, SZ_64K);
- if (!its_base) {
- pr_warn("ITS@%pa: Unable to map ITS registers\n", &res->start);
- return -ENOMEM;
- }
-
- val = readl_relaxed(its_base + GITS_PIDR2) & GIC_PIDR2_ARCH_MASK;
- if (val != 0x30 && val != 0x40) {
- pr_warn("ITS@%pa: No ITS detected, giving up\n", &res->start);
- err = -ENODEV;
- goto out_unmap;
- }
-
- err = its_force_quiescent(its_base);
- if (err) {
- pr_warn("ITS@%pa: Failed to quiesce, giving up\n", &res->start);
- goto out_unmap;
- }
+ its_base = its_map_one(res, &err);
+ if (!its_base)
+ return err;
pr_info("ITS %pR\n", res);
@@ -5241,13 +5258,31 @@ static int its_cpu_memreserve_lpi(unsigned int cpu)
out:
/* Last CPU being brought up gets to issue the cleanup */
- if (cpumask_equal(&cpus_booted_once_mask, cpu_possible_mask))
+ if (!IS_ENABLED(CONFIG_SMP) ||
+ cpumask_equal(&cpus_booted_once_mask, cpu_possible_mask))
schedule_work(&rdist_memreserve_cpuhp_cleanup_work);
gic_data_rdist()->flags |= RD_LOCAL_MEMRESERVE_DONE;
return ret;
}
+/* Mark all the BASER registers as invalid before they get reprogrammed */
+static int __init its_reset_one(struct resource *res)
+{
+ void __iomem *its_base;
+ int err, i;
+
+ its_base = its_map_one(res, &err);
+ if (!its_base)
+ return err;
+
+ for (i = 0; i < GITS_BASER_NR_REGS; i++)
+ gits_write_baser(0, its_base + GITS_BASER + (i << 3));
+
+ iounmap(its_base);
+ return 0;
+}
+
static const struct of_device_id its_device_id[] = {
{ .compatible = "arm,gic-v3-its", },
{},
@@ -5258,6 +5293,26 @@ static int __init its_of_probe(struct device_node *node)
struct device_node *np;
struct resource res;
+ /*
+ * Make sure *all* the ITS are reset before we probe any, as
+ * they may be sharing memory. If any of the ITS fails to
+ * reset, don't even try to go any further, as this could
+ * result in something even worse.
+ */
+ for (np = of_find_matching_node(node, its_device_id); np;
+ np = of_find_matching_node(np, its_device_id)) {
+ int err;
+
+ if (!of_device_is_available(np) ||
+ !of_property_read_bool(np, "msi-controller") ||
+ of_address_to_resource(np, 0, &res))
+ continue;
+
+ err = its_reset_one(&res);
+ if (err)
+ return err;
+ }
+
for (np = of_find_matching_node(node, its_device_id); np;
np = of_find_matching_node(np, its_device_id)) {
if (!of_device_is_available(np))
@@ -5420,11 +5475,35 @@ dom_err:
return err;
}
+static int __init its_acpi_reset(union acpi_subtable_headers *header,
+ const unsigned long end)
+{
+ struct acpi_madt_generic_translator *its_entry;
+ struct resource res;
+
+ its_entry = (struct acpi_madt_generic_translator *)header;
+ res = (struct resource) {
+ .start = its_entry->base_address,
+ .end = its_entry->base_address + ACPI_GICV3_ITS_MEM_SIZE - 1,
+ .flags = IORESOURCE_MEM,
+ };
+
+ return its_reset_one(&res);
+}
+
static void __init its_acpi_probe(void)
{
acpi_table_parse_srat_its();
- acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR,
- gic_acpi_parse_madt_its, 0);
+ /*
+ * Make sure *all* the ITS are reset before we probe any, as
+ * they may be sharing memory. If any of the ITS fails to
+ * reset, don't even try to go any further, as this could
+ * result in something even worse.
+ */
+ if (acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR,
+ its_acpi_reset, 0) > 0)
+ acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR,
+ gic_acpi_parse_madt_its, 0);
acpi_its_srat_maps_free();
}
#else
@@ -5438,6 +5517,9 @@ int __init its_lpi_memreserve_init(void)
if (!efi_enabled(EFI_CONFIG_TABLES))
return 0;
+ if (list_empty(&its_nodes))
+ return 0;
+
gic_rdists->cpuhp_memreserve_state = CPUHP_INVALID;
state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
"irqchip/arm/gicv3/memreserve:online",
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 5e935d97207d..0efe1a9a9f3b 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1211,7 +1211,7 @@ static void gic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask)
* Ensure that stores to Normal memory are visible to the
* other CPUs before issuing the IPI.
*/
- wmb();
+ dsb(ishst);
for_each_cpu(cpu, mask) {
u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu));
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index b8bb46c65a97..58ba835bee1f 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -34,6 +34,7 @@
#include <linux/irqdomain.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
+#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/irqchip.h>
#include <linux/irqchip/chained_irq.h>
@@ -66,7 +67,6 @@ union gic_base {
};
struct gic_chip_data {
- struct irq_chip chip;
union gic_base dist_base;
union gic_base cpu_base;
void __iomem *raw_dist_base;
@@ -397,18 +397,15 @@ static void gic_handle_cascade_irq(struct irq_desc *desc)
chained_irq_exit(chip, desc);
}
-static const struct irq_chip gic_chip = {
- .irq_mask = gic_mask_irq,
- .irq_unmask = gic_unmask_irq,
- .irq_eoi = gic_eoi_irq,
- .irq_set_type = gic_set_type,
- .irq_retrigger = gic_retrigger,
- .irq_get_irqchip_state = gic_irq_get_irqchip_state,
- .irq_set_irqchip_state = gic_irq_set_irqchip_state,
- .flags = IRQCHIP_SET_TYPE_MASKED |
- IRQCHIP_SKIP_SET_WAKE |
- IRQCHIP_MASK_ON_SUSPEND,
-};
+static void gic_irq_print_chip(struct irq_data *d, struct seq_file *p)
+{
+ struct gic_chip_data *gic = irq_data_get_irq_chip_data(d);
+
+ if (gic->domain->dev)
+ seq_printf(p, gic->domain->dev->of_node->name);
+ else
+ seq_printf(p, "GIC-%d", (int)(gic - &gic_data[0]));
+}
void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
{
@@ -799,8 +796,12 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
bool force)
{
void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + gic_irq(d);
+ struct gic_chip_data *gic = irq_data_get_irq_chip_data(d);
unsigned int cpu;
+ if (unlikely(gic != &gic_data[0]))
+ return -EINVAL;
+
if (!force)
cpu = cpumask_any_and(mask_val, cpu_online_mask);
else
@@ -880,6 +881,39 @@ static __init void gic_smp_init(void)
#define gic_ipi_send_mask NULL
#endif
+static const struct irq_chip gic_chip = {
+ .irq_mask = gic_mask_irq,
+ .irq_unmask = gic_unmask_irq,
+ .irq_eoi = gic_eoi_irq,
+ .irq_set_type = gic_set_type,
+ .irq_retrigger = gic_retrigger,
+ .irq_set_affinity = gic_set_affinity,
+ .ipi_send_mask = gic_ipi_send_mask,
+ .irq_get_irqchip_state = gic_irq_get_irqchip_state,
+ .irq_set_irqchip_state = gic_irq_set_irqchip_state,
+ .irq_print_chip = gic_irq_print_chip,
+ .flags = IRQCHIP_SET_TYPE_MASKED |
+ IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_MASK_ON_SUSPEND,
+};
+
+static const struct irq_chip gic_chip_mode1 = {
+ .name = "GICv2",
+ .irq_mask = gic_eoimode1_mask_irq,
+ .irq_unmask = gic_unmask_irq,
+ .irq_eoi = gic_eoimode1_eoi_irq,
+ .irq_set_type = gic_set_type,
+ .irq_retrigger = gic_retrigger,
+ .irq_set_affinity = gic_set_affinity,
+ .ipi_send_mask = gic_ipi_send_mask,
+ .irq_get_irqchip_state = gic_irq_get_irqchip_state,
+ .irq_set_irqchip_state = gic_irq_set_irqchip_state,
+ .irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity,
+ .flags = IRQCHIP_SET_TYPE_MASKED |
+ IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_MASK_ON_SUSPEND,
+};
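
This is the recurring conversion in this series (also applied to intmux, lpc32xx and mvebu-pic below): the per-instance, writable struct irq_chip that was copied around just to carry a name becomes a shared const structure, and the instance-specific name is rendered on demand through .irq_print_chip. A minimal sketch of the pattern for a hypothetical driver (foo_chip_data and foo->id are made-up names):

	static void foo_irq_print_chip(struct irq_data *d, struct seq_file *p)
	{
		struct foo_chip_data *foo = irq_data_get_irq_chip_data(d);

		seq_printf(p, "foo-%d", foo->id);   /* per-instance name */
	}

	static const struct irq_chip foo_chip = {
		.irq_mask       = foo_mask_irq,
		.irq_unmask     = foo_unmask_irq,
		.irq_print_chip = foo_irq_print_chip, /* replaces a fixed .name */
	};
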
+
#ifdef CONFIG_BL_SWITCHER
/*
* gic_send_sgi - send a SGI directly to given CPU interface number
@@ -1024,15 +1058,19 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
{
struct gic_chip_data *gic = d->host_data;
struct irq_data *irqd = irq_desc_get_irq_data(irq_to_desc(irq));
+ const struct irq_chip *chip;
+
+ chip = (static_branch_likely(&supports_deactivate_key) &&
+ gic == &gic_data[0]) ? &gic_chip_mode1 : &gic_chip;
switch (hw) {
case 0 ... 31:
irq_set_percpu_devid(irq);
- irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data,
+ irq_domain_set_info(d, irq, hw, chip, d->host_data,
handle_percpu_devid_irq, NULL, NULL);
break;
default:
- irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data,
+ irq_domain_set_info(d, irq, hw, chip, d->host_data,
handle_fasteoi_irq, NULL, NULL);
irq_set_probe(irq);
irqd_set_single_target(irqd);
@@ -1127,26 +1165,6 @@ static const struct irq_domain_ops gic_irq_domain_ops = {
.unmap = gic_irq_domain_unmap,
};
-static void gic_init_chip(struct gic_chip_data *gic, struct device *dev,
- const char *name, bool use_eoimode1)
-{
- /* Initialize irq_chip */
- gic->chip = gic_chip;
- gic->chip.name = name;
- gic->chip.parent_device = dev;
-
- if (use_eoimode1) {
- gic->chip.irq_mask = gic_eoimode1_mask_irq;
- gic->chip.irq_eoi = gic_eoimode1_eoi_irq;
- gic->chip.irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity;
- }
-
- if (gic == &gic_data[0]) {
- gic->chip.irq_set_affinity = gic_set_affinity;
- gic->chip.ipi_send_mask = gic_ipi_send_mask;
- }
-}
-
static int gic_init_bases(struct gic_chip_data *gic,
struct fwnode_handle *handle)
{
@@ -1246,7 +1264,6 @@ error:
static int __init __gic_init_bases(struct gic_chip_data *gic,
struct fwnode_handle *handle)
{
- char *name;
int i, ret;
if (WARN_ON(!gic || gic->domain))
@@ -1266,18 +1283,8 @@ static int __init __gic_init_bases(struct gic_chip_data *gic,
pr_info("GIC: Using split EOI/Deactivate mode\n");
}
- if (static_branch_likely(&supports_deactivate_key) && gic == &gic_data[0]) {
- name = kasprintf(GFP_KERNEL, "GICv2");
- gic_init_chip(gic, NULL, name, true);
- } else {
- name = kasprintf(GFP_KERNEL, "GIC-%d", (int)(gic-&gic_data[0]));
- gic_init_chip(gic, NULL, name, false);
- }
-
ret = gic_init_bases(gic, handle);
- if (ret)
- kfree(name);
- else if (gic == &gic_data[0])
+ if (gic == &gic_data[0])
gic_smp_init();
return ret;
@@ -1460,8 +1467,6 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq)
if (!*gic)
return -ENOMEM;
- gic_init_chip(*gic, dev, dev->of_node->name, false);
-
ret = gic_of_setup(*gic, dev->of_node);
if (ret)
return ret;
@@ -1472,6 +1477,7 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq)
return ret;
}
+ irq_domain_set_pm_device((*gic)->domain, dev);
irq_set_chained_handler_and_data(irq, gic_handle_cascade_irq, *gic);
return 0;
diff --git a/drivers/irqchip/irq-imx-intmux.c b/drivers/irqchip/irq-imx-intmux.c
index e86ff743e98c..80aaea82468a 100644
--- a/drivers/irqchip/irq-imx-intmux.c
+++ b/drivers/irqchip/irq-imx-intmux.c
@@ -61,7 +61,6 @@
#define CHAN_MAX_NUM 0x8
struct intmux_irqchip_data {
- struct irq_chip chip;
u32 saved_reg;
int chanidx;
int irq;
@@ -114,7 +113,7 @@ static void imx_intmux_irq_unmask(struct irq_data *d)
raw_spin_unlock_irqrestore(&data->lock, flags);
}
-static struct irq_chip imx_intmux_irq_chip = {
+static struct irq_chip imx_intmux_irq_chip __ro_after_init = {
.name = "intmux",
.irq_mask = imx_intmux_irq_mask,
.irq_unmask = imx_intmux_irq_unmask,
@@ -126,7 +125,7 @@ static int imx_intmux_irq_map(struct irq_domain *h, unsigned int irq,
struct intmux_irqchip_data *data = h->host_data;
irq_set_chip_data(irq, data);
- irq_set_chip_and_handler(irq, &data->chip, handle_level_irq);
+ irq_set_chip_and_handler(irq, &imx_intmux_irq_chip, handle_level_irq);
return 0;
}
@@ -241,8 +240,6 @@ static int imx_intmux_probe(struct platform_device *pdev)
}
for (i = 0; i < channum; i++) {
- data->irqchip_data[i].chip = imx_intmux_irq_chip;
- data->irqchip_data[i].chip.parent_device = &pdev->dev;
data->irqchip_data[i].chanidx = i;
data->irqchip_data[i].irq = irq_of_parse_and_map(np, i);
@@ -260,6 +257,7 @@ static int imx_intmux_probe(struct platform_device *pdev)
goto out;
}
data->irqchip_data[i].domain = domain;
+ irq_domain_set_pm_device(domain, &pdev->dev);
/* disable all interrupt sources of this channel firstly */
writel_relaxed(0, data->regs + CHANIER(i));
diff --git a/drivers/irqchip/irq-loongson-pch-msi.c b/drivers/irqchip/irq-loongson-pch-msi.c
index 32562b7e681b..e3801c4a77ed 100644
--- a/drivers/irqchip/irq-loongson-pch-msi.c
+++ b/drivers/irqchip/irq-loongson-pch-msi.c
@@ -241,7 +241,7 @@ static int pch_msi_init(struct device_node *node,
return 0;
err_map:
- kfree(priv->msi_map);
+ bitmap_free(priv->msi_map);
err_priv:
kfree(priv);
return ret;
diff --git a/drivers/irqchip/irq-lpc32xx.c b/drivers/irqchip/irq-lpc32xx.c
index a29357f39450..4d70a857133f 100644
--- a/drivers/irqchip/irq-lpc32xx.c
+++ b/drivers/irqchip/irq-lpc32xx.c
@@ -11,6 +11,7 @@
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
+#include <linux/seq_file.h>
#include <linux/slab.h>
#include <asm/exception.h>
@@ -25,8 +26,8 @@
struct lpc32xx_irq_chip {
void __iomem *base;
+ phys_addr_t addr;
struct irq_domain *domain;
- struct irq_chip chip;
};
static struct lpc32xx_irq_chip *lpc32xx_mic_irqc;
@@ -118,6 +119,24 @@ static int lpc32xx_irq_set_type(struct irq_data *d, unsigned int type)
return 0;
}
+static void lpc32xx_irq_print_chip(struct irq_data *d, struct seq_file *p)
+{
+ struct lpc32xx_irq_chip *ic = irq_data_get_irq_chip_data(d);
+
+ if (ic == lpc32xx_mic_irqc)
+ seq_printf(p, "%08x.mic", ic->addr);
+ else
+ seq_printf(p, "%08x.sic", ic->addr);
+}
+
+static const struct irq_chip lpc32xx_chip = {
+ .irq_ack = lpc32xx_irq_ack,
+ .irq_mask = lpc32xx_irq_mask,
+ .irq_unmask = lpc32xx_irq_unmask,
+ .irq_set_type = lpc32xx_irq_set_type,
+ .irq_print_chip = lpc32xx_irq_print_chip,
+};
+
static void __exception_irq_entry lpc32xx_handle_irq(struct pt_regs *regs)
{
struct lpc32xx_irq_chip *ic = lpc32xx_mic_irqc;
@@ -153,7 +172,7 @@ static int lpc32xx_irq_domain_map(struct irq_domain *id, unsigned int virq,
struct lpc32xx_irq_chip *ic = id->host_data;
irq_set_chip_data(virq, ic);
- irq_set_chip_and_handler(virq, &ic->chip, handle_level_irq);
+ irq_set_chip_and_handler(virq, &lpc32xx_chip, handle_level_irq);
irq_set_status_flags(virq, IRQ_LEVEL);
irq_set_noprobe(virq);
@@ -183,6 +202,7 @@ static int __init lpc32xx_of_ic_init(struct device_node *node,
if (!irqc)
return -ENOMEM;
+ irqc->addr = addr;
irqc->base = of_iomap(node, 0);
if (!irqc->base) {
pr_err("%pOF: unable to map registers\n", node);
@@ -190,21 +210,11 @@ static int __init lpc32xx_of_ic_init(struct device_node *node,
return -EINVAL;
}
- irqc->chip.irq_ack = lpc32xx_irq_ack;
- irqc->chip.irq_mask = lpc32xx_irq_mask;
- irqc->chip.irq_unmask = lpc32xx_irq_unmask;
- irqc->chip.irq_set_type = lpc32xx_irq_set_type;
- if (is_mic)
- irqc->chip.name = kasprintf(GFP_KERNEL, "%08x.mic", addr);
- else
- irqc->chip.name = kasprintf(GFP_KERNEL, "%08x.sic", addr);
-
irqc->domain = irq_domain_add_linear(node, NR_LPC32XX_IC_IRQS,
&lpc32xx_irq_domain_ops, irqc);
if (!irqc->domain) {
pr_err("unable to add irq domain\n");
iounmap(irqc->base);
- kfree(irqc->chip.name);
kfree(irqc);
return -ENODEV;
}
diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c
index d90ff0b92480..2aaa9aad3e87 100644
--- a/drivers/irqchip/irq-meson-gpio.c
+++ b/drivers/irqchip/irq-meson-gpio.c
@@ -16,7 +16,7 @@
#include <linux/of.h>
#include <linux/of_address.h>
-#define NUM_CHANNEL 8
+#define MAX_NUM_CHANNEL 64
#define MAX_INPUT_MUX 256
#define REG_EDGE_POL 0x00
@@ -26,6 +26,8 @@
/* use for A1 like chips */
#define REG_PIN_A1_SEL 0x04
+/* Used for s4 chips */
+#define REG_EDGE_POL_S4 0x1c
/*
* Note: The S905X3 datasheet reports that BOTH_EDGE is controlled by
@@ -51,15 +53,22 @@ static void meson_a1_gpio_irq_sel_pin(struct meson_gpio_irq_controller *ctl,
unsigned int channel,
unsigned long hwirq);
static void meson_a1_gpio_irq_init(struct meson_gpio_irq_controller *ctl);
+static int meson8_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl,
+ unsigned int type, u32 *channel_hwirq);
+static int meson_s4_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl,
+ unsigned int type, u32 *channel_hwirq);
struct irq_ctl_ops {
void (*gpio_irq_sel_pin)(struct meson_gpio_irq_controller *ctl,
unsigned int channel, unsigned long hwirq);
void (*gpio_irq_init)(struct meson_gpio_irq_controller *ctl);
+ int (*gpio_irq_set_type)(struct meson_gpio_irq_controller *ctl,
+ unsigned int type, u32 *channel_hwirq);
};
struct meson_gpio_irq_params {
unsigned int nr_hwirq;
+ unsigned int nr_channels;
bool support_edge_both;
unsigned int edge_both_offset;
unsigned int edge_single_offset;
@@ -68,28 +77,44 @@ struct meson_gpio_irq_params {
struct irq_ctl_ops ops;
};
-#define INIT_MESON_COMMON(irqs, init, sel) \
+#define INIT_MESON_COMMON(irqs, init, sel, type) \
.nr_hwirq = irqs, \
.ops = { \
.gpio_irq_init = init, \
.gpio_irq_sel_pin = sel, \
+ .gpio_irq_set_type = type, \
},
#define INIT_MESON8_COMMON_DATA(irqs) \
INIT_MESON_COMMON(irqs, meson_gpio_irq_init_dummy, \
- meson8_gpio_irq_sel_pin) \
+ meson8_gpio_irq_sel_pin, \
+ meson8_gpio_irq_set_type) \
.edge_single_offset = 0, \
.pol_low_offset = 16, \
.pin_sel_mask = 0xff, \
+ .nr_channels = 8, \
#define INIT_MESON_A1_COMMON_DATA(irqs) \
INIT_MESON_COMMON(irqs, meson_a1_gpio_irq_init, \
- meson_a1_gpio_irq_sel_pin) \
+ meson_a1_gpio_irq_sel_pin, \
+ meson8_gpio_irq_set_type) \
.support_edge_both = true, \
.edge_both_offset = 16, \
.edge_single_offset = 8, \
.pol_low_offset = 0, \
.pin_sel_mask = 0x7f, \
+ .nr_channels = 8, \
+
+#define INIT_MESON_S4_COMMON_DATA(irqs) \
+ INIT_MESON_COMMON(irqs, meson_a1_gpio_irq_init, \
+ meson_a1_gpio_irq_sel_pin, \
+ meson_s4_gpio_irq_set_type) \
+ .support_edge_both = true, \
+ .edge_both_offset = 0, \
+ .edge_single_offset = 12, \
+ .pol_low_offset = 0, \
+ .pin_sel_mask = 0xff, \
+ .nr_channels = 12, \
static const struct meson_gpio_irq_params meson8_params = {
INIT_MESON8_COMMON_DATA(134)
@@ -121,6 +146,10 @@ static const struct meson_gpio_irq_params a1_params = {
INIT_MESON_A1_COMMON_DATA(62)
};
+static const struct meson_gpio_irq_params s4_params = {
+ INIT_MESON_S4_COMMON_DATA(82)
+};
+
static const struct of_device_id meson_irq_gpio_matches[] = {
{ .compatible = "amlogic,meson8-gpio-intc", .data = &meson8_params },
{ .compatible = "amlogic,meson8b-gpio-intc", .data = &meson8b_params },
@@ -130,14 +159,15 @@ static const struct of_device_id meson_irq_gpio_matches[] = {
{ .compatible = "amlogic,meson-g12a-gpio-intc", .data = &axg_params },
{ .compatible = "amlogic,meson-sm1-gpio-intc", .data = &sm1_params },
{ .compatible = "amlogic,meson-a1-gpio-intc", .data = &a1_params },
+ { .compatible = "amlogic,meson-s4-gpio-intc", .data = &s4_params },
{ }
};
struct meson_gpio_irq_controller {
const struct meson_gpio_irq_params *params;
void __iomem *base;
- u32 channel_irqs[NUM_CHANNEL];
- DECLARE_BITMAP(channel_map, NUM_CHANNEL);
+ u32 channel_irqs[MAX_NUM_CHANNEL];
+ DECLARE_BITMAP(channel_map, MAX_NUM_CHANNEL);
spinlock_t lock;
};
@@ -207,8 +237,8 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl,
spin_lock_irqsave(&ctl->lock, flags);
/* Find a free channel */
- idx = find_first_zero_bit(ctl->channel_map, NUM_CHANNEL);
- if (idx >= NUM_CHANNEL) {
+ idx = find_first_zero_bit(ctl->channel_map, ctl->params->nr_channels);
+ if (idx >= ctl->params->nr_channels) {
spin_unlock_irqrestore(&ctl->lock, flags);
pr_err("No channel available\n");
return -ENOSPC;
@@ -256,9 +286,8 @@ meson_gpio_irq_release_channel(struct meson_gpio_irq_controller *ctl,
clear_bit(idx, ctl->channel_map);
}
-static int meson_gpio_irq_type_setup(struct meson_gpio_irq_controller *ctl,
- unsigned int type,
- u32 *channel_hwirq)
+static int meson8_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl,
+ unsigned int type, u32 *channel_hwirq)
{
u32 val = 0;
unsigned int idx;
@@ -299,6 +328,51 @@ static int meson_gpio_irq_type_setup(struct meson_gpio_irq_controller *ctl,
return 0;
}
+/*
+ * gpio irq related registers for s4
+ * -PADCTRL_GPIO_IRQ_CTRL0
+ * bit[31]: enable/disable all the irq lines
+ * bit[12-23]: single edge trigger
+ * bit[0-11]: polarity trigger
+ *
+ * -PADCTRL_GPIO_IRQ_CTRL[X]
+ * bit[0-6]: 7 bits to choose gpio source for irq line 2*[X] - 2
+ * bit[16-22]: 7 bits to choose gpio source for irq line 2*[X] - 1
+ * where X = 1-6
+ *
+ * -PADCTRL_GPIO_IRQ_CTRL[7]
+ * bit[0-11]: both edge trigger
+ */
+static int meson_s4_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl,
+ unsigned int type, u32 *channel_hwirq)
+{
+ u32 val = 0;
+ unsigned int idx;
+
+ idx = meson_gpio_irq_get_channel_idx(ctl, channel_hwirq);
+
+ type &= IRQ_TYPE_SENSE_MASK;
+
+ meson_gpio_irq_update_bits(ctl, REG_EDGE_POL_S4, BIT(idx), 0);
+
+ if (type == IRQ_TYPE_EDGE_BOTH) {
+ val |= BIT(ctl->params->edge_both_offset + idx);
+ meson_gpio_irq_update_bits(ctl, REG_EDGE_POL_S4,
+ BIT(ctl->params->edge_both_offset + idx), val);
+ return 0;
+ }
+
+ if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_EDGE_FALLING))
+ val |= BIT(ctl->params->pol_low_offset + idx);
+
+ if (type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING))
+ val |= BIT(ctl->params->edge_single_offset + idx);
+
+ meson_gpio_irq_update_bits(ctl, REG_EDGE_POL,
+ BIT(idx) | BIT(12 + idx), val);
+ return 0;
+}
+
static unsigned int meson_gpio_irq_type_output(unsigned int type)
{
unsigned int sense = type & IRQ_TYPE_SENSE_MASK;
@@ -323,7 +397,7 @@ static int meson_gpio_irq_set_type(struct irq_data *data, unsigned int type)
u32 *channel_hwirq = irq_data_get_irq_chip_data(data);
int ret;
- ret = meson_gpio_irq_type_setup(ctl, type, channel_hwirq);
+ ret = ctl->params->ops.gpio_irq_set_type(ctl, type, channel_hwirq);
if (ret)
return ret;
@@ -450,10 +524,10 @@ static int meson_gpio_irq_parse_dt(struct device_node *node, struct meson_gpio_i
ret = of_property_read_variable_u32_array(node,
"amlogic,channel-interrupts",
ctl->channel_irqs,
- NUM_CHANNEL,
- NUM_CHANNEL);
+ ctl->params->nr_channels,
+ ctl->params->nr_channels);
if (ret < 0) {
- pr_err("can't get %d channel interrupts\n", NUM_CHANNEL);
+ pr_err("can't get %d channel interrupts\n", ctl->params->nr_channels);
return ret;
}
@@ -507,7 +581,7 @@ static int meson_gpio_irq_of_init(struct device_node *node, struct device_node *
}
pr_info("%d to %d gpio interrupt mux initialized\n",
- ctl->params->nr_hwirq, NUM_CHANNEL);
+ ctl->params->nr_hwirq, ctl->params->nr_channels);
return 0;
diff --git a/drivers/irqchip/irq-mvebu-pic.c b/drivers/irqchip/irq-mvebu-pic.c
index 870f9866b8da..ef3d3646ccc2 100644
--- a/drivers/irqchip/irq-mvebu-pic.c
+++ b/drivers/irqchip/irq-mvebu-pic.c
@@ -18,6 +18,7 @@
#include <linux/module.h>
#include <linux/of_irq.h>
#include <linux/platform_device.h>
+#include <linux/seq_file.h>
#define PIC_CAUSE 0x0
#define PIC_MASK 0x4
@@ -29,7 +30,7 @@ struct mvebu_pic {
void __iomem *base;
u32 parent_irq;
struct irq_domain *domain;
- struct irq_chip irq_chip;
+ struct platform_device *pdev;
};
static void mvebu_pic_reset(struct mvebu_pic *pic)
@@ -66,6 +67,20 @@ static void mvebu_pic_unmask_irq(struct irq_data *d)
writel(reg, pic->base + PIC_MASK);
}
+static void mvebu_pic_print_chip(struct irq_data *d, struct seq_file *p)
+{
+ struct mvebu_pic *pic = irq_data_get_irq_chip_data(d);
+
+ seq_printf(p, dev_name(&pic->pdev->dev));
+}
+
+static const struct irq_chip mvebu_pic_chip = {
+ .irq_mask = mvebu_pic_mask_irq,
+ .irq_unmask = mvebu_pic_unmask_irq,
+ .irq_eoi = mvebu_pic_eoi_irq,
+ .irq_print_chip = mvebu_pic_print_chip,
+};
+
static int mvebu_pic_irq_map(struct irq_domain *domain, unsigned int virq,
irq_hw_number_t hwirq)
{
@@ -73,8 +88,7 @@ static int mvebu_pic_irq_map(struct irq_domain *domain, unsigned int virq,
irq_set_percpu_devid(virq);
irq_set_chip_data(virq, pic);
- irq_set_chip_and_handler(virq, &pic->irq_chip,
- handle_percpu_devid_irq);
+ irq_set_chip_and_handler(virq, &mvebu_pic_chip, handle_percpu_devid_irq);
irq_set_status_flags(virq, IRQ_LEVEL);
irq_set_probe(virq);
@@ -120,22 +134,16 @@ static int mvebu_pic_probe(struct platform_device *pdev)
{
struct device_node *node = pdev->dev.of_node;
struct mvebu_pic *pic;
- struct irq_chip *irq_chip;
pic = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_pic), GFP_KERNEL);
if (!pic)
return -ENOMEM;
+ pic->pdev = pdev;
pic->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(pic->base))
return PTR_ERR(pic->base);
- irq_chip = &pic->irq_chip;
- irq_chip->name = dev_name(&pdev->dev);
- irq_chip->irq_mask = mvebu_pic_mask_irq;
- irq_chip->irq_unmask = mvebu_pic_unmask_irq;
- irq_chip->irq_eoi = mvebu_pic_eoi_irq;
-
pic->parent_irq = irq_of_parse_and_map(node, 0);
if (pic->parent_irq <= 0) {
dev_err(&pdev->dev, "Failed to parse parent interrupt\n");
diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c
index ba4759b3e269..94230306e0ee 100644
--- a/drivers/irqchip/irq-nvic.c
+++ b/drivers/irqchip/irq-nvic.c
@@ -107,6 +107,7 @@ static int __init nvic_of_init(struct device_node *node,
if (!nvic_irq_domain) {
pr_warn("Failed to allocate irq domain\n");
+ iounmap(nvic_base);
return -ENOMEM;
}
@@ -116,6 +117,7 @@ static int __init nvic_of_init(struct device_node *node,
if (ret) {
pr_warn("Failed to allocate irq chips\n");
irq_domain_remove(nvic_irq_domain);
+ iounmap(nvic_base);
return ret;
}
diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c
new file mode 100644
index 000000000000..eea5a753618c
--- /dev/null
+++ b/drivers/irqchip/irq-qcom-mpm.c
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, Linaro Limited
+ * Copyright (c) 2010-2020, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/mailbox_client.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/slab.h>
+#include <linux/soc/qcom/irq.h>
+#include <linux/spinlock.h>
+
+/*
+ * This is the driver for the Qualcomm MPM (MSM Power Manager) interrupt
+ * controller, which is commonly found on Qualcomm SoCs built on the RPM
+ * architecture. Sitting in the always-on domain, the MPM monitors wakeup
+ * interrupts while the SoC is asleep and wakes up the AP when one of those
+ * interrupts occurs. This driver doesn't directly access the physical MPM
+ * registers though. Instead, the access is bridged via a piece of internal
+ * memory (SRAM) that is accessible to both the AP and the RPM. This piece
+ * of memory is called 'vMPM' in the driver.
+ *
+ * When the SoC is awake, the vMPM is owned by the AP, and all register setup
+ * done by this driver happens on vMPM. When the AP is about to be power
+ * collapsed, the driver sends a mailbox notification to the RPM, which takes
+ * over vMPM ownership and dumps vMPM into the physical MPM registers. On
+ * wakeup, the AP is woken up by an MPM pin/interrupt, and the RPM copies the
+ * STATUS registers into vMPM. Then the AP starts owning vMPM again.
+ *
+ * vMPM register map:
+ *
+ * 31 0
+ * +--------------------------------+
+ * | TIMER0 | 0x00
+ * +--------------------------------+
+ * | TIMER1 | 0x04
+ * +--------------------------------+
+ * | ENABLE0 | 0x08
+ * +--------------------------------+
+ * | ... | ...
+ * +--------------------------------+
+ * | ENABLEn |
+ * +--------------------------------+
+ * | FALLING_EDGE0 |
+ * +--------------------------------+
+ * | ... |
+ * +--------------------------------+
+ * | STATUSn |
+ * +--------------------------------+
+ *
+ * n = DIV_ROUND_UP(pin_cnt, 32)
+ *
+ */
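
Every register bank in that map sits two words in, after TIMER0/TIMER1, which is where the "+ 2" in the accessors below comes from. A worked example of the offset math, assuming a 64-pin MPM (so reg_stride = n = 2):

	/* offset = (reg * reg_stride + index + 2) * 4, with reg_stride = 2: */
	/*   ENABLE0       -> (0*2 + 0 + 2) * 4 = 0x08                      */
	/*   ENABLE1       -> (0*2 + 1 + 2) * 4 = 0x0c                      */
	/*   FALLING_EDGE0 -> (1*2 + 0 + 2) * 4 = 0x10                      */
	/*   STATUS0       -> (4*2 + 0 + 2) * 4 = 0x28                      */
	unsigned int offset = (reg * reg_stride + index + 2) * 4;
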
+
+#define MPM_REG_ENABLE 0
+#define MPM_REG_FALLING_EDGE 1
+#define MPM_REG_RISING_EDGE 2
+#define MPM_REG_POLARITY 3
+#define MPM_REG_STATUS 4
+
+/* MPM pin map to GIC hwirq */
+struct mpm_gic_map {
+ int pin;
+ irq_hw_number_t hwirq;
+};
+
+struct qcom_mpm_priv {
+ void __iomem *base;
+ raw_spinlock_t lock;
+ struct mbox_client mbox_client;
+ struct mbox_chan *mbox_chan;
+ struct mpm_gic_map *maps;
+ unsigned int map_cnt;
+ unsigned int reg_stride;
+ struct irq_domain *domain;
+ struct generic_pm_domain genpd;
+};
+
+static u32 qcom_mpm_read(struct qcom_mpm_priv *priv, unsigned int reg,
+ unsigned int index)
+{
+ unsigned int offset = (reg * priv->reg_stride + index + 2) * 4;
+
+ return readl_relaxed(priv->base + offset);
+}
+
+static void qcom_mpm_write(struct qcom_mpm_priv *priv, unsigned int reg,
+ unsigned int index, u32 val)
+{
+ unsigned int offset = (reg * priv->reg_stride + index + 2) * 4;
+
+ writel_relaxed(val, priv->base + offset);
+
+ /* Ensure the write is completed */
+ wmb();
+}
+
+static void qcom_mpm_enable_irq(struct irq_data *d, bool en)
+{
+ struct qcom_mpm_priv *priv = d->chip_data;
+ int pin = d->hwirq;
+ unsigned int index = pin / 32;
+ unsigned int shift = pin % 32;
+ unsigned long flags, val;
+
+ raw_spin_lock_irqsave(&priv->lock, flags);
+
+ val = qcom_mpm_read(priv, MPM_REG_ENABLE, index);
+ __assign_bit(shift, &val, en);
+ qcom_mpm_write(priv, MPM_REG_ENABLE, index, val);
+
+ raw_spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static void qcom_mpm_mask(struct irq_data *d)
+{
+ qcom_mpm_enable_irq(d, false);
+
+ if (d->parent_data)
+ irq_chip_mask_parent(d);
+}
+
+static void qcom_mpm_unmask(struct irq_data *d)
+{
+ qcom_mpm_enable_irq(d, true);
+
+ if (d->parent_data)
+ irq_chip_unmask_parent(d);
+}
+
+static void mpm_set_type(struct qcom_mpm_priv *priv, bool set, unsigned int reg,
+ unsigned int index, unsigned int shift)
+{
+ unsigned long flags, val;
+
+ raw_spin_lock_irqsave(&priv->lock, flags);
+
+ val = qcom_mpm_read(priv, reg, index);
+ __assign_bit(shift, &val, set);
+ qcom_mpm_write(priv, reg, index, val);
+
+ raw_spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int qcom_mpm_set_type(struct irq_data *d, unsigned int type)
+{
+ struct qcom_mpm_priv *priv = d->chip_data;
+ int pin = d->hwirq;
+ unsigned int index = pin / 32;
+ unsigned int shift = pin % 32;
+
+ if (type & IRQ_TYPE_EDGE_RISING)
+ mpm_set_type(priv, true, MPM_REG_RISING_EDGE, index, shift);
+ else
+ mpm_set_type(priv, false, MPM_REG_RISING_EDGE, index, shift);
+
+ if (type & IRQ_TYPE_EDGE_FALLING)
+ mpm_set_type(priv, true, MPM_REG_FALLING_EDGE, index, shift);
+ else
+ mpm_set_type(priv, false, MPM_REG_FALLING_EDGE, index, shift);
+
+ if (type & IRQ_TYPE_LEVEL_HIGH)
+ mpm_set_type(priv, true, MPM_REG_POLARITY, index, shift);
+ else
+ mpm_set_type(priv, false, MPM_REG_POLARITY, index, shift);
+
+ if (!d->parent_data)
+ return 0;
+
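+	/* Normalize to the single edge/polarity the parent GIC accepts */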
+ if (type & IRQ_TYPE_EDGE_BOTH)
+ type = IRQ_TYPE_EDGE_RISING;
+
+ if (type & IRQ_TYPE_LEVEL_MASK)
+ type = IRQ_TYPE_LEVEL_HIGH;
+
+ return irq_chip_set_type_parent(d, type);
+}
+
+static struct irq_chip qcom_mpm_chip = {
+ .name = "mpm",
+ .irq_eoi = irq_chip_eoi_parent,
+ .irq_mask = qcom_mpm_mask,
+ .irq_unmask = qcom_mpm_unmask,
+ .irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_set_type = qcom_mpm_set_type,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+ .flags = IRQCHIP_MASK_ON_SUSPEND |
+ IRQCHIP_SKIP_SET_WAKE,
+};
+
+static struct mpm_gic_map *get_mpm_gic_map(struct qcom_mpm_priv *priv, int pin)
+{
+ struct mpm_gic_map *maps = priv->maps;
+ int i;
+
+ for (i = 0; i < priv->map_cnt; i++) {
+ if (maps[i].pin == pin)
+ return &maps[i];
+ }
+
+ return NULL;
+}
+
+static int qcom_mpm_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *data)
+{
+ struct qcom_mpm_priv *priv = domain->host_data;
+ struct irq_fwspec *fwspec = data;
+ struct irq_fwspec parent_fwspec;
+ struct mpm_gic_map *map;
+ irq_hw_number_t pin;
+ unsigned int type;
+ int ret;
+
+ ret = irq_domain_translate_twocell(domain, fwspec, &pin, &type);
+ if (ret)
+ return ret;
+
+ ret = irq_domain_set_hwirq_and_chip(domain, virq, pin,
+ &qcom_mpm_chip, priv);
+ if (ret)
+ return ret;
+
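+	/*
+	 * Pins without a GIC counterpart are wakeup-only: keep the MPM
+	 * level of the hierarchy, but leave the parent disconnected.
+	 */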
+ map = get_mpm_gic_map(priv, pin);
+ if (map == NULL)
+ return irq_domain_disconnect_hierarchy(domain->parent, virq);
+
+ if (type & IRQ_TYPE_EDGE_BOTH)
+ type = IRQ_TYPE_EDGE_RISING;
+
+ if (type & IRQ_TYPE_LEVEL_MASK)
+ type = IRQ_TYPE_LEVEL_HIGH;
+
+ parent_fwspec.fwnode = domain->parent->fwnode;
+ parent_fwspec.param_count = 3;
+ parent_fwspec.param[0] = 0;
+ parent_fwspec.param[1] = map->hwirq;
+ parent_fwspec.param[2] = type;
+
+ return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs,
+ &parent_fwspec);
+}
+
+static const struct irq_domain_ops qcom_mpm_ops = {
+ .alloc = qcom_mpm_alloc,
+ .free = irq_domain_free_irqs_common,
+ .translate = irq_domain_translate_twocell,
+};
+
+/* Triggered by RPM when the system resumes from deep sleep */
+static irqreturn_t qcom_mpm_handler(int irq, void *dev_id)
+{
+ struct qcom_mpm_priv *priv = dev_id;
+ unsigned long enable, pending;
+ irqreturn_t ret = IRQ_NONE;
+ unsigned long flags;
+ int i, j;
+
+ for (i = 0; i < priv->reg_stride; i++) {
+ raw_spin_lock_irqsave(&priv->lock, flags);
+ enable = qcom_mpm_read(priv, MPM_REG_ENABLE, i);
+ pending = qcom_mpm_read(priv, MPM_REG_STATUS, i);
+ pending &= enable;
+ raw_spin_unlock_irqrestore(&priv->lock, flags);
+
+ for_each_set_bit(j, &pending, 32) {
+ unsigned int pin = 32 * i + j;
+ struct irq_desc *desc = irq_resolve_mapping(priv->domain, pin);
+ struct irq_data *d = &desc->irq_data;
+
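+			/*
+			 * Level interrupts are still asserted and will be
+			 * handled normally; only edge interrupts need their
+			 * pending state replayed here.
+			 */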
+ if (!irqd_is_level_type(d))
+ irq_set_irqchip_state(d->irq,
+ IRQCHIP_STATE_PENDING, true);
+ ret = IRQ_HANDLED;
+ }
+ }
+
+ return ret;
+}
+
+static int mpm_pd_power_off(struct generic_pm_domain *genpd)
+{
+ struct qcom_mpm_priv *priv = container_of(genpd, struct qcom_mpm_priv,
+ genpd);
+ int i, ret;
+
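+	/* Clear the stale wakeup status before handing vMPM over to RPM */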
+ for (i = 0; i < priv->reg_stride; i++)
+ qcom_mpm_write(priv, MPM_REG_STATUS, i, 0);
+
+ /* Notify RPM to write vMPM into HW */
+ ret = mbox_send_message(priv->mbox_chan, NULL);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static bool gic_hwirq_is_mapped(struct mpm_gic_map *maps, int cnt, u32 hwirq)
+{
+ int i;
+
+ for (i = 0; i < cnt; i++)
+ if (maps[i].hwirq == hwirq)
+ return true;
+
+ return false;
+}
+
+static int qcom_mpm_init(struct device_node *np, struct device_node *parent)
+{
+ struct platform_device *pdev = of_find_device_by_node(np);
+ struct device *dev = &pdev->dev;
+ struct irq_domain *parent_domain;
+ struct generic_pm_domain *genpd;
+ struct qcom_mpm_priv *priv;
+ unsigned int pin_cnt;
+ int i, irq;
+ int ret;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ ret = of_property_read_u32(np, "qcom,mpm-pin-count", &pin_cnt);
+ if (ret) {
+ dev_err(dev, "failed to read qcom,mpm-pin-count: %d\n", ret);
+ return ret;
+ }
+
+ priv->reg_stride = DIV_ROUND_UP(pin_cnt, 32);
+
+ ret = of_property_count_u32_elems(np, "qcom,mpm-pin-map");
+ if (ret < 0) {
+ dev_err(dev, "failed to read qcom,mpm-pin-map: %d\n", ret);
+ return ret;
+ }
+
+ if (ret % 2) {
+ dev_err(dev, "invalid qcom,mpm-pin-map\n");
+ return -EINVAL;
+ }
+
+ priv->map_cnt = ret / 2;
+ priv->maps = devm_kcalloc(dev, priv->map_cnt, sizeof(*priv->maps),
+ GFP_KERNEL);
+ if (!priv->maps)
+ return -ENOMEM;
+
+ for (i = 0; i < priv->map_cnt; i++) {
+ u32 pin, hwirq;
+
+ of_property_read_u32_index(np, "qcom,mpm-pin-map", i * 2, &pin);
+ of_property_read_u32_index(np, "qcom,mpm-pin-map", i * 2 + 1, &hwirq);
+
+ if (gic_hwirq_is_mapped(priv->maps, i, hwirq)) {
+ dev_warn(dev, "failed to map pin %d as GIC hwirq %d is already mapped\n",
+ pin, hwirq);
+ continue;
+ }
+
+ priv->maps[i].pin = pin;
+ priv->maps[i].hwirq = hwirq;
+ }
+
+ raw_spin_lock_init(&priv->lock);
+
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+ for (i = 0; i < priv->reg_stride; i++) {
+ qcom_mpm_write(priv, MPM_REG_ENABLE, i, 0);
+ qcom_mpm_write(priv, MPM_REG_FALLING_EDGE, i, 0);
+ qcom_mpm_write(priv, MPM_REG_RISING_EDGE, i, 0);
+ qcom_mpm_write(priv, MPM_REG_POLARITY, i, 0);
+ qcom_mpm_write(priv, MPM_REG_STATUS, i, 0);
+ }
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
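+	/*
+	 * Register an irq-safe PM domain whose power_off hook hands the
+	 * vMPM state over to the RPM on power collapse.
+	 */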
+ genpd = &priv->genpd;
+ genpd->flags = GENPD_FLAG_IRQ_SAFE;
+ genpd->power_off = mpm_pd_power_off;
+
+ genpd->name = devm_kasprintf(dev, GFP_KERNEL, "%s", dev_name(dev));
+ if (!genpd->name)
+ return -ENOMEM;
+
+ ret = pm_genpd_init(genpd, NULL, false);
+ if (ret) {
+ dev_err(dev, "failed to init genpd: %d\n", ret);
+ return ret;
+ }
+
+ ret = of_genpd_add_provider_simple(np, genpd);
+ if (ret) {
+ dev_err(dev, "failed to add genpd provider: %d\n", ret);
+ goto remove_genpd;
+ }
+
+ priv->mbox_client.dev = dev;
+ priv->mbox_chan = mbox_request_channel(&priv->mbox_client, 0);
+ if (IS_ERR(priv->mbox_chan)) {
+ ret = PTR_ERR(priv->mbox_chan);
+ dev_err(dev, "failed to acquire IPC channel: %d\n", ret);
+		goto remove_provider;
+ }
+
+ parent_domain = irq_find_host(parent);
+ if (!parent_domain) {
+ dev_err(dev, "failed to find MPM parent domain\n");
+ ret = -ENXIO;
+ goto free_mbox;
+ }
+
+ priv->domain = irq_domain_create_hierarchy(parent_domain,
+ IRQ_DOMAIN_FLAG_QCOM_MPM_WAKEUP, pin_cnt,
+ of_node_to_fwnode(np), &qcom_mpm_ops, priv);
+ if (!priv->domain) {
+ dev_err(dev, "failed to create MPM domain\n");
+ ret = -ENOMEM;
+ goto free_mbox;
+ }
+
+ irq_domain_update_bus_token(priv->domain, DOMAIN_BUS_WAKEUP);
+
+ ret = devm_request_irq(dev, irq, qcom_mpm_handler, IRQF_NO_SUSPEND,
+ "qcom_mpm", priv);
+ if (ret) {
+ dev_err(dev, "failed to request irq: %d\n", ret);
+ goto remove_domain;
+ }
+
+ return 0;
+
+remove_domain:
+ irq_domain_remove(priv->domain);
+free_mbox:
+	mbox_free_channel(priv->mbox_chan);
+remove_provider:
+	of_genpd_del_provider(np);
+remove_genpd:
+	pm_genpd_remove(genpd);
+ return ret;
+}
+
+IRQCHIP_PLATFORM_DRIVER_BEGIN(qcom_mpm)
+IRQCHIP_MATCH("qcom,mpm", qcom_mpm_init)
+IRQCHIP_PLATFORM_DRIVER_END(qcom_mpm)
+MODULE_DESCRIPTION("Qualcomm Technologies, Inc. MSM Power Manager");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/irqchip/irq-realtek-rtl.c b/drivers/irqchip/irq-realtek-rtl.c
index fd9f275592d2..50a56820c99b 100644
--- a/drivers/irqchip/irq-realtek-rtl.c
+++ b/drivers/irqchip/irq-realtek-rtl.c
@@ -62,7 +62,7 @@ static struct irq_chip realtek_ictl_irq = {
static int intc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
{
- irq_set_chip_and_handler(hw, &realtek_ictl_irq, handle_level_irq);
+ irq_set_chip_and_handler(irq, &realtek_ictl_irq, handle_level_irq);
return 0;
}
@@ -76,16 +76,20 @@ static void realtek_irq_dispatch(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
struct irq_domain *domain;
- unsigned int pending;
+ unsigned long pending;
+ unsigned int soc_int;
chained_irq_enter(chip, desc);
pending = readl(REG(RTL_ICTL_GIMR)) & readl(REG(RTL_ICTL_GISR));
+
if (unlikely(!pending)) {
spurious_interrupt();
goto out;
}
+
domain = irq_desc_get_handler_data(desc);
- generic_handle_domain_irq(domain, __ffs(pending));
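+	/* Handle every pending SoC interrupt, not just the lowest bit */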
+ for_each_set_bit(soc_int, &pending, 32)
+ generic_handle_domain_irq(domain, soc_int);
out:
chained_irq_exit(chip, desc);
@@ -95,7 +99,8 @@ out:
* SoC interrupts are cascaded to MIPS CPU interrupts according to the
* interrupt-map in the device tree. Each SoC interrupt gets 4 bits for
* the CPU interrupt in an Interrupt Routing Register. Max 32 SoC interrupts
- * thus go into 4 IRRs.
+ * thus go into 4 IRRs. A routing value of '0' means the interrupt is left
+ * disconnected. Routing values {1..15} connect to output lines {0..14}.
*/
static int __init map_interrupts(struct device_node *node, struct irq_domain *domain)
{
@@ -134,7 +139,7 @@ static int __init map_interrupts(struct device_node *node, struct irq_domain *do
of_node_put(cpu_ictl);
cpu_int = be32_to_cpup(imap + 2);
- if (cpu_int > 7)
+ if (cpu_int > 7 || cpu_int < 2)
return -EINVAL;
if (!(mips_irqs_set & BIT(cpu_int))) {
@@ -143,7 +148,8 @@ static int __init map_interrupts(struct device_node *node, struct irq_domain *do
mips_irqs_set |= BIT(cpu_int);
}
- regs[(soc_int * 4) / 32] |= cpu_int << (soc_int * 4) % 32;
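+	/*
+	 * Only touch the bitmap when the bit actually changes, to limit
+	 * cacheline contention between concurrent updaters.
+	 */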
+ /* Use routing values (1..6) for CPU interrupts (2..7) */
+ regs[(soc_int * 4) / 32] |= (cpu_int - 1) << (soc_int * 4) % 32;
imap += 3;
}
diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c
index 37f9a4499fdb..e83756aca14e 100644
--- a/drivers/irqchip/irq-renesas-intc-irqpin.c
+++ b/drivers/irqchip/irq-renesas-intc-irqpin.c
@@ -508,7 +508,6 @@ static int intc_irqpin_probe(struct platform_device *pdev)
irq_chip = &p->irq_chip;
irq_chip->name = "intc-irqpin";
- irq_chip->parent_device = dev;
irq_chip->irq_mask = disable_fn;
irq_chip->irq_unmask = enable_fn;
irq_chip->irq_set_type = intc_irqpin_irq_set_type;
@@ -523,6 +522,8 @@ static int intc_irqpin_probe(struct platform_device *pdev)
goto err0;
}
+ irq_domain_set_pm_device(p->irq_domain, dev);
+
if (p->shared_irqs) {
/* request one shared interrupt */
if (devm_request_irq(dev, p->irq[0].requested_irq,
diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c
index 909325f88239..1ee5e9941f67 100644
--- a/drivers/irqchip/irq-renesas-irqc.c
+++ b/drivers/irqchip/irq-renesas-irqc.c
@@ -188,13 +188,14 @@ static int irqc_probe(struct platform_device *pdev)
p->gc->reg_base = p->cpu_int_base;
p->gc->chip_types[0].regs.enable = IRQC_EN_SET;
p->gc->chip_types[0].regs.disable = IRQC_EN_STS;
- p->gc->chip_types[0].chip.parent_device = dev;
p->gc->chip_types[0].chip.irq_mask = irq_gc_mask_disable_reg;
p->gc->chip_types[0].chip.irq_unmask = irq_gc_unmask_enable_reg;
p->gc->chip_types[0].chip.irq_set_type = irqc_irq_set_type;
p->gc->chip_types[0].chip.irq_set_wake = irqc_irq_set_wake;
p->gc->chip_types[0].chip.flags = IRQCHIP_MASK_ON_SUSPEND;
+ irq_domain_set_pm_device(p->irq_domain, dev);
+
/* request interrupts one by one */
for (k = 0; k < p->number_of_irqs; k++) {
if (devm_request_irq(dev, p->irq[k].requested_irq,
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 259065d271ef..bb87e4c3b88e 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -44,8 +44,8 @@
* Each hart context has a vector of interrupt enable bits associated with it.
* There's one bit for each interrupt source.
*/
-#define ENABLE_BASE 0x2000
-#define ENABLE_PER_HART 0x80
+#define CONTEXT_ENABLE_BASE 0x2000
+#define CONTEXT_ENABLE_SIZE 0x80
/*
* Each hart context has a set of control registers associated with it. Right
@@ -53,7 +53,7 @@
* take an interrupt, and a register to claim interrupts.
*/
#define CONTEXT_BASE 0x200000
-#define CONTEXT_PER_HART 0x1000
+#define CONTEXT_SIZE 0x1000
#define CONTEXT_THRESHOLD 0x00
#define CONTEXT_CLAIM 0x04
@@ -81,17 +81,21 @@ static int plic_parent_irq __ro_after_init;
static bool plic_cpuhp_setup_done __ro_after_init;
static DEFINE_PER_CPU(struct plic_handler, plic_handlers);
-static inline void plic_toggle(struct plic_handler *handler,
- int hwirq, int enable)
+static void __plic_toggle(void __iomem *enable_base, int hwirq, int enable)
{
- u32 __iomem *reg = handler->enable_base + (hwirq / 32) * sizeof(u32);
+ u32 __iomem *reg = enable_base + (hwirq / 32) * sizeof(u32);
u32 hwirq_mask = 1 << (hwirq % 32);
- raw_spin_lock(&handler->enable_lock);
if (enable)
writel(readl(reg) | hwirq_mask, reg);
else
writel(readl(reg) & ~hwirq_mask, reg);
+}
+
+static void plic_toggle(struct plic_handler *handler, int hwirq, int enable)
+{
+ raw_spin_lock(&handler->enable_lock);
+ __plic_toggle(handler->enable_base, hwirq, enable);
raw_spin_unlock(&handler->enable_lock);
}
@@ -324,8 +328,18 @@ static int __init plic_init(struct device_node *node,
* Skip contexts other than external interrupts for our
* privilege level.
*/
- if (parent.args[0] != RV_IRQ_EXT)
+ if (parent.args[0] != RV_IRQ_EXT) {
+ /* Disable S-mode enable bits if running in M-mode. */
+ if (IS_ENABLED(CONFIG_RISCV_M_MODE)) {
+ void __iomem *enable_base = priv->regs +
+ CONTEXT_ENABLE_BASE +
+ i * CONTEXT_ENABLE_SIZE;
+
+ for (hwirq = 1; hwirq <= nr_irqs; hwirq++)
+ __plic_toggle(enable_base, hwirq, 0);
+ }
continue;
+ }
hartid = riscv_of_parent_hartid(parent.np);
if (hartid < 0) {
@@ -361,11 +375,11 @@ static int __init plic_init(struct device_node *node,
cpumask_set_cpu(cpu, &priv->lmask);
handler->present = true;
- handler->hart_base =
- priv->regs + CONTEXT_BASE + i * CONTEXT_PER_HART;
+ handler->hart_base = priv->regs + CONTEXT_BASE +
+ i * CONTEXT_SIZE;
raw_spin_lock_init(&handler->enable_lock);
- handler->enable_base =
- priv->regs + ENABLE_BASE + i * ENABLE_PER_HART;
+ handler->enable_base = priv->regs + CONTEXT_ENABLE_BASE +
+ i * CONTEXT_ENABLE_SIZE;
handler->priv = priv;
done:
for (hwirq = 1; hwirq <= nr_irqs; hwirq++)
@@ -398,3 +412,4 @@ out_free_priv:
IRQCHIP_DECLARE(sifive_plic, "sifive,plic-1.0.0", plic_init);
IRQCHIP_DECLARE(riscv_plic0, "riscv,plic0", plic_init); /* for legacy systems */
+IRQCHIP_DECLARE(thead_c900_plic, "thead,c900-plic", plic_init); /* for firmware driver */
diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c
index b7cb2da71888..9d18f47040eb 100644
--- a/drivers/irqchip/irq-stm32-exti.c
+++ b/drivers/irqchip/irq-stm32-exti.c
@@ -214,6 +214,48 @@ static const struct stm32_desc_irq stm32mp1_desc_irq[] = {
{ .exti = 73, .irq_parent = 129, .chip = &stm32_exti_h_chip },
};
+static const struct stm32_desc_irq stm32mp13_desc_irq[] = {
+ { .exti = 0, .irq_parent = 6, .chip = &stm32_exti_h_chip },
+ { .exti = 1, .irq_parent = 7, .chip = &stm32_exti_h_chip },
+ { .exti = 2, .irq_parent = 8, .chip = &stm32_exti_h_chip },
+ { .exti = 3, .irq_parent = 9, .chip = &stm32_exti_h_chip },
+ { .exti = 4, .irq_parent = 10, .chip = &stm32_exti_h_chip },
+ { .exti = 5, .irq_parent = 24, .chip = &stm32_exti_h_chip },
+ { .exti = 6, .irq_parent = 65, .chip = &stm32_exti_h_chip },
+ { .exti = 7, .irq_parent = 66, .chip = &stm32_exti_h_chip },
+ { .exti = 8, .irq_parent = 67, .chip = &stm32_exti_h_chip },
+ { .exti = 9, .irq_parent = 68, .chip = &stm32_exti_h_chip },
+ { .exti = 10, .irq_parent = 41, .chip = &stm32_exti_h_chip },
+ { .exti = 11, .irq_parent = 43, .chip = &stm32_exti_h_chip },
+ { .exti = 12, .irq_parent = 77, .chip = &stm32_exti_h_chip },
+ { .exti = 13, .irq_parent = 78, .chip = &stm32_exti_h_chip },
+ { .exti = 14, .irq_parent = 106, .chip = &stm32_exti_h_chip },
+ { .exti = 15, .irq_parent = 109, .chip = &stm32_exti_h_chip },
+ { .exti = 16, .irq_parent = 1, .chip = &stm32_exti_h_chip },
+ { .exti = 19, .irq_parent = 3, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 21, .irq_parent = 32, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 22, .irq_parent = 34, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 23, .irq_parent = 73, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 24, .irq_parent = 93, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 25, .irq_parent = 114, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 26, .irq_parent = 38, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 27, .irq_parent = 39, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 28, .irq_parent = 40, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 29, .irq_parent = 72, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 30, .irq_parent = 53, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 31, .irq_parent = 54, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 32, .irq_parent = 83, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 33, .irq_parent = 84, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 44, .irq_parent = 96, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 47, .irq_parent = 92, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 48, .irq_parent = 116, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 50, .irq_parent = 117, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 52, .irq_parent = 118, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 53, .irq_parent = 119, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 68, .irq_parent = 63, .chip = &stm32_exti_h_chip_direct },
+ { .exti = 70, .irq_parent = 98, .chip = &stm32_exti_h_chip_direct },
+};
+
static const struct stm32_exti_drv_data stm32mp1_drv_data = {
.exti_banks = stm32mp1_exti_banks,
.bank_nr = ARRAY_SIZE(stm32mp1_exti_banks),
@@ -221,6 +263,13 @@ static const struct stm32_exti_drv_data stm32mp1_drv_data = {
.irq_nr = ARRAY_SIZE(stm32mp1_desc_irq),
};
+static const struct stm32_exti_drv_data stm32mp13_drv_data = {
+ .exti_banks = stm32mp1_exti_banks,
+ .bank_nr = ARRAY_SIZE(stm32mp1_exti_banks),
+ .desc_irqs = stm32mp13_desc_irq,
+ .irq_nr = ARRAY_SIZE(stm32mp13_desc_irq),
+};
+
static const struct
stm32_desc_irq *stm32_exti_get_desc(const struct stm32_exti_drv_data *drv_data,
irq_hw_number_t hwirq)
@@ -922,6 +971,7 @@ static int stm32_exti_probe(struct platform_device *pdev)
/* platform driver only for MP1 */
static const struct of_device_id stm32_exti_ids[] = {
{ .compatible = "st,stm32mp1-exti", .data = &stm32mp1_drv_data},
+ { .compatible = "st,stm32mp13-exti", .data = &stm32mp13_drv_data},
{},
};
MODULE_DEVICE_TABLE(of, stm32_exti_ids);
diff --git a/drivers/irqchip/irq-ts4800.c b/drivers/irqchip/irq-ts4800.c
index f032db23b30f..b2d61d4f6fe6 100644
--- a/drivers/irqchip/irq-ts4800.c
+++ b/drivers/irqchip/irq-ts4800.c
@@ -19,14 +19,15 @@
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/platform_device.h>
+#include <linux/seq_file.h>
#define IRQ_MASK 0x4
#define IRQ_STATUS 0x8
struct ts4800_irq_data {
void __iomem *base;
+ struct platform_device *pdev;
struct irq_domain *domain;
- struct irq_chip irq_chip;
};
static void ts4800_irq_mask(struct irq_data *d)
@@ -47,12 +48,25 @@ static void ts4800_irq_unmask(struct irq_data *d)
writew(reg & ~mask, data->base + IRQ_MASK);
}
+static void ts4800_irq_print_chip(struct irq_data *d, struct seq_file *p)
+{
+ struct ts4800_irq_data *data = irq_data_get_irq_chip_data(d);
+
+ seq_printf(p, "%s", dev_name(&data->pdev->dev));
+}
+
+static const struct irq_chip ts4800_chip = {
+ .irq_mask = ts4800_irq_mask,
+ .irq_unmask = ts4800_irq_unmask,
+ .irq_print_chip = ts4800_irq_print_chip,
+};
+
static int ts4800_irqdomain_map(struct irq_domain *d, unsigned int irq,
irq_hw_number_t hwirq)
{
struct ts4800_irq_data *data = d->host_data;
- irq_set_chip_and_handler(irq, &data->irq_chip, handle_simple_irq);
+ irq_set_chip_and_handler(irq, &ts4800_chip, handle_simple_irq);
irq_set_chip_data(irq, data);
irq_set_noprobe(irq);
@@ -92,13 +106,13 @@ static int ts4800_ic_probe(struct platform_device *pdev)
{
struct device_node *node = pdev->dev.of_node;
struct ts4800_irq_data *data;
- struct irq_chip *irq_chip;
int parent_irq;
data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
+ data->pdev = pdev;
data->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(data->base))
return PTR_ERR(data->base);
@@ -111,11 +125,6 @@ static int ts4800_ic_probe(struct platform_device *pdev)
return -EINVAL;
}
- irq_chip = &data->irq_chip;
- irq_chip->name = dev_name(&pdev->dev);
- irq_chip->irq_mask = ts4800_irq_mask;
- irq_chip->irq_unmask = ts4800_irq_unmask;
-
data->domain = irq_domain_add_linear(node, 8, &ts4800_ic_ops, data);
if (!data->domain) {
dev_err(&pdev->dev, "cannot add IRQ domain\n");
diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c
index f2757b6aecc8..ba543ed9c154 100644
--- a/drivers/irqchip/irq-versatile-fpga.c
+++ b/drivers/irqchip/irq-versatile-fpga.c
@@ -7,12 +7,12 @@
#include <linux/io.h>
#include <linux/irqchip.h>
#include <linux/irqchip/chained_irq.h>
-#include <linux/irqchip/versatile-fpga.h>
#include <linux/irqdomain.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
+#include <linux/seq_file.h>
#include <asm/exception.h>
#include <asm/mach/irq.h>
@@ -34,14 +34,12 @@
/**
* struct fpga_irq_data - irq data container for the FPGA IRQ controller
* @base: memory offset in virtual memory
- * @chip: chip container for this instance
* @domain: IRQ domain for this instance
* @valid: mask for valid IRQs on this controller
* @used_irqs: number of active IRQs on this controller
*/
struct fpga_irq_data {
void __iomem *base;
- struct irq_chip chip;
u32 valid;
struct irq_domain *domain;
u8 used_irqs;
@@ -67,6 +65,20 @@ static void fpga_irq_unmask(struct irq_data *d)
writel(mask, f->base + IRQ_ENABLE_SET);
}
+static void fpga_irq_print_chip(struct irq_data *d, struct seq_file *p)
+{
+ struct fpga_irq_data *f = irq_data_get_irq_chip_data(d);
+
+	seq_printf(p, "%s", irq_domain_get_of_node(f->domain)->name);
+}
+
+static const struct irq_chip fpga_chip = {
+ .irq_ack = fpga_irq_mask,
+ .irq_mask = fpga_irq_mask,
+ .irq_unmask = fpga_irq_unmask,
+ .irq_print_chip = fpga_irq_print_chip,
+};
+
static void fpga_irq_handle(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
@@ -116,7 +128,7 @@ static int handle_one_fpga(struct fpga_irq_data *f, struct pt_regs *regs)
* Keep iterating over all registered FPGA IRQ controllers until there are
* no pending interrupts.
*/
-asmlinkage void __exception_irq_entry fpga_handle_irq(struct pt_regs *regs)
+static asmlinkage void __exception_irq_entry fpga_handle_irq(struct pt_regs *regs)
{
int i, handled;
@@ -135,8 +147,7 @@ static int fpga_irqdomain_map(struct irq_domain *d, unsigned int irq,
if (!(f->valid & BIT(hwirq)))
return -EPERM;
irq_set_chip_data(irq, f);
- irq_set_chip_and_handler(irq, &f->chip,
- handle_level_irq);
+ irq_set_chip_and_handler(irq, &fpga_chip, handle_level_irq);
irq_set_probe(irq);
return 0;
}
@@ -146,8 +157,8 @@ static const struct irq_domain_ops fpga_irqdomain_ops = {
.xlate = irq_domain_xlate_onetwocell,
};
-void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start,
- int parent_irq, u32 valid, struct device_node *node)
+static void __init fpga_irq_init(void __iomem *base, int parent_irq,
+ u32 valid, struct device_node *node)
{
struct fpga_irq_data *f;
int i;
@@ -158,10 +169,6 @@ void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start,
}
f = &fpga_irq_devices[fpga_irq_id];
f->base = base;
- f->chip.name = name;
- f->chip.irq_ack = fpga_irq_mask;
- f->chip.irq_mask = fpga_irq_mask;
- f->chip.irq_unmask = fpga_irq_unmask;
f->valid = valid;
if (parent_irq != -1) {
@@ -169,20 +176,19 @@ void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start,
f);
}
- /* This will also allocate irq descriptors */
- f->domain = irq_domain_add_simple(node, fls(valid), irq_start,
+ f->domain = irq_domain_add_linear(node, fls(valid),
&fpga_irqdomain_ops, f);
/* This will allocate all valid descriptors in the linear case */
for (i = 0; i < fls(valid); i++)
if (valid & BIT(i)) {
- if (!irq_start)
- irq_create_mapping(f->domain, i);
+ /* Is this still required? */
+ irq_create_mapping(f->domain, i);
f->used_irqs++;
}
pr_info("FPGA IRQ chip %d \"%s\" @ %p, %u irqs",
- fpga_irq_id, name, base, f->used_irqs);
+ fpga_irq_id, node->name, base, f->used_irqs);
if (parent_irq != -1)
pr_cont(", parent IRQ: %d\n", parent_irq);
else
@@ -192,8 +198,8 @@ void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start,
}
#ifdef CONFIG_OF
-int __init fpga_irq_of_init(struct device_node *node,
- struct device_node *parent)
+static int __init fpga_irq_of_init(struct device_node *node,
+ struct device_node *parent)
{
void __iomem *base;
u32 clear_mask;
@@ -222,7 +228,7 @@ int __init fpga_irq_of_init(struct device_node *node,
parent_irq = -1;
}
- fpga_irq_init(base, node->name, 0, parent_irq, valid_mask, node);
+ fpga_irq_init(base, parent_irq, valid_mask, node);
/*
* On Versatile AB/PB, some secondary interrupts have a direct
diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c
index 356a59755d63..238d3d344949 100644
--- a/drivers/irqchip/irq-xilinx-intc.c
+++ b/drivers/irqchip/irq-xilinx-intc.c
@@ -32,6 +32,8 @@
#define MER_ME (1<<0)
#define MER_HIE (1<<1)
+#define SPURIOUS_IRQ (-1U)
+
static DEFINE_STATIC_KEY_FALSE(xintc_is_be);
struct xintc_irq_chip {
@@ -110,20 +112,6 @@ static struct irq_chip intc_dev = {
.irq_mask_ack = intc_mask_ack,
};
-unsigned int xintc_get_irq(void)
-{
- unsigned int irq = -1;
- u32 hwirq;
-
- hwirq = xintc_read(primary_intc, IVR);
- if (hwirq != -1U)
- irq = irq_find_mapping(primary_intc->root_domain, hwirq);
-
- pr_debug("irq-xilinx: hwirq=%d, irq=%d\n", hwirq, irq);
-
- return irq;
-}
-
static int xintc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
{
struct xintc_irq_chip *irqc = d->host_data;
@@ -164,6 +152,19 @@ static void xil_intc_irq_handler(struct irq_desc *desc)
chained_irq_exit(chip, desc);
}
+static void xil_intc_handle_irq(struct pt_regs *regs)
+{
+ u32 hwirq;
+
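+	/* Keep claiming from the IVR until it reads back as spurious */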
+ do {
+ hwirq = xintc_read(primary_intc, IVR);
+ if (unlikely(hwirq == SPURIOUS_IRQ))
+ break;
+
+ generic_handle_domain_irq(primary_intc->root_domain, hwirq);
+ } while (true);
+}
+
static int __init xilinx_intc_of_init(struct device_node *intc,
struct device_node *parent)
{
@@ -233,6 +234,7 @@ static int __init xilinx_intc_of_init(struct device_node *intc,
} else {
primary_intc = irqc;
irq_set_default_host(primary_intc->root_domain);
+ set_handle_irq(xil_intc_handle_irq);
}
return 0;
diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
index 173e6520e06e..d96916cf6a41 100644
--- a/drivers/irqchip/qcom-pdc.c
+++ b/drivers/irqchip/qcom-pdc.c
@@ -21,23 +21,19 @@
#include <linux/slab.h>
#include <linux/types.h>
-#define PDC_MAX_IRQS 168
#define PDC_MAX_GPIO_IRQS 256
-#define CLEAR_INTR(reg, intr) (reg & ~(1 << intr))
-#define ENABLE_INTR(reg, intr) (reg | (1 << intr))
-
#define IRQ_ENABLE_BANK 0x10
#define IRQ_i_CFG 0x110
-#define PDC_NO_PARENT_IRQ ~0UL
-
struct pdc_pin_region {
u32 pin_base;
u32 parent_base;
u32 cnt;
};
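+/* Translate a PDC pin to the parent GIC hwirq using the region bases */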
+#define pin_to_hwirq(r, p) ((r)->parent_base + (p) - (r)->pin_base)
+
static DEFINE_RAW_SPINLOCK(pdc_lock);
static void __iomem *pdc_base;
static struct pdc_pin_region *pdc_region;
@@ -56,17 +52,18 @@ static u32 pdc_reg_read(int reg, u32 i)
static void pdc_enable_intr(struct irq_data *d, bool on)
{
int pin_out = d->hwirq;
+ unsigned long enable;
+ unsigned long flags;
u32 index, mask;
- u32 enable;
index = pin_out / 32;
mask = pin_out % 32;
- raw_spin_lock(&pdc_lock);
+ raw_spin_lock_irqsave(&pdc_lock, flags);
enable = pdc_reg_read(IRQ_ENABLE_BANK, index);
- enable = on ? ENABLE_INTR(enable, mask) : CLEAR_INTR(enable, mask);
+ __assign_bit(mask, &enable, on);
pdc_reg_write(IRQ_ENABLE_BANK, index, enable);
- raw_spin_unlock(&pdc_lock);
+ raw_spin_unlock_irqrestore(&pdc_lock, flags);
}
static void qcom_pdc_gic_disable(struct irq_data *d)
@@ -186,34 +183,17 @@ static struct irq_chip qcom_pdc_gic_chip = {
.irq_set_affinity = irq_chip_set_affinity_parent,
};
-static irq_hw_number_t get_parent_hwirq(int pin)
+static struct pdc_pin_region *get_pin_region(int pin)
{
int i;
- struct pdc_pin_region *region;
for (i = 0; i < pdc_region_cnt; i++) {
- region = &pdc_region[i];
- if (pin >= region->pin_base &&
- pin < region->pin_base + region->cnt)
- return (region->parent_base + pin - region->pin_base);
+ if (pin >= pdc_region[i].pin_base &&
+ pin < pdc_region[i].pin_base + pdc_region[i].cnt)
+ return &pdc_region[i];
}
- return PDC_NO_PARENT_IRQ;
-}
-
-static int qcom_pdc_translate(struct irq_domain *d, struct irq_fwspec *fwspec,
- unsigned long *hwirq, unsigned int *type)
-{
- if (is_of_node(fwspec->fwnode)) {
- if (fwspec->param_count != 2)
- return -EINVAL;
-
- *hwirq = fwspec->param[0];
- *type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK;
- return 0;
- }
-
- return -EINVAL;
+ return NULL;
}
static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq,
@@ -221,55 +201,12 @@ static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq,
{
struct irq_fwspec *fwspec = data;
struct irq_fwspec parent_fwspec;
- irq_hw_number_t hwirq, parent_hwirq;
- unsigned int type;
- int ret;
-
- ret = qcom_pdc_translate(domain, fwspec, &hwirq, &type);
- if (ret)
- return ret;
-
- ret = irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
- &qcom_pdc_gic_chip, NULL);
- if (ret)
- return ret;
-
- parent_hwirq = get_parent_hwirq(hwirq);
- if (parent_hwirq == PDC_NO_PARENT_IRQ)
- return irq_domain_disconnect_hierarchy(domain->parent, virq);
-
- if (type & IRQ_TYPE_EDGE_BOTH)
- type = IRQ_TYPE_EDGE_RISING;
-
- if (type & IRQ_TYPE_LEVEL_MASK)
- type = IRQ_TYPE_LEVEL_HIGH;
-
- parent_fwspec.fwnode = domain->parent->fwnode;
- parent_fwspec.param_count = 3;
- parent_fwspec.param[0] = 0;
- parent_fwspec.param[1] = parent_hwirq;
- parent_fwspec.param[2] = type;
-
- return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs,
- &parent_fwspec);
-}
-
-static const struct irq_domain_ops qcom_pdc_ops = {
- .translate = qcom_pdc_translate,
- .alloc = qcom_pdc_alloc,
- .free = irq_domain_free_irqs_common,
-};
-
-static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq,
- unsigned int nr_irqs, void *data)
-{
- struct irq_fwspec *fwspec = data;
- struct irq_fwspec parent_fwspec;
- irq_hw_number_t hwirq, parent_hwirq;
+ struct pdc_pin_region *region;
+ irq_hw_number_t hwirq;
unsigned int type;
int ret;
- ret = qcom_pdc_translate(domain, fwspec, &hwirq, &type);
+ ret = irq_domain_translate_twocell(domain, fwspec, &hwirq, &type);
if (ret)
return ret;
@@ -281,8 +218,8 @@ static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq,
if (ret)
return ret;
- parent_hwirq = get_parent_hwirq(hwirq);
- if (parent_hwirq == PDC_NO_PARENT_IRQ)
+ region = get_pin_region(hwirq);
+ if (!region)
return irq_domain_disconnect_hierarchy(domain->parent, virq);
if (type & IRQ_TYPE_EDGE_BOTH)
@@ -294,23 +231,16 @@ static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq,
parent_fwspec.fwnode = domain->parent->fwnode;
parent_fwspec.param_count = 3;
parent_fwspec.param[0] = 0;
- parent_fwspec.param[1] = parent_hwirq;
+ parent_fwspec.param[1] = pin_to_hwirq(region, hwirq);
parent_fwspec.param[2] = type;
return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs,
&parent_fwspec);
}
-static int qcom_pdc_gpio_domain_select(struct irq_domain *d,
- struct irq_fwspec *fwspec,
- enum irq_domain_bus_token bus_token)
-{
- return bus_token == DOMAIN_BUS_WAKEUP;
-}
-
-static const struct irq_domain_ops qcom_pdc_gpio_ops = {
- .select = qcom_pdc_gpio_domain_select,
- .alloc = qcom_pdc_gpio_alloc,
+static const struct irq_domain_ops qcom_pdc_ops = {
+ .translate = irq_domain_translate_twocell,
+ .alloc = qcom_pdc_alloc,
.free = irq_domain_free_irqs_common,
};
@@ -361,7 +291,7 @@ static int pdc_setup_pin_mapping(struct device_node *np)
static int qcom_pdc_init(struct device_node *node, struct device_node *parent)
{
- struct irq_domain *parent_domain, *pdc_domain, *pdc_gpio_domain;
+ struct irq_domain *parent_domain, *pdc_domain;
int ret;
pdc_base = of_iomap(node, 0);
@@ -383,32 +313,21 @@ static int qcom_pdc_init(struct device_node *node, struct device_node *parent)
goto fail;
}
- pdc_domain = irq_domain_create_hierarchy(parent_domain, 0, PDC_MAX_IRQS,
- of_fwnode_handle(node),
- &qcom_pdc_ops, NULL);
- if (!pdc_domain) {
- pr_err("%pOF: GIC domain add failed\n", node);
- ret = -ENOMEM;
- goto fail;
- }
-
- pdc_gpio_domain = irq_domain_create_hierarchy(parent_domain,
+ pdc_domain = irq_domain_create_hierarchy(parent_domain,
IRQ_DOMAIN_FLAG_QCOM_PDC_WAKEUP,
PDC_MAX_GPIO_IRQS,
of_fwnode_handle(node),
- &qcom_pdc_gpio_ops, NULL);
- if (!pdc_gpio_domain) {
- pr_err("%pOF: PDC domain add failed for GPIO domain\n", node);
+ &qcom_pdc_ops, NULL);
+ if (!pdc_domain) {
+ pr_err("%pOF: PDC domain add failed\n", node);
ret = -ENOMEM;
- goto remove;
+ goto fail;
}
- irq_domain_update_bus_token(pdc_gpio_domain, DOMAIN_BUS_WAKEUP);
+ irq_domain_update_bus_token(pdc_domain, DOMAIN_BUS_WAKEUP);
return 0;
-remove:
- irq_domain_remove(pdc_domain);
fail:
kfree(pdc_region);
iounmap(pdc_base);
diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c
index bd087cca1c1d..af17459c1a5c 100644
--- a/drivers/isdn/hardware/mISDN/hfcpci.c
+++ b/drivers/isdn/hardware/mISDN/hfcpci.c
@@ -2005,7 +2005,11 @@ setup_hw(struct hfc_pci *hc)
}
/* Allocate memory for FIFOS */
/* the memory needs to be on a 32k boundary within the first 4G */
- dma_set_mask(&hc->pdev->dev, 0xFFFF8000);
+ if (dma_set_mask(&hc->pdev->dev, 0xFFFF8000)) {
+ printk(KERN_WARNING
+ "HFC-PCI: No usable DMA configuration!\n");
+ return -EIO;
+ }
buffer = dma_alloc_coherent(&hc->pdev->dev, 0x8000, &hc->hw.dmahandle,
GFP_KERNEL);
/* We silently assume the address is okay if nonzero */
diff --git a/drivers/isdn/mISDN/dsp_pipeline.c b/drivers/isdn/mISDN/dsp_pipeline.c
index e11ca6bbc7f4..c3b2c99b5cd5 100644
--- a/drivers/isdn/mISDN/dsp_pipeline.c
+++ b/drivers/isdn/mISDN/dsp_pipeline.c
@@ -192,7 +192,7 @@ void dsp_pipeline_destroy(struct dsp_pipeline *pipeline)
int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg)
{
int found = 0;
- char *dup, *tok, *name, *args;
+ char *dup, *next, *tok, *name, *args;
struct dsp_element_entry *entry, *n;
struct dsp_pipeline_entry *pipeline_entry;
struct mISDN_dsp_element *elem;
@@ -203,10 +203,10 @@ int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg)
if (!list_empty(&pipeline->list))
_dsp_pipeline_destroy(pipeline);
- dup = kstrdup(cfg, GFP_ATOMIC);
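+	/* strsep() advances 'next'; keep 'dup' for the later kfree() */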
+ dup = next = kstrdup(cfg, GFP_ATOMIC);
if (!dup)
return 0;
- while ((tok = strsep(&dup, "|"))) {
+ while ((tok = strsep(&next, "|"))) {
if (!strlen(tok))
continue;
name = strsep(&tok, "(");
diff --git a/drivers/leds/leds-cr0014114.c b/drivers/leds/leds-cr0014114.c
index d03cfd3c0bfb..c87686bd7c18 100644
--- a/drivers/leds/leds-cr0014114.c
+++ b/drivers/leds/leds-cr0014114.c
@@ -266,14 +266,12 @@ static int cr0014114_probe(struct spi_device *spi)
return 0;
}
-static int cr0014114_remove(struct spi_device *spi)
+static void cr0014114_remove(struct spi_device *spi)
{
struct cr0014114 *priv = spi_get_drvdata(spi);
cancel_delayed_work_sync(&priv->work);
mutex_destroy(&priv->lock);
-
- return 0;
}
static const struct of_device_id cr0014114_dt_ids[] = {
diff --git a/drivers/leds/leds-dac124s085.c b/drivers/leds/leds-dac124s085.c
index 20dc9b9d7dea..cf5fb1195f87 100644
--- a/drivers/leds/leds-dac124s085.c
+++ b/drivers/leds/leds-dac124s085.c
@@ -85,15 +85,13 @@ eledcr:
return ret;
}
-static int dac124s085_remove(struct spi_device *spi)
+static void dac124s085_remove(struct spi_device *spi)
{
struct dac124s085 *dac = spi_get_drvdata(spi);
int i;
for (i = 0; i < ARRAY_SIZE(dac->leds); i++)
led_classdev_unregister(&dac->leds[i].ldev);
-
- return 0;
}
static struct spi_driver dac124s085_driver = {
diff --git a/drivers/leds/leds-el15203000.c b/drivers/leds/leds-el15203000.c
index f9eb59a25570..7e7b617bcd56 100644
--- a/drivers/leds/leds-el15203000.c
+++ b/drivers/leds/leds-el15203000.c
@@ -315,13 +315,11 @@ static int el15203000_probe(struct spi_device *spi)
return el15203000_probe_dt(priv);
}
-static int el15203000_remove(struct spi_device *spi)
+static void el15203000_remove(struct spi_device *spi)
{
struct el15203000 *priv = spi_get_drvdata(spi);
mutex_destroy(&priv->lock);
-
- return 0;
}
static const struct of_device_id el15203000_dt_ids[] = {
diff --git a/drivers/leds/leds-spi-byte.c b/drivers/leds/leds-spi-byte.c
index f1964c96fb15..2bc5c99daf51 100644
--- a/drivers/leds/leds-spi-byte.c
+++ b/drivers/leds/leds-spi-byte.c
@@ -130,13 +130,11 @@ static int spi_byte_probe(struct spi_device *spi)
return 0;
}
-static int spi_byte_remove(struct spi_device *spi)
+static void spi_byte_remove(struct spi_device *spi)
{
struct spi_byte_led *led = spi_get_drvdata(spi);
mutex_destroy(&led->mutex);
-
- return 0;
}
static struct spi_driver spi_byte_driver = {
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index b5ea378e66cb..998a5cfdbc4e 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -204,6 +204,7 @@ config BLK_DEV_DM
tristate "Device mapper support"
select BLOCK_HOLDER_DEPRECATED if SYSFS
select BLK_DEV_DM_BUILTIN
+ select BLK_MQ_STACKING
depends on DAX || DAX=n
help
Device-mapper is a low level volume manager. It works by allowing
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 88c573eeb598..ad9f16689419 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -2060,9 +2060,11 @@ int bch_btree_check(struct cache_set *c)
}
}
+	/*
+	 * Must wait for all threads to stop, since they still reference
+	 * check_state.
+	 */
wait_event_interruptible(check_state->wait,
- atomic_read(&check_state->started) == 0 ||
- test_bit(CACHE_SET_IO_DISABLE, &c->flags));
+ atomic_read(&check_state->started) == 0);
for (i = 0; i < check_state->total_threads; i++) {
if (check_state->infos[i].result) {
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 9c6f9ec55b72..020712c5203f 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -26,7 +26,8 @@ struct bio *bch_bbio_alloc(struct cache_set *c)
struct bbio *b = mempool_alloc(&c->bio_meta, GFP_NOIO);
struct bio *bio = &b->bio;
- bio_init(bio, bio->bi_inline_vecs, meta_bucket_pages(&c->cache->sb));
+ bio_init(bio, NULL, bio->bi_inline_vecs,
+ meta_bucket_pages(&c->cache->sb), 0);
return bio;
}
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 61bd79babf7a..7c2ca52ca3e4 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -53,14 +53,12 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list,
reread: left = ca->sb.bucket_size - offset;
len = min_t(unsigned int, left, PAGE_SECTORS << JSET_BITS);
- bio_reset(bio);
+ bio_reset(bio, ca->bdev, REQ_OP_READ);
bio->bi_iter.bi_sector = bucket + offset;
- bio_set_dev(bio, ca->bdev);
bio->bi_iter.bi_size = len << 9;
bio->bi_end_io = journal_read_endio;
bio->bi_private = &cl;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
bch_bio_map(bio, data);
closure_bio_submit(ca->set, bio, &cl);
@@ -611,11 +609,9 @@ static void do_journal_discard(struct cache *ca)
atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT);
- bio_init(bio, bio->bi_inline_vecs, 1);
- bio_set_op_attrs(bio, REQ_OP_DISCARD, 0);
+ bio_init(bio, ca->bdev, bio->bi_inline_vecs, 1, REQ_OP_DISCARD);
bio->bi_iter.bi_sector = bucket_to_sector(ca->set,
ca->sb.d[ja->discard_idx]);
- bio_set_dev(bio, ca->bdev);
bio->bi_iter.bi_size = bucket_bytes(ca);
bio->bi_end_io = journal_discard_endio;
@@ -773,16 +769,14 @@ static void journal_write_unlocked(struct closure *cl)
atomic_long_add(sectors, &ca->meta_sectors_written);
- bio_reset(bio);
+ bio_reset(bio, ca->bdev, REQ_OP_WRITE |
+ REQ_SYNC | REQ_META | REQ_PREFLUSH | REQ_FUA);
+ bch_bio_map(bio, w->data);
bio->bi_iter.bi_sector = PTR_OFFSET(k, i);
- bio_set_dev(bio, ca->bdev);
bio->bi_iter.bi_size = sectors << 9;
bio->bi_end_io = journal_write_endio;
bio->bi_private = w;
- bio_set_op_attrs(bio, REQ_OP_WRITE,
- REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA);
- bch_bio_map(bio, w->data);
trace_bcache_journal_write(bio, w->data->keys);
bio_list_add(&list, bio);
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index b9c3d27ec093..99499d1f6e66 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -79,8 +79,8 @@ static void moving_init(struct moving_io *io)
{
struct bio *bio = &io->bio.bio;
- bio_init(bio, bio->bi_inline_vecs,
- DIV_ROUND_UP(KEY_SIZE(&io->w->key), PAGE_SECTORS));
+ bio_init(bio, NULL, bio->bi_inline_vecs,
+ DIV_ROUND_UP(KEY_SIZE(&io->w->key), PAGE_SECTORS), 0);
bio_get(bio);
bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index d15aae6c51c1..fdd0194f84dd 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -44,10 +44,10 @@ static void bio_csum(struct bio *bio, struct bkey *k)
uint64_t csum = 0;
bio_for_each_segment(bv, bio, iter) {
- void *d = kmap(bv.bv_page) + bv.bv_offset;
+ void *d = bvec_kmap_local(&bv);
csum = crc64_be(csum, d, bv.bv_len);
- kunmap(bv.bv_page);
+ kunmap_local(d);
}
k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1);
@@ -685,8 +685,7 @@ static void do_bio_hook(struct search *s,
{
struct bio *bio = &s->bio.bio;
- bio_init(bio, NULL, 0);
- __bio_clone_fast(bio, orig_bio);
+ bio_init_clone(bio->bi_bdev, bio, orig_bio, GFP_NOIO);
/*
* bi_end_io can be set separately somewhere else, e.g. the
* variants in,
@@ -831,11 +830,11 @@ static void cached_dev_read_done(struct closure *cl)
*/
if (s->iop.bio) {
- bio_reset(s->iop.bio);
+ bio_reset(s->iop.bio, s->cache_miss->bi_bdev, REQ_OP_READ);
s->iop.bio->bi_iter.bi_sector =
s->cache_miss->bi_iter.bi_sector;
- bio_copy_dev(s->iop.bio, s->cache_miss);
s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
+ bio_clone_blkg_association(s->iop.bio, s->cache_miss);
bch_bio_map(s->iop.bio, NULL);
bio_copy_data(s->cache_miss, s->iop.bio);
@@ -913,14 +912,13 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
/* btree_search_recurse()'s btree iterator is no good anymore */
ret = miss == bio ? MAP_DONE : -EINTR;
- cache_bio = bio_alloc_bioset(GFP_NOWAIT,
+ cache_bio = bio_alloc_bioset(miss->bi_bdev,
DIV_ROUND_UP(s->insert_bio_sectors, PAGE_SECTORS),
- &dc->disk.bio_split);
+ 0, GFP_NOWAIT, &dc->disk.bio_split);
if (!cache_bio)
goto out_submit;
cache_bio->bi_iter.bi_sector = miss->bi_iter.bi_sector;
- bio_copy_dev(cache_bio, miss);
cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
cache_bio->bi_end_io = backing_request_endio;
@@ -1025,21 +1023,21 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
*/
struct bio *flush;
- flush = bio_alloc_bioset(GFP_NOIO, 0,
- &dc->disk.bio_split);
+ flush = bio_alloc_bioset(bio->bi_bdev, 0,
+ REQ_OP_WRITE | REQ_PREFLUSH,
+ GFP_NOIO, &dc->disk.bio_split);
if (!flush) {
s->iop.status = BLK_STS_RESOURCE;
goto insert_data;
}
- bio_copy_dev(flush, bio);
flush->bi_end_io = backing_request_endio;
flush->bi_private = cl;
- flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
/* I/O request sent to backing device */
closure_bio_submit(s->iop.c, flush, cl);
}
} else {
- s->iop.bio = bio_clone_fast(bio, GFP_NOIO, &dc->disk.bio_split);
+ s->iop.bio = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO,
+ &dc->disk.bio_split);
/* I/O request sent to backing device */
bio->bi_end_io = backing_request_endio;
closure_bio_submit(s->iop.c, bio, cl);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 140f35dc0c45..bf3de149d3c9 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -18,7 +18,6 @@
#include <linux/blkdev.h>
#include <linux/pagemap.h>
#include <linux/debugfs.h>
-#include <linux/genhd.h>
#include <linux/idr.h>
#include <linux/kthread.h>
#include <linux/workqueue.h>
@@ -343,8 +342,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
down(&dc->sb_write_mutex);
closure_init(cl, parent);
- bio_init(bio, dc->sb_bv, 1);
- bio_set_dev(bio, dc->bdev);
+ bio_init(bio, dc->bdev, dc->sb_bv, 1, 0);
bio->bi_end_io = write_bdev_super_endio;
bio->bi_private = dc;
@@ -387,8 +385,7 @@ void bcache_write_super(struct cache_set *c)
if (ca->sb.version < version)
ca->sb.version = version;
- bio_init(bio, ca->sb_bv, 1);
- bio_set_dev(bio, ca->bdev);
+ bio_init(bio, ca->bdev, ca->sb_bv, 1, 0);
bio->bi_end_io = write_super_endio;
bio->bi_private = ca;
@@ -2240,7 +2237,7 @@ static int cache_alloc(struct cache *ca)
__module_get(THIS_MODULE);
kobject_init(&ca->kobj, &bch_cache_ktype);
- bio_init(&ca->journal.bio, ca->journal.bio.bi_inline_vecs, 8);
+ bio_init(&ca->journal.bio, NULL, ca->journal.bio.bi_inline_vecs, 8, 0);
/*
* when ca->sb.njournal_buckets is not zero, journal exists,
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index c7560f66dca8..9ee0005874cd 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -292,8 +292,8 @@ static void dirty_init(struct keybuf_key *w)
struct dirty_io *io = w->private;
struct bio *bio = &io->bio;
- bio_init(bio, bio->bi_inline_vecs,
- DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS));
+ bio_init(bio, NULL, bio->bi_inline_vecs,
+ DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS), 0);
if (!io->dc->writeback_percent)
bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
@@ -585,10 +585,13 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
sectors_dirty = atomic_add_return(s,
d->stripe_sectors_dirty + stripe);
- if (sectors_dirty == d->stripe_size)
- set_bit(stripe, d->full_dirty_stripes);
- else
- clear_bit(stripe, d->full_dirty_stripes);
+ if (sectors_dirty == d->stripe_size) {
+ if (!test_bit(stripe, d->full_dirty_stripes))
+ set_bit(stripe, d->full_dirty_stripes);
+ } else {
+ if (test_bit(stripe, d->full_dirty_stripes))
+ clear_bit(stripe, d->full_dirty_stripes);
+ }
nr_sectors -= s;
stripe_offset = 0;
@@ -998,9 +1001,11 @@ void bch_sectors_dirty_init(struct bcache_device *d)
}
}
+	/*
+	 * Must wait for all threads to stop, since they still reference
+	 * state, which is freed right below.
+	 */
wait_event_interruptible(state->wait,
- atomic_read(&state->started) == 0 ||
- test_bit(CACHE_SET_IO_DISABLE, &c->flags));
+ atomic_read(&state->started) == 0);
out:
kfree(state);
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 447d030036d1..89fdfb49d564 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -744,21 +744,14 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
spin_unlock_irq(&cache->lock);
}
-static void __remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
- dm_oblock_t oblock, bool bio_has_pbd)
-{
- if (bio_has_pbd)
- check_if_tick_bio_needed(cache, bio);
- remap_to_origin(cache, bio);
- if (bio_data_dir(bio) == WRITE)
- clear_discard(cache, oblock_to_dblock(cache, oblock));
-}
-
static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
dm_oblock_t oblock)
{
// FIXME: check_if_tick_bio_needed() is called way too much through this interface
- __remap_to_origin_clear_discard(cache, bio, oblock, true);
+ check_if_tick_bio_needed(cache, bio);
+ remap_to_origin(cache, bio);
+ if (bio_data_dir(bio) == WRITE)
+ clear_discard(cache, oblock_to_dblock(cache, oblock));
}
static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
@@ -826,16 +819,15 @@ static void issue_op(struct bio *bio, void *context)
static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio,
dm_oblock_t oblock, dm_cblock_t cblock)
{
- struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, &cache->bs);
+ struct bio *origin_bio = bio_alloc_clone(cache->origin_dev->bdev, bio,
+ GFP_NOIO, &cache->bs);
BUG_ON(!origin_bio);
bio_chain(origin_bio, bio);
- /*
- * Passing false to __remap_to_origin_clear_discard() skips
- * all code that might use per_bio_data (since clone doesn't have it)
- */
- __remap_to_origin_clear_discard(cache, origin_bio, oblock, false);
+
+ if (bio_data_dir(origin_bio) == WRITE)
+ clear_discard(cache, oblock_to_dblock(cache, oblock));
submit_bio(origin_bio);
remap_to_cache(cache, bio, cblock);
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index b855fef4f38a..72d18c3fbf1f 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -11,7 +11,6 @@
#include <linux/kthread.h>
#include <linux/ktime.h>
-#include <linux/genhd.h>
#include <linux/blk-mq.h>
#include <linux/blk-crypto-profile.h>
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index d4ae31558826..e2b0af4a2ee8 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -234,7 +234,7 @@ static volatile unsigned long dm_crypt_pages_per_client;
#define DM_CRYPT_MEMORY_PERCENT 2
#define DM_CRYPT_MIN_PAGES_PER_CLIENT (BIO_MAX_VECS * 16)
-static void clone_init(struct dm_crypt_io *, struct bio *);
+static void crypt_endio(struct bio *clone);
static void kcryptd_queue_crypt(struct dm_crypt_io *io);
static struct scatterlist *crypt_get_sg_data(struct crypt_config *cc,
struct scatterlist *sg);
@@ -1364,11 +1364,10 @@ static int crypt_convert_block_aead(struct crypt_config *cc,
}
if (r == -EBADMSG) {
- char b[BDEVNAME_SIZE];
sector_t s = le64_to_cpu(*sector);
- DMERR_LIMIT("%s: INTEGRITY AEAD ERROR, sector %llu",
- bio_devname(ctx->bio_in, b), s);
+ DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu",
+ ctx->bio_in->bi_bdev, s);
dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
ctx->bio_in, s, 0);
}
@@ -1672,11 +1671,10 @@ retry:
if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
mutex_lock(&cc->bio_alloc_lock);
- clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, &cc->bs);
- if (!clone)
- goto out;
-
- clone_init(io, clone);
+ clone = bio_alloc_bioset(cc->dev->bdev, nr_iovecs, io->base_bio->bi_opf,
+ GFP_NOIO, &cc->bs);
+ clone->bi_private = io;
+ clone->bi_end_io = crypt_endio;
remaining_size = size;
@@ -1702,7 +1700,7 @@ retry:
bio_put(clone);
clone = NULL;
}
-out:
+
if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
mutex_unlock(&cc->bio_alloc_lock);
@@ -1829,34 +1827,25 @@ static void crypt_endio(struct bio *clone)
crypt_dec_pending(io);
}
-static void clone_init(struct dm_crypt_io *io, struct bio *clone)
-{
- struct crypt_config *cc = io->cc;
-
- clone->bi_private = io;
- clone->bi_end_io = crypt_endio;
- bio_set_dev(clone, cc->dev->bdev);
- clone->bi_opf = io->base_bio->bi_opf;
-}
-
static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
{
struct crypt_config *cc = io->cc;
struct bio *clone;
/*
- * We need the original biovec array in order to decrypt
- * the whole bio data *afterwards* -- thanks to immutable
- * biovecs we don't need to worry about the block layer
- * modifying the biovec array; so leverage bio_clone_fast().
+ * We need the original biovec array in order to decrypt the whole bio
+ * data *afterwards* -- thanks to immutable biovecs we don't need to
+ * worry about the block layer modifying the biovec array; so leverage
+ * bio_alloc_clone().
*/
- clone = bio_clone_fast(io->base_bio, gfp, &cc->bs);
+ clone = bio_alloc_clone(cc->dev->bdev, io->base_bio, gfp, &cc->bs);
if (!clone)
return 1;
+ clone->bi_private = io;
+ clone->bi_end_io = crypt_endio;
crypt_inc_pending(io);
- clone_init(io, clone);
clone->bi_iter.bi_sector = cc->start + io->sector;
if (dm_crypt_integrity_io_alloc(io, clone)) {
@@ -2179,11 +2168,10 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
error = cc->iv_gen_ops->post(cc, org_iv_of_dmreq(cc, dmreq), dmreq);
if (error == -EBADMSG) {
- char b[BDEVNAME_SIZE];
sector_t s = le64_to_cpu(*org_sector_of_dmreq(cc, dmreq));
- DMERR_LIMIT("%s: INTEGRITY AEAD ERROR, sector %llu",
- bio_devname(ctx->bio_in, b), s);
+ DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu",
+ ctx->bio_in->bi_bdev, s);
dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
ctx->bio_in, s, 0);
io->error = BLK_STS_PROTECTION;
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index eb4b5e52bd6f..c58a5111cb57 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -1788,12 +1788,11 @@ again:
checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE);
if (unlikely(r)) {
if (r > 0) {
- char b[BDEVNAME_SIZE];
sector_t s;
s = sector - ((r + ic->tag_size - 1) / ic->tag_size);
- DMERR_LIMIT("%s: Checksum failed at sector 0x%llx",
- bio_devname(bio, b), s);
+ DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
+ bio->bi_bdev, s);
r = -EILSEQ;
atomic64_inc(&ic->number_of_mismatches);
dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum",
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 2d3cda0acacb..23e038f8dc84 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -345,11 +345,10 @@ static void do_region(int op, int op_flags, unsigned region,
(PAGE_SIZE >> SECTOR_SHIFT)));
}
- bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, &io->client->bios);
+ bio = bio_alloc_bioset(where->bdev, num_bvecs, op | op_flags,
+ GFP_NOIO, &io->client->bios);
bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
- bio_set_dev(bio, where->bdev);
bio->bi_end_io = endio;
- bio_set_op_attrs(bio, op, op_flags);
store_io_and_region_in_bio(bio, io, region);
if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) {
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 139b09b06eda..c9d036d6bb2e 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -217,18 +217,12 @@ static int write_metadata(struct log_writes_c *lc, void *entry,
void *ptr;
size_t ret;
- bio = bio_alloc(GFP_KERNEL, 1);
- if (!bio) {
- DMERR("Couldn't alloc log bio");
- goto error;
- }
+ bio = bio_alloc(lc->logdev->bdev, 1, REQ_OP_WRITE, GFP_KERNEL);
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, lc->logdev->bdev);
bio->bi_end_io = (sector == WRITE_LOG_SUPER_SECTOR) ?
log_end_super : log_end_io;
bio->bi_private = lc;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
page = alloc_page(GFP_KERNEL);
if (!page) {
@@ -275,18 +269,12 @@ static int write_inline_data(struct log_writes_c *lc, void *entry,
atomic_inc(&lc->io_blocks);
- bio = bio_alloc(GFP_KERNEL, bio_pages);
- if (!bio) {
- DMERR("Couldn't alloc inline data bio");
- goto error;
- }
-
+ bio = bio_alloc(lc->logdev->bdev, bio_pages, REQ_OP_WRITE,
+ GFP_KERNEL);
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, lc->logdev->bdev);
bio->bi_end_io = log_end_io;
bio->bi_private = lc;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
for (i = 0; i < bio_pages; i++) {
pg_datalen = min_t(int, datalen, PAGE_SIZE);
@@ -322,7 +310,6 @@ static int write_inline_data(struct log_writes_c *lc, void *entry,
error_bio:
bio_free_pages(bio);
bio_put(bio);
-error:
put_io_block(lc);
return -1;
}
@@ -363,17 +350,12 @@ static int log_one_block(struct log_writes_c *lc,
goto out;
atomic_inc(&lc->io_blocks);
- bio = bio_alloc(GFP_KERNEL, bio_max_segs(block->vec_cnt));
- if (!bio) {
- DMERR("Couldn't alloc log bio");
- goto error;
- }
+ bio = bio_alloc(lc->logdev->bdev, bio_max_segs(block->vec_cnt),
+ REQ_OP_WRITE, GFP_KERNEL);
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, lc->logdev->bdev);
bio->bi_end_io = log_end_io;
bio->bi_private = lc;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
for (i = 0; i < block->vec_cnt; i++) {
/*
@@ -385,18 +367,13 @@ static int log_one_block(struct log_writes_c *lc,
if (ret != block->vecs[i].bv_len) {
atomic_inc(&lc->io_blocks);
submit_bio(bio);
- bio = bio_alloc(GFP_KERNEL,
- bio_max_segs(block->vec_cnt - i));
- if (!bio) {
- DMERR("Couldn't alloc log bio");
- goto error;
- }
+ bio = bio_alloc(lc->logdev->bdev,
+ bio_max_segs(block->vec_cnt - i),
+ REQ_OP_WRITE, GFP_KERNEL);
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, lc->logdev->bdev);
bio->bi_end_io = log_end_io;
bio->bi_private = lc;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
ret = bio_add_page(bio, block->vecs[i].bv_page,
block->vecs[i].bv_len, 0);
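The removed NULL checks in dm-log-writes rely on bio_alloc()'s long-standing guarantee that allocations which are allowed to sleep (GFP_KERNEL, GFP_NOIO) cannot fail, which is also why the error: label disappears. A sketch; log_bio() is hypothetical:

#include <linux/bio.h>

static struct bio *log_bio(struct block_device *bdev, sector_t sector)
{
	/* GFP_KERNEL may sleep, so this allocation cannot fail */
	struct bio *bio = bio_alloc(bdev, 1, REQ_OP_WRITE, GFP_KERNEL);

	bio->bi_iter.bi_sector = sector;
	return bio;
}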
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 579ab6183d4d..6948d5db9092 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -303,21 +303,6 @@ static void end_clone_request(struct request *clone, blk_status_t error)
dm_complete_request(tio->orig, error);
}
-static blk_status_t dm_dispatch_clone_request(struct request *clone, struct request *rq)
-{
- blk_status_t r;
-
- if (blk_queue_io_stat(clone->q))
- clone->rq_flags |= RQF_IO_STAT;
-
- clone->start_time_ns = ktime_get_ns();
- r = blk_insert_cloned_request(clone->q, clone);
- if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE)
- /* must complete clone in terms of original request */
- dm_complete_request(rq, r);
- return r;
-}
-
static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
void *data)
{
@@ -398,13 +383,20 @@ static int map_request(struct dm_rq_target_io *tio)
/* The target has remapped the I/O so dispatch it */
trace_block_rq_remap(clone, disk_devt(dm_disk(md)),
blk_rq_pos(rq));
- ret = dm_dispatch_clone_request(clone, rq);
- if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
+ ret = blk_insert_cloned_request(clone);
+ switch (ret) {
+ case BLK_STS_OK:
+ break;
+ case BLK_STS_RESOURCE:
+ case BLK_STS_DEV_RESOURCE:
blk_rq_unprep_clone(clone);
blk_mq_cleanup_rq(clone);
tio->ti->type->release_clone_rq(clone, &tio->info);
tio->clone = NULL;
return DM_MAPIO_REQUEUE;
+ default:
+ /* must complete clone in terms of original request */
+ dm_complete_request(rq, ret);
}
break;
case DM_MAPIO_REQUEUE:
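With this change blk_insert_cloned_request() takes only the clone and handles I/O-stat setup and start-time accounting itself, so dm-rq's wrapper is folded into the caller, which acts on the returned blk_status_t directly. A sketch of the resulting dispatch pattern; dispatch_clone() and complete_original() are hypothetical stand-ins:

#include <linux/blk-mq.h>

/* hypothetical stand-in for dm_complete_request() */
void complete_original(struct request *orig, blk_status_t error);

static blk_status_t dispatch_clone(struct request *clone, struct request *orig)
{
	blk_status_t ret = blk_insert_cloned_request(clone);

	switch (ret) {
	case BLK_STS_OK:
		break;
	case BLK_STS_RESOURCE:
	case BLK_STS_DEV_RESOURCE:
		/* caller unpreps the clone and requeues */
		break;
	default:
		/* must complete the clone in terms of the original request */
		complete_original(orig, ret);
	}
	return ret;
}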
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index dcf34c6b05ad..0d336b5ec571 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -141,11 +141,6 @@ struct dm_snapshot {
* for them to be committed.
*/
struct bio_list bios_queued_during_merge;
-
- /*
- * Flush data after merge.
- */
- struct bio flush_bio;
};
/*
@@ -1127,17 +1122,6 @@ shut:
static void error_bios(struct bio *bio);
-static int flush_data(struct dm_snapshot *s)
-{
- struct bio *flush_bio = &s->flush_bio;
-
- bio_reset(flush_bio);
- bio_set_dev(flush_bio, s->origin->bdev);
- flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-
- return submit_bio_wait(flush_bio);
-}
-
static void merge_callback(int read_err, unsigned long write_err, void *context)
{
struct dm_snapshot *s = context;
@@ -1151,7 +1135,7 @@ static void merge_callback(int read_err, unsigned long write_err, void *context)
goto shut;
}
- if (flush_data(s) < 0) {
+ if (blkdev_issue_flush(s->origin->bdev) < 0) {
DMERR("Flush after merge failed: shutting down merge");
goto shut;
}
@@ -1340,7 +1324,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
s->first_merging_chunk = 0;
s->num_merging_chunks = 0;
bio_list_init(&s->bios_queued_during_merge);
- bio_init(&s->flush_bio, NULL, 0);
/* Allocate hash table for COW data */
if (init_hash_tables(s)) {
@@ -1528,8 +1511,6 @@ static void snapshot_dtr(struct dm_target *ti)
dm_exception_store_destroy(s->store);
- bio_uninit(&s->flush_bio);
-
dm_put_device(ti, s->cow);
dm_put_device(ti, s->origin);
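dm-snap drops its embedded flush bio in favor of blkdev_issue_flush(), which builds and submits a synchronous REQ_OP_WRITE | REQ_PREFLUSH bio internally. A sketch; flush_origin() is illustrative:

#include <linux/blkdev.h>
#include <linux/printk.h>

static int flush_origin(struct block_device *bdev)
{
	int r = blkdev_issue_flush(bdev);	/* synchronous preflush */

	if (r < 0)
		pr_err("%pg: flush failed\n", bdev);
	return r;
}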
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index ec119d2422d5..f4234d615aa1 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -282,8 +282,6 @@ struct pool {
struct dm_bio_prison_cell **cell_sort_array;
mempool_t mapping_pool;
-
- struct bio flush_bio;
};
static void metadata_operation_failed(struct pool *pool, const char *op, int r);
@@ -1179,25 +1177,17 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
return;
}
- discard_parent = bio_alloc(GFP_NOIO, 1);
- if (!discard_parent) {
- DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.",
- dm_device_name(tc->pool->pool_md));
- queue_passdown_pt2(m);
-
- } else {
- discard_parent->bi_end_io = passdown_endio;
- discard_parent->bi_private = m;
-
- if (m->maybe_shared)
- passdown_double_checking_shared_status(m, discard_parent);
- else {
- struct discard_op op;
-
- begin_discard(&op, tc, discard_parent);
- r = issue_discard(&op, m->data_block, data_end);
- end_discard(&op, r);
- }
+ discard_parent = bio_alloc(NULL, 1, 0, GFP_NOIO);
+ discard_parent->bi_end_io = passdown_endio;
+ discard_parent->bi_private = m;
+ if (m->maybe_shared)
+ passdown_double_checking_shared_status(m, discard_parent);
+ else {
+ struct discard_op op;
+
+ begin_discard(&op, tc, discard_parent);
+ r = issue_discard(&op, m->data_block, data_end);
+ end_discard(&op, r);
}
}
@@ -2913,7 +2903,6 @@ static void __pool_destroy(struct pool *pool)
if (pool->next_mapping)
mempool_free(pool->next_mapping, &pool->mapping_pool);
mempool_exit(&pool->mapping_pool);
- bio_uninit(&pool->flush_bio);
dm_deferred_set_destroy(pool->shared_read_ds);
dm_deferred_set_destroy(pool->all_io_ds);
kfree(pool);
@@ -2994,7 +2983,6 @@ static struct pool *pool_create(struct mapped_device *pool_md,
pool->low_water_triggered = false;
pool->suspended = true;
pool->out_of_data_space = false;
- bio_init(&pool->flush_bio, NULL, 0);
pool->shared_read_ds = dm_deferred_set_create();
if (!pool->shared_read_ds) {
@@ -3201,13 +3189,8 @@ static void metadata_low_callback(void *context)
static int metadata_pre_commit_callback(void *context)
{
struct pool *pool = context;
- struct bio *flush_bio = &pool->flush_bio;
-
- bio_reset(flush_bio);
- bio_set_dev(flush_bio, pool->data_dev);
- flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
- return submit_bio_wait(flush_bio);
+ return blkdev_issue_flush(pool->data_dev);
}
static sector_t get_dev_size(struct block_device *bdev)
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 4f31591d2d25..5630b470ba42 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -1821,11 +1821,11 @@ static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeba
max_pages = e->wc_list_contiguous;
- bio = bio_alloc_bioset(GFP_NOIO, max_pages, &wc->bio_set);
+ bio = bio_alloc_bioset(wc->dev->bdev, max_pages, REQ_OP_WRITE,
+ GFP_NOIO, &wc->bio_set);
wb = container_of(bio, struct writeback_struct, bio);
wb->wc = wc;
bio->bi_end_io = writecache_writeback_endio;
- bio_set_dev(bio, wc->dev->bdev);
bio->bi_iter.bi_sector = read_original_sector(wc, e);
if (max_pages <= WB_LIST_INLINE ||
unlikely(!(wb->wc_list = kmalloc_array(max_pages, sizeof(struct wc_entry *),
@@ -1852,7 +1852,8 @@ static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeba
wb->wc_list[wb->wc_list_n++] = f;
e = f;
}
- bio_set_op_attrs(bio, REQ_OP_WRITE, WC_MODE_FUA(wc) * REQ_FUA);
+ if (WC_MODE_FUA(wc))
+ bio->bi_opf |= REQ_FUA;
if (writecache_has_error(wc)) {
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
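Since the operation is now fixed at allocation time, dm-writecache only ORs modifier flags such as REQ_FUA into bi_opf afterwards instead of calling bio_set_op_attrs(). A sketch, with alloc_wb_bio() hypothetical:

#include <linux/bio.h>

static struct bio *alloc_wb_bio(struct block_device *bdev, bool fua,
				struct bio_set *bs)
{
	struct bio *bio = bio_alloc_bioset(bdev, 1, REQ_OP_WRITE,
					   GFP_NOIO, bs);

	/* modifier flags can still be OR'ed in after allocation */
	if (fua)
		bio->bi_opf |= REQ_FUA;
	return bio;
}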
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index ee4626d08557..e5f1eb27ce2e 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -550,11 +550,8 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
if (!mblk)
return ERR_PTR(-ENOMEM);
- bio = bio_alloc(GFP_NOIO, 1);
- if (!bio) {
- dmz_free_mblock(zmd, mblk);
- return ERR_PTR(-ENOMEM);
- }
+ bio = bio_alloc(dev->bdev, 1, REQ_OP_READ | REQ_META | REQ_PRIO,
+ GFP_NOIO);
spin_lock(&zmd->mblk_lock);
@@ -578,10 +575,8 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
/* Submit read BIO */
bio->bi_iter.bi_sector = dmz_blk2sect(block);
- bio_set_dev(bio, dev->bdev);
bio->bi_private = mblk;
bio->bi_end_io = dmz_mblock_bio_end_io;
- bio_set_op_attrs(bio, REQ_OP_READ, REQ_META | REQ_PRIO);
bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
submit_bio(bio);
@@ -725,19 +720,14 @@ static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
if (dmz_bdev_is_dying(dev))
return -EIO;
- bio = bio_alloc(GFP_NOIO, 1);
- if (!bio) {
- set_bit(DMZ_META_ERROR, &mblk->state);
- return -ENOMEM;
- }
+ bio = bio_alloc(dev->bdev, 1, REQ_OP_WRITE | REQ_META | REQ_PRIO,
+ GFP_NOIO);
set_bit(DMZ_META_WRITING, &mblk->state);
bio->bi_iter.bi_sector = dmz_blk2sect(block);
- bio_set_dev(bio, dev->bdev);
bio->bi_private = mblk;
bio->bi_end_io = dmz_mblock_bio_end_io;
- bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO);
bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
submit_bio(bio);
@@ -759,13 +749,9 @@ static int dmz_rdwr_block(struct dmz_dev *dev, int op,
if (dmz_bdev_is_dying(dev))
return -EIO;
- bio = bio_alloc(GFP_NOIO, 1);
- if (!bio)
- return -ENOMEM;
-
+ bio = bio_alloc(dev->bdev, 1, op | REQ_SYNC | REQ_META | REQ_PRIO,
+ GFP_NOIO);
bio->bi_iter.bi_sector = dmz_blk2sect(block);
- bio_set_dev(bio, dev->bdev);
- bio_set_op_attrs(bio, op, REQ_SYNC | REQ_META | REQ_PRIO);
bio_add_page(bio, page, DMZ_BLOCK_SIZE, 0);
ret = submit_bio_wait(bio);
bio_put(bio);
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 166c4e9d99c9..a3f6d3ef3817 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -125,11 +125,10 @@ static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
if (dev->flags & DMZ_BDEV_DYING)
return -EIO;
- clone = bio_clone_fast(bio, GFP_NOIO, &dmz->bio_set);
+ clone = bio_alloc_clone(dev->bdev, bio, GFP_NOIO, &dmz->bio_set);
if (!clone)
return -ENOMEM;
- bio_set_dev(clone, dev->bdev);
bioctx->dev = dev;
clone->bi_iter.bi_sector =
dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
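bio_alloc_clone() replaces the bio_clone_fast() plus bio_set_dev() pair: the clone is created pointing at its destination device from the start. A sketch; clone_to_dev() is illustrative, and the NULL check matches the one kept above, since cloning can still fail while copying crypt or integrity contexts:

#include <linux/bio.h>

static struct bio *clone_to_dev(struct bio *src, struct block_device *bdev,
				struct bio_set *bs)
{
	/* allocated as a clone of src, already pointing at bdev */
	struct bio *clone = bio_alloc_clone(bdev, src, GFP_NOIO, bs);

	if (!clone)
		return NULL;
	return clone;
}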
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index c0ae8087c602..183ce0d6728f 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -79,10 +79,14 @@ struct clone_info {
#define DM_IO_BIO_OFFSET \
(offsetof(struct dm_target_io, clone) + offsetof(struct dm_io, tio))
+static inline struct dm_target_io *clone_to_tio(struct bio *clone)
+{
+ return container_of(clone, struct dm_target_io, clone);
+}
+
void *dm_per_bio_data(struct bio *bio, size_t data_size)
{
- struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
- if (!tio->inside_dm_io)
+ if (!clone_to_tio(bio)->inside_dm_io)
return (char *)bio - DM_TARGET_IO_BIO_OFFSET - data_size;
return (char *)bio - DM_IO_BIO_OFFSET - data_size;
}
@@ -477,10 +481,7 @@ out:
u64 dm_start_time_ns_from_clone(struct bio *bio)
{
- struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
- struct dm_io *io = tio->io;
-
- return jiffies_to_nsecs(io->start_time);
+ return jiffies_to_nsecs(clone_to_tio(bio)->io->start_time);
}
EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone);
@@ -489,7 +490,7 @@ static void start_io_acct(struct dm_io *io)
struct mapped_device *md = io->md;
struct bio *bio = io->orig_bio;
- io->start_time = bio_start_io_acct(bio);
+ bio_start_io_acct_time(bio, io->start_time);
if (unlikely(dm_stats_used(&md->stats)))
dm_stats_account_io(&md->stats, bio_data_dir(bio),
bio->bi_iter.bi_sector, bio_sectors(bio),
@@ -519,11 +520,9 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
struct dm_target_io *tio;
struct bio *clone;
- clone = bio_alloc_bioset(GFP_NOIO, 0, &md->io_bs);
- if (!clone)
- return NULL;
+ clone = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO, &md->io_bs);
- tio = container_of(clone, struct dm_target_io, clone);
+ tio = clone_to_tio(clone);
tio->inside_dm_io = true;
tio->io = NULL;
@@ -535,7 +534,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
io->md = md;
spin_lock_init(&io->endio_lock);
- start_io_acct(io);
+ io->start_time = jiffies;
return io;
}
@@ -545,8 +544,8 @@ static void free_io(struct mapped_device *md, struct dm_io *io)
bio_put(&io->tio.clone);
}
-static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *ti,
- unsigned target_bio_nr, gfp_t gfp_mask)
+static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
+ unsigned target_bio_nr, unsigned *len, gfp_t gfp_mask)
{
struct dm_target_io *tio;
@@ -554,11 +553,12 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *t
/* the dm_target_io embedded in ci->io is available */
tio = &ci->io->tio;
} else {
- struct bio *clone = bio_alloc_bioset(gfp_mask, 0, &ci->io->md->bs);
+ struct bio *clone = bio_alloc_clone(ci->bio->bi_bdev, ci->bio,
+ gfp_mask, &ci->io->md->bs);
if (!clone)
return NULL;
- tio = container_of(clone, struct dm_target_io, clone);
+ tio = clone_to_tio(clone);
tio->inside_dm_io = false;
}
@@ -566,15 +566,16 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *t
tio->io = ci->io;
tio->ti = ti;
tio->target_bio_nr = target_bio_nr;
+ tio->len_ptr = len;
- return tio;
+ return &tio->clone;
}
-static void free_tio(struct dm_target_io *tio)
+static void free_tio(struct bio *clone)
{
- if (tio->inside_dm_io)
+ if (clone_to_tio(clone)->inside_dm_io)
return;
- bio_put(&tio->clone);
+ bio_put(clone);
}
/*
@@ -879,7 +880,7 @@ static bool swap_bios_limit(struct dm_target *ti, struct bio *bio)
static void clone_endio(struct bio *bio)
{
blk_status_t error = bio->bi_status;
- struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
+ struct dm_target_io *tio = clone_to_tio(bio);
struct dm_io *io = tio->io;
struct mapped_device *md = tio->io->md;
dm_endio_fn endio = tio->ti->type->end_io;
@@ -930,7 +931,7 @@ static void clone_endio(struct bio *bio)
up(&md->swap_bios_semaphore);
}
- free_tio(tio);
+ free_tio(bio);
dm_io_dec_pending(io, error);
}
@@ -1085,7 +1086,7 @@ static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
*/
void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
{
- struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
+ struct dm_target_io *tio = clone_to_tio(bio);
unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
BUG_ON(bio->bi_opf & REQ_PREFLUSH);
@@ -1115,11 +1116,11 @@ static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch)
mutex_unlock(&md->swap_bios_lock);
}
-static void __map_bio(struct dm_target_io *tio)
+static void __map_bio(struct bio *clone)
{
+ struct dm_target_io *tio = clone_to_tio(clone);
int r;
sector_t sector;
- struct bio *clone = &tio->clone;
struct dm_io *io = tio->io;
struct dm_target *ti = tio->ti;
@@ -1164,7 +1165,7 @@ static void __map_bio(struct dm_target_io *tio)
struct mapped_device *md = io->md;
up(&md->swap_bios_semaphore);
}
- free_tio(tio);
+ free_tio(clone);
dm_io_dec_pending(io, BLK_STS_IOERR);
break;
case DM_MAPIO_REQUEUE:
@@ -1172,7 +1173,7 @@ static void __map_bio(struct dm_target_io *tio)
struct mapped_device *md = io->md;
up(&md->swap_bios_semaphore);
}
- free_tio(tio);
+ free_tio(clone);
dm_io_dec_pending(io, BLK_STS_DM_REQUEUE);
break;
default:
@@ -1190,106 +1191,75 @@ static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
/*
* Creates a bio that consists of range of complete bvecs.
*/
-static int clone_bio(struct dm_target_io *tio, struct bio *bio,
- sector_t sector, unsigned len)
+static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
+ sector_t sector, unsigned *len)
{
- struct bio *clone = &tio->clone;
- int r;
-
- __bio_clone_fast(clone, bio);
-
- r = bio_crypt_clone(clone, bio, GFP_NOIO);
- if (r < 0)
- return r;
-
- if (bio_integrity(bio)) {
- if (unlikely(!dm_target_has_integrity(tio->ti->type) &&
- !dm_target_passes_integrity(tio->ti->type))) {
- DMWARN("%s: the target %s doesn't support integrity data.",
- dm_device_name(tio->io->md),
- tio->ti->type->name);
- return -EIO;
- }
-
- r = bio_integrity_clone(clone, bio, GFP_NOIO);
- if (r < 0)
- return r;
- }
+ struct bio *bio = ci->bio, *clone;
+ clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
- clone->bi_iter.bi_size = to_bytes(len);
+ clone->bi_iter.bi_size = to_bytes(*len);
if (bio_integrity(bio))
bio_integrity_trim(clone);
+ __map_bio(clone);
return 0;
}
static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
- struct dm_target *ti, unsigned num_bios)
+ struct dm_target *ti, unsigned num_bios,
+ unsigned *len)
{
- struct dm_target_io *tio;
+ struct bio *bio;
int try;
- if (!num_bios)
- return;
-
- if (num_bios == 1) {
- tio = alloc_tio(ci, ti, 0, GFP_NOIO);
- bio_list_add(blist, &tio->clone);
- return;
- }
-
for (try = 0; try < 2; try++) {
int bio_nr;
- struct bio *bio;
if (try)
mutex_lock(&ci->io->md->table_devices_lock);
for (bio_nr = 0; bio_nr < num_bios; bio_nr++) {
- tio = alloc_tio(ci, ti, bio_nr, try ? GFP_NOIO : GFP_NOWAIT);
- if (!tio)
+ bio = alloc_tio(ci, ti, bio_nr, len,
+ try ? GFP_NOIO : GFP_NOWAIT);
+ if (!bio)
break;
- bio_list_add(blist, &tio->clone);
+ bio_list_add(blist, bio);
}
if (try)
mutex_unlock(&ci->io->md->table_devices_lock);
if (bio_nr == num_bios)
return;
- while ((bio = bio_list_pop(blist))) {
- tio = container_of(bio, struct dm_target_io, clone);
- free_tio(tio);
- }
+ while ((bio = bio_list_pop(blist)))
+ free_tio(bio);
}
}
-static void __clone_and_map_simple_bio(struct clone_info *ci,
- struct dm_target_io *tio, unsigned *len)
-{
- struct bio *clone = &tio->clone;
-
- tio->len_ptr = len;
-
- __bio_clone_fast(clone, ci->bio);
- if (len)
- bio_setup_sector(clone, ci->sector, *len);
- __map_bio(tio);
-}
-
static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
unsigned num_bios, unsigned *len)
{
struct bio_list blist = BIO_EMPTY_LIST;
- struct bio *bio;
- struct dm_target_io *tio;
-
- alloc_multiple_bios(&blist, ci, ti, num_bios);
+ struct bio *clone;
- while ((bio = bio_list_pop(&blist))) {
- tio = container_of(bio, struct dm_target_io, clone);
- __clone_and_map_simple_bio(ci, tio, len);
+ switch (num_bios) {
+ case 0:
+ break;
+ case 1:
+ clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
+ if (len)
+ bio_setup_sector(clone, ci->sector, *len);
+ __map_bio(clone);
+ break;
+ default:
+ alloc_multiple_bios(&blist, ci, ti, num_bios, len);
+ while ((clone = bio_list_pop(&blist))) {
+ if (len)
+ bio_setup_sector(clone, ci->sector, *len);
+ __map_bio(clone);
+ }
+ break;
}
}
@@ -1304,9 +1274,8 @@ static int __send_empty_flush(struct clone_info *ci)
* need to reference it after submit. It's just used as
* the basis for the clone(s).
*/
- bio_init(&flush_bio, NULL, 0);
- flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
- bio_set_dev(&flush_bio, ci->io->md->disk->part0);
+ bio_init(&flush_bio, ci->io->md->disk->part0, NULL, 0,
+ REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC);
ci->bio = &flush_bio;
ci->sector_count = 0;
@@ -1319,25 +1288,6 @@ static int __send_empty_flush(struct clone_info *ci)
return 0;
}
-static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
- sector_t sector, unsigned *len)
-{
- struct bio *bio = ci->bio;
- struct dm_target_io *tio;
- int r;
-
- tio = alloc_tio(ci, ti, 0, GFP_NOIO);
- tio->len_ptr = len;
- r = clone_bio(tio, bio, sector, *len);
- if (r < 0) {
- free_tio(tio);
- return r;
- }
- __map_bio(tio);
-
- return 0;
-}
-
static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
unsigned num_bios)
{
@@ -1442,9 +1392,6 @@ static void init_clone_info(struct clone_info *ci, struct mapped_device *md,
ci->sector = bio->bi_iter.bi_sector;
}
-#define __dm_part_stat_sub(part, field, subnd) \
- (part_stat_get(part, field) -= (subnd))
-
/*
* Entry point to split a bio into clones and submit them to the targets.
*/
@@ -1480,23 +1427,12 @@ static void __split_and_process_bio(struct mapped_device *md,
GFP_NOIO, &md->queue->bio_split);
ci.io->orig_bio = b;
- /*
- * Adjust IO stats for each split, otherwise upon queue
- * reentry there will be redundant IO accounting.
- * NOTE: this is a stop-gap fix, a proper fix involves
- * significant refactoring of DM core's bio splitting
- * (by eliminating DM's splitting and just using bio_split)
- */
- part_stat_lock();
- __dm_part_stat_sub(dm_disk(md)->part0,
- sectors[op_stat_group(bio_op(bio))], ci.sector_count);
- part_stat_unlock();
-
bio_chain(b, bio);
trace_block_split(b, bio->bi_iter.bi_sector);
submit_bio_noacct(bio);
}
}
+ start_io_acct(ci.io);
/* drop the extra reference count */
dm_io_dec_pending(ci.io, errno_to_blk_status(error));
@@ -2091,7 +2027,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
set_bit(DMF_FREEING, &md->flags);
spin_unlock(&_minor_lock);
- blk_set_queue_dying(md->queue);
+ blk_mark_disk_dead(md->disk);
/*
* Take suspend_lock so that presuspend and postsuspend methods
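The new clone_to_tio() helper centralizes the container_of() step that recovers a dm_target_io from its embedded clone bio, which the rest of the dm.c refactoring above leans on. A sketch of the same pattern on a hypothetical structure (my_tio):

#include <linux/bio.h>
#include <linux/container_of.h>

struct my_tio {			/* hypothetical stand-in for dm_target_io */
	unsigned int target_bio_nr;
	struct bio clone;	/* embedded bio handed to the block layer */
};

static inline struct my_tio *clone_to_my_tio(struct bio *clone)
{
	/* recover the containing structure from the embedded member */
	return container_of(clone, struct my_tio, clone);
}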
diff --git a/drivers/md/md-faulty.c b/drivers/md/md-faulty.c
index c0dc6f2ef4a3..50ad818978a4 100644
--- a/drivers/md/md-faulty.c
+++ b/drivers/md/md-faulty.c
@@ -205,9 +205,9 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio)
}
}
if (failit) {
- struct bio *b = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+ struct bio *b = bio_alloc_clone(conf->rdev->bdev, bio, GFP_NOIO,
+ &mddev->bio_set);
- bio_set_dev(b, conf->rdev->bdev);
b->bi_private = bio;
b->bi_end_io = faulty_fail;
bio = b;
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index e7d6486f090f..3081a936350d 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -121,11 +121,9 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
}
multipath = conf->multipaths + mp_bh->path;
- bio_init(&mp_bh->bio, NULL, 0);
- __bio_clone_fast(&mp_bh->bio, bio);
+ bio_init_clone(multipath->rdev->bdev, &mp_bh->bio, bio, GFP_NOIO);
mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
- bio_set_dev(&mp_bh->bio, multipath->rdev->bdev);
mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT;
mp_bh->bio.bi_end_io = multipath_end_request;
mp_bh->bio.bi_private = mp_bh;
@@ -299,7 +297,6 @@ static void multipathd(struct md_thread *thread)
md_check_recovery(mddev);
for (;;) {
- char b[BDEVNAME_SIZE];
spin_lock_irqsave(&conf->device_lock, flags);
if (list_empty(head))
break;
@@ -311,13 +308,13 @@ static void multipathd(struct md_thread *thread)
bio->bi_iter.bi_sector = mp_bh->master_bio->bi_iter.bi_sector;
if ((mp_bh->path = multipath_map (conf))<0) {
- pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
- bio_devname(bio, b),
+ pr_err("multipath: %pg: unrecoverable IO read error for block %llu\n",
+ bio->bi_bdev,
(unsigned long long)bio->bi_iter.bi_sector);
multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
} else {
- pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
- bio_devname(bio, b),
+ pr_err("multipath: %pg: redirecting sector %llu to another IO path\n",
+ bio->bi_bdev,
(unsigned long long)bio->bi_iter.bi_sector);
*bio = *(mp_bh->master_bio);
bio->bi_iter.bi_sector +=
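md-multipath initializes its embedded per-request bio as a clone in one step with bio_init_clone(), rather than bio_init() followed by __bio_clone_fast() and bio_set_dev(). A sketch; setup_embedded_clone() is hypothetical, and the return value of bio_init_clone() is ignored here just as in the converted call site above:

#include <linux/bio.h>

static void setup_embedded_clone(struct bio *embedded, struct bio *src,
				 struct block_device *bdev)
{
	/*
	 * Initialize an embedded (not bioset-allocated) bio as a clone
	 * of src, aimed at bdev.
	 */
	bio_init_clone(bdev, embedded, src, GFP_NOIO);
	embedded->bi_opf |= REQ_FAILFAST_TRANSPORT;
}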
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5881d05a76eb..309b3af906ad 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -562,11 +562,11 @@ static void submit_flushes(struct work_struct *ws)
atomic_inc(&rdev->nr_pending);
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
- bi = bio_alloc_bioset(GFP_NOIO, 0, &mddev->bio_set);
+ bi = bio_alloc_bioset(rdev->bdev, 0,
+ REQ_OP_WRITE | REQ_PREFLUSH,
+ GFP_NOIO, &mddev->bio_set);
bi->bi_end_io = md_end_flush;
bi->bi_private = rdev;
- bio_set_dev(bi, rdev->bdev);
- bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
atomic_inc(&mddev->flush_pending);
submit_bio(bi);
rcu_read_lock();
@@ -955,7 +955,6 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
* If an error occurred, call md_error
*/
struct bio *bio;
- int ff = 0;
if (!page)
return;
@@ -963,11 +962,13 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
if (test_bit(Faulty, &rdev->flags))
return;
- bio = bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
+ bio = bio_alloc_bioset(rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev,
+ 1,
+ REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA,
+ GFP_NOIO, &mddev->sync_set);
atomic_inc(&rdev->nr_pending);
- bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev);
bio->bi_iter.bi_sector = sector;
bio_add_page(bio, page, size, 0);
bio->bi_private = rdev;
@@ -976,8 +977,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
test_bit(FailFast, &rdev->flags) &&
!test_bit(LastDev, &rdev->flags))
- ff = MD_FAILFAST;
- bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;
+ bio->bi_opf |= MD_FAILFAST;
atomic_inc(&mddev->pending_writes);
submit_bio(bio);
@@ -998,13 +998,11 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
struct bio bio;
struct bio_vec bvec;
- bio_init(&bio, &bvec, 1);
-
if (metadata_op && rdev->meta_bdev)
- bio_set_dev(&bio, rdev->meta_bdev);
+ bio_init(&bio, rdev->meta_bdev, &bvec, 1, op | op_flags);
else
- bio_set_dev(&bio, rdev->bdev);
- bio.bi_opf = op | op_flags;
+ bio_init(&bio, rdev->bdev, &bvec, 1, op | op_flags);
+
if (metadata_op)
bio.bi_iter.bi_sector = sector + rdev->sb_start;
else if (rdev->mddev->reshape_position != MaxSector &&
@@ -5869,10 +5867,6 @@ int md_run(struct mddev *mddev)
nowait = nowait && blk_queue_nowait(bdev_get_queue(rdev->bdev));
}
- /* Set the NOWAIT flags if all underlying devices support it */
- if (nowait)
- blk_queue_flag_set(QUEUE_FLAG_NOWAIT, mddev->queue);
-
if (!bioset_initialized(&mddev->bio_set)) {
err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (err)
@@ -6010,6 +6004,10 @@ int md_run(struct mddev *mddev)
else
blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue);
+
+ /* Set the NOWAIT flags if all underlying devices support it */
+ if (nowait)
+ blk_queue_flag_set(QUEUE_FLAG_NOWAIT, mddev->queue);
}
if (pers->sync_request) {
if (mddev->kobj.sd &&
@@ -8636,13 +8634,14 @@ static void md_end_io_acct(struct bio *bio)
*/
void md_account_bio(struct mddev *mddev, struct bio **bio)
{
+ struct block_device *bdev = (*bio)->bi_bdev;
struct md_io_acct *md_io_acct;
struct bio *clone;
- if (!blk_queue_io_stat((*bio)->bi_bdev->bd_disk->queue))
+ if (!blk_queue_io_stat(bdev->bd_disk->queue))
return;
- clone = bio_clone_fast(*bio, GFP_NOIO, &mddev->io_acct_set);
+ clone = bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_acct_set);
md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
md_io_acct->orig_bio = *bio;
md_io_acct->start_time = bio_start_io_acct(*bio);
@@ -9583,7 +9582,7 @@ static int md_notify_reboot(struct notifier_block *this,
* driver, we do want to have a safe RAID driver ...
*/
if (need_delay)
- mdelay(1000*1);
+ msleep(1000);
return NOTIFY_DONE;
}
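The sync_page_io() hunk shows the on-stack bio pattern under the new API: bio_init() takes the device and operation up front, and bio_uninit() releases any attached resources after the synchronous submission. A self-contained sketch; read_one_page() is hypothetical:

#include <linux/bio.h>
#include <linux/mm.h>

static int read_one_page(struct block_device *bdev, sector_t sector,
			 struct page *page)
{
	struct bio bio;
	struct bio_vec bvec;
	int ret;

	/* on-stack bio: device and operation go straight into bio_init() */
	bio_init(&bio, bdev, &bvec, 1, REQ_OP_READ);
	bio.bi_iter.bi_sector = sector;
	__bio_add_page(&bio, page, PAGE_SIZE, 0);

	ret = submit_bio_wait(&bio);
	bio_uninit(&bio);	/* release integrity/crypt resources, if any */
	return ret;
}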
diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c
index 83f9a4f3d82e..e61f6cad4e08 100644
--- a/drivers/md/raid1-10.c
+++ b/drivers/md/raid1-10.c
@@ -28,6 +28,11 @@ struct resync_pages {
struct page *pages[RESYNC_PAGES];
};
+struct raid1_plug_cb {
+ struct blk_plug_cb cb;
+ struct bio_list pending;
+};
+
static void rbio_pool_free(void *rbio, void *data)
{
kfree(rbio);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e2d8acb1e988..934186724d21 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -824,7 +824,6 @@ static void flush_pending_writes(struct r1conf *conf)
struct bio *bio;
bio = bio_list_get(&conf->pending_bio_list);
- conf->pending_count = 0;
spin_unlock_irq(&conf->device_lock);
/*
@@ -1126,7 +1125,8 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio,
int i = 0;
struct bio *behind_bio = NULL;
- behind_bio = bio_alloc_bioset(GFP_NOIO, vcnt, &r1_bio->mddev->bio_set);
+ behind_bio = bio_alloc_bioset(NULL, vcnt, 0, GFP_NOIO,
+ &r1_bio->mddev->bio_set);
if (!behind_bio)
return;
@@ -1166,12 +1166,6 @@ free_pages:
bio_put(behind_bio);
}
-struct raid1_plug_cb {
- struct blk_plug_cb cb;
- struct bio_list pending;
- int pending_cnt;
-};
-
static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb,
@@ -1183,7 +1177,6 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
if (from_schedule || current->bio_list) {
spin_lock_irq(&conf->device_lock);
bio_list_merge(&conf->pending_bio_list, &plug->pending);
- conf->pending_count += plug->pending_cnt;
spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_barrier);
md_wakeup_thread(mddev->thread);
@@ -1319,13 +1312,13 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
if (!r1bio_existed && blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
r1_bio->start_time = bio_start_io_acct(bio);
- read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
+ read_bio = bio_alloc_clone(mirror->rdev->bdev, bio, gfp,
+ &mddev->bio_set);
r1_bio->bios[rdisk] = read_bio;
read_bio->bi_iter.bi_sector = r1_bio->sector +
mirror->rdev->data_offset;
- bio_set_dev(read_bio, mirror->rdev->bdev);
read_bio->bi_end_io = raid1_end_read_request;
bio_set_op_attrs(read_bio, op, do_sync);
if (test_bit(FailFast, &mirror->rdev->flags) &&
@@ -1545,24 +1538,25 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
first_clone = 0;
}
- if (r1_bio->behind_master_bio)
- mbio = bio_clone_fast(r1_bio->behind_master_bio,
- GFP_NOIO, &mddev->bio_set);
- else
- mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
-
if (r1_bio->behind_master_bio) {
+ mbio = bio_alloc_clone(rdev->bdev,
+ r1_bio->behind_master_bio,
+ GFP_NOIO, &mddev->bio_set);
if (test_bit(CollisionCheck, &rdev->flags))
wait_for_serialization(rdev, r1_bio);
if (test_bit(WriteMostly, &rdev->flags))
atomic_inc(&r1_bio->behind_remaining);
- } else if (mddev->serialize_policy)
- wait_for_serialization(rdev, r1_bio);
+ } else {
+ mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO,
+ &mddev->bio_set);
+
+ if (mddev->serialize_policy)
+ wait_for_serialization(rdev, r1_bio);
+ }
r1_bio->bios[i] = mbio;
mbio->bi_iter.bi_sector = (r1_bio->sector + rdev->data_offset);
- bio_set_dev(mbio, rdev->bdev);
mbio->bi_end_io = raid1_end_write_request;
mbio->bi_opf = bio_op(bio) | (bio->bi_opf & (REQ_SYNC | REQ_FUA));
if (test_bit(FailFast, &rdev->flags) &&
@@ -1586,11 +1580,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
plug = NULL;
if (plug) {
bio_list_add(&plug->pending, mbio);
- plug->pending_cnt++;
} else {
spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio);
- conf->pending_count++;
spin_unlock_irqrestore(&conf->device_lock, flags);
md_wakeup_thread(mddev->thread);
}
@@ -2070,15 +2062,14 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
} while (!success && d != r1_bio->read_disk);
if (!success) {
- char b[BDEVNAME_SIZE];
int abort = 0;
/* Cannot read from anywhere, this block is lost.
* Record a bad block on each device. If that doesn't
* work just disable and interrupt the recovery.
* Don't fail devices as that won't really help.
*/
- pr_crit_ratelimited("md/raid1:%s: %s: unrecoverable I/O read error for block %llu\n",
- mdname(mddev), bio_devname(bio, b),
+ pr_crit_ratelimited("md/raid1:%s: %pg: unrecoverable I/O read error for block %llu\n",
+ mdname(mddev), bio->bi_bdev,
(unsigned long long)r1_bio->sector);
for (d = 0; d < conf->raid_disks * 2; d++) {
rdev = conf->mirrors[d].rdev;
@@ -2165,11 +2156,10 @@ static void process_checks(struct r1bio *r1_bio)
continue;
/* fixup the bio for reuse, but preserve errno */
status = b->bi_status;
- bio_reset(b);
+ bio_reset(b, conf->mirrors[i].rdev->bdev, REQ_OP_READ);
b->bi_status = status;
b->bi_iter.bi_sector = r1_bio->sector +
conf->mirrors[i].rdev->data_offset;
- bio_set_dev(b, conf->mirrors[i].rdev->bdev);
b->bi_end_io = end_sync_read;
rp->raid_bio = r1_bio;
b->bi_private = rp;
@@ -2416,12 +2406,12 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
/* Write at 'sector' for 'sectors'*/
if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
- wbio = bio_clone_fast(r1_bio->behind_master_bio,
- GFP_NOIO,
- &mddev->bio_set);
+ wbio = bio_alloc_clone(rdev->bdev,
+ r1_bio->behind_master_bio,
+ GFP_NOIO, &mddev->bio_set);
} else {
- wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO,
- &mddev->bio_set);
+ wbio = bio_alloc_clone(rdev->bdev, r1_bio->master_bio,
+ GFP_NOIO, &mddev->bio_set);
}
bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
@@ -2430,7 +2420,6 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
bio_trim(wbio, sector - r1_bio->sector, sectors);
wbio->bi_iter.bi_sector += rdev->data_offset;
- bio_set_dev(wbio, rdev->bdev);
if (submit_bio_wait(wbio) < 0)
/* failure! */
@@ -2650,7 +2639,7 @@ static struct r1bio *raid1_alloc_init_r1buf(struct r1conf *conf)
for (i = conf->poolinfo->raid_disks; i--; ) {
bio = r1bio->bios[i];
rps = bio->bi_private;
- bio_reset(bio);
+ bio_reset(bio, NULL, 0);
bio->bi_private = rps;
}
r1bio->master_bio = NULL;
@@ -3058,7 +3047,6 @@ static struct r1conf *setup_conf(struct mddev *mddev)
init_waitqueue_head(&conf->wait_barrier);
bio_list_init(&conf->pending_bio_list);
- conf->pending_count = 0;
conf->recovery_disabled = mddev->recovery_disabled - 1;
err = -EIO;
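bio_reset() now also reinitializes the device and operation, so the bio_set_dev() and bio_set_op_attrs() calls that used to follow it disappear from the resync paths above. A sketch; reuse_for_read() is illustrative:

#include <linux/bio.h>

static void reuse_for_read(struct bio *bio, struct block_device *bdev,
			   sector_t sector)
{
	/* one call re-targets the bio: device and operation included */
	bio_reset(bio, bdev, REQ_OP_READ);
	bio->bi_iter.bi_sector = sector;
}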
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index ccf10e59b116..ebb6788820e7 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -87,7 +87,6 @@ struct r1conf {
/* queue pending writes to be submitted on unplug */
struct bio_list pending_bio_list;
- int pending_count;
/* for use when syncing mirrors:
* We don't allow both normal IO and resync/recovery IO at
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 2b969f70a31f..b369ebb965a9 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -861,7 +861,6 @@ static void flush_pending_writes(struct r10conf *conf)
struct bio *bio;
bio = bio_list_get(&conf->pending_bio_list);
- conf->pending_count = 0;
spin_unlock_irq(&conf->device_lock);
/*
@@ -1054,16 +1053,9 @@ static sector_t choose_data_offset(struct r10bio *r10_bio,
return rdev->new_data_offset;
}
-struct raid10_plug_cb {
- struct blk_plug_cb cb;
- struct bio_list pending;
- int pending_cnt;
-};
-
static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
- struct raid10_plug_cb *plug = container_of(cb, struct raid10_plug_cb,
- cb);
+ struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb, cb);
struct mddev *mddev = plug->cb.data;
struct r10conf *conf = mddev->private;
struct bio *bio;
@@ -1071,7 +1063,6 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
if (from_schedule || current->bio_list) {
spin_lock_irq(&conf->device_lock);
bio_list_merge(&conf->pending_bio_list, &plug->pending);
- conf->pending_count += plug->pending_cnt;
spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_barrier);
md_wakeup_thread(mddev->thread);
@@ -1208,14 +1199,13 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
r10_bio->start_time = bio_start_io_acct(bio);
- read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
+ read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set);
r10_bio->devs[slot].bio = read_bio;
r10_bio->devs[slot].rdev = rdev;
read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
choose_data_offset(r10_bio, rdev);
- bio_set_dev(read_bio, rdev->bdev);
read_bio->bi_end_io = raid10_end_read_request;
bio_set_op_attrs(read_bio, op, do_sync);
if (test_bit(FailFast, &rdev->flags) &&
@@ -1239,7 +1229,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
unsigned long flags;
struct blk_plug_cb *cb;
- struct raid10_plug_cb *plug = NULL;
+ struct raid1_plug_cb *plug = NULL;
struct r10conf *conf = mddev->private;
struct md_rdev *rdev;
int devnum = r10_bio->devs[n_copy].devnum;
@@ -1255,7 +1245,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
} else
rdev = conf->mirrors[devnum].rdev;
- mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+ mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, &mddev->bio_set);
if (replacement)
r10_bio->devs[n_copy].repl_bio = mbio;
else
@@ -1263,7 +1253,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr +
choose_data_offset(r10_bio, rdev));
- bio_set_dev(mbio, rdev->bdev);
mbio->bi_end_io = raid10_end_write_request;
bio_set_op_attrs(mbio, op, do_sync | do_fua);
if (!replacement && test_bit(FailFast,
@@ -1282,16 +1271,14 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
if (cb)
- plug = container_of(cb, struct raid10_plug_cb, cb);
+ plug = container_of(cb, struct raid1_plug_cb, cb);
else
plug = NULL;
if (plug) {
bio_list_add(&plug->pending, mbio);
- plug->pending_cnt++;
} else {
spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio);
- conf->pending_count++;
spin_unlock_irqrestore(&conf->device_lock, flags);
md_wakeup_thread(mddev->thread);
}
@@ -1812,7 +1799,8 @@ retry_discard:
*/
if (r10_bio->devs[disk].bio) {
struct md_rdev *rdev = conf->mirrors[disk].rdev;
- mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+ mbio = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO,
+ &mddev->bio_set);
mbio->bi_end_io = raid10_end_discard_request;
mbio->bi_private = r10_bio;
r10_bio->devs[disk].bio = mbio;
@@ -1825,7 +1813,8 @@ retry_discard:
}
if (r10_bio->devs[disk].repl_bio) {
struct md_rdev *rrdev = conf->mirrors[disk].replacement;
- rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+ rbio = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO,
+ &mddev->bio_set);
rbio->bi_end_io = raid10_end_discard_request;
rbio->bi_private = r10_bio;
r10_bio->devs[disk].repl_bio = rbio;
@@ -2422,7 +2411,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
* bi_vecs, as the read request might have corrupted these
*/
rp = get_resync_pages(tbio);
- bio_reset(tbio);
+ bio_reset(tbio, conf->mirrors[d].rdev->bdev, REQ_OP_WRITE);
md_bio_reset_resync_pages(tbio, rp, fbio->bi_iter.bi_size);
@@ -2430,7 +2419,6 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
tbio->bi_private = rp;
tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
tbio->bi_end_io = end_sync_write;
- bio_set_op_attrs(tbio, REQ_OP_WRITE, 0);
bio_copy_data(tbio, fbio);
@@ -2441,7 +2429,6 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
if (test_bit(FailFast, &conf->mirrors[d].rdev->flags))
tbio->bi_opf |= MD_FAILFAST;
tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset;
- bio_set_dev(tbio, conf->mirrors[d].rdev->bdev);
submit_bio_noacct(tbio);
}
@@ -2894,12 +2881,12 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
if (sectors > sect_to_write)
sectors = sect_to_write;
/* Write at 'sector' for 'sectors' */
- wbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+ wbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO,
+ &mddev->bio_set);
bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector);
wbio->bi_iter.bi_sector = wsector +
choose_data_offset(r10_bio, rdev);
- bio_set_dev(wbio, rdev->bdev);
bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
if (submit_bio_wait(wbio) < 0)
@@ -3160,12 +3147,12 @@ static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf)
for (i = 0; i < nalloc; i++) {
bio = r10bio->devs[i].bio;
rp = bio->bi_private;
- bio_reset(bio);
+ bio_reset(bio, NULL, 0);
bio->bi_private = rp;
bio = r10bio->devs[i].repl_bio;
if (bio) {
rp = bio->bi_private;
- bio_reset(bio);
+ bio_reset(bio, NULL, 0);
bio->bi_private = rp;
}
}
@@ -4892,14 +4879,12 @@ read_more:
return sectors_done;
}
- read_bio = bio_alloc_bioset(GFP_KERNEL, RESYNC_PAGES, &mddev->bio_set);
-
- bio_set_dev(read_bio, rdev->bdev);
+ read_bio = bio_alloc_bioset(rdev->bdev, RESYNC_PAGES, REQ_OP_READ,
+ GFP_KERNEL, &mddev->bio_set);
read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
+ rdev->data_offset);
read_bio->bi_private = r10_bio;
read_bio->bi_end_io = end_reshape_read;
- bio_set_op_attrs(read_bio, REQ_OP_READ, 0);
r10_bio->master_bio = read_bio;
r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index c34bb196790e..5c0804d8bb1f 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -75,7 +75,6 @@ struct r10conf {
/* queue pending writes and submit them on unplug */
struct bio_list pending_bio_list;
- int pending_count;
spinlock_t resync_lock;
atomic_t nr_pending;
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 0b5dcaabbc15..a7d50ff9020a 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -735,10 +735,9 @@ static void r5l_submit_current_io(struct r5l_log *log)
static struct bio *r5l_bio_alloc(struct r5l_log *log)
{
- struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_VECS, &log->bs);
+ struct bio *bio = bio_alloc_bioset(log->rdev->bdev, BIO_MAX_VECS,
+ REQ_OP_WRITE, GFP_NOIO, &log->bs);
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- bio_set_dev(bio, log->rdev->bdev);
bio->bi_iter.bi_sector = log->rdev->data_offset + log->log_start;
return bio;
@@ -1267,6 +1266,8 @@ static void r5l_log_flush_endio(struct bio *bio)
r5l_io_run_stripes(io);
list_splice_tail_init(&log->flushing_ios, &log->finished_ios);
spin_unlock_irqrestore(&log->io_list_lock, flags);
+
+ bio_uninit(bio);
}
/*
@@ -1302,10 +1303,9 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log)
if (!do_flush)
return;
- bio_reset(&log->flush_bio);
- bio_set_dev(&log->flush_bio, log->rdev->bdev);
+ bio_init(&log->flush_bio, log->rdev->bdev, NULL, 0,
+ REQ_OP_WRITE | REQ_PREFLUSH);
log->flush_bio.bi_end_io = r5l_log_flush_endio;
- log->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
submit_bio(&log->flush_bio);
}
@@ -1623,10 +1623,10 @@ struct r5l_recovery_ctx {
* just copy data from the pool.
*/
struct page *ra_pool[R5L_RECOVERY_PAGE_POOL_SIZE];
+ struct bio_vec ra_bvec[R5L_RECOVERY_PAGE_POOL_SIZE];
sector_t pool_offset; /* offset of first page in the pool */
int total_pages; /* total allocated pages */
int valid_pages; /* pages with valid data */
- struct bio *ra_bio; /* bio to do the read ahead */
};
static int r5l_recovery_allocate_ra_pool(struct r5l_log *log,
@@ -1634,10 +1634,6 @@ static int r5l_recovery_allocate_ra_pool(struct r5l_log *log,
{
struct page *page;
- ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_VECS, &log->bs);
- if (!ctx->ra_bio)
- return -ENOMEM;
-
ctx->valid_pages = 0;
ctx->total_pages = 0;
while (ctx->total_pages < R5L_RECOVERY_PAGE_POOL_SIZE) {
@@ -1649,10 +1645,8 @@ static int r5l_recovery_allocate_ra_pool(struct r5l_log *log,
ctx->total_pages += 1;
}
- if (ctx->total_pages == 0) {
- bio_put(ctx->ra_bio);
+ if (ctx->total_pages == 0)
return -ENOMEM;
- }
ctx->pool_offset = 0;
return 0;
@@ -1665,7 +1659,6 @@ static void r5l_recovery_free_ra_pool(struct r5l_log *log,
for (i = 0; i < ctx->total_pages; ++i)
put_page(ctx->ra_pool[i]);
- bio_put(ctx->ra_bio);
}
/*
@@ -1678,17 +1671,19 @@ static int r5l_recovery_fetch_ra_pool(struct r5l_log *log,
struct r5l_recovery_ctx *ctx,
sector_t offset)
{
- bio_reset(ctx->ra_bio);
- bio_set_dev(ctx->ra_bio, log->rdev->bdev);
- bio_set_op_attrs(ctx->ra_bio, REQ_OP_READ, 0);
- ctx->ra_bio->bi_iter.bi_sector = log->rdev->data_offset + offset;
+ struct bio bio;
+ int ret;
+
+ bio_init(&bio, log->rdev->bdev, ctx->ra_bvec,
+ R5L_RECOVERY_PAGE_POOL_SIZE, REQ_OP_READ);
+ bio.bi_iter.bi_sector = log->rdev->data_offset + offset;
ctx->valid_pages = 0;
ctx->pool_offset = offset;
while (ctx->valid_pages < ctx->total_pages) {
- bio_add_page(ctx->ra_bio,
- ctx->ra_pool[ctx->valid_pages], PAGE_SIZE, 0);
+ __bio_add_page(&bio, ctx->ra_pool[ctx->valid_pages], PAGE_SIZE,
+ 0);
ctx->valid_pages += 1;
offset = r5l_ring_add(log, offset, BLOCK_SECTORS);
@@ -1697,7 +1692,9 @@ static int r5l_recovery_fetch_ra_pool(struct r5l_log *log,
break;
}
- return submit_bio_wait(ctx->ra_bio);
+ ret = submit_bio_wait(&bio);
+ bio_uninit(&bio);
+ return ret;
}
/*
@@ -3108,7 +3105,6 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
INIT_LIST_HEAD(&log->io_end_ios);
INIT_LIST_HEAD(&log->flushing_ios);
INIT_LIST_HEAD(&log->finished_ios);
- bio_init(&log->flush_bio, NULL, 0);
log->io_kc = KMEM_CACHE(r5l_io_unit, 0);
if (!log->io_kc)
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 4ab417915d7f..ea4cd8dd4dc3 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -250,7 +250,8 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
INIT_LIST_HEAD(&io->stripe_list);
atomic_set(&io->pending_stripes, 0);
atomic_set(&io->pending_flushes, 0);
- bio_init(&io->bio, io->biovec, PPL_IO_INLINE_BVECS);
+ bio_init(&io->bio, log->rdev->bdev, io->biovec, PPL_IO_INLINE_BVECS,
+ REQ_OP_WRITE | REQ_FUA);
pplhdr = page_address(io->header_page);
clear_page(pplhdr);
@@ -416,12 +417,10 @@ static void ppl_log_endio(struct bio *bio)
static void ppl_submit_iounit_bio(struct ppl_io_unit *io, struct bio *bio)
{
- char b[BDEVNAME_SIZE];
-
- pr_debug("%s: seq: %llu size: %u sector: %llu dev: %s\n",
+ pr_debug("%s: seq: %llu size: %u sector: %llu dev: %pg\n",
__func__, io->seq, bio->bi_iter.bi_size,
(unsigned long long)bio->bi_iter.bi_sector,
- bio_devname(bio, b));
+ bio->bi_bdev);
submit_bio(bio);
}
@@ -465,8 +464,6 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
bio->bi_end_io = ppl_log_endio;
- bio->bi_opf = REQ_OP_WRITE | REQ_FUA;
- bio_set_dev(bio, log->rdev->bdev);
bio->bi_iter.bi_sector = log->next_io_sector;
bio_add_page(bio, io->header_page, PAGE_SIZE, 0);
bio->bi_write_hint = ppl_conf->write_hint;
@@ -496,11 +493,10 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
if (!bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0)) {
struct bio *prev = bio;
- bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_VECS,
+ bio = bio_alloc_bioset(prev->bi_bdev, BIO_MAX_VECS,
+ prev->bi_opf, GFP_NOIO,
&ppl_conf->bs);
- bio->bi_opf = prev->bi_opf;
bio->bi_write_hint = prev->bi_write_hint;
- bio_copy_dev(bio, prev);
bio->bi_iter.bi_sector = bio_end_sector(prev);
bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0);
@@ -590,9 +586,8 @@ static void ppl_flush_endio(struct bio *bio)
struct ppl_log *log = io->log;
struct ppl_conf *ppl_conf = log->ppl_conf;
struct r5conf *conf = ppl_conf->mddev->private;
- char b[BDEVNAME_SIZE];
- pr_debug("%s: dev: %s\n", __func__, bio_devname(bio, b));
+ pr_debug("%s: dev: %pg\n", __func__, bio->bi_bdev);
if (bio->bi_status) {
struct md_rdev *rdev;
@@ -635,16 +630,14 @@ static void ppl_do_flush(struct ppl_io_unit *io)
if (bdev) {
struct bio *bio;
- char b[BDEVNAME_SIZE];
- bio = bio_alloc_bioset(GFP_NOIO, 0, &ppl_conf->flush_bs);
- bio_set_dev(bio, bdev);
+ bio = bio_alloc_bioset(bdev, 0,
+ REQ_OP_WRITE | REQ_PREFLUSH,
+ GFP_NOIO, &ppl_conf->flush_bs);
bio->bi_private = io;
- bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
bio->bi_end_io = ppl_flush_endio;
- pr_debug("%s: dev: %s\n", __func__,
- bio_devname(bio, b));
+ pr_debug("%s: dev: %pg\n", __func__, bio->bi_bdev);
submit_bio(bio);
flushed_disks++;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ffe720c73b0a..8bd5f06390ea 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1060,6 +1060,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
int i, disks = sh->disks;
struct stripe_head *head_sh = sh;
struct bio_list pending_bios = BIO_EMPTY_LIST;
+ struct r5dev *dev;
bool should_defer;
might_sleep();
@@ -1094,8 +1095,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
op_flags |= REQ_SYNC;
again:
- bi = &sh->dev[i].req;
- rbi = &sh->dev[i].rreq; /* For writing to replacement */
+ dev = &sh->dev[i];
+ bi = &dev->req;
+ rbi = &dev->rreq; /* For writing to replacement */
rcu_read_lock();
rrdev = rcu_dereference(conf->disks[i].replacement);
@@ -1171,8 +1173,7 @@ again:
set_bit(STRIPE_IO_STARTED, &sh->state);
- bio_set_dev(bi, rdev->bdev);
- bio_set_op_attrs(bi, op, op_flags);
+ bio_init(bi, rdev->bdev, &dev->vec, 1, op | op_flags);
bi->bi_end_io = op_is_write(op)
? raid5_end_write_request
: raid5_end_read_request;
@@ -1238,8 +1239,7 @@ again:
set_bit(STRIPE_IO_STARTED, &sh->state);
- bio_set_dev(rbi, rrdev->bdev);
- bio_set_op_attrs(rbi, op, op_flags);
+ bio_init(rbi, rrdev->bdev, &dev->rvec, 1, op | op_flags);
BUG_ON(!op_is_write(op));
rbi->bi_end_io = raid5_end_write_request;
rbi->bi_private = sh;
@@ -2294,7 +2294,6 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
int disks, struct r5conf *conf)
{
struct stripe_head *sh;
- int i;
sh = kmem_cache_zalloc(sc, gfp);
if (sh) {
@@ -2307,12 +2306,6 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
atomic_set(&sh->count, 1);
sh->raid_conf = conf;
sh->log_start = MaxSector;
- for (i = 0; i < disks; i++) {
- struct r5dev *dev = &sh->dev[i];
-
- bio_init(&dev->req, &dev->vec, 1);
- bio_init(&dev->rreq, &dev->rvec, 1);
- }
if (raid5_has_ppl(conf)) {
sh->ppl_page = alloc_page(gfp);
@@ -2677,7 +2670,6 @@ static void raid5_end_read_request(struct bio * bi)
(unsigned long long)sh->sector, i, atomic_read(&sh->count),
bi->bi_status);
if (i == disks) {
- bio_reset(bi);
BUG();
return;
}
@@ -2785,7 +2777,7 @@ static void raid5_end_read_request(struct bio * bi)
}
}
rdev_dec_pending(rdev, conf->mddev);
- bio_reset(bi);
+ bio_uninit(bi);
clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state);
raid5_release_stripe(sh);
@@ -2823,7 +2815,6 @@ static void raid5_end_write_request(struct bio *bi)
(unsigned long long)sh->sector, i, atomic_read(&sh->count),
bi->bi_status);
if (i == disks) {
- bio_reset(bi);
BUG();
return;
}
@@ -2860,7 +2851,7 @@ static void raid5_end_write_request(struct bio *bi)
if (sh->batch_head && bi->bi_status && !replacement)
set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
- bio_reset(bi);
+ bio_uninit(bi);
if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state);
@@ -5438,14 +5429,14 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
return 0;
}
- align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->io_acct_set);
+ align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
+ &mddev->io_acct_set);
md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone);
raid_bio->bi_next = (void *)rdev;
if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
md_io_acct->start_time = bio_start_io_acct(raid_bio);
md_io_acct->orig_bio = raid_bio;
- bio_set_dev(align_bio, rdev->bdev);
align_bio->bi_end_io = raid5_align_endio;
align_bio->bi_private = md_io_acct;
align_bio->bi_iter.bi_sector = sector;
diff --git a/drivers/media/platform/omap3isp/ispstat.c b/drivers/media/platform/omap3isp/ispstat.c
index 5b9b57f4d9bf..68cf68dbcace 100644
--- a/drivers/media/platform/omap3isp/ispstat.c
+++ b/drivers/media/platform/omap3isp/ispstat.c
@@ -512,7 +512,7 @@ int omap3isp_stat_request_statistics(struct ispstat *stat,
int omap3isp_stat_request_statistics_time32(struct ispstat *stat,
struct omap3isp_stat_data_time32 *data)
{
- struct omap3isp_stat_data data64;
+ struct omap3isp_stat_data data64 = { };
int ret;
ret = omap3isp_stat_request_statistics(stat, &data64);
@@ -521,7 +521,8 @@ int omap3isp_stat_request_statistics_time32(struct ispstat *stat,
data->ts.tv_sec = data64.ts.tv_sec;
data->ts.tv_usec = data64.ts.tv_usec;
- memcpy(&data->buf, &data64.buf, sizeof(*data) - sizeof(data->ts));
+ data->buf = (uintptr_t)data64.buf;
+ memcpy(&data->frame, &data64.frame, sizeof(data->frame));
return 0;
}
diff --git a/drivers/media/spi/cxd2880-spi.c b/drivers/media/spi/cxd2880-spi.c
index 6f2a66bc87fb..6be4e5528879 100644
--- a/drivers/media/spi/cxd2880-spi.c
+++ b/drivers/media/spi/cxd2880-spi.c
@@ -625,7 +625,7 @@ fail_regulator:
return ret;
}
-static int
+static void
cxd2880_spi_remove(struct spi_device *spi)
{
struct cxd2880_dvb_spi *dvb_spi = spi_get_drvdata(spi);
@@ -643,8 +643,6 @@ cxd2880_spi_remove(struct spi_device *spi)
kfree(dvb_spi);
pr_info("cxd2880_spi remove ok.\n");
-
- return 0;
}
static const struct spi_device_id cxd2880_spi_id[] = {
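The media SPI drivers here are converted to the void-returning remove() callback: the SPI core never acted on the returned error, so drivers now just clean up and return. A sketch with a hypothetical my_priv driver state:

#include <linux/spi/spi.h>

struct my_priv;				/* hypothetical driver state */
void my_priv_teardown(struct my_priv *priv);	/* hypothetical */

static void my_spi_remove(struct spi_device *spi)
{
	struct my_priv *priv = spi_get_drvdata(spi);

	my_priv_teardown(priv);
	/* no return value: the SPI core ignores errors from remove() */
}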
diff --git a/drivers/media/spi/gs1662.c b/drivers/media/spi/gs1662.c
index f86ef1ca1288..75c21a93e6d0 100644
--- a/drivers/media/spi/gs1662.c
+++ b/drivers/media/spi/gs1662.c
@@ -458,13 +458,11 @@ static int gs_probe(struct spi_device *spi)
return ret;
}
-static int gs_remove(struct spi_device *spi)
+static void gs_remove(struct spi_device *spi)
{
struct v4l2_subdev *sd = spi_get_drvdata(spi);
v4l2_device_unregister_subdev(sd);
-
- return 0;
}
static struct spi_driver gs_driver = {
diff --git a/drivers/media/tuners/msi001.c b/drivers/media/tuners/msi001.c
index 44247049a319..ad6c72c1ed04 100644
--- a/drivers/media/tuners/msi001.c
+++ b/drivers/media/tuners/msi001.c
@@ -472,7 +472,7 @@ err:
return ret;
}
-static int msi001_remove(struct spi_device *spi)
+static void msi001_remove(struct spi_device *spi)
{
struct v4l2_subdev *sd = spi_get_drvdata(spi);
struct msi001_dev *dev = sd_to_msi001_dev(sd);
@@ -486,7 +486,6 @@ static int msi001_remove(struct spi_device *spi)
v4l2_device_unregister_subdev(&dev->sd);
v4l2_ctrl_handler_free(&dev->hdl);
kfree(dev);
- return 0;
}
static const struct spi_device_id msi001_id_table[] = {
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 0cda6c6baefc..3993bdd4b519 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -1943,22 +1943,6 @@ static void msb_io_work(struct work_struct *work)
static DEFINE_IDR(msb_disk_idr); /*set of used disk numbers */
static DEFINE_MUTEX(msb_disk_lock); /* protects against races in open/release */
-static int msb_bd_open(struct block_device *bdev, fmode_t mode)
-{
- struct gendisk *disk = bdev->bd_disk;
- struct msb_data *msb = disk->private_data;
-
- dbg_verbose("block device open");
-
- mutex_lock(&msb_disk_lock);
-
- if (msb && msb->card)
- msb->usage_count++;
-
- mutex_unlock(&msb_disk_lock);
- return 0;
-}
-
static void msb_data_clear(struct msb_data *msb)
{
kfree(msb->boot_page);
@@ -1968,33 +1952,6 @@ static void msb_data_clear(struct msb_data *msb)
msb->card = NULL;
}
-static int msb_disk_release(struct gendisk *disk)
-{
- struct msb_data *msb = disk->private_data;
-
- dbg_verbose("block device release");
- mutex_lock(&msb_disk_lock);
-
- if (msb) {
- if (msb->usage_count)
- msb->usage_count--;
-
- if (!msb->usage_count) {
- disk->private_data = NULL;
- idr_remove(&msb_disk_idr, msb->disk_id);
- put_disk(disk);
- kfree(msb);
- }
- }
- mutex_unlock(&msb_disk_lock);
- return 0;
-}
-
-static void msb_bd_release(struct gendisk *disk, fmode_t mode)
-{
- msb_disk_release(disk);
-}
-
static int msb_bd_getgeo(struct block_device *bdev,
struct hd_geometry *geo)
{
@@ -2003,6 +1960,17 @@ static int msb_bd_getgeo(struct block_device *bdev,
return 0;
}
+static void msb_bd_free_disk(struct gendisk *disk)
+{
+ struct msb_data *msb = disk->private_data;
+
+ mutex_lock(&msb_disk_lock);
+ idr_remove(&msb_disk_idr, msb->disk_id);
+ mutex_unlock(&msb_disk_lock);
+
+ kfree(msb);
+}
+
static blk_status_t msb_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
@@ -2096,10 +2064,9 @@ static void msb_start(struct memstick_dev *card)
}
static const struct block_device_operations msb_bdops = {
- .open = msb_bd_open,
- .release = msb_bd_release,
- .getgeo = msb_bd_getgeo,
- .owner = THIS_MODULE
+ .owner = THIS_MODULE,
+ .getgeo = msb_bd_getgeo,
+ .free_disk = msb_bd_free_disk,
};
static const struct blk_mq_ops msb_mq_ops = {
@@ -2147,7 +2114,6 @@ static int msb_init_disk(struct memstick_dev *card)
set_capacity(msb->disk, capacity);
dbg("Set total disk size to %lu sectors", capacity);
- msb->usage_count = 1;
msb->io_queue = alloc_ordered_workqueue("ms_block", WQ_MEM_RECLAIM);
INIT_WORK(&msb->io_work, msb_io_work);
sg_init_table(msb->prealloc_sg, MS_BLOCK_MAX_SEGS+1);
@@ -2229,7 +2195,7 @@ static void msb_remove(struct memstick_dev *card)
msb_data_clear(msb);
mutex_unlock(&msb_disk_lock);
- msb_disk_release(msb->disk);
+ put_disk(msb->disk);
memstick_set_drvdata(card, NULL);
}
diff --git a/drivers/memstick/core/ms_block.h b/drivers/memstick/core/ms_block.h
index 122e1a8a8bd5..7058f9aefeb9 100644
--- a/drivers/memstick/core/ms_block.h
+++ b/drivers/memstick/core/ms_block.h
@@ -143,7 +143,6 @@ struct ms_boot_page {
} __packed;
struct msb_data {
- unsigned int usage_count;
struct memstick_dev *card;
struct gendisk *disk;
struct request_queue *queue;
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index c0450397b673..725ba74ded30 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -133,7 +133,6 @@ struct mspro_devinfo {
struct mspro_block_data {
struct memstick_dev *card;
- unsigned int usage_count;
unsigned int caps;
struct gendisk *disk;
struct request_queue *queue;
@@ -178,53 +177,16 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error);
/*** Block device ***/
-static int mspro_block_bd_open(struct block_device *bdev, fmode_t mode)
-{
- struct gendisk *disk = bdev->bd_disk;
- struct mspro_block_data *msb = disk->private_data;
- int rc = -ENXIO;
-
- mutex_lock(&mspro_block_disk_lock);
-
- if (msb && msb->card) {
- msb->usage_count++;
- if ((mode & FMODE_WRITE) && msb->read_only)
- rc = -EROFS;
- else
- rc = 0;
- }
-
- mutex_unlock(&mspro_block_disk_lock);
-
- return rc;
-}
-
-
-static void mspro_block_disk_release(struct gendisk *disk)
+static void mspro_block_bd_free_disk(struct gendisk *disk)
{
struct mspro_block_data *msb = disk->private_data;
int disk_id = MINOR(disk_devt(disk)) >> MSPRO_BLOCK_PART_SHIFT;
mutex_lock(&mspro_block_disk_lock);
-
- if (msb) {
- if (msb->usage_count)
- msb->usage_count--;
-
- if (!msb->usage_count) {
- kfree(msb);
- disk->private_data = NULL;
- idr_remove(&mspro_block_disk_idr, disk_id);
- put_disk(disk);
- }
- }
-
+ idr_remove(&mspro_block_disk_idr, disk_id);
mutex_unlock(&mspro_block_disk_lock);
-}
-static void mspro_block_bd_release(struct gendisk *disk, fmode_t mode)
-{
- mspro_block_disk_release(disk);
+ kfree(msb);
}
static int mspro_block_bd_getgeo(struct block_device *bdev,
@@ -240,10 +202,9 @@ static int mspro_block_bd_getgeo(struct block_device *bdev,
}
static const struct block_device_operations ms_block_bdops = {
- .open = mspro_block_bd_open,
- .release = mspro_block_bd_release,
- .getgeo = mspro_block_bd_getgeo,
- .owner = THIS_MODULE
+ .owner = THIS_MODULE,
+ .getgeo = mspro_block_bd_getgeo,
+ .free_disk = mspro_block_bd_free_disk,
};
/*** Information ***/
@@ -1226,7 +1187,6 @@ static int mspro_block_init_disk(struct memstick_dev *card)
msb->disk->first_minor = disk_id << MSPRO_BLOCK_PART_SHIFT;
msb->disk->minors = 1 << MSPRO_BLOCK_PART_SHIFT;
msb->disk->fops = &ms_block_bdops;
- msb->usage_count = 1;
msb->disk->private_data = msb;
sprintf(msb->disk->disk_name, "mspblk%d", disk_id);
@@ -1239,6 +1199,9 @@ static int mspro_block_init_disk(struct memstick_dev *card)
set_capacity(msb->disk, capacity);
dev_dbg(&card->dev, "capacity set %ld\n", capacity);
+ if (msb->read_only)
+ set_disk_ro(msb->disk, true);
+
rc = device_add_disk(&card->dev, msb->disk, NULL);
if (rc)
goto out_cleanup_disk;
@@ -1341,7 +1304,7 @@ static void mspro_block_remove(struct memstick_dev *card)
mspro_block_data_clear(msb);
mutex_unlock(&mspro_block_disk_lock);
- mspro_block_disk_release(msb->disk);
+ put_disk(msb->disk);
memstick_set_drvdata(card, NULL);
}
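The two memstick conversions above replace open()/release() reference counting with the block layer's ->free_disk() hook: the driver keeps a single reference of its own, drops it with put_disk() on remove, and the block core invokes ->free_disk() once the last gendisk reference (including any open file descriptors) goes away. A minimal sketch of the pattern, with hypothetical names not taken from this patch:

    struct my_data {
    	struct gendisk *disk;
    	int disk_id;
    };

    /* Invoked by the block core when the final reference to the
     * gendisk is dropped; per-disk data is released here rather
     * than through an open/release usage counter.
     */
    static void my_free_disk(struct gendisk *disk)
    {
    	struct my_data *mdata = disk->private_data;

    	kfree(mdata);
    }

    static const struct block_device_operations my_bdops = {
    	.owner		= THIS_MODULE,
    	.free_disk	= my_free_disk,
    };

    /* remove() then only drops the driver's own reference: */
    put_disk(mdata->disk);

Since freeing is now tied to the gendisk refcount itself, the hand-rolled usage_count bookkeeping and its locking can go away entirely.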
diff --git a/drivers/mfd/arizona-spi.c b/drivers/mfd/arizona-spi.c
index 9fe06dda3782..03620c8efe34 100644
--- a/drivers/mfd/arizona-spi.c
+++ b/drivers/mfd/arizona-spi.c
@@ -206,13 +206,11 @@ static int arizona_spi_probe(struct spi_device *spi)
return arizona_dev_init(arizona);
}
-static int arizona_spi_remove(struct spi_device *spi)
+static void arizona_spi_remove(struct spi_device *spi)
{
struct arizona *arizona = spi_get_drvdata(spi);
arizona_dev_exit(arizona);
-
- return 0;
}
static const struct spi_device_id arizona_spi_ids[] = {
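This arizona change is the first of many identical hunks below: the spi_driver remove() callback was converted tree-wide from int to void, because the SPI core only logged a non-zero return value and carried on with the unbind regardless. A sketch of the new callback shape (hypothetical driver names):

    static void my_spi_remove(struct spi_device *spi)
    {
    	struct my_chip *chip = spi_get_drvdata(spi);

    	my_chip_teardown(chip);
    	/* No return value: unbinding cannot be aborted anyway. */
    }

    static struct spi_driver my_spi_driver = {
    	.driver	= { .name = "my-chip" },
    	.probe	= my_spi_probe,
    	.remove	= my_spi_remove,
    };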
diff --git a/drivers/mfd/da9052-spi.c b/drivers/mfd/da9052-spi.c
index 5faf3766a5e2..b79a57b45c1e 100644
--- a/drivers/mfd/da9052-spi.c
+++ b/drivers/mfd/da9052-spi.c
@@ -55,12 +55,11 @@ static int da9052_spi_probe(struct spi_device *spi)
return da9052_device_init(da9052, id->driver_data);
}
-static int da9052_spi_remove(struct spi_device *spi)
+static void da9052_spi_remove(struct spi_device *spi)
{
struct da9052 *da9052 = spi_get_drvdata(spi);
da9052_device_exit(da9052);
- return 0;
}
static const struct spi_device_id da9052_spi_id[] = {
diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c
index 70fa18b04ad2..3d5ce18aa9ae 100644
--- a/drivers/mfd/ezx-pcap.c
+++ b/drivers/mfd/ezx-pcap.c
@@ -193,13 +193,11 @@ static void pcap_isr_work(struct work_struct *work)
ezx_pcap_write(pcap, PCAP_REG_MSR, isr | msr);
ezx_pcap_write(pcap, PCAP_REG_ISR, isr);
- local_irq_disable();
service = isr & ~msr;
for (irq = pcap->irq_base; service; service >>= 1, irq++) {
if (service & 1)
- generic_handle_irq(irq);
+ generic_handle_irq_safe(irq);
}
- local_irq_enable();
ezx_pcap_write(pcap, PCAP_REG_MSR, pcap->msr);
} while (gpio_get_value(pdata->gpio));
}
@@ -392,7 +390,7 @@ static int pcap_add_subdev(struct pcap_chip *pcap,
return ret;
}
-static int ezx_pcap_remove(struct spi_device *spi)
+static void ezx_pcap_remove(struct spi_device *spi)
{
struct pcap_chip *pcap = spi_get_drvdata(spi);
unsigned long flags;
@@ -412,8 +410,6 @@ static int ezx_pcap_remove(struct spi_device *spi)
irq_set_chip_and_handler(i, NULL, NULL);
destroy_workqueue(pcap->workqueue);
-
- return 0;
}
static int ezx_pcap_probe(struct spi_device *spi)
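Besides the remove() conversion, the pcap hunk drops an open-coded local_irq_disable()/local_irq_enable() pair around generic_handle_irq(). generic_handle_irq() requires interrupts to be disabled; generic_handle_irq_safe() takes care of that internally, so it can be called from any context, including the workqueue context here. Its effect is roughly the following (a simplified sketch, not the kernel's exact implementation):

    static int generic_handle_irq_safe_sketch(unsigned int irq)
    {
    	unsigned long flags;
    	int ret;

    	local_irq_save(flags);
    	ret = generic_handle_irq(irq);
    	local_irq_restore(flags);

    	return ret;
    }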
diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c
index f10e53187f67..9ffab9aafd81 100644
--- a/drivers/mfd/lpc_ich.c
+++ b/drivers/mfd/lpc_ich.c
@@ -63,6 +63,8 @@
#define SPIBASE_BYT 0x54
#define SPIBASE_BYT_SZ 512
#define SPIBASE_BYT_EN BIT(1)
+#define BYT_BCR 0xfc
+#define BYT_BCR_WPD BIT(0)
#define SPIBASE_LPT 0x3800
#define SPIBASE_LPT_SZ 512
@@ -1084,12 +1086,57 @@ wdt_done:
return ret;
}
+static bool lpc_ich_byt_set_writeable(void __iomem *base, void *data)
+{
+ u32 val;
+
+ val = readl(base + BYT_BCR);
+ if (!(val & BYT_BCR_WPD)) {
+ val |= BYT_BCR_WPD;
+ writel(val, base + BYT_BCR);
+ val = readl(base + BYT_BCR);
+ }
+
+ return val & BYT_BCR_WPD;
+}
+
+static bool lpc_ich_lpt_set_writeable(void __iomem *base, void *data)
+{
+ struct pci_dev *pdev = data;
+ u32 bcr;
+
+ pci_read_config_dword(pdev, BCR, &bcr);
+ if (!(bcr & BCR_WPD)) {
+ bcr |= BCR_WPD;
+ pci_write_config_dword(pdev, BCR, bcr);
+ pci_read_config_dword(pdev, BCR, &bcr);
+ }
+
+ return bcr & BCR_WPD;
+}
+
+static bool lpc_ich_bxt_set_writeable(void __iomem *base, void *data)
+{
+ unsigned int spi = PCI_DEVFN(13, 2);
+ struct pci_bus *bus = data;
+ u32 bcr;
+
+ pci_bus_read_config_dword(bus, spi, BCR, &bcr);
+ if (!(bcr & BCR_WPD)) {
+ bcr |= BCR_WPD;
+ pci_bus_write_config_dword(bus, spi, BCR, bcr);
+ pci_bus_read_config_dword(bus, spi, BCR, &bcr);
+ }
+
+ return bcr & BCR_WPD;
+}
+
static int lpc_ich_init_spi(struct pci_dev *dev)
{
struct lpc_ich_priv *priv = pci_get_drvdata(dev);
struct resource *res = &intel_spi_res[0];
struct intel_spi_boardinfo *info;
- u32 spi_base, rcba, bcr;
+ u32 spi_base, rcba;
info = devm_kzalloc(&dev->dev, sizeof(*info), GFP_KERNEL);
if (!info)
@@ -1103,6 +1150,8 @@ static int lpc_ich_init_spi(struct pci_dev *dev)
if (spi_base & SPIBASE_BYT_EN) {
res->start = spi_base & ~(SPIBASE_BYT_SZ - 1);
res->end = res->start + SPIBASE_BYT_SZ - 1;
+
+ info->set_writeable = lpc_ich_byt_set_writeable;
}
break;
@@ -1113,8 +1162,8 @@ static int lpc_ich_init_spi(struct pci_dev *dev)
res->start = spi_base + SPIBASE_LPT;
res->end = res->start + SPIBASE_LPT_SZ - 1;
- pci_read_config_dword(dev, BCR, &bcr);
- info->writeable = !!(bcr & BCR_WPD);
+ info->set_writeable = lpc_ich_lpt_set_writeable;
+ info->data = dev;
}
break;
@@ -1135,8 +1184,8 @@ static int lpc_ich_init_spi(struct pci_dev *dev)
res->start = spi_base & 0xfffffff0;
res->end = res->start + SPIBASE_APL_SZ - 1;
- pci_bus_read_config_dword(bus, spi, BCR, &bcr);
- info->writeable = !!(bcr & BCR_WPD);
+ info->set_writeable = lpc_ich_bxt_set_writeable;
+ info->data = bus;
}
pci_bus_write_config_byte(bus, p2sb, 0xe1, 0x1);
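The lpc_ich rework replaces a one-shot probe-time snapshot ("is the flash writable right now?") with a set_writeable() callback plus an opaque data pointer in intel_spi_boardinfo, so the Intel SPI flash driver can attempt to lift the BIOS write protection only when a write is actually needed. How the consumer side might use it (an assumed shape, not part of this patch):

    static int try_enable_writes(const struct intel_spi_boardinfo *info,
    			     void __iomem *base)
    {
    	if (!info->set_writeable)
    		return -EOPNOTSUPP;

    	/* Each platform flavor (BYT/LPT/BXT above) flips its own
    	 * WPD bit and reports back whether the change stuck.
    	 */
    	if (!info->set_writeable(base, info->data))
    		return -EACCES;

    	return 0;
    }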
diff --git a/drivers/mfd/madera-spi.c b/drivers/mfd/madera-spi.c
index e860f5ff0933..da84eb50e53a 100644
--- a/drivers/mfd/madera-spi.c
+++ b/drivers/mfd/madera-spi.c
@@ -112,13 +112,11 @@ static int madera_spi_probe(struct spi_device *spi)
return madera_dev_init(madera);
}
-static int madera_spi_remove(struct spi_device *spi)
+static void madera_spi_remove(struct spi_device *spi)
{
struct madera *madera = spi_get_drvdata(spi);
madera_dev_exit(madera);
-
- return 0;
}
static const struct spi_device_id madera_spi_ids[] = {
diff --git a/drivers/mfd/mc13xxx-spi.c b/drivers/mfd/mc13xxx-spi.c
index 4d8913d647e6..f803527e5819 100644
--- a/drivers/mfd/mc13xxx-spi.c
+++ b/drivers/mfd/mc13xxx-spi.c
@@ -166,10 +166,9 @@ static int mc13xxx_spi_probe(struct spi_device *spi)
return mc13xxx_common_init(&spi->dev);
}
-static int mc13xxx_spi_remove(struct spi_device *spi)
+static void mc13xxx_spi_remove(struct spi_device *spi)
{
mc13xxx_common_exit(&spi->dev);
- return 0;
}
static struct spi_driver mc13xxx_spi_driver = {
diff --git a/drivers/mfd/rsmu_spi.c b/drivers/mfd/rsmu_spi.c
index fec2b4ec477c..d2f3d8f1e05a 100644
--- a/drivers/mfd/rsmu_spi.c
+++ b/drivers/mfd/rsmu_spi.c
@@ -220,13 +220,11 @@ static int rsmu_spi_probe(struct spi_device *client)
return rsmu_core_init(rsmu);
}
-static int rsmu_spi_remove(struct spi_device *client)
+static void rsmu_spi_remove(struct spi_device *client)
{
struct rsmu_ddata *rsmu = spi_get_drvdata(client);
rsmu_core_exit(rsmu);
-
- return 0;
}
static const struct spi_device_id rsmu_spi_id[] = {
diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c
index 6c5915016be5..ad8055a0e286 100644
--- a/drivers/mfd/stmpe-spi.c
+++ b/drivers/mfd/stmpe-spi.c
@@ -102,13 +102,11 @@ stmpe_spi_probe(struct spi_device *spi)
return stmpe_probe(&spi_ci, id->driver_data);
}
-static int stmpe_spi_remove(struct spi_device *spi)
+static void stmpe_spi_remove(struct spi_device *spi)
{
struct stmpe *stmpe = spi_get_drvdata(spi);
stmpe_remove(stmpe);
-
- return 0;
}
static const struct of_device_id stmpe_spi_of_match[] = {
diff --git a/drivers/mfd/tps65912-spi.c b/drivers/mfd/tps65912-spi.c
index d701926aa46e..bba38fbc781d 100644
--- a/drivers/mfd/tps65912-spi.c
+++ b/drivers/mfd/tps65912-spi.c
@@ -50,13 +50,11 @@ static int tps65912_spi_probe(struct spi_device *spi)
return tps65912_device_init(tps);
}
-static int tps65912_spi_remove(struct spi_device *spi)
+static void tps65912_spi_remove(struct spi_device *spi)
{
struct tps65912 *tps = spi_get_drvdata(spi);
tps65912_device_exit(tps);
-
- return 0;
}
static const struct spi_device_id tps65912_spi_id_table[] = {
diff --git a/drivers/misc/ad525x_dpot-spi.c b/drivers/misc/ad525x_dpot-spi.c
index a9e75d80ad36..263055bda48b 100644
--- a/drivers/misc/ad525x_dpot-spi.c
+++ b/drivers/misc/ad525x_dpot-spi.c
@@ -90,10 +90,9 @@ static int ad_dpot_spi_probe(struct spi_device *spi)
spi_get_device_id(spi)->name);
}
-static int ad_dpot_spi_remove(struct spi_device *spi)
+static void ad_dpot_spi_remove(struct spi_device *spi)
{
ad_dpot_remove(&spi->dev);
- return 0;
}
static const struct spi_device_id ad_dpot_spi_id[] = {
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index c3305bdda69c..bee727ed98db 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -440,6 +440,10 @@ static int at25_probe(struct spi_device *spi)
return -ENXIO;
}
+ at25 = devm_kzalloc(&spi->dev, sizeof(*at25), GFP_KERNEL);
+ if (!at25)
+ return -ENOMEM;
+
mutex_init(&at25->lock);
at25->spi = spi;
spi_set_drvdata(spi, at25);
diff --git a/drivers/misc/eeprom/ee1004.c b/drivers/misc/eeprom/ee1004.c
index bb9c4512c968..9fbfe784d710 100644
--- a/drivers/misc/eeprom/ee1004.c
+++ b/drivers/misc/eeprom/ee1004.c
@@ -114,6 +114,9 @@ static ssize_t ee1004_eeprom_read(struct i2c_client *client, char *buf,
if (offset + count > EE1004_PAGE_SIZE)
count = EE1004_PAGE_SIZE - offset;
+ if (count > I2C_SMBUS_BLOCK_MAX)
+ count = I2C_SMBUS_BLOCK_MAX;
+
return i2c_smbus_read_i2c_block_data_or_emulated(client, offset, count, buf);
}
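The ee1004 clamp reflects a hard SMBus limit: block transfers carry at most I2C_SMBUS_BLOCK_MAX (32) bytes, so a single call can return fewer bytes than requested and callers have to loop. A hypothetical caller-side loop over the function above:

    static ssize_t ee1004_read_all(struct i2c_client *client, char *buf,
    			       unsigned int off, size_t total)
    {
    	size_t done = 0;
    	ssize_t n;

    	while (done < total) {
    		/* Each call returns at most I2C_SMBUS_BLOCK_MAX (32) bytes. */
    		n = ee1004_eeprom_read(client, buf + done, off + done,
    				       total - done);
    		if (n < 0)
    			return n;
    		if (n == 0)	/* offset past the page: stop */
    			break;
    		done += n;
    	}

    	return done;
    }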
diff --git a/drivers/misc/eeprom/eeprom_93xx46.c b/drivers/misc/eeprom/eeprom_93xx46.c
index 1f15399e5cb4..b630625b3024 100644
--- a/drivers/misc/eeprom/eeprom_93xx46.c
+++ b/drivers/misc/eeprom/eeprom_93xx46.c
@@ -555,14 +555,12 @@ static int eeprom_93xx46_probe(struct spi_device *spi)
return 0;
}
-static int eeprom_93xx46_remove(struct spi_device *spi)
+static void eeprom_93xx46_remove(struct spi_device *spi)
{
struct eeprom_93xx46_dev *edev = spi_get_drvdata(spi);
if (!(edev->pdata->flags & EE_READONLY))
device_remove_file(&spi->dev, &dev_attr_erase);
-
- return 0;
}
static struct spi_driver eeprom_93xx46_driver = {
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 4ccbf43e6bfa..aa1682b94a23 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -1288,7 +1288,14 @@ static int fastrpc_dmabuf_alloc(struct fastrpc_user *fl, char __user *argp)
}
if (copy_to_user(argp, &bp, sizeof(bp))) {
- dma_buf_put(buf->dmabuf);
+ /*
+ * The usercopy failed, but we can't do much about it, as
+ * dma_buf_fd() already called fd_install() and made the
+ * file descriptor accessible for the current process. It
+ * might already be closed and dmabuf no longer valid when
+ * we reach this point. Therefore "leak" the fd and rely on
+ * the process exit path to do any required cleanup.
+ */
return -EFAULT;
}
diff --git a/drivers/misc/hi6421v600-irq.c b/drivers/misc/hi6421v600-irq.c
index 1c763796cf1f..caa3de37698b 100644
--- a/drivers/misc/hi6421v600-irq.c
+++ b/drivers/misc/hi6421v600-irq.c
@@ -117,8 +117,8 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv)
* If both powerkey down and up IRQs are received,
* handle them at the right order
*/
- generic_handle_irq(priv->irqs[POWERKEY_DOWN]);
- generic_handle_irq(priv->irqs[POWERKEY_UP]);
+ generic_handle_irq_safe(priv->irqs[POWERKEY_DOWN]);
+ generic_handle_irq_safe(priv->irqs[POWERKEY_UP]);
pending &= ~HISI_IRQ_POWERKEY_UP_DOWN;
}
@@ -126,7 +126,7 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv)
continue;
for_each_set_bit(offset, &pending, BITS_PER_BYTE) {
- generic_handle_irq(priv->irqs[offset + i * BITS_PER_BYTE]);
+ generic_handle_irq_safe(priv->irqs[offset + i * BITS_PER_BYTE]);
}
}
diff --git a/drivers/misc/lattice-ecp3-config.c b/drivers/misc/lattice-ecp3-config.c
index 98828030b5a4..bac4df2e5231 100644
--- a/drivers/misc/lattice-ecp3-config.c
+++ b/drivers/misc/lattice-ecp3-config.c
@@ -211,13 +211,11 @@ static int lattice_ecp3_probe(struct spi_device *spi)
return 0;
}
-static int lattice_ecp3_remove(struct spi_device *spi)
+static void lattice_ecp3_remove(struct spi_device *spi)
{
struct fpga_data *data = spi_get_drvdata(spi);
wait_for_completion(&data->fw_loaded);
-
- return 0;
}
static const struct spi_device_id lattice_ecp3_id[] = {
diff --git a/drivers/misc/lis3lv02d/lis3lv02d_spi.c b/drivers/misc/lis3lv02d/lis3lv02d_spi.c
index 9e40dfb60742..203a108b8883 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d_spi.c
+++ b/drivers/misc/lis3lv02d/lis3lv02d_spi.c
@@ -96,15 +96,13 @@ static int lis302dl_spi_probe(struct spi_device *spi)
return lis3lv02d_init_device(&lis3_dev);
}
-static int lis302dl_spi_remove(struct spi_device *spi)
+static void lis302dl_spi_remove(struct spi_device *spi)
{
struct lis3lv02d *lis3 = spi_get_drvdata(spi);
lis3lv02d_joystick_disable(lis3);
lis3lv02d_poweroff(lis3);
lis3lv02d_remove_fs(&lis3_dev);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 4e61b28a002f..4e67c1403cc9 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1682,31 +1682,31 @@ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req)
struct mmc_card *card = mq->card;
struct mmc_host *host = card->host;
blk_status_t error = BLK_STS_OK;
- int retries = 0;
do {
u32 status;
int err;
+ int retries = 0;
- mmc_blk_rw_rq_prep(mqrq, card, 1, mq);
+ while (retries++ <= MMC_READ_SINGLE_RETRIES) {
+ mmc_blk_rw_rq_prep(mqrq, card, 1, mq);
- mmc_wait_for_req(host, mrq);
+ mmc_wait_for_req(host, mrq);
- err = mmc_send_status(card, &status);
- if (err)
- goto error_exit;
-
- if (!mmc_host_is_spi(host) &&
- !mmc_ready_for_data(status)) {
- err = mmc_blk_fix_state(card, req);
+ err = mmc_send_status(card, &status);
if (err)
goto error_exit;
- }
- if (mrq->cmd->error && retries++ < MMC_READ_SINGLE_RETRIES)
- continue;
+ if (!mmc_host_is_spi(host) &&
+ !mmc_ready_for_data(status)) {
+ err = mmc_blk_fix_state(card, req);
+ if (err)
+ goto error_exit;
+ }
- retries = 0;
+ if (!mrq->cmd->error)
+ break;
+ }
if (mrq->cmd->error ||
mrq->data->error ||
@@ -1908,7 +1908,7 @@ static int mmc_blk_card_busy(struct mmc_card *card, struct request *req)
cb_data.card = card;
cb_data.status = 0;
- err = __mmc_poll_for_busy(card->host, MMC_BLK_TIMEOUT_MS,
+ err = __mmc_poll_for_busy(card->host, 0, MMC_BLK_TIMEOUT_MS,
&mmc_blk_busy_cb, &cb_data);
/*
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index bbbbcaf70a59..8421519c2a98 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1962,7 +1962,7 @@ static int mmc_sleep(struct mmc_host *host)
goto out_release;
}
- err = __mmc_poll_for_busy(host, timeout_ms, &mmc_sleep_busy_cb, host);
+ err = __mmc_poll_for_busy(host, 0, timeout_ms, &mmc_sleep_busy_cb, host);
out_release:
mmc_retune_release(host);
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index d63d1c735335..180d7e9d3400 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -21,6 +21,8 @@
#define MMC_BKOPS_TIMEOUT_MS (120 * 1000) /* 120s */
#define MMC_SANITIZE_TIMEOUT_MS (240 * 1000) /* 240s */
+#define MMC_OP_COND_PERIOD_US (1 * 1000) /* 1ms */
+#define MMC_OP_COND_TIMEOUT_MS 1000 /* 1s */
static const u8 tuning_blk_pattern_4bit[] = {
0xff, 0x0f, 0xff, 0x00, 0xff, 0xcc, 0xc3, 0xcc,
@@ -232,7 +234,9 @@ int mmc_send_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr)
cmd.arg = mmc_host_is_spi(host) ? 0 : ocr;
cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R3 | MMC_CMD_BCR;
- err = __mmc_poll_for_busy(host, 1000, &__mmc_send_op_cond_cb, &cb_data);
+ err = __mmc_poll_for_busy(host, MMC_OP_COND_PERIOD_US,
+ MMC_OP_COND_TIMEOUT_MS,
+ &__mmc_send_op_cond_cb, &cb_data);
if (err)
return err;
@@ -495,13 +499,14 @@ static int mmc_busy_cb(void *cb_data, bool *busy)
return 0;
}
-int __mmc_poll_for_busy(struct mmc_host *host, unsigned int timeout_ms,
+int __mmc_poll_for_busy(struct mmc_host *host, unsigned int period_us,
+ unsigned int timeout_ms,
int (*busy_cb)(void *cb_data, bool *busy),
void *cb_data)
{
int err;
unsigned long timeout;
- unsigned int udelay = 32, udelay_max = 32768;
+ unsigned int udelay = period_us ? period_us : 32, udelay_max = 32768;
bool expired = false;
bool busy = false;
@@ -546,7 +551,7 @@ int mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
cb_data.retry_crc_err = retry_crc_err;
cb_data.busy_cmd = busy_cmd;
- return __mmc_poll_for_busy(host, timeout_ms, &mmc_busy_cb, &cb_data);
+ return __mmc_poll_for_busy(host, 0, timeout_ms, &mmc_busy_cb, &cb_data);
}
EXPORT_SYMBOL_GPL(mmc_poll_for_busy);
diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index 9c813b851d0b..09ffbc00908b 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -41,7 +41,8 @@ int mmc_can_ext_csd(struct mmc_card *card);
int mmc_switch_status(struct mmc_card *card, bool crc_err_fatal);
bool mmc_prepare_busy_cmd(struct mmc_host *host, struct mmc_command *cmd,
unsigned int timeout_ms);
-int __mmc_poll_for_busy(struct mmc_host *host, unsigned int timeout_ms,
+int __mmc_poll_for_busy(struct mmc_host *host, unsigned int period_us,
+ unsigned int timeout_ms,
int (*busy_cb)(void *cb_data, bool *busy),
void *cb_data);
int mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
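The new period_us argument lets callers pick the initial polling interval of __mmc_poll_for_busy(); passing 0 keeps the historical 32 us start. Per the hunk above, the poller backs off exponentially, roughly like this sketch (busy_cb details elided, hypothetical helper):

    static int poll_busy_sketch(unsigned int period_us, unsigned int timeout_ms,
    			    bool (*still_busy)(void *), void *data)
    {
    	unsigned int udelay = period_us ? period_us : 32;	/* us */
    	unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms) + 1;

    	while (still_busy(data)) {
    		if (time_after(jiffies, timeout))
    			return -ETIMEDOUT;
    		usleep_range(udelay, udelay * 2);
    		if (udelay < 32768)
    			udelay *= 2;	/* exponential backoff */
    	}

    	return 0;
    }

For CMD1 (SEND_OP_COND) this is used to start at MMC_OP_COND_PERIOD_US (1 ms) instead of 32 us, since cards routinely stay busy for a while during power-up and tight polling only wastes bus bandwidth.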
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 45f578793980..bfbfed30dc4d 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -67,7 +67,7 @@ static const unsigned int sd_au_size[] = {
__res & __mask; \
})
-#define SD_POWEROFF_NOTIFY_TIMEOUT_MS 2000
+#define SD_POWEROFF_NOTIFY_TIMEOUT_MS 1000
#define SD_WRITE_EXTR_SINGLE_TIMEOUT_MS 1000
struct sd_busy_data {
@@ -1664,9 +1664,15 @@ static int sd_poweroff_notify(struct mmc_card *card)
goto out;
}
+ /* Find out when the command is completed. */
+ err = mmc_poll_for_busy(card, SD_WRITE_EXTR_SINGLE_TIMEOUT_MS, false,
+ MMC_BUSY_EXTR_SINGLE);
+ if (err)
+ goto out;
+
cb_data.card = card;
cb_data.reg_buf = reg_buf;
- err = __mmc_poll_for_busy(card->host, SD_POWEROFF_NOTIFY_TIMEOUT_MS,
+ err = __mmc_poll_for_busy(card->host, 0, SD_POWEROFF_NOTIFY_TIMEOUT_MS,
&sd_busy_poweroff_notify_cb, &cb_data);
out:
diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index 8f36536cb1b6..58ab9d90bc8b 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -173,6 +173,8 @@ struct meson_host {
int irq;
bool vqmmc_enabled;
+ bool needs_pre_post_req;
+
};
#define CMD_CFG_LENGTH_MASK GENMASK(8, 0)
@@ -663,6 +665,8 @@ static void meson_mmc_request_done(struct mmc_host *mmc,
struct meson_host *host = mmc_priv(mmc);
host->cmd = NULL;
+ if (host->needs_pre_post_req)
+ meson_mmc_post_req(mmc, mrq, 0);
mmc_request_done(host->mmc, mrq);
}
@@ -880,7 +884,7 @@ static int meson_mmc_validate_dram_access(struct mmc_host *mmc, struct mmc_data
static void meson_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
{
struct meson_host *host = mmc_priv(mmc);
- bool needs_pre_post_req = mrq->data &&
+ host->needs_pre_post_req = mrq->data &&
!(mrq->data->host_cookie & SD_EMMC_PRE_REQ_DONE);
/*
@@ -896,22 +900,19 @@ static void meson_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
}
}
- if (needs_pre_post_req) {
+ if (host->needs_pre_post_req) {
meson_mmc_get_transfer_mode(mmc, mrq);
if (!meson_mmc_desc_chain_mode(mrq->data))
- needs_pre_post_req = false;
+ host->needs_pre_post_req = false;
}
- if (needs_pre_post_req)
+ if (host->needs_pre_post_req)
meson_mmc_pre_req(mmc, mrq);
/* Stop execution */
writel(0, host->regs + SD_EMMC_START);
meson_mmc_start_cmd(mmc, mrq->sbc ?: mrq->cmd);
-
- if (needs_pre_post_req)
- meson_mmc_post_req(mmc, mrq, 0);
}
static void meson_mmc_read_resp(struct mmc_host *mmc, struct mmc_command *cmd)

diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index a576181e9db0..106dd204b1a7 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1489,7 +1489,7 @@ nomem:
}
-static int mmc_spi_remove(struct spi_device *spi)
+static void mmc_spi_remove(struct spi_device *spi)
{
struct mmc_host *mmc = dev_get_drvdata(&spi->dev);
struct mmc_spi_host *host = mmc_priv(mmc);
@@ -1507,7 +1507,6 @@ static int mmc_spi_remove(struct spi_device *spi)
spi->max_speed_hz = mmc->f_max;
mmc_spi_put_pdata(spi);
mmc_free_host(mmc);
- return 0;
}
static const struct spi_device_id mmc_spi_dev_ids[] = {
diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c
index 16d1c7a43d33..b6eb75f4bbfc 100644
--- a/drivers/mmc/host/moxart-mmc.c
+++ b/drivers/mmc/host/moxart-mmc.c
@@ -705,12 +705,12 @@ static int moxart_remove(struct platform_device *pdev)
if (!IS_ERR_OR_NULL(host->dma_chan_rx))
dma_release_channel(host->dma_chan_rx);
mmc_remove_host(mmc);
- mmc_free_host(mmc);
writel(0, host->base + REG_INTERRUPT_MASK);
writel(0, host->base + REG_POWER_CONTROL);
writel(readl(host->base + REG_CLOCK_CONTROL) | CLK_OFF,
host->base + REG_CLOCK_CONTROL);
+ mmc_free_host(mmc);
return 0;
}
diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c
index a593b1fbd69e..0f3658b36513 100644
--- a/drivers/mmc/host/sdhci-of-esdhc.c
+++ b/drivers/mmc/host/sdhci-of-esdhc.c
@@ -524,12 +524,16 @@ static void esdhc_of_adma_workaround(struct sdhci_host *host, u32 intmask)
static int esdhc_of_enable_dma(struct sdhci_host *host)
{
+ int ret;
u32 value;
struct device *dev = mmc_dev(host->mmc);
if (of_device_is_compatible(dev->of_node, "fsl,ls1043a-esdhc") ||
- of_device_is_compatible(dev->of_node, "fsl,ls1046a-esdhc"))
- dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
+ of_device_is_compatible(dev->of_node, "fsl,ls1046a-esdhc")) {
+ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
+ if (ret)
+ return ret;
+ }
value = sdhci_readl(host, ESDHC_DMA_SYSCTL);
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index bcc595c70a9f..104dcd702870 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -405,6 +405,9 @@ static int sh_mmcif_dma_slave_config(struct sh_mmcif_host *host,
struct dma_slave_config cfg = { 0, };
res = platform_get_resource(host->pd, IORESOURCE_MEM, 0);
+ if (!res)
+ return -EINVAL;
+
cfg.direction = direction;
if (direction == DMA_DEV_TO_MEM) {
diff --git a/drivers/mtd/devices/mchp23k256.c b/drivers/mtd/devices/mchp23k256.c
index a8b31bddf14b..008df9d8898d 100644
--- a/drivers/mtd/devices/mchp23k256.c
+++ b/drivers/mtd/devices/mchp23k256.c
@@ -209,13 +209,11 @@ static int mchp23k256_probe(struct spi_device *spi)
return 0;
}
-static int mchp23k256_remove(struct spi_device *spi)
+static void mchp23k256_remove(struct spi_device *spi)
{
struct mchp23k256_flash *flash = spi_get_drvdata(spi);
WARN_ON(mtd_device_unregister(&flash->mtd));
-
- return 0;
}
static const struct of_device_id mchp23k256_of_table[] = {
diff --git a/drivers/mtd/devices/mchp48l640.c b/drivers/mtd/devices/mchp48l640.c
index 231a10790196..a3fd426df74b 100644
--- a/drivers/mtd/devices/mchp48l640.c
+++ b/drivers/mtd/devices/mchp48l640.c
@@ -341,13 +341,11 @@ static int mchp48l640_probe(struct spi_device *spi)
return 0;
}
-static int mchp48l640_remove(struct spi_device *spi)
+static void mchp48l640_remove(struct spi_device *spi)
{
struct mchp48l640_flash *flash = spi_get_drvdata(spi);
WARN_ON(mtd_device_unregister(&flash->mtd));
-
- return 0;
}
static const struct of_device_id mchp48l640_of_table[] = {
diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index 734878abaa23..134e27328597 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -916,7 +916,7 @@ static int dataflash_probe(struct spi_device *spi)
return status;
}
-static int dataflash_remove(struct spi_device *spi)
+static void dataflash_remove(struct spi_device *spi)
{
struct dataflash *flash = spi_get_drvdata(spi);
@@ -925,8 +925,6 @@ static int dataflash_remove(struct spi_device *spi)
WARN_ON(mtd_device_unregister(&flash->mtd));
kfree(flash);
-
- return 0;
}
static struct spi_driver dataflash_driver = {
diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c
index 6ed6c51fac69..d503821a3e60 100644
--- a/drivers/mtd/devices/phram.c
+++ b/drivers/mtd/devices/phram.c
@@ -264,16 +264,20 @@ static int phram_setup(const char *val)
}
}
- if (erasesize)
- div_u64_rem(len, (uint32_t)erasesize, &rem);
-
if (len == 0 || erasesize == 0 || erasesize > len
- || erasesize > UINT_MAX || rem) {
+ || erasesize > UINT_MAX) {
parse_err("illegal erasesize or len\n");
ret = -EINVAL;
goto error;
}
+ div_u64_rem(len, (uint32_t)erasesize, &rem);
+ if (rem) {
+ parse_err("len is not multiple of erasesize\n");
+ ret = -EINVAL;
+ goto error;
+ }
+
ret = register_device(name, start, len, (uint32_t)erasesize);
if (ret)
goto error;
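The phram reorder closes a subtle divide-by-zero: erasesize is 64-bit, but div_u64_rem() takes a 32-bit divisor, so the old code's (uint32_t)erasesize cast could truncate a huge value (e.g. 1ULL << 32) to zero even though the erasesize-is-nonzero guard had passed. Validating the full range first, then dividing, in isolation:

    if (len == 0 || erasesize == 0 || erasesize > len || erasesize > UINT_MAX)
    	return -EINVAL;

    /* Safe now: 0 < erasesize <= UINT_MAX, so the cast cannot truncate to 0. */
    div_u64_rem(len, (uint32_t)erasesize, &rem);
    if (rem)
    	return -EINVAL;		/* len must be a multiple of erasesize */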
diff --git a/drivers/mtd/devices/sst25l.c b/drivers/mtd/devices/sst25l.c
index 7f124c1bfa40..8813994ce9f4 100644
--- a/drivers/mtd/devices/sst25l.c
+++ b/drivers/mtd/devices/sst25l.c
@@ -398,13 +398,11 @@ static int sst25l_probe(struct spi_device *spi)
return 0;
}
-static int sst25l_remove(struct spi_device *spi)
+static void sst25l_remove(struct spi_device *spi)
{
struct sst25l_flash *flash = spi_get_drvdata(spi);
WARN_ON(mtd_device_unregister(&flash->mtd));
-
- return 0;
}
static struct spi_driver sst25l_driver = {
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 70f492dce158..eef87b28d6c8 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -546,6 +546,7 @@ static int mtd_nvmem_add(struct mtd_info *mtd)
config.stride = 1;
config.read_only = true;
config.root_only = true;
+ config.ignore_wp = true;
config.no_of_node = !of_device_is_compatible(node, "nvmem-cells");
config.priv = mtd;
@@ -833,6 +834,7 @@ static struct nvmem_device *mtd_otp_nvmem_register(struct mtd_info *mtd,
config.owner = THIS_MODULE;
config.type = NVMEM_TYPE_OTP;
config.root_only = true;
+ config.ignore_wp = true;
config.reg_read = reg_read;
config.size = size;
config.of_node = np;
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index e86b04bc1d6b..dc7f1532a37f 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -19,7 +19,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
diff --git a/drivers/mtd/nand/raw/Kconfig b/drivers/mtd/nand/raw/Kconfig
index 20408b7db540..820e5dc3bc9b 100644
--- a/drivers/mtd/nand/raw/Kconfig
+++ b/drivers/mtd/nand/raw/Kconfig
@@ -42,7 +42,7 @@ config MTD_NAND_OMAP2
tristate "OMAP2, OMAP3, OMAP4 and Keystone NAND controller"
depends on ARCH_OMAP2PLUS || ARCH_KEYSTONE || ARCH_K3 || COMPILE_TEST
depends on HAS_IOMEM
- select OMAP_GPMC if ARCH_K3
+ depends on OMAP_GPMC
help
Support for NAND flash on Texas Instruments OMAP2, OMAP3, OMAP4
and Keystone platforms.
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index f75929783b94..aee78f5f4f15 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -2106,7 +2106,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
mtd->oobsize / trans,
host->hwcfg.sector_size_1k);
- if (!ret) {
+ if (ret != -EBADMSG) {
*err_addr = brcmnand_get_uncorrecc_addr(ctrl);
if (*err_addr)
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index 1b64c5a5140d..ded4df473928 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -2285,7 +2285,7 @@ static int gpmi_nfc_exec_op(struct nand_chip *chip,
this->hw.must_apply_timings = false;
ret = gpmi_nfc_apply_timings(this);
if (ret)
- return ret;
+ goto out_pm;
}
dev_dbg(this->dev, "%s: %d instructions\n", __func__, op->ninstrs);
@@ -2414,6 +2414,7 @@ unmap:
this->bch = false;
+out_pm:
pm_runtime_mark_last_busy(this->dev);
pm_runtime_put_autosuspend(this->dev);
diff --git a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c
index efe0ffe4f1ab..9054559e52dd 100644
--- a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c
+++ b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c
@@ -68,9 +68,14 @@ static struct ingenic_ecc *ingenic_ecc_get(struct device_node *np)
struct ingenic_ecc *ecc;
pdev = of_find_device_by_node(np);
- if (!pdev || !platform_get_drvdata(pdev))
+ if (!pdev)
return ERR_PTR(-EPROBE_DEFER);
+ if (!platform_get_drvdata(pdev)) {
+ put_device(&pdev->dev);
+ return ERR_PTR(-EPROBE_DEFER);
+ }
+
ecc = platform_get_drvdata(pdev);
clk_prepare_enable(ecc->clk);
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index 7c6efa3b6255..1a77542c6d67 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -2,7 +2,6 @@
/*
* Copyright (c) 2016, The Linux Foundation. All rights reserved.
*/
-
#include <linux/clk.h>
#include <linux/slab.h>
#include <linux/bitops.h>
@@ -3073,10 +3072,6 @@ static int qcom_nandc_probe(struct platform_device *pdev)
if (dma_mapping_error(dev, nandc->base_dma))
return -ENXIO;
- ret = qcom_nandc_alloc(nandc);
- if (ret)
- goto err_nandc_alloc;
-
ret = clk_prepare_enable(nandc->core_clk);
if (ret)
goto err_core_clk;
@@ -3085,6 +3080,10 @@ static int qcom_nandc_probe(struct platform_device *pdev)
if (ret)
goto err_aon_clk;
+ ret = qcom_nandc_alloc(nandc);
+ if (ret)
+ goto err_nandc_alloc;
+
ret = qcom_nandc_setup(nandc);
if (ret)
goto err_setup;
@@ -3096,15 +3095,14 @@ static int qcom_nandc_probe(struct platform_device *pdev)
return 0;
err_setup:
+ qcom_nandc_unalloc(nandc);
+err_nandc_alloc:
clk_disable_unprepare(nandc->aon_clk);
err_aon_clk:
clk_disable_unprepare(nandc->core_clk);
err_core_clk:
- qcom_nandc_unalloc(nandc);
-err_nandc_alloc:
dma_unmap_resource(dev, res->start, resource_size(res),
DMA_BIDIRECTIONAL, 0);
-
return ret;
}
diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c
index 5612ee628425..52ce5162538a 100644
--- a/drivers/mtd/nand/raw/sharpsl.c
+++ b/drivers/mtd/nand/raw/sharpsl.c
@@ -6,7 +6,6 @@
* Based on Sharp's NAND driver sharp_sl.c
*/
-#include <linux/genhd.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/delay.h>
diff --git a/drivers/mtd/parsers/qcomsmempart.c b/drivers/mtd/parsers/qcomsmempart.c
index 06a818cd2433..4311b89d8df0 100644
--- a/drivers/mtd/parsers/qcomsmempart.c
+++ b/drivers/mtd/parsers/qcomsmempart.c
@@ -58,11 +58,11 @@ static int parse_qcomsmem_part(struct mtd_info *mtd,
const struct mtd_partition **pparts,
struct mtd_part_parser_data *data)
{
+ size_t len = SMEM_FLASH_PTABLE_HDR_LEN;
+ int ret, i, j, tmpparts, numparts = 0;
struct smem_flash_pentry *pentry;
struct smem_flash_ptable *ptable;
- size_t len = SMEM_FLASH_PTABLE_HDR_LEN;
struct mtd_partition *parts;
- int ret, i, numparts;
char *name, *c;
if (IS_ENABLED(CONFIG_MTD_SPI_NOR_USE_4K_SECTORS)
@@ -75,7 +75,8 @@ static int parse_qcomsmem_part(struct mtd_info *mtd,
pr_debug("Parsing partition table info from SMEM\n");
ptable = qcom_smem_get(SMEM_APPS, SMEM_AARM_PARTITION_TABLE, &len);
if (IS_ERR(ptable)) {
- pr_err("Error reading partition table header\n");
+ if (PTR_ERR(ptable) != -EPROBE_DEFER)
+ pr_err("Error reading partition table header\n");
return PTR_ERR(ptable);
}
@@ -87,8 +88,8 @@ static int parse_qcomsmem_part(struct mtd_info *mtd,
}
/* Ensure that # of partitions is less than the max we have allocated */
- numparts = le32_to_cpu(ptable->numparts);
- if (numparts > SMEM_FLASH_PTABLE_MAX_PARTS_V4) {
+ tmpparts = le32_to_cpu(ptable->numparts);
+ if (tmpparts > SMEM_FLASH_PTABLE_MAX_PARTS_V4) {
pr_err("Partition numbers exceed the max limit\n");
return -EINVAL;
}
@@ -116,11 +117,17 @@ static int parse_qcomsmem_part(struct mtd_info *mtd,
return PTR_ERR(ptable);
}
+ for (i = 0; i < tmpparts; i++) {
+ pentry = &ptable->pentry[i];
+ if (pentry->name[0] != '\0')
+ numparts++;
+ }
+
parts = kcalloc(numparts, sizeof(*parts), GFP_KERNEL);
if (!parts)
return -ENOMEM;
- for (i = 0; i < numparts; i++) {
+ for (i = 0, j = 0; i < tmpparts; i++) {
pentry = &ptable->pentry[i];
if (pentry->name[0] == '\0')
continue;
@@ -135,24 +142,25 @@ static int parse_qcomsmem_part(struct mtd_info *mtd,
for (c = name; *c != '\0'; c++)
*c = tolower(*c);
- parts[i].name = name;
- parts[i].offset = le32_to_cpu(pentry->offset) * mtd->erasesize;
- parts[i].mask_flags = pentry->attr;
- parts[i].size = le32_to_cpu(pentry->length) * mtd->erasesize;
+ parts[j].name = name;
+ parts[j].offset = le32_to_cpu(pentry->offset) * mtd->erasesize;
+ parts[j].mask_flags = pentry->attr;
+ parts[j].size = le32_to_cpu(pentry->length) * mtd->erasesize;
pr_debug("%d: %s offs=0x%08x size=0x%08x attr:0x%08x\n",
i, pentry->name, le32_to_cpu(pentry->offset),
le32_to_cpu(pentry->length), pentry->attr);
+ j++;
}
pr_debug("SMEM partition table found: ver: %d len: %d\n",
- le32_to_cpu(ptable->version), numparts);
+ le32_to_cpu(ptable->version), tmpparts);
*pparts = parts;
return numparts;
out_free_parts:
- while (--i >= 0)
- kfree(parts[i].name);
+ while (--j >= 0)
+ kfree(parts[j].name);
kfree(parts);
*pparts = NULL;
@@ -166,6 +174,8 @@ static void parse_qcomsmem_cleanup(const struct mtd_partition *pparts,
for (i = 0; i < nr_parts; i++)
kfree(pparts[i].name);
+
+ kfree(pparts);
}
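The qcomsmempart rework is a two-pass fix for a mismatch between allocation size and loop index: the SMEM table's numparts counts slots, some of which are empty (name[0] == '\0'), but the old code sized the parts[] allocation from that raw count and indexed it with the same i it used to walk the table, leaving zeroed holes and over-reporting the partition count. The new shape counts real entries first, then fills compactly with a second index. The pattern in isolation (hypothetical types):

    int i, j, used = 0;

    /* Pass 1: count non-empty slots. */
    for (i = 0; i < total_slots; i++)
    	if (entry[i].name[0] != '\0')
    		used++;

    parts = kcalloc(used, sizeof(*parts), GFP_KERNEL);
    if (!parts)
    	return -ENOMEM;

    /* Pass 2: fill compactly; j advances only on real entries. */
    for (i = 0, j = 0; i < total_slots; i++) {
    	if (entry[i].name[0] == '\0')
    		continue;
    	fill_part(&parts[j++], &entry[i]);
    }

The cleanup hunk additionally frees the parts array itself, which the parser previously leaked.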
static const struct of_device_id qcomsmem_of_match_table[] = {
diff --git a/drivers/mtd/spi-nor/controllers/Kconfig b/drivers/mtd/spi-nor/controllers/Kconfig
index 5c0e0ec2e6d1..50f4f3484d42 100644
--- a/drivers/mtd/spi-nor/controllers/Kconfig
+++ b/drivers/mtd/spi-nor/controllers/Kconfig
@@ -26,39 +26,3 @@ config SPI_NXP_SPIFI
SPIFI is a specialized controller for connecting serial SPI
Flash. Enable this option if you have a device with a SPIFI
controller and want to access the Flash as a mtd device.
-
-config SPI_INTEL_SPI
- tristate
-
-config SPI_INTEL_SPI_PCI
- tristate "Intel PCH/PCU SPI flash PCI driver (DANGEROUS)"
- depends on X86 && PCI
- select SPI_INTEL_SPI
- help
- This enables PCI support for the Intel PCH/PCU SPI controller in
- master mode. This controller is present in modern Intel hardware
- and is used to hold BIOS and other persistent settings. Using
- this driver it is possible to upgrade BIOS directly from Linux.
-
- Say N here unless you know what you are doing. Overwriting the
- SPI flash may render the system unbootable.
-
- To compile this driver as a module, choose M here: the module
- will be called intel-spi-pci.
-
-config SPI_INTEL_SPI_PLATFORM
- tristate "Intel PCH/PCU SPI flash platform driver (DANGEROUS)"
- depends on X86
- select SPI_INTEL_SPI
- help
- This enables platform support for the Intel PCH/PCU SPI
- controller in master mode. This controller is present in modern
- Intel hardware and is used to hold BIOS and other persistent
- settings. Using this driver it is possible to upgrade BIOS
- directly from Linux.
-
- Say N here unless you know what you are doing. Overwriting the
- SPI flash may render the system unbootable.
-
- To compile this driver as a module, choose M here: the module
- will be called intel-spi-platform.
diff --git a/drivers/mtd/spi-nor/controllers/Makefile b/drivers/mtd/spi-nor/controllers/Makefile
index e7abba491d98..6e2a1dc68466 100644
--- a/drivers/mtd/spi-nor/controllers/Makefile
+++ b/drivers/mtd/spi-nor/controllers/Makefile
@@ -2,6 +2,3 @@
obj-$(CONFIG_SPI_ASPEED_SMC) += aspeed-smc.o
obj-$(CONFIG_SPI_HISI_SFC) += hisi-sfc.o
obj-$(CONFIG_SPI_NXP_SPIFI) += nxp-spifi.o
-obj-$(CONFIG_SPI_INTEL_SPI) += intel-spi.o
-obj-$(CONFIG_SPI_INTEL_SPI_PCI) += intel-spi-pci.o
-obj-$(CONFIG_SPI_INTEL_SPI_PLATFORM) += intel-spi-platform.o
diff --git a/drivers/mtd/spi-nor/controllers/intel-spi.h b/drivers/mtd/spi-nor/controllers/intel-spi.h
deleted file mode 100644
index f2871179fd34..000000000000
--- a/drivers/mtd/spi-nor/controllers/intel-spi.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel PCH/PCU SPI flash driver.
- *
- * Copyright (C) 2016, Intel Corporation
- * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
- */
-
-#ifndef INTEL_SPI_H
-#define INTEL_SPI_H
-
-#include <linux/platform_data/x86/intel-spi.h>
-
-struct intel_spi;
-struct resource;
-
-struct intel_spi *intel_spi_probe(struct device *dev,
- struct resource *mem, const struct intel_spi_boardinfo *info);
-int intel_spi_remove(struct intel_spi *ispi);
-
-#endif /* INTEL_SPI_H */
diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c
index 6382e1937cca..c580acb8b1d3 100644
--- a/drivers/net/arcnet/com20020-pci.c
+++ b/drivers/net/arcnet/com20020-pci.c
@@ -138,6 +138,9 @@ static int com20020pci_probe(struct pci_dev *pdev,
return -ENOMEM;
ci = (struct com20020_pci_card_info *)id->driver_data;
+ if (!ci)
+ return -EINVAL;
+
priv->ci = ci;
mm = &ci->misc_map;
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 6006c2e8fa2b..a86b1f71762e 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -225,7 +225,7 @@ static inline int __check_agg_selection_timer(struct port *port)
if (bond == NULL)
return 0;
- return BOND_AD_INFO(bond).agg_select_timer ? 1 : 0;
+ return atomic_read(&BOND_AD_INFO(bond).agg_select_timer) ? 1 : 0;
}
/**
@@ -1021,8 +1021,8 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
if (port->aggregator &&
port->aggregator->is_active &&
!__port_is_enabled(port)) {
-
__enable_port(port);
+ *update_slave_arr = true;
}
}
break;
@@ -1779,6 +1779,7 @@ static void ad_agg_selection_logic(struct aggregator *agg,
port = port->next_port_in_aggregator) {
__enable_port(port);
}
+ *update_slave_arr = true;
}
}
@@ -1994,7 +1995,7 @@ static void ad_marker_response_received(struct bond_marker *marker,
*/
void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout)
{
- BOND_AD_INFO(bond).agg_select_timer = timeout;
+ atomic_set(&BOND_AD_INFO(bond).agg_select_timer, timeout);
}
/**
@@ -2278,6 +2279,28 @@ void bond_3ad_update_ad_actor_settings(struct bonding *bond)
}
/**
+ * bond_agg_timer_advance - advance agg_select_timer
+ * @bond: bonding structure
+ *
+ * Return true when agg_select_timer reaches 0.
+ */
+static bool bond_agg_timer_advance(struct bonding *bond)
+{
+ int val, nval;
+
+ while (1) {
+ val = atomic_read(&BOND_AD_INFO(bond).agg_select_timer);
+ if (!val)
+ return false;
+ nval = val - 1;
+ if (atomic_cmpxchg(&BOND_AD_INFO(bond).agg_select_timer,
+ val, nval) == val)
+ break;
+ }
+ return nval == 0;
+}
+
+/**
* bond_3ad_state_machine_handler - handle state machines timeout
* @work: work context to fetch bonding struct to work on from
*
@@ -2312,9 +2335,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
if (!bond_has_slaves(bond))
goto re_arm;
- /* check if agg_select_timer timer after initialize is timed out */
- if (BOND_AD_INFO(bond).agg_select_timer &&
- !(--BOND_AD_INFO(bond).agg_select_timer)) {
+ if (bond_agg_timer_advance(bond)) {
slave = bond_first_slave_rcu(bond);
port = slave ? &(SLAVE_AD_INFO(slave)->port) : NULL;
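The bonding change moves agg_select_timer into an atomic_t because the state-machine work and bond_3ad_initiate_agg_selection() can touch it concurrently; bond_agg_timer_advance() is a lock-free decrement-unless-zero. The same idiom written with atomic_try_cmpxchg(), as a sketch:

    static bool timer_advance(atomic_t *t)
    {
    	int val = atomic_read(t);

    	do {
    		if (!val)
    			return false;		/* already expired */
    	} while (!atomic_try_cmpxchg(t, &val, val - 1));

    	return val == 1;			/* we made it reach 0 */
    }

atomic_try_cmpxchg() refreshes val on failure, so the retry loop never has to re-read by hand the way the open-coded atomic_cmpxchg() version in the hunk does.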
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index ec498ce70f35..aebeb46e6fa6 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2379,10 +2379,9 @@ static int __bond_release_one(struct net_device *bond_dev,
bond_select_active_slave(bond);
}
- if (!bond_has_slaves(bond)) {
- bond_set_carrier(bond);
+ bond_set_carrier(bond);
+ if (!bond_has_slaves(bond))
eth_hw_addr_random(bond_dev);
- }
unblock_netpoll_tx();
synchronize_rcu();
@@ -4133,9 +4132,7 @@ static int bond_eth_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cm
fallthrough;
case SIOCGHWTSTAMP:
- rcu_read_lock();
real_dev = bond_option_active_slave_get_rcu(bond);
- rcu_read_unlock();
if (!real_dev)
return -EOPNOTSUPP;
@@ -5382,9 +5379,7 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
struct net_device *real_dev;
struct phy_device *phydev;
- rcu_read_lock();
real_dev = bond_option_active_slave_get_rcu(bond);
- rcu_read_unlock();
if (real_dev) {
ops = real_dev->ethtool_ops;
phydev = real_dev->phydev;
diff --git a/drivers/net/can/flexcan/flexcan-core.c b/drivers/net/can/flexcan/flexcan-core.c
index 0bff1884d5cc..74d7fcbfd065 100644
--- a/drivers/net/can/flexcan/flexcan-core.c
+++ b/drivers/net/can/flexcan/flexcan-core.c
@@ -296,6 +296,7 @@ static_assert(sizeof(struct flexcan_regs) == 0x4 * 18 + 0xfb8);
static const struct flexcan_devtype_data fsl_mcf5441x_devtype_data = {
.quirks = FLEXCAN_QUIRK_BROKEN_PERR_STATE |
FLEXCAN_QUIRK_NR_IRQ_3 | FLEXCAN_QUIRK_NR_MB_16 |
+ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
FLEXCAN_QUIRK_SUPPPORT_RX_FIFO,
};
diff --git a/drivers/net/can/flexcan/flexcan.h b/drivers/net/can/flexcan/flexcan.h
index fccdff8b1f0f..23fc09a7e10f 100644
--- a/drivers/net/can/flexcan/flexcan.h
+++ b/drivers/net/can/flexcan/flexcan.h
@@ -21,7 +21,7 @@
* Below is some version info we got:
* SOC Version IP-Version Glitch- [TR]WRN_INT IRQ Err Memory err RTR rece- FD Mode MB
* Filter? connected? Passive detection ption in MB Supported?
- * MCF5441X FlexCAN2 ? no yes no no yes no 16
+ * MCF5441X FlexCAN2 ? no yes no no no no 16
* MX25 FlexCAN2 03.00.00.00 no no no no no no 64
* MX28 FlexCAN2 03.00.04.00 yes yes no no no no 64
* MX35 FlexCAN2 03.00.00.00 no no no no no no 64
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 5b47cd867783..1a4b56f6fa8c 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -336,6 +336,9 @@ m_can_fifo_read(struct m_can_classdev *cdev,
u32 addr_offset = cdev->mcfg[MRAM_RXF0].off + fgi * RXF0_ELEMENT_SIZE +
offset;
+ if (val_count == 0)
+ return 0;
+
return cdev->ops->read_fifo(cdev, addr_offset, val, val_count);
}
@@ -346,6 +349,9 @@ m_can_fifo_write(struct m_can_classdev *cdev,
u32 addr_offset = cdev->mcfg[MRAM_TXB].off + fpi * TXB_ELEMENT_SIZE +
offset;
+ if (val_count == 0)
+ return 0;
+
return cdev->ops->write_fifo(cdev, addr_offset, val, val_count);
}
diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c
index 04687b15b250..41645a24384c 100644
--- a/drivers/net/can/m_can/tcan4x5x-core.c
+++ b/drivers/net/can/m_can/tcan4x5x-core.c
@@ -388,7 +388,7 @@ out_power:
return ret;
}
-static int tcan4x5x_can_remove(struct spi_device *spi)
+static void tcan4x5x_can_remove(struct spi_device *spi)
{
struct tcan4x5x_priv *priv = spi_get_drvdata(spi);
@@ -397,8 +397,6 @@ static int tcan4x5x_can_remove(struct spi_device *spi)
tcan4x5x_power_enable(priv->power, 0);
m_can_class_free_dev(priv->cdev.net);
-
- return 0;
}
static const struct of_device_id tcan4x5x_of_match[] = {
diff --git a/drivers/net/can/m_can/tcan4x5x-regmap.c b/drivers/net/can/m_can/tcan4x5x-regmap.c
index ca80dbaf7a3f..26e212b8ca7a 100644
--- a/drivers/net/can/m_can/tcan4x5x-regmap.c
+++ b/drivers/net/can/m_can/tcan4x5x-regmap.c
@@ -12,7 +12,7 @@
#define TCAN4X5X_SPI_INSTRUCTION_WRITE (0x61 << 24)
#define TCAN4X5X_SPI_INSTRUCTION_READ (0x41 << 24)
-#define TCAN4X5X_MAX_REGISTER 0x8ffc
+#define TCAN4X5X_MAX_REGISTER 0x87fc
static int tcan4x5x_regmap_gather_write(void *context,
const void *reg, size_t reg_len,
diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c
index b7dc1c32875f..acd74725831f 100644
--- a/drivers/net/can/rcar/rcar_canfd.c
+++ b/drivers/net/can/rcar/rcar_canfd.c
@@ -1715,15 +1715,15 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch,
netif_napi_add(ndev, &priv->napi, rcar_canfd_rx_poll,
RCANFD_NAPI_WEIGHT);
+ spin_lock_init(&priv->tx_lock);
+ devm_can_led_init(ndev);
+ gpriv->ch[priv->channel] = priv;
err = register_candev(ndev);
if (err) {
dev_err(&pdev->dev,
"register_candev() failed, error %d\n", err);
goto fail_candev;
}
- spin_lock_init(&priv->tx_lock);
- devm_can_led_init(ndev);
- gpriv->ch[priv->channel] = priv;
dev_info(&pdev->dev, "device registered (channel %u)\n", priv->channel);
return 0;
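The rcar_canfd hunk enforces a general registration rule: everything a callback path may touch must be initialized before register_candev() publishes the device, since ndo_open() and the interrupt handlers can run as soon as registration succeeds. Here the tx_lock spinlock and the gpriv->ch[] back-pointer (used by the shared IRQ handlers to find the channel) now exist before the netdev goes live; in skeleton form:

    spin_lock_init(&priv->tx_lock);		/* used by the xmit/IRQ path */
    gpriv->ch[priv->channel] = priv;	/* the IRQ handlers' lookup table */

    err = register_candev(ndev);		/* the device is live from here on */
    if (err)
    	goto fail_candev;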
diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c
index cfcc14fe3e42..664b8f14d7b0 100644
--- a/drivers/net/can/spi/hi311x.c
+++ b/drivers/net/can/spi/hi311x.c
@@ -948,7 +948,7 @@ static int hi3110_can_probe(struct spi_device *spi)
return dev_err_probe(dev, ret, "Probe failed\n");
}
-static int hi3110_can_remove(struct spi_device *spi)
+static void hi3110_can_remove(struct spi_device *spi)
{
struct hi3110_priv *priv = spi_get_drvdata(spi);
struct net_device *net = priv->net;
@@ -960,8 +960,6 @@ static int hi3110_can_remove(struct spi_device *spi)
clk_disable_unprepare(priv->clk);
free_candev(net);
-
- return 0;
}
static int __maybe_unused hi3110_can_suspend(struct device *dev)
diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
index 025e07cb7439..d23edaf22420 100644
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c
@@ -1427,7 +1427,7 @@ out_free:
return ret;
}
-static int mcp251x_can_remove(struct spi_device *spi)
+static void mcp251x_can_remove(struct spi_device *spi)
{
struct mcp251x_priv *priv = spi_get_drvdata(spi);
struct net_device *net = priv->net;
@@ -1442,8 +1442,6 @@ static int mcp251x_can_remove(struct spi_device *spi)
clk_disable_unprepare(priv->clk);
free_candev(net);
-
- return 0;
}
static int __maybe_unused mcp251x_can_suspend(struct device *dev)
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index b5986df6eca0..65c9b31666a6 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -1966,7 +1966,7 @@ static int mcp251xfd_probe(struct spi_device *spi)
return err;
}
-static int mcp251xfd_remove(struct spi_device *spi)
+static void mcp251xfd_remove(struct spi_device *spi)
{
struct mcp251xfd_priv *priv = spi_get_drvdata(spi);
struct net_device *ndev = priv->ndev;
@@ -1975,8 +1975,6 @@ static int mcp251xfd_remove(struct spi_device *spi)
mcp251xfd_unregister(priv);
spi->max_speed_hz = priv->spi_max_speed_hz_orig;
free_candev(ndev);
-
- return 0;
}
static int __maybe_unused mcp251xfd_runtime_suspend(struct device *device)
diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c
index 2ed2370a3166..2d73ebbf3836 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_core.c
+++ b/drivers/net/can/usb/etas_es58x/es58x_core.c
@@ -1787,7 +1787,7 @@ static int es58x_open(struct net_device *netdev)
struct es58x_device *es58x_dev = es58x_priv(netdev)->es58x_dev;
int ret;
- if (atomic_inc_return(&es58x_dev->opened_channel_cnt) == 1) {
+ if (!es58x_dev->opened_channel_cnt) {
ret = es58x_alloc_rx_urbs(es58x_dev);
if (ret)
return ret;
@@ -1805,12 +1805,13 @@ static int es58x_open(struct net_device *netdev)
if (ret)
goto free_urbs;
+ es58x_dev->opened_channel_cnt++;
netif_start_queue(netdev);
return ret;
free_urbs:
- if (atomic_dec_and_test(&es58x_dev->opened_channel_cnt))
+ if (!es58x_dev->opened_channel_cnt)
es58x_free_urbs(es58x_dev);
netdev_err(netdev, "%s: Could not open the network device: %pe\n",
__func__, ERR_PTR(ret));
@@ -1845,7 +1846,8 @@ static int es58x_stop(struct net_device *netdev)
es58x_flush_pending_tx_msg(netdev);
- if (atomic_dec_and_test(&es58x_dev->opened_channel_cnt))
+ es58x_dev->opened_channel_cnt--;
+ if (!es58x_dev->opened_channel_cnt)
es58x_free_urbs(es58x_dev);
return 0;
@@ -2215,7 +2217,6 @@ static struct es58x_device *es58x_init_es58x_dev(struct usb_interface *intf,
init_usb_anchor(&es58x_dev->tx_urbs_idle);
init_usb_anchor(&es58x_dev->tx_urbs_busy);
atomic_set(&es58x_dev->tx_urbs_idle_cnt, 0);
- atomic_set(&es58x_dev->opened_channel_cnt, 0);
usb_set_intfdata(intf, es58x_dev);
es58x_dev->rx_pipe = usb_rcvbulkpipe(es58x_dev->udev,
diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.h b/drivers/net/can/usb/etas_es58x/es58x_core.h
index 826a15871573..e5033cb5e695 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_core.h
+++ b/drivers/net/can/usb/etas_es58x/es58x_core.h
@@ -373,8 +373,6 @@ struct es58x_operators {
* queue wake/stop logic should prevent this URB from getting
* empty. Please refer to es58x_get_tx_urb() for more details.
* @tx_urbs_idle_cnt: number of urbs in @tx_urbs_idle.
- * @opened_channel_cnt: number of channels opened (c.f. es58x_open()
- * and es58x_stop()).
* @ktime_req_ns: kernel timestamp when es58x_set_realtime_diff_ns()
* was called.
* @realtime_diff_ns: difference in nanoseconds between the clocks of
@@ -384,6 +382,10 @@ struct es58x_operators {
* in RX branches.
* @rx_max_packet_size: Maximum length of bulk-in URB.
* @num_can_ch: Number of CAN channel (i.e. number of elements of @netdev).
+ * @opened_channel_cnt: number of channels opened. Free of race
+ * conditions because its two users (net_device_ops:ndo_open()
+ * and net_device_ops:ndo_close()) guarantee that the network
+ * stack big kernel lock (a.k.a. rtnl_mutex) is held.
* @rx_cmd_buf_len: Length of @rx_cmd_buf.
* @rx_cmd_buf: The device might split the URB commands in an
* arbitrary amount of pieces. This buffer is used to concatenate
@@ -406,7 +408,6 @@ struct es58x_device {
struct usb_anchor tx_urbs_busy;
struct usb_anchor tx_urbs_idle;
atomic_t tx_urbs_idle_cnt;
- atomic_t opened_channel_cnt;
u64 ktime_req_ns;
s64 realtime_diff_ns;
@@ -415,6 +416,7 @@ struct es58x_device {
u16 rx_max_packet_size;
u8 num_can_ch;
+ u8 opened_channel_cnt;
u16 rx_cmd_buf_len;
union es58x_urb_cmd rx_cmd_buf;
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index b487e3fe770a..d35749fad1ef 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -191,8 +191,8 @@ struct gs_can {
struct gs_usb {
struct gs_can *canch[GS_MAX_INTF];
struct usb_anchor rx_submitted;
- atomic_t active_channels;
struct usb_device *udev;
+ u8 active_channels;
};
/* 'allocate' a tx context.
@@ -589,7 +589,7 @@ static int gs_can_open(struct net_device *netdev)
if (rc)
return rc;
- if (atomic_add_return(1, &parent->active_channels) == 1) {
+ if (!parent->active_channels) {
for (i = 0; i < GS_MAX_RX_URBS; i++) {
struct urb *urb;
u8 *buf;
@@ -690,6 +690,7 @@ static int gs_can_open(struct net_device *netdev)
dev->can.state = CAN_STATE_ERROR_ACTIVE;
+ parent->active_channels++;
if (!(dev->can.ctrlmode & CAN_CTRLMODE_LISTENONLY))
netif_start_queue(netdev);
@@ -705,7 +706,8 @@ static int gs_can_close(struct net_device *netdev)
netif_stop_queue(netdev);
/* Stop polling */
- if (atomic_dec_and_test(&parent->active_channels))
+ parent->active_channels--;
+ if (!parent->active_channels)
usb_kill_anchored_urbs(&parent->rx_submitted);
/* Stop sending URBs */
@@ -984,8 +986,6 @@ static int gs_usb_probe(struct usb_interface *intf,
init_usb_anchor(&dev->rx_submitted);
- atomic_set(&dev->active_channels, 0);
-
usb_set_intfdata(intf, dev);
dev->udev = interface_to_usbdev(intf);
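Both es58x and gs_usb drop an atomic_t open-channel counter in favor of a plain integer: ndo_open() and ndo_close() are always called with the RTNL mutex held, so the counter is already serialized and the atomic only obscured that. A sketch making the locking assumption explicit (hypothetical driver names):

    static int my_open(struct net_device *netdev)
    {
    	struct my_usb_dev *dev = my_priv(netdev)->parent;

    	ASSERT_RTNL();		/* documents the serialization */

    	if (!dev->opened_channel_cnt) {
    		/* First open across all channels: set up the
    		 * shared RX URBs here.
    		 */
    	}
    	dev->opened_channel_cnt++;

    	return 0;
    }

Note that the es58x hunk also moves the increment past the failure points, so an error during URB setup no longer leaves the count inflated.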
diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index 7b1457a6e327..0029d279616f 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -36,6 +36,7 @@ config NET_DSA_LANTIQ_GSWIP
config NET_DSA_MT7530
tristate "MediaTek MT753x and MT7621 Ethernet switch support"
select NET_DSA_TAG_MTK
+ select MEDIATEK_GE_PHY
help
This enables support for the MediaTek MT7530, MT7531, and MT7621
Ethernet switch chips.
@@ -81,6 +82,7 @@ config NET_DSA_REALTEK_SMI
config NET_DSA_SMSC_LAN9303
tristate
+ depends on VLAN_8021Q || VLAN_8021Q=n
select NET_DSA_TAG_LAN9303
select REGMAP
help
diff --git a/drivers/net/dsa/b53/b53_spi.c b/drivers/net/dsa/b53/b53_spi.c
index 2b88f03e5252..0e54b2a0c211 100644
--- a/drivers/net/dsa/b53/b53_spi.c
+++ b/drivers/net/dsa/b53/b53_spi.c
@@ -314,7 +314,7 @@ static int b53_spi_probe(struct spi_device *spi)
return 0;
}
-static int b53_spi_remove(struct spi_device *spi)
+static void b53_spi_remove(struct spi_device *spi)
{
struct b53_device *dev = spi_get_drvdata(spi);
@@ -322,8 +322,6 @@ static int b53_spi_remove(struct spi_device *spi)
b53_switch_remove(dev);
spi_set_drvdata(spi, NULL);
-
- return 0;
}
static void b53_spi_shutdown(struct spi_device *spi)
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 33499fcd8848..6afb5db8244c 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -621,7 +621,7 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds)
get_device(&priv->master_mii_bus->dev);
priv->master_mii_dn = dn;
- priv->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
+ priv->slave_mii_bus = mdiobus_alloc();
if (!priv->slave_mii_bus) {
of_node_put(dn);
return -ENOMEM;
@@ -681,8 +681,10 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds)
}
err = mdiobus_register(priv->slave_mii_bus);
- if (err && dn)
+ if (err && dn) {
+ mdiobus_free(priv->slave_mii_bus);
of_node_put(dn);
+ }
return err;
}
@@ -690,6 +692,7 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds)
static void bcm_sf2_mdio_unregister(struct bcm_sf2_priv *priv)
{
mdiobus_unregister(priv->slave_mii_bus);
+ mdiobus_free(priv->slave_mii_bus);
of_node_put(priv->master_mii_dn);
}
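The bcm_sf2 MDIO change (and the lantiq_gswip one below) swaps devm_mdiobus_alloc() for manual mdiobus_alloc()/mdiobus_free() pairing. With devres, the bus structure is freed automatically at driver unbind, but mdiobus_free() on a bus that is still registered is a hard bug, and the DSA teardown paths that unregister the bus do not always line up with the devres unwind order; allocating manually lets the driver free the bus in exactly the paths that unregister it. The resulting pairing, in isolation:

    bus = mdiobus_alloc();
    if (!bus)
    	return -ENOMEM;

    err = mdiobus_register(bus);
    if (err) {
    	mdiobus_free(bus);	/* error path frees it immediately */
    	return err;
    }

    /* ...later, teardown mirrors setup: */
    mdiobus_unregister(bus);
    mdiobus_free(bus);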
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index d55784d19fa4..3969d89fa4db 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -10,6 +10,7 @@
#include <linux/mii.h>
#include <linux/phy.h>
#include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
#include <linux/etherdevice.h>
#include "lan9303.h"
@@ -1083,21 +1084,27 @@ static void lan9303_adjust_link(struct dsa_switch *ds, int port,
static int lan9303_port_enable(struct dsa_switch *ds, int port,
struct phy_device *phy)
{
+ struct dsa_port *dp = dsa_to_port(ds, port);
struct lan9303 *chip = ds->priv;
- if (!dsa_is_user_port(ds, port))
+ if (!dsa_port_is_user(dp))
return 0;
+ vlan_vid_add(dp->cpu_dp->master, htons(ETH_P_8021Q), port);
+
return lan9303_enable_processing_port(chip, port);
}
static void lan9303_port_disable(struct dsa_switch *ds, int port)
{
+ struct dsa_port *dp = dsa_to_port(ds, port);
struct lan9303 *chip = ds->priv;
- if (!dsa_is_user_port(ds, port))
+ if (!dsa_port_is_user(dp))
return;
+ vlan_vid_del(dp->cpu_dp->master, htons(ETH_P_8021Q), port);
+
lan9303_disable_processing_port(chip, port);
lan9303_phy_write(ds, chip->phy_addr_base + port, MII_BMCR, BMCR_PDOWN);
}
@@ -1310,7 +1317,7 @@ static int lan9303_probe_reset_gpio(struct lan9303 *chip,
struct device_node *np)
{
chip->reset_gpio = devm_gpiod_get_optional(chip->dev, "reset",
- GPIOD_OUT_LOW);
+ GPIOD_OUT_HIGH);
if (IS_ERR(chip->reset_gpio))
return PTR_ERR(chip->reset_gpio);
diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 46ed953e787e..8a7a8093a156 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -498,8 +498,9 @@ static int gswip_mdio_rd(struct mii_bus *bus, int addr, int reg)
static int gswip_mdio(struct gswip_priv *priv, struct device_node *mdio_np)
{
struct dsa_switch *ds = priv->ds;
+ int err;
- ds->slave_mii_bus = devm_mdiobus_alloc(priv->dev);
+ ds->slave_mii_bus = mdiobus_alloc();
if (!ds->slave_mii_bus)
return -ENOMEM;
@@ -512,7 +513,11 @@ static int gswip_mdio(struct gswip_priv *priv, struct device_node *mdio_np)
ds->slave_mii_bus->parent = priv->dev;
ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask;
- return of_mdiobus_register(ds->slave_mii_bus, mdio_np);
+ err = of_mdiobus_register(ds->slave_mii_bus, mdio_np);
+ if (err)
+ mdiobus_free(ds->slave_mii_bus);
+
+ return err;
}
static int gswip_pce_table_entry_read(struct gswip_priv *priv,
@@ -2145,8 +2150,10 @@ disable_switch:
gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB);
dsa_unregister_switch(priv->ds);
mdio_bus:
- if (mdio_np)
+ if (mdio_np) {
mdiobus_unregister(priv->ds->slave_mii_bus);
+ mdiobus_free(priv->ds->slave_mii_bus);
+ }
put_mdio_node:
of_node_put(mdio_np);
for (i = 0; i < priv->num_gphy_fw; i++)
@@ -2170,6 +2177,7 @@ static int gswip_remove(struct platform_device *pdev)
if (priv->ds->slave_mii_bus) {
mdiobus_unregister(priv->ds->slave_mii_bus);
of_node_put(priv->ds->slave_mii_bus->dev.of_node);
+ mdiobus_free(priv->ds->slave_mii_bus);
}
for (i = 0; i < priv->num_gphy_fw; i++)
diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c
index 866767b70d65..5f8d94aee774 100644
--- a/drivers/net/dsa/microchip/ksz8795_spi.c
+++ b/drivers/net/dsa/microchip/ksz8795_spi.c
@@ -87,7 +87,7 @@ static int ksz8795_spi_probe(struct spi_device *spi)
return 0;
}
-static int ksz8795_spi_remove(struct spi_device *spi)
+static void ksz8795_spi_remove(struct spi_device *spi)
{
struct ksz_device *dev = spi_get_drvdata(spi);
@@ -95,8 +95,6 @@ static int ksz8795_spi_remove(struct spi_device *spi)
ksz_switch_remove(dev);
spi_set_drvdata(spi, NULL);
-
- return 0;
}
static void ksz8795_spi_shutdown(struct spi_device *spi)
@@ -124,12 +122,23 @@ static const struct of_device_id ksz8795_dt_ids[] = {
};
MODULE_DEVICE_TABLE(of, ksz8795_dt_ids);
+static const struct spi_device_id ksz8795_spi_ids[] = {
+ { "ksz8765" },
+ { "ksz8794" },
+ { "ksz8795" },
+ { "ksz8863" },
+ { "ksz8873" },
+ { },
+};
+MODULE_DEVICE_TABLE(spi, ksz8795_spi_ids);
+
static struct spi_driver ksz8795_spi_driver = {
.driver = {
.name = "ksz8795-switch",
.owner = THIS_MODULE,
.of_match_table = of_match_ptr(ksz8795_dt_ids),
},
+ .id_table = ksz8795_spi_ids,
.probe = ksz8795_spi_probe,
.remove = ksz8795_spi_remove,
.shutdown = ksz8795_spi_shutdown,
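[note] The id_table added here (and in the ksz9477 hunk below) matters for module autoloading: even for devices instantiated from the device tree, the SPI core reports a MODALIAS of the form spi:<name> (the compatible string with its vendor prefix stripped), so an OF match table alone is not enough for modprobe to find the driver. A sketch of the pattern, with hypothetical part names:

#include <linux/mod_devicetable.h>
#include <linux/spi/spi.h>

static const struct spi_device_id foo_spi_ids[] = {
	{ "foo1234" },	/* device part of a "vendor,foo1234" compatible */
	{ "foo5678" },
	{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(spi, foo_spi_ids);

/* hooked up in the driver as:
 *	.id_table = foo_spi_ids,
 */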
diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c
index e3cb0e6c9f6f..87ca464dad32 100644
--- a/drivers/net/dsa/microchip/ksz9477_spi.c
+++ b/drivers/net/dsa/microchip/ksz9477_spi.c
@@ -65,7 +65,7 @@ static int ksz9477_spi_probe(struct spi_device *spi)
return 0;
}
-static int ksz9477_spi_remove(struct spi_device *spi)
+static void ksz9477_spi_remove(struct spi_device *spi)
{
struct ksz_device *dev = spi_get_drvdata(spi);
@@ -73,8 +73,6 @@ static int ksz9477_spi_remove(struct spi_device *spi)
ksz_switch_remove(dev);
spi_set_drvdata(spi, NULL);
-
- return 0;
}
static void ksz9477_spi_shutdown(struct spi_device *spi)
@@ -98,12 +96,24 @@ static const struct of_device_id ksz9477_dt_ids[] = {
};
MODULE_DEVICE_TABLE(of, ksz9477_dt_ids);
+static const struct spi_device_id ksz9477_spi_ids[] = {
+ { "ksz9477" },
+ { "ksz9897" },
+ { "ksz9893" },
+ { "ksz9563" },
+ { "ksz8563" },
+ { "ksz9567" },
+ { },
+};
+MODULE_DEVICE_TABLE(spi, ksz9477_spi_ids);
+
static struct spi_driver ksz9477_spi_driver = {
.driver = {
.name = "ksz9477-switch",
.owner = THIS_MODULE,
.of_match_table = of_match_ptr(ksz9477_dt_ids),
},
+ .id_table = ksz9477_spi_ids,
.probe = ksz9477_spi_probe,
.remove = ksz9477_spi_remove,
.shutdown = ksz9477_spi_shutdown,
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 55dbda04ea62..243f8ad6d06e 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -26,7 +26,7 @@ void ksz_update_port_member(struct ksz_device *dev, int port)
struct dsa_switch *ds = dev->ds;
u8 port_member = 0, cpu_port;
const struct dsa_port *dp;
- int i;
+ int i, j;
if (!dsa_is_user_port(ds, port))
return;
@@ -45,13 +45,33 @@ void ksz_update_port_member(struct ksz_device *dev, int port)
continue;
if (!dsa_port_bridge_same(dp, other_dp))
continue;
+ if (other_p->stp_state != BR_STATE_FORWARDING)
+ continue;
- if (other_p->stp_state == BR_STATE_FORWARDING &&
- p->stp_state == BR_STATE_FORWARDING) {
+ if (p->stp_state == BR_STATE_FORWARDING) {
val |= BIT(port);
port_member |= BIT(i);
}
+ /* Retain port [i]'s relationship to ports other than [port] */
+ for (j = 0; j < ds->num_ports; j++) {
+ const struct dsa_port *third_dp;
+ struct ksz_port *third_p;
+
+ if (j == i)
+ continue;
+ if (j == port)
+ continue;
+ if (!dsa_is_user_port(ds, j))
+ continue;
+ third_p = &dev->ports[j];
+ if (third_p->stp_state != BR_STATE_FORWARDING)
+ continue;
+ third_dp = dsa_to_port(ds, j);
+ if (dsa_port_bridge_same(other_dp, third_dp))
+ val |= BIT(j);
+ }
+
dev->dev_ops->cfg_port_member(dev, i, val | cpu_port);
}
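[note] To see why the inner j loop is needed, consider user ports 1, 2 and 3 in the same bridge, all in BR_STATE_FORWARDING. When ksz_update_port_member() runs for port 1, the outer loop rewrites the membership register of each other port i. Without the inner loop, port 2's register would be recomputed from its relationship to port 1 alone, silently dropping port 3 and breaking traffic between ports 2 and 3. The inner loop re-adds every third forwarding port j that shares a bridge with port i, so the rewrite preserves the rest of the topology. The intended end state for this example (illustrative only, with cpu_port OR'ed in by the final call):

	port 1 member mask: BIT(2) | BIT(3) | cpu_port
	port 2 member mask: BIT(1) | BIT(3) | cpu_port
	port 3 member mask: BIT(1) | BIT(2) | cpu_port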
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index b82512e5b33b..a251bc55727f 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -2074,7 +2074,7 @@ mt7530_setup_mdio(struct mt7530_priv *priv)
if (priv->irq)
mt7530_setup_mdio_irq(priv);
- ret = mdiobus_register(bus);
+ ret = devm_mdiobus_register(dev, bus);
if (ret) {
dev_err(dev, "failed to register MDIO bus: %d\n", ret);
if (priv->irq)
@@ -2936,7 +2936,7 @@ mt753x_phylink_validate(struct dsa_switch *ds, int port,
phylink_set_port_modes(mask);
- if (state->interface != PHY_INTERFACE_MODE_TRGMII ||
+ if (state->interface != PHY_INTERFACE_MODE_TRGMII &&
!phy_interface_mode_is_8023z(state->interface)) {
phylink_set(mask, 10baseT_Half);
phylink_set(mask, 10baseT_Full);
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 58ca684d73f7..ab1676553714 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2284,6 +2284,13 @@ static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port,
if (!mv88e6xxx_max_vid(chip))
return -EOPNOTSUPP;
+ /* The ATU removal procedure needs the FID to be mapped in the VTU,
+ * but FDB deletion runs concurrently with VLAN deletion. Flush the DSA
+ * switchdev workqueue to ensure that all FDB entries are deleted
+ * before we remove the VLAN.
+ */
+ dsa_flush_workqueue();
+
mv88e6xxx_reg_lock(chip);
err = mv88e6xxx_port_get_pvid(chip, port, &pvid);
@@ -3399,7 +3406,7 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
return err;
}
- bus = devm_mdiobus_alloc_size(chip->dev, sizeof(*mdio_bus));
+ bus = mdiobus_alloc_size(sizeof(*mdio_bus));
if (!bus)
return -ENOMEM;
@@ -3424,14 +3431,14 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
if (!external) {
err = mv88e6xxx_g2_irq_mdio_setup(chip, bus);
if (err)
- return err;
+ goto out;
}
err = of_mdiobus_register(bus, np);
if (err) {
dev_err(chip->dev, "Cannot register MDIO bus (%d)\n", err);
mv88e6xxx_g2_irq_mdio_free(chip, bus);
- return err;
+ goto out;
}
if (external)
@@ -3440,21 +3447,26 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
list_add(&mdio_bus->list, &chip->mdios);
return 0;
+
+out:
+ mdiobus_free(bus);
+ return err;
}
static void mv88e6xxx_mdios_unregister(struct mv88e6xxx_chip *chip)
{
- struct mv88e6xxx_mdio_bus *mdio_bus;
+ struct mv88e6xxx_mdio_bus *mdio_bus, *p;
struct mii_bus *bus;
- list_for_each_entry(mdio_bus, &chip->mdios, list) {
+ list_for_each_entry_safe(mdio_bus, p, &chip->mdios, list) {
bus = mdio_bus->bus;
if (!mdio_bus->external)
mv88e6xxx_g2_irq_mdio_free(chip, bus);
mdiobus_unregister(bus);
+ mdiobus_free(bus);
}
}
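[note] The switch to the _safe iterator above is required once mdiobus_free() runs inside the loop: the bus was allocated with mdiobus_alloc_size(sizeof(*mdio_bus)), so the wrapper holding the list node lives inside the mii_bus allocation and is freed along with it. A minimal sketch of the pattern, with hypothetical names:

#include <linux/list.h>
#include <linux/phy.h>

struct foo_mdio_bus {
	struct mii_bus *bus;
	struct list_head list;	/* embedded in the bus's private area */
};

static void foo_mdios_unregister(struct list_head *mdios)
{
	struct foo_mdio_bus *mdio_bus, *tmp;

	/* the _safe variant caches the next node before the body frees it */
	list_for_each_entry_safe(mdio_bus, tmp, mdios, list) {
		mdiobus_unregister(mdio_bus->bus);
		mdiobus_free(mdio_bus->bus);	/* also frees the node we walked */
	}
}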
diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index bf8d38239e7e..33f0ceae381d 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1061,7 +1061,7 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot)
return PTR_ERR(hw);
}
- bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv));
+ bus = mdiobus_alloc_size(sizeof(*mdio_priv));
if (!bus)
return -ENOMEM;
@@ -1081,6 +1081,7 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot)
rc = mdiobus_register(bus);
if (rc < 0) {
dev_err(dev, "failed to register MDIO bus\n");
+ mdiobus_free(bus);
return rc;
}
@@ -1132,6 +1133,7 @@ static void vsc9959_mdio_bus_free(struct ocelot *ocelot)
lynx_pcs_destroy(phylink_pcs);
}
mdiobus_unregister(felix->imdio);
+ mdiobus_free(felix->imdio);
}
static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port,
diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c
index 8c1c9da61602..f2f1608a476c 100644
--- a/drivers/net/dsa/ocelot/seville_vsc9953.c
+++ b/drivers/net/dsa/ocelot/seville_vsc9953.c
@@ -1029,7 +1029,7 @@ static int vsc9953_mdio_bus_alloc(struct ocelot *ocelot)
}
/* Needed in order to initialize the bus mutex lock */
- rc = of_mdiobus_register(bus, NULL);
+ rc = devm_of_mdiobus_register(dev, bus, NULL);
if (rc < 0) {
dev_err(dev, "failed to register MDIO bus\n");
return rc;
@@ -1083,7 +1083,8 @@ static void vsc9953_mdio_bus_free(struct ocelot *ocelot)
mdio_device_free(mdio_device);
lynx_pcs_destroy(phylink_pcs);
}
- mdiobus_unregister(felix->imdio);
+
+ /* mdiobus_unregister() and mdiobus_free() are handled by devres */
}
static const struct felix_info seville_info_vsc9953 = {
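[note] seville goes the opposite way from the drivers above: rather than adding a manual mdiobus_free(), it switches to devm_of_mdiobus_register(), which arms a devres action so both the unregister and the free happen automatically at unbind (hence the removed mdiobus_unregister() call). A sketch, assuming a devm-allocated bus, which devm_of_mdiobus_register() requires:

#include <linux/of_mdio.h>
#include <linux/phy.h>

static int foo_mdio_bus_alloc(struct device *dev, struct mii_bus **out)
{
	struct mii_bus *bus;
	int rc;

	bus = devm_mdiobus_alloc(dev);
	if (!bus)
		return -ENOMEM;

	/* fill in accessors here, then hand teardown over to devres */
	rc = devm_of_mdiobus_register(dev, bus, NULL);
	if (rc < 0)
		return rc;	/* nothing to undo: devres frees the bus */

	*out = bus;
	return 0;
}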
diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c
index da0d7e68643a..c39de2a4c1fe 100644
--- a/drivers/net/dsa/qca/ar9331.c
+++ b/drivers/net/dsa/qca/ar9331.c
@@ -378,7 +378,7 @@ static int ar9331_sw_mbus_init(struct ar9331_sw_priv *priv)
if (!mnp)
return -ENODEV;
- ret = of_mdiobus_register(mbus, mnp);
+ ret = devm_of_mdiobus_register(dev, mbus, mnp);
of_node_put(mnp);
if (ret)
return ret;
@@ -1091,7 +1091,6 @@ static void ar9331_sw_remove(struct mdio_device *mdiodev)
}
irq_domain_remove(priv->irqdomain);
- mdiobus_unregister(priv->mbus);
dsa_unregister_switch(&priv->ds);
reset_control_assert(priv->sw_reset);
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index b513713be610..c2a47c6693b8 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -3346,18 +3346,16 @@ static int sja1105_probe(struct spi_device *spi)
return dsa_register_switch(priv->ds);
}
-static int sja1105_remove(struct spi_device *spi)
+static void sja1105_remove(struct spi_device *spi)
{
struct sja1105_private *priv = spi_get_drvdata(spi);
if (!priv)
- return 0;
+ return;
dsa_unregister_switch(priv->ds);
spi_set_drvdata(spi, NULL);
-
- return 0;
}
static void sja1105_shutdown(struct spi_device *spi)
diff --git a/drivers/net/dsa/vitesse-vsc73xx-spi.c b/drivers/net/dsa/vitesse-vsc73xx-spi.c
index 645398901e05..3110895358d8 100644
--- a/drivers/net/dsa/vitesse-vsc73xx-spi.c
+++ b/drivers/net/dsa/vitesse-vsc73xx-spi.c
@@ -159,18 +159,16 @@ static int vsc73xx_spi_probe(struct spi_device *spi)
return vsc73xx_probe(&vsc_spi->vsc);
}
-static int vsc73xx_spi_remove(struct spi_device *spi)
+static void vsc73xx_spi_remove(struct spi_device *spi)
{
struct vsc73xx_spi *vsc_spi = spi_get_drvdata(spi);
if (!vsc_spi)
- return 0;
+ return;
vsc73xx_remove(&vsc_spi->vsc);
spi_set_drvdata(spi, NULL);
-
- return 0;
}
static void vsc73xx_spi_shutdown(struct spi_device *spi)
diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c
index 481f1df3106c..8aec5d9fbfef 100644
--- a/drivers/net/ethernet/3com/typhoon.c
+++ b/drivers/net/ethernet/3com/typhoon.c
@@ -2278,6 +2278,7 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
struct net_device *dev;
struct typhoon *tp;
int card_id = (int) ent->driver_data;
+ u8 addr[ETH_ALEN] __aligned(4);
void __iomem *ioaddr;
void *shared;
dma_addr_t shared_dma;
@@ -2409,8 +2410,9 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
goto error_out_reset;
}
- *(__be16 *)&dev->dev_addr[0] = htons(le16_to_cpu(xp_resp[0].parm1));
- *(__be32 *)&dev->dev_addr[2] = htonl(le32_to_cpu(xp_resp[0].parm2));
+ *(__be16 *)&addr[0] = htons(le16_to_cpu(xp_resp[0].parm1));
+ *(__be32 *)&addr[2] = htonl(le32_to_cpu(xp_resp[0].parm2));
+ eth_hw_addr_set(dev, addr);
if (!is_valid_ether_addr(dev->dev_addr)) {
err_msg = "Could not obtain valid ethernet address, aborting";
diff --git a/drivers/net/ethernet/8390/etherh.c b/drivers/net/ethernet/8390/etherh.c
index bd22a534b1c0..e7b879123bb1 100644
--- a/drivers/net/ethernet/8390/etherh.c
+++ b/drivers/net/ethernet/8390/etherh.c
@@ -655,6 +655,7 @@ etherh_probe(struct expansion_card *ec, const struct ecard_id *id)
struct ei_device *ei_local;
struct net_device *dev;
struct etherh_priv *eh;
+ u8 addr[ETH_ALEN];
int ret;
ret = ecard_request_resources(ec);
@@ -724,12 +725,13 @@ etherh_probe(struct expansion_card *ec, const struct ecard_id *id)
spin_lock_init(&ei_local->page_lock);
if (ec->cid.product == PROD_ANT_ETHERM) {
- etherm_addr(dev->dev_addr);
+ etherm_addr(addr);
ei_local->reg_offset = etherm_regoffsets;
} else {
- etherh_addr(dev->dev_addr, ec);
+ etherh_addr(addr, ec);
ei_local->reg_offset = etherh_regoffsets;
}
+ eth_hw_addr_set(dev, addr);
ei_local->name = dev->name;
ei_local->word16 = 1;
diff --git a/drivers/net/ethernet/8390/mcf8390.c b/drivers/net/ethernet/8390/mcf8390.c
index e320cccba61a..90cd7bdf06f5 100644
--- a/drivers/net/ethernet/8390/mcf8390.c
+++ b/drivers/net/ethernet/8390/mcf8390.c
@@ -405,12 +405,12 @@ static int mcf8390_init(struct net_device *dev)
static int mcf8390_probe(struct platform_device *pdev)
{
struct net_device *dev;
- struct resource *mem, *irq;
+ struct resource *mem;
resource_size_t msize;
- int ret;
+ int ret, irq;
- irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
- if (irq == NULL) {
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
dev_err(&pdev->dev, "no IRQ specified?\n");
return -ENXIO;
}
@@ -433,7 +433,7 @@ static int mcf8390_probe(struct platform_device *pdev)
SET_NETDEV_DEV(dev, &pdev->dev);
platform_set_drvdata(pdev, dev);
- dev->irq = irq->start;
+ dev->irq = irq;
dev->base_addr = mem->start;
ret = mcf8390_init(dev);
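[note] The mcf8390 hunk swaps a raw IORESOURCE_IRQ lookup for platform_get_irq(), which also performs interrupt mapping/translation and can return -EPROBE_DEFER, neither of which the resource table gives you. Sketch:

#include <linux/platform_device.h>

static int foo_probe(struct platform_device *pdev)
{
	int irq;

	irq = platform_get_irq(pdev, 0);	/* mapped Linux IRQ number or -errno */
	if (irq < 0)
		return irq;			/* propagates -EPROBE_DEFER etc. */

	/* ... request_irq(irq, ...), store irq in driver state ... */
	return 0;
}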
diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c
index 493b0cefcc2a..ec8df05e7bf6 100644
--- a/drivers/net/ethernet/amd/declance.c
+++ b/drivers/net/ethernet/amd/declance.c
@@ -1032,6 +1032,7 @@ static int dec_lance_probe(struct device *bdev, const int type)
int i, ret;
unsigned long esar_base;
unsigned char *esar;
+ u8 addr[ETH_ALEN];
const char *desc;
if (dec_lance_debug && version_printed++ == 0)
@@ -1228,7 +1229,8 @@ static int dec_lance_probe(struct device *bdev, const int type)
break;
}
for (i = 0; i < 6; i++)
- dev->dev_addr[i] = esar[i * 4];
+ addr[i] = esar[i * 4];
+ eth_hw_addr_set(dev, addr);
printk("%s: %s, addr = %pM, irq = %d\n",
name, desc, dev->dev_addr, dev->irq);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 492ac383f16d..a3593290886f 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -721,7 +721,9 @@ static void xgbe_stop_timers(struct xgbe_prv_data *pdata)
if (!channel->tx_ring)
break;
+ /* Deactivate the Tx timer */
del_timer_sync(&channel->tx_timer);
+ channel->tx_timer_active = 0;
}
}
@@ -2550,6 +2552,14 @@ read_again:
buf2_len = xgbe_rx_buf2_len(rdata, packet, len);
len += buf2_len;
+ if (buf2_len > rdata->rx.buf.dma_len) {
+ /* Hardware inconsistency within the descriptors
+ * that has resulted in a length underflow.
+ */
+ error = 1;
+ goto skip_data;
+ }
+
if (!skb) {
skb = xgbe_create_skb(pdata, napi, rdata,
buf1_len);
@@ -2579,8 +2589,10 @@ skip_data:
if (!last || context_next)
goto read_again;
- if (!skb)
+ if (!skb || error) {
+ dev_kfree_skb(skb);
goto next_packet;
+ }
/* Be sure we don't exceed the configured MTU */
max_len = netdev->mtu + ETH_HLEN;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
index efdcf484a510..2af3da4b2d05 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
@@ -425,6 +425,9 @@ static void xgbe_pci_remove(struct pci_dev *pdev)
pci_free_irq_vectors(pdata->pcidev);
+ /* Disable all interrupts in the hardware */
+ XP_IOWRITE(pdata, XP_INT_EN, 0x0);
+
xgbe_free_pdata(pdata);
}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
index 1bc4d33a0ce5..30a573db02bb 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
@@ -826,7 +826,6 @@ int aq_filters_vlans_update(struct aq_nic_s *aq_nic)
struct aq_hw_s *aq_hw = aq_nic->aq_hw;
int hweight = 0;
int err = 0;
- int i;
if (unlikely(!aq_hw_ops->hw_filter_vlan_set))
return -EOPNOTSUPP;
@@ -837,8 +836,7 @@ int aq_filters_vlans_update(struct aq_nic_s *aq_nic)
aq_nic->aq_hw_rx_fltrs.fl2.aq_vlans);
if (aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) {
- for (i = 0; i < BITS_TO_LONGS(VLAN_N_VID); i++)
- hweight += hweight_long(aq_nic->active_vlans[i]);
+ hweight = bitmap_weight(aq_nic->active_vlans, VLAN_N_VID);
err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, false);
if (err)
@@ -871,7 +869,7 @@ int aq_filters_vlan_offload_off(struct aq_nic_s *aq_nic)
struct aq_hw_s *aq_hw = aq_nic->aq_hw;
int err = 0;
- memset(aq_nic->active_vlans, 0, sizeof(aq_nic->active_vlans));
+ bitmap_zero(aq_nic->active_vlans, VLAN_N_VID);
aq_fvlan_rebuild(aq_nic, aq_nic->active_vlans,
aq_nic->aq_hw_rx_fltrs.fl2.aq_vlans);
diff --git a/drivers/net/ethernet/arc/emac_mdio.c b/drivers/net/ethernet/arc/emac_mdio.c
index 9acf589b1178..87f40c2ba904 100644
--- a/drivers/net/ethernet/arc/emac_mdio.c
+++ b/drivers/net/ethernet/arc/emac_mdio.c
@@ -132,6 +132,7 @@ int arc_mdio_probe(struct arc_emac_priv *priv)
{
struct arc_emac_mdio_bus_data *data = &priv->bus_data;
struct device_node *np = priv->dev->of_node;
+ const char *name = "Synopsys MII Bus";
struct mii_bus *bus;
int error;
@@ -142,7 +143,7 @@ int arc_mdio_probe(struct arc_emac_priv *priv)
priv->bus = bus;
bus->priv = priv;
bus->parent = priv->dev;
- bus->name = "Synopsys MII Bus";
+ bus->name = name;
bus->read = &arc_mdio_read;
bus->write = &arc_mdio_write;
bus->reset = &arc_mdio_reset;
@@ -167,7 +168,7 @@ int arc_mdio_probe(struct arc_emac_priv *priv)
if (error) {
mdiobus_free(bus);
return dev_err_probe(priv->dev, error,
- "cannot register MDIO bus %s\n", bus->name);
+ "cannot register MDIO bus %s\n", name);
}
return 0;
diff --git a/drivers/net/ethernet/asix/ax88796c_main.c b/drivers/net/ethernet/asix/ax88796c_main.c
index e7a9f9863258..bf70481bb1ca 100644
--- a/drivers/net/ethernet/asix/ax88796c_main.c
+++ b/drivers/net/ethernet/asix/ax88796c_main.c
@@ -1102,7 +1102,7 @@ err:
return ret;
}
-static int ax88796c_remove(struct spi_device *spi)
+static void ax88796c_remove(struct spi_device *spi)
{
struct ax88796c_device *ax_local = dev_get_drvdata(&spi->dev);
struct net_device *ndev = ax_local->ndev;
@@ -1112,8 +1112,6 @@ static int ax88796c_remove(struct spi_device *spi)
netif_info(ax_local, probe, ndev, "removing network device %s %s\n",
dev_driver_string(&spi->dev),
dev_name(&spi->dev));
-
- return 0;
}
#ifdef CONFIG_OF
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 4ad3fc72e74e..a89b93cb4e26 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -1181,8 +1181,11 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
alx->hw.mtu = mtu;
alx->rxbuf_size = max(max_frame, ALX_DEF_RXBUF_SIZE);
netdev_update_features(netdev);
- if (netif_running(netdev))
+ if (netif_running(netdev)) {
+ mutex_lock(&alx->mtx);
alx_reinit(alx);
+ mutex_unlock(&alx->mtx);
+ }
return 0;
}
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index da595242bc13..f50604f3e541 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -900,7 +900,7 @@ static void atl1c_clean_tx_ring(struct atl1c_adapter *adapter,
atl1c_clean_buffer(pdev, buffer_info);
}
- netdev_reset_queue(adapter->netdev);
+ netdev_tx_reset_queue(netdev_get_tx_queue(adapter->netdev, queue));
/* Zero out Tx-buffers */
memset(tpd_ring->desc, 0, sizeof(struct atl1c_tpd_desc) *
diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c
index c6412c523637..b4381cd41979 100644
--- a/drivers/net/ethernet/broadcom/bgmac-platform.c
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -172,6 +172,7 @@ static int bgmac_probe(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
struct bgmac *bgmac;
+ struct resource *regs;
int ret;
bgmac = bgmac_alloc(&pdev->dev);
@@ -208,15 +209,23 @@ static int bgmac_probe(struct platform_device *pdev)
if (IS_ERR(bgmac->plat.base))
return PTR_ERR(bgmac->plat.base);
- bgmac->plat.idm_base = devm_platform_ioremap_resource_byname(pdev, "idm_base");
- if (IS_ERR(bgmac->plat.idm_base))
- return PTR_ERR(bgmac->plat.idm_base);
- else
+ /* The idm_base resource is optional for some platforms */
+ regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "idm_base");
+ if (regs) {
+ bgmac->plat.idm_base = devm_ioremap_resource(&pdev->dev, regs);
+ if (IS_ERR(bgmac->plat.idm_base))
+ return PTR_ERR(bgmac->plat.idm_base);
bgmac->feature_flags &= ~BGMAC_FEAT_IDM_MASK;
+ }
- bgmac->plat.nicpm_base = devm_platform_ioremap_resource_byname(pdev, "nicpm_base");
- if (IS_ERR(bgmac->plat.nicpm_base))
- return PTR_ERR(bgmac->plat.nicpm_base);
+ /* The nicpm_base resource is optional for some platforms */
+ regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nicpm_base");
+ if (regs) {
+ bgmac->plat.nicpm_base = devm_ioremap_resource(&pdev->dev,
+ regs);
+ if (IS_ERR(bgmac->plat.nicpm_base))
+ return PTR_ERR(bgmac->plat.nicpm_base);
+ }
bgmac->read = platform_bgmac_read;
bgmac->write = platform_bgmac_write;
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index e20aafeb4ca9..b97ed9b5f685 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -8216,7 +8216,7 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
rc = dma_set_coherent_mask(&pdev->dev, persist_dma_mask);
if (rc) {
dev_err(&pdev->dev,
- "pci_set_consistent_dma_mask failed, aborting\n");
+ "dma_set_coherent_mask failed, aborting\n");
goto err_out_unmap;
}
} else if ((rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) != 0) {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index a19dd6797070..2209d99b3404 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -2533,6 +2533,4 @@ void bnx2x_register_phc(struct bnx2x *bp);
* Meant for implicit re-load flows.
*/
int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp);
-int bnx2x_init_firmware(struct bnx2x *bp);
-void bnx2x_release_firmware(struct bnx2x *bp);
#endif /* bnx2x.h */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 8d36ebbf08e1..5729a5ab059d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -2364,24 +2364,30 @@ int bnx2x_compare_fw_ver(struct bnx2x *bp, u32 load_code, bool print_err)
/* is another pf loaded on this engine? */
if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP &&
load_code != FW_MSG_CODE_DRV_LOAD_COMMON) {
- /* build my FW version dword */
- u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) +
- (bp->fw_rev << 16) + (bp->fw_eng << 24);
+ u8 loaded_fw_major, loaded_fw_minor, loaded_fw_rev, loaded_fw_eng;
+ u32 loaded_fw;
/* read loaded FW from chip */
- u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM);
+ loaded_fw = REG_RD(bp, XSEM_REG_PRAM);
- DP(BNX2X_MSG_SP, "loaded fw %x, my fw %x\n",
- loaded_fw, my_fw);
+ loaded_fw_major = loaded_fw & 0xff;
+ loaded_fw_minor = (loaded_fw >> 8) & 0xff;
+ loaded_fw_rev = (loaded_fw >> 16) & 0xff;
+ loaded_fw_eng = (loaded_fw >> 24) & 0xff;
+
+ DP(BNX2X_MSG_SP, "loaded fw 0x%x major 0x%x minor 0x%x rev 0x%x eng 0x%x\n",
+ loaded_fw, loaded_fw_major, loaded_fw_minor, loaded_fw_rev, loaded_fw_eng);
/* abort nic load if version mismatch */
- if (my_fw != loaded_fw) {
+ if (loaded_fw_major != BCM_5710_FW_MAJOR_VERSION ||
+ loaded_fw_minor != BCM_5710_FW_MINOR_VERSION ||
+ loaded_fw_eng != BCM_5710_FW_ENGINEERING_VERSION ||
+ loaded_fw_rev < BCM_5710_FW_REVISION_VERSION_V15) {
if (print_err)
- BNX2X_ERR("bnx2x with FW %x was already loaded which mismatches my %x FW. Aborting\n",
- loaded_fw, my_fw);
+ BNX2X_ERR("loaded FW incompatible. Aborting\n");
else
- BNX2X_DEV_INFO("bnx2x with FW %x was already loaded which mismatches my %x FW, possibly due to MF UNDI\n",
- loaded_fw, my_fw);
+ BNX2X_DEV_INFO("loaded FW incompatible, possibly due to MF UNDI\n");
+
return -EBUSY;
}
}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 774c1f1a57c3..c19b072f3a23 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -100,6 +100,9 @@ MODULE_LICENSE("GPL");
MODULE_FIRMWARE(FW_FILE_NAME_E1);
MODULE_FIRMWARE(FW_FILE_NAME_E1H);
MODULE_FIRMWARE(FW_FILE_NAME_E2);
+MODULE_FIRMWARE(FW_FILE_NAME_E1_V15);
+MODULE_FIRMWARE(FW_FILE_NAME_E1H_V15);
+MODULE_FIRMWARE(FW_FILE_NAME_E2_V15);
int bnx2x_num_queues;
module_param_named(num_queues, bnx2x_num_queues, int, 0444);
@@ -12316,15 +12319,6 @@ static int bnx2x_init_bp(struct bnx2x *bp)
bnx2x_read_fwinfo(bp);
- if (IS_PF(bp)) {
- rc = bnx2x_init_firmware(bp);
-
- if (rc) {
- bnx2x_free_mem_bp(bp);
- return rc;
- }
- }
-
func = BP_FUNC(bp);
/* need to reset chip if undi was active */
@@ -12337,7 +12331,6 @@ static int bnx2x_init_bp(struct bnx2x *bp)
rc = bnx2x_prev_unload(bp);
if (rc) {
- bnx2x_release_firmware(bp);
bnx2x_free_mem_bp(bp);
return rc;
}
@@ -13406,7 +13399,7 @@ do { \
(u8 *)bp->arr, len); \
} while (0)
-int bnx2x_init_firmware(struct bnx2x *bp)
+static int bnx2x_init_firmware(struct bnx2x *bp)
{
const char *fw_file_name, *fw_file_name_v15;
struct bnx2x_fw_file_hdr *fw_hdr;
@@ -13506,7 +13499,7 @@ request_firmware_exit:
return rc;
}
-void bnx2x_release_firmware(struct bnx2x *bp)
+static void bnx2x_release_firmware(struct bnx2x *bp)
{
kfree(bp->init_ops_offsets);
kfree(bp->init_ops);
@@ -14023,7 +14016,6 @@ static int bnx2x_init_one(struct pci_dev *pdev,
return 0;
init_one_freemem:
- bnx2x_release_firmware(bp);
bnx2x_free_mem_bp(bp);
init_one_exit:
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 4f94136a011a..b1c98d1408b8 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4747,8 +4747,10 @@ static int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, u16 vnic_id)
return rc;
req->vnic_id = cpu_to_le32(vnic->fw_vnic_id);
- req->num_mc_entries = cpu_to_le32(vnic->mc_list_count);
- req->mc_tbl_addr = cpu_to_le64(vnic->mc_list_mapping);
+ if (vnic->rx_mask & CFA_L2_SET_RX_MASK_REQ_MASK_MCAST) {
+ req->num_mc_entries = cpu_to_le32(vnic->mc_list_count);
+ req->mc_tbl_addr = cpu_to_le64(vnic->mc_list_mapping);
+ }
req->mask = cpu_to_le32(vnic->rx_mask);
return hwrm_req_send_silent(bp, req);
}
@@ -7787,6 +7789,19 @@ static int bnxt_map_fw_health_regs(struct bnxt *bp)
return 0;
}
+static void bnxt_remap_fw_health_regs(struct bnxt *bp)
+{
+ if (!bp->fw_health)
+ return;
+
+ if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) {
+ bp->fw_health->status_reliable = true;
+ bp->fw_health->resets_reliable = true;
+ } else {
+ bnxt_try_map_fw_health_reg(bp);
+ }
+}
+
static int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
{
struct bnxt_fw_health *fw_health = bp->fw_health;
@@ -8639,6 +8654,9 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
vnic->uc_filter_count = 1;
vnic->rx_mask = 0;
+ if (test_bit(BNXT_STATE_HALF_OPEN, &bp->state))
+ goto skip_rx_mask;
+
if (bp->dev->flags & IFF_BROADCAST)
vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_BCAST;
@@ -8648,7 +8666,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
if (bp->dev->flags & IFF_ALLMULTI) {
vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST;
vnic->mc_list_count = 0;
- } else {
+ } else if (bp->dev->flags & IFF_MULTICAST) {
u32 mask = 0;
bnxt_mc_list_updated(bp, &mask);
@@ -8659,6 +8677,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
if (rc)
goto err_out;
+skip_rx_mask:
rc = bnxt_hwrm_set_coal(bp);
if (rc)
netdev_warn(bp->dev, "HWRM set coalescing failure rc: %x\n",
@@ -9850,8 +9869,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
resc_reinit = true;
if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE)
fw_reset = true;
- else if (bp->fw_health && !bp->fw_health->status_reliable)
- bnxt_try_map_fw_health_reg(bp);
+ else
+ bnxt_remap_fw_health_regs(bp);
if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state) && !fw_reset) {
netdev_err(bp->dev, "RESET_DONE not set during FW reset.\n");
@@ -10330,13 +10349,15 @@ int bnxt_half_open_nic(struct bnxt *bp)
goto half_open_err;
}
- rc = bnxt_alloc_mem(bp, false);
+ rc = bnxt_alloc_mem(bp, true);
if (rc) {
netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc);
goto half_open_err;
}
- rc = bnxt_init_nic(bp, false);
+ set_bit(BNXT_STATE_HALF_OPEN, &bp->state);
+ rc = bnxt_init_nic(bp, true);
if (rc) {
+ clear_bit(BNXT_STATE_HALF_OPEN, &bp->state);
netdev_err(bp->dev, "bnxt_init_nic err: %x\n", rc);
goto half_open_err;
}
@@ -10344,7 +10365,7 @@ int bnxt_half_open_nic(struct bnxt *bp)
half_open_err:
bnxt_free_skbs(bp);
- bnxt_free_mem(bp, false);
+ bnxt_free_mem(bp, true);
dev_close(bp->dev);
return rc;
}
@@ -10354,9 +10375,10 @@ half_open_err:
*/
void bnxt_half_close_nic(struct bnxt *bp)
{
- bnxt_hwrm_resource_free(bp, false, false);
+ bnxt_hwrm_resource_free(bp, false, true);
bnxt_free_skbs(bp);
- bnxt_free_mem(bp, false);
+ bnxt_free_mem(bp, true);
+ clear_bit(BNXT_STATE_HALF_OPEN, &bp->state);
}
void bnxt_reenable_sriov(struct bnxt *bp)
@@ -10772,7 +10794,7 @@ static void bnxt_set_rx_mode(struct net_device *dev)
if (dev->flags & IFF_ALLMULTI) {
mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST;
vnic->mc_list_count = 0;
- } else {
+ } else if (dev->flags & IFF_MULTICAST) {
mc_update = bnxt_mc_list_updated(bp, &mask);
}
@@ -10849,9 +10871,10 @@ skip_uc:
!bnxt_promisc_ok(bp))
vnic->rx_mask &= ~CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS;
rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0);
- if (rc && vnic->mc_list_count) {
+ if (rc && (vnic->rx_mask & CFA_L2_SET_RX_MASK_REQ_MASK_MCAST)) {
netdev_info(bp->dev, "Failed setting MC filters rc: %d, turning on ALL_MCAST mode\n",
rc);
+ vnic->rx_mask &= ~CFA_L2_SET_RX_MASK_REQ_MASK_MCAST;
vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST;
vnic->mc_list_count = 0;
rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 440dfeb4948b..666fc1e7a7d2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1921,6 +1921,7 @@ struct bnxt {
#define BNXT_STATE_RECOVER 12
#define BNXT_STATE_FW_NON_FATAL_COND 13
#define BNXT_STATE_FW_ACTIVATE_RESET 14
+#define BNXT_STATE_HALF_OPEN 15 /* For offline ethtool tests */
#define BNXT_NO_FW_ACCESS(bp) \
(test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) || \
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 4da31b1b84f9..f6e21fac0e69 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -367,6 +367,16 @@ bnxt_dl_livepatch_report_err(struct bnxt *bp, struct netlink_ext_ack *extack,
}
}
+/* Live patch status in NVM */
+#define BNXT_LIVEPATCH_NOT_INSTALLED 0
+#define BNXT_LIVEPATCH_INSTALLED FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_INSTALL
+#define BNXT_LIVEPATCH_REMOVED FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_ACTIVE
+#define BNXT_LIVEPATCH_MASK (FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_INSTALL | \
+ FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_ACTIVE)
+#define BNXT_LIVEPATCH_ACTIVATED BNXT_LIVEPATCH_MASK
+
+#define BNXT_LIVEPATCH_STATE(flags) ((flags) & BNXT_LIVEPATCH_MASK)
+
static int
bnxt_dl_livepatch_activate(struct bnxt *bp, struct netlink_ext_ack *extack)
{
@@ -374,8 +384,9 @@ bnxt_dl_livepatch_activate(struct bnxt *bp, struct netlink_ext_ack *extack)
struct hwrm_fw_livepatch_query_input *query_req;
struct hwrm_fw_livepatch_output *patch_resp;
struct hwrm_fw_livepatch_input *patch_req;
+ u16 flags, live_patch_state;
+ bool activated = false;
u32 installed = 0;
- u16 flags;
u8 target;
int rc;
@@ -394,7 +405,6 @@ bnxt_dl_livepatch_activate(struct bnxt *bp, struct netlink_ext_ack *extack)
hwrm_req_drop(bp, query_req);
return rc;
}
- patch_req->opcode = FW_LIVEPATCH_REQ_OPCODE_ACTIVATE;
patch_req->loadtype = FW_LIVEPATCH_REQ_LOADTYPE_NVM_INSTALL;
patch_resp = hwrm_req_hold(bp, patch_req);
@@ -407,12 +417,20 @@ bnxt_dl_livepatch_activate(struct bnxt *bp, struct netlink_ext_ack *extack)
}
flags = le16_to_cpu(query_resp->status_flags);
- if (~flags & FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_INSTALL)
+ live_patch_state = BNXT_LIVEPATCH_STATE(flags);
+
+ if (live_patch_state == BNXT_LIVEPATCH_NOT_INSTALLED)
continue;
- if ((flags & FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_ACTIVE) &&
- !strncmp(query_resp->active_ver, query_resp->install_ver,
- sizeof(query_resp->active_ver)))
+
+ if (live_patch_state == BNXT_LIVEPATCH_ACTIVATED) {
+ activated = true;
continue;
+ }
+
+ if (live_patch_state == BNXT_LIVEPATCH_INSTALLED)
+ patch_req->opcode = FW_LIVEPATCH_REQ_OPCODE_ACTIVATE;
+ else if (live_patch_state == BNXT_LIVEPATCH_REMOVED)
+ patch_req->opcode = FW_LIVEPATCH_REQ_OPCODE_DEACTIVATE;
patch_req->fw_target = target;
rc = hwrm_req_send(bp, patch_req);
@@ -424,8 +442,13 @@ bnxt_dl_livepatch_activate(struct bnxt *bp, struct netlink_ext_ack *extack)
}
if (!rc && !installed) {
- NL_SET_ERR_MSG_MOD(extack, "No live patches found");
- rc = -ENOENT;
+ if (activated) {
+ NL_SET_ERR_MSG_MOD(extack, "Live patch already activated");
+ rc = -EEXIST;
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "No live patches found");
+ rc = -ENOENT;
+ }
}
hwrm_req_drop(bp, query_req);
hwrm_req_drop(bp, patch_req);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 003330e8cd58..8aaa2335f848 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -25,6 +25,7 @@
#include "bnxt_hsi.h"
#include "bnxt.h"
#include "bnxt_hwrm.h"
+#include "bnxt_ulp.h"
#include "bnxt_xdp.h"
#include "bnxt_ptp.h"
#include "bnxt_ethtool.h"
@@ -1969,6 +1970,9 @@ static int bnxt_get_fecparam(struct net_device *dev,
case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS272_IEEE_ACTIVE:
fec->active_fec |= ETHTOOL_FEC_LLRS;
break;
+ case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_NONE_ACTIVE:
+ fec->active_fec |= ETHTOOL_FEC_OFF;
+ break;
}
return 0;
}
@@ -3454,7 +3458,7 @@ static int bnxt_run_loopback(struct bnxt *bp)
if (!skb)
return -ENOMEM;
data = skb_put(skb, pkt_size);
- eth_broadcast_addr(data);
+ ether_addr_copy(&data[i], bp->dev->dev_addr);
i += ETH_ALEN;
ether_addr_copy(&data[i], bp->dev->dev_addr);
i += ETH_ALEN;
@@ -3548,9 +3552,12 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest,
if (!offline) {
bnxt_run_fw_tests(bp, test_mask, &test_results);
} else {
- rc = bnxt_close_nic(bp, false, false);
- if (rc)
+ bnxt_ulp_stop(bp);
+ rc = bnxt_close_nic(bp, true, false);
+ if (rc) {
+ bnxt_ulp_start(bp, rc);
return;
+ }
bnxt_run_fw_tests(bp, test_mask, &test_results);
buf[BNXT_MACLPBK_TEST_IDX] = 1;
@@ -3560,6 +3567,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest,
if (rc) {
bnxt_hwrm_mac_loopback(bp, false);
etest->flags |= ETH_TEST_FL_FAILED;
+ bnxt_ulp_start(bp, rc);
return;
}
if (bnxt_run_loopback(bp))
@@ -3585,7 +3593,8 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest,
}
bnxt_hwrm_phy_loopback(bp, false, false);
bnxt_half_close_nic(bp);
- rc = bnxt_open_nic(bp, false, true);
+ rc = bnxt_open_nic(bp, true, true);
+ bnxt_ulp_start(bp, rc);
}
if (rc || bnxt_test_irq(bp)) {
buf[BNXT_IRQ_TEST_IDX] = 1;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c
index 566c9487ef55..b01d42928a53 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c
@@ -644,17 +644,23 @@ static int __hwrm_send(struct bnxt *bp, struct bnxt_hwrm_ctx *ctx)
/* Last byte of resp contains valid bit */
valid = ((u8 *)ctx->resp) + len - 1;
- for (j = 0; j < HWRM_VALID_BIT_DELAY_USEC; j++) {
+ for (j = 0; j < HWRM_VALID_BIT_DELAY_USEC; ) {
/* make sure we read from updated DMA memory */
dma_rmb();
if (*valid)
break;
- usleep_range(1, 5);
+ if (j < 10) {
+ udelay(1);
+ j++;
+ } else {
+ usleep_range(20, 30);
+ j += 20;
+ }
}
if (j >= HWRM_VALID_BIT_DELAY_USEC) {
hwrm_err(bp, ctx, "Error (timeout: %u) msg {0x%x 0x%x} len:%d v:%d\n",
- hwrm_total_timeout(i), req_type,
+ hwrm_total_timeout(i) + j, req_type,
le16_to_cpu(ctx->req->seq_id), len, *valid);
goto exit;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h
index d52bd2d63aec..c98032e38188 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h
@@ -90,7 +90,7 @@ static inline unsigned int hwrm_total_timeout(unsigned int n)
}
-#define HWRM_VALID_BIT_DELAY_USEC 150
+#define HWRM_VALID_BIT_DELAY_USEC 50000
static inline bool bnxt_cfa_hwrm_message(u16 req_type)
{
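[note] Raising HWRM_VALID_BIT_DELAY_USEC from 150 to 50000 is only practical because the loop in bnxt_hwrm.c above now counts j in microseconds and backs off: roughly 10 us of tight udelay() polling for the common fast-completion case, then 20-30 us sleeps until the budget is spent. A minimal sketch of the same two-phase poll, with the budget as a parameter:

#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/types.h>

/* Poll *valid until non-zero, spending at most budget_usec. True on success. */
static bool poll_valid_bit(const u8 *valid, unsigned int budget_usec)
{
	unsigned int j;

	for (j = 0; j < budget_usec; ) {
		if (READ_ONCE(*valid))
			return true;
		if (j < 10) {
			udelay(1);		/* busy-wait the first ~10 us */
			j += 1;
		} else {
			usleep_range(20, 30);	/* then sleep in ~20 us steps */
			j += 20;
		}
	}

	return false;
}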
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 87f1056e29ff..2da804f84b48 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -2287,8 +2287,10 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
dma_length_status = status->length_status;
if (dev->features & NETIF_F_RXCSUM) {
rx_csum = (__force __be16)(status->rx_csum & 0xffff);
- skb->csum = (__force __wsum)ntohs(rx_csum);
- skb->ip_summed = CHECKSUM_COMPLETE;
+ if (rx_csum) {
+ skb->csum = (__force __wsum)ntohs(rx_csum);
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ }
}
/* DMA flags and length are still valid no matter how
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
index e31a5a397f11..f55d9d9c01a8 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
@@ -40,6 +40,13 @@
void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
+ struct device *kdev = &priv->pdev->dev;
+
+ if (!device_can_wakeup(kdev)) {
+ wol->supported = 0;
+ wol->wolopts = 0;
+ return;
+ }
wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER;
wol->wolopts = priv->wolopts;
diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index f38f40eb966e..a1a38456c9a3 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -2183,9 +2183,7 @@ static int sbmac_init(struct platform_device *pldev, long long base)
ea_reg >>= 8;
}
- for (i = 0; i < 6; i++) {
- dev->dev_addr[i] = eaddr[i];
- }
+ eth_hw_addr_set(dev, eaddr);
/*
* Initialize context (get pointers to registers and stuff), then
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index a363da928e8b..d13f06cf0308 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -1573,7 +1573,14 @@ static int macb_poll(struct napi_struct *napi, int budget)
if (work_done < budget) {
napi_complete_done(napi, work_done);
- /* Packets received while interrupts were disabled */
+ /* RSR bits only seem to propagate to raise interrupts when
+ * interrupts are enabled at the time, so if bits are already
+ * set due to packets received while interrupts were disabled,
+ * they will not cause another interrupt to be generated when
+ * interrupts are re-enabled.
+ * Check for this case here. This has been seen to happen
+ * around 30% of the time under heavy network load.
+ */
status = macb_readl(bp, RSR);
if (status) {
if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
@@ -1581,6 +1588,22 @@ static int macb_poll(struct napi_struct *napi, int budget)
napi_reschedule(napi);
} else {
queue_writel(queue, IER, bp->rx_intr_mask);
+
+ /* In rare cases, packets could have been received in
+ * the window between the check above and re-enabling
+ * interrupts. Therefore, a double-check is required
+ * to avoid losing a wakeup. This can potentially race
+ * with the interrupt handler doing the same actions
+ * if an interrupt is raised just after enabling them,
+ * but this should be harmless.
+ */
+ status = macb_readl(bp, RSR);
+ if (unlikely(status)) {
+ queue_writel(queue, IDR, bp->rx_intr_mask);
+ if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
+ queue_writel(queue, ISR, MACB_BIT(RCOMP));
+ napi_schedule(napi);
+ }
}
}
@@ -4712,7 +4735,7 @@ static int macb_probe(struct platform_device *pdev)
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
if (GEM_BFEXT(DAW64, gem_readl(bp, DCFG6))) {
- dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
+ dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
bp->hw_dma_cap |= HW_DMA_CAP_64B;
}
#endif
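[note] The macb poll change is the classic lost-wakeup fix for latched event bits: after napi_complete_done() re-enables interrupts, the driver must re-read the event register, because an event that latched while interrupts were masked will never raise a fresh interrupt on its own. The general shape as a sketch; every foo_* helper here is hypothetical:

#include <linux/netdevice.h>

struct foo_priv {
	struct napi_struct napi;
	/* ... */
};

int foo_process_rx(struct foo_priv *priv, int budget);	/* hypothetical */
bool foo_rx_pending(struct foo_priv *priv);		/* hypothetical */
void foo_enable_rx_irq(struct foo_priv *priv);		/* hypothetical */
void foo_disable_rx_irq(struct foo_priv *priv);		/* hypothetical */

static int foo_poll(struct napi_struct *napi, int budget)
{
	struct foo_priv *priv = container_of(napi, struct foo_priv, napi);
	int work_done = foo_process_rx(priv, budget);

	if (work_done < budget) {
		napi_complete_done(napi, work_done);
		foo_enable_rx_irq(priv);

		/* An event latched while IRQs were masked raises no new
		 * interrupt: re-check and re-arm NAPI if one is pending.
		 */
		if (foo_rx_pending(priv)) {
			foo_disable_rx_irq(priv);
			napi_schedule(napi);
		}
	}

	return work_done;
}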
diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
index da41eee2f25c..a06003bfa04b 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
@@ -3613,6 +3613,8 @@ int t3_prep_adapter(struct adapter *adapter, const struct adapter_info *ai,
MAC_STATS_ACCUM_SECS : (MAC_STATS_ACCUM_SECS * 10);
adapter->params.pci.vpd_cap_addr =
pci_find_capability(adapter->pdev, PCI_CAP_ID_VPD);
+ if (!adapter->params.pci.vpd_cap_addr)
+ return -ENODEV;
ret = get_vpd_params(adapter, &adapter->params.vpd);
if (ret < 0)
return ret;
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 691605c15265..d5356db7539a 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -989,117 +989,6 @@ static int ftgmac100_alloc_rx_buffers(struct ftgmac100 *priv)
return 0;
}
-static void ftgmac100_adjust_link(struct net_device *netdev)
-{
- struct ftgmac100 *priv = netdev_priv(netdev);
- struct phy_device *phydev = netdev->phydev;
- bool tx_pause, rx_pause;
- int new_speed;
-
- /* We store "no link" as speed 0 */
- if (!phydev->link)
- new_speed = 0;
- else
- new_speed = phydev->speed;
-
- /* Grab pause settings from PHY if configured to do so */
- if (priv->aneg_pause) {
- rx_pause = tx_pause = phydev->pause;
- if (phydev->asym_pause)
- tx_pause = !rx_pause;
- } else {
- rx_pause = priv->rx_pause;
- tx_pause = priv->tx_pause;
- }
-
- /* Link hasn't changed, do nothing */
- if (phydev->speed == priv->cur_speed &&
- phydev->duplex == priv->cur_duplex &&
- rx_pause == priv->rx_pause &&
- tx_pause == priv->tx_pause)
- return;
-
- /* Print status if we have a link or we had one and just lost it,
- * don't print otherwise.
- */
- if (new_speed || priv->cur_speed)
- phy_print_status(phydev);
-
- priv->cur_speed = new_speed;
- priv->cur_duplex = phydev->duplex;
- priv->rx_pause = rx_pause;
- priv->tx_pause = tx_pause;
-
- /* Link is down, do nothing else */
- if (!new_speed)
- return;
-
- /* Disable all interrupts */
- iowrite32(0, priv->base + FTGMAC100_OFFSET_IER);
-
- /* Reset the adapter asynchronously */
- schedule_work(&priv->reset_task);
-}
-
-static int ftgmac100_mii_probe(struct net_device *netdev)
-{
- struct ftgmac100 *priv = netdev_priv(netdev);
- struct platform_device *pdev = to_platform_device(priv->dev);
- struct device_node *np = pdev->dev.of_node;
- struct phy_device *phydev;
- phy_interface_t phy_intf;
- int err;
-
- /* Default to RGMII. It's a gigabit part after all */
- err = of_get_phy_mode(np, &phy_intf);
- if (err)
- phy_intf = PHY_INTERFACE_MODE_RGMII;
-
- /* Aspeed only supports these. I don't know about other IP
- * block vendors so I'm going to just let them through for
- * now. Note that this is only a warning if for some obscure
- * reason the DT really means to lie about it or it's a newer
- * part we don't know about.
- *
- * On the Aspeed SoC there are additionally straps and SCU
- * control bits that could tell us what the interface is
- * (or allow us to configure it while the IP block is held
- * in reset). For now I chose to keep this driver away from
- * those SoC specific bits and assume the device-tree is
- * right and the SCU has been configured properly by pinmux
- * or the firmware.
- */
- if (priv->is_aspeed && !(phy_interface_mode_is_rgmii(phy_intf))) {
- netdev_warn(netdev,
- "Unsupported PHY mode %s !\n",
- phy_modes(phy_intf));
- }
-
- phydev = phy_find_first(priv->mii_bus);
- if (!phydev) {
- netdev_info(netdev, "%s: no PHY found\n", netdev->name);
- return -ENODEV;
- }
-
- phydev = phy_connect(netdev, phydev_name(phydev),
- &ftgmac100_adjust_link, phy_intf);
-
- if (IS_ERR(phydev)) {
- netdev_err(netdev, "%s: Could not attach to PHY\n", netdev->name);
- return PTR_ERR(phydev);
- }
-
- /* Indicate that we support PAUSE frames (see comment in
- * Documentation/networking/phy.rst)
- */
- phy_support_asym_pause(phydev);
-
- /* Display what we found */
- phy_attached_info(phydev);
-
- return 0;
-}
-
static int ftgmac100_mdiobus_read(struct mii_bus *bus, int phy_addr, int regnum)
{
struct net_device *netdev = bus->priv;
@@ -1410,10 +1299,8 @@ static int ftgmac100_init_all(struct ftgmac100 *priv, bool ignore_alloc_err)
return err;
}
-static void ftgmac100_reset_task(struct work_struct *work)
+static void ftgmac100_reset(struct ftgmac100 *priv)
{
- struct ftgmac100 *priv = container_of(work, struct ftgmac100,
- reset_task);
struct net_device *netdev = priv->netdev;
int err;
@@ -1459,6 +1346,134 @@ static void ftgmac100_reset_task(struct work_struct *work)
rtnl_unlock();
}
+static void ftgmac100_reset_task(struct work_struct *work)
+{
+ struct ftgmac100 *priv = container_of(work, struct ftgmac100,
+ reset_task);
+
+ ftgmac100_reset(priv);
+}
+
+static void ftgmac100_adjust_link(struct net_device *netdev)
+{
+ struct ftgmac100 *priv = netdev_priv(netdev);
+ struct phy_device *phydev = netdev->phydev;
+ bool tx_pause, rx_pause;
+ int new_speed;
+
+ /* We store "no link" as speed 0 */
+ if (!phydev->link)
+ new_speed = 0;
+ else
+ new_speed = phydev->speed;
+
+ /* Grab pause settings from PHY if configured to do so */
+ if (priv->aneg_pause) {
+ rx_pause = tx_pause = phydev->pause;
+ if (phydev->asym_pause)
+ tx_pause = !rx_pause;
+ } else {
+ rx_pause = priv->rx_pause;
+ tx_pause = priv->tx_pause;
+ }
+
+ /* Link hasn't changed, do nothing */
+ if (phydev->speed == priv->cur_speed &&
+ phydev->duplex == priv->cur_duplex &&
+ rx_pause == priv->rx_pause &&
+ tx_pause == priv->tx_pause)
+ return;
+
+ /* Print status if we have a link or we had one and just lost it,
+ * don't print otherwise.
+ */
+ if (new_speed || priv->cur_speed)
+ phy_print_status(phydev);
+
+ priv->cur_speed = new_speed;
+ priv->cur_duplex = phydev->duplex;
+ priv->rx_pause = rx_pause;
+ priv->tx_pause = tx_pause;
+
+ /* Link is down, do nothing else */
+ if (!new_speed)
+ return;
+
+ /* Disable all interrupts */
+ iowrite32(0, priv->base + FTGMAC100_OFFSET_IER);
+
+ /* Release the PHY lock to allow ftgmac100_reset to acquire it,
+ * keeping the lock order consistent and preventing deadlock.
+ */
+ if (netdev->phydev)
+ mutex_unlock(&netdev->phydev->lock);
+
+ ftgmac100_reset(priv);
+
+ if (netdev->phydev)
+ mutex_lock(&netdev->phydev->lock);
+
+}
+
+static int ftgmac100_mii_probe(struct net_device *netdev)
+{
+ struct ftgmac100 *priv = netdev_priv(netdev);
+ struct platform_device *pdev = to_platform_device(priv->dev);
+ struct device_node *np = pdev->dev.of_node;
+ struct phy_device *phydev;
+ phy_interface_t phy_intf;
+ int err;
+
+ /* Default to RGMII. It's a gigabit part after all */
+ err = of_get_phy_mode(np, &phy_intf);
+ if (err)
+ phy_intf = PHY_INTERFACE_MODE_RGMII;
+
+ /* Aspeed only supports these. I don't know about other IP
+ * block vendors so I'm going to just let them through for
+ * now. Note that this is only a warning if for some obscure
+ * reason the DT really means to lie about it or it's a newer
+ * part we don't know about.
+ *
+ * On the Aspeed SoC there are additionally straps and SCU
+ * control bits that could tell us what the interface is
+ * (or allow us to configure it while the IP block is held
+ * in reset). For now I chose to keep this driver away from
+ * those SoC specific bits and assume the device-tree is
+ * right and the SCU has been configured properly by pinmux
+ * or the firmware.
+ */
+ if (priv->is_aspeed && !(phy_interface_mode_is_rgmii(phy_intf))) {
+ netdev_warn(netdev,
+ "Unsupported PHY mode %s !\n",
+ phy_modes(phy_intf));
+ }
+
+ phydev = phy_find_first(priv->mii_bus);
+ if (!phydev) {
+ netdev_info(netdev, "%s: no PHY found\n", netdev->name);
+ return -ENODEV;
+ }
+
+ phydev = phy_connect(netdev, phydev_name(phydev),
+ &ftgmac100_adjust_link, phy_intf);
+
+ if (IS_ERR(phydev)) {
+ netdev_err(netdev, "%s: Could not attach to PHY\n", netdev->name);
+ return PTR_ERR(phydev);
+ }
+
+ /* Indicate that we support PAUSE frames (see comment in
+ * Documentation/networking/phy.rst)
+ */
+ phy_support_asym_pause(phydev);
+
+ /* Display what we found */
+ phy_attached_info(phydev);
+
+ return 0;
+}
+
static int ftgmac100_open(struct net_device *netdev)
{
struct ftgmac100 *priv = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index e985ae008a97..0f90d2d5bb60 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -4338,7 +4338,7 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
}
INIT_WORK(&priv->tx_onestep_tstamp, dpaa2_eth_tx_onestep_tstamp);
-
+ mutex_init(&priv->onestep_tstamp_lock);
skb_queue_head_init(&priv->tx_skbs);
priv->rx_copybreak = DPAA2_ETH_DEFAULT_COPYBREAK;
@@ -4523,12 +4523,12 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
#ifdef CONFIG_DEBUG_FS
dpaa2_dbg_remove(priv);
#endif
+
+ unregister_netdev(net_dev);
rtnl_lock();
dpaa2_eth_disconnect_mac(priv);
rtnl_unlock();
- unregister_netdev(net_dev);
-
dpaa2_eth_dl_port_del(priv);
dpaa2_eth_dl_traps_unregister(priv);
dpaa2_eth_dl_free(priv);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
index d6eefbbf163f..cacd454ac696 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
@@ -532,6 +532,7 @@ static int dpaa2_switch_flower_parse_mirror_key(struct flow_cls_offload *cls,
struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
struct flow_dissector *dissector = rule->match.dissector;
struct netlink_ext_ack *extack = cls->common.extack;
+ int ret = -EOPNOTSUPP;
if (dissector->used_keys &
~(BIT(FLOW_DISSECTOR_KEY_BASIC) |
@@ -561,9 +562,10 @@ static int dpaa2_switch_flower_parse_mirror_key(struct flow_cls_offload *cls,
}
*vlan = (u16)match.key->vlan_id;
+ ret = 0;
}
- return 0;
+ return ret;
}
static int
diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c
index bbbde9f701c2..be0bd4b44926 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c
@@ -99,13 +99,13 @@ static void mpc52xx_fec_tx_timeout(struct net_device *dev, unsigned int txqueue)
netif_wake_queue(dev);
}
-static void mpc52xx_fec_set_paddr(struct net_device *dev, u8 *mac)
+static void mpc52xx_fec_set_paddr(struct net_device *dev, const u8 *mac)
{
struct mpc52xx_fec_priv *priv = netdev_priv(dev);
struct mpc52xx_fec __iomem *fec = priv->fec;
- out_be32(&fec->paddr1, *(u32 *)(&mac[0]));
- out_be32(&fec->paddr2, (*(u16 *)(&mac[4]) << 16) | FEC_PADDR2_TYPE);
+ out_be32(&fec->paddr1, *(const u32 *)(&mac[0]));
+ out_be32(&fec->paddr2, (*(const u16 *)(&mac[4]) << 16) | FEC_PADDR2_TYPE);
}
static int mpc52xx_fec_set_mac_address(struct net_device *dev, void *addr)
@@ -893,13 +893,15 @@ static int mpc52xx_fec_probe(struct platform_device *op)
rv = of_get_ethdev_address(np, ndev);
if (rv) {
struct mpc52xx_fec __iomem *fec = priv->fec;
+ u8 addr[ETH_ALEN] __aligned(4);
/*
* If the MAC address is not provided via DT then read
* it back from the controller regs
*/
- *(u32 *)(&ndev->dev_addr[0]) = in_be32(&fec->paddr1);
- *(u16 *)(&ndev->dev_addr[4]) = in_be32(&fec->paddr2) >> 16;
+ *(u32 *)(&addr[0]) = in_be32(&fec->paddr1);
+ *(u16 *)(&addr[4]) = in_be32(&fec->paddr2) >> 16;
+ eth_hw_addr_set(ndev, addr);
}
/*
diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c
index ff756265d58f..9a2c16d69e2c 100644
--- a/drivers/net/ethernet/freescale/gianfar_ethtool.c
+++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c
@@ -1464,6 +1464,7 @@ static int gfar_get_ts_info(struct net_device *dev,
ptp_node = of_find_compatible_node(NULL, NULL, "fsl,etsec-ptp");
if (ptp_node) {
ptp_dev = of_find_device_by_node(ptp_node);
+ of_node_put(ptp_node);
if (ptp_dev)
ptp = platform_get_drvdata(ptp_dev);
}
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 5f5d4f7aa813..160735484465 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -843,7 +843,7 @@ static inline bool gve_is_gqi(struct gve_priv *priv)
/* buffers */
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
struct page **page, dma_addr_t *dma,
- enum dma_data_direction);
+ enum dma_data_direction, gfp_t gfp_flags);
void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
enum dma_data_direction);
/* tx handling */
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index 2ad7f57f7e5b..f7621ab672b9 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -301,7 +301,7 @@ static int gve_adminq_parse_err(struct gve_priv *priv, u32 status)
*/
static int gve_adminq_kick_and_wait(struct gve_priv *priv)
{
- u32 tail, head;
+ int tail, head;
int i;
tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index f7f65c4bf993..54e51c8221b8 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -766,9 +766,9 @@ static void gve_free_rings(struct gve_priv *priv)
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
struct page **page, dma_addr_t *dma,
- enum dma_data_direction dir)
+ enum dma_data_direction dir, gfp_t gfp_flags)
{
- *page = alloc_page(GFP_KERNEL);
+ *page = alloc_page(gfp_flags);
if (!*page) {
priv->page_alloc_fail++;
return -ENOMEM;
@@ -811,7 +811,7 @@ static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
for (i = 0; i < pages; i++) {
err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
&qpl->page_buses[i],
- gve_qpl_dma_dir(priv, id));
+ gve_qpl_dma_dir(priv, id), GFP_KERNEL);
/* caller handles clean up */
if (err)
return -ENOMEM;
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index 9ddcc497f48e..e4e98aa7745f 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -86,7 +86,8 @@ static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
dma_addr_t dma;
int err;
- err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE);
+ err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE,
+ GFP_ATOMIC);
if (err)
return err;
@@ -608,6 +609,7 @@ static bool gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
*packet_size_bytes = skb->len + (skb->protocol ? ETH_HLEN : 0);
*work_done = work_cnt;
+ skb_record_rx_queue(skb, rx->q_num);
if (skb_is_nonlinear(skb))
napi_gro_frags(napi);
else
diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
index beb8bb079023..8c939628e2d8 100644
--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
@@ -157,7 +157,7 @@ static int gve_alloc_page_dqo(struct gve_priv *priv,
int err;
err = gve_alloc_page(priv, &priv->pdev->dev, &buf_state->page_info.page,
- &buf_state->addr, DMA_FROM_DEVICE);
+ &buf_state->addr, DMA_FROM_DEVICE, GFP_KERNEL);
if (err)
return err;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 7df87610ad96..21442a9bb996 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -2043,8 +2043,7 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data)
break;
}
- if (event_cause != HCLGEVF_VECTOR0_EVENT_OTHER)
- hclgevf_enable_vector(&hdev->misc_vector, true);
+ hclgevf_enable_vector(&hdev->misc_vector, true);
return IRQ_HANDLED;
}
diff --git a/drivers/net/ethernet/i825xx/ether1.c b/drivers/net/ethernet/i825xx/ether1.c
index c612ef526d16..3e7d7c4bafdc 100644
--- a/drivers/net/ethernet/i825xx/ether1.c
+++ b/drivers/net/ethernet/i825xx/ether1.c
@@ -986,6 +986,7 @@ static int
ether1_probe(struct expansion_card *ec, const struct ecard_id *id)
{
struct net_device *dev;
+ u8 addr[ETH_ALEN];
int i, ret = 0;
ether1_banner();
@@ -1015,7 +1016,8 @@ ether1_probe(struct expansion_card *ec, const struct ecard_id *id)
}
for (i = 0; i < 6; i++)
- dev->dev_addr[i] = readb(IDPROM_ADDRESS + (i << 2));
+ addr[i] = readb(IDPROM_ADDRESS + (i << 2));
+ eth_hw_addr_set(dev, addr);
if (ether1_init_2(dev)) {
ret = -ENODEV;
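netdev->dev_addr is no longer writable directly, so drivers assemble the address in a local buffer and publish it with eth_hw_addr_set(), as the ether1 hunk above does. A minimal sketch, with a hypothetical PROM-reader callback:

#include <linux/etherdevice.h>

static void demo_set_mac_from_prom(struct net_device *dev,
				   u8 (*read_prom)(int))
{
	u8 addr[ETH_ALEN];
	int i;

	for (i = 0; i < ETH_ALEN; i++)
		addr[i] = read_prom(i);	/* e.g. one byte per ID-PROM slot */
	eth_hw_addr_set(dev, addr);	/* copies into dev->dev_addr */
}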
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 59536bd5cab1..b423e94956f1 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -110,6 +110,7 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
struct ibmvnic_sub_crq_queue *tx_scrq);
static void free_long_term_buff(struct ibmvnic_adapter *adapter,
struct ibmvnic_long_term_buff *ltb);
+static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter);
struct ibmvnic_stat {
char name[ETH_GSTRING_LEN];
@@ -1424,7 +1425,7 @@ static int __ibmvnic_open(struct net_device *netdev)
rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP);
if (rc) {
ibmvnic_napi_disable(adapter);
- release_resources(adapter);
+ ibmvnic_disable_irqs(adapter);
return rc;
}
@@ -1474,9 +1475,6 @@ static int ibmvnic_open(struct net_device *netdev)
rc = init_resources(adapter);
if (rc) {
netdev_err(netdev, "failed to initialize resources\n");
- release_resources(adapter);
- release_rx_pools(adapter);
- release_tx_pools(adapter);
goto out;
}
}
@@ -1493,6 +1491,13 @@ out:
adapter->state = VNIC_OPEN;
rc = 0;
}
+
+ if (rc) {
+ release_resources(adapter);
+ release_rx_pools(adapter);
+ release_tx_pools(adapter);
+ }
+
return rc;
}
@@ -2208,6 +2213,19 @@ static const char *reset_reason_to_string(enum ibmvnic_reset_reason reason)
}
/*
+ * Initialize the init_done completion and return code values. We
+ * can get a transport event just after registering the CRQ and the
+ * tasklet will use this to communicate the transport event. To ensure
+ * we don't miss the notification/error, initialize these _before_
+ * registering the CRQ.
+ */
+static inline void reinit_init_done(struct ibmvnic_adapter *adapter)
+{
+ reinit_completion(&adapter->init_done);
+ adapter->init_done_rc = 0;
+}
+
+/*
* do_reset returns zero if we are able to keep processing reset events, or
* non-zero if we hit a fatal error and must halt.
*/
@@ -2313,6 +2331,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
*/
adapter->state = VNIC_PROBED;
+ reinit_init_done(adapter);
+
if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
rc = init_crq_queue(adapter);
} else if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
@@ -2456,7 +2476,8 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter,
*/
adapter->state = VNIC_PROBED;
- reinit_completion(&adapter->init_done);
+ reinit_init_done(adapter);
+
rc = init_crq_queue(adapter);
if (rc) {
netdev_err(adapter->netdev,
@@ -2597,22 +2618,82 @@ out:
static void __ibmvnic_reset(struct work_struct *work)
{
struct ibmvnic_adapter *adapter;
- bool saved_state = false;
+ unsigned int timeout = 5000;
struct ibmvnic_rwi *tmprwi;
+ bool saved_state = false;
struct ibmvnic_rwi *rwi;
unsigned long flags;
+ struct device *dev;
+ bool need_reset;
+ int num_fails = 0;
u32 reset_state;
int rc = 0;
adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
+ dev = &adapter->vdev->dev;
- if (test_and_set_bit_lock(0, &adapter->resetting)) {
+ /* Wait for ibmvnic_probe() to complete. If probe is taking too long
+ * or if another reset is in progress, defer work for now. If probe
+ * eventually fails it will flush and terminate our work.
+ *
+ * Three possibilities here:
+ * 1. Adapter being removed - just return
+ * 2. Timed out on probe or another reset in progress - delay the work
+ * 3. Completed probe - perform any resets in queue
+ */
+ if (adapter->state == VNIC_PROBING &&
+ !wait_for_completion_timeout(&adapter->probe_done, timeout)) {
+ dev_err(dev, "Reset thread timed out on probe");
queue_delayed_work(system_long_wq,
&adapter->ibmvnic_delayed_reset,
IBMVNIC_RESET_DELAY);
return;
}
+ /* adapter is done with probe (i.e state is never VNIC_PROBING now) */
+ if (adapter->state == VNIC_REMOVING)
+ return;
+
+ /* ->rwi_list is stable now (no one else is removing entries) */
+
+ /* ibmvnic_probe() may have purged the reset queue after we were
+ * scheduled to process a reset, so there may be no resets to process.
+ * Before setting the ->resetting bit though, we have to make sure
+ * that there is in fact a reset to process. Otherwise we may race
+ * with ibmvnic_open() and end up leaving the vnic down:
+ *
+ * __ibmvnic_reset() ibmvnic_open()
+ * ----------------- --------------
+ *
+ * set ->resetting bit
+ * find ->resetting bit is set
+ * set ->state to IBMVNIC_OPEN (i.e.
+ * assume reset will open device)
+ * return
+ * find reset queue empty
+ * return
+ *
+ * Neither performed vnic login/open and vnic stays down
+ *
+ * If we hold the lock and conditionally set the bit, either we
+ * or ibmvnic_open() will complete the open.
+ */
+ need_reset = false;
+ spin_lock(&adapter->rwi_lock);
+ if (!list_empty(&adapter->rwi_list)) {
+ if (test_and_set_bit_lock(0, &adapter->resetting)) {
+ queue_delayed_work(system_long_wq,
+ &adapter->ibmvnic_delayed_reset,
+ IBMVNIC_RESET_DELAY);
+ } else {
+ need_reset = true;
+ }
+ }
+ spin_unlock(&adapter->rwi_lock);
+
+ if (!need_reset)
+ return;
+
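The comment above documents the race this hunk closes: the ->resetting bit may only be claimed while holding rwi_lock, and only when the queue is non-empty; otherwise ibmvnic_open() can defer to a reset thread that then finds nothing to do. A minimal sketch of the claim step, simplified to return whether the caller won (the driver requeues delayed work where this sketch just gives up):

#include <linux/bitops.h>
#include <linux/list.h>
#include <linux/spinlock.h>

static bool demo_claim_work(spinlock_t *lock, struct list_head *queue,
			    unsigned long *resetting)
{
	bool claimed = false;

	spin_lock(lock);
	if (!list_empty(queue) &&
	    !test_and_set_bit_lock(0, resetting))
		claimed = true;	/* we own the bit and there is work to do */
	spin_unlock(lock);

	return claimed;
}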
rwi = get_next_rwi(adapter);
while (rwi) {
spin_lock_irqsave(&adapter->state_lock, flags);
@@ -2655,11 +2736,23 @@ static void __ibmvnic_reset(struct work_struct *work)
rc = do_hard_reset(adapter, rwi, reset_state);
rtnl_unlock();
}
- if (rc) {
- /* give backing device time to settle down */
+ if (rc)
+ num_fails++;
+ else
+ num_fails = 0;
+
+ /* If auto-priority-failover is enabled we can get
+ * back to back failovers during resets, resulting
+ * in at least two failed resets (from high-priority
+ * backing device to low-priority one and then back).
+ * If resets continue to fail beyond that, give the
+ * adapter some time to settle down before retrying.
+ */
+ if (num_fails >= 3) {
netdev_dbg(adapter->netdev,
- "[S:%s] Hard reset failed, waiting 60 secs\n",
- adapter_state_to_string(adapter->state));
+ "[S:%s] Hard reset failed %d times, waiting 60 secs\n",
+ adapter_state_to_string(adapter->state),
+ num_fails);
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(60 * HZ);
}
@@ -2717,12 +2810,23 @@ static void __ibmvnic_delayed_reset(struct work_struct *work)
__ibmvnic_reset(&adapter->ibmvnic_reset);
}
+static void flush_reset_queue(struct ibmvnic_adapter *adapter)
+{
+ struct list_head *entry, *tmp_entry;
+
+ if (!list_empty(&adapter->rwi_list)) {
+ list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) {
+ list_del(entry);
+ kfree(list_entry(entry, struct ibmvnic_rwi, list));
+ }
+ }
+}
+
static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
enum ibmvnic_reset_reason reason)
{
- struct list_head *entry, *tmp_entry;
- struct ibmvnic_rwi *rwi, *tmp;
struct net_device *netdev = adapter->netdev;
+ struct ibmvnic_rwi *rwi, *tmp;
unsigned long flags;
int ret;
@@ -2741,13 +2845,6 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
goto err;
}
- if (adapter->state == VNIC_PROBING) {
- netdev_warn(netdev, "Adapter reset during probe\n");
- adapter->init_done_rc = -EAGAIN;
- ret = EAGAIN;
- goto err;
- }
-
list_for_each_entry(tmp, &adapter->rwi_list, list) {
if (tmp->reset_reason == reason) {
netdev_dbg(netdev, "Skipping matching reset, reason=%s\n",
@@ -2765,10 +2862,9 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
/* if we just received a transport event,
* flush reset queue and process this reset
*/
- if (adapter->force_reset_recovery && !list_empty(&adapter->rwi_list)) {
- list_for_each_safe(entry, tmp_entry, &adapter->rwi_list)
- list_del(entry);
- }
+ if (adapter->force_reset_recovery)
+ flush_reset_queue(adapter);
+
rwi->reset_reason = reason;
list_add_tail(&rwi->list, &adapter->rwi_list);
netdev_dbg(adapter->netdev, "Scheduling reset (reason %s)\n",
@@ -3844,11 +3940,25 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry)
struct device *dev = &adapter->vdev->dev;
union ibmvnic_crq crq;
int max_entries;
+ int cap_reqs;
+
+ /* We send out 6 or 7 REQUEST_CAPABILITY CRQs below (depending on
+ * the PROMISC flag). Initialize this count upfront. When the tasklet
+ * receives a response to all of these, it will send the next protocol
+ * message (QUERY_IP_OFFLOAD).
+ */
+ if (!(adapter->netdev->flags & IFF_PROMISC) ||
+ adapter->promisc_supported)
+ cap_reqs = 7;
+ else
+ cap_reqs = 6;
if (!retry) {
/* Sub-CRQ entries are 32 byte long */
int entries_page = 4 * PAGE_SIZE / (sizeof(u64) * 4);
+ atomic_set(&adapter->running_cap_crqs, cap_reqs);
+
if (adapter->min_tx_entries_per_subcrq > entries_page ||
adapter->min_rx_add_entries_per_subcrq > entries_page) {
dev_err(dev, "Fatal, invalid entries per sub-crq\n");
@@ -3909,44 +4019,45 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry)
adapter->opt_rx_comp_queues;
adapter->req_rx_add_queues = adapter->max_rx_add_queues;
+ } else {
+ atomic_add(cap_reqs, &adapter->running_cap_crqs);
}
-
memset(&crq, 0, sizeof(crq));
crq.request_capability.first = IBMVNIC_CRQ_CMD;
crq.request_capability.cmd = REQUEST_CAPABILITY;
crq.request_capability.capability = cpu_to_be16(REQ_TX_QUEUES);
crq.request_capability.number = cpu_to_be64(adapter->req_tx_queues);
- atomic_inc(&adapter->running_cap_crqs);
+ cap_reqs--;
ibmvnic_send_crq(adapter, &crq);
crq.request_capability.capability = cpu_to_be16(REQ_RX_QUEUES);
crq.request_capability.number = cpu_to_be64(adapter->req_rx_queues);
- atomic_inc(&adapter->running_cap_crqs);
+ cap_reqs--;
ibmvnic_send_crq(adapter, &crq);
crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_QUEUES);
crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_queues);
- atomic_inc(&adapter->running_cap_crqs);
+ cap_reqs--;
ibmvnic_send_crq(adapter, &crq);
crq.request_capability.capability =
cpu_to_be16(REQ_TX_ENTRIES_PER_SUBCRQ);
crq.request_capability.number =
cpu_to_be64(adapter->req_tx_entries_per_subcrq);
- atomic_inc(&adapter->running_cap_crqs);
+ cap_reqs--;
ibmvnic_send_crq(adapter, &crq);
crq.request_capability.capability =
cpu_to_be16(REQ_RX_ADD_ENTRIES_PER_SUBCRQ);
crq.request_capability.number =
cpu_to_be64(adapter->req_rx_add_entries_per_subcrq);
- atomic_inc(&adapter->running_cap_crqs);
+ cap_reqs--;
ibmvnic_send_crq(adapter, &crq);
crq.request_capability.capability = cpu_to_be16(REQ_MTU);
crq.request_capability.number = cpu_to_be64(adapter->req_mtu);
- atomic_inc(&adapter->running_cap_crqs);
+ cap_reqs--;
ibmvnic_send_crq(adapter, &crq);
if (adapter->netdev->flags & IFF_PROMISC) {
@@ -3954,16 +4065,21 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry)
crq.request_capability.capability =
cpu_to_be16(PROMISC_REQUESTED);
crq.request_capability.number = cpu_to_be64(1);
- atomic_inc(&adapter->running_cap_crqs);
+ cap_reqs--;
ibmvnic_send_crq(adapter, &crq);
}
} else {
crq.request_capability.capability =
cpu_to_be16(PROMISC_REQUESTED);
crq.request_capability.number = cpu_to_be64(0);
- atomic_inc(&adapter->running_cap_crqs);
+ cap_reqs--;
ibmvnic_send_crq(adapter, &crq);
}
+
+ /* Keep at end to catch any discrepancy between expected and actual
+ * CRQs sent.
+ */
+ WARN_ON(cap_reqs != 0);
}
static int pending_scrq(struct ibmvnic_adapter *adapter,
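The send_request_cap() rework above replaces per-send atomic_inc() calls with a single atomic_set() before the first CRQ goes out; otherwise a fast response could decrement the counter to zero while later increments were still pending, triggering the next protocol step early. A minimal sketch of the set-upfront pattern, with hypothetical names:

#include <linux/atomic.h>
#include <linux/bug.h>

static void demo_send_batch(atomic_t *outstanding,
			    void (*send_one)(int), int nr_msgs)
{
	int remaining = nr_msgs;
	int i;

	/* publish the full count before any response can arrive */
	atomic_set(outstanding, nr_msgs);

	for (i = 0; i < nr_msgs; i++) {
		send_one(i);
		remaining--;
	}

	/* catch any mismatch between planned and actual sends */
	WARN_ON(remaining != 0);
}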
@@ -4357,118 +4473,132 @@ static void send_query_map(struct ibmvnic_adapter *adapter)
static void send_query_cap(struct ibmvnic_adapter *adapter)
{
union ibmvnic_crq crq;
+ int cap_reqs;
+
+ /* We send out 25 QUERY_CAPABILITY CRQs below. Initialize this count
+ * upfront. When the tasklet receives a response to all of these, it
+ * can send out the next protocol message (REQUEST_CAPABILITY).
+ */
+ cap_reqs = 25;
+
+ atomic_set(&adapter->running_cap_crqs, cap_reqs);
- atomic_set(&adapter->running_cap_crqs, 0);
memset(&crq, 0, sizeof(crq));
crq.query_capability.first = IBMVNIC_CRQ_CMD;
crq.query_capability.cmd = QUERY_CAPABILITY;
crq.query_capability.capability = cpu_to_be16(MIN_TX_QUEUES);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(MIN_RX_QUEUES);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_QUEUES);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(MAX_TX_QUEUES);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(MAX_RX_QUEUES);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_QUEUES);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability =
cpu_to_be16(MIN_TX_ENTRIES_PER_SUBCRQ);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability =
cpu_to_be16(MIN_RX_ADD_ENTRIES_PER_SUBCRQ);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability =
cpu_to_be16(MAX_TX_ENTRIES_PER_SUBCRQ);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability =
cpu_to_be16(MAX_RX_ADD_ENTRIES_PER_SUBCRQ);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(TCP_IP_OFFLOAD);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(PROMISC_SUPPORTED);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(MIN_MTU);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(MAX_MTU);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(MAX_MULTICAST_FILTERS);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(VLAN_HEADER_INSERTION);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(RX_VLAN_HEADER_INSERTION);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(MAX_TX_SG_ENTRIES);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(RX_SG_SUPPORTED);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(OPT_TX_COMP_SUB_QUEUES);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(OPT_RX_COMP_QUEUES);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability =
cpu_to_be16(OPT_RX_BUFADD_Q_PER_RX_COMP_Q);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability =
cpu_to_be16(OPT_TX_ENTRIES_PER_SUBCRQ);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability =
cpu_to_be16(OPT_RXBA_ENTRIES_PER_SUBCRQ);
- atomic_inc(&adapter->running_cap_crqs);
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
crq.query_capability.capability = cpu_to_be16(TX_RX_DESC_REQ);
- atomic_inc(&adapter->running_cap_crqs);
+
ibmvnic_send_crq(adapter, &crq);
+ cap_reqs--;
+
+ /* Keep at end to catch any discrepancy between expected and actual
+ * CRQs sent.
+ */
+ WARN_ON(cap_reqs != 0);
}
static void send_query_ip_offload(struct ibmvnic_adapter *adapter)
@@ -4772,6 +4902,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq,
char *name;
atomic_dec(&adapter->running_cap_crqs);
+ netdev_dbg(adapter->netdev, "Outstanding request-caps: %d\n",
+ atomic_read(&adapter->running_cap_crqs));
switch (be16_to_cpu(crq->request_capability_rsp.capability)) {
case REQ_TX_QUEUES:
req_value = &adapter->req_tx_queues;
@@ -4835,10 +4967,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq,
}
/* Done receiving requested capabilities, query IP offload support */
- if (atomic_read(&adapter->running_cap_crqs) == 0) {
- adapter->wait_capability = false;
+ if (atomic_read(&adapter->running_cap_crqs) == 0)
send_query_ip_offload(adapter);
- }
}
static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
@@ -5136,10 +5266,8 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq,
}
out:
- if (atomic_read(&adapter->running_cap_crqs) == 0) {
- adapter->wait_capability = false;
+ if (atomic_read(&adapter->running_cap_crqs) == 0)
send_request_cap(adapter, 0);
- }
}
static int send_query_phys_parms(struct ibmvnic_adapter *adapter)
@@ -5271,9 +5399,9 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
}
if (!completion_done(&adapter->init_done)) {
- complete(&adapter->init_done);
if (!adapter->init_done_rc)
adapter->init_done_rc = -EAGAIN;
+ complete(&adapter->init_done);
}
break;
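The two-line swap above matters for ordering: anything a waiter reads after wait_for_completion() returns must be written before complete() is called, so init_done_rc has to be set first. A minimal sketch of the publish-then-complete pattern:

#include <linux/completion.h>

struct demo_ctx {
	struct completion done;
	int rc;
};

static void demo_waker(struct demo_ctx *ctx, int rc)
{
	ctx->rc = rc;		/* publish the result first ... */
	complete(&ctx->done);	/* ... then wake the waiter */
}

static int demo_waiter(struct demo_ctx *ctx)
{
	wait_for_completion(&ctx->done);
	return ctx->rc;		/* guaranteed to observe the rc above */
}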
@@ -5296,6 +5424,13 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
adapter->fw_done_rc = -EIO;
complete(&adapter->fw_done);
}
+
+ /* if we got here during crq-init, retry crq-init */
+ if (!completion_done(&adapter->init_done)) {
+ adapter->init_done_rc = -EAGAIN;
+ complete(&adapter->init_done);
+ }
+
if (!completion_done(&adapter->stats_done))
complete(&adapter->stats_done);
if (test_bit(0, &adapter->resetting))
@@ -5435,33 +5570,21 @@ static void ibmvnic_tasklet(struct tasklet_struct *t)
struct ibmvnic_crq_queue *queue = &adapter->crq;
union ibmvnic_crq *crq;
unsigned long flags;
- bool done = false;
spin_lock_irqsave(&queue->lock, flags);
- while (!done) {
- /* Pull all the valid messages off the CRQ */
- while ((crq = ibmvnic_next_crq(adapter)) != NULL) {
- /* This barrier makes sure ibmvnic_next_crq()'s
- * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded
- * before ibmvnic_handle_crq()'s
- * switch(gen_crq->first) and switch(gen_crq->cmd).
- */
- dma_rmb();
- ibmvnic_handle_crq(crq, adapter);
- crq->generic.first = 0;
- }
- /* remain in tasklet until all
- * capabilities responses are received
+ /* Pull all the valid messages off the CRQ */
+ while ((crq = ibmvnic_next_crq(adapter)) != NULL) {
+ /* This barrier makes sure ibmvnic_next_crq()'s
+ * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded
+ * before ibmvnic_handle_crq()'s
+ * switch(gen_crq->first) and switch(gen_crq->cmd).
*/
- if (!adapter->wait_capability)
- done = true;
+ dma_rmb();
+ ibmvnic_handle_crq(crq, adapter);
+ crq->generic.first = 0;
}
- /* if capabilities CRQ's were sent in this tasklet, the following
- * tasklet must wait until all responses are received
- */
- if (atomic_read(&adapter->running_cap_crqs) != 0)
- adapter->wait_capability = true;
+
spin_unlock_irqrestore(&queue->lock, flags);
}
@@ -5624,10 +5747,6 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
adapter->from_passive_init = false;
- if (reset)
- reinit_completion(&adapter->init_done);
-
- adapter->init_done_rc = 0;
rc = ibmvnic_send_crq_init(adapter);
if (rc) {
dev_err(dev, "Send crq init failed with error %d\n", rc);
@@ -5641,12 +5760,14 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
if (adapter->init_done_rc) {
release_crq_queue(adapter);
+ dev_err(dev, "CRQ-init failed, %d\n", adapter->init_done_rc);
return adapter->init_done_rc;
}
if (adapter->from_passive_init) {
adapter->state = VNIC_OPEN;
adapter->from_passive_init = false;
+ dev_err(dev, "CRQ-init failed, passive-init\n");
return -EINVAL;
}
@@ -5686,6 +5807,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
struct ibmvnic_adapter *adapter;
struct net_device *netdev;
unsigned char *mac_addr_p;
+ unsigned long flags;
bool init_success;
int rc;
@@ -5730,6 +5852,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
spin_lock_init(&adapter->rwi_lock);
spin_lock_init(&adapter->state_lock);
mutex_init(&adapter->fw_lock);
+ init_completion(&adapter->probe_done);
init_completion(&adapter->init_done);
init_completion(&adapter->fw_done);
init_completion(&adapter->reset_done);
@@ -5740,6 +5863,33 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
init_success = false;
do {
+ reinit_init_done(adapter);
+
+ /* clear any failovers we got in the previous pass
+ * since we are reinitializing the CRQ
+ */
+ adapter->failover_pending = false;
+
+ /* If we had already initialized CRQ, we may have one or
+ * more resets queued already. Discard those and release
+ * the CRQ before initializing the CRQ again.
+ */
+ release_crq_queue(adapter);
+
+ /* Since we are still in PROBING state, __ibmvnic_reset()
+ * will not access the ->rwi_list and since we released CRQ,
+ * we won't get _new_ transport events. But there may be an
+ * ongoing ibmvnic_reset() call. So serialize access to
+ * rwi_list. If we win the race, ibmvnic_reset() could add
+ * a reset after we purged, but that's ok - we just may end
+ * up with an extra reset (i.e. similar to having two or more
+ * resets in the queue at once).
+ */
+ spin_lock_irqsave(&adapter->rwi_lock, flags);
+ flush_reset_queue(adapter);
+ spin_unlock_irqrestore(&adapter->rwi_lock, flags);
+
rc = init_crq_queue(adapter);
if (rc) {
dev_err(&dev->dev, "Couldn't initialize crq. rc=%d\n",
@@ -5771,12 +5921,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
goto ibmvnic_dev_file_err;
netif_carrier_off(netdev);
- rc = register_netdev(netdev);
- if (rc) {
- dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc);
- goto ibmvnic_register_fail;
- }
- dev_info(&dev->dev, "ibmvnic registered\n");
if (init_success) {
adapter->state = VNIC_PROBED;
@@ -5789,6 +5933,16 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
adapter->wait_for_reset = false;
adapter->last_reset_time = jiffies;
+
+ rc = register_netdev(netdev);
+ if (rc) {
+ dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc);
+ goto ibmvnic_register_fail;
+ }
+ dev_info(&dev->dev, "ibmvnic registered\n");
+
+ complete(&adapter->probe_done);
+
return 0;
ibmvnic_register_fail:
@@ -5803,6 +5957,17 @@ ibmvnic_stats_fail:
ibmvnic_init_fail:
release_sub_crqs(adapter, 1);
release_crq_queue(adapter);
+
+ /* cleanup worker thread after releasing CRQ so we don't get
+ * transport events (i.e. new work items for the worker thread).
+ */
+ adapter->state = VNIC_REMOVING;
+ complete(&adapter->probe_done);
+ flush_work(&adapter->ibmvnic_reset);
+ flush_delayed_work(&adapter->ibmvnic_delayed_reset);
+
+ flush_reset_queue(adapter);
+
mutex_destroy(&adapter->fw_lock);
free_netdev(netdev);
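The error-path cleanup above follows a strict order: mark the adapter VNIC_REMOVING and release the CRQ so no new work can be scheduled, flush the workers that may already be queued or running, and only then purge what they left behind. A minimal sketch of that shutdown sequence, with hypothetical names:

#include <linux/workqueue.h>

struct demo_adapter {
	int state;			/* e.g. a DEMO_REMOVING marker */
	struct work_struct reset_work;
	struct delayed_work delayed_reset;
};

static void demo_teardown(struct demo_adapter *ad)
{
	ad->state = 1;			/* workers now bail out early */
	/* ... release the event source so no new work is scheduled ... */
	flush_work(&ad->reset_work);	/* drain queued/running work */
	flush_delayed_work(&ad->delayed_reset);
	/* ... now safe to purge whatever the workers left behind ... */
}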
@@ -5879,10 +6044,14 @@ static ssize_t failover_store(struct device *dev, struct device_attribute *attr,
be64_to_cpu(session_token));
rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address,
H_SESSION_ERR_DETECTED, session_token, 0, 0);
- if (rc)
+ if (rc) {
netdev_err(netdev,
"H_VIOCTL initiated failover failed, rc %ld\n",
rc);
+ goto last_resort;
+ }
+
+ return count;
last_resort:
netdev_dbg(netdev, "Trying to send CRQ_CMD, the last resort\n");
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 4a8f36e0ab07..fa2d607a7b1b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -919,7 +919,6 @@ struct ibmvnic_adapter {
int login_rsp_buf_sz;
atomic_t running_cap_crqs;
- bool wait_capability;
struct ibmvnic_sub_crq_queue **tx_scrq ____cacheline_aligned;
struct ibmvnic_sub_crq_queue **rx_scrq ____cacheline_aligned;
@@ -931,6 +930,7 @@ struct ibmvnic_adapter {
struct ibmvnic_tx_pool *tx_pool;
struct ibmvnic_tx_pool *tso_pool;
+ struct completion probe_done;
struct completion init_done;
int init_done_rc;
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index c3def0ee7788..8d06c9d8ff8b 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -115,7 +115,8 @@ enum e1000_boards {
board_pch_lpt,
board_pch_spt,
board_pch_cnp,
- board_pch_tgp
+ board_pch_tgp,
+ board_pch_adp
};
struct e1000_ps_page {
@@ -502,6 +503,7 @@ extern const struct e1000_info e1000_pch_lpt_info;
extern const struct e1000_info e1000_pch_spt_info;
extern const struct e1000_info e1000_pch_cnp_info;
extern const struct e1000_info e1000_pch_tgp_info;
+extern const struct e1000_info e1000_pch_adp_info;
extern const struct e1000_info e1000_es2_info;
void e1000e_ptp_init(struct e1000_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
index bcf680e83811..13382df2f2ef 100644
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -630,6 +630,7 @@ struct e1000_phy_info {
bool disable_polarity_correction;
bool is_mdix;
bool polarity_correction;
+ bool reset_disable;
bool speed_downgraded;
bool autoneg_wait_to_complete;
};
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 5e4fc9b4e2ad..d60e2016d03c 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -2050,6 +2050,10 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw)
bool blocked = false;
int i = 0;
+ /* Check the PHY (LCD) reset flag */
+ if (hw->phy.reset_disable)
+ return true;
+
while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) &&
(i++ < 30))
usleep_range(10000, 11000);
@@ -4136,9 +4140,9 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw)
return ret_val;
if (!(data & valid_csum_mask)) {
- e_dbg("NVM Checksum Invalid\n");
+ e_dbg("NVM Checksum valid bit not set\n");
- if (hw->mac.type < e1000_pch_cnp) {
+ if (hw->mac.type < e1000_pch_tgp) {
data |= valid_csum_mask;
ret_val = e1000_write_nvm(hw, word, 1, &data);
if (ret_val)
@@ -6021,3 +6025,23 @@ const struct e1000_info e1000_pch_tgp_info = {
.phy_ops = &ich8_phy_ops,
.nvm_ops = &spt_nvm_ops,
};
+
+const struct e1000_info e1000_pch_adp_info = {
+ .mac = e1000_pch_adp,
+ .flags = FLAG_IS_ICH
+ | FLAG_HAS_WOL
+ | FLAG_HAS_HW_TIMESTAMP
+ | FLAG_HAS_CTRLEXT_ON_LOAD
+ | FLAG_HAS_AMT
+ | FLAG_HAS_FLASH
+ | FLAG_HAS_JUMBO_FRAMES
+ | FLAG_APME_IN_WUC,
+ .flags2 = FLAG2_HAS_PHY_STATS
+ | FLAG2_HAS_EEE,
+ .pba = 26,
+ .max_hw_frame_size = 9022,
+ .get_variants = e1000_get_variants_ich8lan,
+ .mac_ops = &ich8_mac_ops,
+ .phy_ops = &ich8_phy_ops,
+ .nvm_ops = &spt_nvm_ops,
+};
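Adding a board type to e1000e touches several tables that must stay in sync: the board enum, the e1000_info instance above, the e1000_info_tbl[] entry, and the PCI device IDs remapped to it further below. A minimal sketch of the indirection, with hypothetical names:

enum demo_board { demo_board_a, demo_board_b };

struct demo_info {
	int pba;
	int max_hw_frame_size;
};

static const struct demo_info demo_b_info = {
	.pba = 26,
	.max_hw_frame_size = 9022,
};

/* board enum -> info; the PCI ID table then maps device -> board enum */
static const struct demo_info *demo_info_tbl[] = {
	[demo_board_b] = &demo_b_info,
};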
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h
index 2504b11c3169..638a3ddd7ada 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.h
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h
@@ -271,6 +271,7 @@
#define I217_CGFREG_ENABLE_MTA_RESET 0x0002
#define I217_MEMPWR PHY_REG(772, 26)
#define I217_MEMPWR_DISABLE_SMB_RELEASE 0x0010
+#define I217_MEMPWR_MOEM 0x1000
/* Receive Address Initial CRC Calculation */
#define E1000_PCH_RAICC(_n) (0x05F50 + ((_n) * 4))
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 635a95927e93..c5bdef3ffe26 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -52,6 +52,7 @@ static const struct e1000_info *e1000_info_tbl[] = {
[board_pch_spt] = &e1000_pch_spt_info,
[board_pch_cnp] = &e1000_pch_cnp_info,
[board_pch_tgp] = &e1000_pch_tgp_info,
+ [board_pch_adp] = &e1000_pch_adp_info,
};
struct e1000_reg_info {
@@ -6341,7 +6342,8 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter)
u32 mac_data;
u16 phy_data;
- if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
+ if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
+ hw->mac.type >= e1000_pch_adp) {
/* Request ME configure the device for S0ix */
mac_data = er32(H2ME);
mac_data |= E1000_H2ME_START_DPG;
@@ -6490,7 +6492,8 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
u16 phy_data;
u32 i = 0;
- if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
+ if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
+ hw->mac.type >= e1000_pch_adp) {
/* Request ME unconfigure the device from S0ix */
mac_data = er32(H2ME);
mac_data &= ~E1000_H2ME_START_DPG;
@@ -6984,8 +6987,21 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev)
struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
struct e1000_adapter *adapter = netdev_priv(netdev);
struct pci_dev *pdev = to_pci_dev(dev);
+ struct e1000_hw *hw = &adapter->hw;
+ u16 phy_data;
int rc;
+ if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
+ hw->mac.type >= e1000_pch_adp) {
+ /* Mask OEM Bits / Gig Disable / Restart AN (772_26[12] = 1) */
+ e1e_rphy(hw, I217_MEMPWR, &phy_data);
+ phy_data |= I217_MEMPWR_MOEM;
+ e1e_wphy(hw, I217_MEMPWR, phy_data);
+
+ /* Disable LCD reset */
+ hw->phy.reset_disable = true;
+ }
+
e1000e_flush_lpic(pdev);
e1000e_pm_freeze(dev);
@@ -7007,6 +7023,8 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev)
struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
struct e1000_adapter *adapter = netdev_priv(netdev);
struct pci_dev *pdev = to_pci_dev(dev);
+ struct e1000_hw *hw = &adapter->hw;
+ u16 phy_data;
int rc;
/* Introduce S0ix implementation */
@@ -7017,6 +7035,17 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev)
if (rc)
return rc;
+ if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
+ hw->mac.type >= e1000_pch_adp) {
+ /* Unmask OEM Bits / Gig Disable / Restart AN 772_26[12] = 0 */
+ e1e_rphy(hw, I217_MEMPWR, &phy_data);
+ phy_data &= ~I217_MEMPWR_MOEM;
+ e1e_wphy(hw, I217_MEMPWR, phy_data);
+
+ /* Enable LCD reset */
+ hw->phy.reset_disable = false;
+ }
+
return e1000e_pm_thaw(dev);
}
@@ -7898,22 +7927,22 @@ static const struct pci_device_id e1000_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V14), board_pch_tgp },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM15), board_pch_tgp },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V15), board_pch_tgp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM23), board_pch_tgp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V23), board_pch_tgp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_tgp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_tgp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_tgp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_tgp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM22), board_pch_tgp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V22), board_pch_tgp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_tgp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_tgp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_tgp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_tgp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM20), board_pch_tgp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V20), board_pch_tgp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM21), board_pch_tgp },
- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_tgp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM23), board_pch_adp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V23), board_pch_adp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_adp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_adp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_adp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_adp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM22), board_pch_adp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V22), board_pch_adp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_adp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_adp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_adp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_adp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM20), board_pch_adp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V20), board_pch_adp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM21), board_pch_adp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_adp },
{ 0, 0, 0, 0, 0, 0, 0 } /* terminate list */
};
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 4d939af0a626..80c5cecaf2b5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -144,6 +144,7 @@ enum i40e_state_t {
__I40E_VIRTCHNL_OP_PENDING,
__I40E_RECOVERY_MODE,
__I40E_VF_RESETS_DISABLED, /* disable resets during i40e_remove */
+ __I40E_IN_REMOVE,
__I40E_VFS_RELEASING,
/* This must be last as it determines the size of the BITMAP */
__I40E_STATE_SIZE__,
@@ -174,7 +175,6 @@ enum i40e_interrupt_policy {
struct i40e_lump_tracking {
u16 num_entries;
- u16 search_hint;
u16 list[0];
#define I40E_PILE_VALID_BIT 0x8000
#define I40E_IWARP_IRQ_PILE_ID (I40E_PILE_VALID_BIT - 2)
@@ -848,12 +848,12 @@ struct i40e_vsi {
struct rtnl_link_stats64 net_stats_offsets;
struct i40e_eth_stats eth_stats;
struct i40e_eth_stats eth_stats_offsets;
- u32 tx_restart;
- u32 tx_busy;
+ u64 tx_restart;
+ u64 tx_busy;
u64 tx_linearize;
u64 tx_force_wb;
- u32 rx_buf_failed;
- u32 rx_page_failed;
+ u64 rx_buf_failed;
+ u64 rx_page_failed;
/* These are containers of ring pointers, allocated at run-time */
struct i40e_ring **rx_rings;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 2c1b1da1220e..9db5001297c7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -240,7 +240,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
(unsigned long int)vsi->net_stats_offsets.rx_compressed,
(unsigned long int)vsi->net_stats_offsets.tx_compressed);
dev_info(&pf->pdev->dev,
- " tx_restart = %d, tx_busy = %d, rx_buf_failed = %d, rx_page_failed = %d\n",
+ " tx_restart = %llu, tx_busy = %llu, rx_buf_failed = %llu, rx_page_failed = %llu\n",
vsi->tx_restart, vsi->tx_busy,
vsi->rx_buf_failed, vsi->rx_page_failed);
rcu_read_lock();
@@ -742,10 +742,8 @@ static void i40e_dbg_dump_vf(struct i40e_pf *pf, int vf_id)
vsi = pf->vsi[vf->lan_vsi_idx];
dev_info(&pf->pdev->dev, "vf %2d: VSI id=%d, seid=%d, qps=%d\n",
vf_id, vf->lan_vsi_id, vsi->seid, vf->num_queue_pairs);
- dev_info(&pf->pdev->dev, " num MDD=%lld, invalid msg=%lld, valid msg=%lld\n",
- vf->num_mdd_events,
- vf->num_invalid_msgs,
- vf->num_valid_msgs);
+ dev_info(&pf->pdev->dev, " num MDD=%lld\n",
+ vf->num_mdd_events);
} else {
dev_info(&pf->pdev->dev, "invalid VF id %d\n", vf_id);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 2a3d8aef7f4e..31b03fe78d3b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -196,10 +196,6 @@ int i40e_free_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem)
* @id: an owner id to stick on the items assigned
*
* Returns the base item index of the lump, or negative for error
- *
- * The search_hint trick and lack of advanced fit-finding only work
- * because we're highly likely to have all the same size lump requests.
- * Linear search time and any fragmentation should be minimal.
**/
static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
u16 needed, u16 id)
@@ -214,8 +210,21 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
return -EINVAL;
}
- /* start the linear search with an imperfect hint */
- i = pile->search_hint;
+ /* Allocate the last queue in the pile for the FDIR VSI
+ * so it doesn't fragment the qp_pile
+ */
+ if (pile == pf->qp_pile && pf->vsi[id]->type == I40E_VSI_FDIR) {
+ if (pile->list[pile->num_entries - 1] & I40E_PILE_VALID_BIT) {
+ dev_err(&pf->pdev->dev,
+ "Cannot allocate queue %d for I40E_VSI_FDIR\n",
+ pile->num_entries - 1);
+ return -ENOMEM;
+ }
+ pile->list[pile->num_entries - 1] = id | I40E_PILE_VALID_BIT;
+ return pile->num_entries - 1;
+ }
+
+ i = 0;
while (i < pile->num_entries) {
/* skip already allocated entries */
if (pile->list[i] & I40E_PILE_VALID_BIT) {
@@ -234,7 +243,6 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
for (j = 0; j < needed; j++)
pile->list[i+j] = id | I40E_PILE_VALID_BIT;
ret = i;
- pile->search_hint = i + j;
break;
}
@@ -257,7 +265,7 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id)
{
int valid_id = (id | I40E_PILE_VALID_BIT);
int count = 0;
- int i;
+ u16 i;
if (!pile || index >= pile->num_entries)
return -EINVAL;
@@ -269,8 +277,6 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id)
count++;
}
- if (count && index < pile->search_hint)
- pile->search_hint = index;
return count;
}
@@ -772,9 +778,9 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
struct rtnl_link_stats64 *ns; /* netdev stats */
struct i40e_eth_stats *oes;
struct i40e_eth_stats *es; /* device's eth stats */
- u32 tx_restart, tx_busy;
+ u64 tx_restart, tx_busy;
struct i40e_ring *p;
- u32 rx_page, rx_buf;
+ u64 rx_page, rx_buf;
u64 bytes, packets;
unsigned int start;
u64 tx_linearize;
@@ -10574,15 +10580,9 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
}
i40e_get_oem_version(&pf->hw);
- if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
- ((hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver <= 33) ||
- hw->aq.fw_maj_ver < 4) && hw->mac.type == I40E_MAC_XL710) {
- /* The following delay is necessary for 4.33 firmware and older
- * to recover after EMP reset. 200 ms should suffice but we
- * put here 300 ms to be sure that FW is ready to operate
- * after reset.
- */
- mdelay(300);
+ if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) {
+ /* The following delay is necessary for firmware update. */
+ mdelay(1000);
}
/* re-verify the eeprom if we just had an EMP reset */
@@ -10853,6 +10853,9 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit,
bool lock_acquired)
{
int ret;
+
+ if (test_bit(__I40E_IN_REMOVE, pf->state))
+ return;
/* Now we wait for GRST to settle out.
* We don't have to delete the VEBs or VSIs from the hw switch
* because the reset will make them disappear.
@@ -11792,7 +11795,6 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
return -ENOMEM;
pf->irq_pile->num_entries = vectors;
- pf->irq_pile->search_hint = 0;
/* track first vector for misc interrupts, ignore return */
(void)i40e_get_lump(pf, pf->irq_pile, 1, I40E_PILE_VALID_BIT - 1);
@@ -12213,6 +12215,8 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count)
vsi->req_queue_pairs = queue_count;
i40e_prep_for_reset(pf);
+ if (test_bit(__I40E_IN_REMOVE, pf->state))
+ return pf->alloc_rss_size;
pf->alloc_rss_size = new_rss_size;
@@ -12595,7 +12599,6 @@ static int i40e_sw_init(struct i40e_pf *pf)
goto sw_init_done;
}
pf->qp_pile->num_entries = pf->hw.func_caps.num_tx_qp;
- pf->qp_pile->search_hint = 0;
pf->tx_timeout_recovery_level = 1;
@@ -13040,6 +13043,10 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
if (need_reset)
i40e_prep_for_reset(pf);
+ /* VSI shall be deleted in a moment, just return EINVAL */
+ if (test_bit(__I40E_IN_REMOVE, pf->state))
+ return -EINVAL;
+
old_prog = xchg(&vsi->xdp_prog, prog);
if (need_reset) {
@@ -15930,8 +15937,13 @@ static void i40e_remove(struct pci_dev *pdev)
i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), 0);
i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0);
- while (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
+ /* Grab __I40E_RESET_RECOVERY_PENDING and set __I40E_IN_REMOVE
+ * flags, once they are set, i40e_rebuild should not be called as
+ * i40e_prep_for_reset always returns early.
+ */
+ while (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
usleep_range(1000, 2000);
+ set_bit(__I40E_IN_REMOVE, pf->state);
if (pf->flags & I40E_FLAG_SRIOV_ENABLED) {
set_bit(__I40E_VF_RESETS_DISABLED, pf->state);
@@ -16130,6 +16142,9 @@ static void i40e_pci_error_reset_done(struct pci_dev *pdev)
{
struct i40e_pf *pf = pci_get_drvdata(pdev);
+ if (test_bit(__I40E_IN_REMOVE, pf->state))
+ return;
+
i40e_reset_and_rebuild(pf, false, false);
}
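__I40E_IN_REMOVE is a one-way latch: i40e_remove() sets it after grabbing __I40E_RESET_RECOVERY_PENDING, and every rebuild entry point touched above checks it and returns early. A minimal sketch of the latch, with hypothetical names:

#include <linux/bitops.h>

enum { DEMO_RESET_PENDING, DEMO_IN_REMOVE };

static void demo_rebuild(unsigned long *state)
{
	if (test_bit(DEMO_IN_REMOVE, state))
		return;		/* device is going away; don't rebuild */
	/* ... reset and rebuild hardware state ... */
}

static void demo_remove(unsigned long *state)
{
	/* own the reset path, then latch the remove flag */
	while (test_and_set_bit(DEMO_RESET_PENDING, state))
		;		/* the real code sleeps between retries */
	set_bit(DEMO_IN_REMOVE, state);
	/* rebuilds from here on are no-ops */
}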
diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
index 8d0588a27a05..1908eed4fa5e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
@@ -413,6 +413,9 @@
#define I40E_VFINT_DYN_CTLN(_INTVF) (0x00024800 + ((_INTVF) * 4)) /* _i=0...511 */ /* Reset: VFR */
#define I40E_VFINT_DYN_CTLN_CLEARPBA_SHIFT 1
#define I40E_VFINT_DYN_CTLN_CLEARPBA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN_CLEARPBA_SHIFT)
+#define I40E_VFINT_ICR0_ADMINQ_SHIFT 30
+#define I40E_VFINT_ICR0_ADMINQ_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_ADMINQ_SHIFT)
+#define I40E_VFINT_ICR0_ENA(_VF) (0x0002C000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
#define I40E_VPINT_AEQCTL(_VF) (0x0002B800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
#define I40E_VPINT_AEQCTL_MSIX_INDX_SHIFT 0
#define I40E_VPINT_AEQCTL_ITR_INDX_SHIFT 11
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index b785d09c19f8..2606e8f0f19b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1377,6 +1377,32 @@ static i40e_status i40e_config_vf_promiscuous_mode(struct i40e_vf *vf,
}
/**
+ * i40e_sync_vfr_reset
+ * @hw: pointer to hw struct
+ * @vf_id: VF identifier
+ *
+ * Before triggering a hardware reset, we need to know that no other process has
+ * reserved the hardware for any reset operations. This check is done by
+ * examining the status of the RSTAT1 register used to signal the reset.
+ **/
+static int i40e_sync_vfr_reset(struct i40e_hw *hw, int vf_id)
+{
+ u32 reg;
+ int i;
+
+ for (i = 0; i < I40E_VFR_WAIT_COUNT; i++) {
+ reg = rd32(hw, I40E_VFINT_ICR0_ENA(vf_id)) &
+ I40E_VFINT_ICR0_ADMINQ_MASK;
+ if (reg)
+ return 0;
+
+ usleep_range(100, 200);
+ }
+
+ return -EAGAIN;
+}
+
+/**
* i40e_trigger_vf_reset
* @vf: pointer to the VF structure
* @flr: VFLR was issued or not
@@ -1390,9 +1416,11 @@ static void i40e_trigger_vf_reset(struct i40e_vf *vf, bool flr)
struct i40e_pf *pf = vf->pf;
struct i40e_hw *hw = &pf->hw;
u32 reg, reg_idx, bit_idx;
+ bool vf_active;
+ u32 radq;
/* warn the VF */
- clear_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states);
+ vf_active = test_and_clear_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states);
/* Disable VF's configuration API during reset. The flag is re-enabled
* in i40e_alloc_vf_res(), when it's safe again to access VF's VSI.
@@ -1406,7 +1434,19 @@ static void i40e_trigger_vf_reset(struct i40e_vf *vf, bool flr)
* just need to clean up, so don't hit the VFRTRIG register.
*/
if (!flr) {
- /* reset VF using VPGEN_VFRTRIG reg */
+ /* Sync VFR reset before triggering the next one */
+ radq = rd32(hw, I40E_VFINT_ICR0_ENA(vf->vf_id)) &
+ I40E_VFINT_ICR0_ADMINQ_MASK;
+ if (vf_active && !radq)
+ /* wait for the VF driver to finish its reset */
+ if (i40e_sync_vfr_reset(hw, vf->vf_id))
+ dev_info(&pf->pdev->dev,
+ "Reset VF %d never finished\n",
+ vf->vf_id);
+
+ /* Reset VF using VPGEN_VFRTRIG reg. It also sets the
+ * reset-in-progress state in the RSTAT1 register.
+ */
reg = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id));
reg |= I40E_VPGEN_VFRTRIG_VFSWR_MASK;
wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id), reg);
@@ -1877,19 +1917,17 @@ sriov_configure_out:
/***********************virtual channel routines******************/
/**
- * i40e_vc_send_msg_to_vf_ex
+ * i40e_vc_send_msg_to_vf
* @vf: pointer to the VF info
* @v_opcode: virtual channel opcode
* @v_retval: virtual channel return value
* @msg: pointer to the msg buffer
* @msglen: msg length
- * @is_quiet: true for not printing unsuccessful return values, false otherwise
*
* send msg to VF
**/
-static int i40e_vc_send_msg_to_vf_ex(struct i40e_vf *vf, u32 v_opcode,
- u32 v_retval, u8 *msg, u16 msglen,
- bool is_quiet)
+static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode,
+ u32 v_retval, u8 *msg, u16 msglen)
{
struct i40e_pf *pf;
struct i40e_hw *hw;
@@ -1904,25 +1942,6 @@ static int i40e_vc_send_msg_to_vf_ex(struct i40e_vf *vf, u32 v_opcode,
hw = &pf->hw;
abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
- /* single place to detect unsuccessful return values */
- if (v_retval && !is_quiet) {
- vf->num_invalid_msgs++;
- dev_info(&pf->pdev->dev, "VF %d failed opcode %d, retval: %d\n",
- vf->vf_id, v_opcode, v_retval);
- if (vf->num_invalid_msgs >
- I40E_DEFAULT_NUM_INVALID_MSGS_ALLOWED) {
- dev_err(&pf->pdev->dev,
- "Number of invalid messages exceeded for VF %d\n",
- vf->vf_id);
- dev_err(&pf->pdev->dev, "Use PF Control I/F to enable the VF\n");
- set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
- }
- } else {
- vf->num_valid_msgs++;
- /* reset the invalid counter, if a valid message is received. */
- vf->num_invalid_msgs = 0;
- }
-
aq_ret = i40e_aq_send_msg_to_vf(hw, abs_vf_id, v_opcode, v_retval,
msg, msglen, NULL);
if (aq_ret) {
@@ -1936,23 +1955,6 @@ static int i40e_vc_send_msg_to_vf_ex(struct i40e_vf *vf, u32 v_opcode,
}
/**
- * i40e_vc_send_msg_to_vf
- * @vf: pointer to the VF info
- * @v_opcode: virtual channel opcode
- * @v_retval: virtual channel return value
- * @msg: pointer to the msg buffer
- * @msglen: msg length
- *
- * send msg to VF
- **/
-static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode,
- u32 v_retval, u8 *msg, u16 msglen)
-{
- return i40e_vc_send_msg_to_vf_ex(vf, v_opcode, v_retval,
- msg, msglen, false);
-}
-
-/**
* i40e_vc_send_resp_to_vf
* @vf: pointer to the VF info
* @opcode: operation code
@@ -2618,6 +2620,59 @@ error_param:
}
/**
+ * i40e_check_enough_queue - find a large enough run of free queues
+ * @vf: pointer to the VF info
+ * @needed: the number of items needed
+ *
+ * Returns the base item index of the queue, or negative for error
+ **/
+static int i40e_check_enough_queue(struct i40e_vf *vf, u16 needed)
+{
+ unsigned int i, cur_queues, more, pool_size;
+ struct i40e_lump_tracking *pile;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi;
+
+ vsi = pf->vsi[vf->lan_vsi_idx];
+ cur_queues = vsi->alloc_queue_pairs;
+
+ /* the currently allocated queues may already be enough */
+ if (cur_queues >= needed)
+ return vsi->base_queue;
+
+ pile = pf->qp_pile;
+ if (cur_queues > 0) {
+ /* if some queues are already allocated, just check
+ * whether there are enough free queues remaining
+ * immediately behind the allocated block.
+ */
+ more = needed - cur_queues;
+ for (i = vsi->base_queue + cur_queues;
+ i < pile->num_entries; i++) {
+ if (pile->list[i] & I40E_PILE_VALID_BIT)
+ break;
+
+ if (more-- == 1)
+ /* there is enough */
+ return vsi->base_queue;
+ }
+ }
+
+ pool_size = 0;
+ for (i = 0; i < pile->num_entries; i++) {
+ if (pile->list[i] & I40E_PILE_VALID_BIT) {
+ pool_size = 0;
+ continue;
+ }
+ if (needed <= ++pool_size)
+ /* there is enough */
+ return i;
+ }
+
+ return -ENOMEM;
+}
+
+/**
* i40e_vc_request_queues_msg
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
@@ -2651,6 +2706,12 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg)
req_pairs - cur_pairs,
pf->queues_left);
vfres->num_queue_pairs = pf->queues_left + cur_pairs;
+ } else if (i40e_check_enough_queue(vf, req_pairs) < 0) {
+ dev_warn(&pf->pdev->dev,
+ "VF %d requested %d more queues, but there is not enough for it.\n",
+ vf->vf_id,
+ req_pairs - cur_pairs);
+ vfres->num_queue_pairs = cur_pairs;
} else {
/* successful request */
vf->num_req_queues = req_pairs;
@@ -2723,7 +2784,6 @@ error_param:
* i40e_check_vf_permission
* @vf: pointer to the VF info
* @al: MAC address list from virtchnl
- * @is_quiet: set true for printing msg without opcode info, false otherwise
*
* Check that the given list of MAC addresses is allowed. Will return -EPERM
* if any address in the list is not valid. Checks the following conditions:
@@ -2738,8 +2798,7 @@ error_param:
* addresses might not be accurate.
**/
static inline int i40e_check_vf_permission(struct i40e_vf *vf,
- struct virtchnl_ether_addr_list *al,
- bool *is_quiet)
+ struct virtchnl_ether_addr_list *al)
{
struct i40e_pf *pf = vf->pf;
struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx];
@@ -2747,7 +2806,6 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
int mac2add_cnt = 0;
int i;
- *is_quiet = false;
for (i = 0; i < al->num_elements; i++) {
struct i40e_mac_filter *f;
u8 *addr = al->list[i].addr;
@@ -2771,7 +2829,6 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
!ether_addr_equal(addr, vf->default_lan_addr.addr)) {
dev_err(&pf->pdev->dev,
"VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
- *is_quiet = true;
return -EPERM;
}
@@ -2822,7 +2879,6 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
(struct virtchnl_ether_addr_list *)msg;
struct i40e_pf *pf = vf->pf;
struct i40e_vsi *vsi = NULL;
- bool is_quiet = false;
i40e_status ret = 0;
int i;
@@ -2839,7 +2895,7 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
*/
spin_lock_bh(&vsi->mac_filter_hash_lock);
- ret = i40e_check_vf_permission(vf, al, &is_quiet);
+ ret = i40e_check_vf_permission(vf, al);
if (ret) {
spin_unlock_bh(&vsi->mac_filter_hash_lock);
goto error_param;
@@ -2877,8 +2933,8 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
error_param:
/* send the response to the VF */
- return i40e_vc_send_msg_to_vf_ex(vf, VIRTCHNL_OP_ADD_ETH_ADDR,
- ret, NULL, 0, is_quiet);
+ return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_ETH_ADDR,
+ ret, NULL, 0);
}
/**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 49575a640a84..a554d0a0b09b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -10,8 +10,6 @@
#define I40E_VIRTCHNL_SUPPORTED_QTYPES 2
-#define I40E_DEFAULT_NUM_INVALID_MSGS_ALLOWED 10
-
#define I40E_VLAN_PRIORITY_SHIFT 13
#define I40E_VLAN_MASK 0xFFF
#define I40E_PRIORITY_MASK 0xE000
@@ -19,6 +17,7 @@
#define I40E_MAX_VF_PROMISC_FLAGS 3
#define I40E_VF_STATE_WAIT_COUNT 20
+#define I40E_VFR_WAIT_COUNT 100
/* Various queue ctrls */
enum i40e_queue_ctrl {
@@ -91,9 +90,6 @@ struct i40e_vf {
u8 num_queue_pairs; /* num of qps assigned to VF vsis */
u8 num_req_queues; /* num of requested qps */
u64 num_mdd_events; /* num of mdd events detected */
- /* num of continuous malformed or invalid msgs detected */
- u64 num_invalid_msgs;
- u64 num_valid_msgs; /* num of valid msgs detected */
unsigned long vf_caps; /* vf's adv. capabilities */
unsigned long vf_states; /* vf's runtime states */
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index 59806d1f7e79..4babe4705a55 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -201,6 +201,10 @@ enum iavf_state_t {
__IAVF_RUNNING, /* opened, working */
};
+enum iavf_critical_section_t {
+ __IAVF_IN_REMOVE_TASK, /* device being removed */
+};
+
#define IAVF_CLOUD_FIELD_OMAC 0x01
#define IAVF_CLOUD_FIELD_IMAC 0x02
#define IAVF_CLOUD_FIELD_IVLAN 0x04
@@ -246,7 +250,6 @@ struct iavf_adapter {
struct list_head mac_filter_list;
struct mutex crit_lock;
struct mutex client_lock;
- struct mutex remove_lock;
/* Lock to protect accesses to MAC and VLAN lists */
spinlock_t mac_vlan_list_lock;
char misc_vector_name[IFNAMSIZ + 9];
@@ -284,6 +287,8 @@ struct iavf_adapter {
#define IAVF_FLAG_LEGACY_RX BIT(15)
#define IAVF_FLAG_REINIT_ITR_NEEDED BIT(16)
#define IAVF_FLAG_QUEUES_DISABLED BIT(17)
+#define IAVF_FLAG_SETUP_NETDEV_FEATURES BIT(18)
+#define IAVF_FLAG_REINIT_MSIX_NEEDED BIT(20)
/* duplicates for common code */
#define IAVF_FLAG_DCB_ENABLED 0
/* flags for admin queue service task */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 8125b9120615..0e178a0a59c5 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -302,8 +302,9 @@ static irqreturn_t iavf_msix_aq(int irq, void *data)
rd32(hw, IAVF_VFINT_ICR01);
rd32(hw, IAVF_VFINT_ICR0_ENA1);
- /* schedule work on the private workqueue */
- queue_work(iavf_wq, &adapter->adminq_task);
+ if (adapter->state != __IAVF_REMOVE)
+ /* schedule work on the private workqueue */
+ queue_work(iavf_wq, &adapter->adminq_task);
return IRQ_HANDLED;
}
@@ -1136,8 +1137,7 @@ void iavf_down(struct iavf_adapter *adapter)
rss->state = IAVF_ADV_RSS_DEL_REQUEST;
spin_unlock_bh(&adapter->adv_rss_lock);
- if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) &&
- adapter->state != __IAVF_RESETTING) {
+ if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)) {
/* cancel any current operation */
adapter->current_op = VIRTCHNL_OP_UNKNOWN;
/* Schedule operations to close down the HW. Don't wait
@@ -2120,7 +2120,7 @@ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter)
"Requested %d queues, but PF only gave us %d.\n",
num_req_queues,
adapter->vsi_res->num_queue_pairs);
- adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->flags |= IAVF_FLAG_REINIT_MSIX_NEEDED;
adapter->num_req_queues = adapter->vsi_res->num_queue_pairs;
iavf_schedule_reset(adapter);
@@ -2374,17 +2374,22 @@ static void iavf_watchdog_task(struct work_struct *work)
struct iavf_hw *hw = &adapter->hw;
u32 reg_val;
- if (!mutex_trylock(&adapter->crit_lock))
+ if (!mutex_trylock(&adapter->crit_lock)) {
+ if (adapter->state == __IAVF_REMOVE)
+ return;
+
goto restart_watchdog;
+ }
if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
iavf_change_state(adapter, __IAVF_COMM_FAILED);
- if (adapter->flags & IAVF_FLAG_RESET_NEEDED &&
- adapter->state != __IAVF_RESETTING) {
- iavf_change_state(adapter, __IAVF_RESETTING);
+ if (adapter->flags & IAVF_FLAG_RESET_NEEDED) {
adapter->aq_required = 0;
adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+ mutex_unlock(&adapter->crit_lock);
+ queue_work(iavf_wq, &adapter->reset_task);
+ return;
}
switch (adapter->state) {
@@ -2419,6 +2424,15 @@ static void iavf_watchdog_task(struct work_struct *work)
msecs_to_jiffies(1));
return;
case __IAVF_INIT_FAILED:
+ if (test_bit(__IAVF_IN_REMOVE_TASK,
+ &adapter->crit_section)) {
+ /* Do not update the state and do not reschedule
+ * the watchdog task; iavf_remove should handle this state
+ * as it can loop forever
+ */
+ mutex_unlock(&adapter->crit_lock);
+ return;
+ }
if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) {
dev_err(&adapter->pdev->dev,
"Failed to communicate with PF; waiting before retry\n");
@@ -2435,6 +2449,17 @@ static void iavf_watchdog_task(struct work_struct *work)
queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ);
return;
case __IAVF_COMM_FAILED:
+ if (test_bit(__IAVF_IN_REMOVE_TASK,
+ &adapter->crit_section)) {
+ /* Set state to __IAVF_INIT_FAILED and perform remove
+ * steps. Remove IAVF_FLAG_PF_COMMS_FAILED so the task
+ * doesn't bring the state back to __IAVF_COMM_FAILED.
+ */
+ iavf_change_state(adapter, __IAVF_INIT_FAILED);
+ adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
+ mutex_unlock(&adapter->crit_lock);
+ return;
+ }
reg_val = rd32(hw, IAVF_VFGEN_RSTAT) &
IAVF_VFGEN_RSTAT_VFR_STATE_MASK;
if (reg_val == VIRTCHNL_VFR_VFACTIVE ||
@@ -2507,7 +2532,8 @@ static void iavf_watchdog_task(struct work_struct *work)
schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5));
mutex_unlock(&adapter->crit_lock);
restart_watchdog:
- queue_work(iavf_wq, &adapter->adminq_task);
+ if (adapter->state >= __IAVF_DOWN)
+ queue_work(iavf_wq, &adapter->adminq_task);
if (adapter->aq_required)
queue_delayed_work(iavf_wq, &adapter->watchdog_task,
msecs_to_jiffies(20));
@@ -2515,6 +2541,13 @@ restart_watchdog:
queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2);
}
+/**
+ * iavf_disable_vf - disable VF
+ * @adapter: board private structure
+ *
+ * Set communication failed flag and free all resources.
+ * NOTE: This function is expected to be called with crit_lock held.
+ **/
static void iavf_disable_vf(struct iavf_adapter *adapter)
{
struct iavf_mac_filter *f, *ftmp;
@@ -2569,7 +2602,6 @@ static void iavf_disable_vf(struct iavf_adapter *adapter)
memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE);
iavf_shutdown_adminq(&adapter->hw);
adapter->netdev->flags &= ~IFF_UP;
- mutex_unlock(&adapter->crit_lock);
adapter->flags &= ~IAVF_FLAG_RESET_PENDING;
iavf_change_state(adapter, __IAVF_DOWN);
wake_up(&adapter->down_waitqueue);
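
The new kernel-doc makes the locking contract explicit: the caller, not iavf_disable_vf() itself, owns crit_lock for the whole teardown (the matching mutex_unlock() is removed from the function body below). One way to make such a contract self-checking is lockdep_assert_held(), as the ice portion of this same series does with vf->cfg_lock. A sketch under that assumption, with hypothetical names:

#include <linux/lockdep.h>
#include <linux/mutex.h>

struct my_adapter {
	struct mutex crit_lock;
};

static void my_disable_vf(struct my_adapter *adapter)
{
	/* Fires a lockdep splat if a caller forgets the documented
	 * locking contract, instead of silently racing.
	 */
	lockdep_assert_held(&adapter->crit_lock);
	/* ... free filters, rings and the admin queue; unlocking is
	 * the caller's job ...
	 */
}
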
@@ -2601,13 +2633,13 @@ static void iavf_reset_task(struct work_struct *work)
/* When the device is being removed it doesn't make sense to run the
 * reset task; just return in such a case.
*/
- if (mutex_is_locked(&adapter->remove_lock))
- return;
+ if (!mutex_trylock(&adapter->crit_lock)) {
+ if (adapter->state != __IAVF_REMOVE)
+ queue_work(iavf_wq, &adapter->reset_task);
- if (iavf_lock_timeout(&adapter->crit_lock, 200)) {
- schedule_work(&adapter->reset_task);
return;
}
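
This trylock-or-requeue idiom recurs throughout the hunks above (watchdog, reset and adminq tasks): the work item must never sleep on crit_lock, because remove() can hold it for a long time while flushing these very work items, which would deadlock. A compressed sketch, reusing the hypothetical my_adapter/my_wq names from the earlier sketches:

#include <linux/mutex.h>
#include <linux/workqueue.h>

static void my_reset_task(struct work_struct *work)
{
	struct my_adapter *adapter =
		container_of(work, struct my_adapter, reset_task);

	/* Don't block on crit_lock: if it is contended, either remove
	 * is in progress (give up) or another task holds it briefly
	 * (requeue and retry later).
	 */
	if (!mutex_trylock(&adapter->crit_lock)) {
		if (adapter->state != MY_STATE_REMOVE)
			queue_work(my_wq, &adapter->reset_task);
		return;
	}

	/* ... perform the reset with the lock held ... */

	mutex_unlock(&adapter->crit_lock);
}
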
+
while (!mutex_trylock(&adapter->client_lock))
usleep_range(500, 1000);
if (CLIENT_ENABLED(adapter)) {
@@ -2662,6 +2694,7 @@ static void iavf_reset_task(struct work_struct *work)
reg_val);
iavf_disable_vf(adapter);
mutex_unlock(&adapter->client_lock);
+ mutex_unlock(&adapter->crit_lock);
return; /* Do not attempt to reinit. It's dead, Jim. */
}
@@ -2670,8 +2703,7 @@ continue_reset:
* ndo_open() returning, so we can't assume it means all our open
* tasks have finished, since we're not holding the rtnl_lock here.
*/
- running = ((adapter->state == __IAVF_RUNNING) ||
- (adapter->state == __IAVF_RESETTING));
+ running = adapter->state == __IAVF_RUNNING;
if (running) {
netdev->flags &= ~IFF_UP;
@@ -2701,7 +2733,8 @@ continue_reset:
err);
adapter->aq_required = 0;
- if (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED) {
+ if ((adapter->flags & IAVF_FLAG_REINIT_MSIX_NEEDED) ||
+ (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED)) {
err = iavf_reinit_interrupt_scheme(adapter);
if (err)
goto reset_err;
@@ -2773,12 +2806,13 @@ continue_reset:
if (err)
goto reset_err;
- if (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED) {
+ if ((adapter->flags & IAVF_FLAG_REINIT_MSIX_NEEDED) ||
+ (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED)) {
err = iavf_request_traffic_irqs(adapter, netdev->name);
if (err)
goto reset_err;
- adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->flags &= ~IAVF_FLAG_REINIT_MSIX_NEEDED;
}
iavf_configure(adapter);
@@ -2793,6 +2827,9 @@ continue_reset:
iavf_change_state(adapter, __IAVF_DOWN);
wake_up(&adapter->down_waitqueue);
}
+
+ adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+
mutex_unlock(&adapter->client_lock);
mutex_unlock(&adapter->crit_lock);
@@ -2826,13 +2863,19 @@ static void iavf_adminq_task(struct work_struct *work)
if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
goto out;
+ if (!mutex_trylock(&adapter->crit_lock)) {
+ if (adapter->state == __IAVF_REMOVE)
+ return;
+
+ queue_work(iavf_wq, &adapter->adminq_task);
+ goto out;
+ }
+
event.buf_len = IAVF_MAX_AQ_BUF_SIZE;
event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
if (!event.msg_buf)
goto out;
- if (iavf_lock_timeout(&adapter->crit_lock, 200))
- goto freedom;
do {
ret = iavf_clean_arq_element(hw, &event, &pending);
v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
@@ -2848,6 +2891,24 @@ static void iavf_adminq_task(struct work_struct *work)
} while (pending);
mutex_unlock(&adapter->crit_lock);
+ if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES)) {
+ if (adapter->netdev_registered ||
+ !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) {
+ struct net_device *netdev = adapter->netdev;
+
+ rtnl_lock();
+ netdev_update_features(netdev);
+ rtnl_unlock();
+ /* Request VLAN offload settings */
+ if (VLAN_V2_ALLOWED(adapter))
+ iavf_set_vlan_offload_features
+ (adapter, 0, netdev->features);
+
+ iavf_set_queue_vlan_tag_loc(adapter);
+ }
+
+ adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES;
+ }
if ((adapter->flags &
(IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) ||
adapter->state == __IAVF_RESETTING)
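
The IAVF_FLAG_SETUP_NETDEV_FEATURES block added above replaces a fragile unlock/relock dance (see the removed hunk in iavf_virtchnl.c later in this diff): instead of dropping crit_lock to take rtnl_lock mid-operation, the producer just sets a flag and the adminq task consumes it here, after crit_lock has been released. A hedged sketch of that defer-to-safe-context pattern, with hypothetical names:

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

#define MY_FLAG_SETUP_NETDEV_FEATURES	0x1

struct my_adapter {
	u32 flags;
	struct net_device *netdev;
};

/* Producer: runs with the adapter's crit_lock held, must not take rtnl. */
static void my_handle_caps_reply(struct my_adapter *adapter)
{
	adapter->flags |= MY_FLAG_SETUP_NETDEV_FEATURES;
}

/* Consumer: runs later, with no adapter locks held. */
static void my_adminq_task_tail(struct my_adapter *adapter)
{
	if (adapter->flags & MY_FLAG_SETUP_NETDEV_FEATURES) {
		rtnl_lock();
		netdev_update_features(adapter->netdev);
		rtnl_unlock();
		adapter->flags &= ~MY_FLAG_SETUP_NETDEV_FEATURES;
	}
}
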
@@ -3800,11 +3861,12 @@ static int iavf_close(struct net_device *netdev)
struct iavf_adapter *adapter = netdev_priv(netdev);
int status;
- if (adapter->state <= __IAVF_DOWN_PENDING)
- return 0;
+ mutex_lock(&adapter->crit_lock);
- while (!mutex_trylock(&adapter->crit_lock))
- usleep_range(500, 1000);
+ if (adapter->state <= __IAVF_DOWN_PENDING) {
+ mutex_unlock(&adapter->crit_lock);
+ return 0;
+ }
set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
if (CLIENT_ENABLED(adapter))
@@ -3853,8 +3915,11 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu)
iavf_notify_client_l2_params(&adapter->vsi);
adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED;
}
- adapter->flags |= IAVF_FLAG_RESET_NEEDED;
- queue_work(iavf_wq, &adapter->reset_task);
+
+ if (netif_running(netdev)) {
+ adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+ queue_work(iavf_wq, &adapter->reset_task);
+ }
return 0;
}
@@ -4431,7 +4496,6 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
*/
mutex_init(&adapter->crit_lock);
mutex_init(&adapter->client_lock);
- mutex_init(&adapter->remove_lock);
mutex_init(&hw->aq.asq_mutex);
mutex_init(&hw->aq.arq_mutex);
@@ -4547,7 +4611,6 @@ static int __maybe_unused iavf_resume(struct device *dev_d)
static void iavf_remove(struct pci_dev *pdev)
{
struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev);
- enum iavf_state_t prev_state = adapter->last_state;
struct net_device *netdev = adapter->netdev;
struct iavf_fdir_fltr *fdir, *fdirtmp;
struct iavf_vlan_filter *vlf, *vlftmp;
@@ -4556,14 +4619,37 @@ static void iavf_remove(struct pci_dev *pdev)
struct iavf_cloud_filter *cf, *cftmp;
struct iavf_hw *hw = &adapter->hw;
int err;
- /* Indicate we are in remove and not to run reset_task */
- mutex_lock(&adapter->remove_lock);
- cancel_work_sync(&adapter->reset_task);
+
+ /* When a reboot/shutdown is in progress there is no need to do
+ * anything, as the adapter is already in the __IAVF_REMOVE state
+ * set during the iavf_shutdown() callback.
+ */
+ if (adapter->state == __IAVF_REMOVE)
+ return;
+
+ set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section);
+ /* Wait until port initialization is complete.
+ * There are flows where register/unregister netdev may race.
+ */
+ while (1) {
+ mutex_lock(&adapter->crit_lock);
+ if (adapter->state == __IAVF_RUNNING ||
+ adapter->state == __IAVF_DOWN ||
+ adapter->state == __IAVF_INIT_FAILED) {
+ mutex_unlock(&adapter->crit_lock);
+ break;
+ }
+
+ mutex_unlock(&adapter->crit_lock);
+ usleep_range(500, 1000);
+ }
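
The loop above polls under the lock until the adapter settles into one of its terminal states before teardown proceeds, closing the register/unregister race the comment mentions. A minimal sketch of that poll-under-lock shape (hypothetical names; a production driver might also want a timeout):

#include <linux/delay.h>
#include <linux/mutex.h>

static void my_wait_for_stable_state(struct my_adapter *adapter)
{
	for (;;) {
		mutex_lock(&adapter->crit_lock);
		if (adapter->state == MY_STATE_RUNNING ||
		    adapter->state == MY_STATE_DOWN ||
		    adapter->state == MY_STATE_INIT_FAILED) {
			mutex_unlock(&adapter->crit_lock);
			return;
		}
		mutex_unlock(&adapter->crit_lock);
		/* Init is still in flight; back off briefly. */
		usleep_range(500, 1000);
	}
}
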
cancel_delayed_work_sync(&adapter->watchdog_task);
- cancel_delayed_work_sync(&adapter->client_task);
+
if (adapter->netdev_registered) {
- unregister_netdev(netdev);
+ rtnl_lock();
+ unregister_netdevice(netdev);
adapter->netdev_registered = false;
+ rtnl_unlock();
}
if (CLIENT_ALLOWED(adapter)) {
err = iavf_lan_del_device(adapter);
@@ -4572,6 +4658,10 @@ static void iavf_remove(struct pci_dev *pdev)
err);
}
+ mutex_lock(&adapter->crit_lock);
+ dev_info(&adapter->pdev->dev, "Remove device\n");
+ iavf_change_state(adapter, __IAVF_REMOVE);
+
iavf_request_reset(adapter);
msleep(50);
/* If the FW isn't responding, kick it once, but only once. */
@@ -4579,37 +4669,24 @@ static void iavf_remove(struct pci_dev *pdev)
iavf_request_reset(adapter);
msleep(50);
}
- if (iavf_lock_timeout(&adapter->crit_lock, 5000))
- dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__);
- dev_info(&adapter->pdev->dev, "Removing device\n");
+ iavf_misc_irq_disable(adapter);
/* Shut down all the garbage mashers on the detention level */
- iavf_change_state(adapter, __IAVF_REMOVE);
+ cancel_work_sync(&adapter->reset_task);
+ cancel_delayed_work_sync(&adapter->watchdog_task);
+ cancel_work_sync(&adapter->adminq_task);
+ cancel_delayed_work_sync(&adapter->client_task);
+
adapter->aq_required = 0;
adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
iavf_free_all_tx_resources(adapter);
iavf_free_all_rx_resources(adapter);
- iavf_misc_irq_disable(adapter);
iavf_free_misc_irq(adapter);
- /* In case we enter iavf_remove from erroneous state, free traffic irqs
- * here, so as to not cause a kernel crash, when calling
- * iavf_reset_interrupt_capability.
- */
- if ((adapter->last_state == __IAVF_RESETTING &&
- prev_state != __IAVF_DOWN) ||
- (adapter->last_state == __IAVF_RUNNING &&
- !(netdev->flags & IFF_UP)))
- iavf_free_traffic_irqs(adapter);
-
iavf_reset_interrupt_capability(adapter);
iavf_free_q_vectors(adapter);
- cancel_delayed_work_sync(&adapter->watchdog_task);
-
- cancel_work_sync(&adapter->adminq_task);
-
iavf_free_rss(adapter);
if (hw->aq.asq.count)
@@ -4621,8 +4698,6 @@ static void iavf_remove(struct pci_dev *pdev)
mutex_destroy(&adapter->client_lock);
mutex_unlock(&adapter->crit_lock);
mutex_destroy(&adapter->crit_lock);
- mutex_unlock(&adapter->remove_lock);
- mutex_destroy(&adapter->remove_lock);
iounmap(hw->hw_addr);
pci_release_regions(pdev);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 5ee1d118fd30..5263cefe46f5 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -1835,6 +1835,22 @@ void iavf_request_reset(struct iavf_adapter *adapter)
}
/**
+ * iavf_netdev_features_vlan_strip_set - update vlan strip status
+ * @netdev: ptr to netdev being adjusted
+ * @enable: enable or disable vlan strip
+ *
+ * Helper function to change vlan strip status in netdev->features.
+ */
+static void iavf_netdev_features_vlan_strip_set(struct net_device *netdev,
+ const bool enable)
+{
+ if (enable)
+ netdev->features |= NETIF_F_HW_VLAN_CTAG_RX;
+ else
+ netdev->features &= ~NETIF_F_HW_VLAN_CTAG_RX;
+}
+
+/**
* iavf_virtchnl_completion
* @adapter: adapter structure
* @v_opcode: opcode sent by PF
@@ -2057,8 +2073,18 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
}
break;
case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
+ dev_warn(&adapter->pdev->dev, "Changing VLAN Stripping is not allowed when Port VLAN is configured\n");
+ /* Vlan stripping could not be enabled by ethtool.
+ * Disable it in netdev->features.
+ */
+ iavf_netdev_features_vlan_strip_set(netdev, false);
+ break;
case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
dev_warn(&adapter->pdev->dev, "Changing VLAN Stripping is not allowed when Port VLAN is configured\n");
+ /* Vlan stripping could not be disabled by ethtool.
+ * Enable it in netdev->features.
+ */
+ iavf_netdev_features_vlan_strip_set(netdev, true);
break;
default:
dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
@@ -2146,29 +2172,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
sizeof(adapter->vlan_v2_caps)));
iavf_process_config(adapter);
-
- /* unlock crit_lock before acquiring rtnl_lock as other
- * processes holding rtnl_lock could be waiting for the same
- * crit_lock
- */
- mutex_unlock(&adapter->crit_lock);
- /* VLAN capabilities can change during VFR, so make sure to
- * update the netdev features with the new capabilities
- */
- rtnl_lock();
- netdev_update_features(netdev);
- rtnl_unlock();
- if (iavf_lock_timeout(&adapter->crit_lock, 10000))
- dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n",
- __FUNCTION__);
-
- /* Request VLAN offload settings */
- if (VLAN_V2_ALLOWED(adapter))
- iavf_set_vlan_offload_features(adapter, 0,
- netdev->features);
-
- iavf_set_queue_vlan_tag_loc(adapter);
-
+ adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES;
}
break;
case VIRTCHNL_OP_ENABLE_QUEUES:
@@ -2334,6 +2338,20 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
spin_unlock_bh(&adapter->adv_rss_lock);
}
break;
+ case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
+ /* PF enabled vlan strip on this VF.
+ * Update netdev->features if needed to be in sync with ethtool.
+ */
+ if (!v_retval)
+ iavf_netdev_features_vlan_strip_set(netdev, true);
+ break;
+ case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
+ /* PF disabled vlan strip on this VF.
+ * Update netdev->features if needed to be in sync with ethtool.
+ */
+ if (!v_retval)
+ iavf_netdev_features_vlan_strip_set(netdev, false);
+ break;
default:
if (adapter->current_op && (v_opcode != adapter->current_op))
dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n",
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 4e16d185077d..bea1d1e39fa2 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -280,7 +280,6 @@ enum ice_pf_state {
ICE_VFLR_EVENT_PENDING,
ICE_FLTR_OVERFLOW_PROMISC,
ICE_VF_DIS,
- ICE_VF_DEINIT_IN_PROGRESS,
ICE_CFG_BUSY,
ICE_SERVICE_SCHED,
ICE_SERVICE_DIS,
@@ -483,6 +482,8 @@ enum ice_pf_flags {
ICE_FLAG_VF_TRUE_PROMISC_ENA,
ICE_FLAG_MDD_AUTO_RESET_VF,
ICE_FLAG_LINK_LENIENT_MODE_ENA,
+ ICE_FLAG_PLUG_AUX_DEV,
+ ICE_FLAG_MTU_CHANGED,
ICE_PF_FLAGS_NBITS /* must be last */
};
@@ -887,7 +888,7 @@ static inline void ice_set_rdma_cap(struct ice_pf *pf)
if (pf->hw.func_caps.common_cap.rdma && pf->num_rdma_msix) {
set_bit(ICE_FLAG_RDMA_ENA, pf->flags);
set_bit(ICE_FLAG_AUX_ENA, pf->flags);
- ice_plug_aux_dev(pf);
+ set_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags);
}
}
@@ -897,7 +898,16 @@ static inline void ice_set_rdma_cap(struct ice_pf *pf)
*/
static inline void ice_clear_rdma_cap(struct ice_pf *pf)
{
- ice_unplug_aux_dev(pf);
+ /* We can directly unplug aux device here only if the flag bit
+ * ICE_FLAG_PLUG_AUX_DEV is not set because ice_unplug_aux_dev()
+ * could race with ice_plug_aux_dev() called from
+ * ice_service_task(). In this case we only clear that bit now and
+ * aux device will be unplugged later once ice_plug_aux_dev()
+ * called from ice_service_task() finishes (see ice_service_task()).
+ */
+ if (!test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags))
+ ice_unplug_aux_dev(pf);
+
clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
clear_bit(ICE_FLAG_AUX_ENA, pf->flags);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 408d15a5b0e3..e2af99a763ed 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -3340,9 +3340,10 @@ ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(hw) &&
!ice_fw_supports_report_dflt_cfg(hw)) {
- struct ice_link_default_override_tlv tlv;
+ struct ice_link_default_override_tlv tlv = { 0 };
- if (ice_get_link_default_override(&tlv, pi))
+ status = ice_get_link_default_override(&tlv, pi);
+ if (status)
goto out;
if (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE) &&
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c
index 864692b157b6..73edc24d81d5 100644
--- a/drivers/net/ethernet/intel/ice/ice_eswitch.c
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c
@@ -44,6 +44,7 @@ ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf, const u8 *mac)
ctrl_vsi->rxq_map[vf->vf_id];
rule_info.flags_info.act |= ICE_SINGLE_ACT_LB_ENABLE;
rule_info.flags_info.act_valid = true;
+ rule_info.tun_type = ICE_SW_TUN_AND_NON_TUN;
err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info,
vf->repr->mac_rule);
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index e2e3ef7fba7f..a5dc9824e255 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -2298,7 +2298,7 @@ ice_set_link_ksettings(struct net_device *netdev,
if (err)
goto done;
- curr_link_speed = pi->phy.link_info.link_speed;
+ curr_link_speed = pi->phy.curr_user_speed_req;
adv_link_speed = ice_ksettings_find_adv_link_speed(ks);
/* If speed didn't get set, set it to what it currently is.
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
index e375ac849aec..4f954db01b92 100644
--- a/drivers/net/ethernet/intel/ice/ice_lag.c
+++ b/drivers/net/ethernet/intel/ice/ice_lag.c
@@ -204,11 +204,7 @@ ice_lag_unlink(struct ice_lag *lag,
lag->upper_netdev = NULL;
}
- if (lag->peer_netdev) {
- dev_put(lag->peer_netdev);
- lag->peer_netdev = NULL;
- }
-
+ lag->peer_netdev = NULL;
ice_set_sriov_cap(pf);
ice_set_rdma_cap(pf);
lag->bonded = false;
@@ -216,6 +212,32 @@ ice_lag_unlink(struct ice_lag *lag,
}
/**
+ * ice_lag_unregister - handle netdev unregister events
+ * @lag: LAG info struct
+ * @netdev: netdev reporting the event
+ */
+static void ice_lag_unregister(struct ice_lag *lag, struct net_device *netdev)
+{
+ struct ice_pf *pf = lag->pf;
+
+ /* Check that this event is for this netdev and that we
+ * are in an aggregate.
+ */
+ if (netdev != lag->netdev || !lag->bonded)
+ return;
+
+ if (lag->upper_netdev) {
+ dev_put(lag->upper_netdev);
+ lag->upper_netdev = NULL;
+ ice_set_sriov_cap(pf);
+ ice_set_rdma_cap(pf);
+ }
+ /* perform some cleanup in case we come back */
+ lag->bonded = false;
+ lag->role = ICE_LAG_NONE;
+}
+
+/**
* ice_lag_changeupper_event - handle LAG changeupper event
* @lag: LAG info struct
* @ptr: opaque pointer data
@@ -307,7 +329,7 @@ ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event,
ice_lag_info_event(lag, ptr);
break;
case NETDEV_UNREGISTER:
- ice_lag_unlink(lag, ptr);
+ ice_lag_unregister(lag, netdev);
break;
default:
break;
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index d981dc6f2323..85a612838a89 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -568,6 +568,7 @@ struct ice_tx_ctx_desc {
(0x3FFFFULL << ICE_TXD_CTX_QW1_TSO_LEN_S)
#define ICE_TXD_CTX_QW1_MSS_S 50
+#define ICE_TXD_CTX_MIN_MSS 64
#define ICE_TXD_CTX_QW1_VSI_S 50
#define ICE_TXD_CTX_QW1_VSI_M (0x3FFULL << ICE_TXD_CTX_QW1_VSI_S)
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 0c187cf04fcf..53256aca27c7 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1684,6 +1684,12 @@ static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi)
if (status)
dev_dbg(dev, "ice_add_rss_cfg failed for sctp6 flow, vsi = %d, error = %d\n",
vsi_num, status);
+
+ status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_ESP_SPI,
+ ICE_FLOW_SEG_HDR_ESP);
+ if (status)
+ dev_dbg(dev, "ice_add_rss_cfg failed for esp/spi flow, vsi = %d, error = %d\n",
+ vsi_num, status);
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 30814435f779..b7e8744b0c0a 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1799,7 +1799,9 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
* reset, so print the event prior to reset.
*/
ice_print_vf_rx_mdd_event(vf);
+ mutex_lock(&pf->vf[i].cfg_lock);
ice_reset_vf(&pf->vf[i], false);
+ mutex_unlock(&pf->vf[i].cfg_lock);
}
}
}
@@ -2253,6 +2255,30 @@ static void ice_service_task(struct work_struct *work)
return;
}
+ if (test_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) {
+ /* Plug aux device per request */
+ ice_plug_aux_dev(pf);
+
+ /* Mark plugging as done but check whether unplug was
+ * requested during ice_plug_aux_dev() call
+ * (e.g. from ice_clear_rdma_cap()) and if so then
+ * unplug aux device.
+ */
+ if (!test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags))
+ ice_unplug_aux_dev(pf);
+ }
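
Together with the ice_clear_rdma_cap() change earlier in this diff, this forms a small flag protocol: requesters never call plug/unplug directly, and the service task re-checks the bit after plugging in case an unplug raced in. A condensed sketch of both sides, with hypothetical names:

#include <linux/bitops.h>

#define MY_FLAG_PLUG_AUX_DEV	0

struct my_pf { unsigned long flags; };

static void my_plug_aux_dev(struct my_pf *pf) { /* ... */ }
static void my_unplug_aux_dev(struct my_pf *pf) { /* ... */ }

/* Requester (capability set): just post the request. */
static void my_request_plug(struct my_pf *pf)
{
	set_bit(MY_FLAG_PLUG_AUX_DEV, &pf->flags);
}

/* Requester (capability clear): if the plug request is still pending,
 * stealing the bit back is enough; otherwise unplug directly.
 */
static void my_request_unplug(struct my_pf *pf)
{
	if (!test_and_clear_bit(MY_FLAG_PLUG_AUX_DEV, &pf->flags))
		my_unplug_aux_dev(pf);
}

/* Service task: plug, then re-check whether an unplug arrived while
 * plugging was in progress.
 */
static void my_service_aux(struct my_pf *pf)
{
	if (test_bit(MY_FLAG_PLUG_AUX_DEV, &pf->flags)) {
		my_plug_aux_dev(pf);
		if (!test_and_clear_bit(MY_FLAG_PLUG_AUX_DEV, &pf->flags))
			my_unplug_aux_dev(pf);
	}
}
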
+
+ if (test_and_clear_bit(ICE_FLAG_MTU_CHANGED, pf->flags)) {
+ struct iidc_event *event;
+
+ event = kzalloc(sizeof(*event), GFP_KERNEL);
+ if (event) {
+ set_bit(IIDC_EVENT_AFTER_MTU_CHANGE, event->type);
+ ice_send_event_to_aux(pf, event);
+ kfree(event);
+ }
+ }
+
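
The ICE_FLAG_MTU_CHANGED block works the same way: ice_change_mtu() (rewritten later in this diff) only records that an event is owed, and the service task emits it here, where a GFP_KERNEL allocation and the aux-driver call are safe. A hedged sketch with hypothetical names:

#include <linux/bitops.h>
#include <linux/netdevice.h>

#define MY_FLAG_MTU_CHANGED	1

/* Assumed helpers, for illustration only. */
struct my_pf *my_netdev_to_pf(struct net_device *netdev);
void my_notify_aux_mtu_changed(struct my_pf *pf);

/* ndo_change_mtu(): just record that an event is owed. */
static int my_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct my_pf *pf = my_netdev_to_pf(netdev);

	netdev->mtu = new_mtu;
	set_bit(MY_FLAG_MTU_CHANGED, &pf->flags);
	return 0;
}

/* Service task: emit the event where sleeping allocations are fine. */
static void my_service_mtu_event(struct my_pf *pf)
{
	if (test_and_clear_bit(MY_FLAG_MTU_CHANGED, &pf->flags))
		my_notify_aux_mtu_changed(pf);
}
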
ice_clean_adminq_subtask(pf);
ice_check_media_subtask(pf);
ice_check_for_hang_subtask(pf);
@@ -3018,7 +3044,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
struct iidc_event *event;
ena_mask &= ~ICE_AUX_CRIT_ERR;
- event = kzalloc(sizeof(*event), GFP_KERNEL);
+ event = kzalloc(sizeof(*event), GFP_ATOMIC);
if (event) {
set_bit(IIDC_EVENT_CRIT_ERR, event->type);
/* report the entire OICR value to AUX driver */
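
The one-character change above matters because ice_misc_intr() runs in hard-IRQ context, where a GFP_KERNEL allocation may sleep and is therefore illegal. A small reminder sketch (hypothetical event type):

#include <linux/slab.h>
#include <linux/types.h>

struct my_event { unsigned long type[1]; u32 reg; };

static struct my_event *my_alloc_event_in_irq(void)
{
	/* GFP_ATOMIC never sleeps but can fail, so callers must
	 * tolerate a NULL return, as the hunk above does.
	 */
	return kzalloc(sizeof(struct my_event), GFP_ATOMIC);
}
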
@@ -4854,7 +4880,6 @@ static void ice_remove(struct pci_dev *pdev)
ice_devlink_unregister_params(pf);
set_bit(ICE_DOWN, pf->state);
- mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
ice_deinit_lag(pf);
if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
ice_ptp_release(pf);
@@ -4862,6 +4887,7 @@ static void ice_remove(struct pci_dev *pdev)
ice_remove_arfs(pf);
ice_setup_mc_magic_wake(pf);
ice_vsi_release_all(pf);
+ mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
ice_set_wake(pf);
ice_free_irq_msix_misc(pf);
ice_for_each_vsi(pf, i) {
@@ -5936,8 +5962,9 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi,
u64 pkts = 0, bytes = 0;
ring = READ_ONCE(rings[i]);
- if (ring)
- ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes);
+ if (!ring)
+ continue;
+ ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes);
vsi_stats->tx_packets += pkts;
vsi_stats->tx_bytes += bytes;
vsi->tx_restart += ring->tx_stats.restart_q;
@@ -6817,7 +6844,6 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
- struct iidc_event *event;
u8 count = 0;
int err = 0;
@@ -6852,14 +6878,6 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
return -EBUSY;
}
- event = kzalloc(sizeof(*event), GFP_KERNEL);
- if (!event)
- return -ENOMEM;
-
- set_bit(IIDC_EVENT_BEFORE_MTU_CHANGE, event->type);
- ice_send_event_to_aux(pf, event);
- clear_bit(IIDC_EVENT_BEFORE_MTU_CHANGE, event->type);
-
netdev->mtu = (unsigned int)new_mtu;
/* if VSI is up, bring it down and then back up */
@@ -6867,21 +6885,18 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
err = ice_down(vsi);
if (err) {
netdev_err(netdev, "change MTU if_down err %d\n", err);
- goto event_after;
+ return err;
}
err = ice_up(vsi);
if (err) {
netdev_err(netdev, "change MTU if_up err %d\n", err);
- goto event_after;
+ return err;
}
}
netdev_dbg(netdev, "changed MTU to %d\n", new_mtu);
-event_after:
- set_bit(IIDC_EVENT_AFTER_MTU_CHANGE, event->type);
- ice_send_event_to_aux(pf, event);
- kfree(event);
+ set_bit(ICE_FLAG_MTU_CHANGED, pf->flags);
return err;
}
@@ -8525,6 +8540,7 @@ ice_features_check(struct sk_buff *skb,
struct net_device __always_unused *netdev,
netdev_features_t features)
{
+ bool gso = skb_is_gso(skb);
size_t len;
/* No point in doing any of this if neither checksum nor GSO are
@@ -8537,24 +8553,32 @@ ice_features_check(struct sk_buff *skb,
/* We cannot support GSO if the MSS is going to be less than
* 64 bytes. If it is then we need to drop support for GSO.
*/
- if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64))
+ if (gso && (skb_shinfo(skb)->gso_size < ICE_TXD_CTX_MIN_MSS))
features &= ~NETIF_F_GSO_MASK;
- len = skb_network_header(skb) - skb->data;
+ len = skb_network_offset(skb);
if (len > ICE_TXD_MACLEN_MAX || len & 0x1)
goto out_rm_features;
- len = skb_transport_header(skb) - skb_network_header(skb);
+ len = skb_network_header_len(skb);
if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
goto out_rm_features;
if (skb->encapsulation) {
- len = skb_inner_network_header(skb) - skb_transport_header(skb);
- if (len > ICE_TXD_L4LEN_MAX || len & 0x1)
- goto out_rm_features;
+ /* this must work for VXLAN frames AND IPIP/SIT frames, and in
+ * the case of IPIP frames, the transport header pointer is
+ * after the inner header! So check to make sure that this
+ * is a GRE or UDP_TUNNEL frame before doing that math.
+ */
+ if (gso && (skb_shinfo(skb)->gso_type &
+ (SKB_GSO_GRE | SKB_GSO_UDP_TUNNEL))) {
+ len = skb_inner_network_header(skb) -
+ skb_transport_header(skb);
+ if (len > ICE_TXD_L4LEN_MAX || len & 0x1)
+ goto out_rm_features;
+ }
- len = skb_inner_transport_header(skb) -
- skb_inner_network_header(skb);
+ len = skb_inner_network_header_len(skb);
if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
goto out_rm_features;
}
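
The rewritten ice_features_check() computes header lengths with the dedicated skb helpers instead of raw pointer subtraction, and only does the inner-header math for GRE/UDP-tunnel GSO, where the transport/inner pointers are known to be laid out that way (for IPIP/SIT they are not). A compressed sketch of the outer-header half of such a check; the MY_* limits are hypothetical placeholders for the driver's macros:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

#define MY_MACLEN_MAX	127
#define MY_IPLEN_MAX	127

static netdev_features_t
my_features_check(struct sk_buff *skb, struct net_device *dev,
		  netdev_features_t features)
{
	size_t len;

	/* MAC header length == offset of the network header. */
	len = skb_network_offset(skb);
	if (len > MY_MACLEN_MAX || len & 0x1)
		goto out_rm_features;

	/* IP header length == network-to-transport distance. */
	len = skb_network_header_len(skb);
	if (len > MY_IPLEN_MAX || len & 0x1)
		goto out_rm_features;

	return features;

out_rm_features:
	/* Fall back to software checksum/GSO for odd or oversized headers. */
	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
}
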
diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
index dc1b0e9e6df5..695b6dd61dc2 100644
--- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
@@ -47,6 +47,7 @@ enum ice_protocol_type {
enum ice_sw_tunnel_type {
ICE_NON_TUN = 0,
+ ICE_SW_TUN_AND_NON_TUN,
ICE_SW_TUN_VXLAN,
ICE_SW_TUN_GENEVE,
ICE_SW_TUN_NVGRE,
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index ae291d442539..000c39d163a2 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -1533,9 +1533,12 @@ exit:
static int ice_ptp_adjtime_nonatomic(struct ptp_clock_info *info, s64 delta)
{
struct timespec64 now, then;
+ int ret;
then = ns_to_timespec64(delta);
- ice_ptp_gettimex64(info, &now, NULL);
+ ret = ice_ptp_gettimex64(info, &now, NULL);
+ if (ret)
+ return ret;
now = timespec64_add(now, then);
return ice_ptp_settime64(info, (const struct timespec64 *)&now);
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 11ae0bee3590..475ec2afa210 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -4537,6 +4537,7 @@ ice_get_compat_fv_bitmap(struct ice_hw *hw, struct ice_adv_rule_info *rinfo,
case ICE_SW_TUN_NVGRE:
prof_type = ICE_PROF_TUN_GRE;
break;
+ case ICE_SW_TUN_AND_NON_TUN:
default:
prof_type = ICE_PROF_ALL;
break;
@@ -5305,7 +5306,8 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
if (status)
goto err_ice_add_adv_rule;
- if (rinfo->tun_type != ICE_NON_TUN) {
+ if (rinfo->tun_type != ICE_NON_TUN &&
+ rinfo->tun_type != ICE_SW_TUN_AND_NON_TUN) {
status = ice_fill_adv_packet_tun(hw, rinfo->tun_type,
s_rule->pdata.lkup_tx_rx.hdr,
pkt_offsets);
diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
index e8aab664270a..65cf32eb4046 100644
--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
@@ -709,7 +709,7 @@ ice_tc_set_port(struct flow_match_ports match,
fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT;
else
fltr->flags |= ICE_TC_FLWR_FIELD_DEST_L4_PORT;
- fltr->flags |= ICE_TC_FLWR_FIELD_DEST_L4_PORT;
+
headers->l4_key.dst_port = match.key->dst;
headers->l4_mask.dst_port = match.mask->dst;
}
@@ -718,7 +718,7 @@ ice_tc_set_port(struct flow_match_ports match,
fltr->flags |= ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT;
else
fltr->flags |= ICE_TC_FLWR_FIELD_SRC_L4_PORT;
- fltr->flags |= ICE_TC_FLWR_FIELD_SRC_L4_PORT;
+
headers->l4_key.src_port = match.key->src;
headers->l4_mask.src_port = match.mask->src;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 39b80124d282..1be3cd4b2bef 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -500,8 +500,6 @@ void ice_free_vfs(struct ice_pf *pf)
struct ice_hw *hw = &pf->hw;
unsigned int tmp, i;
- set_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state);
-
if (!pf->vf)
return;
@@ -519,22 +517,26 @@ void ice_free_vfs(struct ice_pf *pf)
else
dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n");
- /* Avoid wait time by stopping all VFs at the same time */
- ice_for_each_vf(pf, i)
- ice_dis_vf_qs(&pf->vf[i]);
-
tmp = pf->num_alloc_vfs;
pf->num_qps_per_vf = 0;
pf->num_alloc_vfs = 0;
for (i = 0; i < tmp; i++) {
- if (test_bit(ICE_VF_STATE_INIT, pf->vf[i].vf_states)) {
+ struct ice_vf *vf = &pf->vf[i];
+
+ mutex_lock(&vf->cfg_lock);
+
+ ice_dis_vf_qs(vf);
+
+ if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
/* disable VF qp mappings and set VF disable state */
- ice_dis_vf_mappings(&pf->vf[i]);
- set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states);
- ice_free_vf_res(&pf->vf[i]);
+ ice_dis_vf_mappings(vf);
+ set_bit(ICE_VF_STATE_DIS, vf->vf_states);
+ ice_free_vf_res(vf);
}
- mutex_destroy(&pf->vf[i].cfg_lock);
+ mutex_unlock(&vf->cfg_lock);
+
+ mutex_destroy(&vf->cfg_lock);
}
if (ice_sriov_free_msix_res(pf))
@@ -570,7 +572,6 @@ void ice_free_vfs(struct ice_pf *pf)
i);
clear_bit(ICE_VF_DIS, pf->state);
- clear_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state);
clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags);
}
@@ -1498,6 +1499,8 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
ice_for_each_vf(pf, v) {
vf = &pf->vf[v];
+ mutex_lock(&vf->cfg_lock);
+
vf->driver_caps = 0;
ice_vc_set_default_allowlist(vf);
@@ -1512,6 +1515,8 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
ice_vf_pre_vsi_rebuild(vf);
ice_vf_rebuild_vsi(vf);
ice_vf_post_vsi_rebuild(vf);
+
+ mutex_unlock(&vf->cfg_lock);
}
if (ice_is_eswitch_mode_switchdev(pf))
@@ -1562,6 +1567,8 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
u32 reg;
int i;
+ lockdep_assert_held(&vf->cfg_lock);
+
dev = ice_pf_to_dev(pf);
if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) {
@@ -2061,9 +2068,12 @@ void ice_process_vflr_event(struct ice_pf *pf)
bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32;
/* read GLGEN_VFLRSTAT register to find out the flr VFs */
reg = rd32(hw, GLGEN_VFLRSTAT(reg_idx));
- if (reg & BIT(bit_idx))
+ if (reg & BIT(bit_idx)) {
/* GLGEN_VFLRSTAT bit will be cleared in ice_reset_vf */
+ mutex_lock(&vf->cfg_lock);
ice_reset_vf(vf, true);
+ mutex_unlock(&vf->cfg_lock);
+ }
}
}
@@ -2140,7 +2150,9 @@ ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event)
if (!vf)
return;
+ mutex_lock(&vf->cfg_lock);
ice_vc_reset_vf(vf);
+ mutex_unlock(&vf->cfg_lock);
}
/**
@@ -2170,24 +2182,6 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
dev = ice_pf_to_dev(pf);
- /* single place to detect unsuccessful return values */
- if (v_retval) {
- vf->num_inval_msgs++;
- dev_info(dev, "VF %d failed opcode %d, retval: %d\n", vf->vf_id,
- v_opcode, v_retval);
- if (vf->num_inval_msgs > ICE_DFLT_NUM_INVAL_MSGS_ALLOWED) {
- dev_err(dev, "Number of invalid messages exceeded for VF %d\n",
- vf->vf_id);
- dev_err(dev, "Use PF Control I/F to enable the VF\n");
- set_bit(ICE_VF_STATE_DIS, vf->vf_states);
- return -EIO;
- }
- } else {
- vf->num_valid_msgs++;
- /* reset the invalid counter, if a valid message is received. */
- vf->num_inval_msgs = 0;
- }
-
aq_ret = ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, v_opcode, v_retval,
msg, msglen, NULL);
if (aq_ret && pf->hw.mailboxq.sq_last_status != ICE_AQ_RC_ENOSYS) {
@@ -4625,10 +4619,6 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
struct device *dev;
int err = 0;
- /* if de-init is underway, don't process messages from VF */
- if (test_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state))
- return;
-
dev = ice_pf_to_dev(pf);
if (ice_validate_vf_id(pf, vf_id)) {
err = -EINVAL;
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 752487a1bdd6..8f27255cc0cc 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -14,7 +14,6 @@
#define ICE_MAX_MACADDR_PER_VF 18
/* Malicious Driver Detection */
-#define ICE_DFLT_NUM_INVAL_MSGS_ALLOWED 10
#define ICE_MDD_EVENTS_THRESHOLD 30
/* Static VF transaction/status register def */
@@ -134,8 +133,6 @@ struct ice_vf {
unsigned int max_tx_rate; /* Maximum Tx bandwidth limit in Mbps */
DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */
- u64 num_inval_msgs; /* number of continuous invalid msgs */
- u64 num_valid_msgs; /* number of valid msgs detected */
unsigned long vf_caps; /* VF's adv. capabilities */
u8 num_req_qs; /* num of queue pairs requested by VF */
u16 num_mac;
diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c
index 5cad31c3c7b0..40dbf4b43234 100644
--- a/drivers/net/ethernet/intel/igc/igc_phy.c
+++ b/drivers/net/ethernet/intel/igc/igc_phy.c
@@ -746,8 +746,6 @@ s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data)
if (ret_val)
return ret_val;
ret_val = igc_write_phy_reg_mdic(hw, offset, data);
- if (ret_val)
- return ret_val;
hw->phy.ops.release(hw);
} else {
ret_val = igc_write_xmdio_reg(hw, (u16)offset, dev_addr,
@@ -779,8 +777,6 @@ s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data)
if (ret_val)
return ret_val;
ret_val = igc_read_phy_reg_mdic(hw, offset, data);
- if (ret_val)
- return ret_val;
hw->phy.ops.release(hw);
} else {
ret_val = igc_read_xmdio_reg(hw, (u16)offset, dev_addr,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index b3fd8e5cd85b..6a5e9cf6b5da 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -390,12 +390,14 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
u32 cmd_type;
while (budget-- > 0) {
- if (unlikely(!ixgbe_desc_unused(xdp_ring)) ||
- !netif_carrier_ok(xdp_ring->netdev)) {
+ if (unlikely(!ixgbe_desc_unused(xdp_ring))) {
work_done = false;
break;
}
+ if (!netif_carrier_ok(xdp_ring->netdev))
+ break;
+
if (!xsk_tx_peek_desc(pool, &desc))
break;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 0015fcf1df2b..0f293acd17e8 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1984,14 +1984,15 @@ static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter,
if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
return;
- set_ring_build_skb_enabled(rx_ring);
+ if (PAGE_SIZE < 8192)
+ if (max_frame > IXGBEVF_MAX_FRAME_BUILD_SKB)
+ set_ring_uses_large_buffer(rx_ring);
- if (PAGE_SIZE < 8192) {
- if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB)
- return;
+ /* 82599 can't rely on RXDCTL.RLPML to restrict the size of the frame */
+ if (adapter->hw.mac.type == ixgbe_mac_82599_vf && !ring_uses_large_buffer(rx_ring))
+ return;
- set_ring_uses_large_buffer(rx_ring);
- }
+ set_ring_build_skb_enabled(rx_ring);
}
/**
diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index 41d11137cde0..5712c3e94be8 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -260,9 +260,9 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
if (ctl & LTQ_DMA_EOP) {
ch->skb_head->protocol = eth_type_trans(ch->skb_head, net_dev);
- netif_receive_skb(ch->skb_head);
net_dev->stats.rx_packets++;
net_dev->stats.rx_bytes += ch->skb_head->len;
+ netif_receive_skb(ch->skb_head);
ch->skb_head = NULL;
ch->skb_tail = NULL;
ret = XRX200_DMA_PACKET_COMPLETE;
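
The xrx200 reordering above fixes a use-after-free: netif_receive_skb() hands the skb to the stack, which may free it immediately, so skb_head->len must be read for the byte counter before the call. The safe ordering, as a tiny sketch:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static void my_deliver(struct net_device *dev, struct sk_buff *skb)
{
	/* Snapshot everything we need from the skb first ... */
	dev->stats.rx_packets++;
	dev->stats.rx_bytes += skb->len;

	/* ... because after this call the skb may already be freed. */
	netif_receive_skb(skb);
}
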
diff --git a/drivers/net/ethernet/litex/Kconfig b/drivers/net/ethernet/litex/Kconfig
index f99adbf26ab4..04345b929d8e 100644
--- a/drivers/net/ethernet/litex/Kconfig
+++ b/drivers/net/ethernet/litex/Kconfig
@@ -17,7 +17,7 @@ if NET_VENDOR_LITEX
config LITEX_LITEETH
tristate "LiteX Ethernet support"
- depends on OF
+ depends on OF && HAS_IOMEM
help
If you wish to compile a kernel for hardware with a LiteX LiteEth
device then you should answer Y to this.
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 105247582684..143ca8be5eb5 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -2704,6 +2704,16 @@ MODULE_DEVICE_TABLE(of, mv643xx_eth_shared_ids);
static struct platform_device *port_platdev[3];
+static void mv643xx_eth_shared_of_remove(void)
+{
+ int n;
+
+ for (n = 0; n < 3; n++) {
+ platform_device_del(port_platdev[n]);
+ port_platdev[n] = NULL;
+ }
+}
+
static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev,
struct device_node *pnp)
{
@@ -2740,7 +2750,9 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev,
return -EINVAL;
}
- of_get_mac_address(pnp, ppd.mac_addr);
+ ret = of_get_mac_address(pnp, ppd.mac_addr);
+ if (ret)
+ return ret;
mv643xx_eth_property(pnp, "tx-queue-size", ppd.tx_queue_size);
mv643xx_eth_property(pnp, "tx-sram-addr", ppd.tx_sram_addr);
@@ -2804,21 +2816,13 @@ static int mv643xx_eth_shared_of_probe(struct platform_device *pdev)
ret = mv643xx_eth_shared_of_add_port(pdev, pnp);
if (ret) {
of_node_put(pnp);
+ mv643xx_eth_shared_of_remove();
return ret;
}
}
return 0;
}
-static void mv643xx_eth_shared_of_remove(void)
-{
- int n;
-
- for (n = 0; n < 3; n++) {
- platform_device_del(port_platdev[n]);
- port_platdev[n] = NULL;
- }
-}
#else
static inline int mv643xx_eth_shared_of_probe(struct platform_device *pdev)
{
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 7cdbf8b8bbf6..1a835b48791b 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -6870,6 +6870,9 @@ static int mvpp2_port_probe(struct platform_device *pdev,
dev->max_mtu = MVPP2_BM_JUMBO_PKT_SIZE;
dev->dev.of_node = port_node;
+ port->pcs_gmac.ops = &mvpp2_phylink_gmac_pcs_ops;
+ port->pcs_xlg.ops = &mvpp2_phylink_xlg_pcs_ops;
+
if (!mvpp2_use_acpi_compat_mode(port_fwnode)) {
port->phylink_config.dev = &dev->dev;
port->phylink_config.type = PHYLINK_NETDEV;
@@ -6940,9 +6943,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
port->phylink_config.supported_interfaces);
}
- port->pcs_gmac.ops = &mvpp2_phylink_gmac_pcs_ops;
- port->pcs_xlg.ops = &mvpp2_phylink_xlg_pcs_ops;
-
phylink = phylink_create(&port->phylink_config, port_fwnode,
phy_mode, &mvpp2_phylink_ops);
if (IS_ERR(phylink)) {
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 186d00a9ab35..3631d612aaca 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -1570,6 +1570,8 @@ static struct mac_ops cgx_mac_ops = {
.mac_enadis_pause_frm = cgx_lmac_enadis_pause_frm,
.mac_pause_frm_config = cgx_lmac_pause_frm_config,
.mac_enadis_ptp_config = cgx_lmac_ptp_config,
+ .mac_rx_tx_enable = cgx_lmac_rx_tx_enable,
+ .mac_tx_enable = cgx_lmac_tx_enable,
};
static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
index fc6e7423cbd8..b33e7d1d0851 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
@@ -107,6 +107,9 @@ struct mac_ops {
void (*mac_enadis_ptp_config)(void *cgxd,
int lmac_id,
bool enable);
+
+ int (*mac_rx_tx_enable)(void *cgxd, int lmac_id, bool enable);
+ int (*mac_tx_enable)(void *cgxd, int lmac_id, bool enable);
};
struct cgx {
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 4e79e918a161..58e2aeebc14f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -732,6 +732,7 @@ enum nix_af_status {
NIX_AF_ERR_BANDPROF_INVAL_REQ = -428,
NIX_AF_ERR_CQ_CTX_WRITE_ERR = -429,
NIX_AF_ERR_AQ_CTX_RETRY_WRITE = -430,
+ NIX_AF_ERR_LINK_CREDITS = -431,
};
/* For NIX RX vtag action */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
index 0fe7ad35e36f..4180376fa676 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
@@ -185,7 +185,6 @@ enum npc_kpu_parser_state {
NPC_S_KPU2_QINQ,
NPC_S_KPU2_ETAG,
NPC_S_KPU2_EXDSA,
- NPC_S_KPU2_NGIO,
NPC_S_KPU2_CPT_CTAG,
NPC_S_KPU2_CPT_QINQ,
NPC_S_KPU3_CTAG,
@@ -212,6 +211,7 @@ enum npc_kpu_parser_state {
NPC_S_KPU5_NSH,
NPC_S_KPU5_CPT_IP,
NPC_S_KPU5_CPT_IP6,
+ NPC_S_KPU5_NGIO,
NPC_S_KPU6_IP6_EXT,
NPC_S_KPU6_IP6_HOP_DEST,
NPC_S_KPU6_IP6_ROUT,
@@ -1124,15 +1124,6 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
NPC_S_KPU1_ETHER, 0xff,
NPC_ETYPE_CTAG,
0xffff,
- NPC_ETYPE_NGIO,
- 0xffff,
- 0x0000,
- 0x0000,
- },
- {
- NPC_S_KPU1_ETHER, 0xff,
- NPC_ETYPE_CTAG,
- 0xffff,
NPC_ETYPE_CTAG,
0xffff,
0x0000,
@@ -1968,6 +1959,15 @@ static struct npc_kpu_profile_cam kpu2_cam_entries[] = {
},
{
NPC_S_KPU2_CTAG, 0xff,
+ NPC_ETYPE_NGIO,
+ 0xffff,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU2_CTAG, 0xff,
NPC_ETYPE_PPPOE,
0xffff,
0x0000,
@@ -2750,15 +2750,6 @@ static struct npc_kpu_profile_cam kpu2_cam_entries[] = {
0x0000,
},
{
- NPC_S_KPU2_NGIO, 0xff,
- 0x0000,
- 0x0000,
- 0x0000,
- 0x0000,
- 0x0000,
- 0x0000,
- },
- {
NPC_S_KPU2_CPT_CTAG, 0xff,
NPC_ETYPE_IP,
0xffff,
@@ -5090,6 +5081,15 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
0x0000,
},
{
+ NPC_S_KPU5_NGIO, 0xff,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ },
+ {
NPC_S_NA, 0X00,
0x0000,
0x0000,
@@ -8425,14 +8425,6 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
8, 12, 0, 0, 0,
- NPC_S_KPU2_NGIO, 12, 1,
- NPC_LID_LA, NPC_LT_LA_ETHER,
- 0,
- 0, 0, 0, 0,
- },
- {
- NPC_ERRLEV_RE, NPC_EC_NOERR,
- 8, 12, 0, 0, 0,
NPC_S_KPU2_CTAG2, 12, 1,
NPC_LID_LA, NPC_LT_LA_ETHER,
NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
@@ -9196,6 +9188,14 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 2, 0,
+ NPC_S_KPU5_NGIO, 6, 1,
+ NPC_LID_LB, NPC_LT_LB_CTAG,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
8, 0, 6, 2, 0,
NPC_S_KPU5_IP, 14, 1,
NPC_LID_LB, NPC_LT_LB_PPPOE,
@@ -9892,14 +9892,6 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
},
{
NPC_ERRLEV_RE, NPC_EC_NOERR,
- 0, 0, 0, 0, 1,
- NPC_S_NA, 0, 1,
- NPC_LID_LC, NPC_LT_LC_NGIO,
- 0,
- 0, 0, 0, 0,
- },
- {
- NPC_ERRLEV_RE, NPC_EC_NOERR,
8, 0, 6, 2, 0,
NPC_S_KPU5_CPT_IP, 6, 1,
NPC_LID_LB, NPC_LT_LB_CTAG,
@@ -11974,6 +11966,14 @@ static struct npc_kpu_profile_action kpu5_action_entries[] = {
0, 0, 0, 0,
},
{
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 0, 1,
+ NPC_S_NA, 0, 1,
+ NPC_LID_LC, NPC_LT_LC_NGIO,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
NPC_ERRLEV_LC, NPC_EC_UNK,
0, 0, 0, 0, 1,
NPC_S_NA, 0, 0,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
index e695fa0e82a9..9ea2f6ac38ec 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
@@ -30,6 +30,8 @@ static struct mac_ops rpm_mac_ops = {
.mac_enadis_pause_frm = rpm_lmac_enadis_pause_frm,
.mac_pause_frm_config = rpm_lmac_pause_frm_config,
.mac_enadis_ptp_config = rpm_lmac_ptp_config,
+ .mac_rx_tx_enable = rpm_lmac_rx_tx_enable,
+ .mac_tx_enable = rpm_lmac_tx_enable,
};
struct mac_ops *rpm_get_mac_ops(void)
@@ -54,6 +56,43 @@ int rpm_get_nr_lmacs(void *rpmd)
return hweight8(rpm_read(rpm, 0, CGXX_CMRX_RX_LMACS) & 0xFULL);
}
+int rpm_lmac_tx_enable(void *rpmd, int lmac_id, bool enable)
+{
+ rpm_t *rpm = rpmd;
+ u64 cfg, last;
+
+ if (!is_lmac_valid(rpm, lmac_id))
+ return -ENODEV;
+
+ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
+ last = cfg;
+ if (enable)
+ cfg |= RPM_TX_EN;
+ else
+ cfg &= ~(RPM_TX_EN);
+
+ if (cfg != last)
+ rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
+ return !!(last & RPM_TX_EN);
+}
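
Note the return convention: rpm_lmac_tx_enable() reports the previous TX-enable state (or a negative errno), which is what lets callers such as nix_smq_flush() later in this diff save and restore the link state around an operation. A hedged sketch of a hypothetical caller using that convention:

static void my_flush_with_tx_forced_on(void *rpmd, int lmac_id)
{
	int was_enabled = rpm_lmac_tx_enable(rpmd, lmac_id, true);

	if (was_enabled < 0)
		return;	/* invalid LMAC */

	/* ... operation that needs TX credits flowing ... */

	/* Only disable again if TX was off when we started. */
	if (!was_enabled)
		rpm_lmac_tx_enable(rpmd, lmac_id, false);
}
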
+
+int rpm_lmac_rx_tx_enable(void *rpmd, int lmac_id, bool enable)
+{
+ rpm_t *rpm = rpmd;
+ u64 cfg;
+
+ if (!is_lmac_valid(rpm, lmac_id))
+ return -ENODEV;
+
+ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
+ if (enable)
+ cfg |= RPM_RX_EN | RPM_TX_EN;
+ else
+ cfg &= ~(RPM_RX_EN | RPM_TX_EN);
+ rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
+ return 0;
+}
+
void rpm_lmac_enadis_rx_pause_fwding(void *rpmd, int lmac_id, bool enable)
{
rpm_t *rpm = rpmd;
@@ -252,23 +291,20 @@ int rpm_lmac_internal_loopback(void *rpmd, int lmac_id, bool enable)
if (!rpm || lmac_id >= rpm->lmac_count)
return -ENODEV;
lmac_type = rpm->mac_ops->get_lmac_type(rpm, lmac_id);
- if (lmac_type == LMAC_MODE_100G_R) {
- cfg = rpm_read(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1);
-
- if (enable)
- cfg |= RPMX_MTI_PCS_LBK;
- else
- cfg &= ~RPMX_MTI_PCS_LBK;
- rpm_write(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1, cfg);
- } else {
- cfg = rpm_read(rpm, lmac_id, RPMX_MTI_LPCSX_CONTROL1);
- if (enable)
- cfg |= RPMX_MTI_PCS_LBK;
- else
- cfg &= ~RPMX_MTI_PCS_LBK;
- rpm_write(rpm, lmac_id, RPMX_MTI_LPCSX_CONTROL1, cfg);
+
+ if (lmac_type == LMAC_MODE_QSGMII || lmac_type == LMAC_MODE_SGMII) {
+ dev_err(&rpm->pdev->dev, "loopback not supported for LPC mode\n");
+ return 0;
}
+ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1);
+
+ if (enable)
+ cfg |= RPMX_MTI_PCS_LBK;
+ else
+ cfg &= ~RPMX_MTI_PCS_LBK;
+ rpm_write(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1, cfg);
+
return 0;
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h
index 57c8a687b488..ff580311edd0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h
@@ -43,6 +43,8 @@
#define RPMX_MTI_STAT_DATA_HI_CDC 0x10038
#define RPM_LMAC_FWI 0xa
+#define RPM_TX_EN BIT_ULL(0)
+#define RPM_RX_EN BIT_ULL(1)
/* Function Declarations */
int rpm_get_nr_lmacs(void *rpmd);
@@ -57,4 +59,6 @@ int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause,
int rpm_get_tx_stats(void *rpmd, int lmac_id, int idx, u64 *tx_stat);
int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat);
void rpm_lmac_ptp_config(void *rpmd, int lmac_id, bool enable);
+int rpm_lmac_rx_tx_enable(void *rpmd, int lmac_id, bool enable);
+int rpm_lmac_tx_enable(void *rpmd, int lmac_id, bool enable);
#endif /* RPM_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index 3ca6b942ebe2..54e1b27a7dfe 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -520,8 +520,11 @@ static void rvu_block_reset(struct rvu *rvu, int blkaddr, u64 rst_reg)
rvu_write64(rvu, blkaddr, rst_reg, BIT_ULL(0));
err = rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true);
- if (err)
- dev_err(rvu->dev, "HW block:%d reset failed\n", blkaddr);
+ if (err) {
+ dev_err(rvu->dev, "HW block:%d reset timeout retrying again\n", blkaddr);
+ while (rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true) == -EBUSY)
+ ;
+ }
}
static void rvu_reset_all_blocks(struct rvu *rvu)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 66e45d733824..5ed94cfb47d2 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -806,6 +806,7 @@ bool is_mac_feature_supported(struct rvu *rvu, int pf, int feature);
u32 rvu_cgx_get_fifolen(struct rvu *rvu);
void *rvu_first_cgx_pdata(struct rvu *rvu);
int cgxlmac_to_pf(struct rvu *rvu, int cgx_id, int lmac_id);
+int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable);
int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf,
int type);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
index 2ca182a4ce82..8a7ac5a8b821 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -441,16 +441,26 @@ void rvu_cgx_enadis_rx_bp(struct rvu *rvu, int pf, bool enable)
int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start)
{
int pf = rvu_get_pf(pcifunc);
+ struct mac_ops *mac_ops;
u8 cgx_id, lmac_id;
+ void *cgxd;
if (!is_cgx_config_permitted(rvu, pcifunc))
return LMAC_AF_ERR_PERM_DENIED;
rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+ cgxd = rvu_cgx_pdata(cgx_id, rvu);
+ mac_ops = get_mac_ops(cgxd);
+
+ return mac_ops->mac_rx_tx_enable(cgxd, lmac_id, start);
+}
- cgx_lmac_rx_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, start);
+int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable)
+{
+ struct mac_ops *mac_ops;
- return 0;
+ mac_ops = get_mac_ops(cgxd);
+ return mac_ops->mac_tx_enable(cgxd, lmac_id, enable);
}
void rvu_cgx_disable_dmac_entries(struct rvu *rvu, u16 pcifunc)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
index a73a8017e0ee..a79201a9a6f0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
@@ -605,6 +605,7 @@ static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req)
} else if (!(req->hdr.pcifunc & RVU_PFVF_FUNC_MASK)) {
/* Registers that can be accessed from PF */
switch (offset) {
+ case CPT_AF_DIAG:
case CPT_AF_CTL:
case CPT_AF_PF_FUNC:
case CPT_AF_BLK_RST:
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
index a09a507369ac..d1eddb769a41 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
@@ -1224,6 +1224,8 @@ static void print_nix_cn10k_sq_ctx(struct seq_file *m,
seq_printf(m, "W3: head_offset\t\t\t%d\nW3: smenq_next_sqb_vld\t\t%d\n\n",
sq_ctx->head_offset, sq_ctx->smenq_next_sqb_vld);
+ seq_printf(m, "W3: smq_next_sq_vld\t\t%d\nW3: smq_pend\t\t\t%d\n",
+ sq_ctx->smq_next_sq_vld, sq_ctx->smq_pend);
seq_printf(m, "W4: next_sqb \t\t\t%llx\n\n", sq_ctx->next_sqb);
seq_printf(m, "W5: tail_sqb \t\t\t%llx\n\n", sq_ctx->tail_sqb);
seq_printf(m, "W6: smenq_sqb \t\t\t%llx\n\n", sq_ctx->smenq_sqb);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index d8b1948aaa0a..97fb61915379 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -512,11 +512,11 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST);
lmac_chan_cnt = cfg & 0xFF;
- cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
- sdp_chan_cnt = cfg & 0xFFF;
-
cgx_bpid_cnt = hw->cgx_links * lmac_chan_cnt;
lbk_bpid_cnt = hw->lbk_links * ((cfg >> 16) & 0xFF);
+
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
+ sdp_chan_cnt = cfg & 0xFFF;
sdp_bpid_cnt = hw->sdp_links * sdp_chan_cnt;
pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
@@ -2068,8 +2068,8 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr,
/* enable cgx tx if disabled */
if (is_pf_cgxmapped(rvu, pf)) {
rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
- restore_tx_en = !cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu),
- lmac_id, true);
+ restore_tx_en = !rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu),
+ lmac_id, true);
}
cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq));
@@ -2092,7 +2092,7 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr,
rvu_cgx_enadis_rx_bp(rvu, pf, true);
/* restore cgx tx state */
if (restore_tx_en)
- cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false);
+ rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false);
return err;
}
@@ -3878,7 +3878,7 @@ nix_config_link_credits(struct rvu *rvu, int blkaddr, int link,
/* Enable cgx tx if disabled for credits to be back */
if (is_pf_cgxmapped(rvu, pf)) {
rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
- restore_tx_en = !cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu),
+ restore_tx_en = !rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu),
lmac_id, true);
}
@@ -3891,8 +3891,8 @@ nix_config_link_credits(struct rvu *rvu, int blkaddr, int link,
NIX_AF_TL1X_SW_XOFF(schq), BIT_ULL(0));
}
- rc = -EBUSY;
- poll_tmo = jiffies + usecs_to_jiffies(10000);
+ rc = NIX_AF_ERR_LINK_CREDITS;
+ poll_tmo = jiffies + usecs_to_jiffies(200000);
/* Wait for credits to return */
do {
if (time_after(jiffies, poll_tmo))
@@ -3918,7 +3918,7 @@ exit:
/* Restore state of cgx tx */
if (restore_tx_en)
- cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false);
+ rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false);
mutex_unlock(&rvu->rsrc_lock);
return rc;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index c0005a1feee6..91f86d77cd41 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -402,6 +402,7 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam,
int blkaddr, int index, struct mcam_entry *entry,
bool *enable)
{
+ struct rvu_npc_mcam_rule *rule;
u16 owner, target_func;
struct rvu_pfvf *pfvf;
u64 rx_action;
@@ -423,6 +424,12 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam,
test_bit(NIXLF_INITIALIZED, &pfvf->flags)))
*enable = false;
+ /* fix up not needed for the rules added by user(ntuple filters) */
+ list_for_each_entry(rule, &mcam->mcam_rules, list) {
+ if (rule->entry == index)
+ return;
+ }
+
/* copy VF default entry action to the VF mcam entry */
rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr,
target_func);
@@ -489,8 +496,8 @@ static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
}
/* PF installing VF rule */
- if (intf == NIX_INTF_RX && actindex < mcam->bmap_entries)
- npc_fixup_vf_rule(rvu, mcam, blkaddr, index, entry, &enable);
+ if (is_npc_intf_rx(intf) && actindex < mcam->bmap_entries)
+ npc_fixup_vf_rule(rvu, mcam, blkaddr, actindex, entry, &enable);
/* Set 'action' */
rvu_write64(rvu, blkaddr,
@@ -916,7 +923,8 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam,
int blkaddr, u16 pcifunc, u64 rx_action)
{
int actindex, index, bank, entry;
- bool enable;
+ struct rvu_npc_mcam_rule *rule;
+ bool enable, update;
if (!(pcifunc & RVU_PFVF_FUNC_MASK))
return;
@@ -924,6 +932,14 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam,
mutex_lock(&mcam->lock);
for (index = 0; index < mcam->bmap_entries; index++) {
if (mcam->entry2target_pffunc[index] == pcifunc) {
+ update = true;
+ /* update not needed for the rules added via ntuple filters */
+ list_for_each_entry(rule, &mcam->mcam_rules, list) {
+ if (rule->entry == index)
+ update = false;
+ }
+ if (!update)
+ continue;
bank = npc_get_bank(mcam, index);
actindex = index;
entry = index & (mcam->banksize - 1);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index ff2b21999f36..19c53e591d0d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -1098,14 +1098,6 @@ find_rule:
write_req.cntr = rule->cntr;
}
- err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req,
- &write_rsp);
- if (err) {
- rvu_mcam_remove_counter_from_rule(rvu, owner, rule);
- if (new)
- kfree(rule);
- return err;
- }
/* update rule */
memcpy(&rule->packet, &dummy.packet, sizeof(rule->packet));
memcpy(&rule->mask, &dummy.mask, sizeof(rule->mask));
@@ -1132,6 +1124,18 @@ find_rule:
if (req->default_rule)
pfvf->def_ucast_rule = rule;
+ /* write to mcam entry registers */
+ err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req,
+ &write_rsp);
+ if (err) {
+ rvu_mcam_remove_counter_from_rule(rvu, owner, rule);
+ if (new) {
+ list_del(&rule->list);
+ kfree(rule);
+ }
+ return err;
+ }
+
/* VF's MAC address is being changed via PF */
if (pf_set_vfs_mac) {
ether_addr_copy(pfvf->default_mac, req->packet.dmac);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 61e52812983f..14509fc64cce 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -603,6 +603,7 @@ static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura,
size++;
tar_addr |= ((size - 1) & 0x7) << 4;
}
+ dma_wmb();
memcpy((u64 *)lmt_info->lmt_addr, ptrs, sizeof(u64) * num_ptrs);
/* Perform LMTST flush */
cn10k_lmt_flush(val, tar_addr);
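
The added dma_wmb() orders the CPU's earlier stores ahead of the LMT-line copy and flush, so the device cannot observe the trigger before the data it refers to. The general shape of the idiom, as a minimal sketch (hypothetical ring layout; writeq assumes a 64-bit-capable platform):

#include <asm/barrier.h>
#include <linux/io.h>

static void my_ring_doorbell(u64 *ring_slot, u64 desc, void __iomem *db)
{
	/* Publish the descriptor ... */
	*ring_slot = desc;

	/* ... and make sure the device can observe it before the
	 * doorbell write triggers a fetch.
	 */
	dma_wmb();
	writeq(1, db);
}
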
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 6080ebd9bd94..d39341e4ab37 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -394,7 +394,12 @@ static int otx2_forward_vf_mbox_msgs(struct otx2_nic *pf,
dst_mdev->msg_size = mbox_hdr->msg_size;
dst_mdev->num_msgs = num_msgs;
err = otx2_sync_mbox_msg(dst_mbox);
- if (err) {
+ /* Error code -EIO indicates there is a communication failure
+ * to the AF. Rest of the error codes indicate that AF processed
+ * VF messages and set the error codes in response messages
+ * (if any) so simply forward responses to VF.
+ */
+ if (err == -EIO) {
dev_warn(pf->dev,
"AF not responding to VF%d messages\n", vf);
/* restore PF mbase and exit */
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index cad93f747d0c..73cd0a4b7291 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -554,6 +554,7 @@ static int prestera_switch_set_base_mac_addr(struct prestera_switch *sw)
dev_info(prestera_dev(sw), "using random base mac address\n");
}
of_node_put(base_mac_np);
+ of_node_put(np);
return prestera_hw_switch_mac_set(sw, sw->base_mac);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 17fe05809653..3eacd8739929 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -131,11 +131,8 @@ static int cmd_alloc_index(struct mlx5_cmd *cmd)
static void cmd_free_index(struct mlx5_cmd *cmd, int idx)
{
- unsigned long flags;
-
- spin_lock_irqsave(&cmd->alloc_lock, flags);
+ lockdep_assert_held(&cmd->alloc_lock);
set_bit(idx, &cmd->bitmask);
- spin_unlock_irqrestore(&cmd->alloc_lock, flags);
}
static void cmd_ent_get(struct mlx5_cmd_work_ent *ent)
@@ -145,17 +142,21 @@ static void cmd_ent_get(struct mlx5_cmd_work_ent *ent)
static void cmd_ent_put(struct mlx5_cmd_work_ent *ent)
{
+ struct mlx5_cmd *cmd = ent->cmd;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cmd->alloc_lock, flags);
if (!refcount_dec_and_test(&ent->refcnt))
- return;
+ goto out;
if (ent->idx >= 0) {
- struct mlx5_cmd *cmd = ent->cmd;
-
cmd_free_index(cmd, ent->idx);
up(ent->page_queue ? &cmd->pages_sem : &cmd->sem);
}
cmd_free_ent(ent);
+out:
+ spin_unlock_irqrestore(&cmd->alloc_lock, flags);
}
static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 812e6810cb3b..c14e06ca64d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -224,7 +224,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
struct mlx5e_tx_wqe {
struct mlx5_wqe_ctrl_seg ctrl;
struct mlx5_wqe_eth_seg eth;
- struct mlx5_wqe_data_seg data[0];
+ struct mlx5_wqe_data_seg data[];
};
struct mlx5e_rx_wqe_ll {
@@ -241,8 +241,8 @@ struct mlx5e_umr_wqe {
struct mlx5_wqe_umr_ctrl_seg uctrl;
struct mlx5_mkey_seg mkc;
union {
- struct mlx5_mtt inline_mtts[0];
- struct mlx5_klm inline_klms[0];
+ DECLARE_FLEX_ARRAY(struct mlx5_mtt, inline_mtts);
+ DECLARE_FLEX_ARRAY(struct mlx5_klm, inline_klms);
};
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
index 00449df98a5e..c1e07496c89c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
@@ -570,7 +570,8 @@ static int mlx5e_htb_convert_rate(struct mlx5e_priv *priv, u64 rate,
static void mlx5e_htb_convert_ceil(struct mlx5e_priv *priv, u64 ceil, u32 *max_average_bw)
{
- *max_average_bw = div_u64(ceil, BYTES_IN_MBIT);
+ /* Hardware treats 0 as "unlimited", so set at least 1. */
+ *max_average_bw = max_t(u32, div_u64(ceil, BYTES_IN_MBIT), 1);
qos_dbg(priv->mdev, "Convert: ceil %llu -> max_average_bw %u\n",
ceil, *max_average_bw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
index 9c076aa20306..b6f5c1bcdbcd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
@@ -183,18 +183,7 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
{
- struct mlx5e_rep_priv *rpriv;
- struct mlx5e_priv *priv;
-
- /* A given netdev is not a representor or not a slave of LAG configuration */
- if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev))
- return false;
-
- priv = netdev_priv(netdev);
- rpriv = priv->ppriv;
-
- /* Egress acl forward to vport is supported only non-uplink representor */
- return rpriv->rep->vport != MLX5_VPORT_UPLINK;
+ return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev);
}
static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
@@ -210,9 +199,6 @@ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
u16 fwd_vport_num;
int err;
- if (!mlx5e_rep_is_lag_netdev(netdev))
- return;
-
info = ptr;
lag_info = info->lower_state_info;
/* This is not an event of a representor becoming active slave */
@@ -266,9 +252,6 @@ static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
struct net_device *lag_dev;
struct mlx5e_priv *priv;
- if (!mlx5e_rep_is_lag_netdev(netdev))
- return;
-
priv = netdev_priv(netdev);
rpriv = priv->ppriv;
lag_dev = info->upper_dev;
@@ -293,6 +276,19 @@ static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_bond *bond;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5e_rep_is_lag_netdev(netdev))
+ return NOTIFY_DONE;
+
+ bond = container_of(nb, struct mlx5e_rep_bond, nb);
+ priv = netdev_priv(netdev);
+ rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH);
+ /* Verify VF representor is on the same device as the bond handling the netevent. */
+ if (rpriv->uplink_priv.bond != bond)
+ return NOTIFY_DONE;
switch (event) {
case NETDEV_CHANGELOWERSTATE:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
index c6d2f8c78db7..48dc121b2cb4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
@@ -491,7 +491,7 @@ void mlx5e_rep_bridge_init(struct mlx5e_priv *priv)
}
br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event;
- err = register_netdevice_notifier(&br_offloads->netdev_nb);
+ err = register_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
if (err) {
esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n",
err);
@@ -509,7 +509,9 @@ err_register_swdev_blk:
err_register_swdev:
destroy_workqueue(br_offloads->wq);
err_alloc_wq:
+ rtnl_lock();
mlx5_esw_bridge_cleanup(esw);
+ rtnl_unlock();
}
void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv)
@@ -524,7 +526,7 @@ void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv)
return;
cancel_delayed_work_sync(&br_offloads->update_work);
- unregister_netdevice_notifier(&br_offloads->netdev_nb);
+ unregister_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
unregister_switchdev_blocking_notifier(&br_offloads->nb_blk);
unregister_switchdev_notifier(&br_offloads->nb);
destroy_workqueue(br_offloads->wq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
index 26efa33de56f..9cc844bd00f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
@@ -16,11 +16,13 @@ struct mlx5e_tc_act_parse_state {
unsigned int num_actions;
struct mlx5e_tc_flow *flow;
struct netlink_ext_ack *extack;
+ bool ct_clear;
bool encap;
bool decap;
bool mpls_push;
bool ptype_host;
const struct ip_tunnel_info *tun_info;
+ struct mlx5e_mpls_info mpls_info;
struct pedit_headers_action hdrs[__PEDIT_CMD_MAX];
int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
int if_count;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
index 06ec30cdb269..58cc33f1363d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
@@ -27,8 +27,13 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
struct mlx5e_priv *priv,
struct mlx5_flow_attr *attr)
{
+ bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
int err;
+ /* It's redundant to do ct clear more than once. */
+ if (clear_action && parse_state->ct_clear)
+ return 0;
+
err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr,
&attr->parse_attr->mod_hdr_acts,
act, parse_state->extack);
@@ -40,6 +45,8 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
if (mlx5e_is_eswitch_flow(parse_state->flow))
attr->esw_attr->split_count = attr->esw_attr->out_count;
+ parse_state->ct_clear = clear_action;
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
index c614fc7fdc9c..2e615e0ba972 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
@@ -177,6 +177,12 @@ parse_mirred_encap(struct mlx5e_tc_act_parse_state *parse_state,
return -ENOMEM;
parse_state->encap = false;
+
+ if (parse_state->mpls_push) {
+ memcpy(&parse_attr->mpls_info[esw_attr->out_count],
+ &parse_state->mpls_info, sizeof(parse_state->mpls_info));
+ parse_state->mpls_push = false;
+ }
esw_attr->dests[esw_attr->out_count].flags |= MLX5_ESW_DEST_ENCAP;
esw_attr->out_count++;
/* attr->dests[].rep is resolved when we handle encap */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c
index 784fc4f68b1e..89ca88c78840 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c
@@ -22,6 +22,16 @@ tc_act_can_offload_mpls_push(struct mlx5e_tc_act_parse_state *parse_state,
return true;
}
+static void
+copy_mpls_info(struct mlx5e_mpls_info *mpls_info,
+ const struct flow_action_entry *act)
+{
+ mpls_info->label = act->mpls_push.label;
+ mpls_info->tc = act->mpls_push.tc;
+ mpls_info->bos = act->mpls_push.bos;
+ mpls_info->ttl = act->mpls_push.ttl;
+}
+
static int
tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -29,6 +39,7 @@ tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state,
struct mlx5_flow_attr *attr)
{
parse_state->mpls_push = true;
+ copy_mpls_info(&parse_state->mpls_info, act);
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
index f832c26ff2c3..70b40ae384e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
@@ -35,6 +35,7 @@ enum {
struct mlx5e_tc_flow_parse_attr {
const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5e_mpls_info mpls_info[MLX5_MAX_FLOW_FWD_VPORTS];
struct net_device *filter_dev;
struct mlx5_flow_spec spec;
struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index 9918ed8c059b..d39d0dae22fc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -750,6 +750,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5_flow_attr *attr = flow->attr;
const struct ip_tunnel_info *tun_info;
+ const struct mlx5e_mpls_info *mpls_info;
unsigned long tbl_time_before = 0;
struct mlx5e_encap_entry *e;
struct mlx5e_encap_key key;
@@ -760,6 +761,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
parse_attr = attr->parse_attr;
tun_info = parse_attr->tun_info[out_index];
+ mpls_info = &parse_attr->mpls_info[out_index];
family = ip_tunnel_info_af(tun_info);
key.ip_tun_key = &tun_info->key;
key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
@@ -810,6 +812,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
goto out_err_init;
}
e->tun_info = tun_info;
+ memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
if (err)
goto out_err_init;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
index 60952b33b568..c5b1617d556f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
@@ -30,16 +30,15 @@ static int generate_ip_tun_hdr(char buf[],
struct mlx5e_encap_entry *r)
{
const struct ip_tunnel_key *tun_key = &r->tun_info->key;
+ const struct mlx5e_mpls_info *mpls_info = &r->mpls_info;
struct udphdr *udp = (struct udphdr *)(buf);
struct mpls_shim_hdr *mpls;
- u32 tun_id;
- tun_id = be32_to_cpu(tunnel_id_to_key32(tun_key->tun_id));
mpls = (struct mpls_shim_hdr *)(udp + 1);
*ip_proto = IPPROTO_UDP;
udp->dest = tun_key->tp_dst;
- *mpls = mpls_entry_encode(tun_id, tun_key->ttl, tun_key->tos, true);
+ *mpls = mpls_entry_encode(mpls_info->label, mpls_info->ttl, mpls_info->tc, mpls_info->bos);
return 0;
}
@@ -60,37 +59,31 @@ static int parse_tunnel(struct mlx5e_priv *priv,
void *headers_v)
{
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
- struct flow_match_enc_keyid enc_keyid;
struct flow_match_mpls match;
void *misc2_c;
void *misc2_v;
- misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters_2);
- misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters_2);
-
- if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS))
- return 0;
-
- if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
- return 0;
-
- flow_rule_match_enc_keyid(rule, &enc_keyid);
-
- if (!enc_keyid.mask->keyid)
- return 0;
-
if (!MLX5_CAP_ETH(priv->mdev, tunnel_stateless_mpls_over_udp) &&
!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_CW_MPLS_UDP))
return -EOPNOTSUPP;
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+ return -EOPNOTSUPP;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS))
+ return 0;
+
flow_rule_match_mpls(rule, &match);
/* Only support matching the first LSE */
if (match.mask->used_lses != 1)
return -EOPNOTSUPP;
+ misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+ misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+
MLX5_SET(fte_match_set_misc2, misc2_c,
outer_first_mpls_over_udp.mpls_label,
match.mask->ls[0].mpls_label);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
index da169b816665..d4239e3b3c88 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
@@ -88,9 +88,6 @@ void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
(MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8);
MLX5_SET(tirc, tirc, lro_timeout_period_usecs, pkt_merge_param->timeout);
break;
- case MLX5E_PACKET_MERGE_SHAMPO:
- MLX5_SET(tirc, tirc, packet_merge_mask, MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO);
- break;
default:
break;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 4cdf8e5b24c2..b789af07829c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -167,6 +167,11 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
return pi;
}
+static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1);
+}
+
struct mlx5e_shampo_umr {
u16 len;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 338d65e2c9ce..56e10c84a706 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -341,8 +341,10 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
/* copy the inline part if required */
if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
- memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
+ memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
+ memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
+ MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start));
dma_len -= MLX5E_XDP_MIN_INLINE;
dma_addr += MLX5E_XDP_MIN_INLINE;
dseg++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 2db9573a3fe6..b56fea142c24 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -157,11 +157,20 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
/* Tunnel mode */
if (mode == XFRM_MODE_TUNNEL) {
eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
- eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
if (xo->proto == IPPROTO_IPV6)
eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
- if (inner_ip_hdr(skb)->protocol == IPPROTO_UDP)
+
+ switch (xo->inner_ipproto) {
+ case IPPROTO_UDP:
eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+ fallthrough;
+ case IPPROTO_TCP:
+ /* IP | ESP | IP | [TCP | UDP] */
+ eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+ break;
+ default:
+ break;
+ }
return;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index b98db50c3418..428881e0adcb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -131,14 +131,17 @@ static inline bool
mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5_wqe_eth_seg *eseg)
{
- struct xfrm_offload *xo = xfrm_offload(skb);
+ u8 inner_ipproto;
if (!mlx5e_ipsec_eseg_meta(eseg))
return false;
eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
- if (xo->inner_ipproto) {
- eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM | MLX5_ETH_WQE_L3_INNER_CSUM;
+ inner_ipproto = xfrm_offload(skb)->inner_ipproto;
+ if (inner_ipproto) {
+ eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
+ if (inner_ipproto == IPPROTO_TCP || inner_ipproto == IPPROTO_UDP)
+ eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
} else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
sq->stats->csum_partial_inner++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 57d755db1cf5..6e80585d731f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1792,7 +1792,7 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev,
if (size_read < 0) {
netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n",
__func__, size_read);
- return 0;
+ return size_read;
}
i += size_read;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index bf80fb612449..3667f5ef5990 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3616,8 +3616,7 @@ static int set_feature_hw_gro(struct net_device *netdev, bool enable)
goto out;
}
- err = mlx5e_safe_switch_params(priv, &new_params,
- mlx5e_modify_tirs_packet_merge_ctx, NULL, reset);
+ err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
out:
mutex_unlock(&priv->state_lock);
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index b01dacb6f527..b3f7520dfd08 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -183,6 +183,13 @@ struct mlx5e_decap_entry {
struct rcu_head rcu;
};
+struct mlx5e_mpls_info {
+ u32 label;
+ u8 tc;
+ u8 bos;
+ u8 ttl;
+};
+
struct mlx5e_encap_entry {
/* attached neigh hash entry */
struct mlx5e_neigh_hash_entry *nhe;
@@ -196,6 +203,7 @@ struct mlx5e_encap_entry {
struct list_head route_list;
struct mlx5_pkt_reformat *pkt_reformat;
const struct ip_tunnel_info *tun_info;
+ struct mlx5e_mpls_info mpls_info;
unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
struct net_device *out_dev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index e86ccc22fb82..6530d7bd5045 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1117,7 +1117,7 @@ static void mlx5e_shampo_update_ipv6_udp_hdr(struct mlx5e_rq *rq, struct ipv6hdr
static void mlx5e_shampo_update_fin_psh_flags(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
struct tcphdr *skb_tcp_hd)
{
- u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index);
+ u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe);
struct tcphdr *last_tcp_hd;
void *last_hd_addr;
@@ -1349,7 +1349,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
}
/* True when explicitly set via priv flag, or XDP prog is loaded */
- if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))
+ if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state) ||
+ get_cqe_tls_offload(cqe))
goto csum_unnecessary;
/* CQE csum doesn't cover padding octets in short ethernet
@@ -1871,7 +1872,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
return skb;
}
-static void
+static struct sk_buff *
mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
struct mlx5_cqe64 *cqe, u16 header_index)
{
@@ -1895,7 +1896,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size);
if (unlikely(!skb))
- return;
+ return NULL;
/* queue up for recycling/reuse */
page_ref_inc(head->page);
@@ -1907,7 +1908,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
ALIGN(head_size, sizeof(long)));
if (unlikely(!skb)) {
rq->stats->buff_alloc_err++;
- return;
+ return NULL;
}
prefetchw(skb->data);
@@ -1918,9 +1919,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
skb->tail += head_size;
skb->len += head_size;
}
- rq->hw_gro_data->skb = skb;
- NAPI_GRO_CB(skb)->count = 1;
- skb_shinfo(skb)->gso_size = mpwrq_get_cqe_byte_cnt(cqe) - head_size;
+ return skb;
}
static void
@@ -1973,13 +1972,14 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index)
static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
u16 data_bcnt = mpwrq_get_cqe_byte_cnt(cqe) - cqe->shampo.header_size;
- u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index);
+ u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe);
u32 wqe_offset = be32_to_cpu(cqe->shampo.data_offset);
u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
u32 data_offset = wqe_offset & (PAGE_SIZE - 1);
u32 cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
u16 wqe_id = be16_to_cpu(cqe->wqe_id);
u32 page_idx = wqe_offset >> PAGE_SHIFT;
+ u16 head_size = cqe->shampo.header_size;
struct sk_buff **skb = &rq->hw_gro_data->skb;
bool flush = cqe->shampo.flush;
bool match = cqe->shampo.match;
@@ -2011,9 +2011,16 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
}
if (!*skb) {
- mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index);
+ if (likely(head_size))
+ *skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index);
+ else
+ *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe_bcnt, data_offset,
+ page_idx);
if (unlikely(!*skb))
goto free_hd_entry;
+
+ NAPI_GRO_CB(*skb)->count = 1;
+ skb_shinfo(*skb)->gso_size = cqe_bcnt - head_size;
} else {
NAPI_GRO_CB(*skb)->count++;
if (NAPI_GRO_CB(*skb)->count == 2 &&
@@ -2027,8 +2034,10 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
}
}
- di = &wi->umr.dma_info[page_idx];
- mlx5e_fill_skb_data(*skb, rq, di, data_bcnt, data_offset);
+ if (likely(head_size)) {
+ di = &wi->umr.dma_info[page_idx];
+ mlx5e_fill_skb_data(*skb, rq, di, data_bcnt, data_offset);
+ }
mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb);
if (flush)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 8c9163d2c646..08a75654f5f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -334,6 +334,7 @@ void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest,
netdev_info(ndev, "\t[%d] %s start..\n", i, st.name);
buf[count] = st.st_func(priv);
netdev_info(ndev, "\t[%d] %s end: result(%lld)\n", i, st.name, buf[count]);
+ count++;
}
mutex_unlock(&priv->state_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 26e326fe503c..00f1d16db456 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -1254,9 +1254,6 @@ static void fec_set_corrected_bits_total(struct mlx5e_priv *priv,
u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
- if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group))
- return;
-
MLX5_SET(ppcnt_reg, in, local_port, 1);
MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP);
if (mlx5_core_access_reg(mdev, in, sz, ppcnt_phy_statistical,
@@ -1272,6 +1269,9 @@ static void fec_set_corrected_bits_total(struct mlx5e_priv *priv,
void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
struct ethtool_fec_stats *fec_stats)
{
+ if (!MLX5_CAP_PCAM_FEATURE(priv->mdev, ppcnt_statistical_group))
+ return;
+
fec_set_corrected_bits_total(priv, fec_stats);
fec_set_block_stats(priv, fec_stats);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 3d908a7e1406..b27532a9301e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1414,7 +1414,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
if (err)
goto err_out;
- if (!attr->chain && esw_attr->int_port) {
+ if (!attr->chain && esw_attr->int_port &&
+ attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
/* If decap route device is internal port, change the
* source vport value in reg_c0 back to uplink just in
* case the rule performs goto chain > 0. If we have a miss
@@ -3191,6 +3192,18 @@ actions_match_supported(struct mlx5e_priv *priv,
return false;
}
+ if (!(~actions &
+ (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
+ NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
+ return false;
+ }
+
+ if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
+ actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
+ NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
+ return false;
+ }
+
if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
!modify_header_match_supported(priv, &parse_attr->spec, flow_action,
actions, ct_flow, ct_clear, extack))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 7fd33b356cc8..ee7ecb88adc1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -208,7 +208,7 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs)
int cpy1_sz = 2 * ETH_ALEN;
int cpy2_sz = ihs - cpy1_sz;
- memcpy(vhdr, skb->data, cpy1_sz);
+ memcpy(&vhdr->addrs, skb->data, cpy1_sz);
vhdr->h_vlan_proto = skb->vlan_proto;
vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb));
memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
index f690f430f40f..05e08cec5a8c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -1574,6 +1574,8 @@ struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw)
{
struct mlx5_esw_bridge_offloads *br_offloads;
+ ASSERT_RTNL();
+
br_offloads = kvzalloc(sizeof(*br_offloads), GFP_KERNEL);
if (!br_offloads)
return ERR_PTR(-ENOMEM);
@@ -1590,6 +1592,8 @@ void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw)
{
struct mlx5_esw_bridge_offloads *br_offloads = esw->br_offloads;
+ ASSERT_RTNL();
+
if (!br_offloads)
return;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
index 3401188e0a60..51ac24e6ec3c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
@@ -21,7 +21,7 @@ DECLARE_EVENT_CLASS(mlx5_esw_bridge_fdb_template,
__field(unsigned int, used)
),
TP_fast_assign(
- strncpy(__entry->dev_name,
+ strscpy(__entry->dev_name,
netdev_name(fdb->dev),
IFNAMSIZ);
memcpy(__entry->addr, fdb->key.addr, ETH_ALEN);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index 11bbcd5f5b8b..694c54066955 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -697,7 +697,7 @@ void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo
}
int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
- u32 min_rate, u32 max_rate)
+ u32 max_rate, u32 min_rate)
{
int err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 9a7b25692505..cfcd72bad9af 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -2838,10 +2838,6 @@ bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
return false;
- if (mlx5_core_is_ecpf_esw_manager(esw->dev) ||
- mlx5_ecpf_vport_exists(esw->dev))
- return false;
-
return true;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index b628917e38e4..537c82b9aa53 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2074,6 +2074,8 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
fte->node.del_hw_func = NULL;
up_write_ref_node(&fte->node, false);
tree_put_node(&fte->node, false);
+ } else {
+ up_write_ref_node(&fte->node, false);
}
kfree(handle);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index 0b0234f9d694..84dbe46d5ede 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -132,7 +132,7 @@ static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
- del_timer(&fw_reset->timer);
+ del_timer_sync(&fw_reset->timer);
}
static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
index 1ca01a5b6cdd..626aa60b6099 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
@@ -126,6 +126,10 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
return;
}
+ /* Handle multipath entry with lower priority value */
+ if (mp->mfi && mp->mfi != fi && fi->fib_priority >= mp->mfi->fib_priority)
+ return;
+
/* Handle add/replace event */
nhs = fib_info_num_path(fi);
if (nhs == 1) {
@@ -135,12 +139,13 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
if (i < 0)
- i = MLX5_LAG_NORMAL_AFFINITY;
- else
- ++i;
+ return;
+ i++;
mlx5_lag_set_port_affinity(ldev, i);
}
+
+ mp->mfi = fi;
return;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
index d5e47630e284..df58cba37930 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
@@ -121,12 +121,13 @@ u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains)
u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains)
{
- if (!mlx5_chains_prios_supported(chains))
- return 1;
-
if (mlx5_chains_ignore_flow_level_supported(chains))
return UINT_MAX;
+ if (!chains->dev->priv.eswitch ||
+ chains->dev->priv.eswitch->mode != MLX5_ESWITCH_OFFLOADS)
+ return 1;
+
/* We should get here only for eswitch case */
return FDB_TC_MAX_PRIO;
}
@@ -211,7 +212,7 @@ static int
create_chain_restore(struct fs_chain *chain)
{
struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch;
- char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)];
+ u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
struct mlx5_fs_chains *chains = chain->chains;
enum mlx5e_tc_attr_to_reg chain_to_reg;
struct mlx5_modify_hdr *mod_hdr;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 2c774f367199..bba72b220cc3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -526,7 +526,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
/* Check log_max_qp from HCA caps to set in current profile */
if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) {
- prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp);
+ prof->log_max_qp = min_t(u8, 17, MLX5_CAP_GEN_MAX(dev, log_max_qp));
} else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) {
mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n",
prof->log_max_qp,
@@ -1840,10 +1840,12 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
{ PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */
{ PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */
{ PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */
+ { PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */
{ PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */
{ PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */
{ PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */
{ PCI_VDEVICE(MELLANOX, 0xa2dc) }, /* BlueField-3 integrated ConnectX-7 network controller */
+ { PCI_VDEVICE(MELLANOX, 0xa2df) }, /* BlueField-4 integrated ConnectX-8 network controller */
{ 0, }
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 1ef2b6a848c1..7b16a1188aab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -406,23 +406,24 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
switch (module_id) {
case MLX5_MODULE_ID_SFP:
- mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset);
+ mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &offset);
break;
case MLX5_MODULE_ID_QSFP:
case MLX5_MODULE_ID_QSFP_PLUS:
case MLX5_MODULE_ID_QSFP28:
- mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset);
+ mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &offset);
break;
default:
mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id);
return -EINVAL;
}
- if (query.offset + size > MLX5_EEPROM_PAGE_LENGTH)
+ if (offset + size > MLX5_EEPROM_PAGE_LENGTH)
/* Cross-page read, read until offset 256 in low page */
- size -= offset + size - MLX5_EEPROM_PAGE_LENGTH;
+ size = MLX5_EEPROM_PAGE_LENGTH - offset;
query.size = size;
+ query.offset = offset;
return mlx5_query_mcia(dev, &query, data);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
index 7f6fd9c5e371..e289cfdbce07 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
@@ -4,7 +4,6 @@
#include "dr_types.h"
#define DR_ICM_MODIFY_HDR_ALIGN_BASE 64
-#define DR_ICM_SYNC_THRESHOLD_POOL (64 * 1024 * 1024)
struct mlx5dr_icm_pool {
enum mlx5dr_icm_type icm_type;
@@ -136,37 +135,35 @@ static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr)
kvfree(icm_mr);
}
-static int dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk)
+static int dr_icm_buddy_get_ste_size(struct mlx5dr_icm_buddy_mem *buddy)
{
- chunk->ste_arr = kvzalloc(chunk->num_of_entries *
- sizeof(chunk->ste_arr[0]), GFP_KERNEL);
- if (!chunk->ste_arr)
- return -ENOMEM;
-
- chunk->hw_ste_arr = kvzalloc(chunk->num_of_entries *
- DR_STE_SIZE_REDUCED, GFP_KERNEL);
- if (!chunk->hw_ste_arr)
- goto out_free_ste_arr;
-
- chunk->miss_list = kvmalloc(chunk->num_of_entries *
- sizeof(chunk->miss_list[0]), GFP_KERNEL);
- if (!chunk->miss_list)
- goto out_free_hw_ste_arr;
+ /* We support only one type of STE size, both for ConnectX-5 and later
+ * devices. Once the support for match STE which has a larger tag is
+ * added (32B instead of 16B), the STE size for devices later than
+ * ConnectX-5 needs to account for that.
+ */
+ return DR_STE_SIZE_REDUCED;
+}
- return 0;
+static void dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk, int offset)
+{
+ struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
+ int index = offset / DR_STE_SIZE;
-out_free_hw_ste_arr:
- kvfree(chunk->hw_ste_arr);
-out_free_ste_arr:
- kvfree(chunk->ste_arr);
- return -ENOMEM;
+ chunk->ste_arr = &buddy->ste_arr[index];
+ chunk->miss_list = &buddy->miss_list[index];
+ chunk->hw_ste_arr = buddy->hw_ste_arr +
+ index * dr_icm_buddy_get_ste_size(buddy);
}
static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk)
{
- kvfree(chunk->miss_list);
- kvfree(chunk->hw_ste_arr);
- kvfree(chunk->ste_arr);
+ struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
+
+ memset(chunk->hw_ste_arr, 0,
+ chunk->num_of_entries * dr_icm_buddy_get_ste_size(buddy));
+ memset(chunk->ste_arr, 0,
+ chunk->num_of_entries * sizeof(chunk->ste_arr[0]));
}
static enum mlx5dr_icm_type
@@ -189,6 +186,44 @@ static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk,
kvfree(chunk);
}
+static int dr_icm_buddy_init_ste_cache(struct mlx5dr_icm_buddy_mem *buddy)
+{
+ int num_of_entries =
+ mlx5dr_icm_pool_chunk_size_to_entries(buddy->pool->max_log_chunk_sz);
+
+ buddy->ste_arr = kvcalloc(num_of_entries,
+ sizeof(struct mlx5dr_ste), GFP_KERNEL);
+ if (!buddy->ste_arr)
+ return -ENOMEM;
+
+ /* Preallocate full STE size on non-ConnectX-5 devices since
+ * we need to support both full and reduced with the same cache.
+ */
+ buddy->hw_ste_arr = kvcalloc(num_of_entries,
+ dr_icm_buddy_get_ste_size(buddy), GFP_KERNEL);
+ if (!buddy->hw_ste_arr)
+ goto free_ste_arr;
+
+ buddy->miss_list = kvmalloc(num_of_entries * sizeof(struct list_head), GFP_KERNEL);
+ if (!buddy->miss_list)
+ goto free_hw_ste_arr;
+
+ return 0;
+
+free_hw_ste_arr:
+ kvfree(buddy->hw_ste_arr);
+free_ste_arr:
+ kvfree(buddy->ste_arr);
+ return -ENOMEM;
+}
+
+static void dr_icm_buddy_cleanup_ste_cache(struct mlx5dr_icm_buddy_mem *buddy)
+{
+ kvfree(buddy->ste_arr);
+ kvfree(buddy->hw_ste_arr);
+ kvfree(buddy->miss_list);
+}
+
static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool)
{
struct mlx5dr_icm_buddy_mem *buddy;
@@ -208,11 +243,19 @@ static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool)
buddy->icm_mr = icm_mr;
buddy->pool = pool;
+ if (pool->icm_type == DR_ICM_TYPE_STE) {
+ /* Reduce allocations by preallocating and reusing the STE structures */
+ if (dr_icm_buddy_init_ste_cache(buddy))
+ goto err_cleanup_buddy;
+ }
+
/* add it to the -start- of the list in order to search in it first */
list_add(&buddy->list_node, &pool->buddy_mem_list);
return 0;
+err_cleanup_buddy:
+ mlx5dr_buddy_cleanup(buddy);
err_free_buddy:
kvfree(buddy);
free_mr:
@@ -234,6 +277,9 @@ static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy)
mlx5dr_buddy_cleanup(buddy);
+ if (buddy->pool->icm_type == DR_ICM_TYPE_STE)
+ dr_icm_buddy_cleanup_ste_cache(buddy);
+
kvfree(buddy);
}
@@ -261,34 +307,30 @@ dr_icm_chunk_create(struct mlx5dr_icm_pool *pool,
chunk->byte_size =
mlx5dr_icm_pool_chunk_size_to_byte(chunk_size, pool->icm_type);
chunk->seg = seg;
+ chunk->buddy_mem = buddy_mem_pool;
- if (pool->icm_type == DR_ICM_TYPE_STE && dr_icm_chunk_ste_init(chunk)) {
- mlx5dr_err(pool->dmn,
- "Failed to init ste arrays (order: %d)\n",
- chunk_size);
- goto out_free_chunk;
- }
+ if (pool->icm_type == DR_ICM_TYPE_STE)
+ dr_icm_chunk_ste_init(chunk, offset);
buddy_mem_pool->used_memory += chunk->byte_size;
- chunk->buddy_mem = buddy_mem_pool;
INIT_LIST_HEAD(&chunk->chunk_list);
/* chunk now is part of the used_list */
list_add_tail(&chunk->chunk_list, &buddy_mem_pool->used_list);
return chunk;
-
-out_free_chunk:
- kvfree(chunk);
- return NULL;
}
static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool)
{
- if (pool->hot_memory_size > DR_ICM_SYNC_THRESHOLD_POOL)
- return true;
+ int allow_hot_size;
+
+ /* sync when hot memory reaches half of the pool size */
+ allow_hot_size =
+ mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
+ pool->icm_type) / 2;
- return false;
+ return pool->hot_memory_size > allow_hot_size;
}
static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
index e87cf498c77b..38971fe1dfe1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
@@ -13,18 +13,6 @@ static bool dr_mask_is_dmac_set(struct mlx5dr_match_spec *spec)
return (spec->dmac_47_16 || spec->dmac_15_0);
}
-static bool dr_mask_is_src_addr_set(struct mlx5dr_match_spec *spec)
-{
- return (spec->src_ip_127_96 || spec->src_ip_95_64 ||
- spec->src_ip_63_32 || spec->src_ip_31_0);
-}
-
-static bool dr_mask_is_dst_addr_set(struct mlx5dr_match_spec *spec)
-{
- return (spec->dst_ip_127_96 || spec->dst_ip_95_64 ||
- spec->dst_ip_63_32 || spec->dst_ip_31_0);
-}
-
static bool dr_mask_is_l3_base_set(struct mlx5dr_match_spec *spec)
{
return (spec->ip_protocol || spec->frag || spec->tcp_flags ||
@@ -503,11 +491,11 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
&mask, inner, rx);
if (outer_ipv == DR_RULE_IPV6) {
- if (dr_mask_is_dst_addr_set(&mask.outer))
+ if (DR_MASK_IS_DST_IP_SET(&mask.outer))
mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++],
&mask, inner, rx);
- if (dr_mask_is_src_addr_set(&mask.outer))
+ if (DR_MASK_IS_SRC_IP_SET(&mask.outer))
mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++],
&mask, inner, rx);
@@ -610,11 +598,11 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
&mask, inner, rx);
if (inner_ipv == DR_RULE_IPV6) {
- if (dr_mask_is_dst_addr_set(&mask.inner))
+ if (DR_MASK_IS_DST_IP_SET(&mask.inner))
mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++],
&mask, inner, rx);
- if (dr_mask_is_src_addr_set(&mask.inner))
+ if (DR_MASK_IS_SRC_IP_SET(&mask.inner))
mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++],
&mask, inner, rx);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index 7e61742e58a0..187e29b409b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -602,12 +602,34 @@ int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx,
used_hw_action_num);
}
+static int dr_ste_build_pre_check_spec(struct mlx5dr_domain *dmn,
+ struct mlx5dr_match_spec *spec)
+{
+ if (spec->ip_version) {
+ if (spec->ip_version != 0xf) {
+ mlx5dr_err(dmn,
+ "Partial ip_version mask with src/dst IP is not supported\n");
+ return -EINVAL;
+ }
+ } else if (spec->ethertype != 0xffff &&
+ (DR_MASK_IS_SRC_IP_SET(spec) || DR_MASK_IS_DST_IP_SET(spec))) {
+ mlx5dr_err(dmn,
+ "Partial/no ethertype mask with src/dst IP is not supported\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn,
u8 match_criteria,
struct mlx5dr_match_param *mask,
struct mlx5dr_match_param *value)
{
- if (!value && (match_criteria & DR_MATCHER_CRITERIA_MISC)) {
+ if (value)
+ return 0;
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC) {
if (mask->misc.source_port && mask->misc.source_port != 0xffff) {
mlx5dr_err(dmn,
"Partial mask source_port is not supported\n");
@@ -621,6 +643,14 @@ int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn,
}
}
+ if ((match_criteria & DR_MATCHER_CRITERIA_OUTER) &&
+ dr_ste_build_pre_check_spec(dmn, &mask->outer))
+ return -EINVAL;
+
+ if ((match_criteria & DR_MATCHER_CRITERIA_INNER) &&
+ dr_ste_build_pre_check_spec(dmn, &mask->inner))
+ return -EINVAL;
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 1b3d484b99be..55fcb751e24a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -798,6 +798,16 @@ struct mlx5dr_match_param {
(_misc3)->icmpv4_code || \
(_misc3)->icmpv4_header_data)
+#define DR_MASK_IS_SRC_IP_SET(_spec) ((_spec)->src_ip_127_96 || \
+ (_spec)->src_ip_95_64 || \
+ (_spec)->src_ip_63_32 || \
+ (_spec)->src_ip_31_0)
+
+#define DR_MASK_IS_DST_IP_SET(_spec) ((_spec)->dst_ip_127_96 || \
+ (_spec)->dst_ip_95_64 || \
+ (_spec)->dst_ip_63_32 || \
+ (_spec)->dst_ip_31_0)
+
struct mlx5dr_esw_caps {
u64 drop_icm_address_rx;
u64 drop_icm_address_tx;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index a476da2424f8..3f311462bedf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -233,7 +233,11 @@ static bool contain_vport_reformat_action(struct mlx5_flow_rule *dst)
dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
}
-#define MLX5_FLOW_CONTEXT_ACTION_MAX 32
+/* We want to support a rule with 32 destinations, which means we need to
+ * account for 32 destinations plus usually a counter plus one more action
+ * for a multi-destination flow table.
+ */
+#define MLX5_FLOW_CONTEXT_ACTION_MAX 34
static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
struct mlx5_flow_group *group,
@@ -403,9 +407,9 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
enum mlx5_flow_destination_type type = dst->dest_attr.type;
u32 id;
- if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
- num_term_actions >= MLX5_FLOW_CONTEXT_ACTION_MAX) {
- err = -ENOSPC;
+ if (fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
+ num_term_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
goto free_actions;
}
@@ -478,8 +482,9 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
MLX5_FLOW_DESTINATION_TYPE_COUNTER)
continue;
- if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
- err = -ENOSPC;
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
+ fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
goto free_actions;
}
@@ -499,14 +504,28 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
params.match_sz = match_sz;
params.match_buf = (u64 *)fte->val;
if (num_term_actions == 1) {
- if (term_actions->reformat)
+ if (term_actions->reformat) {
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
actions[num_actions++] = term_actions->reformat;
+ }
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
actions[num_actions++] = term_actions->dest;
} else if (num_term_actions > 1) {
bool ignore_flow_level =
!!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL);
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
+ fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
tmp_action = mlx5dr_action_create_mult_dest_tbl(domain,
term_actions,
num_term_actions,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
index c7c93131b762..dfa223415fe2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -160,6 +160,11 @@ struct mlx5dr_icm_buddy_mem {
* sync_ste command sets them free.
*/
struct list_head hot_list;
+
+ /* Memory optimisation */
+ struct mlx5dr_ste *ste_arr;
+ struct list_head *miss_list;
+ u8 *hw_ste_arr;
};
int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy,
diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c
index 0303e727e99f..d167d93e4c12 100644
--- a/drivers/net/ethernet/micrel/ks8851_spi.c
+++ b/drivers/net/ethernet/micrel/ks8851_spi.c
@@ -452,11 +452,9 @@ static int ks8851_probe_spi(struct spi_device *spi)
return ks8851_probe_common(netdev, dev, msg_enable);
}
-static int ks8851_remove_spi(struct spi_device *spi)
+static void ks8851_remove_spi(struct spi_device *spi)
{
ks8851_remove_common(&spi->dev);
-
- return 0;
}
static const struct of_device_id ks8851_match_table[] = {
diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c
index 634ac7649c43..db5a3edb4c3c 100644
--- a/drivers/net/ethernet/microchip/enc28j60.c
+++ b/drivers/net/ethernet/microchip/enc28j60.c
@@ -1612,15 +1612,13 @@ error_alloc:
return ret;
}
-static int enc28j60_remove(struct spi_device *spi)
+static void enc28j60_remove(struct spi_device *spi)
{
struct enc28j60_net *priv = spi_get_drvdata(spi);
unregister_netdev(priv->netdev);
free_irq(spi->irq, priv);
free_netdev(priv->netdev);
-
- return 0;
}
static const struct of_device_id enc28j60_dt_ids[] = {
diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c
index b90efc80fb59..dc1840cb5b10 100644
--- a/drivers/net/ethernet/microchip/encx24j600.c
+++ b/drivers/net/ethernet/microchip/encx24j600.c
@@ -1093,7 +1093,7 @@ error_out:
return ret;
}
-static int encx24j600_spi_remove(struct spi_device *spi)
+static void encx24j600_spi_remove(struct spi_device *spi)
{
struct encx24j600_priv *priv = dev_get_drvdata(&spi->dev);
@@ -1101,8 +1101,6 @@ static int encx24j600_spi_remove(struct spi_device *spi)
kthread_stop(priv->kworker_task);
free_netdev(priv->ndev);
-
- return 0;
}
static const struct spi_device_id encx24j600_spi_id_table[] = {
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
index ca5f1177963d..ce5970bdcc6a 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
@@ -40,11 +40,12 @@ static int lan966x_mac_wait_for_completion(struct lan966x *lan966x)
{
u32 val;
- return readx_poll_timeout(lan966x_mac_get_status,
- lan966x, val,
- (ANA_MACACCESS_MAC_TABLE_CMD_GET(val)) ==
- MACACCESS_CMD_IDLE,
- TABLE_UPDATE_SLEEP_US, TABLE_UPDATE_TIMEOUT_US);
+ return readx_poll_timeout_atomic(lan966x_mac_get_status,
+ lan966x, val,
+ (ANA_MACACCESS_MAC_TABLE_CMD_GET(val)) ==
+ MACACCESS_CMD_IDLE,
+ TABLE_UPDATE_SLEEP_US,
+ TABLE_UPDATE_TIMEOUT_US);
}
static void lan966x_mac_select(struct lan966x *lan966x,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 2cb70da63db3..1f60fd125a1d 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -182,9 +182,9 @@ static int lan966x_port_inj_ready(struct lan966x *lan966x, u8 grp)
{
u32 val;
- return readx_poll_timeout(lan966x_port_inj_status, lan966x, val,
- QS_INJ_STATUS_FIFO_RDY_GET(val) & BIT(grp),
- READL_SLEEP_US, READL_TIMEOUT_US);
+ return readx_poll_timeout_atomic(lan966x_port_inj_status, lan966x, val,
+ QS_INJ_STATUS_FIFO_RDY_GET(val) & BIT(grp),
+ READL_SLEEP_US, READL_TIMEOUT_US);
}
static int lan966x_port_ifh_xmit(struct sk_buff *skb,
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c
index 59783fc46a7b..10b866e9f726 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c
@@ -1103,7 +1103,7 @@ void sparx5_get_stats64(struct net_device *ndev,
stats->tx_carrier_errors = portstats[spx5_stats_tx_csense_cnt];
stats->tx_window_errors = portstats[spx5_stats_tx_late_coll_cnt];
stats->rx_dropped = portstats[spx5_stats_ana_ac_port_stat_lsb_cnt];
- for (idx = 0; idx < 2 * SPX5_PRIOS; ++idx, ++stats)
+ for (idx = 0; idx < 2 * SPX5_PRIOS; ++idx)
stats->rx_dropped += portstats[spx5_stats_green_p0_rx_port_drop
+ idx];
stats->tx_dropped = portstats[spx5_stats_tx_local_drop];
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
index a1acc9b461f2..d40e18ce3293 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
@@ -16,6 +16,8 @@
#include <linux/phylink.h>
#include <linux/hrtimer.h>
+#include "sparx5_main_regs.h"
+
/* Target chip type */
enum spx5_target_chiptype {
SPX5_TARGET_CT_7546 = 0x7546, /* SparX-5-64 Enterprise */
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
index dc7e5ea6ec15..148d431fcde4 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
@@ -145,9 +145,9 @@ static void sparx5_xtr_grp(struct sparx5 *sparx5, u8 grp, bool byte_swap)
skb_put(skb, byte_cnt - ETH_FCS_LEN);
eth_skb_pad(skb);
skb->protocol = eth_type_trans(skb, netdev);
- netif_rx(skb);
netdev->stats.rx_bytes += skb->len;
netdev->stats.rx_packets++;
+ netif_rx(skb);
}
static int sparx5_inject(struct sparx5 *sparx5,
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_vlan.c b/drivers/net/ethernet/microchip/sparx5/sparx5_vlan.c
index 4ce490a25f33..8e56ffa1c4f7 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_vlan.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_vlan.c
@@ -58,16 +58,6 @@ int sparx5_vlan_vid_add(struct sparx5_port *port, u16 vid, bool pvid,
struct sparx5 *sparx5 = port->sparx5;
int ret;
- /* Make the port a member of the VLAN */
- set_bit(port->portno, sparx5->vlan_mask[vid]);
- ret = sparx5_vlant_set_mask(sparx5, vid);
- if (ret)
- return ret;
-
- /* Default ingress vlan classification */
- if (pvid)
- port->pvid = vid;
-
/* Untagged egress vlan classification */
if (untagged && port->vid != vid) {
if (port->vid) {
@@ -79,6 +69,16 @@ int sparx5_vlan_vid_add(struct sparx5_port *port, u16 vid, bool pvid,
port->vid = vid;
}
+ /* Make the port a member of the VLAN */
+ set_bit(port->portno, sparx5->vlan_mask[vid]);
+ ret = sparx5_vlant_set_mask(sparx5, vid);
+ if (ret)
+ return ret;
+
+ /* Default ingress vlan classification */
+ if (pvid)
+ port->pvid = vid;
+
sparx5_vlan_port_apply(sparx5, port);
return 0;
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 455293aa6343..fd3ceb74620d 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -549,14 +549,18 @@ EXPORT_SYMBOL(ocelot_vlan_add);
int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
+ bool del_pvid = false;
int err;
+ if (ocelot_port->pvid_vlan && ocelot_port->pvid_vlan->vid == vid)
+ del_pvid = true;
+
err = ocelot_vlan_member_del(ocelot, port, vid);
if (err)
return err;
/* Ingress */
- if (ocelot_port->pvid_vlan && ocelot_port->pvid_vlan->vid == vid)
+ if (del_pvid)
ocelot_port_set_pvid(ocelot, port, NULL);
/* Egress */
@@ -1432,6 +1436,8 @@ static void
ocelot_populate_ipv4_ptp_event_trap_key(struct ocelot_vcap_filter *trap)
{
trap->key_type = OCELOT_VCAP_KEY_IPV4;
+ trap->key.ipv4.proto.value[0] = IPPROTO_UDP;
+ trap->key.ipv4.proto.mask[0] = 0xff;
trap->key.ipv4.dport.value = PTP_EV_PORT;
trap->key.ipv4.dport.mask = 0xffff;
}
@@ -1440,6 +1446,8 @@ static void
ocelot_populate_ipv6_ptp_event_trap_key(struct ocelot_vcap_filter *trap)
{
trap->key_type = OCELOT_VCAP_KEY_IPV6;
+ trap->key.ipv6.proto.value[0] = IPPROTO_UDP;
+ trap->key.ipv6.proto.mask[0] = 0xff;
trap->key.ipv6.dport.value = PTP_EV_PORT;
trap->key.ipv6.dport.mask = 0xffff;
}
@@ -1448,6 +1456,8 @@ static void
ocelot_populate_ipv4_ptp_general_trap_key(struct ocelot_vcap_filter *trap)
{
trap->key_type = OCELOT_VCAP_KEY_IPV4;
+ trap->key.ipv4.proto.value[0] = IPPROTO_UDP;
+ trap->key.ipv4.proto.mask[0] = 0xff;
trap->key.ipv4.dport.value = PTP_GEN_PORT;
trap->key.ipv4.dport.mask = 0xffff;
}
@@ -1456,6 +1466,8 @@ static void
ocelot_populate_ipv6_ptp_general_trap_key(struct ocelot_vcap_filter *trap)
{
trap->key_type = OCELOT_VCAP_KEY_IPV6;
+ trap->key.ipv6.proto.value[0] = IPPROTO_UDP;
+ trap->key.ipv6.proto.mask[0] = 0xff;
trap->key.ipv6.dport.value = PTP_GEN_PORT;
trap->key.ipv6.dport.mask = 0xffff;
}
@@ -1737,12 +1749,11 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
}
EXPORT_SYMBOL(ocelot_get_strings);
+/* Caller must hold &ocelot->stats_lock */
static void ocelot_update_stats(struct ocelot *ocelot)
{
int i, j;
- mutex_lock(&ocelot->stats_lock);
-
for (i = 0; i < ocelot->num_phys_ports; i++) {
/* Configure the port to read the stats from */
ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG);
@@ -1761,8 +1772,6 @@ static void ocelot_update_stats(struct ocelot *ocelot)
~(u64)U32_MAX) + val;
}
}
-
- mutex_unlock(&ocelot->stats_lock);
}
static void ocelot_check_stats_work(struct work_struct *work)
@@ -1771,7 +1780,9 @@ static void ocelot_check_stats_work(struct work_struct *work)
struct ocelot *ocelot = container_of(del_work, struct ocelot,
stats_work);
+ mutex_lock(&ocelot->stats_lock);
ocelot_update_stats(ocelot);
+ mutex_unlock(&ocelot->stats_lock);
queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
OCELOT_STATS_CHECK_DELAY);
@@ -1781,12 +1792,16 @@ void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
{
int i;
+ mutex_lock(&ocelot->stats_lock);
+
/* check and update now */
ocelot_update_stats(ocelot);
/* Copy all counters */
for (i = 0; i < ocelot->num_stats; i++)
*data++ = ocelot->stats[port * ocelot->num_stats + i];
+
+ mutex_unlock(&ocelot->stats_lock);
}
EXPORT_SYMBOL(ocelot_get_ethtool_stats);
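The ocelot.c change above moves stats_lock out of ocelot_update_stats() and into its callers, so ocelot_get_ethtool_stats() can hold the mutex across both the hardware refresh and the copy-out, making the pair atomic with respect to the periodic worker. A minimal sketch of the caller-holds-lock convention; all names here are hypothetical:

#include <linux/mutex.h>
#include <linux/types.h>

struct demo_dev {
        struct mutex stats_lock;
        u64 counters[64];
};

/* Caller must hold &dev->stats_lock. */
static void demo_refresh_counters(struct demo_dev *dev)
{
        lockdep_assert_held(&dev->stats_lock);
        /* ... read hardware counters into dev->counters[] ... */
}

static void demo_get_stats(struct demo_dev *dev, u64 *out, int n)
{
        int i;

        mutex_lock(&dev->stats_lock);
        demo_refresh_counters(dev);     /* refresh and copy under one lock */
        for (i = 0; i < n; i++)
                out[i] = dev->counters[i];
        mutex_unlock(&dev->stats_lock);
}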
diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index 949858891973..fdb4d7e7296c 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -60,6 +60,12 @@ static int ocelot_chain_to_block(int chain, bool ingress)
*/
static int ocelot_chain_to_lookup(int chain)
{
+ /* Backwards compatibility with older, single-chain tc-flower
+ * offload support in Ocelot
+ */
+ if (chain == 0)
+ return 0;
+
return (chain / VCAP_LOOKUP) % 10;
}
@@ -68,7 +74,15 @@ static int ocelot_chain_to_lookup(int chain)
*/
static int ocelot_chain_to_pag(int chain)
{
- int lookup = ocelot_chain_to_lookup(chain);
+ int lookup;
+
+ /* Backwards compatibility with older, single-chain tc-flower
+ * offload support in Ocelot
+ */
+ if (chain == 0)
+ return 0;
+
+ lookup = ocelot_chain_to_lookup(chain);
/* calculate PAG value as chain index relative to the first PAG */
return chain - VCAP_IS2_CHAIN(lookup, 0);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index 784292b16290..1543e47456d5 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -723,6 +723,8 @@ static inline bool nfp_fl_is_netdev_to_offload(struct net_device *netdev)
return true;
if (netif_is_gretap(netdev))
return true;
+ if (netif_is_ip6gretap(netdev))
+ return true;
return false;
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
index dfb4468fe287..cb43651ea9ba 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -922,8 +922,8 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev,
int port, bool mod)
{
struct nfp_flower_priv *priv = app->priv;
- int ida_idx = NFP_MAX_MAC_INDEX, err;
struct nfp_tun_offloaded_mac *entry;
+ int ida_idx = -1, err;
u16 nfp_mac_idx = 0;
entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr);
@@ -997,7 +997,7 @@ err_remove_hash:
err_free_entry:
kfree(entry);
err_free_ida:
- if (ida_idx != NFP_MAX_MAC_INDEX)
+ if (ida_idx != -1)
ida_simple_remove(&priv->tun.mac_off_ids, ida_idx);
return err;
@@ -1011,6 +1011,7 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev,
struct nfp_flower_repr_priv *repr_priv;
struct nfp_tun_offloaded_mac *entry;
struct nfp_repr *repr;
+ u16 nfp_mac_idx;
int ida_idx;
entry = nfp_tunnel_lookup_offloaded_macs(app, mac);
@@ -1029,8 +1030,6 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev,
entry->bridge_count--;
if (!entry->bridge_count && entry->ref_count) {
- u16 nfp_mac_idx;
-
nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT;
if (__nfp_tunnel_offload_mac(app, mac, nfp_mac_idx,
false)) {
@@ -1046,7 +1045,6 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev,
/* If MAC is now used by 1 repr set the offloaded MAC index to port. */
if (entry->ref_count == 1 && list_is_singular(&entry->repr_list)) {
- u16 nfp_mac_idx;
int port, err;
repr_priv = list_first_entry(&entry->repr_list,
@@ -1074,8 +1072,14 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev,
WARN_ON_ONCE(rhashtable_remove_fast(&priv->tun.offloaded_macs,
&entry->ht_node,
offloaded_macs_params));
+
+ if (nfp_flower_is_supported_bridge(netdev))
+ nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT;
+ else
+ nfp_mac_idx = entry->index;
+
/* If MAC has global ID then extract and free the ida entry. */
- if (nfp_tunnel_is_mac_idx_global(entry->index)) {
+ if (nfp_tunnel_is_mac_idx_global(nfp_mac_idx)) {
ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index);
ida_simple_remove(&priv->tun.mac_off_ids, ida_idx);
}
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index bc39558fe82b..756f97dce85b 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -1471,6 +1471,7 @@ static int lpc_eth_drv_resume(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct netdata_local *pldat;
+ int ret;
if (device_may_wakeup(&pdev->dev))
disable_irq_wake(ndev->irq);
@@ -1480,7 +1481,9 @@ static int lpc_eth_drv_resume(struct platform_device *pdev)
pldat = netdev_priv(ndev);
/* Enable interface clock */
- clk_enable(pldat->clk);
+ ret = clk_enable(pldat->clk);
+ if (ret)
+ return ret;
/* Reset and initialize */
__lpc_eth_reset(pldat);
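The lpc_eth resume path above now checks clk_enable(), which can fail (for instance when a parent clock is gated); the same fix appears in cpts_register() further down. A minimal sketch of the pattern under hypothetical names:

#include <linux/clk.h>
#include <linux/device.h>

struct demo_priv {
        struct clk *clk;
};

static void demo_hw_reinit(struct demo_priv *priv) { /* ... */ }

static int demo_resume(struct device *dev)
{
        struct demo_priv *priv = dev_get_drvdata(dev);
        int ret;

        /* clk_enable() can fail: propagate the error instead of ignoring it. */
        ret = clk_enable(priv->clk);
        if (ret)
                return ret;

        demo_hw_reinit(priv);

        return 0;
}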
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 8ac38828ba45..48cf4355bc47 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -3806,11 +3806,11 @@ bool qed_iov_mark_vf_flr(struct qed_hwfn *p_hwfn, u32 *p_disabled_vfs)
return found;
}
-static void qed_iov_get_link(struct qed_hwfn *p_hwfn,
- u16 vfid,
- struct qed_mcp_link_params *p_params,
- struct qed_mcp_link_state *p_link,
- struct qed_mcp_link_capabilities *p_caps)
+static int qed_iov_get_link(struct qed_hwfn *p_hwfn,
+ u16 vfid,
+ struct qed_mcp_link_params *p_params,
+ struct qed_mcp_link_state *p_link,
+ struct qed_mcp_link_capabilities *p_caps)
{
struct qed_vf_info *p_vf = qed_iov_get_vf_info(p_hwfn,
vfid,
@@ -3818,7 +3818,7 @@ static void qed_iov_get_link(struct qed_hwfn *p_hwfn,
struct qed_bulletin_content *p_bulletin;
if (!p_vf)
- return;
+ return -EINVAL;
p_bulletin = p_vf->bulletin.p_virt;
@@ -3828,6 +3828,7 @@ static void qed_iov_get_link(struct qed_hwfn *p_hwfn,
__qed_vf_get_link_state(p_hwfn, p_link, p_bulletin);
if (p_caps)
__qed_vf_get_link_caps(p_hwfn, p_caps, p_bulletin);
+ return 0;
}
static int
@@ -4686,6 +4687,7 @@ static int qed_get_vf_config(struct qed_dev *cdev,
struct qed_public_vf_info *vf_info;
struct qed_mcp_link_state link;
u32 tx_rate;
+ int ret;
/* Sanitize request */
if (IS_VF(cdev))
@@ -4699,7 +4701,9 @@ static int qed_get_vf_config(struct qed_dev *cdev,
vf_info = qed_iov_get_public_vf_info(hwfn, vf_id, true);
- qed_iov_get_link(hwfn, vf_id, NULL, &link, NULL);
+ ret = qed_iov_get_link(hwfn, vf_id, NULL, &link, NULL);
+ if (ret)
+ return ret;
/* Fill information about VF */
ivi->vf = vf_id;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index 597cd9cd57b5..7b0e390c0b07 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -513,6 +513,9 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn)
p_iov->bulletin.size,
&p_iov->bulletin.phys,
GFP_KERNEL);
+ if (!p_iov->bulletin.p_virt)
+ goto free_pf2vf_reply;
+
DP_VERBOSE(p_hwfn, QED_MSG_IOV,
"VF's bulletin Board [%p virt 0x%llx phys 0x%08x bytes]\n",
p_iov->bulletin.p_virt,
@@ -552,6 +555,10 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn)
return rc;
+free_pf2vf_reply:
+ dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+ sizeof(union pfvf_tlvs),
+ p_iov->pf2vf_reply, p_iov->pf2vf_reply_phys);
free_vf2pf_request:
dma_free_coherent(&p_hwfn->cdev->pdev->dev,
sizeof(union vfpf_tlvs),
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 955cce644392..3c5494afd3c0 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -1001,7 +1001,7 @@ qca_spi_probe(struct spi_device *spi)
return 0;
}
-static int
+static void
qca_spi_remove(struct spi_device *spi)
{
struct net_device *qcaspi_devs = spi_get_drvdata(spi);
@@ -1011,8 +1011,6 @@ qca_spi_remove(struct spi_device *spi)
unregister_netdev(qcaspi_devs);
free_netdev(qcaspi_devs);
-
- return 0;
}
static const struct spi_device_id qca_spi_id[] = {
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index 32161a56726c..2881f5b2b5f4 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -2285,18 +2285,18 @@ static int __init sxgbe_cmdline_opt(char *str)
char *opt;
if (!str || !*str)
- return -EINVAL;
+ return 1;
while ((opt = strsep(&str, ",")) != NULL) {
if (!strncmp(opt, "eee_timer:", 10)) {
if (kstrtoint(opt + 10, 0, &eee_timer))
goto err;
}
}
- return 0;
+ return 1;
err:
pr_err("%s: ERROR broken module parameter conversion\n", __func__);
- return -EINVAL;
+ return 1;
}
__setup("sxgbeeth=", sxgbe_cmdline_opt);
diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c
index 16a4cbae9326..c672f92d65e9 100644
--- a/drivers/net/ethernet/seeq/ether3.c
+++ b/drivers/net/ethernet/seeq/ether3.c
@@ -749,6 +749,7 @@ ether3_probe(struct expansion_card *ec, const struct ecard_id *id)
const struct ether3_data *data = id->data;
struct net_device *dev;
int bus_type, ret;
+ u8 addr[ETH_ALEN];
ether3_banner();
@@ -776,7 +777,8 @@ ether3_probe(struct expansion_card *ec, const struct ecard_id *id)
priv(dev)->seeq = priv(dev)->base + data->base_offset;
dev->irq = ec->irq;
- ether3_addr(dev->dev_addr, ec);
+ ether3_addr(addr, ec);
+ eth_hw_addr_set(dev, addr);
priv(dev)->dev = dev;
timer_setup(&priv(dev)->timer, ether3_ledoff, 0);
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index be6bfd6b7ec7..50baf62b2cbc 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -163,9 +163,9 @@ static void efx_mcdi_send_request(struct efx_nic *efx, unsigned cmd,
/* Serialise with efx_mcdi_ev_cpl() and efx_mcdi_ev_death() */
spin_lock_bh(&mcdi->iface_lock);
++mcdi->seqno;
+ seqno = mcdi->seqno & SEQ_MASK;
spin_unlock_bh(&mcdi->iface_lock);
- seqno = mcdi->seqno & SEQ_MASK;
xflags = 0;
if (mcdi->mode == MCDI_MODE_EVENTS)
xflags |= MCDI_HEADER_XFLAGS_EVREQ;
diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c
index dd6f69ced4ee..fc9cef9dcefc 100644
--- a/drivers/net/ethernet/smsc/smc911x.c
+++ b/drivers/net/ethernet/smsc/smc911x.c
@@ -1648,7 +1648,7 @@ static int smc911x_ethtool_geteeprom(struct net_device *dev,
return ret;
if ((ret=smc911x_ethtool_read_eeprom_byte(dev, &eebuf[i]))!=0)
return ret;
- }
+ }
memcpy(data, eebuf+eeprom->offset, eeprom->len);
return 0;
}
@@ -1667,11 +1667,11 @@ static int smc911x_ethtool_seteeprom(struct net_device *dev,
return ret;
/* write byte */
if ((ret=smc911x_ethtool_write_eeprom_byte(dev, *data))!=0)
- return ret;
+ return ret;
if ((ret=smc911x_ethtool_write_eeprom_cmd(dev, E2P_CMD_EPC_CMD_WRITE_, i ))!=0)
return ret;
- }
- return 0;
+ }
+ return 0;
}
static int smc911x_ethtool_geteeprom_len(struct net_device *dev)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 617d0e4c6495..09644ab0d87a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -756,7 +756,7 @@ static int sun8i_dwmac_reset(struct stmmac_priv *priv)
if (err) {
dev_err(priv->device, "EMAC reset timeout\n");
- return -EFAULT;
+ return err;
}
return 0;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
index e2e0f977875d..c3f10a92b62b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
@@ -22,21 +22,21 @@
#define ETHER_CLK_SEL_RMII_CLK_EN BIT(2)
#define ETHER_CLK_SEL_RMII_CLK_RST BIT(3)
#define ETHER_CLK_SEL_DIV_SEL_2 BIT(4)
-#define ETHER_CLK_SEL_DIV_SEL_20 BIT(0)
+#define ETHER_CLK_SEL_DIV_SEL_20 0
#define ETHER_CLK_SEL_FREQ_SEL_125M (BIT(9) | BIT(8))
#define ETHER_CLK_SEL_FREQ_SEL_50M BIT(9)
#define ETHER_CLK_SEL_FREQ_SEL_25M BIT(8)
#define ETHER_CLK_SEL_FREQ_SEL_2P5M 0
-#define ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN BIT(0)
+#define ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN 0
#define ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC BIT(10)
#define ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV BIT(11)
-#define ETHER_CLK_SEL_RX_CLK_EXT_SEL_IN BIT(0)
+#define ETHER_CLK_SEL_RX_CLK_EXT_SEL_IN 0
#define ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC BIT(12)
#define ETHER_CLK_SEL_RX_CLK_EXT_SEL_DIV BIT(13)
-#define ETHER_CLK_SEL_TX_CLK_O_TX_I BIT(0)
+#define ETHER_CLK_SEL_TX_CLK_O_TX_I 0
#define ETHER_CLK_SEL_TX_CLK_O_RMII_I BIT(14)
#define ETHER_CLK_SEL_TX_O_E_N_IN BIT(15)
-#define ETHER_CLK_SEL_RMII_CLK_SEL_IN BIT(0)
+#define ETHER_CLK_SEL_RMII_CLK_SEL_IN 0
#define ETHER_CLK_SEL_RMII_CLK_SEL_RX_C BIT(16)
#define ETHER_CLK_SEL_RX_TX_CLK_EN (ETHER_CLK_SEL_RX_CLK_EN | ETHER_CLK_SEL_TX_CLK_EN)
@@ -49,13 +49,15 @@ struct visconti_eth {
void __iomem *reg;
u32 phy_intf_sel;
struct clk *phy_ref_clk;
+ struct device *dev;
spinlock_t lock; /* lock to protect register update */
};
static void visconti_eth_fix_mac_speed(void *priv, unsigned int speed)
{
struct visconti_eth *dwmac = priv;
- unsigned int val, clk_sel_val;
+ struct net_device *netdev = dev_get_drvdata(dwmac->dev);
+ unsigned int val, clk_sel_val = 0;
unsigned long flags;
spin_lock_irqsave(&dwmac->lock, flags);
@@ -85,7 +87,9 @@ static void visconti_eth_fix_mac_speed(void *priv, unsigned int speed)
break;
default:
/* No bit control */
- break;
+ netdev_err(netdev, "Unsupported speed request (%d)", speed);
+ spin_unlock_irqrestore(&dwmac->lock, flags);
+ return;
}
writel(val, dwmac->reg + MAC_CTRL_REG);
@@ -96,31 +100,41 @@ static void visconti_eth_fix_mac_speed(void *priv, unsigned int speed)
val |= ETHER_CLK_SEL_TX_O_E_N_IN;
writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+ /* Set Clock-Mux, Start clock, Set TX_O direction */
switch (dwmac->phy_intf_sel) {
case ETHER_CONFIG_INTF_RGMII:
val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+
+ val |= ETHER_CLK_SEL_RX_TX_CLK_EN;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+
+ val &= ~ETHER_CLK_SEL_TX_O_E_N_IN;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
break;
case ETHER_CONFIG_INTF_RMII:
val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_DIV |
- ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC | ETHER_CLK_SEL_TX_O_E_N_IN |
+ ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV | ETHER_CLK_SEL_TX_O_E_N_IN |
ETHER_CLK_SEL_RMII_CLK_SEL_RX_C;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+
+ val |= ETHER_CLK_SEL_RMII_CLK_RST;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+
+ val |= ETHER_CLK_SEL_RMII_CLK_EN | ETHER_CLK_SEL_RX_TX_CLK_EN;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
break;
case ETHER_CONFIG_INTF_MII:
default:
val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC |
- ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV | ETHER_CLK_SEL_TX_O_E_N_IN |
- ETHER_CLK_SEL_RMII_CLK_EN;
+ ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC | ETHER_CLK_SEL_TX_O_E_N_IN;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+
+ val |= ETHER_CLK_SEL_RX_TX_CLK_EN;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
break;
}
- /* Start clock */
- writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
- val |= ETHER_CLK_SEL_RX_TX_CLK_EN;
- writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
-
- val &= ~ETHER_CLK_SEL_TX_O_E_N_IN;
- writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
-
spin_unlock_irqrestore(&dwmac->lock, flags);
}
@@ -219,6 +233,7 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev)
spin_lock_init(&dwmac->lock);
dwmac->reg = stmmac_res.addr;
+ dwmac->dev = &pdev->dev;
plat_dat->bsp_priv = dwmac;
plat_dat->fix_mac_speed = visconti_eth_fix_mac_speed;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
index 1914ad698cab..acd70b9a3173 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
@@ -150,6 +150,7 @@
#define NUM_DWMAC100_DMA_REGS 9
#define NUM_DWMAC1000_DMA_REGS 23
+#define NUM_DWMAC4_DMA_REGS 27
void dwmac_enable_dma_transmission(void __iomem *ioaddr);
void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 40b5ed94cb54..5b195d5051d6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -194,7 +194,6 @@ struct stmmac_priv {
u32 tx_coal_timer[MTL_MAX_TX_QUEUES];
u32 rx_coal_frames[MTL_MAX_TX_QUEUES];
- int tx_coalesce;
int hwts_tx_en;
bool tx_path_in_lpi_mode;
bool tso;
@@ -229,7 +228,6 @@ struct stmmac_priv {
unsigned int flow_ctrl;
unsigned int pause;
struct mii_bus *mii;
- int mii_irq[PHY_MAX_ADDR];
struct phylink_config phylink_config;
struct phylink *phylink;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 164dff5ec32e..abfb3cd5958d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -21,10 +21,18 @@
#include "dwxgmac2.h"
#define REG_SPACE_SIZE 0x1060
+#define GMAC4_REG_SPACE_SIZE 0x116C
#define MAC100_ETHTOOL_NAME "st_mac100"
#define GMAC_ETHTOOL_NAME "st_gmac"
#define XGMAC_ETHTOOL_NAME "st_xgmac"
+/* Same as DMA_CHAN_BASE_ADDR defined in dwmac4_dma.h
+ *
+ * It is here because dwmac_dma.h and dwmac4_dma.h cannot be included at the
+ * same time due to the conflicting macro names.
+ */
+#define GMAC4_DMA_CHAN_BASE_ADDR 0x00001100
+
#define ETHTOOL_DMA_OFFSET 55
struct stmmac_stats {
@@ -434,6 +442,8 @@ static int stmmac_ethtool_get_regs_len(struct net_device *dev)
if (priv->plat->has_xgmac)
return XGMAC_REGSIZE * 4;
+ else if (priv->plat->has_gmac4)
+ return GMAC4_REG_SPACE_SIZE;
return REG_SPACE_SIZE;
}
@@ -446,8 +456,13 @@ static void stmmac_ethtool_gregs(struct net_device *dev,
stmmac_dump_mac_regs(priv, priv->hw, reg_space);
stmmac_dump_dma_regs(priv, priv->ioaddr, reg_space);
- if (!priv->plat->has_xgmac) {
- /* Copy DMA registers to where ethtool expects them */
+ /* Copy DMA registers to where ethtool expects them */
+ if (priv->plat->has_gmac4) {
+ /* GMAC4 dumps its DMA registers at its DMA_CHAN_BASE_ADDR */
+ memcpy(&reg_space[ETHTOOL_DMA_OFFSET],
+ &reg_space[GMAC4_DMA_CHAN_BASE_ADDR / 4],
+ NUM_DWMAC4_DMA_REGS * 4);
+ } else if (!priv->plat->has_xgmac) {
memcpy(&reg_space[ETHTOOL_DMA_OFFSET],
&reg_space[DMA_BUS_MODE / 4],
NUM_DWMAC1000_DMA_REGS * 4);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 074e2cdfb0fa..a7ec9f4d46ce 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -145,15 +145,20 @@ static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec,
static void get_systime(void __iomem *ioaddr, u64 *systime)
{
- u64 ns;
-
- /* Get the TSSS value */
- ns = readl(ioaddr + PTP_STNSR);
- /* Get the TSS and convert sec time value to nanosecond */
- ns += readl(ioaddr + PTP_STSR) * 1000000000ULL;
+ u64 ns, sec0, sec1;
+
+ /* Get the TSS value */
+ sec1 = readl_relaxed(ioaddr + PTP_STSR);
+ do {
+ sec0 = sec1;
+ /* Get the TSSS value */
+ ns = readl_relaxed(ioaddr + PTP_STNSR);
+ /* Get the TSS value */
+ sec1 = readl_relaxed(ioaddr + PTP_STSR);
+ } while (sec0 != sec1);
if (systime)
- *systime = ns;
+ *systime = ns + (sec1 * 1000000000ULL);
}
static void get_ptptime(void __iomem *ptpaddr, u64 *ptp_time)
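The get_systime() rework above closes a torn-read race: the seconds register can increment between the nanoseconds and seconds reads (nanosecond rollover), so the loop re-reads seconds until two consecutive reads agree. A generic sketch of the split-counter read pattern; the register offsets are hypothetical:

#include <linux/io.h>
#include <linux/time64.h>
#include <linux/types.h>

#define DEMO_SEC        0x08    /* hypothetical seconds register */
#define DEMO_NSEC       0x0c    /* hypothetical nanoseconds register */

/* Read a 64-bit time split across two 32-bit registers without tearing. */
static u64 demo_read_systime(void __iomem *base)
{
        u32 sec0, sec1, nsec;

        sec1 = readl_relaxed(base + DEMO_SEC);
        do {
                sec0 = sec1;
                nsec = readl_relaxed(base + DEMO_NSEC);
                sec1 = readl_relaxed(base + DEMO_SEC);
        } while (sec0 != sec1);         /* retry if seconds ticked mid-read */

        return (u64)sec1 * NSEC_PER_SEC + nsec;
}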
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 6708ca2aa4f7..422e3225f476 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -402,7 +402,7 @@ static void stmmac_lpi_entry_timer_config(struct stmmac_priv *priv, bool en)
* Description: this function is to verify and enter in LPI mode in case of
* EEE.
*/
-static void stmmac_enable_eee_mode(struct stmmac_priv *priv)
+static int stmmac_enable_eee_mode(struct stmmac_priv *priv)
{
u32 tx_cnt = priv->plat->tx_queues_to_use;
u32 queue;
@@ -412,13 +412,14 @@ static void stmmac_enable_eee_mode(struct stmmac_priv *priv)
struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
if (tx_q->dirty_tx != tx_q->cur_tx)
- return; /* still unfinished work */
+ return -EBUSY; /* still unfinished work */
}
/* Check and enter in LPI mode */
if (!priv->tx_path_in_lpi_mode)
stmmac_set_eee_mode(priv, priv->hw,
priv->plat->en_tx_lpi_clockgating);
+ return 0;
}
/**
@@ -450,8 +451,8 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t)
{
struct stmmac_priv *priv = from_timer(priv, t, eee_ctrl_timer);
- stmmac_enable_eee_mode(priv);
- mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer));
+ if (stmmac_enable_eee_mode(priv))
+ mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer));
}
/**
@@ -889,6 +890,9 @@ static int stmmac_init_ptp(struct stmmac_priv *priv)
bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
int ret;
+ if (priv->plat->ptp_clk_freq_config)
+ priv->plat->ptp_clk_freq_config(priv);
+
ret = stmmac_init_tstamp_counter(priv, STMMAC_HWTS_ACTIVE);
if (ret)
return ret;
@@ -911,8 +915,6 @@ static int stmmac_init_ptp(struct stmmac_priv *priv)
priv->hwts_tx_en = 0;
priv->hwts_rx_en = 0;
- stmmac_ptp_register(priv);
-
return 0;
}
@@ -2260,6 +2262,23 @@ static void stmmac_stop_tx_dma(struct stmmac_priv *priv, u32 chan)
stmmac_stop_tx(priv, priv->ioaddr, chan);
}
+static void stmmac_enable_all_dma_irq(struct stmmac_priv *priv)
+{
+ u32 rx_channels_count = priv->plat->rx_queues_to_use;
+ u32 tx_channels_count = priv->plat->tx_queues_to_use;
+ u32 dma_csr_ch = max(rx_channels_count, tx_channels_count);
+ u32 chan;
+
+ for (chan = 0; chan < dma_csr_ch; chan++) {
+ struct stmmac_channel *ch = &priv->channel[chan];
+ unsigned long flags;
+
+ spin_lock_irqsave(&ch->lock, flags);
+ stmmac_enable_dma_irq(priv, priv->ioaddr, chan, 1, 1);
+ spin_unlock_irqrestore(&ch->lock, flags);
+ }
+}
+
/**
* stmmac_start_all_dma - start all RX and TX DMA channels
* @priv: driver private structure
@@ -2647,8 +2666,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
if (priv->eee_enabled && !priv->tx_path_in_lpi_mode &&
priv->eee_sw_timer_en) {
- stmmac_enable_eee_mode(priv);
- mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer));
+ if (stmmac_enable_eee_mode(priv))
+ mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer));
}
/* We still have pending packets, let's call for a new scheduling */
@@ -2902,8 +2921,10 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
stmmac_axi(priv, priv->ioaddr, priv->plat->axi);
/* DMA CSR Channel configuration */
- for (chan = 0; chan < dma_csr_ch; chan++)
+ for (chan = 0; chan < dma_csr_ch; chan++) {
stmmac_init_chan(priv, priv->ioaddr, priv->plat->dma_cfg, chan);
+ stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 1);
+ }
/* DMA RX Channel Configuration */
for (chan = 0; chan < rx_channels_count; chan++) {
@@ -3238,7 +3259,7 @@ static int stmmac_fpe_start_wq(struct stmmac_priv *priv)
/**
* stmmac_hw_setup - setup mac in a usable state.
* @dev : pointer to the device structure.
- * @init_ptp: initialize PTP if set
+ * @ptp_register: register PTP if set
* Description:
* this is the main function to setup the HW in a usable state because the
* dma engine is reset, the core registers are configured (e.g. AXI,
@@ -3248,7 +3269,7 @@ static int stmmac_fpe_start_wq(struct stmmac_priv *priv)
* 0 on success and an appropriate (-)ve integer as defined in errno.h
* file on failure.
*/
-static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
+static int stmmac_hw_setup(struct net_device *dev, bool ptp_register)
{
struct stmmac_priv *priv = netdev_priv(dev);
u32 rx_cnt = priv->plat->rx_queues_to_use;
@@ -3305,13 +3326,13 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
stmmac_mmc_setup(priv);
- if (init_ptp) {
- ret = stmmac_init_ptp(priv);
- if (ret == -EOPNOTSUPP)
- netdev_warn(priv->dev, "PTP not supported by HW\n");
- else if (ret)
- netdev_warn(priv->dev, "PTP init failed\n");
- }
+ ret = stmmac_init_ptp(priv);
+ if (ret == -EOPNOTSUPP)
+ netdev_warn(priv->dev, "PTP not supported by HW\n");
+ else if (ret)
+ netdev_warn(priv->dev, "PTP init failed\n");
+ else if (ptp_register)
+ stmmac_ptp_register(priv);
priv->eee_tw_timer = STMMAC_DEFAULT_TWT_LS;
@@ -3759,6 +3780,7 @@ static int stmmac_open(struct net_device *dev)
stmmac_enable_all_queues(priv);
netif_tx_start_all_queues(priv->dev);
+ stmmac_enable_all_dma_irq(priv);
return 0;
@@ -6508,8 +6530,10 @@ int stmmac_xdp_open(struct net_device *dev)
}
/* DMA CSR Channel configuration */
- for (chan = 0; chan < dma_csr_ch; chan++)
+ for (chan = 0; chan < dma_csr_ch; chan++) {
stmmac_init_chan(priv, priv->ioaddr, priv->plat->dma_cfg, chan);
+ stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 1);
+ }
/* Adjust Split header */
sph_en = (priv->hw->rx_csum > 0) && priv->sph;
@@ -6570,6 +6594,7 @@ int stmmac_xdp_open(struct net_device *dev)
stmmac_enable_all_queues(priv);
netif_carrier_on(dev);
netif_tx_start_all_queues(dev);
+ stmmac_enable_all_dma_irq(priv);
return 0;
@@ -7250,6 +7275,10 @@ int stmmac_dvr_remove(struct device *dev)
netdev_info(priv->dev, "%s: removing driver", __func__);
+ pm_runtime_get_sync(dev);
+ pm_runtime_disable(dev);
+ pm_runtime_put_noidle(dev);
+
stmmac_stop_all_dma(priv);
stmmac_mac_set(priv, priv->ioaddr, false);
netif_carrier_off(ndev);
@@ -7268,8 +7297,6 @@ int stmmac_dvr_remove(struct device *dev)
if (priv->plat->stmmac_rst)
reset_control_assert(priv->plat->stmmac_rst);
reset_control_assert(priv->plat->stmmac_ahb_rst);
- pm_runtime_put(dev);
- pm_runtime_disable(dev);
if (priv->hw->pcs != STMMAC_PCS_TBI &&
priv->hw->pcs != STMMAC_PCS_RTBI)
stmmac_mdio_unregister(ndev);
@@ -7447,6 +7474,7 @@ int stmmac_resume(struct device *dev)
stmmac_restore_hw_vlan_rx_fltr(priv, ndev, priv->hw);
stmmac_enable_all_queues(priv);
+ stmmac_enable_all_dma_irq(priv);
mutex_unlock(&priv->lock);
rtnl_unlock();
@@ -7463,7 +7491,7 @@ static int __init stmmac_cmdline_opt(char *str)
char *opt;
if (!str || !*str)
- return -EINVAL;
+ return 1;
while ((opt = strsep(&str, ",")) != NULL) {
if (!strncmp(opt, "debug:", 6)) {
if (kstrtoint(opt + 6, 0, &debug))
@@ -7494,11 +7522,11 @@ static int __init stmmac_cmdline_opt(char *str)
goto err;
}
}
- return 0;
+ return 1;
err:
pr_err("%s: ERROR broken module parameter conversion", __func__);
- return -EINVAL;
+ return 1;
}
__setup("stmmaceth=", stmmac_cmdline_opt);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index 0d24ebd37873..1c9f02f9c317 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -297,9 +297,6 @@ void stmmac_ptp_register(struct stmmac_priv *priv)
{
int i;
- if (priv->plat->ptp_clk_freq_config)
- priv->plat->ptp_clk_freq_config(priv);
-
for (i = 0; i < priv->dma_cap.pps_out_num; i++) {
if (i >= STMMAC_PPS_MAX)
break;
diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
index ad9029ae6848..77e5dffb558f 100644
--- a/drivers/net/ethernet/sun/sunhme.c
+++ b/drivers/net/ethernet/sun/sunhme.c
@@ -3146,7 +3146,7 @@ static int happy_meal_pci_probe(struct pci_dev *pdev,
if (err) {
printk(KERN_ERR "happymeal(PCI): Cannot register net device, "
"aborting.\n");
- goto err_out_iounmap;
+ goto err_out_free_coherent;
}
pci_set_drvdata(pdev, hp);
@@ -3179,6 +3179,10 @@ static int happy_meal_pci_probe(struct pci_dev *pdev,
return 0;
+err_out_free_coherent:
+ dma_free_coherent(hp->dma_dev, PAGE_SIZE,
+ hp->happy_block, hp->hblock_dvma);
+
err_out_iounmap:
iounmap(hp->gregs);
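The sunhme fix above adds a missing unwind step: register_netdev() fails after the DMA block was allocated, so the error path needs a new label that frees it before falling through to the older ones. Goto-unwind labels must release resources in reverse order of acquisition. A minimal sketch with hypothetical names:

#include <linux/dma-mapping.h>
#include <linux/pci.h>

static int demo_register(struct pci_dev *pdev) { return 0; }    /* stub */

static int demo_probe(struct pci_dev *pdev)
{
        void __iomem *regs;
        void *block;
        dma_addr_t dma;
        int err;

        regs = pci_iomap(pdev, 0, 0);
        if (!regs)
                return -ENOMEM;

        block = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &dma, GFP_KERNEL);
        if (!block) {
                err = -ENOMEM;
                goto err_unmap;
        }

        err = demo_register(pdev);
        if (err)
                goto err_free_coherent; /* unwind newest resource first */

        return 0;

err_free_coherent:
        dma_free_coherent(&pdev->dev, PAGE_SIZE, block, dma);
err_unmap:
        pci_iounmap(pdev, regs);
        return err;
}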
diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c
index ba220593e6db..8f6817f346ba 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.c
+++ b/drivers/net/ethernet/ti/cpsw_priv.c
@@ -1146,7 +1146,7 @@ int cpsw_fill_rx_channels(struct cpsw_priv *priv)
static struct page_pool *cpsw_create_page_pool(struct cpsw_common *cpsw,
int size)
{
- struct page_pool_params pp_params;
+ struct page_pool_params pp_params = {};
struct page_pool *pool;
pp_params.order = 0;
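The cpsw one-liner above matters because a stack aggregate contains garbage unless initialized: `= {}` zeroes every member, so fields the driver never assigns (such as page_pool_params members added after this code was written) read as 0 instead of stack junk. A tiny illustration with a hypothetical struct:

struct demo_params {
        int order;
        unsigned int flags;
        void (*init_cb)(void *);        /* newer member a caller may not set */
};

static void demo_use(void)
{
        struct demo_params p = {};      /* all members zeroed, incl. init_cb */

        p.order = 0;
        /* p.flags == 0 and p.init_cb == NULL are now guaranteed */
}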
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index dc70a6bfaa6a..92ca739fac01 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -568,7 +568,9 @@ int cpts_register(struct cpts *cpts)
for (i = 0; i < CPTS_MAX_EVENTS; i++)
list_add(&cpts->pool_data[i].list, &cpts->pool);
- clk_enable(cpts->refclk);
+ err = clk_enable(cpts->refclk);
+ if (err)
+ return err;
cpts_write32(cpts, CPTS_EN, control);
cpts_write32(cpts, TS_PEND_EN, int_enable);
diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c
index cf0917b29e30..5251fc324221 100644
--- a/drivers/net/ethernet/tundra/tsi108_eth.c
+++ b/drivers/net/ethernet/tundra/tsi108_eth.c
@@ -1091,20 +1091,22 @@ static int tsi108_get_mac(struct net_device *dev)
struct tsi108_prv_data *data = netdev_priv(dev);
u32 word1 = TSI_READ(TSI108_MAC_ADDR1);
u32 word2 = TSI_READ(TSI108_MAC_ADDR2);
+ u8 addr[ETH_ALEN];
/* Note that the octets are reversed from what the manual says,
* producing an even weirder ordering...
*/
if (word2 == 0 && word1 == 0) {
- dev->dev_addr[0] = 0x00;
- dev->dev_addr[1] = 0x06;
- dev->dev_addr[2] = 0xd2;
- dev->dev_addr[3] = 0x00;
- dev->dev_addr[4] = 0x00;
+ addr[0] = 0x00;
+ addr[1] = 0x06;
+ addr[2] = 0xd2;
+ addr[3] = 0x00;
+ addr[4] = 0x00;
if (0x8 == data->phy)
- dev->dev_addr[5] = 0x01;
+ addr[5] = 0x01;
else
- dev->dev_addr[5] = 0x02;
+ addr[5] = 0x02;
+ eth_hw_addr_set(dev, addr);
word2 = (dev->dev_addr[0] << 16) | (dev->dev_addr[1] << 24);
@@ -1114,12 +1116,13 @@ static int tsi108_get_mac(struct net_device *dev)
TSI_WRITE(TSI108_MAC_ADDR1, word1);
TSI_WRITE(TSI108_MAC_ADDR2, word2);
} else {
- dev->dev_addr[0] = (word2 >> 16) & 0xff;
- dev->dev_addr[1] = (word2 >> 24) & 0xff;
- dev->dev_addr[2] = (word1 >> 0) & 0xff;
- dev->dev_addr[3] = (word1 >> 8) & 0xff;
- dev->dev_addr[4] = (word1 >> 16) & 0xff;
- dev->dev_addr[5] = (word1 >> 24) & 0xff;
+ addr[0] = (word2 >> 16) & 0xff;
+ addr[1] = (word2 >> 24) & 0xff;
+ addr[2] = (word1 >> 0) & 0xff;
+ addr[3] = (word1 >> 8) & 0xff;
+ addr[4] = (word1 >> 16) & 0xff;
+ addr[5] = (word1 >> 24) & 0xff;
+ eth_hw_addr_set(dev, addr);
}
if (!is_valid_ether_addr(dev->dev_addr)) {
@@ -1136,14 +1139,12 @@ static int tsi108_set_mac(struct net_device *dev, void *addr)
{
struct tsi108_prv_data *data = netdev_priv(dev);
u32 word1, word2;
- int i;
if (!is_valid_ether_addr(addr))
return -EADDRNOTAVAIL;
- for (i = 0; i < 6; i++)
- /* +2 is for the offset of the HW addr type */
- dev->dev_addr[i] = ((unsigned char *)addr)[i + 2];
+ /* +2 is for the offset of the HW addr type */
+ eth_hw_addr_set(dev, ((unsigned char *)addr) + 2);
word2 = (dev->dev_addr[0] << 16) | (dev->dev_addr[1] << 24);
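The tsi108 conversions above (like the ether3 one earlier) follow from netdev->dev_addr becoming const: drivers assemble the address in a local buffer and commit it with eth_hw_addr_set(), letting the core keep its address bookkeeping consistent. A minimal sketch; the byte layout is hypothetical:

#include <linux/etherdevice.h>

static void demo_set_hw_mac(struct net_device *dev, u32 lo, u32 hi)
{
        u8 addr[ETH_ALEN];

        addr[0] = hi >> 8;
        addr[1] = hi;
        addr[2] = lo >> 24;
        addr[3] = lo >> 16;
        addr[4] = lo >> 8;
        addr[5] = lo;

        /* dev->dev_addr is const: never memcpy() into it directly. */
        eth_hw_addr_set(dev, addr);
}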
diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c
index 89a31783fbb4..25739b182ac7 100644
--- a/drivers/net/ethernet/vertexcom/mse102x.c
+++ b/drivers/net/ethernet/vertexcom/mse102x.c
@@ -731,7 +731,7 @@ static int mse102x_probe_spi(struct spi_device *spi)
return 0;
}
-static int mse102x_remove_spi(struct spi_device *spi)
+static void mse102x_remove_spi(struct spi_device *spi)
{
struct mse102x_net *mse = dev_get_drvdata(&spi->dev);
struct mse102x_net_spi *mses = to_mse102x_spi(mse);
@@ -741,8 +741,6 @@ static int mse102x_remove_spi(struct spi_device *spi)
mse102x_remove_device_debugfs(mses);
unregister_netdev(mse->ndev);
-
- return 0;
}
static const struct of_device_id mse102x_match_table[] = {
diff --git a/drivers/net/ethernet/wiznet/w5100-spi.c b/drivers/net/ethernet/wiznet/w5100-spi.c
index 7779a36da3c8..7c52796273a4 100644
--- a/drivers/net/ethernet/wiznet/w5100-spi.c
+++ b/drivers/net/ethernet/wiznet/w5100-spi.c
@@ -461,11 +461,9 @@ static int w5100_spi_probe(struct spi_device *spi)
return w5100_probe(&spi->dev, ops, priv_size, mac, spi->irq, -EINVAL);
}
-static int w5100_spi_remove(struct spi_device *spi)
+static void w5100_spi_remove(struct spi_device *spi)
{
w5100_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id w5100_spi_ids[] = {
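The qca_spi, mse102x, and w5100 hunks above (and the ieee802154 drivers below) are all the same mechanical conversion: the SPI core's remove() callback now returns void, since a nonzero return could never abort the unbind and was merely logged. A minimal sketch of the new-style driver; every name is hypothetical:

#include <linux/module.h>
#include <linux/spi/spi.h>

struct demo_priv { int dummy; };
static void demo_teardown(struct demo_priv *priv) { }

static int demo_probe(struct spi_device *spi)
{
        /* ... allocate a demo_priv and spi_set_drvdata() it ... */
        return 0;
}

/* remove() is void now: an error return could not stop unbinding anyway. */
static void demo_remove(struct spi_device *spi)
{
        demo_teardown(spi_get_drvdata(spi));
}

static struct spi_driver demo_driver = {
        .driver = { .name = "demo" },
        .probe  = demo_probe,
        .remove = demo_remove,
};
module_spi_driver(demo_driver);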
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index b900ab5aef2a..64c7e26c3b75 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1433,6 +1433,8 @@ static int temac_probe(struct platform_device *pdev)
lp->indirect_lock = devm_kmalloc(&pdev->dev,
sizeof(*lp->indirect_lock),
GFP_KERNEL);
+ if (!lp->indirect_lock)
+ return -ENOMEM;
spin_lock_init(lp->indirect_lock);
}
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index 519599480b15..77fa2cb03aca 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -1183,7 +1183,7 @@ static int xemaclite_of_probe(struct platform_device *ofdev)
if (rc) {
dev_err(dev,
"Cannot register network device, aborting\n");
- goto error;
+ goto put_node;
}
dev_info(dev,
@@ -1191,6 +1191,8 @@ static int xemaclite_of_probe(struct platform_device *ofdev)
(unsigned long __force)ndev->mem_start, lp->base_addr, ndev->irq);
return 0;
+put_node:
+ of_node_put(lp->phy_node);
error:
free_netdev(ndev);
return rc;
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index b1fc153125d9..45c3c4a1101b 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -668,11 +668,11 @@ static void sixpack_close(struct tty_struct *tty)
*/
netif_stop_queue(sp->dev);
+ unregister_netdev(sp->dev);
+
del_timer_sync(&sp->tx_t);
del_timer_sync(&sp->resync_t);
- unregister_netdev(sp->dev);
-
/* Free all 6pack frame buffers after unreg. */
kfree(sp->rbuff);
kfree(sp->xbuff);
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index 6376b8485976..980f2be32f05 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -950,9 +950,7 @@ static int yam_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __
ym = memdup_user(data, sizeof(struct yamdrv_ioctl_mcs));
if (IS_ERR(ym))
return PTR_ERR(ym);
- if (ym->cmd != SIOCYAMSMCS)
- return -EINVAL;
- if (ym->bitrate > YAM_MAXBITRATE) {
+ if (ym->cmd != SIOCYAMSMCS || ym->bitrate > YAM_MAXBITRATE) {
kfree(ym);
return -EINVAL;
}
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 3646469433b1..fde1c492ca02 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1587,6 +1587,9 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
pcpu_sum = kvmalloc_array(num_possible_cpus(),
sizeof(struct netvsc_ethtool_pcpu_stats),
GFP_KERNEL);
+ if (!pcpu_sum)
+ return;
+
netvsc_get_pcpu_stats(dev, pcpu_sum);
for_each_present_cpu(cpu) {
struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu];
diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c
index 7db9cbd0f5de..6afdf1622944 100644
--- a/drivers/net/ieee802154/adf7242.c
+++ b/drivers/net/ieee802154/adf7242.c
@@ -1304,7 +1304,7 @@ err_alloc_wq:
return ret;
}
-static int adf7242_remove(struct spi_device *spi)
+static void adf7242_remove(struct spi_device *spi)
{
struct adf7242_local *lp = spi_get_drvdata(spi);
@@ -1316,8 +1316,6 @@ static int adf7242_remove(struct spi_device *spi)
ieee802154_unregister_hw(lp->hw);
mutex_destroy(&lp->bmux);
ieee802154_free_hw(lp->hw);
-
- return 0;
}
static const struct of_device_id adf7242_of_match[] = {
diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
index 7d67f41387f5..549d04b5f3d4 100644
--- a/drivers/net/ieee802154/at86rf230.c
+++ b/drivers/net/ieee802154/at86rf230.c
@@ -100,6 +100,7 @@ struct at86rf230_local {
unsigned long cal_timeout;
bool is_tx;
bool is_tx_from_off;
+ bool was_tx;
u8 tx_retry;
struct sk_buff *tx_skb;
struct at86rf230_state_change tx;
@@ -343,7 +344,11 @@ at86rf230_async_error_recover_complete(void *context)
if (ctx->free)
kfree(ctx);
- ieee802154_wake_queue(lp->hw);
+ if (lp->was_tx) {
+ lp->was_tx = 0;
+ dev_kfree_skb_any(lp->tx_skb);
+ ieee802154_wake_queue(lp->hw);
+ }
}
static void
@@ -352,7 +357,11 @@ at86rf230_async_error_recover(void *context)
struct at86rf230_state_change *ctx = context;
struct at86rf230_local *lp = ctx->lp;
- lp->is_tx = 0;
+ if (lp->is_tx) {
+ lp->was_tx = 1;
+ lp->is_tx = 0;
+ }
+
at86rf230_async_state_change(lp, ctx, STATE_RX_AACK_ON,
at86rf230_async_error_recover_complete);
}
@@ -1759,7 +1768,7 @@ free_dev:
return rc;
}
-static int at86rf230_remove(struct spi_device *spi)
+static void at86rf230_remove(struct spi_device *spi)
{
struct at86rf230_local *lp = spi_get_drvdata(spi);
@@ -1769,8 +1778,6 @@ static int at86rf230_remove(struct spi_device *spi)
ieee802154_free_hw(lp->hw);
at86rf230_debugfs_remove();
dev_dbg(&spi->dev, "unregistered at86rf230\n");
-
- return 0;
}
static const struct of_device_id at86rf230_of_match[] = {
diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c
index ece6ff6049f6..187cbc634ce8 100644
--- a/drivers/net/ieee802154/ca8210.c
+++ b/drivers/net/ieee802154/ca8210.c
@@ -831,7 +831,7 @@ static void ca8210_rx_done(struct cas_control *cas_ctl)
finish:;
}
-static int ca8210_remove(struct spi_device *spi_device);
+static void ca8210_remove(struct spi_device *spi_device);
/**
* ca8210_spi_transfer_complete() - Called when a single spi transfer has
@@ -1771,6 +1771,7 @@ static int ca8210_async_xmit_complete(
status
);
if (status != MAC_TRANSACTION_OVERFLOW) {
+ dev_kfree_skb_any(priv->tx_skb);
ieee802154_wake_queue(priv->hw);
return 0;
}
@@ -2974,8 +2975,8 @@ static void ca8210_hw_setup(struct ieee802154_hw *ca8210_hw)
ca8210_hw->phy->cca.opt = NL802154_CCA_OPT_ENERGY_CARRIER_AND;
ca8210_hw->phy->cca_ed_level = -9800;
ca8210_hw->phy->symbol_duration = 16;
- ca8210_hw->phy->lifs_period = 40;
- ca8210_hw->phy->sifs_period = 12;
+ ca8210_hw->phy->lifs_period = 40 * ca8210_hw->phy->symbol_duration;
+ ca8210_hw->phy->sifs_period = 12 * ca8210_hw->phy->symbol_duration;
ca8210_hw->flags =
IEEE802154_HW_AFILT |
IEEE802154_HW_OMIT_CKSUM |
@@ -3048,7 +3049,7 @@ static void ca8210_test_interface_clear(struct ca8210_priv *priv)
*
* Return: 0 or linux error code
*/
-static int ca8210_remove(struct spi_device *spi_device)
+static void ca8210_remove(struct spi_device *spi_device)
{
struct ca8210_priv *priv;
struct ca8210_platform_data *pdata;
@@ -3088,8 +3089,6 @@ static int ca8210_remove(struct spi_device *spi_device)
if (IS_ENABLED(CONFIG_IEEE802154_CA8210_DEBUGFS))
ca8210_test_interface_clear(priv);
}
-
- return 0;
}
/**
diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c
index 89c046b204e0..1e1f40f628a0 100644
--- a/drivers/net/ieee802154/cc2520.c
+++ b/drivers/net/ieee802154/cc2520.c
@@ -1213,7 +1213,7 @@ err_hw_init:
return ret;
}
-static int cc2520_remove(struct spi_device *spi)
+static void cc2520_remove(struct spi_device *spi)
{
struct cc2520_private *priv = spi_get_drvdata(spi);
@@ -1222,8 +1222,6 @@ static int cc2520_remove(struct spi_device *spi)
ieee802154_unregister_hw(priv->hw);
ieee802154_free_hw(priv->hw);
-
- return 0;
}
static const struct spi_device_id cc2520_ids[] = {
diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c
index 8caa61ec718f..36f1c5aa98fc 100644
--- a/drivers/net/ieee802154/mac802154_hwsim.c
+++ b/drivers/net/ieee802154/mac802154_hwsim.c
@@ -786,6 +786,7 @@ static int hwsim_add_one(struct genl_info *info, struct device *dev,
goto err_pib;
}
+ pib->channel = 13;
rcu_assign_pointer(phy->pib, pib);
phy->idx = idx;
INIT_LIST_HEAD(&phy->edges);
diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c
index 8dc04e2590b1..c927a5ae0d05 100644
--- a/drivers/net/ieee802154/mcr20a.c
+++ b/drivers/net/ieee802154/mcr20a.c
@@ -976,8 +976,8 @@ static void mcr20a_hw_setup(struct mcr20a_local *lp)
dev_dbg(printdev(lp), "%s\n", __func__);
phy->symbol_duration = 16;
- phy->lifs_period = 40;
- phy->sifs_period = 12;
+ phy->lifs_period = 40 * phy->symbol_duration;
+ phy->sifs_period = 12 * phy->symbol_duration;
hw->flags = IEEE802154_HW_TX_OMIT_CKSUM |
IEEE802154_HW_AFILT |
@@ -1335,7 +1335,7 @@ free_dev:
return ret;
}
-static int mcr20a_remove(struct spi_device *spi)
+static void mcr20a_remove(struct spi_device *spi)
{
struct mcr20a_local *lp = spi_get_drvdata(spi);
@@ -1343,8 +1343,6 @@ static int mcr20a_remove(struct spi_device *spi)
ieee802154_unregister_hw(lp->hw);
ieee802154_free_hw(lp->hw);
-
- return 0;
}
static const struct of_device_id mcr20a_of_match[] = {
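The ca8210 and mcr20a hunks above both scale the interframe-spacing periods by the symbol duration: IEEE 802.15.4 specifies LIFS and SIFS in symbol times, but the wpan_phy fields hold microseconds. At the 16 us symbol duration of 2.4 GHz O-QPSK that gives LIFS = 40 * 16 = 640 us and SIFS = 12 * 16 = 192 us. A sketch restating the conversion (not taken verbatim from either driver):

#include <net/cfg802154.h>

static void demo_set_ifs_periods(struct wpan_phy *phy)
{
        phy->symbol_duration = 16;      /* us per symbol, 2.4 GHz O-QPSK */

        /* IFS are specified in symbols; the phy stores microseconds. */
        phy->lifs_period = 40 * phy->symbol_duration;   /* 640 us */
        phy->sifs_period = 12 * phy->symbol_duration;   /* 192 us */
}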
diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c
index ff83e00b77af..ee4cfbf2c5cc 100644
--- a/drivers/net/ieee802154/mrf24j40.c
+++ b/drivers/net/ieee802154/mrf24j40.c
@@ -1356,7 +1356,7 @@ err_ret:
return ret;
}
-static int mrf24j40_remove(struct spi_device *spi)
+static void mrf24j40_remove(struct spi_device *spi)
{
struct mrf24j40 *devrec = spi_get_drvdata(spi);
@@ -1366,8 +1366,6 @@ static int mrf24j40_remove(struct spi_device *spi)
ieee802154_free_hw(devrec->hw);
/* TODO: Will ieee802154_free_device() wait until ->xmit() is
* complete? */
-
- return 0;
}
static const struct of_device_id mrf24j40_of_match[] = {
diff --git a/drivers/net/ipa/Kconfig b/drivers/net/ipa/Kconfig
index d037682fb7ad..6782c2cbf542 100644
--- a/drivers/net/ipa/Kconfig
+++ b/drivers/net/ipa/Kconfig
@@ -2,7 +2,9 @@ config QCOM_IPA
tristate "Qualcomm IPA support"
depends on NET && QCOM_SMEM
depends on ARCH_QCOM || COMPILE_TEST
+ depends on INTERCONNECT
depends on QCOM_RPROC_COMMON || (QCOM_RPROC_COMMON=n && COMPILE_TEST)
+ depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n
select QCOM_MDT_LOADER if ARCH_QCOM
select QCOM_SCM
select QCOM_QMI_HELPERS
diff --git a/drivers/net/ipa/ipa_power.c b/drivers/net/ipa/ipa_power.c
index b1c6c0fcb654..f2989aac47a6 100644
--- a/drivers/net/ipa/ipa_power.c
+++ b/drivers/net/ipa/ipa_power.c
@@ -11,6 +11,8 @@
#include <linux/pm_runtime.h>
#include <linux/bitops.h>
+#include "linux/soc/qcom/qcom_aoss.h"
+
#include "ipa.h"
#include "ipa_power.h"
#include "ipa_endpoint.h"
@@ -64,6 +66,7 @@ enum ipa_power_flag {
* struct ipa_power - IPA power management information
* @dev: IPA device pointer
* @core: IPA core clock
+ * @qmp: QMP handle for AOSS communication
* @spinlock: Protects modem TX queue enable/disable
* @flags: Boolean state flags
* @interconnect_count: Number of elements in interconnect[]
@@ -72,6 +75,7 @@ enum ipa_power_flag {
struct ipa_power {
struct device *dev;
struct clk *core;
+ struct qmp *qmp;
spinlock_t spinlock; /* used with STOPPED/STARTED power flags */
DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT);
u32 interconnect_count;
@@ -382,6 +386,47 @@ void ipa_power_modem_queue_active(struct ipa *ipa)
clear_bit(IPA_POWER_FLAG_STARTED, ipa->power->flags);
}
+static int ipa_power_retention_init(struct ipa_power *power)
+{
+ struct qmp *qmp = qmp_get(power->dev);
+
+ if (IS_ERR(qmp)) {
+ if (PTR_ERR(qmp) == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
+
+ /* We assume any other error means it's not defined/needed */
+ qmp = NULL;
+ }
+ power->qmp = qmp;
+
+ return 0;
+}
+
+static void ipa_power_retention_exit(struct ipa_power *power)
+{
+ qmp_put(power->qmp);
+ power->qmp = NULL;
+}
+
+/* Control register retention on power collapse */
+void ipa_power_retention(struct ipa *ipa, bool enable)
+{
+ static const char fmt[] = "{ class: bcm, res: ipa_pc, val: %c }";
+ struct ipa_power *power = ipa->power;
+ char buf[36]; /* Exactly enough for fmt[]; size a multiple of 4 */
+ int ret;
+
+ if (!power->qmp)
+ return; /* Not needed on this platform */
+
+ (void)snprintf(buf, sizeof(buf), fmt, enable ? '1' : '0');
+
+ ret = qmp_send(power->qmp, buf, sizeof(buf));
+ if (ret)
+ dev_err(power->dev, "error %d sending QMP %sable request\n",
+ ret, enable ? "en" : "dis");
+}
+
int ipa_power_setup(struct ipa *ipa)
{
int ret;
@@ -438,12 +483,18 @@ ipa_power_init(struct device *dev, const struct ipa_power_data *data)
if (ret)
goto err_kfree;
+ ret = ipa_power_retention_init(power);
+ if (ret)
+ goto err_interconnect_exit;
+
pm_runtime_set_autosuspend_delay(dev, IPA_AUTOSUSPEND_DELAY);
pm_runtime_use_autosuspend(dev);
pm_runtime_enable(dev);
return power;
+err_interconnect_exit:
+ ipa_interconnect_exit(power);
err_kfree:
kfree(power);
err_clk_put:
@@ -460,6 +511,7 @@ void ipa_power_exit(struct ipa_power *power)
pm_runtime_disable(dev);
pm_runtime_dont_use_autosuspend(dev);
+ ipa_power_retention_exit(power);
ipa_interconnect_exit(power);
kfree(power);
clk_put(clk);
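ipa_power_retention_init() above shows the optional-resource idiom: qmp_get() failing with -EPROBE_DEFER means the provider has not probed yet and the whole probe must be retried, while any other error is taken to mean the feature simply does not exist on this platform. A minimal sketch of the pattern under hypothetical names:

#include <linux/device.h>
#include <linux/err.h>
#include <linux/errno.h>

struct demo_handle;
struct demo_power {
        struct device *dev;
        struct demo_handle *handle;
};

struct demo_handle *demo_get(struct device *dev);       /* hypothetical getter */

/* Acquire an optional handle: defer if pending, run without it if absent. */
static int demo_optional_get(struct demo_power *power)
{
        struct demo_handle *h = demo_get(power->dev);

        if (IS_ERR(h)) {
                if (PTR_ERR(h) == -EPROBE_DEFER)
                        return -EPROBE_DEFER;   /* provider not probed yet */
                h = NULL;                       /* feature absent: carry on */
        }
        power->handle = h;

        return 0;
}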
diff --git a/drivers/net/ipa/ipa_power.h b/drivers/net/ipa/ipa_power.h
index 2151805d7fbb..6f84f057a209 100644
--- a/drivers/net/ipa/ipa_power.h
+++ b/drivers/net/ipa/ipa_power.h
@@ -41,6 +41,13 @@ void ipa_power_modem_queue_wake(struct ipa *ipa);
void ipa_power_modem_queue_active(struct ipa *ipa);
/**
+ * ipa_power_retention() - Control register retention on power collapse
+ * @ipa: IPA pointer
+ * @enable: Whether retention should be enabled or disabled
+ */
+void ipa_power_retention(struct ipa *ipa, bool enable);
+
+/**
* ipa_power_setup() - Set up IPA power management
* @ipa: IPA pointer
*
diff --git a/drivers/net/ipa/ipa_uc.c b/drivers/net/ipa/ipa_uc.c
index 856e55a080a7..fe11910518d9 100644
--- a/drivers/net/ipa/ipa_uc.c
+++ b/drivers/net/ipa/ipa_uc.c
@@ -11,6 +11,7 @@
#include "ipa.h"
#include "ipa_uc.h"
+#include "ipa_power.h"
/**
* DOC: The IPA embedded microcontroller
@@ -154,6 +155,7 @@ static void ipa_uc_response_hdlr(struct ipa *ipa, enum ipa_irq_id irq_id)
case IPA_UC_RESPONSE_INIT_COMPLETED:
if (ipa->uc_powered) {
ipa->uc_loaded = true;
+ ipa_power_retention(ipa, true);
pm_runtime_mark_last_busy(dev);
(void)pm_runtime_put_autosuspend(dev);
ipa->uc_powered = false;
@@ -184,6 +186,9 @@ void ipa_uc_deconfig(struct ipa *ipa)
ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_1);
ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_0);
+ if (ipa->uc_loaded)
+ ipa_power_retention(ipa, false);
+
if (!ipa->uc_powered)
return;
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 16aa3a478e9e..3d0874331763 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -3870,6 +3870,18 @@ static void macsec_common_dellink(struct net_device *dev, struct list_head *head
struct macsec_dev *macsec = macsec_priv(dev);
struct net_device *real_dev = macsec->real_dev;
+ /* If h/w offloading is available, propagate to the device */
+ if (macsec_is_offloaded(macsec)) {
+ const struct macsec_ops *ops;
+ struct macsec_context ctx;
+
+ ops = macsec_get_ops(netdev_priv(dev), &ctx);
+ if (ops) {
+ ctx.secy = &macsec->secy;
+ macsec_offload(ops->mdo_del_secy, &ctx);
+ }
+ }
+
unregister_netdevice_queue(dev, head);
list_del_rcu(&macsec->secys);
macsec_del_dev(macsec);
@@ -3884,18 +3896,6 @@ static void macsec_dellink(struct net_device *dev, struct list_head *head)
struct net_device *real_dev = macsec->real_dev;
struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev);
- /* If h/w offloading is available, propagate to the device */
- if (macsec_is_offloaded(macsec)) {
- const struct macsec_ops *ops;
- struct macsec_context ctx;
-
- ops = macsec_get_ops(netdev_priv(dev), &ctx);
- if (ops) {
- ctx.secy = &macsec->secy;
- macsec_offload(ops->mdo_del_secy, &ctx);
- }
- }
-
macsec_common_dellink(dev, head);
if (list_empty(&rxd->secys)) {
@@ -4018,6 +4018,15 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
!macsec_check_offload(macsec->offload, macsec))
return -EOPNOTSUPP;
+ /* send_sci must be set to true when the transmit SCI is set explicitly */
+ if ((data && data[IFLA_MACSEC_SCI]) &&
+ (data && data[IFLA_MACSEC_INC_SCI])) {
+ u8 send_sci = !!nla_get_u8(data[IFLA_MACSEC_INC_SCI]);
+
+ if (!send_sci)
+ return -EINVAL;
+ }
+
if (data && data[IFLA_MACSEC_ICV_LEN])
icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]);
mtu = real_dev->mtu - icv_len - macsec_extra_len(true);
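The macsec_newlink() check above is the usual pattern for cross-attribute netlink validation: when two attributes are related (here an explicit SCI requires INC_SCI to be on), the constraint is rejected up front, before any state is created. A minimal sketch with hypothetical attributes:

#include <net/netlink.h>

enum {
        DEMO_ATTR_UNSPEC,
        DEMO_ATTR_VALUE,        /* hypothetical: explicit value */
        DEMO_ATTR_ENABLE,       /* hypothetical: u8 on/off flag */
        __DEMO_ATTR_MAX,
};

/* Reject an explicit value when the controlling flag is off. */
static int demo_validate_attrs(struct nlattr *data[])
{
        if (data[DEMO_ATTR_VALUE] && data[DEMO_ATTR_ENABLE] &&
            !nla_get_u8(data[DEMO_ATTR_ENABLE]))
                return -EINVAL;

        return 0;
}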
diff --git a/drivers/net/mctp/mctp-serial.c b/drivers/net/mctp/mctp-serial.c
index eaa6fb3224bc..62723a7faa2d 100644
--- a/drivers/net/mctp/mctp-serial.c
+++ b/drivers/net/mctp/mctp-serial.c
@@ -403,8 +403,16 @@ static void mctp_serial_tty_receive_buf(struct tty_struct *tty,
mctp_serial_push(dev, c[i]);
}
+static void mctp_serial_uninit(struct net_device *ndev)
+{
+ struct mctp_serial *dev = netdev_priv(ndev);
+
+ cancel_work_sync(&dev->tx_work);
+}
+
static const struct net_device_ops mctp_serial_netdev_ops = {
.ndo_start_xmit = mctp_serial_tx,
+ .ndo_uninit = mctp_serial_uninit,
};
static void mctp_serial_setup(struct net_device *ndev)
@@ -483,7 +491,6 @@ static void mctp_serial_close(struct tty_struct *tty)
int idx = dev->idx;
unregister_netdev(dev->netdev);
- cancel_work_sync(&dev->tx_work);
ida_free(&mctp_serial_ida, idx);
}
diff --git a/drivers/net/mdio/mdio-aspeed.c b/drivers/net/mdio/mdio-aspeed.c
index 966c3b4ad59d..e2273588c75b 100644
--- a/drivers/net/mdio/mdio-aspeed.c
+++ b/drivers/net/mdio/mdio-aspeed.c
@@ -148,6 +148,7 @@ static const struct of_device_id aspeed_mdio_of_match[] = {
{ .compatible = "aspeed,ast2600-mdio", },
{ },
};
+MODULE_DEVICE_TABLE(of, aspeed_mdio_of_match);
static struct platform_driver aspeed_mdio_driver = {
.driver = {
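The mdio-aspeed one-liner above adds the piece that makes the module autoloadable: MODULE_DEVICE_TABLE() emits "of:" aliases into the module image, which modprobe matches against the uevent generated for a device-tree node. A minimal sketch with a hypothetical compatible string:

#include <linux/mod_devicetable.h>
#include <linux/module.h>

static const struct of_device_id demo_of_match[] = {
        { .compatible = "vendor,demo-mdio" },   /* hypothetical */
        { /* sentinel */ },
};
/* Emits "of:N*T*Cvendor,demo-mdio" style aliases for autoloading. */
MODULE_DEVICE_TABLE(of, demo_of_match);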
diff --git a/drivers/net/mdio/mdio-ipq4019.c b/drivers/net/mdio/mdio-ipq4019.c
index 5f4cd24a0241..4eba5a91075c 100644
--- a/drivers/net/mdio/mdio-ipq4019.c
+++ b/drivers/net/mdio/mdio-ipq4019.c
@@ -200,7 +200,11 @@ static int ipq_mdio_reset(struct mii_bus *bus)
if (ret)
return ret;
- return clk_prepare_enable(priv->mdio_clk);
+ ret = clk_prepare_enable(priv->mdio_clk);
+ if (ret == 0)
+ mdelay(10);
+
+ return ret;
}
static int ipq4019_mdio_probe(struct platform_device *pdev)
diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c
index 7d2abaf2b2c9..64fb76c1e395 100644
--- a/drivers/net/mdio/mdio-mscc-miim.c
+++ b/drivers/net/mdio/mdio-mscc-miim.c
@@ -187,6 +187,13 @@ static const struct regmap_config mscc_miim_regmap_config = {
.reg_stride = 4,
};
+static const struct regmap_config mscc_miim_phy_regmap_config = {
+ .reg_bits = 32,
+ .val_bits = 32,
+ .reg_stride = 4,
+ .name = "phy",
+};
+
int mscc_miim_setup(struct device *dev, struct mii_bus **pbus, const char *name,
struct regmap *mii_regmap, int status_offset)
{
@@ -250,7 +257,7 @@ static int mscc_miim_probe(struct platform_device *pdev)
}
phy_regmap = devm_regmap_init_mmio(&pdev->dev, phy_regs,
- &mscc_miim_regmap_config);
+ &mscc_miim_phy_regmap_config);
if (IS_ERR(phy_regmap)) {
dev_err(&pdev->dev, "Unable to create phy register regmap\n");
return PTR_ERR(phy_regmap);
diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
index 4300261e2f9e..378ee779061c 100644
--- a/drivers/net/netdevsim/fib.c
+++ b/drivers/net/netdevsim/fib.c
@@ -623,14 +623,14 @@ static int nsim_fib6_rt_append(struct nsim_fib_data *data,
if (err)
goto err_fib6_rt_nh_del;
- fib6_event->rt_arr[i]->trap = true;
+ WRITE_ONCE(fib6_event->rt_arr[i]->trap, true);
}
return 0;
err_fib6_rt_nh_del:
for (i--; i >= 0; i--) {
- fib6_event->rt_arr[i]->trap = false;
+ WRITE_ONCE(fib6_event->rt_arr[i]->trap, false);
nsim_fib6_rt_nh_del(fib6_rt, fib6_event->rt_arr[i]);
}
return err;
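The netdevsim change above wraps the flag update in WRITE_ONCE() because the field is read without the writer's lock held; it pairs with READ_ONCE() on the reader side, preventing the compiler from tearing or caching the access and documenting the lockless use. A minimal sketch of the pair under hypothetical names:

#include <linux/compiler.h>
#include <linux/types.h>

struct demo_rt {
        bool trap;
};

/* Writer side: publish a flag consumed by lockless readers. */
static void demo_set_trap(struct demo_rt *rt, bool on)
{
        WRITE_ONCE(rt->trap, on);
}

/* Reader side (no lock held): pairs with the WRITE_ONCE() above. */
static bool demo_is_trapped(const struct demo_rt *rt)
{
        return READ_ONCE(rt->trap);
}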
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 5b6c0d120e09..29aa811af430 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -1688,19 +1688,19 @@ static int qca808x_read_status(struct phy_device *phydev)
if (ret < 0)
return ret;
- if (phydev->link && phydev->speed == SPEED_2500)
- phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
- else
- phydev->interface = PHY_INTERFACE_MODE_SMII;
-
- /* generate seed as a lower random value to make PHY linked as SLAVE easily,
- * except for master/slave configuration fault detected.
- * the reason for not putting this code into the function link_change_notify is
- * the corner case where the link partner is also the qca8081 PHY and the seed
- * value is configured as the same value, the link can't be up and no link change
- * occurs.
- */
- if (!phydev->link) {
+ if (phydev->link) {
+ if (phydev->speed == SPEED_2500)
+ phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
+ else
+ phydev->interface = PHY_INTERFACE_MODE_SGMII;
+ } else {
+ /* generate seed as a lower random value to make PHY linked as SLAVE easily,
+ * except for master/slave configuration fault detected.
+ * the reason for not putting this code into the function link_change_notify is
+ * the corner case where the link partner is also the qca8081 PHY and the seed
+ * value is configured as the same value, the link can't be up and no link change
+ * occurs.
+ */
if (phydev->master_slave_state == MASTER_SLAVE_STATE_ERR) {
qca808x_phy_ms_seed_enable(phydev, false);
} else {
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index bb5104ae4610..3c683e0e40e9 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -854,6 +854,7 @@ static struct phy_driver broadcom_drivers[] = {
.phy_id_mask = 0xfffffff0,
.name = "Broadcom BCM54616S",
/* PHY_GBIT_FEATURES */
+ .soft_reset = genphy_soft_reset,
.config_init = bcm54xx_config_init,
.config_aneg = bcm54616s_config_aneg,
.config_intr = bcm_phy_config_intr,
diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
index 211b5476a6f5..ce17b2af3218 100644
--- a/drivers/net/phy/dp83822.c
+++ b/drivers/net/phy/dp83822.c
@@ -274,7 +274,7 @@ static int dp83822_config_intr(struct phy_device *phydev)
if (err < 0)
return err;
- err = phy_write(phydev, MII_DP83822_MISR1, 0);
+ err = phy_write(phydev, MII_DP83822_MISR2, 0);
if (err < 0)
return err;
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index fa71fb7a66b5..2702faf7b0f6 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -553,9 +553,9 @@ static int m88e1121_config_aneg_rgmii_delays(struct phy_device *phydev)
else
mscr = 0;
- return phy_modify_paged(phydev, MII_MARVELL_MSCR_PAGE,
- MII_88E1121_PHY_MSCR_REG,
- MII_88E1121_PHY_MSCR_DELAY_MASK, mscr);
+ return phy_modify_paged_changed(phydev, MII_MARVELL_MSCR_PAGE,
+ MII_88E1121_PHY_MSCR_REG,
+ MII_88E1121_PHY_MSCR_DELAY_MASK, mscr);
}
static int m88e1121_config_aneg(struct phy_device *phydev)
@@ -569,11 +569,13 @@ static int m88e1121_config_aneg(struct phy_device *phydev)
return err;
}
+ changed = err;
+
err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
if (err < 0)
return err;
- changed = err;
+ changed |= err;
err = genphy_config_aneg(phydev);
if (err < 0)
@@ -1213,16 +1215,15 @@ static int m88e1118_config_aneg(struct phy_device *phydev)
{
int err;
- err = genphy_soft_reset(phydev);
+ err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
if (err < 0)
return err;
- err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
+ err = genphy_config_aneg(phydev);
if (err < 0)
return err;
- err = genphy_config_aneg(phydev);
- return 0;
+ return genphy_soft_reset(phydev);
}
static int m88e1118_config_init(struct phy_device *phydev)
@@ -1686,8 +1687,8 @@ static int marvell_suspend(struct phy_device *phydev)
int err;
/* Suspend the fiber mode first */
- if (!linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT,
- phydev->supported)) {
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT,
+ phydev->supported)) {
err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE);
if (err < 0)
goto error;
@@ -1721,8 +1722,8 @@ static int marvell_resume(struct phy_device *phydev)
int err;
/* Resume the fiber mode first */
- if (!linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT,
- phydev->supported)) {
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT,
+ phydev->supported)) {
err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE);
if (err < 0)
goto error;
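
The switch to phy_modify_paged_changed() matters because the *_changed register helpers return 1 when the register was actually rewritten, 0 when it already held the value, and a negative errno on failure, so callers can OR the results together and only restart autonegotiation when something really changed. A minimal standalone model of that contract follows; reg_modify_changed() is a made-up stand-in, not a kernel API.

#include <stdio.h>

/* Model of a *_changed helper: returns <0 on error, 0 if the register
 * already held the value, 1 if it had to be rewritten.
 */
static int reg_modify_changed(unsigned int *reg, unsigned int mask,
			      unsigned int val)
{
	unsigned int new = (*reg & ~mask) | (val & mask);

	if (new == *reg)
		return 0;
	*reg = new;
	return 1;
}

int main(void)
{
	unsigned int reg = 0x0030;
	int changed = 0, err;

	err = reg_modify_changed(&reg, 0x00f0, 0x0030);	/* no-op */
	if (err < 0)
		return 1;
	changed |= err;

	err = reg_modify_changed(&reg, 0x000f, 0x0002);	/* rewrites */
	if (err < 0)
		return 1;
	changed |= err;

	printf("changed=%d reg=0x%04x\n", changed, reg);
	return 0;
}
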
diff --git a/drivers/net/phy/mediatek-ge.c b/drivers/net/phy/mediatek-ge.c
index b7a5ae20edd5..68ee434f9dea 100644
--- a/drivers/net/phy/mediatek-ge.c
+++ b/drivers/net/phy/mediatek-ge.c
@@ -55,9 +55,6 @@ static int mt7530_phy_config_init(struct phy_device *phydev)
static int mt7531_phy_config_init(struct phy_device *phydev)
{
- if (phydev->interface != PHY_INTERFACE_MODE_INTERNAL)
- return -EINVAL;
-
mtk_gephy_config_init(phydev);
/* PHY link down power saving enable */
diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c
index 7e7904fee1d9..73f7962a37d3 100644
--- a/drivers/net/phy/meson-gxl.c
+++ b/drivers/net/phy/meson-gxl.c
@@ -30,8 +30,12 @@
#define INTSRC_LINK_DOWN BIT(4)
#define INTSRC_REMOTE_FAULT BIT(5)
#define INTSRC_ANEG_COMPLETE BIT(6)
+#define INTSRC_ENERGY_DETECT BIT(7)
#define INTSRC_MASK 30
+#define INT_SOURCES (INTSRC_LINK_DOWN | INTSRC_ANEG_COMPLETE | \
+ INTSRC_ENERGY_DETECT)
+
#define BANK_ANALOG_DSP 0
#define BANK_WOL 1
#define BANK_BIST 3
@@ -200,7 +204,6 @@ static int meson_gxl_ack_interrupt(struct phy_device *phydev)
static int meson_gxl_config_intr(struct phy_device *phydev)
{
- u16 val;
int ret;
if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
@@ -209,16 +212,9 @@ static int meson_gxl_config_intr(struct phy_device *phydev)
if (ret)
return ret;
- val = INTSRC_ANEG_PR
- | INTSRC_PARALLEL_FAULT
- | INTSRC_ANEG_LP_ACK
- | INTSRC_LINK_DOWN
- | INTSRC_REMOTE_FAULT
- | INTSRC_ANEG_COMPLETE;
- ret = phy_write(phydev, INTSRC_MASK, val);
+ ret = phy_write(phydev, INTSRC_MASK, INT_SOURCES);
} else {
- val = 0;
- ret = phy_write(phydev, INTSRC_MASK, val);
+ ret = phy_write(phydev, INTSRC_MASK, 0);
/* Ack any pending IRQ */
ret = meson_gxl_ack_interrupt(phydev);
@@ -237,10 +233,23 @@ static irqreturn_t meson_gxl_handle_interrupt(struct phy_device *phydev)
return IRQ_NONE;
}
+ irq_status &= INT_SOURCES;
+
if (irq_status == 0)
return IRQ_NONE;
- phy_trigger_machine(phydev);
+ /* Aneg-complete interrupt is used for link-up detection */
+ if (phydev->autoneg == AUTONEG_ENABLE &&
+ irq_status == INTSRC_ENERGY_DETECT)
+ return IRQ_HANDLED;
+
+ /* Give the PHY some time before the MAC starts sending data. This
+ * works around an issue where the network doesn't come up properly.
+ */
+ if (!(irq_status & INTSRC_LINK_DOWN))
+ phy_queue_state_machine(phydev, msecs_to_jiffies(100));
+ else
+ phy_trigger_machine(phydev);
return IRQ_HANDLED;
}
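
Masking irq_status down to INT_SOURCES before testing it keeps bits from interrupt sources that were never enabled from being mistaken for real events, and the energy-detect-only case is swallowed because autonegotiation completion will report link-up anyway. A small standalone sketch of that decision flow, using hypothetical SRC_* names rather than the driver's registers:

#include <stdbool.h>
#include <stdio.h>

#define SRC_LINK_DOWN     (1u << 4)
#define SRC_ANEG_COMPLETE (1u << 6)
#define SRC_ENERGY_DETECT (1u << 7)
#define INT_SOURCES (SRC_LINK_DOWN | SRC_ANEG_COMPLETE | SRC_ENERGY_DETECT)

/* Returns true when the (masked) status should kick the state machine,
 * mirroring the decision flow above; the delayed kick is left to the
 * caller.
 */
static bool irq_needs_action(unsigned int raw_status, bool aneg_on)
{
	unsigned int status = raw_status & INT_SOURCES;

	if (!status)
		return false;	/* spurious or never-enabled source */
	if (aneg_on && status == SRC_ENERGY_DETECT)
		return false;	/* energy-detect alone: ignore */
	return true;
}

int main(void)
{
	printf("%d\n", irq_needs_action(SRC_ENERGY_DETECT, true));	/* 0 */
	printf("%d\n", irq_needs_action(SRC_ANEG_COMPLETE, true));	/* 1 */
	printf("%d\n", irq_needs_action(1u << 5, true));	/* 0: not enabled */
	return 0;
}
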
diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c
index ebfeeb3c67c1..7e3017e7a1c0 100644
--- a/drivers/net/phy/mscc/mscc_main.c
+++ b/drivers/net/phy/mscc/mscc_main.c
@@ -2685,3 +2685,6 @@ MODULE_DEVICE_TABLE(mdio, vsc85xx_tbl);
MODULE_DESCRIPTION("Microsemi VSC85xx PHY driver");
MODULE_AUTHOR("Nagaraju Lakkaraju");
MODULE_LICENSE("Dual MIT/GPL");
+
+MODULE_FIRMWARE(MSCC_VSC8584_REVB_INT8051_FW);
+MODULE_FIRMWARE(MSCC_VSC8574_REVB_INT8051_FW);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 74d8e1dc125f..ce0bb5951b81 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1746,6 +1746,9 @@ void phy_detach(struct phy_device *phydev)
phy_driver_is_genphy_10g(phydev))
device_release_driver(&phydev->mdio.dev);
+ /* Assert the reset signal */
+ phy_device_reset(phydev, 1);
+
/*
* The phydev might go away on the put_device() below, so avoid
* a use-after-free bug by reading the underlying bus first.
@@ -1757,9 +1760,6 @@ void phy_detach(struct phy_device *phydev)
ndev_owner = dev->dev.parent->driver->owner;
if (ndev_owner != bus->owner)
module_put(bus->owner);
-
- /* Assert the reset signal */
- phy_device_reset(phydev, 1);
}
EXPORT_SYMBOL(phy_detach);
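
The reordering above follows the rule spelled out in the retained comment: read everything you still need from an object before dropping what may be its final reference. A compact userspace illustration of the same use-after-free hazard, with toy bus/dev types standing in for the MDIO structures:

#include <stdio.h>
#include <stdlib.h>

struct bus { const char *owner; };
struct dev { int refcnt; struct bus *bus; };

static void put_dev(struct dev *d)
{
	if (--d->refcnt == 0)
		free(d);	/* d must not be touched after this */
}

static void detach(struct dev *d)
{
	/* Read everything still needed *before* the put: the final
	 * reference may free d, making d->bus afterwards a
	 * use-after-free.
	 */
	struct bus *bus = d->bus;

	put_dev(d);
	printf("owner: %s\n", bus->owner);
}

int main(void)
{
	static struct bus b = { .owner = "mdio" };
	struct dev *d = malloc(sizeof(*d));

	if (!d)
		return 1;
	d->refcnt = 1;
	d->bus = &b;
	detach(d);
	return 0;
}
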
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 0c6c0d1843bc..c1512c9925a6 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -651,6 +651,11 @@ struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode)
else if (ret < 0)
return ERR_PTR(ret);
+ if (!fwnode_device_is_available(ref.fwnode)) {
+ fwnode_handle_put(ref.fwnode);
+ return NULL;
+ }
+
bus = sfp_bus_get(ref.fwnode);
fwnode_handle_put(ref.fwnode);
if (!bus)
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c
index 8b5445a724ce..ff37f8ba6758 100644
--- a/drivers/net/phy/spi_ks8995.c
+++ b/drivers/net/phy/spi_ks8995.c
@@ -517,7 +517,7 @@ static int ks8995_probe(struct spi_device *spi)
return 0;
}
-static int ks8995_remove(struct spi_device *spi)
+static void ks8995_remove(struct spi_device *spi)
{
struct ks8995_switch *ks = spi_get_drvdata(spi);
@@ -526,8 +526,6 @@ static int ks8995_remove(struct spi_device *spi)
/* assert reset */
if (ks->pdata && gpio_is_valid(ks->pdata->reset_gpio))
gpiod_set_value(gpio_to_desc(ks->pdata->reset_gpio), 1);
-
- return 0;
}
/* ------------------------------------------------------------------------ */
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 1a627ba4b850..a31098981a65 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1468,58 +1468,68 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
u16 hdr_off;
u32 *pkt_hdr;
- /* This check is no longer done by usbnet */
- if (skb->len < dev->net->hard_header_len)
+ /* At the end of the SKB, there's a header telling us how many packets
+ * are bundled into this buffer and where we can find an array of
+ * per-packet metadata (which contains elements encoded into u16).
+ */
+ if (skb->len < 4)
return 0;
-
skb_trim(skb, skb->len - 4);
rx_hdr = get_unaligned_le32(skb_tail_pointer(skb));
-
pkt_cnt = (u16)rx_hdr;
hdr_off = (u16)(rx_hdr >> 16);
+
+ if (pkt_cnt == 0)
+ return 0;
+
+ /* Make sure that the bounds of the metadata array are inside the SKB
+ * (and in front of the counter at the end).
+ */
+ if (pkt_cnt * 2 + hdr_off > skb->len)
+ return 0;
pkt_hdr = (u32 *)(skb->data + hdr_off);
- while (pkt_cnt--) {
+ /* Packets must not overlap the metadata array */
+ skb_trim(skb, hdr_off);
+
+ for (; ; pkt_cnt--, pkt_hdr++) {
u16 pkt_len;
le32_to_cpus(pkt_hdr);
pkt_len = (*pkt_hdr >> 16) & 0x1fff;
- /* Check CRC or runt packet */
- if ((*pkt_hdr & AX_RXHDR_CRC_ERR) ||
- (*pkt_hdr & AX_RXHDR_DROP_ERR)) {
- skb_pull(skb, (pkt_len + 7) & 0xFFF8);
- pkt_hdr++;
- continue;
- }
-
- if (pkt_cnt == 0) {
- skb->len = pkt_len;
- /* Skip IP alignment pseudo header */
- skb_pull(skb, 2);
- skb_set_tail_pointer(skb, skb->len);
- skb->truesize = pkt_len + sizeof(struct sk_buff);
- ax88179_rx_checksum(skb, pkt_hdr);
- return 1;
- }
+ if (pkt_len > skb->len)
+ return 0;
- ax_skb = skb_clone(skb, GFP_ATOMIC);
- if (ax_skb) {
+ /* Check CRC or runt packet */
+ if (((*pkt_hdr & (AX_RXHDR_CRC_ERR | AX_RXHDR_DROP_ERR)) == 0) &&
+ pkt_len >= 2 + ETH_HLEN) {
+ bool last = (pkt_cnt == 0);
+
+ if (last) {
+ ax_skb = skb;
+ } else {
+ ax_skb = skb_clone(skb, GFP_ATOMIC);
+ if (!ax_skb)
+ return 0;
+ }
ax_skb->len = pkt_len;
/* Skip IP alignment pseudo header */
skb_pull(ax_skb, 2);
skb_set_tail_pointer(ax_skb, ax_skb->len);
ax_skb->truesize = pkt_len + sizeof(struct sk_buff);
ax88179_rx_checksum(ax_skb, pkt_hdr);
+
+ if (last)
+ return 1;
+
usbnet_skb_return(dev, ax_skb);
- } else {
- return 0;
}
- skb_pull(skb, (pkt_len + 7) & 0xFFF8);
- pkt_hdr++;
+ /* Trim this packet away from the SKB */
+ if (!skb_pull(skb, (pkt_len + 7) & 0xFFF8))
+ return 0;
}
- return 1;
}
static struct sk_buff *
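
The new bounds check guards the untrusted pkt_cnt/hdr_off pair before the metadata array is dereferenced. Below is a standalone sketch of the same validation, generalized to an arbitrary entry size and done in 64-bit arithmetic so the multiply cannot wrap; metadata_in_bounds() is illustrative, not the driver's code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Validate that cnt metadata entries of entry_sz bytes starting at
 * off lie entirely inside a buffer of len bytes. Widening to 64-bit
 * arithmetic means the multiply and add cannot wrap, so one
 * comparison suffices.
 */
static bool metadata_in_bounds(uint32_t len, uint16_t cnt,
			       uint16_t off, uint32_t entry_sz)
{
	return (uint64_t)cnt * entry_sz + off <= len;
}

int main(void)
{
	printf("%d\n", metadata_in_bounds(1514, 3, 1400, 2));	/* 1: fits */
	printf("%d\n", metadata_in_bounds(1514, 65535, 65535, 2)); /* 0 */
	return 0;
}
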
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index eb3817d70f2b..9b4dfa3001d6 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -583,6 +583,11 @@ static const struct usb_device_id products[] = {
.bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, \
.bInterfaceProtocol = USB_CDC_PROTO_NONE
+#define ZAURUS_FAKE_INTERFACE \
+ .bInterfaceClass = USB_CLASS_COMM, \
+ .bInterfaceSubClass = USB_CDC_SUBCLASS_MDLM, \
+ .bInterfaceProtocol = USB_CDC_PROTO_NONE
+
/* SA-1100 based Sharp Zaurus ("collie"), or compatible;
* wire-incompatible with true CDC Ethernet implementations.
* (And, it seems, needlessly so...)
@@ -640,6 +645,13 @@ static const struct usb_device_id products[] = {
.match_flags = USB_DEVICE_ID_MATCH_INT_INFO
| USB_DEVICE_ID_MATCH_DEVICE,
.idVendor = 0x04DD,
+ .idProduct = 0x9032, /* SL-6000 */
+ ZAURUS_FAKE_INTERFACE,
+ .driver_info = 0,
+}, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+ .idVendor = 0x04DD,
/* reported with some C860 units */
.idProduct = 0x9050, /* C-860 */
ZAURUS_MASTER_INTERFACE,
diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index 82bb5ed94c48..c0b8b4aa78f3 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -659,6 +659,11 @@ static const struct usb_device_id mbim_devs[] = {
.driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle,
},
+ /* Telit FN990 */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x1071, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
+ .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle,
+ },
+
/* default entry */
{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
.driver_info = (unsigned long)&cdc_mbim_info_zlp,
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index e303b522efb5..15f91d691bba 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -1715,10 +1715,10 @@ int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in)
{
struct sk_buff *skb;
struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
- int len;
+ unsigned int len;
int nframes;
int x;
- int offset;
+ unsigned int offset;
union {
struct usb_cdc_ncm_ndp16 *ndp16;
struct usb_cdc_ncm_ndp32 *ndp32;
@@ -1790,8 +1790,8 @@ next_ndp:
break;
}
- /* sanity checking */
- if (((offset + len) > skb_in->len) ||
+ /* sanity checking - watch out for integer wrap */
+ if ((offset > skb_in->len) || (len > skb_in->len - offset) ||
(len > ctx->rx_max) || (len < ETH_HLEN)) {
netif_dbg(dev, rx_err, dev->net,
"invalid frame detected (ignored) offset[%u]=%u, length=%u, skb=%p\n",
diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
index cd33955df0b6..6a769df0b421 100644
--- a/drivers/net/usb/ipheth.c
+++ b/drivers/net/usb/ipheth.c
@@ -121,7 +121,7 @@ static int ipheth_alloc_urbs(struct ipheth_device *iphone)
if (tx_buf == NULL)
goto free_rx_urb;
- rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE,
+ rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN,
GFP_KERNEL, &rx_urb->transfer_dma);
if (rx_buf == NULL)
goto free_tx_buf;
@@ -146,7 +146,7 @@ error_nomem:
static void ipheth_free_urbs(struct ipheth_device *iphone)
{
- usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->rx_buf,
+ usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, iphone->rx_buf,
iphone->rx_urb->transfer_dma);
usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->tx_buf,
iphone->tx_urb->transfer_dma);
@@ -317,7 +317,7 @@ static int ipheth_rx_submit(struct ipheth_device *dev, gfp_t mem_flags)
usb_fill_bulk_urb(dev->rx_urb, udev,
usb_rcvbulkpipe(udev, dev->bulk_in),
- dev->rx_buf, IPHETH_BUF_SIZE,
+ dev->rx_buf, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN,
ipheth_rcvbulk_callback,
dev);
dev->rx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index b8e20a3f2b84..415f16662f88 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1537,11 +1537,8 @@ static void lan78xx_status(struct lan78xx_net *dev, struct urb *urb)
netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata);
lan78xx_defer_kevent(dev, EVENT_LINK_RESET);
- if (dev->domain_data.phyirq > 0) {
- local_irq_disable();
- generic_handle_irq(dev->domain_data.phyirq);
- local_irq_enable();
- }
+ if (dev->domain_data.phyirq > 0)
+ generic_handle_irq_safe(dev->domain_data.phyirq);
} else {
netdev_warn(dev->net,
"unexpected interrupt: 0x%08x\n", intdata);
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 37e5f3495362..3353e761016d 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1400,6 +1400,8 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */
{QMI_FIXED_INTF(0x413c, 0x81d7, 1)}, /* Dell Wireless 5821e preproduction config */
{QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/
+ {QMI_FIXED_INTF(0x413c, 0x81e4, 0)}, /* Dell Wireless 5829e with eSIM support*/
+ {QMI_FIXED_INTF(0x413c, 0x81e6, 0)}, /* Dell Wireless 5829e */
{QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
{QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */
{QMI_QUIRK_SET_DTR(0x22de, 0x9051, 2)}, /* Hucom Wireless HM-211S/K */
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index bc1e3dd67c04..a0f29482294d 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -84,9 +84,10 @@ static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
ret = fn(dev, USB_VENDOR_REQUEST_READ_REGISTER, USB_DIR_IN
| USB_TYPE_VENDOR | USB_RECIP_DEVICE,
0, index, &buf, 4);
- if (unlikely(ret < 0)) {
- netdev_warn(dev->net, "Failed to read reg index 0x%08x: %d\n",
- index, ret);
+ if (ret < 0) {
+ if (ret != -ENODEV)
+ netdev_warn(dev->net, "Failed to read reg index 0x%08x: %d\n",
+ index, ret);
return ret;
}
@@ -116,7 +117,7 @@ static int __must_check __smsc95xx_write_reg(struct usbnet *dev, u32 index,
ret = fn(dev, USB_VENDOR_REQUEST_WRITE_REGISTER, USB_DIR_OUT
| USB_TYPE_VENDOR | USB_RECIP_DEVICE,
0, index, &buf, 4);
- if (unlikely(ret < 0))
+ if (ret < 0 && ret != -ENODEV)
netdev_warn(dev->net, "Failed to write reg index 0x%08x: %d\n",
index, ret);
@@ -159,6 +160,9 @@ static int __must_check __smsc95xx_phy_wait_not_busy(struct usbnet *dev,
do {
ret = __smsc95xx_read_reg(dev, MII_ADDR, &val, in_pm);
if (ret < 0) {
+ /* Ignore -ENODEV error during disconnect() */
+ if (ret == -ENODEV)
+ return 0;
netdev_warn(dev->net, "Error reading MII_ACCESS\n");
return ret;
}
@@ -194,7 +198,8 @@ static int __smsc95xx_mdio_read(struct usbnet *dev, int phy_id, int idx,
addr = mii_address_cmd(phy_id, idx, MII_READ_ | MII_BUSY_);
ret = __smsc95xx_write_reg(dev, MII_ADDR, addr, in_pm);
if (ret < 0) {
- netdev_warn(dev->net, "Error writing MII_ADDR\n");
+ if (ret != -ENODEV)
+ netdev_warn(dev->net, "Error writing MII_ADDR\n");
goto done;
}
@@ -206,7 +211,8 @@ static int __smsc95xx_mdio_read(struct usbnet *dev, int phy_id, int idx,
ret = __smsc95xx_read_reg(dev, MII_DATA, &val, in_pm);
if (ret < 0) {
- netdev_warn(dev->net, "Error reading MII_DATA\n");
+ if (ret != -ENODEV)
+ netdev_warn(dev->net, "Error reading MII_DATA\n");
goto done;
}
@@ -214,6 +220,10 @@ static int __smsc95xx_mdio_read(struct usbnet *dev, int phy_id, int idx,
done:
mutex_unlock(&dev->phy_mutex);
+
+ /* Ignore -ENODEV error during disconnect() */
+ if (ret == -ENODEV)
+ return 0;
return ret;
}
@@ -235,7 +245,8 @@ static void __smsc95xx_mdio_write(struct usbnet *dev, int phy_id,
val = regval;
ret = __smsc95xx_write_reg(dev, MII_DATA, val, in_pm);
if (ret < 0) {
- netdev_warn(dev->net, "Error writing MII_DATA\n");
+ if (ret != -ENODEV)
+ netdev_warn(dev->net, "Error writing MII_DATA\n");
goto done;
}
@@ -243,7 +254,8 @@ static void __smsc95xx_mdio_write(struct usbnet *dev, int phy_id,
addr = mii_address_cmd(phy_id, idx, MII_WRITE_ | MII_BUSY_);
ret = __smsc95xx_write_reg(dev, MII_ADDR, addr, in_pm);
if (ret < 0) {
- netdev_warn(dev->net, "Error writing MII_ADDR\n");
+ if (ret != -ENODEV)
+ netdev_warn(dev->net, "Error writing MII_ADDR\n");
goto done;
}
diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c
index b658510cc9a4..5a53e63d33a6 100644
--- a/drivers/net/usb/sr9700.c
+++ b/drivers/net/usb/sr9700.c
@@ -413,7 +413,7 @@ static int sr9700_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
/* ignore the CRC length */
len = (skb->data[1] | (skb->data[2] << 8)) - 4;
- if (len > ETH_FRAME_LEN)
+ if (len > ETH_FRAME_LEN || len > skb->len)
return 0;
/* the last packet of current skb */
diff --git a/drivers/net/usb/zaurus.c b/drivers/net/usb/zaurus.c
index 8e717a0b559b..7984f2157d22 100644
--- a/drivers/net/usb/zaurus.c
+++ b/drivers/net/usb/zaurus.c
@@ -256,6 +256,11 @@ static const struct usb_device_id products [] = {
.bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, \
.bInterfaceProtocol = USB_CDC_PROTO_NONE
+#define ZAURUS_FAKE_INTERFACE \
+ .bInterfaceClass = USB_CLASS_COMM, \
+ .bInterfaceSubClass = USB_CDC_SUBCLASS_MDLM, \
+ .bInterfaceProtocol = USB_CDC_PROTO_NONE
+
/* SA-1100 based Sharp Zaurus ("collie"), or compatible. */
{
.match_flags = USB_DEVICE_ID_MATCH_INT_INFO
@@ -315,6 +320,13 @@ static const struct usb_device_id products [] = {
.driver_info = ZAURUS_PXA_INFO,
}, {
.match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+ .idVendor = 0x04DD,
+ .idProduct = 0x9032, /* SL-6000 */
+ ZAURUS_FAKE_INTERFACE,
+ .driver_info = (unsigned long)&bogus_mdlm_info,
+}, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
| USB_DEVICE_ID_MATCH_DEVICE,
.idVendor = 0x04DD,
/* reported with some C860 units */
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 354a963075c5..d29fb9759cc9 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -265,9 +265,10 @@ static void __veth_xdp_flush(struct veth_rq *rq)
{
/* Write ptr_ring before reading rx_notify_masked */
smp_mb();
- if (!rq->rx_notify_masked) {
- rq->rx_notify_masked = true;
- napi_schedule(&rq->xdp_napi);
+ if (!READ_ONCE(rq->rx_notify_masked) &&
+ napi_schedule_prep(&rq->xdp_napi)) {
+ WRITE_ONCE(rq->rx_notify_masked, true);
+ __napi_schedule(&rq->xdp_napi);
}
}
@@ -912,8 +913,10 @@ static int veth_poll(struct napi_struct *napi, int budget)
/* Write rx_notify_masked before reading ptr_ring */
smp_store_mb(rq->rx_notify_masked, false);
if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) {
- rq->rx_notify_masked = true;
- napi_schedule(&rq->xdp_napi);
+ if (napi_schedule_prep(&rq->xdp_napi)) {
+ WRITE_ONCE(rq->rx_notify_masked, true);
+ __napi_schedule(&rq->xdp_napi);
+ }
}
}
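
Pairing napi_schedule_prep() with __napi_schedule() makes claiming the NAPI scheduled bit atomic: only the winner of the claim sets rx_notify_masked, so the flag can no longer read true while no poll is actually pending. A standalone C11 model of that claim-then-publish ordering, with atomics standing in for the NAPI state bit:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Claim-then-publish model: napi_prep() atomically claims the
 * "scheduled" bit, and only the winner publishes the masked flag and
 * queues the poll, so the flag never says "masked" without a poll
 * pending.
 */
static atomic_bool napi_sched;
static atomic_bool rx_notify_masked;

static bool napi_prep(void)
{
	bool expected = false;

	return atomic_compare_exchange_strong(&napi_sched, &expected, true);
}

static void xdp_flush(void)
{
	if (!atomic_load(&rx_notify_masked) && napi_prep()) {
		atomic_store(&rx_notify_masked, true);
		printf("poll scheduled\n");	/* models __napi_schedule() */
	}
}

int main(void)
{
	xdp_flush();	/* first caller wins and schedules */
	xdp_flush();	/* no-op until the poller clears the state */
	return 0;
}
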
diff --git a/drivers/net/wan/slic_ds26522.c b/drivers/net/wan/slic_ds26522.c
index 8e3b1c717c10..6063552cea9b 100644
--- a/drivers/net/wan/slic_ds26522.c
+++ b/drivers/net/wan/slic_ds26522.c
@@ -194,10 +194,9 @@ static int slic_ds26522_init_configure(struct spi_device *spi)
return 0;
}
-static int slic_ds26522_remove(struct spi_device *spi)
+static void slic_ds26522_remove(struct spi_device *spi)
{
pr_info("DS26522 module uninstalled\n");
- return 0;
}
static int slic_ds26522_probe(struct spi_device *spi)
diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
index a46067c38bf5..0fad1331303c 100644
--- a/drivers/net/wireguard/device.c
+++ b/drivers/net/wireguard/device.c
@@ -59,9 +59,7 @@ out:
return ret;
}
-#ifdef CONFIG_PM_SLEEP
-static int wg_pm_notification(struct notifier_block *nb, unsigned long action,
- void *data)
+static int wg_pm_notification(struct notifier_block *nb, unsigned long action, void *data)
{
struct wg_device *wg;
struct wg_peer *peer;
@@ -92,7 +90,24 @@ static int wg_pm_notification(struct notifier_block *nb, unsigned long action,
}
static struct notifier_block pm_notifier = { .notifier_call = wg_pm_notification };
-#endif
+
+static int wg_vm_notification(struct notifier_block *nb, unsigned long action, void *data)
+{
+ struct wg_device *wg;
+ struct wg_peer *peer;
+
+ rtnl_lock();
+ list_for_each_entry(wg, &device_list, device_list) {
+ mutex_lock(&wg->device_update_lock);
+ list_for_each_entry(peer, &wg->peer_list, peer_list)
+ wg_noise_expire_current_peer_keypairs(peer);
+ mutex_unlock(&wg->device_update_lock);
+ }
+ rtnl_unlock();
+ return 0;
+}
+
+static struct notifier_block vm_notifier = { .notifier_call = wg_vm_notification };
static int wg_stop(struct net_device *dev)
{
@@ -424,16 +439,18 @@ int __init wg_device_init(void)
{
int ret;
-#ifdef CONFIG_PM_SLEEP
ret = register_pm_notifier(&pm_notifier);
if (ret)
return ret;
-#endif
- ret = register_pernet_device(&pernet_ops);
+ ret = register_random_vmfork_notifier(&vm_notifier);
if (ret)
goto error_pm;
+ ret = register_pernet_device(&pernet_ops);
+ if (ret)
+ goto error_vm;
+
ret = rtnl_link_register(&link_ops);
if (ret)
goto error_pernet;
@@ -442,10 +459,10 @@ int __init wg_device_init(void)
error_pernet:
unregister_pernet_device(&pernet_ops);
+error_vm:
+ unregister_random_vmfork_notifier(&vm_notifier);
error_pm:
-#ifdef CONFIG_PM_SLEEP
unregister_pm_notifier(&pm_notifier);
-#endif
return ret;
}
@@ -453,8 +470,7 @@ void wg_device_uninit(void)
{
rtnl_link_unregister(&link_ops);
unregister_pernet_device(&pernet_ops);
-#ifdef CONFIG_PM_SLEEP
+ unregister_random_vmfork_notifier(&vm_notifier);
unregister_pm_notifier(&pm_notifier);
-#endif
rcu_barrier();
}
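
The added error label keeps the function's goto ladder intact: every registration that succeeds gains an unwind entry, and the labels tear things down in exactly the reverse order of setup. A standalone skeleton of the idiom, with step_*() and undo_*() as placeholder names:

#include <stdio.h>

/* Goto-ladder unwind idiom: each successful step gains an unwind
 * entry; labels undo in exactly the reverse order of setup.
 */
static int step_a(void) { return 0; }
static int step_b(void) { return 0; }
static int step_c(void) { return -1; }	/* pretend the last step fails */
static void undo_a(void) { puts("undo a"); }
static void undo_b(void) { puts("undo b"); }

static int init(void)
{
	int ret;

	ret = step_a();
	if (ret)
		return ret;

	ret = step_b();
	if (ret)
		goto err_a;

	ret = step_c();
	if (ret)
		goto err_b;

	return 0;

err_b:
	undo_b();
err_a:
	undo_a();
	return ret;
}

int main(void)
{
	printf("init: %d\n", init());
	return 0;
}
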
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index 62c453a21e49..7c1c2658cb5f 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -2611,36 +2611,9 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb)
ath10k_mac_handle_beacon(ar, skb);
if (ieee80211_is_beacon(hdr->frame_control) ||
- ieee80211_is_probe_resp(hdr->frame_control)) {
- struct ieee80211_mgmt *mgmt = (void *)skb->data;
- enum cfg80211_bss_frame_type ftype;
- u8 *ies;
- int ies_ch;
-
+ ieee80211_is_probe_resp(hdr->frame_control))
status->boottime_ns = ktime_get_boottime_ns();
- if (!ar->scan_channel)
- goto drop;
-
- ies = mgmt->u.beacon.variable;
-
- if (ieee80211_is_beacon(mgmt->frame_control))
- ftype = CFG80211_BSS_FTYPE_BEACON;
- else
- ftype = CFG80211_BSS_FTYPE_PRESP;
-
- ies_ch = cfg80211_get_ies_channel_number(mgmt->u.beacon.variable,
- skb_tail_pointer(skb) - ies,
- sband->band, ftype);
-
- if (ies_ch > 0 && ies_ch != channel) {
- ath10k_dbg(ar, ATH10K_DBG_MGMT,
- "channel mismatched ds channel %d scan channel %d\n",
- ies_ch, channel);
- goto drop;
- }
- }
-
ath10k_dbg(ar, ATH10K_DBG_MGMT,
"event mgmt rx skb %pK len %d ftype %02x stype %02x\n",
skb, skb->len,
@@ -2654,10 +2627,6 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb)
ieee80211_rx_ni(ar->hw, skb);
return 0;
-
-drop:
- dev_kfree_skb(skb);
- return 0;
}
static int freq_to_idx(struct ath10k *ar, int freq)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
index 0eb13e5df517..d99140960a82 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
@@ -693,7 +693,7 @@ int brcmf_fw_get_firmwares(struct device *dev, struct brcmf_fw_request *req,
{
struct brcmf_fw_item *first = &req->items[0];
struct brcmf_fw *fwctx;
- char *alt_path;
+ char *alt_path = NULL;
int ret;
brcmf_dbg(TRACE, "enter: dev=%s\n", dev_name(dev));
@@ -712,7 +712,9 @@ int brcmf_fw_get_firmwares(struct device *dev, struct brcmf_fw_request *req,
fwctx->done = fw_cb;
/* First try alternative board-specific path if any */
- alt_path = brcm_alt_fw_path(first->path, fwctx->req->board_type);
+ if (fwctx->req->board_type)
+ alt_path = brcm_alt_fw_path(first->path,
+ fwctx->req->board_type);
if (alt_path) {
ret = request_firmware_nowait(THIS_MODULE, true, alt_path,
fwctx->dev, GFP_KERNEL, fwctx,
diff --git a/drivers/net/wireless/intel/Makefile b/drivers/net/wireless/intel/Makefile
index 1364b0014488..208e73a16051 100644
--- a/drivers/net/wireless/intel/Makefile
+++ b/drivers/net/wireless/intel/Makefile
@@ -5,3 +5,4 @@ obj-$(CONFIG_IPW2200) += ipw2x00/
obj-$(CONFIG_IWLEGACY) += iwlegacy/
obj-$(CONFIG_IWLWIFI) += iwlwifi/
+obj-$(CONFIG_IWLMEI) += iwlwifi/
diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig
index c21c0c68849a..85e704283755 100644
--- a/drivers/net/wireless/intel/iwlwifi/Kconfig
+++ b/drivers/net/wireless/intel/iwlwifi/Kconfig
@@ -80,19 +80,6 @@ config IWLWIFI_OPMODE_MODULAR
comment "WARNING: iwlwifi is useless without IWLDVM or IWLMVM"
depends on IWLDVM=n && IWLMVM=n
-config IWLWIFI_BCAST_FILTERING
- bool "Enable broadcast filtering"
- depends on IWLMVM
- help
- Say Y here to enable default bcast filtering configuration.
-
- Enabling broadcast filtering will drop any incoming wireless
- broadcast frames, except some very specific predefined
- patterns (e.g. incoming arp requests).
-
- If unsure, don't enable this option, as some programs might
- expect incoming broadcasts for their normal operations.
-
menu "Debugging Options"
config IWLWIFI_DEBUG
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
index 790c96df58cb..c17ab53fcd8f 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (C) 2017 Intel Deutschland GmbH
- * Copyright (C) 2019-2021 Intel Corporation
+ * Copyright (C) 2019-2022 Intel Corporation
*/
#include <linux/uuid.h>
#include "iwl-drv.h"
@@ -888,10 +888,11 @@ bool iwl_sar_geo_support(struct iwl_fw_runtime *fwrt)
* only one using version 36, so skip this version entirely.
*/
return IWL_UCODE_SERIAL(fwrt->fw->ucode_ver) >= 38 ||
- IWL_UCODE_SERIAL(fwrt->fw->ucode_ver) == 17 ||
- (IWL_UCODE_SERIAL(fwrt->fw->ucode_ver) == 29 &&
- ((fwrt->trans->hw_rev & CSR_HW_REV_TYPE_MSK) ==
- CSR_HW_REV_TYPE_7265D));
+ (IWL_UCODE_SERIAL(fwrt->fw->ucode_ver) == 17 &&
+ fwrt->trans->hw_rev != CSR_HW_REV_TYPE_3160) ||
+ (IWL_UCODE_SERIAL(fwrt->fw->ucode_ver) == 29 &&
+ ((fwrt->trans->hw_rev & CSR_HW_REV_TYPE_MSK) ==
+ CSR_HW_REV_TYPE_7265D));
}
IWL_EXPORT_SYMBOL(iwl_sar_geo_support);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
index 0703e41403a6..35b8856e511f 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
@@ -502,11 +502,6 @@ enum iwl_legacy_cmds {
DEBUG_LOG_MSG = 0xf7,
/**
- * @BCAST_FILTER_CMD: &struct iwl_bcast_filter_cmd
- */
- BCAST_FILTER_CMD = 0xcf,
-
- /**
* @MCAST_FILTER_CMD: &struct iwl_mcast_filter_cmd
*/
MCAST_FILTER_CMD = 0xd0,
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/filter.h b/drivers/net/wireless/intel/iwlwifi/fw/api/filter.h
index dd62a63956b3..e44c70b7c790 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/filter.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/filter.h
@@ -36,92 +36,4 @@ struct iwl_mcast_filter_cmd {
u8 addr_list[0];
} __packed; /* MCAST_FILTERING_CMD_API_S_VER_1 */
-#define MAX_BCAST_FILTERS 8
-#define MAX_BCAST_FILTER_ATTRS 2
-
-/**
- * enum iwl_mvm_bcast_filter_attr_offset - written by fw for each Rx packet
- * @BCAST_FILTER_OFFSET_PAYLOAD_START: offset is from payload start.
- * @BCAST_FILTER_OFFSET_IP_END: offset is from ip header end (i.e.
- * start of ip payload).
- */
-enum iwl_mvm_bcast_filter_attr_offset {
- BCAST_FILTER_OFFSET_PAYLOAD_START = 0,
- BCAST_FILTER_OFFSET_IP_END = 1,
-};
-
-/**
- * struct iwl_fw_bcast_filter_attr - broadcast filter attribute
- * @offset_type: &enum iwl_mvm_bcast_filter_attr_offset.
- * @offset: starting offset of this pattern.
- * @reserved1: reserved
- * @val: value to match - big endian (MSB is the first
- * byte to match from offset pos).
- * @mask: mask to match (big endian).
- */
-struct iwl_fw_bcast_filter_attr {
- u8 offset_type;
- u8 offset;
- __le16 reserved1;
- __be32 val;
- __be32 mask;
-} __packed; /* BCAST_FILTER_ATT_S_VER_1 */
-
-/**
- * enum iwl_mvm_bcast_filter_frame_type - filter frame type
- * @BCAST_FILTER_FRAME_TYPE_ALL: consider all frames.
- * @BCAST_FILTER_FRAME_TYPE_IPV4: consider only ipv4 frames
- */
-enum iwl_mvm_bcast_filter_frame_type {
- BCAST_FILTER_FRAME_TYPE_ALL = 0,
- BCAST_FILTER_FRAME_TYPE_IPV4 = 1,
-};
-
-/**
- * struct iwl_fw_bcast_filter - broadcast filter
- * @discard: discard frame (1) or let it pass (0).
- * @frame_type: &enum iwl_mvm_bcast_filter_frame_type.
- * @reserved1: reserved
- * @num_attrs: number of valid attributes in this filter.
- * @attrs: attributes of this filter. a filter is considered matched
- * only when all its attributes are matched (i.e. AND relationship)
- */
-struct iwl_fw_bcast_filter {
- u8 discard;
- u8 frame_type;
- u8 num_attrs;
- u8 reserved1;
- struct iwl_fw_bcast_filter_attr attrs[MAX_BCAST_FILTER_ATTRS];
-} __packed; /* BCAST_FILTER_S_VER_1 */
-
-/**
- * struct iwl_fw_bcast_mac - per-mac broadcast filtering configuration.
- * @default_discard: default action for this mac (discard (1) / pass (0)).
- * @reserved1: reserved
- * @attached_filters: bitmap of relevant filters for this mac.
- */
-struct iwl_fw_bcast_mac {
- u8 default_discard;
- u8 reserved1;
- __le16 attached_filters;
-} __packed; /* BCAST_MAC_CONTEXT_S_VER_1 */
-
-/**
- * struct iwl_bcast_filter_cmd - broadcast filtering configuration
- * @disable: enable (0) / disable (1)
- * @max_bcast_filters: max number of filters (MAX_BCAST_FILTERS)
- * @max_macs: max number of macs (NUM_MAC_INDEX_DRIVER)
- * @reserved1: reserved
- * @filters: broadcast filters
- * @macs: broadcast filtering configuration per-mac
- */
-struct iwl_bcast_filter_cmd {
- u8 disable;
- u8 max_bcast_filters;
- u8 max_macs;
- u8 reserved1;
- struct iwl_fw_bcast_filter filters[MAX_BCAST_FILTERS];
- struct iwl_fw_bcast_mac macs[NUM_MAC_INDEX_DRIVER];
-} __packed; /* BCAST_FILTERING_HCMD_API_S_VER_1 */
-
#endif /* __iwl_fw_api_filter_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/rs.h b/drivers/net/wireless/intel/iwlwifi/fw/api/rs.h
index 173a6991587b..4a7723eb8c1d 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/rs.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/rs.h
@@ -752,7 +752,6 @@ struct iwl_lq_cmd {
u8 iwl_fw_rate_idx_to_plcp(int idx);
u32 iwl_new_rate_from_v1(u32 rate_v1);
-u32 iwl_legacy_rate_to_fw_idx(u32 rate_n_flags);
const struct iwl_rate_mcs_info *iwl_rate_mcs(int idx);
const char *iwl_rs_pretty_ant(u8 ant);
const char *iwl_rs_pretty_bw(int bw);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h
index e4ebda64cd52..efc6540d31af 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@ -181,7 +181,6 @@ struct iwl_ucode_capa {
* @IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_LARGE: new NS offload (large version)
* @IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT: General support for uAPSD
* @IWL_UCODE_TLV_FLAGS_P2P_PS_UAPSD: P2P client supports uAPSD power save
- * @IWL_UCODE_TLV_FLAGS_BCAST_FILTERING: uCode supports broadcast filtering.
* @IWL_UCODE_TLV_FLAGS_EBS_SUPPORT: this uCode image supports EBS.
*/
enum iwl_ucode_tlv_flag {
@@ -196,7 +195,6 @@ enum iwl_ucode_tlv_flag {
IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT = BIT(24),
IWL_UCODE_TLV_FLAGS_EBS_SUPPORT = BIT(25),
IWL_UCODE_TLV_FLAGS_P2P_PS_UAPSD = BIT(26),
- IWL_UCODE_TLV_FLAGS_BCAST_FILTERING = BIT(29),
};
typedef unsigned int __bitwise iwl_ucode_tlv_api_t;
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/rs.c b/drivers/net/wireless/intel/iwlwifi/fw/rs.c
index a21c3befd93b..a835214611ce 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/rs.c
@@ -91,6 +91,20 @@ const char *iwl_rs_pretty_bw(int bw)
}
IWL_EXPORT_SYMBOL(iwl_rs_pretty_bw);
+static u32 iwl_legacy_rate_to_fw_idx(u32 rate_n_flags)
+{
+ int rate = rate_n_flags & RATE_LEGACY_RATE_MSK_V1;
+ int idx;
+ bool ofdm = !(rate_n_flags & RATE_MCS_CCK_MSK_V1);
+ int offset = ofdm ? IWL_FIRST_OFDM_RATE : 0;
+ int last = ofdm ? IWL_RATE_COUNT_LEGACY : IWL_FIRST_OFDM_RATE;
+
+ for (idx = offset; idx < last; idx++)
+ if (iwl_fw_rate_idx_to_plcp(idx) == rate)
+ return idx - offset;
+ return IWL_RATE_INVALID;
+}
+
u32 iwl_new_rate_from_v1(u32 rate_v1)
{
u32 rate_v2 = 0;
@@ -144,7 +158,10 @@ u32 iwl_new_rate_from_v1(u32 rate_v1)
} else {
u32 legacy_rate = iwl_legacy_rate_to_fw_idx(rate_v1);
- WARN_ON(legacy_rate < 0);
+ if (WARN_ON_ONCE(legacy_rate == IWL_RATE_INVALID))
+ legacy_rate = (rate_v1 & RATE_MCS_CCK_MSK_V1) ?
+ IWL_FIRST_CCK_RATE : IWL_FIRST_OFDM_RATE;
+
rate_v2 |= legacy_rate;
if (!(rate_v1 & RATE_MCS_CCK_MSK_V1))
rate_v2 |= RATE_MCS_LEGACY_OFDM_MSK;
@@ -172,20 +189,6 @@ u32 iwl_new_rate_from_v1(u32 rate_v1)
}
IWL_EXPORT_SYMBOL(iwl_new_rate_from_v1);
-u32 iwl_legacy_rate_to_fw_idx(u32 rate_n_flags)
-{
- int rate = rate_n_flags & RATE_LEGACY_RATE_MSK_V1;
- int idx;
- bool ofdm = !(rate_n_flags & RATE_MCS_CCK_MSK_V1);
- int offset = ofdm ? IWL_FIRST_OFDM_RATE : 0;
- int last = ofdm ? IWL_RATE_COUNT_LEGACY : IWL_FIRST_OFDM_RATE;
-
- for (idx = offset; idx < last; idx++)
- if (iwl_fw_rate_idx_to_plcp(idx) == rate)
- return idx - offset;
- return -1;
-}
-
int rs_pretty_print_rate(char *buf, int bufsz, const u32 rate)
{
char *type;
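
Moving the helper also fixes a type bug the old WARN_ON(legacy_rate < 0) hid: the function returns u32, so -1 became a huge positive value and the signed comparison was dead code, which is why the new version reports failure through the in-range IWL_RATE_INVALID sentinel instead. A standalone demonstration of the pitfall, where RATE_INVALID and the lookup helpers are invented for illustration:

#include <stdint.h>
#include <stdio.h>

#define RATE_INVALID 0xffu	/* in-range sentinel, analogous in
				 * spirit to IWL_RATE_INVALID */

static uint32_t lookup_bad(int idx)
{
	return idx < 0 ? (uint32_t)-1 : (uint32_t)idx;
}

static uint32_t lookup_good(int idx)
{
	return idx < 0 ? RATE_INVALID : (uint32_t)idx;
}

int main(void)
{
	uint32_t r = lookup_bad(-5);

	/* r is unsigned, so "r < 0" is always false: the check is dead */
	printf("bad check fires:  %d\n", r < 0);
	printf("good check fires: %d\n", lookup_good(-5) == RATE_INVALID);
	return 0;
}
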
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
index f90d4662c164..8e10ba88afb3 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright (C) 2005-2014, 2018-2021 Intel Corporation
+ * Copyright (C) 2005-2014, 2018-2022 Intel Corporation
* Copyright (C) 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2016 Intel Deutschland GmbH
*/
@@ -329,6 +329,7 @@ enum {
#define CSR_HW_REV_TYPE_2x00 (0x0000100)
#define CSR_HW_REV_TYPE_105 (0x0000110)
#define CSR_HW_REV_TYPE_135 (0x0000120)
+#define CSR_HW_REV_TYPE_3160 (0x0000164)
#define CSR_HW_REV_TYPE_7265D (0x0000210)
#define CSR_HW_REV_TYPE_NONE (0x00001F0)
#define CSR_HW_REV_TYPE_QNJ (0x0000360)
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
index 83e3b731ad29..6651e78b39ec 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
@@ -1707,6 +1707,8 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context)
out_unbind:
complete(&drv->request_firmware_complete);
device_release_driver(drv->trans->dev);
+ /* drv has just been freed by the release */
+ failure = false;
free:
if (failure)
iwl_dealloc_ucode(drv);
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
index dd58c8f9aa11..04addf964d83 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -553,8 +553,7 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
.has_he = true,
.he_cap_elem = {
.mac_cap_info[0] =
- IEEE80211_HE_MAC_CAP0_HTC_HE |
- IEEE80211_HE_MAC_CAP0_TWT_REQ,
+ IEEE80211_HE_MAC_CAP0_HTC_HE,
.mac_cap_info[1] =
IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US |
IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8,
diff --git a/drivers/net/wireless/intel/iwlwifi/mei/main.c b/drivers/net/wireless/intel/iwlwifi/mei/main.c
index d9733aaf6f6e..2f7f0f994ca3 100644
--- a/drivers/net/wireless/intel/iwlwifi/mei/main.c
+++ b/drivers/net/wireless/intel/iwlwifi/mei/main.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2021 Intel Corporation
+ * Copyright (C) 2021-2022 Intel Corporation
*/
#include <linux/etherdevice.h>
@@ -146,6 +146,7 @@ struct iwl_mei_filters {
* @csme_taking_ownership: true when CSME is taking ownership. Used to remember
* to send CSME_OWNERSHIP_CONFIRMED when the driver completes its down
* flow.
+ * @link_prot_state: true when we are in link protection PASSIVE
* @csa_throttle_end_wk: used when &csa_throttled is true
* @data_q_lock: protects the access to the data queues which are
* accessed without the mutex.
@@ -165,6 +166,7 @@ struct iwl_mei {
bool amt_enabled;
bool csa_throttled;
bool csme_taking_ownership;
+ bool link_prot_state;
struct delayed_work csa_throttle_end_wk;
spinlock_t data_q_lock;
@@ -229,8 +231,6 @@ static int iwl_mei_alloc_shared_mem(struct mei_cl_device *cldev)
if (IS_ERR(mem->ctrl)) {
int ret = PTR_ERR(mem->ctrl);
- dev_err(&cldev->dev, "Couldn't allocate the shared memory: %d\n",
- ret);
mem->ctrl = NULL;
return ret;
@@ -669,6 +669,8 @@ iwl_mei_handle_conn_status(struct mei_cl_device *cldev,
iwl_mei_cache.ops->me_conn_status(iwl_mei_cache.priv, &conn_info);
+ mei->link_prot_state = status->link_prot_state;
+
/*
* Update the Rfkill state in case the host does not own the device:
* if we are in Link Protection, ask to not touch the device, else,
@@ -1663,9 +1665,11 @@ int iwl_mei_register(void *priv, const struct iwl_mei_ops *ops)
mei_cldev_get_drvdata(iwl_mei_global_cldev);
/* we have already a SAP connection */
- if (iwl_mei_is_connected())
+ if (iwl_mei_is_connected()) {
iwl_mei_send_sap_msg(mei->cldev,
SAP_MSG_NOTIF_WIFIDR_UP);
+ ops->rfkill(priv, mei->link_prot_state);
+ }
}
ret = 0;
@@ -1784,6 +1788,8 @@ static void iwl_mei_dbgfs_unregister(struct iwl_mei *mei) {}
#endif /* CONFIG_DEBUG_FS */
+#define ALLOC_SHARED_MEM_RETRY_MAX_NUM 3
+
/*
* iwl_mei_probe - the probe function called by the mei bus enumeration
*
@@ -1795,6 +1801,7 @@ static void iwl_mei_dbgfs_unregister(struct iwl_mei *mei) {}
static int iwl_mei_probe(struct mei_cl_device *cldev,
const struct mei_cl_device_id *id)
{
+ int alloc_retry = ALLOC_SHARED_MEM_RETRY_MAX_NUM;
struct iwl_mei *mei;
int ret;
@@ -1812,15 +1819,31 @@ static int iwl_mei_probe(struct mei_cl_device *cldev,
mei_cldev_set_drvdata(cldev, mei);
mei->cldev = cldev;
- /*
- * The CSME firmware needs to boot the internal WLAN client. Wait here
- * so that the DMA map request will succeed.
- */
- msleep(20);
+ do {
+ ret = iwl_mei_alloc_shared_mem(cldev);
+ if (!ret)
+ break;
+ /*
+ * The CSME firmware needs to boot the internal WLAN client.
+ * This can take time in certain configurations (usually
+ * upon resume and when the whole CSME firmware is shut down
+ * during suspend).
+ *
+ * Wait a bit before retrying and hope we'll succeed next time.
+ */
- ret = iwl_mei_alloc_shared_mem(cldev);
- if (ret)
+ dev_dbg(&cldev->dev,
+ "Couldn't allocate the shared memory: %d, attempt %d / %d\n",
+ ret, alloc_retry, ALLOC_SHARED_MEM_RETRY_MAX_NUM);
+ msleep(100);
+ alloc_retry--;
+ } while (alloc_retry);
+
+ if (ret) {
+ dev_err(&cldev->dev, "Couldn't allocate the shared memory: %d\n",
+ ret);
goto free;
+ }
iwl_mei_init_shared_mem(mei);
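
Replacing the single msleep(20) with a bounded retry loop tolerates the slower CSME boot paths mentioned in the comment without waiting forever. The shape of the loop, reduced to a standalone sketch in which try_alloc() is a stand-in that succeeds on its third call:

#include <stdio.h>
#include <unistd.h>

#define RETRY_MAX 3

/* Stand-in for an operation that needs the other side to finish
 * booting: fails twice, then succeeds.
 */
static int try_alloc(void)
{
	static int calls;

	return ++calls < 3 ? -11 /* like -EAGAIN */ : 0;
}

int main(void)
{
	int retry = RETRY_MAX;
	int ret;

	do {
		ret = try_alloc();
		if (!ret)
			break;
		fprintf(stderr, "alloc failed (%d), %d tries left\n",
			ret, retry - 1);
		usleep(100 * 1000);	/* models msleep(100) */
	} while (--retry);

	printf("final: %d\n", ret);
	return ret ? 1 : 0;
}
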
diff --git a/drivers/net/wireless/intel/iwlwifi/mei/net.c b/drivers/net/wireless/intel/iwlwifi/mei/net.c
index 5f966af69720..468102a95e1b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mei/net.c
+++ b/drivers/net/wireless/intel/iwlwifi/mei/net.c
@@ -195,8 +195,7 @@ static bool iwl_mei_rx_filter_ipv4(struct sk_buff *skb,
bool match;
if (!pskb_may_pull(skb, skb_network_offset(skb) + sizeof(*iphdr)) ||
- !pskb_may_pull(skb, skb_network_offset(skb) +
- sizeof(ip_hdrlen(skb) - sizeof(*iphdr))))
+ !pskb_may_pull(skb, skb_network_offset(skb) + ip_hdrlen(skb)))
return false;
iphdrlen = ip_hdrlen(skb);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
index fb4920b01dbb..445c94adb076 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
@@ -5,6 +5,7 @@
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
#include <linux/vmalloc.h>
+#include <linux/err.h>
#include <linux/ieee80211.h>
#include <linux/netdevice.h>
@@ -1369,189 +1370,6 @@ static ssize_t iwl_dbgfs_dbg_time_point_write(struct iwl_mvm *mvm,
return count;
}
-#define ADD_TEXT(...) pos += scnprintf(buf + pos, bufsz - pos, __VA_ARGS__)
-#ifdef CONFIG_IWLWIFI_BCAST_FILTERING
-static ssize_t iwl_dbgfs_bcast_filters_read(struct file *file,
- char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- struct iwl_mvm *mvm = file->private_data;
- struct iwl_bcast_filter_cmd cmd;
- const struct iwl_fw_bcast_filter *filter;
- char *buf;
- int bufsz = 1024;
- int i, j, pos = 0;
- ssize_t ret;
-
- buf = kzalloc(bufsz, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- mutex_lock(&mvm->mutex);
- if (!iwl_mvm_bcast_filter_build_cmd(mvm, &cmd)) {
- ADD_TEXT("None\n");
- mutex_unlock(&mvm->mutex);
- goto out;
- }
- mutex_unlock(&mvm->mutex);
-
- for (i = 0; cmd.filters[i].attrs[0].mask; i++) {
- filter = &cmd.filters[i];
-
- ADD_TEXT("Filter [%d]:\n", i);
- ADD_TEXT("\tDiscard=%d\n", filter->discard);
- ADD_TEXT("\tFrame Type: %s\n",
- filter->frame_type ? "IPv4" : "Generic");
-
- for (j = 0; j < ARRAY_SIZE(filter->attrs); j++) {
- const struct iwl_fw_bcast_filter_attr *attr;
-
- attr = &filter->attrs[j];
- if (!attr->mask)
- break;
-
- ADD_TEXT("\tAttr [%d]: offset=%d (from %s), mask=0x%x, value=0x%x reserved=0x%x\n",
- j, attr->offset,
- attr->offset_type ? "IP End" :
- "Payload Start",
- be32_to_cpu(attr->mask),
- be32_to_cpu(attr->val),
- le16_to_cpu(attr->reserved1));
- }
- }
-out:
- ret = simple_read_from_buffer(user_buf, count, ppos, buf, pos);
- kfree(buf);
- return ret;
-}
-
-static ssize_t iwl_dbgfs_bcast_filters_write(struct iwl_mvm *mvm, char *buf,
- size_t count, loff_t *ppos)
-{
- int pos, next_pos;
- struct iwl_fw_bcast_filter filter = {};
- struct iwl_bcast_filter_cmd cmd;
- u32 filter_id, attr_id, mask, value;
- int err = 0;
-
- if (sscanf(buf, "%d %hhi %hhi %n", &filter_id, &filter.discard,
- &filter.frame_type, &pos) != 3)
- return -EINVAL;
-
- if (filter_id >= ARRAY_SIZE(mvm->dbgfs_bcast_filtering.cmd.filters) ||
- filter.frame_type > BCAST_FILTER_FRAME_TYPE_IPV4)
- return -EINVAL;
-
- for (attr_id = 0; attr_id < ARRAY_SIZE(filter.attrs);
- attr_id++) {
- struct iwl_fw_bcast_filter_attr *attr =
- &filter.attrs[attr_id];
-
- if (pos >= count)
- break;
-
- if (sscanf(&buf[pos], "%hhi %hhi %i %i %n",
- &attr->offset, &attr->offset_type,
- &mask, &value, &next_pos) != 4)
- return -EINVAL;
-
- attr->mask = cpu_to_be32(mask);
- attr->val = cpu_to_be32(value);
- if (mask)
- filter.num_attrs++;
-
- pos += next_pos;
- }
-
- mutex_lock(&mvm->mutex);
- memcpy(&mvm->dbgfs_bcast_filtering.cmd.filters[filter_id],
- &filter, sizeof(filter));
-
- /* send updated bcast filtering configuration */
- if (iwl_mvm_firmware_running(mvm) &&
- mvm->dbgfs_bcast_filtering.override &&
- iwl_mvm_bcast_filter_build_cmd(mvm, &cmd))
- err = iwl_mvm_send_cmd_pdu(mvm, BCAST_FILTER_CMD, 0,
- sizeof(cmd), &cmd);
- mutex_unlock(&mvm->mutex);
-
- return err ?: count;
-}
-
-static ssize_t iwl_dbgfs_bcast_filters_macs_read(struct file *file,
- char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- struct iwl_mvm *mvm = file->private_data;
- struct iwl_bcast_filter_cmd cmd;
- char *buf;
- int bufsz = 1024;
- int i, pos = 0;
- ssize_t ret;
-
- buf = kzalloc(bufsz, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- mutex_lock(&mvm->mutex);
- if (!iwl_mvm_bcast_filter_build_cmd(mvm, &cmd)) {
- ADD_TEXT("None\n");
- mutex_unlock(&mvm->mutex);
- goto out;
- }
- mutex_unlock(&mvm->mutex);
-
- for (i = 0; i < ARRAY_SIZE(cmd.macs); i++) {
- const struct iwl_fw_bcast_mac *mac = &cmd.macs[i];
-
- ADD_TEXT("Mac [%d]: discard=%d attached_filters=0x%x\n",
- i, mac->default_discard, mac->attached_filters);
- }
-out:
- ret = simple_read_from_buffer(user_buf, count, ppos, buf, pos);
- kfree(buf);
- return ret;
-}
-
-static ssize_t iwl_dbgfs_bcast_filters_macs_write(struct iwl_mvm *mvm,
- char *buf, size_t count,
- loff_t *ppos)
-{
- struct iwl_bcast_filter_cmd cmd;
- struct iwl_fw_bcast_mac mac = {};
- u32 mac_id, attached_filters;
- int err = 0;
-
- if (!mvm->bcast_filters)
- return -ENOENT;
-
- if (sscanf(buf, "%d %hhi %i", &mac_id, &mac.default_discard,
- &attached_filters) != 3)
- return -EINVAL;
-
- if (mac_id >= ARRAY_SIZE(cmd.macs) ||
- mac.default_discard > 1 ||
- attached_filters >= BIT(ARRAY_SIZE(cmd.filters)))
- return -EINVAL;
-
- mac.attached_filters = cpu_to_le16(attached_filters);
-
- mutex_lock(&mvm->mutex);
- memcpy(&mvm->dbgfs_bcast_filtering.cmd.macs[mac_id],
- &mac, sizeof(mac));
-
- /* send updated bcast filtering configuration */
- if (iwl_mvm_firmware_running(mvm) &&
- mvm->dbgfs_bcast_filtering.override &&
- iwl_mvm_bcast_filter_build_cmd(mvm, &cmd))
- err = iwl_mvm_send_cmd_pdu(mvm, BCAST_FILTER_CMD, 0,
- sizeof(cmd), &cmd);
- mutex_unlock(&mvm->mutex);
-
- return err ?: count;
-}
-#endif
-
#define MVM_DEBUGFS_WRITE_FILE_OPS(name, bufsz) \
_MVM_DEBUGFS_WRITE_FILE_OPS(name, bufsz, struct iwl_mvm)
#define MVM_DEBUGFS_READ_WRITE_FILE_OPS(name, bufsz) \
@@ -1881,11 +1699,6 @@ MVM_DEBUGFS_WRITE_FILE_OPS(inject_beacon_ie_restore, 512);
MVM_DEBUGFS_READ_FILE_OPS(uapsd_noagg_bssids);
-#ifdef CONFIG_IWLWIFI_BCAST_FILTERING
-MVM_DEBUGFS_READ_WRITE_FILE_OPS(bcast_filters, 256);
-MVM_DEBUGFS_READ_WRITE_FILE_OPS(bcast_filters_macs, 256);
-#endif
-
#ifdef CONFIG_ACPI
MVM_DEBUGFS_READ_FILE_OPS(sar_geo_profile);
#endif
@@ -2045,7 +1858,6 @@ void iwl_mvm_sta_add_debugfs(struct ieee80211_hw *hw,
void iwl_mvm_dbgfs_register(struct iwl_mvm *mvm)
{
struct dentry *bcast_dir __maybe_unused;
- char buf[100];
spin_lock_init(&mvm->drv_stats_lock);
@@ -2097,21 +1909,6 @@ void iwl_mvm_dbgfs_register(struct iwl_mvm *mvm)
MVM_DEBUGFS_ADD_FILE(uapsd_noagg_bssids, mvm->debugfs_dir, S_IRUSR);
-#ifdef CONFIG_IWLWIFI_BCAST_FILTERING
- if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BCAST_FILTERING) {
- bcast_dir = debugfs_create_dir("bcast_filtering",
- mvm->debugfs_dir);
-
- debugfs_create_bool("override", 0600, bcast_dir,
- &mvm->dbgfs_bcast_filtering.override);
-
- MVM_DEBUGFS_ADD_FILE_ALIAS("filters", bcast_filters,
- bcast_dir, 0600);
- MVM_DEBUGFS_ADD_FILE_ALIAS("macs", bcast_filters_macs,
- bcast_dir, 0600);
- }
-#endif
-
#ifdef CONFIG_PM_SLEEP
MVM_DEBUGFS_ADD_FILE(d3_test, mvm->debugfs_dir, 0400);
debugfs_create_bool("d3_wake_sysassert", 0600, mvm->debugfs_dir,
@@ -2142,6 +1939,11 @@ void iwl_mvm_dbgfs_register(struct iwl_mvm *mvm)
* Create a symlink with mac80211. It will be removed when mac80211
* exits (before the opmode exits, which removes the target).
*/
- snprintf(buf, 100, "../../%pd2", mvm->debugfs_dir->d_parent);
- debugfs_create_symlink("iwlwifi", mvm->hw->wiphy->debugfsdir, buf);
+ if (!IS_ERR(mvm->debugfs_dir)) {
+ char buf[100];
+
+ snprintf(buf, 100, "../../%pd2", mvm->debugfs_dir->d_parent);
+ debugfs_create_symlink("iwlwifi", mvm->hw->wiphy->debugfsdir,
+ buf);
+ }
}
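
The IS_ERR() guard is needed because debugfs directory creation reports failure through an ERR_PTR-encoded pointer rather than NULL, and dereferencing such a pointer (here via d_parent) would oops. A standalone model of the ERR_PTR()/IS_ERR() encoding, simplified from the kernel's include/linux/err.h:

#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

/* Userspace model of ERR_PTR()/IS_ERR(): small negative errno values
 * are encoded in the top 4095 addresses, so an error "pointer" must
 * never be dereferenced.
 */
static void *ERR_PTR(long err) { return (void *)err; }
static int IS_ERR(const void *ptr)
{
	return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}

int main(void)
{
	void *dir = ERR_PTR(-12);	/* e.g. a failed directory creation */

	if (IS_ERR(dir))
		printf("skip: dir encodes errno %ld\n", -(long)(intptr_t)dir);
	else
		printf("safe to dereference\n");
	return 0;
}
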
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 6f4690e56a46..ae589b3b8c46 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -1741,7 +1741,7 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
ret = iwl_mvm_sar_init(mvm);
if (ret == 0)
ret = iwl_mvm_sar_geo_init(mvm);
- else if (ret < 0)
+ if (ret < 0)
goto error;
ret = iwl_mvm_sgom_init(mvm);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 65f4fe3ef504..709a3df57b10 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -55,79 +55,6 @@ static const struct ieee80211_iface_combination iwl_mvm_iface_combinations[] = {
},
};
-#ifdef CONFIG_IWLWIFI_BCAST_FILTERING
-/*
- * Use the reserved field to indicate magic values.
- * these values will only be used internally by the driver,
- * and won't make it to the fw (reserved will be 0).
- * BC_FILTER_MAGIC_IP - configure the val of this attribute to
- * be the vif's ip address. in case there is not a single
- * ip address (0, or more than 1), this attribute will
- * be skipped.
- * BC_FILTER_MAGIC_MAC - set the val of this attribute to
- * the LSB bytes of the vif's mac address
- */
-enum {
- BC_FILTER_MAGIC_NONE = 0,
- BC_FILTER_MAGIC_IP,
- BC_FILTER_MAGIC_MAC,
-};
-
-static const struct iwl_fw_bcast_filter iwl_mvm_default_bcast_filters[] = {
- {
- /* arp */
- .discard = 0,
- .frame_type = BCAST_FILTER_FRAME_TYPE_ALL,
- .attrs = {
- {
- /* frame type - arp, hw type - ethernet */
- .offset_type =
- BCAST_FILTER_OFFSET_PAYLOAD_START,
- .offset = sizeof(rfc1042_header),
- .val = cpu_to_be32(0x08060001),
- .mask = cpu_to_be32(0xffffffff),
- },
- {
- /* arp dest ip */
- .offset_type =
- BCAST_FILTER_OFFSET_PAYLOAD_START,
- .offset = sizeof(rfc1042_header) + 2 +
- sizeof(struct arphdr) +
- ETH_ALEN + sizeof(__be32) +
- ETH_ALEN,
- .mask = cpu_to_be32(0xffffffff),
- /* mark it as special field */
- .reserved1 = cpu_to_le16(BC_FILTER_MAGIC_IP),
- },
- },
- },
- {
- /* dhcp offer bcast */
- .discard = 0,
- .frame_type = BCAST_FILTER_FRAME_TYPE_IPV4,
- .attrs = {
- {
- /* udp dest port - 68 (bootp client)*/
- .offset_type = BCAST_FILTER_OFFSET_IP_END,
- .offset = offsetof(struct udphdr, dest),
- .val = cpu_to_be32(0x00440000),
- .mask = cpu_to_be32(0xffff0000),
- },
- {
- /* dhcp - lsb bytes of client hw address */
- .offset_type = BCAST_FILTER_OFFSET_IP_END,
- .offset = 38,
- .mask = cpu_to_be32(0xffffffff),
- /* mark it as special field */
- .reserved1 = cpu_to_le16(BC_FILTER_MAGIC_MAC),
- },
- },
- },
- /* last filter must be empty */
- {},
-};
-#endif
-
static const struct cfg80211_pmsr_capabilities iwl_mvm_pmsr_capa = {
.max_peers = IWL_MVM_TOF_MAX_APS,
.report_ap_tsf = 1,
@@ -299,7 +226,6 @@ static const u8 he_if_types_ext_capa_sta[] = {
[0] = WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING,
[2] = WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT,
[7] = WLAN_EXT_CAPA8_OPMODE_NOTIF,
- [9] = WLAN_EXT_CAPA10_TWT_REQUESTER_SUPPORT,
};
static const struct wiphy_iftype_ext_capab he_iftypes_ext_capa[] = {
@@ -693,11 +619,6 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
}
#endif
-#ifdef CONFIG_IWLWIFI_BCAST_FILTERING
- /* assign default bcast filtering configuration */
- mvm->bcast_filters = iwl_mvm_default_bcast_filters;
-#endif
-
ret = iwl_mvm_leds_init(mvm);
if (ret)
return ret;
@@ -1853,162 +1774,6 @@ static void iwl_mvm_config_iface_filter(struct ieee80211_hw *hw,
mutex_unlock(&mvm->mutex);
}
-#ifdef CONFIG_IWLWIFI_BCAST_FILTERING
-struct iwl_bcast_iter_data {
- struct iwl_mvm *mvm;
- struct iwl_bcast_filter_cmd *cmd;
- u8 current_filter;
-};
-
-static void
-iwl_mvm_set_bcast_filter(struct ieee80211_vif *vif,
- const struct iwl_fw_bcast_filter *in_filter,
- struct iwl_fw_bcast_filter *out_filter)
-{
- struct iwl_fw_bcast_filter_attr *attr;
- int i;
-
- memcpy(out_filter, in_filter, sizeof(*out_filter));
-
- for (i = 0; i < ARRAY_SIZE(out_filter->attrs); i++) {
- attr = &out_filter->attrs[i];
-
- if (!attr->mask)
- break;
-
- switch (attr->reserved1) {
- case cpu_to_le16(BC_FILTER_MAGIC_IP):
- if (vif->bss_conf.arp_addr_cnt != 1) {
- attr->mask = 0;
- continue;
- }
-
- attr->val = vif->bss_conf.arp_addr_list[0];
- break;
- case cpu_to_le16(BC_FILTER_MAGIC_MAC):
- attr->val = *(__be32 *)&vif->addr[2];
- break;
- default:
- break;
- }
- attr->reserved1 = 0;
- out_filter->num_attrs++;
- }
-}
-
-static void iwl_mvm_bcast_filter_iterator(void *_data, u8 *mac,
- struct ieee80211_vif *vif)
-{
- struct iwl_bcast_iter_data *data = _data;
- struct iwl_mvm *mvm = data->mvm;
- struct iwl_bcast_filter_cmd *cmd = data->cmd;
- struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
- struct iwl_fw_bcast_mac *bcast_mac;
- int i;
-
- if (WARN_ON(mvmvif->id >= ARRAY_SIZE(cmd->macs)))
- return;
-
- bcast_mac = &cmd->macs[mvmvif->id];
-
- /*
- * enable filtering only for associated stations, but not for P2P
- * Clients
- */
- if (vif->type != NL80211_IFTYPE_STATION || vif->p2p ||
- !vif->bss_conf.assoc)
- return;
-
- bcast_mac->default_discard = 1;
-
- /* copy all configured filters */
- for (i = 0; mvm->bcast_filters[i].attrs[0].mask; i++) {
- /*
- * Make sure we don't exceed our filters limit.
- * if there is still a valid filter to be configured,
- * be on the safe side and just allow bcast for this mac.
- */
- if (WARN_ON_ONCE(data->current_filter >=
- ARRAY_SIZE(cmd->filters))) {
- bcast_mac->default_discard = 0;
- bcast_mac->attached_filters = 0;
- break;
- }
-
- iwl_mvm_set_bcast_filter(vif,
- &mvm->bcast_filters[i],
- &cmd->filters[data->current_filter]);
-
- /* skip current filter if it contains no attributes */
- if (!cmd->filters[data->current_filter].num_attrs)
- continue;
-
- /* attach the filter to current mac */
- bcast_mac->attached_filters |=
- cpu_to_le16(BIT(data->current_filter));
-
- data->current_filter++;
- }
-}
-
-bool iwl_mvm_bcast_filter_build_cmd(struct iwl_mvm *mvm,
- struct iwl_bcast_filter_cmd *cmd)
-{
- struct iwl_bcast_iter_data iter_data = {
- .mvm = mvm,
- .cmd = cmd,
- };
-
- if (IWL_MVM_FW_BCAST_FILTER_PASS_ALL)
- return false;
-
- memset(cmd, 0, sizeof(*cmd));
- cmd->max_bcast_filters = ARRAY_SIZE(cmd->filters);
- cmd->max_macs = ARRAY_SIZE(cmd->macs);
-
-#ifdef CONFIG_IWLWIFI_DEBUGFS
- /* use debugfs filters/macs if override is configured */
- if (mvm->dbgfs_bcast_filtering.override) {
- memcpy(cmd->filters, &mvm->dbgfs_bcast_filtering.cmd.filters,
- sizeof(cmd->filters));
- memcpy(cmd->macs, &mvm->dbgfs_bcast_filtering.cmd.macs,
- sizeof(cmd->macs));
- return true;
- }
-#endif
-
- /* if no filters are configured, do nothing */
- if (!mvm->bcast_filters)
- return false;
-
- /* configure and attach these filters for each associated sta vif */
- ieee80211_iterate_active_interfaces(
- mvm->hw, IEEE80211_IFACE_ITER_NORMAL,
- iwl_mvm_bcast_filter_iterator, &iter_data);
-
- return true;
-}
-
-static int iwl_mvm_configure_bcast_filter(struct iwl_mvm *mvm)
-{
- struct iwl_bcast_filter_cmd cmd;
-
- if (!(mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BCAST_FILTERING))
- return 0;
-
- if (!iwl_mvm_bcast_filter_build_cmd(mvm, &cmd))
- return 0;
-
- return iwl_mvm_send_cmd_pdu(mvm, BCAST_FILTER_CMD, 0,
- sizeof(cmd), &cmd);
-}
-#else
-static inline int iwl_mvm_configure_bcast_filter(struct iwl_mvm *mvm)
-{
- return 0;
-}
-#endif
-
static int iwl_mvm_update_mu_groups(struct iwl_mvm *mvm,
struct ieee80211_vif *vif)
{
@@ -2520,7 +2285,6 @@ static void iwl_mvm_bss_info_changed_station(struct iwl_mvm *mvm,
}
iwl_mvm_recalc_multicast(mvm);
- iwl_mvm_configure_bcast_filter(mvm);
/* reset rssi values */
mvmvif->bf_data.ave_beacon_signal = 0;
@@ -2570,11 +2334,6 @@ static void iwl_mvm_bss_info_changed_station(struct iwl_mvm *mvm,
}
}
- if (changes & BSS_CHANGED_ARP_FILTER) {
- IWL_DEBUG_MAC80211(mvm, "arp filter changed\n");
- iwl_mvm_configure_bcast_filter(mvm);
- }
-
if (changes & BSS_CHANGED_BANDWIDTH)
iwl_mvm_apply_fw_smps_request(vif);
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 1dcbb0eb63c3..d78f40730594 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -884,17 +884,6 @@ struct iwl_mvm {
/* rx chain antennas set through debugfs for the scan command */
u8 scan_rx_ant;
-#ifdef CONFIG_IWLWIFI_BCAST_FILTERING
- /* broadcast filters to configure for each associated station */
- const struct iwl_fw_bcast_filter *bcast_filters;
-#ifdef CONFIG_IWLWIFI_DEBUGFS
- struct {
- bool override;
- struct iwl_bcast_filter_cmd cmd;
- } dbgfs_bcast_filtering;
-#endif
-#endif
-
/* Internal station */
struct iwl_mvm_int_sta aux_sta;
struct iwl_mvm_int_sta snif_sta;
@@ -1593,8 +1582,6 @@ int iwl_mvm_up(struct iwl_mvm *mvm);
int iwl_mvm_load_d3_fw(struct iwl_mvm *mvm);
int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm);
-bool iwl_mvm_bcast_filter_build_cmd(struct iwl_mvm *mvm,
- struct iwl_bcast_filter_cmd *cmd);
/*
* FW notifications / CMD responses handlers
@@ -2225,7 +2212,7 @@ static inline void iwl_mvm_mei_device_down(struct iwl_mvm *mvm)
static inline void iwl_mvm_mei_set_sw_rfkill_state(struct iwl_mvm *mvm)
{
bool sw_rfkill =
- mvm->hw_registered ? rfkill_blocked(mvm->hw->wiphy->rfkill) : false;
+ mvm->hw_registered ? rfkill_soft_blocked(mvm->hw->wiphy->rfkill) : false;
if (mvm->mei_registered)
iwl_mei_set_rfkill_state(iwl_mvm_is_radio_killed(mvm),
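
The mvm.h hunk above also swaps rfkill_blocked() for rfkill_soft_blocked(), so only a user-requested (soft) block is forwarded to the MEI layer; the hard (hardware) block no longer factors in. A minimal sketch of the distinction, using only the rfkill API (the helper name is hypothetical):

        #include <linux/rfkill.h>

        /* rfkill_blocked():      true for a soft OR hard block
         * rfkill_soft_blocked(): true only for the user-requested block */
        static bool foo_sw_rfkill(struct rfkill *rfkill)
        {
                return rfkill ? rfkill_soft_blocked(rfkill) : false;
        }
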
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index 87630d38dc52..1f8b97995b94 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -469,7 +469,6 @@ static const struct iwl_hcmd_names iwl_mvm_legacy_names[] = {
HCMD_NAME(MCC_CHUB_UPDATE_CMD),
HCMD_NAME(MARKER_CMD),
HCMD_NAME(BT_PROFILE_NOTIFICATION),
- HCMD_NAME(BCAST_FILTER_CMD),
HCMD_NAME(MCAST_FILTER_CMD),
HCMD_NAME(REPLY_SF_CFG_CMD),
HCMD_NAME(REPLY_BEACON_FILTERING_CMD),
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index 6fa2c12f7955..9213f8518f10 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -1427,7 +1427,7 @@ static void iwl_mvm_hwrate_to_tx_status(const struct iwl_fw *fw,
struct ieee80211_tx_rate *r = &info->status.rates[0];
if (iwl_fw_lookup_notif_ver(fw, LONG_GROUP,
- TX_CMD, 0) > 6)
+ TX_CMD, 0) <= 6)
rate_n_flags = iwl_new_rate_from_v1(rate_n_flags);
info->status.antenna =
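
Note the direction of the fixed condition in the tx.c hunk: firmware whose TX_CMD notification version is 6 or lower still reports rates in the v1 format, so that is the case that needs iwl_new_rate_from_v1() before decoding. Schematically (a sketch of the same logic, not additional driver code):

        /* old firmware (notif version <= 6): convert v1 -> new rate format */
        if (iwl_fw_lookup_notif_ver(fw, LONG_GROUP, TX_CMD, 0) <= 6)
                rate_n_flags = iwl_new_rate_from_v1(rate_n_flags);
        /* newer firmware already reports the new rate format as-is */
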
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/vendor-cmd.c b/drivers/net/wireless/intel/iwlwifi/mvm/vendor-cmd.c
index 78450366312b..080a1587caa5 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/vendor-cmd.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/vendor-cmd.c
@@ -71,12 +71,13 @@ static int iwl_mvm_vendor_host_get_ownership(struct wiphy *wiphy,
{
struct ieee80211_hw *hw = wiphy_to_ieee80211_hw(wiphy);
struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
+ int ret;
mutex_lock(&mvm->mutex);
- iwl_mvm_mei_get_ownership(mvm);
+ ret = iwl_mvm_mei_get_ownership(mvm);
mutex_unlock(&mvm->mutex);
- return 0;
+ return ret;
}
static const struct wiphy_vendor_command iwl_mvm_vendor_commands[] = {
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
index 0febdcacbd42..94f40c4d2421 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
@@ -385,8 +385,7 @@ int iwl_trans_pcie_gen2_start_fw(struct iwl_trans *trans,
/* This may fail if AMT took ownership of the device */
if (iwl_pcie_prepare_card_hw(trans)) {
IWL_WARN(trans, "Exit HW not ready\n");
- ret = -EIO;
- goto out;
+ return -EIO;
}
iwl_enable_rfkill_int(trans);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index a63386a01232..ef14584fc0a1 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -1329,8 +1329,7 @@ static int iwl_trans_pcie_start_fw(struct iwl_trans *trans,
/* This may fail if AMT took ownership of the device */
if (iwl_pcie_prepare_card_hw(trans)) {
IWL_WARN(trans, "Exit HW not ready\n");
- ret = -EIO;
- goto out;
+ return -EIO;
}
iwl_enable_rfkill_int(trans);
diff --git a/drivers/net/wireless/intersil/p54/p54spi.c b/drivers/net/wireless/intersil/p54/p54spi.c
index ab0fe8565851..f99b7ba69fc3 100644
--- a/drivers/net/wireless/intersil/p54/p54spi.c
+++ b/drivers/net/wireless/intersil/p54/p54spi.c
@@ -669,7 +669,7 @@ err_free:
return ret;
}
-static int p54spi_remove(struct spi_device *spi)
+static void p54spi_remove(struct spi_device *spi)
{
struct p54s_priv *priv = spi_get_drvdata(spi);
@@ -684,8 +684,6 @@ static int p54spi_remove(struct spi_device *spi)
mutex_destroy(&priv->mutex);
p54_free_common(priv->hw);
-
- return 0;
}
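
This p54spi hunk is the first of roughly a dozen identical conversions in this section (libertas, wilc1000, cw1200, wl1251, wlcore, nfcmrvl, st-nci, st95hf and trf7970a follow the same shape): the SPI core's spi_driver::remove callback now returns void, so the trailing "return 0;" goes away. A minimal sketch of the new shape, with hypothetical foo_* names:

        #include <linux/spi/spi.h>

        static void foo_spi_remove(struct spi_device *spi)
        {
                struct foo_priv *priv = spi_get_drvdata(spi);   /* hypothetical */

                foo_teardown(priv);     /* hypothetical cleanup; errors can only
                                         * be logged here, never returned */
        }

        static struct spi_driver foo_spi_driver = {
                .driver = { .name = "foo" },
                .remove = foo_spi_remove,
        };
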
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 8d54f9face2f..fc5725f6daee 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2336,6 +2336,15 @@ static void hw_scan_work(struct work_struct *work)
if (req->ie_len)
skb_put_data(probe, req->ie, req->ie_len);
+ if (!ieee80211_tx_prepare_skb(hwsim->hw,
+ hwsim->hw_scan_vif,
+ probe,
+ hwsim->tmp_chan->band,
+ NULL)) {
+ kfree_skb(probe);
+ continue;
+ }
+
local_bh_disable();
mac80211_hwsim_tx_frame(hwsim->hw, probe,
hwsim->tmp_chan);
@@ -3770,6 +3779,10 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2,
}
txi->flags |= IEEE80211_TX_STAT_ACK;
}
+
+ if (hwsim_flags & HWSIM_TX_CTL_NO_ACK)
+ txi->flags |= IEEE80211_TX_STAT_NOACK_TRANSMITTED;
+
ieee80211_tx_status_irqsafe(data2->hw, skb);
return 0;
out:
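
The first hwsim hunk validates each scan probe with ieee80211_tx_prepare_skb() and drops it when mac80211 refuses to prepare it; the second reports no-ack frames via IEEE80211_TX_STAT_NOACK_TRANSMITTED. The prepare-or-free pattern in isolation (a sketch, variable names assumed):

        /* let mac80211 fill in tx info, or drop the frame ourselves */
        if (!ieee80211_tx_prepare_skb(hw, vif, skb, band, NULL)) {
                kfree_skb(skb);         /* caller still owns the skb on failure */
                return;
        }
        /* skb is now fully prepared for the simulated radio */
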
diff --git a/drivers/net/wireless/marvell/libertas/if_spi.c b/drivers/net/wireless/marvell/libertas/if_spi.c
index cd9f8ecf171f..ff1c7ec8c450 100644
--- a/drivers/net/wireless/marvell/libertas/if_spi.c
+++ b/drivers/net/wireless/marvell/libertas/if_spi.c
@@ -1195,7 +1195,7 @@ out:
return err;
}
-static int libertas_spi_remove(struct spi_device *spi)
+static void libertas_spi_remove(struct spi_device *spi)
{
struct if_spi_card *card = spi_get_drvdata(spi);
struct lbs_private *priv = card->priv;
@@ -1212,8 +1212,6 @@ static int libertas_spi_remove(struct spi_device *spi)
if (card->pdata->teardown)
card->pdata->teardown(spi);
free_if_spi_card(card);
-
- return 0;
}
static int if_spi_suspend(struct device *dev)
diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c
index 2c2ed4b09efd..d2db52289399 100644
--- a/drivers/net/wireless/microchip/wilc1000/spi.c
+++ b/drivers/net/wireless/microchip/wilc1000/spi.c
@@ -240,7 +240,7 @@ free:
return ret;
}
-static int wilc_bus_remove(struct spi_device *spi)
+static void wilc_bus_remove(struct spi_device *spi)
{
struct wilc *wilc = spi_get_drvdata(spi);
struct wilc_spi *spi_priv = wilc->bus_data;
@@ -248,8 +248,6 @@ static int wilc_bus_remove(struct spi_device *spi)
clk_disable_unprepare(wilc->rtc_clk);
wilc_netdev_cleanup(wilc);
kfree(spi_priv);
-
- return 0;
}
static const struct of_device_id wilc_of_match[] = {
diff --git a/drivers/net/wireless/st/cw1200/cw1200_spi.c b/drivers/net/wireless/st/cw1200/cw1200_spi.c
index 271ed2ce2d7f..fe0d220da44d 100644
--- a/drivers/net/wireless/st/cw1200/cw1200_spi.c
+++ b/drivers/net/wireless/st/cw1200/cw1200_spi.c
@@ -423,7 +423,7 @@ static int cw1200_spi_probe(struct spi_device *func)
}
/* Disconnect Function to be called by SPI stack when device is disconnected */
-static int cw1200_spi_disconnect(struct spi_device *func)
+static void cw1200_spi_disconnect(struct spi_device *func)
{
struct hwbus_priv *self = spi_get_drvdata(func);
@@ -435,8 +435,6 @@ static int cw1200_spi_disconnect(struct spi_device *func)
}
}
cw1200_spi_off(dev_get_platdata(&func->dev));
-
- return 0;
}
static int __maybe_unused cw1200_spi_suspend(struct device *dev)
diff --git a/drivers/net/wireless/ti/wl1251/spi.c b/drivers/net/wireless/ti/wl1251/spi.c
index 5b894bd6237e..9df38726e8b0 100644
--- a/drivers/net/wireless/ti/wl1251/spi.c
+++ b/drivers/net/wireless/ti/wl1251/spi.c
@@ -327,14 +327,12 @@ out_free:
return ret;
}
-static int wl1251_spi_remove(struct spi_device *spi)
+static void wl1251_spi_remove(struct spi_device *spi)
{
struct wl1251 *wl = spi_get_drvdata(spi);
wl1251_free_hw(wl);
regulator_disable(wl->vio);
-
- return 0;
}
static struct spi_driver wl1251_spi_driver = {
diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c
index 354a7e1c3315..7eae1ec2eb2b 100644
--- a/drivers/net/wireless/ti/wlcore/spi.c
+++ b/drivers/net/wireless/ti/wlcore/spi.c
@@ -546,13 +546,11 @@ out_dev_put:
return ret;
}
-static int wl1271_remove(struct spi_device *spi)
+static void wl1271_remove(struct spi_device *spi)
{
struct wl12xx_spi_glue *glue = spi_get_drvdata(spi);
platform_device_unregister(glue->core);
-
- return 0;
}
static struct spi_driver wl1271_spi_driver = {
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index d24b7a7993aa..990360d75cb6 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -256,6 +256,7 @@ static void backend_disconnect(struct backend_info *be)
unsigned int queue_index;
xen_unregister_watchers(vif);
+ xenbus_rm(XBT_NIL, be->dev->nodename, "hotplug-status");
#ifdef CONFIG_DEBUG_FS
xenvif_debugfs_delif(vif);
#endif /* CONFIG_DEBUG_FS */
@@ -675,7 +676,6 @@ static void hotplug_status_changed(struct xenbus_watch *watch,
/* Not interested in this watch anymore. */
unregister_hotplug_status_watch(be);
- xenbus_rm(XBT_NIL, be->dev->nodename, "hotplug-status");
}
kfree(str);
}
@@ -824,15 +824,11 @@ static void connect(struct backend_info *be)
xenvif_carrier_on(be->vif);
unregister_hotplug_status_watch(be);
- if (xenbus_exists(XBT_NIL, dev->nodename, "hotplug-status")) {
- err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
- NULL, hotplug_status_changed,
- "%s/%s", dev->nodename,
- "hotplug-status");
- if (err)
- goto err;
+ err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, NULL,
+ hotplug_status_changed,
+ "%s/%s", dev->nodename, "hotplug-status");
+ if (!err)
be->have_hotplug_status_watch = 1;
- }
netif_tx_wake_all_queues(be->vif->dev);
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 8b18246ad999..daa4e6106aac 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -424,14 +424,12 @@ static bool xennet_tx_buf_gc(struct netfront_queue *queue)
queue->tx_link[id] = TX_LINK_NONE;
skb = queue->tx_skbs[id];
queue->tx_skbs[id] = NULL;
- if (unlikely(gnttab_query_foreign_access(
- queue->grant_tx_ref[id]) != 0)) {
+ if (unlikely(!gnttab_end_foreign_access_ref(
+ queue->grant_tx_ref[id], GNTMAP_readonly))) {
dev_alert(dev,
"Grant still in use by backend domain\n");
goto err;
}
- gnttab_end_foreign_access_ref(
- queue->grant_tx_ref[id], GNTMAP_readonly);
gnttab_release_grant_reference(
&queue->gref_tx_head, queue->grant_tx_ref[id]);
queue->grant_tx_ref[id] = GRANT_INVALID_REF;
@@ -842,6 +840,28 @@ static int xennet_close(struct net_device *dev)
return 0;
}
+static void xennet_destroy_queues(struct netfront_info *info)
+{
+ unsigned int i;
+
+ for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
+ struct netfront_queue *queue = &info->queues[i];
+
+ if (netif_running(info->netdev))
+ napi_disable(&queue->napi);
+ netif_napi_del(&queue->napi);
+ }
+
+ kfree(info->queues);
+ info->queues = NULL;
+}
+
+static void xennet_uninit(struct net_device *dev)
+{
+ struct netfront_info *np = netdev_priv(dev);
+ xennet_destroy_queues(np);
+}
+
static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val)
{
unsigned long flags;
@@ -968,7 +988,6 @@ static int xennet_get_responses(struct netfront_queue *queue,
struct device *dev = &queue->info->netdev->dev;
struct bpf_prog *xdp_prog;
struct xdp_buff xdp;
- unsigned long ret;
int slots = 1;
int err = 0;
u32 verdict;
@@ -1010,8 +1029,13 @@ static int xennet_get_responses(struct netfront_queue *queue,
goto next;
}
- ret = gnttab_end_foreign_access_ref(ref, 0);
- BUG_ON(!ret);
+ if (!gnttab_end_foreign_access_ref(ref, 0)) {
+ dev_alert(dev,
+ "Grant still in use by backend domain\n");
+ queue->info->broken = true;
+ dev_alert(dev, "Disabled for further use\n");
+ return -EINVAL;
+ }
gnttab_release_grant_reference(&queue->gref_rx_head, ref);
@@ -1232,6 +1256,10 @@ static int xennet_poll(struct napi_struct *napi, int budget)
&need_xdp_flush);
if (unlikely(err)) {
+ if (queue->info->broken) {
+ spin_unlock(&queue->rx_lock);
+ return 0;
+ }
err:
while ((skb = __skb_dequeue(&tmpq)))
__skb_queue_tail(&errq, skb);
@@ -1611,6 +1639,7 @@ static int xennet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
}
static const struct net_device_ops xennet_netdev_ops = {
+ .ndo_uninit = xennet_uninit,
.ndo_open = xennet_open,
.ndo_stop = xennet_close,
.ndo_start_xmit = xennet_start_xmit,
@@ -1895,7 +1924,7 @@ static int setup_netfront(struct xenbus_device *dev,
struct netfront_queue *queue, unsigned int feature_split_evtchn)
{
struct xen_netif_tx_sring *txs;
- struct xen_netif_rx_sring *rxs;
+ struct xen_netif_rx_sring *rxs = NULL;
grant_ref_t gref;
int err;
@@ -1915,21 +1944,21 @@ static int setup_netfront(struct xenbus_device *dev,
err = xenbus_grant_ring(dev, txs, 1, &gref);
if (err < 0)
- goto grant_tx_ring_fail;
+ goto fail;
queue->tx_ring_ref = gref;
rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
if (!rxs) {
err = -ENOMEM;
xenbus_dev_fatal(dev, err, "allocating rx ring page");
- goto alloc_rx_ring_fail;
+ goto fail;
}
SHARED_RING_INIT(rxs);
FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
err = xenbus_grant_ring(dev, rxs, 1, &gref);
if (err < 0)
- goto grant_rx_ring_fail;
+ goto fail;
queue->rx_ring_ref = gref;
if (feature_split_evtchn)
@@ -1942,22 +1971,28 @@ static int setup_netfront(struct xenbus_device *dev,
err = setup_netfront_single(queue);
if (err)
- goto alloc_evtchn_fail;
+ goto fail;
return 0;
/* If we fail to setup netfront, it is safe to just revoke access to
* granted pages because backend is not accessing it at this point.
*/
-alloc_evtchn_fail:
- gnttab_end_foreign_access_ref(queue->rx_ring_ref, 0);
-grant_rx_ring_fail:
- free_page((unsigned long)rxs);
-alloc_rx_ring_fail:
- gnttab_end_foreign_access_ref(queue->tx_ring_ref, 0);
-grant_tx_ring_fail:
- free_page((unsigned long)txs);
-fail:
+ fail:
+ if (queue->rx_ring_ref != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(queue->rx_ring_ref, 0,
+ (unsigned long)rxs);
+ queue->rx_ring_ref = GRANT_INVALID_REF;
+ } else {
+ free_page((unsigned long)rxs);
+ }
+ if (queue->tx_ring_ref != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(queue->tx_ring_ref, 0,
+ (unsigned long)txs);
+ queue->tx_ring_ref = GRANT_INVALID_REF;
+ } else {
+ free_page((unsigned long)txs);
+ }
return err;
}
@@ -2103,22 +2138,6 @@ error:
return err;
}
-static void xennet_destroy_queues(struct netfront_info *info)
-{
- unsigned int i;
-
- for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
- struct netfront_queue *queue = &info->queues[i];
-
- if (netif_running(info->netdev))
- napi_disable(&queue->napi);
- netif_napi_del(&queue->napi);
- }
-
- kfree(info->queues);
- info->queues = NULL;
-}
-
static int xennet_create_page_pool(struct netfront_queue *queue)
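
Both xen-netfront changes above follow the same hardening rule: gnttab_end_foreign_access_ref() can now report that the backend still maps the grant, and a page whose grant could not be ended must never be freed or reused. The checked pattern, as a sketch against the grant-table API of this kernel vintage:

        #include <xen/grant_table.h>

        /* returns 0 on success; on failure the page behind 'ref' must be
         * treated as lost to the (possibly malicious) backend */
        static int end_grant_checked(grant_ref_t ref, int readonly)
        {
                if (unlikely(!gnttab_end_foreign_access_ref(ref, readonly))) {
                        pr_alert("grant %u still in use by backend\n", ref);
                        return -EINVAL; /* do NOT free or reuse the page */
                }
                return 0;
        }
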
diff --git a/drivers/nfc/nfcmrvl/spi.c b/drivers/nfc/nfcmrvl/spi.c
index 5b833a9a83f8..a38e2fcdfd39 100644
--- a/drivers/nfc/nfcmrvl/spi.c
+++ b/drivers/nfc/nfcmrvl/spi.c
@@ -174,12 +174,11 @@ static int nfcmrvl_spi_probe(struct spi_device *spi)
return 0;
}
-static int nfcmrvl_spi_remove(struct spi_device *spi)
+static void nfcmrvl_spi_remove(struct spi_device *spi)
{
struct nfcmrvl_spi_drv_data *drv_data = spi_get_drvdata(spi);
nfcmrvl_nci_unregister_dev(drv_data->priv);
- return 0;
}
static const struct of_device_id of_nfcmrvl_spi_match[] __maybe_unused = {
diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c
index d7db1a0e6be1..00d8ea6dcb5d 100644
--- a/drivers/nfc/port100.c
+++ b/drivers/nfc/port100.c
@@ -1612,7 +1612,9 @@ free_nfc_dev:
nfc_digital_free_device(dev->nfc_digital_dev);
error:
+ usb_kill_urb(dev->in_urb);
usb_free_urb(dev->in_urb);
+ usb_kill_urb(dev->out_urb);
usb_free_urb(dev->out_urb);
usb_put_dev(dev->udev);
diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c
index 4e723992e74c..169eacc0a32a 100644
--- a/drivers/nfc/st-nci/spi.c
+++ b/drivers/nfc/st-nci/spi.c
@@ -263,13 +263,11 @@ static int st_nci_spi_probe(struct spi_device *dev)
return r;
}
-static int st_nci_spi_remove(struct spi_device *dev)
+static void st_nci_spi_remove(struct spi_device *dev)
{
struct st_nci_spi_phy *phy = spi_get_drvdata(dev);
ndlc_remove(phy->ndlc);
-
- return 0;
}
static struct spi_device_id st_nci_spi_id_table[] = {
diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c
index b23f47936473..ed704bb77226 100644
--- a/drivers/nfc/st95hf/core.c
+++ b/drivers/nfc/st95hf/core.c
@@ -1198,7 +1198,7 @@ err_disable_regulator:
return ret;
}
-static int st95hf_remove(struct spi_device *nfc_spi_dev)
+static void st95hf_remove(struct spi_device *nfc_spi_dev)
{
int result = 0;
unsigned char reset_cmd = ST95HF_COMMAND_RESET;
@@ -1236,8 +1236,6 @@ static int st95hf_remove(struct spi_device *nfc_spi_dev)
/* disable regulator */
if (stcontext->st95hf_supply)
regulator_disable(stcontext->st95hf_supply);
-
- return 0;
}
/* Register as SPI protocol driver */
diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c
index 29ca9c328df2..21d68664fe08 100644
--- a/drivers/nfc/trf7970a.c
+++ b/drivers/nfc/trf7970a.c
@@ -2144,7 +2144,7 @@ err_destroy_lock:
return ret;
}
-static int trf7970a_remove(struct spi_device *spi)
+static void trf7970a_remove(struct spi_device *spi)
{
struct trf7970a *trf = spi_get_drvdata(spi);
@@ -2160,8 +2160,6 @@ static int trf7970a_remove(struct spi_device *spi)
regulator_disable(trf->regulator);
mutex_destroy(&trf->lock);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/ntb/hw/intel/ntb_hw_gen4.c b/drivers/ntb/hw/intel/ntb_hw_gen4.c
index fede05151f69..4081fc538ff4 100644
--- a/drivers/ntb/hw/intel/ntb_hw_gen4.c
+++ b/drivers/ntb/hw/intel/ntb_hw_gen4.c
@@ -168,6 +168,18 @@ static enum ntb_topo gen4_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd)
return NTB_TOPO_NONE;
}
+static enum ntb_topo spr_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd)
+{
+ switch (ppd & SPR_PPD_TOPO_MASK) {
+ case SPR_PPD_TOPO_B2B_USD:
+ return NTB_TOPO_B2B_USD;
+ case SPR_PPD_TOPO_B2B_DSD:
+ return NTB_TOPO_B2B_DSD;
+ }
+
+ return NTB_TOPO_NONE;
+}
+
int gen4_init_dev(struct intel_ntb_dev *ndev)
{
struct pci_dev *pdev = ndev->ntb.pdev;
@@ -183,7 +195,10 @@ int gen4_init_dev(struct intel_ntb_dev *ndev)
}
ppd1 = ioread32(ndev->self_mmio + GEN4_PPD1_OFFSET);
- ndev->ntb.topo = gen4_ppd_topo(ndev, ppd1);
+ if (pdev_is_ICX(pdev))
+ ndev->ntb.topo = gen4_ppd_topo(ndev, ppd1);
+ else if (pdev_is_SPR(pdev))
+ ndev->ntb.topo = spr_ppd_topo(ndev, ppd1);
dev_dbg(&pdev->dev, "ppd %#x topo %s\n", ppd1,
ntb_topo_string(ndev->ntb.topo));
if (ndev->ntb.topo == NTB_TOPO_NONE)
diff --git a/drivers/ntb/hw/intel/ntb_hw_gen4.h b/drivers/ntb/hw/intel/ntb_hw_gen4.h
index 3fcd3fdce9ed..f91323eaf5ce 100644
--- a/drivers/ntb/hw/intel/ntb_hw_gen4.h
+++ b/drivers/ntb/hw/intel/ntb_hw_gen4.h
@@ -49,10 +49,14 @@
#define GEN4_PPD_CLEAR_TRN 0x0001
#define GEN4_PPD_LINKTRN 0x0008
#define GEN4_PPD_CONN_MASK 0x0300
+#define SPR_PPD_CONN_MASK 0x0700
#define GEN4_PPD_CONN_B2B 0x0200
#define GEN4_PPD_DEV_MASK 0x1000
#define GEN4_PPD_DEV_DSD 0x1000
#define GEN4_PPD_DEV_USD 0x0000
+#define SPR_PPD_DEV_MASK 0x4000
+#define SPR_PPD_DEV_DSD 0x4000
+#define SPR_PPD_DEV_USD 0x0000
#define GEN4_LINK_CTRL_LINK_DISABLE 0x0010
#define GEN4_SLOTSTS 0xb05a
@@ -62,6 +66,10 @@
#define GEN4_PPD_TOPO_B2B_USD (GEN4_PPD_CONN_B2B | GEN4_PPD_DEV_USD)
#define GEN4_PPD_TOPO_B2B_DSD (GEN4_PPD_CONN_B2B | GEN4_PPD_DEV_DSD)
+#define SPR_PPD_TOPO_MASK (SPR_PPD_CONN_MASK | SPR_PPD_DEV_MASK)
+#define SPR_PPD_TOPO_B2B_USD (GEN4_PPD_CONN_B2B | SPR_PPD_DEV_USD)
+#define SPR_PPD_TOPO_B2B_DSD (GEN4_PPD_CONN_B2B | SPR_PPD_DEV_DSD)
+
#define GEN4_DB_COUNT 32
#define GEN4_DB_LINK 32
#define GEN4_DB_LINK_BIT BIT_ULL(GEN4_DB_LINK)
@@ -112,4 +120,12 @@ static inline int pdev_is_ICX(struct pci_dev *pdev)
return 0;
}
+static inline int pdev_is_SPR(struct pci_dev *pdev)
+{
+ if (pdev_is_gen4(pdev) &&
+ pdev->revision > PCI_DEVICE_REVISION_ICX_MAX)
+ return 1;
+ return 0;
+}
+
#endif
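
A worked decode with a hypothetical PPD register value shows how the new SPR masks compose:

        u32 ppd = 0x4200;       /* hypothetical register value */
        bool b2b = (ppd & SPR_PPD_CONN_MASK) == GEN4_PPD_CONN_B2B; /* 0x0200: true */
        bool dsd = (ppd & SPR_PPD_DEV_MASK) == SPR_PPD_DEV_DSD;    /* 0x4000: true */

        /* (ppd & SPR_PPD_TOPO_MASK) == 0x4200 == SPR_PPD_TOPO_B2B_DSD,
         * so spr_ppd_topo() returns NTB_TOPO_B2B_DSD */
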
diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c
index dd683cb58d09..6295e55ef85e 100644
--- a/drivers/ntb/msi.c
+++ b/drivers/ntb/msi.c
@@ -33,7 +33,6 @@ int ntb_msi_init(struct ntb_dev *ntb,
{
phys_addr_t mw_phys_addr;
resource_size_t mw_size;
- size_t struct_size;
int peer_widx;
int peers;
int ret;
@@ -43,9 +42,8 @@ int ntb_msi_init(struct ntb_dev *ntb,
if (peers <= 0)
return -EINVAL;
- struct_size = sizeof(*ntb->msi) + sizeof(*ntb->msi->peer_mws) * peers;
-
- ntb->msi = devm_kzalloc(&ntb->dev, struct_size, GFP_KERNEL);
+ ntb->msi = devm_kzalloc(&ntb->dev, struct_size(ntb->msi, peer_mws, peers),
+ GFP_KERNEL);
if (!ntb->msi)
return -ENOMEM;
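
The msi.c hunk replaces open-coded trailing-array arithmetic with the struct_size() helper, which saturates instead of wrapping on overflow. The general idiom, with a hypothetical structure:

        #include <linux/overflow.h>
        #include <linux/slab.h>

        struct foo {                    /* hypothetical */
                unsigned int nr;
                u64 elems[];            /* flexible array member */
        };

        static struct foo *foo_alloc(unsigned int nr)
        {
                struct foo *p;

                /* struct_size(p, elems, nr) == sizeof(*p) + nr * sizeof(u64),
                 * saturating to SIZE_MAX on overflow so kzalloc() fails cleanly */
                p = kzalloc(struct_size(p, elems, nr), GFP_KERNEL);
                if (p)
                        p->nr = nr;
                return p;
        }
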
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 228c33b8d1d6..0a3873833594 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -6,7 +6,6 @@
#include <linux/blkdev.h>
#include <linux/fs.h>
-#include <linux/genhd.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/nd.h>
@@ -89,10 +88,9 @@ static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk,
*/
cur_len = min(len, bv.bv_len);
- iobuf = kmap_atomic(bv.bv_page);
- err = ndbr->do_io(ndbr, dev_offset, iobuf + bv.bv_offset,
- cur_len, rw);
- kunmap_atomic(iobuf);
+ iobuf = bvec_kmap_local(&bv);
+ err = ndbr->do_io(ndbr, dev_offset, iobuf, cur_len, rw);
+ kunmap_local(iobuf);
if (err)
return err;
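
This blk.c hunk (and the matching btt.c one below) converts kmap_atomic() to bvec_kmap_local(), which maps the bvec's page and applies bv_offset in one step; kunmap_local() then takes the returned address rather than the page. The shape of the conversion, as a sketch with a hypothetical helper:

        #include <linux/bvec.h>
        #include <linux/highmem.h>

        static void foo_bvec_copy_out(struct bio_vec *bv, void *dst)
        {
                void *src = bvec_kmap_local(bv);        /* page + bv_offset */

                memcpy(dst, src, bv->bv_len);
                kunmap_local(src);      /* pass the mapped address, not the page */
        }
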
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index da3f007a1211..9613e54c7a67 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -11,7 +11,6 @@
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/hdreg.h>
-#include <linux/genhd.h>
#include <linux/sizes.h>
#include <linux/ndctl.h>
#include <linux/fs.h>
@@ -1164,17 +1163,15 @@ static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
*/
cur_len = min(len, bv.bv_len);
- mem = kmap_atomic(bv.bv_page);
+ mem = bvec_kmap_local(&bv);
if (rw)
- ret = arena_write_bytes(arena, meta_nsoff,
- mem + bv.bv_offset, cur_len,
+ ret = arena_write_bytes(arena, meta_nsoff, mem, cur_len,
NVDIMM_IO_ATOMIC);
else
- ret = arena_read_bytes(arena, meta_nsoff,
- mem + bv.bv_offset, cur_len,
+ ret = arena_read_bytes(arena, meta_nsoff, mem, cur_len,
NVDIMM_IO_ATOMIC);
- kunmap_atomic(mem);
+ kunmap_local(mem);
if (ret)
return ret;
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 8b52e5144f08..e5a58520d398 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -4,7 +4,6 @@
*/
#include <linux/blkdev.h>
#include <linux/device.h>
-#include <linux/genhd.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/fs.h>
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 9dc7f3edd42b..5bbe31b08581 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -11,7 +11,6 @@
#include <linux/blkdev.h>
#include <linux/fcntl.h>
#include <linux/async.h>
-#include <linux/genhd.h>
#include <linux/ndctl.h>
#include <linux/sched.h>
#include <linux/slab.h>
diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
index 10351d5b49fa..c6a648fd8744 100644
--- a/drivers/nvdimm/nd_virtio.c
+++ b/drivers/nvdimm/nd_virtio.c
@@ -105,12 +105,12 @@ int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
* parent bio. Otherwise directly call nd_region flush.
*/
if (bio && bio->bi_iter.bi_sector != -1) {
- struct bio *child = bio_alloc(GFP_ATOMIC, 0);
+ struct bio *child = bio_alloc(bio->bi_bdev, 0, REQ_PREFLUSH,
+ GFP_ATOMIC);
if (!child)
return -ENOMEM;
- bio_copy_dev(child, bio);
- child->bi_opf = REQ_PREFLUSH;
+ bio_clone_blkg_association(child, bio);
child->bi_iter.bi_sector = -1;
bio_chain(child, bio);
submit_bio(child);
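
The nd_virtio.c hunk adopts the consolidated bio_alloc() signature, where the target block device and the operation flags are supplied at allocation time instead of being patched in afterwards with bio_copy_dev() and a bi_opf assignment. A minimal sketch of the new convention:

        #include <linux/bio.h>

        /* a pure flush: zero bio_vecs, REQ_PREFLUSH set at allocation */
        static struct bio *foo_alloc_flush_bio(struct block_device *bdev)
        {
                return bio_alloc(bdev, 0, REQ_PREFLUSH, GFP_ATOMIC);
        }
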
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 58eda16f5c53..c31e184bfa45 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -5,7 +5,6 @@
#include <linux/memremap.h>
#include <linux/blkdev.h>
#include <linux/device.h>
-#include <linux/genhd.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/fs.h>
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
index 59cfe13ea8a8..1f51a2361429 100644
--- a/drivers/nvdimm/pmem.h
+++ b/drivers/nvdimm/pmem.h
@@ -3,6 +3,7 @@
#define __NVDIMM_PMEM_H__
#include <linux/page-flags.h>
#include <linux/badblocks.h>
+#include <linux/memremap.h>
#include <linux/types.h>
#include <linux/pfn_t.h>
#include <linux/fs.h>
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index dc0450ca23a3..d6d056963c06 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -24,6 +24,14 @@ config NVME_MULTIPATH
/dev/nvmeXnY device will show up for each NVMe namespace,
even if it is accessible through multiple controllers.
+config NVME_VERBOSE_ERRORS
+ bool "NVMe verbose error reporting"
+ depends on NVME_CORE
+ help
+ This option enables verbose reporting for NVMe errors. The
+ error translation table will grow the kernel image size by
+ about 4 KB.
+
config NVME_HWMON
bool "NVMe hardware monitoring"
depends on (NVME_CORE=y && HWMON=y) || (NVME_CORE=m && HWMON)
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index dfaacd472e5d..476c5c988496 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o
obj-$(CONFIG_NVME_FC) += nvme-fc.o
obj-$(CONFIG_NVME_TCP) += nvme-tcp.o
-nvme-core-y := core.o ioctl.o
+nvme-core-y := core.o ioctl.o constants.o
nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o
diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c
new file mode 100644
index 000000000000..7d49eb34b348
--- /dev/null
+++ b/drivers/nvme/host/constants.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVM Express device driver verbose errors
+ * Copyright (c) 2022, Oracle and/or its affiliates
+ */
+
+#include <linux/blkdev.h>
+#include "nvme.h"
+
+#ifdef CONFIG_NVME_VERBOSE_ERRORS
+static const char * const nvme_ops[] = {
+ [nvme_cmd_flush] = "Flush",
+ [nvme_cmd_write] = "Write",
+ [nvme_cmd_read] = "Read",
+ [nvme_cmd_write_uncor] = "Write Uncorrectable",
+ [nvme_cmd_compare] = "Compare",
+ [nvme_cmd_write_zeroes] = "Write Zeroes",
+ [nvme_cmd_dsm] = "Dataset Management",
+ [nvme_cmd_verify] = "Verify",
+ [nvme_cmd_resv_register] = "Reservation Register",
+ [nvme_cmd_resv_report] = "Reservation Report",
+ [nvme_cmd_resv_acquire] = "Reservation Acquire",
+ [nvme_cmd_resv_release] = "Reservation Release",
+ [nvme_cmd_zone_mgmt_send] = "Zone Management Send",
+ [nvme_cmd_zone_mgmt_recv] = "Zone Management Receive",
+ [nvme_cmd_zone_append] = "Zone Management Append",
+};
+
+static const char * const nvme_admin_ops[] = {
+ [nvme_admin_delete_sq] = "Delete SQ",
+ [nvme_admin_create_sq] = "Create SQ",
+ [nvme_admin_get_log_page] = "Get Log Page",
+ [nvme_admin_delete_cq] = "Delete CQ",
+ [nvme_admin_create_cq] = "Create CQ",
+ [nvme_admin_identify] = "Identify",
+ [nvme_admin_abort_cmd] = "Abort Command",
+ [nvme_admin_set_features] = "Set Features",
+ [nvme_admin_get_features] = "Get Features",
+ [nvme_admin_async_event] = "Async Event",
+ [nvme_admin_ns_mgmt] = "Namespace Management",
+ [nvme_admin_activate_fw] = "Activate Firmware",
+ [nvme_admin_download_fw] = "Download Firmware",
+ [nvme_admin_dev_self_test] = "Device Self Test",
+ [nvme_admin_ns_attach] = "Namespace Attach",
+ [nvme_admin_keep_alive] = "Keep Alive",
+ [nvme_admin_directive_send] = "Directive Send",
+ [nvme_admin_directive_recv] = "Directive Receive",
+ [nvme_admin_virtual_mgmt] = "Virtual Management",
+ [nvme_admin_nvme_mi_send] = "NVMe Send MI",
+ [nvme_admin_nvme_mi_recv] = "NVMe Receive MI",
+ [nvme_admin_dbbuf] = "Doorbell Buffer Config",
+ [nvme_admin_format_nvm] = "Format NVM",
+ [nvme_admin_security_send] = "Security Send",
+ [nvme_admin_security_recv] = "Security Receive",
+ [nvme_admin_sanitize_nvm] = "Sanitize NVM",
+ [nvme_admin_get_lba_status] = "Get LBA Status",
+};
+
+static const char * const nvme_statuses[] = {
+ [NVME_SC_SUCCESS] = "Success",
+ [NVME_SC_INVALID_OPCODE] = "Invalid Command Opcode",
+ [NVME_SC_INVALID_FIELD] = "Invalid Field in Command",
+ [NVME_SC_CMDID_CONFLICT] = "Command ID Conflict",
+ [NVME_SC_DATA_XFER_ERROR] = "Data Transfer Error",
+ [NVME_SC_POWER_LOSS] = "Commands Aborted due to Power Loss Notification",
+ [NVME_SC_INTERNAL] = "Internal Error",
+ [NVME_SC_ABORT_REQ] = "Command Abort Requested",
+ [NVME_SC_ABORT_QUEUE] = "Command Aborted due to SQ Deletion",
+ [NVME_SC_FUSED_FAIL] = "Command Aborted due to Failed Fused Command",
+ [NVME_SC_FUSED_MISSING] = "Command Aborted due to Missing Fused Command",
+ [NVME_SC_INVALID_NS] = "Invalid Namespace or Format",
+ [NVME_SC_CMD_SEQ_ERROR] = "Command Sequence Error",
+ [NVME_SC_SGL_INVALID_LAST] = "Invalid SGL Segment Descriptor",
+ [NVME_SC_SGL_INVALID_COUNT] = "Invalid Number of SGL Descriptors",
+ [NVME_SC_SGL_INVALID_DATA] = "Data SGL Length Invalid",
+ [NVME_SC_SGL_INVALID_METADATA] = "Metadata SGL Length Invalid",
+ [NVME_SC_SGL_INVALID_TYPE] = "SGL Descriptor Type Invalid",
+ [NVME_SC_CMB_INVALID_USE] = "Invalid Use of Controller Memory Buffer",
+ [NVME_SC_PRP_INVALID_OFFSET] = "PRP Offset Invalid",
+ [NVME_SC_ATOMIC_WU_EXCEEDED] = "Atomic Write Unit Exceeded",
+ [NVME_SC_OP_DENIED] = "Operation Denied",
+ [NVME_SC_SGL_INVALID_OFFSET] = "SGL Offset Invalid",
+ [NVME_SC_RESERVED] = "Reserved",
+ [NVME_SC_HOST_ID_INCONSIST] = "Host Identifier Inconsistent Format",
+ [NVME_SC_KA_TIMEOUT_EXPIRED] = "Keep Alive Timeout Expired",
+ [NVME_SC_KA_TIMEOUT_INVALID] = "Keep Alive Timeout Invalid",
+ [NVME_SC_ABORTED_PREEMPT_ABORT] = "Command Aborted due to Preempt and Abort",
+ [NVME_SC_SANITIZE_FAILED] = "Sanitize Failed",
+ [NVME_SC_SANITIZE_IN_PROGRESS] = "Sanitize In Progress",
+ [NVME_SC_SGL_INVALID_GRANULARITY] = "SGL Data Block Granularity Invalid",
+ [NVME_SC_CMD_NOT_SUP_CMB_QUEUE] = "Command Not Supported for Queue in CMB",
+ [NVME_SC_NS_WRITE_PROTECTED] = "Namespace is Write Protected",
+ [NVME_SC_CMD_INTERRUPTED] = "Command Interrupted",
+ [NVME_SC_TRANSIENT_TR_ERR] = "Transient Transport Error",
+ [NVME_SC_INVALID_IO_CMD_SET] = "Invalid IO Command Set",
+ [NVME_SC_LBA_RANGE] = "LBA Out of Range",
+ [NVME_SC_CAP_EXCEEDED] = "Capacity Exceeded",
+ [NVME_SC_NS_NOT_READY] = "Namespace Not Ready",
+ [NVME_SC_RESERVATION_CONFLICT] = "Reservation Conflict",
+ [NVME_SC_FORMAT_IN_PROGRESS] = "Format In Progress",
+ [NVME_SC_CQ_INVALID] = "Completion Queue Invalid",
+ [NVME_SC_QID_INVALID] = "Invalid Queue Identifier",
+ [NVME_SC_QUEUE_SIZE] = "Invalid Queue Size",
+ [NVME_SC_ABORT_LIMIT] = "Abort Command Limit Exceeded",
+ [NVME_SC_ABORT_MISSING] = "Reserved", /* XXX */
+ [NVME_SC_ASYNC_LIMIT] = "Asynchronous Event Request Limit Exceeded",
+ [NVME_SC_FIRMWARE_SLOT] = "Invalid Firmware Slot",
+ [NVME_SC_FIRMWARE_IMAGE] = "Invalid Firmware Image",
+ [NVME_SC_INVALID_VECTOR] = "Invalid Interrupt Vector",
+ [NVME_SC_INVALID_LOG_PAGE] = "Invalid Log Page",
+ [NVME_SC_INVALID_FORMAT] = "Invalid Format",
+ [NVME_SC_FW_NEEDS_CONV_RESET] = "Firmware Activation Requires Conventional Reset",
+ [NVME_SC_INVALID_QUEUE] = "Invalid Queue Deletion",
+ [NVME_SC_FEATURE_NOT_SAVEABLE] = "Feature Identifier Not Saveable",
+ [NVME_SC_FEATURE_NOT_CHANGEABLE] = "Feature Not Changeable",
+ [NVME_SC_FEATURE_NOT_PER_NS] = "Feature Not Namespace Specific",
+ [NVME_SC_FW_NEEDS_SUBSYS_RESET] = "Firmware Activation Requires NVM Subsystem Reset",
+ [NVME_SC_FW_NEEDS_RESET] = "Firmware Activation Requires Reset",
+ [NVME_SC_FW_NEEDS_MAX_TIME] = "Firmware Activation Requires Maximum Time Violation",
+ [NVME_SC_FW_ACTIVATE_PROHIBITED] = "Firmware Activation Prohibited",
+ [NVME_SC_OVERLAPPING_RANGE] = "Overlapping Range",
+ [NVME_SC_NS_INSUFFICIENT_CAP] = "Namespace Insufficient Capacity",
+ [NVME_SC_NS_ID_UNAVAILABLE] = "Namespace Identifier Unavailable",
+ [NVME_SC_NS_ALREADY_ATTACHED] = "Namespace Already Attached",
+ [NVME_SC_NS_IS_PRIVATE] = "Namespace Is Private",
+ [NVME_SC_NS_NOT_ATTACHED] = "Namespace Not Attached",
+ [NVME_SC_THIN_PROV_NOT_SUPP] = "Thin Provisioning Not Supported",
+ [NVME_SC_CTRL_LIST_INVALID] = "Controller List Invalid",
+ [NVME_SC_SELT_TEST_IN_PROGRESS] = "Device Self-test In Progress",
+ [NVME_SC_BP_WRITE_PROHIBITED] = "Boot Partition Write Prohibited",
+ [NVME_SC_CTRL_ID_INVALID] = "Invalid Controller Identifier",
+ [NVME_SC_SEC_CTRL_STATE_INVALID] = "Invalid Secondary Controller State",
+ [NVME_SC_CTRL_RES_NUM_INVALID] = "Invalid Number of Controller Resources",
+ [NVME_SC_RES_ID_INVALID] = "Invalid Resource Identifier",
+ [NVME_SC_PMR_SAN_PROHIBITED] = "Sanitize Prohibited",
+ [NVME_SC_ANA_GROUP_ID_INVALID] = "ANA Group Identifier Invalid",
+ [NVME_SC_ANA_ATTACH_FAILED] = "ANA Attach Failed",
+ [NVME_SC_BAD_ATTRIBUTES] = "Conflicting Attributes",
+ [NVME_SC_INVALID_PI] = "Invalid Protection Information",
+ [NVME_SC_READ_ONLY] = "Attempted Write to Read Only Range",
+ [NVME_SC_ONCS_NOT_SUPPORTED] = "ONCS Not Supported",
+ [NVME_SC_ZONE_BOUNDARY_ERROR] = "Zoned Boundary Error",
+ [NVME_SC_ZONE_FULL] = "Zone Is Full",
+ [NVME_SC_ZONE_READ_ONLY] = "Zone Is Read Only",
+ [NVME_SC_ZONE_OFFLINE] = "Zone Is Offline",
+ [NVME_SC_ZONE_INVALID_WRITE] = "Zone Invalid Write",
+ [NVME_SC_ZONE_TOO_MANY_ACTIVE] = "Too Many Active Zones",
+ [NVME_SC_ZONE_TOO_MANY_OPEN] = "Too Many Open Zones",
+ [NVME_SC_ZONE_INVALID_TRANSITION] = "Invalid Zone State Transition",
+ [NVME_SC_WRITE_FAULT] = "Write Fault",
+ [NVME_SC_READ_ERROR] = "Unrecovered Read Error",
+ [NVME_SC_GUARD_CHECK] = "End-to-end Guard Check Error",
+ [NVME_SC_APPTAG_CHECK] = "End-to-end Application Tag Check Error",
+ [NVME_SC_REFTAG_CHECK] = "End-to-end Reference Tag Check Error",
+ [NVME_SC_COMPARE_FAILED] = "Compare Failure",
+ [NVME_SC_ACCESS_DENIED] = "Access Denied",
+ [NVME_SC_UNWRITTEN_BLOCK] = "Deallocated or Unwritten Logical Block",
+ [NVME_SC_ANA_PERSISTENT_LOSS] = "Asymmetric Access Persistent Loss",
+ [NVME_SC_ANA_INACCESSIBLE] = "Asymmetric Access Inaccessible",
+ [NVME_SC_ANA_TRANSITION] = "Asymmetric Access Transition",
+ [NVME_SC_HOST_PATH_ERROR] = "Host Pathing Error",
+};
+
+const unsigned char *nvme_get_error_status_str(u16 status)
+{
+ status &= 0x7ff;
+ if (status < ARRAY_SIZE(nvme_statuses) && nvme_statuses[status])
+ return nvme_statuses[status];
+ return "Unknown";
+}
+
+const unsigned char *nvme_get_opcode_str(u8 opcode)
+{
+ if (opcode < ARRAY_SIZE(nvme_ops) && nvme_ops[opcode])
+ return nvme_ops[opcode];
+ return "Unknown";
+}
+
+const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
+{
+ if (opcode < ARRAY_SIZE(nvme_admin_ops) && nvme_admin_ops[opcode])
+ return nvme_admin_ops[opcode];
+ return "Unknown";
+}
+#endif /* CONFIG_NVME_VERBOSE_ERRORS */
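
With CONFIG_NVME_VERBOSE_ERRORS set, these three helpers turn raw opcodes and status words into their spec names, falling back to "Unknown" for anything outside the tables. A usage sketch with a hypothetical status value:

        u16 status = NVME_SC_LBA_RANGE | NVME_SC_DNR;   /* hypothetical */

        pr_err("%s(0x%x) failed: %s%s\n",
               nvme_get_opcode_str(nvme_cmd_read), nvme_cmd_read,
               nvme_get_error_status_str(status),       /* "LBA Out of Range" */
               (status & NVME_SC_DNR) ? " DNR" : "");
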
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 5e0bfda04bd7..cd6eac8e3dd6 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -299,6 +299,37 @@ static void nvme_retry_req(struct request *req)
blk_mq_delay_kick_requeue_list(req->q, delay);
}
+static void nvme_log_error(struct request *req)
+{
+ struct nvme_ns *ns = req->q->queuedata;
+ struct nvme_request *nr = nvme_req(req);
+
+ if (ns) {
+ pr_err_ratelimited("%s: %s(0x%x) @ LBA %llu, %llu blocks, %s (sct 0x%x / sc 0x%x) %s%s\n",
+ ns->disk ? ns->disk->disk_name : "?",
+ nvme_get_opcode_str(nr->cmd->common.opcode),
+ nr->cmd->common.opcode,
+ (unsigned long long)nvme_sect_to_lba(ns, blk_rq_pos(req)),
+ (unsigned long long)blk_rq_bytes(req) >> ns->lba_shift,
+ nvme_get_error_status_str(nr->status),
+ nr->status >> 8 & 7, /* Status Code Type */
+ nr->status & 0xff, /* Status Code */
+ nr->status & NVME_SC_MORE ? "MORE " : "",
+ nr->status & NVME_SC_DNR ? "DNR " : "");
+ return;
+ }
+
+ pr_err_ratelimited("%s: %s(0x%x), %s (sct 0x%x / sc 0x%x) %s%s\n",
+ dev_name(nr->ctrl->device),
+ nvme_get_admin_opcode_str(nr->cmd->common.opcode),
+ nr->cmd->common.opcode,
+ nvme_get_error_status_str(nr->status),
+ nr->status >> 8 & 7, /* Status Code Type */
+ nr->status & 0xff, /* Status Code */
+ nr->status & NVME_SC_MORE ? "MORE " : "",
+ nr->status & NVME_SC_DNR ? "DNR " : "");
+}
+
enum nvme_disposition {
COMPLETE,
RETRY,
@@ -339,6 +370,8 @@ static inline void nvme_end_req(struct request *req)
{
blk_status_t status = nvme_error_status(nvme_req(req)->status);
+ if (unlikely(nvme_req(req)->status != NVME_SC_SUCCESS))
+ nvme_log_error(req);
nvme_end_req_zoned(req);
nvme_trace_bio_complete(req);
blk_mq_end_request(req, status);
@@ -368,6 +401,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_rq);
void nvme_complete_batch_req(struct request *req)
{
+ trace_nvme_complete_rq(req);
nvme_cleanup_cmd(req);
nvme_end_req_zoned(req);
}
@@ -561,7 +595,7 @@ static void nvme_free_ns_head(struct kref *ref)
container_of(ref, struct nvme_ns_head, ref);
nvme_mpath_remove_disk(head);
- ida_simple_remove(&head->subsys->ns_ida, head->instance);
+ ida_free(&head->subsys->ns_ida, head->instance);
cleanup_srcu_struct(&head->srcu);
nvme_put_subsystem(head->subsys);
kfree(head);
@@ -606,13 +640,8 @@ static inline void nvme_clear_nvme_request(struct request *req)
req->rq_flags |= RQF_DONTPREP;
}
-static inline unsigned int nvme_req_op(struct nvme_command *cmd)
-{
- return nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
-}
-
-static inline void nvme_init_request(struct request *req,
- struct nvme_command *cmd)
+/* initialize a passthrough request */
+void nvme_init_request(struct request *req, struct nvme_command *cmd)
{
if (req->q->queuedata)
req->timeout = NVME_IO_TIMEOUT;
@@ -628,30 +657,7 @@ static inline void nvme_init_request(struct request *req,
nvme_clear_nvme_request(req);
memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd));
}
-
-struct request *nvme_alloc_request(struct request_queue *q,
- struct nvme_command *cmd, blk_mq_req_flags_t flags)
-{
- struct request *req;
-
- req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags);
- if (!IS_ERR(req))
- nvme_init_request(req, cmd);
- return req;
-}
-EXPORT_SYMBOL_GPL(nvme_alloc_request);
-
-static struct request *nvme_alloc_request_qid(struct request_queue *q,
- struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid)
-{
- struct request *req;
-
- req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags,
- qid ? qid - 1 : 0);
- if (!IS_ERR(req))
- nvme_init_request(req, cmd);
- return req;
-}
+EXPORT_SYMBOL_GPL(nvme_init_request);
/*
* For something we're not in a state to send to the device the default action
@@ -757,6 +763,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
static int nvme_configure_directives(struct nvme_ctrl *ctrl)
{
struct streams_directive_params s;
+ u16 nssa;
int ret;
if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES))
@@ -772,14 +779,16 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl)
if (ret)
goto out_disable_stream;
- ctrl->nssa = le16_to_cpu(s.nssa);
- if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
+ nssa = le16_to_cpu(s.nssa);
+ if (nssa < BLK_MAX_WRITE_HINTS - 1) {
dev_info(ctrl->device, "too few streams (%u) available\n",
- ctrl->nssa);
+ nssa);
+ /* this condition is not an error: streams are optional */
+ ret = 0;
goto out_disable_stream;
}
- ctrl->nr_streams = min_t(u16, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
+ ctrl->nr_streams = min_t(u16, nssa, BLK_MAX_WRITE_HINTS - 1);
dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
return 0;
@@ -1049,8 +1058,7 @@ EXPORT_SYMBOL_GPL(nvme_setup_cmd);
* >0: nvme controller's cqe status response
* <0: kernel error in lieu of controller response
*/
-static int nvme_execute_rq(struct gendisk *disk, struct request *rq,
- bool at_head)
+static int nvme_execute_rq(struct request *rq, bool at_head)
{
blk_status_t status;
@@ -1075,11 +1083,14 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
int ret;
if (qid == NVME_QID_ANY)
- req = nvme_alloc_request(q, cmd, flags);
+ req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags);
else
- req = nvme_alloc_request_qid(q, cmd, flags, qid);
+ req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags,
+ qid ? qid - 1 : 0);
+
if (IS_ERR(req))
return PTR_ERR(req);
+ nvme_init_request(req, cmd);
if (timeout)
req->timeout = timeout;
@@ -1090,7 +1101,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
goto out;
}
- ret = nvme_execute_rq(NULL, req, at_head);
+ ret = nvme_execute_rq(req, at_head);
if (result && ret >= 0)
*result = nvme_req(req)->result;
out:
@@ -1206,12 +1217,11 @@ int nvme_execute_passthru_rq(struct request *rq)
struct nvme_command *cmd = nvme_req(rq)->cmd;
struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl;
struct nvme_ns *ns = rq->q->queuedata;
- struct gendisk *disk = ns ? ns->disk : NULL;
u32 effects;
int ret;
effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
- ret = nvme_execute_rq(disk, rq, false);
+ ret = nvme_execute_rq(rq, false);
if (effects) /* nothing to be done for zero cmd effects */
nvme_passthru_end(ctrl, effects, cmd, ret);
@@ -1270,14 +1280,15 @@ static void nvme_keep_alive_work(struct work_struct *work)
return;
}
- rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd,
- BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
+ rq = blk_mq_alloc_request(ctrl->admin_q, nvme_req_op(&ctrl->ka_cmd),
+ BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
if (IS_ERR(rq)) {
/* allocation failure, reset the controller */
dev_err(ctrl->device, "keep-alive failed: %ld\n", PTR_ERR(rq));
nvme_reset_ctrl(ctrl);
return;
}
+ nvme_init_request(rq, &ctrl->ka_cmd);
rq->timeout = ctrl->kato * HZ;
rq->end_io_data = ctrl;
@@ -1682,13 +1693,6 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
}
-static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids)
-{
- return !uuid_is_null(&ids->uuid) ||
- memchr_inv(ids->nguid, 0, sizeof(ids->nguid)) ||
- memchr_inv(ids->eui64, 0, sizeof(ids->eui64));
-}
-
static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
{
return uuid_equal(&a->uuid, &b->uuid) &&
@@ -1722,7 +1726,7 @@ static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
return 0;
}
-static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
+static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
{
struct nvme_ctrl *ctrl = ns->ctrl;
@@ -1738,7 +1742,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
- return 0;
+ return;
+
if (ctrl->ops->flags & NVME_F_FABRICS) {
/*
* The NVMe over Fabrics specification only supports metadata as
@@ -1746,7 +1751,7 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
* remap the separate metadata buffer from the block layer.
*/
if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT)))
- return -EINVAL;
+ return;
ns->features |= NVME_NS_EXT_LBAS;
@@ -1773,8 +1778,6 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
else
ns->features |= NVME_NS_METADATA_SUPPORTED;
}
-
- return 0;
}
static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
@@ -1915,9 +1918,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
ns->lba_shift = id->lbaf[lbaf].ds;
nvme_set_queue_limits(ns->ctrl, ns->queue);
- ret = nvme_configure_metadata(ns, id);
- if (ret)
- goto out_unfreeze;
+ nvme_configure_metadata(ns, id);
nvme_set_chunk_sectors(ns, id);
nvme_update_disk_info(ns->disk, ns, id);
@@ -1933,7 +1934,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
if (blk_queue_is_zoned(ns->queue)) {
ret = nvme_revalidate_zones(ns);
if (ret && !nvme_first_scan(ns->disk))
- goto out;
+ return ret;
}
if (nvme_ns_head_multipath(ns->head)) {
@@ -1948,16 +1949,16 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
return 0;
out_unfreeze:
- blk_mq_unfreeze_queue(ns->disk->queue);
-out:
/*
* If probing fails due to an unsupported feature, hide the block device,
* but still allow other access.
*/
if (ret == -ENODEV) {
ns->disk->flags |= GENHD_FL_HIDDEN;
+ set_bit(NVME_NS_READY, &ns->flags);
ret = 0;
}
+ blk_mq_unfreeze_queue(ns->disk->queue);
return ret;
}
@@ -1979,7 +1980,7 @@ static char nvme_pr_type(enum pr_type type)
default:
return 0;
}
-};
+}
static int nvme_send_ns_head_pr_command(struct block_device *bdev,
struct nvme_command *c, u8 data[16])
@@ -2567,7 +2568,7 @@ static void nvme_release_subsystem(struct device *dev)
container_of(dev, struct nvme_subsystem, dev);
if (subsys->instance >= 0)
- ida_simple_remove(&nvme_instance_ida, subsys->instance);
+ ida_free(&nvme_instance_ida, subsys->instance);
kfree(subsys);
}
@@ -2992,6 +2993,9 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->max_namespaces = le32_to_cpu(id->mnan);
ctrl->ctratt = le32_to_cpu(id->ctratt);
+ ctrl->cntrltype = id->cntrltype;
+ ctrl->dctype = id->dctype;
+
if (id->rtd3e) {
/* us -> s */
u32 transition_time = le32_to_cpu(id->rtd3e) / USEC_PER_SEC;
@@ -3525,6 +3529,40 @@ static ssize_t nvme_ctrl_fast_io_fail_tmo_store(struct device *dev,
static DEVICE_ATTR(fast_io_fail_tmo, S_IRUGO | S_IWUSR,
nvme_ctrl_fast_io_fail_tmo_show, nvme_ctrl_fast_io_fail_tmo_store);
+static ssize_t cntrltype_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ static const char * const type[] = {
+ [NVME_CTRL_IO] = "io\n",
+ [NVME_CTRL_DISC] = "discovery\n",
+ [NVME_CTRL_ADMIN] = "admin\n",
+ };
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ if (ctrl->cntrltype > NVME_CTRL_ADMIN || !type[ctrl->cntrltype])
+ return sysfs_emit(buf, "reserved\n");
+
+ return sysfs_emit(buf, type[ctrl->cntrltype]);
+}
+static DEVICE_ATTR_RO(cntrltype);
+
+static ssize_t dctype_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ static const char * const type[] = {
+ [NVME_DCTYPE_NOT_REPORTED] = "none\n",
+ [NVME_DCTYPE_DDC] = "ddc\n",
+ [NVME_DCTYPE_CDC] = "cdc\n",
+ };
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ if (ctrl->dctype > NVME_DCTYPE_CDC || !type[ctrl->dctype])
+ return sysfs_emit(buf, "reserved\n");
+
+ return sysfs_emit(buf, type[ctrl->dctype]);
+}
+static DEVICE_ATTR_RO(dctype);
+
static struct attribute *nvme_dev_attrs[] = {
&dev_attr_reset_controller.attr,
&dev_attr_rescan_controller.attr,
@@ -3546,6 +3584,8 @@ static struct attribute *nvme_dev_attrs[] = {
&dev_attr_reconnect_delay.attr,
&dev_attr_fast_io_fail_tmo.attr,
&dev_attr_kato.attr,
+ &dev_attr_cntrltype.attr,
+ &dev_attr_dctype.attr,
NULL
};
@@ -3600,16 +3640,24 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys,
return NULL;
}
-static int __nvme_check_ids(struct nvme_subsystem *subsys,
- struct nvme_ns_head *new)
+static int nvme_subsys_check_duplicate_ids(struct nvme_subsystem *subsys,
+ struct nvme_ns_ids *ids)
{
+ bool has_uuid = !uuid_is_null(&ids->uuid);
+ bool has_nguid = memchr_inv(ids->nguid, 0, sizeof(ids->nguid));
+ bool has_eui64 = memchr_inv(ids->eui64, 0, sizeof(ids->eui64));
struct nvme_ns_head *h;
lockdep_assert_held(&subsys->lock);
list_for_each_entry(h, &subsys->nsheads, entry) {
- if (nvme_ns_ids_valid(&new->ids) &&
- nvme_ns_ids_equal(&new->ids, &h->ids))
+ if (has_uuid && uuid_equal(&ids->uuid, &h->ids.uuid))
+ return -EINVAL;
+ if (has_nguid &&
+ memcmp(&ids->nguid, &h->ids.nguid, sizeof(ids->nguid)) == 0)
+ return -EINVAL;
+ if (has_eui64 &&
+ memcmp(&ids->eui64, &h->ids.eui64, sizeof(ids->eui64)) == 0)
return -EINVAL;
}
@@ -3618,7 +3666,7 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
static void nvme_cdev_rel(struct device *dev)
{
- ida_simple_remove(&nvme_ns_chr_minor_ida, MINOR(dev->devt));
+ ida_free(&nvme_ns_chr_minor_ida, MINOR(dev->devt));
}
void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device)
@@ -3632,7 +3680,7 @@ int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
{
int minor, ret;
- minor = ida_simple_get(&nvme_ns_chr_minor_ida, 0, 0, GFP_KERNEL);
+ minor = ida_alloc(&nvme_ns_chr_minor_ida, GFP_KERNEL);
if (minor < 0)
return minor;
cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor);
@@ -3695,7 +3743,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
head = kzalloc(size, GFP_KERNEL);
if (!head)
goto out;
- ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL);
+ ret = ida_alloc_min(&ctrl->subsys->ns_ida, 1, GFP_KERNEL);
if (ret < 0)
goto out_free_head;
head->instance = ret;
@@ -3708,13 +3756,6 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
head->ids = *ids;
kref_init(&head->ref);
- ret = __nvme_check_ids(ctrl->subsys, head);
- if (ret) {
- dev_err(ctrl->device,
- "duplicate IDs for nsid %d\n", nsid);
- goto out_cleanup_srcu;
- }
-
if (head->ids.csi) {
ret = nvme_get_effects_log(ctrl, head->ids.csi, &head->effects);
if (ret)
@@ -3734,7 +3775,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
out_cleanup_srcu:
cleanup_srcu_struct(&head->srcu);
out_ida_remove:
- ida_simple_remove(&ctrl->subsys->ns_ida, head->instance);
+ ida_free(&ctrl->subsys->ns_ida, head->instance);
out_free_head:
kfree(head);
out:
@@ -3743,16 +3784,56 @@ out:
return ERR_PTR(ret);
}
+static int nvme_global_check_duplicate_ids(struct nvme_subsystem *this,
+ struct nvme_ns_ids *ids)
+{
+ struct nvme_subsystem *s;
+ int ret = 0;
+
+ /*
+ * Note that this check is racy as we try to avoid holding the global
+ * lock over the whole ns_head creation. But it is only intended as
+ * a sanity check anyway.
+ */
+ mutex_lock(&nvme_subsystems_lock);
+ list_for_each_entry(s, &nvme_subsystems, entry) {
+ if (s == this)
+ continue;
+ mutex_lock(&s->lock);
+ ret = nvme_subsys_check_duplicate_ids(s, ids);
+ mutex_unlock(&s->lock);
+ if (ret)
+ break;
+ }
+ mutex_unlock(&nvme_subsystems_lock);
+
+ return ret;
+}
+
static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
struct nvme_ns_ids *ids, bool is_shared)
{
struct nvme_ctrl *ctrl = ns->ctrl;
struct nvme_ns_head *head = NULL;
- int ret = 0;
+ int ret;
+
+ ret = nvme_global_check_duplicate_ids(ctrl->subsys, ids);
+ if (ret) {
+ dev_err(ctrl->device,
+ "globally duplicate IDs for nsid %d\n", nsid);
+ return ret;
+ }
mutex_lock(&ctrl->subsys->lock);
head = nvme_find_ns_head(ctrl->subsys, nsid);
if (!head) {
+ ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, ids);
+ if (ret) {
+ dev_err(ctrl->device,
+ "duplicate IDs in subsystem for nsid %d\n",
+ nsid);
+ goto out_unlock;
+ }
head = nvme_alloc_ns_head(ctrl, nsid, ids);
if (IS_ERR(head)) {
ret = PTR_ERR(head);
@@ -3772,6 +3853,14 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
nsid);
goto out_put_ns_head;
}
+
+ if (!multipath && !list_empty(&head->list)) {
+ dev_warn(ctrl->device,
+ "Found shared namespace %d, but multipathing not supported.\n",
+ nsid);
+ dev_warn_once(ctrl->device,
+ "Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0\n.");
+ }
}
list_add_tail_rcu(&ns->siblings, &head->list);
@@ -3860,13 +3949,27 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
goto out_cleanup_disk;
/*
- * Without the multipath code enabled, multiple controller per
- * subsystems are visible as devices and thus we cannot use the
- * subsystem instance.
+ * If multipathing is enabled, the device name for all disks and not
+ * just those that represent shared namespaces needs to be based on the
+ * subsystem instance. Using the controller instance for private
+ * namespaces could lead to naming collisions between shared and private
+ * namespaces if they don't use a common numbering scheme.
+ *
+ * If multipathing is not enabled, disk names must use the controller
+ * instance as shared namespaces will show up as multiple block
+ * devices.
*/
- if (!nvme_mpath_set_disk_name(ns, disk->disk_name, &disk->flags))
+ if (ns->head->disk) {
+ sprintf(disk->disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
+ ctrl->instance, ns->head->instance);
+ disk->flags |= GENHD_FL_HIDDEN;
+ } else if (multipath) {
+ sprintf(disk->disk_name, "nvme%dn%d", ctrl->subsys->instance,
+ ns->head->instance);
+ } else {
sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance,
ns->head->instance);
+ }
if (nvme_update_ns_info(ns, id))
goto out_unlink_ns;
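
A worked example of the new naming scheme (instance numbers hypothetical): with subsystem instance 0, controller instance 1 and namespace head instance 2, a shared namespace on a multipath kernel surfaces as the head device nvme0n2 plus a hidden per-path node nvme0c1n2; a private namespace on the same kernel is nvme0n2 (subsystem instance); without CONFIG_NVME_MULTIPATH the same namespace is named nvme1n2 (controller instance).
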
@@ -4231,6 +4334,13 @@ static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env)
return ret;
}
+static void nvme_change_uevent(struct nvme_ctrl *ctrl, char *envdata)
+{
+ char *envp[2] = { envdata, NULL };
+
+ kobject_uevent_env(&ctrl->device->kobj, KOBJ_CHANGE, envp);
+}
+
static void nvme_aen_uevent(struct nvme_ctrl *ctrl)
{
char *envp[2] = { NULL, NULL };
@@ -4253,7 +4363,14 @@ static void nvme_async_event_work(struct work_struct *work)
container_of(work, struct nvme_ctrl, async_event_work);
nvme_aen_uevent(ctrl);
- ctrl->ops->submit_async_event(ctrl);
+
+ /*
+ * The transport drivers must guarantee AER submission here is safe by
+ * flushing ctrl async_event_work after changing the controller state
+ * from LIVE and before freeing the admin queue.
+ */
+ if (ctrl->state == NVME_CTRL_LIVE)
+ ctrl->ops->submit_async_event(ctrl);
}
static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl)
@@ -4398,6 +4515,8 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
nvme_queue_scan(ctrl);
nvme_start_queues(ctrl);
}
+
+ nvme_change_uevent(ctrl, "NVME_EVENT=connected");
}
EXPORT_SYMBOL_GPL(nvme_start_ctrl);
@@ -4431,7 +4550,7 @@ static void nvme_free_ctrl(struct device *dev)
struct nvme_subsystem *subsys = ctrl->subsys;
if (!subsys || ctrl->instance != subsys->instance)
- ida_simple_remove(&nvme_instance_ida, ctrl->instance);
+ ida_free(&nvme_instance_ida, ctrl->instance);
nvme_free_cels(ctrl);
nvme_mpath_uninit(ctrl);
@@ -4490,7 +4609,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
goto out;
}
- ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL);
+ ret = ida_alloc(&nvme_instance_ida, GFP_KERNEL);
if (ret < 0)
goto out;
ctrl->instance = ret;
@@ -4531,7 +4650,7 @@ out_free_name:
nvme_put_ctrl(ctrl);
kfree_const(ctrl->device->kobj.name);
out_release_instance:
- ida_simple_remove(&nvme_instance_ida, ctrl->instance);
+ ida_free(&nvme_instance_ida, ctrl->instance);
out:
if (ctrl->discard_page)
__free_page(ctrl->discard_page);
@@ -4566,7 +4685,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
return;
- blk_set_queue_dying(ns->queue);
+ blk_mark_disk_dead(ns->disk);
nvme_start_ns_queue(ns);
set_capacity_and_notify(ns->disk, 0);
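
The ida_simple_get()/ida_simple_remove() calls replaced throughout core.c (and in fc.c below) map onto the modern IDA API one-for-one; an end argument of 0 meant "no upper bound", which is ida_alloc()'s default. The correspondence, as a sketch with a hypothetical IDA:

        #include <linux/idr.h>

        static DEFINE_IDA(foo_ida);

        /* ida_simple_get(&ida, 0, 0, gfp)  ->  ida_alloc(&ida, gfp)
         * ida_simple_get(&ida, 1, 0, gfp)  ->  ida_alloc_min(&ida, 1, gfp)
         * ida_simple_remove(&ida, id)      ->  ida_free(&ida, id)       */
        static int foo_get_instance(void)
        {
                return ida_alloc(&foo_ida, GFP_KERNEL); /* id >= 0, or -errno */
        }

        static void foo_put_instance(int id)
        {
                ida_free(&foo_ida, id);
        }
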
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 7ae041e2b3fb..ee79a6d639b4 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -144,11 +144,10 @@ EXPORT_SYMBOL_GPL(nvmf_get_address);
*/
int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
{
- struct nvme_command cmd;
+ struct nvme_command cmd = { };
union nvme_result res;
int ret;
- memset(&cmd, 0, sizeof(cmd));
cmd.prop_get.opcode = nvme_fabrics_command;
cmd.prop_get.fctype = nvme_fabrics_type_property_get;
cmd.prop_get.offset = cpu_to_le32(off);
@@ -272,7 +271,7 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
int err_sctype = errval & ~NVME_SC_DNR;
switch (err_sctype) {
- case (NVME_SC_CONNECT_INVALID_PARAM):
+ case NVME_SC_CONNECT_INVALID_PARAM:
if (offset >> 16) {
char *inv_data = "Connect Invalid Data Parameter";
@@ -873,7 +872,7 @@ static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts,
unsigned int required_opts)
{
if ((opts->mask & required_opts) != required_opts) {
- int i;
+ unsigned int i;
for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) {
if ((opt_tokens[i].token & required_opts) &&
@@ -923,7 +922,7 @@ static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts,
unsigned int allowed_opts)
{
if (opts->mask & ~allowed_opts) {
- int i;
+ unsigned int i;
for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) {
if ((opt_tokens[i].token & opts->mask) &&
@@ -1092,7 +1091,6 @@ static void __nvmf_concat_opt_tokens(struct seq_file *seq_file)
static int nvmf_dev_show(struct seq_file *seq_file, void *private)
{
struct nvme_ctrl *ctrl;
- int ret = 0;
mutex_lock(&nvmf_dev_mutex);
ctrl = seq_file->private;
@@ -1106,7 +1104,7 @@ static int nvmf_dev_show(struct seq_file *seq_file, void *private)
out_unlock:
mutex_unlock(&nvmf_dev_mutex);
- return ret;
+ return 0;
}
static int nvmf_dev_open(struct inode *inode, struct file *file)
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index c3203ff1c654..1e3a09cad961 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -170,6 +170,7 @@ nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
struct nvmf_ctrl_options *opts)
{
if (ctrl->state == NVME_CTRL_DELETING ||
+ ctrl->state == NVME_CTRL_DELETING_NOIO ||
ctrl->state == NVME_CTRL_DEAD ||
strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) ||
strcmp(opts->host->nqn, ctrl->opts->host->nqn) ||
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 71b3108c22f0..080f85f4105f 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -259,7 +259,7 @@ nvme_fc_free_lport(struct kref *ref)
complete(&nvme_fc_unload_proceed);
spin_unlock_irqrestore(&nvme_fc_lock, flags);
- ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
+ ida_free(&nvme_fc_local_port_cnt, lport->localport.port_num);
ida_destroy(&lport->endp_cnt);
put_device(lport->dev);
@@ -399,7 +399,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
goto out_reghost_failed;
}
- idx = ida_simple_get(&nvme_fc_local_port_cnt, 0, 0, GFP_KERNEL);
+ idx = ida_alloc(&nvme_fc_local_port_cnt, GFP_KERNEL);
if (idx < 0) {
ret = -ENOSPC;
goto out_fail_kfree;
@@ -439,7 +439,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
return 0;
out_ida_put:
- ida_simple_remove(&nvme_fc_local_port_cnt, idx);
+ ida_free(&nvme_fc_local_port_cnt, idx);
out_fail_kfree:
kfree(newrec);
out_reghost_failed:
@@ -535,7 +535,7 @@ nvme_fc_free_rport(struct kref *ref)
spin_unlock_irqrestore(&nvme_fc_lock, flags);
WARN_ON(!list_empty(&rport->disc_list));
- ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);
+ ida_free(&lport->endp_cnt, rport->remoteport.port_num);
kfree(rport);
@@ -713,7 +713,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
goto out_lport_put;
}
- idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL);
+ idx = ida_alloc(&lport->endp_cnt, GFP_KERNEL);
if (idx < 0) {
ret = -ENOSPC;
goto out_kfree_rport;
@@ -2393,7 +2393,7 @@ nvme_fc_ctrl_free(struct kref *ref)
put_device(ctrl->dev);
nvme_fc_rport_put(ctrl->rport);
- ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
+ ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum);
if (ctrl->ctrl.opts)
nvmf_free_options(ctrl->ctrl.opts);
kfree(ctrl);
@@ -2916,11 +2916,9 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
ctrl->ctrl.tagset = &ctrl->tag_set;
- ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
- if (IS_ERR(ctrl->ctrl.connect_q)) {
- ret = PTR_ERR(ctrl->ctrl.connect_q);
+ ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl));
+ if (ret)
goto out_free_tag_set;
- }
ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
if (ret)
@@ -3472,7 +3470,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
goto out_fail;
}
- idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL);
+ idx = ida_alloc(&nvme_fc_ctrl_cnt, GFP_KERNEL);
if (idx < 0) {
ret = -ENOSPC;
goto out_free_ctrl;
@@ -3635,7 +3633,7 @@ out_free_queues:
kfree(ctrl->queues);
out_free_ida:
put_device(ctrl->dev);
- ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
+ ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum);
out_free_ctrl:
kfree(ctrl);
out_fail:
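
Aside: throughout fc.c (and the rest of this series) ida_simple_get()/ida_simple_remove() become ida_alloc()/ida_free(). The two are equivalent for the (0, 0) range used here; a minimal sketch of the modern spelling (example_* names are illustrative):

    #include <linux/idr.h>

    static DEFINE_IDA(example_ida);

    static int example_get_instance(void)
    {
        /* was: ida_simple_get(&example_ida, 0, 0, GFP_KERNEL) */
        return ida_alloc(&example_ida, GFP_KERNEL);  /* smallest free ID, or -errno */
    }

    static void example_put_instance(int idx)
    {
        /* was: ida_simple_remove(&example_ida, idx) */
        ida_free(&example_ida, idx);
    }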
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 22314962842d..554566371ffa 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -56,7 +56,7 @@ out:
static int nvme_submit_user_cmd(struct request_queue *q,
struct nvme_command *cmd, void __user *ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
- u32 meta_seed, u64 *result, unsigned timeout)
+ u32 meta_seed, u64 *result, unsigned timeout, bool vec)
{
bool write = nvme_is_write(cmd);
struct nvme_ns *ns = q->queuedata;
@@ -66,17 +66,32 @@ static int nvme_submit_user_cmd(struct request_queue *q,
void *meta = NULL;
int ret;
- req = nvme_alloc_request(q, cmd, 0);
+ req = blk_mq_alloc_request(q, nvme_req_op(cmd), 0);
if (IS_ERR(req))
return PTR_ERR(req);
+ nvme_init_request(req, cmd);
if (timeout)
req->timeout = timeout;
nvme_req(req)->flags |= NVME_REQ_USERCMD;
if (ubuffer && bufflen) {
- ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
+ if (!vec)
+ ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
GFP_KERNEL);
+ else {
+ struct iovec fast_iov[UIO_FASTIOV];
+ struct iovec *iov = fast_iov;
+ struct iov_iter iter;
+
+ ret = import_iovec(rq_data_dir(req), ubuffer, bufflen,
+ UIO_FASTIOV, &iov, &iter);
+ if (ret < 0)
+ goto out;
+ ret = blk_rq_map_user_iov(q, req, NULL, &iter,
+ GFP_KERNEL);
+ kfree(iov);
+ }
if (ret)
goto out;
bio = req->bio;
@@ -170,7 +185,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
return nvme_submit_user_cmd(ns->queue, &c,
nvme_to_user_ptr(io.addr), length,
- metadata, meta_len, lower_32_bits(io.slba), NULL, 0);
+ metadata, meta_len, lower_32_bits(io.slba), NULL, 0,
+ false);
}
static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
@@ -224,7 +240,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
nvme_to_user_ptr(cmd.addr), cmd.data_len,
nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
- 0, &result, timeout);
+ 0, &result, timeout, false);
if (status >= 0) {
if (put_user(result, &ucmd->result))
@@ -235,7 +251,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
}
static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- struct nvme_passthru_cmd64 __user *ucmd)
+ struct nvme_passthru_cmd64 __user *ucmd, bool vec)
{
struct nvme_passthru_cmd64 cmd;
struct nvme_command c;
@@ -270,7 +286,7 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
nvme_to_user_ptr(cmd.addr), cmd.data_len,
nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
- 0, &cmd.result, timeout);
+ 0, &cmd.result, timeout, vec);
if (status >= 0) {
if (put_user(cmd.result, &ucmd->result))
@@ -296,7 +312,7 @@ static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
case NVME_IOCTL_ADMIN_CMD:
return nvme_user_cmd(ctrl, NULL, argp);
case NVME_IOCTL_ADMIN64_CMD:
- return nvme_user_cmd64(ctrl, NULL, argp);
+ return nvme_user_cmd64(ctrl, NULL, argp, false);
default:
return sed_ioctl(ctrl->opal_dev, cmd, argp);
}
@@ -340,7 +356,9 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
case NVME_IOCTL_SUBMIT_IO:
return nvme_submit_io(ns, argp);
case NVME_IOCTL_IO64_CMD:
- return nvme_user_cmd64(ns->ctrl, ns, argp);
+ return nvme_user_cmd64(ns->ctrl, ns, argp, false);
+ case NVME_IOCTL_IO64_CMD_VEC:
+ return nvme_user_cmd64(ns->ctrl, ns, argp, true);
default:
return -ENOTTY;
}
@@ -480,7 +498,7 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,
case NVME_IOCTL_ADMIN_CMD:
return nvme_user_cmd(ctrl, NULL, argp);
case NVME_IOCTL_ADMIN64_CMD:
- return nvme_user_cmd64(ctrl, NULL, argp);
+ return nvme_user_cmd64(ctrl, NULL, argp, false);
case NVME_IOCTL_IO_CMD:
return nvme_dev_user_cmd(ctrl, argp);
case NVME_IOCTL_RESET:
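
Aside: the new NVME_IOCTL_IO64_CMD_VEC path above maps a user iovec array instead of a flat buffer. A hedged sketch of the import_iovec() idiom it relies on (example_* names are illustrative; the unconditional kfree() is safe because import_iovec() stores NULL in the iovec pointer when the on-stack fast array sufficed):

    #include <linux/uio.h>
    #include <linux/slab.h>

    static int example_import_user_vec(const struct iovec __user *uvec,
                                       unsigned int nr_segs, int dir,
                                       struct iov_iter *iter)
    {
        struct iovec fast_iov[UIO_FASTIOV];  /* small vectors stay on the stack */
        struct iovec *iov = fast_iov;
        ssize_t ret;

        /* dir is READ or WRITE, as returned by rq_data_dir() */
        ret = import_iovec(dir, uvec, nr_segs, UIO_FASTIOV, &iov, iter);
        if (ret < 0)
            return ret;

        /* ... consume @iter, e.g. via blk_rq_map_user_iov() ... */

        kfree(iov);  /* NULL when fast_iov was used, so always safe */
        return 0;
    }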
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index f8bf6606eb2f..1b31f19e1053 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -5,10 +5,11 @@
#include <linux/backing-dev.h>
#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
#include <trace/events/block.h>
#include "nvme.h"
-static bool multipath = true;
+bool multipath = true;
module_param(multipath, bool, 0444);
MODULE_PARM_DESC(multipath,
"turn on native support for multiple controllers per subsystem");
@@ -79,28 +80,6 @@ void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
blk_freeze_queue_start(h->disk->queue);
}
-/*
- * If multipathing is enabled we need to always use the subsystem instance
- * number for numbering our devices to avoid conflicts between subsystems that
- * have multiple controllers and thus use the multipath-aware subsystem node
- * and those that have a single controller and use the controller node
- * directly.
- */
-bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags)
-{
- if (!multipath)
- return false;
- if (!ns->head->disk) {
- sprintf(disk_name, "nvme%dn%d", ns->ctrl->subsys->instance,
- ns->head->instance);
- return true;
- }
- sprintf(disk_name, "nvme%dc%dn%d", ns->ctrl->subsys->instance,
- ns->ctrl->instance, ns->head->instance);
- *flags = GENHD_FL_HIDDEN;
- return true;
-}
-
void nvme_failover_req(struct request *req)
{
struct nvme_ns *ns = req->q->queuedata;
@@ -386,8 +365,7 @@ static void nvme_ns_head_submit_bio(struct bio *bio)
} else {
dev_warn_ratelimited(dev, "no available path - failing I/O\n");
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
+ bio_io_error(bio);
}
srcu_read_unlock(&head->srcu, srcu_idx);
@@ -848,7 +826,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
if (!head->disk)
return;
- blk_set_queue_dying(head->disk->queue);
+ blk_mark_disk_dead(head->disk);
/* make sure all pending bios are cleaned up */
kblockd_schedule_work(&head->requeue_work);
flush_work(&head->requeue_work);
@@ -898,7 +876,7 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
if (ana_log_size > ctrl->ana_log_size) {
nvme_mpath_stop(ctrl);
nvme_mpath_uninit(ctrl);
- ctrl->ana_log_buf = kmalloc(ana_log_size, GFP_KERNEL);
+ ctrl->ana_log_buf = kvmalloc(ana_log_size, GFP_KERNEL);
if (!ctrl->ana_log_buf)
return -ENOMEM;
}
@@ -915,7 +893,7 @@ out_uninit:
void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
{
- kfree(ctrl->ana_log_buf);
+ kvfree(ctrl->ana_log_buf);
ctrl->ana_log_buf = NULL;
ctrl->ana_log_size = 0;
}
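
Aside: the ANA log buffer switches from kmalloc() to kvmalloc() because the log scales with the number of namespaces and ANA groups, and a large physically contiguous allocation can fail under fragmentation. A minimal sketch (illustrative names):

    #include <linux/mm.h>

    static void *example_alloc_log(size_t size)
    {
        /* kmalloc first; transparent vmalloc fallback for large sizes */
        return kvmalloc(size, GFP_KERNEL);
    }

    static void example_free_log(void *buf)
    {
        kvfree(buf);  /* correct for either backing store */
    }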
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a162f6c6da6e..1ea908d43e17 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -280,7 +280,6 @@ struct nvme_ctrl {
u16 crdt[3];
u16 oncs;
u16 oacs;
- u16 nssa;
u16 nr_streams;
u16 sqsize;
u32 max_namespaces;
@@ -349,6 +348,9 @@ struct nvme_ctrl {
unsigned long discard_page_busy;
struct nvme_fault_inject fault_inject;
+
+ enum nvme_ctrl_type cntrltype;
+ enum nvme_dctype dctype;
};
enum nvme_iopolicy {
@@ -696,9 +698,13 @@ void nvme_wait_freeze(struct nvme_ctrl *ctrl);
int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
void nvme_start_freeze(struct nvme_ctrl *ctrl);
+static inline unsigned int nvme_req_op(struct nvme_command *cmd)
+{
+ return nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
+}
+
#define NVME_QID_ANY -1
-struct request *nvme_alloc_request(struct request_queue *q,
- struct nvme_command *cmd, blk_mq_req_flags_t flags);
+void nvme_init_request(struct request *req, struct nvme_command *cmd);
void nvme_cleanup_cmd(struct request *req);
blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req);
blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
@@ -768,7 +774,6 @@ void nvme_mpath_unfreeze(struct nvme_subsystem *subsys);
void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys);
-bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags);
void nvme_failover_req(struct request *req);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
@@ -791,20 +796,17 @@ static inline void nvme_trace_bio_complete(struct request *req)
trace_block_bio_complete(ns->head->disk->queue, req->bio);
}
+extern bool multipath;
extern struct device_attribute dev_attr_ana_grpid;
extern struct device_attribute dev_attr_ana_state;
extern struct device_attribute subsys_attr_iopolicy;
#else
+#define multipath false
static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
{
return false;
}
-static inline bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name,
- int *flags)
-{
- return false;
-}
static inline void nvme_failover_req(struct request *req)
{
}
@@ -894,6 +896,14 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
}
#endif
+static inline int nvme_ctrl_init_connect_q(struct nvme_ctrl *ctrl)
+{
+ ctrl->connect_q = blk_mq_init_queue(ctrl->tagset);
+ if (IS_ERR(ctrl->connect_q))
+ return PTR_ERR(ctrl->connect_q);
+ return 0;
+}
+
static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
{
return dev_to_disk(dev)->private_data;
@@ -930,4 +940,23 @@ static inline bool nvme_multi_css(struct nvme_ctrl *ctrl)
return (ctrl->ctrl_config & NVME_CC_CSS_MASK) == NVME_CC_CSS_CSI;
}
+#ifdef CONFIG_NVME_VERBOSE_ERRORS
+const unsigned char *nvme_get_error_status_str(u16 status);
+const unsigned char *nvme_get_opcode_str(u8 opcode);
+const unsigned char *nvme_get_admin_opcode_str(u8 opcode);
+#else /* CONFIG_NVME_VERBOSE_ERRORS */
+static inline const unsigned char *nvme_get_error_status_str(u16 status)
+{
+ return "I/O Error";
+}
+static inline const unsigned char *nvme_get_opcode_str(u8 opcode)
+{
+ return "I/O Cmd";
+}
+static inline const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
+{
+ return "Admin Cmd";
+}
+#endif /* CONFIG_NVME_VERBOSE_ERRORS */
+
#endif /* _NVME_H */
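
Aside: the nvme_get_*_str() declarations follow the usual Kconfig stub pattern — real table lookups exist only when CONFIG_NVME_VERBOSE_ERRORS is built in, with cheap inline fallbacks otherwise, so call sites never need an #ifdef. Sketch of the shape (illustrative names):

    #ifdef CONFIG_EXAMPLE_VERBOSE
    const char *example_status_str(u16 status);  /* table lookup in a .c file */
    #else
    static inline const char *example_status_str(u16 status)
    {
        return "I/O Error";                      /* constant fallback */
    }
    #endif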
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d8585df2c2fd..2e98ac3f3ad6 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -15,6 +15,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/memremap.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
@@ -424,8 +425,9 @@ static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
return 0;
}
-static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
- unsigned int hctx_idx, unsigned int numa_node)
+static int nvme_pci_init_request(struct blk_mq_tag_set *set,
+ struct request *req, unsigned int hctx_idx,
+ unsigned int numa_node)
{
struct nvme_dev *dev = set->driver_data;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -1428,12 +1430,13 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
"I/O %d QID %d timeout, aborting\n",
req->tag, nvmeq->qid);
- abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd,
- BLK_MQ_REQ_NOWAIT);
+ abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd),
+ BLK_MQ_REQ_NOWAIT);
if (IS_ERR(abort_req)) {
atomic_inc(&dev->ctrl.abort_limit);
return BLK_EH_RESET_TIMER;
}
+ nvme_init_request(abort_req, &cmd);
abort_req->end_io_data = NULL;
blk_execute_rq_nowait(abort_req, false, abort_endio);
@@ -1722,7 +1725,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
.queue_rq = nvme_queue_rq,
.complete = nvme_pci_complete_rq,
.init_hctx = nvme_admin_init_hctx,
- .init_request = nvme_init_request,
+ .init_request = nvme_pci_init_request,
.timeout = nvme_timeout,
};
@@ -1732,7 +1735,7 @@ static const struct blk_mq_ops nvme_mq_ops = {
.complete = nvme_pci_complete_rq,
.commit_rqs = nvme_commit_rqs,
.init_hctx = nvme_init_hctx,
- .init_request = nvme_init_request,
+ .init_request = nvme_pci_init_request,
.map_queues = nvme_pci_map_queues,
.timeout = nvme_timeout,
.poll = nvme_poll,
@@ -2475,9 +2478,10 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
cmd.delete_queue.opcode = opcode;
cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid);
- req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT);
+ req = blk_mq_alloc_request(q, nvme_req_op(&cmd), BLK_MQ_REQ_NOWAIT);
if (IS_ERR(req))
return PTR_ERR(req);
+ nvme_init_request(req, &cmd);
req->end_io_data = nvmeq;
@@ -3391,7 +3395,8 @@ static const struct pci_device_id nvme_id_table[] = {
NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0x0a54), /* Intel P4500/P4600 */
.driver_data = NVME_QUIRK_STRIPE_SIZE |
- NVME_QUIRK_DEALLOCATE_ZEROES, },
+ NVME_QUIRK_DEALLOCATE_ZEROES |
+ NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_VDEVICE(INTEL, 0x0a55), /* Dell Express Flash P4600 */
.driver_data = NVME_QUIRK_STRIPE_SIZE |
NVME_QUIRK_DEALLOCATE_ZEROES, },
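
Aside: this is the same two-step pattern applied across the series — nvme_alloc_request() is gone, so callers ask the block layer for a passthrough request in the right direction and then attach the NVMe command. A sketch using the helpers this series introduces (nvme_req_op() and nvme_init_request() come from the diff; example_* is illustrative):

    static struct request *example_alloc_cmd_rq(struct request_queue *q,
                                                struct nvme_command *cmd)
    {
        struct request *req;

        /* REQ_OP_DRV_OUT for writes, REQ_OP_DRV_IN for reads */
        req = blk_mq_alloc_request(q, nvme_req_op(cmd), BLK_MQ_REQ_NOWAIT);
        if (IS_ERR(req))
            return req;
        nvme_init_request(req, cmd);  /* attach the command to the request */
        return req;
    }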
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 850f84d204d0..d9f19d901313 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -978,11 +978,9 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
goto out_free_io_queues;
}
- ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
- if (IS_ERR(ctrl->ctrl.connect_q)) {
- ret = PTR_ERR(ctrl->ctrl.connect_q);
+ ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl));
+ if (ret)
goto out_free_tag_set;
- }
}
ret = nvme_rdma_start_io_queues(ctrl);
@@ -1200,6 +1198,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
struct nvme_rdma_ctrl, err_work);
nvme_stop_keep_alive(&ctrl->ctrl);
+ flush_work(&ctrl->ctrl.async_event_work);
nvme_rdma_teardown_io_queues(ctrl, false);
nvme_start_queues(&ctrl->ctrl);
nvme_rdma_teardown_admin_queue(ctrl, false);
@@ -1282,6 +1281,22 @@ static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
return ib_post_send(queue->qp, &wr, NULL);
}
+static void nvme_rdma_dma_unmap_req(struct ib_device *ibdev, struct request *rq)
+{
+ struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+
+ if (blk_integrity_rq(rq)) {
+ ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
+ req->metadata_sgl->nents, rq_dma_dir(rq));
+ sg_free_table_chained(&req->metadata_sgl->sg_table,
+ NVME_INLINE_METADATA_SG_CNT);
+ }
+
+ ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
+ rq_dma_dir(rq));
+ sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
+}
+
static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
struct request *rq)
{
@@ -1293,13 +1308,6 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
if (!blk_rq_nr_phys_segments(rq))
return;
- if (blk_integrity_rq(rq)) {
- ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
- req->metadata_sgl->nents, rq_dma_dir(rq));
- sg_free_table_chained(&req->metadata_sgl->sg_table,
- NVME_INLINE_METADATA_SG_CNT);
- }
-
if (req->use_sig_mr)
pool = &queue->qp->sig_mrs;
@@ -1308,9 +1316,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
req->mr = NULL;
}
- ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
- rq_dma_dir(rq));
- sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
+ nvme_rdma_dma_unmap_req(ibdev, rq);
}
static int nvme_rdma_set_sg_null(struct nvme_command *c)
@@ -1521,22 +1527,11 @@ mr_put:
return -EINVAL;
}
-static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
- struct request *rq, struct nvme_command *c)
+static int nvme_rdma_dma_map_req(struct ib_device *ibdev, struct request *rq,
+ int *count, int *pi_count)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
- struct nvme_rdma_device *dev = queue->device;
- struct ib_device *ibdev = dev->dev;
- int pi_count = 0;
- int count, ret;
-
- req->num_sge = 1;
- refcount_set(&req->ref, 2); /* send and recv completions */
-
- c->common.flags |= NVME_CMD_SGL_METABUF;
-
- if (!blk_rq_nr_phys_segments(rq))
- return nvme_rdma_set_sg_null(c);
+ int ret;
req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1);
ret = sg_alloc_table_chained(&req->data_sgl.sg_table,
@@ -1548,9 +1543,9 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
req->data_sgl.nents = blk_rq_map_sg(rq->q, rq,
req->data_sgl.sg_table.sgl);
- count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl,
- req->data_sgl.nents, rq_dma_dir(rq));
- if (unlikely(count <= 0)) {
+ *count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl,
+ req->data_sgl.nents, rq_dma_dir(rq));
+ if (unlikely(*count <= 0)) {
ret = -EIO;
goto out_free_table;
}
@@ -1569,16 +1564,50 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q,
rq->bio, req->metadata_sgl->sg_table.sgl);
- pi_count = ib_dma_map_sg(ibdev,
- req->metadata_sgl->sg_table.sgl,
- req->metadata_sgl->nents,
- rq_dma_dir(rq));
- if (unlikely(pi_count <= 0)) {
+ *pi_count = ib_dma_map_sg(ibdev,
+ req->metadata_sgl->sg_table.sgl,
+ req->metadata_sgl->nents,
+ rq_dma_dir(rq));
+ if (unlikely(*pi_count <= 0)) {
ret = -EIO;
goto out_free_pi_table;
}
}
+ return 0;
+
+out_free_pi_table:
+ sg_free_table_chained(&req->metadata_sgl->sg_table,
+ NVME_INLINE_METADATA_SG_CNT);
+out_unmap_sg:
+ ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
+ rq_dma_dir(rq));
+out_free_table:
+ sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
+ return ret;
+}
+
+static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
+ struct request *rq, struct nvme_command *c)
+{
+ struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+ struct nvme_rdma_device *dev = queue->device;
+ struct ib_device *ibdev = dev->dev;
+ int pi_count = 0;
+ int count, ret;
+
+ req->num_sge = 1;
+ refcount_set(&req->ref, 2); /* send and recv completions */
+
+ c->common.flags |= NVME_CMD_SGL_METABUF;
+
+ if (!blk_rq_nr_phys_segments(rq))
+ return nvme_rdma_set_sg_null(c);
+
+ ret = nvme_rdma_dma_map_req(ibdev, rq, &count, &pi_count);
+ if (unlikely(ret))
+ return ret;
+
if (req->use_sig_mr) {
ret = nvme_rdma_map_sg_pi(queue, req, c, count, pi_count);
goto out;
@@ -1602,23 +1631,12 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
ret = nvme_rdma_map_sg_fr(queue, req, c, count);
out:
if (unlikely(ret))
- goto out_unmap_pi_sg;
+ goto out_dma_unmap_req;
return 0;
-out_unmap_pi_sg:
- if (blk_integrity_rq(rq))
- ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
- req->metadata_sgl->nents, rq_dma_dir(rq));
-out_free_pi_table:
- if (blk_integrity_rq(rq))
- sg_free_table_chained(&req->metadata_sgl->sg_table,
- NVME_INLINE_METADATA_SG_CNT);
-out_unmap_sg:
- ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
- rq_dma_dir(rq));
-out_free_table:
- sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
+out_dma_unmap_req:
+ nvme_rdma_dma_unmap_req(ibdev, rq);
return ret;
}
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 4ceb28675fdf..ad3a2bf2f1e9 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -30,6 +30,44 @@ static int so_priority;
module_param(so_priority, int, 0644);
MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+/* lockdep can detect a circular dependency of the form
+ * sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock
+ * because dependencies are tracked for both nvme-tcp and user contexts. Using
+ * a separate class prevents lockdep from conflating nvme-tcp socket use with
+ * user-space socket API use.
+ */
+static struct lock_class_key nvme_tcp_sk_key[2];
+static struct lock_class_key nvme_tcp_slock_key[2];
+
+static void nvme_tcp_reclassify_socket(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+
+ if (WARN_ON_ONCE(!sock_allow_reclassification(sk)))
+ return;
+
+ switch (sk->sk_family) {
+ case AF_INET:
+ sock_lock_init_class_and_name(sk, "slock-AF_INET-NVME",
+ &nvme_tcp_slock_key[0],
+ "sk_lock-AF_INET-NVME",
+ &nvme_tcp_sk_key[0]);
+ break;
+ case AF_INET6:
+ sock_lock_init_class_and_name(sk, "slock-AF_INET6-NVME",
+ &nvme_tcp_slock_key[1],
+ "sk_lock-AF_INET6-NVME",
+ &nvme_tcp_sk_key[1]);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+}
+#else
+static void nvme_tcp_reclassify_socket(struct socket *sock) { }
+#endif
+
enum nvme_tcp_send_state {
NVME_TCP_SEND_CMD_PDU = 0,
NVME_TCP_SEND_H2C_PDU,
@@ -44,6 +82,8 @@ struct nvme_tcp_request {
u32 data_len;
u32 pdu_len;
u32 pdu_sent;
+ u32 h2cdata_left;
+ u32 h2cdata_offset;
u16 ttag;
__le16 status;
struct list_head entry;
@@ -95,6 +135,7 @@ struct nvme_tcp_queue {
struct nvme_tcp_request *request;
int queue_size;
+ u32 maxh2cdata;
size_t cmnd_capsule_len;
struct nvme_tcp_ctrl *ctrl;
unsigned long flags;
@@ -572,23 +613,26 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
return ret;
}
-static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
- struct nvme_tcp_r2t_pdu *pdu)
+static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req)
{
struct nvme_tcp_data_pdu *data = req->pdu;
struct nvme_tcp_queue *queue = req->queue;
struct request *rq = blk_mq_rq_from_pdu(req);
+ u32 h2cdata_sent = req->pdu_len;
u8 hdgst = nvme_tcp_hdgst_len(queue);
u8 ddgst = nvme_tcp_ddgst_len(queue);
req->state = NVME_TCP_SEND_H2C_PDU;
req->offset = 0;
- req->pdu_len = le32_to_cpu(pdu->r2t_length);
+ req->pdu_len = min(req->h2cdata_left, queue->maxh2cdata);
req->pdu_sent = 0;
+ req->h2cdata_left -= req->pdu_len;
+ req->h2cdata_offset += h2cdata_sent;
memset(data, 0, sizeof(*data));
data->hdr.type = nvme_tcp_h2c_data;
- data->hdr.flags = NVME_TCP_F_DATA_LAST;
+ if (!req->h2cdata_left)
+ data->hdr.flags = NVME_TCP_F_DATA_LAST;
if (queue->hdr_digest)
data->hdr.flags |= NVME_TCP_F_HDGST;
if (queue->data_digest)
@@ -597,9 +641,9 @@ static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
data->hdr.pdo = data->hdr.hlen + hdgst;
data->hdr.plen =
cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
- data->ttag = pdu->ttag;
+ data->ttag = req->ttag;
data->command_id = nvme_cid(rq);
- data->data_offset = pdu->r2t_offset;
+ data->data_offset = cpu_to_le32(req->h2cdata_offset);
data->data_length = cpu_to_le32(req->pdu_len);
}
@@ -609,6 +653,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
struct nvme_tcp_request *req;
struct request *rq;
u32 r2t_length = le32_to_cpu(pdu->r2t_length);
+ u32 r2t_offset = le32_to_cpu(pdu->r2t_offset);
rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
if (!rq) {
@@ -633,14 +678,19 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
return -EPROTO;
}
- if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) {
+ if (unlikely(r2t_offset < req->data_sent)) {
dev_err(queue->ctrl->ctrl.device,
"req %d unexpected r2t offset %u (expected %zu)\n",
- rq->tag, le32_to_cpu(pdu->r2t_offset), req->data_sent);
+ rq->tag, r2t_offset, req->data_sent);
return -EPROTO;
}
- nvme_tcp_setup_h2c_data_pdu(req, pdu);
+ req->pdu_len = 0;
+ req->h2cdata_left = r2t_length;
+ req->h2cdata_offset = r2t_offset;
+ req->ttag = pdu->ttag;
+
+ nvme_tcp_setup_h2c_data_pdu(req);
nvme_tcp_queue_request(req, false, true);
return 0;
@@ -913,13 +963,22 @@ static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
{
- nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_HOST_PATH_ERROR);
+ if (nvme_tcp_async_req(req)) {
+ union nvme_result res = {};
+
+ nvme_complete_async_event(&req->queue->ctrl->ctrl,
+ cpu_to_le16(NVME_SC_HOST_PATH_ERROR), &res);
+ } else {
+ nvme_tcp_end_request(blk_mq_rq_from_pdu(req),
+ NVME_SC_HOST_PATH_ERROR);
+ }
}
static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
{
struct nvme_tcp_queue *queue = req->queue;
int req_data_len = req->data_len;
+ u32 h2cdata_left = req->h2cdata_left;
while (true) {
struct page *page = nvme_tcp_req_cur_page(req);
@@ -964,7 +1023,10 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
req->state = NVME_TCP_SEND_DDGST;
req->offset = 0;
} else {
- nvme_tcp_done_send_req(queue);
+ if (h2cdata_left)
+ nvme_tcp_setup_h2c_data_pdu(req);
+ else
+ nvme_tcp_done_send_req(queue);
}
return 1;
}
@@ -1022,9 +1084,14 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
if (queue->hdr_digest && !req->offset)
nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
- ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
- offset_in_page(pdu) + req->offset, len,
- MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
+ if (!req->h2cdata_left)
+ ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
+ offset_in_page(pdu) + req->offset, len,
+ MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
+ else
+ ret = sock_no_sendpage(queue->sock, virt_to_page(pdu),
+ offset_in_page(pdu) + req->offset, len,
+ MSG_DONTWAIT | MSG_MORE);
if (unlikely(ret <= 0))
return ret;
@@ -1044,6 +1111,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
{
struct nvme_tcp_queue *queue = req->queue;
size_t offset = req->offset;
+ u32 h2cdata_left = req->h2cdata_left;
int ret;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
struct kvec iov = {
@@ -1061,7 +1129,10 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
return ret;
if (offset + ret == NVME_TCP_DIGEST_LENGTH) {
- nvme_tcp_done_send_req(queue);
+ if (h2cdata_left)
+ nvme_tcp_setup_h2c_data_pdu(req);
+ else
+ nvme_tcp_done_send_req(queue);
return 1;
}
@@ -1253,6 +1324,7 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
struct msghdr msg = {};
struct kvec iov;
bool ctrl_hdgst, ctrl_ddgst;
+ u32 maxh2cdata;
int ret;
icreq = kzalloc(sizeof(*icreq), GFP_KERNEL);
@@ -1336,6 +1408,14 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
goto free_icresp;
}
+ maxh2cdata = le32_to_cpu(icresp->maxdata);
+ if ((maxh2cdata % 4) || (maxh2cdata < NVME_TCP_MIN_MAXH2CDATA)) {
+ pr_err("queue %d: invalid maxh2cdata returned %u\n",
+ nvme_tcp_queue_id(queue), maxh2cdata);
+ goto free_icresp;
+ }
+ queue->maxh2cdata = maxh2cdata;
+
ret = 0;
free_icresp:
kfree(icresp);
@@ -1427,6 +1507,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
goto err_destroy_mutex;
}
+ nvme_tcp_reclassify_socket(queue->sock);
+
/* Single syn retry */
tcp_sock_set_syncnt(queue->sock->sk, 1);
@@ -1674,7 +1756,7 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl)
static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl)
{
- int i, ret = 0;
+ int i, ret;
for (i = 1; i < ctrl->queue_count; i++) {
ret = nvme_tcp_start_queue(ctrl, i);
@@ -1714,8 +1796,7 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
int i, ret;
for (i = 1; i < ctrl->queue_count; i++) {
- ret = nvme_tcp_alloc_queue(ctrl, i,
- ctrl->sqsize + 1);
+ ret = nvme_tcp_alloc_queue(ctrl, i, ctrl->sqsize + 1);
if (ret)
goto out_free_queues;
}
@@ -1825,11 +1906,9 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
goto out_free_io_queues;
}
- ctrl->connect_q = blk_mq_init_queue(ctrl->tagset);
- if (IS_ERR(ctrl->connect_q)) {
- ret = PTR_ERR(ctrl->connect_q);
+ ret = nvme_ctrl_init_connect_q(ctrl);
+ if (ret)
goto out_free_tag_set;
- }
}
ret = nvme_tcp_start_io_queues(ctrl);
@@ -2096,6 +2175,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
nvme_stop_keep_alive(ctrl);
+ flush_work(&ctrl->async_event_work);
nvme_tcp_teardown_io_queues(ctrl, false);
/* unquiesce to fail fast pending requests */
nvme_start_queues(ctrl);
@@ -2320,6 +2400,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
req->data_sent = 0;
req->pdu_len = 0;
req->pdu_sent = 0;
+ req->h2cdata_left = 0;
req->data_len = blk_rq_nr_phys_segments(rq) ?
blk_rq_payload_bytes(rq) : 0;
req->curr_bio = rq->bio;
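
Aside: the h2cdata_left/h2cdata_offset bookkeeping above exists because the controller's ICResp advertises MAXH2CDATA, and a single R2T may request more data than one H2CData PDU is allowed to carry. A hedged sketch of the chunking arithmetic (illustrative names):

    #include <linux/minmax.h>
    #include <linux/types.h>

    static void example_answer_r2t(u32 r2t_offset, u32 r2t_length, u32 maxh2cdata)
    {
        u32 offset = r2t_offset;
        u32 left = r2t_length;

        while (left) {
            u32 chunk = min(left, maxh2cdata);

            /* emit one H2CData PDU: data_offset = offset,
             * data_length = chunk; NVME_TCP_F_DATA_LAST is set
             * only on the final chunk (left == chunk) */
            offset += chunk;
            left -= chunk;
        }
    }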
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 6fb24746de06..46d0dab686dd 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -511,7 +511,11 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
goto done;
}
- nvmet_ns_revalidate(req->ns);
+ if (nvmet_ns_revalidate(req->ns)) {
+ mutex_lock(&req->ns->subsys->lock);
+ nvmet_ns_changed(req->ns->subsys, req->ns->nsid);
+ mutex_unlock(&req->ns->subsys->lock);
+ }
/*
* nuse = ncap = nsze isn't always true, but we have no way to find
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 091a0ca16361..8fedd1e052fe 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -60,10 +60,11 @@ static ssize_t nvmet_addr_adrfam_show(struct config_item *item, char *page)
for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) {
if (nvmet_addr_family[i].type == adrfam)
- return sprintf(page, "%s\n", nvmet_addr_family[i].name);
+ return snprintf(page, PAGE_SIZE, "%s\n",
+ nvmet_addr_family[i].name);
}
- return sprintf(page, "\n");
+ return snprintf(page, PAGE_SIZE, "\n");
}
static ssize_t nvmet_addr_adrfam_store(struct config_item *item,
@@ -93,10 +94,9 @@ CONFIGFS_ATTR(nvmet_, addr_adrfam);
static ssize_t nvmet_addr_portid_show(struct config_item *item,
char *page)
{
- struct nvmet_port *port = to_nvmet_port(item);
+ __le16 portid = to_nvmet_port(item)->disc_addr.portid;
- return snprintf(page, PAGE_SIZE, "%d\n",
- le16_to_cpu(port->disc_addr.portid));
+ return snprintf(page, PAGE_SIZE, "%d\n", le16_to_cpu(portid));
}
static ssize_t nvmet_addr_portid_store(struct config_item *item,
@@ -124,8 +124,7 @@ static ssize_t nvmet_addr_traddr_show(struct config_item *item,
{
struct nvmet_port *port = to_nvmet_port(item);
- return snprintf(page, PAGE_SIZE, "%s\n",
- port->disc_addr.traddr);
+ return snprintf(page, PAGE_SIZE, "%s\n", port->disc_addr.traddr);
}
static ssize_t nvmet_addr_traddr_store(struct config_item *item,
@@ -162,10 +161,11 @@ static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page)
for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) {
if (treq == nvmet_addr_treq[i].type)
- return sprintf(page, "%s\n", nvmet_addr_treq[i].name);
+ return snprintf(page, PAGE_SIZE, "%s\n",
+ nvmet_addr_treq[i].name);
}
- return sprintf(page, "\n");
+ return snprintf(page, PAGE_SIZE, "\n");
}
static ssize_t nvmet_addr_treq_store(struct config_item *item,
@@ -199,8 +199,7 @@ static ssize_t nvmet_addr_trsvcid_show(struct config_item *item,
{
struct nvmet_port *port = to_nvmet_port(item);
- return snprintf(page, PAGE_SIZE, "%s\n",
- port->disc_addr.trsvcid);
+ return snprintf(page, PAGE_SIZE, "%s\n", port->disc_addr.trsvcid);
}
static ssize_t nvmet_addr_trsvcid_store(struct config_item *item,
@@ -284,7 +283,8 @@ static ssize_t nvmet_addr_trtype_show(struct config_item *item,
for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) {
if (port->disc_addr.trtype == nvmet_transport[i].type)
- return sprintf(page, "%s\n", nvmet_transport[i].name);
+ return snprintf(page, PAGE_SIZE,
+ "%s\n", nvmet_transport[i].name);
}
return sprintf(page, "\n");
@@ -586,7 +586,8 @@ static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item,
mutex_unlock(&ns->subsys->lock);
return -EINVAL;
}
- nvmet_ns_revalidate(ns);
+ if (nvmet_ns_revalidate(ns))
+ nvmet_ns_changed(ns->subsys, ns->nsid);
mutex_unlock(&ns->subsys->lock);
return count;
}
@@ -1233,44 +1234,6 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
}
CONFIGFS_ATTR(nvmet_subsys_, attr_model);
-static ssize_t nvmet_subsys_attr_discovery_nqn_show(struct config_item *item,
- char *page)
-{
- return snprintf(page, PAGE_SIZE, "%s\n",
- nvmet_disc_subsys->subsysnqn);
-}
-
-static ssize_t nvmet_subsys_attr_discovery_nqn_store(struct config_item *item,
- const char *page, size_t count)
-{
- struct nvmet_subsys *subsys = to_subsys(item);
- char *subsysnqn;
- int len;
-
- len = strcspn(page, "\n");
- if (!len)
- return -EINVAL;
-
- subsysnqn = kmemdup_nul(page, len, GFP_KERNEL);
- if (!subsysnqn)
- return -ENOMEM;
-
- /*
- * The discovery NQN must be different from subsystem NQN.
- */
- if (!strcmp(subsysnqn, subsys->subsysnqn)) {
- kfree(subsysnqn);
- return -EBUSY;
- }
- down_write(&nvmet_config_sem);
- kfree(nvmet_disc_subsys->subsysnqn);
- nvmet_disc_subsys->subsysnqn = subsysnqn;
- up_write(&nvmet_config_sem);
-
- return count;
-}
-CONFIGFS_ATTR(nvmet_subsys_, attr_discovery_nqn);
-
#ifdef CONFIG_BLK_DEV_INTEGRITY
static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item,
char *page)
@@ -1300,7 +1263,6 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_cntlid_min,
&nvmet_subsys_attr_attr_cntlid_max,
&nvmet_subsys_attr_attr_model,
- &nvmet_subsys_attr_attr_discovery_nqn,
#ifdef CONFIG_BLK_DEV_INTEGRITY
&nvmet_subsys_attr_attr_pi_enable,
#endif
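
Aside: the sprintf()-to-snprintf() conversions make the configfs contract explicit — a ->show() callback writes into a single page, so PAGE_SIZE is the hard bound. Minimal sketch (illustrative names):

    #include <linux/configfs.h>

    static ssize_t example_attr_show(struct config_item *item, char *page)
    {
        /* bounded, unlike the sprintf() it replaces */
        return snprintf(page, PAGE_SIZE, "%s\n", "value");
    }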
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 5119c687de68..64c2d2f3e25c 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -531,7 +531,7 @@ static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
ns->nsid);
}
-void nvmet_ns_revalidate(struct nvmet_ns *ns)
+bool nvmet_ns_revalidate(struct nvmet_ns *ns)
{
loff_t oldsize = ns->size;
@@ -540,8 +540,7 @@ void nvmet_ns_revalidate(struct nvmet_ns *ns)
else
nvmet_file_ns_revalidate(ns);
- if (oldsize != ns->size)
- nvmet_ns_changed(ns->subsys, ns->nsid);
+ return oldsize != ns->size;
}
int nvmet_ns_enable(struct nvmet_ns *ns)
@@ -1400,7 +1399,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
if (subsys->cntlid_min > subsys->cntlid_max)
goto out_free_sqs;
- ret = ida_simple_get(&cntlid_ida,
+ ret = ida_alloc_range(&cntlid_ida,
subsys->cntlid_min, subsys->cntlid_max,
GFP_KERNEL);
if (ret < 0) {
@@ -1459,7 +1458,7 @@ static void nvmet_ctrl_free(struct kref *ref)
flush_work(&ctrl->async_event_work);
cancel_work_sync(&ctrl->fatal_err_work);
- ida_simple_remove(&cntlid_ida, ctrl->cntlid);
+ ida_free(&cntlid_ida, ctrl->cntlid);
nvmet_async_events_free(ctrl);
kfree(ctrl->sqs);
@@ -1493,8 +1492,7 @@ static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
if (!port)
return NULL;
- if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn) ||
- !strcmp(nvmet_disc_subsys->subsysnqn, subsysnqn)) {
+ if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
return NULL;
return nvmet_disc_subsys;
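
Aside: here the controller ID must stay inside the subsystem's configured window, so the conversion target is ida_alloc_range() rather than plain ida_alloc(). Sketch (illustrative wrapper):

    #include <linux/idr.h>

    static int example_alloc_cntlid(struct ida *ida, u16 min, u16 max)
    {
        /* smallest free ID in [min, max] inclusive, or -ENOSPC/-ENOMEM */
        return ida_alloc_range(ida, min, max, GFP_KERNEL);
    }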
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 22b5108168a6..de90001fc5c4 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1115,7 +1115,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
if (!assoc)
return NULL;
- idx = ida_simple_get(&tgtport->assoc_cnt, 0, 0, GFP_KERNEL);
+ idx = ida_alloc(&tgtport->assoc_cnt, GFP_KERNEL);
if (idx < 0)
goto out_free_assoc;
@@ -1157,7 +1157,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
out_put:
nvmet_fc_tgtport_put(tgtport);
out_ida:
- ida_simple_remove(&tgtport->assoc_cnt, idx);
+ ida_free(&tgtport->assoc_cnt, idx);
out_free_assoc:
kfree(assoc);
return NULL;
@@ -1183,7 +1183,7 @@ nvmet_fc_target_assoc_free(struct kref *ref)
/* if pending Rcv Disconnect Association LS, send rsp now */
if (oldls)
nvmet_fc_xmt_ls_rsp(tgtport, oldls);
- ida_simple_remove(&tgtport->assoc_cnt, assoc->a_id);
+ ida_free(&tgtport->assoc_cnt, assoc->a_id);
dev_info(tgtport->dev,
"{%d:%d} Association freed\n",
tgtport->fc_target_port.port_num, assoc->a_id);
@@ -1341,7 +1341,7 @@ nvmet_fc_portentry_rebind_tgt(struct nvmet_fc_tgtport *tgtport)
}
/**
- * nvme_fc_register_targetport - transport entry point called by an
+ * nvmet_fc_register_targetport - transport entry point called by an
* LLDD to register the existence of a local
* NVME subsystem FC port.
* @pinfo: pointer to information about the port to be registered
@@ -1383,7 +1383,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo,
goto out_regtgt_failed;
}
- idx = ida_simple_get(&nvmet_fc_tgtport_cnt, 0, 0, GFP_KERNEL);
+ idx = ida_alloc(&nvmet_fc_tgtport_cnt, GFP_KERNEL);
if (idx < 0) {
ret = -ENOSPC;
goto out_fail_kfree;
@@ -1433,7 +1433,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo,
out_free_newrec:
put_device(dev);
out_ida_put:
- ida_simple_remove(&nvmet_fc_tgtport_cnt, idx);
+ ida_free(&nvmet_fc_tgtport_cnt, idx);
out_fail_kfree:
kfree(newrec);
out_regtgt_failed:
@@ -1460,7 +1460,7 @@ nvmet_fc_free_tgtport(struct kref *ref)
/* let the LLDD know we've finished tearing it down */
tgtport->ops->targetport_delete(&tgtport->fc_target_port);
- ida_simple_remove(&nvmet_fc_tgtport_cnt,
+ ida_free(&nvmet_fc_tgtport_cnt,
tgtport->fc_target_port.port_num);
ida_destroy(&tgtport->assoc_cnt);
@@ -1604,7 +1604,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
}
/**
- * nvme_fc_unregister_targetport - transport entry point called by an
+ * nvmet_fc_unregister_targetport - transport entry point called by an
* LLDD to deregister/remove a previously
* registered local NVME subsystem FC port.
* @target_port: pointer to the (registered) target port that is to be
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 70ca9dfc1771..d886c2c59554 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -6,6 +6,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
+#include <linux/memremap.h>
#include <linux/module.h>
#include "nvmet.h"
@@ -76,6 +77,14 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
int ret;
+ /*
+ * When the buffered_io namespace attribute is enabled, the user wants this
+ * block device to be used as a file, so the block device can take
+ * advantage of the page cache.
+ */
+ if (ns->buffered_io)
+ return -ENOTBLK;
+
ns->bdev = blkdev_get_by_path(ns->device_path,
FMODE_READ | FMODE_WRITE, NULL);
if (IS_ERR(ns->bdev)) {
@@ -267,15 +276,15 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
if (nvmet_use_inline_bvec(req)) {
bio = &req->b.inline_bio;
- bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+ bio_init(bio, req->ns->bdev, req->inline_bvec,
+ ARRAY_SIZE(req->inline_bvec), op);
} else {
- bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt));
+ bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), op,
+ GFP_KERNEL);
}
- bio_set_dev(bio, req->ns->bdev);
bio->bi_iter.bi_sector = sector;
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
- bio->bi_opf = op;
blk_start_plug(&plug);
if (req->metadata_len)
@@ -296,10 +305,9 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
}
}
- bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt));
- bio_set_dev(bio, req->ns->bdev);
+ bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
+ op, GFP_KERNEL);
bio->bi_iter.bi_sector = sector;
- bio->bi_opf = op;
bio_chain(bio, prev);
submit_bio(prev);
@@ -328,11 +336,10 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req)
if (!nvmet_check_transfer_len(req, 0))
return;
- bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
- bio_set_dev(bio, req->ns->bdev);
+ bio_init(bio, req->ns->bdev, req->inline_bvec,
+ ARRAY_SIZE(req->inline_bvec), REQ_OP_WRITE | REQ_PREFLUSH);
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
- bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
submit_bio(bio);
}
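
Aside: these hunks adopt the reworked bio allocation API — the target block device and the operation are now parameters of bio_alloc()/bio_init() instead of being patched in afterwards via bio_set_dev() and bi_opf. Sketch of the before/after (illustrative wrapper):

    #include <linux/bio.h>

    static struct bio *example_get_bio(struct block_device *bdev,
                                       unsigned short nr_vecs, unsigned int op)
    {
        /* was: bio = bio_alloc(GFP_KERNEL, nr_vecs);
         *      bio_set_dev(bio, bdev);
         *      bio->bi_opf = op;
         */
        return bio_alloc(bdev, nr_vecs, op, GFP_KERNEL);
    }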
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 6be6e59d273b..6485dc8eb974 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -14,16 +14,9 @@
#define NVMET_MAX_MPOOL_BVEC 16
#define NVMET_MIN_MPOOL_OBJ 16
-int nvmet_file_ns_revalidate(struct nvmet_ns *ns)
+void nvmet_file_ns_revalidate(struct nvmet_ns *ns)
{
- struct kstat stat;
- int ret;
-
- ret = vfs_getattr(&ns->file->f_path, &stat, STATX_SIZE,
- AT_STATX_FORCE_SYNC);
- if (!ret)
- ns->size = stat.size;
- return ret;
+ ns->size = i_size_read(ns->file->f_mapping->host);
}
void nvmet_file_ns_disable(struct nvmet_ns *ns)
@@ -43,7 +36,7 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns)
int nvmet_file_ns_enable(struct nvmet_ns *ns)
{
int flags = O_RDWR | O_LARGEFILE;
- int ret;
+ int ret = 0;
if (!ns->buffered_io)
flags |= O_DIRECT;
@@ -57,9 +50,7 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
return ret;
}
- ret = nvmet_file_ns_revalidate(ns);
- if (ret)
- goto err;
+ nvmet_file_ns_revalidate(ns);
/*
* i_blkbits can be greater than the universally accepted upper bound,
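
Aside: replacing vfs_getattr() with i_size_read() also removes the failure path, which is why nvmet_file_ns_revalidate() can return void. Sketch of the accessor (f_mapping->host is the file's backing inode):

    #include <linux/fs.h>

    static loff_t example_file_size(struct file *file)
    {
        /* tearing-safe read of inode->i_size; cannot fail */
        return i_size_read(file->f_mapping->host);
    }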
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index eb1094254c82..23f9d6f88804 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -543,11 +543,9 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
if (ret)
goto out_destroy_queues;
- ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
- if (IS_ERR(ctrl->ctrl.connect_q)) {
- ret = PTR_ERR(ctrl->ctrl.connect_q);
+ ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl));
+ if (ret)
goto out_free_tagset;
- }
ret = nvme_loop_connect_io_queues(ctrl);
if (ret)
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index af193423c10b..d910c6aad4b6 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -541,8 +541,8 @@ u16 nvmet_bdev_flush(struct nvmet_req *req);
u16 nvmet_file_flush(struct nvmet_req *req);
void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid);
void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns);
-int nvmet_file_ns_revalidate(struct nvmet_ns *ns);
-void nvmet_ns_revalidate(struct nvmet_ns *ns);
+void nvmet_file_ns_revalidate(struct nvmet_ns *ns);
+bool nvmet_ns_revalidate(struct nvmet_ns *ns);
u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts);
bool nvmet_bdev_zns_enable(struct nvmet_ns *ns);
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 9e5b89ae29df..a4de1e0d518b 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -206,12 +206,13 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
if (nvmet_use_inline_bvec(req)) {
bio = &req->p.inline_bio;
- bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+ bio_init(bio, NULL, req->inline_bvec,
+ ARRAY_SIZE(req->inline_bvec), req_op(rq));
} else {
- bio = bio_alloc(GFP_KERNEL, bio_max_segs(req->sg_cnt));
+ bio = bio_alloc(NULL, bio_max_segs(req->sg_cnt), req_op(rq),
+ GFP_KERNEL);
bio->bi_end_io = bio_put;
}
- bio->bi_opf = req_op(rq);
for_each_sg(req->sg, sg, req->sg_cnt, i) {
if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length,
@@ -253,11 +254,12 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
timeout = nvmet_req_subsys(req)->admin_timeout;
}
- rq = nvme_alloc_request(q, req->cmd, 0);
+ rq = blk_mq_alloc_request(q, nvme_req_op(req->cmd), 0);
if (IS_ERR(rq)) {
status = NVME_SC_INTERNAL;
goto out_put_ns;
}
+ nvme_init_request(rq, req->cmd);
if (timeout)
rq->timeout = timeout;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 1deb4043e242..2446d0918a41 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1356,7 +1356,7 @@ static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue)
!queue->host_qid);
}
nvmet_rdma_free_rsps(queue);
- ida_simple_remove(&nvmet_rdma_queue_ida, queue->idx);
+ ida_free(&nvmet_rdma_queue_ida, queue->idx);
kfree(queue);
}
@@ -1459,7 +1459,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
spin_lock_init(&queue->rsps_lock);
INIT_LIST_HEAD(&queue->queue_list);
- queue->idx = ida_simple_get(&nvmet_rdma_queue_ida, 0, 0, GFP_KERNEL);
+ queue->idx = ida_alloc(&nvmet_rdma_queue_ida, GFP_KERNEL);
if (queue->idx < 0) {
ret = NVME_RDMA_CM_NO_RSC;
goto out_destroy_sq;
@@ -1510,7 +1510,7 @@ out_free_cmds:
out_free_responses:
nvmet_rdma_free_rsps(queue);
out_ida_remove:
- ida_simple_remove(&nvmet_rdma_queue_ida, queue->idx);
+ ida_free(&nvmet_rdma_queue_ida, queue->idx);
out_destroy_sq:
nvmet_sq_destroy(&queue->nvme_sq);
out_free_queue:
@@ -1703,7 +1703,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
}
/**
- * nvme_rdma_device_removal() - Handle RDMA device removal
+ * nvmet_rdma_device_removal() - Handle RDMA device removal
* @cm_id: rdma_cm id, used for nvmet port
* @queue: nvmet rdma queue (cm id qp_context)
*
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 7c1c43ce466b..83ca577f72be 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1473,7 +1473,7 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
nvmet_tcp_free_cmds(queue);
if (queue->hdr_digest || queue->data_digest)
nvmet_tcp_free_crypto(queue);
- ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx);
+ ida_free(&nvmet_tcp_queue_ida, queue->idx);
page = virt_to_head_page(queue->pf_cache.va);
__page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
@@ -1613,7 +1613,7 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
init_llist_head(&queue->resp_list);
INIT_LIST_HEAD(&queue->resp_send_list);
- queue->idx = ida_simple_get(&nvmet_tcp_queue_ida, 0, 0, GFP_KERNEL);
+ queue->idx = ida_alloc(&nvmet_tcp_queue_ida, GFP_KERNEL);
if (queue->idx < 0) {
ret = queue->idx;
goto out_free_queue;
@@ -1646,7 +1646,7 @@ out_destroy_sq:
out_free_connect:
nvmet_tcp_free_cmd(&queue->connect);
out_ida_remove:
- ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx);
+ ida_free(&nvmet_tcp_queue_ida, queue->idx);
out_free_queue:
kfree(queue);
return ret;
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index 46bc30fe85d2..e34718b09550 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -123,7 +123,11 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
goto done;
}
- nvmet_ns_revalidate(req->ns);
+ if (nvmet_ns_revalidate(req->ns)) {
+ mutex_lock(&req->ns->subsys->lock);
+ nvmet_ns_changed(req->ns->subsys, req->ns->nsid);
+ mutex_unlock(&req->ns->subsys->lock);
+ }
zsze = (bdev_zone_sectors(req->ns->bdev) << 9) >>
req->ns->blksize_shift;
id_zns->lbafe[0].zsze = cpu_to_le64(zsze);
@@ -412,10 +416,10 @@ static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req)
while (sector < get_capacity(bdev->bd_disk)) {
if (test_bit(blk_queue_zone_no(q, sector), d.zbitmap)) {
- bio = blk_next_bio(bio, 0, GFP_KERNEL);
- bio->bi_opf = zsa_req_op(req->cmd->zms.zsa) | REQ_SYNC;
+ bio = blk_next_bio(bio, bdev, 0,
+ zsa_req_op(req->cmd->zms.zsa) | REQ_SYNC,
+ GFP_KERNEL);
bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, bdev);
/* This may take a while, so be nice to others */
cond_resched();
}
@@ -522,6 +526,7 @@ static void nvmet_bdev_zone_append_bio_done(struct bio *bio)
void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
{
sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);
+ const unsigned int op = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
u16 status = NVME_SC_SUCCESS;
unsigned int total_len = 0;
struct scatterlist *sg;
@@ -551,14 +556,13 @@ void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
if (nvmet_use_inline_bvec(req)) {
bio = &req->z.inline_bio;
- bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+ bio_init(bio, req->ns->bdev, req->inline_bvec,
+ ARRAY_SIZE(req->inline_bvec), op);
} else {
- bio = bio_alloc(GFP_KERNEL, req->sg_cnt);
+ bio = bio_alloc(req->ns->bdev, req->sg_cnt, op, GFP_KERNEL);
}
- bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
bio->bi_end_io = nvmet_bdev_zone_append_bio_done;
- bio_set_dev(bio, req->ns->bdev);
bio->bi_iter.bi_sector = sect;
bio->bi_private = req;
if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index 23a38dcf0fc4..9fd1602b539d 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -771,7 +771,7 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
if (config->wp_gpio)
nvmem->wp_gpio = config->wp_gpio;
- else
+ else if (!config->ignore_wp)
nvmem->wp_gpio = gpiod_get_optional(config->dev, "wp",
GPIOD_OUT_HIGH);
if (IS_ERR(nvmem->wp_gpio)) {
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index ad85ff6474ff..ec315b060cd5 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -648,8 +648,8 @@ void __init early_init_fdt_scan_reserved_mem(void)
}
fdt_scan_reserved_mem();
- fdt_init_reserved_mem();
fdt_reserve_elfcorehdr();
+ fdt_init_reserved_mem();
}
/**
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 9c0fb962c22b..75caa6f5d36f 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -22,6 +22,7 @@
#include <linux/slab.h>
#include <linux/memblock.h>
#include <linux/kmemleak.h>
+#include <linux/cma.h>
#include "of_private.h"
@@ -116,12 +117,8 @@ static int __init __reserved_mem_alloc_size(unsigned long node,
if (IS_ENABLED(CONFIG_CMA)
&& of_flat_dt_is_compatible(node, "shared-dma-pool")
&& of_get_flat_dt_prop(node, "reusable", NULL)
- && !nomap) {
- unsigned long order =
- max_t(unsigned long, MAX_ORDER - 1, pageblock_order);
-
- align = max(align, (phys_addr_t)PAGE_SIZE << order);
- }
+ && !nomap)
+ align = max_t(phys_addr_t, align, CMA_MIN_ALIGNMENT_BYTES);
prop = of_get_flat_dt_prop(node, "alloc-ranges", &len);
if (prop) {
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 70992103c07d..2c2fb161b572 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -513,24 +513,24 @@ static void __init of_unittest_parse_phandle_with_args(void)
memset(&args, 0, sizeof(args));
EXPECT_BEGIN(KERN_INFO,
- "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found -1");
+ "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found 1");
rc = of_parse_phandle_with_args(np, "phandle-list-bad-args",
"#phandle-cells", 1, &args);
EXPECT_END(KERN_INFO,
- "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found -1");
+ "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found 1");
unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
EXPECT_BEGIN(KERN_INFO,
- "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found -1");
+ "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found 1");
rc = of_count_phandle_with_args(np, "phandle-list-bad-args",
"#phandle-cells");
EXPECT_END(KERN_INFO,
- "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found -1");
+ "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found 1");
unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
}
@@ -670,12 +670,12 @@ static void __init of_unittest_parse_phandle_with_args_map(void)
memset(&args, 0, sizeof(args));
EXPECT_BEGIN(KERN_INFO,
- "OF: /testcase-data/phandle-tests/consumer-b: #phandle-cells = 2 found -1");
+ "OF: /testcase-data/phandle-tests/consumer-b: #phandle-cells = 2 found 1");
rc = of_parse_phandle_with_args_map(np, "phandle-list-bad-args",
"phandle", 1, &args);
EXPECT_END(KERN_INFO,
- "OF: /testcase-data/phandle-tests/consumer-b: #phandle-cells = 2 found -1");
+ "OF: /testcase-data/phandle-tests/consumer-b: #phandle-cells = 2 found 1");
unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
}
@@ -1257,12 +1257,12 @@ static void __init of_unittest_platform_populate(void)
unittest(pdev, "device 2 creation failed\n");
EXPECT_BEGIN(KERN_INFO,
- "platform testcase-data:testcase-device2: IRQ index 0 not found");
+ "platform testcase-data:testcase-device2: error -ENXIO: IRQ index 0 not found");
irq = platform_get_irq(pdev, 0);
EXPECT_END(KERN_INFO,
- "platform testcase-data:testcase-device2: IRQ index 0 not found");
+ "platform testcase-data:testcase-device2: error -ENXIO: IRQ index 0 not found");
unittest(irq < 0 && irq != -EPROBE_DEFER,
"device parsing error failed - %d\n", irq);
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index 059566f54429..9be007c9420f 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -1003,7 +1003,7 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
ioc->usg_calls++;
#endif
- while(sg_dma_len(sglist) && nents--) {
+ while (nents && sg_dma_len(sglist)) {
#ifdef CCIO_COLLECT_STATS
ioc->usg_pages += sg_dma_len(sglist) >> PAGE_SHIFT;
@@ -1011,6 +1011,7 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
ccio_unmap_page(dev, sg_dma_address(sglist),
sg_dma_len(sglist), direction, 0);
++sglist;
+ nents--;
}
DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents);
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index e60690d38d67..374b9199878d 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -1047,7 +1047,7 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
- while (sg_dma_len(sglist) && nents--) {
+ while (nents && sg_dma_len(sglist)) {
sba_unmap_page(dev, sg_dma_address(sglist), sg_dma_len(sglist),
direction, 0);
@@ -1056,6 +1056,7 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
ioc->usingle_calls--; /* kluge since call is unmap_sg() */
#endif
++sglist;
+ nents--;
}
DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents);
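
Aside: both parisc hunks fix the same off-by-one pattern. With `while (sg_dma_len(sglist) && nents--)`, the pass after the last valid entry reads sg_dma_len() one element past the end of the list before nents is ever checked. Testing the count first, and decrementing inside the body, keeps every access within the nents valid entries. Sketch:

    #include <linux/scatterlist.h>

    static void example_unmap_sg(struct scatterlist *sglist, int nents)
    {
        while (nents && sg_dma_len(sglist)) {  /* count checked first */
            /* ... unmap sg_dma_address(sglist) / sg_dma_len(sglist) ... */
            ++sglist;
            nents--;                           /* no decrement on loop exit */
        }
    }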
diff --git a/drivers/pci/controller/cadence/pci-j721e.c b/drivers/pci/controller/cadence/pci-j721e.c
index 489586a4cdc7..768d33f9ebc8 100644
--- a/drivers/pci/controller/cadence/pci-j721e.c
+++ b/drivers/pci/controller/cadence/pci-j721e.c
@@ -356,8 +356,8 @@ static int j721e_pcie_probe(struct platform_device *pdev)
const struct j721e_pcie_data *data;
struct cdns_pcie *cdns_pcie;
struct j721e_pcie *pcie;
- struct cdns_pcie_rc *rc;
- struct cdns_pcie_ep *ep;
+ struct cdns_pcie_rc *rc = NULL;
+ struct cdns_pcie_ep *ep = NULL;
struct gpio_desc *gpiod;
void __iomem *base;
struct clk *clk;
@@ -376,6 +376,46 @@ static int j721e_pcie_probe(struct platform_device *pdev)
if (!pcie)
return -ENOMEM;
+ switch (mode) {
+ case PCI_MODE_RC:
+ if (!IS_ENABLED(CONFIG_PCIE_CADENCE_HOST))
+ return -ENODEV;
+
+ bridge = devm_pci_alloc_host_bridge(dev, sizeof(*rc));
+ if (!bridge)
+ return -ENOMEM;
+
+ if (!data->byte_access_allowed)
+ bridge->ops = &cdns_ti_pcie_host_ops;
+ rc = pci_host_bridge_priv(bridge);
+ rc->quirk_retrain_flag = data->quirk_retrain_flag;
+ rc->quirk_detect_quiet_flag = data->quirk_detect_quiet_flag;
+
+ cdns_pcie = &rc->pcie;
+ cdns_pcie->dev = dev;
+ cdns_pcie->ops = &j721e_pcie_ops;
+ pcie->cdns_pcie = cdns_pcie;
+ break;
+ case PCI_MODE_EP:
+ if (!IS_ENABLED(CONFIG_PCIE_CADENCE_EP))
+ return -ENODEV;
+
+ ep = devm_kzalloc(dev, sizeof(*ep), GFP_KERNEL);
+ if (!ep)
+ return -ENOMEM;
+
+ ep->quirk_detect_quiet_flag = data->quirk_detect_quiet_flag;
+
+ cdns_pcie = &ep->pcie;
+ cdns_pcie->dev = dev;
+ cdns_pcie->ops = &j721e_pcie_ops;
+ pcie->cdns_pcie = cdns_pcie;
+ break;
+ default:
+ dev_err(dev, "INVALID device type %d\n", mode);
+ return 0;
+ }
+
pcie->mode = mode;
pcie->linkdown_irq_regfield = data->linkdown_irq_regfield;
@@ -426,28 +466,6 @@ static int j721e_pcie_probe(struct platform_device *pdev)
switch (mode) {
case PCI_MODE_RC:
- if (!IS_ENABLED(CONFIG_PCIE_CADENCE_HOST)) {
- ret = -ENODEV;
- goto err_get_sync;
- }
-
- bridge = devm_pci_alloc_host_bridge(dev, sizeof(*rc));
- if (!bridge) {
- ret = -ENOMEM;
- goto err_get_sync;
- }
-
- if (!data->byte_access_allowed)
- bridge->ops = &cdns_ti_pcie_host_ops;
- rc = pci_host_bridge_priv(bridge);
- rc->quirk_retrain_flag = data->quirk_retrain_flag;
- rc->quirk_detect_quiet_flag = data->quirk_detect_quiet_flag;
-
- cdns_pcie = &rc->pcie;
- cdns_pcie->dev = dev;
- cdns_pcie->ops = &j721e_pcie_ops;
- pcie->cdns_pcie = cdns_pcie;
-
gpiod = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
if (IS_ERR(gpiod)) {
ret = PTR_ERR(gpiod);
@@ -497,23 +515,6 @@ static int j721e_pcie_probe(struct platform_device *pdev)
break;
case PCI_MODE_EP:
- if (!IS_ENABLED(CONFIG_PCIE_CADENCE_EP)) {
- ret = -ENODEV;
- goto err_get_sync;
- }
-
- ep = devm_kzalloc(dev, sizeof(*ep), GFP_KERNEL);
- if (!ep) {
- ret = -ENOMEM;
- goto err_get_sync;
- }
- ep->quirk_detect_quiet_flag = data->quirk_detect_quiet_flag;
-
- cdns_pcie = &ep->pcie;
- cdns_pcie->dev = dev;
- cdns_pcie->ops = &j721e_pcie_ops;
- pcie->cdns_pcie = cdns_pcie;
-
ret = cdns_pcie_init_phy(dev, cdns_pcie);
if (ret) {
dev_err(dev, "Failed to init phy\n");
@@ -525,8 +526,6 @@ static int j721e_pcie_probe(struct platform_device *pdev)
goto err_pcie_setup;
break;
- default:
- dev_err(dev, "INVALID device type %d\n", mode);
}
return 0;
diff --git a/drivers/pci/controller/dwc/pcie-kirin.c b/drivers/pci/controller/dwc/pcie-kirin.c
index fa6886d66488..c625fc6bb287 100644
--- a/drivers/pci/controller/dwc/pcie-kirin.c
+++ b/drivers/pci/controller/dwc/pcie-kirin.c
@@ -756,22 +756,28 @@ static int __exit kirin_pcie_remove(struct platform_device *pdev)
return 0;
}
+struct kirin_pcie_data {
+ enum pcie_kirin_phy_type phy_type;
+};
+
+static const struct kirin_pcie_data kirin_960_data = {
+ .phy_type = PCIE_KIRIN_INTERNAL_PHY,
+};
+
+static const struct kirin_pcie_data kirin_970_data = {
+ .phy_type = PCIE_KIRIN_EXTERNAL_PHY,
+};
+
static const struct of_device_id kirin_pcie_match[] = {
- {
- .compatible = "hisilicon,kirin960-pcie",
- .data = (void *)PCIE_KIRIN_INTERNAL_PHY
- },
- {
- .compatible = "hisilicon,kirin970-pcie",
- .data = (void *)PCIE_KIRIN_EXTERNAL_PHY
- },
+ { .compatible = "hisilicon,kirin960-pcie", .data = &kirin_960_data },
+ { .compatible = "hisilicon,kirin970-pcie", .data = &kirin_970_data },
{},
};
static int kirin_pcie_probe(struct platform_device *pdev)
{
- enum pcie_kirin_phy_type phy_type;
struct device *dev = &pdev->dev;
+ const struct kirin_pcie_data *data;
struct kirin_pcie *kirin_pcie;
struct dw_pcie *pci;
int ret;
@@ -781,13 +787,12 @@ static int kirin_pcie_probe(struct platform_device *pdev)
return -EINVAL;
}
- phy_type = (long)of_device_get_match_data(dev);
- if (!phy_type) {
+ data = of_device_get_match_data(dev);
+ if (!data) {
dev_err(dev, "OF data missing\n");
return -EINVAL;
}
-
kirin_pcie = devm_kzalloc(dev, sizeof(struct kirin_pcie), GFP_KERNEL);
if (!kirin_pcie)
return -ENOMEM;
@@ -800,7 +805,7 @@ static int kirin_pcie_probe(struct platform_device *pdev)
pci->ops = &kirin_dw_pcie_ops;
pci->pp.ops = &kirin_pcie_host_ops;
kirin_pcie->pci = pci;
- kirin_pcie->type = phy_type;
+ kirin_pcie->type = data->phy_type;
ret = kirin_pcie_get_resource(kirin_pcie, pdev);
if (ret)
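
The kirin change removes a real ambiguity: with the enum stashed directly in .data, of_device_get_match_data() returning 0 could mean either "no match data" or a legitimate phy type whose value happens to be 0. Pointing .data at a per-variant struct makes NULL unambiguous. A minimal sketch of the pattern, with hypothetical driver names:

#include <linux/of_device.h>
#include <linux/platform_device.h>

struct foo_data {
	int phy_type;			/* per-variant configuration */
};

static const struct foo_data foo_v1_data = { .phy_type = 1 };

static const struct of_device_id foo_of_match[] = {
	{ .compatible = "vendor,foo-v1", .data = &foo_v1_data },
	{ }
};

static int foo_probe(struct platform_device *pdev)
{
	const struct foo_data *data = of_device_get_match_data(&pdev->dev);

	if (!data)	/* NULL now unambiguously means "no OF data" */
		return -EINVAL;

	return data->phy_type;
}
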
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 20ea2ee330b8..ae0bc2fee4ca 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -2155,8 +2155,17 @@ static void hv_pci_assign_numa_node(struct hv_pcibus_device *hbus)
if (!hv_dev)
continue;
- if (hv_dev->desc.flags & HV_PCI_DEVICE_FLAG_NUMA_AFFINITY)
- set_dev_node(&dev->dev, hv_dev->desc.virtual_numa_node);
+ if (hv_dev->desc.flags & HV_PCI_DEVICE_FLAG_NUMA_AFFINITY &&
+ hv_dev->desc.virtual_numa_node < num_possible_nodes())
+ /*
+ * The kernel may boot with some NUMA nodes offline
+ * (e.g. in a KDUMP kernel) or with NUMA disabled via
+ * "numa=off". In those cases, adjust the host provided
+ * NUMA node to a valid NUMA node used by the kernel.
+ */
+ set_dev_node(&dev->dev,
+ numa_map_to_online_node(
+ hv_dev->desc.virtual_numa_node));
put_pcichild(hv_dev);
}
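
numa_map_to_online_node() collapses a possible-but-offline node onto a nearby online one, which is what the Hyper-V hunk relies on when the host hands out a node the kernel did not bring up (kdump, numa=off). One possible shape of that guard as a standalone helper; the helper name is hypothetical:

#include <linux/numa.h>
#include <linux/nodemask.h>

static int foo_sanitize_node(unsigned int proposed)
{
	if (proposed >= num_possible_nodes())
		return NUMA_NO_NODE;		/* out of range entirely */

	/* Map an offline node onto one the kernel actually uses. */
	return numa_map_to_online_node(proposed);
}
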
diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
index 71258ea3d35f..f8e82c5e2d87 100644
--- a/drivers/pci/controller/pci-mvebu.c
+++ b/drivers/pci/controller/pci-mvebu.c
@@ -1329,7 +1329,8 @@ static int mvebu_pcie_probe(struct platform_device *pdev)
* indirectly via kernel emulated PCI bridge driver.
*/
mvebu_pcie_setup_hw(port);
- mvebu_pcie_set_local_dev_nr(port, 0);
+ mvebu_pcie_set_local_dev_nr(port, 1);
+ mvebu_pcie_set_local_bus_nr(port, 0);
}
pcie->nports = i;
diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c
index 854d95163112..a2c3c207a04b 100644
--- a/drivers/pci/controller/pcie-apple.c
+++ b/drivers/pci/controller/pcie-apple.c
@@ -219,7 +219,7 @@ static int apple_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
if (hwirq < 0)
return -ENOSPC;
- fwspec.param[1] += hwirq;
+ fwspec.param[fwspec.param_count - 2] += hwirq;
ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &fwspec);
if (ret)
diff --git a/drivers/pci/controller/pcie-mt7621.c b/drivers/pci/controller/pcie-mt7621.c
index 3824862ea144..33eb37a2225c 100644
--- a/drivers/pci/controller/pcie-mt7621.c
+++ b/drivers/pci/controller/pcie-mt7621.c
@@ -109,15 +109,6 @@ static inline void pcie_write(struct mt7621_pcie *pcie, u32 val, u32 reg)
writel_relaxed(val, pcie->base + reg);
}
-static inline void pcie_rmw(struct mt7621_pcie *pcie, u32 reg, u32 clr, u32 set)
-{
- u32 val = readl_relaxed(pcie->base + reg);
-
- val &= ~clr;
- val |= set;
- writel_relaxed(val, pcie->base + reg);
-}
-
static inline u32 pcie_port_read(struct mt7621_pcie_port *port, u32 reg)
{
return readl_relaxed(port->base + reg);
@@ -557,7 +548,7 @@ static struct platform_driver mt7621_pcie_driver = {
.remove = mt7621_pcie_remove,
.driver = {
.name = "mt7621-pci",
- .of_match_table = of_match_ptr(mt7621_pcie_ids),
+ .of_match_table = mt7621_pcie_ids,
},
};
builtin_platform_driver(mt7621_pcie_driver);
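
Dropping of_match_ptr() here is more than style: the macro evaluates to NULL when CONFIG_OF is unset, which leaves mt7621_pcie_ids defined but unreferenced and provokes an unused-variable warning, and this driver only builds on OF platforms anyway. The idiom, sketched with a hypothetical driver:

#include <linux/mod_devicetable.h>
#include <linux/platform_device.h>

static const struct of_device_id foo_ids[] = {
	{ .compatible = "vendor,foo" },
	{ }
};

static struct platform_driver foo_driver = {
	.driver = {
		.name = "foo",
		/* OF-only driver: reference the table directly rather
		 * than wrapping it in of_match_ptr(). */
		.of_match_table = foo_ids,
	},
};
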
diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
index cc166c683638..eb05cceab964 100644
--- a/drivers/pci/controller/vmd.c
+++ b/drivers/pci/controller/vmd.c
@@ -99,11 +99,13 @@ struct vmd_irq {
* @srcu: SRCU struct for local synchronization.
* @count: number of child IRQs assigned to this vector; used to track
* sharing.
+ * @virq: The underlying VMD Linux interrupt number
*/
struct vmd_irq_list {
struct list_head irq_list;
struct srcu_struct srcu;
unsigned int count;
+ unsigned int virq;
};
struct vmd_dev {
@@ -253,7 +255,6 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
struct msi_desc *desc = arg->desc;
struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus);
struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL);
- unsigned int index, vector;
if (!vmdirq)
return -ENOMEM;
@@ -261,10 +262,8 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
INIT_LIST_HEAD(&vmdirq->node);
vmdirq->irq = vmd_next_irq(vmd, desc);
vmdirq->virq = virq;
- index = index_from_irqs(vmd, vmdirq->irq);
- vector = pci_irq_vector(vmd->dev, index);
- irq_domain_set_info(domain, virq, vector, info->chip, vmdirq,
+ irq_domain_set_info(domain, virq, vmdirq->irq->virq, info->chip, vmdirq,
handle_untracked_irq, vmd, NULL);
return 0;
}
@@ -685,7 +684,8 @@ static int vmd_alloc_irqs(struct vmd_dev *vmd)
return err;
INIT_LIST_HEAD(&vmd->irqs[i].irq_list);
- err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i),
+ vmd->irqs[i].virq = pci_irq_vector(dev, i);
+ err = devm_request_irq(&dev->dev, vmd->irqs[i].virq,
vmd_irq, IRQF_NO_THREAD,
vmd->name, &vmd->irqs[i]);
if (err)
@@ -969,7 +969,7 @@ static int vmd_suspend(struct device *dev)
int i;
for (i = 0; i < vmd->msix_count; i++)
- devm_free_irq(dev, pci_irq_vector(pdev, i), &vmd->irqs[i]);
+ devm_free_irq(dev, vmd->irqs[i].virq, &vmd->irqs[i]);
return 0;
}
@@ -981,7 +981,7 @@ static int vmd_resume(struct device *dev)
int err, i;
for (i = 0; i < vmd->msix_count; i++) {
- err = devm_request_irq(dev, pci_irq_vector(pdev, i),
+ err = devm_request_irq(dev, vmd->irqs[i].virq,
vmd_irq, IRQF_NO_THREAD,
vmd->name, &vmd->irqs[i]);
if (err)
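
The vmd change looks up pci_irq_vector() once, at allocation time, and caches the result in the new vmd_irq_list::virq field; suspend/resume and vmd_msi_init() then reuse the cached value instead of re-deriving it. The shape of the pattern, reduced to a sketch with hypothetical names:

#include <linux/interrupt.h>
#include <linux/pci.h>

struct foo_irq_list {
	unsigned int count;
	unsigned int virq;		/* cached Linux interrupt number */
};

static irqreturn_t foo_handler(int irq, void *data)
{
	return IRQ_HANDLED;
}

static int foo_request_irqs(struct pci_dev *dev,
			    struct foo_irq_list *irqs, int n)
{
	int i, err;

	for (i = 0; i < n; i++) {
		irqs[i].virq = pci_irq_vector(dev, i);	/* look up once */
		err = devm_request_irq(&dev->dev, irqs[i].virq, foo_handler,
				       IRQF_NO_THREAD, "foo", &irqs[i]);
		if (err)
			return err;
	}
	return 0;	/* later PM callbacks reuse irqs[i].virq */
}
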
diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c
index 0d63541c4052..e9cf318e6670 100644
--- a/drivers/pci/msi/irqdomain.c
+++ b/drivers/pci/msi/irqdomain.c
@@ -28,6 +28,7 @@ void pci_msi_teardown_msi_irqs(struct pci_dev *dev)
msi_domain_free_irqs_descs_locked(domain, &dev->dev);
else
pci_msi_legacy_teardown_msi_irqs(dev);
+ msi_free_msi_descs(&dev->dev);
}
/**
@@ -171,8 +172,7 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
pci_msi_domain_update_chip_ops(info);
- info->flags |= MSI_FLAG_ACTIVATE_EARLY | MSI_FLAG_DEV_SYSFS |
- MSI_FLAG_FREE_MSI_DESCS;
+ info->flags |= MSI_FLAG_ACTIVATE_EARLY | MSI_FLAG_DEV_SYSFS;
if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE))
info->flags |= MSI_FLAG_MUST_REACTIVATE;
diff --git a/drivers/pci/msi/legacy.c b/drivers/pci/msi/legacy.c
index cdbb4689db78..db761adef652 100644
--- a/drivers/pci/msi/legacy.c
+++ b/drivers/pci/msi/legacy.c
@@ -77,5 +77,4 @@ void pci_msi_legacy_teardown_msi_irqs(struct pci_dev *dev)
{
msi_device_destroy_sysfs(&dev->dev);
arch_teardown_msi_irqs(dev);
- msi_free_msi_descs(&dev->dev);
}
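
Taken together, the two MSI hunks move descriptor freeing out of the per-backend paths (the MSI_FLAG_FREE_MSI_DESCS domain flag and the legacy teardown) and into the one function both paths share. Condensed control flow, with hypothetical helper names standing in for the real ones:

#include <linux/msi.h>

bool foo_has_msi_domain(struct device *dev);	/* hypothetical */
void foo_domain_free_irqs(struct device *dev);	/* hypothetical */
void foo_legacy_teardown(struct device *dev);	/* hypothetical */

static void foo_teardown_msi_irqs(struct device *dev)
{
	if (foo_has_msi_domain(dev))
		foo_domain_free_irqs(dev);	/* no longer frees descs */
	else
		foo_legacy_teardown(dev);	/* no longer frees descs */

	msi_free_msi_descs(dev);	/* single common descriptor free */
}
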
diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c
index c19c7ca58186..9037a7827eca 100644
--- a/drivers/pci/msi/msi.c
+++ b/drivers/pci/msi/msi.c
@@ -1111,7 +1111,8 @@ const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
if (!desc)
return cpu_possible_mask;
- if (WARN_ON_ONCE(!desc->affinity))
+ /* MSI[X] interrupts can be allocated without affinity descriptor */
+ if (!desc->affinity)
return NULL;
/*
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 588588cfda48..4ceeb75fc899 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -350,7 +350,6 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
const struct pci_device_id *id)
{
int error, node, cpu;
- int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
struct drv_dev_and_id ddi = { drv, dev, id };
/*
@@ -368,17 +367,29 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
* device is probed from work_on_cpu() of the Physical device.
*/
if (node < 0 || node >= MAX_NUMNODES || !node_online(node) ||
- pci_physfn_is_probed(dev))
+ pci_physfn_is_probed(dev)) {
cpu = nr_cpu_ids;
- else
+ } else {
+ cpumask_var_t wq_domain_mask;
+
+ if (!zalloc_cpumask_var(&wq_domain_mask, GFP_KERNEL)) {
+ error = -ENOMEM;
+ goto out;
+ }
+ cpumask_and(wq_domain_mask,
+ housekeeping_cpumask(HK_TYPE_WQ),
+ housekeeping_cpumask(HK_TYPE_DOMAIN));
+
cpu = cpumask_any_and(cpumask_of_node(node),
- housekeeping_cpumask(hk_flags));
+ wq_domain_mask);
+ free_cpumask_var(wq_domain_mask);
+ }
if (cpu < nr_cpu_ids)
error = work_on_cpu(cpu, local_pci_probe, &ddi);
else
error = local_pci_probe(&ddi);
-
+out:
dev->is_probed = 0;
cpu_hotplug_enable();
return error;
@@ -596,7 +607,7 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state)
int error;
error = drv->suspend(pci_dev, state);
- suspend_report_result(drv->suspend, error);
+ suspend_report_result(dev, drv->suspend, error);
if (error)
return error;
@@ -775,7 +786,7 @@ static int pci_pm_suspend(struct device *dev)
int error;
error = pm->suspend(dev);
- suspend_report_result(pm->suspend, error);
+ suspend_report_result(dev, pm->suspend, error);
if (error)
return error;
@@ -821,7 +832,7 @@ static int pci_pm_suspend_noirq(struct device *dev)
int error;
error = pm->suspend_noirq(dev);
- suspend_report_result(pm->suspend_noirq, error);
+ suspend_report_result(dev, pm->suspend_noirq, error);
if (error)
return error;
@@ -1010,7 +1021,7 @@ static int pci_pm_freeze(struct device *dev)
int error;
error = pm->freeze(dev);
- suspend_report_result(pm->freeze, error);
+ suspend_report_result(dev, pm->freeze, error);
if (error)
return error;
}
@@ -1030,7 +1041,7 @@ static int pci_pm_freeze_noirq(struct device *dev)
int error;
error = pm->freeze_noirq(dev);
- suspend_report_result(pm->freeze_noirq, error);
+ suspend_report_result(dev, pm->freeze_noirq, error);
if (error)
return error;
}
@@ -1116,7 +1127,7 @@ static int pci_pm_poweroff(struct device *dev)
int error;
error = pm->poweroff(dev);
- suspend_report_result(pm->poweroff, error);
+ suspend_report_result(dev, pm->poweroff, error);
if (error)
return error;
}
@@ -1154,7 +1165,7 @@ static int pci_pm_poweroff_noirq(struct device *dev)
int error;
error = pm->poweroff_noirq(dev);
- suspend_report_result(pm->poweroff_noirq, error);
+ suspend_report_result(dev, pm->poweroff_noirq, error);
if (error)
return error;
}
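
The probe-path change tracks the housekeeping rework elsewhere in this series: the OR-able HK_FLAG_* bits are gone, each HK_TYPE_* now has its own cpumask, and a caller that wants "CPUs that are housekeeping for both workqueues and domains" must intersect the two masks itself. A minimal sketch of that selection, assuming the post-rename <linux/sched/isolation.h> API:

#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/sched/isolation.h>
#include <linux/topology.h>

/* Returns a CPU on 'node' usable for both WQ and domain work,
 * or nr_cpu_ids if none (or on allocation failure). */
static int foo_pick_probe_cpu(int node)
{
	cpumask_var_t mask;
	int cpu;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return nr_cpu_ids;

	cpumask_and(mask, housekeeping_cpumask(HK_TYPE_WQ),
		    housekeeping_cpumask(HK_TYPE_DOMAIN));
	cpu = cpumask_any_and(cpumask_of_node(node), mask);
	free_cpumask_var(mask);

	return cpu;
}
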
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index bda630889f95..604feeb84ee4 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -166,6 +166,9 @@ static int pcie_init_service_irqs(struct pci_dev *dev, int *irqs, int mask)
{
int ret, i;
+ for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++)
+ irqs[i] = -1;
+
/*
* If we support PME but can't use MSI/MSI-X for it, we have to
* fall back to INTx or other interrupts, e.g., a system shared
@@ -314,10 +317,8 @@ static int pcie_device_init(struct pci_dev *pdev, int service, int irq)
*/
int pcie_port_device_register(struct pci_dev *dev)
{
- int status, capabilities, irq_services, i, nr_service;
- int irqs[PCIE_PORT_DEVICE_MAXSERVICES] = {
- [0 ... PCIE_PORT_DEVICE_MAXSERVICES-1] = -1
- };
+ int status, capabilities, i, nr_service;
+ int irqs[PCIE_PORT_DEVICE_MAXSERVICES];
/* Enable PCI Express port device */
status = pci_enable_device(dev);
@@ -330,32 +331,18 @@ int pcie_port_device_register(struct pci_dev *dev)
return 0;
pci_set_master(dev);
-
- irq_services = 0;
- if (IS_ENABLED(CONFIG_PCIE_PME))
- irq_services |= PCIE_PORT_SERVICE_PME;
- if (IS_ENABLED(CONFIG_PCIEAER))
- irq_services |= PCIE_PORT_SERVICE_AER;
- if (IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
- irq_services |= PCIE_PORT_SERVICE_HP;
- if (IS_ENABLED(CONFIG_PCIE_DPC))
- irq_services |= PCIE_PORT_SERVICE_DPC;
- irq_services &= capabilities;
-
- if (irq_services) {
- /*
- * Initialize service IRQs. Don't use service devices that
- * require interrupts if there is no way to generate them.
- * However, some drivers may have a polling mode (e.g.
- * pciehp_poll_mode) that can be used in the absence of IRQs.
- * Allow them to determine if that is to be used.
- */
- status = pcie_init_service_irqs(dev, irqs, irq_services);
- if (status) {
- irq_services &= PCIE_PORT_SERVICE_HP;
- if (!irq_services)
- goto error_disable;
- }
+ /*
+ * Initialize service irqs. Don't use service devices that
+ * require interrupts if there is no way to generate them.
+ * However, some drivers may have a polling mode (e.g. pciehp_poll_mode)
+ * that can be used in the absence of irqs. Allow them to determine
+ * if that is to be used.
+ */
+ status = pcie_init_service_irqs(dev, irqs, capabilities);
+ if (status) {
+ capabilities &= PCIE_PORT_SERVICE_HP;
+ if (!capabilities)
+ goto error_disable;
}
/* Allocate child services if any */
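
Moving the "-1" initialization into pcie_init_service_irqs() lets the caller drop both the designated-initializer array and the hand-built irq_services mask: every service's slot starts invalid, and only the services that actually get an interrupt overwrite it. The helper's contract, sketched with a hypothetical constant and names:

#define FOO_MAX_SERVICES 5

/* On return, irqs[i] holds a valid Linux IRQ, or -1 if service i has
 * no interrupt; such a service may still run in polling mode. */
static int foo_init_service_irqs(int *irqs, int mask)
{
	int i;

	for (i = 0; i < FOO_MAX_SERVICES; i++)
		irqs[i] = -1;

	if (!mask)		/* nothing requests an interrupt */
		return 0;

	/* ... allocate MSI-X/MSI/INTx and fill in the slots ... */
	return 0;
}
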
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index d2dd6a6cda60..65f7f6b0576c 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -5344,11 +5344,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0422, quirk_no_ext_tags);
*/
static void quirk_amd_harvest_no_ats(struct pci_dev *pdev)
{
- if ((pdev->device == 0x7312 && pdev->revision != 0x00) ||
- (pdev->device == 0x7340 && pdev->revision != 0xc5) ||
- (pdev->device == 0x7341 && pdev->revision != 0x00))
- return;
-
if (pdev->device == 0x15d8) {
if (pdev->revision == 0xcf &&
pdev->subsystem_vendor == 0xea50 &&
@@ -5370,10 +5365,19 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x98e4, quirk_amd_harvest_no_ats);
/* AMD Iceland dGPU */
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x6900, quirk_amd_harvest_no_ats);
/* AMD Navi10 dGPU */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7310, quirk_amd_harvest_no_ats);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7312, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7318, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7319, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731a, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731b, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731e, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731f, quirk_amd_harvest_no_ats);
/* AMD Navi14 dGPU */
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7340, quirk_amd_harvest_no_ats);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7341, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7347, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x734f, quirk_amd_harvest_no_ats);
/* AMD Raven platform iGPU */
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x15d8, quirk_amd_harvest_no_ats);
#endif /* CONFIG_PCI_ATS */
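
The quirks change replaces in-function revision filtering with one explicit DECLARE_PCI_FIXUP_FINAL entry per affected Navi1x device ID, so the quirk body no longer needs to second-guess which device it was called for. For reference, the fixup pattern looks roughly like this (illustrative quirk; the device ID is taken from the hunk above, and the real function also handles the Raven 0x15d8 special cases):

#include <linux/pci.h>

static void foo_quirk_no_ats(struct pci_dev *pdev)
{
	pci_info(pdev, "ATS disabled\n");
	pdev->ats_cap = 0;	/* field exists under CONFIG_PCI_ATS */
}
/* One line per affected device; runs after final enumeration. */
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7310, foo_quirk_no_ats);
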
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index e1a0c44bc686..d05ca6ebbb9d 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -141,11 +141,25 @@ config ARM_DMC620_PMU
config MARVELL_CN10K_TAD_PMU
tristate "Marvell CN10K LLC-TAD PMU"
- depends on ARM64 || (COMPILE_TEST && 64BIT)
+ depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT)
help
Provides support for Last-Level cache Tag-and-data Units (LLC-TAD)
performance monitors on CN10K family silicons.
+config APPLE_M1_CPU_PMU
+ bool "Apple M1 CPU PMU support"
+ depends on ARM_PMU && ARCH_APPLE
+ help
+ Provides support for the non-architectural CPU PMUs present on
+ the Apple M1 SoCs and derivatives.
+
source "drivers/perf/hisilicon/Kconfig"
+config MARVELL_CN10K_DDR_PMU
+ tristate "Enable MARVELL CN10K DRAM Subsystem(DSS) PMU Support"
+ depends on ARM64 || (COMPILE_TEST && 64BIT)
+ help
+ Enable perf support for Marvell DDR performance-monitoring
+ events on CN10K platforms.
+
endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 2db5418d5b0a..4f43080ec54e 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -15,3 +15,5 @@ obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o
obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
+obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
+obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c
new file mode 100644
index 000000000000..979a7c2b4f56
--- /dev/null
+++ b/drivers/perf/apple_m1_cpu_pmu.c
@@ -0,0 +1,584 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CPU PMU driver for the Apple M1 and derivatives
+ *
+ * Copyright (C) 2021 Google LLC
+ *
+ * Author: Marc Zyngier <maz@kernel.org>
+ *
+ * Most of the information used in this driver was provided by the
+ * Asahi Linux project. The rest was experimentally discovered.
+ */
+
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+
+#include <asm/apple_m1_pmu.h>
+#include <asm/irq_regs.h>
+#include <asm/perf_event.h>
+
+#define M1_PMU_NR_COUNTERS 10
+
+#define M1_PMU_CFG_EVENT GENMASK(7, 0)
+
+#define ANY_BUT_0_1 GENMASK(9, 2)
+#define ONLY_2_TO_7 GENMASK(7, 2)
+#define ONLY_2_4_6 (BIT(2) | BIT(4) | BIT(6))
+#define ONLY_5_6_7 (BIT(5) | BIT(6) | BIT(7))
+
+/*
+ * Description of the events we actually know about, as well as those with
+ * a specific counter affinity. Yes, this is a grand total of two known
+ * counters, and the rest is anybody's guess.
+ *
+ * Not all counters can count all events. Counters #0 and #1 are wired to
+ * count cycles and instructions respectively, and some events have
+ * bizarre mappings (every other counter, or even *one* counter). These
+ * restrictions equally apply to both P and E cores.
+ *
+ * It is worth noting that the PMUs attached to P and E cores are likely
+ * to be different because the underlying uarches are different. At the
+ * moment, we don't really need to distinguish between the two because we
+ * know next to nothing about the events themselves, and we already have
+ * per cpu-type PMU abstractions.
+ *
+ * If we eventually find out that the events are different across
+ * implementations, we'll have to introduce per cpu-type tables.
+ */
+enum m1_pmu_events {
+ M1_PMU_PERFCTR_UNKNOWN_01 = 0x01,
+ M1_PMU_PERFCTR_CPU_CYCLES = 0x02,
+ M1_PMU_PERFCTR_INSTRUCTIONS = 0x8c,
+ M1_PMU_PERFCTR_UNKNOWN_8d = 0x8d,
+ M1_PMU_PERFCTR_UNKNOWN_8e = 0x8e,
+ M1_PMU_PERFCTR_UNKNOWN_8f = 0x8f,
+ M1_PMU_PERFCTR_UNKNOWN_90 = 0x90,
+ M1_PMU_PERFCTR_UNKNOWN_93 = 0x93,
+ M1_PMU_PERFCTR_UNKNOWN_94 = 0x94,
+ M1_PMU_PERFCTR_UNKNOWN_95 = 0x95,
+ M1_PMU_PERFCTR_UNKNOWN_96 = 0x96,
+ M1_PMU_PERFCTR_UNKNOWN_97 = 0x97,
+ M1_PMU_PERFCTR_UNKNOWN_98 = 0x98,
+ M1_PMU_PERFCTR_UNKNOWN_99 = 0x99,
+ M1_PMU_PERFCTR_UNKNOWN_9a = 0x9a,
+ M1_PMU_PERFCTR_UNKNOWN_9b = 0x9b,
+ M1_PMU_PERFCTR_UNKNOWN_9c = 0x9c,
+ M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f,
+ M1_PMU_PERFCTR_UNKNOWN_bf = 0xbf,
+ M1_PMU_PERFCTR_UNKNOWN_c0 = 0xc0,
+ M1_PMU_PERFCTR_UNKNOWN_c1 = 0xc1,
+ M1_PMU_PERFCTR_UNKNOWN_c4 = 0xc4,
+ M1_PMU_PERFCTR_UNKNOWN_c5 = 0xc5,
+ M1_PMU_PERFCTR_UNKNOWN_c6 = 0xc6,
+ M1_PMU_PERFCTR_UNKNOWN_c8 = 0xc8,
+ M1_PMU_PERFCTR_UNKNOWN_ca = 0xca,
+ M1_PMU_PERFCTR_UNKNOWN_cb = 0xcb,
+ M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5,
+ M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6,
+ M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7,
+ M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8,
+ M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd,
+ M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT,
+
+ /*
+ * From this point onwards, these are not actual HW events,
+ * but attributes that get stored in hw->config_base.
+ */
+ M1_PMU_CFG_COUNT_USER = BIT(8),
+ M1_PMU_CFG_COUNT_KERNEL = BIT(9),
+};
+
+/*
+ * Per-event affinity table. Most events can be installed on counter
+ * 2-9, but there are a number of exceptions. Note that this table
+ * has been created experimentally, and I wouldn't be surprised if more
+ * counters had strange affinities.
+ */
+static const u16 m1_pmu_event_affinity[M1_PMU_PERFCTR_LAST + 1] = {
+ [0 ... M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1,
+ [M1_PMU_PERFCTR_UNKNOWN_01] = BIT(7),
+ [M1_PMU_PERFCTR_CPU_CYCLES] = ANY_BUT_0_1 | BIT(0),
+ [M1_PMU_PERFCTR_INSTRUCTIONS] = BIT(7) | BIT(1),
+ [M1_PMU_PERFCTR_UNKNOWN_8d] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_8e] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_8f] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_90] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_93] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_94] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_95] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_96] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_97] = BIT(7),
+ [M1_PMU_PERFCTR_UNKNOWN_98] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_99] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_9a] = BIT(7),
+ [M1_PMU_PERFCTR_UNKNOWN_9b] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_9c] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7),
+ [M1_PMU_PERFCTR_UNKNOWN_bf] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_c0] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_c1] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_c4] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_c5] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_c6] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_c8] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_ca] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_cb] = ONLY_5_6_7,
+ [M1_PMU_PERFCTR_UNKNOWN_f5] = ONLY_2_4_6,
+ [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6,
+ [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6,
+ [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7,
+ [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6,
+};
+
+static const unsigned m1_pmu_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INSTRUCTIONS,
+ /* No idea about the rest yet */
+};
+
+/* sysfs definitions */
+static ssize_t m1_pmu_events_sysfs_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+
+ return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
+}
+
+#define M1_PMU_EVENT_ATTR(name, config) \
+ PMU_EVENT_ATTR_ID(name, m1_pmu_events_sysfs_show, config)
+
+static struct attribute *m1_pmu_event_attrs[] = {
+ M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CPU_CYCLES),
+ M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INSTRUCTIONS),
+ NULL,
+};
+
+static const struct attribute_group m1_pmu_events_attr_group = {
+ .name = "events",
+ .attrs = m1_pmu_event_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *m1_pmu_format_attrs[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static const struct attribute_group m1_pmu_format_attr_group = {
+ .name = "format",
+ .attrs = m1_pmu_format_attrs,
+};
+
+/* Low level accessors. No synchronisation. */
+#define PMU_READ_COUNTER(_idx) \
+ case _idx: return read_sysreg_s(SYS_IMP_APL_PMC## _idx ##_EL1)
+
+#define PMU_WRITE_COUNTER(_val, _idx) \
+ case _idx: \
+ write_sysreg_s(_val, SYS_IMP_APL_PMC## _idx ##_EL1); \
+ return
+
+static u64 m1_pmu_read_hw_counter(unsigned int index)
+{
+ switch (index) {
+ PMU_READ_COUNTER(0);
+ PMU_READ_COUNTER(1);
+ PMU_READ_COUNTER(2);
+ PMU_READ_COUNTER(3);
+ PMU_READ_COUNTER(4);
+ PMU_READ_COUNTER(5);
+ PMU_READ_COUNTER(6);
+ PMU_READ_COUNTER(7);
+ PMU_READ_COUNTER(8);
+ PMU_READ_COUNTER(9);
+ }
+
+ BUG();
+}
+
+static void m1_pmu_write_hw_counter(u64 val, unsigned int index)
+{
+ switch (index) {
+ PMU_WRITE_COUNTER(val, 0);
+ PMU_WRITE_COUNTER(val, 1);
+ PMU_WRITE_COUNTER(val, 2);
+ PMU_WRITE_COUNTER(val, 3);
+ PMU_WRITE_COUNTER(val, 4);
+ PMU_WRITE_COUNTER(val, 5);
+ PMU_WRITE_COUNTER(val, 6);
+ PMU_WRITE_COUNTER(val, 7);
+ PMU_WRITE_COUNTER(val, 8);
+ PMU_WRITE_COUNTER(val, 9);
+ }
+
+ BUG();
+}
+
+#define get_bit_offset(index, mask) (__ffs(mask) + (index))
+
+static void __m1_pmu_enable_counter(unsigned int index, bool en)
+{
+ u64 val, bit;
+
+ switch (index) {
+ case 0 ... 7:
+ bit = BIT(get_bit_offset(index, PMCR0_CNT_ENABLE_0_7));
+ break;
+ case 8 ... 9:
+ bit = BIT(get_bit_offset(index - 8, PMCR0_CNT_ENABLE_8_9));
+ break;
+ default:
+ BUG();
+ }
+
+ val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+
+ if (en)
+ val |= bit;
+ else
+ val &= ~bit;
+
+ write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+}
+
+static void m1_pmu_enable_counter(unsigned int index)
+{
+ __m1_pmu_enable_counter(index, true);
+}
+
+static void m1_pmu_disable_counter(unsigned int index)
+{
+ __m1_pmu_enable_counter(index, false);
+}
+
+static void __m1_pmu_enable_counter_interrupt(unsigned int index, bool en)
+{
+ u64 val, bit;
+
+ switch (index) {
+ case 0 ... 7:
+ bit = BIT(get_bit_offset(index, PMCR0_PMI_ENABLE_0_7));
+ break;
+ case 8 ... 9:
+ bit = BIT(get_bit_offset(index - 8, PMCR0_PMI_ENABLE_8_9));
+ break;
+ default:
+ BUG();
+ }
+
+ val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+
+ if (en)
+ val |= bit;
+ else
+ val &= ~bit;
+
+ write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+}
+
+static void m1_pmu_enable_counter_interrupt(unsigned int index)
+{
+ __m1_pmu_enable_counter_interrupt(index, true);
+}
+
+static void m1_pmu_disable_counter_interrupt(unsigned int index)
+{
+ __m1_pmu_enable_counter_interrupt(index, false);
+}
+
+static void m1_pmu_configure_counter(unsigned int index, u8 event,
+ bool user, bool kernel)
+{
+ u64 val, user_bit, kernel_bit;
+ int shift;
+
+ switch (index) {
+ case 0 ... 7:
+ user_bit = BIT(get_bit_offset(index, PMCR1_COUNT_A64_EL0_0_7));
+ kernel_bit = BIT(get_bit_offset(index, PMCR1_COUNT_A64_EL1_0_7));
+ break;
+ case 8 ... 9:
+ user_bit = BIT(get_bit_offset(index - 8, PMCR1_COUNT_A64_EL0_8_9));
+ kernel_bit = BIT(get_bit_offset(index - 8, PMCR1_COUNT_A64_EL1_8_9));
+ break;
+ default:
+ BUG();
+ }
+
+ val = read_sysreg_s(SYS_IMP_APL_PMCR1_EL1);
+
+ if (user)
+ val |= user_bit;
+ else
+ val &= ~user_bit;
+
+ if (kernel)
+ val |= kernel_bit;
+ else
+ val &= ~kernel_bit;
+
+ write_sysreg_s(val, SYS_IMP_APL_PMCR1_EL1);
+
+ /*
+ * Counters 0 and 1 have fixed events. For anything else,
+ * place the event at the expected location in the relevant
+ * register (PMESR0 holds the event configuration for counters
+ * 2-5, resp. PMESR1 for counters 6-9).
+ */
+ switch (index) {
+ case 0 ... 1:
+ break;
+ case 2 ... 5:
+ shift = (index - 2) * 8;
+ val = read_sysreg_s(SYS_IMP_APL_PMESR0_EL1);
+ val &= ~((u64)0xff << shift);
+ val |= (u64)event << shift;
+ write_sysreg_s(val, SYS_IMP_APL_PMESR0_EL1);
+ break;
+ case 6 ... 9:
+ shift = (index - 6) * 8;
+ val = read_sysreg_s(SYS_IMP_APL_PMESR1_EL1);
+ val &= ~((u64)0xff << shift);
+ val |= (u64)event << shift;
+ write_sysreg_s(val, SYS_IMP_APL_PMESR1_EL1);
+ break;
+ }
+}
+
+/* arm_pmu backend */
+static void m1_pmu_enable_event(struct perf_event *event)
+{
+ bool user, kernel;
+ u8 evt;
+
+ evt = event->hw.config_base & M1_PMU_CFG_EVENT;
+ user = event->hw.config_base & M1_PMU_CFG_COUNT_USER;
+ kernel = event->hw.config_base & M1_PMU_CFG_COUNT_KERNEL;
+
+ m1_pmu_disable_counter_interrupt(event->hw.idx);
+ m1_pmu_disable_counter(event->hw.idx);
+ isb();
+
+ m1_pmu_configure_counter(event->hw.idx, evt, user, kernel);
+ m1_pmu_enable_counter(event->hw.idx);
+ m1_pmu_enable_counter_interrupt(event->hw.idx);
+ isb();
+}
+
+static void m1_pmu_disable_event(struct perf_event *event)
+{
+ m1_pmu_disable_counter_interrupt(event->hw.idx);
+ m1_pmu_disable_counter(event->hw.idx);
+ isb();
+}
+
+static irqreturn_t m1_pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+ struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+ struct pt_regs *regs;
+ u64 overflow, state;
+ int idx;
+
+ overflow = read_sysreg_s(SYS_IMP_APL_PMSR_EL1);
+ if (!overflow) {
+ /* Spurious interrupt? */
+ state = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+ state &= ~PMCR0_IACT;
+ write_sysreg_s(state, SYS_IMP_APL_PMCR0_EL1);
+ isb();
+ return IRQ_NONE;
+ }
+
+ cpu_pmu->stop(cpu_pmu);
+
+ regs = get_irq_regs();
+
+ for (idx = 0; idx < cpu_pmu->num_events; idx++) {
+ struct perf_event *event = cpuc->events[idx];
+ struct perf_sample_data data;
+
+ if (!event)
+ continue;
+
+ armpmu_event_update(event);
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+ if (!armpmu_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ m1_pmu_disable_event(event);
+ }
+
+ cpu_pmu->start(cpu_pmu);
+
+ return IRQ_HANDLED;
+}
+
+static u64 m1_pmu_read_counter(struct perf_event *event)
+{
+ return m1_pmu_read_hw_counter(event->hw.idx);
+}
+
+static void m1_pmu_write_counter(struct perf_event *event, u64 value)
+{
+ m1_pmu_write_hw_counter(value, event->hw.idx);
+ isb();
+}
+
+static int m1_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ unsigned long evtype = event->hw.config_base & M1_PMU_CFG_EVENT;
+ unsigned long affinity = m1_pmu_event_affinity[evtype];
+ int idx;
+
+ /*
+ * Place the event on the first free counter that can count
+ * this event.
+ *
+ * We could do a better job if we had a view of all the events
+ * counting on the PMU at any given time, and by placing the
+ * most constraining events first.
+ */
+ for_each_set_bit(idx, &affinity, M1_PMU_NR_COUNTERS) {
+ if (!test_and_set_bit(idx, cpuc->used_mask))
+ return idx;
+ }
+
+ return -EAGAIN;
+}
+
+static void m1_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ clear_bit(event->hw.idx, cpuc->used_mask);
+}
+
+static void __m1_pmu_set_mode(u8 mode)
+{
+ u64 val;
+
+ val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+ val &= ~(PMCR0_IMODE | PMCR0_IACT);
+ val |= FIELD_PREP(PMCR0_IMODE, mode);
+ write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+ isb();
+}
+
+static void m1_pmu_start(struct arm_pmu *cpu_pmu)
+{
+ __m1_pmu_set_mode(PMCR0_IMODE_FIQ);
+}
+
+static void m1_pmu_stop(struct arm_pmu *cpu_pmu)
+{
+ __m1_pmu_set_mode(PMCR0_IMODE_OFF);
+}
+
+static int m1_pmu_map_event(struct perf_event *event)
+{
+ /*
+ * Although the counters are 48 bits wide, bit 47 is what
+ * triggers the overflow interrupt. Advertise the counters as
+ * 47 bits wide to mimic the behaviour of the ARM PMU.
+ */
+ event->hw.flags |= ARMPMU_EVT_47BIT;
+ return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT);
+}
+
+static void m1_pmu_reset(void *info)
+{
+ int i;
+
+ __m1_pmu_set_mode(PMCR0_IMODE_OFF);
+
+ for (i = 0; i < M1_PMU_NR_COUNTERS; i++) {
+ m1_pmu_disable_counter(i);
+ m1_pmu_disable_counter_interrupt(i);
+ m1_pmu_write_hw_counter(0, i);
+ }
+
+ isb();
+}
+
+static int m1_pmu_set_event_filter(struct hw_perf_event *event,
+ struct perf_event_attr *attr)
+{
+ unsigned long config_base = 0;
+
+ if (!attr->exclude_guest)
+ return -EINVAL;
+ if (!attr->exclude_kernel)
+ config_base |= M1_PMU_CFG_COUNT_KERNEL;
+ if (!attr->exclude_user)
+ config_base |= M1_PMU_CFG_COUNT_USER;
+
+ event->config_base = config_base;
+
+ return 0;
+}
+
+static int m1_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ cpu_pmu->handle_irq = m1_pmu_handle_irq;
+ cpu_pmu->enable = m1_pmu_enable_event;
+ cpu_pmu->disable = m1_pmu_disable_event;
+ cpu_pmu->read_counter = m1_pmu_read_counter;
+ cpu_pmu->write_counter = m1_pmu_write_counter;
+ cpu_pmu->get_event_idx = m1_pmu_get_event_idx;
+ cpu_pmu->clear_event_idx = m1_pmu_clear_event_idx;
+ cpu_pmu->start = m1_pmu_start;
+ cpu_pmu->stop = m1_pmu_stop;
+ cpu_pmu->map_event = m1_pmu_map_event;
+ cpu_pmu->reset = m1_pmu_reset;
+ cpu_pmu->set_event_filter = m1_pmu_set_event_filter;
+
+ cpu_pmu->num_events = M1_PMU_NR_COUNTERS;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &m1_pmu_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &m1_pmu_format_attr_group;
+ return 0;
+}
+
+/* Device driver gunk */
+static int m1_pmu_ice_init(struct arm_pmu *cpu_pmu)
+{
+ cpu_pmu->name = "apple_icestorm_pmu";
+ return m1_pmu_init(cpu_pmu);
+}
+
+static int m1_pmu_fire_init(struct arm_pmu *cpu_pmu)
+{
+ cpu_pmu->name = "apple_firestorm_pmu";
+ return m1_pmu_init(cpu_pmu);
+}
+
+static const struct of_device_id m1_pmu_of_device_ids[] = {
+ { .compatible = "apple,icestorm-pmu", .data = m1_pmu_ice_init, },
+ { .compatible = "apple,firestorm-pmu", .data = m1_pmu_fire_init, },
+ { },
+};
+MODULE_DEVICE_TABLE(of, m1_pmu_of_device_ids);
+
+static int m1_pmu_device_probe(struct platform_device *pdev)
+{
+ return arm_pmu_device_probe(pdev, m1_pmu_of_device_ids, NULL);
+}
+
+static struct platform_driver m1_pmu_driver = {
+ .driver = {
+ .name = "apple-m1-cpu-pmu",
+ .of_match_table = m1_pmu_of_device_ids,
+ .suppress_bind_attrs = true,
+ },
+ .probe = m1_pmu_device_probe,
+};
+
+module_platform_driver(m1_pmu_driver);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 54aca3a62814..96e09fa40909 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -1096,7 +1096,7 @@ static void cci_pmu_enable(struct pmu *pmu)
{
struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
- int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_cntrs);
+ bool enabled = !bitmap_empty(hw_events->used_mask, cci_pmu->num_cntrs);
unsigned long flags;
if (!enabled)
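
bitmap_weight() walks the whole bitmap to count set bits, yet this caller (and the identical conversions in arm_pmu.c and the HiSilicon uncore driver below) only asks whether any bit is set; bitmap_empty() can stop at the first non-zero word. Sketch:

#include <linux/bitmap.h>
#include <linux/types.h>

static bool foo_any_counter_in_use(const unsigned long *used_mask, int nbits)
{
	/* Cheaper than bitmap_weight(used_mask, nbits) > 0. */
	return !bitmap_empty(used_mask, nbits);
}
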
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index a96c31604545..40b352e8aa7f 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -1460,8 +1460,7 @@ static irqreturn_t arm_ccn_irq_handler(int irq, void *dev_id)
static int arm_ccn_probe(struct platform_device *pdev)
{
struct arm_ccn *ccn;
- struct resource *res;
- unsigned int irq;
+ int irq;
int err;
ccn = devm_kzalloc(&pdev->dev, sizeof(*ccn), GFP_KERNEL);
@@ -1474,10 +1473,9 @@ static int arm_ccn_probe(struct platform_device *pdev)
if (IS_ERR(ccn->base))
return PTR_ERR(ccn->base);
- res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
- if (!res)
- return -EINVAL;
- irq = res->start;
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
/* Check if we can use the interrupt */
writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__DISABLE,
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 0e48adce57ef..9c1d82be7a2f 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -71,9 +71,11 @@
#define CMN_DTM_WPn(n) (0x1A0 + (n) * 0x18)
#define CMN_DTM_WPn_CONFIG(n) (CMN_DTM_WPn(n) + 0x00)
#define CMN_DTM_WPn_CONFIG_WP_DEV_SEL2 GENMASK_ULL(18,17)
-#define CMN_DTM_WPn_CONFIG_WP_COMBINE BIT(6)
-#define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE BIT(5)
-#define CMN_DTM_WPn_CONFIG_WP_GRP BIT(4)
+#define CMN_DTM_WPn_CONFIG_WP_COMBINE BIT(9)
+#define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE BIT(8)
+#define CMN600_WPn_CONFIG_WP_COMBINE BIT(6)
+#define CMN600_WPn_CONFIG_WP_EXCLUSIVE BIT(5)
+#define CMN_DTM_WPn_CONFIG_WP_GRP GENMASK_ULL(5, 4)
#define CMN_DTM_WPn_CONFIG_WP_CHN_SEL GENMASK_ULL(3, 1)
#define CMN_DTM_WPn_CONFIG_WP_DEV_SEL BIT(0)
#define CMN_DTM_WPn_VAL(n) (CMN_DTM_WPn(n) + 0x08)
@@ -155,6 +157,7 @@
#define CMN_CONFIG_WP_COMBINE GENMASK_ULL(27, 24)
#define CMN_CONFIG_WP_DEV_SEL GENMASK_ULL(50, 48)
#define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(55, 51)
+/* Note that we don't yet support the tertiary match group on newer IPs */
#define CMN_CONFIG_WP_GRP BIT_ULL(56)
#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(57)
#define CMN_CONFIG1_WP_VAL GENMASK_ULL(63, 0)
@@ -353,7 +356,7 @@ static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn,
return NULL;
}
-struct dentry *arm_cmn_debugfs;
+static struct dentry *arm_cmn_debugfs;
#ifdef CONFIG_DEBUG_FS
static const char *arm_cmn_device_type(u8 type)
@@ -595,6 +598,9 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
if ((intf & 4) && !(cmn->ports_used & BIT(intf & 3)))
return 0;
+ if (chan == 4 && cmn->model == CMN600)
+ return 0;
+
if ((chan == 5 && cmn->rsp_vc_num < 2) ||
(chan == 6 && cmn->dat_vc_num < 2))
return 0;
@@ -905,15 +911,18 @@ static u32 arm_cmn_wp_config(struct perf_event *event)
u32 grp = CMN_EVENT_WP_GRP(event);
u32 exc = CMN_EVENT_WP_EXCLUSIVE(event);
u32 combine = CMN_EVENT_WP_COMBINE(event);
+ bool is_cmn600 = to_cmn(event->pmu)->model == CMN600;
config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) |
FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) |
FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) |
- FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE, exc) |
FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL2, dev >> 1);
+ if (exc)
+ config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_EXCLUSIVE :
+ CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE;
if (combine && !grp)
- config |= CMN_DTM_WPn_CONFIG_WP_COMBINE;
-
+ config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_COMBINE :
+ CMN_DTM_WPn_CONFIG_WP_COMBINE;
return config;
}
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 295cc7952d0e..9694370651fa 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -109,6 +109,8 @@ static inline u64 arm_pmu_event_max_period(struct perf_event *event)
{
if (event->hw.flags & ARMPMU_EVT_64BIT)
return GENMASK_ULL(63, 0);
+ else if (event->hw.flags & ARMPMU_EVT_47BIT)
+ return GENMASK_ULL(46, 0);
else
return GENMASK_ULL(31, 0);
}
@@ -524,7 +526,7 @@ static void armpmu_enable(struct pmu *pmu)
{
struct arm_pmu *armpmu = to_arm_pmu(pmu);
struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
- int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+ bool enabled = !bitmap_empty(hw_events->used_mask, armpmu->num_events);
/* For task-bound events we may be called on other CPUs */
if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
@@ -785,7 +787,7 @@ static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
{
struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
- int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+ bool enabled = !bitmap_empty(hw_events->used_mask, armpmu->num_events);
if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
return NOTIFY_DONE;
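
ARMPMU_EVT_47BIT exists for the Apple M1 driver above: its counters are physically 48 bits wide but raise the overflow interrupt from bit 47, so the arm_pmu core must compute periods against a 47-bit maximum. A standalone sketch of the same selection logic, with stand-in flag names:

#include <linux/bits.h>
#include <linux/types.h>

#define FOO_EVT_64BIT BIT(0)		/* stand-ins for the hw flags */
#define FOO_EVT_47BIT BIT(1)

static u64 foo_event_max_period(unsigned long flags)
{
	if (flags & FOO_EVT_64BIT)
		return GENMASK_ULL(63, 0);
	if (flags & FOO_EVT_47BIT)
		return GENMASK_ULL(46, 0);	/* overflow rides bit 47 */
	return GENMASK_ULL(31, 0);
}
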
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index a738aeab5c04..358e4e284a62 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -393,7 +393,7 @@ EXPORT_SYMBOL_GPL(hisi_uncore_pmu_read);
void hisi_uncore_pmu_enable(struct pmu *pmu)
{
struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu);
- int enabled = bitmap_weight(hisi_pmu->pmu_events.used_mask,
+ bool enabled = !bitmap_empty(hisi_pmu->pmu_events.used_mask,
hisi_pmu->num_counters);
if (!enabled)
diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c
new file mode 100644
index 000000000000..665b382a0ee3
--- /dev/null
+++ b/drivers/perf/marvell_cn10k_ddr_pmu.c
@@ -0,0 +1,758 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver
+ *
+ * Copyright (C) 2021 Marvell.
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/perf_event.h>
+#include <linux/hrtimer.h>
+
+/* Performance Counters Operating Mode Control Registers */
+#define DDRC_PERF_CNT_OP_MODE_CTRL 0x8020
+#define OP_MODE_CTRL_VAL_MANNUAL 0x1
+
+/* Performance Counters Start Operation Control Registers */
+#define DDRC_PERF_CNT_START_OP_CTRL 0x8028
+#define START_OP_CTRL_VAL_START 0x1ULL
+#define START_OP_CTRL_VAL_ACTIVE 0x2
+
+/* Performance Counters End Operation Control Registers */
+#define DDRC_PERF_CNT_END_OP_CTRL 0x8030
+#define END_OP_CTRL_VAL_END 0x1ULL
+
+/* Performance Counters End Status Registers */
+#define DDRC_PERF_CNT_END_STATUS 0x8038
+#define END_STATUS_VAL_END_TIMER_MODE_END 0x1
+
+/* Performance Counters Configuration Registers */
+#define DDRC_PERF_CFG_BASE 0x8040
+
+/* 8 generic event counters + 2 fixed event counters */
+#define DDRC_PERF_NUM_GEN_COUNTERS 8
+#define DDRC_PERF_NUM_FIX_COUNTERS 2
+#define DDRC_PERF_READ_COUNTER_IDX DDRC_PERF_NUM_GEN_COUNTERS
+#define DDRC_PERF_WRITE_COUNTER_IDX (DDRC_PERF_NUM_GEN_COUNTERS + 1)
+#define DDRC_PERF_NUM_COUNTERS (DDRC_PERF_NUM_GEN_COUNTERS + \
+ DDRC_PERF_NUM_FIX_COUNTERS)
+
+/* Generic event counter registers */
+#define DDRC_PERF_CFG(n) (DDRC_PERF_CFG_BASE + 8 * (n))
+#define EVENT_ENABLE BIT_ULL(63)
+
+/* Two dedicated event counters for DDR reads and writes */
+#define EVENT_DDR_READS 101
+#define EVENT_DDR_WRITES 100
+
+/*
+ * Programmable event IDs for the programmable event counters.
+ * DO NOT change these event-id numbers; they are used to
+ * program the event bitmap in h/w.
+ */
+#define EVENT_OP_IS_ZQLATCH 55
+#define EVENT_OP_IS_ZQSTART 54
+#define EVENT_OP_IS_TCR_MRR 53
+#define EVENT_OP_IS_DQSOSC_MRR 52
+#define EVENT_OP_IS_DQSOSC_MPC 51
+#define EVENT_VISIBLE_WIN_LIMIT_REACHED_WR 50
+#define EVENT_VISIBLE_WIN_LIMIT_REACHED_RD 49
+#define EVENT_BSM_STARVATION 48
+#define EVENT_BSM_ALLOC 47
+#define EVENT_LPR_REQ_WITH_NOCREDIT 46
+#define EVENT_HPR_REQ_WITH_NOCREDIT 45
+#define EVENT_OP_IS_ZQCS 44
+#define EVENT_OP_IS_ZQCL 43
+#define EVENT_OP_IS_LOAD_MODE 42
+#define EVENT_OP_IS_SPEC_REF 41
+#define EVENT_OP_IS_CRIT_REF 40
+#define EVENT_OP_IS_REFRESH 39
+#define EVENT_OP_IS_ENTER_MPSM 35
+#define EVENT_OP_IS_ENTER_POWERDOWN 31
+#define EVENT_OP_IS_ENTER_SELFREF 27
+#define EVENT_WAW_HAZARD 26
+#define EVENT_RAW_HAZARD 25
+#define EVENT_WAR_HAZARD 24
+#define EVENT_WRITE_COMBINE 23
+#define EVENT_RDWR_TRANSITIONS 22
+#define EVENT_PRECHARGE_FOR_OTHER 21
+#define EVENT_PRECHARGE_FOR_RDWR 20
+#define EVENT_OP_IS_PRECHARGE 19
+#define EVENT_OP_IS_MWR 18
+#define EVENT_OP_IS_WR 17
+#define EVENT_OP_IS_RD 16
+#define EVENT_OP_IS_RD_ACTIVATE 15
+#define EVENT_OP_IS_RD_OR_WR 14
+#define EVENT_OP_IS_ACTIVATE 13
+#define EVENT_WR_XACT_WHEN_CRITICAL 12
+#define EVENT_LPR_XACT_WHEN_CRITICAL 11
+#define EVENT_HPR_XACT_WHEN_CRITICAL 10
+#define EVENT_DFI_RD_DATA_CYCLES 9
+#define EVENT_DFI_WR_DATA_CYCLES 8
+#define EVENT_ACT_BYPASS 7
+#define EVENT_READ_BYPASS 6
+#define EVENT_HIF_HI_PRI_RD 5
+#define EVENT_HIF_RMW 4
+#define EVENT_HIF_RD 3
+#define EVENT_HIF_WR 2
+#define EVENT_HIF_RD_OR_WR 1
+
+/* Event counter value registers */
+#define DDRC_PERF_CNT_VALUE_BASE 0x8080
+#define DDRC_PERF_CNT_VALUE(n) (DDRC_PERF_CNT_VALUE_BASE + 8 * (n))
+
+/* Fixed event counter enable/disable register */
+#define DDRC_PERF_CNT_FREERUN_EN 0x80C0
+#define DDRC_PERF_FREERUN_WRITE_EN 0x1
+#define DDRC_PERF_FREERUN_READ_EN 0x2
+
+/* Fixed event counter control register */
+#define DDRC_PERF_CNT_FREERUN_CTRL 0x80C8
+#define DDRC_FREERUN_WRITE_CNT_CLR 0x1
+#define DDRC_FREERUN_READ_CNT_CLR 0x2
+
+/* Fixed event counter value register */
+#define DDRC_PERF_CNT_VALUE_WR_OP 0x80D0
+#define DDRC_PERF_CNT_VALUE_RD_OP 0x80D8
+#define DDRC_PERF_CNT_VALUE_OVERFLOW BIT_ULL(48)
+#define DDRC_PERF_CNT_MAX_VALUE GENMASK_ULL(48, 0)
+
+struct cn10k_ddr_pmu {
+ struct pmu pmu;
+ void __iomem *base;
+ unsigned int cpu;
+ struct device *dev;
+ int active_events;
+ struct perf_event *events[DDRC_PERF_NUM_COUNTERS];
+ struct hrtimer hrtimer;
+ struct hlist_node node;
+};
+
+#define to_cn10k_ddr_pmu(p) container_of(p, struct cn10k_ddr_pmu, pmu)
+
+static ssize_t cn10k_ddr_pmu_event_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define CN10K_DDR_PMU_EVENT_ATTR(_name, _id) \
+ PMU_EVENT_ATTR_ID(_name, cn10k_ddr_pmu_event_show, _id)
+
+static struct attribute *cn10k_ddr_perf_events_attrs[] = {
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_or_wr_access, EVENT_HIF_RD_OR_WR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_wr_access, EVENT_HIF_WR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_access, EVENT_HIF_RD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rmw_access, EVENT_HIF_RMW),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_pri_rdaccess, EVENT_HIF_HI_PRI_RD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_bypass_access, EVENT_READ_BYPASS),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_act_bypass_access, EVENT_ACT_BYPASS),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_dif_wr_data_access, EVENT_DFI_WR_DATA_CYCLES),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_dif_rd_data_access, EVENT_DFI_RD_DATA_CYCLES),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hpri_sched_rd_crit_access,
+ EVENT_HPR_XACT_WHEN_CRITICAL),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_lpri_sched_rd_crit_access,
+ EVENT_LPR_XACT_WHEN_CRITICAL),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_wr_trxn_crit_access,
+ EVENT_WR_XACT_WHEN_CRITICAL),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_active_access, EVENT_OP_IS_ACTIVATE),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_or_wr_access, EVENT_OP_IS_RD_OR_WR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_active_access, EVENT_OP_IS_RD_ACTIVATE),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_read, EVENT_OP_IS_RD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_write, EVENT_OP_IS_WR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_mwr, EVENT_OP_IS_MWR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge, EVENT_OP_IS_PRECHARGE),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_rdwr, EVENT_PRECHARGE_FOR_RDWR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_other,
+ EVENT_PRECHARGE_FOR_OTHER),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_rdwr_transitions, EVENT_RDWR_TRANSITIONS),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_write_combine, EVENT_WRITE_COMBINE),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_war_hazard, EVENT_WAR_HAZARD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_raw_hazard, EVENT_RAW_HAZARD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_waw_hazard, EVENT_WAW_HAZARD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_selfref, EVENT_OP_IS_ENTER_SELFREF),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_powerdown, EVENT_OP_IS_ENTER_POWERDOWN),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_mpsm, EVENT_OP_IS_ENTER_MPSM),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_refresh, EVENT_OP_IS_REFRESH),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_crit_ref, EVENT_OP_IS_CRIT_REF),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_spec_ref, EVENT_OP_IS_SPEC_REF),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_load_mode, EVENT_OP_IS_LOAD_MODE),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_zqcl, EVENT_OP_IS_ZQCL),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_wr_access, EVENT_OP_IS_ZQCS),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_hpr_req_with_nocredit,
+ EVENT_HPR_REQ_WITH_NOCREDIT),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_lpr_req_with_nocredit,
+ EVENT_LPR_REQ_WITH_NOCREDIT),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_alloc, EVENT_BSM_ALLOC),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_starvation, EVENT_BSM_STARVATION),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_rd,
+ EVENT_VISIBLE_WIN_LIMIT_REACHED_RD),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_wr,
+ EVENT_VISIBLE_WIN_LIMIT_REACHED_WR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mpc, EVENT_OP_IS_DQSOSC_MPC),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mrr, EVENT_OP_IS_DQSOSC_MRR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_tcr_mrr, EVENT_OP_IS_TCR_MRR),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_zqstart, EVENT_OP_IS_ZQSTART),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_zqlatch, EVENT_OP_IS_ZQLATCH),
+ /* Free run event counters */
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_reads, EVENT_DDR_READS),
+ CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_writes, EVENT_DDR_WRITES),
+ NULL
+};
+
+static struct attribute_group cn10k_ddr_perf_events_attr_group = {
+ .name = "events",
+ .attrs = cn10k_ddr_perf_events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-8");
+
+static struct attribute *cn10k_ddr_perf_format_attrs[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group cn10k_ddr_perf_format_attr_group = {
+ .name = "format",
+ .attrs = cn10k_ddr_perf_format_attrs,
+};
+
+static ssize_t cn10k_ddr_perf_cpumask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cn10k_ddr_pmu *pmu = dev_get_drvdata(dev);
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
+}
+
+static struct device_attribute cn10k_ddr_perf_cpumask_attr =
+ __ATTR(cpumask, 0444, cn10k_ddr_perf_cpumask_show, NULL);
+
+static struct attribute *cn10k_ddr_perf_cpumask_attrs[] = {
+ &cn10k_ddr_perf_cpumask_attr.attr,
+ NULL,
+};
+
+static struct attribute_group cn10k_ddr_perf_cpumask_attr_group = {
+ .attrs = cn10k_ddr_perf_cpumask_attrs,
+};
+
+static const struct attribute_group *cn10k_attr_groups[] = {
+ &cn10k_ddr_perf_events_attr_group,
+ &cn10k_ddr_perf_format_attr_group,
+ &cn10k_ddr_perf_cpumask_attr_group,
+ NULL,
+};
+
+/* The default poll timeout is 100 sec, which is ample for a 48-bit
+ * counter incrementing at a maximum of 5.6 GT/s; overflow would take
+ * many hours.
+ */
+static unsigned long cn10k_ddr_pmu_poll_period_sec = 100;
+module_param_named(poll_period_sec, cn10k_ddr_pmu_poll_period_sec, ulong, 0644);
+
+static ktime_t cn10k_ddr_pmu_timer_period(void)
+{
+ return ms_to_ktime((u64)cn10k_ddr_pmu_poll_period_sec * USEC_PER_SEC);
+}
+
+static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
+{
+ switch (eventid) {
+ case EVENT_HIF_RD_OR_WR ... EVENT_WAW_HAZARD:
+ case EVENT_OP_IS_REFRESH ... EVENT_OP_IS_ZQLATCH:
+ *event_bitmap = (1ULL << (eventid - 1));
+ break;
+ case EVENT_OP_IS_ENTER_SELFREF:
+ case EVENT_OP_IS_ENTER_POWERDOWN:
+ case EVENT_OP_IS_ENTER_MPSM:
+ *event_bitmap = (0xFULL << (eventid - 1));
+ break;
+ default:
+ pr_err("%s Invalid eventid %d\n", __func__, eventid);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int cn10k_ddr_perf_alloc_counter(struct cn10k_ddr_pmu *pmu,
+ struct perf_event *event)
+{
+ u8 config = event->attr.config;
+ int i;
+
+ /* DDR read free-run counter index */
+ if (config == EVENT_DDR_READS) {
+ pmu->events[DDRC_PERF_READ_COUNTER_IDX] = event;
+ return DDRC_PERF_READ_COUNTER_IDX;
+ }
+
+ /* DDR write free-run counter index */
+ if (config == EVENT_DDR_WRITES) {
+ pmu->events[DDRC_PERF_WRITE_COUNTER_IDX] = event;
+ return DDRC_PERF_WRITE_COUNTER_IDX;
+ }
+
+ /* Allocate DDR generic counters */
+ for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
+ if (pmu->events[i] == NULL) {
+ pmu->events[i] = event;
+ return i;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static void cn10k_ddr_perf_free_counter(struct cn10k_ddr_pmu *pmu, int counter)
+{
+ pmu->events[counter] = NULL;
+}
+
+static int cn10k_ddr_perf_event_init(struct perf_event *event)
+{
+ struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ if (is_sampling_event(event)) {
+ dev_info(pmu->dev, "Sampling not supported!\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (event->cpu < 0) {
+ dev_warn(pmu->dev, "Can't provide per-task data!\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* We must NOT create groups containing mixed PMUs */
+ if (event->group_leader->pmu != event->pmu &&
+ !is_software_event(event->group_leader))
+ return -EINVAL;
+
+ /* Set ownership of the event to one CPU; the same event cannot be
+ * observed on multiple CPUs at the same time.
+ */
+ event->cpu = pmu->cpu;
+ hwc->idx = -1;
+ return 0;
+}
+
+static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu,
+ int counter, bool enable)
+{
+ u32 reg;
+ u64 val;
+
+ if (counter > DDRC_PERF_NUM_COUNTERS) {
+ pr_err("Error: unsupported counter %d\n", counter);
+ return;
+ }
+
+ if (counter < DDRC_PERF_NUM_GEN_COUNTERS) {
+ reg = DDRC_PERF_CFG(counter);
+ val = readq_relaxed(pmu->base + reg);
+
+ if (enable)
+ val |= EVENT_ENABLE;
+ else
+ val &= ~EVENT_ENABLE;
+
+ writeq_relaxed(val, pmu->base + reg);
+ } else {
+ val = readq_relaxed(pmu->base + DDRC_PERF_CNT_FREERUN_EN);
+ if (enable) {
+ if (counter == DDRC_PERF_READ_COUNTER_IDX)
+ val |= DDRC_PERF_FREERUN_READ_EN;
+ else
+ val |= DDRC_PERF_FREERUN_WRITE_EN;
+ } else {
+ if (counter == DDRC_PERF_READ_COUNTER_IDX)
+ val &= ~DDRC_PERF_FREERUN_READ_EN;
+ else
+ val &= ~DDRC_PERF_FREERUN_WRITE_EN;
+ }
+ writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_EN);
+ }
+}
+
+static u64 cn10k_ddr_perf_read_counter(struct cn10k_ddr_pmu *pmu, int counter)
+{
+ u64 val;
+
+ if (counter == DDRC_PERF_READ_COUNTER_IDX)
+ return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_RD_OP);
+
+ if (counter == DDRC_PERF_WRITE_COUNTER_IDX)
+ return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_WR_OP);
+
+ val = readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE(counter));
+ return val;
+}
+
+static void cn10k_ddr_perf_event_update(struct perf_event *event)
+{
+ struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev_count, new_count, mask;
+
+ do {
+ prev_count = local64_read(&hwc->prev_count);
+ new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
+ } while (local64_xchg(&hwc->prev_count, new_count) != prev_count);
+
+ mask = DDRC_PERF_CNT_MAX_VALUE;
+
+ local64_add((new_count - prev_count) & mask, &event->count);
+}
+
+static void cn10k_ddr_perf_event_start(struct perf_event *event, int flags)
+{
+ struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ local64_set(&hwc->prev_count, 0);
+
+ cn10k_ddr_perf_counter_enable(pmu, counter, true);
+
+ hwc->state = 0;
+}
+
+static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags)
+{
+ struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u8 config = event->attr.config;
+ int counter, ret;
+ u32 reg_offset;
+ u64 val;
+
+ counter = cn10k_ddr_perf_alloc_counter(pmu, event);
+ if (counter < 0)
+ return -EAGAIN;
+
+ pmu->active_events++;
+ hwc->idx = counter;
+
+ if (pmu->active_events == 1)
+ hrtimer_start(&pmu->hrtimer, cn10k_ddr_pmu_timer_period(),
+ HRTIMER_MODE_REL_PINNED);
+
+ if (counter < DDRC_PERF_NUM_GEN_COUNTERS) {
+ /* Generic counters, configure event id */
+ reg_offset = DDRC_PERF_CFG(counter);
+ ret = ddr_perf_get_event_bitmap(config, &val);
+ if (ret)
+ return ret;
+
+ writeq_relaxed(val, pmu->base + reg_offset);
+ } else {
+ /* fixed event counter, clear counter value */
+ if (counter == DDRC_PERF_READ_COUNTER_IDX)
+ val = DDRC_FREERUN_READ_CNT_CLR;
+ else
+ val = DDRC_FREERUN_WRITE_CNT_CLR;
+
+ writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_CTRL);
+ }
+
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ cn10k_ddr_perf_event_start(event, flags);
+
+ return 0;
+}
+
+static void cn10k_ddr_perf_event_stop(struct perf_event *event, int flags)
+{
+ struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ cn10k_ddr_perf_counter_enable(pmu, counter, false);
+
+ if (flags & PERF_EF_UPDATE)
+ cn10k_ddr_perf_event_update(event);
+
+ hwc->state |= PERF_HES_STOPPED;
+}
+
+static void cn10k_ddr_perf_event_del(struct perf_event *event, int flags)
+{
+ struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ cn10k_ddr_perf_event_stop(event, PERF_EF_UPDATE);
+
+ cn10k_ddr_perf_free_counter(pmu, counter);
+ pmu->active_events--;
+ hwc->idx = -1;
+
+ /* Cancel timer when no events to capture */
+ if (pmu->active_events == 0)
+ hrtimer_cancel(&pmu->hrtimer);
+}
+
+static void cn10k_ddr_perf_pmu_enable(struct pmu *pmu)
+{
+ struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);
+
+ writeq_relaxed(START_OP_CTRL_VAL_START, ddr_pmu->base +
+ DDRC_PERF_CNT_START_OP_CTRL);
+}
+
+static void cn10k_ddr_perf_pmu_disable(struct pmu *pmu)
+{
+ struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);
+
+ writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base +
+ DDRC_PERF_CNT_END_OP_CTRL);
+}
+
+static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
+{
+ struct hw_perf_event *hwc;
+ int i;
+
+ for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
+ if (pmu->events[i] == NULL)
+ continue;
+
+ cn10k_ddr_perf_event_update(pmu->events[i]);
+ }
+
+ /* Reset previous count as h/w counters are reset */
+ for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
+ if (pmu->events[i] == NULL)
+ continue;
+
+ hwc = &pmu->events[i]->hw;
+ local64_set(&hwc->prev_count, 0);
+ }
+}
+
+static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
+{
+ struct perf_event *event;
+ struct hw_perf_event *hwc;
+ u64 prev_count, new_count;
+ u64 value;
+ int i;
+
+ event = pmu->events[DDRC_PERF_READ_COUNTER_IDX];
+ if (event) {
+ hwc = &event->hw;
+ prev_count = local64_read(&hwc->prev_count);
+ new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
+
+ /* Overflow condition is when the new count is less than
+ * the previous count
+ */
+ if (new_count < prev_count)
+ cn10k_ddr_perf_event_update(event);
+ }
+
+ event = pmu->events[DDRC_PERF_WRITE_COUNTER_IDX];
+ if (event) {
+ hwc = &event->hw;
+ prev_count = local64_read(&hwc->prev_count);
+ new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
+
+ /* Overflow condition is when the new count is less than
+ * the previous count
+ */
+ if (new_count < prev_count)
+ cn10k_ddr_perf_event_update(event);
+ }
+
+ for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
+ if (pmu->events[i] == NULL)
+ continue;
+
+ value = cn10k_ddr_perf_read_counter(pmu, i);
+ if (value == DDRC_PERF_CNT_MAX_VALUE) {
+ pr_info("Counter-(%d) reached max value\n", i);
+ cn10k_ddr_perf_event_update_all(pmu);
+ cn10k_ddr_perf_pmu_disable(&pmu->pmu);
+ cn10k_ddr_perf_pmu_enable(&pmu->pmu);
+ }
+ }
+
+ return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart cn10k_ddr_pmu_timer_handler(struct hrtimer *hrtimer)
+{
+ struct cn10k_ddr_pmu *pmu = container_of(hrtimer, struct cn10k_ddr_pmu,
+ hrtimer);
+ unsigned long flags;
+
+ local_irq_save(flags);
+ cn10k_ddr_pmu_overflow_handler(pmu);
+ local_irq_restore(flags);
+
+ hrtimer_forward_now(hrtimer, cn10k_ddr_pmu_timer_period());
+ return HRTIMER_RESTART;
+}
+
+static int cn10k_ddr_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct cn10k_ddr_pmu *pmu = hlist_entry_safe(node, struct cn10k_ddr_pmu,
+ node);
+ unsigned int target;
+
+ if (cpu != pmu->cpu)
+ return 0;
+
+ target = cpumask_any_but(cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ return 0;
+
+ perf_pmu_migrate_context(&pmu->pmu, cpu, target);
+ pmu->cpu = target;
+ return 0;
+}
+
+static int cn10k_ddr_perf_probe(struct platform_device *pdev)
+{
+ struct cn10k_ddr_pmu *ddr_pmu;
+ struct resource *res;
+ void __iomem *base;
+ char *name;
+ int ret;
+
+ ddr_pmu = devm_kzalloc(&pdev->dev, sizeof(*ddr_pmu), GFP_KERNEL);
+ if (!ddr_pmu)
+ return -ENOMEM;
+
+ ddr_pmu->dev = &pdev->dev;
+ platform_set_drvdata(pdev, ddr_pmu);
+
+ base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ ddr_pmu->base = base;
+
+ /* Set up the PMU counter to work in manual mode */
+ writeq_relaxed(OP_MODE_CTRL_VAL_MANNUAL, ddr_pmu->base +
+ DDRC_PERF_CNT_OP_MODE_CTRL);
+
+ ddr_pmu->pmu = (struct pmu) {
+ .module = THIS_MODULE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .task_ctx_nr = perf_invalid_context,
+ .attr_groups = cn10k_attr_groups,
+ .event_init = cn10k_ddr_perf_event_init,
+ .add = cn10k_ddr_perf_event_add,
+ .del = cn10k_ddr_perf_event_del,
+ .start = cn10k_ddr_perf_event_start,
+ .stop = cn10k_ddr_perf_event_stop,
+ .read = cn10k_ddr_perf_event_update,
+ .pmu_enable = cn10k_ddr_perf_pmu_enable,
+ .pmu_disable = cn10k_ddr_perf_pmu_disable,
+ };
+
+ /* Choose this cpu to collect perf data */
+ ddr_pmu->cpu = raw_smp_processor_id();
+
+ name = devm_kasprintf(ddr_pmu->dev, GFP_KERNEL, "mrvl_ddr_pmu_%llx",
+ res->start);
+ if (!name)
+ return -ENOMEM;
+
+ hrtimer_init(&ddr_pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ ddr_pmu->hrtimer.function = cn10k_ddr_pmu_timer_handler;
+
+ cpuhp_state_add_instance_nocalls(
+ CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+ &ddr_pmu->node);
+
+ ret = perf_pmu_register(&ddr_pmu->pmu, name, -1);
+ if (ret)
+ goto error;
+
+ pr_info("CN10K DDR PMU Driver for ddrc@%llx\n", res->start);
+ return 0;
+error:
+ cpuhp_state_remove_instance_nocalls(
+ CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+ &ddr_pmu->node);
+ return ret;
+}
+
+static int cn10k_ddr_perf_remove(struct platform_device *pdev)
+{
+ struct cn10k_ddr_pmu *ddr_pmu = platform_get_drvdata(pdev);
+
+ cpuhp_state_remove_instance_nocalls(
+ CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+ &ddr_pmu->node);
+
+ perf_pmu_unregister(&ddr_pmu->pmu);
+ return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id cn10k_ddr_pmu_of_match[] = {
+ { .compatible = "marvell,cn10k-ddr-pmu", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, cn10k_ddr_pmu_of_match);
+#endif
+
+static struct platform_driver cn10k_ddr_pmu_driver = {
+ .driver = {
+ .name = "cn10k-ddr-pmu",
+ .of_match_table = of_match_ptr(cn10k_ddr_pmu_of_match),
+ .suppress_bind_attrs = true,
+ },
+ .probe = cn10k_ddr_perf_probe,
+ .remove = cn10k_ddr_perf_remove,
+};
+
+static int __init cn10k_ddr_pmu_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(
+ CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+ "perf/marvell/cn10k/ddr:online", NULL,
+ cn10k_ddr_pmu_offline_cpu);
+ if (ret)
+ return ret;
+
+ ret = platform_driver_register(&cn10k_ddr_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(
+ CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE);
+ return ret;
+}
+
+static void __exit cn10k_ddr_pmu_exit(void)
+{
+ platform_driver_unregister(&cn10k_ddr_pmu_driver);
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE);
+}
+
+module_init(cn10k_ddr_pmu_init);
+module_exit(cn10k_ddr_pmu_exit);
+
+MODULE_AUTHOR("Bharat Bhushan <bbhushan2@marvell.com>");
+MODULE_LICENSE("GPL v2");
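The update loop in cn10k_ddr_perf_event_update() above retries with
local64_xchg(); the more common perf idiom is a local64_cmpxchg() loop. A
minimal sketch of that conventional pattern, assuming a hypothetical
read_hw_counter() stand-in for the device-specific register read:

#include <linux/perf_event.h>

/* Hypothetical device-specific counter read, a stand-in for this sketch */
static u64 read_hw_counter(int idx);

static void demo_pmu_event_update(struct perf_event *event, u64 mask)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, now;

	do {
		prev = local64_read(&hwc->prev_count);
		now = read_hw_counter(hwc->idx);
	} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);

	/* mask keeps the delta wrap-safe at the counter's width */
	local64_add((now - prev) & mask, &event->count);
}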
diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
index 7f4d292658e3..ee67305f822d 100644
--- a/drivers/perf/marvell_cn10k_tad_pmu.c
+++ b/drivers/perf/marvell_cn10k_tad_pmu.c
@@ -368,10 +368,12 @@ static int tad_pmu_remove(struct platform_device *pdev)
return 0;
}
+#ifdef CONFIG_OF
static const struct of_device_id tad_pmu_of_match[] = {
{ .compatible = "marvell,cn10k-tad-pmu", },
{},
};
+#endif
static struct platform_driver tad_pmu_driver = {
.driver = {
diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
index 05378c0fd8f3..1edb9c03704f 100644
--- a/drivers/perf/thunderx2_pmu.c
+++ b/drivers/perf/thunderx2_pmu.c
@@ -887,13 +887,11 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev,
static acpi_status tx2_uncore_pmu_add(acpi_handle handle, u32 level,
void *data, void **return_value)
{
+ struct acpi_device *adev = acpi_fetch_acpi_dev(handle);
struct tx2_uncore_pmu *tx2_pmu;
- struct acpi_device *adev;
enum tx2_uncore_type type;
- if (acpi_bus_get_device(handle, &adev))
- return AE_OK;
- if (acpi_bus_get_status(adev) || !adev->status.present)
+ if (!adev || acpi_bus_get_status(adev) || !adev->status.present)
return AE_OK;
type = get_tx2_pmu_type(adev);
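This hunk and the xgene one below make the same conversion:
acpi_bus_get_device(), which returned status through an output parameter, is
replaced by acpi_fetch_acpi_dev(), which returns the device (or NULL)
directly. A sketch of the resulting walk-callback shape; the callback name is
hypothetical:

#include <linux/acpi.h>

static acpi_status demo_walk_cb(acpi_handle handle, u32 level,
				void *data, void **return_value)
{
	struct acpi_device *adev = acpi_fetch_acpi_dev(handle);

	/* NULL means no struct acpi_device is attached to this handle */
	if (!adev || acpi_bus_get_status(adev) || !adev->status.present)
		return AE_OK;	/* skip this node, continue the walk */

	/* ... operate on adev ... */
	return AE_OK;
}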
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 2b6d476bd213..0c32dffc7ede 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -867,7 +867,7 @@ static void xgene_perf_pmu_enable(struct pmu *pmu)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu);
struct xgene_pmu *xgene_pmu = pmu_dev->parent;
- int enabled = bitmap_weight(pmu_dev->cntr_assign_mask,
+ bool enabled = !bitmap_empty(pmu_dev->cntr_assign_mask,
pmu_dev->max_counters);
if (!enabled)
@@ -1549,14 +1549,12 @@ static const struct acpi_device_id *xgene_pmu_acpi_match_type(
static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
void *data, void **return_value)
{
+ struct acpi_device *adev = acpi_fetch_acpi_dev(handle);
const struct acpi_device_id *acpi_id;
struct xgene_pmu *xgene_pmu = data;
struct xgene_pmu_dev_ctx *ctx;
- struct acpi_device *adev;
- if (acpi_bus_get_device(handle, &adev))
- return AE_OK;
- if (acpi_bus_get_status(adev) || !adev->status.present)
+ if (!adev || acpi_bus_get_status(adev) || !adev->status.present)
return AE_OK;
acpi_id = xgene_pmu_acpi_match_type(xgene_pmu_acpi_type_match, adev);
diff --git a/drivers/phy/amlogic/phy-meson-axg-mipi-dphy.c b/drivers/phy/amlogic/phy-meson-axg-mipi-dphy.c
index cd2332bf0e31..fdbd64c03e12 100644
--- a/drivers/phy/amlogic/phy-meson-axg-mipi-dphy.c
+++ b/drivers/phy/amlogic/phy-meson-axg-mipi-dphy.c
@@ -9,6 +9,7 @@
#include <linux/bitfield.h>
#include <linux/bitops.h>
+#include <linux/bits.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/io.h>
@@ -250,7 +251,7 @@ static int phy_meson_axg_mipi_dphy_power_on(struct phy *phy)
(DIV_ROUND_UP(priv->config.clk_zero, temp) << 16) |
(DIV_ROUND_UP(priv->config.clk_prepare, temp) << 24));
regmap_write(priv->regmap, MIPI_DSI_CLK_TIM1,
- DIV_ROUND_UP(priv->config.clk_pre, temp));
+ DIV_ROUND_UP(priv->config.clk_pre, BITS_PER_BYTE));
regmap_write(priv->regmap, MIPI_DSI_HS_TIM,
DIV_ROUND_UP(priv->config.hs_exit, temp) |
diff --git a/drivers/phy/broadcom/Kconfig b/drivers/phy/broadcom/Kconfig
index f81e23742079..849c4204f550 100644
--- a/drivers/phy/broadcom/Kconfig
+++ b/drivers/phy/broadcom/Kconfig
@@ -97,8 +97,7 @@ config PHY_BRCM_USB
depends on OF
select GENERIC_PHY
select SOC_BRCMSTB if ARCH_BRCMSTB
- default ARCH_BCM4908
- default ARCH_BRCMSTB
+ default ARCH_BCM4908 || ARCH_BRCMSTB
help
Enable this to support the Broadcom STB USB PHY.
This driver is required by the USB XHCI, EHCI and OHCI
diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c
index 116fb23aebd9..0f1deb6e0eab 100644
--- a/drivers/phy/broadcom/phy-brcm-usb.c
+++ b/drivers/phy/broadcom/phy-brcm-usb.c
@@ -18,6 +18,7 @@
#include <linux/soc/brcmstb/brcmstb.h>
#include <dt-bindings/phy/phy.h>
#include <linux/mfd/syscon.h>
+#include <linux/suspend.h>
#include "phy-brcm-usb-init.h"
@@ -70,12 +71,35 @@ struct brcm_usb_phy_data {
int init_count;
int wake_irq;
struct brcm_usb_phy phys[BRCM_USB_PHY_ID_MAX];
+ struct notifier_block pm_notifier;
+ bool pm_active;
};
static s8 *node_reg_names[BRCM_REGS_MAX] = {
"crtl", "xhci_ec", "xhci_gbl", "usb_phy", "usb_mdio", "bdc_ec"
};
+static int brcm_pm_notifier(struct notifier_block *notifier,
+ unsigned long pm_event,
+ void *unused)
+{
+ struct brcm_usb_phy_data *priv =
+ container_of(notifier, struct brcm_usb_phy_data, pm_notifier);
+
+ switch (pm_event) {
+ case PM_HIBERNATION_PREPARE:
+ case PM_SUSPEND_PREPARE:
+ priv->pm_active = true;
+ break;
+ case PM_POST_RESTORE:
+ case PM_POST_HIBERNATION:
+ case PM_POST_SUSPEND:
+ priv->pm_active = false;
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
static irqreturn_t brcm_usb_phy_wake_isr(int irq, void *dev_id)
{
struct phy *gphy = dev_id;
@@ -91,6 +115,9 @@ static int brcm_usb_phy_init(struct phy *gphy)
struct brcm_usb_phy_data *priv =
container_of(phy, struct brcm_usb_phy_data, phys[phy->id]);
+ if (priv->pm_active)
+ return 0;
+
/*
* Use a lock to make sure a second caller waits until
* the base phy is inited before using it.
@@ -120,6 +147,9 @@ static int brcm_usb_phy_exit(struct phy *gphy)
struct brcm_usb_phy_data *priv =
container_of(phy, struct brcm_usb_phy_data, phys[phy->id]);
+ if (priv->pm_active)
+ return 0;
+
dev_dbg(&gphy->dev, "EXIT\n");
if (phy->id == BRCM_USB_PHY_2_0)
brcm_usb_uninit_eohci(&priv->ini);
@@ -488,6 +518,9 @@ static int brcm_usb_phy_probe(struct platform_device *pdev)
if (err)
return err;
+ priv->pm_notifier.notifier_call = brcm_pm_notifier;
+ register_pm_notifier(&priv->pm_notifier);
+
mutex_init(&priv->mutex);
/* make sure invert settings are correct */
@@ -528,7 +561,10 @@ static int brcm_usb_phy_probe(struct platform_device *pdev)
static int brcm_usb_phy_remove(struct platform_device *pdev)
{
+ struct brcm_usb_phy_data *priv = dev_get_drvdata(&pdev->dev);
+
sysfs_remove_group(&pdev->dev.kobj, &brcm_usb_phy_group);
+ unregister_pm_notifier(&priv->pm_notifier);
return 0;
}
@@ -539,6 +575,7 @@ static int brcm_usb_phy_suspend(struct device *dev)
struct brcm_usb_phy_data *priv = dev_get_drvdata(dev);
if (priv->init_count) {
+ dev_dbg(dev, "SUSPEND\n");
priv->ini.wake_enabled = device_may_wakeup(dev);
if (priv->phys[BRCM_USB_PHY_3_0].inited)
brcm_usb_uninit_xhci(&priv->ini);
@@ -578,6 +615,7 @@ static int brcm_usb_phy_resume(struct device *dev)
* Uninitialize anything that wasn't previously initialized.
*/
if (priv->init_count) {
+ dev_dbg(dev, "RESUME\n");
if (priv->wake_irq >= 0)
disable_irq_wake(priv->wake_irq);
brcm_usb_init_common(&priv->ini);
diff --git a/drivers/phy/cadence/phy-cadence-sierra.c b/drivers/phy/cadence/phy-cadence-sierra.c
index da24acd26666..e265647e29a2 100644
--- a/drivers/phy/cadence/phy-cadence-sierra.c
+++ b/drivers/phy/cadence/phy-cadence-sierra.c
@@ -1338,7 +1338,7 @@ static int cdns_sierra_phy_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
const struct cdns_sierra_data *data;
unsigned int id_value;
- int i, ret, node = 0;
+ int ret, node = 0;
void __iomem *base;
struct device_node *dn = dev->of_node, *child;
@@ -1416,7 +1416,8 @@ static int cdns_sierra_phy_probe(struct platform_device *pdev)
dev_err(dev, "failed to get reset %s\n",
child->full_name);
ret = PTR_ERR(sp->phys[node].lnk_rst);
- goto put_child2;
+ of_node_put(child);
+ goto put_control;
}
if (!sp->autoconf) {
@@ -1424,7 +1425,9 @@ static int cdns_sierra_phy_probe(struct platform_device *pdev)
if (ret) {
dev_err(dev, "missing property in node %s\n",
child->name);
- goto put_child;
+ of_node_put(child);
+ reset_control_put(sp->phys[node].lnk_rst);
+ goto put_control;
}
}
@@ -1434,7 +1437,9 @@ static int cdns_sierra_phy_probe(struct platform_device *pdev)
if (IS_ERR(gphy)) {
ret = PTR_ERR(gphy);
- goto put_child;
+ of_node_put(child);
+ reset_control_put(sp->phys[node].lnk_rst);
+ goto put_control;
}
sp->phys[node].phy = gphy;
phy_set_drvdata(gphy, &sp->phys[node]);
@@ -1446,26 +1451,28 @@ static int cdns_sierra_phy_probe(struct platform_device *pdev)
if (sp->num_lanes > SIERRA_MAX_LANES) {
ret = -EINVAL;
dev_err(dev, "Invalid lane configuration\n");
- goto put_child2;
+ goto put_control;
}
/* If more than one subnode, configure the PHY as multilink */
if (!sp->autoconf && sp->nsubnodes > 1) {
ret = cdns_sierra_phy_configure_multilink(sp);
if (ret)
- goto put_child2;
+ goto put_control;
}
pm_runtime_enable(dev);
phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
- return PTR_ERR_OR_ZERO(phy_provider);
-
-put_child:
- node++;
-put_child2:
- for (i = 0; i < node; i++)
- reset_control_put(sp->phys[i].lnk_rst);
- of_node_put(child);
+ if (IS_ERR(phy_provider)) {
+ ret = PTR_ERR(phy_provider);
+ goto put_control;
+ }
+
+ return 0;
+
+put_control:
+ while (--node >= 0)
+ reset_control_put(sp->phys[node].lnk_rst);
clk_disable:
cdns_sierra_phy_disable_clocks(sp);
reset_control_assert(sp->apb_rst);
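The reworked error path replaces the two put_child labels with a single
reverse unwind. The idiom in isolation, with hypothetical demo_* names:

struct demo_obj;
static int demo_setup(struct demo_obj *obj);
static void demo_release(struct demo_obj *obj);

static int demo_setup_all(struct demo_obj *objs, int count)
{
	int n, ret;

	for (n = 0; n < count; n++) {
		ret = demo_setup(&objs[n]);
		if (ret)
			goto unwind;
	}
	return 0;

unwind:
	/* entries [0, n) succeeded; release them in reverse order */
	while (--n >= 0)
		demo_release(&objs[n]);
	return ret;
}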
diff --git a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c
index 6d307102f4f6..8ee7682b8e93 100644
--- a/drivers/phy/mediatek/phy-mtk-tphy.c
+++ b/drivers/phy/mediatek/phy-mtk-tphy.c
@@ -992,7 +992,7 @@ static int phy_efuse_get(struct mtk_tphy *tphy, struct mtk_phy_instance *instanc
/* no efuse, ignore it */
if (!instance->efuse_intr &&
!instance->efuse_rx_imp &&
- !instance->efuse_rx_imp) {
+ !instance->efuse_tx_imp) {
dev_warn(dev, "no u3 intr efuse, but dts enable it\n");
instance->efuse_sw_en = 0;
break;
diff --git a/drivers/phy/phy-core-mipi-dphy.c b/drivers/phy/phy-core-mipi-dphy.c
index 288c9c67aa74..ccb4045685cd 100644
--- a/drivers/phy/phy-core-mipi-dphy.c
+++ b/drivers/phy/phy-core-mipi-dphy.c
@@ -36,7 +36,7 @@ int phy_mipi_dphy_get_default_config(unsigned long pixel_clock,
cfg->clk_miss = 0;
cfg->clk_post = 60000 + 52 * ui;
- cfg->clk_pre = 8000;
+ cfg->clk_pre = 8;
cfg->clk_prepare = 38000;
cfg->clk_settle = 95000;
cfg->clk_term_en = 0;
@@ -97,7 +97,7 @@ int phy_mipi_dphy_config_validate(struct phy_configure_opts_mipi_dphy *cfg)
if (cfg->clk_post < (60000 + 52 * ui))
return -EINVAL;
- if (cfg->clk_pre < 8000)
+ if (cfg->clk_pre < 8)
return -EINVAL;
if (cfg->clk_prepare < 38000 || cfg->clk_prepare > 95000)
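The clk_pre change is a unit change, not a retuning: clk_pre is now expressed
in UI (HS bit periods) rather than picoseconds, and consumers (the meson hunk
above, the rockchip hunk below) convert it to txbyteclkhs cycles by dividing
by BITS_PER_BYTE. A sketch of that conversion; the helper name is
illustrative:

#include <linux/bits.h>		/* BITS_PER_BYTE */
#include <linux/math.h>		/* DIV_ROUND_UP */

/* clk_pre is in UI; one byte-clock period spans BITS_PER_BYTE UI */
static u32 demo_clk_pre_to_txbyteclkhs(u32 clk_pre_ui)
{
	return DIV_ROUND_UP(clk_pre_ui, BITS_PER_BYTE);
}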
diff --git a/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c b/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c
index 347dc79a18c1..630e01b5c19b 100644
--- a/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c
+++ b/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c
@@ -5,6 +5,7 @@
* Author: Wyon Bi <bivvy.bi@rock-chips.com>
*/
+#include <linux/bits.h>
#include <linux/kernel.h>
#include <linux/clk.h>
#include <linux/iopoll.h>
@@ -364,7 +365,7 @@ static void inno_dsidphy_mipi_mode_enable(struct inno_dsidphy *inno)
* The value of counter for HS Tclk-pre
* Tclk-pre = Tpin_txbyteclkhs * value
*/
- clk_pre = DIV_ROUND_UP(cfg->clk_pre, t_txbyteclkhs);
+ clk_pre = DIV_ROUND_UP(cfg->clk_pre, BITS_PER_BYTE);
/*
* The value of counter for HS Tlpx Time
diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c
index 2ce9bfd783d4..007a23c78d56 100644
--- a/drivers/phy/st/phy-stm32-usbphyc.c
+++ b/drivers/phy/st/phy-stm32-usbphyc.c
@@ -304,7 +304,7 @@ static int stm32_usbphyc_pll_enable(struct stm32_usbphyc *usbphyc)
ret = __stm32_usbphyc_pll_disable(usbphyc);
if (ret)
- return ret;
+ goto dec_n_pll_cons;
}
ret = stm32_usbphyc_regulators_enable(usbphyc);
diff --git a/drivers/phy/ti/phy-j721e-wiz.c b/drivers/phy/ti/phy-j721e-wiz.c
index b3384c31637a..da546c35d1d5 100644
--- a/drivers/phy/ti/phy-j721e-wiz.c
+++ b/drivers/phy/ti/phy-j721e-wiz.c
@@ -233,6 +233,7 @@ static const struct clk_div_table clk_div_table[] = {
{ .val = 1, .div = 2, },
{ .val = 2, .div = 4, },
{ .val = 3, .div = 8, },
+ { /* sentinel */ },
};
static const struct wiz_clk_div_sel clk_div_sel[] = {
diff --git a/drivers/phy/xilinx/phy-zynqmp.c b/drivers/phy/xilinx/phy-zynqmp.c
index f478d8a17115..9be9535ad7ab 100644
--- a/drivers/phy/xilinx/phy-zynqmp.c
+++ b/drivers/phy/xilinx/phy-zynqmp.c
@@ -134,7 +134,8 @@
#define PROT_BUS_WIDTH_10 0x0
#define PROT_BUS_WIDTH_20 0x1
#define PROT_BUS_WIDTH_40 0x2
-#define PROT_BUS_WIDTH_SHIFT 2
+#define PROT_BUS_WIDTH_SHIFT(n) ((n) * 2)
+#define PROT_BUS_WIDTH_MASK(n) GENMASK((n) * 2 + 1, (n) * 2)
/* Number of GT lanes */
#define NUM_LANES 4
@@ -445,12 +446,12 @@ static void xpsgtr_phy_init_sata(struct xpsgtr_phy *gtr_phy)
static void xpsgtr_phy_init_sgmii(struct xpsgtr_phy *gtr_phy)
{
struct xpsgtr_dev *gtr_dev = gtr_phy->dev;
+ u32 mask = PROT_BUS_WIDTH_MASK(gtr_phy->lane);
+ u32 val = PROT_BUS_WIDTH_10 << PROT_BUS_WIDTH_SHIFT(gtr_phy->lane);
/* Set SGMII protocol TX and RX bus width to 10 bits. */
- xpsgtr_write(gtr_dev, TX_PROT_BUS_WIDTH,
- PROT_BUS_WIDTH_10 << (gtr_phy->lane * PROT_BUS_WIDTH_SHIFT));
- xpsgtr_write(gtr_dev, RX_PROT_BUS_WIDTH,
- PROT_BUS_WIDTH_10 << (gtr_phy->lane * PROT_BUS_WIDTH_SHIFT));
+ xpsgtr_clr_set(gtr_dev, TX_PROT_BUS_WIDTH, mask, val);
+ xpsgtr_clr_set(gtr_dev, RX_PROT_BUS_WIDTH, mask, val);
xpsgtr_bypass_scrambler_8b10b(gtr_phy);
}
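The new PROT_BUS_WIDTH_SHIFT()/PROT_BUS_WIDTH_MASK() macros turn the per-lane
update into a masked read-modify-write, so configuring one lane no longer
clobbers its neighbours. The same 2-bit-per-lane pattern, sketched with
hypothetical names:

#include <linux/bits.h>

#define DEMO_FIELD_SHIFT(lane)	((lane) * 2)
#define DEMO_FIELD_MASK(lane)	GENMASK((lane) * 2 + 1, (lane) * 2)

static u32 demo_set_lane_field(u32 reg, unsigned int lane, u32 val)
{
	reg &= ~DEMO_FIELD_MASK(lane);		/* clear only this lane's bits */
	reg |= val << DEMO_FIELD_SHIFT(lane);
	return reg;
}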
diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile
index 08c364d611f5..f64d29f614ec 100644
--- a/drivers/pinctrl/Makefile
+++ b/drivers/pinctrl/Makefile
@@ -42,9 +42,9 @@ obj-$(CONFIG_PINCTRL_PISTACHIO) += pinctrl-pistachio.o
obj-$(CONFIG_PINCTRL_RK805) += pinctrl-rk805.o
obj-$(CONFIG_PINCTRL_ROCKCHIP) += pinctrl-rockchip.o
obj-$(CONFIG_PINCTRL_SINGLE) += pinctrl-single.o
+obj-$(CONFIG_PINCTRL_ST) += pinctrl-st.o
obj-$(CONFIG_PINCTRL_STARFIVE) += pinctrl-starfive.o
obj-$(CONFIG_PINCTRL_STMFX) += pinctrl-stmfx.o
-obj-$(CONFIG_PINCTRL_ST) += pinctrl-st.o
obj-$(CONFIG_PINCTRL_SX150X) += pinctrl-sx150x.o
obj-$(CONFIG_PINCTRL_TB10X) += pinctrl-tb10x.o
obj-$(CONFIG_PINCTRL_THUNDERBAY) += pinctrl-thunderbay.o
diff --git a/drivers/pinctrl/bcm/Kconfig b/drivers/pinctrl/bcm/Kconfig
index 5123f4c33854..ac1e400bbbac 100644
--- a/drivers/pinctrl/bcm/Kconfig
+++ b/drivers/pinctrl/bcm/Kconfig
@@ -35,6 +35,7 @@ config PINCTRL_BCM63XX
select PINCONF
select GENERIC_PINCONF
select GPIOLIB
+ select REGMAP
select GPIO_REGMAP
config PINCTRL_BCM6318
diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c
index c4ebfa852b42..47e433e09c5c 100644
--- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c
+++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c
@@ -1269,16 +1269,18 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev)
sizeof(*girq->parents),
GFP_KERNEL);
if (!girq->parents) {
- pinctrl_remove_gpio_range(pc->pctl_dev, &pc->gpio_range);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto out_remove;
}
if (is_7211) {
pc->wake_irq = devm_kcalloc(dev, BCM2835_NUM_IRQS,
sizeof(*pc->wake_irq),
GFP_KERNEL);
- if (!pc->wake_irq)
- return -ENOMEM;
+ if (!pc->wake_irq) {
+ err = -ENOMEM;
+ goto out_remove;
+ }
}
/*
@@ -1306,8 +1308,10 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev)
len = strlen(dev_name(pc->dev)) + 16;
name = devm_kzalloc(pc->dev, len, GFP_KERNEL);
- if (!name)
- return -ENOMEM;
+ if (!name) {
+ err = -ENOMEM;
+ goto out_remove;
+ }
snprintf(name, len, "%s:bank%d", dev_name(pc->dev), i);
@@ -1326,11 +1330,14 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev)
err = gpiochip_add_data(&pc->gpio_chip, pc);
if (err) {
dev_err(dev, "could not add GPIO chip\n");
- pinctrl_remove_gpio_range(pc->pctl_dev, &pc->gpio_range);
- return err;
+ goto out_remove;
}
return 0;
+
+out_remove:
+ pinctrl_remove_gpio_range(pc->pctl_dev, &pc->gpio_range);
+ return err;
}
static struct platform_driver bcm2835_pinctrl_driver = {
diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index abffda1fd51e..1d5818269076 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -1471,8 +1471,9 @@ static void chv_gpio_irq_handler(struct irq_desc *desc)
offset = cctx->intr_lines[intr_line];
if (offset == CHV_INVALID_HWIRQ) {
- dev_err(dev, "interrupt on unused interrupt line %u\n", intr_line);
- continue;
+ dev_warn_once(dev, "interrupt on unmapped interrupt line %u\n", intr_line);
+ /* Some boards expect hwirq 0 to trigger in this case */
+ offset = 0;
}
generic_handle_domain_irq(gc->irq.domain, offset);
diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c
index 85750974d182..826d494f3cc6 100644
--- a/drivers/pinctrl/intel/pinctrl-intel.c
+++ b/drivers/pinctrl/intel/pinctrl-intel.c
@@ -451,8 +451,8 @@ static void intel_gpio_set_gpio_mode(void __iomem *padcfg0)
value &= ~PADCFG0_PMODE_MASK;
value |= PADCFG0_PMODE_GPIO;
- /* Disable input and output buffers */
- value |= PADCFG0_GPIORXDIS;
+ /* Disable TX buffer and enable RX (this will be input) */
+ value &= ~PADCFG0_GPIORXDIS;
value |= PADCFG0_GPIOTXDIS;
/* Disable SCI/SMI/NMI generation */
@@ -497,9 +497,6 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev,
intel_gpio_set_gpio_mode(padcfg0);
- /* Disable TX buffer and enable RX (this will be input) */
- __intel_gpio_set_direction(padcfg0, true);
-
raw_spin_unlock_irqrestore(&pctrl->lock, flags);
return 0;
@@ -1115,9 +1112,6 @@ static int intel_gpio_irq_type(struct irq_data *d, unsigned int type)
intel_gpio_set_gpio_mode(reg);
- /* Disable TX buffer and enable RX (this will be input) */
- __intel_gpio_set_direction(reg, true);
-
value = readl(reg);
value &= ~(PADCFG0_RXEVCFG_MASK | PADCFG0_RXINV);
@@ -1216,6 +1210,39 @@ static irqreturn_t intel_gpio_irq(int irq, void *data)
return IRQ_RETVAL(ret);
}
+static void intel_gpio_irq_init(struct intel_pinctrl *pctrl)
+{
+ int i;
+
+ for (i = 0; i < pctrl->ncommunities; i++) {
+ const struct intel_community *community;
+ void __iomem *base;
+ unsigned int gpp;
+
+ community = &pctrl->communities[i];
+ base = community->regs;
+
+ for (gpp = 0; gpp < community->ngpps; gpp++) {
+ /* Mask and clear all interrupts */
+ writel(0, base + community->ie_offset + gpp * 4);
+ writel(0xffff, base + community->is_offset + gpp * 4);
+ }
+ }
+}
+
+static int intel_gpio_irq_init_hw(struct gpio_chip *gc)
+{
+ struct intel_pinctrl *pctrl = gpiochip_get_data(gc);
+
+ /*
+ * Make sure the interrupt lines are in a proper state before
+ * further configuration.
+ */
+ intel_gpio_irq_init(pctrl);
+
+ return 0;
+}
+
static int intel_gpio_add_community_ranges(struct intel_pinctrl *pctrl,
const struct intel_community *community)
{
@@ -1320,6 +1347,7 @@ static int intel_gpio_probe(struct intel_pinctrl *pctrl, int irq)
girq->num_parents = 0;
girq->default_type = IRQ_TYPE_NONE;
girq->handler = handle_bad_irq;
+ girq->init_hw = intel_gpio_irq_init_hw;
ret = devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl);
if (ret) {
@@ -1695,26 +1723,6 @@ int intel_pinctrl_suspend_noirq(struct device *dev)
}
EXPORT_SYMBOL_GPL(intel_pinctrl_suspend_noirq);
-static void intel_gpio_irq_init(struct intel_pinctrl *pctrl)
-{
- size_t i;
-
- for (i = 0; i < pctrl->ncommunities; i++) {
- const struct intel_community *community;
- void __iomem *base;
- unsigned int gpp;
-
- community = &pctrl->communities[i];
- base = community->regs;
-
- for (gpp = 0; gpp < community->ngpps; gpp++) {
- /* Mask and clear all interrupts */
- writel(0, base + community->ie_offset + gpp * 4);
- writel(0xffff, base + community->is_offset + gpp * 4);
- }
- }
-}
-
static bool intel_gpio_update_reg(void __iomem *reg, u32 mask, u32 value)
{
u32 curr, updated;
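Moving the mask-and-clear into a gpio_irq_chip ->init_hw() hook means it runs
from gpiochip_add_data() before any interrupt can be requested, rather than
after registration. The hook shape, sketched with hypothetical names:

#include <linux/gpio/driver.h>

struct demo_pinctrl;
static void demo_mask_and_clear_all_irqs(struct demo_pinctrl *pctrl);

static int demo_gpio_irq_init_hw(struct gpio_chip *gc)
{
	struct demo_pinctrl *pctrl = gpiochip_get_data(gc);

	/* runs before IRQs can be requested, so this is race-free */
	demo_mask_and_clear_all_irqs(pctrl);
	return 0;
}

/* wired up at probe time: girq->init_hw = demo_gpio_irq_init_hw; */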
diff --git a/drivers/pinctrl/intel/pinctrl-tigerlake.c b/drivers/pinctrl/intel/pinctrl-tigerlake.c
index 0bcd19597e4a..3ddaeffc0415 100644
--- a/drivers/pinctrl/intel/pinctrl-tigerlake.c
+++ b/drivers/pinctrl/intel/pinctrl-tigerlake.c
@@ -749,7 +749,6 @@ static const struct acpi_device_id tgl_pinctrl_acpi_match[] = {
{ "INT34C5", (kernel_ulong_t)&tgllp_soc_data },
{ "INT34C6", (kernel_ulong_t)&tglh_soc_data },
{ "INTC1055", (kernel_ulong_t)&tgllp_soc_data },
- { "INTC1057", (kernel_ulong_t)&tgllp_soc_data },
{ }
};
MODULE_DEVICE_TABLE(acpi, tgl_pinctrl_acpi_match);
diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c
index 4d81908d6725..ba536fd4d674 100644
--- a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c
+++ b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c
@@ -78,7 +78,6 @@ struct npcm7xx_gpio {
struct gpio_chip gc;
int irqbase;
int irq;
- void *priv;
struct irq_chip irq_chip;
u32 pinctrl_id;
int (*direction_input)(struct gpio_chip *chip, unsigned offset);
@@ -226,7 +225,7 @@ static void npcmgpio_irq_handler(struct irq_desc *desc)
chained_irq_enter(chip, desc);
sts = ioread32(bank->base + NPCM7XX_GP_N_EVST);
en = ioread32(bank->base + NPCM7XX_GP_N_EVEN);
- dev_dbg(chip->parent_device, "==> got irq sts %.8x %.8x\n", sts,
+ dev_dbg(bank->gc.parent, "==> got irq sts %.8x %.8x\n", sts,
en);
sts &= en;
@@ -241,33 +240,33 @@ static int npcmgpio_set_irq_type(struct irq_data *d, unsigned int type)
gpiochip_get_data(irq_data_get_irq_chip_data(d));
unsigned int gpio = BIT(d->hwirq);
- dev_dbg(d->chip->parent_device, "setirqtype: %u.%u = %u\n", gpio,
+ dev_dbg(bank->gc.parent, "setirqtype: %u.%u = %u\n", gpio,
d->irq, type);
switch (type) {
case IRQ_TYPE_EDGE_RISING:
- dev_dbg(d->chip->parent_device, "edge.rising\n");
+ dev_dbg(bank->gc.parent, "edge.rising\n");
npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio);
npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio);
break;
case IRQ_TYPE_EDGE_FALLING:
- dev_dbg(d->chip->parent_device, "edge.falling\n");
+ dev_dbg(bank->gc.parent, "edge.falling\n");
npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio);
npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio);
break;
case IRQ_TYPE_EDGE_BOTH:
- dev_dbg(d->chip->parent_device, "edge.both\n");
+ dev_dbg(bank->gc.parent, "edge.both\n");
npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio);
break;
case IRQ_TYPE_LEVEL_LOW:
- dev_dbg(d->chip->parent_device, "level.low\n");
+ dev_dbg(bank->gc.parent, "level.low\n");
npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio);
break;
case IRQ_TYPE_LEVEL_HIGH:
- dev_dbg(d->chip->parent_device, "level.high\n");
+ dev_dbg(bank->gc.parent, "level.high\n");
npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio);
break;
default:
- dev_dbg(d->chip->parent_device, "invalid irq type\n");
+ dev_dbg(bank->gc.parent, "invalid irq type\n");
return -EINVAL;
}
@@ -289,7 +288,7 @@ static void npcmgpio_irq_ack(struct irq_data *d)
gpiochip_get_data(irq_data_get_irq_chip_data(d));
unsigned int gpio = d->hwirq;
- dev_dbg(d->chip->parent_device, "irq_ack: %u.%u\n", gpio, d->irq);
+ dev_dbg(bank->gc.parent, "irq_ack: %u.%u\n", gpio, d->irq);
iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVST);
}
@@ -301,7 +300,7 @@ static void npcmgpio_irq_mask(struct irq_data *d)
unsigned int gpio = d->hwirq;
/* Clear events */
- dev_dbg(d->chip->parent_device, "irq_mask: %u.%u\n", gpio, d->irq);
+ dev_dbg(bank->gc.parent, "irq_mask: %u.%u\n", gpio, d->irq);
iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENC);
}
@@ -313,7 +312,7 @@ static void npcmgpio_irq_unmask(struct irq_data *d)
unsigned int gpio = d->hwirq;
/* Enable events */
- dev_dbg(d->chip->parent_device, "irq_unmask: %u.%u\n", gpio, d->irq);
+ dev_dbg(bank->gc.parent, "irq_unmask: %u.%u\n", gpio, d->irq);
iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENS);
}
@@ -323,7 +322,7 @@ static unsigned int npcmgpio_irq_startup(struct irq_data *d)
unsigned int gpio = d->hwirq;
/* active-high, input, clear interrupt, enable interrupt */
- dev_dbg(d->chip->parent_device, "startup: %u.%u\n", gpio, d->irq);
+ dev_dbg(gc->parent, "startup: %u.%u\n", gpio, d->irq);
npcmgpio_direction_input(gc, gpio);
npcmgpio_irq_ack(d);
npcmgpio_irq_unmask(d);
diff --git a/drivers/pinctrl/pinctrl-k210.c b/drivers/pinctrl/pinctrl-k210.c
index 49e32684dbb2..ecab6bf63dc6 100644
--- a/drivers/pinctrl/pinctrl-k210.c
+++ b/drivers/pinctrl/pinctrl-k210.c
@@ -482,7 +482,7 @@ static int k210_pinconf_get_drive(unsigned int max_strength_ua)
{
int i;
- for (i = K210_PC_DRIVE_MAX; i; i--) {
+ for (i = K210_PC_DRIVE_MAX; i >= 0; i--) {
if (k210_pinconf_drive_strength[i] <= max_strength_ua)
return i;
}
@@ -527,7 +527,7 @@ static int k210_pinconf_set_param(struct pinctrl_dev *pctldev,
case PIN_CONFIG_BIAS_PULL_UP:
if (!arg)
return -EINVAL;
- val |= K210_PC_PD;
+ val |= K210_PC_PU;
break;
case PIN_CONFIG_DRIVE_STRENGTH:
arg *= 1000;
diff --git a/drivers/pinctrl/pinctrl-microchip-sgpio.c b/drivers/pinctrl/pinctrl-microchip-sgpio.c
index 8e081c90bdb2..639f1130e989 100644
--- a/drivers/pinctrl/pinctrl-microchip-sgpio.c
+++ b/drivers/pinctrl/pinctrl-microchip-sgpio.c
@@ -137,7 +137,8 @@ static inline int sgpio_addr_to_pin(struct sgpio_priv *priv, int port, int bit)
static inline u32 sgpio_get_addr(struct sgpio_priv *priv, u32 rno, u32 off)
{
- return priv->properties->regoff[rno] + off;
+ return (priv->properties->regoff[rno] + off) *
+ regmap_get_reg_stride(priv->regs);
}
static u32 sgpio_readl(struct sgpio_priv *priv, u32 rno, u32 off)
diff --git a/drivers/pinctrl/pinctrl-starfive.c b/drivers/pinctrl/pinctrl-starfive.c
index 0b912152a405..ab4b2ee9f217 100644
--- a/drivers/pinctrl/pinctrl-starfive.c
+++ b/drivers/pinctrl/pinctrl-starfive.c
@@ -1164,6 +1164,7 @@ static int starfive_irq_set_type(struct irq_data *d, unsigned int trigger)
}
static struct irq_chip starfive_irq_chip = {
+ .name = "StarFive GPIO",
.irq_ack = starfive_irq_ack,
.irq_mask = starfive_irq_mask,
.irq_mask_ack = starfive_irq_mask_ack,
@@ -1307,9 +1308,6 @@ static int starfive_probe(struct platform_device *pdev)
sfp->gc.base = -1;
sfp->gc.ngpio = NR_GPIOS;
- starfive_irq_chip.parent_device = dev;
- starfive_irq_chip.name = sfp->gc.label;
-
sfp->gc.irq.chip = &starfive_irq_chip;
sfp->gc.irq.parent_handler = starfive_gpio_irq_handler;
sfp->gc.irq.num_parents = 1;
@@ -1330,6 +1328,8 @@ static int starfive_probe(struct platform_device *pdev)
if (ret)
return dev_err_probe(dev, ret, "could not register gpiochip\n");
+ irq_domain_set_pm_device(sfp->gc.irq.domain, dev);
+
out_pinctrl_enable:
return pinctrl_enable(sfp->pctl);
}
diff --git a/drivers/pinctrl/pinctrl-thunderbay.c b/drivers/pinctrl/pinctrl-thunderbay.c
index b5b47f4dd774..79d44bca039e 100644
--- a/drivers/pinctrl/pinctrl-thunderbay.c
+++ b/drivers/pinctrl/pinctrl-thunderbay.c
@@ -773,63 +773,42 @@ static int thunderbay_build_groups(struct thunderbay_pinctrl *tpc)
static int thunderbay_add_functions(struct thunderbay_pinctrl *tpc, struct function_desc *funcs)
{
- struct function_desc *function = funcs;
int i;
/* Assign the groups for each function */
- for (i = 0; i < tpc->soc->npins; i++) {
- const struct pinctrl_pin_desc *pin_info = thunderbay_pins + i;
- struct thunderbay_mux_desc *pin_mux = pin_info->drv_data;
-
- while (pin_mux->name) {
- const char **grp;
- int j, grp_num, match = 0;
- size_t grp_size;
- struct function_desc *func;
-
- for (j = 0; j < tpc->nfuncs; j++) {
- if (!strcmp(pin_mux->name, function[j].name)) {
- match = 1;
- break;
- }
- }
-
- if (!match)
- return -EINVAL;
-
- func = function + j;
- grp_num = func->num_group_names;
- grp_size = sizeof(*func->group_names);
-
- if (!func->group_names) {
- func->group_names = devm_kcalloc(tpc->dev,
- grp_num,
- grp_size,
- GFP_KERNEL);
- if (!func->group_names) {
- kfree(func);
- return -ENOMEM;
- }
+ for (i = 0; i < tpc->nfuncs; i++) {
+ struct function_desc *func = &funcs[i];
+ const char **group_names;
+ unsigned int grp_idx = 0;
+ int j;
+
+ group_names = devm_kcalloc(tpc->dev, func->num_group_names,
+ sizeof(*group_names), GFP_KERNEL);
+ if (!group_names)
+ return -ENOMEM;
+
+ for (j = 0; j < tpc->soc->npins; j++) {
+ const struct pinctrl_pin_desc *pin_info = &thunderbay_pins[j];
+ struct thunderbay_mux_desc *pin_mux;
+
+ for (pin_mux = pin_info->drv_data; pin_mux->name; pin_mux++) {
+ if (!strcmp(pin_mux->name, func->name))
+ group_names[grp_idx++] = pin_info->name;
}
-
- grp = func->group_names;
- while (*grp)
- grp++;
-
- *grp = pin_info->name;
- pin_mux++;
}
+
+ func->group_names = group_names;
}
/* Add all functions */
for (i = 0; i < tpc->nfuncs; i++) {
pinmux_generic_add_function(tpc->pctrl,
- function[i].name,
- function[i].group_names,
- function[i].num_group_names,
- function[i].data);
+ funcs[i].name,
+ funcs[i].group_names,
+ funcs[i].num_group_names,
+ funcs[i].data);
}
- kfree(function);
+ kfree(funcs);
return 0;
}
@@ -839,27 +818,30 @@ static int thunderbay_build_functions(struct thunderbay_pinctrl *tpc)
void *ptr;
int pin;
- /* Total number of functions is unknown at this point. Allocate first. */
+ /*
+ * Allocate the maximum possible number of functions: assume every
+ * pin is part of 8 (the hw maximum) globally unique muxes.
+ */
tpc->nfuncs = 0;
thunderbay_funcs = kcalloc(tpc->soc->npins * 8,
sizeof(*thunderbay_funcs), GFP_KERNEL);
if (!thunderbay_funcs)
return -ENOMEM;
- /* Find total number of functions and each's properties */
+ /* Set up one function for each unique mux */
for (pin = 0; pin < tpc->soc->npins; pin++) {
const struct pinctrl_pin_desc *pin_info = thunderbay_pins + pin;
- struct thunderbay_mux_desc *pin_mux = pin_info->drv_data;
+ struct thunderbay_mux_desc *pin_mux;
- while (pin_mux->name) {
- struct function_desc *func = thunderbay_funcs;
+ for (pin_mux = pin_info->drv_data; pin_mux->name; pin_mux++) {
+ struct function_desc *func;
- while (func->name) {
+ /* Check if we already have a function for this mux */
+ for (func = thunderbay_funcs; func->name; func++) {
if (!strcmp(pin_mux->name, func->name)) {
func->num_group_names++;
break;
}
- func++;
}
if (!func->name) {
@@ -868,8 +850,6 @@ static int thunderbay_build_functions(struct thunderbay_pinctrl *tpc)
func->data = (int *)&pin_mux->mode;
tpc->nfuncs++;
}
-
- pin_mux++;
}
}
diff --git a/drivers/pinctrl/pinctrl-zynqmp.c b/drivers/pinctrl/pinctrl-zynqmp.c
index 42da6bd399ee..e14012209992 100644
--- a/drivers/pinctrl/pinctrl-zynqmp.c
+++ b/drivers/pinctrl/pinctrl-zynqmp.c
@@ -809,7 +809,6 @@ static int zynqmp_pinctrl_prepare_pin_desc(struct device *dev,
unsigned int *npins)
{
struct pinctrl_pin_desc *pins, *pin;
- char **pin_names;
int ret;
int i;
@@ -821,14 +820,13 @@ static int zynqmp_pinctrl_prepare_pin_desc(struct device *dev,
if (!pins)
return -ENOMEM;
- pin_names = devm_kasprintf_strarray(dev, ZYNQMP_PIN_PREFIX, *npins);
- if (IS_ERR(pin_names))
- return PTR_ERR(pin_names);
-
for (i = 0; i < *npins; i++) {
pin = &pins[i];
pin->number = i;
- pin->name = pin_names[i];
+ pin->name = devm_kasprintf(dev, GFP_KERNEL, "%s%d",
+ ZYNQMP_PIN_PREFIX, i);
+ if (!pin->name)
+ return -ENOMEM;
}
*zynqmp_pins = pins;
diff --git a/drivers/pinctrl/sunxi/pinctrl-sun50i-h616.c b/drivers/pinctrl/sunxi/pinctrl-sun50i-h616.c
index ce1917e230f4..152b71226a80 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sun50i-h616.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sun50i-h616.c
@@ -363,16 +363,16 @@ static const struct sunxi_desc_pin h616_pins[] = {
SUNXI_FUNCTION(0x0, "gpio_in"),
SUNXI_FUNCTION(0x1, "gpio_out"),
SUNXI_FUNCTION(0x2, "uart2"), /* CTS */
- SUNXI_FUNCTION(0x3, "i2s3"), /* DO0 */
+ SUNXI_FUNCTION(0x3, "i2s3_dout0"), /* DO0 */
SUNXI_FUNCTION(0x4, "spi1"), /* MISO */
- SUNXI_FUNCTION(0x5, "i2s3"), /* DI1 */
+ SUNXI_FUNCTION(0x5, "i2s3_din1"), /* DI1 */
SUNXI_FUNCTION_IRQ_BANK(0x6, 6, 8)), /* PH_EINT8 */
SUNXI_PIN(SUNXI_PINCTRL_PIN(H, 9),
SUNXI_FUNCTION(0x0, "gpio_in"),
SUNXI_FUNCTION(0x1, "gpio_out"),
- SUNXI_FUNCTION(0x3, "i2s3"), /* DI0 */
+ SUNXI_FUNCTION(0x3, "i2s3_din0"), /* DI0 */
SUNXI_FUNCTION(0x4, "spi1"), /* CS1 */
- SUNXI_FUNCTION(0x3, "i2s3"), /* DO1 */
+ SUNXI_FUNCTION(0x5, "i2s3_dout1"), /* DO1 */
SUNXI_FUNCTION_IRQ_BANK(0x6, 6, 9)), /* PH_EINT9 */
SUNXI_PIN(SUNXI_PINCTRL_PIN(H, 10),
SUNXI_FUNCTION(0x0, "gpio_in"),
diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
index 80d6750c74a6..1f401377ff60 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
@@ -36,6 +36,13 @@
#include "../core.h"
#include "pinctrl-sunxi.h"
+/*
+ * These lock classes tell lockdep that GPIO IRQs are in a different
+ * category than their parents, so it won't report false recursion.
+ */
+static struct lock_class_key sunxi_pinctrl_irq_lock_class;
+static struct lock_class_key sunxi_pinctrl_irq_request_class;
+
static struct irq_chip sunxi_pinctrl_edge_irq_chip;
static struct irq_chip sunxi_pinctrl_level_irq_chip;
@@ -837,7 +844,8 @@ static int sunxi_pinctrl_gpio_direction_input(struct gpio_chip *chip,
{
struct sunxi_pinctrl *pctl = gpiochip_get_data(chip);
- return sunxi_pmx_gpio_set_direction(pctl->pctl_dev, NULL, offset, true);
+ return sunxi_pmx_gpio_set_direction(pctl->pctl_dev, NULL,
+ chip->base + offset, true);
}
static int sunxi_pinctrl_gpio_get(struct gpio_chip *chip, unsigned offset)
@@ -890,7 +898,8 @@ static int sunxi_pinctrl_gpio_direction_output(struct gpio_chip *chip,
struct sunxi_pinctrl *pctl = gpiochip_get_data(chip);
sunxi_pinctrl_gpio_set(chip, offset, value);
- return sunxi_pmx_gpio_set_direction(pctl->pctl_dev, NULL, offset, false);
+ return sunxi_pmx_gpio_set_direction(pctl->pctl_dev, NULL,
+ chip->base + offset, false);
}
static int sunxi_pinctrl_gpio_of_xlate(struct gpio_chip *gc,
@@ -1555,6 +1564,8 @@ int sunxi_pinctrl_init_with_variant(struct platform_device *pdev,
for (i = 0; i < (pctl->desc->irq_banks * IRQ_PER_BANK); i++) {
int irqno = irq_create_mapping(pctl->domain, i);
+ irq_set_lockdep_class(irqno, &sunxi_pinctrl_irq_lock_class,
+ &sunxi_pinctrl_irq_request_class);
irq_set_chip_and_handler(irqno, &sunxi_pinctrl_edge_irq_chip,
handle_edge_irq);
irq_set_chip_data(irqno, pctl);
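The lockdep keys follow the usual pattern for chained GPIO interrupt
controllers: every mapped IRQ gets its own lock and request classes so that
lockdep can tell the child descriptors from the parent's. A minimal sketch:

#include <linux/irq.h>
#include <linux/irqdomain.h>

static struct lock_class_key demo_irq_lock_class;
static struct lock_class_key demo_irq_request_class;

static void demo_map_bank_irqs(struct irq_domain *domain, unsigned int nr_irqs)
{
	unsigned int i;

	for (i = 0; i < nr_irqs; i++) {
		int virq = irq_create_mapping(domain, i);

		/* separate classes avoid false "recursive locking" reports */
		irq_set_lockdep_class(virq, &demo_irq_lock_class,
				      &demo_irq_request_class);
	}
}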
diff --git a/drivers/platform/chrome/cros_ec.c b/drivers/platform/chrome/cros_ec.c
index fc5aa1525d13..d49a4efe46c8 100644
--- a/drivers/platform/chrome/cros_ec.c
+++ b/drivers/platform/chrome/cros_ec.c
@@ -302,13 +302,11 @@ EXPORT_SYMBOL(cros_ec_register);
*
* Return: 0 on success or negative error code.
*/
-int cros_ec_unregister(struct cros_ec_device *ec_dev)
+void cros_ec_unregister(struct cros_ec_device *ec_dev)
{
if (ec_dev->pd)
platform_device_unregister(ec_dev->pd);
platform_device_unregister(ec_dev->ec);
-
- return 0;
}
EXPORT_SYMBOL(cros_ec_unregister);
diff --git a/drivers/platform/chrome/cros_ec.h b/drivers/platform/chrome/cros_ec.h
index 78363dcfdf23..bbca0096868a 100644
--- a/drivers/platform/chrome/cros_ec.h
+++ b/drivers/platform/chrome/cros_ec.h
@@ -11,7 +11,7 @@
#include <linux/interrupt.h>
int cros_ec_register(struct cros_ec_device *ec_dev);
-int cros_ec_unregister(struct cros_ec_device *ec_dev);
+void cros_ec_unregister(struct cros_ec_device *ec_dev);
int cros_ec_suspend(struct cros_ec_device *ec_dev);
int cros_ec_resume(struct cros_ec_device *ec_dev);
diff --git a/drivers/platform/chrome/cros_ec_i2c.c b/drivers/platform/chrome/cros_ec_i2c.c
index 30c8938c27d5..22feb0fd4ce7 100644
--- a/drivers/platform/chrome/cros_ec_i2c.c
+++ b/drivers/platform/chrome/cros_ec_i2c.c
@@ -313,7 +313,9 @@ static int cros_ec_i2c_remove(struct i2c_client *client)
{
struct cros_ec_device *ec_dev = i2c_get_clientdata(client);
- return cros_ec_unregister(ec_dev);
+ cros_ec_unregister(ec_dev);
+
+ return 0;
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index d6306d2a096f..7651417b4a25 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -439,7 +439,9 @@ static int cros_ec_lpc_remove(struct platform_device *pdev)
acpi_remove_notify_handler(adev->handle, ACPI_ALL_NOTIFY,
cros_ec_lpc_acpi_notify);
- return cros_ec_unregister(ec_dev);
+ cros_ec_unregister(ec_dev);
+
+ return 0;
}
static const struct acpi_device_id cros_ec_lpc_acpi_device_ids[] = {
diff --git a/drivers/platform/chrome/cros_ec_spi.c b/drivers/platform/chrome/cros_ec_spi.c
index 14c4046fa04d..8493af0f680e 100644
--- a/drivers/platform/chrome/cros_ec_spi.c
+++ b/drivers/platform/chrome/cros_ec_spi.c
@@ -786,11 +786,11 @@ static int cros_ec_spi_probe(struct spi_device *spi)
return 0;
}
-static int cros_ec_spi_remove(struct spi_device *spi)
+static void cros_ec_spi_remove(struct spi_device *spi)
{
struct cros_ec_device *ec_dev = spi_get_drvdata(spi);
- return cros_ec_unregister(ec_dev);
+ cros_ec_unregister(ec_dev);
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/platform/olpc/olpc-xo175-ec.c b/drivers/platform/olpc/olpc-xo175-ec.c
index 0d46706afd2d..4823bd2819f6 100644
--- a/drivers/platform/olpc/olpc-xo175-ec.c
+++ b/drivers/platform/olpc/olpc-xo175-ec.c
@@ -648,7 +648,7 @@ static struct olpc_ec_driver olpc_xo175_ec_driver = {
.ec_cmd = olpc_xo175_ec_cmd,
};
-static int olpc_xo175_ec_remove(struct spi_device *spi)
+static void olpc_xo175_ec_remove(struct spi_device *spi)
{
if (pm_power_off == olpc_xo175_ec_power_off)
pm_power_off = NULL;
@@ -657,8 +657,6 @@ static int olpc_xo175_ec_remove(struct spi_device *spi)
platform_device_unregister(olpc_ec);
olpc_ec = NULL;
-
- return 0;
}
static int olpc_xo175_ec_probe(struct spi_device *spi)
diff --git a/drivers/platform/surface/Kconfig b/drivers/platform/surface/Kconfig
index 5f0578e25f71..463f1ec5c14e 100644
--- a/drivers/platform/surface/Kconfig
+++ b/drivers/platform/surface/Kconfig
@@ -5,6 +5,7 @@
menuconfig SURFACE_PLATFORMS
bool "Microsoft Surface Platform-Specific Device Drivers"
+ depends on ARM64 || X86 || COMPILE_TEST
default y
help
Say Y here to get to see options for platform-specific device drivers
diff --git a/drivers/platform/surface/surface3_power.c b/drivers/platform/surface/surface3_power.c
index abac3eec565e..444ec81ba02d 100644
--- a/drivers/platform/surface/surface3_power.c
+++ b/drivers/platform/surface/surface3_power.c
@@ -232,14 +232,21 @@ static int mshw0011_bix(struct mshw0011_data *cdata, struct bix *bix)
}
bix->last_full_charg_capacity = ret;
- /* get serial number */
+ /*
+ * Get the serial number. On some devices (with an unofficial
+ * replacement battery?) reading any of the serial number range
+ * addresses gets NACKed; in that case just leave the serial
+ * number empty.
+ */
ret = i2c_smbus_read_i2c_block_data(client, MSHW0011_BAT0_REG_SERIAL_NO,
sizeof(buf), buf);
- if (ret != sizeof(buf)) {
+ if (ret == -EREMOTEIO) {
+ /* no serial number available */
+ } else if (ret != sizeof(buf)) {
dev_err(&client->dev, "Error reading serial no: %d\n", ret);
return ret;
+ } else {
+ snprintf(bix->serial, ARRAY_SIZE(bix->serial), "%3pE%6pE", buf + 7, buf);
}
- snprintf(bix->serial, ARRAY_SIZE(bix->serial), "%3pE%6pE", buf + 7, buf);
/* get cycle count */
ret = i2c_smbus_read_word_data(client, MSHW0011_BAT0_REG_CYCLE_CNT);
diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
index f794343d6aaa..b1103f85a85a 100644
--- a/drivers/platform/x86/amd-pmc.c
+++ b/drivers/platform/x86/amd-pmc.c
@@ -21,6 +21,7 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
+#include <linux/pm_qos.h>
#include <linux/rtc.h>
#include <linux/suspend.h>
#include <linux/seq_file.h>
@@ -85,6 +86,9 @@
#define PMC_MSG_DELAY_MIN_US 50
#define RESPONSE_REGISTER_LOOP_MAX 20000
+/* QoS request to let CPUs enter idle states, but not the deepest ones */
+#define AMD_PMC_MAX_IDLE_STATE_LATENCY 3
+
#define SOC_SUBSYSTEM_IP_MAX 12
#define DELAY_MIN_US 2000
#define DELAY_MAX_US 3000
@@ -124,12 +128,14 @@ struct amd_pmc_dev {
u32 cpu_id;
u32 active_ips;
/* SMU version information */
- u16 major;
- u16 minor;
- u16 rev;
+ u8 smu_program;
+ u8 major;
+ u8 minor;
+ u8 rev;
struct device *dev;
struct pci_dev *rdev;
struct mutex lock; /* generic mutex lock */
+ struct pm_qos_request amd_pmc_pm_qos_req;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct dentry *dbgfs_dir;
#endif /* CONFIG_DEBUG_FS */
@@ -180,11 +186,13 @@ static int amd_pmc_get_smu_version(struct amd_pmc_dev *dev)
if (rc)
return rc;
- dev->major = (val >> 16) & GENMASK(15, 0);
+ dev->smu_program = (val >> 24) & GENMASK(7, 0);
+ dev->major = (val >> 16) & GENMASK(7, 0);
dev->minor = (val >> 8) & GENMASK(7, 0);
dev->rev = (val >> 0) & GENMASK(7, 0);
- dev_dbg(dev->dev, "SMU version is %u.%u.%u\n", dev->major, dev->minor, dev->rev);
+ dev_dbg(dev->dev, "SMU program %u version is %u.%u.%u\n",
+ dev->smu_program, dev->major, dev->minor, dev->rev);
return 0;
}
@@ -226,7 +234,7 @@ static int amd_pmc_stb_debugfs_release(struct inode *inode, struct file *filp)
return 0;
}
-const struct file_operations amd_pmc_stb_debugfs_fops = {
+static const struct file_operations amd_pmc_stb_debugfs_fops = {
.owner = THIS_MODULE,
.open = amd_pmc_stb_debugfs_open,
.read = amd_pmc_stb_debugfs_read,
@@ -518,6 +526,14 @@ static int amd_pmc_verify_czn_rtc(struct amd_pmc_dev *pdev, u32 *arg)
rc = rtc_alarm_irq_enable(rtc_device, 0);
dev_dbg(pdev->dev, "wakeup timer programmed for %lld seconds\n", duration);
+ /*
+ * Prevent CPUs from entering deep idle states while the OS_HINT is
+ * sent: the hint is only safe to send while at least one CPU is not
+ * in a deep idle state.
+ */
+ cpu_latency_qos_update_request(&pdev->amd_pmc_pm_qos_req, AMD_PMC_MAX_IDLE_STATE_LATENCY);
+ wake_up_all_idle_cpus();
+
return rc;
}
@@ -535,24 +551,31 @@ static int __maybe_unused amd_pmc_suspend(struct device *dev)
/* Activate CZN specific RTC functionality */
if (pdev->cpu_id == AMD_CPU_ID_CZN) {
rc = amd_pmc_verify_czn_rtc(pdev, &arg);
- if (rc < 0)
- return rc;
+ if (rc)
+ goto fail;
}
/* Dump the IdleMask before we send hint to SMU */
amd_pmc_idlemask_read(pdev, dev, NULL);
msg = amd_pmc_get_os_hint(pdev);
rc = amd_pmc_send_cmd(pdev, arg, NULL, msg, 0);
- if (rc)
+ if (rc) {
dev_err(pdev->dev, "suspend failed\n");
+ goto fail;
+ }
if (enable_stb)
rc = amd_pmc_write_stb(pdev, AMD_PMC_STB_PREDEF);
- if (rc) {
+ if (rc) {
dev_err(pdev->dev, "error writing to STB\n");
- return rc;
+ goto fail;
}
+ return 0;
+fail:
+ if (pdev->cpu_id == AMD_CPU_ID_CZN)
+ cpu_latency_qos_update_request(&pdev->amd_pmc_pm_qos_req,
+ PM_QOS_DEFAULT_VALUE);
return rc;
}
@@ -576,12 +599,15 @@ static int __maybe_unused amd_pmc_resume(struct device *dev)
/* Write data incremented by 1 to distinguish in stb_read */
if (enable_stb)
rc = amd_pmc_write_stb(pdev, AMD_PMC_STB_PREDEF + 1);
- if (rc) {
+ if (rc)
dev_err(pdev->dev, "error writing to STB\n");
- return rc;
- }
- return 0;
+ /* Restore the QoS request to its default if it was set */
+ if (pdev->cpu_id == AMD_CPU_ID_CZN)
+ cpu_latency_qos_update_request(&pdev->amd_pmc_pm_qos_req,
+ PM_QOS_DEFAULT_VALUE);
+
+ return rc;
}
static const struct dev_pm_ops amd_pmc_pm_ops = {
@@ -719,6 +745,7 @@ static int amd_pmc_probe(struct platform_device *pdev)
amd_pmc_get_smu_version(dev);
platform_set_drvdata(pdev, dev);
amd_pmc_dbgfs_register(dev);
+ cpu_latency_qos_add_request(&dev->amd_pmc_pm_qos_req, PM_QOS_DEFAULT_VALUE);
return 0;
err_pci_dev_put:
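The QoS calls bracket the OS_HINT window: exit latency is capped (and idle
CPUs are woken so the new cap takes effect) before the hint is sent, and the
request is restored to the default on resume or on any failure. The same
pattern reduced to a sketch; names and the 3 us value are illustrative, and
cpu_latency_qos_add_request() is assumed to have run at probe:

#include <linux/cpu.h>		/* wake_up_all_idle_cpus() */
#include <linux/pm_qos.h>

static struct pm_qos_request demo_qos_req;	/* added once at probe */

static void demo_enter_critical_window(void)
{
	/* shallow idle only: cap the allowed exit latency */
	cpu_latency_qos_update_request(&demo_qos_req, 3);
	wake_up_all_idle_cpus();	/* make idle CPUs re-evaluate now */
}

static void demo_leave_critical_window(void)
{
	cpu_latency_qos_update_request(&demo_qos_req, PM_QOS_DEFAULT_VALUE);
}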
diff --git a/drivers/platform/x86/asus-tf103c-dock.c b/drivers/platform/x86/asus-tf103c-dock.c
index d4ef8f362ee6..6fd0c9fea82d 100644
--- a/drivers/platform/x86/asus-tf103c-dock.c
+++ b/drivers/platform/x86/asus-tf103c-dock.c
@@ -250,7 +250,7 @@ static int tf103c_dock_hid_raw_request(struct hid_device *hid, u8 reportnum,
return 0;
}
-struct hid_ll_driver tf103c_dock_hid_ll_driver = {
+static struct hid_ll_driver tf103c_dock_hid_ll_driver = {
.parse = tf103c_dock_hid_parse,
.start = tf103c_dock_hid_start,
.stop = tf103c_dock_hid_stop,
@@ -921,7 +921,7 @@ static int __maybe_unused tf103c_dock_resume(struct device *dev)
return 0;
}
-SIMPLE_DEV_PM_OPS(tf103c_dock_pm_ops, tf103c_dock_suspend, tf103c_dock_resume);
+static SIMPLE_DEV_PM_OPS(tf103c_dock_pm_ops, tf103c_dock_suspend, tf103c_dock_resume);
static const struct acpi_device_id tf103c_dock_acpi_match[] = {
{"NPCE69A"},
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index a3b83b22a3b1..2104a2621e50 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -2223,7 +2223,7 @@ static int fan_curve_check_present(struct asus_wmi *asus, bool *available,
err = fan_curve_get_factory_default(asus, fan_dev);
if (err) {
- if (err == -ENODEV)
+ if (err == -ENODEV || err == -ENODATA)
return 0;
return err;
}
diff --git a/drivers/platform/x86/intel/crystal_cove_charger.c b/drivers/platform/x86/intel/crystal_cove_charger.c
index 0374bc742513..e4299cfa2205 100644
--- a/drivers/platform/x86/intel/crystal_cove_charger.c
+++ b/drivers/platform/x86/intel/crystal_cove_charger.c
@@ -17,6 +17,7 @@
#include <linux/regmap.h>
#define CHGRIRQ_REG 0x0a
+#define MCHGRIRQ_REG 0x17
struct crystal_cove_charger_data {
struct mutex buslock; /* irq_bus_lock */
@@ -25,8 +26,8 @@ struct crystal_cove_charger_data {
struct irq_domain *irq_domain;
int irq;
int charger_irq;
- bool irq_enabled;
- bool irq_is_enabled;
+ u8 mask;
+ u8 new_mask;
};
static irqreturn_t crystal_cove_charger_irq(int irq, void *data)
@@ -53,13 +54,9 @@ static void crystal_cove_charger_irq_bus_sync_unlock(struct irq_data *data)
{
struct crystal_cove_charger_data *charger = irq_data_get_irq_chip_data(data);
- if (charger->irq_is_enabled != charger->irq_enabled) {
- if (charger->irq_enabled)
- enable_irq(charger->irq);
- else
- disable_irq(charger->irq);
-
- charger->irq_is_enabled = charger->irq_enabled;
+ if (charger->mask != charger->new_mask) {
+ regmap_write(charger->regmap, MCHGRIRQ_REG, charger->new_mask);
+ charger->mask = charger->new_mask;
}
mutex_unlock(&charger->buslock);
@@ -69,14 +66,14 @@ static void crystal_cove_charger_irq_unmask(struct irq_data *data)
{
struct crystal_cove_charger_data *charger = irq_data_get_irq_chip_data(data);
- charger->irq_enabled = true;
+ charger->new_mask &= ~BIT(data->hwirq);
}
static void crystal_cove_charger_irq_mask(struct irq_data *data)
{
struct crystal_cove_charger_data *charger = irq_data_get_irq_chip_data(data);
- charger->irq_enabled = false;
+ charger->new_mask |= BIT(data->hwirq);
}
static void crystal_cove_charger_rm_irq_domain(void *data)
@@ -130,10 +127,13 @@ static int crystal_cove_charger_probe(struct platform_device *pdev)
irq_set_nested_thread(charger->charger_irq, true);
irq_set_noprobe(charger->charger_irq);
+ /* Mask the single 2nd level IRQ before enabling the 1st level IRQ */
+ charger->mask = charger->new_mask = BIT(0);
+ regmap_write(charger->regmap, MCHGRIRQ_REG, charger->mask);
+
ret = devm_request_threaded_irq(&pdev->dev, charger->irq, NULL,
crystal_cove_charger_irq,
- IRQF_ONESHOT | IRQF_NO_AUTOEN,
- KBUILD_MODNAME, charger);
+ IRQF_ONESHOT, KBUILD_MODNAME, charger);
if (ret)
return dev_err_probe(&pdev->dev, ret, "requesting irq\n");
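The rework above is the standard slow-bus irq_chip pattern: the mask/unmask
callbacks only update a cached mask while irq_bus_lock is held, and
irq_bus_sync_unlock() performs the single register write. Sketched with
hypothetical names (set bit = masked, as in the hunk):

#include <linux/bits.h>
#include <linux/irq.h>
#include <linux/mutex.h>
#include <linux/regmap.h>

#define DEMO_MASK_REG	0x17	/* hypothetical 2nd-level mask register */

struct demo_chip {
	struct mutex buslock;
	struct regmap *regmap;
	u8 mask, new_mask;
};

static void demo_irq_mask(struct irq_data *d)
{
	struct demo_chip *chip = irq_data_get_irq_chip_data(d);

	chip->new_mask |= BIT(d->hwirq);	/* cache only, no I/O here */
}

static void demo_irq_unmask(struct irq_data *d)
{
	struct demo_chip *chip = irq_data_get_irq_chip_data(d);

	chip->new_mask &= ~BIT(d->hwirq);
}

static void demo_irq_bus_sync_unlock(struct irq_data *d)
{
	struct demo_chip *chip = irq_data_get_irq_chip_data(d);

	if (chip->mask != chip->new_mask) {
		regmap_write(chip->regmap, DEMO_MASK_REG, chip->new_mask);
		chip->mask = chip->new_mask;
	}
	mutex_unlock(&chip->buslock);
}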
diff --git a/drivers/platform/x86/intel/int3472/tps68470_board_data.c b/drivers/platform/x86/intel/int3472/tps68470_board_data.c
index f93d437fd192..525f09a3b5ff 100644
--- a/drivers/platform/x86/intel/int3472/tps68470_board_data.c
+++ b/drivers/platform/x86/intel/int3472/tps68470_board_data.c
@@ -100,7 +100,8 @@ static struct gpiod_lookup_table surface_go_tps68470_gpios = {
.dev_id = "i2c-INT347A:00",
.table = {
GPIO_LOOKUP("tps68470-gpio", 9, "reset", GPIO_ACTIVE_LOW),
- GPIO_LOOKUP("tps68470-gpio", 7, "powerdown", GPIO_ACTIVE_LOW)
+ GPIO_LOOKUP("tps68470-gpio", 7, "powerdown", GPIO_ACTIVE_LOW),
+ { }
}
};
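The added { } entry is not cosmetic: gpiod lookup tables are scanned until an
all-zero entry, so an unterminated table walks past its end. A correctly
terminated table, with hypothetical IDs:

#include <linux/gpio/machine.h>

static struct gpiod_lookup_table demo_gpios = {
	.dev_id = "i2c-DEMO0001:00",	/* hypothetical consumer device */
	.table = {
		GPIO_LOOKUP("demo-gpio", 3, "reset", GPIO_ACTIVE_LOW),
		{ }			/* sentinel terminates the scan */
	},
};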
diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
index c9a85eb2e860..e8424e70d81d 100644
--- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
@@ -596,7 +596,10 @@ static long isst_if_def_ioctl(struct file *file, unsigned int cmd,
return ret;
}
-static DEFINE_MUTEX(punit_misc_dev_lock);
+/* Lock to prevent module registration while the device is open in user space */
+static DEFINE_MUTEX(punit_misc_dev_open_lock);
+/* Lock to allow one shared misc device for all ISST interfaces */
+static DEFINE_MUTEX(punit_misc_dev_reg_lock);
static int misc_usage_count;
static int misc_device_ret;
static int misc_device_open;
@@ -606,7 +609,7 @@ static int isst_if_open(struct inode *inode, struct file *file)
int i, ret = 0;
/* Fail open, if a module is going away */
- mutex_lock(&punit_misc_dev_lock);
+ mutex_lock(&punit_misc_dev_open_lock);
for (i = 0; i < ISST_IF_DEV_MAX; ++i) {
struct isst_if_cmd_cb *cb = &punit_callbacks[i];
@@ -628,7 +631,7 @@ static int isst_if_open(struct inode *inode, struct file *file)
} else {
misc_device_open++;
}
- mutex_unlock(&punit_misc_dev_lock);
+ mutex_unlock(&punit_misc_dev_open_lock);
return ret;
}
@@ -637,7 +640,7 @@ static int isst_if_relase(struct inode *inode, struct file *f)
{
int i;
- mutex_lock(&punit_misc_dev_lock);
+ mutex_lock(&punit_misc_dev_open_lock);
misc_device_open--;
for (i = 0; i < ISST_IF_DEV_MAX; ++i) {
struct isst_if_cmd_cb *cb = &punit_callbacks[i];
@@ -645,7 +648,7 @@ static int isst_if_relase(struct inode *inode, struct file *f)
if (cb->registered)
module_put(cb->owner);
}
- mutex_unlock(&punit_misc_dev_lock);
+ mutex_unlock(&punit_misc_dev_open_lock);
return 0;
}
@@ -662,6 +665,43 @@ static struct miscdevice isst_if_char_driver = {
.fops = &isst_if_char_driver_ops,
};
+static int isst_misc_reg(void)
+{
+ mutex_lock(&punit_misc_dev_reg_lock);
+ if (misc_device_ret)
+ goto unlock_exit;
+
+ if (!misc_usage_count) {
+ misc_device_ret = isst_if_cpu_info_init();
+ if (misc_device_ret)
+ goto unlock_exit;
+
+ misc_device_ret = misc_register(&isst_if_char_driver);
+ if (misc_device_ret) {
+ isst_if_cpu_info_exit();
+ goto unlock_exit;
+ }
+ }
+ misc_usage_count++;
+
+unlock_exit:
+ mutex_unlock(&punit_misc_dev_reg_lock);
+
+ return misc_device_ret;
+}
+
+static void isst_misc_unreg(void)
+{
+ mutex_lock(&punit_misc_dev_reg_lock);
+ if (misc_usage_count)
+ misc_usage_count--;
+ if (!misc_usage_count && !misc_device_ret) {
+ misc_deregister(&isst_if_char_driver);
+ isst_if_cpu_info_exit();
+ }
+ mutex_unlock(&punit_misc_dev_reg_lock);
+}
+
/**
* isst_if_cdev_register() - Register callback for IOCTL
* @device_type: The device type this callback handling.
@@ -679,38 +719,31 @@ static struct miscdevice isst_if_char_driver = {
*/
int isst_if_cdev_register(int device_type, struct isst_if_cmd_cb *cb)
{
- if (misc_device_ret)
- return misc_device_ret;
+ int ret;
if (device_type >= ISST_IF_DEV_MAX)
return -EINVAL;
- mutex_lock(&punit_misc_dev_lock);
+ mutex_lock(&punit_misc_dev_open_lock);
+ /* Device is already open, we don't want to add new callbacks */
if (misc_device_open) {
- mutex_unlock(&punit_misc_dev_lock);
+ mutex_unlock(&punit_misc_dev_open_lock);
return -EAGAIN;
}
- if (!misc_usage_count) {
- int ret;
-
- misc_device_ret = misc_register(&isst_if_char_driver);
- if (misc_device_ret)
- goto unlock_exit;
-
- ret = isst_if_cpu_info_init();
- if (ret) {
- misc_deregister(&isst_if_char_driver);
- misc_device_ret = ret;
- goto unlock_exit;
- }
- }
memcpy(&punit_callbacks[device_type], cb, sizeof(*cb));
punit_callbacks[device_type].registered = 1;
- misc_usage_count++;
-unlock_exit:
- mutex_unlock(&punit_misc_dev_lock);
+ mutex_unlock(&punit_misc_dev_open_lock);
- return misc_device_ret;
+ ret = isst_misc_reg();
+ if (ret) {
+ /*
+ * No need for a mutex as the misc device registration failed,
+ * so no one can open the device yet. Hence no contention.
+ */
+ punit_callbacks[device_type].registered = 0;
+ return ret;
+ }
+ return 0;
}
EXPORT_SYMBOL_GPL(isst_if_cdev_register);
@@ -725,16 +758,12 @@ EXPORT_SYMBOL_GPL(isst_if_cdev_register);
*/
void isst_if_cdev_unregister(int device_type)
{
- mutex_lock(&punit_misc_dev_lock);
- misc_usage_count--;
+ isst_misc_unreg();
+ mutex_lock(&punit_misc_dev_open_lock);
punit_callbacks[device_type].registered = 0;
if (device_type == ISST_IF_DEV_MBOX)
isst_delete_hash();
- if (!misc_usage_count && !misc_device_ret) {
- misc_deregister(&isst_if_char_driver);
- isst_if_cpu_info_exit();
- }
- mutex_unlock(&punit_misc_dev_lock);
+ mutex_unlock(&punit_misc_dev_open_lock);
}
EXPORT_SYMBOL_GPL(isst_if_cdev_unregister);
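The net effect of the isst_if_common.c rework above is a refcounted, shared misc device: the first interface module to register brings it up, the last one out tears it down, and misc_register() is no longer called under the lock that isst_if_open() takes. A minimal caller-side sketch of the exported API, assuming only the .owner member of struct isst_if_cmd_cb (the command-handling members are omitted):

	/* Hypothetical ISST interface module sharing the misc device. */
	static struct isst_if_cmd_cb my_cb = {
		.owner = THIS_MODULE,
		/* .cmd_callback etc. omitted; see isst_if_common.h */
	};

	static int __init my_isst_init(void)
	{
		/* First successful registration creates the misc device. */
		return isst_if_cdev_register(ISST_IF_DEV_MBOX, &my_cb);
	}

	static void __exit my_isst_exit(void)
	{
		/* Last unregistration deregisters it via isst_misc_unreg(). */
		isst_if_cdev_unregister(ISST_IF_DEV_MBOX);
	}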
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 098180fb1cfc..3424b080db77 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -8679,9 +8679,10 @@ static const struct attribute_group fan_driver_attr_group = {
.attrs = fan_driver_attributes,
};
-#define TPACPI_FAN_Q1 0x0001 /* Unitialized HFSP */
-#define TPACPI_FAN_2FAN 0x0002 /* EC 0x31 bit 0 selects fan2 */
-#define TPACPI_FAN_2CTL 0x0004 /* selects fan2 control */
+#define TPACPI_FAN_Q1 0x0001 /* Uninitialized HFSP */
+#define TPACPI_FAN_2FAN 0x0002 /* EC 0x31 bit 0 selects fan2 */
+#define TPACPI_FAN_2CTL 0x0004 /* selects fan2 control */
+#define TPACPI_FAN_NOFAN 0x0008 /* no fan available */
static const struct tpacpi_quirk fan_quirk_table[] __initconst = {
TPACPI_QEC_IBM('1', 'Y', TPACPI_FAN_Q1),
@@ -8702,6 +8703,8 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = {
TPACPI_Q_LNV3('N', '4', '0', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (4th gen) */
TPACPI_Q_LNV3('N', '3', '0', TPACPI_FAN_2CTL), /* P15 (1st gen) / P15v (1st gen) */
TPACPI_Q_LNV3('N', '3', '2', TPACPI_FAN_2CTL), /* X1 Carbon (9th gen) */
+ TPACPI_Q_LNV3('N', '3', '7', TPACPI_FAN_2CTL), /* T15g (2nd gen) */
+ TPACPI_Q_LNV3('N', '1', 'O', TPACPI_FAN_NOFAN), /* X1 Tablet (2nd gen) */
};
static int __init fan_init(struct ibm_init_struct *iibm)
@@ -8730,6 +8733,11 @@ static int __init fan_init(struct ibm_init_struct *iibm)
quirks = tpacpi_check_quirks(fan_quirk_table,
ARRAY_SIZE(fan_quirk_table));
+ if (quirks & TPACPI_FAN_NOFAN) {
+ pr_info("No integrated ThinkPad fan available\n");
+ return -ENODEV;
+ }
+
if (gfan_handle) {
/* 570, 600e/x, 770e, 770x */
fan_status_access_mode = TPACPI_FAN_RD_ACPI_GFAN;
@@ -10112,6 +10120,9 @@ static struct ibm_struct proxsensor_driver_data = {
#define DYTC_CMD_MMC_GET 8 /* To get current MMC function and mode */
#define DYTC_CMD_RESET 0x1ff /* To reset back to default */
+#define DYTC_CMD_FUNC_CAP 3 /* To get DYTC capabilities */
+#define DYTC_FC_MMC 27 /* MMC Mode supported */
+
#define DYTC_GET_FUNCTION_BIT 8 /* Bits 8-11 - function setting */
#define DYTC_GET_MODE_BIT 12 /* Bits 12-15 - mode setting */
@@ -10324,6 +10335,15 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm)
if (dytc_version < 5)
return -ENODEV;
+ /* Check which capabilities are supported. Currently only MMC is needed */
+ err = dytc_command(DYTC_CMD_FUNC_CAP, &output);
+ if (err)
+ return err;
+ if (!(output & BIT(DYTC_FC_MMC))) {
+ dbg_printk(TPACPI_DBG_INIT, " DYTC MMC mode not supported\n");
+ return -ENODEV;
+ }
+
dbg_printk(TPACPI_DBG_INIT,
"DYTC version %d: thermal mode available\n", dytc_version);
/*
diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index 494f23052678..bc97bfa8e8a6 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -770,6 +770,21 @@ static const struct ts_dmi_data predia_basic_data = {
.properties = predia_basic_props,
};
+static const struct property_entry rwc_nanote_p8_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-min-y", 46),
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 1728),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
+ PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
+ PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-rwc-nanote-p8.fw"),
+ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+ { }
+};
+
+static const struct ts_dmi_data rwc_nanote_p8_data = {
+ .acpi_name = "MSSL1680:00",
+ .properties = rwc_nanote_p8_props,
+};
+
static const struct property_entry schneider_sct101ctm_props[] = {
PROPERTY_ENTRY_U32("touchscreen-size-x", 1715),
PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
@@ -1395,6 +1410,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
},
},
{
+ /* RWC NANOTE P8 */
+ .driver_data = (void *)&rwc_nanote_p8_data,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Default string"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "AY07J"),
+ DMI_MATCH(DMI_PRODUCT_SKU, "0001")
+ },
+ },
+ {
/* Schneider SCT101CTM */
.driver_data = (void *)&schneider_sct101ctm_data,
.matches = {
diff --git a/drivers/platform/x86/x86-android-tablets.c b/drivers/platform/x86/x86-android-tablets.c
index 3ba63ad91b28..9360a8a92486 100644
--- a/drivers/platform/x86/x86-android-tablets.c
+++ b/drivers/platform/x86/x86-android-tablets.c
@@ -26,6 +26,7 @@
#include <linux/string.h>
/* For gpio_get_desc() which is EXPORT_SYMBOL_GPL() */
#include "../../gpio/gpiolib.h"
+#include "../../gpio/gpiolib-acpi.h"
/*
* Helper code to get Linux IRQ numbers given a description of the IRQ source
@@ -47,7 +48,7 @@ struct x86_acpi_irq_data {
int polarity; /* ACPI_ACTIVE_HIGH / ACPI_ACTIVE_LOW / ACPI_ACTIVE_BOTH */
};
-static int x86_acpi_irq_helper_gpiochip_find(struct gpio_chip *gc, void *data)
+static int gpiochip_find_match_label(struct gpio_chip *gc, void *data)
{
return gc->label && !strcmp(gc->label, data);
}
@@ -73,7 +74,7 @@ static int x86_acpi_irq_helper_get(const struct x86_acpi_irq_data *data)
return irq;
case X86_ACPI_IRQ_TYPE_GPIOINT:
/* Like acpi_dev_gpio_irq_get(), but without parsing ACPI resources */
- chip = gpiochip_find(data->chip, x86_acpi_irq_helper_gpiochip_find);
+ chip = gpiochip_find(data->chip, gpiochip_find_match_label);
if (!chip) {
pr_err("error cannot find GPIO chip %s\n", data->chip);
return -ENODEV;
@@ -143,14 +144,17 @@ struct x86_serdev_info {
};
struct x86_dev_info {
+ char *invalid_aei_gpiochip;
const char * const *modules;
- struct gpiod_lookup_table **gpiod_lookup_tables;
+ struct gpiod_lookup_table * const *gpiod_lookup_tables;
const struct x86_i2c_client_info *i2c_client_info;
const struct platform_device_info *pdev_info;
const struct x86_serdev_info *serdev_info;
int i2c_client_count;
int pdev_count;
int serdev_count;
+ int (*init)(void);
+ void (*exit)(void);
};
/* Generic / shared bq24190 settings */
@@ -187,8 +191,8 @@ static struct bq24190_platform_data bq24190_pdata = {
};
static const char * const bq24190_modules[] __initconst = {
- "crystal_cove_charger", /* For the bq24190 IRQ */
- "bq24190_charger", /* For the Vbus regulator for intel-int3496 */
+ "intel_crystal_cove_charger", /* For the bq24190 IRQ */
+ "bq24190_charger", /* For the Vbus regulator for intel-int3496 */
NULL
};
@@ -302,7 +306,7 @@ static struct gpiod_lookup_table asus_me176c_goodix_gpios = {
},
};
-static struct gpiod_lookup_table *asus_me176c_gpios[] = {
+static struct gpiod_lookup_table * const asus_me176c_gpios[] = {
&int3496_gpo2_pin22_gpios,
&asus_me176c_goodix_gpios,
NULL
@@ -317,6 +321,7 @@ static const struct x86_dev_info asus_me176c_info __initconst = {
.serdev_count = ARRAY_SIZE(asus_me176c_serdevs),
.gpiod_lookup_tables = asus_me176c_gpios,
.modules = bq24190_modules,
+ .invalid_aei_gpiochip = "INT33FC:02",
};
/* Asus TF103C tablets have an Android factory img with everything hardcoded */
@@ -405,7 +410,7 @@ static const struct x86_i2c_client_info asus_tf103c_i2c_clients[] __initconst =
},
};
-static struct gpiod_lookup_table *asus_tf103c_gpios[] = {
+static struct gpiod_lookup_table * const asus_tf103c_gpios[] = {
&int3496_gpo2_pin22_gpios,
NULL
};
@@ -417,6 +422,7 @@ static const struct x86_dev_info asus_tf103c_info __initconst = {
.pdev_count = ARRAY_SIZE(int3496_pdevs),
.gpiod_lookup_tables = asus_tf103c_gpios,
.modules = bq24190_modules,
+ .invalid_aei_gpiochip = "INT33FC:02",
};
/*
@@ -490,6 +496,39 @@ static const struct x86_dev_info chuwi_hi8_info __initconst = {
.i2c_client_count = ARRAY_SIZE(chuwi_hi8_i2c_clients),
};
+#define CZC_EC_EXTRA_PORT 0x68
+#define CZC_EC_ANDROID_KEYS 0x63
+
+static int __init czc_p10t_init(void)
+{
+ /*
+ * The device boots up in "Windows 7" mode, where the home button sends a
+ * Windows-specific key sequence (Left Meta + D) and the second button
+ * sends an unknown one while also toggling the Radio Kill Switch.
+ * This is surprising behavior for a button labeled "Back".
+ *
+ * The vendor-supplied Android-x86 build switches the device to an
+ * "Android" mode by writing value 0x63 to the I/O port 0x68. This seems
+ * to just set bit 6 on address 0x96 in the EC region; switching the bit
+ * directly achieves the same result. The build ships a "p10t_switcher"
+ * tool to do the job. It doesn't seem to do anything else, and no other
+ * use of port 0x68 is known.
+ *
+ * In the Android mode, the home button sends just a single scancode,
+ * which can be handled more reasonably in Linux userspace, and the back
+ * button only sends a scancode without toggling the kill switch.
+ * The scancode can then be mapped to either Back or RF Kill functionality
+ * in userspace, depending on how the button is labeled on that particular
+ * model.
+ */
+ outb(CZC_EC_ANDROID_KEYS, CZC_EC_EXTRA_PORT);
+ return 0;
+}
+
+static const struct x86_dev_info czc_p10t __initconst = {
+ .init = czc_p10t_init,
+};
+
/*
* Whitelabel (sold as various brands) TM800A550L tablets.
* These tablet's DSDT contains a whole bunch of bogus ACPI I2C devices
@@ -559,7 +598,7 @@ static struct gpiod_lookup_table whitelabel_tm800a550l_goodix_gpios = {
},
};
-static struct gpiod_lookup_table *whitelabel_tm800a550l_gpios[] = {
+static struct gpiod_lookup_table * const whitelabel_tm800a550l_gpios[] = {
&whitelabel_tm800a550l_goodix_gpios,
NULL
};
@@ -642,6 +681,24 @@ static const struct dmi_system_id x86_android_tablet_ids[] __initconst = {
.driver_data = (void *)&chuwi_hi8_info,
},
{
+ /* CZC P10T */
+ .ident = "CZC ODEON TPC-10 (\"P10T\")",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "CZC"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ODEON*TPC-10"),
+ },
+ .driver_data = (void *)&czc_p10t,
+ },
+ {
+ /* A variant of CZC P10T */
+ .ident = "ViewSonic ViewPad 10",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ViewSonic"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "VPAD10"),
+ },
+ .driver_data = (void *)&czc_p10t,
+ },
+ {
/* Whitelabel (sold as various brands) TM800A550L */
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
@@ -669,7 +726,8 @@ static int serdev_count;
static struct i2c_client **i2c_clients;
static struct platform_device **pdevs;
static struct serdev_device **serdevs;
-static struct gpiod_lookup_table **gpiod_lookup_tables;
+static struct gpiod_lookup_table * const *gpiod_lookup_tables;
+static void (*exit_handler)(void);
static __init int x86_instantiate_i2c_client(const struct x86_dev_info *dev_info,
int idx)
@@ -787,6 +845,9 @@ static void x86_android_tablet_cleanup(void)
kfree(i2c_clients);
+ if (exit_handler)
+ exit_handler();
+
for (i = 0; gpiod_lookup_tables && gpiod_lookup_tables[i]; i++)
gpiod_remove_lookup_table(gpiod_lookup_tables[i]);
}
@@ -795,6 +856,7 @@ static __init int x86_android_tablet_init(void)
{
const struct x86_dev_info *dev_info;
const struct dmi_system_id *id;
+ struct gpio_chip *chip;
int i, ret = 0;
id = dmi_first_match(x86_android_tablet_ids);
@@ -804,6 +866,20 @@ static __init int x86_android_tablet_init(void)
dev_info = id->driver_data;
/*
+ * The broken DSDTs on these devices often also include broken
+ * _AEI (ACPI Event Interrupt) handlers; disable these.
+ */
+ if (dev_info->invalid_aei_gpiochip) {
+ chip = gpiochip_find(dev_info->invalid_aei_gpiochip,
+ gpiochip_find_match_label);
+ if (!chip) {
+ pr_err("error cannot find GPIO chip %s\n", dev_info->invalid_aei_gpiochip);
+ return -ENODEV;
+ }
+ acpi_gpiochip_free_interrupts(chip);
+ }
+
+ /*
* Since this runs from module_init() it cannot use -EPROBE_DEFER,
* instead pre-load any modules which are listed as requirements.
*/
@@ -814,6 +890,15 @@ static __init int x86_android_tablet_init(void)
for (i = 0; gpiod_lookup_tables && gpiod_lookup_tables[i]; i++)
gpiod_add_lookup_table(gpiod_lookup_tables[i]);
+ if (dev_info->init) {
+ ret = dev_info->init();
+ if (ret < 0) {
+ x86_android_tablet_cleanup();
+ return ret;
+ }
+ exit_handler = dev_info->exit;
+ }
+
i2c_clients = kcalloc(dev_info->i2c_client_count, sizeof(*i2c_clients), GFP_KERNEL);
if (!i2c_clients) {
x86_android_tablet_cleanup();
@@ -865,6 +950,6 @@ static __init int x86_android_tablet_init(void)
module_init(x86_android_tablet_init);
module_exit(x86_android_tablet_cleanup);
-MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com");
+MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>");
MODULE_DESCRIPTION("X86 Android tablets DSDT fixups driver");
MODULE_LICENSE("GPL");
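The new init()/exit() members of struct x86_dev_info give a board entry a hook for one-shot fixups such as the CZC EC mode switch above; the exit pointer is stashed in exit_handler and invoked from x86_android_tablet_cleanup(). A sketch of a hypothetical board entry using both hooks (all names are placeholders):

	static int __init my_tablet_init(void)
	{
		/* one-time fixup, e.g. an EC register poke as on the P10T */
		return 0;
	}

	static void my_tablet_exit(void)
	{
		/* undo the fixup; runs from x86_android_tablet_cleanup() */
	}

	static const struct x86_dev_info my_tablet_info __initconst = {
		.init = my_tablet_init,
		.exit = my_tablet_exit,
	};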
diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c
index cc6757dfa3f1..c02e7bf643a6 100644
--- a/drivers/pnp/driver.c
+++ b/drivers/pnp/driver.c
@@ -171,7 +171,7 @@ static int __pnp_bus_suspend(struct device *dev, pm_message_t state)
if (pnp_drv->driver.pm && pnp_drv->driver.pm->suspend) {
error = pnp_drv->driver.pm->suspend(dev);
- suspend_report_result(pnp_drv->driver.pm->suspend, error);
+ suspend_report_result(dev, pnp_drv->driver.pm->suspend, error);
if (error)
return error;
}
diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index afaf30a3622c..38928ff7472b 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -287,9 +287,9 @@ static acpi_status __init pnpacpi_add_device_handler(acpi_handle handle,
u32 lvl, void *context,
void **rv)
{
- struct acpi_device *device;
+ struct acpi_device *device = acpi_fetch_acpi_dev(handle);
- if (acpi_bus_get_device(handle, &device))
+ if (!device)
return AE_CTRL_DEPTH;
if (acpi_is_pnp_device(device))
pnpacpi_add_device(device);
diff --git a/drivers/power/supply/bq256xx_charger.c b/drivers/power/supply/bq256xx_charger.c
index b274942dc46a..01ad84fd147c 100644
--- a/drivers/power/supply/bq256xx_charger.c
+++ b/drivers/power/supply/bq256xx_charger.c
@@ -1523,6 +1523,9 @@ static int bq256xx_hw_init(struct bq256xx_device *bq)
BQ256XX_WDT_BIT_SHIFT);
ret = power_supply_get_battery_info(bq->charger, &bat_info);
+ if (ret == -ENOMEM)
+ return ret;
+
if (ret) {
dev_warn(bq->dev, "battery info missing, default values will be applied\n");
diff --git a/drivers/power/supply/cw2015_battery.c b/drivers/power/supply/cw2015_battery.c
index 0c87ad0dbf71..728e2a6cc9c3 100644
--- a/drivers/power/supply/cw2015_battery.c
+++ b/drivers/power/supply/cw2015_battery.c
@@ -689,7 +689,7 @@ static int cw_bat_probe(struct i2c_client *client)
if (ret) {
/* Allocate an empty battery */
cw_bat->battery = devm_kzalloc(&client->dev,
- sizeof(cw_bat->battery),
+ sizeof(*cw_bat->battery),
GFP_KERNEL);
if (!cw_bat->battery)
return -ENOMEM;
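The cw2015 change above fixes the classic sizeof-on-a-pointer bug: sizeof(cw_bat->battery) is the size of the pointer, not of the structure it points to, so the allocation was a handful of bytes where dozens were needed. In general form (dev stands for the probing device):

	struct power_supply_battery_info *battery;

	/* Wrong: allocates sizeof(void *) bytes, i.e. 8 on 64-bit. */
	battery = devm_kzalloc(dev, sizeof(battery), GFP_KERNEL);

	/* Right: allocates the full structure the pointer refers to. */
	battery = devm_kzalloc(dev, sizeof(*battery), GFP_KERNEL);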
diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
index 8242e8c5ed77..515e3ceb3393 100644
--- a/drivers/powercap/Kconfig
+++ b/drivers/powercap/Kconfig
@@ -46,6 +46,7 @@ config IDLE_INJECT
config DTPM
bool "Power capping for Dynamic Thermal Power Management (EXPERIMENTAL)"
+ depends on OF
help
This enables support for the power capping for the dynamic
thermal power management userspace engine.
@@ -56,4 +57,11 @@ config DTPM_CPU
help
This enables support for CPU power limitation based on
energy model.
+
+config DTPM_DEVFREQ
+ bool "Add device power capping based on the energy model"
+ depends on DTPM && ENERGY_MODEL
+ help
+ This enables support for device power limitation based on
+ energy model.
endif
diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile
index fabcf388a8d3..494617cdad88 100644
--- a/drivers/powercap/Makefile
+++ b/drivers/powercap/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_DTPM) += dtpm.o
obj-$(CONFIG_DTPM_CPU) += dtpm_cpu.o
+obj-$(CONFIG_DTPM_DEVFREQ) += dtpm_devfreq.o
obj-$(CONFIG_POWERCAP) += powercap_sys.o
obj-$(CONFIG_INTEL_RAPL_CORE) += intel_rapl_common.o
obj-$(CONFIG_INTEL_RAPL) += intel_rapl_msr.o
diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index 8cb45f2d3d78..ce920f17f45f 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -23,6 +23,9 @@
#include <linux/powercap.h>
#include <linux/slab.h>
#include <linux/mutex.h>
+#include <linux/of.h>
+
+#include "dtpm_subsys.h"
#define DTPM_POWER_LIMIT_FLAG 0
@@ -48,9 +51,7 @@ static int get_max_power_range_uw(struct powercap_zone *pcz, u64 *max_power_uw)
{
struct dtpm *dtpm = to_dtpm(pcz);
- mutex_lock(&dtpm_lock);
*max_power_uw = dtpm->power_max - dtpm->power_min;
- mutex_unlock(&dtpm_lock);
return 0;
}
@@ -80,14 +81,7 @@ static int __get_power_uw(struct dtpm *dtpm, u64 *power_uw)
static int get_power_uw(struct powercap_zone *pcz, u64 *power_uw)
{
- struct dtpm *dtpm = to_dtpm(pcz);
- int ret;
-
- mutex_lock(&dtpm_lock);
- ret = __get_power_uw(dtpm, power_uw);
- mutex_unlock(&dtpm_lock);
-
- return ret;
+ return __get_power_uw(to_dtpm(pcz), power_uw);
}
static void __dtpm_rebalance_weight(struct dtpm *dtpm)
@@ -130,7 +124,16 @@ static void __dtpm_add_power(struct dtpm *dtpm)
}
}
-static int __dtpm_update_power(struct dtpm *dtpm)
+/**
+ * dtpm_update_power - Update the power on the dtpm
+ * @dtpm: a pointer to a dtpm structure to update
+ *
+ * Function to update the power values of the dtpm node passed as a
+ * parameter. These new values will be propagated to the tree.
+ *
+ * Return: zero on success, -EINVAL if the values are inconsistent
+ */
+int dtpm_update_power(struct dtpm *dtpm)
{
int ret;
@@ -153,26 +156,6 @@ static int __dtpm_update_power(struct dtpm *dtpm)
}
/**
- * dtpm_update_power - Update the power on the dtpm
- * @dtpm: a pointer to a dtpm structure to update
- *
- * Function to update the power values of the dtpm node specified in
- * parameter. These new values will be propagated to the tree.
- *
- * Return: zero on success, -EINVAL if the values are inconsistent
- */
-int dtpm_update_power(struct dtpm *dtpm)
-{
- int ret;
-
- mutex_lock(&dtpm_lock);
- ret = __dtpm_update_power(dtpm);
- mutex_unlock(&dtpm_lock);
-
- return ret;
-}
-
-/**
* dtpm_release_zone - Cleanup when the node is released
* @pcz: a pointer to a powercap_zone structure
*
@@ -188,48 +171,28 @@ int dtpm_release_zone(struct powercap_zone *pcz)
struct dtpm *dtpm = to_dtpm(pcz);
struct dtpm *parent = dtpm->parent;
- mutex_lock(&dtpm_lock);
-
- if (!list_empty(&dtpm->children)) {
- mutex_unlock(&dtpm_lock);
+ if (!list_empty(&dtpm->children))
return -EBUSY;
- }
if (parent)
list_del(&dtpm->sibling);
__dtpm_sub_power(dtpm);
- mutex_unlock(&dtpm_lock);
-
if (dtpm->ops)
dtpm->ops->release(dtpm);
+ else
+ kfree(dtpm);
- if (root == dtpm)
- root = NULL;
-
- kfree(dtpm);
-
- return 0;
-}
-
-static int __get_power_limit_uw(struct dtpm *dtpm, int cid, u64 *power_limit)
-{
- *power_limit = dtpm->power_limit;
return 0;
}
static int get_power_limit_uw(struct powercap_zone *pcz,
int cid, u64 *power_limit)
{
- struct dtpm *dtpm = to_dtpm(pcz);
- int ret;
-
- mutex_lock(&dtpm_lock);
- ret = __get_power_limit_uw(dtpm, cid, power_limit);
- mutex_unlock(&dtpm_lock);
-
- return ret;
+ *power_limit = to_dtpm(pcz)->power_limit;
+
+ return 0;
}
/*
@@ -289,7 +252,7 @@ static int __set_power_limit_uw(struct dtpm *dtpm, int cid, u64 power_limit)
ret = __set_power_limit_uw(child, cid, power);
if (!ret)
- ret = __get_power_limit_uw(child, cid, &power);
+ ret = get_power_limit_uw(&child->zone, cid, &power);
if (ret)
break;
@@ -307,8 +270,6 @@ static int set_power_limit_uw(struct powercap_zone *pcz,
struct dtpm *dtpm = to_dtpm(pcz);
int ret;
- mutex_lock(&dtpm_lock);
-
/*
* Don't allow values outside of the power range previously
* set when initializing the power numbers.
@@ -320,8 +281,6 @@ static int set_power_limit_uw(struct powercap_zone *pcz,
pr_debug("%s: power limit: %llu uW, power max: %llu uW\n",
dtpm->zone.name, dtpm->power_limit, dtpm->power_max);
- mutex_unlock(&dtpm_lock);
-
return ret;
}
@@ -332,11 +291,7 @@ static const char *get_constraint_name(struct powercap_zone *pcz, int cid)
static int get_max_power_uw(struct powercap_zone *pcz, int id, u64 *max_power)
{
- struct dtpm *dtpm = to_dtpm(pcz);
-
- mutex_lock(&dtpm_lock);
- *max_power = dtpm->power_max;
- mutex_unlock(&dtpm_lock);
+ *max_power = to_dtpm(pcz)->power_max;
return 0;
}
@@ -439,8 +394,6 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
if (IS_ERR(pcz))
return PTR_ERR(pcz);
- mutex_lock(&dtpm_lock);
-
if (parent) {
list_add_tail(&dtpm->sibling, &parent->children);
dtpm->parent = parent;
@@ -456,19 +409,253 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
pr_debug("Registered dtpm node '%s' / %llu-%llu uW, \n",
dtpm->zone.name, dtpm->power_min, dtpm->power_max);
- mutex_unlock(&dtpm_lock);
+ return 0;
+}
+
+static struct dtpm *dtpm_setup_virtual(const struct dtpm_node *hierarchy,
+ struct dtpm *parent)
+{
+ struct dtpm *dtpm;
+ int ret;
+
+ dtpm = kzalloc(sizeof(*dtpm), GFP_KERNEL);
+ if (!dtpm)
+ return ERR_PTR(-ENOMEM);
+ dtpm_init(dtpm, NULL);
+
+ ret = dtpm_register(hierarchy->name, dtpm, parent);
+ if (ret) {
+ pr_err("Failed to register dtpm node '%s': %d\n",
+ hierarchy->name, ret);
+ kfree(dtpm);
+ return ERR_PTR(ret);
+ }
+
+ return dtpm;
+}
+
+static struct dtpm *dtpm_setup_dt(const struct dtpm_node *hierarchy,
+ struct dtpm *parent)
+{
+ struct device_node *np;
+ int i, ret;
+
+ np = of_find_node_by_path(hierarchy->name);
+ if (!np) {
+ pr_err("Failed to find '%s'\n", hierarchy->name);
+ return ERR_PTR(-ENXIO);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(dtpm_subsys); i++) {
+
+ if (!dtpm_subsys[i]->setup)
+ continue;
+
+ ret = dtpm_subsys[i]->setup(parent, np);
+ if (ret) {
+ pr_err("Failed to setup '%s': %d\n", dtpm_subsys[i]->name, ret);
+ of_node_put(np);
+ return ERR_PTR(ret);
+ }
+ }
+
+ of_node_put(np);
+
+ /*
+ * By returning a NULL pointer, we let the caller know that
+ * there is no child for us, as we are a leaf of the tree
+ */
+ return NULL;
+}
+
+typedef struct dtpm * (*dtpm_node_callback_t)(const struct dtpm_node *, struct dtpm *);
+
+static dtpm_node_callback_t dtpm_node_callback[] = {
+ [DTPM_NODE_VIRTUAL] = dtpm_setup_virtual,
+ [DTPM_NODE_DT] = dtpm_setup_dt,
+};
+
+static int dtpm_for_each_child(const struct dtpm_node *hierarchy,
+ const struct dtpm_node *it, struct dtpm *parent)
+{
+ struct dtpm *dtpm;
+ int i, ret;
+
+ for (i = 0; hierarchy[i].name; i++) {
+
+ if (hierarchy[i].parent != it)
+ continue;
+
+ dtpm = dtpm_node_callback[hierarchy[i].type](&hierarchy[i], parent);
+
+ /*
+ * A NULL pointer means there are no children, so we
+ * continue without recursing any deeper.
+ */
+ if (!dtpm)
+ continue;
+
+ /*
+ * There are multiple reasons why the callback could
+ * fail. The generic glue abstracts the backend, so it is
+ * not possible to report the error back or to act on it.
+ * In any case, a failure here is not critical to the
+ * hierarchy creation: we can assume the underlying
+ * service was not found, so we continue without this
+ * branch in the tree, but log a warning that the node
+ * was not created.
+ */
+ if (IS_ERR(dtpm)) {
+ pr_warn("Failed to create '%s' in the hierarchy\n",
+ hierarchy[i].name);
+ continue;
+ }
+
+ ret = dtpm_for_each_child(hierarchy, &hierarchy[i], dtpm);
+ if (ret)
+ return ret;
+ }
return 0;
}
-static int __init init_dtpm(void)
+/**
+ * dtpm_create_hierarchy - Create the dtpm hierarchy
+ * @dtpm_match_table: A match table whose data points to an array of
+ *                    struct dtpm_node describing the hierarchy
+ *
+ * The function is called by the platform-specific code with the
+ * description of the different nodes in the hierarchy. It creates the
+ * tree in the sysfs filesystem under the powercap dtpm entry.
+ *
+ * The expected tree has the format:
+ *
+ * struct dtpm_node hierarchy[] = {
+ * [0] { .name = "topmost", .type = DTPM_NODE_VIRTUAL },
+ * [1] { .name = "package", .type = DTPM_NODE_VIRTUAL, .parent = &hierarchy[0] },
+ * [2] { .name = "/cpus/cpu0", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ * [3] { .name = "/cpus/cpu1", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ * [4] { .name = "/cpus/cpu2", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ * [5] { .name = "/cpus/cpu3", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ * [6] { }
+ * };
+ *
+ * The last element is always an empty one and marks the end of the
+ * array.
+ *
+ * Return: zero on success, a negative value in case of error. Errors
+ * are reported back from the underlying functions.
+ */
+int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table)
{
+ const struct of_device_id *match;
+ const struct dtpm_node *hierarchy;
+ struct device_node *np;
+ int i, ret;
+
+ mutex_lock(&dtpm_lock);
+
+ if (pct) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
pct = powercap_register_control_type(NULL, "dtpm", NULL);
if (IS_ERR(pct)) {
pr_err("Failed to register control type\n");
- return PTR_ERR(pct);
+ ret = PTR_ERR(pct);
+ goto out_pct;
}
+ ret = -ENODEV;
+ np = of_find_node_by_path("/");
+ if (!np)
+ goto out_err;
+
+ match = of_match_node(dtpm_match_table, np);
+
+ of_node_put(np);
+
+ if (!match)
+ goto out_err;
+
+ hierarchy = match->data;
+ if (!hierarchy) {
+ ret = -EFAULT;
+ goto out_err;
+ }
+
+ ret = dtpm_for_each_child(hierarchy, NULL, NULL);
+ if (ret)
+ goto out_err;
+
+ for (i = 0; i < ARRAY_SIZE(dtpm_subsys); i++) {
+
+ if (!dtpm_subsys[i]->init)
+ continue;
+
+ ret = dtpm_subsys[i]->init();
+ if (ret)
+ pr_info("Failed to initialize '%s': %d",
+ dtpm_subsys[i]->name, ret);
+ }
+
+ mutex_unlock(&dtpm_lock);
+
return 0;
+
+out_err:
+ powercap_unregister_control_type(pct);
+out_pct:
+ pct = NULL;
+out_unlock:
+ mutex_unlock(&dtpm_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dtpm_create_hierarchy);
+
+static void __dtpm_destroy_hierarchy(struct dtpm *dtpm)
+{
+ struct dtpm *child, *aux;
+
+ list_for_each_entry_safe(child, aux, &dtpm->children, sibling)
+ __dtpm_destroy_hierarchy(child);
+
+ /*
+ * At this point, we know all children were removed from the
+ * recursive call before
+ */
+ dtpm_unregister(dtpm);
+}
+
+void dtpm_destroy_hierarchy(void)
+{
+ int i;
+
+ mutex_lock(&dtpm_lock);
+
+ if (!pct)
+ goto out_unlock;
+
+ __dtpm_destroy_hierarchy(root);
+
+ for (i = 0; i < ARRAY_SIZE(dtpm_subsys); i++) {
+
+ if (!dtpm_subsys[i]->exit)
+ continue;
+
+ dtpm_subsys[i]->exit();
+ }
+
+ powercap_unregister_control_type(pct);
+
+ pct = NULL;
+
+ root = NULL;
+
+out_unlock:
+ mutex_unlock(&dtpm_lock);
}
-late_initcall(init_dtpm);
+EXPORT_SYMBOL_GPL(dtpm_destroy_hierarchy);
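With dtpm_create_hierarchy()/dtpm_destroy_hierarchy() exported, the late_initcall is gone and a platform driver now owns the lifecycle. A minimal sketch of the expected caller, following the kerneldoc example above (the compatible string and node paths are placeholders):

	static struct dtpm_node example_hierarchy[] = {
		[0] = { .name = "topmost",    .type = DTPM_NODE_VIRTUAL },
		[1] = { .name = "/cpus/cpu0", .type = DTPM_NODE_DT,
			.parent = &example_hierarchy[0] },
		[2] = { },
	};

	static struct of_device_id example_match_table[] = {
		{ .compatible = "vendor,example-soc", .data = example_hierarchy },
		{ },
	};

	static int __init example_dtpm_init(void)
	{
		return dtpm_create_hierarchy(example_match_table);
	}

	static void __exit example_dtpm_exit(void)
	{
		dtpm_destroy_hierarchy();
	}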
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index b740866b228d..bca2f912d349 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -21,6 +21,7 @@
#include <linux/cpuhotplug.h>
#include <linux/dtpm.h>
#include <linux/energy_model.h>
+#include <linux/of.h>
#include <linux/pm_qos.h>
#include <linux/slab.h>
#include <linux/units.h>
@@ -150,10 +151,17 @@ static int update_pd_power_uw(struct dtpm *dtpm)
static void pd_release(struct dtpm *dtpm)
{
struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
+ struct cpufreq_policy *policy;
if (freq_qos_request_active(&dtpm_cpu->qos_req))
freq_qos_remove_request(&dtpm_cpu->qos_req);
+ policy = cpufreq_cpu_get(dtpm_cpu->cpu);
+ if (policy) {
+ for_each_cpu(dtpm_cpu->cpu, policy->related_cpus)
+ per_cpu(dtpm_per_cpu, dtpm_cpu->cpu) = NULL;
+ }
+
kfree(dtpm_cpu);
}
@@ -178,11 +186,26 @@ static int cpuhp_dtpm_cpu_offline(unsigned int cpu)
static int cpuhp_dtpm_cpu_online(unsigned int cpu)
{
struct dtpm_cpu *dtpm_cpu;
+
+ dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
+ if (dtpm_cpu)
+ return dtpm_update_power(&dtpm_cpu->dtpm);
+
+ return 0;
+}
+
+static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
+{
+ struct dtpm_cpu *dtpm_cpu;
struct cpufreq_policy *policy;
struct em_perf_domain *pd;
char name[CPUFREQ_NAME_LEN];
int ret = -ENOMEM;
+ dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
+ if (dtpm_cpu)
+ return 0;
+
policy = cpufreq_cpu_get(cpu);
if (!policy)
return 0;
@@ -191,10 +214,6 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
if (!pd)
return -EINVAL;
- dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
- if (dtpm_cpu)
- return dtpm_update_power(&dtpm_cpu->dtpm);
-
dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);
if (!dtpm_cpu)
return -ENOMEM;
@@ -207,7 +226,7 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
snprintf(name, sizeof(name), "cpu%d-cpufreq", dtpm_cpu->cpu);
- ret = dtpm_register(name, &dtpm_cpu->dtpm, NULL);
+ ret = dtpm_register(name, &dtpm_cpu->dtpm, parent);
if (ret)
goto out_kfree_dtpm_cpu;
@@ -231,7 +250,18 @@ out_kfree_dtpm_cpu:
return ret;
}
-static int __init dtpm_cpu_init(void)
+static int dtpm_cpu_setup(struct dtpm *dtpm, struct device_node *np)
+{
+ int cpu;
+
+ cpu = of_cpu_node_to_id(np);
+ if (cpu < 0)
+ return 0;
+
+ return __dtpm_cpu_setup(cpu, dtpm);
+}
+
+static int dtpm_cpu_init(void)
{
int ret;
@@ -269,4 +299,15 @@ static int __init dtpm_cpu_init(void)
return 0;
}
-DTPM_DECLARE(dtpm_cpu, dtpm_cpu_init);
+static void dtpm_cpu_exit(void)
+{
+ cpuhp_remove_state_nocalls(CPUHP_AP_ONLINE_DYN);
+ cpuhp_remove_state_nocalls(CPUHP_AP_DTPM_CPU_DEAD);
+}
+
+struct dtpm_subsys_ops dtpm_cpu_ops = {
+ .name = KBUILD_MODNAME,
+ .init = dtpm_cpu_init,
+ .exit = dtpm_cpu_exit,
+ .setup = dtpm_cpu_setup,
+};
diff --git a/drivers/powercap/dtpm_devfreq.c b/drivers/powercap/dtpm_devfreq.c
new file mode 100644
index 000000000000..91276761a31d
--- /dev/null
+++ b/drivers/powercap/dtpm_devfreq.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2021 Linaro Limited
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * The devfreq device, combined with the energy model and the load, can
+ * give an estimate of the power consumption as well as limit the
+ * power.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cpumask.h>
+#include <linux/devfreq.h>
+#include <linux/dtpm.h>
+#include <linux/energy_model.h>
+#include <linux/of.h>
+#include <linux/pm_qos.h>
+#include <linux/slab.h>
+#include <linux/units.h>
+
+struct dtpm_devfreq {
+ struct dtpm dtpm;
+ struct dev_pm_qos_request qos_req;
+ struct devfreq *devfreq;
+};
+
+static struct dtpm_devfreq *to_dtpm_devfreq(struct dtpm *dtpm)
+{
+ return container_of(dtpm, struct dtpm_devfreq, dtpm);
+}
+
+static int update_pd_power_uw(struct dtpm *dtpm)
+{
+ struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm);
+ struct devfreq *devfreq = dtpm_devfreq->devfreq;
+ struct device *dev = devfreq->dev.parent;
+ struct em_perf_domain *pd = em_pd_get(dev);
+
+ dtpm->power_min = pd->table[0].power;
+ dtpm->power_min *= MICROWATT_PER_MILLIWATT;
+
+ dtpm->power_max = pd->table[pd->nr_perf_states - 1].power;
+ dtpm->power_max *= MICROWATT_PER_MILLIWATT;
+
+ return 0;
+}
+
+static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
+{
+ struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm);
+ struct devfreq *devfreq = dtpm_devfreq->devfreq;
+ struct device *dev = devfreq->dev.parent;
+ struct em_perf_domain *pd = em_pd_get(dev);
+ unsigned long freq;
+ u64 power;
+ int i;
+
+ for (i = 0; i < pd->nr_perf_states; i++) {
+
+ power = pd->table[i].power * MICROWATT_PER_MILLIWATT;
+ if (power > power_limit)
+ break;
+ }
+
+ freq = pd->table[i - 1].frequency;
+
+ dev_pm_qos_update_request(&dtpm_devfreq->qos_req, freq);
+
+ power_limit = pd->table[i - 1].power * MICROWATT_PER_MILLIWATT;
+
+ return power_limit;
+}
+
+static void _normalize_load(struct devfreq_dev_status *status)
+{
+ if (status->total_time > 0xfffff) {
+ status->total_time >>= 10;
+ status->busy_time >>= 10;
+ }
+
+ status->busy_time <<= 10;
+ status->busy_time /= status->total_time ? : 1;
+
+ status->busy_time = status->busy_time ? : 1;
+ status->total_time = 1024;
+}
+
+static u64 get_pd_power_uw(struct dtpm *dtpm)
+{
+ struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm);
+ struct devfreq *devfreq = dtpm_devfreq->devfreq;
+ struct device *dev = devfreq->dev.parent;
+ struct em_perf_domain *pd = em_pd_get(dev);
+ struct devfreq_dev_status status;
+ unsigned long freq;
+ u64 power;
+ int i;
+
+ mutex_lock(&devfreq->lock);
+ status = devfreq->last_status;
+ mutex_unlock(&devfreq->lock);
+
+ freq = DIV_ROUND_UP(status.current_frequency, HZ_PER_KHZ);
+ _normalize_load(&status);
+
+ for (i = 0; i < pd->nr_perf_states; i++) {
+
+ if (pd->table[i].frequency < freq)
+ continue;
+
+ power = pd->table[i].power * MICROWATT_PER_MILLIWATT;
+ power *= status.busy_time;
+ power >>= 10;
+
+ return power;
+ }
+
+ return 0;
+}
+
+static void pd_release(struct dtpm *dtpm)
+{
+ struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm);
+
+ if (dev_pm_qos_request_active(&dtpm_devfreq->qos_req))
+ dev_pm_qos_remove_request(&dtpm_devfreq->qos_req);
+
+ kfree(dtpm_devfreq);
+}
+
+static struct dtpm_ops dtpm_ops = {
+ .set_power_uw = set_pd_power_limit,
+ .get_power_uw = get_pd_power_uw,
+ .update_power_uw = update_pd_power_uw,
+ .release = pd_release,
+};
+
+static int __dtpm_devfreq_setup(struct devfreq *devfreq, struct dtpm *parent)
+{
+ struct device *dev = devfreq->dev.parent;
+ struct dtpm_devfreq *dtpm_devfreq;
+ struct em_perf_domain *pd;
+ int ret = -ENOMEM;
+
+ pd = em_pd_get(dev);
+ if (!pd) {
+ ret = dev_pm_opp_of_register_em(dev, NULL);
+ if (ret) {
+ pr_err("No energy model available for '%s'\n", dev_name(dev));
+ return -EINVAL;
+ }
+ }
+
+ dtpm_devfreq = kzalloc(sizeof(*dtpm_devfreq), GFP_KERNEL);
+ if (!dtpm_devfreq)
+ return -ENOMEM;
+
+ dtpm_init(&dtpm_devfreq->dtpm, &dtpm_ops);
+
+ dtpm_devfreq->devfreq = devfreq;
+
+ ret = dtpm_register(dev_name(dev), &dtpm_devfreq->dtpm, parent);
+ if (ret) {
+ pr_err("Failed to register '%s': %d\n", dev_name(dev), ret);
+ kfree(dtpm_devfreq);
+ return ret;
+ }
+
+ ret = dev_pm_qos_add_request(dev, &dtpm_devfreq->qos_req,
+ DEV_PM_QOS_MAX_FREQUENCY,
+ PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
+ if (ret) {
+ pr_err("Failed to add QoS request: %d\n", ret);
+ goto out_dtpm_unregister;
+ }
+
+ dtpm_update_power(&dtpm_devfreq->dtpm);
+
+ return 0;
+
+out_dtpm_unregister:
+ dtpm_unregister(&dtpm_devfreq->dtpm);
+
+ return ret;
+}
+
+static int dtpm_devfreq_setup(struct dtpm *dtpm, struct device_node *np)
+{
+ struct devfreq *devfreq;
+
+ devfreq = devfreq_get_devfreq_by_node(np);
+ if (IS_ERR(devfreq))
+ return 0;
+
+ return __dtpm_devfreq_setup(devfreq, dtpm);
+}
+
+struct dtpm_subsys_ops dtpm_devfreq_ops = {
+ .name = KBUILD_MODNAME,
+ .setup = dtpm_devfreq_setup,
+};
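_normalize_load() above rescales the devfreq counters to a 10-bit fixed-point utilization (total_time forced to 1024) so that get_pd_power_uw() can weight the OPP power with a shift instead of a division; the 0xfffff pre-shift keeps busy_time << 10 from overflowing. A worked example with illustrative numbers:

	struct devfreq_dev_status s = {
		.busy_time  = 3000,	/* 3 ms busy ...          */
		.total_time = 10000,	/* ... out of 10 ms total */
	};

	_normalize_load(&s);	/* s.busy_time  = 3000 << 10 / 10000 = 307,
				 * s.total_time = 1024 */

	/* For a 100 mW OPP: 100000 uW * 307 >> 10 == 29980 uW reported. */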
diff --git a/drivers/powercap/dtpm_subsys.h b/drivers/powercap/dtpm_subsys.h
new file mode 100644
index 000000000000..db1712938a96
--- /dev/null
+++ b/drivers/powercap/dtpm_subsys.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 Linaro Ltd
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ */
+#ifndef ___DTPM_SUBSYS_H__
+#define ___DTPM_SUBSYS_H__
+
+extern struct dtpm_subsys_ops dtpm_cpu_ops;
+extern struct dtpm_subsys_ops dtpm_devfreq_ops;
+
+struct dtpm_subsys_ops *dtpm_subsys[] = {
+#ifdef CONFIG_DTPM_CPU
+ &dtpm_cpu_ops,
+#endif
+#ifdef CONFIG_DTPM_DEVFREQ
+ &dtpm_devfreq_ops,
+#endif
+};
+
+#endif
diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c
index 0f1b5a7d2a89..17ad5f0d13b2 100644
--- a/drivers/ptp/ptp_ocp.c
+++ b/drivers/ptp/ptp_ocp.c
@@ -607,7 +607,7 @@ ptp_ocp_settime(struct ptp_clock_info *ptp_info, const struct timespec64 *ts)
}
static void
-__ptp_ocp_adjtime_locked(struct ptp_ocp *bp, u64 adj_val)
+__ptp_ocp_adjtime_locked(struct ptp_ocp *bp, u32 adj_val)
{
u32 select, ctrl;
@@ -615,7 +615,7 @@ __ptp_ocp_adjtime_locked(struct ptp_ocp *bp, u64 adj_val)
iowrite32(OCP_SELECT_CLK_REG, &bp->reg->select);
iowrite32(adj_val, &bp->reg->offset_ns);
- iowrite32(adj_val & 0x7f, &bp->reg->offset_window_ns);
+ iowrite32(NSEC_PER_SEC, &bp->reg->offset_window_ns);
ctrl = OCP_CTRL_ADJUST_OFFSET | OCP_CTRL_ENABLE;
iowrite32(ctrl, &bp->reg->ctrl);
@@ -624,6 +624,22 @@ __ptp_ocp_adjtime_locked(struct ptp_ocp *bp, u64 adj_val)
iowrite32(select >> 16, &bp->reg->select);
}
+static void
+ptp_ocp_adjtime_coarse(struct ptp_ocp *bp, u64 delta_ns)
+{
+ struct timespec64 ts;
+ unsigned long flags;
+ int err;
+
+ spin_lock_irqsave(&bp->lock, flags);
+ err = __ptp_ocp_gettime_locked(bp, &ts, NULL);
+ if (likely(!err)) {
+ timespec64_add_ns(&ts, delta_ns);
+ __ptp_ocp_settime_locked(bp, &ts);
+ }
+ spin_unlock_irqrestore(&bp->lock, flags);
+}
+
static int
ptp_ocp_adjtime(struct ptp_clock_info *ptp_info, s64 delta_ns)
{
@@ -631,6 +647,11 @@ ptp_ocp_adjtime(struct ptp_clock_info *ptp_info, s64 delta_ns)
unsigned long flags;
u32 adj_ns, sign;
+ if (delta_ns > NSEC_PER_SEC || -delta_ns > NSEC_PER_SEC) {
+ ptp_ocp_adjtime_coarse(bp, delta_ns);
+ return 0;
+ }
+
sign = delta_ns < 0 ? BIT(31) : 0;
adj_ns = sign ? -delta_ns : delta_ns;
diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 1c35fed20d34..c8ce6e5eea24 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -984,6 +984,7 @@ config REGULATOR_RASPBERRYPI_TOUCHSCREEN_ATTINY
tristate "Raspberry Pi 7-inch touchscreen panel ATTINY regulator"
depends on BACKLIGHT_CLASS_DEVICE
depends on I2C
+ depends on OF_GPIO
select REGMAP_I2C
help
This driver supports ATTINY regulator on the Raspberry Pi 7-inch
@@ -1046,6 +1047,16 @@ config REGULATOR_RT5033
RT5033 PMIC. The device supports multiple regulators like
current source, LDO and Buck.
+config REGULATOR_RT5190A
+ tristate "Richtek RT5190A PMIC"
+ depends on I2C
+ select REGMAP_I2C
+ help
+ This adds support for the voltage regulators in the Richtek RT5190A
+ PMIC. It integrates 1 buck controller channel, 3 high-efficiency
+ buck converters, 1 LDO, and a mute AC OFF depop function, all with
+ the general I2C control interface.
+
config REGULATOR_RT6160
tristate "Richtek RT6160 BuckBoost voltage regulator"
depends on I2C
@@ -1263,6 +1274,15 @@ config REGULATOR_TPS62360
high-frequency synchronous step down dc-dc converter optimized
for battery-powered portable applications.
+config REGULATOR_TPS6286X
+ tristate "TI TPS6286x Power Regulator"
+ depends on I2C && OF
+ select REGMAP_I2C
+ help
+ This driver supports TPS6286x voltage regulator chips. These are
+ high-frequency synchronous step-down converters with an I2C
+ interface.
+
config REGULATOR_TPS65023
tristate "TI TPS65023 Power regulators"
depends on I2C
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 2e1b087489fa..1b64ad5767be 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -126,6 +126,7 @@ obj-$(CONFIG_REGULATOR_ROHM) += rohm-regulator.o
obj-$(CONFIG_REGULATOR_RT4801) += rt4801-regulator.o
obj-$(CONFIG_REGULATOR_RT4831) += rt4831-regulator.o
obj-$(CONFIG_REGULATOR_RT5033) += rt5033-regulator.o
+obj-$(CONFIG_REGULATOR_RT5190A) += rt5190a-regulator.o
obj-$(CONFIG_REGULATOR_RT6160) += rt6160-regulator.o
obj-$(CONFIG_REGULATOR_RT6245) += rt6245-regulator.o
obj-$(CONFIG_REGULATOR_RTMV20) += rtmv20-regulator.o
@@ -149,6 +150,7 @@ obj-$(CONFIG_REGULATOR_SY8827N) += sy8827n.o
obj-$(CONFIG_REGULATOR_TI_ABB) += ti-abb-regulator.o
obj-$(CONFIG_REGULATOR_TPS6105X) += tps6105x-regulator.o
obj-$(CONFIG_REGULATOR_TPS62360) += tps62360-regulator.o
+obj-$(CONFIG_REGULATOR_TPS6286X) += tps6286x-regulator.o
obj-$(CONFIG_REGULATOR_TPS65023) += tps65023-regulator.o
obj-$(CONFIG_REGULATOR_TPS6507X) += tps6507x-regulator.o
obj-$(CONFIG_REGULATOR_TPS65086) += tps65086-regulator.o
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 86aa4141efa9..d2553970a67b 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -6014,9 +6014,8 @@ core_initcall(regulator_init);
static int regulator_late_cleanup(struct device *dev, void *data)
{
struct regulator_dev *rdev = dev_to_rdev(dev);
- const struct regulator_ops *ops = rdev->desc->ops;
struct regulation_constraints *c = rdev->constraints;
- int enabled, ret;
+ int ret;
if (c && c->always_on)
return 0;
@@ -6029,14 +6028,8 @@ static int regulator_late_cleanup(struct device *dev, void *data)
if (rdev->use_count)
goto unlock;
- /* If we can't read the status assume it's always on. */
- if (ops->is_enabled)
- enabled = ops->is_enabled(rdev);
- else
- enabled = 1;
-
- /* But if reading the status failed, assume that it's off. */
- if (enabled <= 0)
+ /* If reading the status failed, assume that it's off. */
+ if (_regulator_is_enabled(rdev) <= 0)
goto unlock;
if (have_full_constraints()) {
diff --git a/drivers/regulator/da9121-regulator.c b/drivers/regulator/da9121-regulator.c
index 6f21223a488e..eb9df485bd8a 100644
--- a/drivers/regulator/da9121-regulator.c
+++ b/drivers/regulator/da9121-regulator.c
@@ -87,16 +87,16 @@ static struct da9121_range da9121_3A_1phase_current = {
};
static struct da9121_range da914x_40A_4phase_current = {
- .val_min = 14000000,
- .val_max = 80000000,
- .val_stp = 2000000,
+ .val_min = 26000000,
+ .val_max = 78000000,
+ .val_stp = 4000000,
.reg_min = 1,
.reg_max = 14,
};
static struct da9121_range da914x_20A_2phase_current = {
- .val_min = 7000000,
- .val_max = 40000000,
+ .val_min = 13000000,
+ .val_max = 39000000,
.val_stp = 2000000,
.reg_min = 1,
.reg_max = 14,
@@ -561,7 +561,7 @@ static const struct regulator_desc da9217_reg = {
};
#define DA914X_MIN_MV 500
-#define DA914X_MAX_MV 1000
+#define DA914X_MAX_MV 1300
#define DA914X_STEP_MV 10
#define DA914X_MIN_SEL (DA914X_MIN_MV / DA914X_STEP_MV)
#define DA914X_N_VOLTAGES (((DA914X_MAX_MV - DA914X_MIN_MV) / DA914X_STEP_MV) \
@@ -585,10 +585,6 @@ static const struct regulator_desc da9141_reg = {
.vsel_mask = DA9121_MASK_BUCK_BUCKx_5_CHx_A_VOUT,
.enable_reg = DA9121_REG_BUCK_BUCK1_0,
.enable_mask = DA9121_MASK_BUCK_BUCKx_0_CHx_EN,
- /* Default value of BUCK_BUCK1_0.CH1_SRC_DVC_UP */
- .ramp_delay = 20000,
- /* tBUCK_EN */
- .enable_time = 20,
};
static const struct regulator_desc da9142_reg = {
diff --git a/drivers/regulator/max20086-regulator.c b/drivers/regulator/max20086-regulator.c
index fbc56b043071..b8bf76c170fe 100644
--- a/drivers/regulator/max20086-regulator.c
+++ b/drivers/regulator/max20086-regulator.c
@@ -7,6 +7,7 @@
#include <linux/err.h>
#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
#include <linux/i2c.h>
#include <linux/module.h>
#include <linux/regmap.h>
@@ -140,7 +141,7 @@ static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on)
node = of_get_child_by_name(chip->dev->of_node, "regulators");
if (!node) {
dev_err(chip->dev, "regulators node not found\n");
- return PTR_ERR(node);
+ return -ENODEV;
}
for (i = 0; i < chip->info->num_outputs; ++i)
diff --git a/drivers/regulator/max8973-regulator.c b/drivers/regulator/max8973-regulator.c
index 80b65cb87cef..cb7e50003f70 100644
--- a/drivers/regulator/max8973-regulator.c
+++ b/drivers/regulator/max8973-regulator.c
@@ -459,7 +459,7 @@ static int max8973_thermal_read_temp(void *data, int *temp)
return ret;
}
- /* +1 degC to trigger cool devive */
+ /* +1 degC to trigger cool device */
if (val & MAX77621_CHIPID_TJINT_S)
*temp = mchip->junction_temp_warning + 1000;
else
diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c
index a3bc0eb6ceb8..561de6b2e6e3 100644
--- a/drivers/regulator/qcom-rpmh-regulator.c
+++ b/drivers/regulator/qcom-rpmh-regulator.c
@@ -1121,6 +1121,39 @@ static const struct rpmh_vreg_init_data pmx55_vreg_data[] = {
{}
};
+static const struct rpmh_vreg_init_data pmx65_vreg_data[] = {
+ RPMH_VREG("smps1", "smp%s1", &pmic5_ftsmps510, "vdd-s1"),
+ RPMH_VREG("smps2", "smp%s2", &pmic5_hfsmps510, "vdd-s2"),
+ RPMH_VREG("smps3", "smp%s3", &pmic5_hfsmps510, "vdd-s3"),
+ RPMH_VREG("smps4", "smp%s4", &pmic5_hfsmps510, "vdd-s4"),
+ RPMH_VREG("smps5", "smp%s5", &pmic5_hfsmps510, "vdd-s5"),
+ RPMH_VREG("smps6", "smp%s6", &pmic5_ftsmps510, "vdd-s6"),
+ RPMH_VREG("smps7", "smp%s7", &pmic5_hfsmps510, "vdd-s7"),
+ RPMH_VREG("smps8", "smp%s8", &pmic5_hfsmps510, "vdd-s8"),
+ RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo, "vdd-l1"),
+ RPMH_VREG("ldo2", "ldo%s2", &pmic5_nldo, "vdd-l2-l18"),
+ RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo, "vdd-l3"),
+ RPMH_VREG("ldo4", "ldo%s4", &pmic5_nldo, "vdd-l4"),
+ RPMH_VREG("ldo5", "ldo%s5", &pmic5_pldo, "vdd-l5-l6-l16"),
+ RPMH_VREG("ldo6", "ldo%s6", &pmic5_pldo, "vdd-l5-l6-l16"),
+ RPMH_VREG("ldo7", "ldo%s7", &pmic5_nldo, "vdd-l7"),
+ RPMH_VREG("ldo8", "ldo%s8", &pmic5_nldo, "vdd-l8-l9"),
+ RPMH_VREG("ldo9", "ldo%s9", &pmic5_nldo, "vdd-l8-l9"),
+ RPMH_VREG("ldo10", "ldo%s10", &pmic5_pldo, "vdd-l10"),
+ RPMH_VREG("ldo11", "ldo%s11", &pmic5_pldo, "vdd-l11-l13"),
+ RPMH_VREG("ldo12", "ldo%s12", &pmic5_nldo, "vdd-l12"),
+ RPMH_VREG("ldo13", "ldo%s13", &pmic5_pldo, "vdd-l11-l13"),
+ RPMH_VREG("ldo14", "ldo%s14", &pmic5_nldo, "vdd-l14"),
+ RPMH_VREG("ldo15", "ldo%s15", &pmic5_nldo, "vdd-l15"),
+ RPMH_VREG("ldo16", "ldo%s16", &pmic5_pldo, "vdd-l5-l6-l16"),
+ RPMH_VREG("ldo17", "ldo%s17", &pmic5_nldo, "vdd-l17"),
+ /* ldo18 not configured */
+ RPMH_VREG("ldo19", "ldo%s19", &pmic5_nldo, "vdd-l19"),
+ RPMH_VREG("ldo20", "ldo%s20", &pmic5_nldo, "vdd-l20"),
+ RPMH_VREG("ldo21", "ldo%s21", &pmic5_nldo, "vdd-l21"),
+ {}
+};
+
static const struct rpmh_vreg_init_data pm7325_vreg_data[] = {
RPMH_VREG("smps1", "smp%s1", &pmic5_hfsmps510, "vdd-s1"),
RPMH_VREG("smps2", "smp%s2", &pmic5_ftsmps520, "vdd-s2"),
@@ -1277,6 +1310,10 @@ static const struct of_device_id __maybe_unused rpmh_regulator_match_table[] = {
.data = pmx55_vreg_data,
},
{
+ .compatible = "qcom,pmx65-rpmh-regulators",
+ .data = pmx65_vreg_data,
+ },
+ {
.compatible = "qcom,pm7325-rpmh-regulators",
.data = pm7325_vreg_data,
},
diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c
index 9fc666107a06..8490aa8eecb1 100644
--- a/drivers/regulator/qcom_smd-regulator.c
+++ b/drivers/regulator/qcom_smd-regulator.c
@@ -1317,8 +1317,10 @@ static int rpm_reg_probe(struct platform_device *pdev)
for_each_available_child_of_node(dev->of_node, node) {
vreg = devm_kzalloc(&pdev->dev, sizeof(*vreg), GFP_KERNEL);
- if (!vreg)
+ if (!vreg) {
+ of_node_put(node);
return -ENOMEM;
+ }
ret = rpm_regulator_init_vreg(vreg, dev, node, rpm, vreg_data);
diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c
index ee46bfbf5eee..f7df0f4b2f87 100644
--- a/drivers/regulator/rpi-panel-attiny-regulator.c
+++ b/drivers/regulator/rpi-panel-attiny-regulator.c
@@ -8,6 +8,7 @@
#include <linux/backlight.h>
#include <linux/err.h>
#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
#include <linux/i2c.h>
#include <linux/init.h>
#include <linux/interrupt.h>
@@ -21,63 +22,146 @@
/* I2C registers of the Atmel microcontroller. */
#define REG_ID 0x80
#define REG_PORTA 0x81
-#define REG_PORTA_HF BIT(2)
-#define REG_PORTA_VF BIT(3)
#define REG_PORTB 0x82
+#define REG_PORTC 0x83
#define REG_POWERON 0x85
#define REG_PWM 0x86
+#define REG_ADDR_L 0x8c
+#define REG_ADDR_H 0x8d
+#define REG_WRITE_DATA_H 0x90
+#define REG_WRITE_DATA_L 0x91
+
+#define PA_LCD_DITHB BIT(0)
+#define PA_LCD_MODE BIT(1)
+#define PA_LCD_LR BIT(2)
+#define PA_LCD_UD BIT(3)
+
+#define PB_BRIDGE_PWRDNX_N BIT(0)
+#define PB_LCD_VCC_N BIT(1)
+#define PB_LCD_MAIN BIT(7)
+
+#define PC_LED_EN BIT(0)
+#define PC_RST_TP_N BIT(1)
+#define PC_RST_LCD_N BIT(2)
+#define PC_RST_BRIDGE_N BIT(3)
+
+enum gpio_signals {
+ RST_BRIDGE_N, /* TC358762 bridge reset */
+ RST_TP_N, /* Touch controller reset */
+ NUM_GPIO
+};
+
+struct gpio_signal_mappings {
+ unsigned int reg;
+ unsigned int mask;
+};
+
+static const struct gpio_signal_mappings mappings[NUM_GPIO] = {
+ [RST_BRIDGE_N] = { REG_PORTC, PC_RST_BRIDGE_N | PC_RST_LCD_N },
+ [RST_TP_N] = { REG_PORTC, PC_RST_TP_N },
+};
+
+struct attiny_lcd {
+ /* lock to serialise overall accesses to the Atmel */
+ struct mutex lock;
+ struct regmap *regmap;
+ bool gpio_states[NUM_GPIO];
+ u8 port_states[3];
+
+ struct gpio_chip gc;
+};
static const struct regmap_config attiny_regmap_config = {
.reg_bits = 8,
.val_bits = 8,
- .max_register = REG_PWM,
- .cache_type = REGCACHE_NONE,
+ .disable_locking = 1,
+ .max_register = REG_WRITE_DATA_L,
+ .cache_type = REGCACHE_RBTREE,
+};
+
+static int attiny_set_port_state(struct attiny_lcd *state, int reg, u8 val)
+{
+ state->port_states[reg - REG_PORTA] = val;
+ return regmap_write(state->regmap, reg, val);
+};
+
+static u8 attiny_get_port_state(struct attiny_lcd *state, int reg)
+{
+ return state->port_states[reg - REG_PORTA];
};
static int attiny_lcd_power_enable(struct regulator_dev *rdev)
{
- unsigned int data;
+ struct attiny_lcd *state = rdev_get_drvdata(rdev);
- regmap_write(rdev->regmap, REG_POWERON, 1);
- /* Wait for nPWRDWN to go low to indicate poweron is done. */
- regmap_read_poll_timeout(rdev->regmap, REG_PORTB, data,
- data & BIT(0), 10, 1000000);
+ mutex_lock(&state->lock);
+
+ /* Ensure the bridge and tp stay in reset */
+ attiny_set_port_state(state, REG_PORTC, 0);
+ usleep_range(5000, 10000);
/* Default to the same orientation as the closed source
* firmware used for the panel. Runtime rotation
* configuration will be supported using VC4's plane
* orientation bits.
*/
- regmap_write(rdev->regmap, REG_PORTA, BIT(2));
+ attiny_set_port_state(state, REG_PORTA, PA_LCD_LR);
+ usleep_range(5000, 10000);
+ /* Main regulator on, and power to the panel (LCD_VCC_N) */
+ attiny_set_port_state(state, REG_PORTB, PB_LCD_MAIN);
+ usleep_range(5000, 10000);
+ /* Bring controllers out of reset */
+ attiny_set_port_state(state, REG_PORTC, PC_LED_EN);
+
+ msleep(80);
+
+ mutex_unlock(&state->lock);
return 0;
}
static int attiny_lcd_power_disable(struct regulator_dev *rdev)
{
+ struct attiny_lcd *state = rdev_get_drvdata(rdev);
+
+ mutex_lock(&state->lock);
+
regmap_write(rdev->regmap, REG_PWM, 0);
- regmap_write(rdev->regmap, REG_POWERON, 0);
- udelay(1);
+ usleep_range(5000, 10000);
+
+ attiny_set_port_state(state, REG_PORTA, 0);
+ usleep_range(5000, 10000);
+ attiny_set_port_state(state, REG_PORTB, PB_LCD_VCC_N);
+ usleep_range(5000, 10000);
+ attiny_set_port_state(state, REG_PORTC, 0);
+ msleep(30);
+
+ mutex_unlock(&state->lock);
+
return 0;
}
static int attiny_lcd_power_is_enabled(struct regulator_dev *rdev)
{
+ struct attiny_lcd *state = rdev_get_drvdata(rdev);
unsigned int data;
- int ret;
+ int ret, i;
- ret = regmap_read(rdev->regmap, REG_POWERON, &data);
- if (ret < 0)
- return ret;
+ mutex_lock(&state->lock);
- if (!(data & BIT(0)))
- return 0;
+ for (i = 0; i < 10; i++) {
+ ret = regmap_read(rdev->regmap, REG_PORTC, &data);
+ if (!ret)
+ break;
+ usleep_range(10000, 12000);
+ }
+
+ mutex_unlock(&state->lock);
- ret = regmap_read(rdev->regmap, REG_PORTB, &data);
if (ret < 0)
return ret;
- return data & BIT(0);
+ return data & PC_RST_BRIDGE_N;
}
static const struct regulator_init_data attiny_regulator_default = {
@@ -101,33 +185,104 @@ static const struct regulator_desc attiny_regulator = {
static int attiny_update_status(struct backlight_device *bl)
{
- struct regmap *regmap = bl_get_data(bl);
+ struct attiny_lcd *state = bl_get_data(bl);
+ struct regmap *regmap = state->regmap;
int brightness = bl->props.brightness;
+ int ret, i;
+
+ mutex_lock(&state->lock);
if (bl->props.power != FB_BLANK_UNBLANK ||
bl->props.fb_blank != FB_BLANK_UNBLANK)
brightness = 0;
- return regmap_write(regmap, REG_PWM, brightness);
-}
-
-static int attiny_get_brightness(struct backlight_device *bl)
-{
- struct regmap *regmap = bl_get_data(bl);
- int ret, brightness;
+ for (i = 0; i < 10; i++) {
+ ret = regmap_write(regmap, REG_PWM, brightness);
+ if (!ret)
+ break;
+ }
- ret = regmap_read(regmap, REG_PWM, &brightness);
- if (ret)
- return ret;
+ mutex_unlock(&state->lock);
- return brightness;
+ return ret;
}
static const struct backlight_ops attiny_bl = {
.update_status = attiny_update_status,
- .get_brightness = attiny_get_brightness,
};
+static int attiny_gpio_get_direction(struct gpio_chip *gc, unsigned int off)
+{
+ return GPIO_LINE_DIRECTION_OUT;
+}
+
+static void attiny_gpio_set(struct gpio_chip *gc, unsigned int off, int val)
+{
+ struct attiny_lcd *state = gpiochip_get_data(gc);
+ u8 last_val;
+
+ if (off >= NUM_GPIO)
+ return;
+
+ mutex_lock(&state->lock);
+
+ last_val = attiny_get_port_state(state, mappings[off].reg);
+ if (val)
+ last_val |= mappings[off].mask;
+ else
+ last_val &= ~mappings[off].mask;
+
+ attiny_set_port_state(state, mappings[off].reg, last_val);
+
+ if (off == RST_BRIDGE_N && val) {
+ usleep_range(5000, 8000);
+ regmap_write(state->regmap, REG_ADDR_H, 0x04);
+ usleep_range(5000, 8000);
+ regmap_write(state->regmap, REG_ADDR_L, 0x7c);
+ usleep_range(5000, 8000);
+ regmap_write(state->regmap, REG_WRITE_DATA_H, 0x00);
+ usleep_range(5000, 8000);
+ regmap_write(state->regmap, REG_WRITE_DATA_L, 0x00);
+
+ msleep(100);
+ }
+
+ mutex_unlock(&state->lock);
+}
+
+static int attiny_i2c_read(struct i2c_client *client, u8 reg, unsigned int *buf)
+{
+ struct i2c_msg msgs[1];
+ u8 addr_buf[1] = { reg };
+ u8 data_buf[1] = { 0, };
+ int ret;
+
+ /* Write register address */
+ msgs[0].addr = client->addr;
+ msgs[0].flags = 0;
+ msgs[0].len = ARRAY_SIZE(addr_buf);
+ msgs[0].buf = addr_buf;
+
+ ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+ if (ret != ARRAY_SIZE(msgs))
+ return -EIO;
+
+ usleep_range(5000, 10000);
+
+ /* Read data from register */
+ msgs[0].addr = client->addr;
+ msgs[0].flags = I2C_M_RD;
+ msgs[0].len = 1;
+ msgs[0].buf = data_buf;
+
+ ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+ if (ret != ARRAY_SIZE(msgs))
+ return -EIO;
+
+ *buf = data_buf[0];
+ return 0;
+}
+
/*
* I2C driver interface functions
*/
@@ -138,22 +293,30 @@ static int attiny_i2c_probe(struct i2c_client *i2c,
struct regulator_config config = { };
struct backlight_device *bl;
struct regulator_dev *rdev;
+ struct attiny_lcd *state;
struct regmap *regmap;
unsigned int data;
int ret;
+ state = devm_kzalloc(&i2c->dev, sizeof(*state), GFP_KERNEL);
+ if (!state)
+ return -ENOMEM;
+
+ mutex_init(&state->lock);
+ i2c_set_clientdata(i2c, state);
+
regmap = devm_regmap_init_i2c(i2c, &attiny_regmap_config);
if (IS_ERR(regmap)) {
ret = PTR_ERR(regmap);
dev_err(&i2c->dev, "Failed to allocate register map: %d\n",
ret);
- return ret;
+ goto error;
}
- ret = regmap_read(regmap, REG_ID, &data);
+ ret = attiny_i2c_read(i2c, REG_ID, &data);
if (ret < 0) {
dev_err(&i2c->dev, "Failed to read REG_ID reg: %d\n", ret);
- return ret;
+ goto error;
}
switch (data) {
@@ -162,34 +325,73 @@ static int attiny_i2c_probe(struct i2c_client *i2c,
break;
default:
dev_err(&i2c->dev, "Unknown Atmel firmware revision: 0x%02x\n", data);
- return -ENODEV;
+ ret = -ENODEV;
+ goto error;
}
regmap_write(regmap, REG_POWERON, 0);
- mdelay(1);
+ msleep(30);
+ regmap_write(regmap, REG_PWM, 0);
config.dev = &i2c->dev;
config.regmap = regmap;
config.of_node = i2c->dev.of_node;
config.init_data = &attiny_regulator_default;
+ config.driver_data = state;
rdev = devm_regulator_register(&i2c->dev, &attiny_regulator, &config);
if (IS_ERR(rdev)) {
dev_err(&i2c->dev, "Failed to register ATTINY regulator\n");
- return PTR_ERR(rdev);
+ ret = PTR_ERR(rdev);
+ goto error;
}
props.type = BACKLIGHT_RAW;
props.max_brightness = 0xff;
- bl = devm_backlight_device_register(&i2c->dev,
- "7inch-touchscreen-panel-bl",
- &i2c->dev, regmap, &attiny_bl,
+
+ state->regmap = regmap;
+
+ bl = devm_backlight_device_register(&i2c->dev, dev_name(&i2c->dev),
+ &i2c->dev, state, &attiny_bl,
&props);
- if (IS_ERR(bl))
- return PTR_ERR(bl);
+ if (IS_ERR(bl)) {
+ ret = PTR_ERR(bl);
+ goto error;
+ }
bl->props.brightness = 0xff;
+ state->gc.parent = &i2c->dev;
+ state->gc.label = i2c->name;
+ state->gc.owner = THIS_MODULE;
+ state->gc.of_node = i2c->dev.of_node;
+ state->gc.base = -1;
+ state->gc.ngpio = NUM_GPIO;
+
+ state->gc.set = attiny_gpio_set;
+ state->gc.get_direction = attiny_gpio_get_direction;
+ state->gc.can_sleep = true;
+
+ ret = devm_gpiochip_add_data(&i2c->dev, &state->gc, state);
+ if (ret) {
+ dev_err(&i2c->dev, "Failed to create gpiochip: %d\n", ret);
+ goto error;
+ }
+
+ return 0;
+
+error:
+ mutex_destroy(&state->lock);
+
+ return ret;
+}
+
+static int attiny_i2c_remove(struct i2c_client *client)
+{
+ struct attiny_lcd *state = i2c_get_clientdata(client);
+
+ mutex_destroy(&state->lock);
+
return 0;
}
@@ -205,6 +407,7 @@ static struct i2c_driver attiny_regulator_driver = {
.of_match_table = of_match_ptr(attiny_dt_ids),
},
.probe = attiny_i2c_probe,
+ .remove = attiny_i2c_remove,
};
module_i2c_driver(attiny_regulator_driver);
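With the ATTINY now exposing the two reset lines as a GPIO controller, the DSI bridge and touch controller drivers can drive them through the regular gpiolib consumer API instead of going through the regulator. A consumer-side sketch; how the descriptor gets wired to the chip (DT phandle or a lookup table) is left out, and the offsets follow the enum above:

	struct gpio_desc *bridge_reset;

	/* Offset RST_BRIDGE_N (0); releasing it also triggers the PLL
	 * register writes done in attiny_gpio_set(). */
	bridge_reset = devm_gpiod_get_index(dev, NULL, RST_BRIDGE_N,
					    GPIOD_OUT_LOW);
	if (IS_ERR(bridge_reset))
		return PTR_ERR(bridge_reset);

	gpiod_set_value_cansleep(bridge_reset, 1);	/* deassert reset */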
diff --git a/drivers/regulator/rt5190a-regulator.c b/drivers/regulator/rt5190a-regulator.c
new file mode 100644
index 000000000000..155d4afd00b1
--- /dev/null
+++ b/drivers/regulator/rt5190a-regulator.c
@@ -0,0 +1,513 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <dt-bindings/regulator/richtek,rt5190a-regulator.h>
+#include <linux/bits.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/of_regulator.h>
+
+#define RT5190A_REG_MANUFACTURE 0x00
+#define RT5190A_REG_BUCK2VSEL 0x04
+#define RT5190A_REG_BUCK3VSEL 0x05
+#define RT5190A_REG_DCDCCNTL 0x06
+#define RT5190A_REG_ENABLE 0x07
+#define RT5190A_REG_DISCHARGE 0x09
+#define RT5190A_REG_PROTMODE 0x0A
+#define RT5190A_REG_MUTECNTL 0x0B
+#define RT5190A_REG_PGSTAT 0x0F
+#define RT5190A_REG_OVINT 0x10
+#define RT5190A_REG_HOTDIEMASK 0x17
+
+#define RT5190A_VSEL_MASK GENMASK(6, 0)
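+/* regulator id N maps to bit N + 1 in the shared control/status registers */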
+#define RT5190A_RID_BITMASK(rid) BIT(rid + 1)
+#define RT5190A_BUCK1_DISCHG_MASK GENMASK(1, 0)
+#define RT5190A_BUCK1_DISCHG_ONVAL 0x01
+#define RT5190A_OVERVOLT_MASK GENMASK(7, 0)
+#define RT5190A_UNDERVOLT_MASK GENMASK(15, 8)
+#define RT5190A_CH234OT_MASK BIT(29)
+#define RT5190A_CHIPOT_MASK BIT(28)
+
+#define RT5190A_BUCK23_MINUV 600000
+#define RT5190A_BUCK23_MAXUV 1400000
+#define RT5190A_BUCK23_STEPUV 10000
+#define RT5190A_BUCK23_STEPNUM ((1400000 - 600000) / 10000 + 1)
+
+enum {
+ RT5190A_IDX_BUCK1 = 0,
+ RT5190A_IDX_BUCK2,
+ RT5190A_IDX_BUCK3,
+ RT5190A_IDX_BUCK4,
+ RT5190A_IDX_LDO,
+ RT5190A_MAX_IDX
+};
+
+struct rt5190a_priv {
+ struct device *dev;
+ struct regmap *regmap;
+ struct regulator_desc rdesc[RT5190A_MAX_IDX];
+ struct regulator_dev *rdev[RT5190A_MAX_IDX];
+};
+
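+/* report REGULATOR_ERROR_FAIL when a rail's power-good bit is clear */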
+static int rt5190a_get_error_flags(struct regulator_dev *rdev,
+ unsigned int *flags)
+{
+ struct regmap *regmap = rdev_get_regmap(rdev);
+ int rid = rdev_get_id(rdev);
+ unsigned int pgood_stat;
+ int ret;
+
+ ret = regmap_read(regmap, RT5190A_REG_PGSTAT, &pgood_stat);
+ if (ret)
+ return ret;
+
+ if (!(pgood_stat & RT5190A_RID_BITMASK(rid)))
+ *flags = REGULATOR_ERROR_FAIL;
+ else
+ *flags = 0;
+
+ return 0;
+}
+
+static int rt5190a_fixed_buck_set_mode(struct regulator_dev *rdev,
+ unsigned int mode)
+{
+ struct regmap *regmap = rdev_get_regmap(rdev);
+ int rid = rdev_get_id(rdev);
+ unsigned int mask = RT5190A_RID_BITMASK(rid), val;
+
+ switch (mode) {
+ case REGULATOR_MODE_FAST:
+ val = mask;
+ break;
+ case REGULATOR_MODE_NORMAL:
+ val = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return regmap_update_bits(regmap, RT5190A_REG_DCDCCNTL, mask, val);
+}
+
+static unsigned int rt5190a_fixed_buck_get_mode(struct regulator_dev *rdev)
+{
+ struct regmap *regmap = rdev_get_regmap(rdev);
+ int rid = rdev_get_id(rdev);
+ unsigned int val;
+ int ret;
+
+ ret = regmap_read(regmap, RT5190A_REG_DCDCCNTL, &val);
+ if (ret) {
+ dev_err(&rdev->dev, "Failed to get mode [%d]\n", ret);
+ return ret;
+ }
+
+ if (val & RT5190A_RID_BITMASK(rid))
+ return REGULATOR_MODE_FAST;
+
+ return REGULATOR_MODE_NORMAL;
+}
+
+static const struct regulator_ops rt5190a_ranged_buck_ops = {
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .list_voltage = regulator_list_voltage_linear,
+ .set_active_discharge = regulator_set_active_discharge_regmap,
+ .get_error_flags = rt5190a_get_error_flags,
+};
+
+static const struct regulator_ops rt5190a_fixed_buck_ops = {
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .set_active_discharge = regulator_set_active_discharge_regmap,
+ .set_mode = rt5190a_fixed_buck_set_mode,
+ .get_mode = rt5190a_fixed_buck_get_mode,
+ .get_error_flags = rt5190a_get_error_flags,
+};
+
+static const struct regulator_ops rt5190a_fixed_ldo_ops = {
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .set_active_discharge = regulator_set_active_discharge_regmap,
+ .get_error_flags = rt5190a_get_error_flags,
+};
+
+static irqreturn_t rt5190a_irq_handler(int irq, void *data)
+{
+ struct rt5190a_priv *priv = data;
+ __le32 raws;
+ unsigned int events, fields;
+ static const struct {
+ unsigned int bitmask;
+ unsigned int report;
+ } event_tbl[] = {
+ { RT5190A_OVERVOLT_MASK, REGULATOR_ERROR_REGULATION_OUT },
+ { RT5190A_UNDERVOLT_MASK, REGULATOR_ERROR_UNDER_VOLTAGE }
+ };
+ int i, j, ret;
+
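+	/* OVINT..OVINT+3 latch the OV/UV/OT events as one little-endian 32-bit word */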
+ ret = regmap_raw_read(priv->regmap, RT5190A_REG_OVINT, &raws,
+ sizeof(raws));
+ if (ret) {
+ dev_err(priv->dev, "Failed to read events\n");
+ return IRQ_NONE;
+ }
+
+ events = le32_to_cpu(raws);
+
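+	/* write the latched value back to acknowledge and clear the events */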
+ ret = regmap_raw_write(priv->regmap, RT5190A_REG_OVINT, &raws,
+ sizeof(raws));
+ if (ret)
+ dev_err(priv->dev, "Failed to write-clear events\n");
+
+	/* Handle OV/UV events */
+ for (i = 0; i < ARRAY_SIZE(event_tbl); i++) {
+ fields = events & event_tbl[i].bitmask;
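+		/* shift down so the per-regulator bits line up with RT5190A_RID_BITMASK() */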
+ fields >>= ffs(event_tbl[i].bitmask) - 1;
+
+ for (j = 0; j < RT5190A_MAX_IDX; j++) {
+ if (!(fields & RT5190A_RID_BITMASK(j)))
+ continue;
+
+ regulator_notifier_call_chain(priv->rdev[j],
+ event_tbl[i].report,
+ NULL);
+ }
+ }
+
+ /* Handle CH234 OT event */
+ if (events & RT5190A_CH234OT_MASK) {
+ for (j = RT5190A_IDX_BUCK2; j < RT5190A_IDX_LDO; j++) {
+ regulator_notifier_call_chain(priv->rdev[j],
+ REGULATOR_ERROR_OVER_TEMP,
+ NULL);
+ }
+ }
+
+	/* Warn if chip overtemperature occurs */
+ if (events & RT5190A_CHIPOT_MASK)
+ dev_warn(priv->dev, "CHIP overheat\n");
+
+ return IRQ_HANDLED;
+}
+
+static unsigned int rt5190a_of_map_mode(unsigned int mode)
+{
+ switch (mode) {
+ case RT5190A_OPMODE_AUTO:
+ return REGULATOR_MODE_NORMAL;
+ case RT5190A_OPMODE_FPWM:
+ return REGULATOR_MODE_FAST;
+ default:
+ return REGULATOR_MODE_INVALID;
+ }
+}
+
+static int rt5190a_of_parse_cb(struct rt5190a_priv *priv, int rid,
+ struct of_regulator_match *match)
+{
+ struct regulator_desc *desc = priv->rdesc + rid;
+ struct regulator_init_data *init_data = match->init_data;
+ struct device_node *np = match->of_node;
+ bool latchup_enable;
+ unsigned int mask = RT5190A_RID_BITMASK(rid), val;
+
+ switch (rid) {
+ case RT5190A_IDX_BUCK1:
+ case RT5190A_IDX_BUCK4:
+ case RT5190A_IDX_LDO:
+ init_data->constraints.apply_uV = 0;
+
+ if (init_data->constraints.min_uV ==
+ init_data->constraints.max_uV)
+ desc->fixed_uV = init_data->constraints.min_uV;
+ else {
+ dev_err(priv->dev,
+ "Variable voltage for fixed regulator\n");
+ return -EINVAL;
+ }
+ break;
+ default:
+ break;
+ }
+
+ latchup_enable = of_property_read_bool(np, "richtek,latchup-enable");
+
+	/* protection mode: 0 = latchup, 1 = hiccup (default) */
+ val = !latchup_enable ? mask : 0;
+
+ return regmap_update_bits(priv->regmap, RT5190A_REG_PROTMODE, mask, val);
+}
+
+static void rt5190a_fillin_regulator_desc(struct regulator_desc *desc, int rid)
+{
+ static const char * const regu_name[] = { "buck1", "buck2",
+ "buck3", "buck4",
+ "ldo" };
+ static const char * const supply[] = { NULL, "vin2", "vin3", "vin4",
+ "vinldo" };
+
+ desc->name = regu_name[rid];
+ desc->supply_name = supply[rid];
+ desc->owner = THIS_MODULE;
+ desc->type = REGULATOR_VOLTAGE;
+ desc->id = rid;
+ desc->enable_reg = RT5190A_REG_ENABLE;
+ desc->enable_mask = RT5190A_RID_BITMASK(rid);
+ desc->active_discharge_reg = RT5190A_REG_DISCHARGE;
+ desc->active_discharge_mask = RT5190A_RID_BITMASK(rid);
+ desc->active_discharge_on = RT5190A_RID_BITMASK(rid);
+
+ switch (rid) {
+ case RT5190A_IDX_BUCK1:
+ desc->active_discharge_mask = RT5190A_BUCK1_DISCHG_MASK;
+ desc->active_discharge_on = RT5190A_BUCK1_DISCHG_ONVAL;
+ desc->n_voltages = 1;
+ desc->ops = &rt5190a_fixed_buck_ops;
+ desc->of_map_mode = rt5190a_of_map_mode;
+ break;
+ case RT5190A_IDX_BUCK2:
+ desc->vsel_reg = RT5190A_REG_BUCK2VSEL;
+ desc->vsel_mask = RT5190A_VSEL_MASK;
+ desc->min_uV = RT5190A_BUCK23_MINUV;
+ desc->uV_step = RT5190A_BUCK23_STEPUV;
+ desc->n_voltages = RT5190A_BUCK23_STEPNUM;
+ desc->ops = &rt5190a_ranged_buck_ops;
+ break;
+ case RT5190A_IDX_BUCK3:
+ desc->vsel_reg = RT5190A_REG_BUCK3VSEL;
+ desc->vsel_mask = RT5190A_VSEL_MASK;
+ desc->min_uV = RT5190A_BUCK23_MINUV;
+ desc->uV_step = RT5190A_BUCK23_STEPUV;
+ desc->n_voltages = RT5190A_BUCK23_STEPNUM;
+ desc->ops = &rt5190a_ranged_buck_ops;
+ break;
+ case RT5190A_IDX_BUCK4:
+ desc->n_voltages = 1;
+ desc->ops = &rt5190a_fixed_buck_ops;
+ desc->of_map_mode = rt5190a_of_map_mode;
+ break;
+ case RT5190A_IDX_LDO:
+ desc->n_voltages = 1;
+ desc->ops = &rt5190a_fixed_ldo_ops;
+ break;
+ }
+}
+
+static struct of_regulator_match rt5190a_regulator_match[] = {
+ { .name = "buck1", },
+ { .name = "buck2", },
+ { .name = "buck3", },
+ { .name = "buck4", },
+ { .name = "ldo", }
+};
+
+static int rt5190a_parse_regulator_dt_data(struct rt5190a_priv *priv)
+{
+ struct device_node *regulator_np;
+ struct regulator_desc *reg_desc;
+ struct of_regulator_match *match;
+ int i, ret;
+
+ for (i = 0; i < RT5190A_MAX_IDX; i++) {
+ reg_desc = priv->rdesc + i;
+ match = rt5190a_regulator_match + i;
+
+ rt5190a_fillin_regulator_desc(reg_desc, i);
+
+ match->desc = reg_desc;
+ }
+
+ regulator_np = of_get_child_by_name(priv->dev->of_node, "regulators");
+ if (!regulator_np) {
+ dev_err(priv->dev, "Could not find 'regulators' node\n");
+ return -ENODEV;
+ }
+
+ ret = of_regulator_match(priv->dev, regulator_np,
+ rt5190a_regulator_match,
+ ARRAY_SIZE(rt5190a_regulator_match));
+
+ of_node_put(regulator_np);
+
+ if (ret < 0) {
+ dev_err(priv->dev,
+ "Error parsing regulator init data: %d\n", ret);
+ return ret;
+ }
+
+ for (i = 0; i < RT5190A_MAX_IDX; i++) {
+ match = rt5190a_regulator_match + i;
+
+ ret = rt5190a_of_parse_cb(priv, i, match);
+ if (ret) {
+			dev_err(priv->dev, "of_parse_cb failed for regulator [%d]\n", i);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
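+/* one-shot register setup applied at probe through regmap_register_patch() */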
+static const struct reg_sequence rt5190a_init_patch[] = {
+ { 0x09, 0x3d, },
+ { 0x0a, 0x3e, },
+ { 0x0b, 0x01, },
+ { 0x10, 0xff, },
+ { 0x11, 0xff, },
+ { 0x12, 0xff, },
+ { 0x13, 0xff, },
+ { 0x14, 0, },
+ { 0x15, 0, },
+ { 0x16, 0x3e, },
+ { 0x17, 0, }
+};
+
+static int rt5190a_device_initialize(struct rt5190a_priv *priv)
+{
+ bool mute_enable;
+ int ret;
+
+ ret = regmap_register_patch(priv->regmap, rt5190a_init_patch,
+ ARRAY_SIZE(rt5190a_init_patch));
+ if (ret) {
+		dev_err(priv->dev, "Failed to apply register patch\n");
+ return ret;
+ }
+
+ mute_enable = device_property_read_bool(priv->dev,
+ "richtek,mute-enable");
+
+ if (mute_enable) {
+ ret = regmap_write(priv->regmap, RT5190A_REG_MUTECNTL, 0x00);
+ if (ret) {
+ dev_err(priv->dev, "Failed to enable mute function\n");
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int rt5190a_device_check(struct rt5190a_priv *priv)
+{
+ u16 devid;
+ int ret;
+
+ ret = regmap_raw_read(priv->regmap, RT5190A_REG_MANUFACTURE, &devid,
+ sizeof(devid));
+ if (ret)
+ return ret;
+
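+	/* a genuine RT5190A reads back zero from the manufacture register */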
+ if (devid) {
+ dev_err(priv->dev, "Incorrect device id 0x%04x\n", devid);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static const struct regmap_config rt5190a_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = RT5190A_REG_HOTDIEMASK,
+};
+
+static int rt5190a_probe(struct i2c_client *i2c)
+{
+ struct rt5190a_priv *priv;
+ struct regulator_config cfg = {};
+ int i, ret;
+
+ priv = devm_kzalloc(&i2c->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->dev = &i2c->dev;
+
+ priv->regmap = devm_regmap_init_i2c(i2c, &rt5190a_regmap_config);
+ if (IS_ERR(priv->regmap)) {
+ dev_err(&i2c->dev, "Failed to allocate regmap\n");
+ return PTR_ERR(priv->regmap);
+ }
+
+ ret = rt5190a_device_check(priv);
+ if (ret) {
+		dev_err(&i2c->dev, "Device check failed: %d\n", ret);
+ return ret;
+ }
+
+ ret = rt5190a_device_initialize(priv);
+ if (ret) {
+ dev_err(&i2c->dev, "Failed to initialize the device\n");
+ return ret;
+ }
+
+ ret = rt5190a_parse_regulator_dt_data(priv);
+ if (ret) {
+ dev_err(&i2c->dev, "Failed to parse regulator dt\n");
+ return ret;
+ }
+
+ cfg.dev = &i2c->dev;
+ cfg.regmap = priv->regmap;
+
+ for (i = 0; i < RT5190A_MAX_IDX; i++) {
+ struct regulator_desc *desc = priv->rdesc + i;
+ struct of_regulator_match *match = rt5190a_regulator_match + i;
+
+ cfg.init_data = match->init_data;
+ cfg.of_node = match->of_node;
+
+ priv->rdev[i] = devm_regulator_register(&i2c->dev, desc, &cfg);
+ if (IS_ERR(priv->rdev[i])) {
+ dev_err(&i2c->dev, "Failed to register regulator %s\n",
+ desc->name);
+ return PTR_ERR(priv->rdev[i]);
+ }
+ }
+
+ if (i2c->irq) {
+ ret = devm_request_threaded_irq(&i2c->dev, i2c->irq, NULL,
+ rt5190a_irq_handler,
+ IRQF_ONESHOT,
+ dev_name(&i2c->dev), priv);
+ if (ret) {
+ dev_err(&i2c->dev, "Failed to register interrupt\n");
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static const struct of_device_id __maybe_unused rt5190a_device_table[] = {
+ { .compatible = "richtek,rt5190a", },
+ {}
+};
+MODULE_DEVICE_TABLE(of, rt5190a_device_table);
+
+static struct i2c_driver rt5190a_driver = {
+ .driver = {
+ .name = "rt5190a",
+ .of_match_table = rt5190a_device_table,
+ },
+ .probe_new = rt5190a_probe,
+};
+module_i2c_driver(rt5190a_driver);
+
+MODULE_AUTHOR("ChiYuan Huang <cy_huang@richtek.com>");
+MODULE_DESCRIPTION("Richtek RT5190A Regulator Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/regulator/sc2731-regulator.c b/drivers/regulator/sc2731-regulator.c
index 0f21f95c8981..71e5ceb679f4 100644
--- a/drivers/regulator/sc2731-regulator.c
+++ b/drivers/regulator/sc2731-regulator.c
@@ -1,4 +1,4 @@
- //SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2017 Spreadtrum Communications Inc.
*/
diff --git a/drivers/regulator/ti-abb-regulator.c b/drivers/regulator/ti-abb-regulator.c
index 2931a0b89bff..bd7b2f287250 100644
--- a/drivers/regulator/ti-abb-regulator.c
+++ b/drivers/regulator/ti-abb-regulator.c
@@ -42,7 +42,7 @@
/**
* struct ti_abb_info - ABB information per voltage setting
* @opp_sel: one of TI_ABB macro
- * @vset: (optional) vset value that LDOVBB needs to be overriden with.
+ * @vset: (optional) vset value that LDOVBB needs to be overridden with.
*
* Array of per voltage entries organized in the same order as regulator_desc's
* volt_table list. (selector is used to index from this array)
@@ -484,7 +484,7 @@ static int ti_abb_init_timings(struct device *dev, struct ti_abb *abb)
/* Calculate cycle rate */
cycle_rate = DIV_ROUND_CLOSEST(clock_cycles * 10, clk_rate);
- /* Calulate SR2_WTCNT_VALUE */
+ /* Calculate SR2_WTCNT_VALUE */
sr2_wt_cnt_val = DIV_ROUND_CLOSEST(abb->settling_time * 10, cycle_rate);
dev_dbg(dev, "%s: Clk_rate=%ld, sr2_cnt=0x%08x\n", __func__,
@@ -688,7 +688,7 @@ MODULE_DEVICE_TABLE(of, ti_abb_of_match);
* @pdev: ABB platform device
*
* Initializes an individual ABB LDO for required Body-Bias. ABB is used to
- * addional bias supply to SoC modules for power savings or mandatory stability
+ * additional bias supply to SoC modules for power savings or mandatory stability
* configuration at certain Operating Performance Points(OPPs).
*
* Return: 0 on success or appropriate error value when fails
diff --git a/drivers/regulator/tps6286x-regulator.c b/drivers/regulator/tps6286x-regulator.c
new file mode 100644
index 000000000000..e29deda30d75
--- /dev/null
+++ b/drivers/regulator/tps6286x-regulator.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright Axis Communications AB
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+#include <linux/regulator/of_regulator.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/driver.h>
+
+#include <dt-bindings/regulator/ti,tps62864.h>
+
+#define TPS6286X_VOUT1 0x01
+#define TPS6286X_VOUT1_VO1_SET GENMASK(7, 0)
+
+#define TPS6286X_CONTROL 0x03
+#define TPS6286X_CONTROL_FPWM BIT(4)
+#define TPS6286X_CONTROL_SWEN BIT(5)
+
+#define TPS6286X_MIN_MV 400
+#define TPS6286X_MAX_MV 1675
+#define TPS6286X_STEP_MV 5
+
+static const struct regmap_config tps6286x_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+};
+
+static int tps6286x_set_mode(struct regulator_dev *rdev, unsigned int mode)
+{
+ unsigned int val;
+
+ switch (mode) {
+ case REGULATOR_MODE_NORMAL:
+ val = 0;
+ break;
+ case REGULATOR_MODE_FAST:
+ val = TPS6286X_CONTROL_FPWM;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return regmap_update_bits(rdev->regmap, TPS6286X_CONTROL,
+ TPS6286X_CONTROL_FPWM, val);
+}
+
+static unsigned int tps6286x_get_mode(struct regulator_dev *rdev)
+{
+ unsigned int val;
+ int ret;
+
+ ret = regmap_read(rdev->regmap, TPS6286X_CONTROL, &val);
+ if (ret < 0)
+ return 0;
+
+ return (val & TPS6286X_CONTROL_FPWM) ? REGULATOR_MODE_FAST : REGULATOR_MODE_NORMAL;
+}
+
+static const struct regulator_ops tps6286x_regulator_ops = {
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .set_mode = tps6286x_set_mode,
+ .get_mode = tps6286x_get_mode,
+ .is_enabled = regulator_is_enabled_regmap,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .list_voltage = regulator_list_voltage_linear,
+};
+
+static unsigned int tps6286x_of_map_mode(unsigned int mode)
+{
+ switch (mode) {
+ case TPS62864_MODE_NORMAL:
+ return REGULATOR_MODE_NORMAL;
+ case TPS62864_MODE_FPWM:
+ return REGULATOR_MODE_FAST;
+ default:
+ return REGULATOR_MODE_INVALID;
+ }
+}
+
+static const struct regulator_desc tps6286x_reg = {
+ .name = "tps6286x",
+ .of_match = of_match_ptr("SW"),
+ .owner = THIS_MODULE,
+ .ops = &tps6286x_regulator_ops,
+ .of_map_mode = tps6286x_of_map_mode,
+ .regulators_node = of_match_ptr("regulators"),
+ .type = REGULATOR_VOLTAGE,
+ .n_voltages = ((TPS6286X_MAX_MV - TPS6286X_MIN_MV) / TPS6286X_STEP_MV) + 1,
+ .min_uV = TPS6286X_MIN_MV * 1000,
+ .uV_step = TPS6286X_STEP_MV * 1000,
+ .vsel_reg = TPS6286X_VOUT1,
+ .vsel_mask = TPS6286X_VOUT1_VO1_SET,
+ .enable_reg = TPS6286X_CONTROL,
+ .enable_mask = TPS6286X_CONTROL_SWEN,
+ .ramp_delay = 1000,
+ /* tDelay + tRamp, rounded up */
+ .enable_time = 3000,
+};
+
+static const struct of_device_id tps6286x_dt_ids[] = {
+ { .compatible = "ti,tps62864", },
+ { .compatible = "ti,tps62866", },
+ { .compatible = "ti,tps62868", },
+ { .compatible = "ti,tps62869", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, tps6286x_dt_ids);
+
+static int tps6286x_i2c_probe(struct i2c_client *i2c,
+ const struct i2c_device_id *id)
+{
+ struct device *dev = &i2c->dev;
+ struct regulator_config config = {};
+ struct regulator_dev *rdev;
+ struct regmap *regmap;
+
+ regmap = devm_regmap_init_i2c(i2c, &tps6286x_regmap_config);
+ if (IS_ERR(regmap))
+ return PTR_ERR(regmap);
+
+ config.dev = &i2c->dev;
+ config.of_node = dev->of_node;
+ config.regmap = regmap;
+
+ rdev = devm_regulator_register(&i2c->dev, &tps6286x_reg, &config);
+ if (IS_ERR(rdev)) {
+ dev_err(&i2c->dev, "Failed to register tps6286x regulator\n");
+ return PTR_ERR(rdev);
+ }
+
+ return 0;
+}
+
+static const struct i2c_device_id tps6286x_i2c_id[] = {
+ { "tps62864", 0 },
+ { "tps62866", 0 },
+ { "tps62868", 0 },
+ { "tps62869", 0 },
+ {},
+};
+MODULE_DEVICE_TABLE(i2c, tps6286x_i2c_id);
+
+static struct i2c_driver tps6286x_regulator_driver = {
+ .driver = {
+ .name = "tps6286x",
+ .of_match_table = of_match_ptr(tps6286x_dt_ids),
+ },
+ .probe = tps6286x_i2c_probe,
+ .id_table = tps6286x_i2c_id,
+};
+
+module_i2c_driver(tps6286x_regulator_driver);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/regulator/vctrl-regulator.c b/drivers/regulator/vctrl-regulator.c
index d2a37978fc3a..aac7be3b33f7 100644
--- a/drivers/regulator/vctrl-regulator.c
+++ b/drivers/regulator/vctrl-regulator.c
@@ -185,10 +185,7 @@ static int vctrl_set_voltage_sel(struct regulator_dev *rdev,
unsigned int next_sel;
int delay;
- if (selector >= vctrl->vtable[vctrl->sel].ovp_min_sel)
- next_sel = selector;
- else
- next_sel = vctrl->vtable[vctrl->sel].ovp_min_sel;
+ next_sel = max_t(unsigned int, selector, vctrl->vtable[vctrl->sel].ovp_min_sel);
ret = regulator_set_voltage_rdev(rdev->supply->rdev,
vctrl->vtable[next_sel].ctrl,
diff --git a/drivers/regulator/virtual.c b/drivers/regulator/virtual.c
index 52c5a0e0acd8..5d32628a5011 100644
--- a/drivers/regulator/virtual.c
+++ b/drivers/regulator/virtual.c
@@ -13,6 +13,7 @@
#include <linux/regulator/consumer.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <linux/of.h>
struct virtual_consumer_data {
struct mutex lock;
@@ -281,26 +282,53 @@ static const struct attribute_group regulator_virtual_attr_group = {
.attrs = regulator_virtual_attributes,
};
+#ifdef CONFIG_OF
+static const struct of_device_id regulator_virtual_consumer_of_match[] = {
+ { .compatible = "regulator-virtual-consumer" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, regulator_virtual_consumer_of_match);
+#endif
+
static int regulator_virtual_probe(struct platform_device *pdev)
{
char *reg_id = dev_get_platdata(&pdev->dev);
struct virtual_consumer_data *drvdata;
+ static bool warned;
int ret;
+ if (!warned) {
+ warned = true;
+ pr_warn("**********************************************************\n");
+ pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
+ pr_warn("** **\n");
+ pr_warn("** regulator-virtual-consumer is only for testing and **\n");
+ pr_warn("** debugging. Do not use it in a production kernel. **\n");
+ pr_warn("** **\n");
+ pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
+ pr_warn("**********************************************************\n");
+ }
+
drvdata = devm_kzalloc(&pdev->dev, sizeof(struct virtual_consumer_data),
GFP_KERNEL);
if (drvdata == NULL)
return -ENOMEM;
+ /*
+ * This virtual consumer does not have any hardware-defined supply
+ * name, so just allow the regulator to be specified in a property
+ * named "default-supply" when we're being probed from devicetree.
+ */
+ if (!reg_id && pdev->dev.of_node)
+ reg_id = "default";
+
mutex_init(&drvdata->lock);
drvdata->regulator = devm_regulator_get(&pdev->dev, reg_id);
- if (IS_ERR(drvdata->regulator)) {
- ret = PTR_ERR(drvdata->regulator);
- dev_err(&pdev->dev, "Failed to obtain supply '%s': %d\n",
- reg_id, ret);
- return ret;
- }
+ if (IS_ERR(drvdata->regulator))
+ return dev_err_probe(&pdev->dev, PTR_ERR(drvdata->regulator),
+ "Failed to obtain supply '%s'\n",
+ reg_id);
ret = sysfs_create_group(&pdev->dev.kobj,
&regulator_virtual_attr_group);
@@ -334,6 +362,7 @@ static struct platform_driver regulator_virtual_consumer_driver = {
.remove = regulator_virtual_remove,
.driver = {
.name = "reg-virt-consumer",
+ .of_match_table = of_match_ptr(regulator_virtual_consumer_of_match),
},
};
diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c
index 6579bfdb0c26..b1d5aac8917d 100644
--- a/drivers/regulator/wm8350-regulator.c
+++ b/drivers/regulator/wm8350-regulator.c
@@ -1112,7 +1112,7 @@ static int wm8350_regulator_probe(struct platform_device *pdev)
if (pdev->id < WM8350_DCDC_1 || pdev->id > WM8350_ISINK_B)
return -ENODEV;
- /* do any regulatior specific init */
+ /* do any regulator specific init */
switch (pdev->id) {
case WM8350_DCDC_1:
val = wm8350_reg_read(wm8350, WM8350_DCDC1_LOW_POWER);
diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
index 3ddd426fc969..166019786653 100644
--- a/drivers/remoteproc/Kconfig
+++ b/drivers/remoteproc/Kconfig
@@ -180,6 +180,7 @@ config QCOM_Q6V5_ADSP
depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
depends on QCOM_SYSMON || QCOM_SYSMON=n
depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n
+ depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n
select MFD_SYSCON
select QCOM_PIL_INFO
select QCOM_MDT_LOADER
@@ -199,6 +200,7 @@ config QCOM_Q6V5_MSS
depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
depends on QCOM_SYSMON || QCOM_SYSMON=n
depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n
+ depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n
select MFD_SYSCON
select QCOM_MDT_LOADER
select QCOM_PIL_INFO
@@ -218,6 +220,7 @@ config QCOM_Q6V5_PAS
depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
depends on QCOM_SYSMON || QCOM_SYSMON=n
depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n
+ depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n
select MFD_SYSCON
select QCOM_PIL_INFO
select QCOM_MDT_LOADER
@@ -239,6 +242,7 @@ config QCOM_Q6V5_WCSS
depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
depends on QCOM_SYSMON || QCOM_SYSMON=n
depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n
+ depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n
select MFD_SYSCON
select QCOM_MDT_LOADER
select QCOM_PIL_INFO
diff --git a/drivers/remoteproc/qcom_q6v5.c b/drivers/remoteproc/qcom_q6v5.c
index eada7e34f3af..442a388f8102 100644
--- a/drivers/remoteproc/qcom_q6v5.c
+++ b/drivers/remoteproc/qcom_q6v5.c
@@ -10,6 +10,7 @@
#include <linux/platform_device.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/soc/qcom/qcom_aoss.h>
#include <linux/soc/qcom/smem.h>
#include <linux/soc/qcom/smem_state.h>
#include <linux/remoteproc.h>
diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c
index d6214cb66026..5663cf799c95 100644
--- a/drivers/rpmsg/rpmsg_char.c
+++ b/drivers/rpmsg/rpmsg_char.c
@@ -93,7 +93,7 @@ static int rpmsg_eptdev_destroy(struct device *dev, void *data)
/* wake up any blocked readers */
wake_up_interruptible(&eptdev->readq);
- device_del(&eptdev->dev);
+ cdev_device_del(&eptdev->cdev, &eptdev->dev);
put_device(&eptdev->dev);
return 0;
@@ -336,7 +336,6 @@ static void rpmsg_eptdev_release_device(struct device *dev)
ida_simple_remove(&rpmsg_ept_ida, dev->id);
ida_simple_remove(&rpmsg_minor_ida, MINOR(eptdev->dev.devt));
- cdev_del(&eptdev->cdev);
kfree(eptdev);
}
@@ -381,19 +380,13 @@ static int rpmsg_eptdev_create(struct rpmsg_ctrldev *ctrldev,
dev->id = ret;
dev_set_name(dev, "rpmsg%d", ret);
- ret = cdev_add(&eptdev->cdev, dev->devt, 1);
+ ret = cdev_device_add(&eptdev->cdev, &eptdev->dev);
if (ret)
goto free_ept_ida;
/* We can now rely on the release function for cleanup */
dev->release = rpmsg_eptdev_release_device;
- ret = device_add(dev);
- if (ret) {
- dev_err(dev, "device_add failed: %d\n", ret);
- put_device(dev);
- }
-
return ret;
free_ept_ida:
@@ -462,7 +455,6 @@ static void rpmsg_ctrldev_release_device(struct device *dev)
ida_simple_remove(&rpmsg_ctrl_ida, dev->id);
ida_simple_remove(&rpmsg_minor_ida, MINOR(dev->devt));
- cdev_del(&ctrldev->cdev);
kfree(ctrldev);
}
@@ -497,19 +489,13 @@ static int rpmsg_chrdev_probe(struct rpmsg_device *rpdev)
dev->id = ret;
dev_set_name(&ctrldev->dev, "rpmsg_ctrl%d", ret);
- ret = cdev_add(&ctrldev->cdev, dev->devt, 1);
+ ret = cdev_device_add(&ctrldev->cdev, &ctrldev->dev);
if (ret)
goto free_ctrl_ida;
/* We can now rely on the release function for cleanup */
dev->release = rpmsg_ctrldev_release_device;
- ret = device_add(dev);
- if (ret) {
- dev_err(&rpdev->dev, "device_add failed: %d\n", ret);
- put_device(dev);
- }
-
dev_set_drvdata(&rpdev->dev, ctrldev);
return ret;
@@ -535,7 +521,7 @@ static void rpmsg_chrdev_remove(struct rpmsg_device *rpdev)
if (ret)
dev_warn(&rpdev->dev, "failed to nuke endpoints: %d\n", ret);
- device_del(&ctrldev->dev);
+ cdev_device_del(&ctrldev->cdev, &ctrldev->dev);
put_device(&ctrldev->dev);
}
diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c
index 2f83adef966e..6d66ab5a8b17 100644
--- a/drivers/rtc/rtc-ds1302.c
+++ b/drivers/rtc/rtc-ds1302.c
@@ -185,10 +185,9 @@ static int ds1302_probe(struct spi_device *spi)
return 0;
}
-static int ds1302_remove(struct spi_device *spi)
+static void ds1302_remove(struct spi_device *spi)
{
spi_set_drvdata(spi, NULL);
- return 0;
}
#ifdef CONFIG_OF
diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c
index 9ef107b99b65..ed9360486953 100644
--- a/drivers/rtc/rtc-ds1305.c
+++ b/drivers/rtc/rtc-ds1305.c
@@ -720,7 +720,7 @@ static int ds1305_probe(struct spi_device *spi)
return 0;
}
-static int ds1305_remove(struct spi_device *spi)
+static void ds1305_remove(struct spi_device *spi)
{
struct ds1305 *ds1305 = spi_get_drvdata(spi);
@@ -730,8 +730,6 @@ static int ds1305_remove(struct spi_device *spi)
devm_free_irq(&spi->dev, spi->irq, ds1305);
cancel_work_sync(&ds1305->work);
}
-
- return 0;
}
static struct spi_driver ds1305_driver = {
diff --git a/drivers/rtc/rtc-ds1343.c b/drivers/rtc/rtc-ds1343.c
index f14ed6c96437..ed5a6ba89a3e 100644
--- a/drivers/rtc/rtc-ds1343.c
+++ b/drivers/rtc/rtc-ds1343.c
@@ -434,11 +434,9 @@ static int ds1343_probe(struct spi_device *spi)
return 0;
}
-static int ds1343_remove(struct spi_device *spi)
+static void ds1343_remove(struct spi_device *spi)
{
dev_pm_clear_wake_irq(&spi->dev);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 8b458010f88a..3b7af00a7825 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -47,7 +47,6 @@
#include <linux/module.h>
#include <linux/wait.h>
#include <linux/blkdev.h>
-#include <linux/genhd.h>
#include <linux/hdreg.h>
#include <linux/interrupt.h>
#include <linux/log2.h>
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 61ecdcb2cc6a..2a9c0ddcade5 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -15,7 +15,6 @@
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
-#include <linux/genhd.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <asm/eadm.h>
diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h
index a05a4297cfae..af82b3214774 100644
--- a/drivers/s390/block/scm_blk.h
+++ b/drivers/s390/block/scm_blk.h
@@ -6,7 +6,6 @@
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
-#include <linux/genhd.h>
#include <linux/list.h>
#include <asm/debug.h>
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index cd938a26b76c..3b1cd0c96a74 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -1180,7 +1180,7 @@ static int io_subchannel_chp_event(struct subchannel *sch,
else
path_event[chpid] = PE_NONE;
}
- if (cdev)
+ if (cdev && cdev->drv && cdev->drv->path_event)
cdev->drv->path_event(cdev, path_event);
break;
}
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index d24cafe02708..511bf8e0a436 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -521,6 +521,8 @@ static void zfcp_fc_adisc_handler(void *data)
goto out;
}
+ /* re-init to undo drop from zfcp_fc_adisc() */
+ port->d_id = ntoh24(adisc_resp->adisc_port_id);
/* port is good, unblock rport without going through erp */
zfcp_scsi_schedule_rport_register(port);
out:
@@ -534,6 +536,7 @@ static int zfcp_fc_adisc(struct zfcp_port *port)
struct zfcp_fc_req *fc_req;
struct zfcp_adapter *adapter = port->adapter;
struct Scsi_Host *shost = adapter->scsi_host;
+ u32 d_id;
int ret;
fc_req = kmem_cache_zalloc(zfcp_fc_req_cache, GFP_ATOMIC);
@@ -558,7 +561,15 @@ static int zfcp_fc_adisc(struct zfcp_port *port)
fc_req->u.adisc.req.adisc_cmd = ELS_ADISC;
hton24(fc_req->u.adisc.req.adisc_port_id, fc_host_port_id(shost));
- ret = zfcp_fsf_send_els(adapter, port->d_id, &fc_req->ct_els,
+	d_id = port->d_id; /* remember as destination for the ELS send below */
+ /*
+ * Force fresh GID_PN lookup on next port recovery.
+ * Must happen after request setup and before sending request,
+ * to prevent race with port->d_id re-init in zfcp_fc_adisc_handler().
+ */
+ port->d_id = 0;
+
+ ret = zfcp_fsf_send_els(adapter, d_id, &fc_req->ct_els,
ZFCP_FC_CTELS_TMO);
if (ret)
kmem_cache_free(zfcp_fc_req_cache, fc_req);
diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c
index b9482da79512..3ebe66151dcb 100644
--- a/drivers/scsi/3w-sas.c
+++ b/drivers/scsi/3w-sas.c
@@ -1567,8 +1567,6 @@ static int twl_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
pci_try_set_mwi(pdev);
retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
- if (retval)
- retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (retval) {
TW_PRINTK(host, TW_DRIVER, 0x18, "Failed to set dma mask");
retval = -ENODEV;
@@ -1786,8 +1784,6 @@ static int __maybe_unused twl_resume(struct device *dev)
pci_try_set_mwi(pdev);
retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
- if (retval)
- retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (retval) {
TW_PRINTK(host, TW_DRIVER, 0x25, "Failed to set dma mask during resume");
retval = -ENODEV;
diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index 3ad3ebaca8e9..ad4972c0fc53 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c
@@ -1507,7 +1507,6 @@ NCR_700_intr(int irq, void *dev_id)
struct scsi_cmnd *SCp = hostdata->cmd;
handled = 1;
- SCp = hostdata->cmd;
if(istat & SCSI_INT_PENDING) {
udelay(10);
diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c
index 440ef32be048..e5aa982ffedc 100644
--- a/drivers/scsi/bfa/bfad.c
+++ b/drivers/scsi/bfa/bfad.c
@@ -732,9 +732,6 @@ bfad_pci_init(struct pci_dev *pdev, struct bfad_s *bfad)
pci_set_master(pdev);
rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
- if (rc)
- rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
-
if (rc) {
rc = -ENODEV;
printk(KERN_ERR "dma_set_mask_and_coherent fail %p\n", pdev);
@@ -1560,9 +1557,6 @@ bfad_pci_slot_reset(struct pci_dev *pdev)
rc = dma_set_mask_and_coherent(&bfad->pcidev->dev, DMA_BIT_MASK(64));
if (rc)
- rc = dma_set_mask_and_coherent(&bfad->pcidev->dev,
- DMA_BIT_MASK(32));
- if (rc)
goto out_disable_device;
if (restart_bfa(bfad) == -1)
diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index 71fa62bd3083..a826456c6075 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -82,7 +82,7 @@ static int bnx2fc_bind_pcidev(struct bnx2fc_hba *hba);
static void bnx2fc_unbind_pcidev(struct bnx2fc_hba *hba);
static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface,
struct device *parent, int npiv);
-static void bnx2fc_destroy_work(struct work_struct *work);
+static void bnx2fc_port_destroy(struct fcoe_port *port);
static struct bnx2fc_hba *bnx2fc_hba_lookup(struct net_device *phys_dev);
static struct bnx2fc_interface *bnx2fc_interface_lookup(struct net_device
@@ -508,7 +508,8 @@ static int bnx2fc_l2_rcv_thread(void *arg)
static void bnx2fc_recv_frame(struct sk_buff *skb)
{
- u32 fr_len;
+ u64 crc_err;
+ u32 fr_len, fr_crc;
struct fc_lport *lport;
struct fcoe_rcv_info *fr;
struct fc_stats *stats;
@@ -542,6 +543,11 @@ static void bnx2fc_recv_frame(struct sk_buff *skb)
skb_pull(skb, sizeof(struct fcoe_hdr));
fr_len = skb->len - sizeof(struct fcoe_crc_eof);
+ stats = per_cpu_ptr(lport->stats, get_cpu());
+ stats->RxFrames++;
+ stats->RxWords += fr_len / FCOE_WORD_TO_BYTE;
+ put_cpu();
+
fp = (struct fc_frame *)skb;
fc_frame_init(fp);
fr_dev(fp) = lport;
@@ -624,16 +630,15 @@ static void bnx2fc_recv_frame(struct sk_buff *skb)
return;
}
- stats = per_cpu_ptr(lport->stats, smp_processor_id());
- stats->RxFrames++;
- stats->RxWords += fr_len / FCOE_WORD_TO_BYTE;
+ fr_crc = le32_to_cpu(fr_crc(fp));
- if (le32_to_cpu(fr_crc(fp)) !=
- ~crc32(~0, skb->data, fr_len)) {
- if (stats->InvalidCRCCount < 5)
+ if (unlikely(fr_crc != ~crc32(~0, skb->data, fr_len))) {
+ stats = per_cpu_ptr(lport->stats, get_cpu());
+ crc_err = (stats->InvalidCRCCount++);
+ put_cpu();
+ if (crc_err < 5)
printk(KERN_WARNING PFX "dropping frame with "
"CRC error\n");
- stats->InvalidCRCCount++;
kfree_skb(skb);
return;
}
@@ -907,9 +912,6 @@ static void bnx2fc_indicate_netevent(void *context, unsigned long event,
__bnx2fc_destroy(interface);
}
mutex_unlock(&bnx2fc_dev_lock);
-
- /* Ensure ALL destroy work has been completed before return */
- flush_workqueue(bnx2fc_wq);
return;
default:
@@ -1215,8 +1217,8 @@ static int bnx2fc_vport_destroy(struct fc_vport *vport)
mutex_unlock(&n_port->lp_mutex);
bnx2fc_free_vport(interface->hba, port->lport);
bnx2fc_port_shutdown(port->lport);
+ bnx2fc_port_destroy(port);
bnx2fc_interface_put(interface);
- queue_work(bnx2fc_wq, &port->destroy_work);
return 0;
}
@@ -1525,7 +1527,6 @@ static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface,
port->lport = lport;
port->priv = interface;
port->get_netdev = bnx2fc_netdev;
- INIT_WORK(&port->destroy_work, bnx2fc_destroy_work);
/* Configure fcoe_port */
rc = bnx2fc_lport_config(lport);
@@ -1653,8 +1654,8 @@ static void __bnx2fc_destroy(struct bnx2fc_interface *interface)
bnx2fc_interface_cleanup(interface);
bnx2fc_stop(interface);
list_del(&interface->list);
+ bnx2fc_port_destroy(port);
bnx2fc_interface_put(interface);
- queue_work(bnx2fc_wq, &port->destroy_work);
}
/**
@@ -1694,15 +1695,12 @@ netdev_err:
return rc;
}
-static void bnx2fc_destroy_work(struct work_struct *work)
+static void bnx2fc_port_destroy(struct fcoe_port *port)
{
- struct fcoe_port *port;
struct fc_lport *lport;
- port = container_of(work, struct fcoe_port, destroy_work);
lport = port->lport;
-
- BNX2FC_HBA_DBG(lport, "Entered bnx2fc_destroy_work\n");
+ BNX2FC_HBA_DBG(lport, "Entered %s, destroying lport %p\n", __func__, lport);
bnx2fc_if_destroy(lport);
}
@@ -2556,9 +2554,6 @@ static void bnx2fc_ulp_exit(struct cnic_dev *dev)
__bnx2fc_destroy(interface);
mutex_unlock(&bnx2fc_dev_lock);
- /* Ensure ALL destroy work has been completed before return */
- flush_workqueue(bnx2fc_wq);
-
bnx2fc_ulp_stop(hba);
/* unregister cnic device */
if (test_and_clear_bit(BNX2FC_CNIC_REGISTERED, &hba->reg_with_cnic))
diff --git a/drivers/scsi/elx/libefc/efc_els.c b/drivers/scsi/elx/libefc/efc_els.c
index 7bb4f9aad2c8..84bc81d7ce76 100644
--- a/drivers/scsi/elx/libefc/efc_els.c
+++ b/drivers/scsi/elx/libefc/efc_els.c
@@ -46,18 +46,14 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen)
efc = node->efc;
- spin_lock_irqsave(&node->els_ios_lock, flags);
-
if (!node->els_io_enabled) {
efc_log_err(efc, "els io alloc disabled\n");
- spin_unlock_irqrestore(&node->els_ios_lock, flags);
return NULL;
}
els = mempool_alloc(efc->els_io_pool, GFP_ATOMIC);
if (!els) {
atomic_add_return(1, &efc->els_io_alloc_failed_count);
- spin_unlock_irqrestore(&node->els_ios_lock, flags);
return NULL;
}
@@ -74,7 +70,6 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen)
&els->io.req.phys, GFP_KERNEL);
if (!els->io.req.virt) {
mempool_free(els, efc->els_io_pool);
- spin_unlock_irqrestore(&node->els_ios_lock, flags);
return NULL;
}
@@ -94,10 +89,11 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen)
/* add els structure to ELS IO list */
INIT_LIST_HEAD(&els->list_entry);
+ spin_lock_irqsave(&node->els_ios_lock, flags);
list_add_tail(&els->list_entry, &node->els_ios_list);
+ spin_unlock_irqrestore(&node->els_ios_lock, flags);
}
- spin_unlock_irqrestore(&node->els_ios_lock, flags);
return els;
}
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 88c549f257db..40a52feb315d 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -986,8 +986,6 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
CMD_SP(sc) = NULL;
CMD_FLAGS(sc) |= FNIC_IO_DONE;
- spin_unlock_irqrestore(io_lock, flags);
-
if (hdr_status != FCPIO_SUCCESS) {
atomic64_inc(&fnic_stats->io_stats.io_failures);
shost_printk(KERN_ERR, fnic->lport->host, "hdr status = %s\n",
@@ -996,8 +994,6 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
fnic_release_ioreq_buf(fnic, io_req, sc);
- mempool_free(io_req, fnic->io_req_pool);
-
cmd_trace = ((u64)hdr_status << 56) |
(u64)icmnd_cmpl->scsi_status << 48 |
(u64)icmnd_cmpl->flags << 40 | (u64)sc->cmnd[0] << 32 |
@@ -1021,6 +1017,12 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
} else
fnic->lport->host_stats.fcp_control_requests++;
+ /* Call SCSI completion function to complete the IO */
+ scsi_done(sc);
+ spin_unlock_irqrestore(io_lock, flags);
+
+ mempool_free(io_req, fnic->io_req_pool);
+
atomic64_dec(&fnic_stats->io_stats.active_ios);
if (atomic64_read(&fnic->io_cmpl_skip))
atomic64_dec(&fnic->io_cmpl_skip);
@@ -1049,9 +1051,6 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
if(io_duration_time > atomic64_read(&fnic_stats->io_stats.current_max_io_time))
atomic64_set(&fnic_stats->io_stats.current_max_io_time, io_duration_time);
}
-
- /* Call SCSI completion function to complete the IO */
- scsi_done(sc);
}
/* fnic_fcpio_itmf_cmpl_handler
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index a05ec7aece5a..ebf5ec38891b 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -400,8 +400,7 @@ void hisi_sas_task_deliver(struct hisi_hba *hisi_hba,
struct hisi_sas_slot *slot,
struct hisi_sas_dq *dq,
struct hisi_sas_device *sas_dev,
- struct hisi_sas_internal_abort *abort,
- struct hisi_sas_tmf_task *tmf)
+ struct hisi_sas_internal_abort *abort)
{
struct hisi_sas_cmd_hdr *cmd_hdr_base;
int dlvry_queue_slot, dlvry_queue;
@@ -427,8 +426,6 @@ void hisi_sas_task_deliver(struct hisi_hba *hisi_hba,
cmd_hdr_base = hisi_hba->cmd_hdr[dlvry_queue];
slot->cmd_hdr = &cmd_hdr_base[dlvry_queue_slot];
- slot->tmf = tmf;
- slot->is_internal = tmf;
task->lldd_task = slot;
memset(slot->cmd_hdr, 0, sizeof(struct hisi_sas_cmd_hdr));
@@ -587,7 +584,7 @@ static int hisi_sas_task_exec(struct sas_task *task, gfp_t gfp_flags,
slot->is_internal = tmf;
/* protect task_prep and start_delivery sequence */
- hisi_sas_task_deliver(hisi_hba, slot, dq, sas_dev, NULL, tmf);
+ hisi_sas_task_deliver(hisi_hba, slot, dq, sas_dev, NULL);
return 0;
@@ -1380,12 +1377,13 @@ static int hisi_sas_softreset_ata_disk(struct domain_device *device)
struct hisi_hba *hisi_hba = dev_to_hisi_hba(device);
struct device *dev = hisi_hba->dev;
int s = sizeof(struct host_to_dev_fis);
+ struct hisi_sas_tmf_task tmf = {};
ata_for_each_link(link, ap, EDGE) {
int pmp = sata_srst_pmp(link);
hisi_sas_fill_ata_reset_cmd(link->device, 1, pmp, fis);
- rc = hisi_sas_exec_internal_tmf_task(device, fis, s, NULL);
+ rc = hisi_sas_exec_internal_tmf_task(device, fis, s, &tmf);
if (rc != TMF_RESP_FUNC_COMPLETE)
break;
}
@@ -1396,7 +1394,7 @@ static int hisi_sas_softreset_ata_disk(struct domain_device *device)
hisi_sas_fill_ata_reset_cmd(link->device, 0, pmp, fis);
rc = hisi_sas_exec_internal_tmf_task(device, fis,
- s, NULL);
+ s, &tmf);
if (rc != TMF_RESP_FUNC_COMPLETE)
dev_err(dev, "ata disk %016llx de-reset failed\n",
SAS_ADDR(device->sas_addr));
@@ -2067,7 +2065,7 @@ hisi_sas_internal_abort_task_exec(struct hisi_hba *hisi_hba, int device_id,
slot->port = port;
slot->is_internal = true;
- hisi_sas_task_deliver(hisi_hba, slot, dq, sas_dev, abort, NULL);
+ hisi_sas_task_deliver(hisi_hba, slot, dq, sas_dev, abort);
return 0;
@@ -2666,9 +2664,6 @@ static struct Scsi_Host *hisi_sas_shost_alloc(struct platform_device *pdev,
goto err_out;
error = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
- if (error)
- error = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
-
if (error) {
dev_err(dev, "No usable DMA addressing method\n");
goto err_out;
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index a45ef9a5e12e..a01a3a7b706b 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -4695,8 +4695,6 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_out;
rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
- if (rc)
- rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (rc) {
dev_err(dev, "No usable DMA addressing method\n");
rc = -ENODEV;
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 4878c94761f9..98cabe09c040 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -592,6 +592,7 @@ struct lpfc_vport {
#define FC_VPORT_LOGO_RCVD 0x200 /* LOGO received on vport */
#define FC_RSCN_DISCOVERY 0x400 /* Auth all devices after RSCN */
#define FC_LOGO_RCVD_DID_CHNG 0x800 /* FDISC on phys port detect DID chng*/
+#define FC_PT2PT_NO_NVME 0x1000 /* Don't send NVME PRLI */
#define FC_SCSI_SCAN_TMO 0x4000 /* scsi scan timer running */
#define FC_ABORT_DISCOVERY 0x8000 /* we want to abort discovery */
#define FC_NDISC_ACTIVE 0x10000 /* NPort discovery active */
@@ -1161,6 +1162,16 @@ struct lpfc_hba {
uint32_t cfg_hostmem_hgp;
uint32_t cfg_log_verbose;
uint32_t cfg_enable_fc4_type;
+#define LPFC_ENABLE_FCP 1
+#define LPFC_ENABLE_NVME 2
+#define LPFC_ENABLE_BOTH 3
+#if (IS_ENABLED(CONFIG_NVME_FC))
+#define LPFC_MAX_ENBL_FC4_TYPE LPFC_ENABLE_BOTH
+#define LPFC_DEF_ENBL_FC4_TYPE LPFC_ENABLE_BOTH
+#else
+#define LPFC_MAX_ENBL_FC4_TYPE LPFC_ENABLE_FCP
+#define LPFC_DEF_ENBL_FC4_TYPE LPFC_ENABLE_FCP
+#endif
uint32_t cfg_aer_support;
uint32_t cfg_sriov_nr_virtfn;
uint32_t cfg_request_firmware_upgrade;
@@ -1182,9 +1193,6 @@ struct lpfc_hba {
uint32_t cfg_ras_fwlog_func;
uint32_t cfg_enable_bbcr; /* Enable BB Credit Recovery */
uint32_t cfg_enable_dpp; /* Enable Direct Packet Push */
-#define LPFC_ENABLE_FCP 1
-#define LPFC_ENABLE_NVME 2
-#define LPFC_ENABLE_BOTH 3
uint32_t cfg_enable_pbde;
uint32_t cfg_enable_mi;
struct nvmet_fc_target_port *targetport;
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 7a7f17d71811..fa8415259cb8 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -1315,6 +1315,9 @@ lpfc_issue_lip(struct Scsi_Host *shost)
pmboxq->u.mb.mbxCommand = MBX_DOWN_LINK;
pmboxq->u.mb.mbxOwner = OWN_HOST;
+ if ((vport->fc_flag & FC_PT2PT) && (vport->fc_flag & FC_PT2PT_NO_NVME))
+ vport->fc_flag &= ~FC_PT2PT_NO_NVME;
+
mbxstatus = lpfc_sli_issue_mbox_wait(phba, pmboxq, LPFC_MBOX_TMO * 2);
if ((mbxstatus == MBX_SUCCESS) &&
@@ -3978,8 +3981,8 @@ LPFC_ATTR_R(nvmet_mrq_post,
* 3 - register both FCP and NVME
* Supported values are [1,3]. Default value is 3
*/
-LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH,
- LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH,
+LPFC_ATTR_R(enable_fc4_type, LPFC_DEF_ENBL_FC4_TYPE,
+ LPFC_ENABLE_FCP, LPFC_MAX_ENBL_FC4_TYPE,
"Enable FC4 Protocol support - FCP / NVME");
/*
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index db5ccae1b63d..f936833c9909 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -1072,7 +1072,8 @@ stop_rr_fcf_flogi:
/* FLOGI failed, so there is no fabric */
spin_lock_irq(shost->host_lock);
- vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
+ vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP |
+ FC_PT2PT_NO_NVME);
spin_unlock_irq(shost->host_lock);
/* If private loop, then allow max outstanding els to be
@@ -4607,6 +4608,23 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
	/* Added for Vendor specific support
* Just keep retrying for these Rsn / Exp codes
*/
+ if ((vport->fc_flag & FC_PT2PT) &&
+ cmd == ELS_CMD_NVMEPRLI) {
+ switch (stat.un.b.lsRjtRsnCode) {
+ case LSRJT_UNABLE_TPC:
+ case LSRJT_INVALID_CMD:
+ case LSRJT_LOGICAL_ERR:
+ case LSRJT_CMD_UNSUPPORTED:
+ lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS,
+ "0168 NVME PRLI LS_RJT "
+ "reason %x port doesn't "
+ "support NVME, disabling NVME\n",
+ stat.un.b.lsRjtRsnCode);
+ retry = 0;
+ vport->fc_flag |= FC_PT2PT_NO_NVME;
+ goto out_retry;
+ }
+ }
switch (stat.un.b.lsRjtRsnCode) {
case LSRJT_UNABLE_TPC:
/* The driver has a VALID PLOGI but the rport has
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index a56f01f659f8..558f7d2559c4 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -2104,7 +2104,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba)
}
if (reg_err1 == SLIPORT_ERR1_REG_ERR_CODE_2 &&
reg_err2 == SLIPORT_ERR2_REG_FW_RESTART) {
- lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
+ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
"3143 Port Down: Firmware Update "
"Detected\n");
en_rn_msg = false;
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 7d717a4ac14d..fdf5e777bf11 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -1961,8 +1961,9 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport,
* is configured try it.
*/
ndlp->nlp_fc4_type |= NLP_FC4_FCP;
- if ((vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
- (vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) {
+ if ((!(vport->fc_flag & FC_PT2PT_NO_NVME)) &&
+ (vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH ||
+ vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) {
ndlp->nlp_fc4_type |= NLP_FC4_NVME;
/* We need to update the localport also */
lpfc_nvme_update_localport(vport);
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 1bc0db572d9e..430abebf99f1 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -13363,6 +13363,7 @@ lpfc_sli4_eratt_read(struct lpfc_hba *phba)
uint32_t uerr_sta_hi, uerr_sta_lo;
uint32_t if_type, portsmphr;
struct lpfc_register portstat_reg;
+ u32 logmask;
/*
* For now, use the SLI4 device internal unrecoverable error
@@ -13413,7 +13414,12 @@ lpfc_sli4_eratt_read(struct lpfc_hba *phba)
readl(phba->sli4_hba.u.if_type2.ERR1regaddr);
phba->work_status[1] =
readl(phba->sli4_hba.u.if_type2.ERR2regaddr);
- lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
+ logmask = LOG_TRACE_EVENT;
+ if (phba->work_status[0] ==
+ SLIPORT_ERR1_REG_ERR_CODE_2 &&
+ phba->work_status[1] == SLIPORT_ERR2_REG_FW_RESTART)
+ logmask = LOG_SLI;
+ lpfc_printf_log(phba, KERN_ERR, logmask,
"2885 Port Status Event: "
"port status reg 0x%x, "
"port smphr reg 0x%x, "
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 511726f92d9a..76229b839560 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -2011,9 +2011,10 @@ mpt3sas_base_sync_reply_irqs(struct MPT3SAS_ADAPTER *ioc, u8 poll)
enable_irq(reply_q->os_irq);
}
}
+
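+		/* in poll mode, drain this reply queue before moving to the next one */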
+ if (poll)
+ _base_process_reply_queue(reply_q);
}
- if (poll)
- _base_process_reply_queue(reply_q);
}
/**
diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c
index 253ceca54a84..7eb8c39da366 100644
--- a/drivers/scsi/myrs.c
+++ b/drivers/scsi/myrs.c
@@ -2267,7 +2267,8 @@ static void myrs_cleanup(struct myrs_hba *cs)
myrs_unmap(cs);
if (cs->mmio_base) {
- cs->disable_intr(cs);
+ if (cs->disable_intr)
+ cs->disable_intr(cs);
iounmap(cs->mmio_base);
cs->mmio_base = NULL;
}
diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c
index c814e5071712..9ec310b795c3 100644
--- a/drivers/scsi/pm8001/pm8001_hwi.c
+++ b/drivers/scsi/pm8001/pm8001_hwi.c
@@ -2692,7 +2692,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
u32 tag = le32_to_cpu(psataPayload->tag);
u32 port_id = le32_to_cpu(psataPayload->port_id);
u32 dev_id = le32_to_cpu(psataPayload->device_id);
- unsigned long flags;
if (event)
pm8001_dbg(pm8001_ha, FAIL, "SATA EVENT 0x%x\n", event);
@@ -2724,8 +2723,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
ts->resp = SAS_TASK_COMPLETE;
ts->stat = SAS_DATA_OVERRUN;
ts->residual = 0;
- if (pm8001_dev)
- atomic_dec(&pm8001_dev->running_req);
break;
case IO_XFER_ERROR_BREAK:
pm8001_dbg(pm8001_ha, IO, "IO_XFER_ERROR_BREAK\n");
@@ -2767,7 +2764,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS);
ts->resp = SAS_TASK_COMPLETE;
ts->stat = SAS_QUEUE_FULL;
- pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
return;
}
break;
@@ -2853,20 +2849,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
ts->stat = SAS_OPEN_TO;
break;
}
- spin_lock_irqsave(&t->task_state_lock, flags);
- t->task_state_flags &= ~SAS_TASK_STATE_PENDING;
- t->task_state_flags &= ~SAS_TASK_AT_INITIATOR;
- t->task_state_flags |= SAS_TASK_STATE_DONE;
- if (unlikely((t->task_state_flags & SAS_TASK_STATE_ABORTED))) {
- spin_unlock_irqrestore(&t->task_state_lock, flags);
- pm8001_dbg(pm8001_ha, FAIL,
- "task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n",
- t, event, ts->resp, ts->stat);
- pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
- } else {
- spin_unlock_irqrestore(&t->task_state_lock, flags);
- pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
- }
}
/*See the comments for mpi_ssp_completion */
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index 160ee8b228c9..32edda3e55c6 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -769,8 +769,13 @@ static int pm8001_exec_internal_tmf_task(struct domain_device *dev,
res = -TMF_RESP_FUNC_FAILED;
/* Even TMF timed out, return direct. */
if (task->task_state_flags & SAS_TASK_STATE_ABORTED) {
+ struct pm8001_ccb_info *ccb = task->lldd_task;
+
pm8001_dbg(pm8001_ha, FAIL, "TMF task[%x]timeout.\n",
tmf->tmf);
+
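+		/* clear the stale task pointer so a late completion cannot touch the dead task */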
+ if (ccb)
+ ccb->task = NULL;
goto ex_err;
}
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index bbf538fe15b3..9d20f8009b89 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -2185,9 +2185,9 @@ mpi_ssp_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
pm8001_dbg(pm8001_ha, FAIL,
"task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n",
t, status, ts->resp, ts->stat);
+ pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
if (t->slow_task)
complete(&t->slow_task->completion);
- pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
} else {
spin_unlock_irqrestore(&t->task_state_lock, flags);
pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
@@ -2794,9 +2794,9 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha,
pm8001_dbg(pm8001_ha, FAIL,
"task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n",
t, status, ts->resp, ts->stat);
+ pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
if (t->slow_task)
complete(&t->slow_task->completion);
- pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
} else {
spin_unlock_irqrestore(&t->task_state_lock, flags);
spin_unlock_irqrestore(&circularQ->oq_lock,
@@ -2821,7 +2821,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha,
u32 tag = le32_to_cpu(psataPayload->tag);
u32 port_id = le32_to_cpu(psataPayload->port_id);
u32 dev_id = le32_to_cpu(psataPayload->device_id);
- unsigned long flags;
if (event)
pm8001_dbg(pm8001_ha, FAIL, "SATA EVENT 0x%x\n", event);
@@ -2854,8 +2853,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha,
ts->resp = SAS_TASK_COMPLETE;
ts->stat = SAS_DATA_OVERRUN;
ts->residual = 0;
- if (pm8001_dev)
- atomic_dec(&pm8001_dev->running_req);
break;
case IO_XFER_ERROR_BREAK:
pm8001_dbg(pm8001_ha, IO, "IO_XFER_ERROR_BREAK\n");
@@ -2904,11 +2901,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha,
IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS);
ts->resp = SAS_TASK_COMPLETE;
ts->stat = SAS_QUEUE_FULL;
- spin_unlock_irqrestore(&circularQ->oq_lock,
- circularQ->lock_flags);
- pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
- spin_lock_irqsave(&circularQ->oq_lock,
- circularQ->lock_flags);
return;
}
break;
@@ -3008,24 +3000,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha,
ts->stat = SAS_OPEN_TO;
break;
}
- spin_lock_irqsave(&t->task_state_lock, flags);
- t->task_state_flags &= ~SAS_TASK_STATE_PENDING;
- t->task_state_flags &= ~SAS_TASK_AT_INITIATOR;
- t->task_state_flags |= SAS_TASK_STATE_DONE;
- if (unlikely((t->task_state_flags & SAS_TASK_STATE_ABORTED))) {
- spin_unlock_irqrestore(&t->task_state_lock, flags);
- pm8001_dbg(pm8001_ha, FAIL,
- "task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n",
- t, event, ts->resp, ts->stat);
- pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
- } else {
- spin_unlock_irqrestore(&t->task_state_lock, flags);
- spin_unlock_irqrestore(&circularQ->oq_lock,
- circularQ->lock_flags);
- pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
- spin_lock_irqsave(&circularQ->oq_lock,
- circularQ->lock_flags);
- }
}
/*See the comments for mpi_ssp_completion */
@@ -3931,6 +3905,7 @@ static int ssp_coalesced_comp_resp(struct pm8001_hba_info *pm8001_ha,
/**
* process_one_iomb - process one outbound Queue memory block
* @pm8001_ha: our hba card information
+ * @circularQ: outbound circular queue
* @piomb: IO message buffer
*/
static void process_one_iomb(struct pm8001_hba_info *pm8001_ha,
@@ -4151,10 +4126,22 @@ static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec)
u32 ret = MPI_IO_STATUS_FAIL;
u32 regval;
+	/*
+	 * Fatal errors are programmed to be signalled in irq vector
+	 * pm8001_ha->max_q_num - 1 through
+	 * pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt
+	 */
if (vec == (pm8001_ha->max_q_num - 1)) {
+ u32 mipsall_ready;
+
+ if (pm8001_ha->chip_id == chip_8008 ||
+ pm8001_ha->chip_id == chip_8009)
+ mipsall_ready = SCRATCH_PAD_MIPSALL_READY_8PORT;
+ else
+ mipsall_ready = SCRATCH_PAD_MIPSALL_READY_16PORT;
+
regval = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1);
- if ((regval & SCRATCH_PAD_MIPSALL_READY) !=
- SCRATCH_PAD_MIPSALL_READY) {
+ if ((regval & mipsall_ready) != mipsall_ready) {
pm8001_ha->controller_fatal_error = true;
pm8001_dbg(pm8001_ha, FAIL,
"Firmware Fatal error! Regval:0x%x\n",
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.h b/drivers/scsi/pm8001/pm80xx_hwi.h
index c7e5d93bea92..c41ed039c92a 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.h
+++ b/drivers/scsi/pm8001/pm80xx_hwi.h
@@ -1405,8 +1405,12 @@ typedef struct SASProtocolTimerConfig SASProtocolTimerConfig_t;
#define SCRATCH_PAD_BOOT_LOAD_SUCCESS 0x0
#define SCRATCH_PAD_IOP0_READY 0xC00
#define SCRATCH_PAD_IOP1_READY 0x3000
-#define SCRATCH_PAD_MIPSALL_READY (SCRATCH_PAD_IOP1_READY | \
+#define SCRATCH_PAD_MIPSALL_READY_16PORT (SCRATCH_PAD_IOP1_READY | \
SCRATCH_PAD_IOP0_READY | \
+ SCRATCH_PAD_ILA_READY | \
+ SCRATCH_PAD_RAAE_READY)
+#define SCRATCH_PAD_MIPSALL_READY_8PORT (SCRATCH_PAD_IOP0_READY | \
+ SCRATCH_PAD_ILA_READY | \
SCRATCH_PAD_RAAE_READY)
/* boot loader state */
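The 8-port controllers (chip_8008 and chip_8009) carry a single IOP, so the old catch-all SCRATCH_PAD_MIPSALL_READY mask could declare a firmware fatal error simply because IOP1 never reports ready. The check itself is a plain all-bits-set test; a minimal sketch, where pm80xx_mips_all_ready() is an illustrative name and mask is the per-chip constant selected above:

	/* Sketch: every engine named in the mask must report ready. */
	static bool pm80xx_mips_all_ready(u32 regval, u32 mask)
	{
		return (regval & mask) == mask;	/* all bits, not just any */
	}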
diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c
index 99a56ca1fb16..fab43dabe5b3 100644
--- a/drivers/scsi/qedf/qedf_io.c
+++ b/drivers/scsi/qedf/qedf_io.c
@@ -2250,6 +2250,7 @@ process_els:
io_req->tm_flags == FCP_TMF_TGT_RESET) {
clear_bit(QEDF_CMD_OUTSTANDING, &io_req->flags);
io_req->sc_cmd = NULL;
+ kref_put(&io_req->refcount, qedf_release_cmd);
complete(&io_req->tm_done);
}
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index cdc66e2a9488..6ad28bc8e948 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -911,7 +911,7 @@ void qedf_ctx_soft_reset(struct fc_lport *lport)
struct qed_link_output if_link;
if (lport->vport) {
- QEDF_ERR(NULL, "Cannot issue host reset on NPIV port.\n");
+ printk_ratelimited("Cannot issue host reset on NPIV port.\n");
return;
}
@@ -1864,6 +1864,7 @@ static int qedf_vport_create(struct fc_vport *vport, bool disabled)
vport_qedf->cmd_mgr = base_qedf->cmd_mgr;
init_completion(&vport_qedf->flogi_compl);
INIT_LIST_HEAD(&vport_qedf->fcports);
+ INIT_DELAYED_WORK(&vport_qedf->stag_work, qedf_stag_change_work);
rc = qedf_vport_libfc_config(vport, vn_port);
if (rc) {
@@ -3980,7 +3981,9 @@ void qedf_stag_change_work(struct work_struct *work)
struct qedf_ctx *qedf =
container_of(work, struct qedf_ctx, stag_work.work);
- QEDF_ERR(&qedf->dbg_ctx, "Performing software context reset.\n");
+ printk_ratelimited("[%s]:[%s:%d]:%d: Performing software context reset.",
+ dev_name(&qedf->pdev->dev), __func__, __LINE__,
+ qedf->dbg_ctx.host_no);
qedf_ctx_soft_reset(qedf->lport);
}
diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index 5916ed7662d5..4eb89aa4a39d 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -771,11 +771,10 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
qedi_cmd->list_tmf_work = NULL;
}
}
+ spin_unlock_bh(&qedi_conn->tmf_work_lock);
- if (!found) {
- spin_unlock_bh(&qedi_conn->tmf_work_lock);
+ if (!found)
goto check_cleanup_reqs;
- }
QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
"TMF work, cqe->tid=0x%x, tmf flags=0x%x, cid=0x%x\n",
@@ -806,7 +805,6 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
qedi_cmd->state = CLEANUP_RECV;
unlock:
spin_unlock_bh(&conn->session->back_lock);
- spin_unlock_bh(&qedi_conn->tmf_work_lock);
wake_up_interruptible(&qedi_conn->wait_queue);
return;
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 2104973a35cd..911cc72dd7ac 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -23,7 +23,6 @@
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/string.h>
-#include <linux/genhd.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 0a70aa763a96..e30bc51578e9 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1276,7 +1276,7 @@ scsi_device_state_check(struct scsi_device *sdev, struct request *req)
* power management commands.
*/
if (req && !(req->rq_flags & RQF_PM))
- return BLK_STS_IOERR;
+ return BLK_STS_OFFLINE;
return BLK_STS_OK;
}
}
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 3520b9384428..f4e6c68ac99e 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -214,6 +214,48 @@ static void scsi_unlock_floptical(struct scsi_device *sdev,
SCSI_TIMEOUT, 3, NULL);
}
+static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev,
+ unsigned int depth)
+{
+ int new_shift = sbitmap_calculate_shift(depth);
+ bool need_alloc = !sdev->budget_map.map;
+ bool need_free = false;
+ int ret;
+ struct sbitmap sb_backup;
+
+ /*
+ * realloc if new shift is calculated, which is caused by setting
+ * up one new default queue depth after calling ->slave_configure
+ */
+ if (!need_alloc && new_shift != sdev->budget_map.shift)
+ need_alloc = need_free = true;
+
+ if (!need_alloc)
+ return 0;
+
+ /*
+ * Request queue has to be frozen for reallocating budget map,
+ * and here disk isn't added yet, so freezing is pretty fast
+ */
+ if (need_free) {
+ blk_mq_freeze_queue(sdev->request_queue);
+ sb_backup = sdev->budget_map;
+ }
+ ret = sbitmap_init_node(&sdev->budget_map,
+ scsi_device_max_queue_depth(sdev),
+ new_shift, GFP_KERNEL,
+ sdev->request_queue->node, false, true);
+ if (need_free) {
+ if (ret)
+ sdev->budget_map = sb_backup;
+ else
+ sbitmap_free(&sb_backup);
+ ret = 0;
+ blk_mq_unfreeze_queue(sdev->request_queue);
+ }
+ return ret;
+}
+
/**
* scsi_alloc_sdev - allocate and setup a scsi_Device
* @starget: which target to allocate a &scsi_device for
@@ -306,11 +348,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
* default device queue depth to figure out sbitmap shift
* since we use this queue depth most of times.
*/
- if (sbitmap_init_node(&sdev->budget_map,
- scsi_device_max_queue_depth(sdev),
- sbitmap_calculate_shift(depth),
- GFP_KERNEL, sdev->request_queue->node,
- false, true)) {
+ if (scsi_realloc_sdev_budget_map(sdev, depth)) {
put_device(&starget->dev);
kfree(sdev);
goto out;
@@ -1017,6 +1055,13 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
}
return SCSI_SCAN_NO_RESPONSE;
}
+
+ /*
+ * The queue_depth is often changed in ->slave_configure.
+ * Set up budget map again since memory consumption of
+ * the map depends on actual queue depth.
+ */
+ scsi_realloc_sdev_budget_map(sdev, sdev->queue_depth);
}
if (sdev->scsi_level >= SCSI_3)
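scsi_realloc_sdev_budget_map() above follows a backup-and-restore shape: freeze the queue, stash the old sbitmap, build a new one sized for the depth chosen by ->slave_configure, and fall back to the backup if allocation fails. A condensed sketch of that rollback idiom (resize_map() is illustrative, not a kernel API):

	static int resize_map(struct sbitmap *map, unsigned int depth, int node)
	{
		struct sbitmap backup = *map;	/* keep the old map around */

		if (sbitmap_init_node(map, depth, sbitmap_calculate_shift(depth),
				      GFP_KERNEL, node, false, true)) {
			*map = backup;		/* allocation failed: restore */
			return 0;		/* the old map still works */
		}
		sbitmap_free(&backup);		/* success: drop the old map */
		return 0;
	}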
diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c
index 0ffdb8f2995f..acdc0aceca5e 100644
--- a/drivers/scsi/scsicam.c
+++ b/drivers/scsi/scsicam.c
@@ -14,7 +14,6 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/fs.h>
-#include <linux/genhd.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/pagemap.h>
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 62eb9921cc94..73e6f5f0f37c 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -38,7 +38,6 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/bio.h>
-#include <linux/genhd.h>
#include <linux/hdreg.h>
#include <linux/errno.h>
#include <linux/idr.h>
@@ -122,11 +121,6 @@ static void scsi_disk_release(struct device *cdev);
static DEFINE_IDA(sd_index_ida);
-/* This semaphore is used to mediate the 0->1 reference get in the
- * face of object destruction (i.e. we can't allow a get on an
- * object after last put) */
-static DEFINE_MUTEX(sd_ref_mutex);
-
static struct kmem_cache *sd_cdb_cache;
static mempool_t *sd_cdb_pool;
static mempool_t *sd_page_pool;
@@ -664,33 +658,6 @@ static int sd_major(int major_idx)
}
}
-static struct scsi_disk *scsi_disk_get(struct gendisk *disk)
-{
- struct scsi_disk *sdkp = NULL;
-
- mutex_lock(&sd_ref_mutex);
-
- if (disk->private_data) {
- sdkp = scsi_disk(disk);
- if (scsi_device_get(sdkp->device) == 0)
- get_device(&sdkp->dev);
- else
- sdkp = NULL;
- }
- mutex_unlock(&sd_ref_mutex);
- return sdkp;
-}
-
-static void scsi_disk_put(struct scsi_disk *sdkp)
-{
- struct scsi_device *sdev = sdkp->device;
-
- mutex_lock(&sd_ref_mutex);
- put_device(&sdkp->dev);
- scsi_device_put(sdev);
- mutex_unlock(&sd_ref_mutex);
-}
-
#ifdef CONFIG_BLK_SED_OPAL
static int sd_sec_submit(void *data, u16 spsp, u8 secp, void *buffer,
size_t len, bool send)
@@ -1419,17 +1386,15 @@ static bool sd_need_revalidate(struct block_device *bdev,
**/
static int sd_open(struct block_device *bdev, fmode_t mode)
{
- struct scsi_disk *sdkp = scsi_disk_get(bdev->bd_disk);
- struct scsi_device *sdev;
+ struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk);
+ struct scsi_device *sdev = sdkp->device;
int retval;
- if (!sdkp)
+ if (scsi_device_get(sdev))
return -ENXIO;
SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_open\n"));
- sdev = sdkp->device;
-
/*
* If the device is in error recovery, wait until it is done.
* If the device is offline, then disallow any access to it.
@@ -1474,7 +1439,7 @@ static int sd_open(struct block_device *bdev, fmode_t mode)
return 0;
error_out:
- scsi_disk_put(sdkp);
+ scsi_device_put(sdev);
return retval;
}
@@ -1503,7 +1468,7 @@ static void sd_release(struct gendisk *disk, fmode_t mode)
scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW);
}
- scsi_disk_put(sdkp);
+ scsi_device_put(sdev);
}
static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
@@ -1617,7 +1582,7 @@ static int media_not_present(struct scsi_disk *sdkp,
**/
static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing)
{
- struct scsi_disk *sdkp = scsi_disk_get(disk);
+ struct scsi_disk *sdkp = disk->private_data;
struct scsi_device *sdp;
int retval;
bool disk_changed;
@@ -1680,7 +1645,6 @@ out:
*/
disk_changed = sdp->changed;
sdp->changed = 0;
- scsi_disk_put(sdkp);
return disk_changed ? DISK_EVENT_MEDIA_CHANGE : 0;
}
@@ -1888,6 +1852,13 @@ static const struct pr_ops sd_pr_ops = {
.pr_clear = sd_pr_clear,
};
+static void scsi_disk_free_disk(struct gendisk *disk)
+{
+ struct scsi_disk *sdkp = scsi_disk(disk);
+
+ put_device(&sdkp->disk_dev);
+}
+
static const struct block_device_operations sd_fops = {
.owner = THIS_MODULE,
.open = sd_open,
@@ -1899,6 +1870,7 @@ static const struct block_device_operations sd_fops = {
.unlock_native_capacity = sd_unlock_native_capacity,
.report_zones = sd_zbc_report_zones,
.get_unique_id = sd_get_unique_id,
+ .free_disk = scsi_disk_free_disk,
.pr_ops = &sd_pr_ops,
};
@@ -3516,7 +3488,6 @@ static int sd_probe(struct device *dev)
}
sdkp->device = sdp;
- sdkp->driver = &sd_template;
sdkp->disk = gd;
sdkp->index = index;
sdkp->max_retries = SD_MAX_RETRIES;
@@ -3531,14 +3502,14 @@ static int sd_probe(struct device *dev)
SD_MOD_TIMEOUT);
}
- device_initialize(&sdkp->dev);
- sdkp->dev.parent = get_device(dev);
- sdkp->dev.class = &sd_disk_class;
- dev_set_name(&sdkp->dev, "%s", dev_name(dev));
+ device_initialize(&sdkp->disk_dev);
+ sdkp->disk_dev.parent = get_device(dev);
+ sdkp->disk_dev.class = &sd_disk_class;
+ dev_set_name(&sdkp->disk_dev, "%s", dev_name(dev));
- error = device_add(&sdkp->dev);
+ error = device_add(&sdkp->disk_dev);
if (error) {
- put_device(&sdkp->dev);
+ put_device(&sdkp->disk_dev);
goto out;
}
@@ -3549,7 +3520,7 @@ static int sd_probe(struct device *dev)
gd->minors = SD_MINORS;
gd->fops = &sd_fops;
- gd->private_data = &sdkp->driver;
+ gd->private_data = sdkp;
/* defaults, until the device tells us otherwise */
sdp->sector_size = 512;
@@ -3579,7 +3550,7 @@ static int sd_probe(struct device *dev)
error = device_add_disk(dev, gd, NULL);
if (error) {
- put_device(&sdkp->dev);
+ put_device(&sdkp->disk_dev);
goto out;
}
@@ -3625,58 +3596,26 @@ static int sd_probe(struct device *dev)
**/
static int sd_remove(struct device *dev)
{
- struct scsi_disk *sdkp;
+ struct scsi_disk *sdkp = dev_get_drvdata(dev);
- sdkp = dev_get_drvdata(dev);
scsi_autopm_get_device(sdkp->device);
- device_del(&sdkp->dev);
+ device_del(&sdkp->disk_dev);
del_gendisk(sdkp->disk);
sd_shutdown(dev);
- free_opal_dev(sdkp->opal_dev);
-
- mutex_lock(&sd_ref_mutex);
- dev_set_drvdata(dev, NULL);
- put_device(&sdkp->dev);
- mutex_unlock(&sd_ref_mutex);
-
+ put_disk(sdkp->disk);
return 0;
}
-/**
- * scsi_disk_release - Called to free the scsi_disk structure
- * @dev: pointer to embedded class device
- *
- * sd_ref_mutex must be held entering this routine. Because it is
- * called on last put, you should always use the scsi_disk_get()
- * scsi_disk_put() helpers which manipulate the semaphore directly
- * and never do a direct put_device.
- **/
static void scsi_disk_release(struct device *dev)
{
struct scsi_disk *sdkp = to_scsi_disk(dev);
- struct gendisk *disk = sdkp->disk;
- struct request_queue *q = disk->queue;
ida_free(&sd_index_ida, sdkp->index);
-
- /*
- * Wait until all requests that are in progress have completed.
- * This is necessary to avoid that e.g. scsi_end_request() crashes
- * due to clearing the disk->private_data pointer. Wait from inside
- * scsi_disk_release() instead of from sd_release() to avoid that
- * freezing and unfreezing the request queue affects user space I/O
- * in case multiple processes open a /dev/sd... node concurrently.
- */
- blk_mq_freeze_queue(q);
- blk_mq_unfreeze_queue(q);
-
- disk->private_data = NULL;
- put_disk(disk);
- put_device(&sdkp->device->sdev_gendev);
-
sd_zbc_release_disk(sdkp);
+ put_device(&sdkp->device->sdev_gendev);
+ free_opal_dev(sdkp->opal_dev);
kfree(sdkp);
}
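With sd_ref_mutex gone, the scsi_disk now lives exactly as long as its gendisk: the block core calls ->free_disk from the final put_disk(), so sd_open()/sd_release() only juggle the scsi_device reference. A minimal sketch of the pattern, with my_disk and my_free_disk as illustrative names:

	struct my_disk {
		/* driver state tied to the gendisk's lifetime */
	};

	static void my_free_disk(struct gendisk *disk)
	{
		kfree(disk->private_data);	/* runs once, after the last opener */
	}

	static const struct block_device_operations my_fops = {
		.owner		= THIS_MODULE,
		.free_disk	= my_free_disk,
	};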
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 2e5932bde43d..0a33a4b68ffb 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -68,9 +68,13 @@ enum {
};
struct scsi_disk {
- struct scsi_driver *driver; /* always &sd_template */
struct scsi_device *device;
- struct device dev;
+
+ /*
+ * disk_dev is used to show attributes in /sys/class/scsi_disk/,
+ * but otherwise not really needed. Do not use for refcounting.
+ */
+ struct device disk_dev;
struct gendisk *disk;
struct opal_dev *opal_dev;
#ifdef CONFIG_BLK_DEV_ZONED
@@ -127,11 +131,11 @@ struct scsi_disk {
unsigned security : 1;
unsigned ignore_medium_access_errors : 1;
};
-#define to_scsi_disk(obj) container_of(obj,struct scsi_disk,dev)
+#define to_scsi_disk(obj) container_of(obj, struct scsi_disk, disk_dev)
static inline struct scsi_disk *scsi_disk(struct gendisk *disk)
{
- return container_of(disk->private_data, struct scsi_disk, driver);
+ return disk->private_data;
}
#define sd_printk(prefix, sdsk, fmt, a...) \
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index f925b1f1f9ad..641552d6330b 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -109,11 +109,6 @@ static DEFINE_SPINLOCK(sr_index_lock);
static struct lock_class_key sr_bio_compl_lkclass;
-/* This semaphore is used to mediate the 0->1 reference get in the
- * face of object destruction (i.e. we can't allow a get on an
- * object after last put) */
-static DEFINE_MUTEX(sr_ref_mutex);
-
static int sr_open(struct cdrom_device_info *, int);
static void sr_release(struct cdrom_device_info *);
@@ -143,11 +138,9 @@ static const struct cdrom_device_ops sr_dops = {
.capability = SR_CAPABILITIES,
};
-static void sr_kref_release(struct kref *kref);
-
static inline struct scsi_cd *scsi_cd(struct gendisk *disk)
{
- return container_of(disk->private_data, struct scsi_cd, driver);
+ return disk->private_data;
}
static int sr_runtime_suspend(struct device *dev)
@@ -163,38 +156,6 @@ static int sr_runtime_suspend(struct device *dev)
return 0;
}
-/*
- * The get and put routines for the struct scsi_cd. Note this entity
- * has a scsi_device pointer and owns a reference to this.
- */
-static inline struct scsi_cd *scsi_cd_get(struct gendisk *disk)
-{
- struct scsi_cd *cd = NULL;
-
- mutex_lock(&sr_ref_mutex);
- if (disk->private_data == NULL)
- goto out;
- cd = scsi_cd(disk);
- kref_get(&cd->kref);
- if (scsi_device_get(cd->device)) {
- kref_put(&cd->kref, sr_kref_release);
- cd = NULL;
- }
- out:
- mutex_unlock(&sr_ref_mutex);
- return cd;
-}
-
-static void scsi_cd_put(struct scsi_cd *cd)
-{
- struct scsi_device *sdev = cd->device;
-
- mutex_lock(&sr_ref_mutex);
- kref_put(&cd->kref, sr_kref_release);
- scsi_device_put(sdev);
- mutex_unlock(&sr_ref_mutex);
-}
-
static unsigned int sr_get_events(struct scsi_device *sdev)
{
u8 buf[8];
@@ -522,15 +483,13 @@ static void sr_revalidate_disk(struct scsi_cd *cd)
static int sr_block_open(struct block_device *bdev, fmode_t mode)
{
- struct scsi_cd *cd;
- struct scsi_device *sdev;
- int ret = -ENXIO;
+ struct scsi_cd *cd = scsi_cd(bdev->bd_disk);
+ struct scsi_device *sdev = cd->device;
+ int ret;
- cd = scsi_cd_get(bdev->bd_disk);
- if (!cd)
- goto out;
+ if (scsi_device_get(cd->device))
+ return -ENXIO;
- sdev = cd->device;
scsi_autopm_get_device(sdev);
if (bdev_check_media_change(bdev))
sr_revalidate_disk(cd);
@@ -541,9 +500,7 @@ static int sr_block_open(struct block_device *bdev, fmode_t mode)
scsi_autopm_put_device(sdev);
if (ret)
- scsi_cd_put(cd);
-
-out:
+ scsi_device_put(cd->device);
return ret;
}
@@ -555,7 +512,7 @@ static void sr_block_release(struct gendisk *disk, fmode_t mode)
cdrom_release(&cd->cdi, mode);
mutex_unlock(&cd->lock);
- scsi_cd_put(cd);
+ scsi_device_put(cd->device);
}
static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
@@ -595,18 +552,24 @@ out:
static unsigned int sr_block_check_events(struct gendisk *disk,
unsigned int clearing)
{
- unsigned int ret = 0;
- struct scsi_cd *cd;
+ struct scsi_cd *cd = disk->private_data;
- cd = scsi_cd_get(disk);
- if (!cd)
+ if (atomic_read(&cd->device->disk_events_disable_depth))
return 0;
+ return cdrom_check_events(&cd->cdi, clearing);
+}
- if (!atomic_read(&cd->device->disk_events_disable_depth))
- ret = cdrom_check_events(&cd->cdi, clearing);
+static void sr_free_disk(struct gendisk *disk)
+{
+ struct scsi_cd *cd = disk->private_data;
- scsi_cd_put(cd);
- return ret;
+ spin_lock(&sr_index_lock);
+ clear_bit(MINOR(disk_devt(disk)), sr_index_bits);
+ spin_unlock(&sr_index_lock);
+
+ unregister_cdrom(&cd->cdi);
+ mutex_destroy(&cd->lock);
+ kfree(cd);
}
static const struct block_device_operations sr_bdops =
@@ -617,6 +580,7 @@ static const struct block_device_operations sr_bdops =
.ioctl = sr_block_ioctl,
.compat_ioctl = blkdev_compat_ptr_ioctl,
.check_events = sr_block_check_events,
+ .free_disk = sr_free_disk,
};
static int sr_open(struct cdrom_device_info *cdi, int purpose)
@@ -660,8 +624,6 @@ static int sr_probe(struct device *dev)
if (!cd)
goto fail;
- kref_init(&cd->kref);
-
disk = __alloc_disk_node(sdev->request_queue, NUMA_NO_NODE,
&sr_bio_compl_lkclass);
if (!disk)
@@ -692,7 +654,6 @@ static int sr_probe(struct device *dev)
cd->device = sdev;
cd->disk = disk;
- cd->driver = &sr_template;
cd->capacity = 0x1fffff;
cd->device->changed = 1; /* force recheck CD type */
cd->media_present = 1;
@@ -713,7 +674,7 @@ static int sr_probe(struct device *dev)
sr_vendor_init(cd);
set_capacity(disk, cd->capacity);
- disk->private_data = &cd->driver;
+ disk->private_data = cd;
if (register_cdrom(disk, &cd->cdi))
goto fail_minor;
@@ -728,10 +689,8 @@ static int sr_probe(struct device *dev)
sr_revalidate_disk(cd);
error = device_add_disk(&sdev->sdev_gendev, disk, NULL);
- if (error) {
- kref_put(&cd->kref, sr_kref_release);
- goto fail;
- }
+ if (error)
+ goto unregister_cdrom;
sdev_printk(KERN_DEBUG, sdev,
"Attached scsi CD-ROM %s\n", cd->cdi.name);
@@ -739,6 +698,8 @@ static int sr_probe(struct device *dev)
return 0;
+unregister_cdrom:
+ unregister_cdrom(&cd->cdi);
fail_minor:
spin_lock(&sr_index_lock);
clear_bit(minor, sr_index_bits);
@@ -1010,36 +971,6 @@ out_put_request:
return ret;
}
-
-/**
- * sr_kref_release - Called to free the scsi_cd structure
- * @kref: pointer to embedded kref
- *
- * sr_ref_mutex must be held entering this routine. Because it is
- * called on last put, you should always use the scsi_cd_get()
- * scsi_cd_put() helpers which manipulate the semaphore directly
- * and never do a direct kref_put().
- **/
-static void sr_kref_release(struct kref *kref)
-{
- struct scsi_cd *cd = container_of(kref, struct scsi_cd, kref);
- struct gendisk *disk = cd->disk;
-
- spin_lock(&sr_index_lock);
- clear_bit(MINOR(disk_devt(disk)), sr_index_bits);
- spin_unlock(&sr_index_lock);
-
- unregister_cdrom(&cd->cdi);
-
- disk->private_data = NULL;
-
- put_disk(disk);
-
- mutex_destroy(&cd->lock);
-
- kfree(cd);
-}
-
static int sr_remove(struct device *dev)
{
struct scsi_cd *cd = dev_get_drvdata(dev);
@@ -1047,11 +978,7 @@ static int sr_remove(struct device *dev)
scsi_autopm_get_device(cd->device);
del_gendisk(cd->disk);
- dev_set_drvdata(dev, NULL);
-
- mutex_lock(&sr_ref_mutex);
- kref_put(&cd->kref, sr_kref_release);
- mutex_unlock(&sr_ref_mutex);
+ put_disk(cd->disk);
return 0;
}
diff --git a/drivers/scsi/sr.h b/drivers/scsi/sr.h
index 339c624e04d8..1175f2e213b5 100644
--- a/drivers/scsi/sr.h
+++ b/drivers/scsi/sr.h
@@ -18,8 +18,6 @@
#ifndef _SR_H
#define _SR_H
-#include <linux/genhd.h>
-#include <linux/kref.h>
#include <linux/mutex.h>
#define MAX_RETRIES 3
@@ -33,7 +31,6 @@ struct scsi_device;
typedef struct scsi_cd {
- struct scsi_driver *driver;
unsigned capacity; /* size in blocks */
struct scsi_device *device;
unsigned int vendor; /* vendor code, see sr_vendor.c */
@@ -53,9 +50,6 @@ typedef struct scsi_cd {
struct cdrom_device_info cdi;
struct mutex lock;
- /* We hold gendisk and scsi_device references on probe and use
- * the refs on this kref to decide when to release them */
- struct kref kref;
struct gendisk *disk;
} Scsi_CD;
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index e869e90e05af..ebe9412c86f4 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -4276,7 +4276,6 @@ static int st_probe(struct device *dev)
goto out_buffer_free;
}
kref_init(&tpnt->kref);
- tpnt->driver = &st_template;
tpnt->device = SDp;
if (SDp->scsi_level <= 2)
diff --git a/drivers/scsi/st.h b/drivers/scsi/st.h
index c0ef0d9aaf8a..7a68eaba7e81 100644
--- a/drivers/scsi/st.h
+++ b/drivers/scsi/st.h
@@ -117,7 +117,6 @@ struct scsi_tape_stats {
/* The tape drive descriptor */
struct scsi_tape {
- struct scsi_driver *driver;
struct scsi_device *device;
struct mutex lock; /* For serialization */
struct completion wait; /* For SCSI commands */
diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c
index 8b16bbbcb806..87975d1a21c8 100644
--- a/drivers/scsi/ufs/ufshcd-pltfrm.c
+++ b/drivers/scsi/ufs/ufshcd-pltfrm.c
@@ -92,6 +92,11 @@ static int ufshcd_parse_clock_info(struct ufs_hba *hba)
clki->min_freq = clkfreq[i];
clki->max_freq = clkfreq[i+1];
clki->name = devm_kstrdup(dev, name, GFP_KERNEL);
+ if (!clki->name) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
if (!strcmp(name, "ref_clk"))
clki->keep_link_active = true;
dev_dbg(dev, "%s: min %u max %u name %s\n", "freq-table-hz",
@@ -127,6 +132,8 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name,
return -ENOMEM;
vreg->name = devm_kstrdup(dev, name, GFP_KERNEL);
+ if (!vreg->name)
+ return -ENOMEM;
snprintf(prop_name, MAX_PROP_SIZE, "%s-max-microamp", name);
if (of_property_read_u32(np, prop_name, &vreg->max_uA)) {
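Both hunks above plug the same hole: devm_kstrdup() allocates, so it can return NULL, and the copied name was previously used unchecked. The guard is the standard allocator pattern (name_copy is an illustrative variable):

	name_copy = devm_kstrdup(dev, name, GFP_KERNEL);
	if (!name_copy)
		return -ENOMEM;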
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 460d2b440d2e..9349557b8a01 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2681,7 +2681,7 @@ static int ufshcd_map_queues(struct Scsi_Host *shost)
break;
case HCTX_TYPE_READ:
map->nr_queues = 0;
- break;
+ continue;
default:
WARN_ON_ONCE(true);
}
@@ -8613,7 +8613,7 @@ static void ufshcd_hba_exit(struct ufs_hba *hba)
* @pwr_mode: device power mode to set
*
* Returns 0 if requested power mode is set successfully
- * Returns non-zero if failed to set the requested power mode
+ * Returns < 0 if failed to set the requested power mode
*/
static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
enum ufs_dev_pwr_mode pwr_mode)
@@ -8667,8 +8667,11 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
sdev_printk(KERN_WARNING, sdp,
"START_STOP failed for power mode: %d, result %x\n",
pwr_mode, ret);
- if (ret > 0 && scsi_sense_valid(&sshdr))
- scsi_print_sense_hdr(sdp, NULL, &sshdr);
+ if (ret > 0) {
+ if (scsi_sense_valid(&sshdr))
+ scsi_print_sense_hdr(sdp, NULL, &sshdr);
+ ret = -EIO;
+ }
}
if (!ret)
diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h
index 6a295c88d850..a7ff0e5b5494 100644
--- a/drivers/scsi/ufs/ufshci.h
+++ b/drivers/scsi/ufs/ufshci.h
@@ -142,7 +142,8 @@ static inline u32 ufshci_version(u32 major, u32 minor)
#define INT_FATAL_ERRORS (DEVICE_FATAL_ERROR |\
CONTROLLER_FATAL_ERROR |\
SYSTEM_BUS_FATAL_ERROR |\
- CRYPTO_ENGINE_FATAL_ERROR)
+ CRYPTO_ENGINE_FATAL_ERROR |\
+ UIC_LINK_LOST)
/* HCS - Host Controller Status 30h */
#define DEVICE_PRESENT 0x1
diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c
index 2d36a0715fca..8970068314ef 100644
--- a/drivers/scsi/ufs/ufshpb.c
+++ b/drivers/scsi/ufs/ufshpb.c
@@ -494,7 +494,7 @@ static struct ufshpb_req *ufshpb_get_map_req(struct ufshpb_lu *hpb,
if (!map_req)
return NULL;
- bio = bio_alloc(GFP_KERNEL, hpb->pages_per_srgn);
+ bio = bio_alloc(NULL, hpb->pages_per_srgn, 0, GFP_KERNEL);
if (!bio) {
ufshpb_put_req(hpb, map_req);
return NULL;
@@ -2050,7 +2050,7 @@ static int ufshpb_pre_req_mempool_init(struct ufshpb_lu *hpb)
INIT_LIST_HEAD(&pre_req->list_req);
pre_req->req = NULL;
- pre_req->bio = bio_alloc(GFP_KERNEL, 1);
+ pre_req->bio = bio_alloc(NULL, 1, 0, GFP_KERNEL);
if (!pre_req->bio)
goto release_mem;
diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c
index 12c10a5e3d93..7f421600cb66 100644
--- a/drivers/scsi/xen-scsifront.c
+++ b/drivers/scsi/xen-scsifront.c
@@ -233,12 +233,11 @@ static void scsifront_gnttab_done(struct vscsifrnt_info *info,
return;
for (i = 0; i < shadow->nr_grants; i++) {
- if (unlikely(gnttab_query_foreign_access(shadow->gref[i]))) {
+ if (unlikely(!gnttab_try_end_foreign_access(shadow->gref[i]))) {
shost_printk(KERN_ALERT, info->host, KBUILD_MODNAME
"grant still in use by backend\n");
BUG();
}
- gnttab_end_foreign_access(shadow->gref[i], 0, 0UL);
}
kfree(shadow->sg);
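gnttab_try_end_foreign_access() folds the old query-then-end pair into one call: it revokes the grant and returns whether revocation succeeded, so a false return means the backend still maps the page and reclaiming it would be unsafe, hence the driver's BUG() policy above. Restated as a sketch:

	/* Sketch: end the grant atomically; refuse to reuse a live page. */
	if (!gnttab_try_end_foreign_access(gref))
		BUG();	/* backend still holds the mapping */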
diff --git a/drivers/soc/aspeed/aspeed-lpc-ctrl.c b/drivers/soc/aspeed/aspeed-lpc-ctrl.c
index 72771e018c42..258894ed234b 100644
--- a/drivers/soc/aspeed/aspeed-lpc-ctrl.c
+++ b/drivers/soc/aspeed/aspeed-lpc-ctrl.c
@@ -306,10 +306,9 @@ static int aspeed_lpc_ctrl_probe(struct platform_device *pdev)
}
lpc_ctrl->clk = devm_clk_get(dev, NULL);
- if (IS_ERR(lpc_ctrl->clk)) {
- dev_err(dev, "couldn't get clock\n");
- return PTR_ERR(lpc_ctrl->clk);
- }
+ if (IS_ERR(lpc_ctrl->clk))
+ return dev_err_probe(dev, PTR_ERR(lpc_ctrl->clk),
+ "couldn't get clock\n");
rc = clk_prepare_enable(lpc_ctrl->clk);
if (rc) {
dev_err(dev, "couldn't enable clock\n");
diff --git a/drivers/soc/fsl/guts.c b/drivers/soc/fsl/guts.c
index 072473a16f4d..5ed2fc1c53a0 100644
--- a/drivers/soc/fsl/guts.c
+++ b/drivers/soc/fsl/guts.c
@@ -28,7 +28,6 @@ struct fsl_soc_die_attr {
static struct guts *guts;
static struct soc_device_attribute soc_dev_attr;
static struct soc_device *soc_dev;
-static struct device_node *root;
/* SoC die attribute definition for QorIQ platform */
@@ -138,7 +137,7 @@ static u32 fsl_guts_get_svr(void)
static int fsl_guts_probe(struct platform_device *pdev)
{
- struct device_node *np = pdev->dev.of_node;
+ struct device_node *root, *np = pdev->dev.of_node;
struct device *dev = &pdev->dev;
const struct fsl_soc_die_attr *soc_die;
const char *machine;
@@ -159,8 +158,14 @@ static int fsl_guts_probe(struct platform_device *pdev)
root = of_find_node_by_path("/");
if (of_property_read_string(root, "model", &machine))
of_property_read_string_index(root, "compatible", 0, &machine);
- if (machine)
- soc_dev_attr.machine = machine;
+ if (machine) {
+ soc_dev_attr.machine = devm_kstrdup(dev, machine, GFP_KERNEL);
+ if (!soc_dev_attr.machine) {
+ of_node_put(root);
+ return -ENOMEM;
+ }
+ }
+ of_node_put(root);
svr = fsl_guts_get_svr();
soc_die = fsl_soc_die_match(svr, fsl_soc_die);
@@ -195,7 +200,6 @@ static int fsl_guts_probe(struct platform_device *pdev)
static int fsl_guts_remove(struct platform_device *dev)
{
soc_device_unregister(soc_dev);
- of_node_put(root);
return 0;
}
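The guts fix drops the file-scope root pointer: of_find_node_by_path() returns the node with an elevated refcount, so taking and dropping it inside probe keeps the reference balanced even on the new -ENOMEM path. The idiom in isolation:

	struct device_node *root = of_find_node_by_path("/");

	if (of_property_read_string(root, "model", &machine))
		of_property_read_string_index(root, "compatible", 0, &machine);
	of_node_put(root);	/* drop the reference before leaving scope */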
diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index 4d38c80f8be8..b3c226eb5292 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -147,7 +147,7 @@ EXPORT_SYMBOL(qe_issue_cmd);
* memory mapped space.
* The BRG clock is the QE clock divided by 2.
* It was set up long ago during the initial boot phase and is
- * is given to us.
+ * given to us.
* Baud rate clocks are zero-based in the driver code (as that maps
* to port numbers). Documentation uses 1-based numbering.
*/
@@ -421,7 +421,7 @@ static void qe_upload_microcode(const void *base,
for (i = 0; i < be32_to_cpu(ucode->count); i++)
iowrite32be(be32_to_cpu(code[i]), &qe_immr->iram.idata);
-
+
/* Set I-RAM Ready Register */
iowrite32be(QE_IRAM_READY, &qe_immr->iram.iready);
}
diff --git a/drivers/soc/fsl/qe/qe_io.c b/drivers/soc/fsl/qe/qe_io.c
index e277c827bdf3..a5e2d0e5ab51 100644
--- a/drivers/soc/fsl/qe/qe_io.c
+++ b/drivers/soc/fsl/qe/qe_io.c
@@ -35,6 +35,8 @@ int par_io_init(struct device_node *np)
if (ret)
return ret;
par_io = ioremap(res.start, resource_size(&res));
+ if (!par_io)
+ return -ENOMEM;
if (!of_property_read_u32(np, "num-ports", &num_ports))
num_par_io_ports = num_ports;
diff --git a/drivers/soc/imx/gpcv2.c b/drivers/soc/imx/gpcv2.c
index 3e59d479d001..3cb123016b3e 100644
--- a/drivers/soc/imx/gpcv2.c
+++ b/drivers/soc/imx/gpcv2.c
@@ -382,7 +382,8 @@ static int imx_pgc_power_down(struct generic_pm_domain *genpd)
return 0;
out_clk_disable:
- clk_bulk_disable_unprepare(domain->num_clks, domain->clks);
+ if (!domain->keep_clocks)
+ clk_bulk_disable_unprepare(domain->num_clks, domain->clks);
return ret;
}
diff --git a/drivers/soc/mediatek/mt8192-mmsys.h b/drivers/soc/mediatek/mt8192-mmsys.h
index 6f0a57044a7b..6aae0b12b6ff 100644
--- a/drivers/soc/mediatek/mt8192-mmsys.h
+++ b/drivers/soc/mediatek/mt8192-mmsys.h
@@ -53,7 +53,8 @@ static const struct mtk_mmsys_routes mmsys_mt8192_routing_table[] = {
MT8192_AAL0_SEL_IN_CCORR0
}, {
DDP_COMPONENT_DITHER, DDP_COMPONENT_DSI0,
- MT8192_DISP_DSI0_SEL_IN, MT8192_DSI0_SEL_IN_DITHER0
+ MT8192_DISP_DSI0_SEL_IN, MT8192_DSI0_SEL_IN_DITHER0,
+ MT8192_DSI0_SEL_IN_DITHER0
}, {
DDP_COMPONENT_RDMA0, DDP_COMPONENT_COLOR0,
MT8192_DISP_RDMA0_SOUT_SEL, MT8192_RDMA0_SOUT_COLOR0,
diff --git a/drivers/soc/mediatek/mtk-scpsys.c b/drivers/soc/mediatek/mtk-scpsys.c
index 670cc82d17dc..ca75b14931ec 100644
--- a/drivers/soc/mediatek/mtk-scpsys.c
+++ b/drivers/soc/mediatek/mtk-scpsys.c
@@ -411,17 +411,12 @@ out:
return ret;
}
-static int init_clks(struct platform_device *pdev, struct clk **clk)
+static void init_clks(struct platform_device *pdev, struct clk **clk)
{
int i;
- for (i = CLK_NONE + 1; i < CLK_MAX; i++) {
+ for (i = CLK_NONE + 1; i < CLK_MAX; i++)
clk[i] = devm_clk_get(&pdev->dev, clk_names[i]);
- if (IS_ERR(clk[i]))
- return PTR_ERR(clk[i]);
- }
-
- return 0;
}
static struct scp *init_scp(struct platform_device *pdev,
@@ -431,7 +426,7 @@ static struct scp *init_scp(struct platform_device *pdev,
{
struct genpd_onecell_data *pd_data;
struct resource *res;
- int i, j, ret;
+ int i, j;
struct scp *scp;
struct clk *clk[CLK_MAX];
@@ -486,9 +481,7 @@ static struct scp *init_scp(struct platform_device *pdev,
pd_data->num_domains = num;
- ret = init_clks(pdev, clk);
- if (ret)
- return ERR_PTR(ret);
+ init_clks(pdev, clk);
for (i = 0; i < num; i++) {
struct scp_domain *scpd = &scp->domains[i];
diff --git a/drivers/soc/rockchip/Kconfig b/drivers/soc/rockchip/Kconfig
index 25eb2c1e31bb..156ac0e0c8fe 100644
--- a/drivers/soc/rockchip/Kconfig
+++ b/drivers/soc/rockchip/Kconfig
@@ -34,4 +34,12 @@ config ROCKCHIP_PM_DOMAINS
If unsure, say N.
+config ROCKCHIP_DTPM
+ tristate "Rockchip DTPM hierarchy"
+ depends on DTPM && m
+ help
+ Describe the hierarchy for the Dynamic Thermal Power
+ Management tree on this platform. That will create all the
+ power capping capable devices.
+
endif
diff --git a/drivers/soc/rockchip/Makefile b/drivers/soc/rockchip/Makefile
index 875032f7344e..05f31a4e743c 100644
--- a/drivers/soc/rockchip/Makefile
+++ b/drivers/soc/rockchip/Makefile
@@ -5,3 +5,4 @@
obj-$(CONFIG_ROCKCHIP_GRF) += grf.o
obj-$(CONFIG_ROCKCHIP_IODOMAIN) += io-domain.o
obj-$(CONFIG_ROCKCHIP_PM_DOMAINS) += pm_domains.o
+obj-$(CONFIG_ROCKCHIP_DTPM) += dtpm.o
diff --git a/drivers/soc/rockchip/dtpm.c b/drivers/soc/rockchip/dtpm.c
new file mode 100644
index 000000000000..5a23784b5221
--- /dev/null
+++ b/drivers/soc/rockchip/dtpm.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2021 Linaro Limited
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * DTPM hierarchy description
+ */
+#include <linux/dtpm.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+static struct dtpm_node __initdata rk3399_hierarchy[] = {
+ [0]{ .name = "rk3399",
+ .type = DTPM_NODE_VIRTUAL },
+ [1]{ .name = "package",
+ .type = DTPM_NODE_VIRTUAL,
+ .parent = &rk3399_hierarchy[0] },
+ [2]{ .name = "/cpus/cpu@0",
+ .type = DTPM_NODE_DT,
+ .parent = &rk3399_hierarchy[1] },
+ [3]{ .name = "/cpus/cpu@1",
+ .type = DTPM_NODE_DT,
+ .parent = &rk3399_hierarchy[1] },
+ [4]{ .name = "/cpus/cpu@2",
+ .type = DTPM_NODE_DT,
+ .parent = &rk3399_hierarchy[1] },
+ [5]{ .name = "/cpus/cpu@3",
+ .type = DTPM_NODE_DT,
+ .parent = &rk3399_hierarchy[1] },
+ [6]{ .name = "/cpus/cpu@100",
+ .type = DTPM_NODE_DT,
+ .parent = &rk3399_hierarchy[1] },
+ [7]{ .name = "/cpus/cpu@101",
+ .type = DTPM_NODE_DT,
+ .parent = &rk3399_hierarchy[1] },
+ [8]{ .name = "/gpu@ff9a0000",
+ .type = DTPM_NODE_DT,
+ .parent = &rk3399_hierarchy[1] },
+ [9]{ /* sentinel */ }
+};
+
+static struct of_device_id __initdata rockchip_dtpm_match_table[] = {
+ { .compatible = "rockchip,rk3399", .data = rk3399_hierarchy },
+ {},
+};
+
+static int __init rockchip_dtpm_init(void)
+{
+ return dtpm_create_hierarchy(rockchip_dtpm_match_table);
+}
+module_init(rockchip_dtpm_init);
+
+static void __exit rockchip_dtpm_exit(void)
+{
+ return dtpm_destroy_hierarchy();
+}
+module_exit(rockchip_dtpm_exit);
+
+MODULE_SOFTDEP("pre: panfrost cpufreq-dt");
+MODULE_DESCRIPTION("Rockchip DTPM driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:dtpm");
+MODULE_AUTHOR("Daniel Lezcano <daniel.lezcano@kernel.org");
diff --git a/drivers/soc/samsung/Kconfig b/drivers/soc/samsung/Kconfig
index a9f8b224322e..02e319508cc6 100644
--- a/drivers/soc/samsung/Kconfig
+++ b/drivers/soc/samsung/Kconfig
@@ -31,7 +31,7 @@ config EXYNOS_USI
help
Enable support for USI block. USI (Universal Serial Interface) is an
IP-core found in modern Samsung Exynos SoCs, like Exynos850 and
- ExynosAutoV0. USI block can be configured to provide one of the
+ ExynosAutoV9. USI block can be configured to provide one of the
following serial protocols: UART, SPI or High Speed I2C.
This driver allows one to configure USI for desired protocol, which
diff --git a/drivers/soc/samsung/exynos-chipid.c b/drivers/soc/samsung/exynos-chipid.c
index 2746d05936d3..0fb3631e7346 100644
--- a/drivers/soc/samsung/exynos-chipid.c
+++ b/drivers/soc/samsung/exynos-chipid.c
@@ -204,7 +204,7 @@ module_platform_driver(exynos_chipid_driver);
MODULE_DESCRIPTION("Samsung Exynos ChipID controller and ASV driver");
MODULE_AUTHOR("Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>");
-MODULE_AUTHOR("Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>");
+MODULE_AUTHOR("Krzysztof Kozlowski <krzk@kernel.org>");
MODULE_AUTHOR("Pankaj Dubey <pankaj.dubey@samsung.com>");
MODULE_AUTHOR("Sylwester Nawrocki <s.nawrocki@samsung.com>");
MODULE_LICENSE("GPL");
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index b2a8821971e1..31a2cef3790c 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -427,6 +427,45 @@ config SPI_INGENIC
To compile this driver as a module, choose M here: the module
will be called spi-ingenic.
+config SPI_INTEL
+ tristate
+
+config SPI_INTEL_PCI
+ tristate "Intel PCH/PCU SPI flash PCI driver (DANGEROUS)"
+ depends on PCI
+ depends on X86 || COMPILE_TEST
+ depends on SPI_MEM
+ select SPI_INTEL
+ help
+ This enables PCI support for the Intel PCH/PCU SPI controller in
+ master mode. This controller is present in modern Intel hardware
+ and is used to hold BIOS and other persistent settings. Using
+ this driver it is possible to upgrade BIOS directly from Linux.
+
+ Say N here unless you know what you are doing. Overwriting the
+ SPI flash may render the system unbootable.
+
+ To compile this driver as a module, choose M here: the module
+ will be called spi-intel-pci.
+
+config SPI_INTEL_PLATFORM
+ tristate "Intel PCH/PCU SPI flash platform driver (DANGEROUS)"
+ depends on X86 || COMPILE_TEST
+ depends on SPI_MEM
+ select SPI_INTEL
+ help
+ This enables platform support for the Intel PCH/PCU SPI
+ controller in master mode. This controller is present in modern
+ Intel hardware and is used to hold BIOS and other persistent
+ settings. Using this driver it is possible to upgrade BIOS
+ directly from Linux.
+
+ Say N here unless you know what you are doing. Overwriting the
+ SPI flash may render the system unbootable.
+
+ To compile this driver as a module, choose M here: the module
+ will be called spi-intel-platform.
+
config SPI_JCORE
tristate "J-Core SPI Master"
depends on OF && (SUPERH || COMPILE_TEST)
@@ -866,6 +905,17 @@ config SPI_SUN6I
help
This enables using the SPI controller on the Allwinner A31 SoCs.
+config SPI_SUNPLUS_SP7021
+ tristate "Sunplus SP7021 SPI controller"
+ depends on SOC_SP7021 || COMPILE_TEST
+ help
+ This enables the Sunplus SP7021 SPI controller driver on SP7021 SoCs.
+ This driver can also be built as a module. If so, the module will be
+ called spi-sunplus-sp7021.
+
+ If you have a Sunplus SP7021 platform say Y here.
+ If unsure, say N.
+
config SPI_SYNQUACER
tristate "Socionext's SynQuacer HighSpeed SPI controller"
depends on ARCH_SYNQUACER || COMPILE_TEST
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index dd7393a6046f..3aa28ed3f761 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -61,6 +61,9 @@ obj-$(CONFIG_SPI_HISI_SFC_V3XX) += spi-hisi-sfc-v3xx.o
obj-$(CONFIG_SPI_IMG_SPFI) += spi-img-spfi.o
obj-$(CONFIG_SPI_IMX) += spi-imx.o
obj-$(CONFIG_SPI_INGENIC) += spi-ingenic.o
+obj-$(CONFIG_SPI_INTEL) += spi-intel.o
+obj-$(CONFIG_SPI_INTEL_PCI) += spi-intel-pci.o
+obj-$(CONFIG_SPI_INTEL_PLATFORM) += spi-intel-platform.o
obj-$(CONFIG_SPI_LANTIQ_SSC) += spi-lantiq-ssc.o
obj-$(CONFIG_SPI_JCORE) += spi-jcore.o
obj-$(CONFIG_SPI_LM70_LLP) += spi-lm70llp.o
@@ -119,6 +122,7 @@ obj-$(CONFIG_SPI_STM32_QSPI) += spi-stm32-qspi.o
obj-$(CONFIG_SPI_ST_SSC4) += spi-st-ssc4.o
obj-$(CONFIG_SPI_SUN4I) += spi-sun4i.o
obj-$(CONFIG_SPI_SUN6I) += spi-sun6i.o
+obj-$(CONFIG_SPI_SUNPLUS_SP7021) += spi-sunplus-sp7021.o
obj-$(CONFIG_SPI_SYNQUACER) += spi-synquacer.o
obj-$(CONFIG_SPI_TEGRA210_QUAD) += spi-tegra210-quad.o
obj-$(CONFIG_SPI_TEGRA114) += spi-tegra114.o
diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c
index 4b3ac7aceaf6..cba6a4486c24 100644
--- a/drivers/spi/spi-amd.c
+++ b/drivers/spi/spi-amd.c
@@ -12,12 +12,17 @@
#include <linux/platform_device.h>
#include <linux/delay.h>
#include <linux/spi/spi.h>
+#include <linux/iopoll.h>
#define AMD_SPI_CTRL0_REG 0x00
#define AMD_SPI_EXEC_CMD BIT(16)
#define AMD_SPI_FIFO_CLEAR BIT(20)
#define AMD_SPI_BUSY BIT(31)
+#define AMD_SPI_OPCODE_REG 0x45
+#define AMD_SPI_CMD_TRIGGER_REG 0x47
+#define AMD_SPI_TRIGGER_CMD BIT(7)
+
#define AMD_SPI_OPCODE_MASK 0xFF
#define AMD_SPI_ALT_CS_REG 0x1D
@@ -34,10 +39,15 @@
#define AMD_SPI_XFER_TX 1
#define AMD_SPI_XFER_RX 2
+enum amd_spi_versions {
+ AMD_SPI_V1 = 1, /* AMDI0061 */
+ AMD_SPI_V2, /* AMDI0062 */
+};
+
struct amd_spi {
void __iomem *io_remap_addr;
unsigned long io_base_addr;
- u32 rom_addr;
+ enum amd_spi_versions version;
};
static inline u8 amd_spi_readreg8(struct amd_spi *amd_spi, int idx)
@@ -81,14 +91,29 @@ static void amd_spi_select_chip(struct amd_spi *amd_spi, u8 cs)
amd_spi_setclear_reg8(amd_spi, AMD_SPI_ALT_CS_REG, cs, AMD_SPI_ALT_CS_MASK);
}
+static inline void amd_spi_clear_chip(struct amd_spi *amd_spi, u8 chip_select)
+{
+ amd_spi_writereg8(amd_spi, AMD_SPI_ALT_CS_REG, chip_select & ~AMD_SPI_ALT_CS_MASK);
+}
+
static void amd_spi_clear_fifo_ptr(struct amd_spi *amd_spi)
{
amd_spi_setclear_reg32(amd_spi, AMD_SPI_CTRL0_REG, AMD_SPI_FIFO_CLEAR, AMD_SPI_FIFO_CLEAR);
}
-static void amd_spi_set_opcode(struct amd_spi *amd_spi, u8 cmd_opcode)
+static int amd_spi_set_opcode(struct amd_spi *amd_spi, u8 cmd_opcode)
{
- amd_spi_setclear_reg32(amd_spi, AMD_SPI_CTRL0_REG, cmd_opcode, AMD_SPI_OPCODE_MASK);
+ switch (amd_spi->version) {
+ case AMD_SPI_V1:
+ amd_spi_setclear_reg32(amd_spi, AMD_SPI_CTRL0_REG, cmd_opcode,
+ AMD_SPI_OPCODE_MASK);
+ return 0;
+ case AMD_SPI_V2:
+ amd_spi_writereg8(amd_spi, AMD_SPI_OPCODE_REG, cmd_opcode);
+ return 0;
+ default:
+ return -ENODEV;
+ }
}
static inline void amd_spi_set_rx_count(struct amd_spi *amd_spi, u8 rx_count)
@@ -103,16 +128,22 @@ static inline void amd_spi_set_tx_count(struct amd_spi *amd_spi, u8 tx_count)
static int amd_spi_busy_wait(struct amd_spi *amd_spi)
{
- int timeout = 100000;
-
- /* poll for SPI bus to become idle */
- while (amd_spi_readreg32(amd_spi, AMD_SPI_CTRL0_REG) & AMD_SPI_BUSY) {
- usleep_range(10, 20);
- if (timeout-- < 0)
- return -ETIMEDOUT;
+ u32 val;
+ int reg;
+
+ switch (amd_spi->version) {
+ case AMD_SPI_V1:
+ reg = AMD_SPI_CTRL0_REG;
+ break;
+ case AMD_SPI_V2:
+ reg = AMD_SPI_STATUS_REG;
+ break;
+ default:
+ return -ENODEV;
}
- return 0;
+ return readl_poll_timeout(amd_spi->io_remap_addr + reg, val,
+ !(val & AMD_SPI_BUSY), 20, 2000000);
}
static int amd_spi_execute_opcode(struct amd_spi *amd_spi)
@@ -123,10 +154,20 @@ static int amd_spi_execute_opcode(struct amd_spi *amd_spi)
if (ret)
return ret;
- /* Set ExecuteOpCode bit in the CTRL0 register */
- amd_spi_setclear_reg32(amd_spi, AMD_SPI_CTRL0_REG, AMD_SPI_EXEC_CMD, AMD_SPI_EXEC_CMD);
-
- return 0;
+ switch (amd_spi->version) {
+ case AMD_SPI_V1:
+ /* Set ExecuteOpCode bit in the CTRL0 register */
+ amd_spi_setclear_reg32(amd_spi, AMD_SPI_CTRL0_REG, AMD_SPI_EXEC_CMD,
+ AMD_SPI_EXEC_CMD);
+ return 0;
+ case AMD_SPI_V2:
+ /* Trigger the command execution */
+ amd_spi_setclear_reg8(amd_spi, AMD_SPI_CMD_TRIGGER_REG,
+ AMD_SPI_TRIGGER_CMD, AMD_SPI_TRIGGER_CMD);
+ return 0;
+ default:
+ return -ENODEV;
+ }
}
static int amd_spi_master_setup(struct spi_device *spi)
@@ -196,6 +237,17 @@ static inline int amd_spi_fifo_xfer(struct amd_spi *amd_spi,
message->actual_length = tx_len + rx_len + 1;
/* complete the transaction */
message->status = 0;
+
+ switch (amd_spi->version) {
+ case AMD_SPI_V1:
+ break;
+ case AMD_SPI_V2:
+ amd_spi_clear_chip(amd_spi, message->spi->chip_select);
+ break;
+ default:
+ return -ENODEV;
+ }
+
spi_finalize_current_message(master);
return 0;
@@ -241,6 +293,8 @@ static int amd_spi_probe(struct platform_device *pdev)
}
dev_dbg(dev, "io_remap_address: %p\n", amd_spi->io_remap_addr);
+ amd_spi->version = (enum amd_spi_versions) device_get_match_data(dev);
+
/* Initialize the spi_master fields */
master->bus_num = 0;
master->num_chipselect = 4;
@@ -266,7 +320,8 @@ err_free_master:
#ifdef CONFIG_ACPI
static const struct acpi_device_id spi_acpi_match[] = {
- { "AMDI0061", 0 },
+ { "AMDI0061", AMD_SPI_V1 },
+ { "AMDI0062", AMD_SPI_V2 },
{},
};
MODULE_DEVICE_TABLE(acpi, spi_acpi_match);
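readl_poll_timeout() from <linux/iopoll.h> replaces the hand-rolled usleep/counter loop above: it re-reads the register into val until the condition holds, sleeping 20 us between polls and giving up with -ETIMEDOUT after 2 s. A sketch against hypothetical STATUS_REG and BUSY_BIT names:

	u32 val;

	return readl_poll_timeout(base + STATUS_REG, val,
				  !(val & BUSY_BIT), 20, 2000000);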
diff --git a/drivers/spi/spi-ath79.c b/drivers/spi/spi-ath79.c
index d1e287d2d9cd..607e7a49fb89 100644
--- a/drivers/spi/spi-ath79.c
+++ b/drivers/spi/spi-ath79.c
@@ -15,6 +15,7 @@
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
#include <linux/spi/spi_bitbang.h>
#include <linux/bitops.h>
#include <linux/clk.h>
@@ -133,6 +134,38 @@ static u32 ath79_spi_txrx_mode0(struct spi_device *spi, unsigned int nsecs,
return ath79_spi_rr(sp, AR71XX_SPI_REG_RDS);
}
+static int ath79_exec_mem_op(struct spi_mem *mem,
+ const struct spi_mem_op *op)
+{
+ struct ath79_spi *sp = ath79_spidev_to_sp(mem->spi);
+
+ /* Ensure that reads are performed on the device connected to hardware cs0 */
+ if (mem->spi->chip_select || mem->spi->cs_gpiod)
+ return -ENOTSUPP;
+
+ /* Only use for fast-read op. */
+ if (op->cmd.opcode != 0x0b || op->data.dir != SPI_MEM_DATA_IN ||
+ op->addr.nbytes != 3 || op->dummy.nbytes != 1)
+ return -ENOTSUPP;
+
+ /* disable GPIO mode */
+ ath79_spi_wr(sp, AR71XX_SPI_REG_FS, 0);
+
+ memcpy_fromio(op->data.buf.in, sp->base + op->addr.val, op->data.nbytes);
+
+ /* enable GPIO mode */
+ ath79_spi_wr(sp, AR71XX_SPI_REG_FS, AR71XX_SPI_FS_GPIO);
+
+ /* restore IOC register */
+ ath79_spi_wr(sp, AR71XX_SPI_REG_IOC, sp->ioc_base);
+
+ return 0;
+}
+
+static const struct spi_controller_mem_ops ath79_mem_ops = {
+ .exec_op = ath79_exec_mem_op,
+};
+
static int ath79_spi_probe(struct platform_device *pdev)
{
struct spi_master *master;
@@ -154,6 +187,7 @@ static int ath79_spi_probe(struct platform_device *pdev)
master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
master->flags = SPI_MASTER_GPIO_SS;
master->num_chipselect = 3;
+ master->mem_ops = &ath79_mem_ops;
sp->bitbang.master = master;
sp->bitbang.chipselect = ath79_spi_chipselect;
diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
index c9a769b8594b..86c76211b3d3 100644
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -585,7 +585,7 @@ static void bcm_qspi_chip_select(struct bcm_qspi *qspi, int cs)
u32 rd = 0;
u32 wr = 0;
- if (qspi->base[CHIP_SELECT]) {
+ if (cs >= 0 && qspi->base[CHIP_SELECT]) {
rd = bcm_qspi_read(qspi, CHIP_SELECT, 0);
wr = (rd & ~0xff) | (1 << cs);
if (rd == wr)
diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c
index 7d709a8c833b..e28521922330 100644
--- a/drivers/spi/spi-bcm2835aux.c
+++ b/drivers/spi/spi-bcm2835aux.c
@@ -22,7 +22,6 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
-#include <linux/of_gpio.h>
#include <linux/of_irq.h>
#include <linux/regmap.h>
#include <linux/spi/spi.h>
@@ -445,25 +444,12 @@ static void bcm2835aux_spi_handle_err(struct spi_master *master,
static int bcm2835aux_spi_setup(struct spi_device *spi)
{
- int ret;
-
/* sanity check for native cs */
if (spi->mode & SPI_NO_CS)
return 0;
- if (gpio_is_valid(spi->cs_gpio)) {
- /* with gpio-cs set the GPIO to the correct level
- * and as output (in case the dt has the gpio not configured
- * as output but native cs)
- */
- ret = gpio_direction_output(spi->cs_gpio,
- (spi->mode & SPI_CS_HIGH) ? 0 : 1);
- if (ret)
- dev_err(&spi->dev,
- "could not set gpio %i as output: %i\n",
- spi->cs_gpio, ret);
-
- return ret;
- }
+
+ if (spi->cs_gpiod)
+ return 0;
/* for dt-backwards compatibility: only support native on CS0
* known things not supported with broken native CS:
@@ -519,6 +505,7 @@ static int bcm2835aux_spi_probe(struct platform_device *pdev)
master->prepare_message = bcm2835aux_spi_prepare_message;
master->unprepare_message = bcm2835aux_spi_unprepare_message;
master->dev.of_node = pdev->dev.of_node;
+ master->use_gpio_descriptors = true;
bs = spi_master_get_devdata(master);
diff --git a/drivers/spi/spi-bitbang-txrx.h b/drivers/spi/spi-bitbang-txrx.h
index ae61d72c7d28..267342dfa738 100644
--- a/drivers/spi/spi-bitbang-txrx.h
+++ b/drivers/spi/spi-bitbang-txrx.h
@@ -41,6 +41,8 @@
* chips need ... there may be several reasons you'd need to tweak timings
* in these routines, not just to make it faster or slower to match a
* particular CPU clock rate.
+ *
+ * ToDo: Maybe the bitrev macros can be used to improve the code?
*/
static inline u32
@@ -106,3 +108,67 @@ bitbang_txrx_be_cpha1(struct spi_device *spi,
}
return word;
}
+
+static inline u32
+bitbang_txrx_le_cpha0(struct spi_device *spi,
+ unsigned int nsecs, unsigned int cpol, unsigned int flags,
+ u32 word, u8 bits)
+{
+ /* if (cpol == 0) this is SPI_MODE_0; else this is SPI_MODE_2 */
+
+ u32 oldbit = !(word & 1);
+ /* clock starts at inactive polarity */
+ for (; likely(bits); bits--) {
+
+ /* setup LSB (to slave) on trailing edge */
+ if ((flags & SPI_MASTER_NO_TX) == 0) {
+ if ((word & 1) != oldbit) {
+ setmosi(spi, word & 1);
+ oldbit = word & 1;
+ }
+ }
+ spidelay(nsecs); /* T(setup) */
+
+ setsck(spi, !cpol);
+ spidelay(nsecs);
+
+ /* sample LSB (from slave) on leading edge */
+ word >>= 1;
+ if ((flags & SPI_MASTER_NO_RX) == 0)
+ word |= getmiso(spi) << (bits - 1);
+ setsck(spi, cpol);
+ }
+ return word;
+}
+
+static inline u32
+bitbang_txrx_le_cpha1(struct spi_device *spi,
+ unsigned int nsecs, unsigned int cpol, unsigned int flags,
+ u32 word, u8 bits)
+{
+ /* if (cpol == 0) this is SPI_MODE_1; else this is SPI_MODE_3 */
+
+ u32 oldbit = !(word & 1);
+ /* clock starts at inactive polarity */
+ for (; likely(bits); bits--) {
+
+ /* setup LSB (to slave) on leading edge */
+ setsck(spi, !cpol);
+ if ((flags & SPI_MASTER_NO_TX) == 0) {
+ if ((word & 1) != oldbit) {
+ setmosi(spi, word & 1);
+ oldbit = word & 1;
+ }
+ }
+ spidelay(nsecs); /* T(setup) */
+
+ setsck(spi, cpol);
+ spidelay(nsecs);
+
+ /* sample LSB (from slave) on trailing edge */
+ word >>= 1;
+ if ((flags & SPI_MASTER_NO_RX) == 0)
+ word |= getmiso(spi) << (bits - 1);
+ }
+ return word;
+}
diff --git a/drivers/spi/spi-cadence-xspi.c b/drivers/spi/spi-cadence-xspi.c
index 4bc1b93fc276..3ab19be83095 100644
--- a/drivers/spi/spi-cadence-xspi.c
+++ b/drivers/spi/spi-cadence-xspi.c
@@ -578,10 +578,8 @@ static int cdns_xspi_probe(struct platform_device *pdev)
}
cdns_xspi->irq = platform_get_irq(pdev, 0);
- if (cdns_xspi->irq < 0) {
- dev_err(dev, "Failed to get IRQ\n");
+ if (cdns_xspi->irq < 0)
return -ENXIO;
- }
ret = devm_request_irq(dev, cdns_xspi->irq, cdns_xspi_irq_handler,
IRQF_SHARED, pdev->name, cdns_xspi);
diff --git a/drivers/spi/spi-fsi.c b/drivers/spi/spi-fsi.c
index b6c7467f0b59..d403a7a3021d 100644
--- a/drivers/spi/spi-fsi.c
+++ b/drivers/spi/spi-fsi.c
@@ -25,6 +25,7 @@
#define SPI_FSI_BASE 0x70000
#define SPI_FSI_INIT_TIMEOUT_MS 1000
+#define SPI_FSI_STATUS_TIMEOUT_MS 100
#define SPI_FSI_MAX_RX_SIZE 8
#define SPI_FSI_MAX_TX_SIZE 40
@@ -299,6 +300,7 @@ static int fsi_spi_transfer_data(struct fsi_spi *ctx,
struct spi_transfer *transfer)
{
int rc = 0;
+ unsigned long end;
u64 status = 0ULL;
if (transfer->tx_buf) {
@@ -315,10 +317,14 @@ static int fsi_spi_transfer_data(struct fsi_spi *ctx,
if (rc)
return rc;
+ end = jiffies + msecs_to_jiffies(SPI_FSI_STATUS_TIMEOUT_MS);
do {
rc = fsi_spi_status(ctx, &status, "TX");
if (rc)
return rc;
+
+ if (time_after(jiffies, end))
+ return -ETIMEDOUT;
} while (status & SPI_FSI_STATUS_TDR_FULL);
sent += nb;
@@ -329,10 +335,14 @@ static int fsi_spi_transfer_data(struct fsi_spi *ctx,
u8 *rx = transfer->rx_buf;
while (transfer->len > recv) {
+ end = jiffies + msecs_to_jiffies(SPI_FSI_STATUS_TIMEOUT_MS);
do {
rc = fsi_spi_status(ctx, &status, "RX");
if (rc)
return rc;
+
+ if (time_after(jiffies, end))
+ return -ETIMEDOUT;
} while (!(status & SPI_FSI_STATUS_RDR_FULL));
rc = fsi_spi_read_reg(ctx, SPI_FSI_DATA_RX, &in);
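The FSI change bounds each status-polling loop with a jiffies deadline instead of spinning forever on a wedged controller. The skeleton, with read_status() and BUSY left abstract:

	unsigned long end = jiffies + msecs_to_jiffies(SPI_FSI_STATUS_TIMEOUT_MS);

	do {
		rc = read_status(ctx, &status);	/* hypothetical helper */
		if (rc)
			return rc;
		if (time_after(jiffies, end))
			return -ETIMEDOUT;
	} while (status & BUSY);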
diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index f7d905d2a90f..4e83cc5b445d 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -898,11 +898,8 @@ static int spi_geni_probe(struct platform_device *pdev)
return irq;
ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
- if (ret) {
- ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
- if (ret)
- return dev_err_probe(dev, ret, "could not set DMA mask\n");
- }
+ if (ret)
+ return dev_err_probe(dev, ret, "could not set DMA mask\n");
base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(base))
diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c
index 0584f4d2fde2..4b12c4964a66 100644
--- a/drivers/spi/spi-gpio.c
+++ b/drivers/spi/spi-gpio.c
@@ -135,25 +135,37 @@ static inline int getmiso(const struct spi_device *spi)
static u32 spi_gpio_txrx_word_mode0(struct spi_device *spi,
unsigned nsecs, u32 word, u8 bits, unsigned flags)
{
- return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits);
+ if (unlikely(spi->mode & SPI_LSB_FIRST))
+ return bitbang_txrx_le_cpha0(spi, nsecs, 0, flags, word, bits);
+ else
+ return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits);
}
static u32 spi_gpio_txrx_word_mode1(struct spi_device *spi,
unsigned nsecs, u32 word, u8 bits, unsigned flags)
{
- return bitbang_txrx_be_cpha1(spi, nsecs, 0, flags, word, bits);
+ if (unlikely(spi->mode & SPI_LSB_FIRST))
+ return bitbang_txrx_le_cpha1(spi, nsecs, 0, flags, word, bits);
+ else
+ return bitbang_txrx_be_cpha1(spi, nsecs, 0, flags, word, bits);
}
static u32 spi_gpio_txrx_word_mode2(struct spi_device *spi,
unsigned nsecs, u32 word, u8 bits, unsigned flags)
{
- return bitbang_txrx_be_cpha0(spi, nsecs, 1, flags, word, bits);
+ if (unlikely(spi->mode & SPI_LSB_FIRST))
+ return bitbang_txrx_le_cpha0(spi, nsecs, 1, flags, word, bits);
+ else
+ return bitbang_txrx_be_cpha0(spi, nsecs, 1, flags, word, bits);
}
static u32 spi_gpio_txrx_word_mode3(struct spi_device *spi,
unsigned nsecs, u32 word, u8 bits, unsigned flags)
{
- return bitbang_txrx_be_cpha1(spi, nsecs, 1, flags, word, bits);
+ if (unlikely(spi->mode & SPI_LSB_FIRST))
+ return bitbang_txrx_le_cpha1(spi, nsecs, 1, flags, word, bits);
+ else
+ return bitbang_txrx_be_cpha1(spi, nsecs, 1, flags, word, bits);
}
/*
@@ -170,28 +182,40 @@ static u32 spi_gpio_spec_txrx_word_mode0(struct spi_device *spi,
unsigned nsecs, u32 word, u8 bits, unsigned flags)
{
flags = spi->master->flags;
- return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits);
+ if (unlikely(spi->mode & SPI_LSB_FIRST))
+ return bitbang_txrx_le_cpha0(spi, nsecs, 0, flags, word, bits);
+ else
+ return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits);
}
static u32 spi_gpio_spec_txrx_word_mode1(struct spi_device *spi,
unsigned nsecs, u32 word, u8 bits, unsigned flags)
{
flags = spi->master->flags;
- return bitbang_txrx_be_cpha1(spi, nsecs, 0, flags, word, bits);
+ if (unlikely(spi->mode & SPI_LSB_FIRST))
+ return bitbang_txrx_le_cpha1(spi, nsecs, 0, flags, word, bits);
+ else
+ return bitbang_txrx_be_cpha1(spi, nsecs, 0, flags, word, bits);
}
static u32 spi_gpio_spec_txrx_word_mode2(struct spi_device *spi,
unsigned nsecs, u32 word, u8 bits, unsigned flags)
{
flags = spi->master->flags;
- return bitbang_txrx_be_cpha0(spi, nsecs, 1, flags, word, bits);
+ if (unlikely(spi->mode & SPI_LSB_FIRST))
+ return bitbang_txrx_le_cpha0(spi, nsecs, 1, flags, word, bits);
+ else
+ return bitbang_txrx_be_cpha0(spi, nsecs, 1, flags, word, bits);
}
static u32 spi_gpio_spec_txrx_word_mode3(struct spi_device *spi,
unsigned nsecs, u32 word, u8 bits, unsigned flags)
{
flags = spi->master->flags;
- return bitbang_txrx_be_cpha1(spi, nsecs, 1, flags, word, bits);
+ if (unlikely(spi->mode & SPI_LSB_FIRST))
+ return bitbang_txrx_le_cpha1(spi, nsecs, 1, flags, word, bits);
+ else
+ return bitbang_txrx_be_cpha1(spi, nsecs, 1, flags, word, bits);
}
/*----------------------------------------------------------------------*/
@@ -378,7 +402,7 @@ static int spi_gpio_probe(struct platform_device *pdev)
master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
master->mode_bits = SPI_3WIRE | SPI_3WIRE_HIZ | SPI_CPHA | SPI_CPOL |
- SPI_CS_HIGH;
+ SPI_CS_HIGH | SPI_LSB_FIRST;
if (!spi_gpio->mosi) {
/* HW configuration without MOSI pin
*
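For SPI_LSB_FIRST the new helpers shift the word out from bit 0 and park each sampled bit at the top of the shrinking window, so after all clocks the received word is correctly LSB-aligned. The core of one clock cycle, where bits counts the bits still to transfer and the pin helpers are abstract:

	setmosi(spi, word & 1);			/* drive the current LSB */
	/* ...clock edge and delay per the chosen mode... */
	word >>= 1;				/* retire the transmitted bit */
	word |= getmiso(spi) << (bits - 1);	/* sample lands at the top */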
diff --git a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c b/drivers/spi/spi-intel-pci.c
index 1bc53b8bb88a..a5ef7a526a7f 100644
--- a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c
+++ b/drivers/spi/spi-intel-pci.c
@@ -2,34 +2,48 @@
/*
* Intel PCH/PCU SPI flash PCI driver.
*
- * Copyright (C) 2016, Intel Corporation
+ * Copyright (C) 2016 - 2022, Intel Corporation
* Author: Mika Westerberg <mika.westerberg@linux.intel.com>
*/
-#include <linux/ioport.h>
-#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
-#include "intel-spi.h"
+#include "spi-intel.h"
#define BCR 0xdc
#define BCR_WPD BIT(0)
+static bool intel_spi_pci_set_writeable(void __iomem *base, void *data)
+{
+ struct pci_dev *pdev = data;
+ u32 bcr;
+
+ /* Try to make the chip read/write */
+ pci_read_config_dword(pdev, BCR, &bcr);
+ if (!(bcr & BCR_WPD)) {
+ bcr |= BCR_WPD;
+ pci_write_config_dword(pdev, BCR, bcr);
+ pci_read_config_dword(pdev, BCR, &bcr);
+ }
+
+ return bcr & BCR_WPD;
+}
+
static const struct intel_spi_boardinfo bxt_info = {
.type = INTEL_SPI_BXT,
+ .set_writeable = intel_spi_pci_set_writeable,
};
static const struct intel_spi_boardinfo cnl_info = {
.type = INTEL_SPI_CNL,
+ .set_writeable = intel_spi_pci_set_writeable,
};
static int intel_spi_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
{
struct intel_spi_boardinfo *info;
- struct intel_spi *ispi;
- u32 bcr;
int ret;
ret = pcim_enable_device(pdev);
@@ -41,26 +55,8 @@ static int intel_spi_pci_probe(struct pci_dev *pdev,
if (!info)
return -ENOMEM;
- /* Try to make the chip read/write */
- pci_read_config_dword(pdev, BCR, &bcr);
- if (!(bcr & BCR_WPD)) {
- bcr |= BCR_WPD;
- pci_write_config_dword(pdev, BCR, bcr);
- pci_read_config_dword(pdev, BCR, &bcr);
- }
- info->writeable = !!(bcr & BCR_WPD);
-
- ispi = intel_spi_probe(&pdev->dev, &pdev->resource[0], info);
- if (IS_ERR(ispi))
- return PTR_ERR(ispi);
-
- pci_set_drvdata(pdev, ispi);
- return 0;
-}
-
-static void intel_spi_pci_remove(struct pci_dev *pdev)
-{
- intel_spi_remove(pci_get_drvdata(pdev));
+ info->data = pdev;
+ return intel_spi_probe(&pdev->dev, &pdev->resource[0], info);
}
static const struct pci_device_id intel_spi_pci_ids[] = {
@@ -70,6 +66,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = {
{ PCI_VDEVICE(INTEL, 0x19e0), (unsigned long)&bxt_info },
{ PCI_VDEVICE(INTEL, 0x1bca), (unsigned long)&bxt_info },
{ PCI_VDEVICE(INTEL, 0x34a4), (unsigned long)&bxt_info },
+ { PCI_VDEVICE(INTEL, 0x38a4), (unsigned long)&bxt_info },
{ PCI_VDEVICE(INTEL, 0x43a4), (unsigned long)&cnl_info },
{ PCI_VDEVICE(INTEL, 0x4b24), (unsigned long)&bxt_info },
{ PCI_VDEVICE(INTEL, 0x4da4), (unsigned long)&bxt_info },
@@ -89,7 +86,6 @@ static struct pci_driver intel_spi_pci_driver = {
.name = "intel-spi",
.id_table = intel_spi_pci_ids,
.probe = intel_spi_pci_probe,
- .remove = intel_spi_pci_remove,
};
module_pci_driver(intel_spi_pci_driver);
diff --git a/drivers/mtd/spi-nor/controllers/intel-spi-platform.c b/drivers/spi/spi-intel-platform.c
index f80f1086f928..2ef09fa35661 100644
--- a/drivers/mtd/spi-nor/controllers/intel-spi-platform.c
+++ b/drivers/spi/spi-intel-platform.c
@@ -2,20 +2,18 @@
/*
* Intel PCH/PCU SPI flash platform driver.
*
- * Copyright (C) 2016, Intel Corporation
+ * Copyright (C) 2016 - 2022, Intel Corporation
* Author: Mika Westerberg <mika.westerberg@linux.intel.com>
*/
-#include <linux/ioport.h>
#include <linux/module.h>
#include <linux/platform_device.h>
-#include "intel-spi.h"
+#include "spi-intel.h"
static int intel_spi_platform_probe(struct platform_device *pdev)
{
struct intel_spi_boardinfo *info;
- struct intel_spi *ispi;
struct resource *mem;
info = dev_get_platdata(&pdev->dev);
@@ -23,24 +21,11 @@ static int intel_spi_platform_probe(struct platform_device *pdev)
return -EINVAL;
mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- ispi = intel_spi_probe(&pdev->dev, mem, info);
- if (IS_ERR(ispi))
- return PTR_ERR(ispi);
-
- platform_set_drvdata(pdev, ispi);
- return 0;
-}
-
-static int intel_spi_platform_remove(struct platform_device *pdev)
-{
- struct intel_spi *ispi = platform_get_drvdata(pdev);
-
- return intel_spi_remove(ispi);
+ return intel_spi_probe(&pdev->dev, mem, info);
}
static struct platform_driver intel_spi_platform_driver = {
.probe = intel_spi_platform_probe,
- .remove = intel_spi_platform_remove,
.driver = {
.name = "intel-spi",
},
diff --git a/drivers/mtd/spi-nor/controllers/intel-spi.c b/drivers/spi/spi-intel.c
index a413892ff449..e937cfe85559 100644
--- a/drivers/mtd/spi-nor/controllers/intel-spi.c
+++ b/drivers/spi/spi-intel.c
@@ -2,21 +2,21 @@
/*
* Intel PCH/PCU SPI flash driver.
*
- * Copyright (C) 2016, Intel Corporation
+ * Copyright (C) 2016 - 2022, Intel Corporation
* Author: Mika Westerberg <mika.westerberg@linux.intel.com>
*/
-#include <linux/err.h>
-#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/sizes.h>
-#include <linux/mtd/mtd.h>
+
#include <linux/mtd/partitions.h>
#include <linux/mtd/spi-nor.h>
-#include "intel-spi.h"
+#include <linux/spi/flash.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+#include "spi-intel.h"
/* Offsets are from @ispi->base */
#define BFPREG 0x00
@@ -92,8 +92,6 @@
/* CPU specifics */
#define BYT_PR 0x74
#define BYT_SSFSTS_CTL 0x90
-#define BYT_BCR 0xfc
-#define BYT_BCR_WPD BIT(0)
#define BYT_FREG_NUM 5
#define BYT_PR_NUM 5
@@ -125,37 +123,43 @@
* struct intel_spi - Driver private data
* @dev: Device pointer
* @info: Pointer to board specific info
- * @nor: SPI NOR layer structure
* @base: Beginning of MMIO space
* @pregs: Start of protection registers
* @sregs: Start of software sequencer registers
+ * @master: Pointer to the SPI controller structure
* @nregions: Maximum number of regions
* @pr_num: Maximum number of protected range registers
- * @writeable: Is the chip writeable
* @locked: Is SPI setting locked
* @swseq_reg: Use SW sequencer in register reads/writes
* @swseq_erase: Use SW sequencer in erase operation
- * @erase_64k: 64k erase supported
* @atomic_preopcode: Holds preopcode when atomic sequence is requested
* @opcodes: Opcodes which are supported. This are programmed by BIOS
* before it locks down the controller.
+ * @mem_ops: Pointer to SPI MEM ops supported by the controller
*/
struct intel_spi {
struct device *dev;
const struct intel_spi_boardinfo *info;
- struct spi_nor nor;
void __iomem *base;
void __iomem *pregs;
void __iomem *sregs;
+ struct spi_controller *master;
size_t nregions;
size_t pr_num;
- bool writeable;
bool locked;
bool swseq_reg;
bool swseq_erase;
- bool erase_64k;
u8 atomic_preopcode;
u8 opcodes[8];
+ const struct intel_spi_mem_op *mem_ops;
+};
+
+struct intel_spi_mem_op {
+ struct spi_mem_op mem_op;
+ u32 replacement_op;
+ int (*exec_op)(struct intel_spi *ispi,
+ const struct intel_spi_mem_op *iop,
+ const struct spi_mem_op *op);
};
static bool writeable;
@@ -201,9 +205,6 @@ static void intel_spi_dump_regs(struct intel_spi *ispi)
readl(ispi->sregs + OPMENU1));
}
- if (ispi->info->type == INTEL_SPI_BYT)
- dev_dbg(ispi->dev, "BCR=0x%08x\n", readl(ispi->base + BYT_BCR));
-
dev_dbg(ispi->dev, "LVSCC=0x%08x\n", readl(ispi->base + LVSCC));
dev_dbg(ispi->dev, "UVSCC=0x%08x\n", readl(ispi->base + UVSCC));
@@ -219,9 +220,8 @@ static void intel_spi_dump_regs(struct intel_spi *ispi)
base = value & PR_BASE_MASK;
dev_dbg(ispi->dev, " %02d base: 0x%08x limit: 0x%08x [%c%c]\n",
- i, base << 12, (limit << 12) | 0xfff,
- value & PR_WPE ? 'W' : '.',
- value & PR_RPE ? 'R' : '.');
+ i, base << 12, (limit << 12) | 0xfff,
+ value & PR_WPE ? 'W' : '.', value & PR_RPE ? 'R' : '.');
}
dev_dbg(ispi->dev, "Flash regions:\n");
@@ -236,7 +236,7 @@ static void intel_spi_dump_regs(struct intel_spi *ispi)
dev_dbg(ispi->dev, " %02d disabled\n", i);
else
dev_dbg(ispi->dev, " %02d base: 0x%08x limit: 0x%08x\n",
- i, base << 12, (limit << 12) | 0xfff);
+ i, base << 12, (limit << 12) | 0xfff);
}
dev_dbg(ispi->dev, "Using %cW sequencer for register access\n",
@@ -304,124 +304,12 @@ static int intel_spi_wait_sw_busy(struct intel_spi *ispi)
INTEL_SPI_TIMEOUT * 1000);
}
-static int intel_spi_init(struct intel_spi *ispi)
+static bool intel_spi_set_writeable(struct intel_spi *ispi)
{
- u32 opmenu0, opmenu1, lvscc, uvscc, val;
- int i;
-
- switch (ispi->info->type) {
- case INTEL_SPI_BYT:
- ispi->sregs = ispi->base + BYT_SSFSTS_CTL;
- ispi->pregs = ispi->base + BYT_PR;
- ispi->nregions = BYT_FREG_NUM;
- ispi->pr_num = BYT_PR_NUM;
- ispi->swseq_reg = true;
-
- if (writeable) {
- /* Disable write protection */
- val = readl(ispi->base + BYT_BCR);
- if (!(val & BYT_BCR_WPD)) {
- val |= BYT_BCR_WPD;
- writel(val, ispi->base + BYT_BCR);
- val = readl(ispi->base + BYT_BCR);
- }
-
- ispi->writeable = !!(val & BYT_BCR_WPD);
- }
-
- break;
-
- case INTEL_SPI_LPT:
- ispi->sregs = ispi->base + LPT_SSFSTS_CTL;
- ispi->pregs = ispi->base + LPT_PR;
- ispi->nregions = LPT_FREG_NUM;
- ispi->pr_num = LPT_PR_NUM;
- ispi->swseq_reg = true;
- break;
-
- case INTEL_SPI_BXT:
- ispi->sregs = ispi->base + BXT_SSFSTS_CTL;
- ispi->pregs = ispi->base + BXT_PR;
- ispi->nregions = BXT_FREG_NUM;
- ispi->pr_num = BXT_PR_NUM;
- ispi->erase_64k = true;
- break;
-
- case INTEL_SPI_CNL:
- ispi->sregs = NULL;
- ispi->pregs = ispi->base + CNL_PR;
- ispi->nregions = CNL_FREG_NUM;
- ispi->pr_num = CNL_PR_NUM;
- break;
+ if (!ispi->info->set_writeable)
+ return false;
- default:
- return -EINVAL;
- }
-
- /* Disable #SMI generation from HW sequencer */
- val = readl(ispi->base + HSFSTS_CTL);
- val &= ~HSFSTS_CTL_FSMIE;
- writel(val, ispi->base + HSFSTS_CTL);
-
- /*
- * Determine whether erase operation should use HW or SW sequencer.
- *
- * The HW sequencer has a predefined list of opcodes, with only the
- * erase opcode being programmable in LVSCC and UVSCC registers.
- * If these registers don't contain a valid erase opcode, erase
- * cannot be done using HW sequencer.
- */
- lvscc = readl(ispi->base + LVSCC);
- uvscc = readl(ispi->base + UVSCC);
- if (!(lvscc & ERASE_OPCODE_MASK) || !(uvscc & ERASE_OPCODE_MASK))
- ispi->swseq_erase = true;
- /* SPI controller on Intel BXT supports 64K erase opcode */
- if (ispi->info->type == INTEL_SPI_BXT && !ispi->swseq_erase)
- if (!(lvscc & ERASE_64K_OPCODE_MASK) ||
- !(uvscc & ERASE_64K_OPCODE_MASK))
- ispi->erase_64k = false;
-
- if (ispi->sregs == NULL && (ispi->swseq_reg || ispi->swseq_erase)) {
- dev_err(ispi->dev, "software sequencer not supported, but required\n");
- return -EINVAL;
- }
-
- /*
- * Some controllers can only do basic operations using hardware
- * sequencer. All other operations are supposed to be carried out
- * using software sequencer.
- */
- if (ispi->swseq_reg) {
- /* Disable #SMI generation from SW sequencer */
- val = readl(ispi->sregs + SSFSTS_CTL);
- val &= ~SSFSTS_CTL_FSMIE;
- writel(val, ispi->sregs + SSFSTS_CTL);
- }
-
- /* Check controller's lock status */
- val = readl(ispi->base + HSFSTS_CTL);
- ispi->locked = !!(val & HSFSTS_CTL_FLOCKDN);
-
- if (ispi->locked && ispi->sregs) {
- /*
- * BIOS programs allowed opcodes and then locks down the
- * register. So read back what opcodes it decided to support.
- * That's the set we are going to support as well.
- */
- opmenu0 = readl(ispi->sregs + OPMENU0);
- opmenu1 = readl(ispi->sregs + OPMENU1);
-
- if (opmenu0 && opmenu1) {
- for (i = 0; i < ARRAY_SIZE(ispi->opcodes) / 2; i++) {
- ispi->opcodes[i] = opmenu0 >> i * 8;
- ispi->opcodes[i + 4] = opmenu1 >> i * 8;
- }
- }
- }
-
- intel_spi_dump_regs(ispi);
-
- return 0;
+ return ispi->info->set_writeable(ispi->base, ispi->info->data);
}
static int intel_spi_opcode_index(struct intel_spi *ispi, u8 opcode, int optype)
@@ -537,7 +425,6 @@ static int intel_spi_sw_cycle(struct intel_spi *ispi, u8 opcode, size_t len,
default:
return -EINVAL;
}
-
}
writel(val, ispi->sregs + SSFSTS_CTL);
@@ -554,31 +441,35 @@ static int intel_spi_sw_cycle(struct intel_spi *ispi, u8 opcode, size_t len,
return 0;
}
-static int intel_spi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf,
- size_t len)
+static int intel_spi_read_reg(struct intel_spi *ispi,
+ const struct intel_spi_mem_op *iop,
+ const struct spi_mem_op *op)
{
- struct intel_spi *ispi = nor->priv;
+ size_t nbytes = op->data.nbytes;
+ u8 opcode = op->cmd.opcode;
int ret;
/* Address of the first chip */
writel(0, ispi->base + FADDR);
if (ispi->swseq_reg)
- ret = intel_spi_sw_cycle(ispi, opcode, len,
+ ret = intel_spi_sw_cycle(ispi, opcode, nbytes,
OPTYPE_READ_NO_ADDR);
else
- ret = intel_spi_hw_cycle(ispi, opcode, len);
+ ret = intel_spi_hw_cycle(ispi, opcode, nbytes);
if (ret)
return ret;
- return intel_spi_read_block(ispi, buf, len);
+ return intel_spi_read_block(ispi, op->data.buf.in, nbytes);
}
-static int intel_spi_write_reg(struct spi_nor *nor, u8 opcode, const u8 *buf,
- size_t len)
+static int intel_spi_write_reg(struct intel_spi *ispi,
+ const struct intel_spi_mem_op *iop,
+ const struct spi_mem_op *op)
{
- struct intel_spi *ispi = nor->priv;
+ size_t nbytes = op->data.nbytes;
+ u8 opcode = op->cmd.opcode;
int ret;
/*
@@ -623,23 +514,25 @@ static int intel_spi_write_reg(struct spi_nor *nor, u8 opcode, const u8 *buf,
writel(0, ispi->base + FADDR);
/* Write the value beforehand */
- ret = intel_spi_write_block(ispi, buf, len);
+ ret = intel_spi_write_block(ispi, op->data.buf.out, nbytes);
if (ret)
return ret;
if (ispi->swseq_reg)
- return intel_spi_sw_cycle(ispi, opcode, len,
+ return intel_spi_sw_cycle(ispi, opcode, nbytes,
OPTYPE_WRITE_NO_ADDR);
- return intel_spi_hw_cycle(ispi, opcode, len);
+ return intel_spi_hw_cycle(ispi, opcode, nbytes);
}
-static ssize_t intel_spi_read(struct spi_nor *nor, loff_t from, size_t len,
- u_char *read_buf)
+static int intel_spi_read(struct intel_spi *ispi,
+ const struct intel_spi_mem_op *iop,
+ const struct spi_mem_op *op)
{
- struct intel_spi *ispi = nor->priv;
- size_t block_size, retlen = 0;
+ void *read_buf = op->data.buf.in;
+ size_t block_size, nbytes = op->data.nbytes;
+ u32 addr = op->addr.val;
u32 val, status;
- ssize_t ret;
+ int ret;
/*
* Atomic sequence is not expected with HW sequencer reads. Make
@@ -648,24 +541,14 @@ static ssize_t intel_spi_read(struct spi_nor *nor, loff_t from, size_t len,
if (WARN_ON_ONCE(ispi->atomic_preopcode))
ispi->atomic_preopcode = 0;
- switch (nor->read_opcode) {
- case SPINOR_OP_READ:
- case SPINOR_OP_READ_FAST:
- case SPINOR_OP_READ_4B:
- case SPINOR_OP_READ_FAST_4B:
- break;
- default:
- return -EINVAL;
- }
-
- while (len > 0) {
- block_size = min_t(size_t, len, INTEL_SPI_FIFO_SZ);
+ while (nbytes > 0) {
+ block_size = min_t(size_t, nbytes, INTEL_SPI_FIFO_SZ);
/* Read cannot cross 4K boundary */
- block_size = min_t(loff_t, from + block_size,
- round_up(from + 1, SZ_4K)) - from;
+ block_size = min_t(loff_t, addr + block_size,
+ round_up(addr + 1, SZ_4K)) - addr;
- writel(from, ispi->base + FADDR);
+ writel(addr, ispi->base + FADDR);
val = readl(ispi->base + HSFSTS_CTL);
val &= ~(HSFSTS_CTL_FDBC_MASK | HSFSTS_CTL_FCYCLE_MASK);
@@ -686,8 +569,7 @@ static ssize_t intel_spi_read(struct spi_nor *nor, loff_t from, size_t len,
ret = -EACCES;
if (ret < 0) {
- dev_err(ispi->dev, "read error: %llx: %#x\n", from,
- status);
+ dev_err(ispi->dev, "read error: %x: %#x\n", addr, status);
return ret;
}
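To see the chunking above at work: with addr = 0x0ff0 and nbytes = 100, the first pass clamps block_size to the FIFO size (64 bytes on this controller), then the 4K rule shrinks it to min(0x0ff0 + 64, round_up(0x0ff1, SZ_4K)) - 0x0ff0 = 0x1000 - 0x0ff0 = 16 bytes, so the cycle stops exactly at the page boundary; the next iteration resumes at addr = 0x1000 with a full 64-byte chunk.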
@@ -695,34 +577,35 @@ static ssize_t intel_spi_read(struct spi_nor *nor, loff_t from, size_t len,
if (ret)
return ret;
- len -= block_size;
- from += block_size;
- retlen += block_size;
+ nbytes -= block_size;
+ addr += block_size;
read_buf += block_size;
}
- return retlen;
+ return 0;
}
-static ssize_t intel_spi_write(struct spi_nor *nor, loff_t to, size_t len,
- const u_char *write_buf)
+static int intel_spi_write(struct intel_spi *ispi,
+ const struct intel_spi_mem_op *iop,
+ const struct spi_mem_op *op)
{
- struct intel_spi *ispi = nor->priv;
- size_t block_size, retlen = 0;
+ size_t block_size, nbytes = op->data.nbytes;
+ const void *write_buf = op->data.buf.out;
+ u32 addr = op->addr.val;
u32 val, status;
- ssize_t ret;
+ int ret;
/* Not needed with HW sequencer write, make sure it is cleared */
ispi->atomic_preopcode = 0;
- while (len > 0) {
- block_size = min_t(size_t, len, INTEL_SPI_FIFO_SZ);
+ while (nbytes > 0) {
+ block_size = min_t(size_t, nbytes, INTEL_SPI_FIFO_SZ);
/* Write cannot cross 4K boundary */
- block_size = min_t(loff_t, to + block_size,
- round_up(to + 1, SZ_4K)) - to;
+ block_size = min_t(loff_t, addr + block_size,
+ round_up(addr + 1, SZ_4K)) - addr;
- writel(to, ispi->base + FADDR);
+ writel(addr, ispi->base + FADDR);
val = readl(ispi->base + HSFSTS_CTL);
val &= ~(HSFSTS_CTL_FDBC_MASK | HSFSTS_CTL_FCYCLE_MASK);
@@ -753,79 +636,476 @@ static ssize_t intel_spi_write(struct spi_nor *nor, loff_t to, size_t len,
ret = -EACCES;
if (ret < 0) {
- dev_err(ispi->dev, "write error: %llx: %#x\n", to,
- status);
+ dev_err(ispi->dev, "write error: %x: %#x\n", addr, status);
return ret;
}
- len -= block_size;
- to += block_size;
- retlen += block_size;
+ nbytes -= block_size;
+ addr += block_size;
write_buf += block_size;
}
- return retlen;
+ return 0;
}
-static int intel_spi_erase(struct spi_nor *nor, loff_t offs)
+static int intel_spi_erase(struct intel_spi *ispi,
+ const struct intel_spi_mem_op *iop,
+ const struct spi_mem_op *op)
{
- size_t erase_size, len = nor->mtd.erasesize;
- struct intel_spi *ispi = nor->priv;
- u32 val, status, cmd;
+ u8 opcode = op->cmd.opcode;
+ u32 addr = op->addr.val;
+ u32 val, status;
int ret;
- /* If the hardware can do 64k erase use that when possible */
- if (len >= SZ_64K && ispi->erase_64k) {
- cmd = HSFSTS_CTL_FCYCLE_ERASE_64K;
- erase_size = SZ_64K;
- } else {
- cmd = HSFSTS_CTL_FCYCLE_ERASE;
- erase_size = SZ_4K;
+ writel(addr, ispi->base + FADDR);
+
+ if (ispi->swseq_erase)
+ return intel_spi_sw_cycle(ispi, opcode, 0,
+ OPTYPE_WRITE_WITH_ADDR);
+
+ /* Not needed with HW sequencer erase, make sure it is cleared */
+ ispi->atomic_preopcode = 0;
+
+ val = readl(ispi->base + HSFSTS_CTL);
+ val &= ~(HSFSTS_CTL_FDBC_MASK | HSFSTS_CTL_FCYCLE_MASK);
+ val |= HSFSTS_CTL_AEL | HSFSTS_CTL_FCERR | HSFSTS_CTL_FDONE;
+ val |= HSFSTS_CTL_FGO;
+ val |= iop->replacement_op;
+ writel(val, ispi->base + HSFSTS_CTL);
+
+ ret = intel_spi_wait_hw_busy(ispi);
+ if (ret)
+ return ret;
+
+ status = readl(ispi->base + HSFSTS_CTL);
+ if (status & HSFSTS_CTL_FCERR)
+ return -EIO;
+ if (status & HSFSTS_CTL_AEL)
+ return -EACCES;
+
+ return 0;
+}
+
+static bool intel_spi_cmp_mem_op(const struct intel_spi_mem_op *iop,
+ const struct spi_mem_op *op)
+{
+ if (iop->mem_op.cmd.nbytes != op->cmd.nbytes ||
+ iop->mem_op.cmd.buswidth != op->cmd.buswidth ||
+ iop->mem_op.cmd.dtr != op->cmd.dtr ||
+ iop->mem_op.cmd.opcode != op->cmd.opcode)
+ return false;
+
+ if (iop->mem_op.addr.nbytes != op->addr.nbytes ||
+ iop->mem_op.addr.dtr != op->addr.dtr)
+ return false;
+
+ if (iop->mem_op.data.dir != op->data.dir ||
+ iop->mem_op.data.dtr != op->data.dtr)
+ return false;
+
+ if (iop->mem_op.data.dir != SPI_MEM_NO_DATA) {
+ if (iop->mem_op.data.buswidth != op->data.buswidth)
+ return false;
+ }
+
+ return true;
+}
+
+static const struct intel_spi_mem_op *
+intel_spi_match_mem_op(struct intel_spi *ispi, const struct spi_mem_op *op)
+{
+ const struct intel_spi_mem_op *iop;
+
+ for (iop = ispi->mem_ops; iop->mem_op.cmd.opcode; iop++) {
+ if (intel_spi_cmp_mem_op(iop, op))
+ break;
}
- if (ispi->swseq_erase) {
- while (len > 0) {
- writel(offs, ispi->base + FADDR);
+ return iop->mem_op.cmd.opcode ? iop : NULL;
+}
+
+static bool intel_spi_supports_mem_op(struct spi_mem *mem,
+ const struct spi_mem_op *op)
+{
+ struct intel_spi *ispi = spi_master_get_devdata(mem->spi->master);
+ const struct intel_spi_mem_op *iop;
+
+ iop = intel_spi_match_mem_op(ispi, op);
+ if (!iop) {
+ dev_dbg(ispi->dev, "%#x not supported\n", op->cmd.opcode);
+ return false;
+ }
- ret = intel_spi_sw_cycle(ispi, nor->erase_opcode,
- 0, OPTYPE_WRITE_WITH_ADDR);
- if (ret)
- return ret;
+ /*
+ * For software sequencer check that the opcode is actually
+ * present in the opmenu if it is locked.
+ */
+ if (ispi->swseq_reg && ispi->locked) {
+ int i;
- offs += erase_size;
- len -= erase_size;
+ /* Check if it is in the locked opcodes list */
+ for (i = 0; i < ARRAY_SIZE(ispi->opcodes); i++) {
+ if (ispi->opcodes[i] == op->cmd.opcode)
+ return true;
}
- return 0;
+ dev_dbg(ispi->dev, "%#x not supported\n", op->cmd.opcode);
+ return false;
}
- /* Not needed with HW sequencer erase, make sure it is cleared */
- ispi->atomic_preopcode = 0;
+ return true;
+}
- while (len > 0) {
- writel(offs, ispi->base + FADDR);
+static int intel_spi_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+ struct intel_spi *ispi = spi_master_get_devdata(mem->spi->master);
+ const struct intel_spi_mem_op *iop;
- val = readl(ispi->base + HSFSTS_CTL);
- val &= ~(HSFSTS_CTL_FDBC_MASK | HSFSTS_CTL_FCYCLE_MASK);
- val |= HSFSTS_CTL_AEL | HSFSTS_CTL_FCERR | HSFSTS_CTL_FDONE;
- val |= cmd;
- val |= HSFSTS_CTL_FGO;
- writel(val, ispi->base + HSFSTS_CTL);
+ iop = intel_spi_match_mem_op(ispi, op);
+ if (!iop)
+ return -EOPNOTSUPP;
- ret = intel_spi_wait_hw_busy(ispi);
- if (ret)
- return ret;
+ return iop->exec_op(ispi, iop, op);
+}
- status = readl(ispi->base + HSFSTS_CTL);
- if (status & HSFSTS_CTL_FCERR)
- return -EIO;
- else if (status & HSFSTS_CTL_AEL)
- return -EACCES;
+static const char *intel_spi_get_name(struct spi_mem *mem)
+{
+ const struct intel_spi *ispi = spi_master_get_devdata(mem->spi->master);
+
+ /*
+ * Return name of the flash controller device to be compatible
+ * with the MTD version.
+ */
+ return dev_name(ispi->dev);
+}
+
+static const struct spi_controller_mem_ops intel_spi_mem_ops = {
+ .supports_op = intel_spi_supports_mem_op,
+ .exec_op = intel_spi_exec_mem_op,
+ .get_name = intel_spi_get_name,
+};
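Seen from the other side of these hooks, the spi-mem core consults ->supports_op() to filter and then dispatches to ->exec_op(). A minimal sketch of what a client such as the spi-nor core effectively submits for a JEDEC ID read, using the standard spi-mem op builders:

#include <linux/spi/spi-mem.h>

static int read_jedec_id_sketch(struct spi_mem *mem, u8 *id, size_t len)
{
	struct spi_mem_op op =
		SPI_MEM_OP(SPI_MEM_OP_CMD(0x9f, 1),	/* RDID */
			   SPI_MEM_OP_NO_ADDR,
			   SPI_MEM_OP_NO_DUMMY,
			   SPI_MEM_OP_DATA_IN(len, id, 1));

	/* The core checks ->supports_op(), then calls ->exec_op() */
	return spi_mem_exec_op(mem, &op);
}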
+
+#define INTEL_SPI_OP_ADDR(__nbytes) \
+ { \
+ .nbytes = __nbytes, \
+ }
+
+#define INTEL_SPI_OP_NO_DATA \
+ { \
+ .dir = SPI_MEM_NO_DATA, \
+ }
+
+#define INTEL_SPI_OP_DATA_IN(__buswidth) \
+ { \
+ .dir = SPI_MEM_DATA_IN, \
+ .buswidth = __buswidth, \
+ }
+
+#define INTEL_SPI_OP_DATA_OUT(__buswidth) \
+ { \
+ .dir = SPI_MEM_DATA_OUT, \
+ .buswidth = __buswidth, \
+ }
+
+#define INTEL_SPI_MEM_OP(__cmd, __addr, __data, __exec_op) \
+ { \
+ .mem_op = { \
+ .cmd = __cmd, \
+ .addr = __addr, \
+ .data = __data, \
+ }, \
+ .exec_op = __exec_op, \
+ }
+
+#define INTEL_SPI_MEM_OP_REPL(__cmd, __addr, __data, __exec_op, __repl) \
+ { \
+ .mem_op = { \
+ .cmd = __cmd, \
+ .addr = __addr, \
+ .data = __data, \
+ }, \
+ .exec_op = __exec_op, \
+ .replacement_op = __repl, \
+ }
+
+/*
+ * The controller handles pretty much everything internally based on the
+ * SFDP data but we want to make sure we only support the operations
+ * actually possible. Only check buswidth and transfer direction; the
+ * core validates the data.
+ */
+#define INTEL_SPI_GENERIC_OPS \
+ /* Status register operations */ \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDID, 1), \
+ SPI_MEM_OP_NO_ADDR, \
+ INTEL_SPI_OP_DATA_IN(1), \
+ intel_spi_read_reg), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDSR, 1), \
+ SPI_MEM_OP_NO_ADDR, \
+ INTEL_SPI_OP_DATA_IN(1), \
+ intel_spi_read_reg), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR, 1), \
+ SPI_MEM_OP_NO_ADDR, \
+ INTEL_SPI_OP_DATA_OUT(1), \
+ intel_spi_write_reg), \
+ /* Normal read */ \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ, 1), \
+ INTEL_SPI_OP_ADDR(3), \
+ INTEL_SPI_OP_DATA_IN(1), \
+ intel_spi_read), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ, 1), \
+ INTEL_SPI_OP_ADDR(3), \
+ INTEL_SPI_OP_DATA_IN(2), \
+ intel_spi_read), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ, 1), \
+ INTEL_SPI_OP_ADDR(3), \
+ INTEL_SPI_OP_DATA_IN(4), \
+ intel_spi_read), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ, 1), \
+ INTEL_SPI_OP_ADDR(4), \
+ INTEL_SPI_OP_DATA_IN(1), \
+ intel_spi_read), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ, 1), \
+ INTEL_SPI_OP_ADDR(4), \
+ INTEL_SPI_OP_DATA_IN(2), \
+ intel_spi_read), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ, 1), \
+ INTEL_SPI_OP_ADDR(4), \
+ INTEL_SPI_OP_DATA_IN(4), \
+ intel_spi_read), \
+ /* Fast read */ \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST, 1), \
+ INTEL_SPI_OP_ADDR(3), \
+ INTEL_SPI_OP_DATA_IN(1), \
+ intel_spi_read), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST, 1), \
+ INTEL_SPI_OP_ADDR(3), \
+ INTEL_SPI_OP_DATA_IN(2), \
+ intel_spi_read), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST, 1), \
+ INTEL_SPI_OP_ADDR(3), \
+ INTEL_SPI_OP_DATA_IN(4), \
+ intel_spi_read), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST, 1), \
+ INTEL_SPI_OP_ADDR(4), \
+ INTEL_SPI_OP_DATA_IN(1), \
+ intel_spi_read), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST, 1), \
+ INTEL_SPI_OP_ADDR(4), \
+ INTEL_SPI_OP_DATA_IN(2), \
+ intel_spi_read), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST, 1), \
+ INTEL_SPI_OP_ADDR(4), \
+ INTEL_SPI_OP_DATA_IN(4), \
+ intel_spi_read), \
+ /* Read with 4-byte address opcode */ \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_4B, 1), \
+ INTEL_SPI_OP_ADDR(4), \
+ INTEL_SPI_OP_DATA_IN(1), \
+ intel_spi_read), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_4B, 1), \
+ INTEL_SPI_OP_ADDR(4), \
+ INTEL_SPI_OP_DATA_IN(2), \
+ intel_spi_read), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_4B, 1), \
+ INTEL_SPI_OP_ADDR(4), \
+ INTEL_SPI_OP_DATA_IN(4), \
+ intel_spi_read), \
+ /* Fast read with 4-byte address opcode */ \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST_4B, 1), \
+ INTEL_SPI_OP_ADDR(4), \
+ INTEL_SPI_OP_DATA_IN(1), \
+ intel_spi_read), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST_4B, 1), \
+ INTEL_SPI_OP_ADDR(4), \
+ INTEL_SPI_OP_DATA_IN(2), \
+ intel_spi_read), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST_4B, 1), \
+ INTEL_SPI_OP_ADDR(4), \
+ INTEL_SPI_OP_DATA_IN(4), \
+ intel_spi_read), \
+ /* Write operations */ \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_PP, 1), \
+ INTEL_SPI_OP_ADDR(3), \
+ INTEL_SPI_OP_DATA_OUT(1), \
+ intel_spi_write), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_PP, 1), \
+ INTEL_SPI_OP_ADDR(4), \
+ INTEL_SPI_OP_DATA_OUT(1), \
+ intel_spi_write), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_PP_4B, 1), \
+ INTEL_SPI_OP_ADDR(4), \
+ INTEL_SPI_OP_DATA_OUT(1), \
+ intel_spi_write), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WREN, 1), \
+ SPI_MEM_OP_NO_ADDR, \
+ SPI_MEM_OP_NO_DATA, \
+ intel_spi_write_reg), \
+ INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRDI, 1), \
+ SPI_MEM_OP_NO_ADDR, \
+ SPI_MEM_OP_NO_DATA, \
+ intel_spi_write_reg), \
+ /* Erase operations */ \
+ INTEL_SPI_MEM_OP_REPL(SPI_MEM_OP_CMD(SPINOR_OP_BE_4K, 1), \
+ INTEL_SPI_OP_ADDR(3), \
+ SPI_MEM_OP_NO_DATA, \
+ intel_spi_erase, \
+ HSFSTS_CTL_FCYCLE_ERASE), \
+ INTEL_SPI_MEM_OP_REPL(SPI_MEM_OP_CMD(SPINOR_OP_BE_4K, 1), \
+ INTEL_SPI_OP_ADDR(4), \
+ SPI_MEM_OP_NO_DATA, \
+ intel_spi_erase, \
+ HSFSTS_CTL_FCYCLE_ERASE), \
+ INTEL_SPI_MEM_OP_REPL(SPI_MEM_OP_CMD(SPINOR_OP_BE_4K_4B, 1), \
+ INTEL_SPI_OP_ADDR(4), \
+ SPI_MEM_OP_NO_DATA, \
+ intel_spi_erase, \
+ HSFSTS_CTL_FCYCLE_ERASE) \
+
+static const struct intel_spi_mem_op generic_mem_ops[] = {
+ INTEL_SPI_GENERIC_OPS,
+ { },
+};
+
+static const struct intel_spi_mem_op erase_64k_mem_ops[] = {
+ INTEL_SPI_GENERIC_OPS,
+ /* 64k sector erase operations */
+ INTEL_SPI_MEM_OP_REPL(SPI_MEM_OP_CMD(SPINOR_OP_SE, 1),
+ INTEL_SPI_OP_ADDR(3),
+ SPI_MEM_OP_NO_DATA,
+ intel_spi_erase,
+ HSFSTS_CTL_FCYCLE_ERASE_64K),
+ INTEL_SPI_MEM_OP_REPL(SPI_MEM_OP_CMD(SPINOR_OP_SE, 1),
+ INTEL_SPI_OP_ADDR(4),
+ SPI_MEM_OP_NO_DATA,
+ intel_spi_erase,
+ HSFSTS_CTL_FCYCLE_ERASE_64K),
+ INTEL_SPI_MEM_OP_REPL(SPI_MEM_OP_CMD(SPINOR_OP_SE_4B, 1),
+ INTEL_SPI_OP_ADDR(4),
+ SPI_MEM_OP_NO_DATA,
+ intel_spi_erase,
+ HSFSTS_CTL_FCYCLE_ERASE_64K),
+ { },
+};
+
+static int intel_spi_init(struct intel_spi *ispi)
+{
+ u32 opmenu0, opmenu1, lvscc, uvscc, val;
+ bool erase_64k = false;
+ int i;
+
+ switch (ispi->info->type) {
+ case INTEL_SPI_BYT:
+ ispi->sregs = ispi->base + BYT_SSFSTS_CTL;
+ ispi->pregs = ispi->base + BYT_PR;
+ ispi->nregions = BYT_FREG_NUM;
+ ispi->pr_num = BYT_PR_NUM;
+ ispi->swseq_reg = true;
+ break;
+
+ case INTEL_SPI_LPT:
+ ispi->sregs = ispi->base + LPT_SSFSTS_CTL;
+ ispi->pregs = ispi->base + LPT_PR;
+ ispi->nregions = LPT_FREG_NUM;
+ ispi->pr_num = LPT_PR_NUM;
+ ispi->swseq_reg = true;
+ break;
+
+ case INTEL_SPI_BXT:
+ ispi->sregs = ispi->base + BXT_SSFSTS_CTL;
+ ispi->pregs = ispi->base + BXT_PR;
+ ispi->nregions = BXT_FREG_NUM;
+ ispi->pr_num = BXT_PR_NUM;
+ erase_64k = true;
+ break;
+
+ case INTEL_SPI_CNL:
+ ispi->sregs = NULL;
+ ispi->pregs = ispi->base + CNL_PR;
+ ispi->nregions = CNL_FREG_NUM;
+ ispi->pr_num = CNL_PR_NUM;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ /* Try to disable write protection if user asked to do so */
+ if (writeable && !intel_spi_set_writeable(ispi)) {
+ dev_warn(ispi->dev, "can't disable chip write protection\n");
+ writeable = false;
+ }
+
+ /* Disable #SMI generation from HW sequencer */
+ val = readl(ispi->base + HSFSTS_CTL);
+ val &= ~HSFSTS_CTL_FSMIE;
+ writel(val, ispi->base + HSFSTS_CTL);
- offs += erase_size;
- len -= erase_size;
+ /*
+ * Determine whether erase operation should use HW or SW sequencer.
+ *
+ * The HW sequencer has a predefined list of opcodes, with only the
+ * erase opcode being programmable in LVSCC and UVSCC registers.
+ * If these registers don't contain a valid erase opcode, erase
+ * cannot be done using HW sequencer.
+ */
+ lvscc = readl(ispi->base + LVSCC);
+ uvscc = readl(ispi->base + UVSCC);
+ if (!(lvscc & ERASE_OPCODE_MASK) || !(uvscc & ERASE_OPCODE_MASK))
+ ispi->swseq_erase = true;
+ /* SPI controller on Intel BXT supports 64K erase opcode */
+ if (ispi->info->type == INTEL_SPI_BXT && !ispi->swseq_erase)
+ if (!(lvscc & ERASE_64K_OPCODE_MASK) ||
+ !(uvscc & ERASE_64K_OPCODE_MASK))
+ erase_64k = false;
+
+ if (!ispi->sregs && (ispi->swseq_reg || ispi->swseq_erase)) {
+ dev_err(ispi->dev, "software sequencer not supported, but required\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Some controllers can only do basic operations using hardware
+ * sequencer. All other operations are supposed to be carried out
+ * using software sequencer.
+ */
+ if (ispi->swseq_reg) {
+ /* Disable #SMI generation from SW sequencer */
+ val = readl(ispi->sregs + SSFSTS_CTL);
+ val &= ~SSFSTS_CTL_FSMIE;
+ writel(val, ispi->sregs + SSFSTS_CTL);
}
+ /* Check controller's lock status */
+ val = readl(ispi->base + HSFSTS_CTL);
+ ispi->locked = !!(val & HSFSTS_CTL_FLOCKDN);
+
+ if (ispi->locked && ispi->sregs) {
+ /*
+ * BIOS programs allowed opcodes and then locks down the
+ * register. So read back what opcodes it decided to support.
+ * That's the set we are going to support as well.
+ */
+ opmenu0 = readl(ispi->sregs + OPMENU0);
+ opmenu1 = readl(ispi->sregs + OPMENU1);
+
+ if (opmenu0 && opmenu1) {
+ for (i = 0; i < ARRAY_SIZE(ispi->opcodes) / 2; i++) {
+ ispi->opcodes[i] = opmenu0 >> i * 8;
+ ispi->opcodes[i + 4] = opmenu1 >> i * 8;
+ }
+ }
+ }
+
+ if (erase_64k) {
+		dev_dbg(ispi->dev, "Using erase_64k memory operations\n");
+ ispi->mem_ops = erase_64k_mem_ops;
+ } else {
+		dev_dbg(ispi->dev, "Using generic memory operations\n");
+ ispi->mem_ops = generic_mem_ops;
+ }
+
+ intel_spi_dump_regs(ispi);
return 0;
}
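As a worked example of the opmenu unpacking above: if OPMENU0 reads 0x9f053502, the u8 truncation yields opcodes[0] = 0x02, opcodes[1] = 0x35, opcodes[2] = 0x05 and opcodes[3] = 0x9f; OPMENU1 fills opcodes[4]..opcodes[7] the same way, giving the eight opcodes the BIOS locked in.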
@@ -884,9 +1164,12 @@ static void intel_spi_fill_partition(struct intel_spi *ispi,
/*
* If any of the regions have protection bits set, make the
* whole partition read-only to be on the safe side.
+ *
+	 * Also, if the user did not ask for the chip to be writeable,
+ * mask the bit too.
*/
- if (intel_spi_is_protected(ispi, base, limit))
- ispi->writeable = false;
+ if (!writeable || intel_spi_is_protected(ispi, base, limit))
+ part->mask_flags |= MTD_WRITEABLE;
end = (limit << 12) + 4096;
if (end > part->size)
@@ -894,75 +1177,74 @@ static void intel_spi_fill_partition(struct intel_spi *ispi,
}
}
-static const struct spi_nor_controller_ops intel_spi_controller_ops = {
- .read_reg = intel_spi_read_reg,
- .write_reg = intel_spi_write_reg,
- .read = intel_spi_read,
- .write = intel_spi_write,
- .erase = intel_spi_erase,
-};
+static int intel_spi_populate_chip(struct intel_spi *ispi)
+{
+ struct flash_platform_data *pdata;
+ struct spi_board_info chip;
-struct intel_spi *intel_spi_probe(struct device *dev,
- struct resource *mem, const struct intel_spi_boardinfo *info)
+ pdata = devm_kzalloc(ispi->dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+
+ pdata->nr_parts = 1;
+ pdata->parts = devm_kcalloc(ispi->dev, sizeof(*pdata->parts),
+ pdata->nr_parts, GFP_KERNEL);
+ if (!pdata->parts)
+ return -ENOMEM;
+
+ intel_spi_fill_partition(ispi, pdata->parts);
+
+ memset(&chip, 0, sizeof(chip));
+ snprintf(chip.modalias, 8, "spi-nor");
+ chip.platform_data = pdata;
+
+ return spi_new_device(ispi->master, &chip) ? 0 : -ENODEV;
+}
+
+/**
+ * intel_spi_probe() - Probe the Intel SPI flash controller
+ * @dev: Pointer to the parent device
+ * @mem: MMIO resource
+ * @info: Platform specific information
+ *
+ * Probes the Intel SPI flash controller and creates the flash chip device.
+ * Returns %0 on success and a negative errno in case of failure.
+ */
+int intel_spi_probe(struct device *dev, struct resource *mem,
+ const struct intel_spi_boardinfo *info)
{
- const struct spi_nor_hwcaps hwcaps = {
- .mask = SNOR_HWCAPS_READ |
- SNOR_HWCAPS_READ_FAST |
- SNOR_HWCAPS_PP,
- };
- struct mtd_partition part;
+ struct spi_controller *master;
struct intel_spi *ispi;
int ret;
- if (!info || !mem)
- return ERR_PTR(-EINVAL);
+ master = devm_spi_alloc_master(dev, sizeof(*ispi));
+ if (!master)
+ return -ENOMEM;
+
+ master->mem_ops = &intel_spi_mem_ops;
- ispi = devm_kzalloc(dev, sizeof(*ispi), GFP_KERNEL);
- if (!ispi)
- return ERR_PTR(-ENOMEM);
+ ispi = spi_master_get_devdata(master);
ispi->base = devm_ioremap_resource(dev, mem);
if (IS_ERR(ispi->base))
- return ERR_CAST(ispi->base);
+ return PTR_ERR(ispi->base);
ispi->dev = dev;
+ ispi->master = master;
ispi->info = info;
- ispi->writeable = info->writeable;
ret = intel_spi_init(ispi);
if (ret)
- return ERR_PTR(ret);
-
- ispi->nor.dev = ispi->dev;
- ispi->nor.priv = ispi;
- ispi->nor.controller_ops = &intel_spi_controller_ops;
-
- ret = spi_nor_scan(&ispi->nor, NULL, &hwcaps);
- if (ret) {
- dev_info(dev, "failed to locate the chip\n");
- return ERR_PTR(ret);
- }
-
- intel_spi_fill_partition(ispi, &part);
-
- /* Prevent writes if not explicitly enabled */
- if (!ispi->writeable || !writeable)
- ispi->nor.mtd.flags &= ~MTD_WRITEABLE;
+ return ret;
- ret = mtd_device_register(&ispi->nor.mtd, &part, 1);
+ ret = devm_spi_register_master(dev, master);
if (ret)
- return ERR_PTR(ret);
+ return ret;
- return ispi;
+ return intel_spi_populate_chip(ispi);
}
EXPORT_SYMBOL_GPL(intel_spi_probe);
-int intel_spi_remove(struct intel_spi *ispi)
-{
- return mtd_device_unregister(&ispi->nor.mtd);
-}
-EXPORT_SYMBOL_GPL(intel_spi_remove);
-
MODULE_DESCRIPTION("Intel PCH/PCU SPI flash core driver");
MODULE_AUTHOR("Mika Westerberg <mika.westerberg@linux.intel.com>");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-intel.h b/drivers/spi/spi-intel.h
new file mode 100644
index 000000000000..a4f0327a46ff
--- /dev/null
+++ b/drivers/spi/spi-intel.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Intel PCH/PCU SPI flash driver.
+ *
+ * Copyright (C) 2016 - 2022, Intel Corporation
+ * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
+ */
+
+#ifndef SPI_INTEL_H
+#define SPI_INTEL_H
+
+#include <linux/platform_data/x86/spi-intel.h>
+
+struct resource;
+
+int intel_spi_probe(struct device *dev, struct resource *mem,
+ const struct intel_spi_boardinfo *info);
+
+#endif /* SPI_INTEL_H */
diff --git a/drivers/spi/spi-lantiq-ssc.c b/drivers/spi/spi-lantiq-ssc.c
index bcb52601804a..aae26f62ea87 100644
--- a/drivers/spi/spi-lantiq-ssc.c
+++ b/drivers/spi/spi-lantiq-ssc.c
@@ -906,17 +906,11 @@ static int lantiq_ssc_probe(struct platform_device *pdev)
struct spi_master *master;
struct lantiq_ssc_spi *spi;
const struct lantiq_ssc_hwcfg *hwcfg;
- const struct of_device_id *match;
u32 id, supports_dma, revision;
unsigned int num_cs;
int err;
- match = of_match_device(lantiq_ssc_match, dev);
- if (!match) {
- dev_err(dev, "no device match\n");
- return -EINVAL;
- }
- hwcfg = match->data;
+ hwcfg = of_device_get_match_data(dev);
master = spi_alloc_master(dev, sizeof(struct lantiq_ssc_spi));
if (!master)
diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c
index 37f4443ce9a0..e9d83d65873b 100644
--- a/drivers/spi/spi-mem.c
+++ b/drivers/spi/spi-mem.c
@@ -854,15 +854,13 @@ static int spi_mem_probe(struct spi_device *spi)
return memdrv->probe(mem);
}
-static int spi_mem_remove(struct spi_device *spi)
+static void spi_mem_remove(struct spi_device *spi)
{
struct spi_mem_driver *memdrv = to_spi_mem_drv(spi->dev.driver);
struct spi_mem *mem = spi_get_drvdata(spi);
if (memdrv->remove)
- return memdrv->remove(mem);
-
- return 0;
+ memdrv->remove(mem);
}
static void spi_mem_shutdown(struct spi_device *spi)
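Since a spi_device remove can no longer fail, the spi_mem_driver remove hook becomes void in the same series. A minimal sketch of an adapted driver; every example_* name here is hypothetical:

static int example_probe(struct spi_mem *mem)
{
	return 0;
}

static void example_remove(struct spi_mem *mem)
{
	struct example_priv *priv = spi_mem_get_drvdata(mem);

	/* No return value: cleanup must not fail */
	example_teardown(priv);
}

static struct spi_mem_driver example_mem_driver = {
	.spidrv = {
		.driver = { .name = "example-mem" },
	},
	.probe	= example_probe,
	.remove	= example_remove,
};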
diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c
index c208efeadd18..0bc7daa7afc8 100644
--- a/drivers/spi/spi-meson-spicc.c
+++ b/drivers/spi/spi-meson-spicc.c
@@ -693,6 +693,11 @@ static int meson_spicc_probe(struct platform_device *pdev)
writel_relaxed(0, spicc->base + SPICC_INTREG);
irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ ret = irq;
+ goto out_master;
+ }
+
ret = devm_request_irq(&pdev->dev, irq, meson_spicc_irq,
0, NULL, spicc);
if (ret) {
diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c
index 78a9bca8cc68..03630359ce70 100644
--- a/drivers/spi/spi-mpc512x-psc.c
+++ b/drivers/spi/spi-mpc512x-psc.c
@@ -23,7 +23,6 @@
#include <linux/clk.h>
#include <linux/spi/spi.h>
#include <linux/fsl_devices.h>
-#include <linux/gpio.h>
#include <asm/mpc52xx_psc.h>
enum {
@@ -128,17 +127,28 @@ static void mpc512x_psc_spi_activate_cs(struct spi_device *spi)
out_be32(psc_addr(mps, ccr), ccr);
mps->bits_per_word = cs->bits_per_word;
- if (mps->cs_control && gpio_is_valid(spi->cs_gpio))
- mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 1 : 0);
+ if (spi->cs_gpiod) {
+ if (mps->cs_control)
+ /* boardfile override */
+ mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 1 : 0);
+ else
+ /* gpiolib will deal with the inversion */
+ gpiod_set_value(spi->cs_gpiod, 1);
+ }
}
static void mpc512x_psc_spi_deactivate_cs(struct spi_device *spi)
{
struct mpc512x_psc_spi *mps = spi_master_get_devdata(spi->master);
- if (mps->cs_control && gpio_is_valid(spi->cs_gpio))
- mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 0 : 1);
-
+ if (spi->cs_gpiod) {
+ if (mps->cs_control)
+ /* boardfile override */
+ mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 0 : 1);
+ else
+ /* gpiolib will deal with the inversion */
+ gpiod_set_value(spi->cs_gpiod, 0);
+ }
}
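The key difference from the legacy calls being removed is that gpiod values are logical: 1 always means "assert", and gpiolib applies any active-low polarity declared in the device tree. Side by side, as a sketch:

static void cs_assert_sketch(struct spi_device *spi)
{
	/* Legacy: the driver had to fold in SPI_CS_HIGH itself */
	/* gpio_set_value(spi->cs_gpio, (spi->mode & SPI_CS_HIGH) ? 1 : 0); */

	/* gpiod: logical 1 == asserted; polarity handled by gpiolib */
	gpiod_set_value(spi->cs_gpiod, 1);
}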
/* extract and scale size field in txsz or rxsz */
@@ -363,7 +373,6 @@ static int mpc512x_psc_spi_unprep_xfer_hw(struct spi_master *master)
static int mpc512x_psc_spi_setup(struct spi_device *spi)
{
struct mpc512x_psc_spi_cs *cs = spi->controller_state;
- int ret;
if (spi->bits_per_word % 8)
return -EINVAL;
@@ -373,18 +382,6 @@ static int mpc512x_psc_spi_setup(struct spi_device *spi)
if (!cs)
return -ENOMEM;
- if (gpio_is_valid(spi->cs_gpio)) {
- ret = gpio_request(spi->cs_gpio, dev_name(&spi->dev));
- if (ret) {
- dev_err(&spi->dev, "can't get CS gpio: %d\n",
- ret);
- kfree(cs);
- return ret;
- }
- gpio_direction_output(spi->cs_gpio,
- spi->mode & SPI_CS_HIGH ? 0 : 1);
- }
-
spi->controller_state = cs;
}
@@ -396,8 +393,6 @@ static int mpc512x_psc_spi_setup(struct spi_device *spi)
static void mpc512x_psc_spi_cleanup(struct spi_device *spi)
{
- if (gpio_is_valid(spi->cs_gpio))
- gpio_free(spi->cs_gpio);
kfree(spi->controller_state);
}
@@ -476,11 +471,6 @@ static irqreturn_t mpc512x_psc_spi_isr(int irq, void *dev_id)
return IRQ_NONE;
}
-static void mpc512x_spi_cs_control(struct spi_device *spi, bool onoff)
-{
- gpio_set_value(spi->cs_gpio, onoff);
-}
-
static int mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr,
u32 size, unsigned int irq)
{
@@ -500,9 +490,7 @@ static int mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr,
mps->type = (int)of_device_get_match_data(dev);
mps->irq = irq;
- if (pdata == NULL) {
- mps->cs_control = mpc512x_spi_cs_control;
- } else {
+ if (pdata) {
mps->cs_control = pdata->cs_control;
master->bus_num = pdata->bus_num;
master->num_chipselect = pdata->max_chipselect;
@@ -513,6 +501,7 @@ static int mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr,
master->prepare_transfer_hardware = mpc512x_psc_spi_prep_xfer_hw;
master->transfer_one_message = mpc512x_psc_spi_msg_xfer;
master->unprepare_transfer_hardware = mpc512x_psc_spi_unprep_xfer_hw;
+ master->use_gpio_descriptors = true;
master->cleanup = mpc512x_psc_spi_cleanup;
master->dev.of_node = dev->of_node;
diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
index a15de10ee286..1a0b3208dfca 100644
--- a/drivers/spi/spi-mt65xx.c
+++ b/drivers/spi/spi-mt65xx.c
@@ -12,7 +12,7 @@
#include <linux/ioport.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_gpio.h>
+#include <linux/gpio/consumer.h>
#include <linux/platform_device.h>
#include <linux/platform_data/spi-mt65xx.h>
#include <linux/pm_runtime.h>
@@ -31,6 +31,7 @@
#define SPI_CFG2_REG 0x0028
#define SPI_TX_SRC_REG_64 0x002c
#define SPI_RX_DST_REG_64 0x0030
+#define SPI_CFG3_IPM_REG 0x0040
#define SPI_CFG0_SCK_HIGH_OFFSET 0
#define SPI_CFG0_SCK_LOW_OFFSET 8
@@ -43,11 +44,15 @@
#define SPI_CFG1_PACKET_LOOP_OFFSET 8
#define SPI_CFG1_PACKET_LENGTH_OFFSET 16
#define SPI_CFG1_GET_TICK_DLY_OFFSET 29
+#define SPI_CFG1_GET_TICK_DLY_OFFSET_V1 30
#define SPI_CFG1_GET_TICK_DLY_MASK 0xe0000000
+#define SPI_CFG1_GET_TICK_DLY_MASK_V1 0xc0000000
+
#define SPI_CFG1_CS_IDLE_MASK 0xff
#define SPI_CFG1_PACKET_LOOP_MASK 0xff00
#define SPI_CFG1_PACKET_LENGTH_MASK 0x3ff0000
+#define SPI_CFG1_IPM_PACKET_LENGTH_MASK GENMASK(31, 16)
#define SPI_CFG2_SCK_HIGH_OFFSET 0
#define SPI_CFG2_SCK_LOW_OFFSET 16
@@ -68,7 +73,13 @@
#define SPI_CMD_TX_ENDIAN BIT(15)
#define SPI_CMD_FINISH_IE BIT(16)
#define SPI_CMD_PAUSE_IE BIT(17)
+#define SPI_CMD_IPM_NONIDLE_MODE BIT(19)
+#define SPI_CMD_IPM_SPIM_LOOP BIT(21)
+#define SPI_CMD_IPM_GET_TICKDLY_OFFSET 22
+#define SPI_CMD_IPM_GET_TICKDLY_MASK GENMASK(24, 22)
+#define SPI_CFG3_IPM_HALF_DUPLEX_DIR BIT(2)
+#define SPI_CFG3_IPM_HALF_DUPLEX_EN BIT(3)
#define MT8173_SPI_MAX_PAD_SEL 3
#define MTK_SPI_PAUSE_INT_STATUS 0x2
@@ -78,6 +89,7 @@
#define MTK_SPI_MAX_FIFO_SIZE 32U
#define MTK_SPI_PACKET_SIZE 1024
+#define MTK_SPI_IPM_PACKET_SIZE SZ_64K
#define MTK_SPI_32BITS_MASK (0xffffffff)
#define DMA_ADDR_EXT_BITS (36)
@@ -93,6 +105,9 @@ struct mtk_spi_compatible {
bool dma_ext;
/* some IC no need unprepare SPI clk */
bool no_need_unprepare;
+	/* the IPM design adjusts and extends registers to support more features */
+	bool ipm_design;
};
struct mtk_spi {
@@ -116,6 +131,12 @@ static const struct mtk_spi_compatible mt2712_compat = {
.must_tx = true,
};
+static const struct mtk_spi_compatible mtk_ipm_compat = {
+ .enhance_timing = true,
+ .dma_ext = true,
+ .ipm_design = true,
+};
+
static const struct mtk_spi_compatible mt6765_compat = {
.need_pad_sel = true,
.must_tx = true,
@@ -157,6 +178,9 @@ static const struct mtk_chip_config mtk_default_chip_info = {
};
static const struct of_device_id mtk_spi_of_match[] = {
+ { .compatible = "mediatek,spi-ipm",
+ .data = (void *)&mtk_ipm_compat,
+ },
{ .compatible = "mediatek,mt2701-spi",
.data = (void *)&mtk_common_compat,
},
@@ -275,12 +299,11 @@ static int mtk_spi_set_hw_cs_timing(struct spi_device *spi)
return 0;
}
-static int mtk_spi_prepare_message(struct spi_master *master,
- struct spi_message *msg)
+static int mtk_spi_hw_init(struct spi_master *master,
+ struct spi_device *spi)
{
u16 cpha, cpol;
u32 reg_val;
- struct spi_device *spi = msg->spi;
struct mtk_chip_config *chip_config = spi->controller_data;
struct mtk_spi *mdata = spi_master_get_devdata(master);
@@ -288,6 +311,15 @@ static int mtk_spi_prepare_message(struct spi_master *master,
cpol = spi->mode & SPI_CPOL ? 1 : 0;
reg_val = readl(mdata->base + SPI_CMD_REG);
+ if (mdata->dev_comp->ipm_design) {
+ /* SPI transfer without idle time until packet length done */
+ reg_val |= SPI_CMD_IPM_NONIDLE_MODE;
+ if (spi->mode & SPI_LOOP)
+ reg_val |= SPI_CMD_IPM_SPIM_LOOP;
+ else
+ reg_val &= ~SPI_CMD_IPM_SPIM_LOOP;
+ }
+
if (cpha)
reg_val |= SPI_CMD_CPHA;
else
@@ -345,17 +377,39 @@ static int mtk_spi_prepare_message(struct spi_master *master,
mdata->base + SPI_PAD_SEL_REG);
/* tick delay */
- reg_val = readl(mdata->base + SPI_CFG1_REG);
- reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK;
- reg_val |= ((chip_config->tick_delay & 0x7)
- << SPI_CFG1_GET_TICK_DLY_OFFSET);
- writel(reg_val, mdata->base + SPI_CFG1_REG);
+ if (mdata->dev_comp->enhance_timing) {
+ if (mdata->dev_comp->ipm_design) {
+ reg_val = readl(mdata->base + SPI_CMD_REG);
+ reg_val &= ~SPI_CMD_IPM_GET_TICKDLY_MASK;
+ reg_val |= ((chip_config->tick_delay & 0x7)
+ << SPI_CMD_IPM_GET_TICKDLY_OFFSET);
+ writel(reg_val, mdata->base + SPI_CMD_REG);
+ } else {
+ reg_val = readl(mdata->base + SPI_CFG1_REG);
+ reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK;
+ reg_val |= ((chip_config->tick_delay & 0x7)
+ << SPI_CFG1_GET_TICK_DLY_OFFSET);
+ writel(reg_val, mdata->base + SPI_CFG1_REG);
+ }
+ } else {
+ reg_val = readl(mdata->base + SPI_CFG1_REG);
+ reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK_V1;
+ reg_val |= ((chip_config->tick_delay & 0x3)
+ << SPI_CFG1_GET_TICK_DLY_OFFSET_V1);
+ writel(reg_val, mdata->base + SPI_CFG1_REG);
+ }
/* set hw cs timing */
mtk_spi_set_hw_cs_timing(spi);
return 0;
}
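Each branch of the tick-delay code above is the same read-modify-write of a register bitfield, parameterized only by mask and offset. The generic shape, as a sketch:

static void rmw_field_sketch(void __iomem *reg, u32 mask, u32 shift, u32 val)
{
	u32 v = readl(reg);

	v &= ~mask;			/* clear the field */
	v |= (val << shift) & mask;	/* insert the new value */
	writel(v, reg);
}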
+static int mtk_spi_prepare_message(struct spi_master *master,
+ struct spi_message *msg)
+{
+ return mtk_spi_hw_init(master, msg->spi);
+}
+
static void mtk_spi_set_cs(struct spi_device *spi, bool enable)
{
u32 reg_val;
@@ -377,13 +431,13 @@ static void mtk_spi_set_cs(struct spi_device *spi, bool enable)
}
static void mtk_spi_prepare_transfer(struct spi_master *master,
- struct spi_transfer *xfer)
+ u32 speed_hz)
{
u32 div, sck_time, reg_val;
struct mtk_spi *mdata = spi_master_get_devdata(master);
- if (xfer->speed_hz < mdata->spi_clk_hz / 2)
- div = DIV_ROUND_UP(mdata->spi_clk_hz, xfer->speed_hz);
+ if (speed_hz < mdata->spi_clk_hz / 2)
+ div = DIV_ROUND_UP(mdata->spi_clk_hz, speed_hz);
else
div = 1;
@@ -414,12 +468,24 @@ static void mtk_spi_setup_packet(struct spi_master *master)
u32 packet_size, packet_loop, reg_val;
struct mtk_spi *mdata = spi_master_get_devdata(master);
- packet_size = min_t(u32, mdata->xfer_len, MTK_SPI_PACKET_SIZE);
+ if (mdata->dev_comp->ipm_design)
+ packet_size = min_t(u32,
+ mdata->xfer_len,
+ MTK_SPI_IPM_PACKET_SIZE);
+ else
+ packet_size = min_t(u32,
+ mdata->xfer_len,
+ MTK_SPI_PACKET_SIZE);
+
packet_loop = mdata->xfer_len / packet_size;
reg_val = readl(mdata->base + SPI_CFG1_REG);
- reg_val &= ~(SPI_CFG1_PACKET_LENGTH_MASK | SPI_CFG1_PACKET_LOOP_MASK);
+ if (mdata->dev_comp->ipm_design)
+ reg_val &= ~SPI_CFG1_IPM_PACKET_LENGTH_MASK;
+ else
+ reg_val &= ~SPI_CFG1_PACKET_LENGTH_MASK;
reg_val |= (packet_size - 1) << SPI_CFG1_PACKET_LENGTH_OFFSET;
+ reg_val &= ~SPI_CFG1_PACKET_LOOP_MASK;
reg_val |= (packet_loop - 1) << SPI_CFG1_PACKET_LOOP_OFFSET;
writel(reg_val, mdata->base + SPI_CFG1_REG);
}
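A worked example for the non-IPM case: with xfer_len = 3072, packet_size = min(3072, 1024) = 1024 and packet_loop = 3072 / 1024 = 3, so the length field is programmed with 1024 - 1 and the loop field with 3 - 1. The IPM design only raises the packet_size cap to 64 KiB and widens the length mask to match.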
@@ -514,7 +580,7 @@ static int mtk_spi_fifo_transfer(struct spi_master *master,
mdata->cur_transfer = xfer;
mdata->xfer_len = min(MTK_SPI_MAX_FIFO_SIZE, xfer->len);
mdata->num_xfered = 0;
- mtk_spi_prepare_transfer(master, xfer);
+ mtk_spi_prepare_transfer(master, xfer->speed_hz);
mtk_spi_setup_packet(master);
if (xfer->tx_buf) {
@@ -547,7 +613,7 @@ static int mtk_spi_dma_transfer(struct spi_master *master,
mdata->cur_transfer = xfer;
mdata->num_xfered = 0;
- mtk_spi_prepare_transfer(master, xfer);
+ mtk_spi_prepare_transfer(master, xfer->speed_hz);
cmd = readl(mdata->base + SPI_CMD_REG);
if (xfer->tx_buf)
@@ -582,6 +648,19 @@ static int mtk_spi_transfer_one(struct spi_master *master,
struct spi_device *spi,
struct spi_transfer *xfer)
{
+ struct mtk_spi *mdata = spi_master_get_devdata(spi->master);
+ u32 reg_val = 0;
+
+ /* prepare xfer direction and duplex mode */
+ if (mdata->dev_comp->ipm_design) {
+ if (!xfer->tx_buf || !xfer->rx_buf) {
+ reg_val |= SPI_CFG3_IPM_HALF_DUPLEX_EN;
+ if (xfer->rx_buf)
+ reg_val |= SPI_CFG3_IPM_HALF_DUPLEX_DIR;
+ }
+ writel(reg_val, mdata->base + SPI_CFG3_IPM_REG);
+ }
+
if (master->can_dma(master, spi, xfer))
return mtk_spi_dma_transfer(master, spi, xfer);
else
@@ -605,8 +684,9 @@ static int mtk_spi_setup(struct spi_device *spi)
if (!spi->controller_data)
spi->controller_data = (void *)&mtk_default_chip_info;
- if (mdata->dev_comp->need_pad_sel && gpio_is_valid(spi->cs_gpio))
- gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
+ if (mdata->dev_comp->need_pad_sel && spi->cs_gpiod)
+ /* CS de-asserted, gpiolib will handle inversion */
+ gpiod_direction_output(spi->cs_gpiod, 0);
return 0;
}
@@ -624,7 +704,7 @@ static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id)
else
mdata->state = MTK_SPI_IDLE;
- if (!master->can_dma(master, master->cur_msg->spi, trans)) {
+ if (!master->can_dma(master, NULL, trans)) {
if (trans->rx_buf) {
cnt = mdata->xfer_len / 4;
ioread32_rep(mdata->base + SPI_RX_DATA_REG,
@@ -730,6 +810,7 @@ static int mtk_spi_probe(struct platform_device *pdev)
master->can_dma = mtk_spi_can_dma;
master->setup = mtk_spi_setup;
master->set_cs_timing = mtk_spi_set_hw_cs_timing;
+ master->use_gpio_descriptors = true;
of_id = of_match_node(mtk_spi_of_match, pdev->dev.of_node);
if (!of_id) {
@@ -746,6 +827,8 @@ static int mtk_spi_probe(struct platform_device *pdev)
if (mdata->dev_comp->must_tx)
master->flags = SPI_MASTER_MUST_TX;
+ if (mdata->dev_comp->ipm_design)
+ master->mode_bits |= SPI_LOOP;
if (mdata->dev_comp->need_pad_sel) {
mdata->pad_num = of_property_count_u32_elems(
@@ -853,25 +936,12 @@ static int mtk_spi_probe(struct platform_device *pdev)
goto err_disable_runtime_pm;
}
- if (!master->cs_gpios && master->num_chipselect > 1) {
+ if (!master->cs_gpiods && master->num_chipselect > 1) {
dev_err(&pdev->dev,
"cs_gpios not specified and num_chipselect > 1\n");
ret = -EINVAL;
goto err_disable_runtime_pm;
}
-
- if (master->cs_gpios) {
- for (i = 0; i < master->num_chipselect; i++) {
- ret = devm_gpio_request(&pdev->dev,
- master->cs_gpios[i],
- dev_name(&pdev->dev));
- if (ret) {
- dev_err(&pdev->dev,
- "can't get CS GPIO %i\n", i);
- goto err_disable_runtime_pm;
- }
- }
- }
}
if (mdata->dev_comp->dma_ext)
diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c
index 5c93730615f8..94fb09696677 100644
--- a/drivers/spi/spi-mtk-nor.c
+++ b/drivers/spi/spi-mtk-nor.c
@@ -95,6 +95,17 @@
#define CLK_TO_US(sp, clkcnt) DIV_ROUND_UP(clkcnt, sp->spi_freq / 1000000)
+struct mtk_nor_caps {
+ u8 dma_bits;
+
+	/* extra_dummy_bit is added for the IP of new SoCs.
+	 * Some new SoCs modify the timing of fetching registers' values
+	 * and IDs of nor flash; they need an extra_dummy_bit, which adds
+	 * more clock cycles for fetching data.
+	 */
+ u8 extra_dummy_bit;
+};
+
struct mtk_nor {
struct spi_controller *ctlr;
struct device *dev;
@@ -104,11 +115,13 @@ struct mtk_nor {
struct clk *spi_clk;
struct clk *ctlr_clk;
struct clk *axi_clk;
+ struct clk *axi_s_clk;
unsigned int spi_freq;
bool wbuf_en;
bool has_irq;
bool high_dma;
struct completion op_done;
+ const struct mtk_nor_caps *caps;
};
static inline void mtk_nor_rmw(struct mtk_nor *sp, u32 reg, u32 set, u32 clr)
@@ -554,7 +567,12 @@ static int mtk_nor_spi_mem_prg(struct mtk_nor *sp, const struct spi_mem_op *op)
}
// trigger op
- writel(prg_len * BITS_PER_BYTE, sp->base + MTK_NOR_REG_PRG_CNT);
+ if (rx_len)
+ writel(prg_len * BITS_PER_BYTE + sp->caps->extra_dummy_bit,
+ sp->base + MTK_NOR_REG_PRG_CNT);
+ else
+ writel(prg_len * BITS_PER_BYTE, sp->base + MTK_NOR_REG_PRG_CNT);
+
ret = mtk_nor_cmd_exec(sp, MTK_NOR_CMD_PROGRAM,
prg_len * BITS_PER_BYTE);
if (ret)
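For example, on a SoC with extra_dummy_bit = 1 (the mt8186 entry added below), a 5-byte program sequence that expects read data programs 5 * 8 + 1 = 41 bit-times into PRG_CNT, while a write-only sequence keeps the nominal 40; the completion wait in mtk_nor_cmd_exec() still uses the unpadded bit count.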
@@ -674,6 +692,7 @@ static void mtk_nor_disable_clk(struct mtk_nor *sp)
clk_disable_unprepare(sp->spi_clk);
clk_disable_unprepare(sp->ctlr_clk);
clk_disable_unprepare(sp->axi_clk);
+ clk_disable_unprepare(sp->axi_s_clk);
}
static int mtk_nor_enable_clk(struct mtk_nor *sp)
@@ -697,6 +716,14 @@ static int mtk_nor_enable_clk(struct mtk_nor *sp)
return ret;
}
+ ret = clk_prepare_enable(sp->axi_s_clk);
+ if (ret) {
+ clk_disable_unprepare(sp->spi_clk);
+ clk_disable_unprepare(sp->ctlr_clk);
+ clk_disable_unprepare(sp->axi_clk);
+ return ret;
+ }
+
return 0;
}
@@ -743,9 +770,25 @@ static const struct spi_controller_mem_ops mtk_nor_mem_ops = {
.exec_op = mtk_nor_exec_op
};
+static const struct mtk_nor_caps mtk_nor_caps_mt8173 = {
+ .dma_bits = 32,
+ .extra_dummy_bit = 0,
+};
+
+static const struct mtk_nor_caps mtk_nor_caps_mt8186 = {
+ .dma_bits = 32,
+ .extra_dummy_bit = 1,
+};
+
+static const struct mtk_nor_caps mtk_nor_caps_mt8192 = {
+ .dma_bits = 36,
+ .extra_dummy_bit = 0,
+};
+
static const struct of_device_id mtk_nor_match[] = {
- { .compatible = "mediatek,mt8192-nor", .data = (void *)36 },
- { .compatible = "mediatek,mt8173-nor", .data = (void *)32 },
+ { .compatible = "mediatek,mt8173-nor", .data = &mtk_nor_caps_mt8173 },
+ { .compatible = "mediatek,mt8186-nor", .data = &mtk_nor_caps_mt8186 },
+ { .compatible = "mediatek,mt8192-nor", .data = &mtk_nor_caps_mt8192 },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, mtk_nor_match);
@@ -754,10 +797,10 @@ static int mtk_nor_probe(struct platform_device *pdev)
{
struct spi_controller *ctlr;
struct mtk_nor *sp;
+ struct mtk_nor_caps *caps;
void __iomem *base;
- struct clk *spi_clk, *ctlr_clk, *axi_clk;
+ struct clk *spi_clk, *ctlr_clk, *axi_clk, *axi_s_clk;
int ret, irq;
- unsigned long dma_bits;
base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(base))
@@ -775,10 +818,16 @@ static int mtk_nor_probe(struct platform_device *pdev)
if (IS_ERR(axi_clk))
return PTR_ERR(axi_clk);
- dma_bits = (unsigned long)of_device_get_match_data(&pdev->dev);
- if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(dma_bits))) {
- dev_err(&pdev->dev, "failed to set dma mask(%lu)\n", dma_bits);
- return -EINVAL;
+ axi_s_clk = devm_clk_get_optional(&pdev->dev, "axi_s");
+ if (IS_ERR(axi_s_clk))
+ return PTR_ERR(axi_s_clk);
+
+ caps = (struct mtk_nor_caps *)of_device_get_match_data(&pdev->dev);
+
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(caps->dma_bits));
+ if (ret) {
+ dev_err(&pdev->dev, "failed to set dma mask(%u)\n", caps->dma_bits);
+ return ret;
}
ctlr = devm_spi_alloc_master(&pdev->dev, sizeof(*sp));
@@ -808,7 +857,9 @@ static int mtk_nor_probe(struct platform_device *pdev)
sp->spi_clk = spi_clk;
sp->ctlr_clk = ctlr_clk;
sp->axi_clk = axi_clk;
- sp->high_dma = (dma_bits > 32);
+ sp->axi_s_clk = axi_s_clk;
+ sp->caps = caps;
+ sp->high_dma = caps->dma_bits > 32;
sp->buffer = dmam_alloc_coherent(&pdev->dev,
MTK_NOR_BOUNCE_BUF_SIZE + MTK_NOR_DMA_ALIGN,
&sp->buffer_dma, GFP_KERNEL);
diff --git a/drivers/spi/spi-npcm-fiu.c b/drivers/spi/spi-npcm-fiu.c
index b62471ab6d7f..ba67dbed9fb8 100644
--- a/drivers/spi/spi-npcm-fiu.c
+++ b/drivers/spi/spi-npcm-fiu.c
@@ -201,7 +201,7 @@ struct fiu_data {
int fiu_max;
};
-static const struct npcm_fiu_info npxm7xx_fiu_info[] = {
+static const struct npcm_fiu_info npcm7xx_fiu_info[] = {
{.name = "FIU0", .fiu_id = FIU0,
.max_map_size = MAP_SIZE_128MB, .max_cs = 2},
{.name = "FIU3", .fiu_id = FIU3,
@@ -209,8 +209,8 @@ static const struct npcm_fiu_info npxm7xx_fiu_info[] = {
{.name = "FIUX", .fiu_id = FIUX,
.max_map_size = MAP_SIZE_16MB, .max_cs = 2} };
-static const struct fiu_data npxm7xx_fiu_data = {
- .npcm_fiu_data_info = npxm7xx_fiu_info,
+static const struct fiu_data npcm7xx_fiu_data = {
+ .npcm_fiu_data_info = npcm7xx_fiu_info,
.fiu_max = 3,
};
@@ -664,14 +664,13 @@ static const struct spi_controller_mem_ops npcm_fiu_mem_ops = {
};
static const struct of_device_id npcm_fiu_dt_ids[] = {
- { .compatible = "nuvoton,npcm750-fiu", .data = &npxm7xx_fiu_data },
+ { .compatible = "nuvoton,npcm750-fiu", .data = &npcm7xx_fiu_data },
{ /* sentinel */ }
};
static int npcm_fiu_probe(struct platform_device *pdev)
{
const struct fiu_data *fiu_data_match;
- const struct of_device_id *match;
struct device *dev = &pdev->dev;
struct spi_controller *ctrl;
struct npcm_fiu_spi *fiu;
@@ -685,13 +684,12 @@ static int npcm_fiu_probe(struct platform_device *pdev)
fiu = spi_controller_get_devdata(ctrl);
- match = of_match_device(npcm_fiu_dt_ids, dev);
- if (!match || !match->data) {
+ fiu_data_match = of_device_get_match_data(dev);
+ if (!fiu_data_match) {
dev_err(dev, "No compatible OF match\n");
return -ENODEV;
}
- fiu_data_match = match->data;
id = of_alias_get_id(dev->of_node, "fiu");
if (id < 0 || id >= fiu_data_match->fiu_max) {
dev_err(dev, "Invalid platform device id: %d\n", id);
diff --git a/drivers/spi/spi-pic32.c b/drivers/spi/spi-pic32.c
index f86433b29260..7e5c09a7d489 100644
--- a/drivers/spi/spi-pic32.c
+++ b/drivers/spi/spi-pic32.c
@@ -591,18 +591,16 @@ static int pic32_spi_setup(struct spi_device *spi)
* unreliable/erroneous SPI transactions.
* To avoid that we will always handle /CS by toggling GPIO.
*/
- if (!gpio_is_valid(spi->cs_gpio))
+ if (!spi->cs_gpiod)
return -EINVAL;
- gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
-
return 0;
}
static void pic32_spi_cleanup(struct spi_device *spi)
{
- /* de-activate cs-gpio */
- gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
+ /* de-activate cs-gpio, gpiolib will handle inversion */
+ gpiod_direction_output(spi->cs_gpiod, 0);
}
static int pic32_spi_dma_prep(struct pic32_spi *pic32s, struct device *dev)
@@ -784,6 +782,7 @@ static int pic32_spi_probe(struct platform_device *pdev)
master->unprepare_message = pic32_spi_unprepare_message;
master->prepare_transfer_hardware = pic32_spi_prepare_hardware;
master->unprepare_transfer_hardware = pic32_spi_unprepare_hardware;
+ master->use_gpio_descriptors = true;
/* optional DMA support */
ret = pic32_spi_dma_prep(pic32s, &pdev->dev);
diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index 2e134eb4bd2c..861b21c63504 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -15,32 +15,20 @@
#include <linux/dmaengine.h>
#include <linux/platform_data/dma-dw.h>
-enum {
- PORT_QUARK_X1000,
- PORT_BYT,
- PORT_MRFLD,
- PORT_BSW0,
- PORT_BSW1,
- PORT_BSW2,
- PORT_CE4100,
- PORT_LPT0,
- PORT_LPT1,
-};
+#define PCI_DEVICE_ID_INTEL_QUARK_X1000 0x0935
+#define PCI_DEVICE_ID_INTEL_BYT 0x0f0e
+#define PCI_DEVICE_ID_INTEL_MRFLD 0x1194
+#define PCI_DEVICE_ID_INTEL_BSW0 0x228e
+#define PCI_DEVICE_ID_INTEL_BSW1 0x2290
+#define PCI_DEVICE_ID_INTEL_BSW2 0x22ac
+#define PCI_DEVICE_ID_INTEL_CE4100 0x2e6a
+#define PCI_DEVICE_ID_INTEL_LPT0_0 0x9c65
+#define PCI_DEVICE_ID_INTEL_LPT0_1 0x9c66
+#define PCI_DEVICE_ID_INTEL_LPT1_0 0x9ce5
+#define PCI_DEVICE_ID_INTEL_LPT1_1 0x9ce6
struct pxa_spi_info {
- enum pxa_ssp_type type;
- int port_id;
- int num_chipselect;
- unsigned long max_clk_rate;
-
- /* DMA channel request parameters */
- bool (*dma_filter)(struct dma_chan *chan, void *param);
- void *tx_param;
- void *rx_param;
-
- int dma_burst_size;
-
- int (*setup)(struct pci_dev *pdev, struct pxa_spi_info *c);
+ int (*setup)(struct pci_dev *pdev, struct pxa2xx_spi_controller *c);
};
static struct dw_dma_slave byt_tx_param = { .dst_id = 0 };
@@ -65,6 +53,24 @@ static struct dw_dma_slave lpt1_rx_param = { .src_id = 1 };
static struct dw_dma_slave lpt0_tx_param = { .dst_id = 2 };
static struct dw_dma_slave lpt0_rx_param = { .src_id = 3 };
+static void pxa2xx_spi_pci_clk_unregister(void *clk)
+{
+ clk_unregister(clk);
+}
+
+static int pxa2xx_spi_pci_clk_register(struct pci_dev *dev, struct ssp_device *ssp,
+ unsigned long rate)
+{
+ char buf[40];
+
+ snprintf(buf, sizeof(buf), "pxa2xx-spi.%d", ssp->port_id);
+ ssp->clk = clk_register_fixed_rate(&dev->dev, buf, NULL, 0, rate);
+ if (IS_ERR(ssp->clk))
+ return PTR_ERR(ssp->clk);
+
+ return devm_add_action_or_reset(&dev->dev, pxa2xx_spi_pci_clk_unregister, ssp->clk);
+}
+
static bool lpss_dma_filter(struct dma_chan *chan, void *param)
{
struct dw_dma_slave *dws = param;
@@ -76,55 +82,131 @@ static bool lpss_dma_filter(struct dma_chan *chan, void *param)
return true;
}
-static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
+static void lpss_dma_put_device(void *dma_dev)
{
+ pci_dev_put(dma_dev);
+}
+
+static int lpss_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c)
+{
+ struct ssp_device *ssp = &c->ssp;
+ struct dw_dma_slave *tx, *rx;
struct pci_dev *dma_dev;
+ int ret;
- c->num_chipselect = 1;
- c->max_clk_rate = 50000000;
+ switch (dev->device) {
+ case PCI_DEVICE_ID_INTEL_BYT:
+ ssp->type = LPSS_BYT_SSP;
+ ssp->port_id = 0;
+ c->tx_param = &byt_tx_param;
+ c->rx_param = &byt_rx_param;
+ break;
+ case PCI_DEVICE_ID_INTEL_BSW0:
+ ssp->type = LPSS_BSW_SSP;
+ ssp->port_id = 0;
+ c->tx_param = &bsw0_tx_param;
+ c->rx_param = &bsw0_rx_param;
+ break;
+ case PCI_DEVICE_ID_INTEL_BSW1:
+ ssp->type = LPSS_BSW_SSP;
+ ssp->port_id = 1;
+ c->tx_param = &bsw1_tx_param;
+ c->rx_param = &bsw1_rx_param;
+ break;
+ case PCI_DEVICE_ID_INTEL_BSW2:
+ ssp->type = LPSS_BSW_SSP;
+ ssp->port_id = 2;
+ c->tx_param = &bsw2_tx_param;
+ c->rx_param = &bsw2_rx_param;
+ break;
+ case PCI_DEVICE_ID_INTEL_LPT0_0:
+ case PCI_DEVICE_ID_INTEL_LPT1_0:
+ ssp->type = LPSS_LPT_SSP;
+ ssp->port_id = 0;
+ c->tx_param = &lpt0_tx_param;
+ c->rx_param = &lpt0_rx_param;
+ break;
+ case PCI_DEVICE_ID_INTEL_LPT0_1:
+ case PCI_DEVICE_ID_INTEL_LPT1_1:
+ ssp->type = LPSS_LPT_SSP;
+ ssp->port_id = 1;
+ c->tx_param = &lpt1_tx_param;
+ c->rx_param = &lpt1_rx_param;
+ break;
+ default:
+ return -ENODEV;
+ }
- dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
+ c->num_chipselect = 1;
- if (c->tx_param) {
- struct dw_dma_slave *slave = c->tx_param;
+ ret = pxa2xx_spi_pci_clk_register(dev, ssp, 50000000);
+ if (ret)
+ return ret;
- slave->dma_dev = &dma_dev->dev;
- slave->m_master = 0;
- slave->p_master = 1;
- }
+ dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
+ ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev);
+ if (ret)
+ return ret;
- if (c->rx_param) {
- struct dw_dma_slave *slave = c->rx_param;
+ tx = c->tx_param;
+ tx->dma_dev = &dma_dev->dev;
+ tx->m_master = 0;
+ tx->p_master = 1;
- slave->dma_dev = &dma_dev->dev;
- slave->m_master = 0;
- slave->p_master = 1;
- }
+ rx = c->rx_param;
+ rx->dma_dev = &dma_dev->dev;
+ rx->m_master = 0;
+ rx->p_master = 1;
c->dma_filter = lpss_dma_filter;
+ c->dma_burst_size = 1;
+ c->enable_dma = 1;
return 0;
}
-static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
+static const struct pxa_spi_info lpss_info_config = {
+ .setup = lpss_spi_setup,
+};
+
+static int ce4100_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c)
{
- struct pci_dev *dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0));
+ struct ssp_device *ssp = &c->ssp;
+
+ ssp->type = PXA25x_SSP;
+ ssp->port_id = dev->devfn;
+ c->num_chipselect = dev->devfn;
+
+ return pxa2xx_spi_pci_clk_register(dev, ssp, 3686400);
+}
+
+static const struct pxa_spi_info ce4100_info_config = {
+ .setup = ce4100_spi_setup,
+};
+
+static int mrfld_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c)
+{
+ struct ssp_device *ssp = &c->ssp;
struct dw_dma_slave *tx, *rx;
+ struct pci_dev *dma_dev;
+ int ret;
+
+ ssp->type = MRFLD_SSP;
switch (PCI_FUNC(dev->devfn)) {
case 0:
- c->port_id = 3;
+ ssp->port_id = 3;
c->num_chipselect = 1;
c->tx_param = &mrfld3_tx_param;
c->rx_param = &mrfld3_rx_param;
break;
case 1:
- c->port_id = 5;
+ ssp->port_id = 5;
c->num_chipselect = 4;
c->tx_param = &mrfld5_tx_param;
c->rx_param = &mrfld5_rx_param;
break;
case 2:
- c->port_id = 6;
+ ssp->port_id = 6;
c->num_chipselect = 1;
c->tx_param = &mrfld6_tx_param;
c->rx_param = &mrfld6_rx_param;
@@ -133,6 +215,15 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
return -ENODEV;
}
+ ret = pxa2xx_spi_pci_clk_register(dev, ssp, 25000000);
+ if (ret)
+ return ret;
+
+ dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0));
+ ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev);
+ if (ret)
+ return ret;
+
tx = c->tx_param;
tx->dma_dev = &dma_dev->dev;
@@ -141,81 +232,38 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
c->dma_filter = lpss_dma_filter;
c->dma_burst_size = 8;
+ c->enable_dma = 1;
return 0;
}
-static struct pxa_spi_info spi_info_configs[] = {
- [PORT_CE4100] = {
- .type = PXA25x_SSP,
- .port_id = -1,
- .num_chipselect = -1,
- .max_clk_rate = 3686400,
- },
- [PORT_BYT] = {
- .type = LPSS_BYT_SSP,
- .port_id = 0,
- .setup = lpss_spi_setup,
- .tx_param = &byt_tx_param,
- .rx_param = &byt_rx_param,
- },
- [PORT_BSW0] = {
- .type = LPSS_BSW_SSP,
- .port_id = 0,
- .setup = lpss_spi_setup,
- .tx_param = &bsw0_tx_param,
- .rx_param = &bsw0_rx_param,
- },
- [PORT_BSW1] = {
- .type = LPSS_BSW_SSP,
- .port_id = 1,
- .setup = lpss_spi_setup,
- .tx_param = &bsw1_tx_param,
- .rx_param = &bsw1_rx_param,
- },
- [PORT_BSW2] = {
- .type = LPSS_BSW_SSP,
- .port_id = 2,
- .setup = lpss_spi_setup,
- .tx_param = &bsw2_tx_param,
- .rx_param = &bsw2_rx_param,
- },
- [PORT_MRFLD] = {
- .type = MRFLD_SSP,
- .max_clk_rate = 25000000,
- .setup = mrfld_spi_setup,
- },
- [PORT_QUARK_X1000] = {
- .type = QUARK_X1000_SSP,
- .port_id = -1,
- .num_chipselect = 1,
- .max_clk_rate = 50000000,
- },
- [PORT_LPT0] = {
- .type = LPSS_LPT_SSP,
- .port_id = 0,
- .setup = lpss_spi_setup,
- .tx_param = &lpt0_tx_param,
- .rx_param = &lpt0_rx_param,
- },
- [PORT_LPT1] = {
- .type = LPSS_LPT_SSP,
- .port_id = 1,
- .setup = lpss_spi_setup,
- .tx_param = &lpt1_tx_param,
- .rx_param = &lpt1_rx_param,
- },
+static const struct pxa_spi_info mrfld_info_config = {
+ .setup = mrfld_spi_setup,
+};
+
+static int qrk_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c)
+{
+ struct ssp_device *ssp = &c->ssp;
+
+ ssp->type = QUARK_X1000_SSP;
+ ssp->port_id = dev->devfn;
+ c->num_chipselect = 1;
+
+ return pxa2xx_spi_pci_clk_register(dev, ssp, 50000000);
+}
+
+static const struct pxa_spi_info qrk_info_config = {
+ .setup = qrk_spi_setup,
};
static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
const struct pci_device_id *ent)
{
+ const struct pxa_spi_info *info;
struct platform_device_info pi;
int ret;
struct platform_device *pdev;
struct pxa2xx_spi_controller spi_pdata;
struct ssp_device *ssp;
- struct pxa_spi_info *c;
- char buf[40];
ret = pcim_enable_device(dev);
if (ret)
@@ -225,27 +273,17 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
if (ret)
return ret;
- c = &spi_info_configs[ent->driver_data];
- if (c->setup) {
- ret = c->setup(dev, c);
- if (ret)
- return ret;
- }
-
memset(&spi_pdata, 0, sizeof(spi_pdata));
- spi_pdata.num_chipselect = (c->num_chipselect > 0) ? c->num_chipselect : dev->devfn;
- spi_pdata.dma_filter = c->dma_filter;
- spi_pdata.tx_param = c->tx_param;
- spi_pdata.rx_param = c->rx_param;
- spi_pdata.enable_dma = c->rx_param && c->tx_param;
- spi_pdata.dma_burst_size = c->dma_burst_size ? c->dma_burst_size : 1;
ssp = &spi_pdata.ssp;
ssp->dev = &dev->dev;
ssp->phys_base = pci_resource_start(dev, 0);
ssp->mmio_base = pcim_iomap_table(dev)[0];
- ssp->port_id = (c->port_id >= 0) ? c->port_id : dev->devfn;
- ssp->type = c->type;
+
+ info = (struct pxa_spi_info *)ent->driver_data;
+ ret = info->setup(dev, &spi_pdata);
+ if (ret)
+ return ret;
pci_set_master(dev);
@@ -254,14 +292,8 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
return ret;
ssp->irq = pci_irq_vector(dev, 0);
- snprintf(buf, sizeof(buf), "pxa2xx-spi.%d", ssp->port_id);
- ssp->clk = clk_register_fixed_rate(&dev->dev, buf, NULL, 0,
- c->max_clk_rate);
- if (IS_ERR(ssp->clk))
- return PTR_ERR(ssp->clk);
-
memset(&pi, 0, sizeof(pi));
- pi.fwnode = dev->dev.fwnode;
+ pi.fwnode = dev_fwnode(&dev->dev);
pi.parent = &dev->dev;
pi.name = "pxa2xx-spi";
pi.id = ssp->port_id;
@@ -269,10 +301,8 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
pi.size_data = sizeof(spi_pdata);
pdev = platform_device_register_full(&pi);
- if (IS_ERR(pdev)) {
- clk_unregister(ssp->clk);
+ if (IS_ERR(pdev))
return PTR_ERR(pdev);
- }
pci_set_drvdata(dev, pdev);
@@ -282,26 +312,22 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
static void pxa2xx_spi_pci_remove(struct pci_dev *dev)
{
struct platform_device *pdev = pci_get_drvdata(dev);
- struct pxa2xx_spi_controller *spi_pdata;
-
- spi_pdata = dev_get_platdata(&pdev->dev);
platform_device_unregister(pdev);
- clk_unregister(spi_pdata->ssp.clk);
}
static const struct pci_device_id pxa2xx_spi_pci_devices[] = {
- { PCI_VDEVICE(INTEL, 0x0935), PORT_QUARK_X1000 },
- { PCI_VDEVICE(INTEL, 0x0f0e), PORT_BYT },
- { PCI_VDEVICE(INTEL, 0x1194), PORT_MRFLD },
- { PCI_VDEVICE(INTEL, 0x228e), PORT_BSW0 },
- { PCI_VDEVICE(INTEL, 0x2290), PORT_BSW1 },
- { PCI_VDEVICE(INTEL, 0x22ac), PORT_BSW2 },
- { PCI_VDEVICE(INTEL, 0x2e6a), PORT_CE4100 },
- { PCI_VDEVICE(INTEL, 0x9c65), PORT_LPT0 },
- { PCI_VDEVICE(INTEL, 0x9c66), PORT_LPT1 },
- { PCI_VDEVICE(INTEL, 0x9ce5), PORT_LPT0 },
- { PCI_VDEVICE(INTEL, 0x9ce6), PORT_LPT1 },
+ { PCI_DEVICE_DATA(INTEL, QUARK_X1000, &qrk_info_config) },
+ { PCI_DEVICE_DATA(INTEL, BYT, &lpss_info_config) },
+ { PCI_DEVICE_DATA(INTEL, MRFLD, &mrfld_info_config) },
+ { PCI_DEVICE_DATA(INTEL, BSW0, &lpss_info_config) },
+ { PCI_DEVICE_DATA(INTEL, BSW1, &lpss_info_config) },
+ { PCI_DEVICE_DATA(INTEL, BSW2, &lpss_info_config) },
+ { PCI_DEVICE_DATA(INTEL, CE4100, &ce4100_info_config) },
+ { PCI_DEVICE_DATA(INTEL, LPT0_0, &lpss_info_config) },
+ { PCI_DEVICE_DATA(INTEL, LPT0_1, &lpss_info_config) },
+ { PCI_DEVICE_DATA(INTEL, LPT1_0, &lpss_info_config) },
+ { PCI_DEVICE_DATA(INTEL, LPT1_1, &lpss_info_config) },
{ }
};
MODULE_DEVICE_TABLE(pci, pxa2xx_spi_pci_devices);
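
Two idioms carry the pxa2xx-pci rework, sketched below with hypothetical foo names: PCI_DEVICE_DATA() stores a pointer in driver_data (it relies on the PCI_DEVICE_ID_INTEL_* defines introduced above), and devm_add_action_or_reset() queues the clk teardown so the hand-rolled error and remove paths can go away:

#include <linux/clk-provider.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/pci.h>

struct foo_info {
	int (*setup)(struct pci_dev *pdev);
};

static const struct foo_info foo_info_config = { };

static const struct pci_device_id foo_pci_ids[] = {
	{ PCI_DEVICE_DATA(INTEL, QUARK_X1000, &foo_info_config) },
	{ }
};

static void foo_clk_unregister(void *clk)
{
	clk_unregister(clk);
}

static int foo_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	const struct foo_info *info = (const struct foo_info *)ent->driver_data;
	struct clk *clk;

	if (!info)
		return -ENODEV;

	clk = clk_register_fixed_rate(&pdev->dev, "foo-clk", NULL, 0, 50000000);
	if (IS_ERR(clk))
		return PTR_ERR(clk);

	/* clk_unregister() now runs automatically on probe failure or unbind */
	return devm_add_action_or_reset(&pdev->dev, foo_clk_unregister, clk);
}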
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index e88f86274eeb..edb42d08857d 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -13,7 +13,6 @@
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/gpio/consumer.h>
-#include <linux/gpio.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/ioport.h>
@@ -1163,57 +1162,6 @@ static int pxa2xx_spi_unprepare_transfer(struct spi_controller *controller)
return 0;
}
-static void cleanup_cs(struct spi_device *spi)
-{
- if (!gpio_is_valid(spi->cs_gpio))
- return;
-
- gpio_free(spi->cs_gpio);
- spi->cs_gpio = -ENOENT;
-}
-
-static int setup_cs(struct spi_device *spi, struct chip_data *chip,
- struct pxa2xx_spi_chip *chip_info)
-{
- struct driver_data *drv_data = spi_controller_get_devdata(spi->controller);
-
- if (chip == NULL)
- return 0;
-
- if (chip_info == NULL)
- return 0;
-
- if (drv_data->ssp_type == CE4100_SSP)
- return 0;
-
- /*
- * NOTE: setup() can be called multiple times, possibly with
- * different chip_info, release previously requested GPIO.
- */
- cleanup_cs(spi);
-
- if (gpio_is_valid(chip_info->gpio_cs)) {
- int gpio = chip_info->gpio_cs;
- int err;
-
- err = gpio_request(gpio, "SPI_CS");
- if (err) {
- dev_err(&spi->dev, "failed to request chip select GPIO%d\n", gpio);
- return err;
- }
-
- err = gpio_direction_output(gpio, !(spi->mode & SPI_CS_HIGH));
- if (err) {
- gpio_free(gpio);
- return err;
- }
-
- spi->cs_gpio = gpio;
- }
-
- return 0;
-}
-
static int setup(struct spi_device *spi)
{
struct pxa2xx_spi_chip *chip_info;
@@ -1222,7 +1170,6 @@ static int setup(struct spi_device *spi)
struct driver_data *drv_data =
spi_controller_get_devdata(spi->controller);
uint tx_thres, tx_hi_thres, rx_thres;
- int err;
switch (drv_data->ssp_type) {
case QUARK_X1000_SSP:
@@ -1365,21 +1312,13 @@ static int setup(struct spi_device *spi)
spi_set_ctldata(spi, chip);
- if (drv_data->ssp_type == CE4100_SSP)
- return 0;
-
- err = setup_cs(spi, chip, chip_info);
- if (err)
- kfree(chip);
-
- return err;
+ return 0;
}
static void cleanup(struct spi_device *spi)
{
struct chip_data *chip = spi_get_ctldata(spi);
- cleanup_cs(spi);
kfree(chip);
}
@@ -1455,6 +1394,11 @@ static const struct pci_device_id pxa2xx_spi_pci_compound_match[] = {
{ PCI_VDEVICE(INTEL, 0x5ac2), LPSS_BXT_SSP },
{ PCI_VDEVICE(INTEL, 0x5ac4), LPSS_BXT_SSP },
{ PCI_VDEVICE(INTEL, 0x5ac6), LPSS_BXT_SSP },
+ /* RPL-S */
+ { PCI_VDEVICE(INTEL, 0x7a2a), LPSS_CNL_SSP },
+ { PCI_VDEVICE(INTEL, 0x7a2b), LPSS_CNL_SSP },
+ { PCI_VDEVICE(INTEL, 0x7a79), LPSS_CNL_SSP },
+ { PCI_VDEVICE(INTEL, 0x7a7b), LPSS_CNL_SSP },
/* ADL-S */
{ PCI_VDEVICE(INTEL, 0x7aaa), LPSS_CNL_SSP },
{ PCI_VDEVICE(INTEL, 0x7aab), LPSS_CNL_SSP },
diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
index d39dec6d1c91..00d6084306b4 100644
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -593,7 +593,6 @@ static irqreturn_t spi_qup_qup_irq(int irq, void *dev_id)
{
struct spi_qup *controller = dev_id;
u32 opflags, qup_err, spi_err;
- unsigned long flags;
int error = 0;
qup_err = readl_relaxed(controller->base + QUP_ERROR_FLAGS);
@@ -625,10 +624,10 @@ static irqreturn_t spi_qup_qup_irq(int irq, void *dev_id)
error = -EIO;
}
- spin_lock_irqsave(&controller->lock, flags);
+ spin_lock(&controller->lock);
if (!controller->error)
controller->error = error;
- spin_unlock_irqrestore(&controller->lock, flags);
+ spin_unlock(&controller->lock);
if (spi_qup_is_dma_xfer(controller->mode)) {
writel_relaxed(opflags, controller->base + QUP_OPERATIONAL);
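
The spi-qup change applies a general locking rule: a hardirq handler already runs with local interrupts disabled, so the irqsave/irqrestore variants are redundant there. A self-contained sketch with a hypothetical foo device:

#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>

struct foo {
	spinlock_t lock;
	int error;
};

static irqreturn_t foo_irq(int irq, void *dev_id)
{
	struct foo *priv = dev_id;

	spin_lock(&priv->lock);		/* hardirq context: IRQs already off */
	if (!priv->error)
		priv->error = -EIO;
	spin_unlock(&priv->lock);

	return IRQ_HANDLED;
}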
diff --git a/drivers/spi/spi-rockchip-sfc.c b/drivers/spi/spi-rockchip-sfc.c
index a46b38544027..bd87d3c92dd3 100644
--- a/drivers/spi/spi-rockchip-sfc.c
+++ b/drivers/spi/spi-rockchip-sfc.c
@@ -624,10 +624,8 @@ static int rockchip_sfc_probe(struct platform_device *pdev)
/* Find the irq */
ret = platform_get_irq(pdev, 0);
- if (ret < 0) {
- dev_err(dev, "Failed to get the irq\n");
+ if (ret < 0)
goto err_irq;
- }
ret = devm_request_irq(dev, ret, rockchip_sfc_irq_handler,
0, pdev->name, sfc);
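
The dev_err() can go because platform_get_irq() prints its own diagnostic on failure; the same check is also added to spi-tegra114 further down. The resulting idiom, sketched for a hypothetical probe:

#include <linux/platform_device.h>

static int foo_probe(struct platform_device *pdev)
{
	int irq;

	irq = platform_get_irq(pdev, 0);
	/* no local dev_err(): platform_get_irq() already logged the failure */
	if (irq < 0)
		return irq;

	return 0;
}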
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 553b6b9d0222..cdc16eecaf6b 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -133,7 +133,8 @@
#define INT_TF_OVERFLOW (1 << 1)
#define INT_RF_UNDERFLOW (1 << 2)
#define INT_RF_OVERFLOW (1 << 3)
-#define INT_RF_FULL (1 << 4)
+#define INT_RF_FULL (1 << 4)
+#define INT_CS_INACTIVE (1 << 6)
/* Bit fields in ICR, 4bit */
#define ICR_MASK 0x0f
@@ -194,6 +195,8 @@ struct rockchip_spi {
bool cs_asserted[ROCKCHIP_SPI_MAX_CS_NUM];
bool slave_abort;
+ bool cs_inactive; /* SPI slave transmission stops when CS goes inactive */
+ struct spi_transfer *xfer; /* Store xfer temporarily */
};
static inline void spi_enable_chip(struct rockchip_spi *rs, bool enable)
@@ -275,8 +278,9 @@ static void rockchip_spi_handle_err(struct spi_controller *ctlr,
*/
spi_enable_chip(rs, false);
- /* make sure all interrupts are masked */
+ /* make sure all interrupts are masked and status cleared */
writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
+ writel_relaxed(0xffffffff, rs->regs + ROCKCHIP_SPI_ICR);
if (atomic_read(&rs->state) & TXDMA)
dmaengine_terminate_async(ctlr->dma_tx);
@@ -343,6 +347,15 @@ static irqreturn_t rockchip_spi_isr(int irq, void *dev_id)
struct spi_controller *ctlr = dev_id;
struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
+ /* When the CS-inactive interrupt fires, abort the SPI slave transfer */
+ if (rs->cs_inactive && readl_relaxed(rs->regs + ROCKCHIP_SPI_IMR) & INT_CS_INACTIVE) {
+ ctlr->slave_abort(ctlr);
+ writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
+ writel_relaxed(0xffffffff, rs->regs + ROCKCHIP_SPI_ICR);
+
+ return IRQ_HANDLED;
+ }
+
if (rs->tx_left)
rockchip_spi_pio_writer(rs);
@@ -350,6 +363,7 @@ static irqreturn_t rockchip_spi_isr(int irq, void *dev_id)
if (!rs->rx_left) {
spi_enable_chip(rs, false);
writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
+ writel_relaxed(0xffffffff, rs->regs + ROCKCHIP_SPI_ICR);
spi_finalize_current_transfer(ctlr);
}
@@ -357,14 +371,18 @@ static irqreturn_t rockchip_spi_isr(int irq, void *dev_id)
}
static int rockchip_spi_prepare_irq(struct rockchip_spi *rs,
- struct spi_transfer *xfer)
+ struct spi_controller *ctlr,
+ struct spi_transfer *xfer)
{
rs->tx = xfer->tx_buf;
rs->rx = xfer->rx_buf;
rs->tx_left = rs->tx ? xfer->len / rs->n_bytes : 0;
rs->rx_left = xfer->len / rs->n_bytes;
- writel_relaxed(INT_RF_FULL, rs->regs + ROCKCHIP_SPI_IMR);
+ if (rs->cs_inactive)
+ writel_relaxed(INT_RF_FULL | INT_CS_INACTIVE, rs->regs + ROCKCHIP_SPI_IMR);
+ else
+ writel_relaxed(INT_RF_FULL, rs->regs + ROCKCHIP_SPI_IMR);
spi_enable_chip(rs, true);
if (rs->tx_left)
@@ -383,6 +401,9 @@ static void rockchip_spi_dma_rxcb(void *data)
if (state & TXDMA && !rs->slave_abort)
return;
+ if (rs->cs_inactive)
+ writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
+
spi_enable_chip(rs, false);
spi_finalize_current_transfer(ctlr);
}
@@ -423,14 +444,16 @@ static int rockchip_spi_prepare_dma(struct rockchip_spi *rs,
atomic_set(&rs->state, 0);
+ rs->tx = xfer->tx_buf;
+ rs->rx = xfer->rx_buf;
+
rxdesc = NULL;
if (xfer->rx_buf) {
struct dma_slave_config rxconf = {
.direction = DMA_DEV_TO_MEM,
.src_addr = rs->dma_addr_rx,
.src_addr_width = rs->n_bytes,
- .src_maxburst = rockchip_spi_calc_burst_size(xfer->len /
- rs->n_bytes),
+ .src_maxburst = rockchip_spi_calc_burst_size(xfer->len / rs->n_bytes),
};
dmaengine_slave_config(ctlr->dma_rx, &rxconf);
@@ -474,10 +497,13 @@ static int rockchip_spi_prepare_dma(struct rockchip_spi *rs,
/* rx must be started before tx due to spi instinct */
if (rxdesc) {
atomic_or(RXDMA, &rs->state);
- dmaengine_submit(rxdesc);
+ ctlr->dma_rx->cookie = dmaengine_submit(rxdesc);
dma_async_issue_pending(ctlr->dma_rx);
}
+ if (rs->cs_inactive)
+ writel_relaxed(INT_CS_INACTIVE, rs->regs + ROCKCHIP_SPI_IMR);
+
spi_enable_chip(rs, true);
if (txdesc) {
@@ -584,7 +610,48 @@ static size_t rockchip_spi_max_transfer_size(struct spi_device *spi)
static int rockchip_spi_slave_abort(struct spi_controller *ctlr)
{
struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
+ u32 rx_fifo_left;
+ struct dma_tx_state state;
+ enum dma_status status;
+
+ /* Get the current DMA RX position */
+ if (atomic_read(&rs->state) & RXDMA) {
+ dmaengine_pause(ctlr->dma_rx);
+ status = dmaengine_tx_status(ctlr->dma_rx, ctlr->dma_rx->cookie, &state);
+ if (status == DMA_ERROR) {
+ rs->rx = rs->xfer->rx_buf;
+ rs->xfer->len = 0;
+ rx_fifo_left = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXFLR);
+ for (; rx_fifo_left; rx_fifo_left--)
+ readl_relaxed(rs->regs + ROCKCHIP_SPI_RXDR);
+ goto out;
+ } else {
+ rs->rx += rs->xfer->len - rs->n_bytes * state.residue;
+ }
+ }
+
+ /* Drain the data left in the RX FIFO and set rs->xfer->len to the real RX size */
+ if (rs->rx) {
+ rx_fifo_left = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXFLR);
+ for (; rx_fifo_left; rx_fifo_left--) {
+ u32 rxw = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXDR);
+
+ if (rs->n_bytes == 1)
+ *(u8 *)rs->rx = (u8)rxw;
+ else
+ *(u16 *)rs->rx = (u16)rxw;
+ rs->rx += rs->n_bytes;
+ }
+ rs->xfer->len = (unsigned int)(rs->rx - rs->xfer->rx_buf);
+ }
+out:
+ if (atomic_read(&rs->state) & RXDMA)
+ dmaengine_terminate_sync(ctlr->dma_rx);
+ if (atomic_read(&rs->state) & TXDMA)
+ dmaengine_terminate_sync(ctlr->dma_tx);
+ atomic_set(&rs->state, 0);
+ spi_enable_chip(rs, false);
rs->slave_abort = true;
spi_finalize_current_transfer(ctlr);
@@ -620,7 +687,7 @@ static int rockchip_spi_transfer_one(
}
rs->n_bytes = xfer->bits_per_word <= 8 ? 1 : 2;
-
+ rs->xfer = xfer;
use_dma = ctlr->can_dma ? ctlr->can_dma(ctlr, spi, xfer) : false;
ret = rockchip_spi_config(rs, spi, xfer, use_dma, ctlr->slave);
@@ -630,7 +697,7 @@ static int rockchip_spi_transfer_one(
if (use_dma)
return rockchip_spi_prepare_dma(rs, ctlr, xfer);
- return rockchip_spi_prepare_irq(rs, xfer);
+ return rockchip_spi_prepare_irq(rs, ctlr, xfer);
}
static bool rockchip_spi_can_dma(struct spi_controller *ctlr,
@@ -647,6 +714,29 @@ static bool rockchip_spi_can_dma(struct spi_controller *ctlr,
return xfer->len / bytes_per_word >= rs->fifo_len;
}
+static int rockchip_spi_setup(struct spi_device *spi)
+{
+ struct rockchip_spi *rs = spi_controller_get_devdata(spi->controller);
+ u32 cr0;
+
+ pm_runtime_get_sync(rs->dev);
+
+ cr0 = readl_relaxed(rs->regs + ROCKCHIP_SPI_CTRLR0);
+
+ cr0 &= ~(0x3 << CR0_SCPH_OFFSET);
+ cr0 |= ((spi->mode & 0x3) << CR0_SCPH_OFFSET);
+ if (spi->mode & SPI_CS_HIGH && spi->chip_select <= 1)
+ cr0 |= BIT(spi->chip_select) << CR0_SOI_OFFSET;
+ else if (spi->chip_select <= 1)
+ cr0 &= ~(BIT(spi->chip_select) << CR0_SOI_OFFSET);
+
+ writel_relaxed(cr0, rs->regs + ROCKCHIP_SPI_CTRLR0);
+
+ pm_runtime_put(rs->dev);
+
+ return 0;
+}
+
static int rockchip_spi_probe(struct platform_device *pdev)
{
int ret;
@@ -654,7 +744,7 @@ static int rockchip_spi_probe(struct platform_device *pdev)
struct spi_controller *ctlr;
struct resource *mem;
struct device_node *np = pdev->dev.of_node;
- u32 rsd_nsecs;
+ u32 rsd_nsecs, num_cs;
bool slave_mode;
slave_mode = of_property_read_bool(np, "spi-slave");
@@ -764,8 +854,9 @@ static int rockchip_spi_probe(struct platform_device *pdev)
* rk spi0 has two native cs, spi1..5 one cs only
* if num-cs is missing in the dts, default to 1
*/
- if (of_property_read_u16(np, "num-cs", &ctlr->num_chipselect))
- ctlr->num_chipselect = 1;
+ if (of_property_read_u32(np, "num-cs", &num_cs))
+ num_cs = 1;
+ ctlr->num_chipselect = num_cs;
ctlr->use_gpio_descriptors = true;
}
ctlr->dev.of_node = pdev->dev.of_node;
@@ -773,6 +864,7 @@ static int rockchip_spi_probe(struct platform_device *pdev)
ctlr->min_speed_hz = rs->freq / BAUDR_SCKDV_MAX;
ctlr->max_speed_hz = min(rs->freq / BAUDR_SCKDV_MIN, MAX_SCLK_OUT);
+ ctlr->setup = rockchip_spi_setup;
ctlr->set_cs = rockchip_spi_set_cs;
ctlr->transfer_one = rockchip_spi_transfer_one;
ctlr->max_transfer_size = rockchip_spi_max_transfer_size;
@@ -808,8 +900,13 @@ static int rockchip_spi_probe(struct platform_device *pdev)
switch (readl_relaxed(rs->regs + ROCKCHIP_SPI_VERSION)) {
case ROCKCHIP_SPI_VER2_TYPE2:
ctlr->mode_bits |= SPI_CS_HIGH;
+ if (ctlr->can_dma && slave_mode)
+ rs->cs_inactive = true;
+ else
+ rs->cs_inactive = false;
break;
default:
+ rs->cs_inactive = false;
break;
}
@@ -868,14 +965,14 @@ static int rockchip_spi_suspend(struct device *dev)
{
int ret;
struct spi_controller *ctlr = dev_get_drvdata(dev);
+ struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
ret = spi_controller_suspend(ctlr);
if (ret < 0)
return ret;
- ret = pm_runtime_force_suspend(dev);
- if (ret < 0)
- return ret;
+ clk_disable_unprepare(rs->spiclk);
+ clk_disable_unprepare(rs->apb_pclk);
pinctrl_pm_select_sleep_state(dev);
@@ -890,10 +987,14 @@ static int rockchip_spi_resume(struct device *dev)
pinctrl_pm_select_default_state(dev);
- ret = pm_runtime_force_resume(dev);
+ ret = clk_prepare_enable(rs->apb_pclk);
if (ret < 0)
return ret;
+ ret = clk_prepare_enable(rs->spiclk);
+ if (ret < 0)
+ clk_disable_unprepare(rs->apb_pclk);
+
ret = spi_controller_resume(ctlr);
if (ret < 0) {
clk_disable_unprepare(rs->spiclk);
@@ -935,7 +1036,7 @@ static int rockchip_spi_runtime_resume(struct device *dev)
#endif /* CONFIG_PM */
static const struct dev_pm_ops rockchip_spi_pm = {
- SET_SYSTEM_SLEEP_PM_OPS(rockchip_spi_suspend, rockchip_spi_resume)
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(rockchip_spi_suspend, rockchip_spi_resume)
SET_RUNTIME_PM_OPS(rockchip_spi_runtime_suspend,
rockchip_spi_runtime_resume, NULL)
};
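
A sketch of the residue accounting behind the new rockchip slave-abort path, under illustrative names: pause the RX channel, query the cookie saved at submit time, and convert the pending frame count back into bytes (n_bytes is the frame width):

#include <linux/dmaengine.h>

/* how many bytes did the paused RX channel actually deliver? */
static unsigned int foo_rx_received(struct dma_chan *chan, dma_cookie_t cookie,
				    unsigned int len, unsigned int n_bytes)
{
	struct dma_tx_state state;
	enum dma_status status;

	dmaengine_pause(chan);
	status = dmaengine_tx_status(chan, cookie, &state);
	if (status == DMA_ERROR)
		return 0;

	/* state.residue counts frames still pending, not bytes */
	return len - n_bytes * state.residue;
}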
diff --git a/drivers/spi/spi-s3c24xx.c b/drivers/spi/spi-s3c24xx.c
index d6f51695ca5b..660aa866af06 100644
--- a/drivers/spi/spi-s3c24xx.c
+++ b/drivers/spi/spi-s3c24xx.c
@@ -12,7 +12,6 @@
#include <linux/err.h>
#include <linux/clk.h>
#include <linux/platform_device.h>
-#include <linux/gpio.h>
#include <linux/io.h>
#include <linux/slab.h>
@@ -62,9 +61,6 @@ struct s3c24xx_spi {
unsigned char fiq_inuse;
unsigned char fiq_claimed;
- void (*set_cs)(struct s3c2410_spi_info *spi,
- int cs, int pol);
-
/* data buffers */
const unsigned char *tx;
unsigned char *rx;
@@ -84,29 +80,21 @@ static inline struct s3c24xx_spi *to_hw(struct spi_device *sdev)
return spi_master_get_devdata(sdev->master);
}
-static void s3c24xx_spi_gpiocs(struct s3c2410_spi_info *spi, int cs, int pol)
-{
- gpio_set_value(spi->pin_cs, pol);
-}
-
static void s3c24xx_spi_chipsel(struct spi_device *spi, int value)
{
struct s3c24xx_spi_devstate *cs = spi->controller_state;
struct s3c24xx_spi *hw = to_hw(spi);
- unsigned int cspol = spi->mode & SPI_CS_HIGH ? 1 : 0;
/* change the chipselect state and the state of the spi engine clock */
switch (value) {
case BITBANG_CS_INACTIVE:
- hw->set_cs(hw->pdata, spi->chip_select, cspol^1);
writeb(cs->spcon, hw->regs + S3C2410_SPCON);
break;
case BITBANG_CS_ACTIVE:
writeb(cs->spcon | S3C2410_SPCON_ENSCK,
hw->regs + S3C2410_SPCON);
- hw->set_cs(hw->pdata, spi->chip_select, cspol);
break;
}
}
@@ -452,14 +440,6 @@ static void s3c24xx_spi_initialsetup(struct s3c24xx_spi *hw)
writeb(0xff, hw->regs + S3C2410_SPPRE);
writeb(SPPIN_DEFAULT, hw->regs + S3C2410_SPPIN);
writeb(SPCON_DEFAULT, hw->regs + S3C2410_SPCON);
-
- if (hw->pdata) {
- if (hw->set_cs == s3c24xx_spi_gpiocs)
- gpio_direction_output(hw->pdata->pin_cs, 1);
-
- if (hw->pdata->gpio_setup)
- hw->pdata->gpio_setup(hw->pdata, 1);
- }
}
static int s3c24xx_spi_probe(struct platform_device *pdev)
@@ -502,6 +482,9 @@ static int s3c24xx_spi_probe(struct platform_device *pdev)
master->num_chipselect = hw->pdata->num_cs;
master->bus_num = pdata->bus_num;
master->bits_per_word_mask = SPI_BPW_MASK(8);
+ /* we need to call the local chipselect callback */
+ master->flags = SPI_MASTER_GPIO_SS;
+ master->use_gpio_descriptors = true;
/* setup the state for the bitbang driver */
@@ -541,27 +524,6 @@ static int s3c24xx_spi_probe(struct platform_device *pdev)
goto err_no_pdata;
}
- /* setup any gpio we can */
-
- if (!pdata->set_cs) {
- if (pdata->pin_cs < 0) {
- dev_err(&pdev->dev, "No chipselect pin\n");
- err = -EINVAL;
- goto err_register;
- }
-
- err = devm_gpio_request(&pdev->dev, pdata->pin_cs,
- dev_name(&pdev->dev));
- if (err) {
- dev_err(&pdev->dev, "Failed to get gpio for cs\n");
- goto err_register;
- }
-
- hw->set_cs = s3c24xx_spi_gpiocs;
- gpio_direction_output(pdata->pin_cs, 1);
- } else
- hw->set_cs = pdata->set_cs;
-
s3c24xx_spi_initialsetup(hw);
/* register our spi controller */
@@ -604,9 +566,6 @@ static int s3c24xx_spi_suspend(struct device *dev)
if (ret)
return ret;
- if (hw->pdata && hw->pdata->gpio_setup)
- hw->pdata->gpio_setup(hw->pdata, 0);
-
clk_disable(hw->clk);
return 0;
}
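
The s3c24xx conversion keeps its own chipselect hook in play via SPI_MASTER_GPIO_SS; a minimal sketch of the registration side, assuming hardware that must gate the engine clock from its chipselect callback:

#include <linux/spi/spi.h>

static void foo_configure_cs(struct spi_master *master)
{
	/* let the core drive /CS through GPIO descriptors ... */
	master->use_gpio_descriptors = true;
	/* ... but still invoke the driver's chipselect callback */
	master->flags = SPI_MASTER_GPIO_SS;
}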
diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index 8755cd85e83c..c26440e9058d 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -13,10 +13,8 @@
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/spi/spi.h>
-#include <linux/gpio.h>
#include <linux/of.h>
#include <linux/of_device.h>
-#include <linux/of_gpio.h>
#include <linux/platform_data/spi-s3c64xx.h>
@@ -656,7 +654,11 @@ static int s3c64xx_spi_prepare_message(struct spi_master *master,
struct s3c64xx_spi_csinfo *cs = spi->controller_data;
/* Configure feedback delay */
- writel(cs->fb_delay & 0x3, sdd->regs + S3C64XX_SPI_FB_CLK);
+ if (!cs)
+ /* No delay if not defined */
+ writel(0, sdd->regs + S3C64XX_SPI_FB_CLK);
+ else
+ writel(cs->fb_delay & 0x3, sdd->regs + S3C64XX_SPI_FB_CLK);
return 0;
}
@@ -796,16 +798,14 @@ static struct s3c64xx_spi_csinfo *s3c64xx_get_slave_ctrldata(
return ERR_PTR(-EINVAL);
}
- data_np = of_get_child_by_name(slave_np, "controller-data");
- if (!data_np) {
- dev_err(&spi->dev, "child node 'controller-data' not found\n");
- return ERR_PTR(-EINVAL);
- }
-
cs = kzalloc(sizeof(*cs), GFP_KERNEL);
- if (!cs) {
- of_node_put(data_np);
+ if (!cs)
return ERR_PTR(-ENOMEM);
+
+ data_np = of_get_child_by_name(slave_np, "controller-data");
+ if (!data_np) {
+ dev_info(&spi->dev, "feedback delay set to default (0)\n");
+ return cs;
}
of_property_read_u32(data_np, "samsung,spi-feedback-delay", &fb_delay);
@@ -830,34 +830,16 @@ static int s3c64xx_spi_setup(struct spi_device *spi)
if (spi->dev.of_node) {
cs = s3c64xx_get_slave_ctrldata(spi);
spi->controller_data = cs;
- } else if (cs) {
- /* On non-DT platforms the SPI core will set spi->cs_gpio
- * to -ENOENT. The GPIO pin used to drive the chip select
- * is defined by using platform data so spi->cs_gpio value
- * has to be override to have the proper GPIO pin number.
- */
- spi->cs_gpio = cs->line;
}
- if (IS_ERR_OR_NULL(cs)) {
+ /* NULL is fine; we just avoid using the FB delay (=0) */
+ if (IS_ERR(cs)) {
dev_err(&spi->dev, "No CS for SPI(%d)\n", spi->chip_select);
return -ENODEV;
}
- if (!spi_get_ctldata(spi)) {
- if (gpio_is_valid(spi->cs_gpio)) {
- err = gpio_request_one(spi->cs_gpio, GPIOF_OUT_INIT_HIGH,
- dev_name(&spi->dev));
- if (err) {
- dev_err(&spi->dev,
- "Failed to get /CS gpio [%d]: %d\n",
- spi->cs_gpio, err);
- goto err_gpio_req;
- }
- }
-
+ if (!spi_get_ctldata(spi))
spi_set_ctldata(spi, cs);
- }
pm_runtime_get_sync(&sdd->pdev->dev);
@@ -909,11 +891,9 @@ setup_exit:
/* setup() returns with device de-selected */
s3c64xx_spi_set_cs(spi, false);
- if (gpio_is_valid(spi->cs_gpio))
- gpio_free(spi->cs_gpio);
spi_set_ctldata(spi, NULL);
-err_gpio_req:
+ /* This was dynamically allocated on the DT path */
if (spi->dev.of_node)
kfree(cs);
@@ -924,19 +904,9 @@ static void s3c64xx_spi_cleanup(struct spi_device *spi)
{
struct s3c64xx_spi_csinfo *cs = spi_get_ctldata(spi);
- if (gpio_is_valid(spi->cs_gpio)) {
- gpio_free(spi->cs_gpio);
- if (spi->dev.of_node)
- kfree(cs);
- else {
- /* On non-DT platforms, the SPI core sets
- * spi->cs_gpio to -ENOENT and .setup()
- * overrides it with the GPIO pin value
- * passed using platform data.
- */
- spi->cs_gpio = -ENOENT;
- }
- }
+ /* This was dynamically allocated on the DT path */
+ if (spi->dev.of_node)
+ kfree(cs);
spi_set_ctldata(spi, NULL);
}
@@ -1131,6 +1101,7 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
master->prepare_message = s3c64xx_spi_prepare_message;
master->transfer_one = s3c64xx_spi_transfer_one;
master->num_chipselect = sci->num_cs;
+ master->use_gpio_descriptors = true;
master->dma_alignment = 8;
master->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(16) |
SPI_BPW_MASK(8);
@@ -1442,6 +1413,16 @@ static const struct s3c64xx_spi_port_config exynos5433_spi_port_config = {
.quirks = S3C64XX_SPI_QUIRK_CS_AUTO,
};
+static struct s3c64xx_spi_port_config fsd_spi_port_config = {
+ .fifo_lvl_mask = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f},
+ .rx_lvl_offset = 15,
+ .tx_st_done = 25,
+ .high_speed = true,
+ .clk_from_cmu = true,
+ .clk_ioclk = false,
+ .quirks = S3C64XX_SPI_QUIRK_CS_AUTO,
+};
+
static const struct platform_device_id s3c64xx_spi_driver_ids[] = {
{
.name = "s3c2443-spi",
@@ -1472,6 +1453,9 @@ static const struct of_device_id s3c64xx_spi_dt_match[] = {
{ .compatible = "samsung,exynos5433-spi",
.data = (void *)&exynos5433_spi_port_config,
},
+ { .compatible = "tesla,fsd-spi",
+ .data = (void *)&fsd_spi_port_config,
+ },
{ },
};
MODULE_DEVICE_TABLE(of, s3c64xx_spi_dt_match);
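
The s3c64xx change is the "optional DT child node" pattern: allocate the zeroed state first, then treat a missing controller-data node as "use defaults" instead of failing. A sketch with hypothetical foo names (fb_delay defaults to 0 via kzalloc()):

#include <linux/err.h>
#include <linux/of.h>
#include <linux/slab.h>

struct foo_csinfo {
	u32 fb_delay;
};

static struct foo_csinfo *foo_get_ctrldata(struct device_node *np)
{
	struct device_node *data_np;
	struct foo_csinfo *cs;

	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
	if (!cs)
		return ERR_PTR(-ENOMEM);

	data_np = of_get_child_by_name(np, "controller-data");
	if (!data_np)
		return cs;	/* no node: keep the zeroed defaults */

	of_property_read_u32(data_np, "samsung,spi-feedback-delay", &cs->fb_delay);
	of_node_put(data_np);
	return cs;
}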
diff --git a/drivers/spi/spi-slave-system-control.c b/drivers/spi/spi-slave-system-control.c
index 169f3d595f60..d37cfe995a63 100644
--- a/drivers/spi/spi-slave-system-control.c
+++ b/drivers/spi/spi-slave-system-control.c
@@ -132,13 +132,12 @@ static int spi_slave_system_control_probe(struct spi_device *spi)
return 0;
}
-static int spi_slave_system_control_remove(struct spi_device *spi)
+static void spi_slave_system_control_remove(struct spi_device *spi)
{
struct spi_slave_system_control_priv *priv = spi_get_drvdata(spi);
spi_slave_abort(spi);
wait_for_completion(&priv->finished);
- return 0;
}
static struct spi_driver spi_slave_system_control_driver = {
diff --git a/drivers/spi/spi-slave-time.c b/drivers/spi/spi-slave-time.c
index f2e07a392d68..f56c1afb8534 100644
--- a/drivers/spi/spi-slave-time.c
+++ b/drivers/spi/spi-slave-time.c
@@ -106,13 +106,12 @@ static int spi_slave_time_probe(struct spi_device *spi)
return 0;
}
-static int spi_slave_time_remove(struct spi_device *spi)
+static void spi_slave_time_remove(struct spi_device *spi)
{
struct spi_slave_time_priv *priv = spi_get_drvdata(spi);
spi_slave_abort(spi);
wait_for_completion(&priv->finished);
- return 0;
}
static struct spi_driver spi_slave_time_driver = {
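
Both slave drivers pick up the reworked spi_driver remove() prototype: the core never used the return value, so remove() is now void. The whole shape, for a hypothetical foo driver:

#include <linux/spi/spi.h>

static void foo_remove(struct spi_device *spi)
{
	/* teardown only; there is nothing meaningful to return */
}

static struct spi_driver foo_driver = {
	.driver	= { .name = "foo" },
	.remove	= foo_remove,
};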
diff --git a/drivers/spi/spi-st-ssc4.c b/drivers/spi/spi-st-ssc4.c
index 6c44dda9ee8c..843be803696b 100644
--- a/drivers/spi/spi-st-ssc4.c
+++ b/drivers/spi/spi-st-ssc4.c
@@ -17,7 +17,6 @@
#include <linux/pinctrl/consumer.h>
#include <linux/platform_device.h>
#include <linux/of.h>
-#include <linux/of_gpio.h>
#include <linux/of_irq.h>
#include <linux/pm_runtime.h>
#include <linux/spi/spi.h>
@@ -171,11 +170,6 @@ static int spi_st_transfer_one(struct spi_master *master,
return t->len;
}
-static void spi_st_cleanup(struct spi_device *spi)
-{
- gpio_free(spi->cs_gpio);
-}
-
/* the spi->mode bits understood by this driver: */
#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST | SPI_LOOP | SPI_CS_HIGH)
static int spi_st_setup(struct spi_device *spi)
@@ -183,29 +177,17 @@ static int spi_st_setup(struct spi_device *spi)
struct spi_st *spi_st = spi_master_get_devdata(spi->master);
u32 spi_st_clk, sscbrg, var;
u32 hz = spi->max_speed_hz;
- int cs = spi->cs_gpio;
- int ret;
if (!hz) {
dev_err(&spi->dev, "max_speed_hz unspecified\n");
return -EINVAL;
}
- if (!gpio_is_valid(cs)) {
- dev_err(&spi->dev, "%d is not a valid gpio\n", cs);
+ if (!spi->cs_gpiod) {
+ dev_err(&spi->dev, "no valid gpio assigned\n");
return -EINVAL;
}
- ret = gpio_request(cs, dev_name(&spi->dev));
- if (ret) {
- dev_err(&spi->dev, "could not request gpio:%d\n", cs);
- return ret;
- }
-
- ret = gpio_direction_output(cs, spi->mode & SPI_CS_HIGH);
- if (ret)
- goto out_free_gpio;
-
spi_st_clk = clk_get_rate(spi_st->clk);
/* Set SSC_BRF */
@@ -213,8 +195,7 @@ static int spi_st_setup(struct spi_device *spi)
if (sscbrg < 0x07 || sscbrg > BIT(16)) {
dev_err(&spi->dev,
"baudrate %d outside valid range %d\n", sscbrg, hz);
- ret = -EINVAL;
- goto out_free_gpio;
+ return -EINVAL;
}
spi_st->baud = spi_st_clk / (2 * sscbrg);
@@ -263,10 +244,6 @@ static int spi_st_setup(struct spi_device *spi)
readl_relaxed(spi_st->base + SSC_RBUF);
return 0;
-
-out_free_gpio:
- gpio_free(cs);
- return ret;
}
/* Interrupt fired when TX shift register becomes empty */
@@ -309,11 +286,11 @@ static int spi_st_probe(struct platform_device *pdev)
master->dev.of_node = np;
master->mode_bits = MODEBITS;
master->setup = spi_st_setup;
- master->cleanup = spi_st_cleanup;
master->transfer_one = spi_st_transfer_one;
master->bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16);
master->auto_runtime_pm = true;
master->bus_num = pdev->id;
+ master->use_gpio_descriptors = true;
spi_st = spi_master_get_devdata(master);
spi_st->clk = devm_clk_get(&pdev->dev, "ssc");
diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c
index 514337c86d2c..ffdc55f87e82 100644
--- a/drivers/spi/spi-stm32-qspi.c
+++ b/drivers/spi/spi-stm32-qspi.c
@@ -688,7 +688,7 @@ static int stm32_qspi_probe(struct platform_device *pdev)
struct resource *res;
int ret, irq;
- ctrl = spi_alloc_master(dev, sizeof(*qspi));
+ ctrl = devm_spi_alloc_master(dev, sizeof(*qspi));
if (!ctrl)
return -ENOMEM;
@@ -697,58 +697,46 @@ static int stm32_qspi_probe(struct platform_device *pdev)
res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi");
qspi->io_base = devm_ioremap_resource(dev, res);
- if (IS_ERR(qspi->io_base)) {
- ret = PTR_ERR(qspi->io_base);
- goto err_master_put;
- }
+ if (IS_ERR(qspi->io_base))
+ return PTR_ERR(qspi->io_base);
qspi->phys_base = res->start;
res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi_mm");
qspi->mm_base = devm_ioremap_resource(dev, res);
- if (IS_ERR(qspi->mm_base)) {
- ret = PTR_ERR(qspi->mm_base);
- goto err_master_put;
- }
+ if (IS_ERR(qspi->mm_base))
+ return PTR_ERR(qspi->mm_base);
qspi->mm_size = resource_size(res);
- if (qspi->mm_size > STM32_QSPI_MAX_MMAP_SZ) {
- ret = -EINVAL;
- goto err_master_put;
- }
+ if (qspi->mm_size > STM32_QSPI_MAX_MMAP_SZ)
+ return -EINVAL;
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- ret = irq;
- goto err_master_put;
- }
+ if (irq < 0)
+ return irq;
ret = devm_request_irq(dev, irq, stm32_qspi_irq, 0,
dev_name(dev), qspi);
if (ret) {
dev_err(dev, "failed to request irq\n");
- goto err_master_put;
+ return ret;
}
init_completion(&qspi->data_completion);
init_completion(&qspi->match_completion);
qspi->clk = devm_clk_get(dev, NULL);
- if (IS_ERR(qspi->clk)) {
- ret = PTR_ERR(qspi->clk);
- goto err_master_put;
- }
+ if (IS_ERR(qspi->clk))
+ return PTR_ERR(qspi->clk);
qspi->clk_rate = clk_get_rate(qspi->clk);
- if (!qspi->clk_rate) {
- ret = -EINVAL;
- goto err_master_put;
- }
+ if (!qspi->clk_rate)
+ return -EINVAL;
ret = clk_prepare_enable(qspi->clk);
if (ret) {
dev_err(dev, "can not enable the clock\n");
- goto err_master_put;
+ return ret;
}
rstc = devm_reset_control_get_exclusive(dev, NULL);
@@ -784,7 +772,7 @@ static int stm32_qspi_probe(struct platform_device *pdev)
pm_runtime_enable(dev);
pm_runtime_get_noresume(dev);
- ret = devm_spi_register_master(dev, ctrl);
+ ret = spi_register_master(ctrl);
if (ret)
goto err_pm_runtime_free;
@@ -806,8 +794,6 @@ err_dma_free:
stm32_qspi_dma_free(qspi);
err_clk_disable:
clk_disable_unprepare(qspi->clk);
-err_master_put:
- spi_master_put(qspi->ctrl);
return ret;
}
@@ -817,6 +803,7 @@ static int stm32_qspi_remove(struct platform_device *pdev)
struct stm32_qspi *qspi = platform_get_drvdata(pdev);
pm_runtime_get_sync(qspi->dev);
+ spi_unregister_master(qspi->ctrl);
/* disable qspi */
writel_relaxed(0, qspi->io_base + QSPI_CR);
stm32_qspi_dma_free(qspi);
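
The stm32-qspi probe is converted to the usual devm shape, sketched here with hypothetical foo names: devm_spi_alloc_master() ties the controller's lifetime to the device so early error paths plainly return, and registration moves to the non-devm spi_register_master() so remove() can unregister before quiescing the hardware:

#include <linux/errno.h>
#include <linux/platform_device.h>
#include <linux/spi/spi.h>

struct foo_priv {
	void __iomem *base;
};

static int foo_probe(struct platform_device *pdev)
{
	struct spi_controller *ctrl;
	int ret;

	ctrl = devm_spi_alloc_master(&pdev->dev, sizeof(struct foo_priv));
	if (!ctrl)
		return -ENOMEM;

	/* failures from here on simply return: no spi_master_put() needed */
	ret = spi_register_master(ctrl);
	if (ret)
		return ret;

	platform_set_drvdata(pdev, ctrl);
	return 0;
}

static int foo_remove(struct platform_device *pdev)
{
	struct spi_controller *ctrl = platform_get_drvdata(pdev);

	/* unregister first, then it is safe to shut the hardware down */
	spi_unregister_master(ctrl);
	return 0;
}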
diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index 9bd3fd1652f7..a6adc20f6862 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -221,7 +221,6 @@ struct stm32_spi;
* time between frames (if driver has this functionality)
* @set_number_of_data: optional routine to configure registers to desired
* number of data (if driver has this functionality)
- * @can_dma: routine to determine if the transfer is eligible for DMA use
* @transfer_one_dma_start: routine to start transfer a single spi_transfer
* using DMA
* @dma_rx_cb: routine to call after DMA RX channel operation is complete
@@ -232,7 +231,7 @@ struct stm32_spi;
* @baud_rate_div_min: minimum baud rate divisor
* @baud_rate_div_max: maximum baud rate divisor
* @has_fifo: boolean to know if fifo is used for driver
- * @has_startbit: boolean to know if start bit is used to start transfer
+ * @flags: compatible-specific SPI controller flags used at registration time
*/
struct stm32_spi_cfg {
const struct stm32_spi_regspec *regs;
@@ -253,6 +252,7 @@ struct stm32_spi_cfg {
unsigned int baud_rate_div_min;
unsigned int baud_rate_div_max;
bool has_fifo;
+ u16 flags;
};
/**
@@ -763,7 +763,7 @@ static irqreturn_t stm32f4_spi_irq_event(int irq, void *dev_id)
if (!spi->cur_usedma && (spi->cur_comm == SPI_SIMPLEX_TX ||
spi->cur_comm == SPI_3WIRE_TX)) {
/* OVR flag shouldn't be handled for TX only mode */
- sr &= ~STM32F4_SPI_SR_OVR | STM32F4_SPI_SR_RXNE;
+ sr &= ~(STM32F4_SPI_SR_OVR | STM32F4_SPI_SR_RXNE);
mask |= STM32F4_SPI_SR_TXE;
}
@@ -1722,6 +1722,7 @@ static const struct stm32_spi_cfg stm32f4_spi_cfg = {
.baud_rate_div_min = STM32F4_SPI_BR_DIV_MIN,
.baud_rate_div_max = STM32F4_SPI_BR_DIV_MAX,
.has_fifo = false,
+ .flags = SPI_MASTER_MUST_TX,
};
static const struct stm32_spi_cfg stm32h7_spi_cfg = {
@@ -1854,7 +1855,7 @@ static int stm32_spi_probe(struct platform_device *pdev)
master->prepare_message = stm32_spi_prepare_msg;
master->transfer_one = stm32_spi_transfer_one;
master->unprepare_message = stm32_spi_unprepare_msg;
- master->flags = SPI_MASTER_MUST_TX;
+ master->flags = spi->cfg->flags;
spi->dma_tx = dma_request_chan(spi->dev, "tx");
if (IS_ERR(spi->dma_tx)) {
diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c
index 1fdfc6e6691d..6000d0761206 100644
--- a/drivers/spi/spi-sun4i.c
+++ b/drivers/spi/spi-sun4i.c
@@ -280,7 +280,7 @@ static int sun4i_spi_transfer_one(struct spi_master *master,
* SPI_CLK = MOD_CLK / (2 ^ (cdr + 1))
* Or we can use CDR2, which is calculated with the formula:
* SPI_CLK = MOD_CLK / (2 * (cdr + 1))
- * Wether we use the former or the latter is set through the
+ * Whether we use the former or the latter is set through the
* DRS bit.
*
* First try CDR2, and if we can't reach the expected
diff --git a/drivers/spi/spi-sunplus-sp7021.c b/drivers/spi/spi-sunplus-sp7021.c
new file mode 100644
index 000000000000..f989f7b99296
--- /dev/null
+++ b/drivers/spi/spi-sunplus-sp7021.c
@@ -0,0 +1,584 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (c) 2021 Sunplus Inc.
+// Author: Li-hao Kuo <lhjeff911@gmail.com>
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <linux/spi/spi.h>
+
+#define SP7021_DATA_RDY_REG 0x0044
+#define SP7021_SLAVE_DMA_CTRL_REG 0x0048
+#define SP7021_SLAVE_DMA_LENGTH_REG 0x004c
+#define SP7021_SLAVE_DMA_ADDR_REG 0x004c
+
+#define SP7021_SLAVE_DATA_RDY BIT(0)
+#define SP7021_SLAVE_SW_RST BIT(1)
+#define SP7021_SLA_DMA_W_INT BIT(8)
+#define SP7021_SLAVE_CLR_INT BIT(8)
+#define SP7021_SLAVE_DMA_EN BIT(0)
+#define SP7021_SLAVE_DMA_RW BIT(6)
+#define SP7021_SLAVE_DMA_CMD GENMASK(3, 2)
+
+#define SP7021_FIFO_REG 0x0034
+#define SP7021_SPI_STATUS_REG 0x0038
+#define SP7021_SPI_CONFIG_REG 0x003c
+#define SP7021_INT_BUSY_REG 0x004c
+#define SP7021_DMA_CTRL_REG 0x0050
+
+#define SP7021_SPI_START_FD BIT(0)
+#define SP7021_FD_SW_RST BIT(1)
+#define SP7021_TX_EMP_FLAG BIT(2)
+#define SP7021_RX_EMP_FLAG BIT(4)
+#define SP7021_RX_FULL_FLAG BIT(5)
+#define SP7021_FINISH_FLAG BIT(6)
+
+#define SP7021_TX_CNT_MASK GENMASK(11, 8)
+#define SP7021_RX_CNT_MASK GENMASK(15, 12)
+#define SP7021_TX_LEN_MASK GENMASK(23, 16)
+#define SP7021_GET_LEN_MASK GENMASK(31, 24)
+#define SP7021_SET_TX_LEN GENMASK(23, 16)
+#define SP7021_SET_XFER_LEN GENMASK(31, 24)
+
+#define SP7021_CPOL_FD BIT(0)
+#define SP7021_CPHA_R BIT(1)
+#define SP7021_CPHA_W BIT(2)
+#define SP7021_LSB_SEL BIT(4)
+#define SP7021_CS_POR BIT(5)
+#define SP7021_FD_SEL BIT(6)
+
+#define SP7021_RX_UNIT GENMASK(8, 7)
+#define SP7021_TX_UNIT GENMASK(10, 9)
+#define SP7021_TX_EMP_FLAG_MASK BIT(11)
+#define SP7021_RX_FULL_FLAG_MASK BIT(14)
+#define SP7021_FINISH_FLAG_MASK BIT(15)
+#define SP7021_CLEAN_RW_BYTE GENMASK(10, 7)
+#define SP7021_CLEAN_FLUG_MASK GENMASK(15, 11)
+#define SP7021_CLK_MASK GENMASK(31, 16)
+
+#define SP7021_INT_BYPASS BIT(3)
+#define SP7021_CLR_MASTER_INT BIT(6)
+
+#define SP7021_SPI_DATA_SIZE (255)
+#define SP7021_FIFO_DATA_LEN (16)
+
+enum {
+ SP7021_MASTER_MODE = 0,
+ SP7021_SLAVE_MODE = 1,
+};
+
+struct sp7021_spi_ctlr {
+ struct device *dev;
+ struct spi_controller *ctlr;
+ void __iomem *m_base;
+ void __iomem *s_base;
+ u32 xfer_conf;
+ int mode;
+ int m_irq;
+ int s_irq;
+ struct clk *spi_clk;
+ struct reset_control *rstc;
+ // irq spin lock
+ spinlock_t lock;
+ // data xfer lock
+ struct mutex buf_lock;
+ struct completion isr_done;
+ struct completion slave_isr;
+ unsigned int rx_cur_len;
+ unsigned int tx_cur_len;
+ unsigned int data_unit;
+ const u8 *tx_buf;
+ u8 *rx_buf;
+};
+
+static irqreturn_t sp7021_spi_slave_irq(int irq, void *dev)
+{
+ struct sp7021_spi_ctlr *pspim = dev;
+ unsigned int data_status;
+
+ data_status = readl(pspim->s_base + SP7021_DATA_RDY_REG);
+ data_status |= SP7021_SLAVE_CLR_INT;
+ writel(data_status, pspim->s_base + SP7021_DATA_RDY_REG);
+ complete(&pspim->slave_isr);
+ return IRQ_HANDLED;
+}
+
+static int sp7021_spi_slave_abort(struct spi_controller *ctlr)
+{
+ struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+
+ complete(&pspim->slave_isr);
+ complete(&pspim->isr_done);
+ return 0;
+}
+
+static int sp7021_spi_slave_tx(struct spi_device *spi, struct spi_transfer *xfer)
+{
+ struct sp7021_spi_ctlr *pspim = spi_controller_get_devdata(spi->controller);
+ u32 value;
+
+ reinit_completion(&pspim->slave_isr);
+ value = SP7021_SLAVE_DMA_EN | SP7021_SLAVE_DMA_RW | FIELD_PREP(SP7021_SLAVE_DMA_CMD, 3);
+ writel(value, pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG);
+ writel(xfer->len, pspim->s_base + SP7021_SLAVE_DMA_LENGTH_REG);
+ writel(xfer->tx_dma, pspim->s_base + SP7021_SLAVE_DMA_ADDR_REG);
+ value = readl(pspim->s_base + SP7021_DATA_RDY_REG);
+ value |= SP7021_SLAVE_DATA_RDY;
+ writel(value, pspim->s_base + SP7021_DATA_RDY_REG);
+ if (wait_for_completion_interruptible(&pspim->isr_done)) {
+ dev_err(&spi->dev, "%s() wait_for_completion err\n", __func__);
+ return -EINTR;
+ }
+ return 0;
+}
+
+static int sp7021_spi_slave_rx(struct spi_device *spi, struct spi_transfer *xfer)
+{
+ struct sp7021_spi_ctlr *pspim = spi_controller_get_devdata(spi->controller);
+ u32 value;
+
+ reinit_completion(&pspim->isr_done);
+ value = SP7021_SLAVE_DMA_EN | FIELD_PREP(SP7021_SLAVE_DMA_CMD, 3);
+ writel(value, pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG);
+ writel(xfer->len, pspim->s_base + SP7021_SLAVE_DMA_LENGTH_REG);
+ writel(xfer->rx_dma, pspim->s_base + SP7021_SLAVE_DMA_ADDR_REG);
+ if (wait_for_completion_interruptible(&pspim->isr_done)) {
+ dev_err(&spi->dev, "%s() wait_for_completion err\n", __func__);
+ return -EINTR;
+ }
+ writel(SP7021_SLAVE_SW_RST, pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG);
+ return 0;
+}
+
+static void sp7021_spi_master_rb(struct sp7021_spi_ctlr *pspim, unsigned int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++) {
+ pspim->rx_buf[pspim->rx_cur_len] =
+ readl(pspim->m_base + SP7021_FIFO_REG);
+ pspim->rx_cur_len++;
+ }
+}
+
+static void sp7021_spi_master_wb(struct sp7021_spi_ctlr *pspim, unsigned int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++) {
+ writel(pspim->tx_buf[pspim->tx_cur_len],
+ pspim->m_base + SP7021_FIFO_REG);
+ pspim->tx_cur_len++;
+ }
+}
+
+static irqreturn_t sp7021_spi_master_irq(int irq, void *dev)
+{
+ struct sp7021_spi_ctlr *pspim = dev;
+ unsigned int tx_cnt, total_len;
+ unsigned int tx_len, rx_cnt;
+ unsigned int fd_status;
+ bool isrdone = false;
+ u32 value;
+
+ fd_status = readl(pspim->m_base + SP7021_SPI_STATUS_REG);
+ tx_cnt = FIELD_GET(SP7021_TX_CNT_MASK, fd_status);
+ tx_len = FIELD_GET(SP7021_TX_LEN_MASK, fd_status);
+ total_len = FIELD_GET(SP7021_GET_LEN_MASK, fd_status);
+
+ if ((fd_status & SP7021_TX_EMP_FLAG) && (fd_status & SP7021_RX_EMP_FLAG) && total_len == 0)
+ return IRQ_NONE;
+
+ if (tx_len == 0 && total_len == 0)
+ return IRQ_NONE;
+
+ spin_lock_irq(&pspim->lock);
+
+ rx_cnt = FIELD_GET(SP7021_RX_CNT_MASK, fd_status);
+ if (fd_status & SP7021_RX_FULL_FLAG)
+ rx_cnt = pspim->data_unit;
+
+ tx_cnt = min(tx_len - pspim->tx_cur_len, pspim->data_unit - tx_cnt);
+ dev_dbg(pspim->dev, "fd_st=0x%x rx_c:%d tx_c:%d tx_l:%d",
+ fd_status, rx_cnt, tx_cnt, tx_len);
+
+ if (rx_cnt > 0)
+ sp7021_spi_master_rb(pspim, rx_cnt);
+ if (tx_cnt > 0)
+ sp7021_spi_master_wb(pspim, tx_cnt);
+
+ fd_status = readl(pspim->m_base + SP7021_SPI_STATUS_REG);
+ tx_len = FIELD_GET(SP7021_TX_LEN_MASK, fd_status);
+ total_len = FIELD_GET(SP7021_GET_LEN_MASK, fd_status);
+
+ if (fd_status & SP7021_FINISH_FLAG || tx_len == pspim->tx_cur_len) {
+ while (total_len != pspim->rx_cur_len) {
+ fd_status = readl(pspim->m_base + SP7021_SPI_STATUS_REG);
+ total_len = FIELD_GET(SP7021_GET_LEN_MASK, fd_status);
+ if (fd_status & SP7021_RX_FULL_FLAG)
+ rx_cnt = pspim->data_unit;
+ else
+ rx_cnt = FIELD_GET(SP7021_RX_CNT_MASK, fd_status);
+
+ if (rx_cnt > 0)
+ sp7021_spi_master_rb(pspim, rx_cnt);
+ }
+ value = readl(pspim->m_base + SP7021_INT_BUSY_REG);
+ value |= SP7021_CLR_MASTER_INT;
+ writel(value, pspim->m_base + SP7021_INT_BUSY_REG);
+ writel(SP7021_FINISH_FLAG, pspim->m_base + SP7021_SPI_STATUS_REG);
+ isrdone = true;
+ }
+
+ if (isrdone)
+ complete(&pspim->isr_done);
+ spin_unlock_irq(&pspim->lock);
+ return IRQ_HANDLED;
+}
+
+static void sp7021_prep_transfer(struct spi_controller *ctlr, struct spi_device *spi)
+{
+ struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+
+ pspim->tx_cur_len = 0;
+ pspim->rx_cur_len = 0;
+ pspim->data_unit = SP7021_FIFO_DATA_LEN;
+}
+
+// preliminarily set CS, CPOL, CPHA and LSB
+static int sp7021_spi_controller_prepare_message(struct spi_controller *ctlr,
+ struct spi_message *msg)
+{
+ struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+ struct spi_device *s = msg->spi;
+ u32 value, rs = 0;
+
+ value = readl(pspim->m_base + SP7021_SPI_STATUS_REG);
+ value |= SP7021_FD_SW_RST;
+ writel(value, pspim->m_base + SP7021_SPI_STATUS_REG);
+ rs |= SP7021_FD_SEL;
+ if (s->mode & SPI_CPOL)
+ rs |= SP7021_CPOL_FD;
+
+ if (s->mode & SPI_LSB_FIRST)
+ rs |= SP7021_LSB_SEL;
+
+ if (s->mode & SPI_CS_HIGH)
+ rs |= SP7021_CS_POR;
+
+ if (s->mode & SPI_CPHA)
+ rs |= SP7021_CPHA_R;
+ else
+ rs |= SP7021_CPHA_W;
+
+ rs |= FIELD_PREP(SP7021_TX_UNIT, 0) | FIELD_PREP(SP7021_RX_UNIT, 0);
+ pspim->xfer_conf = rs;
+ if (pspim->xfer_conf & SP7021_CPOL_FD)
+ writel(pspim->xfer_conf, pspim->m_base + SP7021_SPI_CONFIG_REG);
+
+ return 0;
+}
+
+static void sp7021_spi_setup_clk(struct spi_controller *ctlr, struct spi_transfer *xfer)
+{
+ struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+ u32 clk_rate, clk_sel, div;
+
+ clk_rate = clk_get_rate(pspim->spi_clk);
+ div = max(2U, clk_rate / xfer->speed_hz);
+
+ clk_sel = (div / 2) - 1;
+ pspim->xfer_conf &= ~SP7021_CLK_MASK;
+ pspim->xfer_conf |= FIELD_PREP(SP7021_CLK_MASK, clk_sel);
+ writel(pspim->xfer_conf, pspim->m_base + SP7021_SPI_CONFIG_REG);
+}
+
+static int sp7021_spi_master_transfer_one(struct spi_controller *ctlr, struct spi_device *spi,
+ struct spi_transfer *xfer)
+{
+ struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+ unsigned long timeout = msecs_to_jiffies(1000);
+ unsigned int xfer_cnt, xfer_len, last_len;
+ unsigned int i, len_temp;
+ u32 reg_temp;
+
+ xfer_cnt = xfer->len / SP7021_SPI_DATA_SIZE;
+ last_len = xfer->len % SP7021_SPI_DATA_SIZE;
+
+ for (i = 0; i <= xfer_cnt; i++) {
+ mutex_lock(&pspim->buf_lock);
+ sp7021_prep_transfer(ctlr, spi);
+ sp7021_spi_setup_clk(ctlr, xfer);
+ reinit_completion(&pspim->isr_done);
+
+ if (i == xfer_cnt)
+ xfer_len = last_len;
+ else
+ xfer_len = SP7021_SPI_DATA_SIZE;
+
+ pspim->tx_buf = xfer->tx_buf + i * SP7021_SPI_DATA_SIZE;
+ pspim->rx_buf = xfer->rx_buf + i * SP7021_SPI_DATA_SIZE;
+
+ if (pspim->tx_cur_len < xfer_len) {
+ len_temp = min(pspim->data_unit, xfer_len);
+ sp7021_spi_master_wb(pspim, len_temp);
+ }
+ reg_temp = readl(pspim->m_base + SP7021_SPI_CONFIG_REG);
+ reg_temp &= ~SP7021_CLEAN_RW_BYTE;
+ reg_temp &= ~SP7021_CLEAN_FLUG_MASK;
+ reg_temp |= SP7021_FD_SEL | SP7021_FINISH_FLAG_MASK |
+ SP7021_TX_EMP_FLAG_MASK | SP7021_RX_FULL_FLAG_MASK |
+ FIELD_PREP(SP7021_TX_UNIT, 0) | FIELD_PREP(SP7021_RX_UNIT, 0);
+ writel(reg_temp, pspim->m_base + SP7021_SPI_CONFIG_REG);
+
+ reg_temp = FIELD_PREP(SP7021_SET_TX_LEN, xfer_len) |
+ FIELD_PREP(SP7021_SET_XFER_LEN, xfer_len) |
+ SP7021_SPI_START_FD;
+ writel(reg_temp, pspim->m_base + SP7021_SPI_STATUS_REG);
+
+ if (!wait_for_completion_interruptible_timeout(&pspim->isr_done, timeout)) {
+ dev_err(&spi->dev, "wait_for_completion err\n");
+ mutex_unlock(&pspim->buf_lock);
+ return -ETIMEDOUT;
+ }
+
+ reg_temp = readl(pspim->m_base + SP7021_SPI_STATUS_REG);
+ if (reg_temp & SP7021_FINISH_FLAG) {
+ writel(SP7021_FINISH_FLAG, pspim->m_base + SP7021_SPI_STATUS_REG);
+ writel(readl(pspim->m_base + SP7021_SPI_CONFIG_REG) &
+ SP7021_CLEAN_FLUG_MASK, pspim->m_base + SP7021_SPI_CONFIG_REG);
+ }
+
+ if (pspim->xfer_conf & SP7021_CPOL_FD)
+ writel(pspim->xfer_conf, pspim->m_base + SP7021_SPI_CONFIG_REG);
+
+ mutex_unlock(&pspim->buf_lock);
+ }
+ return 0;
+}
+
+static int sp7021_spi_slave_transfer_one(struct spi_controller *ctlr, struct spi_device *spi,
+ struct spi_transfer *xfer)
+{
+ struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+ struct device *dev = pspim->dev;
+ int ret;
+
+ if (xfer->tx_buf && !xfer->rx_buf) {
+ xfer->tx_dma = dma_map_single(dev, (void *)xfer->tx_buf,
+ xfer->len, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, xfer->tx_dma))
+ return -ENOMEM;
+ ret = sp7021_spi_slave_tx(spi, xfer);
+ dma_unmap_single(dev, xfer->tx_dma, xfer->len, DMA_TO_DEVICE);
+ } else if (xfer->rx_buf && !xfer->tx_buf) {
+ xfer->rx_dma = dma_map_single(dev, xfer->rx_buf, xfer->len,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(dev, xfer->rx_dma))
+ return -ENOMEM;
+ ret = sp7021_spi_slave_rx(spi, xfer);
+ dma_unmap_single(dev, xfer->rx_dma, xfer->len, DMA_FROM_DEVICE);
+ } else {
+ dev_dbg(&ctlr->dev, "%s() wrong command\n", __func__);
+ return -EINVAL;
+ }
+
+ spi_finalize_current_transfer(ctlr);
+ return ret;
+}
+
+static void sp7021_spi_disable_unprepare(void *data)
+{
+ clk_disable_unprepare(data);
+}
+
+static void sp7021_spi_reset_control_assert(void *data)
+{
+ reset_control_assert(data);
+}
+
+static int sp7021_spi_controller_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct sp7021_spi_ctlr *pspim;
+ struct spi_controller *ctlr;
+ int mode, ret;
+
+ pdev->id = of_alias_get_id(pdev->dev.of_node, "sp_spi");
+
+ if (device_property_read_bool(dev, "spi-slave"))
+ mode = SP7021_SLAVE_MODE;
+ else
+ mode = SP7021_MASTER_MODE;
+
+ if (mode == SP7021_SLAVE_MODE)
+ ctlr = devm_spi_alloc_slave(dev, sizeof(*pspim));
+ else
+ ctlr = devm_spi_alloc_master(dev, sizeof(*pspim));
+ if (!ctlr)
+ return -ENOMEM;
+ device_set_node(&ctlr->dev, dev_fwnode(dev));
+ ctlr->bus_num = pdev->id;
+ ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST;
+ ctlr->auto_runtime_pm = true;
+ ctlr->prepare_message = sp7021_spi_controller_prepare_message;
+ if (mode == SP7021_SLAVE_MODE) {
+ ctlr->transfer_one = sp7021_spi_slave_transfer_one;
+ ctlr->slave_abort = sp7021_spi_slave_abort;
+ ctlr->flags = SPI_CONTROLLER_HALF_DUPLEX;
+ } else {
+ ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
+ ctlr->min_speed_hz = 40000;
+ ctlr->max_speed_hz = 25000000;
+ ctlr->use_gpio_descriptors = true;
+ ctlr->flags = SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX;
+ ctlr->transfer_one = sp7021_spi_master_transfer_one;
+ }
+ platform_set_drvdata(pdev, ctlr);
+ pspim = spi_controller_get_devdata(ctlr);
+ pspim->mode = mode;
+ pspim->ctlr = ctlr;
+ pspim->dev = dev;
+ spin_lock_init(&pspim->lock);
+ mutex_init(&pspim->buf_lock);
+ init_completion(&pspim->isr_done);
+ init_completion(&pspim->slave_isr);
+
+ pspim->m_base = devm_platform_ioremap_resource_byname(pdev, "master");
+ if (IS_ERR(pspim->m_base))
+ return dev_err_probe(dev, PTR_ERR(pspim->m_base), "m_base get fail\n");
+
+ pspim->s_base = devm_platform_ioremap_resource_byname(pdev, "slave");
+ if (IS_ERR(pspim->s_base))
+ return dev_err_probe(dev, PTR_ERR(pspim->s_base), "s_base get fail\n");
+
+ pspim->m_irq = platform_get_irq_byname(pdev, "master_risc");
+ if (pspim->m_irq < 0)
+ return pspim->m_irq;
+
+ pspim->s_irq = platform_get_irq_byname(pdev, "slave_risc");
+ if (pspim->s_irq < 0)
+ return pspim->s_irq;
+
+ pspim->spi_clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(pspim->spi_clk))
+ return dev_err_probe(dev, PTR_ERR(pspim->spi_clk), "clk get fail\n");
+
+ pspim->rstc = devm_reset_control_get_exclusive(dev, NULL);
+ if (IS_ERR(pspim->rstc))
+ return dev_err_probe(dev, PTR_ERR(pspim->rstc), "rst get fail\n");
+
+ ret = clk_prepare_enable(pspim->spi_clk);
+ if (ret)
+ return dev_err_probe(dev, ret, "failed to enable clk\n");
+
+ ret = devm_add_action_or_reset(dev, sp7021_spi_disable_unprepare, pspim->spi_clk);
+ if (ret)
+ return ret;
+
+ ret = reset_control_deassert(pspim->rstc);
+ if (ret)
+ return dev_err_probe(dev, ret, "failed to deassert reset\n");
+
+ ret = devm_add_action_or_reset(dev, sp7021_spi_reset_control_assert, pspim->rstc);
+ if (ret)
+ return ret;
+
+ ret = devm_request_irq(dev, pspim->m_irq, sp7021_spi_master_irq,
+ IRQF_TRIGGER_RISING, pdev->name, pspim);
+ if (ret)
+ return ret;
+
+ ret = devm_request_irq(dev, pspim->s_irq, sp7021_spi_slave_irq,
+ IRQF_TRIGGER_RISING, pdev->name, pspim);
+ if (ret)
+ return ret;
+
+ pm_runtime_enable(dev);
+ ret = spi_register_controller(ctlr);
+ if (ret) {
+ pm_runtime_disable(dev);
+ return dev_err_probe(dev, ret, "failed to register SPI controller\n");
+ }
+ return 0;
+}
+
+static int sp7021_spi_controller_remove(struct platform_device *pdev)
+{
+ struct spi_controller *ctlr = dev_get_drvdata(&pdev->dev);
+
+ spi_unregister_controller(ctlr);
+ pm_runtime_disable(&pdev->dev);
+ pm_runtime_set_suspended(&pdev->dev);
+ return 0;
+}
+
+static int __maybe_unused sp7021_spi_controller_suspend(struct device *dev)
+{
+ struct spi_controller *ctlr = dev_get_drvdata(dev);
+ struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+
+ return reset_control_assert(pspim->rstc);
+}
+
+static int __maybe_unused sp7021_spi_controller_resume(struct device *dev)
+{
+ struct spi_controller *ctlr = dev_get_drvdata(dev);
+ struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+
+ reset_control_deassert(pspim->rstc);
+ return clk_prepare_enable(pspim->spi_clk);
+}
+
+#ifdef CONFIG_PM
+static int sp7021_spi_runtime_suspend(struct device *dev)
+{
+ struct spi_controller *ctlr = dev_get_drvdata(dev);
+ struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+
+ return reset_control_assert(pspim->rstc);
+}
+
+static int sp7021_spi_runtime_resume(struct device *dev)
+{
+ struct spi_controller *ctlr = dev_get_drvdata(dev);
+ struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+
+ return reset_control_deassert(pspim->rstc);
+}
+#endif
+
+static const struct dev_pm_ops sp7021_spi_pm_ops = {
+ SET_RUNTIME_PM_OPS(sp7021_spi_runtime_suspend,
+ sp7021_spi_runtime_resume, NULL)
+ SET_SYSTEM_SLEEP_PM_OPS(sp7021_spi_controller_suspend,
+ sp7021_spi_controller_resume)
+};
+
+static const struct of_device_id sp7021_spi_controller_ids[] = {
+ { .compatible = "sunplus,sp7021-spi" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, sp7021_spi_controller_ids);
+
+static struct platform_driver sp7021_spi_controller_driver = {
+ .probe = sp7021_spi_controller_probe,
+ .remove = sp7021_spi_controller_remove,
+ .driver = {
+ .name = "sunplus,sp7021-spi-controller",
+ .of_match_table = sp7021_spi_controller_ids,
+ .pm = &sp7021_spi_pm_ops,
+ },
+};
+module_platform_driver(sp7021_spi_controller_driver);
+
+MODULE_AUTHOR("Li-hao Kuo <lhjeff911@gmail.com>");
+MODULE_DESCRIPTION("Sunplus SPI controller driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c
index e9de1d958bbd..8f345247a8c3 100644
--- a/drivers/spi/spi-tegra114.c
+++ b/drivers/spi/spi-tegra114.c
@@ -1352,6 +1352,10 @@ static int tegra_spi_probe(struct platform_device *pdev)
tspi->phys = r->start;
spi_irq = platform_get_irq(pdev, 0);
+ if (spi_irq < 0) {
+ ret = spi_irq;
+ goto exit_free_master;
+ }
tspi->irq = spi_irq;
tspi->clk = devm_clk_get(&pdev->dev, "spi");
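The tegra114 hunk illustrates a general rule rather than anything driver-specific: platform_get_irq() can return a negative errno (including -EPROBE_DEFER) and must be checked before the value is used as an interrupt number. A hedged sketch of the pattern, with a hypothetical helper name:

#include <linux/platform_device.h>

static int example_get_irq(struct platform_device *pdev, int *out_irq)
{
	int irq = platform_get_irq(pdev, 0);

	if (irq < 0)
		return irq;	/* may be -EPROBE_DEFER; propagate unchanged */

	*out_irq = irq;
	return 0;
}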
diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c
index 2a03739a0c60..80c3787deea9 100644
--- a/drivers/spi/spi-tegra20-slink.c
+++ b/drivers/spi/spi-tegra20-slink.c
@@ -1006,14 +1006,8 @@ static int tegra_slink_probe(struct platform_device *pdev)
struct resource *r;
int ret, spi_irq;
const struct tegra_slink_chip_data *cdata = NULL;
- const struct of_device_id *match;
- match = of_match_device(tegra_slink_of_match, &pdev->dev);
- if (!match) {
- dev_err(&pdev->dev, "Error: No device match found\n");
- return -ENODEV;
- }
- cdata = match->data;
+ cdata = of_device_get_match_data(&pdev->dev);
master = spi_alloc_master(&pdev->dev, sizeof(*tspi));
if (!master) {
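The slink change replaces the of_match_device() boilerplate with of_device_get_match_data(), which returns the .data pointer of the matching of_device_id entry (or NULL) in a single call. A sketch under the assumption of a hypothetical per-compatible data struct:

#include <linux/of_device.h>
#include <linux/platform_device.h>

/* hypothetical per-compatible data, referenced from of_device_id.data */
struct example_chip_data {
	unsigned int fifo_depth;
};

static int example_probe(struct platform_device *pdev)
{
	const struct example_chip_data *cdata;

	cdata = of_device_get_match_data(&pdev->dev);
	if (!cdata)
		return -ENODEV;	/* bound without matching compatible data */

	return 0;
}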
diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
index ce1bdb4767ea..66f647f32876 100644
--- a/drivers/spi/spi-tegra210-quad.c
+++ b/drivers/spi/spi-tegra210-quad.c
@@ -21,6 +21,8 @@
#include <linux/of_device.h>
#include <linux/reset.h>
#include <linux/spi/spi.h>
+#include <linux/acpi.h>
+#include <linux/property.h>
#define QSPI_COMMAND1 0x000
#define QSPI_BIT_LENGTH(x) (((x) & 0x1f) << 0)
@@ -119,11 +121,40 @@
#define QSPI_NUM_DUMMY_CYCLE(x) (((x) & 0xff) << 0)
#define QSPI_DUMMY_CYCLES_MAX 0xff
+#define QSPI_CMB_SEQ_CMD 0x19c
+#define QSPI_COMMAND_VALUE_SET(x) (((x) & 0xFF) << 0)
+
+#define QSPI_CMB_SEQ_CMD_CFG 0x1a0
+#define QSPI_COMMAND_X1_X2_X4(x) (((x) & 0x3) << 13)
+#define QSPI_COMMAND_X1_X2_X4_MASK (0x03 << 13)
+#define QSPI_COMMAND_SDR_DDR BIT(12)
+#define QSPI_COMMAND_SIZE_SET(x) (((x) & 0xFF) << 0)
+
+#define QSPI_GLOBAL_CONFIG 0x1a4
+#define QSPI_CMB_SEQ_EN BIT(0)
+
+#define QSPI_CMB_SEQ_ADDR 0x1a8
+#define QSPI_ADDRESS_VALUE_SET(x) (((x) & 0xFFFF) << 0)
+
+#define QSPI_CMB_SEQ_ADDR_CFG 0x1ac
+#define QSPI_ADDRESS_X1_X2_X4(x) (((x) & 0x3) << 13)
+#define QSPI_ADDRESS_X1_X2_X4_MASK (0x03 << 13)
+#define QSPI_ADDRESS_SDR_DDR BIT(12)
+#define QSPI_ADDRESS_SIZE_SET(x) (((x) & 0xFF) << 0)
+
#define DATA_DIR_TX BIT(0)
#define DATA_DIR_RX BIT(1)
#define QSPI_DMA_TIMEOUT (msecs_to_jiffies(1000))
#define DEFAULT_QSPI_DMA_BUF_LEN (64 * 1024)
+#define CMD_TRANSFER 0
+#define ADDR_TRANSFER 1
+#define DATA_TRANSFER 2
+
+struct tegra_qspi_soc_data {
+ bool has_dma;
+ bool cmb_xfer_capable;
+};
struct tegra_qspi_client_data {
int tx_clk_tap_delay;
@@ -137,7 +168,6 @@ struct tegra_qspi {
spinlock_t lock;
struct clk *clk;
- struct reset_control *rst;
void __iomem *base;
phys_addr_t phys;
unsigned int irq;
@@ -185,6 +215,7 @@ struct tegra_qspi {
u32 *tx_dma_buf;
dma_addr_t tx_dma_phys;
struct dma_async_tx_descriptor *tx_dma_desc;
+ const struct tegra_qspi_soc_data *soc_data;
};
static inline u32 tegra_qspi_readl(struct tegra_qspi *tqspi, unsigned long offset)
@@ -767,7 +798,7 @@ static u32 tegra_qspi_setup_transfer_one(struct spi_device *spi, struct spi_tran
u32 tx_tap = 0, rx_tap = 0;
int req_mode;
- if (speed != tqspi->cur_speed) {
+ if (!has_acpi_companion(tqspi->dev) && speed != tqspi->cur_speed) {
clk_set_rate(tqspi->clk, speed);
tqspi->cur_speed = speed;
}
@@ -875,16 +906,16 @@ static int tegra_qspi_start_transfer_one(struct spi_device *spi,
static struct tegra_qspi_client_data *tegra_qspi_parse_cdata_dt(struct spi_device *spi)
{
struct tegra_qspi_client_data *cdata;
- struct device_node *slave_np = spi->dev.of_node;
cdata = devm_kzalloc(&spi->dev, sizeof(*cdata), GFP_KERNEL);
if (!cdata)
return NULL;
- of_property_read_u32(slave_np, "nvidia,tx-clk-tap-delay",
- &cdata->tx_clk_tap_delay);
- of_property_read_u32(slave_np, "nvidia,rx-clk-tap-delay",
- &cdata->rx_clk_tap_delay);
+ device_property_read_u32(&spi->dev, "nvidia,tx-clk-tap-delay",
+ &cdata->tx_clk_tap_delay);
+ device_property_read_u32(&spi->dev, "nvidia,rx-clk-tap-delay",
+ &cdata->rx_clk_tap_delay);
+
return cdata;
}
@@ -906,7 +937,6 @@ static int tegra_qspi_setup(struct spi_device *spi)
cdata = tegra_qspi_parse_cdata_dt(spi);
spi->controller_data = cdata;
}
-
spin_lock_irqsave(&tqspi->lock, flags);
/* keep default cs state to inactive */
@@ -948,9 +978,8 @@ static void tegra_qspi_handle_error(struct tegra_qspi *tqspi)
dev_err(tqspi->dev, "error in transfer, fifo status 0x%08x\n", tqspi->status_reg);
tegra_qspi_dump_regs(tqspi);
tegra_qspi_flush_fifos(tqspi, true);
- reset_control_assert(tqspi->rst);
- udelay(2);
- reset_control_deassert(tqspi->rst);
+ if (device_reset(tqspi->dev) < 0)
+ dev_warn_once(tqspi->dev, "device reset failed\n");
}
static void tegra_qspi_transfer_end(struct spi_device *spi)
@@ -966,19 +995,179 @@ static void tegra_qspi_transfer_end(struct spi_device *spi)
tegra_qspi_writel(tqspi, tqspi->def_command1_reg, QSPI_COMMAND1);
}
-static int tegra_qspi_transfer_one_message(struct spi_master *master, struct spi_message *msg)
+static u32 tegra_qspi_cmd_config(bool is_ddr, u8 bus_width, u8 len)
+{
+ u32 cmd_config = 0;
+
+ /* Build the command configuration word */
+ if (is_ddr)
+ cmd_config |= QSPI_COMMAND_SDR_DDR;
+ else
+ cmd_config &= ~QSPI_COMMAND_SDR_DDR;
+
+ cmd_config |= QSPI_COMMAND_X1_X2_X4(bus_width);
+ cmd_config |= QSPI_COMMAND_SIZE_SET((len * 8) - 1);
+
+ return cmd_config;
+}
+
+static u32 tegra_qspi_addr_config(bool is_ddr, u8 bus_width, u8 len)
+{
+ u32 addr_config = 0;
+
+ /* Build the address configuration word */
+ is_ddr = 0; /* only SDR mode is supported */
+ bus_width = 0; /* X1 mode */
+
+ if (is_ddr)
+ addr_config |= QSPI_ADDRESS_SDR_DDR;
+ else
+ addr_config &= ~QSPI_ADDRESS_SDR_DDR;
+
+ addr_config |= QSPI_ADDRESS_X1_X2_X4(bus_width);
+ addr_config |= QSPI_ADDRESS_SIZE_SET((len * 8) - 1);
+
+ return addr_config;
+}
+
+static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
+ struct spi_message *msg)
+{
+ bool is_first_msg = true;
+ struct spi_transfer *xfer;
+ struct spi_device *spi = msg->spi;
+ u8 transfer_phase = 0;
+ u32 cmd1 = 0, dma_ctl = 0;
+ int ret = 0;
+ u32 address_value = 0;
+ u32 cmd_config = 0, addr_config = 0;
+ u8 cmd_value = 0;
+ u32 val = 0;
+
+ /* Enable Combined sequence mode */
+ val = tegra_qspi_readl(tqspi, QSPI_GLOBAL_CONFIG);
+ val |= QSPI_CMB_SEQ_EN;
+ tegra_qspi_writel(tqspi, val, QSPI_GLOBAL_CONFIG);
+ /* Process individual transfer list */
+ list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+ switch (transfer_phase) {
+ case CMD_TRANSFER:
+ /* X1 SDR mode */
+ cmd_config = tegra_qspi_cmd_config(false, 0,
+ xfer->len);
+ cmd_value = *((const u8 *)(xfer->tx_buf));
+ break;
+ case ADDR_TRANSFER:
+ /* X1 SDR mode */
+ addr_config = tegra_qspi_addr_config(false, 0,
+ xfer->len);
+ address_value = *((const u32 *)(xfer->tx_buf));
+ break;
+ case DATA_TRANSFER:
+ /* Program Command, Address value in register */
+ tegra_qspi_writel(tqspi, cmd_value, QSPI_CMB_SEQ_CMD);
+ tegra_qspi_writel(tqspi, address_value,
+ QSPI_CMB_SEQ_ADDR);
+ /* Program Command and Address config in register */
+ tegra_qspi_writel(tqspi, cmd_config,
+ QSPI_CMB_SEQ_CMD_CFG);
+ tegra_qspi_writel(tqspi, addr_config,
+ QSPI_CMB_SEQ_ADDR_CFG);
+
+ reinit_completion(&tqspi->xfer_completion);
+ cmd1 = tegra_qspi_setup_transfer_one(spi, xfer,
+ is_first_msg);
+ ret = tegra_qspi_start_transfer_one(spi, xfer,
+ cmd1);
+
+ if (ret < 0) {
+ dev_err(tqspi->dev, "Failed to start transfer-one: %d\n",
+ ret);
+ return ret;
+ }
+
+ is_first_msg = false;
+ ret = wait_for_completion_timeout(&tqspi->xfer_completion,
+ QSPI_DMA_TIMEOUT);
+
+ if (WARN_ON(ret == 0)) {
+ dev_err(tqspi->dev, "QSPI Transfer failed with timeout: %d\n",
+ ret);
+ if (tqspi->is_curr_dma_xfer &&
+ (tqspi->cur_direction & DATA_DIR_TX))
+ dmaengine_terminate_all(tqspi->tx_dma_chan);
+
+ if (tqspi->is_curr_dma_xfer &&
+ (tqspi->cur_direction & DATA_DIR_RX))
+ dmaengine_terminate_all(tqspi->rx_dma_chan);
+
+ /* Abort transfer by resetting pio/dma bit */
+ if (!tqspi->is_curr_dma_xfer) {
+ cmd1 = tegra_qspi_readl(tqspi, QSPI_COMMAND1);
+ cmd1 &= ~QSPI_PIO;
+ tegra_qspi_writel(tqspi, cmd1, QSPI_COMMAND1);
+ } else {
+ dma_ctl = tegra_qspi_readl(tqspi, QSPI_DMA_CTL);
+ dma_ctl &= ~QSPI_DMA_EN;
+ tegra_qspi_writel(tqspi, dma_ctl, QSPI_DMA_CTL);
+ }
+
+ /* Reset controller if timeout happens */
+ if (device_reset(tqspi->dev) < 0)
+ dev_warn_once(tqspi->dev,
+ "device reset failed\n");
+ ret = -EIO;
+ goto exit;
+ }
+
+ if (tqspi->tx_status || tqspi->rx_status) {
+ dev_err(tqspi->dev, "QSPI Transfer failed\n");
+ tqspi->tx_status = 0;
+ tqspi->rx_status = 0;
+ ret = -EIO;
+ goto exit;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ goto exit;
+ }
+ msg->actual_length += xfer->len;
+ transfer_phase++;
+ }
+
+exit:
+ msg->status = ret;
+
+ return ret;
+}
+
+static int tegra_qspi_non_combined_seq_xfer(struct tegra_qspi *tqspi,
+ struct spi_message *msg)
{
- struct tegra_qspi *tqspi = spi_master_get_devdata(master);
struct spi_device *spi = msg->spi;
struct spi_transfer *transfer;
bool is_first_msg = true;
- int ret;
+ int ret = 0, val = 0;
msg->status = 0;
msg->actual_length = 0;
tqspi->tx_status = 0;
tqspi->rx_status = 0;
+ /* Disable Combined sequence mode */
+ val = tegra_qspi_readl(tqspi, QSPI_GLOBAL_CONFIG);
+ val &= ~QSPI_CMB_SEQ_EN;
+ tegra_qspi_writel(tqspi, val, QSPI_GLOBAL_CONFIG);
list_for_each_entry(transfer, &msg->transfers, transfer_list) {
struct spi_transfer *xfer = transfer;
u8 dummy_bytes = 0;
@@ -1016,7 +1205,6 @@ static int tegra_qspi_transfer_one_message(struct spi_master *master, struct spi
goto complete_xfer;
}
- is_first_msg = false;
ret = wait_for_completion_timeout(&tqspi->xfer_completion,
QSPI_DMA_TIMEOUT);
if (WARN_ON(ret == 0)) {
@@ -1061,7 +1249,48 @@ complete_xfer:
ret = 0;
exit:
msg->status = ret;
+
+ return ret;
+}
+
+static bool tegra_qspi_validate_cmb_seq(struct tegra_qspi *tqspi,
+ struct spi_message *msg)
+{
+ int transfer_count = 0;
+ struct spi_transfer *xfer;
+
+ list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+ transfer_count++;
+ }
+ if (!tqspi->soc_data->cmb_xfer_capable || transfer_count != 3)
+ return false;
+ xfer = list_first_entry(&msg->transfers, typeof(*xfer),
+ transfer_list);
+ if (xfer->len > 2)
+ return false;
+ xfer = list_next_entry(xfer, transfer_list);
+ if (xfer->len > 4 || xfer->len < 3)
+ return false;
+ xfer = list_next_entry(xfer, transfer_list);
+ if (!tqspi->soc_data->has_dma || xfer->len > (QSPI_FIFO_DEPTH << 2))
+ return false;
+
+ return true;
+}
+
+static int tegra_qspi_transfer_one_message(struct spi_master *master,
+ struct spi_message *msg)
+{
+ struct tegra_qspi *tqspi = spi_master_get_devdata(master);
+ int ret;
+
+ if (tegra_qspi_validate_cmb_seq(tqspi, msg))
+ ret = tegra_qspi_combined_seq_xfer(tqspi, msg);
+ else
+ ret = tegra_qspi_non_combined_seq_xfer(tqspi, msg);
+
spi_finalize_current_message(master);
+
return ret;
}
@@ -1193,15 +1422,58 @@ static irqreturn_t tegra_qspi_isr_thread(int irq, void *context_data)
return handle_dma_based_xfer(tqspi);
}
+static struct tegra_qspi_soc_data tegra210_qspi_soc_data = {
+ .has_dma = true,
+ .cmb_xfer_capable = false,
+};
+
+static struct tegra_qspi_soc_data tegra186_qspi_soc_data = {
+ .has_dma = true,
+ .cmb_xfer_capable = true,
+};
+
+static struct tegra_qspi_soc_data tegra234_qspi_soc_data = {
+ .has_dma = false,
+ .cmb_xfer_capable = true,
+};
+
static const struct of_device_id tegra_qspi_of_match[] = {
- { .compatible = "nvidia,tegra210-qspi", },
- { .compatible = "nvidia,tegra186-qspi", },
- { .compatible = "nvidia,tegra194-qspi", },
+ {
+ .compatible = "nvidia,tegra210-qspi",
+ .data = &tegra210_qspi_soc_data,
+ }, {
+ .compatible = "nvidia,tegra186-qspi",
+ .data = &tegra186_qspi_soc_data,
+ }, {
+ .compatible = "nvidia,tegra194-qspi",
+ .data = &tegra186_qspi_soc_data,
+ }, {
+ .compatible = "nvidia,tegra234-qspi",
+ .data = &tegra234_qspi_soc_data,
+ },
{}
};
MODULE_DEVICE_TABLE(of, tegra_qspi_of_match);
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id tegra_qspi_acpi_match[] = {
+ {
+ .id = "NVDA1213",
+ .driver_data = (kernel_ulong_t)&tegra210_qspi_soc_data,
+ }, {
+ .id = "NVDA1313",
+ .driver_data = (kernel_ulong_t)&tegra186_qspi_soc_data,
+ }, {
+ .id = "NVDA1413",
+ .driver_data = (kernel_ulong_t)&tegra234_qspi_soc_data,
+ },
+ {}
+};
+
+MODULE_DEVICE_TABLE(acpi, tegra_qspi_acpi_match);
+#endif
+
static int tegra_qspi_probe(struct platform_device *pdev)
{
struct spi_master *master;
@@ -1233,6 +1505,7 @@ static int tegra_qspi_probe(struct platform_device *pdev)
tqspi->dev = &pdev->dev;
spin_lock_init(&tqspi->lock);
+ tqspi->soc_data = device_get_match_data(&pdev->dev);
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
tqspi->base = devm_ioremap_resource(&pdev->dev, r);
if (IS_ERR(tqspi->base))
@@ -1240,20 +1513,18 @@ static int tegra_qspi_probe(struct platform_device *pdev)
tqspi->phys = r->start;
qspi_irq = platform_get_irq(pdev, 0);
+ if (qspi_irq < 0)
+ return qspi_irq;
tqspi->irq = qspi_irq;
- tqspi->clk = devm_clk_get(&pdev->dev, "qspi");
- if (IS_ERR(tqspi->clk)) {
- ret = PTR_ERR(tqspi->clk);
- dev_err(&pdev->dev, "failed to get clock: %d\n", ret);
- return ret;
- }
+ if (!has_acpi_companion(tqspi->dev)) {
+ tqspi->clk = devm_clk_get(&pdev->dev, "qspi");
+ if (IS_ERR(tqspi->clk)) {
+ ret = PTR_ERR(tqspi->clk);
+ dev_err(&pdev->dev, "failed to get clock: %d\n", ret);
+ return ret;
+ }
- tqspi->rst = devm_reset_control_get_exclusive(&pdev->dev, NULL);
- if (IS_ERR(tqspi->rst)) {
- ret = PTR_ERR(tqspi->rst);
- dev_err(&pdev->dev, "failed to get reset control: %d\n", ret);
- return ret;
}
tqspi->max_buf_size = QSPI_FIFO_DEPTH << 2;
@@ -1277,9 +1548,8 @@ static int tegra_qspi_probe(struct platform_device *pdev)
goto exit_pm_disable;
}
- reset_control_assert(tqspi->rst);
- udelay(2);
- reset_control_deassert(tqspi->rst);
+ if (device_reset(tqspi->dev) < 0)
+ dev_warn_once(tqspi->dev, "device reset failed\n");
tqspi->def_command1_reg = QSPI_M_S | QSPI_CS_SW_HW | QSPI_CS_SW_VAL;
tegra_qspi_writel(tqspi, tqspi->def_command1_reg, QSPI_COMMAND1);
@@ -1358,6 +1628,9 @@ static int __maybe_unused tegra_qspi_runtime_suspend(struct device *dev)
struct spi_master *master = dev_get_drvdata(dev);
struct tegra_qspi *tqspi = spi_master_get_devdata(master);
+ /* Runtime pm disabled with ACPI */
+ if (has_acpi_companion(tqspi->dev))
+ return 0;
/* flush all write which are in PPSB queue by reading back */
tegra_qspi_readl(tqspi, QSPI_COMMAND1);
@@ -1372,6 +1645,9 @@ static int __maybe_unused tegra_qspi_runtime_resume(struct device *dev)
struct tegra_qspi *tqspi = spi_master_get_devdata(master);
int ret;
+ /* Runtime pm disabled with ACPI */
+ if (has_acpi_companion(tqspi->dev))
+ return 0;
ret = clk_prepare_enable(tqspi->clk);
if (ret < 0)
dev_err(tqspi->dev, "failed to enable clock: %d\n", ret);
@@ -1389,6 +1665,7 @@ static struct platform_driver tegra_qspi_driver = {
.name = "tegra-qspi",
.pm = &tegra_qspi_pm_ops,
.of_match_table = tegra_qspi_of_match,
+ .acpi_match_table = ACPI_PTR(tegra_qspi_acpi_match),
},
.probe = tegra_qspi_probe,
.remove = tegra_qspi_remove,
diff --git a/drivers/spi/spi-tle62x0.c b/drivers/spi/spi-tle62x0.c
index f8ad0709d015..a565352f6381 100644
--- a/drivers/spi/spi-tle62x0.c
+++ b/drivers/spi/spi-tle62x0.c
@@ -288,7 +288,7 @@ static int tle62x0_probe(struct spi_device *spi)
return ret;
}
-static int tle62x0_remove(struct spi_device *spi)
+static void tle62x0_remove(struct spi_device *spi)
{
struct tle62x0_state *st = spi_get_drvdata(spi);
int ptr;
@@ -298,7 +298,6 @@ static int tle62x0_remove(struct spi_device *spi)
device_remove_file(&spi->dev, &dev_attr_status_show);
kfree(st);
- return 0;
}
static struct spi_driver tle62x0_driver = {
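This hunk, and the later pi433, wfx, and spidev hunks, adapt to the spi_driver API change where the remove() callback returns void: the returned int was ignored by the core anyway, so drivers now simply free their state. Under the new prototype a driver's remove reduces to roughly this (hypothetical names):

#include <linux/slab.h>
#include <linux/spi/spi.h>

static void example_remove(struct spi_device *spi)
{
	void *state = spi_get_drvdata(spi);

	kfree(state);	/* just release resources; there is no status to return */
}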
diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c
index 8c4615b76339..dfaa1d79a78b 100644
--- a/drivers/spi/spi-topcliff-pch.c
+++ b/drivers/spi/spi-topcliff-pch.c
@@ -103,6 +103,7 @@
static int use_dma = 1;
struct pch_spi_dma_ctrl {
+ struct pci_dev *dma_dev;
struct dma_async_tx_descriptor *desc_tx;
struct dma_async_tx_descriptor *desc_rx;
struct pch_dma_slave param_tx;
@@ -876,8 +877,7 @@ static void pch_spi_request_dma(struct pch_spi_data *data, int bpw)
if (!chan) {
dev_err(&data->master->dev,
"ERROR: dma_request_channel FAILS(Tx)\n");
- data->use_dma = 0;
- return;
+ goto out;
}
dma->chan_tx = chan;
@@ -893,10 +893,15 @@ static void pch_spi_request_dma(struct pch_spi_data *data, int bpw)
"ERROR: dma_request_channel FAILS(Rx)\n");
dma_release_channel(dma->chan_tx);
dma->chan_tx = NULL;
- data->use_dma = 0;
- return;
+ goto out;
}
dma->chan_rx = chan;
+
+ dma->dma_dev = dma_dev;
+ return;
+out:
+ pci_dev_put(dma_dev);
+ data->use_dma = 0;
}
static void pch_spi_release_dma(struct pch_spi_data *data)
@@ -912,6 +917,8 @@ static void pch_spi_release_dma(struct pch_spi_data *data)
dma_release_channel(dma->chan_rx);
dma->chan_rx = NULL;
}
+
+ pci_dev_put(dma->dma_dev);
}
static void pch_spi_handle_dma(struct pch_spi_data *data, int *bpw)
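The topcliff-pch fix pairs the reference taken by the PCI device lookup with a pci_dev_put() on every exit path, stashing the pointer so the release path can drop it later. A sketch of the get/put discipline; example_priv and request_dma_channels() are hypothetical stand-ins for the driver's own state and channel setup:

#include <linux/pci.h>

static void example_request_dma(struct example_priv *priv,
				struct pci_bus *bus, unsigned int devfn)
{
	struct pci_dev *dma_dev = pci_get_slot(bus, devfn); /* takes a reference */

	if (!dma_dev || !request_dma_channels(dma_dev)) {
		pci_dev_put(dma_dev);	/* error path: drop the reference now */
		return;
	}
	priv->dma_dev = dma_dev;	/* success: hold it until release */
}

/* release path, mirroring pch_spi_release_dma(): pci_dev_put(priv->dma_dev); */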
diff --git a/drivers/spi/spi-uniphier.c b/drivers/spi/spi-uniphier.c
index 342ee8d2c476..cc0da4822231 100644
--- a/drivers/spi/spi-uniphier.c
+++ b/drivers/spi/spi-uniphier.c
@@ -726,7 +726,7 @@ static int uniphier_spi_probe(struct platform_device *pdev)
if (ret) {
dev_err(&pdev->dev, "failed to get TX DMA capacities: %d\n",
ret);
- goto out_disable_clk;
+ goto out_release_dma;
}
dma_tx_burst = caps.max_burst;
}
@@ -735,7 +735,7 @@ static int uniphier_spi_probe(struct platform_device *pdev)
if (IS_ERR_OR_NULL(master->dma_rx)) {
if (PTR_ERR(master->dma_rx) == -EPROBE_DEFER) {
ret = -EPROBE_DEFER;
- goto out_disable_clk;
+ goto out_release_dma;
}
master->dma_rx = NULL;
dma_rx_burst = INT_MAX;
@@ -744,7 +744,7 @@ static int uniphier_spi_probe(struct platform_device *pdev)
if (ret) {
dev_err(&pdev->dev, "failed to get RX DMA capacities: %d\n",
ret);
- goto out_disable_clk;
+ goto out_release_dma;
}
dma_rx_burst = caps.max_burst;
}
@@ -753,10 +753,20 @@ static int uniphier_spi_probe(struct platform_device *pdev)
ret = devm_spi_register_master(&pdev->dev, master);
if (ret)
- goto out_disable_clk;
+ goto out_release_dma;
return 0;
+out_release_dma:
+ if (!IS_ERR_OR_NULL(master->dma_rx)) {
+ dma_release_channel(master->dma_rx);
+ master->dma_rx = NULL;
+ }
+ if (!IS_ERR_OR_NULL(master->dma_tx)) {
+ dma_release_channel(master->dma_tx);
+ master->dma_tx = NULL;
+ }
+
out_disable_clk:
clk_disable_unprepare(priv->clk);
diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c
index cfa222c9bd5e..78f31b61a2aa 100644
--- a/drivers/spi/spi-zynq-qspi.c
+++ b/drivers/spi/spi-zynq-qspi.c
@@ -570,6 +570,9 @@ static int zynq_qspi_exec_mem_op(struct spi_mem *mem,
if (op->dummy.nbytes) {
tmpbuf = kzalloc(op->dummy.nbytes, GFP_KERNEL);
+ if (!tmpbuf)
+ return -ENOMEM;
+
memset(tmpbuf, 0xff, op->dummy.nbytes);
reinit_completion(&xqspi->data_completion);
xqspi->txbuf = tmpbuf;
diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c
index 328b6559bb19..2b5afae8ff7f 100644
--- a/drivers/spi/spi-zynqmp-gqspi.c
+++ b/drivers/spi/spi-zynqmp-gqspi.c
@@ -1172,7 +1172,10 @@ static int zynqmp_qspi_probe(struct platform_device *pdev)
goto clk_dis_all;
}
- dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
+ ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
+ if (ret)
+ goto clk_dis_all;
+
ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
ctlr->num_chipselect = GQSPI_DEFAULT_NUM_CS;
ctlr->mem_ops = &zynqmp_qspi_mem_ops;
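dma_set_mask() is __must_check because the platform may not support the requested addressing width, which is why the zynqmp hunk stops discarding its result. One common variant of handling this, sketched here as an assumption rather than what this driver does, is to fall back to a narrower mask before giving up:

#include <linux/dma-mapping.h>
#include <linux/platform_device.h>

static int example_set_dma_mask(struct platform_device *pdev)
{
	int ret;

	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
	if (ret)	/* fall back if 44-bit addressing is unsupported */
		ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
	if (ret)
		return dev_err_probe(&pdev->dev, ret, "no usable DMA mask\n");

	return 0;
}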
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 4599b121d744..c4dd1200fe99 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -18,7 +18,6 @@
#include <linux/mod_devicetable.h>
#include <linux/spi/spi.h>
#include <linux/spi/spi-mem.h>
-#include <linux/of_gpio.h>
#include <linux/gpio/consumer.h>
#include <linux/pm_runtime.h>
#include <linux/pm_domain.h>
@@ -144,7 +143,7 @@ static ssize_t spi_statistics_##name##_show(struct spi_statistics *stat, \
unsigned long flags; \
ssize_t len; \
spin_lock_irqsave(&stat->lock, flags); \
- len = sprintf(buf, format_string, stat->field); \
+ len = sysfs_emit(buf, format_string "\n", stat->field); \
spin_unlock_irqrestore(&stat->lock, flags); \
return len; \
} \
@@ -404,15 +403,8 @@ static void spi_remove(struct device *dev)
{
const struct spi_driver *sdrv = to_spi_driver(dev->driver);
- if (sdrv->remove) {
- int ret;
-
- ret = sdrv->remove(to_spi_device(dev));
- if (ret)
- dev_warn(dev,
- "Failed to unbind driver (%pe), ignoring\n",
- ERR_PTR(ret));
- }
+ if (sdrv->remove)
+ sdrv->remove(to_spi_device(dev));
dev_pm_domain_detach(dev, true);
}
@@ -532,7 +524,7 @@ static DEFINE_MUTEX(board_lock);
*
* Return: a pointer to the new device, or NULL.
*/
-static struct spi_device *spi_alloc_device(struct spi_controller *ctlr)
+struct spi_device *spi_alloc_device(struct spi_controller *ctlr)
{
struct spi_device *spi;
@@ -549,7 +541,6 @@ static struct spi_device *spi_alloc_device(struct spi_controller *ctlr)
spi->dev.parent = &ctlr->dev;
spi->dev.bus = &spi_bus_type;
spi->dev.release = spidev_release;
- spi->cs_gpio = -ENOENT;
spi->mode = ctlr->buswidth_override_bits;
spin_lock_init(&spi->statistics.lock);
@@ -557,6 +548,7 @@ static struct spi_device *spi_alloc_device(struct spi_controller *ctlr)
device_initialize(&spi->dev);
return spi;
}
+EXPORT_SYMBOL_GPL(spi_alloc_device);
static void spi_dev_set_name(struct spi_device *spi)
{
@@ -612,11 +604,8 @@ static int __spi_add_device(struct spi_device *spi)
return -ENODEV;
}
- /* Descriptors take precedence */
if (ctlr->cs_gpiods)
spi->cs_gpiod = ctlr->cs_gpiods[spi->chip_select];
- else if (ctlr->cs_gpios)
- spi->cs_gpio = ctlr->cs_gpios[spi->chip_select];
/*
* Drivers may modify this initial i/o setup, but will
@@ -652,7 +641,7 @@ static int __spi_add_device(struct spi_device *spi)
*
* Return: 0 on success; negative errno on failure
*/
-static int spi_add_device(struct spi_device *spi)
+int spi_add_device(struct spi_device *spi)
{
struct spi_controller *ctlr = spi->controller;
struct device *dev = ctlr->dev.parent;
@@ -673,6 +662,7 @@ static int spi_add_device(struct spi_device *spi)
mutex_unlock(&ctlr->add_lock);
return status;
}
+EXPORT_SYMBOL_GPL(spi_add_device);
static int spi_add_device_locked(struct spi_device *spi)
{
@@ -936,48 +926,40 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force)
* Avoid calling into the driver (or doing delays) if the chip select
* isn't actually changing from the last time this was called.
*/
- if (!force && (spi->controller->last_cs_enable == enable) &&
+ if (!force && ((enable && spi->controller->last_cs == spi->chip_select) ||
+ (!enable && spi->controller->last_cs != spi->chip_select)) &&
(spi->controller->last_cs_mode_high == (spi->mode & SPI_CS_HIGH)))
return;
trace_spi_set_cs(spi, activate);
- spi->controller->last_cs_enable = enable;
+ spi->controller->last_cs = enable ? spi->chip_select : -1;
spi->controller->last_cs_mode_high = spi->mode & SPI_CS_HIGH;
- if ((spi->cs_gpiod || gpio_is_valid(spi->cs_gpio) ||
- !spi->controller->set_cs_timing) && !activate) {
+ if ((spi->cs_gpiod || !spi->controller->set_cs_timing) && !activate) {
spi_delay_exec(&spi->cs_hold, NULL);
}
if (spi->mode & SPI_CS_HIGH)
enable = !enable;
- if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio)) {
+ if (spi->cs_gpiod) {
if (!(spi->mode & SPI_NO_CS)) {
- if (spi->cs_gpiod) {
- /*
- * Historically ACPI has no means of the GPIO polarity and
- * thus the SPISerialBus() resource defines it on the per-chip
- * basis. In order to avoid a chain of negations, the GPIO
- * polarity is considered being Active High. Even for the cases
- * when _DSD() is involved (in the updated versions of ACPI)
- * the GPIO CS polarity must be defined Active High to avoid
- * ambiguity. That's why we use enable, that takes SPI_CS_HIGH
- * into account.
- */
- if (has_acpi_companion(&spi->dev))
- gpiod_set_value_cansleep(spi->cs_gpiod, !enable);
- else
- /* Polarity handled by GPIO library */
- gpiod_set_value_cansleep(spi->cs_gpiod, activate);
- } else {
- /*
- * Invert the enable line, as active low is
- * default for SPI.
- */
- gpio_set_value_cansleep(spi->cs_gpio, !enable);
- }
+ /*
+ * Historically ACPI has no means of the GPIO polarity and
+ * thus the SPISerialBus() resource defines it on the per-chip
+ * basis. In order to avoid a chain of negations, the GPIO
+ * polarity is considered being Active High. Even for the cases
+ * when _DSD() is involved (in the updated versions of ACPI)
+ * the GPIO CS polarity must be defined Active High to avoid
+ * ambiguity. That's why we use enable, that takes SPI_CS_HIGH
+ * into account.
+ */
+ if (has_acpi_companion(&spi->dev))
+ gpiod_set_value_cansleep(spi->cs_gpiod, !enable);
+ else
+ /* Polarity handled by GPIO library */
+ gpiod_set_value_cansleep(spi->cs_gpiod, activate);
}
/* Some SPI masters need both GPIO CS & slave_select */
if ((spi->controller->flags & SPI_MASTER_GPIO_SS) &&
@@ -987,8 +969,7 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force)
spi->controller->set_cs(spi, !enable);
}
- if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio) ||
- !spi->controller->set_cs_timing) {
+ if (spi->cs_gpiod || !spi->controller->set_cs_timing) {
if (activate)
spi_delay_exec(&spi->cs_setup, NULL);
else
@@ -1019,10 +1000,10 @@ int spi_map_buf(struct spi_controller *ctlr, struct device *dev,
int i, ret;
if (vmalloced_buf || kmap_buf) {
- desc_len = min_t(int, max_seg_size, PAGE_SIZE);
+ desc_len = min_t(unsigned long, max_seg_size, PAGE_SIZE);
sgs = DIV_ROUND_UP(len + offset_in_page(buf), desc_len);
} else if (virt_addr_valid(buf)) {
- desc_len = min_t(int, max_seg_size, ctlr->max_dma_len);
+ desc_len = min_t(size_t, max_seg_size, ctlr->max_dma_len);
sgs = DIV_ROUND_UP(len, desc_len);
} else {
return -EINVAL;
@@ -2318,8 +2299,50 @@ struct acpi_spi_lookup {
int irq;
u8 bits_per_word;
u8 chip_select;
+ int n;
+ int index;
};
+static int acpi_spi_count(struct acpi_resource *ares, void *data)
+{
+ struct acpi_resource_spi_serialbus *sb;
+ int *count = data;
+
+ if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
+ return 1;
+
+ sb = &ares->data.spi_serial_bus;
+ if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_SPI)
+ return 1;
+
+ *count = *count + 1;
+
+ return 1;
+}
+
+/**
+ * acpi_spi_count_resources - Count the number of SpiSerialBus resources
+ * @adev: ACPI device
+ *
+ * Return: the number of SpiSerialBus resources in the ACPI device's
+ * resource list, or a negative error code.
+ */
+int acpi_spi_count_resources(struct acpi_device *adev)
+{
+ LIST_HEAD(r);
+ int count = 0;
+ int ret;
+
+ ret = acpi_dev_get_resources(adev, &r, acpi_spi_count, &count);
+ if (ret < 0)
+ return ret;
+
+ acpi_dev_free_resource_list(&r);
+
+ return count;
+}
+EXPORT_SYMBOL_GPL(acpi_spi_count_resources);
+
static void acpi_spi_parse_apple_properties(struct acpi_device *dev,
struct acpi_spi_lookup *lookup)
{
@@ -2349,6 +2372,8 @@ static void acpi_spi_parse_apple_properties(struct acpi_device *dev,
lookup->mode |= SPI_CPHA;
}
+static struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev);
+
static int acpi_spi_add_resource(struct acpi_resource *ares, void *data)
{
struct acpi_spi_lookup *lookup = data;
@@ -2362,14 +2387,35 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data)
sb = &ares->data.spi_serial_bus;
if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_SPI) {
+ if (lookup->index != -1 && lookup->n++ != lookup->index)
+ return 1;
+
+ if (lookup->index == -1 && !ctlr)
+ return -ENODEV;
+
status = acpi_get_handle(NULL,
sb->resource_source.string_ptr,
&parent_handle);
- if (ACPI_FAILURE(status) ||
- ACPI_HANDLE(ctlr->dev.parent) != parent_handle)
+ if (ACPI_FAILURE(status))
return -ENODEV;
+ if (ctlr) {
+ if (ACPI_HANDLE(ctlr->dev.parent) != parent_handle)
+ return -ENODEV;
+ } else {
+ struct acpi_device *adev;
+
+ if (acpi_bus_get_device(parent_handle, &adev))
+ return -ENODEV;
+
+ ctlr = acpi_spi_find_controller_by_adev(adev);
+ if (!ctlr)
+ return -ENODEV;
+
+ lookup->ctlr = ctlr;
+ }
+
/*
* ACPI DeviceSelection numbering is handled by the
* host controller driver in Windows and can vary
@@ -2408,8 +2454,25 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data)
return 1;
}
-static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
- struct acpi_device *adev)
+/**
+ * acpi_spi_device_alloc - Allocate a spi device, and fill it in with ACPI information
+ * @ctlr: controller to which the spi device belongs
+ * @adev: ACPI Device for the spi device
+ * @index: Index of the spi resource inside the ACPI Node
+ *
+ * This should be used to allocate a new spi device from an ACPI Node.
+ * The caller is responsible for calling spi_add_device to register the spi device.
+ *
+ * If ctlr is set to NULL, the Controller for the spi device will be looked up
+ * using the resource.
+ * If index is set to -1, index is not used.
+ * Note: If index is -1, ctlr must be set.
+ *
+ * Return: a pointer to the new device, or ERR_PTR on error.
+ */
+struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
+ struct acpi_device *adev,
+ int index)
{
acpi_handle parent_handle = NULL;
struct list_head resource_list;
@@ -2417,12 +2480,13 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
struct spi_device *spi;
int ret;
- if (acpi_bus_get_status(adev) || !adev->status.present ||
- acpi_device_enumerated(adev))
- return AE_OK;
+ if (!ctlr && index == -1)
+ return ERR_PTR(-EINVAL);
lookup.ctlr = ctlr;
lookup.irq = -1;
+ lookup.index = index;
+ lookup.n = 0;
INIT_LIST_HEAD(&resource_list);
ret = acpi_dev_get_resources(adev, &resource_list,
@@ -2431,26 +2495,25 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
if (ret < 0)
/* found SPI in _CRS but it points to another controller */
- return AE_OK;
+ return ERR_PTR(-ENODEV);
if (!lookup.max_speed_hz &&
ACPI_SUCCESS(acpi_get_parent(adev->handle, &parent_handle)) &&
- ACPI_HANDLE(ctlr->dev.parent) == parent_handle) {
+ ACPI_HANDLE(lookup.ctlr->dev.parent) == parent_handle) {
/* Apple does not use _CRS but nested devices for SPI slaves */
acpi_spi_parse_apple_properties(adev, &lookup);
}
if (!lookup.max_speed_hz)
- return AE_OK;
+ return ERR_PTR(-ENODEV);
- spi = spi_alloc_device(ctlr);
+ spi = spi_alloc_device(lookup.ctlr);
if (!spi) {
- dev_err(&ctlr->dev, "failed to allocate SPI device for %s\n",
+ dev_err(&lookup.ctlr->dev, "failed to allocate SPI device for %s\n",
dev_name(&adev->dev));
- return AE_NO_MEMORY;
+ return ERR_PTR(-ENOMEM);
}
-
ACPI_COMPANION_SET(&spi->dev, adev);
spi->max_speed_hz = lookup.max_speed_hz;
spi->mode |= lookup.mode;
@@ -2458,6 +2521,27 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
spi->bits_per_word = lookup.bits_per_word;
spi->chip_select = lookup.chip_select;
+ return spi;
+}
+EXPORT_SYMBOL_GPL(acpi_spi_device_alloc);
+
+static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
+ struct acpi_device *adev)
+{
+ struct spi_device *spi;
+
+ if (acpi_bus_get_status(adev) || !adev->status.present ||
+ acpi_device_enumerated(adev))
+ return AE_OK;
+
+ spi = acpi_spi_device_alloc(ctlr, adev, -1);
+ if (IS_ERR(spi)) {
+ if (PTR_ERR(spi) == -ENOMEM)
+ return AE_NO_MEMORY;
+ else
+ return AE_OK;
+ }
+
acpi_set_modalias(adev, acpi_device_hid(adev), spi->modalias,
sizeof(spi->modalias));
@@ -2480,10 +2564,10 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
static acpi_status acpi_spi_add_device(acpi_handle handle, u32 level,
void *data, void **return_value)
{
+ struct acpi_device *adev = acpi_fetch_acpi_dev(handle);
struct spi_controller *ctlr = data;
- struct acpi_device *adev;
- if (acpi_bus_get_device(handle, &adev))
+ if (!adev)
return AE_OK;
return acpi_register_spi_device(ctlr, adev);
@@ -2729,46 +2813,6 @@ struct spi_controller *__devm_spi_alloc_controller(struct device *dev,
}
EXPORT_SYMBOL_GPL(__devm_spi_alloc_controller);
-#ifdef CONFIG_OF
-static int of_spi_get_gpio_numbers(struct spi_controller *ctlr)
-{
- int nb, i, *cs;
- struct device_node *np = ctlr->dev.of_node;
-
- if (!np)
- return 0;
-
- nb = of_gpio_named_count(np, "cs-gpios");
- ctlr->num_chipselect = max_t(int, nb, ctlr->num_chipselect);
-
- /* Return error only for an incorrectly formed cs-gpios property */
- if (nb == 0 || nb == -ENOENT)
- return 0;
- else if (nb < 0)
- return nb;
-
- cs = devm_kcalloc(&ctlr->dev, ctlr->num_chipselect, sizeof(int),
- GFP_KERNEL);
- ctlr->cs_gpios = cs;
-
- if (!ctlr->cs_gpios)
- return -ENOMEM;
-
- for (i = 0; i < ctlr->num_chipselect; i++)
- cs[i] = -ENOENT;
-
- for (i = 0; i < nb; i++)
- cs[i] = of_get_named_gpio(np, "cs-gpios", i);
-
- return 0;
-}
-#else
-static int of_spi_get_gpio_numbers(struct spi_controller *ctlr)
-{
- return 0;
-}
-#endif
-
/**
* spi_get_gpio_descs() - grab chip select GPIOs for the master
* @ctlr: The SPI master to grab GPIO descriptors for
@@ -2953,22 +2997,15 @@ int spi_register_controller(struct spi_controller *ctlr)
*/
dev_set_name(&ctlr->dev, "spi%u", ctlr->bus_num);
- if (!spi_controller_is_slave(ctlr)) {
- if (ctlr->use_gpio_descriptors) {
- status = spi_get_gpio_descs(ctlr);
- if (status)
- goto free_bus_id;
- /*
- * A controller using GPIO descriptors always
- * supports SPI_CS_HIGH if need be.
- */
- ctlr->mode_bits |= SPI_CS_HIGH;
- } else {
- /* Legacy code path for GPIOs from DT */
- status = of_spi_get_gpio_numbers(ctlr);
- if (status)
- goto free_bus_id;
- }
+ if (!spi_controller_is_slave(ctlr) && ctlr->use_gpio_descriptors) {
+ status = spi_get_gpio_descs(ctlr);
+ if (status)
+ goto free_bus_id;
+ /*
+ * A controller using GPIO descriptors always
+ * supports SPI_CS_HIGH if need be.
+ */
+ ctlr->mode_bits |= SPI_CS_HIGH;
}
/*
@@ -2980,6 +3017,9 @@ int spi_register_controller(struct spi_controller *ctlr)
goto free_bus_id;
}
+ /* setting last_cs to -1 means no chip selected */
+ ctlr->last_cs = -1;
+
status = device_add(&ctlr->dev);
if (status < 0)
goto free_bus_id;
@@ -3457,12 +3497,6 @@ int spi_setup(struct spi_device *spi)
*/
bad_bits = spi->mode & ~(spi->controller->mode_bits | SPI_CS_WORD |
SPI_NO_TX | SPI_NO_RX);
- /*
- * Nothing prevents from working with active-high CS in case if it
- * is driven by GPIO.
- */
- if (gpio_is_valid(spi->cs_gpio))
- bad_bits &= ~SPI_CS_HIGH;
ugly_bits = bad_bits &
(SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL |
SPI_RX_DUAL | SPI_RX_QUAD | SPI_RX_OCTAL);
@@ -3588,8 +3622,7 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
* cs_change is set for each transfer.
*/
if ((spi->mode & SPI_CS_WORD) && (!(ctlr->mode_bits & SPI_CS_WORD) ||
- spi->cs_gpiod ||
- gpio_is_valid(spi->cs_gpio))) {
+ spi->cs_gpiod)) {
size_t maxsize;
int ret;
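Taken together, the spi.c changes export enough of the ACPI path for another driver to instantiate a SPI device from an ACPI node itself. Based on the kernel-doc above, usage would look roughly like the following sketch (example_alloc_from_acpi is a hypothetical caller):

#include <linux/acpi.h>
#include <linux/spi/spi.h>

static struct spi_device *example_alloc_from_acpi(struct acpi_device *adev)
{
	struct spi_device *spi;
	int ret;

	if (acpi_spi_count_resources(adev) < 1)
		return ERR_PTR(-ENODEV);

	/* ctlr == NULL: the controller is looked up from the resource itself */
	spi = acpi_spi_device_alloc(NULL, adev, 0);
	if (IS_ERR(spi))
		return spi;

	ret = spi_add_device(spi);	/* caller must register the device */
	if (ret) {
		spi_dev_put(spi);
		return ERR_PTR(ret);
	}
	return spi;
}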
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index a5cceca8b82b..53a551714265 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -453,22 +453,29 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
dev_dbg(&spi->dev, "%d bits per word\n", tmp);
}
break;
- case SPI_IOC_WR_MAX_SPEED_HZ:
+ case SPI_IOC_WR_MAX_SPEED_HZ: {
+ u32 save;
+
retval = get_user(tmp, (__u32 __user *)arg);
- if (retval == 0) {
- u32 save = spi->max_speed_hz;
+ if (retval)
+ break;
+ if (tmp == 0) {
+ retval = -EINVAL;
+ break;
+ }
- spi->max_speed_hz = tmp;
- retval = spi_setup(spi);
- if (retval == 0) {
- spidev->speed_hz = tmp;
- dev_dbg(&spi->dev, "%d Hz (max)\n",
- spidev->speed_hz);
- }
- spi->max_speed_hz = save;
+ save = spi->max_speed_hz;
+
+ spi->max_speed_hz = tmp;
+ retval = spi_setup(spi);
+ if (retval == 0) {
+ spidev->speed_hz = tmp;
+ dev_dbg(&spi->dev, "%d Hz (max)\n", spidev->speed_hz);
}
- break;
+ spi->max_speed_hz = save;
+ break;
+ }
default:
/* segmented and/or full-duplex I/O request */
/* Check message and copy into scratch area */
@@ -803,7 +810,7 @@ static int spidev_probe(struct spi_device *spi)
return status;
}
-static int spidev_remove(struct spi_device *spi)
+static void spidev_remove(struct spi_device *spi)
{
struct spidev_data *spidev = spi_get_drvdata(spi);
@@ -820,8 +827,6 @@ static int spidev_remove(struct spi_device *spi)
if (spidev->users == 0)
kfree(spidev);
mutex_unlock(&device_list_lock);
-
- return 0;
}
static struct spi_driver spidev_spi_driver = {
diff --git a/drivers/staging/fbtft/fb_st7789v.c b/drivers/staging/fbtft/fb_st7789v.c
index abe9395a0aef..861a154144e6 100644
--- a/drivers/staging/fbtft/fb_st7789v.c
+++ b/drivers/staging/fbtft/fb_st7789v.c
@@ -144,6 +144,8 @@ static int init_display(struct fbtft_par *par)
{
int rc;
+ par->fbtftops.reset(par);
+
rc = init_tearing_effect_line(par);
if (rc)
return rc;
diff --git a/drivers/staging/fbtft/fbtft.h b/drivers/staging/fbtft/fbtft.h
index 4cdec34e23d2..b68f5f9b7c78 100644
--- a/drivers/staging/fbtft/fbtft.h
+++ b/drivers/staging/fbtft/fbtft.h
@@ -272,21 +272,39 @@ void fbtft_write_reg8_bus9(struct fbtft_par *par, int len, ...);
void fbtft_write_reg16_bus8(struct fbtft_par *par, int len, ...);
void fbtft_write_reg16_bus16(struct fbtft_par *par, int len, ...);
+#define FBTFT_DT_TABLE(_compatible) \
+static const struct of_device_id dt_ids[] = { \
+ { .compatible = _compatible }, \
+ {}, \
+}; \
+MODULE_DEVICE_TABLE(of, dt_ids);
+
+#define FBTFT_SPI_DRIVER(_name, _compatible, _display, _spi_ids) \
+ \
+static int fbtft_driver_probe_spi(struct spi_device *spi) \
+{ \
+ return fbtft_probe_common(_display, spi, NULL); \
+} \
+ \
+static void fbtft_driver_remove_spi(struct spi_device *spi) \
+{ \
+ struct fb_info *info = spi_get_drvdata(spi); \
+ \
+ fbtft_remove_common(&spi->dev, info); \
+} \
+ \
+static struct spi_driver fbtft_driver_spi_driver = { \
+ .driver = { \
+ .name = _name, \
+ .of_match_table = dt_ids, \
+ }, \
+ .id_table = _spi_ids, \
+ .probe = fbtft_driver_probe_spi, \
+ .remove = fbtft_driver_remove_spi, \
+};
+
#define FBTFT_REGISTER_DRIVER(_name, _compatible, _display) \
\
-static int fbtft_driver_probe_spi(struct spi_device *spi) \
-{ \
- return fbtft_probe_common(_display, spi, NULL); \
-} \
- \
-static int fbtft_driver_remove_spi(struct spi_device *spi) \
-{ \
- struct fb_info *info = spi_get_drvdata(spi); \
- \
- fbtft_remove_common(&spi->dev, info); \
- return 0; \
-} \
- \
static int fbtft_driver_probe_pdev(struct platform_device *pdev) \
{ \
return fbtft_probe_common(_display, NULL, pdev); \
@@ -300,22 +318,9 @@ static int fbtft_driver_remove_pdev(struct platform_device *pdev) \
return 0; \
} \
\
-static const struct of_device_id dt_ids[] = { \
- { .compatible = _compatible }, \
- {}, \
-}; \
- \
-MODULE_DEVICE_TABLE(of, dt_ids); \
+FBTFT_DT_TABLE(_compatible) \
\
- \
-static struct spi_driver fbtft_driver_spi_driver = { \
- .driver = { \
- .name = _name, \
- .of_match_table = dt_ids, \
- }, \
- .probe = fbtft_driver_probe_spi, \
- .remove = fbtft_driver_remove_spi, \
-}; \
+FBTFT_SPI_DRIVER(_name, _compatible, _display, NULL) \
\
static struct platform_driver fbtft_driver_platform_driver = { \
.driver = { \
@@ -334,7 +339,10 @@ static int __init fbtft_driver_module_init(void) \
ret = spi_register_driver(&fbtft_driver_spi_driver); \
if (ret < 0) \
return ret; \
- return platform_driver_register(&fbtft_driver_platform_driver); \
+ ret = platform_driver_register(&fbtft_driver_platform_driver); \
+ if (ret < 0) \
+ spi_unregister_driver(&fbtft_driver_spi_driver); \
+ return ret; \
} \
\
static void __exit fbtft_driver_module_exit(void) \
@@ -348,42 +356,15 @@ module_exit(fbtft_driver_module_exit);
#define FBTFT_REGISTER_SPI_DRIVER(_name, _comp_vend, _comp_dev, _display) \
\
-static int fbtft_driver_probe_spi(struct spi_device *spi) \
-{ \
- return fbtft_probe_common(_display, spi, NULL); \
-} \
- \
-static int fbtft_driver_remove_spi(struct spi_device *spi) \
-{ \
- struct fb_info *info = spi_get_drvdata(spi); \
- \
- fbtft_remove_common(&spi->dev, info); \
- return 0; \
-} \
- \
-static const struct of_device_id dt_ids[] = { \
- { .compatible = _comp_vend "," _comp_dev }, \
- {}, \
-}; \
- \
-MODULE_DEVICE_TABLE(of, dt_ids); \
+FBTFT_DT_TABLE(_comp_vend "," _comp_dev) \
\
static const struct spi_device_id spi_ids[] = { \
{ .name = _comp_dev }, \
{}, \
}; \
- \
MODULE_DEVICE_TABLE(spi, spi_ids); \
\
-static struct spi_driver fbtft_driver_spi_driver = { \
- .driver = { \
- .name = _name, \
- .of_match_table = dt_ids, \
- }, \
- .id_table = spi_ids, \
- .probe = fbtft_driver_probe_spi, \
- .remove = fbtft_driver_remove_spi, \
-}; \
+FBTFT_SPI_DRIVER(_name, _comp_vend "," _comp_dev, _display, spi_ids) \
\
module_spi_driver(fbtft_driver_spi_driver);
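After this refactor, a display driver that only binds via SPI expands the shared macro instead of open-coding its spi_driver. A hypothetical user (names and compatible string invented for illustration):

static const struct fbtft_display example_display = {
	.regwidth = 8,
	.width = 240,
	.height = 320,
	/* .init_sequence, .fbtftops, etc. elided */
};

FBTFT_REGISTER_SPI_DRIVER("fb_example", "example", "lcd0000", &example_display);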
diff --git a/drivers/staging/gdm724x/gdm_lte.c b/drivers/staging/gdm724x/gdm_lte.c
index 493ed4821515..0d8d8fed283d 100644
--- a/drivers/staging/gdm724x/gdm_lte.c
+++ b/drivers/staging/gdm724x/gdm_lte.c
@@ -76,14 +76,15 @@ static void tx_complete(void *arg)
static int gdm_lte_rx(struct sk_buff *skb, struct nic *nic, int nic_type)
{
- int ret;
+ int ret, len;
+ len = skb->len + ETH_HLEN;
ret = netif_rx_ni(skb);
if (ret == NET_RX_DROP) {
nic->stats.rx_dropped++;
} else {
nic->stats.rx_packets++;
- nic->stats.rx_bytes += skb->len + ETH_HLEN;
+ nic->stats.rx_bytes += len;
}
return 0;
diff --git a/drivers/staging/greybus/gpio.c b/drivers/staging/greybus/gpio.c
index 7e6347fe93f9..8a7cf1d0e968 100644
--- a/drivers/staging/greybus/gpio.c
+++ b/drivers/staging/greybus/gpio.c
@@ -391,10 +391,7 @@ static int gb_gpio_request_handler(struct gb_operation *op)
return -EINVAL;
}
- local_irq_disable();
- ret = generic_handle_irq(irq);
- local_irq_enable();
-
+ ret = generic_handle_irq_safe(irq);
if (ret)
dev_err(dev, "failed to invoke irq handler\n");
diff --git a/drivers/staging/pi433/pi433_if.c b/drivers/staging/pi433/pi433_if.c
index 68c09fa016ed..1d31c35875e3 100644
--- a/drivers/staging/pi433/pi433_if.c
+++ b/drivers/staging/pi433/pi433_if.c
@@ -1264,7 +1264,7 @@ RX_failed:
return retval;
}
-static int pi433_remove(struct spi_device *spi)
+static void pi433_remove(struct spi_device *spi)
{
struct pi433_device *device = spi_get_drvdata(spi);
@@ -1284,8 +1284,6 @@ static int pi433_remove(struct spi_device *spi)
kfree(device->rx_buffer);
kfree(device);
-
- return 0;
}
static const struct of_device_id pi433_dt_ids[] = {
diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c
index 0f82f5031c43..49a3f45cb771 100644
--- a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c
+++ b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c
@@ -5907,6 +5907,7 @@ u8 chk_bmc_sleepq_hdl(struct adapter *padapter, unsigned char *pbuf)
struct sta_info *psta_bmc;
struct list_head *xmitframe_plist, *xmitframe_phead, *tmp;
struct xmit_frame *pxmitframe = NULL;
+ struct xmit_priv *pxmitpriv = &padapter->xmitpriv;
struct sta_priv *pstapriv = &padapter->stapriv;
/* for BC/MC Frames */
@@ -5917,7 +5918,8 @@ u8 chk_bmc_sleepq_hdl(struct adapter *padapter, unsigned char *pbuf)
if ((pstapriv->tim_bitmap&BIT(0)) && (psta_bmc->sleepq_len > 0)) {
msleep(10);/* 10ms, ATIM(HIQ) Windows */
- spin_lock_bh(&psta_bmc->sleep_q.lock);
+ /* spin_lock_bh(&psta_bmc->sleep_q.lock); */
+ spin_lock_bh(&pxmitpriv->lock);
xmitframe_phead = get_list_head(&psta_bmc->sleep_q);
list_for_each_safe(xmitframe_plist, tmp, xmitframe_phead) {
@@ -5940,7 +5942,8 @@ u8 chk_bmc_sleepq_hdl(struct adapter *padapter, unsigned char *pbuf)
rtw_hal_xmitframe_enqueue(padapter, pxmitframe);
}
- spin_unlock_bh(&psta_bmc->sleep_q.lock);
+ /* spin_unlock_bh(&psta_bmc->sleep_q.lock); */
+ spin_unlock_bh(&pxmitpriv->lock);
/* check hi queue and bmc_sleepq */
rtw_chk_hi_queue_cmd(padapter);
diff --git a/drivers/staging/rtl8723bs/core/rtw_recv.c b/drivers/staging/rtl8723bs/core/rtw_recv.c
index 41bfca549c64..105fe0e3482a 100644
--- a/drivers/staging/rtl8723bs/core/rtw_recv.c
+++ b/drivers/staging/rtl8723bs/core/rtw_recv.c
@@ -957,8 +957,10 @@ static signed int validate_recv_ctrl_frame(struct adapter *padapter, union recv_
if ((psta->state&WIFI_SLEEP_STATE) && (pstapriv->sta_dz_bitmap&BIT(psta->aid))) {
struct list_head *xmitframe_plist, *xmitframe_phead;
struct xmit_frame *pxmitframe = NULL;
+ struct xmit_priv *pxmitpriv = &padapter->xmitpriv;
- spin_lock_bh(&psta->sleep_q.lock);
+ /* spin_lock_bh(&psta->sleep_q.lock); */
+ spin_lock_bh(&pxmitpriv->lock);
xmitframe_phead = get_list_head(&psta->sleep_q);
xmitframe_plist = get_next(xmitframe_phead);
@@ -989,10 +991,12 @@ static signed int validate_recv_ctrl_frame(struct adapter *padapter, union recv_
update_beacon(padapter, WLAN_EID_TIM, NULL, true);
}
- spin_unlock_bh(&psta->sleep_q.lock);
+ /* spin_unlock_bh(&psta->sleep_q.lock); */
+ spin_unlock_bh(&pxmitpriv->lock);
} else {
- spin_unlock_bh(&psta->sleep_q.lock);
+ /* spin_unlock_bh(&psta->sleep_q.lock); */
+ spin_unlock_bh(&pxmitpriv->lock);
if (pstapriv->tim_bitmap&BIT(psta->aid)) {
if (psta->sleepq_len == 0) {
diff --git a/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c b/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c
index 0c9ea1520fd0..beb11d89db18 100644
--- a/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c
+++ b/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c
@@ -293,48 +293,46 @@ u32 rtw_free_stainfo(struct adapter *padapter, struct sta_info *psta)
/* list_del_init(&psta->wakeup_list); */
- spin_lock_bh(&psta->sleep_q.lock);
+ spin_lock_bh(&pxmitpriv->lock);
+
rtw_free_xmitframe_queue(pxmitpriv, &psta->sleep_q);
psta->sleepq_len = 0;
- spin_unlock_bh(&psta->sleep_q.lock);
-
- spin_lock_bh(&pxmitpriv->lock);
/* vo */
- spin_lock_bh(&pstaxmitpriv->vo_q.sta_pending.lock);
+ /* spin_lock_bh(&(pxmitpriv->vo_pending.lock)); */
rtw_free_xmitframe_queue(pxmitpriv, &pstaxmitpriv->vo_q.sta_pending);
list_del_init(&(pstaxmitpriv->vo_q.tx_pending));
phwxmit = pxmitpriv->hwxmits;
phwxmit->accnt -= pstaxmitpriv->vo_q.qcnt;
pstaxmitpriv->vo_q.qcnt = 0;
- spin_unlock_bh(&pstaxmitpriv->vo_q.sta_pending.lock);
+ /* spin_unlock_bh(&(pxmitpriv->vo_pending.lock)); */
/* vi */
- spin_lock_bh(&pstaxmitpriv->vi_q.sta_pending.lock);
+ /* spin_lock_bh(&(pxmitpriv->vi_pending.lock)); */
rtw_free_xmitframe_queue(pxmitpriv, &pstaxmitpriv->vi_q.sta_pending);
list_del_init(&(pstaxmitpriv->vi_q.tx_pending));
phwxmit = pxmitpriv->hwxmits+1;
phwxmit->accnt -= pstaxmitpriv->vi_q.qcnt;
pstaxmitpriv->vi_q.qcnt = 0;
- spin_unlock_bh(&pstaxmitpriv->vi_q.sta_pending.lock);
+ /* spin_unlock_bh(&(pxmitpriv->vi_pending.lock)); */
/* be */
- spin_lock_bh(&pstaxmitpriv->be_q.sta_pending.lock);
+ /* spin_lock_bh(&(pxmitpriv->be_pending.lock)); */
rtw_free_xmitframe_queue(pxmitpriv, &pstaxmitpriv->be_q.sta_pending);
list_del_init(&(pstaxmitpriv->be_q.tx_pending));
phwxmit = pxmitpriv->hwxmits+2;
phwxmit->accnt -= pstaxmitpriv->be_q.qcnt;
pstaxmitpriv->be_q.qcnt = 0;
- spin_unlock_bh(&pstaxmitpriv->be_q.sta_pending.lock);
+ /* spin_unlock_bh(&(pxmitpriv->be_pending.lock)); */
/* bk */
- spin_lock_bh(&pstaxmitpriv->bk_q.sta_pending.lock);
+ /* spin_lock_bh(&(pxmitpriv->bk_pending.lock)); */
rtw_free_xmitframe_queue(pxmitpriv, &pstaxmitpriv->bk_q.sta_pending);
list_del_init(&(pstaxmitpriv->bk_q.tx_pending));
phwxmit = pxmitpriv->hwxmits+3;
phwxmit->accnt -= pstaxmitpriv->bk_q.qcnt;
pstaxmitpriv->bk_q.qcnt = 0;
- spin_unlock_bh(&pstaxmitpriv->bk_q.sta_pending.lock);
+ /* spin_unlock_bh(&(pxmitpriv->bk_pending.lock)); */
spin_unlock_bh(&pxmitpriv->lock);
diff --git a/drivers/staging/rtl8723bs/core/rtw_xmit.c b/drivers/staging/rtl8723bs/core/rtw_xmit.c
index 13b8bd5ffabc..f466bfd248fb 100644
--- a/drivers/staging/rtl8723bs/core/rtw_xmit.c
+++ b/drivers/staging/rtl8723bs/core/rtw_xmit.c
@@ -1734,12 +1734,15 @@ void rtw_free_xmitframe_queue(struct xmit_priv *pxmitpriv, struct __queue *pfram
struct list_head *plist, *phead, *tmp;
struct xmit_frame *pxmitframe;
+ spin_lock_bh(&pframequeue->lock);
+
phead = get_list_head(pframequeue);
list_for_each_safe(plist, tmp, phead) {
pxmitframe = list_entry(plist, struct xmit_frame, list);
rtw_free_xmitframe(pxmitpriv, pxmitframe);
}
+ spin_unlock_bh(&pframequeue->lock);
}
s32 rtw_xmitframe_enqueue(struct adapter *padapter, struct xmit_frame *pxmitframe)
@@ -1794,7 +1797,6 @@ s32 rtw_xmit_classifier(struct adapter *padapter, struct xmit_frame *pxmitframe)
struct sta_info *psta;
struct tx_servq *ptxservq;
struct pkt_attrib *pattrib = &pxmitframe->attrib;
- struct xmit_priv *xmit_priv = &padapter->xmitpriv;
struct hw_xmit *phwxmits = padapter->xmitpriv.hwxmits;
signed int res = _SUCCESS;
@@ -1812,14 +1814,12 @@ s32 rtw_xmit_classifier(struct adapter *padapter, struct xmit_frame *pxmitframe)
ptxservq = rtw_get_sta_pending(padapter, psta, pattrib->priority, (u8 *)(&ac_index));
- spin_lock_bh(&xmit_priv->lock);
if (list_empty(&ptxservq->tx_pending))
list_add_tail(&ptxservq->tx_pending, get_list_head(phwxmits[ac_index].sta_queue));
list_add_tail(&pxmitframe->list, get_list_head(&ptxservq->sta_pending));
ptxservq->qcnt++;
phwxmits[ac_index].accnt++;
- spin_unlock_bh(&xmit_priv->lock);
exit:
@@ -2202,10 +2202,11 @@ void wakeup_sta_to_xmit(struct adapter *padapter, struct sta_info *psta)
struct list_head *xmitframe_plist, *xmitframe_phead, *tmp;
struct xmit_frame *pxmitframe = NULL;
struct sta_priv *pstapriv = &padapter->stapriv;
+ struct xmit_priv *pxmitpriv = &padapter->xmitpriv;
psta_bmc = rtw_get_bcmc_stainfo(padapter);
- spin_lock_bh(&psta->sleep_q.lock);
+ spin_lock_bh(&pxmitpriv->lock);
xmitframe_phead = get_list_head(&psta->sleep_q);
list_for_each_safe(xmitframe_plist, tmp, xmitframe_phead) {
@@ -2306,7 +2307,7 @@ void wakeup_sta_to_xmit(struct adapter *padapter, struct sta_info *psta)
_exit:
- spin_unlock_bh(&psta->sleep_q.lock);
+ spin_unlock_bh(&pxmitpriv->lock);
if (update_mask)
update_beacon(padapter, WLAN_EID_TIM, NULL, true);
@@ -2318,8 +2319,9 @@ void xmit_delivery_enabled_frames(struct adapter *padapter, struct sta_info *pst
struct list_head *xmitframe_plist, *xmitframe_phead, *tmp;
struct xmit_frame *pxmitframe = NULL;
struct sta_priv *pstapriv = &padapter->stapriv;
+ struct xmit_priv *pxmitpriv = &padapter->xmitpriv;
- spin_lock_bh(&psta->sleep_q.lock);
+ spin_lock_bh(&pxmitpriv->lock);
xmitframe_phead = get_list_head(&psta->sleep_q);
list_for_each_safe(xmitframe_plist, tmp, xmitframe_phead) {
@@ -2372,7 +2374,7 @@ void xmit_delivery_enabled_frames(struct adapter *padapter, struct sta_info *pst
}
}
- spin_unlock_bh(&psta->sleep_q.lock);
+ spin_unlock_bh(&pxmitpriv->lock);
}
void enqueue_pending_xmitbuf(struct xmit_priv *pxmitpriv, struct xmit_buf *pxmitbuf)
diff --git a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c
index b5d5e922231c..15810438a472 100644
--- a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c
+++ b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c
@@ -502,7 +502,9 @@ s32 rtl8723bs_hal_xmit(
rtw_issue_addbareq_cmd(padapter, pxmitframe);
}
+ spin_lock_bh(&pxmitpriv->lock);
err = rtw_xmitframe_enqueue(padapter, pxmitframe);
+ spin_unlock_bh(&pxmitpriv->lock);
if (err != _SUCCESS) {
rtw_free_xmitframe(pxmitpriv, pxmitframe);
diff --git a/drivers/staging/rtl8723bs/include/rtw_mlme.h b/drivers/staging/rtl8723bs/include/rtw_mlme.h
index c94fa7d8d5a9..1b343b434f4d 100644
--- a/drivers/staging/rtl8723bs/include/rtw_mlme.h
+++ b/drivers/staging/rtl8723bs/include/rtw_mlme.h
@@ -102,13 +102,17 @@ there are several "locks" in mlme_priv,
since mlme_priv is a shared resource between many threads,
like ISR/Call-Back functions, the OID handlers, and even timer functions.
-
Each struct __queue has its own locks, already.
-Other items are protected by mlme_priv.lock.
+Other items in mlme_priv are protected by mlme_priv.lock, while items in
+xmit_priv are protected by xmit_priv.lock.
To avoid possible deadlock, any thread trying to modify mlme_priv
SHALL not take more than one lock at a time!
+The only exception is that queue functions which take __queue.lock
+may be called with xmit_priv.lock held. In that case the locking order
+MUST always be the same: take xmit_priv.lock first, then call the queue
+function that takes __queue.lock.
*/
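Stated as code, the one nesting the comment permits is the shape the rtl8723bs hunks above converge on, now that rtw_free_xmitframe_queue() takes the queue's own lock internally:

spin_lock_bh(&pxmitpriv->lock);			/* outer lock, always taken first */
rtw_free_xmitframe_queue(pxmitpriv, &psta->sleep_q); /* takes __queue.lock inside */
spin_unlock_bh(&pxmitpriv->lock);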
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
index 6759a6261500..3a2e4582db8e 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
@@ -1058,15 +1058,27 @@ service_callback(enum vchiq_reason reason, struct vchiq_header *header,
DEBUG_TRACE(SERVICE_CALLBACK_LINE);
+ rcu_read_lock();
service = handle_to_service(handle);
- if (WARN_ON(!service))
+ if (WARN_ON(!service)) {
+ rcu_read_unlock();
return VCHIQ_SUCCESS;
+ }
user_service = (struct user_service *)service->base.userdata;
instance = user_service->instance;
- if (!instance || instance->closing)
+ if (!instance || instance->closing) {
+ rcu_read_unlock();
return VCHIQ_SUCCESS;
+ }
+
+ /*
+ * Because we hop between different synchronization mechanisms,
+ * taking an extra reference keeps the implementation simpler.
+ */
+ vchiq_service_get(service);
+ rcu_read_unlock();
vchiq_log_trace(vchiq_arm_log_level,
"%s - service %lx(%d,%p), reason %d, header %lx, instance %lx, bulk_userdata %lx",
@@ -1097,6 +1109,7 @@ service_callback(enum vchiq_reason reason, struct vchiq_header *header,
bulk_userdata);
if (status != VCHIQ_SUCCESS) {
DEBUG_TRACE(SERVICE_CALLBACK_LINE);
+ vchiq_service_put(service);
return status;
}
}
@@ -1105,10 +1118,12 @@ service_callback(enum vchiq_reason reason, struct vchiq_header *header,
if (wait_for_completion_interruptible(&user_service->remove_event)) {
vchiq_log_info(vchiq_arm_log_level, "%s interrupted", __func__);
DEBUG_TRACE(SERVICE_CALLBACK_LINE);
+ vchiq_service_put(service);
return VCHIQ_RETRY;
} else if (instance->closing) {
vchiq_log_info(vchiq_arm_log_level, "%s closing", __func__);
DEBUG_TRACE(SERVICE_CALLBACK_LINE);
+ vchiq_service_put(service);
return VCHIQ_ERROR;
}
DEBUG_TRACE(SERVICE_CALLBACK_LINE);
@@ -1137,6 +1152,7 @@ service_callback(enum vchiq_reason reason, struct vchiq_header *header,
header = NULL;
}
DEBUG_TRACE(SERVICE_CALLBACK_LINE);
+ vchiq_service_put(service);
if (skip_completion)
return VCHIQ_SUCCESS;
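The vchiq fix follows a standard RCU pattern: an object found under rcu_read_lock() may be freed once the read-side section ends, so a reference is taken before unlocking and dropped on every return path. In outline, mirroring the hunk above:

rcu_read_lock();
service = handle_to_service(handle);
if (!service) {
	rcu_read_unlock();
	return VCHIQ_SUCCESS;
}
vchiq_service_get(service);	/* pin the service past the RCU read section */
rcu_read_unlock();

/* ... use service; every exit path must now call vchiq_service_put(service) */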
diff --git a/drivers/staging/wfx/bus_spi.c b/drivers/staging/wfx/bus_spi.c
index 55ffcd7c42e2..fa0ff66a457d 100644
--- a/drivers/staging/wfx/bus_spi.c
+++ b/drivers/staging/wfx/bus_spi.c
@@ -232,12 +232,11 @@ static int wfx_spi_probe(struct spi_device *func)
return wfx_probe(bus->core);
}
-static int wfx_spi_remove(struct spi_device *func)
+static void wfx_spi_remove(struct spi_device *func)
{
struct wfx_spi_priv *bus = spi_get_drvdata(func);
wfx_release(bus->core);
- return 0;
}
/* For dynamic driver binding, kernel does not use OF to match driver. It only
diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c
index 8075f60fd02c..2d5cf1714ae0 100644
--- a/drivers/target/iscsi/iscsi_target_tpg.c
+++ b/drivers/target/iscsi/iscsi_target_tpg.c
@@ -443,6 +443,9 @@ static bool iscsit_tpg_check_network_portal(
break;
}
spin_unlock(&tpg->tpg_np_lock);
+
+ if (match)
+ break;
}
spin_unlock(&tiqn->tiqn_tpg_lock);
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index bf8ae4825a06..87ede165ddba 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -20,7 +20,6 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/bio.h>
-#include <linux/genhd.h>
#include <linux/file.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
@@ -353,18 +352,16 @@ static struct bio *iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num,
* Only allocate as many vector entries as the bio code allows us to,
* we'll loop later on until we have handled the whole request.
*/
- bio = bio_alloc_bioset(GFP_NOIO, bio_max_segs(sg_num),
- &ib_dev->ibd_bio_set);
+ bio = bio_alloc_bioset(ib_dev->ibd_bd, bio_max_segs(sg_num), opf,
+ GFP_NOIO, &ib_dev->ibd_bio_set);
if (!bio) {
pr_err("Unable to allocate memory for bio\n");
return NULL;
}
- bio_set_dev(bio, ib_dev->ibd_bd);
bio->bi_private = cmd;
bio->bi_end_io = &iblock_bio_done;
bio->bi_iter.bi_sector = lba;
- bio->bi_opf = opf;
return bio;
}
@@ -418,10 +415,9 @@ iblock_execute_sync_cache(struct se_cmd *cmd)
if (immed)
target_complete_cmd(cmd, SAM_STAT_GOOD);
- bio = bio_alloc(GFP_KERNEL, 0);
+ bio = bio_alloc(ib_dev->ibd_bd, 0, REQ_OP_WRITE | REQ_PREFLUSH,
+ GFP_KERNEL);
bio->bi_end_io = iblock_end_io_flush;
- bio_set_dev(bio, ib_dev->ibd_bd);
- bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
if (!immed)
bio->bi_private = cmd;
submit_bio(bio);
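
Both hunks above move to the block layer's newer allocators, which take the
target block device and the operation flags at allocation time instead of
requiring a separate bio_set_dev() call and a later bi_opf assignment.
Schematically (a sketch; bdev, nr_vecs, opf and bs stand for the values used
above):

	/* old style */
	bio = bio_alloc_bioset(GFP_NOIO, nr_vecs, bs);
	bio_set_dev(bio, bdev);
	bio->bi_opf = opf;

	/* new style: device and operation are bound at allocation time */
	bio = bio_alloc_bioset(bdev, nr_vecs, opf, GFP_NOIO, bs);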
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 807d06ecadee..0fae71ac5cc8 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -17,7 +17,6 @@
#include <linux/blk_types.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <linux/genhd.h>
#include <linux/cdrom.h>
#include <linux/ratelimit.h>
#include <linux/module.h>
diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c
index 1ca320885fad..17a6f51d3089 100644
--- a/drivers/tee/optee/core.c
+++ b/drivers/tee/optee/core.c
@@ -158,6 +158,7 @@ void optee_remove_common(struct optee *optee)
optee_unregister_devices();
optee_notif_uninit(optee);
+ teedev_close_context(optee->ctx);
/*
* The two devices have to be unregistered before we can free the
* other resources.
diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c
index 20a1b1a3d965..f744ab15bf2c 100644
--- a/drivers/tee/optee/ffa_abi.c
+++ b/drivers/tee/optee/ffa_abi.c
@@ -424,6 +424,7 @@ static struct tee_shm_pool_mgr *optee_ffa_shm_pool_alloc_pages(void)
*/
static void handle_ffa_rpc_func_cmd_shm_alloc(struct tee_context *ctx,
+ struct optee *optee,
struct optee_msg_arg *arg)
{
struct tee_shm *shm;
@@ -439,7 +440,7 @@ static void handle_ffa_rpc_func_cmd_shm_alloc(struct tee_context *ctx,
shm = optee_rpc_cmd_alloc_suppl(ctx, arg->params[0].u.value.b);
break;
case OPTEE_RPC_SHM_TYPE_KERNEL:
- shm = tee_shm_alloc(ctx, arg->params[0].u.value.b,
+ shm = tee_shm_alloc(optee->ctx, arg->params[0].u.value.b,
TEE_SHM_MAPPED | TEE_SHM_PRIV);
break;
default:
@@ -493,14 +494,13 @@ err_bad_param:
}
static void handle_ffa_rpc_func_cmd(struct tee_context *ctx,
+ struct optee *optee,
struct optee_msg_arg *arg)
{
- struct optee *optee = tee_get_drvdata(ctx->teedev);
-
arg->ret_origin = TEEC_ORIGIN_COMMS;
switch (arg->cmd) {
case OPTEE_RPC_CMD_SHM_ALLOC:
- handle_ffa_rpc_func_cmd_shm_alloc(ctx, arg);
+ handle_ffa_rpc_func_cmd_shm_alloc(ctx, optee, arg);
break;
case OPTEE_RPC_CMD_SHM_FREE:
handle_ffa_rpc_func_cmd_shm_free(ctx, optee, arg);
@@ -510,12 +510,12 @@ static void handle_ffa_rpc_func_cmd(struct tee_context *ctx,
}
}
-static void optee_handle_ffa_rpc(struct tee_context *ctx, u32 cmd,
- struct optee_msg_arg *arg)
+static void optee_handle_ffa_rpc(struct tee_context *ctx, struct optee *optee,
+ u32 cmd, struct optee_msg_arg *arg)
{
switch (cmd) {
case OPTEE_FFA_YIELDING_CALL_RETURN_RPC_CMD:
- handle_ffa_rpc_func_cmd(ctx, arg);
+ handle_ffa_rpc_func_cmd(ctx, optee, arg);
break;
case OPTEE_FFA_YIELDING_CALL_RETURN_INTERRUPT:
/* Interrupt delivered by now */
@@ -582,7 +582,7 @@ static int optee_ffa_yielding_call(struct tee_context *ctx,
* above.
*/
cond_resched();
- optee_handle_ffa_rpc(ctx, data->data1, rpc_arg);
+ optee_handle_ffa_rpc(ctx, optee, data->data1, rpc_arg);
cmd = OPTEE_FFA_YIELDING_CALL_RESUME;
data->data0 = cmd;
data->data1 = 0;
@@ -619,9 +619,18 @@ static int optee_ffa_do_call_with_arg(struct tee_context *ctx,
.data2 = (u32)(shm->sec_world_id >> 32),
.data3 = shm->offset,
};
- struct optee_msg_arg *arg = tee_shm_get_va(shm, 0);
- unsigned int rpc_arg_offs = OPTEE_MSG_GET_ARG_SIZE(arg->num_params);
- struct optee_msg_arg *rpc_arg = tee_shm_get_va(shm, rpc_arg_offs);
+ struct optee_msg_arg *arg;
+ unsigned int rpc_arg_offs;
+ struct optee_msg_arg *rpc_arg;
+
+ arg = tee_shm_get_va(shm, 0);
+ if (IS_ERR(arg))
+ return PTR_ERR(arg);
+
+ rpc_arg_offs = OPTEE_MSG_GET_ARG_SIZE(arg->num_params);
+ rpc_arg = tee_shm_get_va(shm, rpc_arg_offs);
+ if (IS_ERR(rpc_arg))
+ return PTR_ERR(rpc_arg);
return optee_ffa_yielding_call(ctx, &data, rpc_arg);
}
@@ -793,7 +802,9 @@ static int optee_ffa_probe(struct ffa_device *ffa_dev)
{
const struct ffa_dev_ops *ffa_ops;
unsigned int rpc_arg_count;
+ struct tee_shm_pool *pool;
struct tee_device *teedev;
+ struct tee_context *ctx;
struct optee *optee;
int rc;
@@ -813,12 +824,12 @@ static int optee_ffa_probe(struct ffa_device *ffa_dev)
if (!optee)
return -ENOMEM;
- optee->pool = optee_ffa_config_dyn_shm();
- if (IS_ERR(optee->pool)) {
- rc = PTR_ERR(optee->pool);
- optee->pool = NULL;
- goto err;
+ pool = optee_ffa_config_dyn_shm();
+ if (IS_ERR(pool)) {
+ rc = PTR_ERR(pool);
+ goto err_free_optee;
}
+ optee->pool = pool;
optee->ops = &optee_ffa_ops;
optee->ffa.ffa_dev = ffa_dev;
@@ -829,7 +840,7 @@ static int optee_ffa_probe(struct ffa_device *ffa_dev)
optee);
if (IS_ERR(teedev)) {
rc = PTR_ERR(teedev);
- goto err;
+ goto err_free_pool;
}
optee->teedev = teedev;
@@ -837,50 +848,59 @@ static int optee_ffa_probe(struct ffa_device *ffa_dev)
optee);
if (IS_ERR(teedev)) {
rc = PTR_ERR(teedev);
- goto err;
+ goto err_unreg_teedev;
}
optee->supp_teedev = teedev;
rc = tee_device_register(optee->teedev);
if (rc)
- goto err;
+ goto err_unreg_supp_teedev;
rc = tee_device_register(optee->supp_teedev);
if (rc)
- goto err;
+ goto err_unreg_supp_teedev;
rc = rhashtable_init(&optee->ffa.global_ids, &shm_rhash_params);
if (rc)
- goto err;
+ goto err_unreg_supp_teedev;
mutex_init(&optee->ffa.mutex);
mutex_init(&optee->call_queue.mutex);
INIT_LIST_HEAD(&optee->call_queue.waiters);
optee_supp_init(&optee->supp);
ffa_dev_set_drvdata(ffa_dev, optee);
- rc = optee_notif_init(optee, OPTEE_DEFAULT_MAX_NOTIF_VALUE);
- if (rc) {
- optee_ffa_remove(ffa_dev);
- return rc;
+ ctx = teedev_open(optee->teedev);
+ if (IS_ERR(ctx)) {
+ rc = PTR_ERR(ctx);
+ goto err_rhashtable_free;
}
+ optee->ctx = ctx;
+ rc = optee_notif_init(optee, OPTEE_DEFAULT_MAX_NOTIF_VALUE);
+ if (rc)
+ goto err_close_ctx;
rc = optee_enumerate_devices(PTA_CMD_GET_DEVICES);
- if (rc) {
- optee_ffa_remove(ffa_dev);
- return rc;
- }
+ if (rc)
+ goto err_unregister_devices;
pr_info("initialized driver\n");
return 0;
-err:
- /*
- * tee_device_unregister() is safe to call even if the
- * devices hasn't been registered with
- * tee_device_register() yet.
- */
+
+err_unregister_devices:
+ optee_unregister_devices();
+ optee_notif_uninit(optee);
+err_close_ctx:
+ teedev_close_context(ctx);
+err_rhashtable_free:
+ rhashtable_free_and_destroy(&optee->ffa.global_ids, rh_free_fn, NULL);
+ optee_supp_uninit(&optee->supp);
+ mutex_destroy(&optee->call_queue.mutex);
+err_unreg_supp_teedev:
tee_device_unregister(optee->supp_teedev);
+err_unreg_teedev:
tee_device_unregister(optee->teedev);
- if (optee->pool)
- tee_shm_pool_free(optee->pool);
+err_free_pool:
+ tee_shm_pool_free(pool);
+err_free_optee:
kfree(optee);
return rc;
}
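
The reworked error path follows the usual kernel probe convention: labels are
ordered as the reverse of initialization, each label undoes exactly one
completed step, and a failure at step N falls through the teardown of steps
N-1..1. In outline (hypothetical step names):

	rc = setup_a();
	if (rc)
		goto err_free_optee;
	rc = setup_b();
	if (rc)
		goto err_undo_a;   /* only step a succeeded so far */
	return 0;

err_undo_a:
	undo_a();
err_free_optee:
	kfree(optee);
	return rc;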
diff --git a/drivers/tee/optee/notif.c b/drivers/tee/optee/notif.c
index a28fa03dcd0e..05212842b0a5 100644
--- a/drivers/tee/optee/notif.c
+++ b/drivers/tee/optee/notif.c
@@ -121,5 +121,5 @@ int optee_notif_init(struct optee *optee, u_int max_key)
void optee_notif_uninit(struct optee *optee)
{
- kfree(optee->notif.bitmap);
+ bitmap_free(optee->notif.bitmap);
}
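
bitmap_free() is the counterpart of bitmap_alloc()/bitmap_zalloc(); pairing
the allocation and the free through the bitmap API (rather than kfree())
keeps callers independent of how the bitmap is allocated internally. A
minimal sketch of the pairing:

	unsigned long *map = bitmap_zalloc(max_key, GFP_KERNEL);
	if (!map)
		return -ENOMEM;
	/* ... use the bitmap ... */
	bitmap_free(map);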
diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h
index 46f74ab07c7e..92bc47bef95f 100644
--- a/drivers/tee/optee/optee_private.h
+++ b/drivers/tee/optee/optee_private.h
@@ -53,7 +53,6 @@ struct optee_call_queue {
struct optee_notif {
u_int max_key;
- struct tee_context *ctx;
/* Serializes access to the elements below in this struct */
spinlock_t lock;
struct list_head db;
@@ -134,9 +133,10 @@ struct optee_ops {
/**
* struct optee - main service struct
* @supp_teedev: supplicant device
+ * @teedev: client device
* @ops: internal callbacks for different ways to reach secure
* world
- * @teedev: client device
+ * @ctx: driver internal TEE context
* @smc: specific to SMC ABI
* @ffa: specific to FF-A ABI
* @call_queue: queue of threads waiting to call @invoke_fn
@@ -152,6 +152,7 @@ struct optee {
struct tee_device *supp_teedev;
struct tee_device *teedev;
const struct optee_ops *ops;
+ struct tee_context *ctx;
union {
struct optee_smc smc;
struct optee_ffa ffa;
diff --git a/drivers/tee/optee/smc_abi.c b/drivers/tee/optee/smc_abi.c
index 449d6a72d289..c517d310249f 100644
--- a/drivers/tee/optee/smc_abi.c
+++ b/drivers/tee/optee/smc_abi.c
@@ -75,16 +75,6 @@ static int from_msg_param_tmp_mem(struct tee_param *p, u32 attr,
p->u.memref.shm_offs = mp->u.tmem.buf_ptr - pa;
p->u.memref.shm = shm;
- /* Check that the memref is covered by the shm object */
- if (p->u.memref.size) {
- size_t o = p->u.memref.shm_offs +
- p->u.memref.size - 1;
-
- rc = tee_shm_get_pa(shm, o, NULL);
- if (rc)
- return rc;
- }
-
return 0;
}
@@ -622,6 +612,7 @@ static void handle_rpc_func_cmd_shm_free(struct tee_context *ctx,
}
static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx,
+ struct optee *optee,
struct optee_msg_arg *arg,
struct optee_call_ctx *call_ctx)
{
@@ -651,7 +642,8 @@ static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx,
shm = optee_rpc_cmd_alloc_suppl(ctx, sz);
break;
case OPTEE_RPC_SHM_TYPE_KERNEL:
- shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED | TEE_SHM_PRIV);
+ shm = tee_shm_alloc(optee->ctx, sz,
+ TEE_SHM_MAPPED | TEE_SHM_PRIV);
break;
default:
arg->ret = TEEC_ERROR_BAD_PARAMETERS;
@@ -747,7 +739,7 @@ static void handle_rpc_func_cmd(struct tee_context *ctx, struct optee *optee,
switch (arg->cmd) {
case OPTEE_RPC_CMD_SHM_ALLOC:
free_pages_list(call_ctx);
- handle_rpc_func_cmd_shm_alloc(ctx, arg, call_ctx);
+ handle_rpc_func_cmd_shm_alloc(ctx, optee, arg, call_ctx);
break;
case OPTEE_RPC_CMD_SHM_FREE:
handle_rpc_func_cmd_shm_free(ctx, arg);
@@ -776,7 +768,7 @@ static void optee_handle_rpc(struct tee_context *ctx,
switch (OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0)) {
case OPTEE_SMC_RPC_FUNC_ALLOC:
- shm = tee_shm_alloc(ctx, param->a1,
+ shm = tee_shm_alloc(optee->ctx, param->a1,
TEE_SHM_MAPPED | TEE_SHM_PRIV);
if (!IS_ERR(shm) && !tee_shm_get_pa(shm, 0, &pa)) {
reg_pair_from_64(&param->a1, &param->a2, pa);
@@ -954,57 +946,34 @@ static irqreturn_t notif_irq_thread_fn(int irq, void *dev_id)
{
struct optee *optee = dev_id;
- optee_smc_do_bottom_half(optee->notif.ctx);
+ optee_smc_do_bottom_half(optee->ctx);
return IRQ_HANDLED;
}
static int optee_smc_notif_init_irq(struct optee *optee, u_int irq)
{
- struct tee_context *ctx;
int rc;
- ctx = teedev_open(optee->teedev);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- optee->notif.ctx = ctx;
rc = request_threaded_irq(irq, notif_irq_handler,
notif_irq_thread_fn,
0, "optee_notification", optee);
if (rc)
- goto err_close_ctx;
+ return rc;
optee->smc.notif_irq = irq;
return 0;
-
-err_close_ctx:
- teedev_close_context(optee->notif.ctx);
- optee->notif.ctx = NULL;
-
- return rc;
}
static void optee_smc_notif_uninit_irq(struct optee *optee)
{
- if (optee->notif.ctx) {
- optee_smc_stop_async_notif(optee->notif.ctx);
+ if (optee->smc.sec_caps & OPTEE_SMC_SEC_CAP_ASYNC_NOTIF) {
+ optee_smc_stop_async_notif(optee->ctx);
if (optee->smc.notif_irq) {
free_irq(optee->smc.notif_irq, optee);
irq_dispose_mapping(optee->smc.notif_irq);
}
-
- /*
- * The thread normally working with optee->notif.ctx was
- * stopped with free_irq() above.
- *
- * Note we're not using teedev_close_context() or
- * tee_client_close_context() since we have already called
- * tee_device_put() while initializing to avoid a circular
- * reference counting.
- */
- teedev_close_context(optee->notif.ctx);
}
}
@@ -1366,6 +1335,7 @@ static int optee_probe(struct platform_device *pdev)
struct optee *optee = NULL;
void *memremaped_shm = NULL;
struct tee_device *teedev;
+ struct tee_context *ctx;
u32 max_notif_value;
u32 sec_caps;
int rc;
@@ -1446,9 +1416,15 @@ static int optee_probe(struct platform_device *pdev)
optee->pool = pool;
platform_set_drvdata(pdev, optee);
+ ctx = teedev_open(optee->teedev);
+ if (IS_ERR(ctx)) {
+ rc = PTR_ERR(ctx);
+ goto err_supp_uninit;
+ }
+ optee->ctx = ctx;
rc = optee_notif_init(optee, max_notif_value);
if (rc)
- goto err_supp_uninit;
+ goto err_close_ctx;
if (sec_caps & OPTEE_SMC_SEC_CAP_ASYNC_NOTIF) {
unsigned int irq;
@@ -1496,6 +1472,8 @@ err_disable_shm_cache:
optee_unregister_devices();
err_notif_uninit:
optee_notif_uninit(optee);
+err_close_ctx:
+ teedev_close_context(ctx);
err_supp_uninit:
optee_supp_uninit(&optee->supp);
mutex_destroy(&optee->call_queue.mutex);
diff --git a/drivers/thermal/broadcom/brcmstb_thermal.c b/drivers/thermal/broadcom/brcmstb_thermal.c
index 8df5edef1ded..0cedb8b4f00a 100644
--- a/drivers/thermal/broadcom/brcmstb_thermal.c
+++ b/drivers/thermal/broadcom/brcmstb_thermal.c
@@ -351,7 +351,7 @@ static int brcmstb_thermal_probe(struct platform_device *pdev)
priv->thermal = thermal;
- irq = platform_get_irq(pdev, 0);
+ irq = platform_get_irq_optional(pdev, 0);
if (irq >= 0) {
ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
brcmstb_tmon_irq_thread,
diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig
index c83ea5d04a1d..f0c845679250 100644
--- a/drivers/thermal/intel/Kconfig
+++ b/drivers/thermal/intel/Kconfig
@@ -99,3 +99,17 @@ config INTEL_MENLOW
Intel Menlow platform.
If unsure, say N.
+
+config INTEL_HFI_THERMAL
+ bool "Intel Hardware Feedback Interface"
+ depends on NET
+ depends on CPU_SUP_INTEL
+ depends on X86_THERMAL_VECTOR
+ select THERMAL_NETLINK
+ help
+ Select this option to enable the Hardware Feedback Interface. If
+ selected, hardware provides guidance to the operating system on
+ the performance and energy efficiency capabilities of each CPU.
+ These capabilities may change as a result of changes in the operating
+ conditions of the system such power and thermal limits. If selected,
+ the kernel relays updates in CPUs' capabilities to userspace.
diff --git a/drivers/thermal/intel/Makefile b/drivers/thermal/intel/Makefile
index 960b56268b4a..9a8d8054f316 100644
--- a/drivers/thermal/intel/Makefile
+++ b/drivers/thermal/intel/Makefile
@@ -13,3 +13,4 @@ obj-$(CONFIG_INTEL_PCH_THERMAL) += intel_pch_thermal.o
obj-$(CONFIG_INTEL_TCC_COOLING) += intel_tcc_cooling.o
obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
obj-$(CONFIG_INTEL_MENLOW) += intel_menlow.o
+obj-$(CONFIG_INTEL_HFI_THERMAL) += intel_hfi.o
diff --git a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
index e90690a234c4..01b80331eab6 100644
--- a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
+++ b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
@@ -72,7 +72,6 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp,
int i;
int nr_bad_entries = 0;
struct trt *trts;
- struct acpi_device *adev;
union acpi_object *p;
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
struct acpi_buffer element = { 0, NULL };
@@ -112,12 +111,10 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp,
if (!create_dev)
continue;
- result = acpi_bus_get_device(trt->source, &adev);
- if (result)
+ if (!acpi_fetch_acpi_dev(trt->source))
pr_warn("Failed to get source ACPI device\n");
- result = acpi_bus_get_device(trt->target, &adev);
- if (result)
+ if (!acpi_fetch_acpi_dev(trt->target))
pr_warn("Failed to get target ACPI device\n");
}
@@ -149,7 +146,6 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp,
int i;
int nr_bad_entries = 0;
struct art *arts;
- struct acpi_device *adev;
union acpi_object *p;
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
struct acpi_buffer element = { 0, NULL };
@@ -191,16 +187,11 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp,
if (!create_dev)
continue;
- if (art->source) {
- result = acpi_bus_get_device(art->source, &adev);
- if (result)
- pr_warn("Failed to get source ACPI device\n");
- }
- if (art->target) {
- result = acpi_bus_get_device(art->target, &adev);
- if (result)
- pr_warn("Failed to get target ACPI device\n");
- }
+ if (!acpi_fetch_acpi_dev(art->source))
+ pr_warn("Failed to get source ACPI device\n");
+
+ if (!acpi_fetch_acpi_dev(art->target))
+ pr_warn("Failed to get target ACPI device\n");
}
*artp = arts;
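
acpi_fetch_acpi_dev() returns the struct acpi_device * for a handle directly,
or NULL if there is none, which is why the temporary adev and the result
check can be dropped. Side by side (a sketch):

	/* old: two-step lookup through an output parameter */
	result = acpi_bus_get_device(handle, &adev);
	if (result)
		pr_warn("Failed to get ACPI device\n");

	/* new: the return value is the device itself */
	if (!acpi_fetch_acpi_dev(handle))
		pr_warn("Failed to get ACPI device\n");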
diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
index 72acb1f61849..4954800b9850 100644
--- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
@@ -17,8 +17,8 @@
#define INT3400_KEEP_ALIVE 0xA0
enum int3400_thermal_uuid {
+ INT3400_THERMAL_ACTIVE = 0,
INT3400_THERMAL_PASSIVE_1,
- INT3400_THERMAL_ACTIVE,
INT3400_THERMAL_CRITICAL,
INT3400_THERMAL_ADAPTIVE_PERFORMANCE,
INT3400_THERMAL_EMERGENCY_CALL_MODE,
@@ -31,8 +31,8 @@ enum int3400_thermal_uuid {
};
static char *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = {
- "42A441D6-AE6A-462b-A84B-4A8CE79027D3",
"3A95C389-E4B8-4629-A526-C52C88626BAE",
+ "42A441D6-AE6A-462b-A84B-4A8CE79027D3",
"97C68AE7-15FA-499c-B8C9-5DA81D606E0A",
"63BE270F-1C11-48FD-A6F7-3AF253FF3E2D",
"5349962F-71E6-431D-9AE8-0A635B710AEE",
@@ -53,12 +53,13 @@ struct int3400_thermal_priv {
struct art *arts;
int trt_count;
struct trt *trts;
- u8 uuid_bitmap;
+ u32 uuid_bitmap;
int rel_misc_dev_res;
int current_uuid_index;
char *data_vault;
int odvp_count;
int *odvp;
+ u32 os_uuid_mask;
struct odvp_attr *odvp_attrs;
};
@@ -142,12 +143,55 @@ static ssize_t current_uuid_show(struct device *dev,
struct device_attribute *devattr, char *buf)
{
struct int3400_thermal_priv *priv = dev_get_drvdata(dev);
+ int i, length = 0;
- if (priv->current_uuid_index == -1)
- return sprintf(buf, "INVALID\n");
+ if (priv->current_uuid_index > 0)
+ return sprintf(buf, "%s\n",
+ int3400_thermal_uuids[priv->current_uuid_index]);
- return sprintf(buf, "%s\n",
- int3400_thermal_uuids[priv->current_uuid_index]);
+ for (i = 0; i <= INT3400_THERMAL_CRITICAL; i++) {
+ if (priv->os_uuid_mask & BIT(i))
+ length += scnprintf(&buf[length],
+ PAGE_SIZE - length,
+ "%s\n",
+ int3400_thermal_uuids[i]);
+ }
+
+ if (length)
+ return length;
+
+ return sprintf(buf, "INVALID\n");
+}
+
+static int int3400_thermal_run_osc(acpi_handle handle, char *uuid_str, int *enable)
+{
+ u32 ret, buf[2];
+ acpi_status status;
+ int result = 0;
+ struct acpi_osc_context context = {
+ .uuid_str = NULL,
+ .rev = 1,
+ .cap.length = 8,
+ };
+
+ context.uuid_str = uuid_str;
+
+ buf[OSC_QUERY_DWORD] = 0;
+ buf[OSC_SUPPORT_DWORD] = *enable;
+
+ context.cap.pointer = buf;
+
+ status = acpi_run_osc(handle, &context);
+ if (ACPI_SUCCESS(status)) {
+ ret = *((u32 *)(context.ret.pointer + 4));
+ if (ret != *enable)
+ result = -EPERM;
+ } else
+ result = -EPERM;
+
+ kfree(context.ret.pointer);
+
+ return result;
}
static ssize_t current_uuid_store(struct device *dev,
@@ -164,16 +208,47 @@ static ssize_t current_uuid_store(struct device *dev,
* If we have a list of supported UUIDs, make sure
* this one is supported.
*/
- if (priv->uuid_bitmap &&
- !(priv->uuid_bitmap & (1 << i)))
+ if (priv->uuid_bitmap & BIT(i)) {
+ priv->current_uuid_index = i;
+ return count;
+ }
+
+ /*
+	 * Only 3 policies are supported via the new
+	 * _OSC used to inform OS capability:
+ * INT3400_THERMAL_ACTIVE
+ * INT3400_THERMAL_PASSIVE_1
+ * INT3400_THERMAL_CRITICAL
+ */
+
+ if (i > INT3400_THERMAL_CRITICAL)
return -EINVAL;
- priv->current_uuid_index = i;
- return count;
+ priv->os_uuid_mask |= BIT(i);
+
+ break;
}
}
- return -EINVAL;
+ if (priv->os_uuid_mask) {
+ int cap, ret;
+
+ /*
+ * Capability bits:
+ * Bit 0: set to 1 to indicate DPTF is active
+	 * Bit 1: set to 1 to indicate active cooling is supported by user space daemon
+	 * Bit 2: set to 1 to indicate passive cooling is supported by user space daemon
+	 * Bit 3: set to 1 to indicate critical trip is handled by user space daemon
+ */
+ cap = ((priv->os_uuid_mask << 1) | 0x01);
+ ret = int3400_thermal_run_osc(priv->adev->handle,
+ "b23ba85d-c8b7-3542-88de-8de2ffcfd698",
+ &cap);
+ if (ret)
+ return ret;
+ }
+
+ return count;
}
static DEVICE_ATTR_RW(current_uuid);
@@ -236,41 +311,6 @@ end:
return result;
}
-static int int3400_thermal_run_osc(acpi_handle handle,
- enum int3400_thermal_uuid uuid, bool enable)
-{
- u32 ret, buf[2];
- acpi_status status;
- int result = 0;
- struct acpi_osc_context context = {
- .uuid_str = NULL,
- .rev = 1,
- .cap.length = 8,
- };
-
- if (uuid < 0 || uuid >= INT3400_THERMAL_MAXIMUM_UUID)
- return -EINVAL;
-
- context.uuid_str = int3400_thermal_uuids[uuid];
-
- buf[OSC_QUERY_DWORD] = 0;
- buf[OSC_SUPPORT_DWORD] = enable;
-
- context.cap.pointer = buf;
-
- status = acpi_run_osc(handle, &context);
- if (ACPI_SUCCESS(status)) {
- ret = *((u32 *)(context.ret.pointer + 4));
- if (ret != enable)
- result = -EPERM;
- } else
- result = -EPERM;
-
- kfree(context.ret.pointer);
-
- return result;
-}
-
static ssize_t odvp_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
@@ -404,6 +444,10 @@ static void int3400_notify(acpi_handle handle,
thermal_prop[3] = kasprintf(GFP_KERNEL, "EVENT=%d", therm_event);
thermal_prop[4] = NULL;
kobject_uevent_env(&priv->thermal->device.kobj, KOBJ_CHANGE, thermal_prop);
+ kfree(thermal_prop[0]);
+ kfree(thermal_prop[1]);
+ kfree(thermal_prop[2]);
+ kfree(thermal_prop[3]);
}
static int int3400_thermal_get_temp(struct thermal_zone_device *thermal,
@@ -422,10 +466,18 @@ static int int3400_thermal_change_mode(struct thermal_zone_device *thermal,
if (!priv)
return -EINVAL;
- if (mode != thermal->mode)
+ if (mode != thermal->mode) {
+ int enabled;
+
+ if (priv->current_uuid_index < 0 ||
+ priv->current_uuid_index >= INT3400_THERMAL_MAXIMUM_UUID)
+ return -EINVAL;
+
+ enabled = (mode == THERMAL_DEVICE_ENABLED);
result = int3400_thermal_run_osc(priv->adev->handle,
- priv->current_uuid_index,
- mode == THERMAL_DEVICE_ENABLED);
+ int3400_thermal_uuids[priv->current_uuid_index],
+ &enabled);
+ }
evaluate_odvp(priv);
@@ -464,6 +516,11 @@ static void int3400_setup_gddv(struct int3400_thermal_priv *priv)
priv->data_vault = kmemdup(obj->package.elements[0].buffer.pointer,
obj->package.elements[0].buffer.length,
GFP_KERNEL);
+ if (!priv->data_vault) {
+ kfree(buffer.pointer);
+ return;
+ }
+
bin_attr_data_vault.private = priv->data_vault;
bin_attr_data_vault.size = obj->package.elements[0].buffer.length;
kfree(buffer.pointer);
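
Given the bit layout in the capability comment added to current_uuid_store()
above, the cap value sent through _OSC is the selected-UUID mask shifted up
by one with bit 0 forced on. For example, if user space wrote the ACTIVE and
CRITICAL UUIDs:

	os_uuid_mask = BIT(0) | BIT(2);   /* ACTIVE and CRITICAL selected */
	cap = (os_uuid_mask << 1) | 0x01; /* 0b0101 -> 0b1011 */
	/* bit 0: DPTF active, bit 1: active cooling, bit 3: critical trips */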
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
new file mode 100644
index 000000000000..730fd121df6e
--- /dev/null
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -0,0 +1,569 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Hardware Feedback Interface Driver
+ *
+ * Copyright (c) 2021, Intel Corporation.
+ *
+ * Authors: Aubrey Li <aubrey.li@linux.intel.com>
+ * Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+ *
+ *
+ * The Hardware Feedback Interface provides performance and energy efficiency
+ * capability information for each CPU in the system. Depending on the processor
+ * model, hardware may periodically update these capabilities as a result of
+ * changes in the operating conditions (e.g., power limits or thermal
+ * constraints). On other processor models, there is a single HFI update
+ * at boot.
+ *
+ * This file provides functionality to process HFI updates and relay these
+ * updates to userspace.
+ */
+
+#define pr_fmt(fmt) "intel-hfi: " fmt
+
+#include <linux/bitops.h>
+#include <linux/cpufeature.h>
+#include <linux/cpumask.h>
+#include <linux/gfp.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/math.h>
+#include <linux/mutex.h>
+#include <linux/percpu-defs.h>
+#include <linux/printk.h>
+#include <linux/processor.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/topology.h>
+#include <linux/workqueue.h>
+
+#include <asm/msr.h>
+
+#include "../thermal_core.h"
+#include "intel_hfi.h"
+
+#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | \
+ BIT(9) | BIT(11) | BIT(26))
+
+/* Hardware Feedback Interface MSR configuration bits */
+#define HW_FEEDBACK_PTR_VALID_BIT BIT(0)
+#define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT BIT(0)
+
+/* CPUID detection and enumeration definitions for HFI */
+
+#define CPUID_HFI_LEAF 6
+
+union hfi_capabilities {
+ struct {
+ u8 performance:1;
+ u8 energy_efficiency:1;
+ u8 __reserved:6;
+ } split;
+ u8 bits;
+};
+
+union cpuid6_edx {
+ struct {
+ union hfi_capabilities capabilities;
+ u32 table_pages:4;
+ u32 __reserved:4;
+ s32 index:16;
+ } split;
+ u32 full;
+};
+
+/**
+ * struct hfi_cpu_data - HFI capabilities per CPU
+ * @perf_cap: Performance capability
+ * @ee_cap: Energy efficiency capability
+ *
+ * Capabilities of a logical processor in the HFI table. These capabilities are
+ * unitless.
+ */
+struct hfi_cpu_data {
+ u8 perf_cap;
+ u8 ee_cap;
+} __packed;
+
+/**
+ * struct hfi_hdr - Header of the HFI table
+ * @perf_updated: Hardware updated performance capabilities
+ * @ee_updated: Hardware updated energy efficiency capabilities
+ *
+ * Properties of the data in an HFI table.
+ */
+struct hfi_hdr {
+ u8 perf_updated;
+ u8 ee_updated;
+} __packed;
+
+/**
+ * struct hfi_instance - Representation of an HFI instance (i.e., a table)
+ * @local_table: Base of the local copy of the HFI table
+ * @timestamp: Timestamp of the last update of the local table.
+ * Located at the base of the local table.
+ * @hdr: Base address of the header of the local table
+ * @data: Base address of the data of the local table
+ * @cpus: CPUs represented in this HFI table instance
+ * @hw_table: Pointer to the HFI table of this instance
+ * @update_work: Delayed work to process HFI updates
+ * @table_lock:	Lock to protect accesses to the table of this instance
+ * @event_lock: Lock to process HFI interrupts
+ *
+ * A set of parameters to parse and navigate a specific HFI table.
+ */
+struct hfi_instance {
+ union {
+ void *local_table;
+ u64 *timestamp;
+ };
+ void *hdr;
+ void *data;
+ cpumask_var_t cpus;
+ void *hw_table;
+ struct delayed_work update_work;
+ raw_spinlock_t table_lock;
+ raw_spinlock_t event_lock;
+};
+
+/**
+ * struct hfi_features - Supported HFI features
+ * @nr_table_pages: Size of the HFI table in 4KB pages
+ * @cpu_stride: Stride size to locate the capability data of a logical
+ * processor within the table (i.e., row stride)
+ * @hdr_size: Size of the table header
+ *
+ * Parameters and supported features that are common to all HFI instances
+ */
+struct hfi_features {
+ unsigned int nr_table_pages;
+ unsigned int cpu_stride;
+ unsigned int hdr_size;
+};
+
+/**
+ * struct hfi_cpu_info - Per-CPU attributes to consume HFI data
+ * @index: Row of this CPU in its HFI table
+ * @hfi_instance: Attributes of the HFI table to which this CPU belongs
+ *
+ * Parameters to link a logical processor to an HFI table and a row within it.
+ */
+struct hfi_cpu_info {
+ s16 index;
+ struct hfi_instance *hfi_instance;
+};
+
+static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
+
+static int max_hfi_instances;
+static struct hfi_instance *hfi_instances;
+
+static struct hfi_features hfi_features;
+static DEFINE_MUTEX(hfi_instance_lock);
+
+static struct workqueue_struct *hfi_updates_wq;
+#define HFI_UPDATE_INTERVAL HZ
+#define HFI_MAX_THERM_NOTIFY_COUNT 16
+
+static void get_hfi_caps(struct hfi_instance *hfi_instance,
+ struct thermal_genl_cpu_caps *cpu_caps)
+{
+ int cpu, i = 0;
+
+ raw_spin_lock_irq(&hfi_instance->table_lock);
+ for_each_cpu(cpu, hfi_instance->cpus) {
+ struct hfi_cpu_data *caps;
+ s16 index;
+
+ index = per_cpu(hfi_cpu_info, cpu).index;
+ caps = hfi_instance->data + index * hfi_features.cpu_stride;
+ cpu_caps[i].cpu = cpu;
+
+ /*
+ * Scale performance and energy efficiency to
+ * the [0, 1023] interval that thermal netlink uses.
+ */
+ cpu_caps[i].performance = caps->perf_cap << 2;
+ cpu_caps[i].efficiency = caps->ee_cap << 2;
+
+ ++i;
+ }
+ raw_spin_unlock_irq(&hfi_instance->table_lock);
+}
+
+/*
+ * Call update_capabilities() when there are changes in the HFI table.
+ */
+static void update_capabilities(struct hfi_instance *hfi_instance)
+{
+ struct thermal_genl_cpu_caps *cpu_caps;
+ int i = 0, cpu_count;
+
+ /* CPUs may come online/offline while processing an HFI update. */
+ mutex_lock(&hfi_instance_lock);
+
+ cpu_count = cpumask_weight(hfi_instance->cpus);
+
+ /* No CPUs to report in this hfi_instance. */
+ if (!cpu_count)
+ goto out;
+
+ cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL);
+ if (!cpu_caps)
+ goto out;
+
+ get_hfi_caps(hfi_instance, cpu_caps);
+
+ if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT)
+ goto last_cmd;
+
+ /* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. */
+ for (i = 0;
+ (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count;
+ i += HFI_MAX_THERM_NOTIFY_COUNT)
+ thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT,
+ &cpu_caps[i]);
+
+ cpu_count = cpu_count - i;
+
+last_cmd:
+ /* Process the remaining capabilities if any. */
+ if (cpu_count)
+ thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]);
+
+ kfree(cpu_caps);
+out:
+ mutex_unlock(&hfi_instance_lock);
+}
+
+static void hfi_update_work_fn(struct work_struct *work)
+{
+ struct hfi_instance *hfi_instance;
+
+ hfi_instance = container_of(to_delayed_work(work), struct hfi_instance,
+ update_work);
+ if (!hfi_instance)
+ return;
+
+ update_capabilities(hfi_instance);
+}
+
+void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
+{
+ struct hfi_instance *hfi_instance;
+ int cpu = smp_processor_id();
+ struct hfi_cpu_info *info;
+ u64 new_timestamp;
+
+ if (!pkg_therm_status_msr_val)
+ return;
+
+ info = &per_cpu(hfi_cpu_info, cpu);
+ if (!info)
+ return;
+
+ /*
+ * A CPU is linked to its HFI instance before the thermal vector in the
+ * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL
+ * when receiving an HFI event.
+ */
+ hfi_instance = info->hfi_instance;
+ if (unlikely(!hfi_instance)) {
+ pr_debug("Received event on CPU %d but instance was null", cpu);
+ return;
+ }
+
+ /*
+ * On most systems, all CPUs in the package receive a package-level
+ * thermal interrupt when there is an HFI update. It is sufficient to
+	 * let a single CPU acknowledge the update and queue work to
+ * process it. The remaining CPUs can resume their work.
+ */
+ if (!raw_spin_trylock(&hfi_instance->event_lock))
+ return;
+
+ /* Skip duplicated updates. */
+ new_timestamp = *(u64 *)hfi_instance->hw_table;
+ if (*hfi_instance->timestamp == new_timestamp) {
+ raw_spin_unlock(&hfi_instance->event_lock);
+ return;
+ }
+
+ raw_spin_lock(&hfi_instance->table_lock);
+
+ /*
+ * Copy the updated table into our local copy. This includes the new
+ * timestamp.
+ */
+ memcpy(hfi_instance->local_table, hfi_instance->hw_table,
+ hfi_features.nr_table_pages << PAGE_SHIFT);
+
+ raw_spin_unlock(&hfi_instance->table_lock);
+ raw_spin_unlock(&hfi_instance->event_lock);
+
+ /*
+ * Let hardware know that we are done reading the HFI table and it is
+ * free to update it again.
+ */
+ pkg_therm_status_msr_val &= THERM_STATUS_CLEAR_PKG_MASK &
+ ~PACKAGE_THERM_STATUS_HFI_UPDATED;
+ wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, pkg_therm_status_msr_val);
+
+ queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work,
+ HFI_UPDATE_INTERVAL);
+}
+
+static void init_hfi_cpu_index(struct hfi_cpu_info *info)
+{
+ union cpuid6_edx edx;
+
+ /* Do not re-read @cpu's index if it has already been initialized. */
+ if (info->index > -1)
+ return;
+
+ edx.full = cpuid_edx(CPUID_HFI_LEAF);
+ info->index = edx.split.index;
+}
+
+/*
+ * The format of the HFI table depends on the number of capabilities that the
+ * hardware supports. Keep a data structure to navigate the table.
+ */
+static void init_hfi_instance(struct hfi_instance *hfi_instance)
+{
+ /* The HFI header is below the time-stamp. */
+ hfi_instance->hdr = hfi_instance->local_table +
+ sizeof(*hfi_instance->timestamp);
+
+ /* The HFI data starts below the header. */
+ hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size;
+}
+
+/**
+ * intel_hfi_online() - Enable HFI on @cpu
+ * @cpu: CPU in which the HFI will be enabled
+ *
+ * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package
+ * level. The first CPU in the die/package to come online does the full HFI
+ * initialization. Subsequent CPUs will just link themselves to the HFI
+ * instance of their die/package.
+ *
+ * This function is called before enabling the thermal vector in the local APIC
+ * in order to ensure that @cpu has an associated HFI instance when it receives
+ * an HFI event.
+ */
+void intel_hfi_online(unsigned int cpu)
+{
+ struct hfi_instance *hfi_instance;
+ struct hfi_cpu_info *info;
+ phys_addr_t hw_table_pa;
+ u64 msr_val;
+ u16 die_id;
+
+ /* Nothing to do if hfi_instances are missing. */
+ if (!hfi_instances)
+ return;
+
+ /*
+ * Link @cpu to the HFI instance of its package/die. It does not
+ * matter whether the instance has been initialized.
+ */
+ info = &per_cpu(hfi_cpu_info, cpu);
+ die_id = topology_logical_die_id(cpu);
+ hfi_instance = info->hfi_instance;
+ if (!hfi_instance) {
+ if (die_id < 0 || die_id >= max_hfi_instances)
+ return;
+
+ hfi_instance = &hfi_instances[die_id];
+ info->hfi_instance = hfi_instance;
+ }
+
+ init_hfi_cpu_index(info);
+
+ /*
+ * Now check if the HFI instance of the package/die of @cpu has been
+ * initialized (by checking its header). In such case, all we have to
+ * do is to add @cpu to this instance's cpumask.
+ */
+ mutex_lock(&hfi_instance_lock);
+ if (hfi_instance->hdr) {
+ cpumask_set_cpu(cpu, hfi_instance->cpus);
+ goto unlock;
+ }
+
+ /*
+ * Hardware is programmed with the physical address of the first page
+ * frame of the table. Hence, the allocated memory must be page-aligned.
+ */
+ hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!hfi_instance->hw_table)
+ goto unlock;
+
+ hw_table_pa = virt_to_phys(hfi_instance->hw_table);
+
+ /*
+ * Allocate memory to keep a local copy of the table that
+ * hardware generates.
+ */
+ hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT,
+ GFP_KERNEL);
+ if (!hfi_instance->local_table)
+ goto free_hw_table;
+
+ /*
+ * Program the address of the feedback table of this die/package. On
+ * some processors, hardware remembers the old address of the HFI table
+ * even after having been reprogrammed and re-enabled. Thus, do not free
+ * the pages allocated for the table or reprogram the hardware with a
+ * new base address. Namely, program the hardware only once.
+ */
+ msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
+ wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
+
+ init_hfi_instance(hfi_instance);
+
+ INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn);
+ raw_spin_lock_init(&hfi_instance->table_lock);
+ raw_spin_lock_init(&hfi_instance->event_lock);
+
+ cpumask_set_cpu(cpu, hfi_instance->cpus);
+
+ /*
+ * Enable the hardware feedback interface and never disable it. See
+ * comment on programming the address of the table.
+ */
+ rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+ msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
+ wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+
+unlock:
+ mutex_unlock(&hfi_instance_lock);
+ return;
+
+free_hw_table:
+ free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages);
+ goto unlock;
+}
+
+/**
+ * intel_hfi_offline() - Disable HFI on @cpu
+ * @cpu: CPU in which the HFI will be disabled
+ *
+ * Remove @cpu from those covered by its HFI instance.
+ *
+ * On some processors, hardware remembers previous programming settings even
+ * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the
+ * die/package of @cpu are offline. See note in intel_hfi_online().
+ */
+void intel_hfi_offline(unsigned int cpu)
+{
+ struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu);
+ struct hfi_instance *hfi_instance;
+
+ /*
+	 * Check if @cpu has an associated, initialized HFI instance (i.e., one
+	 * with a non-NULL header). Also, HFI instances are only initialized if X86_FEATURE_HFI
+ * is present.
+ */
+ hfi_instance = info->hfi_instance;
+ if (!hfi_instance)
+ return;
+
+ if (!hfi_instance->hdr)
+ return;
+
+ mutex_lock(&hfi_instance_lock);
+ cpumask_clear_cpu(cpu, hfi_instance->cpus);
+ mutex_unlock(&hfi_instance_lock);
+}
+
+static __init int hfi_parse_features(void)
+{
+ unsigned int nr_capabilities;
+ union cpuid6_edx edx;
+
+ if (!boot_cpu_has(X86_FEATURE_HFI))
+ return -ENODEV;
+
+ /*
+ * If we are here we know that CPUID_HFI_LEAF exists. Parse the
+ * supported capabilities and the size of the HFI table.
+ */
+ edx.full = cpuid_edx(CPUID_HFI_LEAF);
+
+ if (!edx.split.capabilities.split.performance) {
+ pr_debug("Performance reporting not supported! Not using HFI\n");
+ return -ENODEV;
+ }
+
+ /*
+ * The number of supported capabilities determines the number of
+ * columns in the HFI table. Exclude the reserved bits.
+ */
+ edx.split.capabilities.split.__reserved = 0;
+ nr_capabilities = hweight8(edx.split.capabilities.bits);
+
+ /* The number of 4KB pages required by the table */
+ hfi_features.nr_table_pages = edx.split.table_pages + 1;
+
+ /*
+ * The header contains change indications for each supported feature.
+ * The size of the table header is rounded up to be a multiple of 8
+ * bytes.
+ */
+ hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8;
+
+ /*
+ * Data of each logical processor is also rounded up to be a multiple
+ * of 8 bytes.
+ */
+ hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8;
+
+ return 0;
+}
+
+void __init intel_hfi_init(void)
+{
+ struct hfi_instance *hfi_instance;
+ int i, j;
+
+ if (hfi_parse_features())
+ return;
+
+ /* There is one HFI instance per die/package. */
+ max_hfi_instances = topology_max_packages() *
+ topology_max_die_per_package();
+
+ /*
+ * This allocation may fail. CPU hotplug callbacks must check
+ * for a null pointer.
+ */
+ hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances),
+ GFP_KERNEL);
+ if (!hfi_instances)
+ return;
+
+ for (i = 0; i < max_hfi_instances; i++) {
+ hfi_instance = &hfi_instances[i];
+ if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL))
+ goto err_nomem;
+ }
+
+ hfi_updates_wq = create_singlethread_workqueue("hfi-updates");
+ if (!hfi_updates_wq)
+ goto err_nomem;
+
+ return;
+
+err_nomem:
+ for (j = 0; j < i; ++j) {
+ hfi_instance = &hfi_instances[j];
+ free_cpumask_var(hfi_instance->cpus);
+ }
+
+ kfree(hfi_instances);
+ hfi_instances = NULL;
+}
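
Putting hfi_parse_features() and init_hfi_instance() together, the table the
driver navigates is: an 8-byte timestamp, then hdr_size bytes of
per-capability change indications, then one cpu_stride-sized row per logical
processor. A CPU's row is located the way get_hfi_caps() does it:

	struct hfi_cpu_data *caps;
	s16 index = per_cpu(hfi_cpu_info, cpu).index;

	caps = hfi_instance->data + index * hfi_features.cpu_stride;
	/* caps->perf_cap and caps->ee_cap are the raw 8-bit capabilities */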
diff --git a/drivers/thermal/intel/intel_hfi.h b/drivers/thermal/intel/intel_hfi.h
new file mode 100644
index 000000000000..325aa78b745c
--- /dev/null
+++ b/drivers/thermal/intel/intel_hfi.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _INTEL_HFI_H
+#define _INTEL_HFI_H
+
+#if defined(CONFIG_INTEL_HFI_THERMAL)
+void __init intel_hfi_init(void);
+void intel_hfi_online(unsigned int cpu);
+void intel_hfi_offline(unsigned int cpu);
+void intel_hfi_process_event(__u64 pkg_therm_status_msr_val);
+#else
+static inline void intel_hfi_init(void) { }
+static inline void intel_hfi_online(unsigned int cpu) { }
+static inline void intel_hfi_offline(unsigned int cpu) { }
+static inline void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) { }
+#endif /* CONFIG_INTEL_HFI_THERMAL */
+
+#endif /* _INTEL_HFI_H */
diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index 14256421d98c..c841ab37e7c6 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -556,12 +556,9 @@ static void end_power_clamp(void)
* stop faster.
*/
clamping = false;
- if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
- for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
- pr_debug("clamping worker for cpu %d alive, destroy\n",
- i);
- stop_power_clamp_worker(i);
- }
+ for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
+ pr_debug("clamping worker for cpu %d alive, destroy\n", i);
+ stop_power_clamp_worker(i);
}
}
diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c
index dab7e8fb1059..8352083b87c7 100644
--- a/drivers/thermal/intel/therm_throt.c
+++ b/drivers/thermal/intel/therm_throt.c
@@ -32,6 +32,7 @@
#include <asm/irq.h>
#include <asm/msr.h>
+#include "intel_hfi.h"
#include "thermal_interrupt.h"
/* How long to wait between reporting thermal events */
@@ -475,6 +476,13 @@ static int thermal_throttle_online(unsigned int cpu)
INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work);
INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work);
+ /*
+ * The first CPU coming online will enable the HFI. Usually this causes
+ * hardware to issue an HFI thermal interrupt. Such interrupt will reach
+ * the CPU once we enable the thermal vector in the local APIC.
+ */
+ intel_hfi_online(cpu);
+
/* Unmask the thermal vector after the above workqueues are initialized. */
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
@@ -492,6 +500,8 @@ static int thermal_throttle_offline(unsigned int cpu)
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);
+ intel_hfi_offline(cpu);
+
cancel_delayed_work_sync(&state->package_throttle.therm_work);
cancel_delayed_work_sync(&state->core_throttle.therm_work);
@@ -509,6 +519,8 @@ static __init int thermal_throttle_init_device(void)
if (!atomic_read(&therm_throt_en))
return 0;
+ intel_hfi_init();
+
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online",
thermal_throttle_online,
thermal_throttle_offline);
@@ -608,6 +620,10 @@ void intel_thermal_interrupt(void)
PACKAGE_THERM_STATUS_POWER_LIMIT,
POWER_LIMIT_EVENT,
PACKAGE_LEVEL);
+
+ if (this_cpu_has(X86_FEATURE_HFI))
+ intel_hfi_process_event(msr_val &
+ PACKAGE_THERM_STATUS_HFI_UPDATED);
}
}
@@ -717,6 +733,12 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
l | (PACKAGE_THERM_INT_LOW_ENABLE
| PACKAGE_THERM_INT_HIGH_ENABLE), h);
+
+ if (cpu_has(c, X86_FEATURE_HFI)) {
+ rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
+ wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+ l | PACKAGE_THERM_INT_HFI_ENABLE, h);
+ }
}
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c
index eafa7526eb8b..c7f91cbdccc7 100644
--- a/drivers/thermal/qcom/lmh.c
+++ b/drivers/thermal/qcom/lmh.c
@@ -28,6 +28,8 @@
#define LMH_REG_DCVS_INTR_CLR 0x8
+#define LMH_ENABLE_ALGOS 1
+
struct lmh_hw_data {
void __iomem *base;
struct irq_domain *domain;
@@ -90,6 +92,7 @@ static int lmh_probe(struct platform_device *pdev)
struct device_node *cpu_node;
struct lmh_hw_data *lmh_data;
int temp_low, temp_high, temp_arm, cpu_id, ret;
+ unsigned int enable_alg;
u32 node_id;
lmh_data = devm_kzalloc(dev, sizeof(*lmh_data), GFP_KERNEL);
@@ -141,32 +144,36 @@ static int lmh_probe(struct platform_device *pdev)
if (!qcom_scm_lmh_dcvsh_available())
return -EINVAL;
- ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_CRNT, LMH_ALGO_MODE_ENABLE, 1,
- LMH_NODE_DCVS, node_id, 0);
- if (ret)
- dev_err(dev, "Error %d enabling current subfunction\n", ret);
-
- ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_REL, LMH_ALGO_MODE_ENABLE, 1,
- LMH_NODE_DCVS, node_id, 0);
- if (ret)
- dev_err(dev, "Error %d enabling reliability subfunction\n", ret);
-
- ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_BCL, LMH_ALGO_MODE_ENABLE, 1,
- LMH_NODE_DCVS, node_id, 0);
- if (ret)
- dev_err(dev, "Error %d enabling BCL subfunction\n", ret);
-
- ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_ALGO_MODE_ENABLE, 1,
- LMH_NODE_DCVS, node_id, 0);
- if (ret) {
- dev_err(dev, "Error %d enabling thermal subfunction\n", ret);
- return ret;
- }
-
- ret = qcom_scm_lmh_profile_change(0x1);
- if (ret) {
- dev_err(dev, "Error %d changing profile\n", ret);
- return ret;
+ enable_alg = (uintptr_t)of_device_get_match_data(dev);
+
+ if (enable_alg) {
+ ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_CRNT, LMH_ALGO_MODE_ENABLE, 1,
+ LMH_NODE_DCVS, node_id, 0);
+ if (ret)
+ dev_err(dev, "Error %d enabling current subfunction\n", ret);
+
+ ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_REL, LMH_ALGO_MODE_ENABLE, 1,
+ LMH_NODE_DCVS, node_id, 0);
+ if (ret)
+ dev_err(dev, "Error %d enabling reliability subfunction\n", ret);
+
+ ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_BCL, LMH_ALGO_MODE_ENABLE, 1,
+ LMH_NODE_DCVS, node_id, 0);
+ if (ret)
+ dev_err(dev, "Error %d enabling BCL subfunction\n", ret);
+
+ ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_ALGO_MODE_ENABLE, 1,
+ LMH_NODE_DCVS, node_id, 0);
+ if (ret) {
+ dev_err(dev, "Error %d enabling thermal subfunction\n", ret);
+ return ret;
+ }
+
+ ret = qcom_scm_lmh_profile_change(0x1);
+ if (ret) {
+ dev_err(dev, "Error %d changing profile\n", ret);
+ return ret;
+ }
}
/* Set default thermal trips */
@@ -213,7 +220,8 @@ static int lmh_probe(struct platform_device *pdev)
}
static const struct of_device_id lmh_table[] = {
- { .compatible = "qcom,sdm845-lmh", },
+ { .compatible = "qcom,sdm845-lmh", .data = (void *)LMH_ENABLE_ALGOS},
+ { .compatible = "qcom,sm8150-lmh", },
{}
};
MODULE_DEVICE_TABLE(of, lmh_table);
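
The per-compatible behavior is keyed off the .data member of the match table;
since only a boolean is needed, it is carried in the pointer value itself via
(uintptr_t) casts instead of pointing at a real structure. The same idiom in
isolation (hypothetical compatibles):

	static const struct of_device_id example_table[] = {
		{ .compatible = "vendor,soc-a", .data = (void *)1 }, /* enable algos */
		{ .compatible = "vendor,soc-b" },                    /* .data is NULL */
		{ }
	};

	unsigned int enable = (uintptr_t)of_device_get_match_data(dev);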
diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c
index 99a8d9f3e03c..154d3cb19c88 100644
--- a/drivers/thermal/qcom/tsens.c
+++ b/drivers/thermal/qcom/tsens.c
@@ -18,6 +18,7 @@
#include <linux/regmap.h>
#include <linux/slab.h>
#include <linux/thermal.h>
+#include "../thermal_hwmon.h"
#include "tsens.h"
/**
@@ -1060,6 +1061,10 @@ static int tsens_register(struct tsens_priv *priv)
priv->sensor[i].tzd = tzd;
if (priv->ops->enable)
priv->ops->enable(priv, i);
+
+ if (devm_thermal_add_hwmon_sysfs(tzd))
+ dev_warn(priv->dev,
+ "Failed to add hwmon sysfs attributes\n");
}
/* VER_0 requires setting MIN and MAX THRESH
diff --git a/drivers/thermal/tegra/tegra-bpmp-thermal.c b/drivers/thermal/tegra/tegra-bpmp-thermal.c
index 94f1da1dcd69..5affc3d196be 100644
--- a/drivers/thermal/tegra/tegra-bpmp-thermal.c
+++ b/drivers/thermal/tegra/tegra-bpmp-thermal.c
@@ -52,6 +52,8 @@ static int tegra_bpmp_thermal_get_temp(void *data, int *out_temp)
err = tegra_bpmp_transfer(zone->tegra->bpmp, &msg);
if (err)
return err;
+ if (msg.rx.ret)
+ return -EINVAL;
*out_temp = reply.get_temp.temp;
@@ -63,6 +65,7 @@ static int tegra_bpmp_thermal_set_trips(void *data, int low, int high)
struct tegra_bpmp_thermal_zone *zone = data;
struct mrq_thermal_host_to_bpmp_request req;
struct tegra_bpmp_message msg;
+ int err;
memset(&req, 0, sizeof(req));
req.type = CMD_THERMAL_SET_TRIP;
@@ -76,7 +79,13 @@ static int tegra_bpmp_thermal_set_trips(void *data, int low, int high)
msg.tx.data = &req;
msg.tx.size = sizeof(req);
- return tegra_bpmp_transfer(zone->tegra->bpmp, &msg);
+ err = tegra_bpmp_transfer(zone->tegra->bpmp, &msg);
+ if (err)
+ return err;
+ if (msg.rx.ret)
+ return -EINVAL;
+
+ return 0;
}
static void tz_device_update_work_fn(struct work_struct *work)
@@ -140,6 +149,8 @@ static int tegra_bpmp_thermal_get_num_zones(struct tegra_bpmp *bpmp,
err = tegra_bpmp_transfer(bpmp, &msg);
if (err)
return err;
+ if (msg.rx.ret)
+ return -EINVAL;
*num_zones = reply.get_num_zones.num;
diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c
index a16dd4d5d710..32fea5174cc0 100644
--- a/drivers/thermal/thermal_netlink.c
+++ b/drivers/thermal/thermal_netlink.c
@@ -43,6 +43,11 @@ static const struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] =
[THERMAL_GENL_ATTR_CDEV_MAX_STATE] = { .type = NLA_U32 },
[THERMAL_GENL_ATTR_CDEV_NAME] = { .type = NLA_STRING,
.len = THERMAL_NAME_LENGTH },
+ /* CPU capabilities */
+ [THERMAL_GENL_ATTR_CPU_CAPABILITY] = { .type = NLA_NESTED },
+ [THERMAL_GENL_ATTR_CPU_CAPABILITY_ID] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY] = { .type = NLA_U32 },
};
struct param {
@@ -58,6 +63,8 @@ struct param {
int temp;
int cdev_state;
int cdev_max_state;
+ struct thermal_genl_cpu_caps *cpu_capabilities;
+ int cpu_capabilities_count;
};
typedef int (*cb_t)(struct param *);
@@ -190,6 +197,42 @@ static int thermal_genl_event_gov_change(struct param *p)
return 0;
}
+static int thermal_genl_event_cpu_capability_change(struct param *p)
+{
+ struct thermal_genl_cpu_caps *cpu_cap = p->cpu_capabilities;
+ struct sk_buff *msg = p->msg;
+ struct nlattr *start_cap;
+ int i;
+
+ start_cap = nla_nest_start(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY);
+ if (!start_cap)
+ return -EMSGSIZE;
+
+ for (i = 0; i < p->cpu_capabilities_count; ++i) {
+ if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_ID,
+ cpu_cap->cpu))
+ goto out_cancel_nest;
+
+ if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE,
+ cpu_cap->performance))
+ goto out_cancel_nest;
+
+ if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY,
+ cpu_cap->efficiency))
+ goto out_cancel_nest;
+
+ ++cpu_cap;
+ }
+
+ nla_nest_end(msg, start_cap);
+
+ return 0;
+out_cancel_nest:
+ nla_nest_cancel(msg, start_cap);
+
+ return -EMSGSIZE;
+}
+
int thermal_genl_event_tz_delete(struct param *p)
__attribute__((alias("thermal_genl_event_tz")));
@@ -219,6 +262,7 @@ static cb_t event_cb[] = {
[THERMAL_GENL_EVENT_CDEV_DELETE] = thermal_genl_event_cdev_delete,
[THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = thermal_genl_event_cdev_state_update,
[THERMAL_GENL_EVENT_TZ_GOV_CHANGE] = thermal_genl_event_gov_change,
+ [THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE] = thermal_genl_event_cpu_capability_change,
};
/*
@@ -356,6 +400,15 @@ int thermal_notify_tz_gov_change(int tz_id, const char *name)
return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_GOV_CHANGE, &p);
}
+int thermal_genl_cpu_capability_event(int count,
+ struct thermal_genl_cpu_caps *caps)
+{
+ struct param p = { .cpu_capabilities_count = count, .cpu_capabilities = caps };
+
+ return thermal_genl_send_event(THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE, &p);
+}
+EXPORT_SYMBOL_GPL(thermal_genl_cpu_capability_event);
+
/*************************** Command encoding ********************************/
static int __thermal_genl_cmd_tz_get_id(struct thermal_zone_device *tz,
@@ -419,11 +472,12 @@ static int thermal_genl_cmd_tz_get_trip(struct param *p)
for (i = 0; i < tz->trips; i++) {
enum thermal_trip_type type;
- int temp, hyst;
+ int temp, hyst = 0;
tz->ops->get_trip_type(tz, i, &type);
tz->ops->get_trip_temp(tz, i, &temp);
- tz->ops->get_trip_hyst(tz, i, &hyst);
+ if (tz->ops->get_trip_hyst)
+ tz->ops->get_trip_hyst(tz, i, &hyst);
if (nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_TRIP_ID, i) ||
nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_TRIP_TYPE, type) ||
diff --git a/drivers/thermal/thermal_netlink.h b/drivers/thermal/thermal_netlink.h
index e554f76291f4..1052f523188d 100644
--- a/drivers/thermal/thermal_netlink.h
+++ b/drivers/thermal/thermal_netlink.h
@@ -4,6 +4,12 @@
* Author: Daniel Lezcano <daniel.lezcano@linaro.org>
*/
+struct thermal_genl_cpu_caps {
+ int cpu;
+ int performance;
+ int efficiency;
+};
+
/* Netlink notification function */
#ifdef CONFIG_THERMAL_NETLINK
int __init thermal_netlink_init(void);
@@ -23,6 +29,8 @@ int thermal_notify_cdev_add(int cdev_id, const char *name, int max_state);
int thermal_notify_cdev_delete(int cdev_id);
int thermal_notify_tz_gov_change(int tz_id, const char *name);
int thermal_genl_sampling_temp(int id, int temp);
+int thermal_genl_cpu_capability_event(int count,
+ struct thermal_genl_cpu_caps *caps);
#else
static inline int thermal_netlink_init(void)
{
@@ -101,4 +109,10 @@ static inline int thermal_genl_sampling_temp(int id, int temp)
{
return 0;
}
+
+static inline int thermal_genl_cpu_capability_event(int count, struct thermal_genl_cpu_caps *caps)
+{
+ return 0;
+}
+
#endif /* CONFIG_THERMAL_NETLINK */
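
The capability-change handler added to thermal_netlink.c above uses the
standard netlink nesting idiom: open a nest, emit the per-CPU attribute
triplets, then either commit the nest or cancel it so a partially built
message is rolled back. Schematically:

	struct nlattr *nest;

	nest = nla_nest_start(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_ID, cpu))
		goto cancel;
	/* ... performance and efficiency attributes likewise ... */

	nla_nest_end(msg, nest);    /* commit everything since nla_nest_start() */
	return 0;

cancel:
	nla_nest_cancel(msg, nest); /* roll the partial nest back */
	return -EMSGSIZE;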
diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index f84375865c97..703039d8b937 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
@@ -21,6 +21,7 @@
#include "ti-thermal.h"
#include "ti-bandgap.h"
+#include "../thermal_hwmon.h"
/* common data structures */
struct ti_thermal_data {
@@ -106,14 +107,6 @@ static inline int __ti_thermal_get_temp(void *devdata, int *temp)
return ret;
}
-static inline int ti_thermal_get_temp(struct thermal_zone_device *thermal,
- int *temp)
-{
- struct ti_thermal_data *data = thermal->devdata;
-
- return __ti_thermal_get_temp(data, temp);
-}
-
static int __ti_thermal_get_trend(void *p, int trip, enum thermal_trend *trend)
{
struct ti_thermal_data *data = p;
@@ -189,6 +182,9 @@ int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id,
ti_bandgap_set_sensor_data(bgp, id, data);
ti_bandgap_write_update_interval(bgp, data->sensor_id, interval);
+ if (devm_thermal_add_hwmon_sysfs(data->ti_thermal))
+ dev_warn(bgp->dev, "failed to add hwmon sysfs attributes\n");
+
return 0;
}
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index ba27b274c967..fa92f727fdf8 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -322,6 +322,7 @@ static int addr_cnt;
#define GSM1_ESCAPE_BITS 0x20
#define XON 0x11
#define XOFF 0x13
+#define ISO_IEC_646_MASK 0x7F
static const struct tty_port_operations gsm_port_ops;
@@ -438,7 +439,7 @@ static u8 gsm_encode_modem(const struct gsm_dlci *dlci)
modembits |= MDM_RTR;
if (dlci->modem_tx & TIOCM_RI)
modembits |= MDM_IC;
- if (dlci->modem_tx & TIOCM_CD)
+ if (dlci->modem_tx & TIOCM_CD || dlci->gsm->initiator)
modembits |= MDM_DV;
return modembits;
}
@@ -447,7 +448,7 @@ static u8 gsm_encode_modem(const struct gsm_dlci *dlci)
* gsm_print_packet - display a frame for debug
* @hdr: header to print before decode
* @addr: address EA from the frame
- * @cr: C/R bit from the frame
+ * @cr: C/R bit seen as initiator
* @control: control including PF bit
* @data: following data bytes
* @dlen: length of data
@@ -531,7 +532,8 @@ static int gsm_stuff_frame(const u8 *input, u8 *output, int len)
int olen = 0;
while (len--) {
if (*input == GSM1_SOF || *input == GSM1_ESCAPE
- || *input == XON || *input == XOFF) {
+ || (*input & ISO_IEC_646_MASK) == XON
+ || (*input & ISO_IEC_646_MASK) == XOFF) {
*output++ = GSM1_ESCAPE;
*output++ = *input++ ^ GSM1_ESCAPE_BITS;
olen++;
@@ -546,7 +548,7 @@ static int gsm_stuff_frame(const u8 *input, u8 *output, int len)
* gsm_send - send a control frame
* @gsm: our GSM mux
* @addr: address for control frame
- * @cr: command/response bit
+ * @cr: command/response bit seen as initiator
* @control: control byte including PF bit
*
* Format up and transmit a control frame. These do not go via the
@@ -561,11 +563,15 @@ static void gsm_send(struct gsm_mux *gsm, int addr, int cr, int control)
int len;
u8 cbuf[10];
u8 ibuf[3];
+ int ocr;
+
+ /* toggle C/R coding if not initiator */
+ ocr = cr ^ (gsm->initiator ? 0 : 1);
switch (gsm->encoding) {
case 0:
cbuf[0] = GSM0_SOF;
- cbuf[1] = (addr << 2) | (cr << 1) | EA;
+ cbuf[1] = (addr << 2) | (ocr << 1) | EA;
cbuf[2] = control;
cbuf[3] = EA; /* Length of data = 0 */
cbuf[4] = 0xFF - gsm_fcs_add_block(INIT_FCS, cbuf + 1, 3);
@@ -575,7 +581,7 @@ static void gsm_send(struct gsm_mux *gsm, int addr, int cr, int control)
case 1:
case 2:
/* Control frame + packing (but not frame stuffing) in mode 1 */
- ibuf[0] = (addr << 2) | (cr << 1) | EA;
+ ibuf[0] = (addr << 2) | (ocr << 1) | EA;
ibuf[1] = control;
ibuf[2] = 0xFF - gsm_fcs_add_block(INIT_FCS, ibuf, 2);
/* Stuffing may double the size worst case */
@@ -609,7 +615,7 @@ static void gsm_send(struct gsm_mux *gsm, int addr, int cr, int control)
static inline void gsm_response(struct gsm_mux *gsm, int addr, int control)
{
- gsm_send(gsm, addr, 1, control);
+ gsm_send(gsm, addr, 0, control);
}
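
With the ocr toggle above, callers of gsm_send() now pass the logical C/R value (1 = command, 0 = response) and the wire coding is derived from the initiator role, which is why gsm_response() switches from 1 to 0. A reduced sketch of the coding rule, following the 27.010 C/R convention the comments describe:

/* Wire C/R bit: the logical command(1)/response(0) value is inverted
 * when this end is not the initiator of the mux. */
static int gsm_wire_cr(int logical_cr, int initiator)
{
	return logical_cr ^ (initiator ? 0 : 1);
}

/* Initiator command:  gsm_wire_cr(1, 1) == 1
 * Responder command:  gsm_wire_cr(1, 0) == 0
 * Responder response: gsm_wire_cr(0, 0) == 1 */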
/**
@@ -1015,25 +1021,25 @@ static void gsm_control_reply(struct gsm_mux *gsm, int cmd, const u8 *data,
* @tty: virtual tty bound to the DLCI
* @dlci: DLCI to affect
* @modem: modem bits (full EA)
- * @clen: command length
+ * @slen: number of signal octets
*
* Used when a modem control message or line state inline in adaptation
* layer 2 is processed. Sort out the local modem state and throttles
*/
static void gsm_process_modem(struct tty_struct *tty, struct gsm_dlci *dlci,
- u32 modem, int clen)
+ u32 modem, int slen)
{
int mlines = 0;
u8 brk = 0;
int fc;
- /* The modem status command can either contain one octet (v.24 signals)
- or two octets (v.24 signals + break signals). The length field will
- either be 2 or 3 respectively. This is specified in section
- 5.4.6.3.7 of the 27.010 mux spec. */
+ /* The modem status command can either contain one octet (V.24 signals)
+ * or two octets (V.24 signals + break signals). This is specified in
+ * section 5.4.6.3.7 of the 07.10 mux spec.
+ */
- if (clen == 2)
+ if (slen == 1)
modem = modem & 0x7f;
else {
brk = modem & 0x7f;
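
gsm_process_modem() is now told how many signal octets were decoded (slen) rather than the raw command length, matching 07.10 section 5.4.6.3.7: one octet carries the V.24 bits, a second one adds break information. A standalone sketch of that decode (struct and helper hypothetical):

#include <stdint.h>

struct modem_signals {
	uint8_t v24;	/* V.24 signal bits */
	uint8_t brk;	/* break bits, 0 if absent */
};

static struct modem_signals decode_modem_octets(uint32_t modem, int slen)
{
	struct modem_signals s = { 0, 0 };

	if (slen == 1) {		/* V.24 signals only */
		s.v24 = modem & 0x7f;
	} else {			/* V.24 signals + break */
		s.brk = modem & 0x7f;
		s.v24 = (modem >> 7) & 0x7f;
	}
	return s;
}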
@@ -1090,6 +1096,7 @@ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen)
unsigned int brk = 0;
struct gsm_dlci *dlci;
int len = clen;
+ int slen;
const u8 *dp = data;
struct tty_struct *tty;
@@ -1109,6 +1116,7 @@ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen)
return;
dlci = gsm->dlci[addr];
+ slen = len;
while (gsm_read_ea(&modem, *dp++) == 0) {
len--;
if (len == 0)
@@ -1125,7 +1133,7 @@ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen)
modem |= (brk & 0x7f);
}
tty = tty_port_tty_get(&dlci->port);
- gsm_process_modem(tty, dlci, modem, clen);
+ gsm_process_modem(tty, dlci, modem, slen);
if (tty) {
tty_wakeup(tty);
tty_kref_put(tty);
@@ -1449,6 +1457,9 @@ static void gsm_dlci_close(struct gsm_dlci *dlci)
if (dlci->addr != 0) {
tty_port_tty_hangup(&dlci->port, false);
kfifo_reset(&dlci->fifo);
+ /* Ensure that gsmtty_open() can return. */
+ tty_port_set_initialized(&dlci->port, 0);
+ wake_up_interruptible(&dlci->port.open_wait);
} else
dlci->gsm->dead = true;
/* Unregister gsmtty driver, report gsmtty dev remove uevent for user */
@@ -1512,7 +1523,7 @@ static void gsm_dlci_t1(struct timer_list *t)
dlci->mode = DLCI_MODE_ADM;
gsm_dlci_open(dlci);
} else {
- gsm_dlci_close(dlci);
+ gsm_dlci_begin_close(dlci); /* prevent half open link */
}
break;
@@ -1591,6 +1602,7 @@ static void gsm_dlci_data(struct gsm_dlci *dlci, const u8 *data, int clen)
struct tty_struct *tty;
unsigned int modem = 0;
int len = clen;
+ int slen = 0;
if (debug & 16)
pr_debug("%d bytes for tty\n", len);
@@ -1603,12 +1615,14 @@ static void gsm_dlci_data(struct gsm_dlci *dlci, const u8 *data, int clen)
case 2: /* Asynchronous serial with line state in each frame */
while (gsm_read_ea(&modem, *data++) == 0) {
len--;
+ slen++;
if (len == 0)
return;
}
+ slen++;
tty = tty_port_tty_get(port);
if (tty) {
- gsm_process_modem(tty, dlci, modem, clen);
+ gsm_process_modem(tty, dlci, modem, slen);
tty_kref_put(tty);
}
fallthrough;
@@ -1746,7 +1760,12 @@ static void gsm_dlci_release(struct gsm_dlci *dlci)
gsm_destroy_network(dlci);
mutex_unlock(&dlci->mutex);
- tty_hangup(tty);
+ /* We cannot use tty_hangup() because in tty_kref_put() the tty
+ * driver assumes that the hangup queue is free and reuses it to
+ * queue release_one_tty() -> NULL pointer panic in
+ * process_one_work().
+ */
+ tty_vhangup(tty);
tty_port_tty_set(&dlci->port, NULL);
tty_kref_put(tty);
@@ -1798,10 +1817,10 @@ static void gsm_queue(struct gsm_mux *gsm)
goto invalid;
cr = gsm->address & 1; /* C/R bit */
+ cr ^= gsm->initiator ? 0 : 1; /* Flip so 1 always means command */
gsm_print_packet("<--", address, cr, gsm->control, gsm->buf, gsm->len);
- cr ^= 1 - gsm->initiator; /* Flip so 1 always means command */
dlci = gsm->dlci[address];
switch (gsm->control) {
@@ -3232,9 +3251,9 @@ static void gsmtty_throttle(struct tty_struct *tty)
if (dlci->state == DLCI_CLOSED)
return;
if (C_CRTSCTS(tty))
- dlci->modem_tx &= ~TIOCM_DTR;
+ dlci->modem_tx &= ~TIOCM_RTS;
dlci->throttled = true;
- /* Send an MSC with DTR cleared */
+ /* Send an MSC with RTS cleared */
gsmtty_modem_update(dlci, 0);
}
@@ -3244,9 +3263,9 @@ static void gsmtty_unthrottle(struct tty_struct *tty)
if (dlci->state == DLCI_CLOSED)
return;
if (C_CRTSCTS(tty))
- dlci->modem_tx |= TIOCM_DTR;
+ dlci->modem_tx |= TIOCM_RTS;
dlci->throttled = false;
- /* Send an MSC with DTR set */
+ /* Send an MSC with RTS set */
gsmtty_modem_update(dlci, 0);
}
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 8933ef1f83c0..efc72104c840 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -1329,7 +1329,7 @@ handle_newline:
put_tty_queue(c, ldata);
smp_store_release(&ldata->canon_head, ldata->read_head);
kill_fasync(&tty->fasync, SIGIO, POLL_IN);
- wake_up_interruptible_poll(&tty->read_wait, EPOLLIN);
+ wake_up_interruptible_poll(&tty->read_wait, EPOLLIN | EPOLLRDNORM);
return;
}
}
@@ -1561,7 +1561,7 @@ static void __receive_buf(struct tty_struct *tty, const unsigned char *cp,
if (read_cnt(ldata)) {
kill_fasync(&tty->fasync, SIGIO, POLL_IN);
- wake_up_interruptible_poll(&tty->read_wait, EPOLLIN);
+ wake_up_interruptible_poll(&tty->read_wait, EPOLLIN | EPOLLRDNORM);
}
}
@@ -1926,7 +1926,7 @@ static bool canon_copy_from_read_buf(struct tty_struct *tty,
return false;
canon_head = smp_load_acquire(&ldata->canon_head);
- n = min(*nr + 1, canon_head - ldata->read_tail);
+ n = min(*nr, canon_head - ldata->read_tail);
tail = ldata->read_tail & (N_TTY_BUF_SIZE - 1);
size = min_t(size_t, tail + n, N_TTY_BUF_SIZE);
@@ -1948,10 +1948,8 @@ static bool canon_copy_from_read_buf(struct tty_struct *tty,
n += N_TTY_BUF_SIZE;
c = n + found;
- if (!found || read_buf(ldata, eol) != __DISABLED_CHAR) {
- c = min(*nr, c);
+ if (!found || read_buf(ldata, eol) != __DISABLED_CHAR)
n = c;
- }
n_tty_trace("%s: eol:%zu found:%d n:%zu c:%zu tail:%zu more:%zu\n",
__func__, eol, found, n, c, tail, more);
diff --git a/drivers/tty/rpmsg_tty.c b/drivers/tty/rpmsg_tty.c
index dae2a4e44f38..29db413bbc03 100644
--- a/drivers/tty/rpmsg_tty.c
+++ b/drivers/tty/rpmsg_tty.c
@@ -50,10 +50,17 @@ static int rpmsg_tty_cb(struct rpmsg_device *rpdev, void *data, int len, void *p
static int rpmsg_tty_install(struct tty_driver *driver, struct tty_struct *tty)
{
struct rpmsg_tty_port *cport = idr_find(&tty_idr, tty->index);
+ struct tty_port *port;
tty->driver_data = cport;
- return tty_port_install(&cport->port, driver, tty);
+ port = tty_port_get(&cport->port);
+ return tty_port_install(port, driver, tty);
+}
+
+static void rpmsg_tty_cleanup(struct tty_struct *tty)
+{
+ tty_port_put(tty->port);
}
static int rpmsg_tty_open(struct tty_struct *tty, struct file *filp)
@@ -106,12 +113,19 @@ static unsigned int rpmsg_tty_write_room(struct tty_struct *tty)
return size;
}
+static void rpmsg_tty_hangup(struct tty_struct *tty)
+{
+ tty_port_hangup(tty->port);
+}
+
static const struct tty_operations rpmsg_tty_ops = {
.install = rpmsg_tty_install,
.open = rpmsg_tty_open,
.close = rpmsg_tty_close,
.write = rpmsg_tty_write,
.write_room = rpmsg_tty_write_room,
+ .hangup = rpmsg_tty_hangup,
+ .cleanup = rpmsg_tty_cleanup,
};
static struct rpmsg_tty_port *rpmsg_tty_alloc_cport(void)
@@ -137,8 +151,10 @@ static struct rpmsg_tty_port *rpmsg_tty_alloc_cport(void)
return cport;
}
-static void rpmsg_tty_release_cport(struct rpmsg_tty_port *cport)
+static void rpmsg_tty_destruct_port(struct tty_port *port)
{
+ struct rpmsg_tty_port *cport = container_of(port, struct rpmsg_tty_port, port);
+
mutex_lock(&idr_lock);
idr_remove(&tty_idr, cport->id);
mutex_unlock(&idr_lock);
@@ -146,7 +162,10 @@ static void rpmsg_tty_release_cport(struct rpmsg_tty_port *cport)
kfree(cport);
}
-static const struct tty_port_operations rpmsg_tty_port_ops = { };
+static const struct tty_port_operations rpmsg_tty_port_ops = {
+ .destruct = rpmsg_tty_destruct_port,
+};
+
static int rpmsg_tty_probe(struct rpmsg_device *rpdev)
{
@@ -166,7 +185,8 @@ static int rpmsg_tty_probe(struct rpmsg_device *rpdev)
cport->id, dev);
if (IS_ERR(tty_dev)) {
ret = dev_err_probe(dev, PTR_ERR(tty_dev), "Failed to register tty port\n");
- goto err_destroy;
+ tty_port_put(&cport->port);
+ return ret;
}
cport->rpdev = rpdev;
@@ -177,12 +197,6 @@ static int rpmsg_tty_probe(struct rpmsg_device *rpdev)
rpdev->src, rpdev->dst, cport->id);
return 0;
-
-err_destroy:
- tty_port_destroy(&cport->port);
- rpmsg_tty_release_cport(cport);
-
- return ret;
}
static void rpmsg_tty_remove(struct rpmsg_device *rpdev)
@@ -192,13 +206,11 @@ static void rpmsg_tty_remove(struct rpmsg_device *rpdev)
dev_dbg(&rpdev->dev, "Removing rpmsg tty device %d\n", cport->id);
/* Hang up the user side to release the tty */
- if (tty_port_initialized(&cport->port))
- tty_port_tty_hangup(&cport->port, false);
+ tty_port_tty_hangup(&cport->port, false);
tty_unregister_device(rpmsg_tty_driver, cport->id);
- tty_port_destroy(&cport->port);
- rpmsg_tty_release_cport(cport);
+ tty_port_put(&cport->port);
}
static struct rpmsg_device_id rpmsg_driver_tty_id_table[] = {
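
The rpmsg_tty rework above ties the cport lifetime to the tty_port refcount: .install takes an extra reference that .cleanup drops, probe's initial reference is dropped in remove, and the final put runs .destruct to free the container. A reduced sketch of that ownership pattern (types simplified; the real refcount is the kref inside struct tty_port):

struct port {
	int refs;
	void (*destruct)(struct port *);
};

static struct port *port_get(struct port *p) { p->refs++; return p; }

static void port_put(struct port *p)
{
	if (--p->refs == 0)
		p->destruct(p);	/* rpmsg_tty_destruct_port() in the diff */
}

/* probe: refs = 1; tty install: port_get() -> 2 (paired with .cleanup);
 * remove: port_put() drops probe's reference; whichever put is last
 * frees the cport - so an open tty keeps it alive past remove. */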
diff --git a/drivers/tty/serial/8250/8250_gsc.c b/drivers/tty/serial/8250/8250_gsc.c
index 673cda3d011d..948d0a1c6ae8 100644
--- a/drivers/tty/serial/8250/8250_gsc.c
+++ b/drivers/tty/serial/8250/8250_gsc.c
@@ -26,7 +26,7 @@ static int __init serial_init_chip(struct parisc_device *dev)
unsigned long address;
int err;
-#ifdef CONFIG_64BIT
+#if defined(CONFIG_64BIT) && defined(CONFIG_IOSAPIC)
if (!dev->irq && (dev->id.sversion == 0xad))
dev->irq = iosapic_serial_irq(dev);
#endif
diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c
index bce28729dd7b..be8626234627 100644
--- a/drivers/tty/serial/8250/8250_of.c
+++ b/drivers/tty/serial/8250/8250_of.c
@@ -83,8 +83,17 @@ static int of_platform_serial_setup(struct platform_device *ofdev,
port->mapsize = resource_size(&resource);
/* Check for shifted address mapping */
- if (of_property_read_u32(np, "reg-offset", &prop) == 0)
+ if (of_property_read_u32(np, "reg-offset", &prop) == 0) {
+ if (prop >= port->mapsize) {
+ dev_warn(&ofdev->dev, "reg-offset %u exceeds region size %pa\n",
+ prop, &port->mapsize);
+ ret = -EINVAL;
+ goto err_unprepare;
+ }
+
port->mapbase += prop;
+ port->mapsize -= prop;
+ }
port->iotype = UPIO_MEM;
if (of_property_read_u32(np, "reg-io-width", &prop) == 0) {
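
The reg-offset hunk both validates the property against the resource and shrinks the usable size, so a bogus device tree can no longer yield a mapping entirely outside the device. The arithmetic, as a standalone sketch (helper hypothetical):

#include <stdbool.h>
#include <stdint.h>

/* Apply a shifted register mapping: the offset must lie inside the
 * resource, and the remaining size shrinks accordingly. */
static bool apply_reg_offset(uint64_t *mapbase, uint64_t *mapsize,
			     uint32_t offset)
{
	if (offset >= *mapsize)
		return false;	/* reg-offset points past the region */
	*mapbase += offset;
	*mapsize -= offset;
	return true;
}

/* A 0x1000-byte region with reg-offset 0x2000 is now rejected with
 * -EINVAL instead of silently mapping unrelated addresses. */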
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index e8b5469e9dfa..e17e97ea86fa 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -4779,8 +4779,30 @@ static const struct pci_device_id serial_pci_tbl[] = {
{ PCI_VENDOR_ID_INTASHIELD, PCI_DEVICE_ID_INTASHIELD_IS400,
PCI_ANY_ID, PCI_ANY_ID, 0, 0, /* 135a.0dc0 */
pbn_b2_4_115200 },
+ /* Brainboxes Devices */
/*
- * BrainBoxes UC-260
+ * Brainboxes UC-101
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0BA1,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_2_115200 },
+ /*
+ * Brainboxes UC-235/246
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0AA1,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_1_115200 },
+ /*
+ * Brainboxes UC-257
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0861,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_2_115200 },
+ /*
+ * Brainboxes UC-260/271/701/756
*/
{ PCI_VENDOR_ID_INTASHIELD, 0x0D21,
PCI_ANY_ID, PCI_ANY_ID,
@@ -4788,7 +4810,81 @@ static const struct pci_device_id serial_pci_tbl[] = {
pbn_b2_4_115200 },
{ PCI_VENDOR_ID_INTASHIELD, 0x0E34,
PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00,
+ PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00,
+ pbn_b2_4_115200 },
+ /*
+ * Brainboxes UC-268
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0841,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_4_115200 },
+ /*
+ * Brainboxes UC-275/279
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0881,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_8_115200 },
+ /*
+ * Brainboxes UC-302
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x08E1,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_2_115200 },
+ /*
+ * Brainboxes UC-310
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x08C1,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_2_115200 },
+ /*
+ * Brainboxes UC-313
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x08A3,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_2_115200 },
+ /*
+ * Brainboxes UC-320/324
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0A61,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_1_115200 },
+ /*
+ * Brainboxes UC-346
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0B02,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_4_115200 },
+ /*
+ * Brainboxes UC-357
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0A81,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_2_115200 },
+ { PCI_VENDOR_ID_INTASHIELD, 0x0A83,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_2_115200 },
+ /*
+ * Brainboxes UC-368
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0C41,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_b2_4_115200 },
+ /*
+ * Brainboxes UC-420/431
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0921,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
pbn_b2_4_115200 },
/*
* Perle PCI-RAS cards
diff --git a/drivers/tty/serial/8250/8250_pericom.c b/drivers/tty/serial/8250/8250_pericom.c
index 025b055363c3..95ff10f25d58 100644
--- a/drivers/tty/serial/8250/8250_pericom.c
+++ b/drivers/tty/serial/8250/8250_pericom.c
@@ -117,7 +117,7 @@ static int pericom8250_probe(struct pci_dev *pdev, const struct pci_device_id *i
uart.port.private_data = pericom;
uart.port.iotype = UPIO_PORT;
uart.port.uartclk = 921600 * 16;
- uart.port.flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF | UPF_SHARE_IRQ | UPF_MAGIC_MULTIPLIER;
+ uart.port.flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF | UPF_SHARE_IRQ;
uart.port.set_divisor = pericom_do_set_divisor;
for (i = 0; i < nr && i < maxnr; i++) {
unsigned int offset = (i == 3 && nr == 4) ? 0x38 : i * 0x8;
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 2abb3de11a48..3b12bfc1ed67 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2056,7 +2056,10 @@ static void serial8250_break_ctl(struct uart_port *port, int break_state)
serial8250_rpm_put(up);
}
-static void wait_for_lsr(struct uart_8250_port *up, int bits)
+/*
+ * Wait for transmitter & holding register to empty
+ */
+static void wait_for_xmitr(struct uart_8250_port *up, int bits)
{
unsigned int status, tmout = 10000;
@@ -2073,16 +2076,6 @@ static void wait_for_lsr(struct uart_8250_port *up, int bits)
udelay(1);
touch_nmi_watchdog();
}
-}
-
-/*
- * Wait for transmitter & holding register to empty
- */
-static void wait_for_xmitr(struct uart_8250_port *up, int bits)
-{
- unsigned int tmout;
-
- wait_for_lsr(up, bits);
/* Wait up to 1s for flow control if necessary */
if (up->port.flags & UPF_CONS_FLOW) {
@@ -3333,35 +3326,6 @@ static void serial8250_console_restore(struct uart_8250_port *up)
}
/*
- * Print a string to the serial port using the device FIFO
- *
- * It sends fifosize bytes and then waits for the fifo
- * to get empty.
- */
-static void serial8250_console_fifo_write(struct uart_8250_port *up,
- const char *s, unsigned int count)
-{
- int i;
- const char *end = s + count;
- unsigned int fifosize = up->port.fifosize;
- bool cr_sent = false;
-
- while (s != end) {
- wait_for_lsr(up, UART_LSR_THRE);
-
- for (i = 0; i < fifosize && s != end; ++i) {
- if (*s == '\n' && !cr_sent) {
- serial_out(up, UART_TX, '\r');
- cr_sent = true;
- } else {
- serial_out(up, UART_TX, *s++);
- cr_sent = false;
- }
- }
- }
-}
-
-/*
* Print a string to the serial port trying not to disturb
* any possible real use of the port...
*
@@ -3376,7 +3340,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
struct uart_8250_em485 *em485 = up->em485;
struct uart_port *port = &up->port;
unsigned long flags;
- unsigned int ier, use_fifo;
+ unsigned int ier;
int locked = 1;
touch_nmi_watchdog();
@@ -3408,20 +3372,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
mdelay(port->rs485.delay_rts_before_send);
}
- use_fifo = (up->capabilities & UART_CAP_FIFO) &&
- port->fifosize > 1 &&
- (serial_port_in(port, UART_FCR) & UART_FCR_ENABLE_FIFO) &&
- /*
- * After we put a data in the fifo, the controller will send
- * it regardless of the CTS state. Therefore, only use fifo
- * if we don't use control flow.
- */
- !(up->port.flags & UPF_CONS_FLOW);
-
- if (likely(use_fifo))
- serial8250_console_fifo_write(up, s, count);
- else
- uart_console_write(port, s, count, serial8250_console_putchar);
+ uart_console_write(port, s, count, serial8250_console_putchar);
/*
* Finally, wait for transmitter to become empty
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 1f1df46242f9..ba053a68529f 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -1582,9 +1582,6 @@ static void pl011_set_mctrl(struct uart_port *port, unsigned int mctrl)
container_of(port, struct uart_amba_port, port);
unsigned int cr;
- if (port->rs485.flags & SER_RS485_ENABLED)
- mctrl &= ~TIOCM_RTS;
-
cr = pl011_read(uap, REG_CR);
#define TIOCMBIT(tiocmbit, uartbit) \
@@ -1808,14 +1805,8 @@ static int pl011_startup(struct uart_port *port)
cr &= UART011_CR_RTS | UART011_CR_DTR;
cr |= UART01x_CR_UARTEN | UART011_CR_RXE;
- if (port->rs485.flags & SER_RS485_ENABLED) {
- if (port->rs485.flags & SER_RS485_RTS_AFTER_SEND)
- cr &= ~UART011_CR_RTS;
- else
- cr |= UART011_CR_RTS;
- } else {
+ if (!(port->rs485.flags & SER_RS485_ENABLED))
cr |= UART011_CR_TXE;
- }
pl011_write(cr, uap, REG_CR);
diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c
index 3c92d4e01488..516cff362434 100644
--- a/drivers/tty/serial/max3100.c
+++ b/drivers/tty/serial/max3100.c
@@ -805,7 +805,7 @@ static int max3100_probe(struct spi_device *spi)
return 0;
}
-static int max3100_remove(struct spi_device *spi)
+static void max3100_remove(struct spi_device *spi)
{
struct max3100_port *s = spi_get_drvdata(spi);
int i;
@@ -828,13 +828,12 @@ static int max3100_remove(struct spi_device *spi)
for (i = 0; i < MAX_MAX3100; i++)
if (max3100s[i]) {
mutex_unlock(&max3100s_lock);
- return 0;
+ return;
}
pr_debug("removing max3100 driver\n");
uart_unregister_driver(&max3100_uart_driver);
mutex_unlock(&max3100s_lock);
- return 0;
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
index dde0824b2fa5..3112b4a05448 100644
--- a/drivers/tty/serial/max310x.c
+++ b/drivers/tty/serial/max310x.c
@@ -1487,10 +1487,9 @@ static int max310x_spi_probe(struct spi_device *spi)
return max310x_probe(&spi->dev, devtype, regmap, spi->irq);
}
-static int max310x_spi_remove(struct spi_device *spi)
+static void max310x_spi_remove(struct spi_device *spi)
{
max310x_remove(&spi->dev);
- return 0;
}
static const struct spi_device_id max310x_id_table[] = {
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 64e7e6c8145f..3a6c68e19c80 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -734,12 +734,15 @@ static irqreturn_t sc16is7xx_irq(int irq, void *dev_id)
static void sc16is7xx_tx_proc(struct kthread_work *ws)
{
struct uart_port *port = &(to_sc16is7xx_one(ws, tx_work)->port);
+ struct sc16is7xx_port *s = dev_get_drvdata(port->dev);
if ((port->rs485.flags & SER_RS485_ENABLED) &&
(port->rs485.delay_rts_before_send > 0))
msleep(port->rs485.delay_rts_before_send);
+ mutex_lock(&s->efr_lock);
sc16is7xx_handle_tx(port);
+ mutex_unlock(&s->efr_lock);
}
static void sc16is7xx_reconf_rs485(struct uart_port *port)
@@ -1440,11 +1443,9 @@ static int sc16is7xx_spi_probe(struct spi_device *spi)
return sc16is7xx_probe(&spi->dev, devtype, regmap, spi->irq);
}
-static int sc16is7xx_spi_remove(struct spi_device *spi)
+static void sc16is7xx_spi_remove(struct spi_device *spi)
{
sc16is7xx_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id sc16is7xx_spi_id_table[] = {
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index dc40c4155356..0db90be4c3bc 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -144,6 +144,11 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear)
unsigned long flags;
unsigned int old;
+ if (port->rs485.flags & SER_RS485_ENABLED) {
+ set &= ~TIOCM_RTS;
+ clear &= ~TIOCM_RTS;
+ }
+
spin_lock_irqsave(&port->lock, flags);
old = port->mctrl;
port->mctrl = (old & ~clear) | set;
@@ -157,23 +162,10 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear)
static void uart_port_dtr_rts(struct uart_port *uport, int raise)
{
- int rs485_on = uport->rs485_config &&
- (uport->rs485.flags & SER_RS485_ENABLED);
- int RTS_after_send = !!(uport->rs485.flags & SER_RS485_RTS_AFTER_SEND);
-
- if (raise) {
- if (rs485_on && RTS_after_send) {
- uart_set_mctrl(uport, TIOCM_DTR);
- uart_clear_mctrl(uport, TIOCM_RTS);
- } else {
- uart_set_mctrl(uport, TIOCM_DTR | TIOCM_RTS);
- }
- } else {
- unsigned int clear = TIOCM_DTR;
-
- clear |= (!rs485_on || RTS_after_send) ? TIOCM_RTS : 0;
- uart_clear_mctrl(uport, clear);
- }
+ if (raise)
+ uart_set_mctrl(uport, TIOCM_DTR | TIOCM_RTS);
+ else
+ uart_clear_mctrl(uport, TIOCM_DTR | TIOCM_RTS);
}
/*
@@ -1075,11 +1067,6 @@ uart_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear)
goto out;
if (!tty_io_error(tty)) {
- if (uport->rs485.flags & SER_RS485_ENABLED) {
- set &= ~TIOCM_RTS;
- clear &= ~TIOCM_RTS;
- }
-
uart_update_mctrl(uport, set, clear);
ret = 0;
}
@@ -2390,6 +2377,9 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
*/
spin_lock_irqsave(&port->lock, flags);
port->mctrl &= TIOCM_DTR;
+ if (port->rs485.flags & SER_RS485_ENABLED &&
+ !(port->rs485.flags & SER_RS485_RTS_AFTER_SEND))
+ port->mctrl |= TIOCM_RTS;
port->ops->set_mctrl(port, port->mctrl);
spin_unlock_irqrestore(&port->lock, flags);
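
These serial_core hunks centralize the RS-485 rule in uart_update_mctrl(): when RS-485 owns the RTS line, neither set nor clear requests may touch it, which now covers TIOCMSET and the DTR/RTS handling on open/close alike; uart_configure_port() then seeds RTS itself when SER_RS485_RTS_AFTER_SEND is unset. The filter, reduced to a sketch:

#define TIOCM_RTS		0x004
#define SER_RS485_ENABLED	(1 << 0)

/* With RS-485 enabled the driver toggles RTS around transmissions,
 * so user-driven modem-control updates must leave it alone. */
static void rs485_filter_mctrl(unsigned long rs485_flags,
			       unsigned int *set, unsigned int *clear)
{
	if (rs485_flags & SER_RS485_ENABLED) {
		*set &= ~TIOCM_RTS;
		*clear &= ~TIOCM_RTS;
	}
}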
diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
index 1f89ab0e49ac..9570002d07e7 100644
--- a/drivers/tty/serial/stm32-usart.c
+++ b/drivers/tty/serial/stm32-usart.c
@@ -550,11 +550,23 @@ static void stm32_usart_transmit_chars(struct uart_port *port)
struct stm32_port *stm32_port = to_stm32_port(port);
const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
struct circ_buf *xmit = &port->state->xmit;
+ u32 isr;
+ int ret;
if (port->x_char) {
if (stm32_usart_tx_dma_started(stm32_port) &&
stm32_usart_tx_dma_enabled(stm32_port))
stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_DMAT);
+
+ /* Check that TDR is empty before filling FIFO */
+ ret =
+ readl_relaxed_poll_timeout_atomic(port->membase + ofs->isr,
+ isr,
+ (isr & USART_SR_TXE),
+ 10, 1000);
+ if (ret)
+ dev_warn(port->dev, "1 character may be erased\n");
+
writel_relaxed(port->x_char, port->membase + ofs->tdr);
port->x_char = 0;
port->icount.tx++;
@@ -730,7 +742,7 @@ static void stm32_usart_start_tx(struct uart_port *port)
struct serial_rs485 *rs485conf = &port->rs485;
struct circ_buf *xmit = &port->state->xmit;
- if (uart_circ_empty(xmit))
+ if (uart_circ_empty(xmit) && !port->x_char)
return;
if (rs485conf->flags & SER_RS485_ENABLED) {
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 7e8b3bd59c7b..8fec1d8648f5 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -3088,7 +3088,7 @@ struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx)
{
struct tty_struct *tty;
- tty = kzalloc(sizeof(*tty), GFP_KERNEL);
+ tty = kzalloc(sizeof(*tty), GFP_KERNEL_ACCOUNT);
if (!tty)
return NULL;
diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c
index 3639bb6dc372..58013698635f 100644
--- a/drivers/tty/vt/vt_ioctl.c
+++ b/drivers/tty/vt/vt_ioctl.c
@@ -599,8 +599,8 @@ static int vt_setactivate(struct vt_setactivate __user *sa)
if (vsa.console == 0 || vsa.console > MAX_NR_CONSOLES)
return -ENXIO;
- vsa.console = array_index_nospec(vsa.console, MAX_NR_CONSOLES + 1);
vsa.console--;
+ vsa.console = array_index_nospec(vsa.console, MAX_NR_CONSOLES);
console_lock();
ret = vc_allocate(vsa.console);
if (ret) {
@@ -845,6 +845,7 @@ int vt_ioctl(struct tty_struct *tty,
return -ENXIO;
arg--;
+ arg = array_index_nospec(arg, MAX_NR_CONSOLES);
console_lock();
ret = vc_allocate(arg);
console_unlock();
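
Both vt_ioctl hunks fix the same ordering bug: the speculation clamp must be applied to the 0-based value that actually indexes the console array, i.e. after the decrement, and against the array bound rather than bound + 1. The corrected pattern as a sketch, assuming the kernel's MAX_NR_CONSOLES of 63:

#include <linux/nospec.h>

#define MAX_NR_CONSOLES 63

/* Validate a 1-based console number, convert it to a 0-based index,
 * then clamp the index that is really used under speculation. */
static int vt_console_index(unsigned int arg)
{
	if (arg == 0 || arg > MAX_NR_CONSOLES)
		return -1;
	arg--;
	return array_index_nospec(arg, MAX_NR_CONSOLES);
}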
diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c
index 55c73b1d8704..d00ff98dffab 100644
--- a/drivers/usb/cdns3/drd.c
+++ b/drivers/usb/cdns3/drd.c
@@ -483,11 +483,11 @@ int cdns_drd_exit(struct cdns *cdns)
/* Indicate the cdns3 core was power lost before */
bool cdns_power_is_lost(struct cdns *cdns)
{
- if (cdns->version == CDNS3_CONTROLLER_V1) {
- if (!(readl(&cdns->otg_v1_regs->simulate) & BIT(0)))
+ if (cdns->version == CDNS3_CONTROLLER_V0) {
+ if (!(readl(&cdns->otg_v0_regs->simulate) & BIT(0)))
return true;
} else {
- if (!(readl(&cdns->otg_v0_regs->simulate) & BIT(0)))
+ if (!(readl(&cdns->otg_v1_regs->simulate) & BIT(0)))
return true;
}
return false;
diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 73f419adce61..4bb6d304eb4b 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -1919,6 +1919,7 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data,
struct usbtmc_ctrlrequest request;
u8 *buffer = NULL;
int rv;
+ unsigned int is_in, pipe;
unsigned long res;
res = copy_from_user(&request, arg, sizeof(struct usbtmc_ctrlrequest));
@@ -1928,12 +1929,14 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data,
if (request.req.wLength > USBTMC_BUFSIZE)
return -EMSGSIZE;
+ is_in = request.req.bRequestType & USB_DIR_IN;
+
if (request.req.wLength) {
buffer = kmalloc(request.req.wLength, GFP_KERNEL);
if (!buffer)
return -ENOMEM;
- if ((request.req.bRequestType & USB_DIR_IN) == 0) {
+ if (!is_in) {
/* Send control data to device */
res = copy_from_user(buffer, request.data,
request.req.wLength);
@@ -1944,8 +1947,12 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data,
}
}
+ if (is_in)
+ pipe = usb_rcvctrlpipe(data->usb_dev, 0);
+ else
+ pipe = usb_sndctrlpipe(data->usb_dev, 0);
rv = usb_control_msg(data->usb_dev,
- usb_rcvctrlpipe(data->usb_dev, 0),
+ pipe,
request.req.bRequest,
request.req.bRequestType,
request.req.wValue,
@@ -1957,7 +1964,7 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data,
goto exit;
}
- if (rv && (request.req.bRequestType & USB_DIR_IN)) {
+ if (rv && is_in) {
/* Read control data from device */
res = copy_to_user(request.data, buffer, rv);
if (res)
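
The usbtmc hunks derive the control pipe from bRequestType once: IN transfers use the receive pipe and OUT transfers the send pipe, where the old code used the receive pipe unconditionally. The selection, as a hypothetical wrapper:

#include <linux/usb.h>

/* Pick the control pipe that matches the request direction. */
static unsigned int usbtmc_ctrl_pipe(struct usb_device *udev,
				     u8 bRequestType)
{
	return (bRequestType & USB_DIR_IN) ?
		usb_rcvctrlpipe(udev, 0) :
		usb_sndctrlpipe(udev, 0);
}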
diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c
index 4169cf40a03b..5509d3847af4 100644
--- a/drivers/usb/common/ulpi.c
+++ b/drivers/usb/common/ulpi.c
@@ -39,8 +39,11 @@ static int ulpi_match(struct device *dev, struct device_driver *driver)
struct ulpi *ulpi = to_ulpi_dev(dev);
const struct ulpi_device_id *id;
- /* Some ULPI devices don't have a vendor id so rely on OF match */
- if (ulpi->id.vendor == 0)
+ /*
+ * Some ULPI devices don't have a vendor id
+ * or provide an id_table so rely on OF match.
+ */
+ if (ulpi->id.vendor == 0 || !drv->id_table)
return of_driver_match_device(dev, driver);
for (id = drv->id_table; id->vendor; id++)
@@ -127,6 +130,7 @@ static const struct attribute_group *ulpi_dev_attr_groups[] = {
static void ulpi_dev_release(struct device *dev)
{
+ of_node_put(dev->of_node);
kfree(to_ulpi_dev(dev));
}
@@ -244,12 +248,16 @@ static int ulpi_register(struct device *dev, struct ulpi *ulpi)
return ret;
ret = ulpi_read_id(ulpi);
- if (ret)
+ if (ret) {
+ of_node_put(ulpi->dev.of_node);
return ret;
+ }
ret = device_register(&ulpi->dev);
- if (ret)
+ if (ret) {
+ put_device(&ulpi->dev);
return ret;
+ }
dev_dbg(&ulpi->dev, "registered ULPI PHY: vendor %04x, product %04x\n",
ulpi->id.vendor, ulpi->id.product);
@@ -296,7 +304,6 @@ EXPORT_SYMBOL_GPL(ulpi_register_interface);
*/
void ulpi_unregister_interface(struct ulpi *ulpi)
{
- of_node_put(ulpi->dev.of_node);
device_unregister(&ulpi->dev);
}
EXPORT_SYMBOL_GPL(ulpi_unregister_interface);
diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c
index d630cccd2e6e..dd44e37a454a 100644
--- a/drivers/usb/core/hcd-pci.c
+++ b/drivers/usb/core/hcd-pci.c
@@ -446,7 +446,7 @@ static int suspend_common(struct device *dev, bool do_wakeup)
HCD_WAKEUP_PENDING(hcd->shared_hcd))
return -EBUSY;
retval = hcd->driver->pci_suspend(hcd, do_wakeup);
- suspend_report_result(hcd->driver->pci_suspend, retval);
+ suspend_report_result(dev, hcd->driver->pci_suspend, retval);
/* Check again in case wakeup raced with pci_suspend */
if ((retval == 0 && do_wakeup && HCD_WAKEUP_PENDING(hcd)) ||
@@ -556,7 +556,7 @@ static int hcd_pci_suspend_noirq(struct device *dev)
dev_dbg(dev, "--> PCI %s\n",
pci_power_name(pci_dev->current_state));
} else {
- suspend_report_result(pci_prepare_to_sleep, retval);
+ suspend_report_result(dev, pci_prepare_to_sleep, retval);
return retval;
}
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 3e01dd6e509b..d9712c2602af 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1563,6 +1563,13 @@ int usb_hcd_submit_urb (struct urb *urb, gfp_t mem_flags)
urb->hcpriv = NULL;
INIT_LIST_HEAD(&urb->urb_list);
atomic_dec(&urb->use_count);
+ /*
+ * Order the write of urb->use_count above before the read
+ * of urb->reject below. Pairs with the memory barriers in
+ * usb_kill_urb() and usb_poison_urb().
+ */
+ smp_mb__after_atomic();
+
atomic_dec(&urb->dev->urbnum);
if (atomic_read(&urb->reject))
wake_up(&usb_kill_urb_queue);
@@ -1665,6 +1672,13 @@ static void __usb_hcd_giveback_urb(struct urb *urb)
usb_anchor_resume_wakeups(anchor);
atomic_dec(&urb->use_count);
+ /*
+ * Order the write of urb->use_count above before the read
+ * of urb->reject below. Pairs with the memory barriers in
+ * usb_kill_urb() and usb_poison_urb().
+ */
+ smp_mb__after_atomic();
+
if (unlikely(atomic_read(&urb->reject)))
wake_up(&usb_kill_urb_queue);
usb_put_urb(urb);
diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c
index c2bbf97a79be..d5bc36ca5b1f 100644
--- a/drivers/usb/core/port.c
+++ b/drivers/usb/core/port.c
@@ -602,11 +602,14 @@ int usb_hub_create_port_device(struct usb_hub *hub, int port1)
return retval;
}
- find_and_link_peer(hub, port1);
-
retval = component_add(&port_dev->dev, &connector_ops);
- if (retval)
+ if (retval) {
dev_warn(&port_dev->dev, "failed to add component\n");
+ device_unregister(&port_dev->dev);
+ return retval;
+ }
+
+ find_and_link_peer(hub, port1);
/*
* Enable runtime pm and hold a reference that hub_configure()
diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index 30727729a44c..33d62d7e3929 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -715,6 +715,12 @@ void usb_kill_urb(struct urb *urb)
if (!(urb && urb->dev && urb->ep))
return;
atomic_inc(&urb->reject);
+ /*
+ * Order the write of urb->reject above before the read
+ * of urb->use_count below. Pairs with the barriers in
+ * __usb_hcd_giveback_urb() and usb_hcd_submit_urb().
+ */
+ smp_mb__after_atomic();
usb_hcd_unlink_urb(urb, -ENOENT);
wait_event(usb_kill_urb_queue, atomic_read(&urb->use_count) == 0);
@@ -756,6 +762,12 @@ void usb_poison_urb(struct urb *urb)
if (!urb)
return;
atomic_inc(&urb->reject);
+ /*
+ * Order the write of urb->reject above before the read
+ * of urb->use_count below. Pairs with the barriers in
+ * __usb_hcd_giveback_urb() and usb_hcd_submit_urb().
+ */
+ smp_mb__after_atomic();
if (!urb->dev || !urb->ep)
return;
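
The four barrier sites in hcd.c and urb.c form one store-buffering pattern: usb_kill_urb()/usb_poison_urb() store reject and then read use_count, while the submit and giveback paths store use_count and then read reject. With a full barrier between each store/load pair, at least one side is guaranteed to observe the other's store, so a killer can never sleep on a wakeup that already happened. Reduced to a two-thread sketch:

#include <linux/atomic.h>
#include <linux/types.h>

static atomic_t use_count = ATOMIC_INIT(1);
static atomic_t reject = ATOMIC_INIT(0);

/* usb_kill_urb() side: publish reject, then look at use_count. */
static bool killer(void)
{
	atomic_inc(&reject);
	smp_mb__after_atomic();			/* store reject before load use_count */
	return atomic_read(&use_count) == 0;	/* else wait on usb_kill_urb_queue */
}

/* giveback/submit side: publish the drop, then look at reject. */
static bool giveback(void)
{
	atomic_dec(&use_count);
	smp_mb__after_atomic();			/* store use_count before load reject */
	return atomic_read(&reject) != 0;	/* then wake_up(&usb_kill_urb_queue) */
}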
diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index 8a63da3ab39d..88c337bf564f 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -1418,6 +1418,7 @@ void dwc2_hsotg_core_connect(struct dwc2_hsotg *hsotg);
void dwc2_hsotg_disconnect(struct dwc2_hsotg *dwc2);
int dwc2_hsotg_set_test_mode(struct dwc2_hsotg *hsotg, int testmode);
#define dwc2_is_device_connected(hsotg) (hsotg->connected)
+#define dwc2_is_device_enabled(hsotg) (hsotg->enabled)
int dwc2_backup_device_registers(struct dwc2_hsotg *hsotg);
int dwc2_restore_device_registers(struct dwc2_hsotg *hsotg, int remote_wakeup);
int dwc2_gadget_enter_hibernation(struct dwc2_hsotg *hsotg);
@@ -1454,6 +1455,7 @@ static inline int dwc2_hsotg_set_test_mode(struct dwc2_hsotg *hsotg,
int testmode)
{ return 0; }
#define dwc2_is_device_connected(hsotg) (0)
+#define dwc2_is_device_enabled(hsotg) (0)
static inline int dwc2_backup_device_registers(struct dwc2_hsotg *hsotg)
{ return 0; }
static inline int dwc2_restore_device_registers(struct dwc2_hsotg *hsotg,
diff --git a/drivers/usb/dwc2/drd.c b/drivers/usb/dwc2/drd.c
index 1b39c4776369..d8d6493bc457 100644
--- a/drivers/usb/dwc2/drd.c
+++ b/drivers/usb/dwc2/drd.c
@@ -130,8 +130,10 @@ static int dwc2_drd_role_sw_set(struct usb_role_switch *sw, enum usb_role role)
already = dwc2_ovr_avalid(hsotg, true);
} else if (role == USB_ROLE_DEVICE) {
already = dwc2_ovr_bvalid(hsotg, true);
- /* This clear DCTL.SFTDISCON bit */
- dwc2_hsotg_core_connect(hsotg);
+ if (dwc2_is_device_enabled(hsotg)) {
+ /* This clear DCTL.SFTDISCON bit */
+ dwc2_hsotg_core_connect(hsotg);
+ }
} else {
if (dwc2_is_device_mode(hsotg)) {
if (!dwc2_ovr_bvalid(hsotg, false))
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index 2bc03f41c70a..eee3504397e6 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -5097,7 +5097,7 @@ int dwc2_hsotg_suspend(struct dwc2_hsotg *hsotg)
hsotg->gadget.speed = USB_SPEED_UNKNOWN;
spin_unlock_irqrestore(&hsotg->lock, flags);
- for (ep = 0; ep < hsotg->num_of_eps; ep++) {
+ for (ep = 1; ep < hsotg->num_of_eps; ep++) {
if (hsotg->eps_in[ep])
dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep);
if (hsotg->eps_out[ep])
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index 7ff8fc8f79a9..06d0e88ec8af 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -43,6 +43,7 @@
#define PCI_DEVICE_ID_INTEL_ADLP 0x51ee
#define PCI_DEVICE_ID_INTEL_ADLM 0x54ee
#define PCI_DEVICE_ID_INTEL_ADLS 0x7ae1
+#define PCI_DEVICE_ID_INTEL_RPLS 0x7a61
#define PCI_DEVICE_ID_INTEL_TGL 0x9a15
#define PCI_DEVICE_ID_AMD_MR 0x163a
@@ -85,8 +86,8 @@ static const struct acpi_gpio_mapping acpi_dwc3_byt_gpios[] = {
static struct gpiod_lookup_table platform_bytcr_gpios = {
.dev_id = "0000:00:16.0",
.table = {
- GPIO_LOOKUP("INT33FC:00", 54, "reset", GPIO_ACTIVE_HIGH),
- GPIO_LOOKUP("INT33FC:02", 14, "cs", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("INT33FC:00", 54, "cs", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("INT33FC:02", 14, "reset", GPIO_ACTIVE_HIGH),
{}
},
};
@@ -119,6 +120,13 @@ static const struct property_entry dwc3_pci_intel_properties[] = {
{}
};
+static const struct property_entry dwc3_pci_intel_byt_properties[] = {
+ PROPERTY_ENTRY_STRING("dr_mode", "peripheral"),
+ PROPERTY_ENTRY_BOOL("snps,dis_u2_susphy_quirk"),
+ PROPERTY_ENTRY_BOOL("linux,sysdev_is_parent"),
+ {}
+};
+
static const struct property_entry dwc3_pci_mrfld_properties[] = {
PROPERTY_ENTRY_STRING("dr_mode", "otg"),
PROPERTY_ENTRY_STRING("linux,extcon-name", "mrfld_bcove_pwrsrc"),
@@ -161,6 +169,10 @@ static const struct software_node dwc3_pci_intel_swnode = {
.properties = dwc3_pci_intel_properties,
};
+static const struct software_node dwc3_pci_intel_byt_swnode = {
+ .properties = dwc3_pci_intel_byt_properties,
+};
+
static const struct software_node dwc3_pci_intel_mrfld_swnode = {
.properties = dwc3_pci_mrfld_properties,
};
@@ -344,7 +356,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
(kernel_ulong_t) &dwc3_pci_intel_swnode, },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BYT),
- (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+ (kernel_ulong_t) &dwc3_pci_intel_byt_swnode, },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MRFLD),
(kernel_ulong_t) &dwc3_pci_intel_mrfld_swnode, },
@@ -409,6 +421,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ADLS),
(kernel_ulong_t) &dwc3_pci_intel_swnode, },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPLS),
+ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL),
(kernel_ulong_t) &dwc3_pci_intel_swnode, },
diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c
index 9cc3ad701a29..a6f3a9b38789 100644
--- a/drivers/usb/dwc3/dwc3-xilinx.c
+++ b/drivers/usb/dwc3/dwc3-xilinx.c
@@ -99,17 +99,29 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data)
struct device *dev = priv_data->dev;
struct reset_control *crst, *hibrst, *apbrst;
struct phy *usb3_phy;
- int ret;
+ int ret = 0;
u32 reg;
- usb3_phy = devm_phy_get(dev, "usb3-phy");
- if (PTR_ERR(usb3_phy) == -EPROBE_DEFER) {
- ret = -EPROBE_DEFER;
+ usb3_phy = devm_phy_optional_get(dev, "usb3-phy");
+ if (IS_ERR(usb3_phy)) {
+ ret = PTR_ERR(usb3_phy);
+ dev_err_probe(dev, ret,
+ "failed to get USB3 PHY\n");
goto err;
- } else if (IS_ERR(usb3_phy)) {
- usb3_phy = NULL;
}
+ /*
+ * The following core resets are not required unless a USB3 PHY
+ * is used, and the subsequent register settings are not required
+ * unless a core reset is performed (they should be set properly
+ * by the first-stage boot loader, but may be reverted by a core
+ * reset). They may also break the configuration if USB3 is actually
+ * in use but the usb3-phy entry is missing from the device tree.
+ * Therefore, skip these operations in this case.
+ */
+ if (!usb3_phy)
+ goto skip_usb3_phy;
+
crst = devm_reset_control_get_exclusive(dev, "usb_crst");
if (IS_ERR(crst)) {
ret = PTR_ERR(crst);
@@ -188,6 +200,7 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data)
goto err;
}
+skip_usb3_phy:
/*
* This routes the USB DMA traffic to go through FPD path instead
* of reaching DDR directly. This traffic routing is needed to
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 520031ba38aa..a0c883f19a41 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1291,6 +1291,19 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
if (usb_endpoint_xfer_bulk(dep->endpoint.desc) && dep->stream_capable)
trb->ctrl |= DWC3_TRB_CTRL_SID_SOFN(stream_id);
+ /*
+ * As per data book 4.2.3.2 TRB Control Bit Rules section
+ *
+ * The controller autonomously checks the HWO field of a TRB to determine if the
+ * entire TRB is valid. Therefore, software must ensure that the rest of the TRB
+ * is valid before setting the HWO field to '1'. In most systems, this means that
+ * software must update the fourth DWORD of a TRB last.
+ *
+ * However, there is a possibility of CPU re-ordering here which can
+ * cause the controller to observe the HWO bit set prematurely.
+ * Add a write memory barrier to prevent CPU re-ordering.
+ */
+ wmb();
trb->ctrl |= DWC3_TRB_CTRL_HWO;
dwc3_ep_inc_enq(dep);
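
The wmb() above enforces the databook rule on weakly ordered CPUs: every other TRB dword must be globally visible before the ownership bit is. As a standalone sketch, with simplified field names:

#include <asm/barrier.h>
#include <linux/types.h>

struct trb {
	u32 bpl, bph, size, ctrl;
};

#define TRB_CTRL_HWO	(1u << 0)

/* Fill in the whole TRB, then hand it to the controller last, with a
 * write barrier so the DMA engine never sees HWO with stale fields. */
static void trb_publish(struct trb *trb, u32 bpl, u32 bph, u32 size, u32 ctrl)
{
	trb->bpl = bpl;
	trb->bph = bph;
	trb->size = size;
	trb->ctrl = ctrl & ~TRB_CTRL_HWO;
	wmb();				/* order the fields before HWO */
	trb->ctrl |= TRB_CTRL_HWO;	/* ownership passes to hardware */
}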
@@ -4147,9 +4160,11 @@ static irqreturn_t dwc3_thread_interrupt(int irq, void *_evt)
unsigned long flags;
irqreturn_t ret = IRQ_NONE;
+ local_bh_disable();
spin_lock_irqsave(&dwc->lock, flags);
ret = dwc3_process_event_buf(evt);
spin_unlock_irqrestore(&dwc->lock, flags);
+ local_bh_enable();
return ret;
}
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 16f9e3423c9f..9315313108c9 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1988,6 +1988,9 @@ unknown:
if (w_index != 0x5 || (w_value >> 8))
break;
interface = w_value & 0xFF;
+ if (interface >= MAX_CONFIG_INTERFACES ||
+ !os_desc_cfg->interface[interface])
+ break;
buf[6] = w_index;
count = count_ext_prop(os_desc_cfg,
interface);
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 25ad1e97a458..1922fd02043c 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1711,16 +1711,24 @@ static void ffs_data_put(struct ffs_data *ffs)
static void ffs_data_closed(struct ffs_data *ffs)
{
+ struct ffs_epfile *epfiles;
+ unsigned long flags;
+
ENTER();
if (atomic_dec_and_test(&ffs->opened)) {
if (ffs->no_disconnect) {
ffs->state = FFS_DEACTIVATED;
- if (ffs->epfiles) {
- ffs_epfiles_destroy(ffs->epfiles,
- ffs->eps_count);
- ffs->epfiles = NULL;
- }
+ spin_lock_irqsave(&ffs->eps_lock, flags);
+ epfiles = ffs->epfiles;
+ ffs->epfiles = NULL;
+ spin_unlock_irqrestore(&ffs->eps_lock,
+ flags);
+
+ if (epfiles)
+ ffs_epfiles_destroy(epfiles,
+ ffs->eps_count);
+
if (ffs->setup_state == FFS_SETUP_PENDING)
__ffs_ep0_stall(ffs);
} else {
@@ -1767,14 +1775,27 @@ static struct ffs_data *ffs_data_new(const char *dev_name)
static void ffs_data_clear(struct ffs_data *ffs)
{
+ struct ffs_epfile *epfiles;
+ unsigned long flags;
+
ENTER();
ffs_closed(ffs);
BUG_ON(ffs->gadget);
- if (ffs->epfiles) {
- ffs_epfiles_destroy(ffs->epfiles, ffs->eps_count);
+ spin_lock_irqsave(&ffs->eps_lock, flags);
+ epfiles = ffs->epfiles;
+ ffs->epfiles = NULL;
+ spin_unlock_irqrestore(&ffs->eps_lock, flags);
+
+ /*
+ * A race is possible between ffs_func_eps_disable()
+ * and ffs_epfile_release(); keeping a local copy of
+ * epfiles saves us from a use-after-free.
+ */
+ if (epfiles) {
+ ffs_epfiles_destroy(epfiles, ffs->eps_count);
ffs->epfiles = NULL;
}
@@ -1922,12 +1943,15 @@ static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count)
static void ffs_func_eps_disable(struct ffs_function *func)
{
- struct ffs_ep *ep = func->eps;
- struct ffs_epfile *epfile = func->ffs->epfiles;
- unsigned count = func->ffs->eps_count;
+ struct ffs_ep *ep;
+ struct ffs_epfile *epfile;
+ unsigned short count;
unsigned long flags;
spin_lock_irqsave(&func->ffs->eps_lock, flags);
+ count = func->ffs->eps_count;
+ epfile = func->ffs->epfiles;
+ ep = func->eps;
while (count--) {
/* pending requests get nuked */
if (ep->ep)
@@ -1945,14 +1969,18 @@ static void ffs_func_eps_disable(struct ffs_function *func)
static int ffs_func_eps_enable(struct ffs_function *func)
{
- struct ffs_data *ffs = func->ffs;
- struct ffs_ep *ep = func->eps;
- struct ffs_epfile *epfile = ffs->epfiles;
- unsigned count = ffs->eps_count;
+ struct ffs_data *ffs;
+ struct ffs_ep *ep;
+ struct ffs_epfile *epfile;
+ unsigned short count;
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&func->ffs->eps_lock, flags);
+ ffs = func->ffs;
+ ep = func->eps;
+ epfile = ffs->epfiles;
+ count = ffs->eps_count;
while(count--) {
ep->ep->driver_data = ep;
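
Both f_fs teardown hunks use the same snatch-under-lock idiom: take the epfiles pointer and NULL the shared field inside eps_lock, then destroy the snapshot outside it, so a concurrent ffs_func_eps_disable() or ffs_epfile_release() sees either NULL or a fully valid array, never a half-freed one. The idiom in isolation:

#include <linux/slab.h>
#include <linux/spinlock.h>

struct holder {
	spinlock_t lock;
	void *obj;	/* shared pointer, e.g. ffs->epfiles */
};

static void snatch_and_free(struct holder *h)
{
	unsigned long flags;
	void *obj;

	spin_lock_irqsave(&h->lock, flags);
	obj = h->obj;
	h->obj = NULL;		/* readers under the lock now see NULL */
	spin_unlock_irqrestore(&h->lock, flags);

	kfree(obj);		/* safe: no new reference can be taken */
}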
diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c
index 46dd11dcb3a8..7371c6e65b10 100644
--- a/drivers/usb/gadget/function/f_mass_storage.c
+++ b/drivers/usb/gadget/function/f_mass_storage.c
@@ -179,6 +179,7 @@
#include <linux/kthread.h>
#include <linux/sched/signal.h>
#include <linux/limits.h>
+#include <linux/pagemap.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
diff --git a/drivers/usb/gadget/function/f_sourcesink.c b/drivers/usb/gadget/function/f_sourcesink.c
index 1abf08e5164a..6803cd60cc6d 100644
--- a/drivers/usb/gadget/function/f_sourcesink.c
+++ b/drivers/usb/gadget/function/f_sourcesink.c
@@ -584,6 +584,7 @@ static int source_sink_start_ep(struct f_sourcesink *ss, bool is_in,
if (is_iso) {
switch (speed) {
+ case USB_SPEED_SUPER_PLUS:
case USB_SPEED_SUPER:
size = ss->isoc_maxpacket *
(ss->isoc_mult + 1) *
diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c
index 36fa6ef0581b..097a709549d6 100644
--- a/drivers/usb/gadget/function/f_uac2.c
+++ b/drivers/usb/gadget/function/f_uac2.c
@@ -203,7 +203,7 @@ static struct uac2_input_terminal_descriptor io_in_it_desc = {
.bDescriptorSubtype = UAC_INPUT_TERMINAL,
/* .bTerminalID = DYNAMIC */
- .wTerminalType = cpu_to_le16(UAC_INPUT_TERMINAL_UNDEFINED),
+ .wTerminalType = cpu_to_le16(UAC_INPUT_TERMINAL_MICROPHONE),
.bAssocTerminal = 0,
/* .bCSourceID = DYNAMIC */
.iChannelNames = 0,
@@ -231,7 +231,7 @@ static struct uac2_output_terminal_descriptor io_out_ot_desc = {
.bDescriptorSubtype = UAC_OUTPUT_TERMINAL,
/* .bTerminalID = DYNAMIC */
- .wTerminalType = cpu_to_le16(UAC_OUTPUT_TERMINAL_UNDEFINED),
+ .wTerminalType = cpu_to_le16(UAC_OUTPUT_TERMINAL_SPEAKER),
.bAssocTerminal = 0,
/* .bSourceID = DYNAMIC */
/* .bCSourceID = DYNAMIC */
diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c
index 431d5a7d737e..713efd9aefde 100644
--- a/drivers/usb/gadget/function/rndis.c
+++ b/drivers/usb/gadget/function/rndis.c
@@ -637,14 +637,18 @@ static int rndis_set_response(struct rndis_params *params,
rndis_set_cmplt_type *resp;
rndis_resp_t *r;
+ BufLength = le32_to_cpu(buf->InformationBufferLength);
+ BufOffset = le32_to_cpu(buf->InformationBufferOffset);
+ if ((BufLength > RNDIS_MAX_TOTAL_SIZE) ||
+ (BufOffset > RNDIS_MAX_TOTAL_SIZE) ||
+ (BufOffset + 8 >= RNDIS_MAX_TOTAL_SIZE))
+ return -EINVAL;
+
r = rndis_add_response(params, sizeof(rndis_set_cmplt_type));
if (!r)
return -ENOMEM;
resp = (rndis_set_cmplt_type *)r->buf;
- BufLength = le32_to_cpu(buf->InformationBufferLength);
- BufOffset = le32_to_cpu(buf->InformationBufferOffset);
-
#ifdef VERBOSE_DEBUG
pr_debug("%s: Length: %d\n", __func__, BufLength);
pr_debug("%s: Offset: %d\n", __func__, BufOffset);
@@ -919,6 +923,7 @@ struct rndis_params *rndis_register(void (*resp_avail)(void *v), void *v)
params->resp_avail = resp_avail;
params->v = v;
INIT_LIST_HEAD(&params->resp_queue);
+ spin_lock_init(&params->resp_lock);
pr_debug("%s: configNr = %d\n", __func__, i);
return params;
@@ -1012,12 +1017,14 @@ void rndis_free_response(struct rndis_params *params, u8 *buf)
{
rndis_resp_t *r, *n;
+ spin_lock(&params->resp_lock);
list_for_each_entry_safe(r, n, &params->resp_queue, list) {
if (r->buf == buf) {
list_del(&r->list);
kfree(r);
}
}
+ spin_unlock(&params->resp_lock);
}
EXPORT_SYMBOL_GPL(rndis_free_response);
@@ -1027,14 +1034,17 @@ u8 *rndis_get_next_response(struct rndis_params *params, u32 *length)
if (!length) return NULL;
+ spin_lock(&params->resp_lock);
list_for_each_entry_safe(r, n, &params->resp_queue, list) {
if (!r->send) {
r->send = 1;
*length = r->length;
+ spin_unlock(&params->resp_lock);
return r->buf;
}
}
+ spin_unlock(&params->resp_lock);
return NULL;
}
EXPORT_SYMBOL_GPL(rndis_get_next_response);
@@ -1051,7 +1061,9 @@ static rndis_resp_t *rndis_add_response(struct rndis_params *params, u32 length)
r->length = length;
r->send = 0;
+ spin_lock(&params->resp_lock);
list_add_tail(&r->list, &params->resp_queue);
+ spin_unlock(&params->resp_lock);
return r;
}
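
All three resp_queue paths now take the new resp_lock, so the list_del() in rndis_free_response() can no longer race the traversal in rndis_get_next_response(). The locking discipline, reduced to a sketch:

#include <linux/list.h>
#include <linux/spinlock.h>

struct resp {
	struct list_head list;
	int send;
};

/* Every walk or mutation of the queue holds the same spinlock. */
static struct resp *pop_unsent(struct list_head *queue, spinlock_t *lock)
{
	struct resp *r, *found = NULL;

	spin_lock(lock);
	list_for_each_entry(r, queue, list) {
		if (!r->send) {
			r->send = 1;
			found = r;
			break;
		}
	}
	spin_unlock(lock);
	return found;
}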
diff --git a/drivers/usb/gadget/function/rndis.h b/drivers/usb/gadget/function/rndis.h
index f6167f7fea82..6206b8b7490f 100644
--- a/drivers/usb/gadget/function/rndis.h
+++ b/drivers/usb/gadget/function/rndis.h
@@ -174,6 +174,7 @@ typedef struct rndis_params {
void (*resp_avail)(void *v);
void *v;
struct list_head resp_queue;
+ spinlock_t resp_lock;
} rndis_params;
/* RNDIS Message parser and other useless functions */
diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c
index c5a2c734234a..d86c3a36441e 100644
--- a/drivers/usb/gadget/legacy/raw_gadget.c
+++ b/drivers/usb/gadget/legacy/raw_gadget.c
@@ -1004,7 +1004,7 @@ static int raw_process_ep_io(struct raw_dev *dev, struct usb_raw_ep_io *io,
ret = -EBUSY;
goto out_unlock;
}
- if ((in && !ep->ep->caps.dir_in) || (!in && ep->ep->caps.dir_in)) {
+ if (in != usb_endpoint_dir_in(ep->ep->desc)) {
dev_dbg(&dev->gadget->dev, "fail, wrong direction\n");
ret = -EINVAL;
goto out_unlock;
diff --git a/drivers/usb/gadget/udc/at91_udc.c b/drivers/usb/gadget/udc/at91_udc.c
index dd0819df096e..9040a0561466 100644
--- a/drivers/usb/gadget/udc/at91_udc.c
+++ b/drivers/usb/gadget/udc/at91_udc.c
@@ -1895,7 +1895,7 @@ static int at91udc_probe(struct platform_device *pdev)
at91_vbus_irq, 0, driver_name, udc);
if (retval) {
DBG("request vbus irq %d failed\n",
- udc->board.vbus_pin);
+ desc_to_gpio(udc->board.vbus_pin));
goto err_unprepare_iclk;
}
}
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index 568534a0d17c..c109b069f511 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1436,7 +1436,6 @@ static void usb_gadget_remove_driver(struct usb_udc *udc)
usb_gadget_udc_stop(udc);
udc->driver = NULL;
- udc->dev.driver = NULL;
udc->gadget->dev.driver = NULL;
}
@@ -1498,7 +1497,6 @@ static int udc_bind_to_driver(struct usb_udc *udc, struct usb_gadget_driver *dri
driver->function);
udc->driver = driver;
- udc->dev.driver = &driver->driver;
udc->gadget->dev.driver = &driver->driver;
usb_gadget_udc_set_speed(udc, driver->max_speed);
@@ -1521,7 +1519,6 @@ err1:
dev_err(&udc->dev, "failed to start %s: %d\n",
udc->driver->function, ret);
udc->driver = NULL;
- udc->dev.driver = NULL;
udc->gadget->dev.driver = NULL;
return ret;
}
diff --git a/drivers/usb/gadget/udc/max3420_udc.c b/drivers/usb/gadget/udc/max3420_udc.c
index d2a2b20cc1ad..7d9bd16190c0 100644
--- a/drivers/usb/gadget/udc/max3420_udc.c
+++ b/drivers/usb/gadget/udc/max3420_udc.c
@@ -1292,7 +1292,7 @@ del_gadget:
return err;
}
-static int max3420_remove(struct spi_device *spi)
+static void max3420_remove(struct spi_device *spi)
{
struct max3420_udc *udc = spi_get_drvdata(spi);
unsigned long flags;
@@ -1304,8 +1304,6 @@ static int max3420_remove(struct spi_device *spi)
kthread_stop(udc->thread_task);
spin_unlock_irqrestore(&udc->lock, flags);
-
- return 0;
}
static const struct of_device_id max3420_udc_of_match[] = {
diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index 57d417a7c3e0..601829a6b4ba 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -2378,6 +2378,8 @@ static void handle_ext_role_switch_states(struct device *dev,
switch (role) {
case USB_ROLE_NONE:
usb3->connection_state = USB_ROLE_NONE;
+ if (cur_role == USB_ROLE_HOST)
+ device_release_driver(host);
if (usb3->driver)
usb3_disconnect(usb3);
usb3_vbus_out(usb3, false);
diff --git a/drivers/usb/gadget/udc/udc-xilinx.c b/drivers/usb/gadget/udc/udc-xilinx.c
index 6ce886fb7bfe..2907fad04e2c 100644
--- a/drivers/usb/gadget/udc/udc-xilinx.c
+++ b/drivers/usb/gadget/udc/udc-xilinx.c
@@ -1615,6 +1615,8 @@ static void xudc_getstatus(struct xusb_udc *udc)
break;
case USB_RECIP_ENDPOINT:
epnum = udc->setup.wIndex & USB_ENDPOINT_NUMBER_MASK;
+ if (epnum >= XUSB_MAX_ENDPOINTS)
+ goto stall;
target_ep = &udc->ep[epnum];
epcfgreg = udc->read_fn(udc->addr + target_ep->offset);
halt = epcfgreg & XUSB_EP_CFG_STALL_MASK;
@@ -1682,6 +1684,10 @@ static void xudc_set_clear_feature(struct xusb_udc *udc)
case USB_RECIP_ENDPOINT:
if (!udc->setup.wValue) {
endpoint = udc->setup.wIndex & USB_ENDPOINT_NUMBER_MASK;
+ if (endpoint >= XUSB_MAX_ENDPOINTS) {
+ xudc_ep0_stall(udc);
+ return;
+ }
target_ep = &udc->ep[endpoint];
outinbit = udc->setup.wIndex & USB_ENDPOINT_DIR_MASK;
outinbit = outinbit >> 7;
diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c
index 30de85a707fe..99a5523a79fb 100644
--- a/drivers/usb/host/max3421-hcd.c
+++ b/drivers/usb/host/max3421-hcd.c
@@ -1926,7 +1926,7 @@ error:
return retval;
}
-static int
+static void
max3421_remove(struct spi_device *spi)
{
struct max3421_hcd *max3421_hcd;
@@ -1947,7 +1947,6 @@ max3421_remove(struct spi_device *spi)
free_irq(spi->irq, hcd);
usb_put_hcd(hcd);
- return 0;
}
static const struct of_device_id max3421_of_match_table[] = {
diff --git a/drivers/usb/host/xen-hcd.c b/drivers/usb/host/xen-hcd.c
index be09fd9bac58..19b8c7ed74cb 100644
--- a/drivers/usb/host/xen-hcd.c
+++ b/drivers/usb/host/xen-hcd.c
@@ -716,8 +716,9 @@ static int xenhcd_map_urb_for_request(struct xenhcd_info *info, struct urb *urb,
return 0;
}
-static void xenhcd_gnttab_done(struct usb_shadow *shadow)
+static void xenhcd_gnttab_done(struct xenhcd_info *info, unsigned int id)
{
+ struct usb_shadow *shadow = info->shadow + id;
int nr_segs = 0;
int i;
@@ -726,8 +727,10 @@ static void xenhcd_gnttab_done(struct usb_shadow *shadow)
if (xenusb_pipeisoc(shadow->req.pipe))
nr_segs += shadow->req.u.isoc.nr_frame_desc_segs;
- for (i = 0; i < nr_segs; i++)
- gnttab_end_foreign_access(shadow->req.seg[i].gref, 0, 0UL);
+ for (i = 0; i < nr_segs; i++) {
+ if (!gnttab_try_end_foreign_access(shadow->req.seg[i].gref))
+ xenhcd_set_error(info, "backend didn't release grant");
+ }
shadow->req.nr_buffer_segs = 0;
shadow->req.u.isoc.nr_frame_desc_segs = 0;
@@ -841,7 +844,9 @@ static void xenhcd_cancel_all_enqueued_urbs(struct xenhcd_info *info)
list_for_each_entry_safe(urbp, tmp, &info->in_progress_list, list) {
req_id = urbp->req_id;
if (!urbp->unlinked) {
- xenhcd_gnttab_done(&info->shadow[req_id]);
+ xenhcd_gnttab_done(info, req_id);
+ if (info->error)
+ return;
if (urbp->urb->status == -EINPROGRESS)
/* not dequeued */
xenhcd_giveback_urb(info, urbp->urb,
@@ -942,8 +947,7 @@ static int xenhcd_urb_request_done(struct xenhcd_info *info)
rp = info->urb_ring.sring->rsp_prod;
if (RING_RESPONSE_PROD_OVERFLOW(&info->urb_ring, rp)) {
xenhcd_set_error(info, "Illegal index on urb-ring");
- spin_unlock_irqrestore(&info->lock, flags);
- return 0;
+ goto err;
}
rmb(); /* ensure we see queued responses up to "rp" */
@@ -952,11 +956,13 @@ static int xenhcd_urb_request_done(struct xenhcd_info *info)
id = res.id;
if (id >= XENUSB_URB_RING_SIZE) {
xenhcd_set_error(info, "Illegal data on urb-ring");
- continue;
+ goto err;
}
if (likely(xenusb_pipesubmit(info->shadow[id].req.pipe))) {
- xenhcd_gnttab_done(&info->shadow[id]);
+ xenhcd_gnttab_done(info, id);
+ if (info->error)
+ goto err;
urb = info->shadow[id].urb;
if (likely(urb)) {
urb->actual_length = res.actual_length;
@@ -978,6 +984,10 @@ static int xenhcd_urb_request_done(struct xenhcd_info *info)
spin_unlock_irqrestore(&info->lock, flags);
return more_to_do;
+
+ err:
+ spin_unlock_irqrestore(&info->lock, flags);
+ return 0;
}
static int xenhcd_conn_notify(struct xenhcd_info *info)
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index c1edcc9b13ce..dc570ce4e831 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -437,6 +437,9 @@ static int __maybe_unused xhci_plat_suspend(struct device *dev)
struct xhci_hcd *xhci = hcd_to_xhci(hcd);
int ret;
+ if (pm_runtime_suspended(dev))
+ pm_runtime_resume(dev);
+
ret = xhci_priv_suspend_quirk(hcd);
if (ret)
return ret;
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index dc357cabb265..2d378543bc3a 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1091,6 +1091,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
int retval = 0;
bool comp_timer_running = false;
bool pending_portevent = false;
+ bool reinit_xhc = false;
if (!hcd->state)
return 0;
@@ -1107,10 +1108,11 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
set_bit(HCD_FLAG_HW_ACCESSIBLE, &xhci->shared_hcd->flags);
spin_lock_irq(&xhci->lock);
- if ((xhci->quirks & XHCI_RESET_ON_RESUME) || xhci->broken_suspend)
- hibernated = true;
- if (!hibernated) {
+ if (hibernated || xhci->quirks & XHCI_RESET_ON_RESUME || xhci->broken_suspend)
+ reinit_xhc = true;
+
+ if (!reinit_xhc) {
/*
* Some controllers might lose power during suspend, so wait
* for controller not ready bit to clear, just as in xHC init.
@@ -1143,12 +1145,17 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
spin_unlock_irq(&xhci->lock);
return -ETIMEDOUT;
}
- temp = readl(&xhci->op_regs->status);
}
- /* If restore operation fails, re-initialize the HC during resume */
- if ((temp & STS_SRE) || hibernated) {
+ temp = readl(&xhci->op_regs->status);
+ /* re-initialize the HC on Restore Error, or Host Controller Error */
+ if (temp & (STS_SRE | STS_HCE)) {
+ reinit_xhc = true;
+ xhci_warn(xhci, "xHC error in resume, USBSTS 0x%x, Reinit\n", temp);
+ }
+
+ if (reinit_xhc) {
if ((xhci->quirks & XHCI_COMP_MODE_QUIRK) &&
!(xhci_all_ports_seen_u0(xhci))) {
del_timer_sync(&xhci->comp_mode_recovery_timer);
@@ -1604,9 +1611,12 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
struct urb_priv *urb_priv;
int num_tds;
- if (!urb || xhci_check_args(hcd, urb->dev, urb->ep,
- true, true, __func__) <= 0)
+ if (!urb)
return -EINVAL;
+ ret = xhci_check_args(hcd, urb->dev, urb->ep,
+ true, true, __func__);
+ if (ret <= 0)
+ return ret ? ret : -EINVAL;
slot_id = urb->dev->slot_id;
ep_index = xhci_get_endpoint_index(&urb->ep->desc);
@@ -3323,7 +3333,7 @@ static int xhci_check_streams_endpoint(struct xhci_hcd *xhci,
return -EINVAL;
ret = xhci_check_args(xhci_to_hcd(xhci), udev, ep, 1, true, __func__);
if (ret <= 0)
- return -EINVAL;
+ return ret ? ret : -EINVAL;
if (usb_ss_max_streams(&ep->ss_ep_comp) == 0) {
xhci_warn(xhci, "WARN: SuperSpeed Endpoint Companion"
" descriptor for ep 0x%x does not support streams\n",
diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c
index 507deef1f709..04c4e3fed094 100644
--- a/drivers/usb/misc/usb251xb.c
+++ b/drivers/usb/misc/usb251xb.c
@@ -543,6 +543,9 @@ static int usb251xb_get_ofdata(struct usb251xb *hub,
if (of_property_read_u16_array(np, "language-id", &hub->lang_id, 1))
hub->lang_id = USB251XB_DEF_LANGUAGE_ID;
+ if (of_property_read_u8(np, "boost-up", &hub->boost_up))
+ hub->boost_up = USB251XB_DEF_BOOST_UP;
+
cproperty_char = of_get_property(np, "manufacturer", NULL);
strlcpy(str, cproperty_char ? : USB251XB_DEF_MANUFACTURER_STRING,
sizeof(str));
@@ -584,7 +587,6 @@ static int usb251xb_get_ofdata(struct usb251xb *hub,
* may be as soon as needed.
*/
hub->bat_charge_en = USB251XB_DEF_BATTERY_CHARGING_ENABLE;
- hub->boost_up = USB251XB_DEF_BOOST_UP;
hub->boost_57 = USB251XB_DEF_BOOST_57;
hub->boost_14 = USB251XB_DEF_BOOST_14;
hub->port_map12 = USB251XB_DEF_PORT_MAP_12;
diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index 7d4d0713f4f0..d2b7e613eb34 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -327,7 +327,6 @@ static int omap2430_probe(struct platform_device *pdev)
musb->dev.parent = &pdev->dev;
musb->dev.dma_mask = &omap2430_dmamask;
musb->dev.coherent_dma_mask = omap2430_dmamask;
- device_set_of_node_from_dev(&musb->dev, &pdev->dev);
glue->dev = &pdev->dev;
glue->musb = musb;
diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index 29f4b87a9e74..2798fca71261 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -81,10 +81,10 @@
#define CH341_QUIRK_SIMULATE_BREAK BIT(1)
static const struct usb_device_id id_table[] = {
- { USB_DEVICE(0x1a86, 0x5512) },
{ USB_DEVICE(0x1a86, 0x5523) },
{ USB_DEVICE(0x1a86, 0x7522) },
{ USB_DEVICE(0x1a86, 0x7523) },
+ { USB_DEVICE(0x2184, 0x0057) },
{ USB_DEVICE(0x4348, 0x5523) },
{ USB_DEVICE(0x9986, 0x7523) },
{ },
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 8a60c0d56863..a27f7efcec6a 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -51,6 +51,7 @@ static void cp210x_enable_event_mode(struct usb_serial_port *port);
static void cp210x_disable_event_mode(struct usb_serial_port *port);
static const struct usb_device_id id_table[] = {
+ { USB_DEVICE(0x0404, 0x034C) }, /* NCR Retail IO Box */
{ USB_DEVICE(0x045B, 0x0053) }, /* Renesas RX610 RX-Stick */
{ USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */
{ USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */
@@ -68,6 +69,7 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(0x0FCF, 0x1004) }, /* Dynastream ANT2USB */
{ USB_DEVICE(0x0FCF, 0x1006) }, /* Dynastream ANT development board */
{ USB_DEVICE(0x0FDE, 0xCA05) }, /* OWL Wireless Electricity Monitor CM-160 */
+ { USB_DEVICE(0x106F, 0x0003) }, /* CPI / Money Controls Bulk Coin Recycler */
{ USB_DEVICE(0x10A6, 0xAA26) }, /* Knock-off DCU-11 cable */
{ USB_DEVICE(0x10AB, 0x10C5) }, /* Siemens MC60 Cable */
{ USB_DEVICE(0x10B5, 0xAC70) }, /* Nokia CA-42 USB */
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 4edebd14ef29..49c08f07c969 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -969,6 +969,7 @@ static const struct usb_device_id id_table_combined[] = {
{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_023_PID) },
{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_034_PID) },
{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_101_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_159_PID) },
{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_1_PID) },
{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_2_PID) },
{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_3_PID) },
@@ -977,12 +978,14 @@ static const struct usb_device_id id_table_combined[] = {
{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_6_PID) },
{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_7_PID) },
{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_8_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_235_PID) },
{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_257_PID) },
{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_1_PID) },
{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_2_PID) },
{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_3_PID) },
{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_4_PID) },
{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_313_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_320_PID) },
{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_324_PID) },
{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_346_1_PID) },
{ USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_346_2_PID) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 755858ca20ba..d1a9564697a4 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -1506,6 +1506,9 @@
#define BRAINBOXES_VX_023_PID 0x1003 /* VX-023 ExpressCard 1 Port RS422/485 */
#define BRAINBOXES_VX_034_PID 0x1004 /* VX-034 ExpressCard 2 Port RS422/485 */
#define BRAINBOXES_US_101_PID 0x1011 /* US-101 1xRS232 */
+#define BRAINBOXES_US_159_PID 0x1021 /* US-159 1xRS232 */
+#define BRAINBOXES_US_235_PID 0x1017 /* US-235 1xRS232 */
+#define BRAINBOXES_US_320_PID 0x1019 /* US-320 1xRS422/485 */
#define BRAINBOXES_US_324_PID 0x1013 /* US-324 1xRS422/485 1Mbaud */
#define BRAINBOXES_US_606_1_PID 0x2001 /* US-606 6 Port RS232 Serial Port 1 and 2 */
#define BRAINBOXES_US_606_2_PID 0x2002 /* US-606 6 Port RS232 Serial Port 3 and 4 */
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 42420bfc983c..e7755d9cfc61 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -198,6 +198,8 @@ static void option_instat_callback(struct urb *urb);
#define DELL_PRODUCT_5821E 0x81d7
#define DELL_PRODUCT_5821E_ESIM 0x81e0
+#define DELL_PRODUCT_5829E_ESIM 0x81e4
+#define DELL_PRODUCT_5829E 0x81e6
#define KYOCERA_VENDOR_ID 0x0c88
#define KYOCERA_PRODUCT_KPC650 0x17da
@@ -1063,6 +1065,10 @@ static const struct usb_device_id option_ids[] = {
.driver_info = RSVD(0) | RSVD(1) | RSVD(6) },
{ USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5821E_ESIM),
.driver_info = RSVD(0) | RSVD(1) | RSVD(6) },
+ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5829E),
+ .driver_info = RSVD(0) | RSVD(6) },
+ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5829E_ESIM),
+ .driver_info = RSVD(0) | RSVD(6) },
{ USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */
{ USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) },
{ USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) },
@@ -1273,10 +1279,16 @@ static const struct usb_device_id option_ids[] = {
.driver_info = NCTRL(2) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7011, 0xff), /* Telit LE910-S1 (ECM) */
.driver_info = NCTRL(2) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x701a, 0xff), /* Telit LE910R1 (RNDIS) */
+ .driver_info = NCTRL(2) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x701b, 0xff), /* Telit LE910R1 (ECM) */
+ .driver_info = NCTRL(2) },
{ USB_DEVICE(TELIT_VENDOR_ID, 0x9010), /* Telit SBL FN980 flashing device */
.driver_info = NCTRL(0) | ZLP },
{ USB_DEVICE(TELIT_VENDOR_ID, 0x9200), /* Telit LE910S1 flashing device */
.driver_info = NCTRL(0) | ZLP },
+ { USB_DEVICE(TELIT_VENDOR_ID, 0x9201), /* Telit LE910R1 flashing device */
+ .driver_info = NCTRL(0) | ZLP },
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff),
.driver_info = RSVD(1) },
@@ -1649,6 +1661,8 @@ static const struct usb_device_id option_ids[] = {
.driver_info = RSVD(2) },
{ USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x1476, 0xff) }, /* GosunCn ZTE WeLink ME3630 (ECM/NCM mode) */
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1481, 0xff, 0x00, 0x00) }, /* ZTE MF871A */
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1485, 0xff, 0xff, 0xff), /* ZTE MF286D */
+ .driver_info = RSVD(5) },
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1533, 0xff, 0xff, 0xff) },
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1534, 0xff, 0xff, 0xff) },
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1535, 0xff, 0xff, 0xff) },
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 29191d33c0e3..1a05e3dcfec8 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -2301,6 +2301,16 @@ UNUSUAL_DEV( 0x2027, 0xa001, 0x0000, 0x9999,
USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init,
US_FL_SCM_MULT_TARG ),
+/*
+ * Reported by DocMAX <mail@vacharakis.de>
+ * and Thomas Weißschuh <linux@weissschuh.net>
+ */
+UNUSUAL_DEV( 0x2109, 0x0715, 0x9999, 0x9999,
+ "VIA Labs, Inc.",
+ "VL817 SATA Bridge",
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_IGNORE_UAS),
+
UNUSUAL_DEV( 0x2116, 0x0320, 0x0001, 0x0001,
"ST",
"2A",
diff --git a/drivers/usb/typec/port-mapper.c b/drivers/usb/typec/port-mapper.c
index 07d307418b47..a929e000d0e2 100644
--- a/drivers/usb/typec/port-mapper.c
+++ b/drivers/usb/typec/port-mapper.c
@@ -56,7 +56,12 @@ int typec_link_ports(struct typec_port *con)
{
struct each_port_arg arg = { .port = con, .match = NULL };
- bus_for_each_dev(&acpi_bus_type, NULL, &arg, typec_port_match);
+ if (!has_acpi_companion(&con->dev))
+ return 0;
+
+ acpi_bus_for_each_dev(typec_port_match, &arg);
+ if (!arg.match)
+ return 0;
/*
* REVISIT: Now each connector can have only a single component master.
@@ -74,5 +79,6 @@ int typec_link_ports(struct typec_port *con)
void typec_unlink_ports(struct typec_port *con)
{
- component_master_del(&con->dev, &typec_aggregate_ops);
+ if (has_acpi_companion(&con->dev))
+ component_master_del(&con->dev, &typec_aggregate_ops);
}
diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c
index 35a1307349a2..e07d26a3cd8e 100644
--- a/drivers/usb/typec/tcpm/tcpci.c
+++ b/drivers/usb/typec/tcpm/tcpci.c
@@ -75,9 +75,25 @@ static int tcpci_write16(struct tcpci *tcpci, unsigned int reg, u16 val)
static int tcpci_set_cc(struct tcpc_dev *tcpc, enum typec_cc_status cc)
{
struct tcpci *tcpci = tcpc_to_tcpci(tcpc);
+ bool vconn_pres;
+ enum typec_cc_polarity polarity = TYPEC_POLARITY_CC1;
unsigned int reg;
int ret;
+ ret = regmap_read(tcpci->regmap, TCPC_POWER_STATUS, &reg);
+ if (ret < 0)
+ return ret;
+
+ vconn_pres = !!(reg & TCPC_POWER_STATUS_VCONN_PRES);
+ if (vconn_pres) {
+ ret = regmap_read(tcpci->regmap, TCPC_TCPC_CTRL, &reg);
+ if (ret < 0)
+ return ret;
+
+ if (reg & TCPC_TCPC_CTRL_ORIENTATION)
+ polarity = TYPEC_POLARITY_CC2;
+ }
+
switch (cc) {
case TYPEC_CC_RA:
reg = (TCPC_ROLE_CTRL_CC_RA << TCPC_ROLE_CTRL_CC1_SHIFT) |
@@ -112,6 +128,16 @@ static int tcpci_set_cc(struct tcpc_dev *tcpc, enum typec_cc_status cc)
break;
}
+ if (vconn_pres) {
+ if (polarity == TYPEC_POLARITY_CC2) {
+ reg &= ~(TCPC_ROLE_CTRL_CC1_MASK << TCPC_ROLE_CTRL_CC1_SHIFT);
+ reg |= (TCPC_ROLE_CTRL_CC_OPEN << TCPC_ROLE_CTRL_CC1_SHIFT);
+ } else {
+ reg &= ~(TCPC_ROLE_CTRL_CC2_MASK << TCPC_ROLE_CTRL_CC2_SHIFT);
+ reg |= (TCPC_ROLE_CTRL_CC_OPEN << TCPC_ROLE_CTRL_CC2_SHIFT);
+ }
+ }
+
ret = regmap_write(tcpci->regmap, TCPC_ROLE_CTRL, reg);
if (ret < 0)
return ret;
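In the tcpci_set_cc() hunks above, while VCONN is being sourced the CC line that carries VCONN must stay open rather than be driven with Rp/Rd. Reduced to a helper, as a sketch against the register layout used above (not code from the patch); note that with CC2 active (ORIENTATION set) VCONN rides on CC1, and vice versa:

    /* Sketch: force the VCONN-carrying CC line open in ROLE_CTRL. */
    static u8 open_vconn_cc(u8 role_ctrl, enum typec_cc_polarity polarity)
    {
    	if (polarity == TYPEC_POLARITY_CC2) {
    		role_ctrl &= ~(TCPC_ROLE_CTRL_CC1_MASK << TCPC_ROLE_CTRL_CC1_SHIFT);
    		role_ctrl |= TCPC_ROLE_CTRL_CC_OPEN << TCPC_ROLE_CTRL_CC1_SHIFT;
    	} else {
    		role_ctrl &= ~(TCPC_ROLE_CTRL_CC2_MASK << TCPC_ROLE_CTRL_CC2_SHIFT);
    		role_ctrl |= TCPC_ROLE_CTRL_CC_OPEN << TCPC_ROLE_CTRL_CC2_SHIFT;
    	}
    	return role_ctrl;
    }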
diff --git a/drivers/usb/typec/tcpm/tcpci.h b/drivers/usb/typec/tcpm/tcpci.h
index 2be7a77d400e..b2edd45f13c6 100644
--- a/drivers/usb/typec/tcpm/tcpci.h
+++ b/drivers/usb/typec/tcpm/tcpci.h
@@ -98,6 +98,7 @@
#define TCPC_POWER_STATUS_SOURCING_VBUS BIT(4)
#define TCPC_POWER_STATUS_VBUS_DET BIT(3)
#define TCPC_POWER_STATUS_VBUS_PRES BIT(2)
+#define TCPC_POWER_STATUS_VCONN_PRES BIT(1)
#define TCPC_POWER_STATUS_SINKING_VBUS BIT(0)
#define TCPC_FAULT_STATUS 0x1f
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 59d4fa2443f2..5fce795b69c7 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5156,7 +5156,8 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port)
case SNK_TRYWAIT_DEBOUNCE:
break;
case SNK_ATTACH_WAIT:
- tcpm_set_state(port, SNK_UNATTACHED, 0);
+ case SNK_DEBOUNCED:
+ /* Do nothing, as TCPM is still waiting for vbus to reach VSAFE5V to connect */
break;
case SNK_NEGOTIATE_CAPABILITIES:
@@ -5263,6 +5264,10 @@ static void _tcpm_pd_vbus_vsafe0v(struct tcpm_port *port)
case PR_SWAP_SNK_SRC_SOURCE_ON:
/* Do nothing, vsafe0v is expected during transition */
break;
+ case SNK_ATTACH_WAIT:
+ case SNK_DEBOUNCED:
+ /*Do nothing, still waiting for VSAFE5V for connect */
+ break;
default:
if (port->pwr_role == TYPEC_SINK && port->auto_vbus_discharge_enabled)
tcpm_set_state(port, SNK_UNATTACHED, 0);
diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c
index 6d27a5b5e3ca..7ffcda94d323 100644
--- a/drivers/usb/typec/tipd/core.c
+++ b/drivers/usb/typec/tipd/core.c
@@ -761,12 +761,12 @@ static int tps6598x_probe(struct i2c_client *client)
ret = tps6598x_read32(tps, TPS_REG_STATUS, &status);
if (ret < 0)
- return ret;
+ goto err_clear_mask;
trace_tps6598x_status(status);
ret = tps6598x_read32(tps, TPS_REG_SYSTEM_CONF, &conf);
if (ret < 0)
- return ret;
+ goto err_clear_mask;
/*
* This fwnode has a "compatible" property, but is never populated as a
@@ -855,7 +855,8 @@ err_role_put:
usb_role_switch_put(tps->role_sw);
err_fwnode_put:
fwnode_handle_put(fwnode);
-
+err_clear_mask:
+ tps6598x_write64(tps, TPS_REG_INT_MASK1, 0);
return ret;
}
diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c
index bff96d64dddf..6db7c8ddd51c 100644
--- a/drivers/usb/typec/ucsi/ucsi_ccg.c
+++ b/drivers/usb/typec/ucsi/ucsi_ccg.c
@@ -325,7 +325,7 @@ static int ucsi_ccg_init(struct ucsi_ccg *uc)
if (status < 0)
return status;
- if (!data)
+ if (!(data & DEV_INT))
return 0;
status = ccg_write(uc, CCGX_RAB_INTR_REG, &data, sizeof(data));
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index f648f1c54a0f..d0f91078600e 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -1563,11 +1563,27 @@ static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
switch (cmd) {
case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
+ /* This mq feature check aligns with the pre-existing userspace
+ * implementation.
+ *
+ * Without it, an untrusted driver could send a multiqueue config
+ * request down to a non-mq device, which may cause the kernel to
+ * panic due to uninitialized resources for the extra vqs. Even with
+ * a well-behaved guest driver, changing the number of vqs on a
+ * non-mq device is not expected to be allowed.
+ */
+ if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
+ break;
+
read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
if (read != sizeof(mq))
break;
newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
+ if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
+ newqps > mlx5_vdpa_max_qps(mvdev->max_vqs))
+ break;
+
if (ndev->cur_num_vqs == 2 * newqps) {
status = VIRTIO_NET_OK;
break;
@@ -1897,11 +1913,25 @@ static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
return ndev->mvdev.mlx_features;
}
-static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
+static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
{
+ /* Minimum features to expect */
if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
return -EOPNOTSUPP;
+ /* Double-check the feature combination sent down by the driver.
+ * Fail feature sets where a feature's dependency is absent.
+ *
+ * Per the VIRTIO v1.1 specification, section 5.1.3.1 "Feature bit
+ * requirements": "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
+ * By failing invalid feature sets sent down by untrusted drivers,
+ * we ensure that the assumptions made by is_index_valid() and
+ * is_ctrl_vq_idx() are not violated.
+ */
+ if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
+ BIT_ULL(VIRTIO_NET_F_MQ))
+ return -EINVAL;
+
return 0;
}
@@ -1977,7 +2007,7 @@ static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
print_features(mvdev, features, true);
- err = verify_min_features(mvdev, features);
+ err = verify_driver_features(mvdev, features);
if (err)
return err;
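verify_driver_features() encodes the VIRTIO 1.1 rule "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ" as a mask comparison: the set is invalid exactly when MQ is present and CTRL_VQ is not. The predicate on its own, as a sketch:

    /* Sketch: true when VIRTIO_NET_F_MQ is negotiated without
     * VIRTIO_NET_F_CTRL_VQ, violating VIRTIO 1.1 section 5.1.3.1. */
    static bool mq_without_ctrl_vq(u64 features)
    {
    	u64 mask = BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ);

    	return (features & mask) == BIT_ULL(VIRTIO_NET_F_MQ);
    }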
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index 9846c9de4bfa..1ea525433a5c 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -393,7 +393,7 @@ static void vdpa_get_config_unlocked(struct vdpa_device *vdev,
* If it does happen we assume a legacy guest.
*/
if (!vdev->features_valid)
- vdpa_set_features(vdev, 0, true);
+ vdpa_set_features_unlocked(vdev, 0);
ops->get_config(vdev, offset, buf, len);
}
diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c
index 2b1143f11d8f..0a4d93edc4c0 100644
--- a/drivers/vdpa/vdpa_user/iova_domain.c
+++ b/drivers/vdpa/vdpa_user/iova_domain.c
@@ -294,7 +294,7 @@ vduse_domain_alloc_iova(struct iova_domain *iovad,
iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);
- return iova_pfn << shift;
+ return (dma_addr_t)iova_pfn << shift;
}
static void vduse_domain_free_iova(struct iova_domain *iovad,
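The iova_domain.c cast matters on 32-bit builds with a 64-bit dma_addr_t: iova_pfn is an unsigned long, so the shift is evaluated in 32 bits and the high bits are lost before the implicit conversion to dma_addr_t. Widening first avoids the truncation; a sketch of the arithmetic, assuming a 32-bit unsigned long and a page shift of 12:

    unsigned long iova_pfn = 0x00200000UL;	/* 2^21 */
    u64 bad  = iova_pfn << 12;			/* wraps to 0 on 32-bit */
    u64 good = (u64)iova_pfn << 12;		/* 0x200000000 everywhere */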
diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c
index a57e381e830b..cce101e6a940 100644
--- a/drivers/vdpa/virtio_pci/vp_vdpa.c
+++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
@@ -533,8 +533,8 @@ static void vp_vdpa_remove(struct pci_dev *pdev)
{
struct vp_vdpa *vp_vdpa = pci_get_drvdata(pdev);
- vdpa_unregister_device(&vp_vdpa->vdpa);
vp_modern_remove(&vp_vdpa->mdev);
+ vdpa_unregister_device(&vp_vdpa->vdpa);
}
static struct pci_driver vp_vdpa_driver = {
diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c
index 670d56c879e5..40b098320b2a 100644
--- a/drivers/vhost/iotlb.c
+++ b/drivers/vhost/iotlb.c
@@ -57,6 +57,17 @@ int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb,
if (last < start)
return -EFAULT;
+ /* If the range being mapped is [0, ULONG_MAX], split it into two entries;
+ * otherwise its size would overflow u64.
+ */
+ if (start == 0 && last == ULONG_MAX) {
+ u64 mid = last / 2;
+
+ vhost_iotlb_add_range_ctx(iotlb, start, mid, addr, perm, opaque);
+ addr += mid + 1;
+ start = mid + 1;
+ }
+
if (iotlb->limit &&
iotlb->nmaps == iotlb->limit &&
iotlb->flags & VHOST_IOTLB_FLAG_RETIRE) {
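The iotlb hunk works around an arithmetic corner case: an inclusive range [start, last] has size last - start + 1, and for [0, ULONG_MAX] on 64-bit that is 2^64, which wraps to 0 in a u64. Splitting the request into two half-ranges keeps each size representable; a sketch, assuming a 64-bit unsigned long:

    u64 start = 0, last = ULONG_MAX;
    u64 size  = last - start + 1;		/* 2^64 wraps to 0 */
    u64 mid   = last / 2;			/* split point used above */
    u64 lo_sz = mid - start + 1;		/* 2^63, fits in u64 */
    u64 hi_sz = last - (mid + 1) + 1;		/* 2^63, fits in u64 */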
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 851539807bc9..ec5249e8c32d 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -286,7 +286,7 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
if (copy_from_user(&features, featurep, sizeof(features)))
return -EFAULT;
- if (vdpa_set_features(vdpa, features, false))
+ if (vdpa_set_features(vdpa, features))
return -EINVAL;
return 0;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 59edb5a1ffe2..1768362115c6 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1170,6 +1170,13 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
goto done;
}
+ if ((msg.type == VHOST_IOTLB_UPDATE ||
+ msg.type == VHOST_IOTLB_INVALIDATE) &&
+ msg.size == 0) {
+ ret = -EINVAL;
+ goto done;
+ }
+
if (dev->msg_handler)
ret = dev->msg_handler(dev, &msg);
else
@@ -1981,7 +1988,7 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq)
return 0;
}
-static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event)
+static int vhost_update_avail_event(struct vhost_virtqueue *vq)
{
if (vhost_put_avail_event(vq))
return -EFAULT;
@@ -2527,7 +2534,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
return false;
}
} else {
- r = vhost_update_avail_event(vq, vq->avail_idx);
+ r = vhost_update_avail_event(vq);
if (r) {
vq_err(vq, "Failed to update avail event index at %p: %d\n",
vhost_avail_event(vq), r);
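The first vhost.c hunk rejects zero-length IOTLB UPDATE/INVALIDATE messages before they reach the handlers. One plausible hazard with size == 0, shown as an illustration rather than taken from the patch: code that derives an inclusive end as start + size - 1 wraps around and describes a reversed or enormous bogus range.

    /* Sketch: why a zero size is poisonous downstream */
    u64 start = 0x1000, size = 0;
    u64 last  = start + size - 1;	/* 0xfff: "end" before "start" */
    /* and with start == 0, last becomes U64_MAX: the whole space */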
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index d6ca1c7ad513..e6c9d41db1de 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -629,16 +629,18 @@ err:
return ret;
}
-static int vhost_vsock_stop(struct vhost_vsock *vsock)
+static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)
{
size_t i;
- int ret;
+ int ret = 0;
mutex_lock(&vsock->dev.mutex);
- ret = vhost_dev_check_owner(&vsock->dev);
- if (ret)
- goto err;
+ if (check_owner) {
+ ret = vhost_dev_check_owner(&vsock->dev);
+ if (ret)
+ goto err;
+ }
for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
struct vhost_virtqueue *vq = &vsock->vqs[i];
@@ -751,9 +753,15 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
/* Iterating over all connections for all CIDs to find orphans is
* inefficient. Room for improvement here. */
- vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
+ vsock_for_each_connected_socket(&vhost_transport.transport,
+ vhost_vsock_reset_orphans);
- vhost_vsock_stop(vsock);
+ /* Don't check the owner: we are in the release path, so the vsock
+ * device must be stopped in any case.
+ * vhost_vsock_stop() cannot fail in this case, so there is no need
+ * to check the return code.
+ */
+ vhost_vsock_stop(vsock, false);
vhost_vsock_flush(vsock);
vhost_dev_stop(&vsock->dev);
@@ -868,7 +876,7 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
if (start)
return vhost_vsock_start(vsock);
else
- return vhost_vsock_stop(vsock);
+ return vhost_vsock_stop(vsock, true);
case VHOST_GET_FEATURES:
features = VHOST_VSOCK_FEATURES;
if (copy_to_user(argp, &features, sizeof(features)))
diff --git a/drivers/video/backlight/ams369fg06.c b/drivers/video/backlight/ams369fg06.c
index 8a4361e95a11..522dd81110b8 100644
--- a/drivers/video/backlight/ams369fg06.c
+++ b/drivers/video/backlight/ams369fg06.c
@@ -506,12 +506,11 @@ static int ams369fg06_probe(struct spi_device *spi)
return 0;
}
-static int ams369fg06_remove(struct spi_device *spi)
+static void ams369fg06_remove(struct spi_device *spi)
{
struct ams369fg06 *lcd = spi_get_drvdata(spi);
ams369fg06_power(lcd, FB_BLANK_POWERDOWN);
- return 0;
}
#ifdef CONFIG_PM_SLEEP
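The backlight hunks here are part of the tree-wide conversion of the SPI remove() callback from int to void: the drivers all returned 0, and the returned value was mostly ignored by the SPI core anyway. The converted driver shape, as a sketch with a hypothetical "foo" driver (foo_probe and foo_power_off are stand-ins):

    #include <linux/spi/spi.h>

    static void foo_remove(struct spi_device *spi)
    {
    	struct foo *priv = spi_get_drvdata(spi);

    	foo_power_off(priv);		/* hypothetical teardown */
    }

    static struct spi_driver foo_driver = {
    	.driver	= { .name = "foo" },
    	.probe	= foo_probe,
    	.remove	= foo_remove,		/* void: no status to return */
    };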
diff --git a/drivers/video/backlight/corgi_lcd.c b/drivers/video/backlight/corgi_lcd.c
index 33f5d80495e6..0a57033ae31d 100644
--- a/drivers/video/backlight/corgi_lcd.c
+++ b/drivers/video/backlight/corgi_lcd.c
@@ -542,7 +542,7 @@ static int corgi_lcd_probe(struct spi_device *spi)
return 0;
}
-static int corgi_lcd_remove(struct spi_device *spi)
+static void corgi_lcd_remove(struct spi_device *spi)
{
struct corgi_lcd *lcd = spi_get_drvdata(spi);
@@ -550,7 +550,6 @@ static int corgi_lcd_remove(struct spi_device *spi)
lcd->bl_dev->props.brightness = 0;
backlight_update_status(lcd->bl_dev);
corgi_lcd_set_power(lcd->lcd_dev, FB_BLANK_POWERDOWN);
- return 0;
}
static struct spi_driver corgi_lcd_driver = {
diff --git a/drivers/video/backlight/ili922x.c b/drivers/video/backlight/ili922x.c
index 328aba9cddad..e7b6bd827986 100644
--- a/drivers/video/backlight/ili922x.c
+++ b/drivers/video/backlight/ili922x.c
@@ -526,10 +526,9 @@ static int ili922x_probe(struct spi_device *spi)
return 0;
}
-static int ili922x_remove(struct spi_device *spi)
+static void ili922x_remove(struct spi_device *spi)
{
ili922x_poweroff(spi);
- return 0;
}
static struct spi_driver ili922x_driver = {
diff --git a/drivers/video/backlight/l4f00242t03.c b/drivers/video/backlight/l4f00242t03.c
index 46f97d1c3d21..cc763cf15f53 100644
--- a/drivers/video/backlight/l4f00242t03.c
+++ b/drivers/video/backlight/l4f00242t03.c
@@ -223,12 +223,11 @@ static int l4f00242t03_probe(struct spi_device *spi)
return 0;
}
-static int l4f00242t03_remove(struct spi_device *spi)
+static void l4f00242t03_remove(struct spi_device *spi)
{
struct l4f00242t03_priv *priv = spi_get_drvdata(spi);
l4f00242t03_lcd_power_set(priv->ld, FB_BLANK_POWERDOWN);
- return 0;
}
static void l4f00242t03_shutdown(struct spi_device *spi)
diff --git a/drivers/video/backlight/lms501kf03.c b/drivers/video/backlight/lms501kf03.c
index f949b66dce1b..5c46df8022bf 100644
--- a/drivers/video/backlight/lms501kf03.c
+++ b/drivers/video/backlight/lms501kf03.c
@@ -364,12 +364,11 @@ static int lms501kf03_probe(struct spi_device *spi)
return 0;
}
-static int lms501kf03_remove(struct spi_device *spi)
+static void lms501kf03_remove(struct spi_device *spi)
{
struct lms501kf03 *lcd = spi_get_drvdata(spi);
lms501kf03_power(lcd, FB_BLANK_POWERDOWN);
- return 0;
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/video/backlight/ltv350qv.c b/drivers/video/backlight/ltv350qv.c
index 5cbf621e48bd..b6d373af6e3f 100644
--- a/drivers/video/backlight/ltv350qv.c
+++ b/drivers/video/backlight/ltv350qv.c
@@ -255,12 +255,11 @@ static int ltv350qv_probe(struct spi_device *spi)
return 0;
}
-static int ltv350qv_remove(struct spi_device *spi)
+static void ltv350qv_remove(struct spi_device *spi)
{
struct ltv350qv *lcd = spi_get_drvdata(spi);
ltv350qv_power(lcd, FB_BLANK_POWERDOWN);
- return 0;
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/video/backlight/tdo24m.c b/drivers/video/backlight/tdo24m.c
index 0de044dcafd5..fc6fbaf85594 100644
--- a/drivers/video/backlight/tdo24m.c
+++ b/drivers/video/backlight/tdo24m.c
@@ -397,12 +397,11 @@ static int tdo24m_probe(struct spi_device *spi)
return 0;
}
-static int tdo24m_remove(struct spi_device *spi)
+static void tdo24m_remove(struct spi_device *spi)
{
struct tdo24m *lcd = spi_get_drvdata(spi);
tdo24m_power(lcd, FB_BLANK_POWERDOWN);
- return 0;
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/video/backlight/tosa_lcd.c b/drivers/video/backlight/tosa_lcd.c
index 38765544345b..23d6c6bf0f54 100644
--- a/drivers/video/backlight/tosa_lcd.c
+++ b/drivers/video/backlight/tosa_lcd.c
@@ -232,15 +232,13 @@ err_register:
return ret;
}
-static int tosa_lcd_remove(struct spi_device *spi)
+static void tosa_lcd_remove(struct spi_device *spi)
{
struct tosa_lcd_data *data = spi_get_drvdata(spi);
i2c_unregister_device(data->i2c);
tosa_lcd_tg_off(data);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/video/backlight/vgg2432a4.c b/drivers/video/backlight/vgg2432a4.c
index 3567b45f9ba9..bfc1913e8b55 100644
--- a/drivers/video/backlight/vgg2432a4.c
+++ b/drivers/video/backlight/vgg2432a4.c
@@ -233,11 +233,9 @@ static int vgg2432a4_probe(struct spi_device *spi)
return 0;
}
-static int vgg2432a4_remove(struct spi_device *spi)
+static void vgg2432a4_remove(struct spi_device *spi)
{
ili9320_remove(spi_get_drvdata(spi));
-
- return 0;
}
static void vgg2432a4_shutdown(struct spi_device *spi)
diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig
index 840d9813b0bc..fcc46380e7c9 100644
--- a/drivers/video/console/Kconfig
+++ b/drivers/video/console/Kconfig
@@ -78,6 +78,26 @@ config FRAMEBUFFER_CONSOLE
help
Low-level framebuffer-based console driver.
+config FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION
+ bool "Enable legacy fbcon hardware acceleration code"
+ depends on FRAMEBUFFER_CONSOLE
+ default y if PARISC
+ default n
+ help
+ This option enables the fbcon (framebuffer text-based) hardware
+ acceleration for graphics drivers which were written for the fbdev
+ graphics interface.
+
+ On modern, mainstream machines (such as x86-64), or when running a
+ modern Linux distribution, these fbdev drivers are usually not used,
+ so enabling this option has no effect and it is best left disabled.
+
+ If you build this kernel for older machines that still require the
+ fbdev drivers, you may want to say Y.
+
+ If unsure, say N.
+
config FRAMEBUFFER_CONSOLE_DETECT_PRIMARY
bool "Map the console to the primary display device"
depends on FRAMEBUFFER_CONSOLE
diff --git a/drivers/video/fbdev/core/bitblit.c b/drivers/video/fbdev/core/bitblit.c
index 01fae2c96965..f98e8f298bc1 100644
--- a/drivers/video/fbdev/core/bitblit.c
+++ b/drivers/video/fbdev/core/bitblit.c
@@ -43,6 +43,21 @@ static void update_attr(u8 *dst, u8 *src, int attribute,
}
}
+static void bit_bmove(struct vc_data *vc, struct fb_info *info, int sy,
+ int sx, int dy, int dx, int height, int width)
+{
+ struct fb_copyarea area;
+
+ area.sx = sx * vc->vc_font.width;
+ area.sy = sy * vc->vc_font.height;
+ area.dx = dx * vc->vc_font.width;
+ area.dy = dy * vc->vc_font.height;
+ area.height = height * vc->vc_font.height;
+ area.width = width * vc->vc_font.width;
+
+ info->fbops->fb_copyarea(info, &area);
+}
+
static void bit_clear(struct vc_data *vc, struct fb_info *info, int sy,
int sx, int height, int width)
{
@@ -378,6 +393,7 @@ static int bit_update_start(struct fb_info *info)
void fbcon_set_bitops(struct fbcon_ops *ops)
{
+ ops->bmove = bit_bmove;
ops->clear = bit_clear;
ops->putcs = bit_putcs;
ops->clear_margins = bit_clear_margins;
diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c
index a591d291b231..d0b0b05e0dff 100644
--- a/drivers/video/fbdev/core/fb_defio.c
+++ b/drivers/video/fbdev/core/fb_defio.c
@@ -151,15 +151,8 @@ static const struct vm_operations_struct fb_deferred_io_vm_ops = {
.page_mkwrite = fb_deferred_io_mkwrite,
};
-static int fb_deferred_io_set_page_dirty(struct page *page)
-{
- if (!PageDirty(page))
- SetPageDirty(page);
- return 0;
-}
-
static const struct address_space_operations fb_deferred_io_aops = {
- .set_page_dirty = fb_deferred_io_set_page_dirty,
+ .dirty_folio = noop_dirty_folio,
};
int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma)
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 99ecd9a6d844..2fc1b80a26ad 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -173,6 +173,8 @@ static void fbcon_putcs(struct vc_data *vc, const unsigned short *s,
int count, int ypos, int xpos);
static void fbcon_clear_margins(struct vc_data *vc, int bottom_only);
static void fbcon_cursor(struct vc_data *vc, int mode);
+static void fbcon_bmove(struct vc_data *vc, int sy, int sx, int dy, int dx,
+ int height, int width);
static int fbcon_switch(struct vc_data *vc);
static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch);
static void fbcon_set_palette(struct vc_data *vc, const unsigned char *table);
@@ -180,8 +182,16 @@ static void fbcon_set_palette(struct vc_data *vc, const unsigned char *table);
/*
* Internal routines
*/
+static __inline__ void ywrap_up(struct vc_data *vc, int count);
+static __inline__ void ywrap_down(struct vc_data *vc, int count);
+static __inline__ void ypan_up(struct vc_data *vc, int count);
+static __inline__ void ypan_down(struct vc_data *vc, int count);
+static void fbcon_bmove_rec(struct vc_data *vc, struct fbcon_display *p, int sy, int sx,
+ int dy, int dx, int height, int width, u_int y_break);
static void fbcon_set_disp(struct fb_info *info, struct fb_var_screeninfo *var,
int unit);
+static void fbcon_redraw_move(struct vc_data *vc, struct fbcon_display *p,
+ int line, int count, int dy);
static void fbcon_modechanged(struct fb_info *info);
static void fbcon_set_all_vcs(struct fb_info *info);
static void fbcon_start(void);
@@ -1125,6 +1135,14 @@ static void fbcon_init(struct vc_data *vc, int init)
ops->graphics = 0;
+#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION
+ if ((info->flags & FBINFO_HWACCEL_COPYAREA) &&
+ !(info->flags & FBINFO_HWACCEL_DISABLED))
+ p->scrollmode = SCROLL_MOVE;
+ else /* default to something safe */
+ p->scrollmode = SCROLL_REDRAW;
+#endif
+
/*
* ++guenther: console.c:vc_allocate() relies on initializing
* vc_{cols,rows}, but we must not set those if we are only
@@ -1211,13 +1229,14 @@ finished:
* This system is now divided into two levels because of complications
* caused by hardware scrolling. Top level functions:
*
- * fbcon_clear(), fbcon_putc(), fbcon_clear_margins()
+ * fbcon_bmove(), fbcon_clear(), fbcon_putc(), fbcon_clear_margins()
*
* handles y values in range [0, scr_height-1] that correspond to real
* screen positions. y_wrap shift means that first line of bitmap may be
* anywhere on this display. These functions convert lineoffsets to
* bitmap offsets and deal with the wrap-around case by splitting blits.
*
 * fbcon_bmove_physical_8() -- These functions are fast implementations
 * fbcon_clear_physical_8() -- of the original fbcon_XXX functions.
 * fbcon_putc_physical_8() -- (support for font width != 8 may be added later)
*
@@ -1390,6 +1409,224 @@ static void fbcon_set_disp(struct fb_info *info, struct fb_var_screeninfo *var,
}
}
+static __inline__ void ywrap_up(struct vc_data *vc, int count)
+{
+ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct fbcon_display *p = &fb_display[vc->vc_num];
+
+ p->yscroll += count;
+ if (p->yscroll >= p->vrows) /* Deal with wrap */
+ p->yscroll -= p->vrows;
+ ops->var.xoffset = 0;
+ ops->var.yoffset = p->yscroll * vc->vc_font.height;
+ ops->var.vmode |= FB_VMODE_YWRAP;
+ ops->update_start(info);
+ scrollback_max += count;
+ if (scrollback_max > scrollback_phys_max)
+ scrollback_max = scrollback_phys_max;
+ scrollback_current = 0;
+}
+
+static __inline__ void ywrap_down(struct vc_data *vc, int count)
+{
+ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct fbcon_display *p = &fb_display[vc->vc_num];
+
+ p->yscroll -= count;
+ if (p->yscroll < 0) /* Deal with wrap */
+ p->yscroll += p->vrows;
+ ops->var.xoffset = 0;
+ ops->var.yoffset = p->yscroll * vc->vc_font.height;
+ ops->var.vmode |= FB_VMODE_YWRAP;
+ ops->update_start(info);
+ scrollback_max -= count;
+ if (scrollback_max < 0)
+ scrollback_max = 0;
+ scrollback_current = 0;
+}
+
+static __inline__ void ypan_up(struct vc_data *vc, int count)
+{
+ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+ struct fbcon_display *p = &fb_display[vc->vc_num];
+ struct fbcon_ops *ops = info->fbcon_par;
+
+ p->yscroll += count;
+ if (p->yscroll > p->vrows - vc->vc_rows) {
+ ops->bmove(vc, info, p->vrows - vc->vc_rows,
+ 0, 0, 0, vc->vc_rows, vc->vc_cols);
+ p->yscroll -= p->vrows - vc->vc_rows;
+ }
+
+ ops->var.xoffset = 0;
+ ops->var.yoffset = p->yscroll * vc->vc_font.height;
+ ops->var.vmode &= ~FB_VMODE_YWRAP;
+ ops->update_start(info);
+ fbcon_clear_margins(vc, 1);
+ scrollback_max += count;
+ if (scrollback_max > scrollback_phys_max)
+ scrollback_max = scrollback_phys_max;
+ scrollback_current = 0;
+}
+
+static __inline__ void ypan_up_redraw(struct vc_data *vc, int t, int count)
+{
+ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct fbcon_display *p = &fb_display[vc->vc_num];
+
+ p->yscroll += count;
+
+ if (p->yscroll > p->vrows - vc->vc_rows) {
+ p->yscroll -= p->vrows - vc->vc_rows;
+ fbcon_redraw_move(vc, p, t + count, vc->vc_rows - count, t);
+ }
+
+ ops->var.xoffset = 0;
+ ops->var.yoffset = p->yscroll * vc->vc_font.height;
+ ops->var.vmode &= ~FB_VMODE_YWRAP;
+ ops->update_start(info);
+ fbcon_clear_margins(vc, 1);
+ scrollback_max += count;
+ if (scrollback_max > scrollback_phys_max)
+ scrollback_max = scrollback_phys_max;
+ scrollback_current = 0;
+}
+
+static __inline__ void ypan_down(struct vc_data *vc, int count)
+{
+ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+ struct fbcon_display *p = &fb_display[vc->vc_num];
+ struct fbcon_ops *ops = info->fbcon_par;
+
+ p->yscroll -= count;
+ if (p->yscroll < 0) {
+ ops->bmove(vc, info, 0, 0, p->vrows - vc->vc_rows,
+ 0, vc->vc_rows, vc->vc_cols);
+ p->yscroll += p->vrows - vc->vc_rows;
+ }
+
+ ops->var.xoffset = 0;
+ ops->var.yoffset = p->yscroll * vc->vc_font.height;
+ ops->var.vmode &= ~FB_VMODE_YWRAP;
+ ops->update_start(info);
+ fbcon_clear_margins(vc, 1);
+ scrollback_max -= count;
+ if (scrollback_max < 0)
+ scrollback_max = 0;
+ scrollback_current = 0;
+}
+
+static __inline__ void ypan_down_redraw(struct vc_data *vc, int t, int count)
+{
+ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct fbcon_display *p = &fb_display[vc->vc_num];
+
+ p->yscroll -= count;
+
+ if (p->yscroll < 0) {
+ p->yscroll += p->vrows - vc->vc_rows;
+ fbcon_redraw_move(vc, p, t, vc->vc_rows - count, t + count);
+ }
+
+ ops->var.xoffset = 0;
+ ops->var.yoffset = p->yscroll * vc->vc_font.height;
+ ops->var.vmode &= ~FB_VMODE_YWRAP;
+ ops->update_start(info);
+ fbcon_clear_margins(vc, 1);
+ scrollback_max -= count;
+ if (scrollback_max < 0)
+ scrollback_max = 0;
+ scrollback_current = 0;
+}
+
+static void fbcon_redraw_move(struct vc_data *vc, struct fbcon_display *p,
+ int line, int count, int dy)
+{
+ unsigned short *s = (unsigned short *)
+ (vc->vc_origin + vc->vc_size_row * line);
+
+ while (count--) {
+ unsigned short *start = s;
+ unsigned short *le = advance_row(s, 1);
+ unsigned short c;
+ int x = 0;
+ unsigned short attr = 1;
+
+ do {
+ c = scr_readw(s);
+ if (attr != (c & 0xff00)) {
+ attr = c & 0xff00;
+ if (s > start) {
+ fbcon_putcs(vc, start, s - start,
+ dy, x);
+ x += s - start;
+ start = s;
+ }
+ }
+ console_conditional_schedule();
+ s++;
+ } while (s < le);
+ if (s > start)
+ fbcon_putcs(vc, start, s - start, dy, x);
+ console_conditional_schedule();
+ dy++;
+ }
+}
+
+static void fbcon_redraw_blit(struct vc_data *vc, struct fb_info *info,
+ struct fbcon_display *p, int line, int count, int ycount)
+{
+ int offset = ycount * vc->vc_cols;
+ unsigned short *d = (unsigned short *)
+ (vc->vc_origin + vc->vc_size_row * line);
+ unsigned short *s = d + offset;
+ struct fbcon_ops *ops = info->fbcon_par;
+
+ while (count--) {
+ unsigned short *start = s;
+ unsigned short *le = advance_row(s, 1);
+ unsigned short c;
+ int x = 0;
+
+ do {
+ c = scr_readw(s);
+
+ if (c == scr_readw(d)) {
+ if (s > start) {
+ ops->bmove(vc, info, line + ycount, x,
+ line, x, 1, s-start);
+ x += s - start + 1;
+ start = s + 1;
+ } else {
+ x++;
+ start++;
+ }
+ }
+
+ scr_writew(c, d);
+ console_conditional_schedule();
+ s++;
+ d++;
+ } while (s < le);
+ if (s > start)
+ ops->bmove(vc, info, line + ycount, x, line, x, 1,
+ s-start);
+ console_conditional_schedule();
+ if (ycount > 0)
+ line++;
+ else {
+ line--;
+ /* NOTE: We subtract two lines from these pointers */
+ s -= vc->vc_size_row;
+ d -= vc->vc_size_row;
+ }
+ }
+}
+
static void fbcon_redraw(struct vc_data *vc, struct fbcon_display *p,
int line, int count, int offset)
{
@@ -1450,6 +1687,7 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
{
struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
struct fbcon_display *p = &fb_display[vc->vc_num];
+ int scroll_partial = info->flags & FBINFO_PARTIAL_PAN_OK;
if (fbcon_is_inactive(vc, info))
return true;
@@ -1466,32 +1704,291 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
case SM_UP:
if (count > vc->vc_rows) /* Maximum realistic size */
count = vc->vc_rows;
- fbcon_redraw(vc, p, t, b - t - count,
- count * vc->vc_cols);
- fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
- scr_memsetw((unsigned short *) (vc->vc_origin +
- vc->vc_size_row *
- (b - count)),
- vc->vc_video_erase_char,
- vc->vc_size_row * count);
- return true;
+ if (logo_shown >= 0)
+ goto redraw_up;
+ switch (fb_scrollmode(p)) {
+ case SCROLL_MOVE:
+ fbcon_redraw_blit(vc, info, p, t, b - t - count,
+ count);
+ fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
+ scr_memsetw((unsigned short *) (vc->vc_origin +
+ vc->vc_size_row *
+ (b - count)),
+ vc->vc_video_erase_char,
+ vc->vc_size_row * count);
+ return true;
+
+ case SCROLL_WRAP_MOVE:
+ if (b - t - count > 3 * vc->vc_rows >> 2) {
+ if (t > 0)
+ fbcon_bmove(vc, 0, 0, count, 0, t,
+ vc->vc_cols);
+ ywrap_up(vc, count);
+ if (vc->vc_rows - b > 0)
+ fbcon_bmove(vc, b - count, 0, b, 0,
+ vc->vc_rows - b,
+ vc->vc_cols);
+ } else if (info->flags & FBINFO_READS_FAST)
+ fbcon_bmove(vc, t + count, 0, t, 0,
+ b - t - count, vc->vc_cols);
+ else
+ goto redraw_up;
+ fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
+ break;
+
+ case SCROLL_PAN_REDRAW:
+ if ((p->yscroll + count <=
+ 2 * (p->vrows - vc->vc_rows))
+ && ((!scroll_partial && (b - t == vc->vc_rows))
+ || (scroll_partial
+ && (b - t - count >
+ 3 * vc->vc_rows >> 2)))) {
+ if (t > 0)
+ fbcon_redraw_move(vc, p, 0, t, count);
+ ypan_up_redraw(vc, t, count);
+ if (vc->vc_rows - b > 0)
+ fbcon_redraw_move(vc, p, b,
+ vc->vc_rows - b, b);
+ } else
+ fbcon_redraw_move(vc, p, t + count, b - t - count, t);
+ fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
+ break;
+
+ case SCROLL_PAN_MOVE:
+ if ((p->yscroll + count <=
+ 2 * (p->vrows - vc->vc_rows))
+ && ((!scroll_partial && (b - t == vc->vc_rows))
+ || (scroll_partial
+ && (b - t - count >
+ 3 * vc->vc_rows >> 2)))) {
+ if (t > 0)
+ fbcon_bmove(vc, 0, 0, count, 0, t,
+ vc->vc_cols);
+ ypan_up(vc, count);
+ if (vc->vc_rows - b > 0)
+ fbcon_bmove(vc, b - count, 0, b, 0,
+ vc->vc_rows - b,
+ vc->vc_cols);
+ } else if (info->flags & FBINFO_READS_FAST)
+ fbcon_bmove(vc, t + count, 0, t, 0,
+ b - t - count, vc->vc_cols);
+ else
+ goto redraw_up;
+ fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
+ break;
+
+ case SCROLL_REDRAW:
+ redraw_up:
+ fbcon_redraw(vc, p, t, b - t - count,
+ count * vc->vc_cols);
+ fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
+ scr_memsetw((unsigned short *) (vc->vc_origin +
+ vc->vc_size_row *
+ (b - count)),
+ vc->vc_video_erase_char,
+ vc->vc_size_row * count);
+ return true;
+ }
+ break;
case SM_DOWN:
if (count > vc->vc_rows) /* Maximum realistic size */
count = vc->vc_rows;
- fbcon_redraw(vc, p, b - 1, b - t - count,
- -count * vc->vc_cols);
- fbcon_clear(vc, t, 0, count, vc->vc_cols);
- scr_memsetw((unsigned short *) (vc->vc_origin +
- vc->vc_size_row *
- t),
- vc->vc_video_erase_char,
- vc->vc_size_row * count);
- return true;
+ if (logo_shown >= 0)
+ goto redraw_down;
+ switch (fb_scrollmode(p)) {
+ case SCROLL_MOVE:
+ fbcon_redraw_blit(vc, info, p, b - 1, b - t - count,
+ -count);
+ fbcon_clear(vc, t, 0, count, vc->vc_cols);
+ scr_memsetw((unsigned short *) (vc->vc_origin +
+ vc->vc_size_row *
+ t),
+ vc->vc_video_erase_char,
+ vc->vc_size_row * count);
+ return true;
+
+ case SCROLL_WRAP_MOVE:
+ if (b - t - count > 3 * vc->vc_rows >> 2) {
+ if (vc->vc_rows - b > 0)
+ fbcon_bmove(vc, b, 0, b - count, 0,
+ vc->vc_rows - b,
+ vc->vc_cols);
+ ywrap_down(vc, count);
+ if (t > 0)
+ fbcon_bmove(vc, count, 0, 0, 0, t,
+ vc->vc_cols);
+ } else if (info->flags & FBINFO_READS_FAST)
+ fbcon_bmove(vc, t, 0, t + count, 0,
+ b - t - count, vc->vc_cols);
+ else
+ goto redraw_down;
+ fbcon_clear(vc, t, 0, count, vc->vc_cols);
+ break;
+
+ case SCROLL_PAN_MOVE:
+ if ((count - p->yscroll <= p->vrows - vc->vc_rows)
+ && ((!scroll_partial && (b - t == vc->vc_rows))
+ || (scroll_partial
+ && (b - t - count >
+ 3 * vc->vc_rows >> 2)))) {
+ if (vc->vc_rows - b > 0)
+ fbcon_bmove(vc, b, 0, b - count, 0,
+ vc->vc_rows - b,
+ vc->vc_cols);
+ ypan_down(vc, count);
+ if (t > 0)
+ fbcon_bmove(vc, count, 0, 0, 0, t,
+ vc->vc_cols);
+ } else if (info->flags & FBINFO_READS_FAST)
+ fbcon_bmove(vc, t, 0, t + count, 0,
+ b - t - count, vc->vc_cols);
+ else
+ goto redraw_down;
+ fbcon_clear(vc, t, 0, count, vc->vc_cols);
+ break;
+
+ case SCROLL_PAN_REDRAW:
+ if ((count - p->yscroll <= p->vrows - vc->vc_rows)
+ && ((!scroll_partial && (b - t == vc->vc_rows))
+ || (scroll_partial
+ && (b - t - count >
+ 3 * vc->vc_rows >> 2)))) {
+ if (vc->vc_rows - b > 0)
+ fbcon_redraw_move(vc, p, b, vc->vc_rows - b,
+ b - count);
+ ypan_down_redraw(vc, t, count);
+ if (t > 0)
+ fbcon_redraw_move(vc, p, count, t, 0);
+ } else
+ fbcon_redraw_move(vc, p, t, b - t - count, t + count);
+ fbcon_clear(vc, t, 0, count, vc->vc_cols);
+ break;
+
+ case SCROLL_REDRAW:
+ redraw_down:
+ fbcon_redraw(vc, p, b - 1, b - t - count,
+ -count * vc->vc_cols);
+ fbcon_clear(vc, t, 0, count, vc->vc_cols);
+ scr_memsetw((unsigned short *) (vc->vc_origin +
+ vc->vc_size_row *
+ t),
+ vc->vc_video_erase_char,
+ vc->vc_size_row * count);
+ return true;
+ }
}
return false;
}
+
+static void fbcon_bmove(struct vc_data *vc, int sy, int sx, int dy, int dx,
+ int height, int width)
+{
+ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+ struct fbcon_display *p = &fb_display[vc->vc_num];
+
+ if (fbcon_is_inactive(vc, info))
+ return;
+
+ if (!width || !height)
+ return;
+
+ /* Split blits that cross the physical y_wrap boundary.
+ * The pathological case involves 4 blits, so recursive code is
+ * preferable to an unrolled one.
+ *
+ * Recursive invocations don't need to erase the cursor over and
+ * over again, so we use fbcon_bmove_rec().
+ */
+ fbcon_bmove_rec(vc, p, sy, sx, dy, dx, height, width,
+ p->vrows - p->yscroll);
+}
+
+static void fbcon_bmove_rec(struct vc_data *vc, struct fbcon_display *p, int sy, int sx,
+ int dy, int dx, int height, int width, u_int y_break)
+{
+ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+ struct fbcon_ops *ops = info->fbcon_par;
+ u_int b;
+
+ if (sy < y_break && sy + height > y_break) {
+ b = y_break - sy;
+ if (dy < sy) { /* Avoid trashing self */
+ fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
+ y_break);
+ fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
+ height - b, width, y_break);
+ } else {
+ fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
+ height - b, width, y_break);
+ fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
+ y_break);
+ }
+ return;
+ }
+
+ if (dy < y_break && dy + height > y_break) {
+ b = y_break - dy;
+ if (dy < sy) { /* Avoid trashing self */
+ fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
+ y_break);
+ fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
+ height - b, width, y_break);
+ } else {
+ fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
+ height - b, width, y_break);
+ fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
+ y_break);
+ }
+ return;
+ }
+ ops->bmove(vc, info, real_y(p, sy), sx, real_y(p, dy), dx,
+ height, width);
+}
+
+static void updatescrollmode_accel(struct fbcon_display *p,
+ struct fb_info *info,
+ struct vc_data *vc)
+{
+#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION
+ struct fbcon_ops *ops = info->fbcon_par;
+ int cap = info->flags;
+ u16 t = 0;
+ int ypan = FBCON_SWAP(ops->rotate, info->fix.ypanstep,
+ info->fix.xpanstep);
+ int ywrap = FBCON_SWAP(ops->rotate, info->fix.ywrapstep, t);
+ int yres = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
+ int vyres = FBCON_SWAP(ops->rotate, info->var.yres_virtual,
+ info->var.xres_virtual);
+ int good_pan = (cap & FBINFO_HWACCEL_YPAN) &&
+ divides(ypan, vc->vc_font.height) && vyres > yres;
+ int good_wrap = (cap & FBINFO_HWACCEL_YWRAP) &&
+ divides(ywrap, vc->vc_font.height) &&
+ divides(vc->vc_font.height, vyres) &&
+ divides(vc->vc_font.height, yres);
+ int reading_fast = cap & FBINFO_READS_FAST;
+ int fast_copyarea = (cap & FBINFO_HWACCEL_COPYAREA) &&
+ !(cap & FBINFO_HWACCEL_DISABLED);
+ int fast_imageblit = (cap & FBINFO_HWACCEL_IMAGEBLIT) &&
+ !(cap & FBINFO_HWACCEL_DISABLED);
+
+ if (good_wrap || good_pan) {
+ if (reading_fast || fast_copyarea)
+ p->scrollmode = good_wrap ?
+ SCROLL_WRAP_MOVE : SCROLL_PAN_MOVE;
+ else
+ p->scrollmode = good_wrap ? SCROLL_REDRAW :
+ SCROLL_PAN_REDRAW;
+ } else {
+ if (reading_fast || (fast_copyarea && !fast_imageblit))
+ p->scrollmode = SCROLL_MOVE;
+ else
+ p->scrollmode = SCROLL_REDRAW;
+ }
+#endif
+}
+
static void updatescrollmode(struct fbcon_display *p,
struct fb_info *info,
struct vc_data *vc)
@@ -1507,6 +2004,9 @@ static void updatescrollmode(struct fbcon_display *p,
p->vrows -= (yres - (fh * vc->vc_rows)) / fh;
if ((yres % fh) && (vyres % fh < yres % fh))
p->vrows--;
+
+ /* update scrollmode in case hardware acceleration is used */
+ updatescrollmode_accel(p, info, vc);
}
#define PITCH(w) (((w) + 7) >> 3)
@@ -1664,7 +2164,21 @@ static int fbcon_switch(struct vc_data *vc)
updatescrollmode(p, info, vc);
- scrollback_phys_max = 0;
+ switch (fb_scrollmode(p)) {
+ case SCROLL_WRAP_MOVE:
+ scrollback_phys_max = p->vrows - vc->vc_rows;
+ break;
+ case SCROLL_PAN_MOVE:
+ case SCROLL_PAN_REDRAW:
+ scrollback_phys_max = p->vrows - 2 * vc->vc_rows;
+ if (scrollback_phys_max < 0)
+ scrollback_phys_max = 0;
+ break;
+ default:
+ scrollback_phys_max = 0;
+ break;
+ }
+
scrollback_max = 0;
scrollback_current = 0;
diff --git a/drivers/video/fbdev/core/fbcon.h b/drivers/video/fbdev/core/fbcon.h
index a00603b4451a..969d41ecede5 100644
--- a/drivers/video/fbdev/core/fbcon.h
+++ b/drivers/video/fbdev/core/fbcon.h
@@ -29,6 +29,9 @@ struct fbcon_display {
/* Filled in by the low-level console driver */
const u_char *fontdata;
int userfont; /* != 0 if fontdata kmalloc()ed */
+#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION
+ u_short scrollmode; /* Scroll Method, use fb_scrollmode() */
+#endif
u_short inverse; /* != 0 text black on white as default */
short yscroll; /* Hardware scrolling */
int vrows; /* number of virtual rows */
@@ -51,6 +54,8 @@ struct fbcon_display {
};
struct fbcon_ops {
+ void (*bmove)(struct vc_data *vc, struct fb_info *info, int sy,
+ int sx, int dy, int dx, int height, int width);
void (*clear)(struct vc_data *vc, struct fb_info *info, int sy,
int sx, int height, int width);
void (*putcs)(struct vc_data *vc, struct fb_info *info,
@@ -149,6 +154,73 @@ static inline int attr_col_ec(int shift, struct vc_data *vc,
#define attr_bgcol_ec(bgshift, vc, info) attr_col_ec(bgshift, vc, info, 0)
#define attr_fgcol_ec(fgshift, vc, info) attr_col_ec(fgshift, vc, info, 1)
+ /*
+ * Scroll Method
+ */
+
+/* There are several methods fbcon can use to move text around the screen:
+ *
+ * Operation Pan Wrap
+ *---------------------------------------------
+ * SCROLL_MOVE copyarea No No
+ * SCROLL_PAN_MOVE copyarea Yes No
+ * SCROLL_WRAP_MOVE copyarea No Yes
+ * SCROLL_REDRAW imageblit No No
+ * SCROLL_PAN_REDRAW imageblit Yes No
+ * SCROLL_WRAP_REDRAW imageblit No Yes
+ *
+ * (SCROLL_WRAP_REDRAW is not implemented yet)
+ *
+ * In general, fbcon will choose the best scrolling
+ * method based on the rule below:
+ *
+ * Pan/Wrap > accel imageblit > accel copyarea >
+ * soft imageblit > (soft copyarea)
+ *
+ * Exception to the rule: Pan + accel copyarea is
+ * preferred over Pan + accel imageblit.
+ *
+ * The above is typical for PCI/AGP cards. Unless
+ * overridden, fbcon will never use soft copyarea.
+ *
+ * If you need to override the above rule, set the
+ * appropriate flags in fb_info->flags. For example,
+ * to prefer copyarea over imageblit, set
+ * FBINFO_READS_FAST.
+ *
+ * Other notes:
+ * + use the hardware engine to move the text
+ * (hw-accelerated copyarea() and fillrect())
+ * + use hardware-supported panning on a large virtual screen
+ * + amifb can not only pan, but also wrap the display by N lines
+ * (i.e. visible line i = physical line (i+N) % yres).
+ * + read what's already rendered on the screen and
+ * write it in a different place (this is cfb_copyarea())
+ * + re-render the text to the screen
+ *
+ * Whether to use wrapping or panning can only be figured out at
+ * runtime (when we know whether our font height is a multiple
+ * of the pan/wrap step)
+ *
+ */
+
+#define SCROLL_MOVE 0x001
+#define SCROLL_PAN_MOVE 0x002
+#define SCROLL_WRAP_MOVE 0x003
+#define SCROLL_REDRAW 0x004
+#define SCROLL_PAN_REDRAW 0x005
+
+static inline u_short fb_scrollmode(struct fbcon_display *fb)
+{
+#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION
+ return fb->scrollmode;
+#else
+ /* Hardcoded to SCROLL_REDRAW when acceleration is disabled. */
+ return SCROLL_REDRAW;
+#endif
+}
+
+
#ifdef CONFIG_FB_TILEBLITTING
extern void fbcon_set_tileops(struct vc_data *vc, struct fb_info *info);
#endif
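As the scroll-method comment block above notes, a driver can bias fbcon's choice of method through fb_info->flags. A sketch of such an opt-in at probe time, illustrative only since which flags are appropriate depends on the hardware; FBINFO_HWACCEL_COPYAREA and FBINFO_READS_FAST are the real flags from <linux/fb.h>:

    /* Sketch: advertise a usable blitter and cheap readback so fbcon
     * may pick a copyarea-based method (SCROLL_MOVE et al.). */
    static void foo_init_fbinfo(struct fb_info *info)
    {
    	info->flags |= FBINFO_HWACCEL_COPYAREA |	/* hw blitter */
    		       FBINFO_READS_FAST;		/* fast readback */
    }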
diff --git a/drivers/video/fbdev/core/fbcon_ccw.c b/drivers/video/fbdev/core/fbcon_ccw.c
index ffa78936eaab..2789ace79634 100644
--- a/drivers/video/fbdev/core/fbcon_ccw.c
+++ b/drivers/video/fbdev/core/fbcon_ccw.c
@@ -59,12 +59,31 @@ static void ccw_update_attr(u8 *dst, u8 *src, int attribute,
}
}
+
+static void ccw_bmove(struct vc_data *vc, struct fb_info *info, int sy,
+ int sx, int dy, int dx, int height, int width)
+{
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct fb_copyarea area;
+ u32 vyres = GETVYRES(ops->p, info);
+
+ area.sx = sy * vc->vc_font.height;
+ area.sy = vyres - ((sx + width) * vc->vc_font.width);
+ area.dx = dy * vc->vc_font.height;
+ area.dy = vyres - ((dx + width) * vc->vc_font.width);
+ area.width = height * vc->vc_font.height;
+ area.height = width * vc->vc_font.width;
+
+ info->fbops->fb_copyarea(info, &area);
+}
+
static void ccw_clear(struct vc_data *vc, struct fb_info *info, int sy,
int sx, int height, int width)
{
+ struct fbcon_ops *ops = info->fbcon_par;
struct fb_fillrect region;
int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
- u32 vyres = info->var.yres;
+ u32 vyres = GETVYRES(ops->p, info);
region.color = attr_bgcol_ec(bgshift,vc,info);
region.dx = sy * vc->vc_font.height;
@@ -121,7 +140,7 @@ static void ccw_putcs(struct vc_data *vc, struct fb_info *info,
u32 cnt, pitch, size;
u32 attribute = get_attribute(info, scr_readw(s));
u8 *dst, *buf = NULL;
- u32 vyres = info->var.yres;
+ u32 vyres = GETVYRES(ops->p, info);
if (!ops->fontbuffer)
return;
@@ -210,7 +229,7 @@ static void ccw_cursor(struct vc_data *vc, struct fb_info *info, int mode,
int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
int err = 1, dx, dy;
char *src;
- u32 vyres = info->var.yres;
+ u32 vyres = GETVYRES(ops->p, info);
if (!ops->fontbuffer)
return;
@@ -368,7 +387,7 @@ static int ccw_update_start(struct fb_info *info)
{
struct fbcon_ops *ops = info->fbcon_par;
u32 yoffset;
- u32 vyres = info->var.yres;
+ u32 vyres = GETVYRES(ops->p, info);
int err;
yoffset = (vyres - info->var.yres) - ops->var.xoffset;
@@ -383,6 +402,7 @@ static int ccw_update_start(struct fb_info *info)
void fbcon_rotate_ccw(struct fbcon_ops *ops)
{
+ ops->bmove = ccw_bmove;
ops->clear = ccw_clear;
ops->putcs = ccw_putcs;
ops->clear_margins = ccw_clear_margins;
diff --git a/drivers/video/fbdev/core/fbcon_cw.c b/drivers/video/fbdev/core/fbcon_cw.c
index 92e5b7fb51ee..86a254c1b2b7 100644
--- a/drivers/video/fbdev/core/fbcon_cw.c
+++ b/drivers/video/fbdev/core/fbcon_cw.c
@@ -44,12 +44,31 @@ static void cw_update_attr(u8 *dst, u8 *src, int attribute,
}
}
+
+static void cw_bmove(struct vc_data *vc, struct fb_info *info, int sy,
+ int sx, int dy, int dx, int height, int width)
+{
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct fb_copyarea area;
+ u32 vxres = GETVXRES(ops->p, info);
+
+ area.sx = vxres - ((sy + height) * vc->vc_font.height);
+ area.sy = sx * vc->vc_font.width;
+ area.dx = vxres - ((dy + height) * vc->vc_font.height);
+ area.dy = dx * vc->vc_font.width;
+ area.width = height * vc->vc_font.height;
+ area.height = width * vc->vc_font.width;
+
+ info->fbops->fb_copyarea(info, &area);
+}
+
static void cw_clear(struct vc_data *vc, struct fb_info *info, int sy,
int sx, int height, int width)
{
+ struct fbcon_ops *ops = info->fbcon_par;
struct fb_fillrect region;
int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
- u32 vxres = info->var.xres;
+ u32 vxres = GETVXRES(ops->p, info);
region.color = attr_bgcol_ec(bgshift,vc,info);
region.dx = vxres - ((sy + height) * vc->vc_font.height);
@@ -106,7 +125,7 @@ static void cw_putcs(struct vc_data *vc, struct fb_info *info,
u32 cnt, pitch, size;
u32 attribute = get_attribute(info, scr_readw(s));
u8 *dst, *buf = NULL;
- u32 vxres = info->var.xres;
+ u32 vxres = GETVXRES(ops->p, info);
if (!ops->fontbuffer)
return;
@@ -193,7 +212,7 @@ static void cw_cursor(struct vc_data *vc, struct fb_info *info, int mode,
int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
int err = 1, dx, dy;
char *src;
- u32 vxres = info->var.xres;
+ u32 vxres = GETVXRES(ops->p, info);
if (!ops->fontbuffer)
return;
@@ -350,7 +369,7 @@ static void cw_cursor(struct vc_data *vc, struct fb_info *info, int mode,
static int cw_update_start(struct fb_info *info)
{
struct fbcon_ops *ops = info->fbcon_par;
- u32 vxres = info->var.xres;
+ u32 vxres = GETVXRES(ops->p, info);
u32 xoffset;
int err;
@@ -366,6 +385,7 @@ static int cw_update_start(struct fb_info *info)
void fbcon_rotate_cw(struct fbcon_ops *ops)
{
+ ops->bmove = cw_bmove;
ops->clear = cw_clear;
ops->putcs = cw_putcs;
ops->clear_margins = cw_clear_margins;
diff --git a/drivers/video/fbdev/core/fbcon_rotate.h b/drivers/video/fbdev/core/fbcon_rotate.h
index b528b2e54283..01cbe303b8a2 100644
--- a/drivers/video/fbdev/core/fbcon_rotate.h
+++ b/drivers/video/fbdev/core/fbcon_rotate.h
@@ -11,6 +11,15 @@
#ifndef _FBCON_ROTATE_H
#define _FBCON_ROTATE_H
+#define GETVYRES(s,i) ({ \
+ (fb_scrollmode(s) == SCROLL_REDRAW || fb_scrollmode(s) == SCROLL_MOVE) ? \
+ (i)->var.yres : (i)->var.yres_virtual; })
+
+#define GETVXRES(s,i) ({ \
+ (fb_scrollmode(s) == SCROLL_REDRAW || fb_scrollmode(s) == SCROLL_MOVE || !(i)->fix.xpanstep) ? \
+ (i)->var.xres : (i)->var.xres_virtual; })
+
+
static inline int pattern_test_bit(u32 x, u32 y, u32 pitch, const char *pat)
{
u32 tmp = (y * pitch) + x, index = tmp / 8, bit = tmp % 8;
diff --git a/drivers/video/fbdev/core/fbcon_ud.c b/drivers/video/fbdev/core/fbcon_ud.c
index 09619bd8e021..23bc045769d0 100644
--- a/drivers/video/fbdev/core/fbcon_ud.c
+++ b/drivers/video/fbdev/core/fbcon_ud.c
@@ -44,13 +44,33 @@ static void ud_update_attr(u8 *dst, u8 *src, int attribute,
}
}
+
+static void ud_bmove(struct vc_data *vc, struct fb_info *info, int sy,
+ int sx, int dy, int dx, int height, int width)
+{
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct fb_copyarea area;
+ u32 vyres = GETVYRES(ops->p, info);
+ u32 vxres = GETVXRES(ops->p, info);
+
+ area.sy = vyres - ((sy + height) * vc->vc_font.height);
+ area.sx = vxres - ((sx + width) * vc->vc_font.width);
+ area.dy = vyres - ((dy + height) * vc->vc_font.height);
+ area.dx = vxres - ((dx + width) * vc->vc_font.width);
+ area.height = height * vc->vc_font.height;
+ area.width = width * vc->vc_font.width;
+
+ info->fbops->fb_copyarea(info, &area);
+}
+
static void ud_clear(struct vc_data *vc, struct fb_info *info, int sy,
int sx, int height, int width)
{
+ struct fbcon_ops *ops = info->fbcon_par;
struct fb_fillrect region;
int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
- u32 vyres = info->var.yres;
- u32 vxres = info->var.xres;
+ u32 vyres = GETVYRES(ops->p, info);
+ u32 vxres = GETVXRES(ops->p, info);
region.color = attr_bgcol_ec(bgshift,vc,info);
region.dy = vyres - ((sy + height) * vc->vc_font.height);
@@ -142,8 +162,8 @@ static void ud_putcs(struct vc_data *vc, struct fb_info *info,
u32 mod = vc->vc_font.width % 8, cnt, pitch, size;
u32 attribute = get_attribute(info, scr_readw(s));
u8 *dst, *buf = NULL;
- u32 vyres = info->var.yres;
- u32 vxres = info->var.xres;
+ u32 vyres = GETVYRES(ops->p, info);
+ u32 vxres = GETVXRES(ops->p, info);
if (!ops->fontbuffer)
return;
@@ -239,8 +259,8 @@ static void ud_cursor(struct vc_data *vc, struct fb_info *info, int mode,
int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
int err = 1, dx, dy;
char *src;
- u32 vyres = info->var.yres;
- u32 vxres = info->var.xres;
+ u32 vyres = GETVYRES(ops->p, info);
+ u32 vxres = GETVXRES(ops->p, info);
if (!ops->fontbuffer)
return;
@@ -390,8 +410,8 @@ static int ud_update_start(struct fb_info *info)
{
struct fbcon_ops *ops = info->fbcon_par;
int xoffset, yoffset;
- u32 vyres = info->var.yres;
- u32 vxres = info->var.xres;
+ u32 vyres = GETVYRES(ops->p, info);
+ u32 vxres = GETVXRES(ops->p, info);
int err;
xoffset = vxres - info->var.xres - ops->var.xoffset;
@@ -409,6 +429,7 @@ static int ud_update_start(struct fb_info *info)
void fbcon_rotate_ud(struct fbcon_ops *ops)
{
+ ops->bmove = ud_bmove;
ops->clear = ud_clear;
ops->putcs = ud_putcs;
ops->clear_margins = ud_clear_margins;
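
The restored bmove handlers translate a rectangle of character cells into pixel coordinates for fb_copyarea(), mirroring (and, for the cw/ccw variants, also swapping) the axes to match the console rotation. A worked example of the 180-degree (ud) mapping above, with assumed values:

/* Assuming an 8x16 font on a 640x480 display (vxres = 640,
 * vyres = 480), moving one text row (height = 1) of ten cells
 * (width = 10) from character position (sy = 2, sx = 4) gives:
 *	area.sy = 480 - (2 + 1) * 16 = 432
 *	area.sx = 640 - (4 + 10) * 8 = 528
 *	area.height = 1 * 16 = 16, area.width = 10 * 8 = 80
 * i.e. the rectangle is mirrored about both axes before the blit.
 */
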
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 0fa7ede94fa6..13083ad8d751 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1160,6 +1160,8 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd,
ret = fbcon_set_con2fb_map_ioctl(argp);
break;
case FBIOBLANK:
+ if (arg > FB_BLANK_POWERDOWN)
+ return -EINVAL;
console_lock();
lock_fb_info(info);
ret = fb_blank(info, arg);
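
The new FBIOBLANK check rejects blank levels above FB_BLANK_POWERDOWN before any locks are taken, so drivers only ever see the five defined levels (FB_BLANK_UNBLANK through FB_BLANK_POWERDOWN, 0 through 4). A small userspace sketch of the observable change, assuming /dev/fb0 exists and is accessible to the caller:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fb.h>

int main(void)
{
	int fd = open("/dev/fb0", O_RDWR);

	if (fd < 0)
		return 1;
	ioctl(fd, FBIOBLANK, FB_BLANK_POWERDOWN);	/* level 4: accepted */
	ioctl(fd, FBIOBLANK, 5);	/* out of range: now fails with EINVAL */
	close(fd);
	return 0;
}
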
diff --git a/drivers/video/fbdev/core/tileblit.c b/drivers/video/fbdev/core/tileblit.c
index 72af95053bcb..2768eff247ba 100644
--- a/drivers/video/fbdev/core/tileblit.c
+++ b/drivers/video/fbdev/core/tileblit.c
@@ -16,6 +16,21 @@
#include <asm/types.h>
#include "fbcon.h"
+static void tile_bmove(struct vc_data *vc, struct fb_info *info, int sy,
+ int sx, int dy, int dx, int height, int width)
+{
+ struct fb_tilearea area;
+
+ area.sx = sx;
+ area.sy = sy;
+ area.dx = dx;
+ area.dy = dy;
+ area.height = height;
+ area.width = width;
+
+ info->tileops->fb_tilecopy(info, &area);
+}
+
static void tile_clear(struct vc_data *vc, struct fb_info *info, int sy,
int sx, int height, int width)
{
@@ -118,6 +133,7 @@ void fbcon_set_tileops(struct vc_data *vc, struct fb_info *info)
struct fb_tilemap map;
struct fbcon_ops *ops = info->fbcon_par;
+ ops->bmove = tile_bmove;
ops->clear = tile_clear;
ops->putcs = tile_putcs;
ops->clear_margins = tile_clear_margins;
diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c
index 23999df52739..c8e0ea27caf1 100644
--- a/drivers/video/fbdev/hyperv_fb.c
+++ b/drivers/video/fbdev/hyperv_fb.c
@@ -287,8 +287,6 @@ struct hvfb_par {
static uint screen_width = HVFB_WIDTH;
static uint screen_height = HVFB_HEIGHT;
-static uint screen_width_max = HVFB_WIDTH;
-static uint screen_height_max = HVFB_HEIGHT;
static uint screen_depth;
static uint screen_fb_size;
static uint dio_fb_size; /* FB size for deferred IO */
@@ -582,7 +580,6 @@ static int synthvid_get_supported_resolution(struct hv_device *hdev)
int ret = 0;
unsigned long t;
u8 index;
- int i;
memset(msg, 0, sizeof(struct synthvid_msg));
msg->vid_hdr.type = SYNTHVID_RESOLUTION_REQUEST;
@@ -613,13 +610,6 @@ static int synthvid_get_supported_resolution(struct hv_device *hdev)
goto out;
}
- for (i = 0; i < msg->resolution_resp.resolution_count; i++) {
- screen_width_max = max_t(unsigned int, screen_width_max,
- msg->resolution_resp.supported_resolution[i].width);
- screen_height_max = max_t(unsigned int, screen_height_max,
- msg->resolution_resp.supported_resolution[i].height);
- }
-
screen_width =
msg->resolution_resp.supported_resolution[index].width;
screen_height =
@@ -941,7 +931,7 @@ static void hvfb_get_option(struct fb_info *info)
if (x < HVFB_WIDTH_MIN || y < HVFB_HEIGHT_MIN ||
(synthvid_ver_ge(par->synthvid_version, SYNTHVID_VERSION_WIN10) &&
- (x > screen_width_max || y > screen_height_max)) ||
+ (x * y * screen_depth / 8 > screen_fb_size)) ||
(par->synthvid_version == SYNTHVID_VERSION_WIN8 &&
x * y * screen_depth / 8 > SYNTHVID_FB_SIZE_WIN8) ||
(par->synthvid_version == SYNTHVID_VERSION_WIN7 &&
@@ -1194,8 +1184,8 @@ static int hvfb_probe(struct hv_device *hdev,
}
hvfb_get_option(info);
- pr_info("Screen resolution: %dx%d, Color depth: %d\n",
- screen_width, screen_height, screen_depth);
+ pr_info("Screen resolution: %dx%d, Color depth: %d, Frame buffer size: %d\n",
+ screen_width, screen_height, screen_depth, screen_fb_size);
ret = hvfb_getmem(hdev, info);
if (ret) {
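
With screen_width_max/screen_height_max gone, hvfb_get_option() now bounds a requested mode by what the frame buffer can actually hold rather than by the largest advertised resolution. A worked example, assuming a screen_fb_size of 8 MiB (8388608 bytes) at 32 bpp:

/* 1920x1080 needs 1920 * 1080 * 32 / 8 = 8294400 bytes -> accepted;
 * 2560x1440 needs 2560 * 1440 * 32 / 8 = 14745600 bytes -> rejected.
 */
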
diff --git a/drivers/video/fbdev/omap/lcd_mipid.c b/drivers/video/fbdev/omap/lcd_mipid.c
index a75ae0c9b14c..03cff39d392d 100644
--- a/drivers/video/fbdev/omap/lcd_mipid.c
+++ b/drivers/video/fbdev/omap/lcd_mipid.c
@@ -570,14 +570,12 @@ static int mipid_spi_probe(struct spi_device *spi)
return 0;
}
-static int mipid_spi_remove(struct spi_device *spi)
+static void mipid_spi_remove(struct spi_device *spi)
{
struct mipid_device *md = dev_get_drvdata(&spi->dev);
mipid_disable(&md->panel);
kfree(md);
-
- return 0;
}
static struct spi_driver mipid_spi_driver = {
diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c
index 1bec7a4422e8..aab67721263d 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c
@@ -316,7 +316,7 @@ err_gpio:
return r;
}
-static int lb035q02_panel_spi_remove(struct spi_device *spi)
+static void lb035q02_panel_spi_remove(struct spi_device *spi)
{
struct panel_drv_data *ddata = spi_get_drvdata(spi);
struct omap_dss_device *dssdev = &ddata->dssdev;
@@ -328,8 +328,6 @@ static int lb035q02_panel_spi_remove(struct spi_device *spi)
lb035q02_disconnect(dssdev);
omap_dss_put_device(in);
-
- return 0;
}
static const struct of_device_id lb035q02_of_match[] = {
diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c
index dff9ebbadfc0..be9910ff6e62 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c
@@ -327,7 +327,7 @@ err_gpio:
return r;
}
-static int nec_8048_remove(struct spi_device *spi)
+static void nec_8048_remove(struct spi_device *spi)
{
struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev);
struct omap_dss_device *dssdev = &ddata->dssdev;
@@ -341,8 +341,6 @@ static int nec_8048_remove(struct spi_device *spi)
nec_8048_disconnect(dssdev);
omap_dss_put_device(in);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c
index 8d8b5ff7d43c..a909b5385ca5 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c
@@ -857,7 +857,7 @@ err_gpio:
return r;
}
-static int acx565akm_remove(struct spi_device *spi)
+static void acx565akm_remove(struct spi_device *spi)
{
struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev);
struct omap_dss_device *dssdev = &ddata->dssdev;
@@ -874,8 +874,6 @@ static int acx565akm_remove(struct spi_device *spi)
acx565akm_disconnect(dssdev);
omap_dss_put_device(in);
-
- return 0;
}
static const struct of_device_id acx565akm_of_match[] = {
diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c
index 595ebd8bd5dc..3c0f887d3092 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c
@@ -425,7 +425,7 @@ err_reg:
return r;
}
-static int td028ttec1_panel_remove(struct spi_device *spi)
+static void td028ttec1_panel_remove(struct spi_device *spi)
{
struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev);
struct omap_dss_device *dssdev = &ddata->dssdev;
@@ -439,8 +439,6 @@ static int td028ttec1_panel_remove(struct spi_device *spi)
td028ttec1_panel_disconnect(dssdev);
omap_dss_put_device(in);
-
- return 0;
}
static const struct of_device_id td028ttec1_of_match[] = {
diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c
index afac1d9445aa..58bbba7c037f 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c
@@ -564,7 +564,7 @@ err_regulator:
return r;
}
-static int tpo_td043_remove(struct spi_device *spi)
+static void tpo_td043_remove(struct spi_device *spi)
{
struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev);
struct omap_dss_device *dssdev = &ddata->dssdev;
@@ -580,8 +580,6 @@ static int tpo_td043_remove(struct spi_device *spi)
omap_dss_put_device(in);
sysfs_remove_group(&spi->dev.kobj, &tpo_td043_attr_group);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
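
These panel drivers all follow the same mechanical conversion: the SPI core now ignores the remove callback's return value, so spi_driver::remove returns void and the trailing "return 0;" is dropped. A sketch of the pattern, where example_priv and example_disable() are hypothetical stand-ins for each driver's real state and teardown:

static void example_spi_remove(struct spi_device *spi)
{
	struct example_priv *priv = spi_get_drvdata(spi);

	example_disable(priv);	/* hypothetical teardown helper */
	kfree(priv);
}
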
diff --git a/drivers/video/fbdev/skeletonfb.c b/drivers/video/fbdev/skeletonfb.c
index 0fe922f726e9..bcacfb6934fa 100644
--- a/drivers/video/fbdev/skeletonfb.c
+++ b/drivers/video/fbdev/skeletonfb.c
@@ -505,15 +505,15 @@ void xxxfb_fillrect(struct fb_info *p, const struct fb_fillrect *region)
}
/**
- * xxxfb_copyarea - OBSOLETE function.
+ * xxxfb_copyarea - REQUIRED function. Can use generic routines if
+ * non-accelerated hardware and packed pixel based.
* Copies one area of the screen to another area.
- * Will be deleted in a future version
*
* @info: frame buffer structure that represents a single frame buffer
* @area: Structure providing the data to copy the framebuffer contents
* from one region to another.
*
- * This drawing operation copied a rectangular area from one area of the
+ * This drawing operation copies a rectangular area from one area of the
* screen to another area.
*/
void xxxfb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
@@ -645,9 +645,9 @@ static const struct fb_ops xxxfb_ops = {
.fb_setcolreg = xxxfb_setcolreg,
.fb_blank = xxxfb_blank,
.fb_pan_display = xxxfb_pan_display,
- .fb_fillrect = xxxfb_fillrect, /* Needed !!! */
- .fb_copyarea = xxxfb_copyarea, /* Obsolete */
- .fb_imageblit = xxxfb_imageblit, /* Needed !!! */
+ .fb_fillrect = xxxfb_fillrect, /* Needed !!! */
+ .fb_copyarea = xxxfb_copyarea, /* Needed !!! */
+ .fb_imageblit = xxxfb_imageblit, /* Needed !!! */
.fb_cursor = xxxfb_cursor, /* Optional !!! */
.fb_sync = xxxfb_sync,
.fb_ioctl = xxxfb_ioctl,
diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c
index 265865610edc..bebb2eea6448 100644
--- a/drivers/video/fbdev/stifb.c
+++ b/drivers/video/fbdev/stifb.c
@@ -1041,6 +1041,47 @@ stifb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
SETUP_FB(fb);
}
+#define ARTIST_VRAM_SIZE 0x000804
+#define ARTIST_VRAM_SRC 0x000808
+#define ARTIST_VRAM_SIZE_TRIGGER_WINFILL 0x000a04
+#define ARTIST_VRAM_DEST_TRIGGER_BLOCKMOVE 0x000b00
+#define ARTIST_SRC_BM_ACCESS 0x018008
+#define ARTIST_FGCOLOR 0x018010
+#define ARTIST_BGCOLOR 0x018014
+#define ARTIST_BITMAP_OP 0x01801c
+
+static void
+stifb_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
+{
+ struct stifb_info *fb = container_of(info, struct stifb_info, info);
+
+ if (rect->rop != ROP_COPY)
+ return cfb_fillrect(info, rect);
+
+ SETUP_HW(fb);
+
+ if (fb->info.var.bits_per_pixel == 32) {
+ WRITE_WORD(0xBBA0A000, fb, REG_10);
+
+ NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xffffffff);
+ } else {
+ WRITE_WORD(fb->id == S9000_ID_HCRX ? 0x13a02000 : 0x13a01000, fb, REG_10);
+
+ NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xff);
+ }
+
+ WRITE_WORD(0x03000300, fb, ARTIST_BITMAP_OP);
+ WRITE_WORD(0x2ea01000, fb, ARTIST_SRC_BM_ACCESS);
+ NGLE_QUICK_SET_DST_BM_ACCESS(fb, 0x2ea01000);
+ NGLE_REALLY_SET_IMAGE_FG_COLOR(fb, rect->color);
+ WRITE_WORD(0, fb, ARTIST_BGCOLOR);
+
+ NGLE_SET_DSTXY(fb, (rect->dx << 16) | (rect->dy));
+ SET_LENXY_START_RECFILL(fb, (rect->width << 16) | (rect->height));
+
+ SETUP_FB(fb);
+}
+
static void __init
stifb_init_display(struct stifb_info *fb)
{
@@ -1105,7 +1146,7 @@ static const struct fb_ops stifb_ops = {
.owner = THIS_MODULE,
.fb_setcolreg = stifb_setcolreg,
.fb_blank = stifb_blank,
- .fb_fillrect = cfb_fillrect,
+ .fb_fillrect = stifb_fillrect,
.fb_copyarea = stifb_copyarea,
.fb_imageblit = cfb_imageblit,
};
@@ -1297,7 +1338,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref)
goto out_err0;
}
info->screen_size = fix->smem_len;
- info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA;
+ info->flags = FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT;
info->pseudo_palette = &fb->pseudo_palette;
/* This has to be done !!! */
diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig
index 8061e8ef449f..121b9293c737 100644
--- a/drivers/virt/Kconfig
+++ b/drivers/virt/Kconfig
@@ -13,6 +13,17 @@ menuconfig VIRT_DRIVERS
if VIRT_DRIVERS
+config VMGENID
+ tristate "Virtual Machine Generation ID driver"
+ default y
+ depends on ACPI
+ help
+ Say Y here to use the hypervisor-provided Virtual Machine Generation ID
+ to reseed the RNG when the VM is cloned. This is highly recommended if
+ you intend to do any rollback / cloning / snapshotting of VMs.
+
+ Prefer Y to M so that this protection is activated very early.
+
config FSL_HV_MANAGER
tristate "Freescale hypervisor management driver"
depends on FSL_SOC
diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile
index 3e272ea60cd9..108d0ffcc9aa 100644
--- a/drivers/virt/Makefile
+++ b/drivers/virt/Makefile
@@ -4,6 +4,7 @@
#
obj-$(CONFIG_FSL_HV_MANAGER) += fsl_hypervisor.o
+obj-$(CONFIG_VMGENID) += vmgenid.o
obj-y += vboxguest/
obj-$(CONFIG_NITRO_ENCLAVES) += nitro_enclaves/
diff --git a/drivers/virt/vmgenid.c b/drivers/virt/vmgenid.c
new file mode 100644
index 000000000000..0ae1a39f2e28
--- /dev/null
+++ b/drivers/virt/vmgenid.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * The "Virtual Machine Generation ID" is exposed via ACPI and changes when a
+ * virtual machine forks or is cloned. This driver exists for shepherding that
+ * information to random.c.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/random.h>
+
+ACPI_MODULE_NAME("vmgenid");
+
+enum { VMGENID_SIZE = 16 };
+
+struct vmgenid_state {
+ u8 *next_id;
+ u8 this_id[VMGENID_SIZE];
+};
+
+static int vmgenid_add(struct acpi_device *device)
+{
+ struct acpi_buffer parsed = { ACPI_ALLOCATE_BUFFER };
+ struct vmgenid_state *state;
+ union acpi_object *obj;
+ phys_addr_t phys_addr;
+ acpi_status status;
+ int ret = 0;
+
+ state = devm_kmalloc(&device->dev, sizeof(*state), GFP_KERNEL);
+ if (!state)
+ return -ENOMEM;
+
+ status = acpi_evaluate_object(device->handle, "ADDR", NULL, &parsed);
+ if (ACPI_FAILURE(status)) {
+ ACPI_EXCEPTION((AE_INFO, status, "Evaluating ADDR"));
+ return -ENODEV;
+ }
+ obj = parsed.pointer;
+ if (!obj || obj->type != ACPI_TYPE_PACKAGE || obj->package.count != 2 ||
+ obj->package.elements[0].type != ACPI_TYPE_INTEGER ||
+ obj->package.elements[1].type != ACPI_TYPE_INTEGER) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ phys_addr = (obj->package.elements[0].integer.value << 0) |
+ (obj->package.elements[1].integer.value << 32);
+ state->next_id = devm_memremap(&device->dev, phys_addr, VMGENID_SIZE, MEMREMAP_WB);
+ if (IS_ERR(state->next_id)) {
+ ret = PTR_ERR(state->next_id);
+ goto out;
+ }
+
+ memcpy(state->this_id, state->next_id, sizeof(state->this_id));
+ add_device_randomness(state->this_id, sizeof(state->this_id));
+
+ device->driver_data = state;
+
+out:
+ ACPI_FREE(parsed.pointer);
+ return ret;
+}
+
+static void vmgenid_notify(struct acpi_device *device, u32 event)
+{
+ struct vmgenid_state *state = acpi_driver_data(device);
+ u8 old_id[VMGENID_SIZE];
+
+ memcpy(old_id, state->this_id, sizeof(old_id));
+ memcpy(state->this_id, state->next_id, sizeof(state->this_id));
+ if (!memcmp(old_id, state->this_id, sizeof(old_id)))
+ return;
+ add_vmfork_randomness(state->this_id, sizeof(state->this_id));
+}
+
+static const struct acpi_device_id vmgenid_ids[] = {
+ { "VM_GEN_COUNTER", 0 },
+ { }
+};
+
+static struct acpi_driver vmgenid_driver = {
+ .name = "vmgenid",
+ .ids = vmgenid_ids,
+ .owner = THIS_MODULE,
+ .ops = {
+ .add = vmgenid_add,
+ .notify = vmgenid_notify
+ }
+};
+
+module_acpi_driver(vmgenid_driver);
+
+MODULE_DEVICE_TABLE(acpi, vmgenid_ids);
+MODULE_DESCRIPTION("Virtual Machine Generation ID");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
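
The ADDR object evaluates to a package of two ACPI integers carrying the low and high 32 bits of the generation-ID buffer's physical address; the driver maps 16 bytes there and mixes them into the RNG, and again on every notify where the ID actually changed. A worked example of the address assembly above, with assumed values:

/* If ADDR returns lo = 0xfe001000 and hi = 0x1, then
 *	phys_addr = (0xfe001000 << 0) | (0x1ULL << 32) = 0x1fe001000
 * and the 16-byte generation ID is memremapped from that address.
 */
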
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 34f80b7a8a64..492fc26f0b65 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -105,7 +105,6 @@ config VIRTIO_BALLOON
config VIRTIO_MEM
tristate "Virtio mem driver"
- default m
depends on X86_64
depends on VIRTIO
depends on MEMORY_HOTPLUG
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 00ac9db792a4..22f15f444f75 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -166,14 +166,13 @@ void virtio_add_status(struct virtio_device *dev, unsigned int status)
}
EXPORT_SYMBOL_GPL(virtio_add_status);
-int virtio_finalize_features(struct virtio_device *dev)
+/* Do some validation, then set FEATURES_OK */
+static int virtio_features_ok(struct virtio_device *dev)
{
- int ret = dev->config->finalize_features(dev);
unsigned status;
+ int ret;
might_sleep();
- if (ret)
- return ret;
ret = arch_has_restricted_virtio_memory_access();
if (ret) {
@@ -202,8 +201,23 @@ int virtio_finalize_features(struct virtio_device *dev)
}
return 0;
}
-EXPORT_SYMBOL_GPL(virtio_finalize_features);
+/**
+ * virtio_reset_device - quiesce device for removal
+ * @dev: the device to reset
+ *
+ * Prevents device from sending interrupts and accessing memory.
+ *
+ * Generally used for cleanup during driver / device removal.
+ *
+ * Once this has been invoked, caller must ensure that
+ * virtqueue_notify / virtqueue_kick are not in progress.
+ *
+ * Note: this guarantees that vq callbacks are not in progress, however caller
+ * is responsible for preventing access from other contexts, such as a system
+ * call/workqueue/bh. Invoking virtio_break_device then flushing any such
+ * contexts is one way to handle that.
+ */
void virtio_reset_device(struct virtio_device *dev)
{
dev->config->reset(dev);
@@ -245,17 +259,6 @@ static int virtio_dev_probe(struct device *_d)
driver_features_legacy = driver_features;
}
- /*
- * Some devices detect legacy solely via F_VERSION_1. Write
- * F_VERSION_1 to force LE config space accesses before FEATURES_OK for
- * these when needed.
- */
- if (drv->validate && !virtio_legacy_is_little_endian()
- && device_features & BIT_ULL(VIRTIO_F_VERSION_1)) {
- dev->features = BIT_ULL(VIRTIO_F_VERSION_1);
- dev->config->finalize_features(dev);
- }
-
if (device_features & (1ULL << VIRTIO_F_VERSION_1))
dev->features = driver_features & device_features;
else
@@ -266,13 +269,26 @@ static int virtio_dev_probe(struct device *_d)
if (device_features & (1ULL << i))
__virtio_set_bit(dev, i);
+ err = dev->config->finalize_features(dev);
+ if (err)
+ goto err;
+
if (drv->validate) {
+ u64 features = dev->features;
+
err = drv->validate(dev);
if (err)
goto err;
+
+ /* Did validation change any features? Then write them again. */
+ if (features != dev->features) {
+ err = dev->config->finalize_features(dev);
+ if (err)
+ goto err;
+ }
}
- err = virtio_finalize_features(dev);
+ err = virtio_features_ok(dev);
if (err)
goto err;
@@ -496,7 +512,11 @@ int virtio_device_restore(struct virtio_device *dev)
/* We have a driver! */
virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
- ret = virtio_finalize_features(dev);
+ ret = dev->config->finalize_features(dev);
+ if (ret)
+ goto err;
+
+ ret = virtio_features_ok(dev);
if (ret)
goto err;
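
Probe now follows a single feature-negotiation order for all devices, which is also what lets the legacy-endianness special case go away: write the negotiated features once, let the driver's validate() callback adjust them, rewrite them only if validate() changed something, and set FEATURES_OK last. A sketch of the resulting sequence, not the literal code:

/*	dev->features = driver_features & device_features;
 *	dev->config->finalize_features(dev);	// write features once
 *	if (drv->validate) {
 *		drv->validate(dev);		// may clear feature bits
 *		if (dev->features changed)
 *			dev->config->finalize_features(dev);	// rewrite
 *	}
 *	virtio_features_ok(dev);	// checks access, sets FEATURES_OK
 */
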
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index 38becd8d578c..e7d6b679596d 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -2476,13 +2476,10 @@ static int virtio_mem_init_hotplug(struct virtio_mem *vm)
VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD);
/*
- * We want subblocks to span at least MAX_ORDER_NR_PAGES and
- * pageblock_nr_pages pages. This:
- * - Is required for now for alloc_contig_range() to work reliably -
- * it doesn't properly handle smaller granularity on ZONE_NORMAL.
+ * TODO: once alloc_contig_range() works reliably with pageblock
+ * granularity on ZONE_NORMAL, use pageblock_nr_pages instead.
*/
- sb_size = max_t(uint64_t, MAX_ORDER_NR_PAGES,
- pageblock_nr_pages) * PAGE_SIZE;
+ sb_size = PAGE_SIZE * MAX_ORDER_NR_PAGES;
sb_size = max_t(uint64_t, vm->device_block_size, sb_size);
if (sb_size < memory_block_size_bytes() && !force_bbm) {
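
A worked example for the simplified subblock sizing, assuming 4 KiB base pages and the default MAX_ORDER of 11:

/* MAX_ORDER_NR_PAGES = 1 << (MAX_ORDER - 1) = 1024 pages, so
 * sb_size = 4096 * 1024 = 4 MiB, then raised to the device block
 * size if that is larger. The old max with pageblock_nr_pages was
 * redundant, since pageblock_order can never exceed MAX_ORDER - 1.
 */
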
diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
index 7767a7f0119b..76504559bc25 100644
--- a/drivers/virtio/virtio_vdpa.c
+++ b/drivers/virtio/virtio_vdpa.c
@@ -317,7 +317,7 @@ static int virtio_vdpa_finalize_features(struct virtio_device *vdev)
/* Give virtio_ring a chance to accept features. */
vring_transport_features(vdev);
- return vdpa_set_features(vdpa, vdev->features, false);
+ return vdpa_set_features(vdpa, vdev->features);
}
static const char *virtio_vdpa_bus_name(struct virtio_device *vdev)
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index 3fa40c723e8e..edb0acd0b832 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -169,20 +169,14 @@ undo:
__del_gref(gref);
}
- /* It's possible for the target domain to map the just-allocated grant
- * references by blindly guessing their IDs; if this is done, then
- * __del_gref will leave them in the queue_gref list. They need to be
- * added to the global list so that we can free them when they are no
- * longer referenced.
- */
- if (unlikely(!list_empty(&queue_gref)))
- list_splice_tail(&queue_gref, &gref_list);
mutex_unlock(&gref_mutex);
return rc;
}
static void __del_gref(struct gntalloc_gref *gref)
{
+ unsigned long addr;
+
if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
uint8_t *tmp = kmap(gref->page);
tmp[gref->notify.pgoff] = 0;
@@ -196,21 +190,16 @@ static void __del_gref(struct gntalloc_gref *gref)
gref->notify.flags = 0;
if (gref->gref_id) {
- if (gnttab_query_foreign_access(gref->gref_id))
- return;
-
- if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
- return;
-
- gnttab_free_grant_reference(gref->gref_id);
+ if (gref->page) {
+ addr = (unsigned long)page_to_virt(gref->page);
+ gnttab_end_foreign_access(gref->gref_id, 0, addr);
+ } else
+ gnttab_free_grant_reference(gref->gref_id);
}
gref_size--;
list_del(&gref->next_gref);
- if (gref->page)
- __free_page(gref->page);
-
kfree(gref);
}
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 3729bea0c989..5c83d41766c8 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -134,12 +134,9 @@ struct gnttab_ops {
*/
unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref);
/*
- * Query the status of a grant entry. Ref parameter is reference of
- * queried grant entry, return value is the status of queried entry.
- * Detailed status(writing/reading) can be gotten from the return value
- * by bit operations.
+ * Read the frame number related to a given grant reference.
*/
- int (*query_foreign_access)(grant_ref_t ref);
+ unsigned long (*read_frame)(grant_ref_t ref);
};
struct unmap_refs_callback_data {
@@ -284,22 +281,6 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
-static int gnttab_query_foreign_access_v1(grant_ref_t ref)
-{
- return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing);
-}
-
-static int gnttab_query_foreign_access_v2(grant_ref_t ref)
-{
- return grstatus[ref] & (GTF_reading|GTF_writing);
-}
-
-int gnttab_query_foreign_access(grant_ref_t ref)
-{
- return gnttab_interface->query_foreign_access(ref);
-}
-EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
-
static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly)
{
u16 flags, nflags;
@@ -353,6 +334,16 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
+static unsigned long gnttab_read_frame_v1(grant_ref_t ref)
+{
+ return gnttab_shared.v1[ref].frame;
+}
+
+static unsigned long gnttab_read_frame_v2(grant_ref_t ref)
+{
+ return gnttab_shared.v2[ref].full_page.frame;
+}
+
struct deferred_entry {
struct list_head list;
grant_ref_t ref;
@@ -382,12 +373,9 @@ static void gnttab_handle_deferred(struct timer_list *unused)
spin_unlock_irqrestore(&gnttab_list_lock, flags);
if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) {
put_free_entry(entry->ref);
- if (entry->page) {
- pr_debug("freeing g.e. %#x (pfn %#lx)\n",
- entry->ref, page_to_pfn(entry->page));
- put_page(entry->page);
- } else
- pr_info("freeing g.e. %#x\n", entry->ref);
+ pr_debug("freeing g.e. %#x (pfn %#lx)\n",
+ entry->ref, page_to_pfn(entry->page));
+ put_page(entry->page);
kfree(entry);
entry = NULL;
} else {
@@ -412,9 +400,18 @@ static void gnttab_handle_deferred(struct timer_list *unused)
static void gnttab_add_deferred(grant_ref_t ref, bool readonly,
struct page *page)
{
- struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+ struct deferred_entry *entry;
+ gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
const char *what = KERN_WARNING "leaking";
+ entry = kmalloc(sizeof(*entry), gfp);
+ if (!page) {
+ unsigned long gfn = gnttab_interface->read_frame(ref);
+
+ page = pfn_to_page(gfn_to_pfn(gfn));
+ get_page(page);
+ }
+
if (entry) {
unsigned long flags;
@@ -435,11 +432,21 @@ static void gnttab_add_deferred(grant_ref_t ref, bool readonly,
what, ref, page ? page_to_pfn(page) : -1);
}
+int gnttab_try_end_foreign_access(grant_ref_t ref)
+{
+ int ret = _gnttab_end_foreign_access_ref(ref, 0);
+
+ if (ret)
+ put_free_entry(ref);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(gnttab_try_end_foreign_access);
+
void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
unsigned long page)
{
- if (gnttab_end_foreign_access_ref(ref, readonly)) {
- put_free_entry(ref);
+ if (gnttab_try_end_foreign_access(ref)) {
if (page != 0)
put_page(virt_to_page(page));
} else
@@ -1417,7 +1424,7 @@ static const struct gnttab_ops gnttab_v1_ops = {
.update_entry = gnttab_update_entry_v1,
.end_foreign_access_ref = gnttab_end_foreign_access_ref_v1,
.end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v1,
- .query_foreign_access = gnttab_query_foreign_access_v1,
+ .read_frame = gnttab_read_frame_v1,
};
static const struct gnttab_ops gnttab_v2_ops = {
@@ -1429,7 +1436,7 @@ static const struct gnttab_ops gnttab_v2_ops = {
.update_entry = gnttab_update_entry_v2,
.end_foreign_access_ref = gnttab_end_foreign_access_ref_v2,
.end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2,
- .query_foreign_access = gnttab_query_foreign_access_v2,
+ .read_frame = gnttab_read_frame_v2,
};
static bool gnttab_need_v2(void)
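
gnttab_end_foreign_access() now always takes ownership of the page: if the peer domain still maps the grant, the reference and page are parked on the deferred list and retried from a timer, with the new read_frame() hook recovering the page when the caller passed none. Callers that need to know immediately whether the grant was reclaimed use the new helper instead; a usage sketch, assuming ref and addr were set up when the page was granted:

if (gnttab_try_end_foreign_access(ref)) {
	/* Grant reclaimed: the page is exclusively ours again. */
	free_page(addr);
} else {
	/* The backend still maps it; the page must not be reused. */
}
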
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index 2c890f4f2cbc..72d4e3f193af 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -264,7 +264,7 @@ struct xen_device_domain_owner {
};
static DEFINE_SPINLOCK(dev_domain_list_spinlock);
-static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
+static LIST_HEAD(dev_domain_list);
static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
{
diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 3c9ae156b597..0ca351f30a6d 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -337,8 +337,8 @@ static void free_active_ring(struct sock_mapping *map)
if (!map->active.ring)
return;
- free_pages((unsigned long)map->active.data.in,
- map->active.ring->ring_order);
+ free_pages_exact(map->active.data.in,
+ PAGE_SIZE << map->active.ring->ring_order);
free_page((unsigned long)map->active.ring);
}
@@ -352,8 +352,8 @@ static int alloc_active_ring(struct sock_mapping *map)
goto out;
map->active.ring->ring_order = PVCALLS_RING_ORDER;
- bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- PVCALLS_RING_ORDER);
+ bytes = alloc_pages_exact(PAGE_SIZE << PVCALLS_RING_ORDER,
+ GFP_KERNEL | __GFP_ZERO);
if (!bytes)
goto out;
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index e8bed1cb76ba..df6890681231 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -379,7 +379,14 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
unsigned int nr_pages, grant_ref_t *grefs)
{
int err;
- int i, j;
+ unsigned int i;
+ grant_ref_t gref_head;
+
+ err = gnttab_alloc_grant_references(nr_pages, &gref_head);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "granting access to ring page");
+ return err;
+ }
for (i = 0; i < nr_pages; i++) {
unsigned long gfn;
@@ -389,23 +396,14 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
else
gfn = virt_to_gfn(vaddr);
- err = gnttab_grant_foreign_access(dev->otherend_id, gfn, 0);
- if (err < 0) {
- xenbus_dev_fatal(dev, err,
- "granting access to ring page");
- goto fail;
- }
- grefs[i] = err;
+ grefs[i] = gnttab_claim_grant_reference(&gref_head);
+ gnttab_grant_foreign_access_ref(grefs[i], dev->otherend_id,
+ gfn, 0);
vaddr = vaddr + XEN_PAGE_SIZE;
}
return 0;
-
-fail:
- for (j = 0; j < i; j++)
- gnttab_end_foreign_access_ref(grefs[j], 0);
- return err;
}
EXPORT_SYMBOL_GPL(xenbus_grant_ring);
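
Reserving all grant references up front turns the grant loop into one that cannot fail midway, which is why the old rollback path (the "fail:" label) disappears. The pattern, sketched with stand-in gfn[] and otherend_id values:

grant_ref_t head, ref;
int i;

if (gnttab_alloc_grant_references(nr_pages, &head))
	return -ENOSPC;		/* nothing granted yet, nothing to undo */
for (i = 0; i < nr_pages; i++) {
	ref = gnttab_claim_grant_reference(&head);	/* cannot fail now */
	gnttab_grant_foreign_access_ref(ref, otherend_id, gfn[i], 0);
	grefs[i] = ref;
}
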
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 6aab046c98e2..79df61fe0e59 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -96,12 +96,8 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
dentry, dentry, from_kuid(&init_user_ns, uid),
any);
ret = NULL;
-
- if (d_inode(dentry))
- ret = v9fs_fid_find_inode(d_inode(dentry), uid);
-
/* we'll recheck under lock if there's anything to look in */
- if (!ret && dentry->d_fsdata) {
+ if (dentry->d_fsdata) {
struct hlist_head *h = (struct hlist_head *)&dentry->d_fsdata;
spin_lock(&dentry->d_lock);
@@ -113,6 +109,9 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
}
}
spin_unlock(&dentry->d_lock);
+ } else {
+ if (dentry->d_inode)
+ ret = v9fs_fid_find_inode(dentry->d_inode, uid);
}
return ret;
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 9a10e68c5f30..76956c9d2af9 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -158,18 +158,9 @@ static int v9fs_release_page(struct page *page, gfp_t gfp)
return 1;
}
-/**
- * v9fs_invalidate_page - Invalidate a page completely or partially
- * @page: The page to be invalidated
- * @offset: offset of the invalidated region
- * @length: length of the invalidated region
- */
-
-static void v9fs_invalidate_page(struct page *page, unsigned int offset,
- unsigned int length)
+static void v9fs_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length)
{
- struct folio *folio = page_folio(page);
-
folio_wait_fscache(folio);
}
@@ -249,16 +240,8 @@ static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
return retval;
}
-/**
- * v9fs_launder_page - Writeback a dirty page
- * @page: The page to be cleaned up
- *
- * Returns 0 on success.
- */
-
-static int v9fs_launder_page(struct page *page)
+static int v9fs_launder_folio(struct folio *folio)
{
- struct folio *folio = page_folio(page);
int retval;
if (folio_clear_dirty_for_io(folio)) {
@@ -376,25 +359,25 @@ out:
* Mark a page as having been made dirty and thus needing writeback. We also
* need to pin the cache object to write back to.
*/
-static int v9fs_set_page_dirty(struct page *page)
+static bool v9fs_dirty_folio(struct address_space *mapping, struct folio *folio)
{
- struct v9fs_inode *v9inode = V9FS_I(page->mapping->host);
+ struct v9fs_inode *v9inode = V9FS_I(mapping->host);
- return fscache_set_page_dirty(page, v9fs_inode_cookie(v9inode));
+ return fscache_dirty_folio(mapping, folio, v9fs_inode_cookie(v9inode));
}
#else
-#define v9fs_set_page_dirty __set_page_dirty_nobuffers
+#define v9fs_dirty_folio filemap_dirty_folio
#endif
const struct address_space_operations v9fs_addr_operations = {
.readpage = v9fs_vfs_readpage,
.readahead = v9fs_vfs_readahead,
- .set_page_dirty = v9fs_set_page_dirty,
+ .dirty_folio = v9fs_dirty_folio,
.writepage = v9fs_vfs_writepage,
.write_begin = v9fs_write_begin,
.write_end = v9fs_write_end,
.releasepage = v9fs_release_page,
- .invalidatepage = v9fs_invalidate_page,
- .launder_page = v9fs_launder_page,
+ .invalidate_folio = v9fs_invalidate_folio,
+ .launder_folio = v9fs_launder_folio,
.direct_IO = v9fs_direct_IO,
};
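
This is part of the tree-wide switch from page-based to folio-based address_space operations: invalidatepage(page, unsigned int, unsigned int) becomes invalidate_folio(folio, size_t, size_t), launder_page becomes launder_folio, and set_page_dirty becomes dirty_folio(mapping, folio) returning bool, with filemap_dirty_folio as the stock implementation. A minimal conversion sketch for a filesystem following the same pattern, with hypothetical exfs_* names:

static void exfs_invalidate_folio(struct folio *folio, size_t offset,
				  size_t length)
{
	/* The folio is passed in directly, so the page_folio() lookup
	 * from the old ->invalidatepage() implementation disappears.
	 */
	folio_wait_fscache(folio);
}

static const struct address_space_operations exfs_aops = {
	.invalidate_folio	= exfs_invalidate_folio,
	.dirty_folio		= filemap_dirty_folio,
};
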
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 2a10242c79c7..84c3cf7dffa5 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -228,7 +228,7 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
{
struct v9fs_inode *v9inode;
- v9inode = kmem_cache_alloc(v9fs_inode_cache, GFP_KERNEL);
+ v9inode = alloc_inode_sb(sb, v9fs_inode_cache, GFP_KERNEL);
if (!v9inode)
return NULL;
#ifdef CONFIG_9P_FSCACHE
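
alloc_inode_sb() wraps kmem_cache_alloc() with the superblock so the inode is charged to the superblock's list_lru and memory cgroup accounting; the conversion repeated across the filesystems below is mechanical. A sketch with hypothetical exfs_* names:

static struct inode *exfs_alloc_inode(struct super_block *sb)
{
	struct exfs_inode_info *ei;

	/* Same allocation as before, but accounted against sb. */
	ei = alloc_inode_sb(sb, exfs_inode_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;
	return &ei->vfs_inode;
}
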
diff --git a/fs/Kconfig b/fs/Kconfig
index 7a2b11c0b803..30b751c7f11a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -48,7 +48,7 @@ config FS_DAX
bool "File system based Direct Access (DAX) support"
depends on MMU
depends on !(ARM || MIPS || SPARC)
- select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
+ depends on ZONE_DEVICE || FS_DAX_LIMITED
select FS_IOMAP
select DAX
help
@@ -344,7 +344,7 @@ config LOCKD
config LOCKD_V4
bool
- depends on NFSD_V3 || NFS_V3
+ depends on NFSD || NFS_V3
depends on FILE_LOCKING
default y
@@ -369,8 +369,8 @@ source "fs/ksmbd/Kconfig"
config SMBFS_COMMON
tristate
- default y if CIFS=y
- default m if CIFS=m
+ default y if CIFS=y || SMB_SERVER=y
+ default m if CIFS=m || SMB_SERVER=m
source "fs/coda/Kconfig"
source "fs/afs/Kconfig"
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 4d5ae61580aa..21c6332fa785 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -28,6 +28,16 @@ config BINFMT_ELF
ld.so (check the file <file:Documentation/Changes> for location and
latest version).
+config BINFMT_ELF_KUNIT_TEST
+ bool "Build KUnit tests for ELF binary support" if !KUNIT_ALL_TESTS
+ depends on KUNIT=y && BINFMT_ELF=y
+ default KUNIT_ALL_TESTS
+ help
+ This builds the ELF loader KUnit tests, which try to gather
+ prior bug fixes into a regression test collection. This is really
+ only needed for debugging. Note that with CONFIG_COMPAT=y, the
+ compat_binfmt_elf KUnit test is also created.
+
config COMPAT_BINFMT_ELF
def_bool y
depends on COMPAT && BINFMT_ELF
@@ -36,6 +46,9 @@ config COMPAT_BINFMT_ELF
config ARCH_BINFMT_ELF_STATE
bool
+config ARCH_BINFMT_ELF_EXTRA_PHDRS
+ bool
+
config ARCH_HAVE_ELF_PROT
bool
diff --git a/fs/Makefile b/fs/Makefile
index dab324aea08f..208a74e0b00e 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -96,7 +96,7 @@ obj-$(CONFIG_EXPORTFS) += exportfs/
obj-$(CONFIG_NFSD) += nfsd/
obj-$(CONFIG_LOCKD) += lockd/
obj-$(CONFIG_NLS) += nls/
-obj-$(CONFIG_UNICODE) += unicode/
+obj-y += unicode/
obj-$(CONFIG_SYSV_FS) += sysv/
obj-$(CONFIG_SMBFS_COMMON) += smbfs_common/
obj-$(CONFIG_CIFS) += cifs/
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 5156821bfe6a..561bc748c04a 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -73,7 +73,8 @@ static sector_t _adfs_bmap(struct address_space *mapping, sector_t block)
}
static const struct address_space_operations adfs_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = adfs_readpage,
.writepage = adfs_writepage,
.write_begin = adfs_write_begin,
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index bdbd26e571ed..e8bfc38239cd 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -220,7 +220,7 @@ static struct kmem_cache *adfs_inode_cachep;
static struct inode *adfs_alloc_inode(struct super_block *sb)
{
struct adfs_inode_info *ei;
- ei = kmem_cache_alloc(adfs_inode_cachep, GFP_KERNEL);
+ ei = alloc_inode_sb(sb, adfs_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
return &ei->vfs_inode;
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 75ebd2b576ca..b3f81d84ff4c 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -453,7 +453,8 @@ static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
}
const struct address_space_operations affs_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = affs_readpage,
.writepage = affs_writepage,
.write_begin = affs_write_begin,
@@ -834,7 +835,8 @@ err_bh:
}
const struct address_space_operations affs_aops_ofs = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = affs_readpage_ofs,
//.writepage = affs_writepage_ofs,
.write_begin = affs_write_begin_ofs,
diff --git a/fs/affs/super.c b/fs/affs/super.c
index c609005a9eaa..4c5f30a83336 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -100,7 +100,7 @@ static struct inode *affs_alloc_inode(struct super_block *sb)
{
struct affs_inode_info *i;
- i = kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL);
+ i = alloc_inode_sb(sb, affs_inode_cachep, GFP_KERNEL);
if (!i)
return NULL;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index da9b4f8577a1..932e61e28e5d 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -42,10 +42,11 @@ static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags);
static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags);
-static void afs_dir_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length);
+static void afs_dir_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length);
-static int afs_dir_set_page_dirty(struct page *page)
+static bool afs_dir_dirty_folio(struct address_space *mapping,
+ struct folio *folio)
{
BUG(); /* This should never happen. */
}
@@ -73,9 +74,9 @@ const struct inode_operations afs_dir_inode_operations = {
};
const struct address_space_operations afs_dir_aops = {
- .set_page_dirty = afs_dir_set_page_dirty,
+ .dirty_folio = afs_dir_dirty_folio,
.releasepage = afs_dir_releasepage,
- .invalidatepage = afs_dir_invalidatepage,
+ .invalidate_folio = afs_dir_invalidate_folio,
};
const struct dentry_operations afs_fs_dentry_operations = {
@@ -2019,13 +2020,12 @@ static int afs_dir_releasepage(struct page *subpage, gfp_t gfp_flags)
/*
* Invalidate part or all of a folio.
*/
-static void afs_dir_invalidatepage(struct page *subpage, unsigned int offset,
- unsigned int length)
+static void afs_dir_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length)
{
- struct folio *folio = page_folio(subpage);
struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
- _enter("{%lu},%u,%u", folio_index(folio), offset, length);
+ _enter("{%lu},%zu,%zu", folio->index, offset, length);
BUG_ON(!folio_test_locked(folio));
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 720818a7c166..0f9fdb284a20 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -21,8 +21,8 @@
static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
static int afs_readpage(struct file *file, struct page *page);
static int afs_symlink_readpage(struct file *file, struct page *page);
-static void afs_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length);
+static void afs_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length);
static int afs_releasepage(struct page *page, gfp_t gfp_flags);
static void afs_readahead(struct readahead_control *ractl);
@@ -54,10 +54,10 @@ const struct inode_operations afs_file_inode_operations = {
const struct address_space_operations afs_file_aops = {
.readpage = afs_readpage,
.readahead = afs_readahead,
- .set_page_dirty = afs_set_page_dirty,
- .launder_page = afs_launder_page,
+ .dirty_folio = afs_dirty_folio,
+ .launder_folio = afs_launder_folio,
.releasepage = afs_releasepage,
- .invalidatepage = afs_invalidatepage,
+ .invalidate_folio = afs_invalidate_folio,
.write_begin = afs_write_begin,
.write_end = afs_write_end,
.writepage = afs_writepage,
@@ -67,7 +67,7 @@ const struct address_space_operations afs_file_aops = {
const struct address_space_operations afs_symlink_aops = {
.readpage = afs_symlink_readpage,
.releasepage = afs_releasepage,
- .invalidatepage = afs_invalidatepage,
+ .invalidate_folio = afs_invalidate_folio,
};
static const struct vm_operations_struct afs_vm_ops = {
@@ -427,8 +427,8 @@ int afs_write_inode(struct inode *inode, struct writeback_control *wbc)
* Adjust the dirty region of the page on truncation or full invalidation,
* getting rid of the markers altogether if the region is entirely invalidated.
*/
-static void afs_invalidate_dirty(struct folio *folio, unsigned int offset,
- unsigned int length)
+static void afs_invalidate_dirty(struct folio *folio, size_t offset,
+ size_t length)
{
struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
unsigned long priv;
@@ -485,16 +485,14 @@ full_invalidate:
* - release a page and clean up its private data if offset is 0 (indicating
* the entire page)
*/
-static void afs_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length)
+static void afs_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length)
{
- struct folio *folio = page_folio(page);
-
- _enter("{%lu},%u,%u", folio_index(folio), offset, length);
+ _enter("{%lu},%zu,%zu", folio->index, offset, length);
- BUG_ON(!PageLocked(page));
+ BUG_ON(!folio_test_locked(folio));
- if (PagePrivate(page))
+ if (folio_get_private(folio))
afs_invalidate_dirty(folio, offset, length);
folio_wait_fscache(folio);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index b6f02321fc09..dc5032e10244 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -1521,9 +1521,9 @@ extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *);
* write.c
*/
#ifdef CONFIG_AFS_FSCACHE
-extern int afs_set_page_dirty(struct page *);
+bool afs_dirty_folio(struct address_space *, struct folio *);
#else
-#define afs_set_page_dirty __set_page_dirty_nobuffers
+#define afs_dirty_folio filemap_dirty_folio
#endif
extern int afs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
@@ -1537,7 +1537,7 @@ extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
extern int afs_fsync(struct file *, loff_t, loff_t, int);
extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf);
extern void afs_prune_wb_keys(struct afs_vnode *);
-extern int afs_launder_page(struct page *);
+int afs_launder_folio(struct folio *);
/*
* xattr.c
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 5ec9fd97eccc..7592c0f469f1 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -679,7 +679,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
{
struct afs_vnode *vnode;
- vnode = kmem_cache_alloc(afs_inode_cachep, GFP_KERNEL);
+ vnode = alloc_inode_sb(sb, afs_inode_cachep, GFP_KERNEL);
if (!vnode)
return NULL;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 5e9157d0da29..e1c17081d18e 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -22,9 +22,10 @@ static void afs_write_to_cache(struct afs_vnode *vnode, loff_t start, size_t len
* Mark a page as having been made dirty and thus needing writeback. We also
* need to pin the cache object to write back to.
*/
-int afs_set_page_dirty(struct page *page)
+bool afs_dirty_folio(struct address_space *mapping, struct folio *folio)
{
- return fscache_set_page_dirty(page, afs_vnode_cache(AFS_FS_I(page->mapping->host)));
+ return fscache_dirty_folio(mapping, folio,
+ afs_vnode_cache(AFS_FS_I(mapping->host)));
}
static void afs_folio_start_fscache(bool caching, struct folio *folio)
{
@@ -703,7 +704,7 @@ static int afs_writepages_region(struct address_space *mapping,
struct folio *folio;
struct page *head_page;
ssize_t ret;
- int n;
+ int n, skips = 0;
_enter("%llx,%llx,", start, end);
@@ -754,8 +755,15 @@ static int afs_writepages_region(struct address_space *mapping,
#ifdef CONFIG_AFS_FSCACHE
folio_wait_fscache(folio);
#endif
+ } else {
+ start += folio_size(folio);
}
folio_put(folio);
+ if (wbc->sync_mode == WB_SYNC_NONE) {
+ if (skips >= 5 || need_resched())
+ break;
+ skips++;
+ }
continue;
}
@@ -972,9 +980,8 @@ void afs_prune_wb_keys(struct afs_vnode *vnode)
/*
* Clean up a page during invalidation.
*/
-int afs_launder_page(struct page *subpage)
+int afs_launder_folio(struct folio *folio)
{
- struct folio *folio = page_folio(subpage);
struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
struct iov_iter iter;
struct bio_vec bv[1];
@@ -982,7 +989,7 @@ int afs_launder_page(struct page *subpage)
unsigned int f, t;
int ret = 0;
- _enter("{%lx}", folio_index(folio));
+ _enter("{%lx}", folio->index);
priv = (unsigned long)folio_get_private(folio);
if (folio_clear_dirty_for_io(folio)) {
diff --git a/fs/aio.c b/fs/aio.c
index 4ceba13a7db0..d6b7160c2a77 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -478,7 +478,7 @@ out:
#endif
static const struct address_space_operations aio_ctx_aops = {
- .set_page_dirty = __set_page_dirty_no_writeback,
+ .dirty_folio = noop_dirty_folio,
#if IS_ENABLED(CONFIG_MIGRATION)
.migratepage = aio_migratepage,
#endif
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index c1ba13d19024..b4b3567ac655 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -277,7 +277,7 @@ befs_alloc_inode(struct super_block *sb)
{
struct befs_inode_info *bi;
- bi = kmem_cache_alloc(befs_inode_cachep, GFP_KERNEL);
+ bi = alloc_inode_sb(sb, befs_inode_cachep, GFP_KERNEL);
if (!bi)
return NULL;
return &bi->vfs_inode;
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 7f8544abf636..03139344568f 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -188,7 +188,8 @@ static sector_t bfs_bmap(struct address_space *mapping, sector_t block)
}
const struct address_space_operations bfs_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = bfs_readpage,
.writepage = bfs_writepage,
.write_begin = bfs_write_begin,
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index fd691e4815c5..1926bec2c850 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -239,7 +239,7 @@ static struct kmem_cache *bfs_inode_cachep;
static struct inode *bfs_alloc_inode(struct super_block *sb)
{
struct bfs_inode_info *bi;
- bi = kmem_cache_alloc(bfs_inode_cachep, GFP_KERNEL);
+ bi = alloc_inode_sb(sb, bfs_inode_cachep, GFP_KERNEL);
if (!bi)
return NULL;
return &bi->vfs_inode;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 605017eb9349..6556e13ed95f 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -93,7 +93,7 @@ static int elf_core_dump(struct coredump_params *cprm);
#define ELF_CORE_EFLAGS 0
#endif
-#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
+#define ELF_PAGESTART(_v) ((_v) & ~(int)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
@@ -101,8 +101,10 @@ static struct linux_binfmt elf_format = {
.module = THIS_MODULE,
.load_binary = load_elf_binary,
.load_shlib = load_elf_library,
+#ifdef CONFIG_COREDUMP
.core_dump = elf_core_dump,
.min_coredump = ELF_EXEC_PAGESIZE,
+#endif
};
#define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
@@ -170,8 +172,8 @@ static int padzero(unsigned long elf_bss)
static int
create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
- unsigned long load_addr, unsigned long interp_load_addr,
- unsigned long e_entry)
+ unsigned long interp_load_addr,
+ unsigned long e_entry, unsigned long phdr_addr)
{
struct mm_struct *mm = current->mm;
unsigned long p = bprm->p;
@@ -257,7 +259,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
- NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
+ NEW_AUX_ENT(AT_PHDR, phdr_addr);
NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
NEW_AUX_ENT(AT_BASE, interp_load_addr);
@@ -399,22 +401,21 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
return(map_addr);
}
-static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
+static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
{
- int i, first_idx = -1, last_idx = -1;
+ elf_addr_t min_addr = -1;
+ elf_addr_t max_addr = 0;
+ bool pt_load = false;
+ int i;
for (i = 0; i < nr; i++) {
- if (cmds[i].p_type == PT_LOAD) {
- last_idx = i;
- if (first_idx == -1)
- first_idx = i;
+ if (phdr[i].p_type == PT_LOAD) {
+ min_addr = min(min_addr, ELF_PAGESTART(phdr[i].p_vaddr));
+ max_addr = max(max_addr, phdr[i].p_vaddr + phdr[i].p_memsz);
+ pt_load = true;
}
}
- if (first_idx == -1)
- return 0;
-
- return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
- ELF_PAGESTART(cmds[first_idx].p_vaddr);
+ return pt_load ? (max_addr - min_addr) : 0;
}
static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
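
The rewritten helper no longer assumes the program headers are sorted: it takes the minimum page-aligned start and the maximum end across all PT_LOAD entries instead of subtracting the first entry's start from the last entry's end. A worked example with assumed segments and 4 KiB pages:

/*	PT_LOAD[0]: p_vaddr = 0x404000, p_memsz = 0x2500
 *	PT_LOAD[1]: p_vaddr = 0x400000, p_memsz = 0x1000
 * min_addr = ELF_PAGESTART(0x400000) = 0x400000
 * max_addr = 0x404000 + 0x2500      = 0x406500
 * total    = 0x6500 bytes, regardless of header order; the old
 * first/last-index version would have miscomputed this layout.
 */
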
@@ -823,8 +824,8 @@ static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
static int load_elf_binary(struct linux_binprm *bprm)
{
struct file *interpreter = NULL; /* to shut gcc up */
- unsigned long load_addr = 0, load_bias = 0;
- int load_addr_set = 0;
+ unsigned long load_bias = 0, phdr_addr = 0;
+ int first_pt_load = 1;
unsigned long error;
struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
struct elf_phdr *elf_property_phdata = NULL;
@@ -1074,12 +1075,12 @@ out_free_interp:
vaddr = elf_ppnt->p_vaddr;
/*
- * The first time through the loop, load_addr_set is false:
+ * The first time through the loop, first_pt_load is true:
* layout will be calculated. Once set, use MAP_FIXED since
* we know we've already safely mapped the entire region with
* MAP_FIXED_NOREPLACE in the once-per-binary logic following.
*/
- if (load_addr_set) {
+ if (!first_pt_load) {
elf_flags |= MAP_FIXED;
} else if (elf_ex->e_type == ET_EXEC) {
/*
@@ -1117,7 +1118,7 @@ out_free_interp:
* without MAP_FIXED nor MAP_FIXED_NOREPLACE).
*/
alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
- if (alignment > ELF_MIN_ALIGN) {
+ if (interpreter || alignment > ELF_MIN_ALIGN) {
load_bias = ELF_ET_DYN_BASE;
if (current->flags & PF_RANDOMIZE)
load_bias += arch_mmap_rnd();
@@ -1135,14 +1136,25 @@ out_free_interp:
* is then page aligned.
*/
load_bias = ELF_PAGESTART(load_bias - vaddr);
- }
- /*
- * Calculate the entire size of the ELF mapping (total_size).
- * (Note that load_addr_set is set to true later once the
- * initial mapping is performed.)
- */
- if (!load_addr_set) {
+ /*
+ * Calculate the entire size of the ELF mapping
+ * (total_size), used for the initial mapping,
+ * since first_pt_load is cleared once the
+ * initial mapping has been performed.
+ *
+ * Note that this is only sensible when the LOAD
+ * segments are contiguous (or overlapping). If
+ * used for LOADs that are far apart, this would
+ * cause the holes between LOADs to be mapped,
+ * running the risk of having the mapping fail,
+ * as it would be larger than the ELF file itself.
+ *
+ * As a result, only ET_DYN does this, since
+ * some ET_EXEC (e.g. ia64) may have large virtual
+ * memory holes between LOADs.
+ */
total_size = total_mapping_size(elf_phdata,
elf_ex->e_phnum);
if (!total_size) {
@@ -1159,16 +1171,25 @@ out_free_interp:
goto out_free_dentry;
}
- if (!load_addr_set) {
- load_addr_set = 1;
- load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
+ if (first_pt_load) {
+ first_pt_load = 0;
if (elf_ex->e_type == ET_DYN) {
load_bias += error -
ELF_PAGESTART(load_bias + vaddr);
- load_addr += load_bias;
reloc_func_desc = load_bias;
}
}
+
+ /*
+ * Figure out which segment in the file contains the Program
+ * Header table, and map to the associated memory address.
+ */
+ if (elf_ppnt->p_offset <= elf_ex->e_phoff &&
+ elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) {
+ phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset +
+ elf_ppnt->p_vaddr;
+ }
+
k = elf_ppnt->p_vaddr;
if ((elf_ppnt->p_flags & PF_X) && k < start_code)
start_code = k;
@@ -1204,6 +1225,7 @@ out_free_interp:
}
e_entry = elf_ex->e_entry + load_bias;
+ phdr_addr += load_bias;
elf_bss += load_bias;
elf_brk += load_bias;
start_code += load_bias;
@@ -1267,8 +1289,8 @@ out_free_interp:
goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
- retval = create_elf_tables(bprm, elf_ex,
- load_addr, interp_load_addr, e_entry);
+ retval = create_elf_tables(bprm, elf_ex, interp_load_addr,
+ e_entry, phdr_addr);
if (retval < 0)
goto out;
@@ -1619,17 +1641,16 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
* long file_ofs
* followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
*/
-static int fill_files_note(struct memelfnote *note)
+static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm)
{
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
unsigned count, size, names_ofs, remaining, n;
user_long_t *data;
user_long_t *start_end_ofs;
char *name_base, *name_curpos;
+ int i;
/* *Estimated* file count and total data size needed */
- count = mm->map_count;
+ count = cprm->vma_count;
if (count > UINT_MAX / 64)
return -EINVAL;
size = count * 64;
@@ -1651,11 +1672,12 @@ static int fill_files_note(struct memelfnote *note)
name_base = name_curpos = ((char *)data) + names_ofs;
remaining = size - names_ofs;
count = 0;
- for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+ for (i = 0; i < cprm->vma_count; i++) {
+ struct core_vma_metadata *m = &cprm->vma_meta[i];
struct file *file;
const char *filename;
- file = vma->vm_file;
+ file = m->file;
if (!file)
continue;
filename = file_path(file, name_curpos, remaining);
@@ -1675,9 +1697,9 @@ static int fill_files_note(struct memelfnote *note)
memmove(name_curpos, filename, n);
name_curpos += n;
- *start_end_ofs++ = vma->vm_start;
- *start_end_ofs++ = vma->vm_end;
- *start_end_ofs++ = vma->vm_pgoff;
+ *start_end_ofs++ = m->start;
+ *start_end_ofs++ = m->end;
+ *start_end_ofs++ = m->pgoff;
count++;
}
@@ -1688,7 +1710,7 @@ static int fill_files_note(struct memelfnote *note)
* Count usually is less than mm->map_count,
* we need to move filenames down.
*/
- n = mm->map_count - count;
+ n = cprm->vma_count - count;
if (n != 0) {
unsigned shift_bytes = n * 3 * sizeof(data[0]);
memmove(name_base - shift_bytes, name_base,
@@ -1744,9 +1766,9 @@ static void do_thread_regset_writeback(struct task_struct *task,
static int fill_thread_core_info(struct elf_thread_core_info *t,
const struct user_regset_view *view,
- long signr, size_t *total)
+ long signr, struct elf_note_info *info)
{
- unsigned int i;
+ unsigned int note_iter, view_iter;
/*
* NT_PRSTATUS is the one special case, because the regset data
@@ -1760,17 +1782,17 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
PRSTATUS_SIZE, &t->prstatus);
- *total += notesize(&t->notes[0]);
+ info->size += notesize(&t->notes[0]);
do_thread_regset_writeback(t->task, &view->regsets[0]);
/*
* Each other regset might generate a note too. For each regset
- * that has no core_note_type or is inactive, we leave t->notes[i]
- * all zero and we'll know to skip writing it later.
+ * that has no core_note_type or is inactive, skip it.
*/
- for (i = 1; i < view->n; ++i) {
- const struct user_regset *regset = &view->regsets[i];
+ note_iter = 1;
+ for (view_iter = 1; view_iter < view->n; ++view_iter) {
+ const struct user_regset *regset = &view->regsets[view_iter];
int note_type = regset->core_note_type;
bool is_fpreg = note_type == NT_PRFPREG;
void *data;
@@ -1786,13 +1808,17 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
if (ret < 0)
continue;
+ if (WARN_ON_ONCE(note_iter >= info->thread_notes))
+ break;
+
if (is_fpreg)
SET_PR_FPVALID(&t->prstatus);
- fill_note(&t->notes[i], is_fpreg ? "CORE" : "LINUX",
+ fill_note(&t->notes[note_iter], is_fpreg ? "CORE" : "LINUX",
note_type, ret, data);
- *total += notesize(&t->notes[i]);
+ info->size += notesize(&t->notes[note_iter]);
+ note_iter++;
}
return 1;
@@ -1800,7 +1826,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
static int fill_note_info(struct elfhdr *elf, int phdrs,
struct elf_note_info *info,
- const kernel_siginfo_t *siginfo, struct pt_regs *regs)
+ struct coredump_params *cprm)
{
struct task_struct *dump_task = current;
const struct user_regset_view *view = task_user_regset_view(dump_task);
@@ -1872,7 +1898,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
* Now fill in each thread's information.
*/
for (t = info->thread; t != NULL; t = t->next)
- if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
+ if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, info))
return 0;
/*
@@ -1881,13 +1907,13 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
info->size += notesize(&info->psinfo);
- fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
+ fill_siginfo_note(&info->signote, &info->csigdata, cprm->siginfo);
info->size += notesize(&info->signote);
fill_auxv_note(&info->auxv, current->mm);
info->size += notesize(&info->auxv);
- if (fill_files_note(&info->files) == 0)
+ if (fill_files_note(&info->files, cprm) == 0)
info->size += notesize(&info->files);
return 1;
@@ -2029,7 +2055,7 @@ static int elf_note_info_init(struct elf_note_info *info)
static int fill_note_info(struct elfhdr *elf, int phdrs,
struct elf_note_info *info,
- const kernel_siginfo_t *siginfo, struct pt_regs *regs)
+ struct coredump_params *cprm)
{
struct core_thread *ct;
struct elf_thread_status *ets;
@@ -2050,13 +2076,13 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
list_for_each_entry(ets, &info->thread_list, list) {
int sz;
- sz = elf_dump_thread_status(siginfo->si_signo, ets);
+ sz = elf_dump_thread_status(cprm->siginfo->si_signo, ets);
info->thread_status_size += sz;
}
/* now collect the dump for the current */
memset(info->prstatus, 0, sizeof(*info->prstatus));
- fill_prstatus(&info->prstatus->common, current, siginfo->si_signo);
- elf_core_copy_regs(&info->prstatus->pr_reg, regs);
+ fill_prstatus(&info->prstatus->common, current, cprm->siginfo->si_signo);
+ elf_core_copy_regs(&info->prstatus->pr_reg, cprm->regs);
/* Set up header */
fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
@@ -2072,18 +2098,18 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
sizeof(*info->psinfo), info->psinfo);
- fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
+ fill_siginfo_note(info->notes + 2, &info->csigdata, cprm->siginfo);
fill_auxv_note(info->notes + 3, current->mm);
info->numnote = 4;
- if (fill_files_note(info->notes + info->numnote) == 0) {
+ if (fill_files_note(info->notes + info->numnote, cprm) == 0) {
info->notes_files = info->notes + info->numnote;
info->numnote++;
}
/* Try to dump the FPU. */
- info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
- info->fpu);
+ info->prstatus->pr_fpvalid =
+ elf_core_copy_task_fpregs(current, cprm->regs, info->fpu);
if (info->prstatus->pr_fpvalid)
fill_note(info->notes + info->numnote++,
"CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
@@ -2169,8 +2195,7 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
static int elf_core_dump(struct coredump_params *cprm)
{
int has_dumped = 0;
- int vma_count, segs, i;
- size_t vma_data_size;
+ int segs, i;
struct elfhdr elf;
loff_t offset = 0, dataoff;
struct elf_note_info info = { };
@@ -2178,16 +2203,12 @@ static int elf_core_dump(struct coredump_params *cprm)
struct elf_shdr *shdr4extnum = NULL;
Elf_Half e_phnum;
elf_addr_t e_shoff;
- struct core_vma_metadata *vma_meta;
-
- if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
- return 0;
/*
* The number of segs is recorded in the ELF header as a 16-bit value.
* Please check the DEFAULT_MAX_MAP_COUNT definition when you modify this.
*/
- segs = vma_count + elf_core_extra_phdrs();
+ segs = cprm->vma_count + elf_core_extra_phdrs();
/* for notes section */
segs++;
@@ -2201,7 +2222,7 @@ static int elf_core_dump(struct coredump_params *cprm)
* Collect all the non-memory information about the process for the
* notes. This also sets up the file header.
*/
- if (!fill_note_info(&elf, e_phnum, &info, cprm->siginfo, cprm->regs))
+ if (!fill_note_info(&elf, e_phnum, &info, cprm))
goto end_coredump;
has_dumped = 1;
@@ -2226,7 +2247,7 @@ static int elf_core_dump(struct coredump_params *cprm)
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
- offset += vma_data_size;
+ offset += cprm->vma_data_size;
offset += elf_core_extra_data_size();
e_shoff = offset;
@@ -2246,8 +2267,8 @@ static int elf_core_dump(struct coredump_params *cprm)
goto end_coredump;
/* Write program headers for segments dump */
- for (i = 0; i < vma_count; i++) {
- struct core_vma_metadata *meta = vma_meta + i;
+ for (i = 0; i < cprm->vma_count; i++) {
+ struct core_vma_metadata *meta = cprm->vma_meta + i;
struct elf_phdr phdr;
phdr.p_type = PT_LOAD;
@@ -2284,8 +2305,8 @@ static int elf_core_dump(struct coredump_params *cprm)
/* Align to page */
dump_skip_to(cprm, dataoff);
- for (i = 0; i < vma_count; i++) {
- struct core_vma_metadata *meta = vma_meta + i;
+ for (i = 0; i < cprm->vma_count; i++) {
+ struct core_vma_metadata *meta = cprm->vma_meta + i;
if (!dump_user_range(cprm, meta->start, meta->dump_size))
goto end_coredump;
@@ -2302,7 +2323,6 @@ static int elf_core_dump(struct coredump_params *cprm)
end_coredump:
free_note_info(&info);
kfree(shdr4extnum);
- kvfree(vma_meta);
kfree(phdr4note);
return has_dumped;
}
@@ -2324,3 +2344,7 @@ static void __exit exit_elf_binfmt(void)
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");
+
+#ifdef CONFIG_BINFMT_ELF_KUNIT_TEST
+#include "binfmt_elf_test.c"
+#endif
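
Taken together, the binfmt_elf hunks above stop taking a private VMA snapshot and instead consume the one the coredump core now stashes in coredump_params. A trimmed sketch of the fields this file relies on after the change (member order and omitted fields are illustrative):

struct coredump_params_sketch {
	const kernel_siginfo_t *siginfo;      /* was the siginfo argument */
	struct pt_regs *regs;                 /* was the regs argument */
	struct core_vma_metadata *vma_meta;   /* snapshot owned by the caller */
	int vma_count;                        /* was a local in elf_core_dump() */
	size_t vma_data_size;                 /* was a local in elf_core_dump() */
};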
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index c6f588dc4a9d..08d0c8797828 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -83,8 +83,8 @@ static struct linux_binfmt elf_fdpic_format = {
.load_binary = load_elf_fdpic_binary,
#ifdef CONFIG_ELF_CORE
.core_dump = elf_fdpic_core_dump,
-#endif
.min_coredump = ELF_EXEC_PAGESIZE,
+#endif
};
static int __init init_elf_fdpic_binfmt(void)
@@ -1465,7 +1465,7 @@ static bool elf_fdpic_dump_segments(struct coredump_params *cprm,
static int elf_fdpic_core_dump(struct coredump_params *cprm)
{
int has_dumped = 0;
- int vma_count, segs;
+ int segs;
int i;
struct elfhdr *elf = NULL;
loff_t offset = 0, dataoff;
@@ -1480,8 +1480,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
elf_addr_t e_shoff;
struct core_thread *ct;
struct elf_thread_status *tmp;
- struct core_vma_metadata *vma_meta = NULL;
- size_t vma_data_size;
/* alloc memory for large data structures: too large to be on stack */
elf = kmalloc(sizeof(*elf), GFP_KERNEL);
@@ -1491,9 +1489,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
if (!psinfo)
goto end_coredump;
- if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
- goto end_coredump;
-
for (ct = current->signal->core_state->dumper.next;
ct; ct = ct->next) {
tmp = elf_dump_thread_status(cprm->siginfo->si_signo,
@@ -1513,7 +1508,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
tmp->next = thread_list;
thread_list = tmp;
- segs = vma_count + elf_core_extra_phdrs();
+ segs = cprm->vma_count + elf_core_extra_phdrs();
/* for notes section */
segs++;
@@ -1558,7 +1553,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
/* Page-align dumped data */
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
- offset += vma_data_size;
+ offset += cprm->vma_data_size;
offset += elf_core_extra_data_size();
e_shoff = offset;
@@ -1578,8 +1573,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
goto end_coredump;
/* write program headers for segments dump */
- for (i = 0; i < vma_count; i++) {
- struct core_vma_metadata *meta = vma_meta + i;
+ for (i = 0; i < cprm->vma_count; i++) {
+ struct core_vma_metadata *meta = cprm->vma_meta + i;
struct elf_phdr phdr;
size_t sz;
@@ -1628,7 +1623,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
dump_skip_to(cprm, dataoff);
- if (!elf_fdpic_dump_segments(cprm, vma_meta, vma_count))
+ if (!elf_fdpic_dump_segments(cprm, cprm->vma_meta, cprm->vma_count))
goto end_coredump;
if (!elf_core_write_extra_data(cprm))
@@ -1652,7 +1647,6 @@ end_coredump:
thread_list = thread_list->next;
kfree(tmp);
}
- kvfree(vma_meta);
kfree(phdr4note);
kfree(elf);
kfree(psinfo);
diff --git a/fs/binfmt_elf_test.c b/fs/binfmt_elf_test.c
new file mode 100644
index 000000000000..11d734fec366
--- /dev/null
+++ b/fs/binfmt_elf_test.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <kunit/test.h>
+
+static void total_mapping_size_test(struct kunit *test)
+{
+ struct elf_phdr empty[] = {
+ { .p_type = PT_LOAD, .p_vaddr = 0, .p_memsz = 0, },
+ { .p_type = PT_INTERP, .p_vaddr = 10, .p_memsz = 999999, },
+ };
+ /*
+ * readelf -lW /bin/mount | grep '^ .*0x0' | awk '{print "\t\t{ .p_type = PT_" \
+ * $1 ", .p_vaddr = " $3 ", .p_memsz = " $6 ", },"}'
+ */
+ struct elf_phdr mount[] = {
+ { .p_type = PT_PHDR, .p_vaddr = 0x00000040, .p_memsz = 0x0002d8, },
+ { .p_type = PT_INTERP, .p_vaddr = 0x00000318, .p_memsz = 0x00001c, },
+ { .p_type = PT_LOAD, .p_vaddr = 0x00000000, .p_memsz = 0x0033a8, },
+ { .p_type = PT_LOAD, .p_vaddr = 0x00004000, .p_memsz = 0x005c91, },
+ { .p_type = PT_LOAD, .p_vaddr = 0x0000a000, .p_memsz = 0x0022f8, },
+ { .p_type = PT_LOAD, .p_vaddr = 0x0000d330, .p_memsz = 0x000d40, },
+ { .p_type = PT_DYNAMIC, .p_vaddr = 0x0000d928, .p_memsz = 0x000200, },
+ { .p_type = PT_NOTE, .p_vaddr = 0x00000338, .p_memsz = 0x000030, },
+ { .p_type = PT_NOTE, .p_vaddr = 0x00000368, .p_memsz = 0x000044, },
+ { .p_type = PT_GNU_PROPERTY, .p_vaddr = 0x00000338, .p_memsz = 0x000030, },
+ { .p_type = PT_GNU_EH_FRAME, .p_vaddr = 0x0000b490, .p_memsz = 0x0001ec, },
+ { .p_type = PT_GNU_STACK, .p_vaddr = 0x00000000, .p_memsz = 0x000000, },
+ { .p_type = PT_GNU_RELRO, .p_vaddr = 0x0000d330, .p_memsz = 0x000cd0, },
+ };
+ size_t mount_size = 0xE070;
+ /* https://lore.kernel.org/linux-fsdevel/YfF18Dy85mCntXrx@fractal.localdomain */
+ struct elf_phdr unordered[] = {
+ { .p_type = PT_LOAD, .p_vaddr = 0x00000000, .p_memsz = 0x0033a8, },
+ { .p_type = PT_LOAD, .p_vaddr = 0x0000d330, .p_memsz = 0x000d40, },
+ { .p_type = PT_LOAD, .p_vaddr = 0x00004000, .p_memsz = 0x005c91, },
+ { .p_type = PT_LOAD, .p_vaddr = 0x0000a000, .p_memsz = 0x0022f8, },
+ };
+
+ /* No headers, no size. */
+ KUNIT_EXPECT_EQ(test, total_mapping_size(NULL, 0), 0);
+ KUNIT_EXPECT_EQ(test, total_mapping_size(empty, 0), 0);
+ /* Empty headers, no size. */
+ KUNIT_EXPECT_EQ(test, total_mapping_size(empty, 1), 0);
+ /* No PT_LOAD headers, no size. */
+ KUNIT_EXPECT_EQ(test, total_mapping_size(&empty[1], 1), 0);
+ /* Empty PT_LOAD and non-PT_LOAD headers, no size. */
+ KUNIT_EXPECT_EQ(test, total_mapping_size(empty, 2), 0);
+
+ /* Normal set of PT_LOADS, and expected size. */
+ KUNIT_EXPECT_EQ(test, total_mapping_size(mount, ARRAY_SIZE(mount)), mount_size);
+ /* Unordered PT_LOADs result in same size. */
+ KUNIT_EXPECT_EQ(test, total_mapping_size(unordered, ARRAY_SIZE(unordered)), mount_size);
+}
+
+static struct kunit_case binfmt_elf_test_cases[] = {
+ KUNIT_CASE(total_mapping_size_test),
+ {},
+};
+
+static struct kunit_suite binfmt_elf_test_suite = {
+ .name = KBUILD_MODNAME,
+ .test_cases = binfmt_elf_test_cases,
+};
+
+kunit_test_suite(binfmt_elf_test_suite);
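
As a cross-check on the mount_size constant above: total_mapping_size() is expected to span from the lowest PT_LOAD vaddr to the end of the highest one, so the listed headers work out as in this hypothetical helper (not part of the patch):

static size_t mount_size_by_hand(void)
{
	size_t lowest_start = 0x00000000;             /* first PT_LOAD vaddr */
	size_t highest_end  = 0x0000d330 + 0x000d40;  /* last PT_LOAD: vaddr + memsz */

	return highest_end - lowest_start;            /* 0xE070 == mount_size */
}

The unordered[] case passes because the result depends only on the minimum start and maximum end, not on header order.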
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 5d776f80ee50..626898150011 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -37,6 +37,7 @@
#include <linux/flat.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
+#include <linux/coredump.h>
#include <asm/byteorder.h>
#include <asm/unaligned.h>
@@ -97,13 +98,17 @@ static int load_flat_shared_library(int id, struct lib_info *p);
#endif
static int load_flat_binary(struct linux_binprm *);
+#ifdef CONFIG_COREDUMP
static int flat_core_dump(struct coredump_params *cprm);
+#endif
static struct linux_binfmt flat_format = {
.module = THIS_MODULE,
.load_binary = load_flat_binary,
+#ifdef CONFIG_COREDUMP
.core_dump = flat_core_dump,
.min_coredump = PAGE_SIZE
+#endif
};
/****************************************************************************/
@@ -112,12 +117,14 @@ static struct linux_binfmt flat_format = {
* Currently only a stub function.
*/
+#ifdef CONFIG_COREDUMP
static int flat_core_dump(struct coredump_params *cprm)
{
pr_warn("Process %s:%d received signr %d and should have core dumped\n",
current->comm, current->pid, cprm->siginfo->si_signo);
return 1;
}
+#endif
/****************************************************************************/
/*
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index ddea6acbddde..e1eae7ea823a 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -822,11 +822,7 @@ static int __init init_misc_binfmt(void)
int err = register_filesystem(&bm_fs_type);
if (!err)
insert_binfmt(&misc_format);
- if (!register_sysctl_mount_point("fs/binfmt_misc")) {
- pr_warn("Failed to create fs/binfmt_misc sysctl mount point");
- return -ENOMEM;
- }
- return 0;
+ return err;
}
static void __exit exit_misc_binfmt(void)
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index c9ee579bc5a6..ebc392ea1d74 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -789,11 +789,13 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
if (IS_ERR(eb)) {
free_pref(ref);
return PTR_ERR(eb);
- } else if (!extent_buffer_uptodate(eb)) {
+ }
+ if (!extent_buffer_uptodate(eb)) {
free_pref(ref);
free_extent_buffer(eb);
return -EIO;
}
+
if (lock)
btrfs_tree_read_lock(eb);
if (btrfs_header_level(eb) == 0)
@@ -1335,7 +1337,8 @@ again:
if (IS_ERR(eb)) {
ret = PTR_ERR(eb);
goto out;
- } else if (!extent_buffer_uptodate(eb)) {
+ }
+ if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
ret = -EIO;
goto out;
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 1db24e6d6d90..c22d287e020b 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -124,7 +124,16 @@ void btrfs_put_block_group(struct btrfs_block_group *cache)
{
if (refcount_dec_and_test(&cache->refs)) {
WARN_ON(cache->pinned > 0);
- WARN_ON(cache->reserved > 0);
+ /*
+ * If there was a failure to clean up a log tree, very likely due
+ * to an IO failure on a writeback attempt of one or more of its
+ * extent buffers, we could not do proper (and cheap) unaccounting
+ * of their reserved space, so don't warn on reserved > 0 in that
+ * case.
+ */
+ if (!(cache->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+ !BTRFS_FS_LOG_CLEANUP_ERROR(cache->fs_info))
+ WARN_ON(cache->reserved > 0);
/*
* A block_group shouldn't be on the discard_list anymore.
@@ -1513,8 +1522,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
return;
- if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
+ sb_start_write(fs_info->sb);
+
+ if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
+ sb_end_write(fs_info->sb);
return;
+ }
/*
* Long running balances can keep us blocked here for eternity, so
@@ -1522,6 +1535,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
*/
if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) {
btrfs_exclop_finish(fs_info);
+ sb_end_write(fs_info->sb);
return;
}
@@ -1596,6 +1610,7 @@ next:
spin_unlock(&fs_info->unused_bgs_lock);
mutex_unlock(&fs_info->reclaim_bgs_lock);
btrfs_exclop_finish(fs_info);
+ sb_end_write(fs_info->sb);
}
void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
@@ -1997,6 +2012,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
cache->length = key->offset;
cache->used = btrfs_stack_block_group_used(bgi);
cache->flags = btrfs_stack_block_group_flags(bgi);
+ cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi);
set_free_space_tree_thresholds(cache);
@@ -2279,7 +2295,7 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans,
spin_lock(&block_group->lock);
btrfs_set_stack_block_group_used(&bgi, block_group->used);
btrfs_set_stack_block_group_chunk_objectid(&bgi,
- BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ block_group->global_root_id);
btrfs_set_stack_block_group_flags(&bgi, block_group->flags);
key.objectid = block_group->start;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
@@ -2435,6 +2451,27 @@ next:
btrfs_trans_release_chunk_metadata(trans);
}
+/*
+ * For extent tree v2 we use the block_group_item->chunk_offset to point at our
+ * global root id. For v1 it's always set to BTRFS_FIRST_CHUNK_TREE_OBJECTID.
+ */
+static u64 calculate_global_root_id(struct btrfs_fs_info *fs_info, u64 offset)
+{
+ u64 div = SZ_1G;
+ u64 index;
+
+ if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+ return BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+
+ /* If we have a smaller fs, index based on 128MiB. */
+ if (btrfs_super_total_bytes(fs_info->super_copy) <= (SZ_1G * 10ULL))
+ div = SZ_128M;
+
+ offset = div64_u64(offset, div);
+ div64_u64_rem(offset, fs_info->nr_global_roots, &index);
+ return index;
+}
+
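
To make the mapping concrete, here is a hypothetical evaluation of calculate_global_root_id() for a filesystem larger than 10GiB; the values, including nr_global_roots == 4, are made up for illustration:

/* Block group starting at 5GiB, 4 global roots, fs > 10GiB. */
static u64 global_root_id_example(void)
{
	u64 offset = 5ULL * SZ_1G;
	u64 index;

	offset = div64_u64(offset, SZ_1G);   /* 5: one slot per 1GiB */
	div64_u64_rem(offset, 4, &index);    /* 5 % 4 == 1 */
	return index;                        /* this group maps to global root 1 */
}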
struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans,
u64 bytes_used, u64 type,
u64 chunk_offset, u64 size)
@@ -2455,6 +2492,8 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
cache->flags = type;
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
+ cache->global_root_id = calculate_global_root_id(fs_info, cache->start);
+
if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
cache->needs_free_space = 1;
@@ -2544,6 +2583,19 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
int ret;
bool dirty_bg_running;
+ /*
+ * This can only happen when we are doing a read-only scrub on a
+ * read-only mount. In that case we should not start a new transaction
+ * on a read-only fs, so we skip all chunk allocations here.
+ */
+ if (sb_rdonly(fs_info->sb)) {
+ mutex_lock(&fs_info->ro_block_group_mutex);
+ ret = inc_block_group_ro(cache, 0);
+ mutex_unlock(&fs_info->ro_block_group_mutex);
+ return ret;
+ }
+
do {
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
@@ -2671,7 +2723,7 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
btrfs_set_stack_block_group_used(&bgi, cache->used);
btrfs_set_stack_block_group_chunk_objectid(&bgi,
- BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ cache->global_root_id);
btrfs_set_stack_block_group_flags(&bgi, cache->flags);
write_extent_buffer(leaf, &bgi, bi, sizeof(bgi));
btrfs_mark_buffer_dirty(leaf);
@@ -3974,9 +4026,22 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
* important and indicates a real bug if this happens.
*/
if (WARN_ON(space_info->bytes_pinned > 0 ||
- space_info->bytes_reserved > 0 ||
space_info->bytes_may_use > 0))
btrfs_dump_space_info(info, space_info, 0, 0);
+
+ /*
+ * If there was a failure to clean up a log tree, very likely due
+ * to an IO failure on a writeback attempt of one or more of its
+ * extent buffers, we could not do proper (and cheap) unaccounting
+ * of their reserved space, so don't warn on bytes_reserved > 0 in
+ * that case.
+ */
+ if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+ !BTRFS_FS_LOG_CLEANUP_ERROR(info)) {
+ if (WARN_ON(space_info->bytes_reserved > 0))
+ btrfs_dump_space_info(info, space_info, 0, 0);
+ }
+
WARN_ON(space_info->reclaim_size > 0);
list_del(&space_info->list);
btrfs_sysfs_remove_space_info(space_info);
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 5878b7ce3b78..93aabc68bb6a 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -68,6 +68,7 @@ struct btrfs_block_group {
u64 bytes_super;
u64 flags;
u64 cache_generation;
+ u64 global_root_id;
/*
* If the free space extent count exceeds this number, convert the block
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index b3e46aabc3d8..47e72d72f7d0 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -14,6 +14,13 @@
#include "delayed-inode.h"
/*
+ * Since we search a directory based on f_pos (struct dir_context::pos), and
+ * '.' and '..' have f_pos of 0 and 1 respectively, everything else has to
+ * start at 2 (see btrfs_real_readdir() and dir_emit_dots()).
+ */
+#define BTRFS_DIR_START_INDEX 2
+
+/*
* ordered_data_close is set by truncate when a file that used
* to have good data has been truncated to zero. When it is set
* the btrfs file release call will add this inode to the
@@ -173,8 +180,9 @@ struct btrfs_inode {
u64 disk_i_size;
/*
- * if this is a directory then index_cnt is the counter for the index
- * number for new files that are created
+ * If this is a directory then index_cnt is the counter for the index
+ * number for new files that are created. For an empty directory, this
+ * must be initialized to BTRFS_DIR_START_INDEX.
*/
u64 index_cnt;
@@ -333,6 +341,36 @@ static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode)
spin_unlock(&inode->lock);
}
+/*
+ * Should be called while holding the inode's VFS lock in exclusive mode or in a
+ * context where no one else can access the inode concurrently (during inode
+ * creation or when loading an inode from disk).
+ */
+static inline void btrfs_set_inode_full_sync(struct btrfs_inode *inode)
+{
+ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+ /*
+ * The inode may have been part of a reflink operation in the last
+ * transaction that modified it, and then a fsync has reset the
+ * last_reflink_trans to avoid subsequent fsyncs in the same
+ * transaction to do unnecessary work. So update last_reflink_trans
+ * to the last_trans value (we have to be pessimistic and assume a
+ * reflink happened).
+ *
+ * The ->last_trans is protected by the inode's spinlock and we can
+ * have a concurrent ordered extent completion update it. Also set
+ * last_reflink_trans to ->last_trans only if the former is less than
+ * the latter, because we can be called in a context where
+ * last_reflink_trans was set to the current transaction generation
+ * while ->last_trans was not yet updated in the current transaction,
+ * and therefore has a lower value.
+ */
+ spin_lock(&inode->lock);
+ if (inode->last_reflink_trans < inode->last_trans)
+ inode->last_reflink_trans = inode->last_trans;
+ spin_unlock(&inode->lock);
+}
+
static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
{
bool ret = false;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 7e9f90fa0388..abac86a75840 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -78,7 +78,6 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mutex.h>
-#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/mm.h>
#include <linux/string.h>
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 71e5b2e9a1ba..be476f094300 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -219,7 +219,7 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *b
bi_size += bvec->bv_len;
if (bio->bi_status)
- cb->errors = 1;
+ cb->status = bio->bi_status;
ASSERT(bi_size && bi_size <= cb->compressed_len);
last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
@@ -234,7 +234,7 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *b
return last_io;
}
-static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bio)
+static void finish_compressed_bio_read(struct compressed_bio *cb)
{
unsigned int index;
struct page *page;
@@ -247,19 +247,18 @@ static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bi
}
/* Do io completion on the original bio */
- if (cb->errors) {
- bio_io_error(cb->orig_bio);
+ if (cb->status != BLK_STS_OK) {
+ cb->orig_bio->bi_status = cb->status;
+ bio_endio(cb->orig_bio);
} else {
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
- ASSERT(bio);
- ASSERT(!bio->bi_status);
/*
* We have already verified the checksum; set page checked so
* the end_io handlers know about it.
*/
- ASSERT(!bio_flagged(bio, BIO_CLONED));
+ ASSERT(!bio_flagged(cb->orig_bio, BIO_CLONED));
bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) {
u64 bvec_start = page_offset(bvec->bv_page) +
bvec->bv_offset;
@@ -308,7 +307,7 @@ static void end_compressed_bio_read(struct bio *bio)
* Some IO in this cb have failed, just skip checksum as there
* is no way it could be correct.
*/
- if (cb->errors == 1)
+ if (cb->status != BLK_STS_OK)
goto csum_failed;
inode = cb->inode;
@@ -324,8 +323,8 @@ static void end_compressed_bio_read(struct bio *bio)
csum_failed:
if (ret)
- cb->errors = 1;
- finish_compressed_bio_read(cb, bio);
+ cb->status = errno_to_blk_status(ret);
+ finish_compressed_bio_read(cb);
out:
bio_put(bio);
}
@@ -342,11 +341,12 @@ static noinline void end_compressed_writeback(struct inode *inode,
unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
struct page *pages[16];
unsigned long nr_pages = end_index - index + 1;
+ const int errno = blk_status_to_errno(cb->status);
int i;
int ret;
- if (cb->errors)
- mapping_set_error(inode->i_mapping, -EIO);
+ if (errno)
+ mapping_set_error(inode->i_mapping, errno);
while (nr_pages > 0) {
ret = find_get_pages_contig(inode->i_mapping, index,
@@ -358,7 +358,7 @@ static noinline void end_compressed_writeback(struct inode *inode,
continue;
}
for (i = 0; i < ret; i++) {
- if (cb->errors)
+ if (errno)
SetPageError(pages[i]);
btrfs_page_clamp_clear_writeback(fs_info, pages[i],
cb->start, cb->len);
@@ -381,9 +381,10 @@ static void finish_compressed_bio_write(struct compressed_bio *cb)
*/
btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL,
cb->start, cb->start + cb->len - 1,
- !cb->errors);
+ cb->status == BLK_STS_OK);
- end_compressed_writeback(inode, cb);
+ if (cb->writeback)
+ end_compressed_writeback(inode, cb);
/* Note, our inode could be gone now */
/*
@@ -506,7 +507,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
struct page **compressed_pages,
unsigned int nr_pages,
unsigned int write_flags,
- struct cgroup_subsys_state *blkcg_css)
+ struct cgroup_subsys_state *blkcg_css,
+ bool writeback)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct bio *bio = NULL;
@@ -524,13 +526,14 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
if (!cb)
return BLK_STS_RESOURCE;
refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
- cb->errors = 0;
+ cb->status = BLK_STS_OK;
cb->inode = &inode->vfs_inode;
cb->start = start;
cb->len = len;
cb->mirror_num = 0;
cb->compressed_pages = compressed_pages;
cb->compressed_len = compressed_len;
+ cb->writeback = writeback;
cb->orig_bio = NULL;
cb->nr_pages = nr_pages;
@@ -591,7 +594,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
if (submit) {
if (!skip_sum) {
- ret = btrfs_csum_one_bio(inode, bio, start, 1);
+ ret = btrfs_csum_one_bio(inode, bio, start, true);
if (ret)
goto finish_cb;
}
@@ -808,7 +811,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
u64 em_len;
u64 em_start;
struct extent_map *em;
- blk_status_t ret = BLK_STS_RESOURCE;
+ blk_status_t ret;
int faili = 0;
u8 *sums;
@@ -821,17 +824,21 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize);
read_unlock(&em_tree->lock);
- if (!em)
- return BLK_STS_IOERR;
+ if (!em) {
+ ret = BLK_STS_IOERR;
+ goto out;
+ }
ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
compressed_len = em->block_len;
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
- if (!cb)
+ if (!cb) {
+ ret = BLK_STS_RESOURCE;
goto out;
+ }
refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
- cb->errors = 0;
+ cb->status = BLK_STS_OK;
cb->inode = inode;
cb->mirror_num = mirror_num;
sums = cb->sums;
@@ -851,8 +858,10 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
GFP_NOFS);
- if (!cb->compressed_pages)
+ if (!cb->compressed_pages) {
+ ret = BLK_STS_RESOURCE;
goto fail1;
+ }
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS);
@@ -938,7 +947,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
comp_bio = NULL;
}
}
- return 0;
+ return BLK_STS_OK;
fail2:
while (faili >= 0) {
@@ -951,6 +960,8 @@ fail1:
kfree(cb);
out:
free_extent_map(em);
+ bio->bi_status = ret;
+ bio_endio(bio);
return ret;
finish_cb:
if (comp_bio) {
@@ -970,7 +981,7 @@ finish_cb:
*/
ASSERT(refcount_read(&cb->pending_sectors));
/* Now we are the only one referring @cb, can finish it safely. */
- finish_compressed_bio_read(cb, NULL);
+ finish_compressed_bio_read(cb);
return ret;
}
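
The compression.c hunks replace the cb->errors flag with a blk_status_t, so errors now round-trip through the standard conversion helpers. A minimal sketch of the flow, modelled on the calls used above and assuming the usual errno_to_blk_status()/blk_status_to_errno() semantics:

static void propagate_status_sketch(struct compressed_bio *cb, int ret)
{
	if (ret)
		cb->status = errno_to_blk_status(ret);  /* e.g. -EIO -> BLK_STS_IOERR */

	if (cb->status != BLK_STS_OK)
		mapping_set_error(cb->inode->i_mapping,
				  blk_status_to_errno(cb->status));
}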
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 56eef0821e3e..ac5b20731d2a 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -22,6 +22,8 @@ struct btrfs_inode;
/* Maximum length of compressed data stored on disk */
#define BTRFS_MAX_COMPRESSED (SZ_128K)
+static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
+
/* Maximum size of data before compression */
#define BTRFS_MAX_UNCOMPRESSED (SZ_128K)
@@ -52,8 +54,11 @@ struct compressed_bio {
/* The compression algorithm for this bio */
u8 compress_type;
+ /* Whether this is a write for writeback. */
+ bool writeback;
+
/* IO errors */
- u8 errors;
+ blk_status_t status;
int mirror_num;
/* for reads, this is the bio we are copying the data into */
@@ -95,7 +100,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
struct page **compressed_pages,
unsigned int nr_pages,
unsigned int write_flags,
- struct cgroup_subsys_state *blkcg_css);
+ struct cgroup_subsys_state *blkcg_css,
+ bool writeback);
blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index a7db3f6f1b7b..0eecf98d0abb 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -846,9 +846,11 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
btrfs_header_owner(parent),
btrfs_node_ptr_generation(parent, slot),
level - 1, &first_key);
- if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
+ if (IS_ERR(eb))
+ return eb;
+ if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
- eb = ERR_PTR(-EIO);
+ return ERR_PTR(-EIO);
}
return eb;
@@ -1436,13 +1438,13 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
/* now we're allowed to do a blocking uptodate check */
ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
- if (!ret) {
- *eb_ret = tmp;
- return 0;
+ if (ret) {
+ free_extent_buffer(tmp);
+ btrfs_release_path(p);
+ return -EIO;
}
- free_extent_buffer(tmp);
- btrfs_release_path(p);
- return -EIO;
+ *eb_ret = tmp;
+ return 0;
}
/*
@@ -1460,19 +1462,19 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
ret = -EAGAIN;
tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
gen, parent_level - 1, &first_key);
- if (!IS_ERR(tmp)) {
- /*
- * If the read above didn't mark this buffer up to date,
- * it will never end up being up to date. Set ret to EIO now
- * and give up so that our caller doesn't loop forever
- * on our EAGAINs.
- */
- if (!extent_buffer_uptodate(tmp))
- ret = -EIO;
- free_extent_buffer(tmp);
- } else {
- ret = PTR_ERR(tmp);
+ if (IS_ERR(tmp)) {
+ btrfs_release_path(p);
+ return PTR_ERR(tmp);
}
+ /*
+ * If the read above didn't mark this buffer up to date,
+ * it will never end up being up to date. Set ret to EIO now
+ * and give up so that our caller doesn't loop forever
+ * on our EAGAINs.
+ */
+ if (!extent_buffer_uptodate(tmp))
+ ret = -EIO;
+ free_extent_buffer(tmp);
btrfs_release_path(p);
return ret;
@@ -2990,16 +2992,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (free_space < data_size)
goto out_unlock;
- /* cow and double check */
ret = btrfs_cow_block(trans, root, right, upper,
slot + 1, &right, BTRFS_NESTING_RIGHT_COW);
if (ret)
goto out_unlock;
- free_space = btrfs_leaf_free_space(right);
- if (free_space < data_size)
- goto out_unlock;
-
left_nritems = btrfs_header_nritems(left);
if (left_nritems == 0)
goto out_unlock;
@@ -3224,7 +3221,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
- /* cow and double check */
ret = btrfs_cow_block(trans, root, left,
path->nodes[1], slot - 1, &left,
BTRFS_NESTING_LEFT_COW);
@@ -3235,12 +3231,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
- free_space = btrfs_leaf_free_space(left);
- if (free_space < data_size) {
- ret = 1;
- goto out;
- }
-
if (check_sibling_keys(left, right)) {
ret = -EUCLEAN;
goto out;
@@ -4170,24 +4160,22 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *leaf;
- u32 last_off;
- u32 dsize = 0;
int ret = 0;
int wret;
- int i;
u32 nritems;
leaf = path->nodes[0];
- last_off = btrfs_item_offset(leaf, slot + nr - 1);
-
- for (i = 0; i < nr; i++)
- dsize += btrfs_item_size(leaf, slot + i);
-
nritems = btrfs_header_nritems(leaf);
if (slot + nr != nritems) {
- int data_end = leaf_data_end(leaf);
+ const u32 last_off = btrfs_item_offset(leaf, slot + nr - 1);
+ const int data_end = leaf_data_end(leaf);
struct btrfs_map_token token;
+ u32 dsize = 0;
+ int i;
+
+ for (i = 0; i < nr; i++)
+ dsize += btrfs_item_size(leaf, slot + i);
memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
data_end + dsize,
@@ -4227,24 +4215,50 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
fixup_low_keys(path, &disk_key, 1);
}
- /* delete the leaf if it is mostly empty */
+ /*
+ * Try to delete the leaf if it is mostly empty. We do this by
+ * trying to move all its items into its left and right neighbours.
+ * If we can't move all the items, then we don't delete it - it's
+ * not ideal, but future insertions might fill the leaf with more
+ * items, or items from other leaves might be moved later into our
+ * leaf due to deletions on those leaves.
+ */
if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
+ u32 min_push_space;
+
/* push_leaf_left fixes the path.
* Make sure the path still points to our leaf
* for a possible call to del_ptr below.
*/
slot = path->slots[1];
atomic_inc(&leaf->refs);
-
- wret = push_leaf_left(trans, root, path, 1, 1,
- 1, (u32)-1);
+ /*
+ * We want to be able to at least push one item to the
+ * left neighbour leaf, and that's the first item.
+ */
+ min_push_space = sizeof(struct btrfs_item) +
+ btrfs_item_size(leaf, 0);
+ wret = push_leaf_left(trans, root, path, 0,
+ min_push_space, 1, (u32)-1);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (path->nodes[0] == leaf &&
btrfs_header_nritems(leaf)) {
- wret = push_leaf_right(trans, root, path, 1,
- 1, 1, 0);
+ /*
+ * If we were not able to push all items from our
+ * leaf to its left neighbour, then attempt to
+ * either push all the remaining items to the
+ * right neighbour or none. There's no advantage
+ * in pushing only some items, instead of all, as
+ * it's pointless to end up with a leaf having
+ * too few items while the neighbours can be full
+ * or nearly full.
+ */
+ nritems = btrfs_header_nritems(leaf);
+ min_push_space = leaf_space_used(leaf, 0, nritems);
+ wret = push_leaf_right(trans, root, path, 0,
+ min_push_space, 1, 0);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b4a9b1c58d22..b7631b88426e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -49,6 +49,7 @@ extern struct kmem_cache *btrfs_free_space_bitmap_cachep;
struct btrfs_ordered_sum;
struct btrfs_ref;
struct btrfs_bio;
+struct btrfs_ioctl_encoded_io_args;
#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
@@ -145,6 +146,11 @@ enum {
BTRFS_FS_STATE_DUMMY_FS_INFO,
BTRFS_FS_STATE_NO_CSUMS,
+
+ /* Indicates there was an error cleaning up a log tree. */
+ BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
+
+ BTRFS_FS_STATE_COUNT
};
#define BTRFS_BACKREF_REV_MAX 256
@@ -271,8 +277,14 @@ struct btrfs_super_block {
/* the UUID written into btree blocks */
u8 metadata_uuid[BTRFS_FSID_SIZE];
+ /* Extent tree v2 */
+ __le64 block_group_root;
+ __le64 block_group_root_generation;
+ u8 block_group_root_level;
+
/* future expansion */
- __le64 reserved[28];
+ u8 reserved8[7];
+ __le64 reserved[25];
u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
@@ -297,6 +309,26 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
+#ifdef CONFIG_BTRFS_DEBUG
+/*
+ * Extent tree v2 is supported only with CONFIG_BTRFS_DEBUG
+ */
+#define BTRFS_FEATURE_INCOMPAT_SUPP \
+ (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
+ BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
+ BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
+ BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
+ BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
+ BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD | \
+ BTRFS_FEATURE_INCOMPAT_RAID56 | \
+ BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
+ BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
+ BTRFS_FEATURE_INCOMPAT_NO_HOLES | \
+ BTRFS_FEATURE_INCOMPAT_METADATA_UUID | \
+ BTRFS_FEATURE_INCOMPAT_RAID1C34 | \
+ BTRFS_FEATURE_INCOMPAT_ZONED | \
+ BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2)
+#else
#define BTRFS_FEATURE_INCOMPAT_SUPP \
(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
@@ -311,6 +343,7 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
BTRFS_FEATURE_INCOMPAT_METADATA_UUID | \
BTRFS_FEATURE_INCOMPAT_RAID1C34 | \
BTRFS_FEATURE_INCOMPAT_ZONED)
+#endif
#define BTRFS_FEATURE_INCOMPAT_SAFE_SET \
(BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
@@ -599,6 +632,9 @@ enum {
/* Indicate that we want the transaction kthread to commit right now. */
BTRFS_FS_COMMIT_TRANS,
+ /* Indicate we have half-completed snapshot deletions pending. */
+ BTRFS_FS_UNFINISHED_DROPS,
+
#if BITS_PER_LONG == 32
/* Indicate if an error/warn message has been printed on 32-bit systems */
BTRFS_FS_32BIT_ERROR,
@@ -630,6 +666,7 @@ struct btrfs_fs_info {
struct btrfs_root *quota_root;
struct btrfs_root *uuid_root;
struct btrfs_root *data_reloc_root;
+ struct btrfs_root *block_group_root;
/* the log root tree is a directory of all the other log roots */
struct btrfs_root *log_root_tree;
@@ -1024,6 +1061,8 @@ struct btrfs_fs_info {
spinlock_t relocation_bg_lock;
u64 data_reloc_bg;
+ u64 nr_global_roots;
+
spinlock_t zone_active_bgs_lock;
struct list_head zone_active_bgs;
@@ -1103,8 +1142,15 @@ enum {
BTRFS_ROOT_QGROUP_FLUSHING,
/* We started the orphan cleanup for this root. */
BTRFS_ROOT_ORPHAN_CLEANUP,
+ /* This root has a drop operation that was started previously. */
+ BTRFS_ROOT_UNFINISHED_DROP,
};
+static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
+{
+ clear_and_wake_up_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags);
+}
+
/*
* Record swapped tree blocks of a subvolume tree for delayed subtree trace
* code. For detail check comment in fs/btrfs/qgroup.c.
@@ -1596,25 +1642,25 @@ DECLARE_BTRFS_SETGET_BITS(64)
static inline u##bits btrfs_##name(const struct extent_buffer *eb, \
const type *s) \
{ \
- BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
+ static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
return btrfs_get_##bits(eb, s, offsetof(type, member)); \
} \
static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \
u##bits val) \
{ \
- BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
+ static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
btrfs_set_##bits(eb, s, offsetof(type, member), val); \
} \
static inline u##bits btrfs_token_##name(struct btrfs_map_token *token, \
const type *s) \
{ \
- BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
+ static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
return btrfs_get_token_##bits(token, s, offsetof(type, member));\
} \
static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\
type *s, u##bits val) \
{ \
- BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
+ static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
btrfs_set_token_##bits(token, s, offsetof(type, member), val); \
}
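
The likely motivation for swapping BUILD_BUG_ON() for static_assert() in these helpers is that static_assert() also works outside function bodies, e.g. at file scope. A minimal illustration with a hypothetical struct:

struct demo { u64 total_bytes; };

/* Legal at file scope; BUILD_BUG_ON() needs to be inside a function. */
static_assert(sizeof(u64) == sizeof(((struct demo *)0)->total_bytes));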
@@ -1645,8 +1691,8 @@ static inline void btrfs_set_##name(type *s, u##bits val) \
static inline u64 btrfs_device_total_bytes(const struct extent_buffer *eb,
struct btrfs_dev_item *s)
{
- BUILD_BUG_ON(sizeof(u64) !=
- sizeof(((struct btrfs_dev_item *)0))->total_bytes);
+ static_assert(sizeof(u64) ==
+ sizeof(((struct btrfs_dev_item *)0))->total_bytes);
return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item,
total_bytes));
}
@@ -1654,8 +1700,8 @@ static inline void btrfs_set_device_total_bytes(const struct extent_buffer *eb,
struct btrfs_dev_item *s,
u64 val)
{
- BUILD_BUG_ON(sizeof(u64) !=
- sizeof(((struct btrfs_dev_item *)0))->total_bytes);
+ static_assert(sizeof(u64) ==
+ sizeof(((struct btrfs_dev_item *)0))->total_bytes);
WARN_ON(!IS_ALIGNED(val, eb->fs_info->sectorsize));
btrfs_set_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes), val);
}
@@ -2315,6 +2361,17 @@ BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
num_devices, 64);
+/*
+ * For extent tree v2 we overload the extent root with the block group root, as
+ * we will have multiple extent roots.
+ */
+BTRFS_SETGET_STACK_FUNCS(backup_block_group_root, struct btrfs_root_backup,
+ extent_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_gen, struct btrfs_root_backup,
+ extent_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_level,
+ struct btrfs_root_backup, extent_root_level, 8);
+
/* struct btrfs_balance_item */
BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
@@ -2449,6 +2506,13 @@ BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
uuid_tree_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_block_group_root, struct btrfs_super_block,
+ block_group_root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_block_group_root_generation,
+ struct btrfs_super_block,
+ block_group_root_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_block_group_root_level, struct btrfs_super_block,
+ block_group_root_level, 8);
int btrfs_super_csum_size(const struct btrfs_super_block *s);
const char *btrfs_super_csum_name(u16 csum_type);
@@ -2826,7 +2890,8 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
-int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
+int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
+ u64 disk_num_bytes);
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
u64 start, u64 end);
@@ -3142,7 +3207,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums);
blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
- u64 file_start, int contig);
+ u64 offset, bool one_ordered);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit);
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
@@ -3243,6 +3308,11 @@ int btrfs_writepage_cow_fixup(struct page *page);
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
struct page *page, u64 start,
u64 end, bool uptodate);
+ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
+ struct btrfs_ioctl_encoded_io_args *encoded);
+ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
+ const struct btrfs_ioctl_encoded_io_args *encoded);
+
extern const struct dentry_operations btrfs_dentry_operations;
extern const struct iomap_ops btrfs_dio_iomap_ops;
extern const struct iomap_dio_ops btrfs_dio_ops;
@@ -3288,7 +3358,7 @@ void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
int __init btrfs_auto_defrag_init(void);
void __cold btrfs_auto_defrag_exit(void);
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
- struct btrfs_inode *inode);
+ struct btrfs_inode *inode, u32 extent_thresh);
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
@@ -3305,6 +3375,8 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
struct btrfs_trans_handle **trans_out);
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, u64 start, u64 end);
+ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
+ const struct btrfs_ioctl_encoded_io_args *encoded);
int btrfs_release_file(struct inode *inode, struct file *file);
int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
size_t num_pages, loff_t pos, size_t write_bytes,
@@ -3593,6 +3665,9 @@ do { \
#define BTRFS_FS_ERROR(fs_info) (unlikely(test_bit(BTRFS_FS_STATE_ERROR, \
&(fs_info)->fs_state)))
+#define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info) \
+ (unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, \
+ &(fs_info)->fs_state)))
__printf(5, 6)
__cold
@@ -3758,7 +3833,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
-int btrfs_recover_relocation(struct btrfs_root *root);
+int btrfs_recover_relocation(struct btrfs_fs_info *fs_info);
int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len);
int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
@@ -3870,5 +3945,8 @@ static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
#define PageOrdered(page) PagePrivate2(page)
#define SetPageOrdered(page) SetPagePrivate2(page)
#define ClearPageOrdered(page) ClearPagePrivate2(page)
+#define folio_test_ordered(folio) folio_test_private_2(folio)
+#define folio_set_ordered(folio) folio_set_private_2(folio)
+#define folio_clear_ordered(folio) folio_clear_private_2(folio)
#endif
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
index fb46a28f5065..bd8267c4687d 100644
--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -270,11 +270,11 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
}
static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
- u64 num_bytes, u64 *meta_reserve,
- u64 *qgroup_reserve)
+ u64 num_bytes, u64 disk_num_bytes,
+ u64 *meta_reserve, u64 *qgroup_reserve)
{
u64 nr_extents = count_max_extents(num_bytes);
- u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
+ u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
u64 inode_update = btrfs_calc_metadata_size(fs_info, 1);
*meta_reserve = btrfs_calc_insert_metadata_size(fs_info,
@@ -288,7 +288,8 @@ static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
*qgroup_reserve = nr_extents * fs_info->nodesize;
}
-int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
+int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
+ u64 disk_num_bytes)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -318,6 +319,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
}
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
+ disk_num_bytes = ALIGN(disk_num_bytes, fs_info->sectorsize);
/*
* We always want to do it this way, every other way is wrong and ends
@@ -329,8 +331,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
* everything out and try again, which is bad. This way we just
* over-reserve slightly, and clean up the mess when we are done.
*/
- calc_inode_reservations(fs_info, num_bytes, &meta_reserve,
- &qgroup_reserve);
+ calc_inode_reservations(fs_info, num_bytes, disk_num_bytes,
+ &meta_reserve, &qgroup_reserve);
ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
if (ret)
return ret;
@@ -349,7 +351,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
spin_lock(&inode->lock);
nr_extents = count_max_extents(num_bytes);
btrfs_mod_outstanding_extents(inode, nr_extents);
- inode->csum_bytes += num_bytes;
+ inode->csum_bytes += disk_num_bytes;
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
spin_unlock(&inode->lock);
@@ -454,7 +456,7 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
ret = btrfs_check_data_free_space(inode, reserved, start, len);
if (ret < 0)
return ret;
- ret = btrfs_delalloc_reserve_metadata(inode, len);
+ ret = btrfs_delalloc_reserve_metadata(inode, len, len);
if (ret < 0) {
btrfs_free_reserved_data_space(inode, *reserved, start, len);
extent_changeset_free(*reserved);
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 62b9651ea662..71fd99b48283 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -243,6 +243,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
struct btrfs_device *srcdev,
struct btrfs_device **device_out)
{
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *device;
struct block_device *bdev;
struct rcu_string *name;
@@ -271,7 +272,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
sync_blockdev(bdev);
- list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
+ list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (device->bdev == bdev) {
btrfs_err(fs_info,
"target device is in the filesystem!");
@@ -302,6 +303,9 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
goto error;
}
rcu_assign_pointer(device->name, name);
+ ret = lookup_bdev(device_path, &device->devt);
+ if (ret)
+ goto error;
set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
device->generation = 0;
@@ -320,17 +324,17 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
device->mode = FMODE_EXCL;
device->dev_stats_valid = 1;
set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
- device->fs_devices = fs_info->fs_devices;
+ device->fs_devices = fs_devices;
ret = btrfs_get_dev_zone_info(device, false);
if (ret)
goto error;
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
- list_add(&device->dev_list, &fs_info->fs_devices->devices);
- fs_info->fs_devices->num_devices++;
- fs_info->fs_devices->open_devices++;
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ mutex_lock(&fs_devices->device_list_mutex);
+ list_add(&device->dev_list, &fs_devices->devices);
+ fs_devices->num_devices++;
+ fs_devices->open_devices++;
+ mutex_unlock(&fs_devices->device_list_mutex);
*device_out = device;
return 0;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 87a5addbedf6..b30309f187cf 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -441,17 +441,31 @@ static int csum_one_extent_buffer(struct extent_buffer *eb)
else
ret = btrfs_check_leaf_full(eb);
- if (ret < 0) {
- btrfs_print_tree(eb, 0);
+ if (ret < 0)
+ goto error;
+
+ /*
+ * Also check the generation: the eb that reached here must be newer
+ * than the last committed one, or something has gone seriously wrong.
+ */
+ if (unlikely(btrfs_header_generation(eb) <= fs_info->last_trans_committed)) {
+ ret = -EUCLEAN;
btrfs_err(fs_info,
- "block=%llu write time tree block corruption detected",
- eb->start);
- WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
- return ret;
+ "block=%llu bad generation, have %llu expect > %llu",
+ eb->start, btrfs_header_generation(eb),
+ fs_info->last_trans_committed);
+ goto error;
}
write_extent_buffer(eb, result, 0, fs_info->csum_size);
return 0;
+
+error:
+ btrfs_print_tree(eb, 0);
+ btrfs_err(fs_info, "block=%llu write time tree block corruption detected",
+ eb->start);
+ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ return ret;
}
/* Checksum all dirty extent buffers in one bio_vec */
@@ -999,41 +1013,40 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags)
return try_release_extent_buffer(page);
}
-static void btree_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length)
+static void btree_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length)
{
struct extent_io_tree *tree;
- tree = &BTRFS_I(page->mapping->host)->io_tree;
- extent_invalidatepage(tree, page, offset);
- btree_releasepage(page, GFP_NOFS);
- if (PagePrivate(page)) {
- btrfs_warn(BTRFS_I(page->mapping->host)->root->fs_info,
- "page private not zero on page %llu",
- (unsigned long long)page_offset(page));
- detach_page_private(page);
+ tree = &BTRFS_I(folio->mapping->host)->io_tree;
+ extent_invalidate_folio(tree, folio, offset);
+ btree_releasepage(&folio->page, GFP_NOFS);
+ if (folio_get_private(folio)) {
+ btrfs_warn(BTRFS_I(folio->mapping->host)->root->fs_info,
+ "folio private not zero on folio %llu",
+ (unsigned long long)folio_pos(folio));
+ folio_detach_private(folio);
}
}
-static int btree_set_page_dirty(struct page *page)
-{
#ifdef DEBUG
- struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
+static bool btree_dirty_folio(struct address_space *mapping,
+ struct folio *folio)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
struct btrfs_subpage *subpage;
struct extent_buffer *eb;
int cur_bit = 0;
- u64 page_start = page_offset(page);
+ u64 page_start = folio_pos(folio);
if (fs_info->sectorsize == PAGE_SIZE) {
- BUG_ON(!PagePrivate(page));
- eb = (struct extent_buffer *)page->private;
+ eb = folio_get_private(folio);
BUG_ON(!eb);
BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
BUG_ON(!atomic_read(&eb->refs));
btrfs_assert_tree_write_locked(eb);
- return __set_page_dirty_nobuffers(page);
+ return filemap_dirty_folio(mapping, folio);
}
- ASSERT(PagePrivate(page) && page->private);
- subpage = (struct btrfs_subpage *)page->private;
+ subpage = folio_get_private(folio);
ASSERT(subpage->dirty_bitmap);
while (cur_bit < BTRFS_SUBPAGE_BITMAP_SIZE) {
@@ -1059,18 +1072,20 @@ static int btree_set_page_dirty(struct page *page)
cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits);
}
-#endif
- return __set_page_dirty_nobuffers(page);
+ return filemap_dirty_folio(mapping, folio);
}
+#else
+#define btree_dirty_folio filemap_dirty_folio
+#endif
static const struct address_space_operations btree_aops = {
.writepages = btree_writepages,
.releasepage = btree_releasepage,
- .invalidatepage = btree_invalidatepage,
+ .invalidate_folio = btree_invalidate_folio,
#ifdef CONFIG_MIGRATION
.migratepage = btree_migratepage,
#endif
- .set_page_dirty = btree_set_page_dirty,
+ .dirty_folio = btree_dirty_folio,
};
struct extent_buffer *btrfs_find_create_tree_block(
@@ -1289,12 +1304,33 @@ struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info,
return root;
}
+static u64 btrfs_global_root_id(struct btrfs_fs_info *fs_info, u64 bytenr)
+{
+ struct btrfs_block_group *block_group;
+ u64 ret;
+
+ if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+ return 0;
+
+ if (bytenr)
+ block_group = btrfs_lookup_block_group(fs_info, bytenr);
+ else
+ block_group = btrfs_lookup_first_block_group(fs_info, bytenr);
+ ASSERT(block_group);
+ if (!block_group)
+ return 0;
+ ret = block_group->global_root_id;
+ btrfs_put_block_group(block_group);
+
+ return ret;
+}
+
struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr)
{
struct btrfs_key key = {
.objectid = BTRFS_CSUM_TREE_OBJECTID,
.type = BTRFS_ROOT_ITEM_KEY,
- .offset = 0,
+ .offset = btrfs_global_root_id(fs_info, bytenr),
};
return btrfs_global_root(fs_info, &key);
@@ -1305,7 +1341,7 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr)
struct btrfs_key key = {
.objectid = BTRFS_EXTENT_TREE_OBJECTID,
.type = BTRFS_ROOT_ITEM_KEY,
- .offset = 0,
+ .offset = btrfs_global_root_id(fs_info, bytenr),
};
return btrfs_global_root(fs_info, &key);
@@ -1522,7 +1558,8 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
ret = PTR_ERR(root->node);
root->node = NULL;
goto fail;
- } else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
+ }
+ if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
ret = -EIO;
goto fail;
}
@@ -1727,6 +1764,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
btrfs_put_root(fs_info->uuid_root);
btrfs_put_root(fs_info->fs_root);
btrfs_put_root(fs_info->data_reloc_root);
+ btrfs_put_root(fs_info->block_group_root);
btrfs_check_leaked_roots(fs_info);
btrfs_extent_buffer_leak_debug_check(fs_info);
kfree(fs_info->super_copy);
@@ -1925,8 +1963,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
static int cleaner_kthread(void *arg)
{
- struct btrfs_root *root = arg;
- struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_fs_info *fs_info = arg;
int again;
while (1) {
@@ -1959,7 +1996,7 @@ static int cleaner_kthread(void *arg)
btrfs_run_delayed_iputs(fs_info);
- again = btrfs_clean_one_deleted_snapshot(root);
+ again = btrfs_clean_one_deleted_snapshot(fs_info);
mutex_unlock(&fs_info->cleaner_mutex);
/*
@@ -2095,8 +2132,6 @@ static void backup_super_roots(struct btrfs_fs_info *info)
{
const int next_backup = info->backup_root_index;
struct btrfs_root_backup *root_backup;
- struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
- struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
root_backup = info->super_for_commit->super_roots + next_backup;
@@ -2121,11 +2156,30 @@ static void backup_super_roots(struct btrfs_fs_info *info)
btrfs_set_backup_chunk_root_level(root_backup,
btrfs_header_level(info->chunk_root->node));
- btrfs_set_backup_extent_root(root_backup, extent_root->node->start);
- btrfs_set_backup_extent_root_gen(root_backup,
- btrfs_header_generation(extent_root->node));
- btrfs_set_backup_extent_root_level(root_backup,
- btrfs_header_level(extent_root->node));
+ if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) {
+ btrfs_set_backup_block_group_root(root_backup,
+ info->block_group_root->node->start);
+ btrfs_set_backup_block_group_root_gen(root_backup,
+ btrfs_header_generation(info->block_group_root->node));
+ btrfs_set_backup_block_group_root_level(root_backup,
+ btrfs_header_level(info->block_group_root->node));
+ } else {
+ struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
+ struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
+
+ btrfs_set_backup_extent_root(root_backup,
+ extent_root->node->start);
+ btrfs_set_backup_extent_root_gen(root_backup,
+ btrfs_header_generation(extent_root->node));
+ btrfs_set_backup_extent_root_level(root_backup,
+ btrfs_header_level(extent_root->node));
+
+ btrfs_set_backup_csum_root(root_backup, csum_root->node->start);
+ btrfs_set_backup_csum_root_gen(root_backup,
+ btrfs_header_generation(csum_root->node));
+ btrfs_set_backup_csum_root_level(root_backup,
+ btrfs_header_level(csum_root->node));
+ }
/*
* we might commit during log recovery, which happens before we set
@@ -2146,12 +2200,6 @@ static void backup_super_roots(struct btrfs_fs_info *info)
btrfs_set_backup_dev_root_level(root_backup,
btrfs_header_level(info->dev_root->node));
- btrfs_set_backup_csum_root(root_backup, csum_root->node->start);
- btrfs_set_backup_csum_root_gen(root_backup,
- btrfs_header_generation(csum_root->node));
- btrfs_set_backup_csum_root_level(root_backup,
- btrfs_header_level(csum_root->node));
-
btrfs_set_backup_total_bytes(root_backup,
btrfs_super_total_bytes(info->super_copy));
btrfs_set_backup_bytes_used(root_backup,
@@ -2269,6 +2317,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root)
free_root_extent_buffers(info->uuid_root);
free_root_extent_buffers(info->fs_root);
free_root_extent_buffers(info->data_reloc_root);
+ free_root_extent_buffers(info->block_group_root);
if (free_chunk_root)
free_root_extent_buffers(info->chunk_root);
}
@@ -2504,11 +2553,13 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
log_tree_root->node = NULL;
btrfs_put_root(log_tree_root);
return ret;
- } else if (!extent_buffer_uptodate(log_tree_root->node)) {
+ }
+ if (!extent_buffer_uptodate(log_tree_root->node)) {
btrfs_err(fs_info, "failed to read log tree");
btrfs_put_root(log_tree_root);
return -EIO;
}
+
/* returns with log_tree_root freed on success */
ret = btrfs_recover_log_trees(log_tree_root);
if (ret) {
@@ -2533,6 +2584,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
{
struct btrfs_fs_info *fs_info = tree_root->fs_info;
struct btrfs_root *root;
+ u64 max_global_id = 0;
int ret;
struct btrfs_key key = {
.objectid = objectid,
@@ -2568,6 +2620,13 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
break;
btrfs_release_path(path);
+ /*
+ * Only compute this for the extent tree; the set of global root
+ * ids is the same for every global root type.
+ */
+ if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
+ max_global_id = max(max_global_id, key.offset);
+
found = true;
root = read_tree_root_path(tree_root, path, &key);
if (IS_ERR(root)) {
@@ -2585,6 +2644,9 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
}
btrfs_release_path(path);
+ if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
+ fs_info->nr_global_roots = max_global_id + 1;
+
if (!found || ret) {
if (objectid == BTRFS_CSUM_TREE_OBJECTID)
set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
@@ -2930,6 +2992,56 @@ out:
return ret;
}
+static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int level)
+{
+ int ret = 0;
+
+ root->node = read_tree_block(root->fs_info, bytenr,
+ root->root_key.objectid, gen, level, NULL);
+ if (IS_ERR(root->node)) {
+ ret = PTR_ERR(root->node);
+ root->node = NULL;
+ return ret;
+ }
+ if (!extent_buffer_uptodate(root->node)) {
+ free_extent_buffer(root->node);
+ root->node = NULL;
+ return -EIO;
+ }
+
+ btrfs_set_root_node(&root->root_item, root->node);
+ root->commit_root = btrfs_root_node(root);
+ btrfs_set_root_refs(&root->root_item, 1);
+ return ret;
+}
+
+static int load_important_roots(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_super_block *sb = fs_info->super_copy;
+ u64 gen, bytenr;
+ int level, ret;
+
+ bytenr = btrfs_super_root(sb);
+ gen = btrfs_super_generation(sb);
+ level = btrfs_super_root_level(sb);
+ ret = load_super_root(fs_info->tree_root, bytenr, gen, level);
+ if (ret) {
+ btrfs_warn(fs_info, "couldn't read tree root");
+ return ret;
+ }
+
+ if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+ return 0;
+
+ bytenr = btrfs_super_block_group_root(sb);
+ gen = btrfs_super_block_group_root_generation(sb);
+ level = btrfs_super_block_group_root_level(sb);
+ ret = load_super_root(fs_info->block_group_root, bytenr, gen, level);
+ if (ret)
+ btrfs_warn(fs_info, "couldn't read block group root");
+ return ret;
+}
+
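load_super_root() consolidates the read_tree_block() call, the uptodate check, and the root node wiring that used to be open-coded per root. All three early roots now go through it; condensed call sites (a sketch with error handling elided; the chunk root call appears further down in open_ctree()):

    load_super_root(fs_info->tree_root, btrfs_super_root(sb),
                    btrfs_super_generation(sb), btrfs_super_root_level(sb));
    /* EXTENT_TREE_V2 only: */
    load_super_root(fs_info->block_group_root,
                    btrfs_super_block_group_root(sb),
                    btrfs_super_block_group_root_generation(sb),
                    btrfs_super_block_group_root_level(sb));
    load_super_root(chunk_root, btrfs_super_chunk_root(disk_super),
                    generation, level);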
static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
{
int backup_index = find_newest_super_backup(fs_info);
@@ -2939,10 +3051,17 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
int ret = 0;
int i;
- for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
- u64 generation;
- int level;
+ if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+ struct btrfs_root *root;
+
+ root = btrfs_alloc_root(fs_info, BTRFS_BLOCK_GROUP_TREE_OBJECTID,
+ GFP_KERNEL);
+ if (!root)
+ return -ENOMEM;
+ fs_info->block_group_root = root;
+ }
+ for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
if (handle_error) {
if (!IS_ERR(tree_root->node))
free_extent_buffer(tree_root->node);
@@ -2967,29 +3086,13 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
if (ret < 0)
return ret;
}
- generation = btrfs_super_generation(sb);
- level = btrfs_super_root_level(sb);
- tree_root->node = read_tree_block(fs_info, btrfs_super_root(sb),
- BTRFS_ROOT_TREE_OBJECTID,
- generation, level, NULL);
- if (IS_ERR(tree_root->node)) {
- handle_error = true;
- ret = PTR_ERR(tree_root->node);
- tree_root->node = NULL;
- btrfs_warn(fs_info, "couldn't read tree root");
- continue;
- } else if (!extent_buffer_uptodate(tree_root->node)) {
+ ret = load_important_roots(fs_info);
+ if (ret) {
handle_error = true;
- ret = -EIO;
- btrfs_warn(fs_info, "error while reading tree root");
continue;
}
- btrfs_set_root_node(&tree_root->root_item, tree_root->node);
- tree_root->commit_root = btrfs_root_node(tree_root);
- btrfs_set_root_refs(&tree_root->root_item, 1);
-
/*
* No need to hold btrfs_root::objectid_mutex since the fs
* hasn't been fully initialised and we are the only user
@@ -3009,8 +3112,8 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
}
/* All successful */
- fs_info->generation = generation;
- fs_info->last_trans_committed = generation;
+ fs_info->generation = btrfs_header_generation(tree_root->node);
+ fs_info->last_trans_committed = fs_info->generation;
fs_info->last_reloc_trans = 0;
/* Always begin writing backup roots after the one being used */
@@ -3293,7 +3396,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
up_read(&fs_info->cleanup_work_sem);
mutex_lock(&fs_info->cleaner_mutex);
- ret = btrfs_recover_relocation(fs_info->tree_root);
+ ret = btrfs_recover_relocation(fs_info);
mutex_unlock(&fs_info->cleaner_mutex);
if (ret < 0) {
btrfs_warn(fs_info, "failed to recover relocation: %d", ret);
@@ -3594,21 +3697,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
generation = btrfs_super_chunk_root_generation(disk_super);
level = btrfs_super_chunk_root_level(disk_super);
-
- chunk_root->node = read_tree_block(fs_info,
- btrfs_super_chunk_root(disk_super),
- BTRFS_CHUNK_TREE_OBJECTID,
- generation, level, NULL);
- if (IS_ERR(chunk_root->node) ||
- !extent_buffer_uptodate(chunk_root->node)) {
+ ret = load_super_root(chunk_root, btrfs_super_chunk_root(disk_super),
+ generation, level);
+ if (ret) {
btrfs_err(fs_info, "failed to read chunk root");
- if (!IS_ERR(chunk_root->node))
- free_extent_buffer(chunk_root->node);
- chunk_root->node = NULL;
goto fail_tree_roots;
}
- btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
- chunk_root->commit_root = btrfs_root_node(chunk_root);
read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
offsetof(struct btrfs_header, chunk_tree_uuid),
@@ -3728,7 +3822,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
goto fail_sysfs;
}
- fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
+ fs_info->cleaner_kthread = kthread_run(cleaner_kthread, fs_info,
"btrfs-cleaner");
if (IS_ERR(fs_info->cleaner_kthread))
goto fail_sysfs;
@@ -3813,6 +3907,10 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
set_bit(BTRFS_FS_OPEN, &fs_info->flags);
+ /* Kick the cleaner thread so it'll start deleting snapshots. */
+ if (test_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags))
+ wake_up_process(fs_info->cleaner_kthread);
+
clear_oneshot:
btrfs_clear_oneshot_options(fs_info);
return 0;
@@ -4029,8 +4127,9 @@ static int write_dev_supers(struct btrfs_device *device,
* to do I/O, so we don't lose the ability to do integrity
* checking.
*/
- bio = bio_alloc(GFP_NOFS, 1);
- bio_set_dev(bio, device->bdev);
+ bio = bio_alloc(device->bdev, 1,
+ REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO,
+ GFP_NOFS);
bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
bio->bi_private = device;
bio->bi_end_io = btrfs_end_super_write;
@@ -4042,7 +4141,6 @@ static int write_dev_supers(struct btrfs_device *device,
* go down lazy and there's a short window where the on-disk
* copies might still contain the older version.
*/
- bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO;
if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
bio->bi_opf |= REQ_FUA;
@@ -4154,10 +4252,8 @@ static void write_dev_flush(struct btrfs_device *device)
return;
#endif
- bio_reset(bio);
+ bio_reset(bio, device->bdev, REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH);
bio->bi_end_io = btrfs_end_empty_barrier;
- bio_set_dev(bio, device->bdev);
- bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
init_completion(&device->flush_wait);
bio->bi_private = &device->flush_wait;
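Both hunks adapt to the block-layer interface change where the target device and the operation flags are handed over at allocation or reset time instead of being patched in afterwards. Before/after in miniature (illustrative, not part of the patch):

    /* Old style: allocate, then attach device and flags. */
    bio = bio_alloc(GFP_NOFS, 1);
    bio_set_dev(bio, device->bdev);
    bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;

    /* New style: everything supplied up front. */
    bio = bio_alloc(device->bdev, 1, REQ_OP_WRITE | REQ_SYNC, GFP_NOFS);
    bio_reset(bio, device->bdev, REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH);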
@@ -4538,6 +4634,12 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
*/
kthread_park(fs_info->cleaner_kthread);
+ /*
+ * If we had UNFINISHED_DROPS we could still be processing them, so
+ * clear that bit and wake up relocation so it can stop.
+ */
+ btrfs_wake_unfinished_drop(fs_info);
+
/* wait for the qgroup rescan worker to stop */
btrfs_qgroup_wait_for_completion(fs_info, false);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 5e8bef4b7563..2e10514ecda8 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -111,6 +111,8 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
static inline struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
{
+ if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+ return fs_info->block_group_root;
return btrfs_extent_root(fs_info, 0);
}
diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
index 04083ee5ae6e..c3eb52dbe61c 100644
--- a/fs/btrfs/extent-io-tree.h
+++ b/fs/btrfs/extent-io-tree.h
@@ -244,8 +244,8 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, u32 bits);
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, u32 bits);
-int extent_invalidatepage(struct extent_io_tree *tree,
- struct page *page, unsigned long offset);
+int extent_invalidate_folio(struct extent_io_tree *tree,
+ struct folio *folio, size_t offset);
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
u64 *end, u64 max_bytes,
struct extent_state **cached_state);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d89273c4b6b8..f477035a2ac2 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -598,7 +598,7 @@ fail:
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- int refs_to_drop, int *last_ref)
+ int refs_to_drop)
{
struct btrfs_key key;
struct btrfs_extent_data_ref *ref1 = NULL;
@@ -631,7 +631,6 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
if (num_refs == 0) {
ret = btrfs_del_item(trans, root, path);
- *last_ref = 1;
} else {
if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
@@ -1072,8 +1071,7 @@ static noinline_for_stack
void update_inline_extent_backref(struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
int refs_to_mod,
- struct btrfs_delayed_extent_op *extent_op,
- int *last_ref)
+ struct btrfs_delayed_extent_op *extent_op)
{
struct extent_buffer *leaf = path->nodes[0];
struct btrfs_extent_item *ei;
@@ -1121,7 +1119,6 @@ void update_inline_extent_backref(struct btrfs_path *path,
else
btrfs_set_shared_data_ref_count(leaf, sref, refs);
} else {
- *last_ref = 1;
size = btrfs_extent_inline_ref_size(type);
item_size = btrfs_item_size(leaf, path->slots[0]);
ptr = (unsigned long)iref;
@@ -1166,8 +1163,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
}
return -EUCLEAN;
}
- update_inline_extent_backref(path, iref, refs_to_add,
- extent_op, NULL);
+ update_inline_extent_backref(path, iref, refs_to_add, extent_op);
} else if (ret == -ENOENT) {
setup_inline_extent_backref(trans->fs_info, path, iref, parent,
root_objectid, owner, offset,
@@ -1181,21 +1177,17 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
- int refs_to_drop, int is_data, int *last_ref)
+ int refs_to_drop, int is_data)
{
int ret = 0;
BUG_ON(!is_data && refs_to_drop != 1);
- if (iref) {
- update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
- last_ref);
- } else if (is_data) {
- ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
- last_ref);
- } else {
- *last_ref = 1;
+ if (iref)
+ update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
+ else if (is_data)
+ ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
+ else
ret = btrfs_del_item(trans, root, path);
- }
return ret;
}
@@ -2766,12 +2758,11 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
spin_unlock(&cache->lock);
if (!readonly && return_free_space &&
global_rsv->space_info == space_info) {
- u64 to_add = len;
-
spin_lock(&global_rsv->lock);
if (!global_rsv->full) {
- to_add = min(len, global_rsv->size -
- global_rsv->reserved);
+ u64 to_add = min(len, global_rsv->size -
+ global_rsv->reserved);
+
global_rsv->reserved += to_add;
btrfs_space_info_update_bytes_may_use(fs_info,
space_info, to_add);
@@ -2862,6 +2853,35 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
return 0;
}
+static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
+ u64 bytenr, u64 num_bytes, bool is_data)
+{
+ int ret;
+
+ if (is_data) {
+ struct btrfs_root *csum_root;
+
+ csum_root = btrfs_csum_root(trans->fs_info, bytenr);
+ ret = btrfs_del_csums(trans, csum_root, bytenr, num_bytes);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+ }
+
+ ret = add_to_free_space_tree(trans, bytenr, num_bytes);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
+ ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+
+ return ret;
+}
+
/*
* Drop one or more refs of @node.
*
@@ -2943,7 +2963,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
u64 refs;
u64 bytenr = node->bytenr;
u64 num_bytes = node->num_bytes;
- int last_ref = 0;
bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
extent_root = btrfs_extent_root(info, bytenr);
@@ -3010,8 +3029,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
/* Must be SHARED_* item, remove the backref first */
ret = remove_extent_backref(trans, extent_root, path,
- NULL, refs_to_drop, is_data,
- &last_ref);
+ NULL, refs_to_drop, is_data);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -3136,8 +3154,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
if (found_extent) {
ret = remove_extent_backref(trans, extent_root, path,
- iref, refs_to_drop, is_data,
- &last_ref);
+ iref, refs_to_drop, is_data);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -3182,7 +3199,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
}
- last_ref = 1;
ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
num_to_del);
if (ret) {
@@ -3191,28 +3207,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- if (is_data) {
- struct btrfs_root *csum_root;
- csum_root = btrfs_csum_root(info, bytenr);
- ret = btrfs_del_csums(trans, csum_root, bytenr,
- num_bytes);
- if (ret) {
- btrfs_abort_transaction(trans, ret);
- goto out;
- }
- }
-
- ret = add_to_free_space_tree(trans, bytenr, num_bytes);
- if (ret) {
- btrfs_abort_transaction(trans, ret);
- goto out;
- }
-
- ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
- if (ret) {
- btrfs_abort_transaction(trans, ret);
- goto out;
- }
+ ret = do_free_extent_accounting(trans, bytenr, num_bytes, is_data);
}
btrfs_release_path(path);
@@ -4605,6 +4600,28 @@ int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
return ret;
}
+static int alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 bytenr,
+ u64 num_bytes)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ int ret;
+
+ ret = remove_from_free_space_tree(trans, bytenr, num_bytes);
+ if (ret)
+ return ret;
+
+ ret = btrfs_update_block_group(trans, bytenr, num_bytes, true);
+ if (ret) {
+ ASSERT(!ret);
+ btrfs_err(fs_info, "update block group failed for %llu %llu",
+ bytenr, num_bytes);
+ return ret;
+ }
+
+ trace_btrfs_reserved_extent_alloc(fs_info, bytenr, num_bytes);
+ return 0;
+}
+
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 parent, u64 root_objectid,
u64 flags, u64 owner, u64 offset,
@@ -4665,18 +4682,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
- ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
- if (ret)
- return ret;
-
- ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, true);
- if (ret) { /* -ENOENT, logic error */
- btrfs_err(fs_info, "update block group failed for %llu %llu",
- ins->objectid, ins->offset);
- BUG();
- }
- trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
- return ret;
+ return alloc_reserved_extent(trans, ins->objectid, ins->offset);
}
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
@@ -4694,7 +4700,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
struct btrfs_delayed_tree_ref *ref;
u32 size = sizeof(*extent_item) + sizeof(*iref);
- u64 num_bytes;
u64 flags = extent_op->flags_to_set;
bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
@@ -4704,12 +4709,10 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
if (skinny_metadata) {
extent_key.offset = ref->level;
extent_key.type = BTRFS_METADATA_ITEM_KEY;
- num_bytes = fs_info->nodesize;
} else {
extent_key.offset = node->num_bytes;
extent_key.type = BTRFS_EXTENT_ITEM_KEY;
size += sizeof(*block_info);
- num_bytes = node->num_bytes;
}
path = btrfs_alloc_path();
@@ -4754,22 +4757,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_free_path(path);
- ret = remove_from_free_space_tree(trans, extent_key.objectid,
- num_bytes);
- if (ret)
- return ret;
-
- ret = btrfs_update_block_group(trans, extent_key.objectid,
- fs_info->nodesize, true);
- if (ret) { /* -ENOENT, logic error */
- btrfs_err(fs_info, "update block group failed for %llu %llu",
- extent_key.objectid, extent_key.offset);
- BUG();
- }
-
- trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
- fs_info->nodesize);
- return ret;
+ return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize);
}
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
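alloc_reserved_extent() now owns the common tail of extent insertion: remove the range from the free space tree, account it in its block group, and emit the tracepoint. Both insert paths end by delegating to it, which also replaces the old BUG() on a btrfs_update_block_group() failure with an error return (plus an ASSERT for debug builds). Condensed (sketch):

    /* File extents: the reserved range is described by the ins key. */
    return alloc_reserved_extent(trans, ins->objectid, ins->offset);

    /* Tree blocks: always exactly one node. */
    return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize);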
@@ -5622,6 +5610,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
int ret;
int level;
bool root_dropped = false;
+ bool unfinished_drop = false;
btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
@@ -5664,6 +5653,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
* already dropped.
*/
set_bit(BTRFS_ROOT_DELETING, &root->state);
+ unfinished_drop = test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state);
+
if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
level = btrfs_header_level(root->node);
path->nodes[level] = btrfs_lock_root_node(root);
@@ -5839,6 +5830,13 @@ out_free:
btrfs_free_path(path);
out:
/*
+ * If we were an unfinished drop root, check whether any drops are
+ * still pending; if not, clear the flag and wake up any waiters.
+ */
+ if (!err && unfinished_drop)
+ btrfs_maybe_wake_unfinished_drop(fs_info);
+
+ /*
* So if we need to stop dropping the snapshot for whatever reason we
* need to make sure to add it back to the dead root list so that we
* keep trying to do the work later. This also cleans up roots if we
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 409bad3928db..d78b3a2d04e3 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1507,17 +1507,17 @@ void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
{
+ struct address_space *mapping = inode->i_mapping;
unsigned long index = start >> PAGE_SHIFT;
unsigned long end_index = end >> PAGE_SHIFT;
- struct page *page;
+ struct folio *folio;
while (index <= end_index) {
- page = find_get_page(inode->i_mapping, index);
- BUG_ON(!page); /* Pages should be in the extent_io_tree */
- __set_page_dirty_nobuffers(page);
- account_page_redirty(page);
- put_page(page);
- index++;
+ folio = filemap_get_folio(mapping, index);
+ filemap_dirty_folio(mapping, folio);
+ folio_account_redirty(folio);
+ index += folio_nr_pages(folio);
+ folio_put(folio);
}
}
@@ -2610,6 +2610,7 @@ static bool btrfs_check_repairable(struct inode *inode,
* a good copy of the failed sector and if we succeed, we have setup
* everything for repair_io_failure to do the rest for us.
*/
+ ASSERT(failed_mirror);
failrec->failed_mirror = failed_mirror;
failrec->this_mirror++;
if (failrec->this_mirror == failed_mirror)
@@ -2639,7 +2640,6 @@ int btrfs_repair_one_sector(struct inode *inode,
const int icsum = bio_offset >> fs_info->sectorsize_bits;
struct bio *repair_bio;
struct btrfs_bio *repair_bbio;
- blk_status_t status;
btrfs_debug(fs_info,
"repair read error: read error at %llu", start);
@@ -2678,13 +2678,13 @@ int btrfs_repair_one_sector(struct inode *inode,
"repair read error: submitting new read to mirror %d",
failrec->this_mirror);
- status = submit_bio_hook(inode, repair_bio, failrec->this_mirror,
- failrec->bio_flags);
- if (status) {
- free_io_failure(failure_tree, tree, failrec);
- bio_put(repair_bio);
- }
- return blk_status_to_errno(status);
+ /*
+ * At this point we have a bio, so any errors from submit_bio_hook()
+ * will be handled by the endio on the repair_bio, so we can't return an
+ * error here.
+ */
+ submit_bio_hook(inode, repair_bio, failrec->this_mirror, failrec->bio_flags);
+ return BLK_STS_OK;
}
static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
@@ -3068,6 +3068,14 @@ static void end_bio_extent_readpage(struct bio *bio)
if (is_data_inode(inode)) {
/*
+ * If we failed to submit the IO at all we'll have a
+ * mirror_num == 0, in which case we need to just mark
+ * the page with an error and unlock it and carry on.
+ */
+ if (mirror == 0)
+ goto readpage_ok;
+
+ /*
* btrfs_submit_read_repair() will handle all the good
* and bad sectors, we just continue to the next bvec.
*/
@@ -3143,7 +3151,7 @@ struct bio *btrfs_bio_alloc(unsigned int nr_iovecs)
struct bio *bio;
ASSERT(0 < nr_iovecs && nr_iovecs <= BIO_MAX_VECS);
- bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
+ bio = bio_alloc_bioset(NULL, nr_iovecs, 0, GFP_NOFS, &btrfs_bioset);
btrfs_bio_init(btrfs_bio(bio));
return bio;
}
@@ -3154,7 +3162,7 @@ struct bio *btrfs_bio_clone(struct bio *bio)
struct bio *new;
/* Bio allocation backed by a bioset does not fail */
- new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
+ new = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOFS, &btrfs_bioset);
bbio = btrfs_bio(new);
btrfs_bio_init(bbio);
bbio->iter = bio->bi_iter;
@@ -3169,7 +3177,7 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
/* this will never fail when it's backed by a bioset */
- bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
+ bio = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOFS, &btrfs_bioset);
ASSERT(bio);
bbio = btrfs_bio(bio);
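Same block-layer migration as in disk-io.c: bio_alloc_bioset() gains bdev and opf parameters, and bio_clone_fast() becomes bio_alloc_clone() with an explicit destination device. Passing a NULL bdev and an opf of 0, as btrfs_bio_alloc() does, is valid when the submission path sets them later (sketch, illustrative):

    /* Device and op are filled in later by the caller. */
    bio = bio_alloc_bioset(NULL, nr_iovecs, 0, GFP_NOFS, &btrfs_bioset);

    /* Clones now name the destination device explicitly. */
    new = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOFS, &btrfs_bioset);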
@@ -3534,7 +3542,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
}
em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, start, len);
- if (em_cached && !IS_ERR_OR_NULL(em)) {
+ if (em_cached && !IS_ERR(em)) {
BUG_ON(*em_cached);
refcount_inc(&em->refs);
*em_cached = em;
@@ -3563,7 +3571,6 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
u64 cur_end;
struct extent_map *em;
int ret = 0;
- int nr = 0;
size_t pg_offset = 0;
size_t iosize;
size_t blocksize = inode->i_sb->s_blocksize;
@@ -3608,9 +3615,10 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
}
em = __get_extent_map(inode, page, pg_offset, cur,
end - cur + 1, em_cached);
- if (IS_ERR_OR_NULL(em)) {
+ if (IS_ERR(em)) {
unlock_extent(tree, cur, end);
end_page_read(page, false, cur, end + 1 - cur);
+ ret = PTR_ERR(em);
break;
}
extent_offset = cur - em->start;
@@ -3721,9 +3729,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
end_bio_extent_readpage, 0,
this_bio_flag,
force_bio_submit);
- if (!ret) {
- nr++;
- } else {
+ if (ret) {
unlock_extent(tree, cur, cur + iosize - 1);
end_page_read(page, false, cur, iosize);
goto out;
@@ -3951,7 +3957,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
}
em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
- if (IS_ERR_OR_NULL(em)) {
+ if (IS_ERR(em)) {
btrfs_page_set_error(fs_info, page, cur, end - cur + 1);
ret = PTR_ERR_OR_ZERO(em);
break;
@@ -4048,6 +4054,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
static int __extent_writepage(struct page *page, struct writeback_control *wbc,
struct extent_page_data *epd)
{
+ struct folio *folio = page_folio(page);
struct inode *inode = page->mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
const u64 page_start = page_offset(page);
@@ -4068,8 +4075,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
pg_offset = offset_in_page(i_size);
if (page->index > end_index ||
(page->index == end_index && !pg_offset)) {
- page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
- unlock_page(page);
+ folio_invalidate(folio, 0, folio_size(folio));
+ folio_unlock(folio);
return 0;
}
@@ -4780,11 +4787,12 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
return ret;
}
if (cache) {
- /* Impiles write in zoned mode */
- btrfs_put_block_group(cache);
- /* Mark the last eb in a block group */
+ /*
+ * Implies write in zoned mode. Mark the last eb in a block group.
+ */
if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
+ btrfs_put_block_group(cache);
}
ret = write_one_eb(eb, wbc, epd);
free_extent_buffer(eb);
@@ -5218,17 +5226,17 @@ void extent_readahead(struct readahead_control *rac)
}
/*
- * basic invalidatepage code, this waits on any locked or writeback
- * ranges corresponding to the page, and then deletes any extent state
+ * basic invalidate_folio code, this waits on any locked or writeback
+ * ranges corresponding to the folio, and then deletes any extent state
* records from the tree
*/
-int extent_invalidatepage(struct extent_io_tree *tree,
- struct page *page, unsigned long offset)
+int extent_invalidate_folio(struct extent_io_tree *tree,
+ struct folio *folio, size_t offset)
{
struct extent_state *cached_state = NULL;
- u64 start = page_offset(page);
- u64 end = start + PAGE_SIZE - 1;
- size_t blocksize = page->mapping->host->i_sb->s_blocksize;
+ u64 start = folio_pos(folio);
+ u64 end = start + folio_size(folio) - 1;
+ size_t blocksize = folio->mapping->host->i_sb->s_blocksize;
/* This function is only called for the btree inode */
ASSERT(tree->owner == IO_TREE_BTREE_INODE_IO);
@@ -5238,7 +5246,7 @@ int extent_invalidatepage(struct extent_io_tree *tree,
return 0;
lock_extent_bits(tree, start, end, &cached_state);
- wait_on_page_writeback(page);
+ folio_wait_writeback(folio);
/*
* Currently for btree io tree, only EXTENT_LOCKED is utilized,
@@ -5390,7 +5398,7 @@ static struct extent_map *get_extent_skip_holes(struct btrfs_inode *inode,
break;
len = ALIGN(len, sectorsize);
em = btrfs_get_extent_fiemap(inode, offset, len);
- if (IS_ERR_OR_NULL(em))
+ if (IS_ERR(em))
return em;
/* if this isn't a hole return it */
@@ -6841,14 +6849,24 @@ static void assert_eb_page_uptodate(const struct extent_buffer *eb,
{
struct btrfs_fs_info *fs_info = eb->fs_info;
+ /*
+ * If we are using the commit root we could potentially clear a page
+ * Uptodate while we're using the extent buffer that we've previously
+ * looked up. We don't want to complain in this case, as the page was
+ * valid before, we just didn't write it out. Instead we want to catch
+ * the case where we didn't actually read the block properly, which
+ * would have !PageUptodate && !PageError, as we clear PageError before
+ * reading.
+ */
if (fs_info->sectorsize < PAGE_SIZE) {
- bool uptodate;
+ bool uptodate, error;
uptodate = btrfs_subpage_test_uptodate(fs_info, page,
eb->start, eb->len);
- WARN_ON(!uptodate);
+ error = btrfs_subpage_test_error(fs_info, page, eb->start, eb->len);
+ WARN_ON(!uptodate && !error);
} else {
- WARN_ON(!PageUptodate(page));
+ WARN_ON(!PageUptodate(page) && !PageError(page));
}
}
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 5a36add21305..6fee14ce2e6b 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -261,6 +261,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
em->mod_start = merge->mod_start;
em->generation = max(em->generation, merge->generation);
+ set_bit(EXTENT_FLAG_MERGED, &em->flags);
rb_erase_cached(&merge->rb_node, &tree->map);
RB_CLEAR_NODE(&merge->rb_node);
@@ -278,6 +279,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
RB_CLEAR_NODE(&merge->rb_node);
em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
em->generation = max(em->generation, merge->generation);
+ set_bit(EXTENT_FLAG_MERGED, &em->flags);
free_extent_map(merge);
}
}
@@ -490,6 +492,8 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
*/
void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{
+ lockdep_assert_held_write(&tree->lock);
+
WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
rb_erase_cached(&em->rb_node, &tree->map);
if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
@@ -504,6 +508,8 @@ void replace_extent_mapping(struct extent_map_tree *tree,
struct extent_map *new,
int modified)
{
+ lockdep_assert_held_write(&tree->lock);
+
WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags));
ASSERT(extent_map_in_tree(cur));
if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags))
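The lockdep_assert_held_write() calls turn the extent map tree's implicit locking rule into something lockdep-enabled builds enforce at the point of mutation, rather than a convention documented at call sites. The generic pattern (a sketch, not btrfs code; the type and function names are hypothetical):

    #include <linux/lockdep.h>
    #include <linux/rbtree.h>
    #include <linux/spinlock.h>

    struct example_tree {
            rwlock_t lock;
            struct rb_root_cached map;
    };

    static void example_remove(struct example_tree *tree)
    {
            /* Compiles away without CONFIG_LOCKDEP; splats if the
             * caller does not hold tree->lock for writing. */
            lockdep_assert_held_write(&tree->lock);
            /* ... mutate tree->map ... */
    }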
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 8e217337dff9..d2fa32ffe304 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -25,6 +25,8 @@ enum {
EXTENT_FLAG_FILLING,
/* filesystem extent mapping type */
EXTENT_FLAG_FS_MAPPING,
+ /* This em is merged from two or more physically adjacent ems */
+ EXTENT_FLAG_MERGED,
};
struct extent_map {
@@ -40,6 +42,12 @@ struct extent_map {
u64 ram_bytes;
u64 block_start;
u64 block_len;
+
+ /*
+ * Generation of the extent map, for merged em it's the highest
+ * generation of all merged ems.
+ * For non-merged extents, it's from btrfs_file_extent_item::generation.
+ */
u64 generation;
unsigned long flags;
/* Used for chunk mappings, flag EXTENT_FLAG_FS_MAPPING must be set */
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 90c5c38836ab..c828f971a346 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -305,7 +305,7 @@ found:
read_extent_buffer(path->nodes[0], dst, (unsigned long)item,
ret * csum_size);
out:
- if (ret == -ENOENT)
+ if (ret == -ENOENT || ret == -EFBIG)
ret = 0;
return ret;
}
@@ -368,6 +368,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ struct btrfs_bio *bbio = NULL;
struct btrfs_path *path;
const u32 sectorsize = fs_info->sectorsize;
const u32 csum_size = fs_info->csum_size;
@@ -377,6 +378,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
u8 *csum;
const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits;
int count = 0;
+ blk_status_t ret = BLK_STS_OK;
if ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state))
@@ -400,7 +402,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
return BLK_STS_RESOURCE;
if (!dst) {
- struct btrfs_bio *bbio = btrfs_bio(bio);
+ bbio = btrfs_bio(bio);
if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS);
@@ -456,21 +458,27 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
count = search_csum_tree(fs_info, path, cur_disk_bytenr,
search_len, csum_dst);
- if (count <= 0) {
- /*
- * Either we hit a critical error or we didn't find
- * the csum.
- * Either way, we put zero into the csums dst, and skip
- * to the next sector.
- */
+ if (count < 0) {
+ ret = errno_to_blk_status(count);
+ if (bbio)
+ btrfs_bio_free_csum(bbio);
+ break;
+ }
+
+ /*
+ * We didn't find a csum for this range. We need to make sure
+ * we complain loudly about this, because we are not NODATASUM.
+ *
+ * However for the DATA_RELOC inode we could potentially be
+ * relocating data extents for a NODATASUM inode, so the inode
+ * itself won't be marked with NODATASUM, but the extent we're
+ * copying is in fact NODATASUM. If we don't find a csum we
+ * assume this is the case.
+ */
+ if (count == 0) {
memset(csum_dst, 0, csum_size);
count = 1;
- /*
- * For data reloc inode, we need to mark the range
- * NODATASUM so that balance won't report false csum
- * error.
- */
if (BTRFS_I(inode)->root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
u64 file_offset;
@@ -491,7 +499,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
}
btrfs_free_path(path);
- return BLK_STS_OK;
+ return ret;
}
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
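search_csum_tree() can now report a hard error (for example an I/O error walking the csum tree), and the lookup distinguishes that from an absent checksum: a negative count aborts the whole lookup and frees any inline csum buffer, while zero keeps the old assume-NODATASUM behaviour. Decision logic condensed (sketch):

    count = search_csum_tree(fs_info, path, cur_disk_bytenr,
                             search_len, csum_dst);
    if (count < 0) {                /* hard error: propagate */
            ret = errno_to_blk_status(count);
            break;
    }
    if (count == 0) {               /* missing csum: zero and move on */
            memset(csum_dst, 0, csum_size);
            count = 1;
    }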
@@ -612,32 +620,33 @@ fail:
return ret;
}
-/*
- * btrfs_csum_one_bio - Calculates checksums of the data contained inside a bio
+/**
+ * Calculate checksums of the data contained inside a bio
+ *
* @inode: Owner of the data inside the bio
* @bio: Contains the data to be checksummed
- * @file_start: offset in file this bio begins to describe
- * @contig: Boolean. If true/1 means all bio vecs in this bio are
- * contiguous and they begin at @file_start in the file. False/0
- * means this bio can contain potentially discontiguous bio vecs
- * so the logical offset of each should be calculated separately.
+ * @offset: If (u64)-1, @bio may contain discontiguous bio vecs, so the
+ * file offsets are determined from the page offsets in the bio.
+ * Otherwise, this is the starting file offset of the bio vecs in
+ * @bio, which must be contiguous.
+ * @one_ordered: If true, @bio only refers to one ordered extent.
*/
blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
- u64 file_start, int contig)
+ u64 offset, bool one_ordered)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct btrfs_ordered_sum *sums;
struct btrfs_ordered_extent *ordered = NULL;
+ const bool use_page_offsets = (offset == (u64)-1);
char *data;
struct bvec_iter iter;
struct bio_vec bvec;
int index;
- int nr_sectors;
+ unsigned int blockcount;
unsigned long total_bytes = 0;
unsigned long this_sum_bytes = 0;
int i;
- u64 offset;
unsigned nofs_flag;
nofs_flag = memalloc_nofs_save();
@@ -651,18 +660,13 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
sums->len = bio->bi_iter.bi_size;
INIT_LIST_HEAD(&sums->list);
- if (contig)
- offset = file_start;
- else
- offset = 0; /* shut up gcc */
-
sums->bytenr = bio->bi_iter.bi_sector << 9;
index = 0;
shash->tfm = fs_info->csum_shash;
bio_for_each_segment(bvec, bio, iter) {
- if (!contig)
+ if (use_page_offsets)
offset = page_offset(bvec.bv_page) + bvec.bv_offset;
if (!ordered) {
@@ -681,13 +685,14 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
}
}
- nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info,
+ blockcount = BTRFS_BYTES_TO_BLKS(fs_info,
bvec.bv_len + fs_info->sectorsize
- 1);
- for (i = 0; i < nr_sectors; i++) {
- if (offset >= ordered->file_offset + ordered->num_bytes ||
- offset < ordered->file_offset) {
+ for (i = 0; i < blockcount; i++) {
+ if (!one_ordered &&
+ !in_range(offset, ordered->file_offset,
+ ordered->num_bytes)) {
unsigned long bytes_left;
sums->len = this_sum_bytes;
@@ -1211,6 +1216,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
extent_start = key.offset;
extent_end = btrfs_file_extent_end(path);
em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
+ em->generation = btrfs_file_extent_generation(leaf, fi);
if (type == BTRFS_FILE_EXTENT_REG ||
type == BTRFS_FILE_EXTENT_PREALLOC) {
em->start = extent_start;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 11204dbbe053..9f455c96c974 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -50,11 +50,14 @@ struct inode_defrag {
/* root objectid */
u64 root;
- /* last offset we were able to defrag */
- u64 last_offset;
-
- /* if we've wrapped around back to zero once already */
- int cycled;
+ /*
+ * The extent size threshold for autodefrag.
+ *
+ * This value is different for compressed/non-compressed extents,
+ * thus needs to be passed from higher layer.
+ * (aka, inode_should_defrag())
+ */
+ u32 extent_thresh;
};
static int __compare_inode_defrag(struct inode_defrag *defrag1,
@@ -107,8 +110,8 @@ static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
*/
if (defrag->transid < entry->transid)
entry->transid = defrag->transid;
- if (defrag->last_offset > entry->last_offset)
- entry->last_offset = defrag->last_offset;
+ entry->extent_thresh = min(defrag->extent_thresh,
+ entry->extent_thresh);
return -EEXIST;
}
}
@@ -134,7 +137,7 @@ static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)
* enabled
*/
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
- struct btrfs_inode *inode)
+ struct btrfs_inode *inode, u32 extent_thresh)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -160,6 +163,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
defrag->ino = btrfs_ino(inode);
defrag->transid = transid;
defrag->root = root->root_key.objectid;
+ defrag->extent_thresh = extent_thresh;
spin_lock(&fs_info->defrag_inodes_lock);
if (!test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) {
@@ -179,34 +183,6 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
}
/*
- * Requeue the defrag object. If there is a defrag object that points to
- * the same inode in the tree, we will merge them together (by
- * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
- */
-static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode,
- struct inode_defrag *defrag)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- int ret;
-
- if (!__need_auto_defrag(fs_info))
- goto out;
-
- /*
- * Here we don't check the IN_DEFRAG flag, because we need merge
- * them together.
- */
- spin_lock(&fs_info->defrag_inodes_lock);
- ret = __btrfs_add_inode_defrag(inode, defrag);
- spin_unlock(&fs_info->defrag_inodes_lock);
- if (ret)
- goto out;
- return;
-out:
- kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
-}
-
-/*
* pick the defragable inode that we want, if it doesn't exist, we will get
* the next one.
*/
@@ -278,8 +254,14 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
struct btrfs_root *inode_root;
struct inode *inode;
struct btrfs_ioctl_defrag_range_args range;
- int num_defrag;
- int ret;
+ int ret = 0;
+ u64 cur = 0;
+
+again:
+ if (test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state))
+ goto cleanup;
+ if (!__need_auto_defrag(fs_info))
+ goto cleanup;
/* get the inode */
inode_root = btrfs_get_fs_root(fs_info, defrag->root, true);
@@ -295,39 +277,30 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
goto cleanup;
}
+ if (cur >= i_size_read(inode)) {
+ iput(inode);
+ goto cleanup;
+ }
+
/* do a chunk of defrag */
clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
memset(&range, 0, sizeof(range));
range.len = (u64)-1;
- range.start = defrag->last_offset;
+ range.start = cur;
+ range.extent_thresh = defrag->extent_thresh;
sb_start_write(fs_info->sb);
- num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
+ ret = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
BTRFS_DEFRAG_BATCH);
sb_end_write(fs_info->sb);
- /*
- * if we filled the whole defrag batch, there
- * must be more work to do. Queue this defrag
- * again
- */
- if (num_defrag == BTRFS_DEFRAG_BATCH) {
- defrag->last_offset = range.start;
- btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag);
- } else if (defrag->last_offset && !defrag->cycled) {
- /*
- * we didn't fill our defrag batch, but
- * we didn't start at zero. Make sure we loop
- * around to the start of the file.
- */
- defrag->last_offset = 0;
- defrag->cycled = 1;
- btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag);
- } else {
- kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
- }
-
iput(inode);
- return 0;
+
+ if (ret < 0)
+ goto cleanup;
+
+ cur = max(cur + fs_info->sectorsize, range.start);
+ goto again;
+
cleanup:
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
return ret;
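The requeue machinery (last_offset, cycled, btrfs_requeue_inode_defrag()) is replaced by a plain cursor loop inside __btrfs_run_defrag_inode(): defrag one batch, then resume from wherever btrfs_defrag_file() left the range, with a guaranteed minimum of one sector of forward progress. Control flow condensed (sketch; the remount and autodefrag-disabled checks are elided):

    u64 cur = 0;

    while (cur < i_size_read(inode)) {
            memset(&range, 0, sizeof(range));
            range.len = (u64)-1;
            range.start = cur;
            range.extent_thresh = defrag->extent_thresh;
            ret = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
                                    BTRFS_DEFRAG_BATCH);
            if (ret < 0)
                    break;
            /* range.start was advanced past the last processed byte;
             * the max() keeps the cursor from getting stuck. */
            cur = max(cur + fs_info->sectorsize, range.start);
    }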
@@ -718,7 +691,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
int modify_tree = -1;
int update_refs;
int found = 0;
- int leafs_visited = 0;
struct btrfs_path *path = args->path;
args->bytes_found = 0;
@@ -756,7 +728,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
path->slots[0]--;
}
ret = 0;
- leafs_visited++;
next_slot:
leaf = path->nodes[0];
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
@@ -768,7 +739,6 @@ next_slot:
ret = 0;
break;
}
- leafs_visited++;
leaf = path->nodes[0];
recow = 1;
}
@@ -1014,7 +984,7 @@ delete_extent_item:
* which case it unlocked our path, so check path->locks[0] matches a
* write lock.
*/
- if (!ret && args->replace_extent && leafs_visited == 1 &&
+ if (!ret && args->replace_extent &&
path->locks[0] == BTRFS_WRITE_LOCK &&
btrfs_leaf_free_space(leaf) >=
sizeof(struct btrfs_item) + args->extent_item_size) {
@@ -1749,7 +1719,8 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
fs_info->sectorsize);
WARN_ON(reserve_bytes == 0);
ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
- reserve_bytes);
+ reserve_bytes,
+ reserve_bytes);
if (ret) {
if (!only_release_metadata)
btrfs_free_reserved_data_space(BTRFS_I(inode),
@@ -2066,12 +2037,43 @@ out:
return err < 0 ? err : written;
}
-static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
- struct iov_iter *from)
+static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from,
+ const struct btrfs_ioctl_encoded_io_args *encoded)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ loff_t count;
+ ssize_t ret;
+
+ btrfs_inode_lock(inode, 0);
+ count = encoded->len;
+ ret = generic_write_checks_count(iocb, &count);
+ if (ret == 0 && count != encoded->len) {
+ /*
+ * The write got truncated by generic_write_checks_count(). We
+ * can't do a partial encoded write.
+ */
+ ret = -EFBIG;
+ }
+ if (ret || encoded->len == 0)
+ goto out;
+
+ ret = btrfs_write_check(iocb, from, encoded->len);
+ if (ret < 0)
+ goto out;
+
+ ret = btrfs_do_encoded_write(iocb, from, encoded);
+out:
+ btrfs_inode_unlock(inode, 0);
+ return ret;
+}
+
+ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
+ const struct btrfs_ioctl_encoded_io_args *encoded)
{
struct file *file = iocb->ki_filp;
struct btrfs_inode *inode = BTRFS_I(file_inode(file));
- ssize_t num_written = 0;
+ ssize_t num_written, num_sync;
const bool sync = iocb->ki_flags & IOCB_DSYNC;
/*
@@ -2082,22 +2084,28 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
if (BTRFS_FS_ERROR(inode->root->fs_info))
return -EROFS;
- if (!(iocb->ki_flags & IOCB_DIRECT) &&
- (iocb->ki_flags & IOCB_NOWAIT))
+ if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
return -EOPNOTSUPP;
if (sync)
atomic_inc(&inode->sync_writers);
- if (iocb->ki_flags & IOCB_DIRECT)
- num_written = btrfs_direct_write(iocb, from);
- else
- num_written = btrfs_buffered_write(iocb, from);
+ if (encoded) {
+ num_written = btrfs_encoded_write(iocb, from, encoded);
+ num_sync = encoded->len;
+ } else if (iocb->ki_flags & IOCB_DIRECT) {
+ num_written = num_sync = btrfs_direct_write(iocb, from);
+ } else {
+ num_written = num_sync = btrfs_buffered_write(iocb, from);
+ }
btrfs_set_inode_last_sub_trans(inode);
- if (num_written > 0)
- num_written = generic_write_sync(iocb, num_written);
+ if (num_sync > 0) {
+ num_sync = generic_write_sync(iocb, num_sync);
+ if (num_sync < 0)
+ num_written = num_sync;
+ }
if (sync)
atomic_dec(&inode->sync_writers);
@@ -2106,6 +2114,11 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
return num_written;
}
+static ssize_t btrfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ return btrfs_do_write_iter(iocb, from, NULL);
+}
+
int btrfs_release_file(struct inode *inode, struct file *filp)
{
struct btrfs_file_private *private = filp->private_data;
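For encoded writes the bytes drained from the iov_iter are the compressed payload while the logical file range written is encoded->len, so the two lengths are tracked separately: num_written is what the caller sees, num_sync is what generic_write_sync() must cover. Condensed (sketch; do_plain_write() is a stand-in for the IOCB_DIRECT versus buffered dispatch):

    if (encoded) {
            num_written = btrfs_encoded_write(iocb, from, encoded);
            num_sync = encoded->len;        /* logical length */
    } else {
            num_written = num_sync = do_plain_write(iocb, from);
    }
    if (num_sync > 0) {
            num_sync = generic_write_sync(iocb, num_sync);
            if (num_sync < 0)
                    num_written = num_sync;
    }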
@@ -2501,7 +2514,7 @@ out:
hole_em = alloc_extent_map();
if (!hole_em) {
btrfs_drop_extent_cache(inode, offset, end - 1, 0);
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+ btrfs_set_inode_full_sync(inode);
} else {
hole_em->start = offset;
hole_em->len = end - offset;
@@ -2522,8 +2535,7 @@ out:
} while (ret == -EEXIST);
free_extent_map(hole_em);
if (ret)
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &inode->runtime_flags);
+ btrfs_set_inode_full_sync(inode);
}
return 0;
@@ -2877,7 +2889,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
* maps for the replacement extents (or holes).
*/
if (extent_info && !extent_info->is_new_extent)
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+ btrfs_set_inode_full_sync(inode);
if (ret)
goto out_trans;
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 655aad0f9e1c..0ae54d8c10d6 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -25,6 +25,8 @@ static struct btrfs_root *btrfs_free_space_root(
.offset = 0,
};
+ if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
+ key.offset = block_group->global_root_id;
return btrfs_global_root(block_group->fs_info, &key);
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3b2403b6127f..aa0a60ee26cb 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -66,6 +66,11 @@ struct btrfs_dio_data {
struct extent_changeset *data_reserved;
};
+struct btrfs_rename_ctx {
+ /* Output field. Stores the index number of the old directory entry. */
+ u64 index;
+};
+
static const struct inode_operations btrfs_dir_inode_operations;
static const struct inode_operations btrfs_symlink_inode_operations;
static const struct inode_operations btrfs_special_inode_operations;
@@ -234,12 +239,14 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
* no overlapping inline items exist in the btree
*/
static int insert_inline_extent(struct btrfs_trans_handle *trans,
- struct btrfs_path *path, bool extent_inserted,
- struct btrfs_root *root, struct inode *inode,
- u64 start, size_t size, size_t compressed_size,
+ struct btrfs_path *path,
+ struct btrfs_inode *inode, bool extent_inserted,
+ size_t size, size_t compressed_size,
int compress_type,
- struct page **compressed_pages)
+ struct page **compressed_pages,
+ bool update_i_size)
{
+ struct btrfs_root *root = inode->root;
struct extent_buffer *leaf;
struct page *page = NULL;
char *kaddr;
@@ -247,7 +254,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
struct btrfs_file_extent_item *ei;
int ret;
size_t cur_size = size;
- unsigned long offset;
+ u64 i_size;
ASSERT((compressed_size > 0 && compressed_pages) ||
(compressed_size == 0 && !compressed_pages));
@@ -259,8 +266,8 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
struct btrfs_key key;
size_t datasize;
- key.objectid = btrfs_ino(BTRFS_I(inode));
- key.offset = start;
+ key.objectid = btrfs_ino(inode);
+ key.offset = 0;
key.type = BTRFS_EXTENT_DATA_KEY;
datasize = btrfs_file_extent_calc_inline_size(cur_size);
@@ -298,12 +305,10 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
btrfs_set_file_extent_compression(leaf, ei,
compress_type);
} else {
- page = find_get_page(inode->i_mapping,
- start >> PAGE_SHIFT);
+ page = find_get_page(inode->vfs_inode.i_mapping, 0);
btrfs_set_file_extent_compression(leaf, ei, 0);
kaddr = kmap_atomic(page);
- offset = offset_in_page(start);
- write_extent_buffer(leaf, kaddr + offset, ptr, size);
+ write_extent_buffer(leaf, kaddr, ptr, size);
kunmap_atomic(kaddr);
put_page(page);
}
@@ -314,21 +319,25 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
* We align size to sectorsize for inline extents just for simplicity
* sake.
*/
- size = ALIGN(size, root->fs_info->sectorsize);
- ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), start, size);
+ ret = btrfs_inode_set_file_extent_range(inode, 0,
+ ALIGN(size, root->fs_info->sectorsize));
if (ret)
goto fail;
/*
- * we're an inline extent, so nobody can
- * extend the file past i_size without locking
- * a page we already have locked.
+ * We're an inline extent, so nobody can extend the file past i_size
+ * without locking a page we already have locked.
*
- * We must do any isize and inode updates
- * before we unlock the pages. Otherwise we
- * could end up racing with unlink.
+ * We must do any i_size and inode updates before we unlock the pages.
+ * Otherwise we could end up racing with unlink.
*/
- BTRFS_I(inode)->disk_i_size = inode->i_size;
+ i_size = i_size_read(&inode->vfs_inode);
+ if (update_i_size && size > i_size) {
+ i_size_write(&inode->vfs_inode, size);
+ i_size = size;
+ }
+ inode->disk_i_size = i_size;
+
fail:
return ret;
}
@@ -339,35 +348,31 @@ fail:
* does the checks required to make sure the data is small enough
* to fit as an inline extent.
*/
-static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
- u64 end, size_t compressed_size,
+static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 size,
+ size_t compressed_size,
int compress_type,
- struct page **compressed_pages)
+ struct page **compressed_pages,
+ bool update_i_size)
{
struct btrfs_drop_extents_args drop_args = { 0 };
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans;
- u64 isize = i_size_read(&inode->vfs_inode);
- u64 actual_end = min(end + 1, isize);
- u64 inline_len = actual_end - start;
- u64 aligned_end = ALIGN(end, fs_info->sectorsize);
- u64 data_len = inline_len;
+ u64 data_len = (compressed_size ?: size);
int ret;
struct btrfs_path *path;
- if (compressed_size)
- data_len = compressed_size;
-
- if (start > 0 ||
- actual_end > fs_info->sectorsize ||
+ /*
+ * We can create an inline extent if it ends at or beyond the current
+ * i_size, is no larger than a sector (decompressed), and the (possibly
+ * compressed) data fits in a leaf and the configured maximum inline
+ * size.
+ */
+ if (size < i_size_read(&inode->vfs_inode) ||
+ size > fs_info->sectorsize ||
data_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info) ||
- (!compressed_size &&
- (actual_end & (fs_info->sectorsize - 1)) == 0) ||
- end + 1 < isize ||
- data_len > fs_info->max_inline) {
+ data_len > fs_info->max_inline)
return 1;
- }
path = btrfs_alloc_path();
if (!path)
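Because the inline extent now always sits at file offset 0, the old start/end/aligned_end bookkeeping collapses into one size check. The eligibility test, inverted into a predicate for readability (a sketch of the condition above, not patch code):

    static bool can_inline(const struct btrfs_fs_info *fs_info,
                           u64 size, u64 data_len, u64 i_size)
    {
            return size >= i_size &&              /* covers all of i_size */
                   size <= fs_info->sectorsize && /* one sector, decompressed */
                   data_len <= BTRFS_MAX_INLINE_DATA_SIZE(fs_info) &&
                   data_len <= fs_info->max_inline; /* mount option cap */
    }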
@@ -381,30 +386,20 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
trans->block_rsv = &inode->block_rsv;
drop_args.path = path;
- drop_args.start = start;
- drop_args.end = aligned_end;
+ drop_args.start = 0;
+ drop_args.end = fs_info->sectorsize;
drop_args.drop_cache = true;
drop_args.replace_extent = true;
-
- if (compressed_size && compressed_pages)
- drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(
- compressed_size);
- else
- drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(
- inline_len);
-
+ drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(data_len);
ret = btrfs_drop_extents(trans, root, inode, &drop_args);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
- if (isize > actual_end)
- inline_len = min_t(u64, isize, actual_end);
- ret = insert_inline_extent(trans, path, drop_args.extent_inserted,
- root, &inode->vfs_inode, start,
- inline_len, compressed_size,
- compress_type, compressed_pages);
+ ret = insert_inline_extent(trans, path, inode, drop_args.extent_inserted,
+ size, compressed_size, compress_type,
+ compressed_pages, update_i_size);
if (ret && ret != -ENOSPC) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -413,7 +408,7 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
goto out;
}
- btrfs_update_inode_bytes(inode, inline_len, drop_args.bytes_found);
+ btrfs_update_inode_bytes(inode, size, drop_args.bytes_found);
ret = btrfs_update_inode(trans, root, inode);
if (ret && ret != -ENOSPC) {
btrfs_abort_transaction(trans, ret);
@@ -423,7 +418,7 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
goto out;
}
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+ btrfs_set_inode_full_sync(inode);
out:
/*
* Don't forget to free the reserved space, as for inlined extent
@@ -560,12 +555,12 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
}
static inline void inode_should_defrag(struct btrfs_inode *inode,
- u64 start, u64 end, u64 num_bytes, u64 small_write)
+ u64 start, u64 end, u64 num_bytes, u32 small_write)
{
/* If this is a small write inside eof, kick off a defrag */
if (num_bytes < small_write &&
(start > 0 || end + 1 < inode->disk_i_size))
- btrfs_add_inode_defrag(NULL, inode);
+ btrfs_add_inode_defrag(NULL, inode, small_write);
}
/*
@@ -624,7 +619,6 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
again:
will_compress = 0;
nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
- BUILD_BUG_ON((BTRFS_MAX_COMPRESSED % PAGE_SIZE) != 0);
nr_pages = min_t(unsigned long, nr_pages,
BTRFS_MAX_COMPRESSED / PAGE_SIZE);
@@ -735,14 +729,15 @@ cont:
/* we didn't compress the entire range, try
* to make an uncompressed inline extent.
*/
- ret = cow_file_range_inline(BTRFS_I(inode), start, end,
+ ret = cow_file_range_inline(BTRFS_I(inode), actual_end,
0, BTRFS_COMPRESS_NONE,
- NULL);
+ NULL, false);
} else {
/* try making a compressed inline extent */
- ret = cow_file_range_inline(BTRFS_I(inode), start, end,
+ ret = cow_file_range_inline(BTRFS_I(inode), actual_end,
total_compressed,
- compress_type, pages);
+ compress_type, pages,
+ false);
}
if (ret <= 0) {
unsigned long clear_flags = EXTENT_DELALLOC |
@@ -981,11 +976,14 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
}
free_extent_map(em);
- ret = btrfs_add_ordered_extent_compress(inode, start, /* file_offset */
- ins.objectid, /* disk_bytenr */
- async_extent->ram_size, /* num_bytes */
- ins.offset, /* disk_num_bytes */
- async_extent->compress_type);
+ ret = btrfs_add_ordered_extent(inode, start, /* file_offset */
+ async_extent->ram_size, /* num_bytes */
+ async_extent->ram_size, /* ram_bytes */
+ ins.objectid, /* disk_bytenr */
+ ins.offset, /* disk_num_bytes */
+ 0, /* offset */
+ 1 << BTRFS_ORDERED_COMPRESSED,
+ async_extent->compress_type);
if (ret) {
btrfs_drop_extent_cache(inode, start, end, 0);
goto out_free_reserve;
@@ -1003,7 +1001,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
async_extent->pages, /* compressed_pages */
async_extent->nr_pages,
async_chunk->write_flags,
- async_chunk->blkcg_css)) {
+ async_chunk->blkcg_css, true)) {
const u64 start = async_extent->start;
const u64 end = start + async_extent->ram_size - 1;
@@ -1152,9 +1150,12 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
* So here we skip inline extent creation completely.
*/
if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
+ u64 actual_end = min_t(u64, i_size_read(&inode->vfs_inode),
+ end + 1);
+
/* lets try to make an inline extent */
- ret = cow_file_range_inline(inode, start, end, 0,
- BTRFS_COMPRESS_NONE, NULL);
+ ret = cow_file_range_inline(inode, actual_end, 0,
+ BTRFS_COMPRESS_NONE, NULL, false);
if (ret == 0) {
/*
* We use DO_ACCOUNTING here because we need the
@@ -1234,9 +1235,10 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
}
free_extent_map(em);
- ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
- ram_size, cur_alloc_size,
- BTRFS_ORDERED_REGULAR);
+ ret = btrfs_add_ordered_extent(inode, start, ram_size, ram_size,
+ ins.objectid, cur_alloc_size, 0,
+ 1 << BTRFS_ORDERED_REGULAR,
+ BTRFS_COMPRESS_NONE);
if (ret)
goto out_drop_extent_cache;
@@ -1895,10 +1897,11 @@ out_check:
goto error;
}
free_extent_map(em);
- ret = btrfs_add_ordered_extent(inode, cur_offset,
- disk_bytenr, num_bytes,
- num_bytes,
- BTRFS_ORDERED_PREALLOC);
+ ret = btrfs_add_ordered_extent(inode,
+ cur_offset, num_bytes, num_bytes,
+ disk_bytenr, num_bytes, 0,
+ 1 << BTRFS_ORDERED_PREALLOC,
+ BTRFS_COMPRESS_NONE);
if (ret) {
btrfs_drop_extent_cache(inode, cur_offset,
cur_offset + num_bytes - 1,
@@ -1907,9 +1910,11 @@ out_check:
}
} else {
ret = btrfs_add_ordered_extent(inode, cur_offset,
+ num_bytes, num_bytes,
disk_bytenr, num_bytes,
- num_bytes,
- BTRFS_ORDERED_NOCOW);
+ 0,
+ 1 << BTRFS_ORDERED_NOCOW,
+ BTRFS_COMPRESS_NONE);
if (ret)
goto error;
}
@@ -2310,7 +2315,7 @@ void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
static blk_status_t btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
u64 dio_file_offset)
{
- return btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
+ return btrfs_csum_one_bio(BTRFS_I(inode), bio, (u64)-1, false);
}
/*
@@ -2538,10 +2543,15 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
goto out;
if (bio_flags & EXTENT_BIO_COMPRESSED) {
+ /*
+ * btrfs_submit_compressed_read will handle completing
+ * the bio if there were any errors, so just return
+ * here.
+ */
ret = btrfs_submit_compressed_read(inode, bio,
mirror_num,
bio_flags);
- goto out;
+ goto out_no_endio;
} else {
/*
* Lookup bio sums does extra checks around whether we
@@ -2562,7 +2572,7 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
0, btrfs_submit_bio_start);
goto out;
} else if (!skip_sum) {
- ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
+ ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, (u64)-1, false);
if (ret)
goto out;
}
@@ -2575,6 +2585,7 @@ out:
bio->bi_status = ret;
bio_endio(bio);
}
+out_no_endio:
return ret;
}
@@ -2870,6 +2881,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_key ins;
u64 disk_num_bytes = btrfs_stack_file_extent_disk_num_bytes(stack_fi);
u64 disk_bytenr = btrfs_stack_file_extent_disk_bytenr(stack_fi);
+ u64 offset = btrfs_stack_file_extent_offset(stack_fi);
u64 num_bytes = btrfs_stack_file_extent_num_bytes(stack_fi);
u64 ram_bytes = btrfs_stack_file_extent_ram_bytes(stack_fi);
struct btrfs_drop_extents_args drop_args = { 0 };
@@ -2944,7 +2956,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
goto out;
ret = btrfs_alloc_reserved_file_extent(trans, root, btrfs_ino(inode),
- file_pos, qgroup_reserved, &ins);
+ file_pos - offset,
+ qgroup_reserved, &ins);
out:
btrfs_free_path(path);
@@ -2970,20 +2983,20 @@ static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_ordered_extent *oe)
{
struct btrfs_file_extent_item stack_fi;
- u64 logical_len;
bool update_inode_bytes;
+ u64 num_bytes = oe->num_bytes;
+ u64 ram_bytes = oe->ram_bytes;
memset(&stack_fi, 0, sizeof(stack_fi));
btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG);
btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, oe->disk_bytenr);
btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi,
oe->disk_num_bytes);
+ btrfs_set_stack_file_extent_offset(&stack_fi, oe->offset);
if (test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags))
- logical_len = oe->truncated_len;
- else
- logical_len = oe->num_bytes;
- btrfs_set_stack_file_extent_num_bytes(&stack_fi, logical_len);
- btrfs_set_stack_file_extent_ram_bytes(&stack_fi, logical_len);
+ num_bytes = ram_bytes = oe->truncated_len;
+ btrfs_set_stack_file_extent_num_bytes(&stack_fi, num_bytes);
+ btrfs_set_stack_file_extent_ram_bytes(&stack_fi, ram_bytes);
btrfs_set_stack_file_extent_compression(&stack_fi, oe->compress_type);
/* Encryption and other encoding is reserved and all 0 */
@@ -2994,6 +3007,7 @@ static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
* except if the ordered extent was truncated.
*/
update_inode_bytes = test_bit(BTRFS_ORDERED_DIRECT, &oe->flags) ||
+ test_bit(BTRFS_ORDERED_ENCODED, &oe->flags) ||
test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags);
return insert_reserved_file_extent(trans, BTRFS_I(oe->inode),
@@ -3028,7 +3042,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
!test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
- !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
+ !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags) &&
+ !test_bit(BTRFS_ORDERED_ENCODED, &ordered_extent->flags))
clear_bits |= EXTENT_DELALLOC_NEW;
freespace_inode = btrfs_is_free_space_inode(inode);
@@ -4062,7 +4077,8 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir,
struct btrfs_inode *inode,
- const char *name, int name_len)
+ const char *name, int name_len,
+ struct btrfs_rename_ctx *rename_ctx)
{
struct btrfs_root *root = dir->root;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4118,15 +4134,27 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
goto err;
}
skip_backref:
+ if (rename_ctx)
+ rename_ctx->index = index;
+
ret = btrfs_delete_delayed_dir_index(trans, dir, index);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto err;
}
- btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
- dir_ino);
- btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir, index);
+ /*
+ * If we are in a rename context, we don't need to update anything in the
+ * log. That will be done later during the rename by btrfs_log_new_name().
+ * Besides that, doing it here would only cause extra unnecessary btree
+ * operations on the log tree, increasing latency for applications.
+ */
+ if (!rename_ctx) {
+ btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
+ dir_ino);
+ btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir,
+ index);
+ }
/*
* If we have a pending delayed iput we could end up with the final iput
@@ -4158,7 +4186,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
const char *name, int name_len)
{
int ret;
- ret = __btrfs_unlink_inode(trans, dir, inode, name, name_len);
+ ret = __btrfs_unlink_inode(trans, dir, inode, name, name_len, NULL);
if (!ret) {
drop_nlink(&inode->vfs_inode);
ret = btrfs_update_inode(trans, inode->root, inode);
@@ -4565,14 +4593,21 @@ out_up_write:
static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
int err = 0;
struct btrfs_trans_handle *trans;
u64 last_unlink_trans;
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
- if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID)
+ if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID) {
+ if (unlikely(btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))) {
+ btrfs_err(fs_info,
+ "extent tree v2 doesn't support snapshot deletion yet");
+ return -EOPNOTSUPP;
+ }
return btrfs_delete_subvolume(dir, dentry);
+ }
trans = __unlink_start_trans(dir);
if (IS_ERR(trans))
@@ -4611,7 +4646,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
}
out:
btrfs_end_transaction(trans);
- btrfs_btree_balance_dirty(BTRFS_I(dir)->root->fs_info);
+ btrfs_btree_balance_dirty(fs_info);
return err;
}
@@ -4664,7 +4699,7 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
goto out;
}
}
- ret = btrfs_delalloc_reserve_metadata(inode, blocksize);
+ ret = btrfs_delalloc_reserve_metadata(inode, blocksize, blocksize);
if (ret < 0) {
if (!only_release_metadata)
btrfs_free_reserved_data_space(inode, data_reserved,
@@ -4876,8 +4911,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
cur_offset + hole_size - 1, 0);
hole_em = alloc_extent_map();
if (!hole_em) {
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &inode->runtime_flags);
+ btrfs_set_inode_full_sync(inode);
goto next;
}
hole_em->start = cur_offset;
@@ -5046,16 +5080,17 @@ static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentr
}
/*
- * While truncating the inode pages during eviction, we get the VFS calling
- * btrfs_invalidatepage() against each page of the inode. This is slow because
- * the calls to btrfs_invalidatepage() result in a huge amount of calls to
- * lock_extent_bits() and clear_extent_bit(), which keep merging and splitting
- * extent_state structures over and over, wasting lots of time.
+ * While truncating the inode pages during eviction, we get the VFS
+ * calling btrfs_invalidate_folio() against each folio of the inode. This
+ * is slow because the calls to btrfs_invalidate_folio() result in a
+ * huge amount of calls to lock_extent_bits() and clear_extent_bit(),
+ * which keep merging and splitting extent_state structures over and over,
+ * wasting lots of time.
*
- * Therefore if the inode is being evicted, let btrfs_invalidatepage() skip all
- * those expensive operations on a per page basis and do only the ordered io
- * finishing, while we release here the extent_map and extent_state structures,
- * without the excessive merging and splitting.
+ * Therefore if the inode is being evicted, let btrfs_invalidate_folio()
+ * skip all those expensive operations on a per folio basis and do only
+ * the ordered io finishing, while we release here the extent_map and
+ * extent_state structures, without the excessive merging and splitting.
*/
static void evict_inode_truncate_pages(struct inode *inode)
{
@@ -5121,7 +5156,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
* If still has DELALLOC flag, the extent didn't reach disk,
* and its reserved space won't be freed by delayed_ref.
* So we need to free its reserved space here.
- * (Refer to comment in btrfs_invalidatepage, case 2)
+ * (Refer to comment in btrfs_invalidate_folio, case 2)
*
* Note, end is the bytenr of last byte, so we need + 1 here.
*/
@@ -5584,21 +5619,17 @@ static struct inode *new_simple_dir(struct super_block *s,
return inode;
}
+static_assert(BTRFS_FT_UNKNOWN == FT_UNKNOWN);
+static_assert(BTRFS_FT_REG_FILE == FT_REG_FILE);
+static_assert(BTRFS_FT_DIR == FT_DIR);
+static_assert(BTRFS_FT_CHRDEV == FT_CHRDEV);
+static_assert(BTRFS_FT_BLKDEV == FT_BLKDEV);
+static_assert(BTRFS_FT_FIFO == FT_FIFO);
+static_assert(BTRFS_FT_SOCK == FT_SOCK);
+static_assert(BTRFS_FT_SYMLINK == FT_SYMLINK);
+
static inline u8 btrfs_inode_type(struct inode *inode)
{
- /*
- * Compile-time asserts that generic FT_* types still match
- * BTRFS_FT_* types
- */
- BUILD_BUG_ON(BTRFS_FT_UNKNOWN != FT_UNKNOWN);
- BUILD_BUG_ON(BTRFS_FT_REG_FILE != FT_REG_FILE);
- BUILD_BUG_ON(BTRFS_FT_DIR != FT_DIR);
- BUILD_BUG_ON(BTRFS_FT_CHRDEV != FT_CHRDEV);
- BUILD_BUG_ON(BTRFS_FT_BLKDEV != FT_BLKDEV);
- BUILD_BUG_ON(BTRFS_FT_FIFO != FT_FIFO);
- BUILD_BUG_ON(BTRFS_FT_SOCK != FT_SOCK);
- BUILD_BUG_ON(BTRFS_FT_SYMLINK != FT_SYMLINK);
-
return fs_umode_to_ftype(inode->i_mode);
}
@@ -5971,14 +6002,8 @@ static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
goto out;
ret = 0;
- /*
- * MAGIC NUMBER EXPLANATION:
- * since we search a directory based on f_pos we have to start at 2
- * since '.' and '..' have f_pos of 0 and 1 respectively, so everybody
- * else has to start at 2
- */
if (path->slots[0] == 0) {
- inode->index_cnt = 2;
+ inode->index_cnt = BTRFS_DIR_START_INDEX;
goto out;
}
@@ -5989,7 +6014,7 @@ static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
if (found_key.objectid != btrfs_ino(inode) ||
found_key.type != BTRFS_DIR_INDEX_KEY) {
- inode->index_cnt = 2;
+ inode->index_cnt = BTRFS_DIR_START_INDEX;
goto out;
}
@@ -6140,7 +6165,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
* sync since it will be a full sync anyway and this will blow away the
* old info in the log.
*/
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
+ btrfs_set_inode_full_sync(BTRFS_I(inode));
key[0].objectid = objectid;
key[0].type = BTRFS_INODE_ITEM_KEY;
@@ -6537,7 +6562,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
goto fail;
}
d_instantiate(dentry, inode);
- btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent);
+ btrfs_log_new_name(trans, old_dentry, NULL, 0, parent);
}
fail:
@@ -7040,8 +7065,11 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
if (IS_ERR(em))
goto out;
}
- ret = btrfs_add_ordered_extent_dio(inode, start, block_start, len,
- block_len, type);
+ ret = btrfs_add_ordered_extent(inode, start, len, len, block_start,
+ block_len, 0,
+ (1 << type) |
+ (1 << BTRFS_ORDERED_DIRECT),
+ BTRFS_COMPRESS_NONE);
if (ret) {
if (em) {
free_extent_map(em);
@@ -7441,7 +7469,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
struct extent_map *em2;
/* We can NOCOW, so only need to reserve metadata space. */
- ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
+ ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len, len);
if (ret < 0) {
/* Our caller expects us to free the input extent map. */
free_extent_map(em);
@@ -7600,6 +7628,34 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
}
len = min(len, em->len - (start - em->start));
+
+ /*
+ * If we have a NOWAIT request and the range contains multiple extents
+ * (or a mix of extents and holes), then we return -EAGAIN to make the
+ * caller fallback to a context where it can do a blocking (without
+ * NOWAIT) request. This way we avoid doing partial IO and returning
+ * success to the caller, which is not optimal for writes and for reads
+ * it can result in unexpected behaviour for an application.
+ *
+ * When doing a read, because we use IOMAP_DIO_PARTIAL when calling
+ * iomap_dio_rw(), we can end up returning less data then what the caller
+ * asked for, resulting in an unexpected, and incorrect, short read.
+ * That is, the caller asked to read N bytes and we return less than that,
+ * which is wrong unless we are crossing EOF. This happens if we get a
+ * page fault error when trying to fault in pages for the buffer that is
+ * associated to the struct iov_iter passed to iomap_dio_rw(), and we
+ * have previously submitted bios for other extents in the range, in
+ * which case iomap_dio_rw() may return us EIOCBQUEUED if not all of
+ * those bios have completed by the time we get the page fault error,
+ * which we return back to our caller - we should only return EIOCBQUEUED
+ * after we have submitted bios for all the extents in the range.
+ */
+ if ((flags & IOMAP_NOWAIT) && len < length) {
+ free_extent_map(em);
+ ret = -EAGAIN;
+ goto unlock_err;
+ }
+
if (write) {
ret = btrfs_get_blocks_direct_write(&em, inode, dio_data,
start, len);
@@ -7803,7 +7859,7 @@ static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode,
struct bio *bio,
u64 dio_file_offset)
{
- return btrfs_csum_one_bio(BTRFS_I(inode), bio, dio_file_offset, 1);
+ return btrfs_csum_one_bio(BTRFS_I(inode), bio, dio_file_offset, false);
}
static void btrfs_end_dio_bio(struct bio *bio)
@@ -7860,7 +7916,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
* If we aren't doing async submit, calculate the csum of the
* bio now.
*/
- ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, 1);
+ ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, false);
if (ret)
goto err;
} else {
@@ -8076,8 +8132,13 @@ int btrfs_readpage(struct file *file, struct page *page)
btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
ret = btrfs_do_readpage(page, NULL, &bio_ctrl, 0, NULL);
- if (bio_ctrl.bio)
- ret = submit_one_bio(bio_ctrl.bio, 0, bio_ctrl.bio_flags);
+ if (bio_ctrl.bio) {
+ int ret2;
+
+ ret2 = submit_one_bio(bio_ctrl.bio, 0, bio_ctrl.bio_flags);
+ if (ret == 0)
+ ret = ret2;
+ }
return ret;
}
@@ -8118,8 +8179,8 @@ static void btrfs_readahead(struct readahead_control *rac)
}
/*
- * For releasepage() and invalidatepage() we have a race window where
- * end_page_writeback() is called but the subpage spinlock is not yet released.
+ * For releasepage() and invalidate_folio() we have a race window where
+ * folio_end_writeback() is called but the subpage spinlock is not yet released.
* If we continue to release/invalidate the page, we could cause use-after-free
* for subpage spinlock. So this function is to spin and wait for subpage
* spinlock.
@@ -8195,48 +8256,48 @@ static int btrfs_migratepage(struct address_space *mapping,
}
#endif
-static void btrfs_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length)
+static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length)
{
- struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
+ struct btrfs_inode *inode = BTRFS_I(folio->mapping->host);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_io_tree *tree = &inode->io_tree;
struct extent_state *cached_state = NULL;
- u64 page_start = page_offset(page);
- u64 page_end = page_start + PAGE_SIZE - 1;
+ u64 page_start = folio_pos(folio);
+ u64 page_end = page_start + folio_size(folio) - 1;
u64 cur;
int inode_evicting = inode->vfs_inode.i_state & I_FREEING;
/*
- * We have page locked so no new ordered extent can be created on this
- * page, nor bio can be submitted for this page.
+ * We have folio locked so no new ordered extent can be created on this
+ * page, nor bio can be submitted for this folio.
*
- * But already submitted bio can still be finished on this page.
- * Furthermore, endio function won't skip page which has Ordered
+ * But already submitted bio can still be finished on this folio.
+ * Furthermore, endio function won't skip folio which has Ordered
* (Private2) already cleared, so it's possible for endio and
- * invalidatepage to do the same ordered extent accounting twice
- * on one page.
+ * invalidate_folio to do the same ordered extent accounting twice
+ * on one folio.
*
* So here we wait for any submitted bios to finish, so that we won't
- * do double ordered extent accounting on the same page.
+ * do double ordered extent accounting on the same folio.
*/
- wait_on_page_writeback(page);
- wait_subpage_spinlock(page);
+ folio_wait_writeback(folio);
+ wait_subpage_spinlock(&folio->page);
/*
* For subpage case, we have call sites like
* btrfs_punch_hole_lock_range() which passes range not aligned to
* sectorsize.
- * If the range doesn't cover the full page, we don't need to and
- * shouldn't clear page extent mapped, as page->private can still
+ * If the range doesn't cover the full folio, we don't need to and
+ * shouldn't clear page extent mapped, as folio->private can still
* record subpage dirty bits for other part of the range.
*
- * For cases that can invalidate the full even the range doesn't
- * cover the full page, like invalidating the last page, we're
+ * For cases that invalidate the full folio even though the range
+ * doesn't cover the full folio, like invalidating the last folio, we're
* still safe to wait for ordered extent to finish.
*/
if (!(offset == 0 && length == PAGE_SIZE)) {
- btrfs_releasepage(page, GFP_NOFS);
+ btrfs_releasepage(&folio->page, GFP_NOFS);
return;
}
@@ -8277,7 +8338,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
page_end);
ASSERT(range_end + 1 - cur < U32_MAX);
range_len = range_end + 1 - cur;
- if (!btrfs_page_test_ordered(fs_info, page, cur, range_len)) {
+ if (!btrfs_page_test_ordered(fs_info, &folio->page, cur, range_len)) {
/*
* If Ordered (Private2) is cleared, it means endio has
* already been executed for the range.
@@ -8287,7 +8348,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
delete_states = false;
goto next;
}
- btrfs_page_clear_ordered(fs_info, page, cur, range_len);
+ btrfs_page_clear_ordered(fs_info, &folio->page, cur, range_len);
/*
* IO on this page will never be started, so we need to account
@@ -8357,11 +8418,11 @@ next:
* should not have Ordered (Private2) anymore, or the above iteration
* did something wrong.
*/
- ASSERT(!PageOrdered(page));
- btrfs_page_clear_checked(fs_info, page, page_offset(page), PAGE_SIZE);
+ ASSERT(!folio_test_ordered(folio));
+ btrfs_page_clear_checked(fs_info, &folio->page, folio_pos(folio), folio_size(folio));
if (!inode_evicting)
- __btrfs_releasepage(page, GFP_NOFS);
- clear_page_extent_mapped(page);
+ __btrfs_releasepage(&folio->page, GFP_NOFS);
+ clear_page_extent_mapped(&folio->page);
}
/*
@@ -8706,7 +8767,7 @@ out:
* extents beyond i_size to drop.
*/
if (control.extents_found > 0)
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
+ btrfs_set_inode_full_sync(BTRFS_I(inode));
return ret;
}
@@ -8759,7 +8820,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
struct btrfs_inode *ei;
struct inode *inode;
- ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_KERNEL);
+ ei = alloc_inode_sb(sb, btrfs_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
@@ -9002,14 +9063,14 @@ static int btrfs_rename_exchange(struct inode *old_dir,
struct inode *new_inode = new_dentry->d_inode;
struct inode *old_inode = old_dentry->d_inode;
struct timespec64 ctime = current_time(old_inode);
+ struct btrfs_rename_ctx old_rename_ctx;
+ struct btrfs_rename_ctx new_rename_ctx;
u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
u64 new_ino = btrfs_ino(BTRFS_I(new_inode));
u64 old_idx = 0;
u64 new_idx = 0;
int ret;
int ret2;
- bool root_log_pinned = false;
- bool dest_log_pinned = false;
bool need_abort = false;
/*
@@ -9112,29 +9173,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
BTRFS_I(new_inode), 1);
}
- /*
- * Now pin the logs of the roots. We do it to ensure that no other task
- * can sync the logs while we are in progress with the rename, because
- * that could result in an inconsistency in case any of the inodes that
- * are part of this rename operation were logged before.
- *
- * We pin the logs even if at this precise moment none of the inodes was
- * logged before. This is because right after we checked for that, some
- * other task fsyncing some other inode not involved with this rename
- * operation could log that one of our inodes exists.
- *
- * We don't need to pin the logs before the above calls to
- * btrfs_insert_inode_ref(), since those don't ever need to change a log.
- */
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
- btrfs_pin_log_trans(root);
- root_log_pinned = true;
- }
- if (new_ino != BTRFS_FIRST_FREE_OBJECTID) {
- btrfs_pin_log_trans(dest);
- dest_log_pinned = true;
- }
-
/* src is a subvolume */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
@@ -9142,7 +9180,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
BTRFS_I(old_dentry->d_inode),
old_dentry->d_name.name,
- old_dentry->d_name.len);
+ old_dentry->d_name.len,
+ &old_rename_ctx);
if (!ret)
ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
}
@@ -9158,7 +9197,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir),
BTRFS_I(new_dentry->d_inode),
new_dentry->d_name.name,
- new_dentry->d_name.len);
+ new_dentry->d_name.len,
+ &new_rename_ctx);
if (!ret)
ret = btrfs_update_inode(trans, dest, BTRFS_I(new_inode));
}
@@ -9188,46 +9228,31 @@ static int btrfs_rename_exchange(struct inode *old_dir,
if (new_inode->i_nlink == 1)
BTRFS_I(new_inode)->dir_index = new_idx;
- if (root_log_pinned) {
- btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
- new_dentry->d_parent);
- btrfs_end_log_trans(root);
- root_log_pinned = false;
- }
- if (dest_log_pinned) {
- btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir),
- old_dentry->d_parent);
- btrfs_end_log_trans(dest);
- dest_log_pinned = false;
- }
-out_fail:
/*
- * If we have pinned a log and an error happened, we unpin tasks
- * trying to sync the log and force them to fallback to a transaction
- * commit if the log currently contains any of the inodes involved in
- * this rename operation (to ensure we do not persist a log with an
- * inconsistent state for any of these inodes or leading to any
- * inconsistencies when replayed). If the transaction was aborted, the
- * abortion reason is propagated to userspace when attempting to commit
- * the transaction. If the log does not contain any of these inodes, we
- * allow the tasks to sync it.
+ * Now pin the logs of the roots. We do it to ensure that no other task
+ * can sync the logs while we are in progress with the rename, because
+ * that could result in an inconsistency in case any of the inodes that
+ * are part of this rename operation were logged before.
*/
- if (ret && (root_log_pinned || dest_log_pinned)) {
- if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
- btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
- btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
- btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation))
- btrfs_set_log_full_commit(trans);
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
+ btrfs_pin_log_trans(root);
+ if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
+ btrfs_pin_log_trans(dest);
- if (root_log_pinned) {
- btrfs_end_log_trans(root);
- root_log_pinned = false;
- }
- if (dest_log_pinned) {
- btrfs_end_log_trans(dest);
- dest_log_pinned = false;
- }
- }
+ /* Do the log updates for all inodes. */
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
+ btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
+ old_rename_ctx.index, new_dentry->d_parent);
+ if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
+ btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir),
+ new_rename_ctx.index, old_dentry->d_parent);
+
+ /* Now unpin the logs. */
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
+ btrfs_end_log_trans(root);
+ if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
+ btrfs_end_log_trans(dest);
+out_fail:
ret2 = btrfs_end_transaction(trans);
ret = ret ? ret : ret2;
out_notrans:
@@ -9302,11 +9327,11 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
struct btrfs_root *dest = BTRFS_I(new_dir)->root;
struct inode *new_inode = d_inode(new_dentry);
struct inode *old_inode = d_inode(old_dentry);
+ struct btrfs_rename_ctx rename_ctx;
u64 index = 0;
int ret;
int ret2;
u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
- bool log_pinned = false;
if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
return -EPERM;
@@ -9411,29 +9436,11 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
} else {
- /*
- * Now pin the log. We do it to ensure that no other task can
- * sync the log while we are in progress with the rename, as
- * that could result in an inconsistency in case any of the
- * inodes that are part of this rename operation were logged
- * before.
- *
- * We pin the log even if at this precise moment none of the
- * inodes was logged before. This is because right after we
- * checked for that, some other task fsyncing some other inode
- * not involved with this rename operation could log that one of
- * our inodes exists.
- *
- * We don't need to pin the logs before the above call to
- * btrfs_insert_inode_ref(), since that does not need to change
- * a log.
- */
- btrfs_pin_log_trans(root);
- log_pinned = true;
ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
BTRFS_I(d_inode(old_dentry)),
old_dentry->d_name.name,
- old_dentry->d_name.len);
+ old_dentry->d_name.len,
+ &rename_ctx);
if (!ret)
ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
}
@@ -9475,12 +9482,9 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
if (old_inode->i_nlink == 1)
BTRFS_I(old_inode)->dir_index = index;
- if (log_pinned) {
- btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
- new_dentry->d_parent);
- btrfs_end_log_trans(root);
- log_pinned = false;
- }
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
+ btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
+ rename_ctx.index, new_dentry->d_parent);
if (flags & RENAME_WHITEOUT) {
ret = btrfs_whiteout_for_rename(trans, root, mnt_userns,
@@ -9492,28 +9496,6 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
}
}
out_fail:
- /*
- * If we have pinned the log and an error happened, we unpin tasks
- * trying to sync the log and force them to fallback to a transaction
- * commit if the log currently contains any of the inodes involved in
- * this rename operation (to ensure we do not persist a log with an
- * inconsistent state for any of these inodes or leading to any
- * inconsistencies when replayed). If the transaction was aborted, the
- * abortion reason is propagated to userspace when attempting to commit
- * the transaction. If the log does not contain any of these inodes, we
- * allow the tasks to sync it.
- */
- if (ret && log_pinned) {
- if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
- btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
- btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
- (new_inode &&
- btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
- btrfs_set_log_full_commit(trans);
-
- btrfs_end_log_trans(root);
- log_pinned = false;
- }
ret2 = btrfs_end_transaction(trans);
ret = ret ? ret : ret2;
out_notrans:
@@ -9993,8 +9975,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
em = alloc_extent_map();
if (!em) {
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags);
+ btrfs_set_inode_full_sync(BTRFS_I(inode));
goto next;
}
@@ -10076,11 +10057,6 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
min_size, actual_len, alloc_hint, trans);
}
-static int btrfs_set_page_dirty(struct page *page)
-{
- return __set_page_dirty_nobuffers(page);
-}
-
static int btrfs_permission(struct user_namespace *mnt_userns,
struct inode *inode, int mask)
{
@@ -10182,6 +10158,747 @@ void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end)
}
}
+static int btrfs_encoded_io_compression_from_extent(
+ struct btrfs_fs_info *fs_info,
+ int compress_type)
+{
+ switch (compress_type) {
+ case BTRFS_COMPRESS_NONE:
+ return BTRFS_ENCODED_IO_COMPRESSION_NONE;
+ case BTRFS_COMPRESS_ZLIB:
+ return BTRFS_ENCODED_IO_COMPRESSION_ZLIB;
+ case BTRFS_COMPRESS_LZO:
+ /*
+ * The LZO format depends on the sector size. 64K is the maximum
+ * sector size that we support.
+ */
+ if (fs_info->sectorsize < SZ_4K || fs_info->sectorsize > SZ_64K)
+ return -EINVAL;
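+ /*
+ * sectorsize_bits is 12 for a 4K sector, so this maps each
+ * power-of-two sector size to the matching LZO_*K value.
+ */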
+ return BTRFS_ENCODED_IO_COMPRESSION_LZO_4K +
+ (fs_info->sectorsize_bits - 12);
+ case BTRFS_COMPRESS_ZSTD:
+ return BTRFS_ENCODED_IO_COMPRESSION_ZSTD;
+ default:
+ return -EUCLEAN;
+ }
+}
+
+static ssize_t btrfs_encoded_read_inline(
+ struct kiocb *iocb,
+ struct iov_iter *iter, u64 start,
+ u64 lockend,
+ struct extent_state **cached_state,
+ u64 extent_start, size_t count,
+ struct btrfs_ioctl_encoded_io_args *encoded,
+ bool *unlocked)
+{
+ struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct extent_io_tree *io_tree = &inode->io_tree;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ struct btrfs_file_extent_item *item;
+ u64 ram_bytes;
+ unsigned long ptr;
+ void *tmp;
+ ssize_t ret;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
+ extent_start, 0);
+ if (ret) {
+ if (ret > 0) {
+ /* The extent item disappeared? */
+ ret = -EIO;
+ }
+ goto out;
+ }
+ leaf = path->nodes[0];
+ item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
+
+ ram_bytes = btrfs_file_extent_ram_bytes(leaf, item);
+ ptr = btrfs_file_extent_inline_start(item);
+
+ encoded->len = min_t(u64, extent_start + ram_bytes,
+ inode->vfs_inode.i_size) - iocb->ki_pos;
+ ret = btrfs_encoded_io_compression_from_extent(fs_info,
+ btrfs_file_extent_compression(leaf, item));
+ if (ret < 0)
+ goto out;
+ encoded->compression = ret;
+ if (encoded->compression) {
+ size_t inline_size;
+
+ inline_size = btrfs_file_extent_inline_item_len(leaf,
+ path->slots[0]);
+ if (inline_size > count) {
+ ret = -ENOBUFS;
+ goto out;
+ }
+ count = inline_size;
+ encoded->unencoded_len = ram_bytes;
+ encoded->unencoded_offset = iocb->ki_pos - extent_start;
+ } else {
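+ /*
+ * Uncompressed inline data can be read from an arbitrary
+ * offset, so clamp the copy to the requested range.
+ */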
+ count = min_t(u64, count, encoded->len);
+ encoded->len = count;
+ encoded->unencoded_len = count;
+ ptr += iocb->ki_pos - extent_start;
+ }
+
+ tmp = kmalloc(count, GFP_NOFS);
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ read_extent_buffer(leaf, tmp, ptr, count);
+ btrfs_release_path(path);
+ unlock_extent_cached(io_tree, start, lockend, cached_state);
+ btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
+ *unlocked = true;
+
+ ret = copy_to_iter(tmp, count, iter);
+ if (ret != count)
+ ret = -EFAULT;
+ kfree(tmp);
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+struct btrfs_encoded_read_private {
+ struct btrfs_inode *inode;
+ u64 file_offset;
+ wait_queue_head_t wait;
+ atomic_t pending;
+ blk_status_t status;
+ bool skip_csum;
+};
+
+static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
+ struct bio *bio, int mirror_num)
+{
+ struct btrfs_encoded_read_private *priv = bio->bi_private;
+ struct btrfs_bio *bbio = btrfs_bio(bio);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ blk_status_t ret;
+
+ if (!priv->skip_csum) {
+ ret = btrfs_lookup_bio_sums(&inode->vfs_inode, bio, NULL);
+ if (ret)
+ return ret;
+ }
+
+ ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
+ if (ret) {
+ btrfs_bio_free_csum(bbio);
+ return ret;
+ }
+
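+ /* Each in-flight bio holds a reference on priv->pending. */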
+ atomic_inc(&priv->pending);
+ ret = btrfs_map_bio(fs_info, bio, mirror_num);
+ if (ret) {
+ atomic_dec(&priv->pending);
+ btrfs_bio_free_csum(bbio);
+ }
+ return ret;
+}
+
+static blk_status_t btrfs_encoded_read_verify_csum(struct btrfs_bio *bbio)
+{
+ const bool uptodate = (bbio->bio.bi_status == BLK_STS_OK);
+ struct btrfs_encoded_read_private *priv = bbio->bio.bi_private;
+ struct btrfs_inode *inode = priv->inode;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ u32 sectorsize = fs_info->sectorsize;
+ struct bio_vec *bvec;
+ struct bvec_iter_all iter_all;
+ u64 start = priv->file_offset;
+ u32 bio_offset = 0;
+
+ if (priv->skip_csum || !uptodate)
+ return bbio->bio.bi_status;
+
+ bio_for_each_segment_all(bvec, &bbio->bio, iter_all) {
+ unsigned int i, nr_sectors, pgoff;
+
+ nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
+ pgoff = bvec->bv_offset;
+ for (i = 0; i < nr_sectors; i++) {
+ ASSERT(pgoff < PAGE_SIZE);
+ if (check_data_csum(&inode->vfs_inode, bbio, bio_offset,
+ bvec->bv_page, pgoff, start))
+ return BLK_STS_IOERR;
+ start += sectorsize;
+ bio_offset += sectorsize;
+ pgoff += sectorsize;
+ }
+ }
+ return BLK_STS_OK;
+}
+
+static void btrfs_encoded_read_endio(struct bio *bio)
+{
+ struct btrfs_encoded_read_private *priv = bio->bi_private;
+ struct btrfs_bio *bbio = btrfs_bio(bio);
+ blk_status_t status;
+
+ status = btrfs_encoded_read_verify_csum(bbio);
+ if (status) {
+ /*
+ * The memory barrier implied by the atomic_dec_return() here
+ * pairs with the memory barrier implied by the
+ * atomic_dec_return() or io_wait_event() in
+ * btrfs_encoded_read_regular_fill_pages() to ensure that this
+ * write is observed before the load of status in
+ * btrfs_encoded_read_regular_fill_pages().
+ */
+ WRITE_ONCE(priv->status, status);
+ }
+ if (!atomic_dec_return(&priv->pending))
+ wake_up(&priv->wait);
+ btrfs_bio_free_csum(bbio);
+ bio_put(bio);
+}
+
+static int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
+ u64 file_offset,
+ u64 disk_bytenr,
+ u64 disk_io_size,
+ struct page **pages)
+{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct btrfs_encoded_read_private priv = {
+ .inode = inode,
+ .file_offset = file_offset,
+ .pending = ATOMIC_INIT(1),
+ .skip_csum = (inode->flags & BTRFS_INODE_NODATASUM),
+ };
+ unsigned long i = 0;
+ u64 cur = 0;
+ int ret;
+
+ init_waitqueue_head(&priv.wait);
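+ /*
+ * priv.pending starts at 1 so that we keep a reference for the
+ * submission loop below; the final atomic_dec_return() drops it
+ * before we wait for the bios to complete.
+ */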
+ /*
+ * Submit bios for the extent, splitting due to bio or stripe limits as
+ * necessary.
+ */
+ while (cur < disk_io_size) {
+ struct extent_map *em;
+ struct btrfs_io_geometry geom;
+ struct bio *bio = NULL;
+ u64 remaining;
+
+ em = btrfs_get_chunk_map(fs_info, disk_bytenr + cur,
+ disk_io_size - cur);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ } else {
+ ret = btrfs_get_io_geometry(fs_info, em, BTRFS_MAP_READ,
+ disk_bytenr + cur, &geom);
+ free_extent_map(em);
+ }
+ if (ret) {
+ WRITE_ONCE(priv.status, errno_to_blk_status(ret));
+ break;
+ }
+ remaining = min(geom.len, disk_io_size - cur);
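+ /*
+ * Loop while there is data left to add or a partially filled
+ * bio still has to be submitted (bytes == 0 on the final pass
+ * flushes the last bio).
+ */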
+ while (bio || remaining) {
+ size_t bytes = min_t(u64, remaining, PAGE_SIZE);
+
+ if (!bio) {
+ bio = btrfs_bio_alloc(BIO_MAX_VECS);
+ bio->bi_iter.bi_sector =
+ (disk_bytenr + cur) >> SECTOR_SHIFT;
+ bio->bi_end_io = btrfs_encoded_read_endio;
+ bio->bi_private = &priv;
+ bio->bi_opf = REQ_OP_READ;
+ }
+
+ if (!bytes ||
+ bio_add_page(bio, pages[i], bytes, 0) < bytes) {
+ blk_status_t status;
+
+ status = submit_encoded_read_bio(inode, bio, 0);
+ if (status) {
+ WRITE_ONCE(priv.status, status);
+ bio_put(bio);
+ goto out;
+ }
+ bio = NULL;
+ continue;
+ }
+
+ i++;
+ cur += bytes;
+ remaining -= bytes;
+ }
+ }
+
+out:
+ if (atomic_dec_return(&priv.pending))
+ io_wait_event(priv.wait, !atomic_read(&priv.pending));
+ /* See btrfs_encoded_read_endio() for ordering. */
+ return blk_status_to_errno(READ_ONCE(priv.status));
+}
+
+static ssize_t btrfs_encoded_read_regular(struct kiocb *iocb,
+ struct iov_iter *iter,
+ u64 start, u64 lockend,
+ struct extent_state **cached_state,
+ u64 disk_bytenr, u64 disk_io_size,
+ size_t count, bool compressed,
+ bool *unlocked)
+{
+ struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
+ struct extent_io_tree *io_tree = &inode->io_tree;
+ struct page **pages;
+ unsigned long nr_pages, i;
+ u64 cur;
+ size_t page_offset;
+ ssize_t ret;
+
+ nr_pages = DIV_ROUND_UP(disk_io_size, PAGE_SIZE);
+ pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
+ if (!pages)
+ return -ENOMEM;
+ for (i = 0; i < nr_pages; i++) {
+ pages[i] = alloc_page(GFP_NOFS);
+ if (!pages[i]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ ret = btrfs_encoded_read_regular_fill_pages(inode, start, disk_bytenr,
+ disk_io_size, pages);
+ if (ret)
+ goto out;
+
+ unlock_extent_cached(io_tree, start, lockend, cached_state);
+ btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
+ *unlocked = true;
+
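+ /*
+ * Compressed data is copied from the start of the extent, while
+ * uncompressed data starts at the requested file position.
+ */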
+ if (compressed) {
+ i = 0;
+ page_offset = 0;
+ } else {
+ i = (iocb->ki_pos - start) >> PAGE_SHIFT;
+ page_offset = (iocb->ki_pos - start) & (PAGE_SIZE - 1);
+ }
+ cur = 0;
+ while (cur < count) {
+ size_t bytes = min_t(size_t, count - cur,
+ PAGE_SIZE - page_offset);
+
+ if (copy_page_to_iter(pages[i], page_offset, bytes,
+ iter) != bytes) {
+ ret = -EFAULT;
+ goto out;
+ }
+ i++;
+ cur += bytes;
+ page_offset = 0;
+ }
+ ret = count;
+out:
+ for (i = 0; i < nr_pages; i++) {
+ if (pages[i])
+ __free_page(pages[i]);
+ }
+ kfree(pages);
+ return ret;
+}
+
+ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
+ struct btrfs_ioctl_encoded_io_args *encoded)
+{
+ struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct extent_io_tree *io_tree = &inode->io_tree;
+ ssize_t ret;
+ size_t count = iov_iter_count(iter);
+ u64 start, lockend, disk_bytenr, disk_io_size;
+ struct extent_state *cached_state = NULL;
+ struct extent_map *em;
+ bool unlocked = false;
+
+ file_accessed(iocb->ki_filp);
+
+ btrfs_inode_lock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
+
+ if (iocb->ki_pos >= inode->vfs_inode.i_size) {
+ btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
+ return 0;
+ }
+ start = ALIGN_DOWN(iocb->ki_pos, fs_info->sectorsize);
+ /*
+ * We don't know how long the extent containing iocb->ki_pos is, but if
+ * it's compressed we know that it won't be longer than this.
+ */
+ lockend = start + BTRFS_MAX_UNCOMPRESSED - 1;
+
+ for (;;) {
+ struct btrfs_ordered_extent *ordered;
+
+ ret = btrfs_wait_ordered_range(&inode->vfs_inode, start,
+ lockend - start + 1);
+ if (ret)
+ goto out_unlock_inode;
+ lock_extent_bits(io_tree, start, lockend, &cached_state);
+ ordered = btrfs_lookup_ordered_range(inode, start,
+ lockend - start + 1);
+ if (!ordered)
+ break;
+ btrfs_put_ordered_extent(ordered);
+ unlock_extent_cached(io_tree, start, lockend, &cached_state);
+ cond_resched();
+ }
+
+ em = btrfs_get_extent(inode, NULL, 0, start, lockend - start + 1);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out_unlock_extent;
+ }
+
+ if (em->block_start == EXTENT_MAP_INLINE) {
+ u64 extent_start = em->start;
+
+ /*
+ * For inline extents we get everything we need out of the
+ * extent item.
+ */
+ free_extent_map(em);
+ em = NULL;
+ ret = btrfs_encoded_read_inline(iocb, iter, start, lockend,
+ &cached_state, extent_start,
+ count, encoded, &unlocked);
+ goto out;
+ }
+
+ /*
+ * We only want to return up to EOF even if the extent extends beyond
+ * that.
+ */
+ encoded->len = min_t(u64, extent_map_end(em),
+ inode->vfs_inode.i_size) - iocb->ki_pos;
+ if (em->block_start == EXTENT_MAP_HOLE ||
+ test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
+ disk_bytenr = EXTENT_MAP_HOLE;
+ count = min_t(u64, count, encoded->len);
+ encoded->len = count;
+ encoded->unencoded_len = count;
+ } else if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
+ disk_bytenr = em->block_start;
+ /*
+ * Bail if the buffer isn't large enough to return the whole
+ * compressed extent.
+ */
+ if (em->block_len > count) {
+ ret = -ENOBUFS;
+ goto out_em;
+ }
+ disk_io_size = count = em->block_len;
+ encoded->unencoded_len = em->ram_bytes;
+ encoded->unencoded_offset = iocb->ki_pos - em->orig_start;
+ ret = btrfs_encoded_io_compression_from_extent(fs_info,
+ em->compress_type);
+ if (ret < 0)
+ goto out_em;
+ encoded->compression = ret;
+ } else {
+ disk_bytenr = em->block_start + (start - em->start);
+ if (encoded->len > count)
+ encoded->len = count;
+ /*
+ * Don't read beyond what we locked. This also limits the page
+ * allocations that we'll do.
+ */
+ disk_io_size = min(lockend + 1, iocb->ki_pos + encoded->len) - start;
+ count = start + disk_io_size - iocb->ki_pos;
+ encoded->len = count;
+ encoded->unencoded_len = count;
+ disk_io_size = ALIGN(disk_io_size, fs_info->sectorsize);
+ }
+ free_extent_map(em);
+ em = NULL;
+
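+ /* Holes and preallocated extents read back as zeroes. */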
+ if (disk_bytenr == EXTENT_MAP_HOLE) {
+ unlock_extent_cached(io_tree, start, lockend, &cached_state);
+ btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
+ unlocked = true;
+ ret = iov_iter_zero(count, iter);
+ if (ret != count)
+ ret = -EFAULT;
+ } else {
+ ret = btrfs_encoded_read_regular(iocb, iter, start, lockend,
+ &cached_state, disk_bytenr,
+ disk_io_size, count,
+ encoded->compression,
+ &unlocked);
+ }
+
+out:
+ if (ret >= 0)
+ iocb->ki_pos += encoded->len;
+out_em:
+ free_extent_map(em);
+out_unlock_extent:
+ if (!unlocked)
+ unlock_extent_cached(io_tree, start, lockend, &cached_state);
+out_unlock_inode:
+ if (!unlocked)
+ btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
+ return ret;
+}
+
+ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
+ const struct btrfs_ioctl_encoded_io_args *encoded)
+{
+ struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct extent_io_tree *io_tree = &inode->io_tree;
+ struct extent_changeset *data_reserved = NULL;
+ struct extent_state *cached_state = NULL;
+ int compression;
+ size_t orig_count;
+ u64 start, end;
+ u64 num_bytes, ram_bytes, disk_num_bytes;
+ unsigned long nr_pages, i;
+ struct page **pages;
+ struct btrfs_key ins;
+ bool extent_reserved = false;
+ struct extent_map *em;
+ ssize_t ret;
+
+ switch (encoded->compression) {
+ case BTRFS_ENCODED_IO_COMPRESSION_ZLIB:
+ compression = BTRFS_COMPRESS_ZLIB;
+ break;
+ case BTRFS_ENCODED_IO_COMPRESSION_ZSTD:
+ compression = BTRFS_COMPRESS_ZSTD;
+ break;
+ case BTRFS_ENCODED_IO_COMPRESSION_LZO_4K:
+ case BTRFS_ENCODED_IO_COMPRESSION_LZO_8K:
+ case BTRFS_ENCODED_IO_COMPRESSION_LZO_16K:
+ case BTRFS_ENCODED_IO_COMPRESSION_LZO_32K:
+ case BTRFS_ENCODED_IO_COMPRESSION_LZO_64K:
+ /* The sector size must match for LZO. */
+ if (encoded->compression -
+ BTRFS_ENCODED_IO_COMPRESSION_LZO_4K + 12 !=
+ fs_info->sectorsize_bits)
+ return -EINVAL;
+ compression = BTRFS_COMPRESS_LZO;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (encoded->encryption != BTRFS_ENCODED_IO_ENCRYPTION_NONE)
+ return -EINVAL;
+
+ orig_count = iov_iter_count(from);
+
+ /* The extent size must be sane. */
+ if (encoded->unencoded_len > BTRFS_MAX_UNCOMPRESSED ||
+ orig_count > BTRFS_MAX_COMPRESSED || orig_count == 0)
+ return -EINVAL;
+
+ /*
+ * The compressed data must be smaller than the decompressed data.
+ *
+ * It's of course possible for data to compress to a larger or the same
+ * size, but the buffered I/O path falls back to no compression for such
+ * data, and we don't want to break any assumptions by creating these
+ * extents.
+ *
+ * Note that this is less strict than the current check we have that the
+ * compressed data must be at least one sector smaller than the
+ * decompressed data. We only want to enforce the weaker requirement
+ * from old kernels that it is at least one byte smaller.
+ */
+ if (orig_count >= encoded->unencoded_len)
+ return -EINVAL;
+
+ /* The extent must start on a sector boundary. */
+ start = iocb->ki_pos;
+ if (!IS_ALIGNED(start, fs_info->sectorsize))
+ return -EINVAL;
+
+ /*
+ * The extent must end on a sector boundary. However, we allow a write
+ * which ends at or extends i_size to have an unaligned length; we round
+ * up the extent size and set i_size to the unaligned end.
+ */
+ if (start + encoded->len < inode->vfs_inode.i_size &&
+ !IS_ALIGNED(start + encoded->len, fs_info->sectorsize))
+ return -EINVAL;
+
+ /* Finally, the offset in the unencoded data must be sector-aligned. */
+ if (!IS_ALIGNED(encoded->unencoded_offset, fs_info->sectorsize))
+ return -EINVAL;
+
+ num_bytes = ALIGN(encoded->len, fs_info->sectorsize);
+ ram_bytes = ALIGN(encoded->unencoded_len, fs_info->sectorsize);
+ end = start + num_bytes - 1;
+
+ /*
+ * If the extent cannot be inline, the compressed data on disk must be
+ * sector-aligned. For convenience, we extend it with zeroes if it
+ * isn't.
+ */
+ disk_num_bytes = ALIGN(orig_count, fs_info->sectorsize);
+ nr_pages = DIV_ROUND_UP(disk_num_bytes, PAGE_SIZE);
+ pages = kvcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL_ACCOUNT);
+ if (!pages)
+ return -ENOMEM;
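+ /* Copy in the (possibly short) data and zero-fill the last page. */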
+ for (i = 0; i < nr_pages; i++) {
+ size_t bytes = min_t(size_t, PAGE_SIZE, iov_iter_count(from));
+ char *kaddr;
+
+ pages[i] = alloc_page(GFP_KERNEL_ACCOUNT);
+ if (!pages[i]) {
+ ret = -ENOMEM;
+ goto out_pages;
+ }
+ kaddr = kmap(pages[i]);
+ if (copy_from_iter(kaddr, bytes, from) != bytes) {
+ kunmap(pages[i]);
+ ret = -EFAULT;
+ goto out_pages;
+ }
+ if (bytes < PAGE_SIZE)
+ memset(kaddr + bytes, 0, PAGE_SIZE - bytes);
+ kunmap(pages[i]);
+ }
+
+ for (;;) {
+ struct btrfs_ordered_extent *ordered;
+
+ ret = btrfs_wait_ordered_range(&inode->vfs_inode, start, num_bytes);
+ if (ret)
+ goto out_pages;
+ ret = invalidate_inode_pages2_range(inode->vfs_inode.i_mapping,
+ start >> PAGE_SHIFT,
+ end >> PAGE_SHIFT);
+ if (ret)
+ goto out_pages;
+ lock_extent_bits(io_tree, start, end, &cached_state);
+ ordered = btrfs_lookup_ordered_range(inode, start, num_bytes);
+ if (!ordered &&
+ !filemap_range_has_page(inode->vfs_inode.i_mapping, start, end))
+ break;
+ if (ordered)
+ btrfs_put_ordered_extent(ordered);
+ unlock_extent_cached(io_tree, start, end, &cached_state);
+ cond_resched();
+ }
+
+ /*
+ * We don't use the higher-level delalloc space functions because our
+ * num_bytes and disk_num_bytes are different.
+ */
+ ret = btrfs_alloc_data_chunk_ondemand(inode, disk_num_bytes);
+ if (ret)
+ goto out_unlock;
+ ret = btrfs_qgroup_reserve_data(inode, &data_reserved, start, num_bytes);
+ if (ret)
+ goto out_free_data_space;
+ ret = btrfs_delalloc_reserve_metadata(inode, num_bytes, disk_num_bytes);
+ if (ret)
+ goto out_qgroup_free_data;
+
+ /* Try an inline extent first. */
+ if (start == 0 && encoded->unencoded_len == encoded->len &&
+ encoded->unencoded_offset == 0) {
+ ret = cow_file_range_inline(inode, encoded->len, orig_count,
+ compression, pages, true);
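+ /*
+ * ret == 0 means the data was inlined, ret < 0 is an error, and
+ * ret > 0 means it didn't fit, so fall through to write a
+ * regular extent.
+ */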
+ if (ret <= 0) {
+ if (ret == 0)
+ ret = orig_count;
+ goto out_delalloc_release;
+ }
+ }
+
+ ret = btrfs_reserve_extent(root, disk_num_bytes, disk_num_bytes,
+ disk_num_bytes, 0, 0, &ins, 1, 1);
+ if (ret)
+ goto out_delalloc_release;
+ extent_reserved = true;
+
+ em = create_io_em(inode, start, num_bytes,
+ start - encoded->unencoded_offset, ins.objectid,
+ ins.offset, ins.offset, ram_bytes, compression,
+ BTRFS_ORDERED_COMPRESSED);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out_free_reserved;
+ }
+ free_extent_map(em);
+
+ ret = btrfs_add_ordered_extent(inode, start, num_bytes, ram_bytes,
+ ins.objectid, ins.offset,
+ encoded->unencoded_offset,
+ (1 << BTRFS_ORDERED_ENCODED) |
+ (1 << BTRFS_ORDERED_COMPRESSED),
+ compression);
+ if (ret) {
+ btrfs_drop_extent_cache(inode, start, end, 0);
+ goto out_free_reserved;
+ }
+ btrfs_dec_block_group_reservations(fs_info, ins.objectid);
+
+ if (start + encoded->len > inode->vfs_inode.i_size)
+ i_size_write(&inode->vfs_inode, start + encoded->len);
+
+ unlock_extent_cached(io_tree, start, end, &cached_state);
+
+ btrfs_delalloc_release_extents(inode, num_bytes);
+
+ if (btrfs_submit_compressed_write(inode, start, num_bytes, ins.objectid,
+ ins.offset, pages, nr_pages, 0, NULL,
+ false)) {
+ btrfs_writepage_endio_finish_ordered(inode, pages[0], start, end, 0);
+ ret = -EIO;
+ goto out_pages;
+ }
+ ret = orig_count;
+ goto out;
+
+out_free_reserved:
+ btrfs_dec_block_group_reservations(fs_info, ins.objectid);
+ btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
+out_delalloc_release:
+ btrfs_delalloc_release_extents(inode, num_bytes);
+ btrfs_delalloc_release_metadata(inode, disk_num_bytes, ret < 0);
+out_qgroup_free_data:
+ if (ret < 0)
+ btrfs_qgroup_free_data(inode, data_reserved, start, num_bytes);
+out_free_data_space:
+ /*
+ * If btrfs_reserve_extent() succeeded, then we already decremented
+ * bytes_may_use.
+ */
+ if (!extent_reserved)
+ btrfs_free_reserved_data_space_noquota(fs_info, disk_num_bytes);
+out_unlock:
+ unlock_extent_cached(io_tree, start, end, &cached_state);
+out_pages:
+ for (i = 0; i < nr_pages; i++) {
+ if (pages[i])
+ __free_page(pages[i]);
+ }
+ kvfree(pages);
+out:
+ if (ret >= 0)
+ iocb->ki_pos += encoded->len;
+ return ret;
+}
+
#ifdef CONFIG_SWAP
/*
* Add an entry indicating a block group or device which is pinned by a
@@ -10638,12 +11355,12 @@ static const struct address_space_operations btrfs_aops = {
.writepages = btrfs_writepages,
.readahead = btrfs_readahead,
.direct_IO = noop_direct_IO,
- .invalidatepage = btrfs_invalidatepage,
+ .invalidate_folio = btrfs_invalidate_folio,
.releasepage = btrfs_releasepage,
#ifdef CONFIG_MIGRATION
.migratepage = btrfs_migratepage,
#endif
- .set_page_dirty = btrfs_set_page_dirty,
+ .dirty_folio = filemap_dirty_folio,
.error_remove_page = generic_error_remove_page,
.swap_activate = btrfs_swap_activate,
.swap_deactivate = btrfs_swap_deactivate,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a5bd6926f7ff..238cee5b5254 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -28,6 +28,7 @@
#include <linux/iversion.h>
#include <linux/fileattr.h>
#include <linux/fsverity.h>
+#include <linux/sched/xacct.h>
#include "ctree.h"
#include "disk-io.h"
#include "export.h"
@@ -88,6 +89,24 @@ struct btrfs_ioctl_send_args_32 {
#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
struct btrfs_ioctl_send_args_32)
+
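+/*
+ * 32-bit compat layout of struct btrfs_ioctl_encoded_io_args: the iov
+ * pointer and count use compat types, the rest matches the native struct.
+ */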
+struct btrfs_ioctl_encoded_io_args_32 {
+ compat_uptr_t iov;
+ compat_ulong_t iovcnt;
+ __s64 offset;
+ __u64 flags;
+ __u64 len;
+ __u64 unencoded_len;
+ __u64 unencoded_offset;
+ __u32 compression;
+ __u32 encryption;
+ __u8 reserved[64];
+};
+
+#define BTRFS_IOC_ENCODED_READ_32 _IOR(BTRFS_IOCTL_MAGIC, 64, \
+ struct btrfs_ioctl_encoded_io_args_32)
+#define BTRFS_IOC_ENCODED_WRITE_32 _IOW(BTRFS_IOCTL_MAGIC, 64, \
+ struct btrfs_ioctl_encoded_io_args_32)
#endif
/* Mask out flags that are inappropriate for the given type of inode. */
@@ -440,10 +459,8 @@ void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
}
}
-static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
+static int btrfs_ioctl_getversion(struct inode *inode, int __user *arg)
{
- struct inode *inode = file_inode(file);
-
return put_user(inode->i_generation, arg);
}
@@ -753,6 +770,13 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct btrfs_trans_handle *trans;
int ret;
+ /* We do not support snapshotting right now. */
+ if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+ btrfs_warn(fs_info,
+ "extent tree v2 doesn't support snapshotting yet");
+ return -EOPNOTSUPP;
+ }
+
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
return -EINVAL;
@@ -805,10 +829,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
goto fail;
}
- spin_lock(&fs_info->trans_lock);
- list_add(&pending_snapshot->list,
- &trans->transaction->pending_snapshots);
- spin_unlock(&fs_info->trans_lock);
+ trans->pending_snapshot = pending_snapshot;
ret = btrfs_commit_transaction(trans);
if (ret)
@@ -1015,8 +1036,155 @@ out:
return ret;
}
+/*
+ * Defrag specific helper to get an extent map.
+ *
+ * Differences between this and btrfs_get_extent() are:
+ *
+ * - No extent_map will be added to inode->extent_tree
+ * To reduce memory usage in the long run.
+ *
+ * - Extra optimization to skip file extents older than @newer_than
+ * By using btrfs_search_forward() we can skip entire file ranges that
+ * have extents created in past transactions, because btrfs_search_forward()
+ * will not visit leaves and nodes with a generation smaller than the given
+ * minimal generation threshold (@newer_than).
+ *
+ * Return a valid em if we find a file extent matching the requirement.
+ * Return NULL if we cannot find a file extent matching the requirement.
+ *
+ * Return ERR_PTR() for error.
+ */
+static struct extent_map *defrag_get_extent(struct btrfs_inode *inode,
+ u64 start, u64 newer_than)
+{
+ struct btrfs_root *root = inode->root;
+ struct btrfs_file_extent_item *fi;
+ struct btrfs_path path = { 0 };
+ struct extent_map *em;
+ struct btrfs_key key;
+ u64 ino = btrfs_ino(inode);
+ int ret;
+
+ em = alloc_extent_map();
+ if (!em) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ key.objectid = ino;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = start;
+
+ if (newer_than) {
+ ret = btrfs_search_forward(root, &key, &path, newer_than);
+ if (ret < 0)
+ goto err;
+ /* Can't find anything newer */
+ if (ret > 0)
+ goto not_found;
+ } else {
+ ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+ if (ret < 0)
+ goto err;
+ }
+ if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
+ /*
+ * If btrfs_search_slot() makes the path point beyond nritems,
+ * we should not have an empty leaf, as this inode must at
+ * least have its INODE_ITEM.
+ */
+ ASSERT(btrfs_header_nritems(path.nodes[0]));
+ path.slots[0] = btrfs_header_nritems(path.nodes[0]) - 1;
+ }
+ btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+ /* Perfect match, no need to go one slot back */
+ if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY &&
+ key.offset == start)
+ goto iterate;
+
+ /* We didn't find a perfect match, need to go one slot back */
+ if (path.slots[0] > 0) {
+ btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+ if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY)
+ path.slots[0]--;
+ }
+
+iterate:
+ /* Iterate through the path to find a file extent covering @start */
+ while (true) {
+ u64 extent_end;
+
+ if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
+ goto next;
+
+ btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+
+ /*
+ * We may go one slot back to an INODE_REF/XATTR item, then we
+ * need to go forward until we reach an EXTENT_DATA.
+ * But we should still have the correct ino as key.objectid.
+ */
+ if (WARN_ON(key.objectid < ino) || key.type < BTRFS_EXTENT_DATA_KEY)
+ goto next;
+
+ /* It's beyond our target range, definitely no extent found */
+ if (key.objectid > ino || key.type > BTRFS_EXTENT_DATA_KEY)
+ goto not_found;
+
+ /*
+ * | |<- File extent ->|
+ * \- start
+ *
+ * This means there is a hole between start and key.offset.
+ */
+ if (key.offset > start) {
+ em->start = start;
+ em->orig_start = start;
+ em->block_start = EXTENT_MAP_HOLE;
+ em->len = key.offset - start;
+ break;
+ }
+
+ fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
+ struct btrfs_file_extent_item);
+ extent_end = btrfs_file_extent_end(&path);
+
+ /*
+ * |<- file extent ->| |
+ * \- start
+ *
+ * We haven't reached start, search next slot.
+ */
+ if (extent_end <= start)
+ goto next;
+
+ /* Now this extent covers @start, convert it to em */
+ btrfs_extent_item_to_extent_map(inode, &path, fi, false, em);
+ break;
+next:
+ ret = btrfs_next_item(root, &path);
+ if (ret < 0)
+ goto err;
+ if (ret > 0)
+ goto not_found;
+ }
+ btrfs_release_path(&path);
+ return em;
+
+not_found:
+ btrfs_release_path(&path);
+ free_extent_map(em);
+ return NULL;
+
+err:
+ btrfs_release_path(&path);
+ free_extent_map(em);
+ return ERR_PTR(ret);
+}
+
static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
- bool locked)
+ u64 newer_than, bool locked)
{
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
@@ -1031,6 +1199,20 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
em = lookup_extent_mapping(em_tree, start, sectorsize);
read_unlock(&em_tree->lock);
+ /*
+ * We can get a merged extent, in that case we need to re-search the
+ * tree to get the original em for defrag.
+ *
+ * If @newer_than is 0 or em::generation < newer_than, we can trust
+ * this em, as either we don't care about the generation, or the
+ * merged extent map will be rejected anyway.
+ */
+ if (em && test_bit(EXTENT_FLAG_MERGED, &em->flags) &&
+ newer_than && em->generation >= newer_than) {
+ free_extent_map(em);
+ em = NULL;
+ }
+
if (!em) {
struct extent_state *cached = NULL;
u64 end = start + sectorsize - 1;
@@ -1038,7 +1220,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
/* get the big lock and read metadata off disk */
if (!locked)
lock_extent_bits(io_tree, start, end, &cached);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, sectorsize);
+ em = defrag_get_extent(BTRFS_I(inode), start, newer_than);
if (!locked)
unlock_extent_cached(io_tree, start, end, &cached);
@@ -1049,23 +1231,42 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
return em;
}
+static u32 get_extent_max_capacity(const struct extent_map *em)
+{
+ if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
+ return BTRFS_MAX_COMPRESSED;
+ return BTRFS_MAX_EXTENT_SIZE;
+}
+
static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
bool locked)
{
struct extent_map *next;
- bool ret = true;
+ bool ret = false;
/* this is the last extent */
if (em->start + em->len >= i_size_read(inode))
return false;
- next = defrag_lookup_extent(inode, em->start + em->len, locked);
+ /*
+ * We want to check if the next extent can be merged with the current
+ * one, which can be an extent created in a past generation, so we pass
+ * a minimum generation of 0 to defrag_lookup_extent().
+ */
+ next = defrag_lookup_extent(inode, em->start + em->len, 0, locked);
+ /* No more em or hole */
if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
- ret = false;
- else if ((em->block_start + em->block_len == next->block_start) &&
- (em->block_len > SZ_128K && next->block_len > SZ_128K))
- ret = false;
-
+ goto out;
+ if (test_bit(EXTENT_FLAG_PREALLOC, &next->flags))
+ goto out;
+ /*
+ * If the next extent is at its max capacity, defragging the current
+ * extent makes no sense, as the total number of extents won't change.
+ */
+ if (next->len >= get_extent_max_capacity(em))
+ goto out;
+ ret = true;
+out:
free_extent_map(next);
return ret;
}
@@ -1189,8 +1390,10 @@ struct defrag_target_range {
static int defrag_collect_targets(struct btrfs_inode *inode,
u64 start, u64 len, u32 extent_thresh,
u64 newer_than, bool do_compress,
- bool locked, struct list_head *target_list)
+ bool locked, struct list_head *target_list,
+ u64 *last_scanned_ret)
{
+ bool last_is_target = false;
u64 cur = start;
int ret = 0;
@@ -1200,7 +1403,9 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
bool next_mergeable = true;
u64 range_len;
- em = defrag_lookup_extent(&inode->vfs_inode, cur, locked);
+ last_is_target = false;
+ em = defrag_lookup_extent(&inode->vfs_inode, cur,
+ newer_than, locked);
if (!em)
break;
@@ -1213,6 +1418,39 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
if (em->generation < newer_than)
goto next;
+ /* This em is under writeback, no need to defrag */
+ if (em->generation == (u64)-1)
+ goto next;
+
+ /*
+ * Our start offset might be in the middle of an existing extent
+ * map, so take that into account.
+ */
+ range_len = em->len - (cur - em->start);
+ /*
+ * If this range of the extent map is already flagged for delalloc,
+ * skip it, because:
+ *
+ * 1) We could deadlock later, when trying to reserve space for
+ * delalloc, because in case we can't immediately reserve space
+ * the flusher can start delalloc and wait for the respective
+ * ordered extents to complete. The deadlock would happen
+ * because we do the space reservation while holding the range
+ * locked, and starting writeback, or finishing an ordered
+ * extent, requires locking the range;
+ *
+ * 2) If there's delalloc there, it means there are dirty pages for
+ * which writeback has not started yet (we clean the delalloc
+ * flag when starting writeback and after creating an ordered
+ * extent). If we mark pages in an adjacent range for defrag,
+ * then we will have a larger contiguous range for delalloc,
+ * very likely resulting in a larger extent after writeback is
+ * triggered (except in a case of free space fragmentation).
+ */
+ if (test_range_bit(&inode->io_tree, cur, cur + range_len - 1,
+ EXTENT_DELALLOC, 0, NULL))
+ goto next;
+
/*
* For do_compress case, we want to compress all valid file
* extents, thus no @extent_thresh or mergeable check.
@@ -1221,7 +1459,14 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
goto add;
/* Skip too large extent */
- if (em->len >= extent_thresh)
+ if (range_len >= extent_thresh)
+ goto next;
+
+ /*
+ * Skip extents already at their max capacity; this is mostly for
+ * compressed extents, whose max capacity is only 128K.
+ */
+ if (em->len >= get_extent_max_capacity(em))
goto next;
next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em,
@@ -1242,6 +1487,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
}
add:
+ last_is_target = true;
range_len = min(extent_map_end(em), start + len) - cur;
/*
* This one is a good target, check if it can be merged into
@@ -1285,10 +1531,22 @@ next:
kfree(entry);
}
}
+ if (!ret && last_scanned_ret) {
+ /*
+ * If the last extent is not a target, the caller can skip to
+ * the end of that extent.
+ * Otherwise, we can only go to the end of the specified range.
+ */
+ if (!last_is_target)
+ *last_scanned_ret = max(cur, *last_scanned_ret);
+ else
+ *last_scanned_ret = max(start + len, *last_scanned_ret);
+ }
return ret;
}
#define CLUSTER_SIZE (SZ_256K)
+static_assert(IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
/*
* Defrag one contiguous target range.
@@ -1343,7 +1601,8 @@ static int defrag_one_locked_target(struct btrfs_inode *inode,
}
static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
- u32 extent_thresh, u64 newer_than, bool do_compress)
+ u32 extent_thresh, u64 newer_than, bool do_compress,
+ u64 *last_scanned_ret)
{
struct extent_state *cached_state = NULL;
struct defrag_target_range *entry;
@@ -1389,7 +1648,7 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
*/
ret = defrag_collect_targets(inode, start, len, extent_thresh,
newer_than, do_compress, true,
- &target_list);
+ &target_list, last_scanned_ret);
if (ret < 0)
goto unlock_extent;
@@ -1424,7 +1683,8 @@ static int defrag_one_cluster(struct btrfs_inode *inode,
u64 start, u32 len, u32 extent_thresh,
u64 newer_than, bool do_compress,
unsigned long *sectors_defragged,
- unsigned long max_sectors)
+ unsigned long max_sectors,
+ u64 *last_scanned_ret)
{
const u32 sectorsize = inode->root->fs_info->sectorsize;
struct defrag_target_range *entry;
@@ -1432,24 +1692,34 @@ static int defrag_one_cluster(struct btrfs_inode *inode,
LIST_HEAD(target_list);
int ret;
- BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
ret = defrag_collect_targets(inode, start, len, extent_thresh,
newer_than, do_compress, false,
- &target_list);
+ &target_list, NULL);
if (ret < 0)
goto out;
list_for_each_entry(entry, &target_list, list) {
u32 range_len = entry->len;
- /* Reached the limit */
- if (max_sectors && max_sectors == *sectors_defragged)
+ /* Reached or beyond the limit */
+ if (max_sectors && *sectors_defragged >= max_sectors) {
+ ret = 1;
break;
+ }
if (max_sectors)
range_len = min_t(u32, range_len,
(max_sectors - *sectors_defragged) * sectorsize);
+ /*
+ * If defrag_one_range() has updated last_scanned_ret,
+ * our range may already be invalid (e.g. hole punched).
+ * Skip if our range is before last_scanned_ret, as there is
+ * no need to defrag the range anymore.
+ */
+ if (entry->start + range_len <= *last_scanned_ret)
+ continue;
+
if (ra)
page_cache_sync_readahead(inode->vfs_inode.i_mapping,
ra, NULL, entry->start >> PAGE_SHIFT,
@@ -1462,16 +1732,20 @@ static int defrag_one_cluster(struct btrfs_inode *inode,
* accounting.
*/
ret = defrag_one_range(inode, entry->start, range_len,
- extent_thresh, newer_than, do_compress);
+ extent_thresh, newer_than, do_compress,
+ last_scanned_ret);
if (ret < 0)
break;
- *sectors_defragged += range_len;
+ *sectors_defragged += range_len >>
+ inode->root->fs_info->sectorsize_bits;
}
out:
list_for_each_entry_safe(entry, tmp, &target_list, list) {
list_del_init(&entry->list);
kfree(entry);
}
+ if (ret >= 0)
+ *last_scanned_ret = max(*last_scanned_ret, start + len);
return ret;
}
@@ -1484,6 +1758,12 @@ out:
* @newer_than: minimum transid to defrag
* @max_to_defrag: max number of sectors to be defragged, if 0, the whole inode
* will be defragged.
+ *
+ * Return <0 for error.
+ * Return >=0 for the number of sectors defragged, and range->start will be updated
+ * to indicate the file offset where the next defrag should start.
+ * (Mostly for autodefrag, which sets @max_to_defrag, thus we may exit early
+ * without defragging the whole range).
*/
int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
struct btrfs_ioctl_defrag_range_args *range,
@@ -1499,6 +1779,7 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
int compress_type = BTRFS_COMPRESS_ZLIB;
int ret = 0;
u32 extent_thresh = range->extent_thresh;
+ pgoff_t start_index;
if (isize == 0)
return 0;
@@ -1518,12 +1799,16 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
if (range->start + range->len > range->start) {
/* Got a specific range */
- last_byte = min(isize, range->start + range->len) - 1;
+ last_byte = min(isize, range->start + range->len);
} else {
/* Defrag until file end */
- last_byte = isize - 1;
+ last_byte = isize;
}
+ /* Align the range */
+ cur = round_down(range->start, fs_info->sectorsize);
+ last_byte = round_up(last_byte, fs_info->sectorsize) - 1;
+
/*
* If we were not given a ra, allocate a readahead context. As
* readahead is just an optimization, defrag will work without it so
@@ -1536,15 +1821,23 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
file_ra_state_init(ra, inode->i_mapping);
}
- /* Align the range */
- cur = round_down(range->start, fs_info->sectorsize);
- last_byte = round_up(last_byte, fs_info->sectorsize) - 1;
+ /*
+ * Make writeback start from the beginning of the range, so that the
+ * defrag range can be written sequentially.
+ */
+ start_index = cur >> PAGE_SHIFT;
+ if (start_index < inode->i_mapping->writeback_index)
+ inode->i_mapping->writeback_index = start_index;
while (cur < last_byte) {
+ const unsigned long prev_sectors_defragged = sectors_defragged;
+ u64 last_scanned = cur;
u64 cluster_end;
- /* The cluster size 256K should always be page aligned */
- BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
+ if (btrfs_defrag_cancelled(fs_info)) {
+ ret = -EAGAIN;
+ break;
+ }
/* We want the cluster end at page boundary when possible */
cluster_end = (((cur >> PAGE_SHIFT) +
@@ -1565,16 +1858,30 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
BTRFS_I(inode)->defrag_compress = compress_type;
ret = defrag_one_cluster(BTRFS_I(inode), ra, cur,
cluster_end + 1 - cur, extent_thresh,
- newer_than, do_compress,
- &sectors_defragged, max_to_defrag);
+ newer_than, do_compress, &sectors_defragged,
+ max_to_defrag, &last_scanned);
+
+ if (sectors_defragged > prev_sectors_defragged)
+ balance_dirty_pages_ratelimited(inode->i_mapping);
+
btrfs_inode_unlock(inode, 0);
if (ret < 0)
break;
- cur = cluster_end + 1;
+ cur = max(cluster_end + 1, last_scanned);
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ cond_resched();
}
if (ra_allocated)
kfree(ra);
+ /*
+ * Update range.start for autodefrag, this will indicate where to start
+ * in the next run.
+ */
+ range->start = cur;
if (sectors_defragged) {
/*
* We have defragged some sectors, for compression case they
@@ -1943,10 +2250,9 @@ free_args:
return ret;
}
-static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
+static noinline int btrfs_ioctl_subvol_getflags(struct inode *inode,
void __user *arg)
{
- struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
@@ -2276,12 +2582,11 @@ err:
return ret;
}
-static noinline int btrfs_ioctl_tree_search(struct file *file,
- void __user *argp)
+static noinline int btrfs_ioctl_tree_search(struct inode *inode,
+ void __user *argp)
{
struct btrfs_ioctl_search_args __user *uargs;
struct btrfs_ioctl_search_key sk;
- struct inode *inode;
int ret;
size_t buf_size;
@@ -2295,7 +2600,6 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
buf_size = sizeof(uargs->buf);
- inode = file_inode(file);
ret = search_ioctl(inode, &sk, &buf_size, uargs->buf);
/*
@@ -2310,12 +2614,11 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
return ret;
}
-static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
+static noinline int btrfs_ioctl_tree_search_v2(struct inode *inode,
void __user *argp)
{
struct btrfs_ioctl_search_args_v2 __user *uarg;
struct btrfs_ioctl_search_args_v2 args;
- struct inode *inode;
int ret;
size_t buf_size;
const size_t buf_limit = SZ_16M;
@@ -2334,7 +2637,6 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
if (buf_size > buf_limit)
buf_size = buf_limit;
- inode = file_inode(file);
ret = search_ioctl(inode, &args.key, &buf_size,
(char __user *)(&uarg->buf[0]));
if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
@@ -2585,25 +2887,22 @@ out:
return ret;
}
-static noinline int btrfs_ioctl_ino_lookup(struct file *file,
+static noinline int btrfs_ioctl_ino_lookup(struct btrfs_root *root,
void __user *argp)
{
struct btrfs_ioctl_ino_lookup_args *args;
- struct inode *inode;
int ret = 0;
args = memdup_user(argp, sizeof(*args));
if (IS_ERR(args))
return PTR_ERR(args);
- inode = file_inode(file);
-
/*
* Unprivileged query to obtain the containing subvolume root id. The
* path is reset so it's consistent with btrfs_search_path_in_tree.
*/
if (args->treeid == 0)
- args->treeid = BTRFS_I(inode)->root->root_key.objectid;
+ args->treeid = root->root_key.objectid;
if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) {
args->name[0] = 0;
@@ -2615,7 +2914,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
goto out;
}
- ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
+ ret = btrfs_search_path_in_tree(root->fs_info,
args->treeid, args->objectid,
args->name);
@@ -2671,7 +2970,7 @@ static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp)
}
/* Get the subvolume information in BTRFS_ROOT_ITEM and BTRFS_ROOT_BACKREF */
-static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
+static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp)
{
struct btrfs_ioctl_get_subvol_info_args *subvol_info;
struct btrfs_fs_info *fs_info;
@@ -2683,7 +2982,6 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
struct extent_buffer *leaf;
unsigned long item_off;
unsigned long item_len;
- struct inode *inode;
int slot;
int ret = 0;
@@ -2697,7 +2995,6 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
return -ENOMEM;
}
- inode = file_inode(file);
fs_info = BTRFS_I(inode)->root->fs_info;
/* Get root_item of inode's subvolume */
@@ -2791,15 +3088,14 @@ out_free:
* Return ROOT_REF information of the subvolume containing this inode
* except the subvolume name.
*/
-static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
+static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root,
+ void __user *argp)
{
struct btrfs_ioctl_get_subvol_rootref_args *rootrefs;
struct btrfs_root_ref *rref;
- struct btrfs_root *root;
struct btrfs_path *path;
struct btrfs_key key;
struct extent_buffer *leaf;
- struct inode *inode;
u64 objectid;
int slot;
int ret;
@@ -2815,15 +3111,13 @@ static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
return PTR_ERR(rootrefs);
}
- inode = file_inode(file);
- root = BTRFS_I(inode)->root->fs_info->tree_root;
- objectid = BTRFS_I(inode)->root->root_key.objectid;
-
+ objectid = root->root_key.objectid;
key.objectid = objectid;
key.type = BTRFS_ROOT_REF_KEY;
key.offset = rootrefs->min_treeid;
found = 0;
+ root = root->fs_info->tree_root;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0) {
goto out;
@@ -2903,6 +3197,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
int err = 0;
bool destroy_parent = false;
+ /* We don't support snapshots with extent tree v2 yet. */
+ if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+ btrfs_err(fs_info,
+ "extent tree v2 doesn't support snapshot deletion yet");
+ return -EOPNOTSUPP;
+ }
+
if (destroy_v2) {
vol_args2 = memdup_user(arg, sizeof(*vol_args2));
if (IS_ERR(vol_args2))
@@ -3086,10 +3387,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
btrfs_inode_lock(inode, 0);
err = btrfs_delete_subvolume(dir, dentry);
btrfs_inode_unlock(inode, 0);
- if (!err) {
- fsnotify_rmdir(dir, dentry);
- d_delete(dentry);
- }
+ if (!err)
+ d_delete_notify(dir, dentry);
out_dput:
dput(dentry);
@@ -3180,6 +3479,11 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+ btrfs_err(fs_info, "device add not supported on extent tree v2 yet");
+ return -EINVAL;
+ }
+
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_ADD)) {
if (!btrfs_exclop_start_try_lock(fs_info, BTRFS_EXCLOP_DEV_ADD))
return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
@@ -3290,7 +3594,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
struct block_device *bdev = NULL;
fmode_t mode;
int ret;
- bool cancel;
+ bool cancel = false;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -3705,6 +4009,11 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+ btrfs_err(fs_info, "scrub is not supported on extent tree v2 yet");
+ return -EINVAL;
+ }
+
sa = memdup_user(arg, sizeof(*sa));
if (IS_ERR(sa))
return PTR_ERR(sa);
@@ -3804,6 +4113,11 @@ static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+ btrfs_err(fs_info, "device replace not supported on extent tree v2 yet");
+ return -EINVAL;
+ }
+
p = memdup_user(arg, sizeof(*p));
if (IS_ERR(p))
return PTR_ERR(p);
@@ -4865,7 +5179,7 @@ out_drop_write:
return ret;
}
-static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
+static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat)
{
struct btrfs_ioctl_send_args *arg;
int ret;
@@ -4895,11 +5209,194 @@ static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
if (IS_ERR(arg))
return PTR_ERR(arg);
}
- ret = btrfs_ioctl_send(file, arg);
+ ret = btrfs_ioctl_send(inode, arg);
kfree(arg);
return ret;
}
+static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
+ bool compat)
+{
+ struct btrfs_ioctl_encoded_io_args args = { 0 };
+ size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,
+ flags);
+ size_t copy_end;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter iter;
+ loff_t pos;
+ struct kiocb kiocb;
+ ssize_t ret;
+
+ if (!capable(CAP_SYS_ADMIN)) {
+ ret = -EPERM;
+ goto out_acct;
+ }
+
+ if (compat) {
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+ struct btrfs_ioctl_encoded_io_args_32 args32;
+
+ copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32,
+ flags);
+ if (copy_from_user(&args32, argp, copy_end)) {
+ ret = -EFAULT;
+ goto out_acct;
+ }
+ args.iov = compat_ptr(args32.iov);
+ args.iovcnt = args32.iovcnt;
+ args.offset = args32.offset;
+ args.flags = args32.flags;
+#else
+ return -ENOTTY;
+#endif
+ } else {
+ copy_end = copy_end_kernel;
+ if (copy_from_user(&args, argp, copy_end)) {
+ ret = -EFAULT;
+ goto out_acct;
+ }
+ }
+ if (args.flags != 0) {
+ ret = -EINVAL;
+ goto out_acct;
+ }
+
+ ret = import_iovec(READ, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
+ &iov, &iter);
+ if (ret < 0)
+ goto out_acct;
+
+ if (iov_iter_count(&iter) == 0) {
+ ret = 0;
+ goto out_iov;
+ }
+ pos = args.offset;
+ ret = rw_verify_area(READ, file, &pos, args.len);
+ if (ret < 0)
+ goto out_iov;
+
+ init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = pos;
+
+ ret = btrfs_encoded_read(&kiocb, &iter, &args);
+ if (ret >= 0) {
+ fsnotify_access(file);
+ if (copy_to_user(argp + copy_end,
+ (char *)&args + copy_end_kernel,
+ sizeof(args) - copy_end_kernel))
+ ret = -EFAULT;
+ }
+
+out_iov:
+ kfree(iov);
+out_acct:
+ if (ret > 0)
+ add_rchar(current, ret);
+ inc_syscr(current);
+ return ret;
+}
+
+static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool compat)
+{
+ struct btrfs_ioctl_encoded_io_args args;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter iter;
+ loff_t pos;
+ struct kiocb kiocb;
+ ssize_t ret;
+
+ if (!capable(CAP_SYS_ADMIN)) {
+ ret = -EPERM;
+ goto out_acct;
+ }
+
+ if (!(file->f_mode & FMODE_WRITE)) {
+ ret = -EBADF;
+ goto out_acct;
+ }
+
+ if (compat) {
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+ struct btrfs_ioctl_encoded_io_args_32 args32;
+
+ if (copy_from_user(&args32, argp, sizeof(args32))) {
+ ret = -EFAULT;
+ goto out_acct;
+ }
+ args.iov = compat_ptr(args32.iov);
+ args.iovcnt = args32.iovcnt;
+ args.offset = args32.offset;
+ args.flags = args32.flags;
+ args.len = args32.len;
+ args.unencoded_len = args32.unencoded_len;
+ args.unencoded_offset = args32.unencoded_offset;
+ args.compression = args32.compression;
+ args.encryption = args32.encryption;
+ memcpy(args.reserved, args32.reserved, sizeof(args.reserved));
+#else
+ return -ENOTTY;
+#endif
+ } else {
+ if (copy_from_user(&args, argp, sizeof(args))) {
+ ret = -EFAULT;
+ goto out_acct;
+ }
+ }
+
+ ret = -EINVAL;
+ if (args.flags != 0)
+ goto out_acct;
+ if (memchr_inv(args.reserved, 0, sizeof(args.reserved)))
+ goto out_acct;
+ if (args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE &&
+ args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE)
+ goto out_acct;
+ if (args.compression >= BTRFS_ENCODED_IO_COMPRESSION_TYPES ||
+ args.encryption >= BTRFS_ENCODED_IO_ENCRYPTION_TYPES)
+ goto out_acct;
+ if (args.unencoded_offset > args.unencoded_len)
+ goto out_acct;
+ if (args.len > args.unencoded_len - args.unencoded_offset)
+ goto out_acct;
+
+ ret = import_iovec(WRITE, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
+ &iov, &iter);
+ if (ret < 0)
+ goto out_acct;
+
+ file_start_write(file);
+
+ if (iov_iter_count(&iter) == 0) {
+ ret = 0;
+ goto out_end_write;
+ }
+ pos = args.offset;
+ ret = rw_verify_area(WRITE, file, &pos, args.len);
+ if (ret < 0)
+ goto out_end_write;
+
+ init_sync_kiocb(&kiocb, file);
+ ret = kiocb_set_rw_flags(&kiocb, 0);
+ if (ret)
+ goto out_end_write;
+ kiocb.ki_pos = pos;
+
+ ret = btrfs_do_write_iter(&kiocb, &iter, &args);
+ if (ret > 0)
+ fsnotify_modify(file);
+
+out_end_write:
+ file_end_write(file);
+ kfree(iov);
+out_acct:
+ if (ret > 0)
+ add_wchar(current, ret);
+ inc_syscw(current);
+ return ret;
+}
+
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
@@ -4910,7 +5407,7 @@ long btrfs_ioctl(struct file *file, unsigned int
switch (cmd) {
case FS_IOC_GETVERSION:
- return btrfs_ioctl_getversion(file, argp);
+ return btrfs_ioctl_getversion(inode, argp);
case FS_IOC_GETFSLABEL:
return btrfs_ioctl_get_fslabel(fs_info, argp);
case FS_IOC_SETFSLABEL:
@@ -4930,7 +5427,7 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_SNAP_DESTROY_V2:
return btrfs_ioctl_snap_destroy(file, argp, true);
case BTRFS_IOC_SUBVOL_GETFLAGS:
- return btrfs_ioctl_subvol_getflags(file, argp);
+ return btrfs_ioctl_subvol_getflags(inode, argp);
case BTRFS_IOC_SUBVOL_SETFLAGS:
return btrfs_ioctl_subvol_setflags(file, argp);
case BTRFS_IOC_DEFAULT_SUBVOL:
@@ -4954,11 +5451,11 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_BALANCE:
return btrfs_ioctl_balance(file, NULL);
case BTRFS_IOC_TREE_SEARCH:
- return btrfs_ioctl_tree_search(file, argp);
+ return btrfs_ioctl_tree_search(inode, argp);
case BTRFS_IOC_TREE_SEARCH_V2:
- return btrfs_ioctl_tree_search_v2(file, argp);
+ return btrfs_ioctl_tree_search_v2(inode, argp);
case BTRFS_IOC_INO_LOOKUP:
- return btrfs_ioctl_ino_lookup(file, argp);
+ return btrfs_ioctl_ino_lookup(root, argp);
case BTRFS_IOC_INO_PATHS:
return btrfs_ioctl_ino_to_path(root, argp);
case BTRFS_IOC_LOGICAL_INO:
@@ -5005,10 +5502,10 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_set_received_subvol_32(file, argp);
#endif
case BTRFS_IOC_SEND:
- return _btrfs_ioctl_send(file, argp, false);
+ return _btrfs_ioctl_send(inode, argp, false);
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
case BTRFS_IOC_SEND_32:
- return _btrfs_ioctl_send(file, argp, true);
+ return _btrfs_ioctl_send(inode, argp, true);
#endif
case BTRFS_IOC_GET_DEV_STATS:
return btrfs_ioctl_get_dev_stats(fs_info, argp);
@@ -5035,15 +5532,25 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_SET_FEATURES:
return btrfs_ioctl_set_features(file, argp);
case BTRFS_IOC_GET_SUBVOL_INFO:
- return btrfs_ioctl_get_subvol_info(file, argp);
+ return btrfs_ioctl_get_subvol_info(inode, argp);
case BTRFS_IOC_GET_SUBVOL_ROOTREF:
- return btrfs_ioctl_get_subvol_rootref(file, argp);
+ return btrfs_ioctl_get_subvol_rootref(root, argp);
case BTRFS_IOC_INO_LOOKUP_USER:
return btrfs_ioctl_ino_lookup_user(file, argp);
case FS_IOC_ENABLE_VERITY:
return fsverity_ioctl_enable(file, (const void __user *)argp);
case FS_IOC_MEASURE_VERITY:
return fsverity_ioctl_measure(file, argp);
+ case BTRFS_IOC_ENCODED_READ:
+ return btrfs_ioctl_encoded_read(file, argp, false);
+ case BTRFS_IOC_ENCODED_WRITE:
+ return btrfs_ioctl_encoded_write(file, argp, false);
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+ case BTRFS_IOC_ENCODED_READ_32:
+ return btrfs_ioctl_encoded_read(file, argp, true);
+ case BTRFS_IOC_ENCODED_WRITE_32:
+ return btrfs_ioctl_encoded_write(file, argp, true);
+#endif
}
return -ENOTTY;
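To make the new encoded I/O interface concrete, below is a minimal userspace sketch of an encoded read. It assumes the matching UAPI additions in <linux/btrfs.h> (BTRFS_IOC_ENCODED_READ and struct btrfs_ioctl_encoded_io_args, whose fields mirror the 32-bit compat struct above), and it needs CAP_SYS_ADMIN, per the handler:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/uio.h>
	#include <linux/btrfs.h>

	int main(int argc, char **argv)
	{
		struct btrfs_ioctl_encoded_io_args args;
		static char buf[128 * 1024];	/* BTRFS_MAX_COMPRESSED */
		struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
		int fd, ret;

		if (argc != 2) {
			fprintf(stderr, "usage: %s <file on btrfs>\n", argv[0]);
			return 1;
		}
		fd = open(argv[1], O_RDONLY);
		if (fd < 0) {
			perror("open");
			return 1;
		}
		memset(&args, 0, sizeof(args));
		args.iov = &iov;
		args.iovcnt = 1;
		args.offset = 0;	/* extent containing file offset 0 */
		args.flags = 0;		/* must be zero or the handler returns -EINVAL */
		ret = ioctl(fd, BTRFS_IOC_ENCODED_READ, &args);
		if (ret < 0) {
			perror("BTRFS_IOC_ENCODED_READ");
			close(fd);
			return 1;
		}
		/* On success the kernel copies back the fields past 'flags'. */
		printf("%d encoded bytes; len %llu, unencoded_len %llu, compression %u\n",
		       ret, (unsigned long long)args.len,
		       (unsigned long long)args.unencoded_len, args.compression);
		close(fd);
		return 0;
	}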
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index 0fb90cbe7669..430ad36b8b08 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -55,6 +55,9 @@
* 0x1000 | SegHdr N+1| Data payload N+1 ... |
*/
+#define WORKSPACE_BUF_LENGTH (lzo1x_worst_compress(PAGE_SIZE))
+#define WORKSPACE_CBUF_LENGTH (lzo1x_worst_compress(PAGE_SIZE))
+
struct workspace {
void *mem;
void *buf; /* where decompressed data goes */
@@ -83,8 +86,8 @@ struct list_head *lzo_alloc_workspace(unsigned int level)
return ERR_PTR(-ENOMEM);
workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
- workspace->buf = kvmalloc(lzo1x_worst_compress(PAGE_SIZE), GFP_KERNEL);
- workspace->cbuf = kvmalloc(lzo1x_worst_compress(PAGE_SIZE), GFP_KERNEL);
+ workspace->buf = kvmalloc(WORKSPACE_BUF_LENGTH, GFP_KERNEL);
+ workspace->cbuf = kvmalloc(WORKSPACE_CBUF_LENGTH, GFP_KERNEL);
if (!workspace->mem || !workspace->buf || !workspace->cbuf)
goto fail;
@@ -380,6 +383,17 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
kunmap(cur_page);
cur_in += LZO_LEN;
+ if (seg_len > WORKSPACE_CBUF_LENGTH) {
+ /*
+ * seg_len shouldn't be larger than what we have allocated
+ * for workspace->cbuf
+ */
+ btrfs_err(fs_info, "unexpectedly large lzo segment len %u",
+ seg_len);
+ ret = -EIO;
+ goto out;
+ }
+
/* Copy the compressed segment payload into workspace */
copy_compressed_segment(cb, workspace->cbuf, seg_len, &cur_in);
@@ -422,7 +436,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in,
struct workspace *workspace = list_entry(ws, struct workspace, list);
size_t in_len;
size_t out_len;
- size_t max_segment_len = lzo1x_worst_compress(PAGE_SIZE);
+ size_t max_segment_len = WORKSPACE_BUF_LENGTH;
int ret = 0;
char *kaddr;
unsigned long bytes;
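The added seg_len check guards against a corrupted segment header making the decompression path copy past workspace->cbuf. As an illustration, the stream layout documented at the top of lzo.c (a 4-byte little-endian total-length header, then 4-byte segment headers each followed by a compressed payload) can be walked as below, ignoring the rule that segment headers never cross sector boundaries; LZO_LEN == 4 and the lzo1x_worst_compress() bound are assumptions based on the standard lzo worst-case formula:

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SIZE_ASSUMED 4096u
	#define LZO_LEN 4u
	/* Standard lzo worst-case expansion bound (assumed). */
	#define LZO1X_WORST_COMPRESS(x) ((x) + (x) / 16 + 64 + 3)
	#define WORKSPACE_CBUF_LENGTH LZO1X_WORST_COMPRESS(PAGE_SIZE_ASSUMED)

	static uint32_t get_le32(const uint8_t *p)
	{
		return p[0] | ((uint32_t)p[1] << 8) |
		       ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
	}

	/* Walk segment headers, applying the same bound the kernel fix adds. */
	static int walk_segments(const uint8_t *buf, uint32_t len)
	{
		uint32_t cur = LZO_LEN;	/* skip the total-length header */

		while (cur + LZO_LEN <= len) {
			uint32_t seg_len = get_le32(buf + cur);

			cur += LZO_LEN;
			if (seg_len > WORKSPACE_CBUF_LENGTH) {
				fprintf(stderr,
					"unexpectedly large lzo segment len %u\n",
					seg_len);
				return -1;	/* treat as -EIO */
			}
			printf("segment at %u: %u compressed bytes\n",
			       cur, seg_len);
			cur += seg_len;
		}
		return 0;
	}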
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 6b51fd2ec5ac..1957b14b329a 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -143,16 +143,28 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
return ret;
}
-/*
- * Allocate and add a new ordered_extent into the per-inode tree.
+/**
+ * Add an ordered extent to the per-inode tree.
+ *
+ * @inode: Inode that this extent is for.
+ * @file_offset: Logical offset in file where the extent starts.
+ * @num_bytes: Logical length of extent in file.
+ * @ram_bytes: Full length of unencoded data.
+ * @disk_bytenr: Offset of extent on disk.
+ * @disk_num_bytes: Size of extent on disk.
+ * @offset: Offset into unencoded data where file data starts.
+ * @flags: Flags specifying type of extent (1 << BTRFS_ORDERED_*).
+ * @compress_type: Compression algorithm used for data.
*
- * The tree is given a single reference on the ordered extent that was
- * inserted.
+ * Most of these parameters correspond to &struct btrfs_file_extent_item. The
+ * tree is given a single reference on the ordered extent that was inserted.
+ *
+ * Return: 0 or -ENOMEM.
*/
-static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
- u64 disk_bytenr, u64 num_bytes,
- u64 disk_num_bytes, int type, int dio,
- int compress_type)
+int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
+ u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
+ u64 disk_num_bytes, u64 offset, unsigned flags,
+ int compress_type)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -161,7 +173,8 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
struct btrfs_ordered_extent *entry;
int ret;
- if (type == BTRFS_ORDERED_NOCOW || type == BTRFS_ORDERED_PREALLOC) {
+ if (flags &
+ ((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) {
/* For nocow write, we can release the qgroup rsv right now */
ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
if (ret < 0)
@@ -181,9 +194,11 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
return -ENOMEM;
entry->file_offset = file_offset;
- entry->disk_bytenr = disk_bytenr;
entry->num_bytes = num_bytes;
+ entry->ram_bytes = ram_bytes;
+ entry->disk_bytenr = disk_bytenr;
entry->disk_num_bytes = disk_num_bytes;
+ entry->offset = offset;
entry->bytes_left = num_bytes;
entry->inode = igrab(&inode->vfs_inode);
entry->compress_type = compress_type;
@@ -191,18 +206,12 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
entry->qgroup_rsv = ret;
entry->physical = (u64)-1;
- ASSERT(type == BTRFS_ORDERED_REGULAR ||
- type == BTRFS_ORDERED_NOCOW ||
- type == BTRFS_ORDERED_PREALLOC ||
- type == BTRFS_ORDERED_COMPRESSED);
- set_bit(type, &entry->flags);
+ ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
+ entry->flags = flags;
percpu_counter_add_batch(&fs_info->ordered_bytes, num_bytes,
fs_info->delalloc_batch);
- if (dio)
- set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
-
/* one ref for the tree */
refcount_set(&entry->refs, 1);
init_waitqueue_head(&entry->wait);
@@ -247,41 +256,6 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
return 0;
}
-int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
- u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
- int type)
-{
- ASSERT(type == BTRFS_ORDERED_REGULAR ||
- type == BTRFS_ORDERED_NOCOW ||
- type == BTRFS_ORDERED_PREALLOC);
- return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
- num_bytes, disk_num_bytes, type, 0,
- BTRFS_COMPRESS_NONE);
-}
-
-int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
- u64 disk_bytenr, u64 num_bytes,
- u64 disk_num_bytes, int type)
-{
- ASSERT(type == BTRFS_ORDERED_REGULAR ||
- type == BTRFS_ORDERED_NOCOW ||
- type == BTRFS_ORDERED_PREALLOC);
- return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
- num_bytes, disk_num_bytes, type, 1,
- BTRFS_COMPRESS_NONE);
-}
-
-int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
- u64 disk_bytenr, u64 num_bytes,
- u64 disk_num_bytes, int compress_type)
-{
- ASSERT(compress_type != BTRFS_COMPRESS_NONE);
- return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
- num_bytes, disk_num_bytes,
- BTRFS_ORDERED_COMPRESSED, 0,
- compress_type);
-}
-
/*
* Add a struct btrfs_ordered_sum into the list of checksums to be inserted
* when an ordered extent is finished. If the list covers more than one
@@ -548,9 +522,15 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
spin_lock(&btrfs_inode->lock);
btrfs_mod_outstanding_extents(btrfs_inode, -1);
spin_unlock(&btrfs_inode->lock);
- if (root != fs_info->tree_root)
- btrfs_delalloc_release_metadata(btrfs_inode, entry->num_bytes,
- false);
+ if (root != fs_info->tree_root) {
+ u64 release;
+
+ if (test_bit(BTRFS_ORDERED_ENCODED, &entry->flags))
+ release = entry->disk_num_bytes;
+ else
+ release = entry->num_bytes;
+ btrfs_delalloc_release_metadata(btrfs_inode, release, false);
+ }
percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes,
fs_info->delalloc_batch);
@@ -1052,42 +1032,18 @@ static int clone_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pos,
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
u64 file_offset = ordered->file_offset + pos;
u64 disk_bytenr = ordered->disk_bytenr + pos;
- u64 num_bytes = len;
- u64 disk_num_bytes = len;
- int type;
- unsigned long flags_masked = ordered->flags & ~(1 << BTRFS_ORDERED_DIRECT);
- int compress_type = ordered->compress_type;
- unsigned long weight;
- int ret;
-
- weight = hweight_long(flags_masked);
- WARN_ON_ONCE(weight > 1);
- if (!weight)
- type = 0;
- else
- type = __ffs(flags_masked);
+ unsigned long flags = ordered->flags & BTRFS_ORDERED_TYPE_FLAGS;
/*
- * The splitting extent is already counted and will be added again
- * in btrfs_add_ordered_extent_*(). Subtract num_bytes to avoid
- * double counting.
+ * The splitting extent is already counted and will be added again in
+ * btrfs_add_ordered_extent(). Subtract len to avoid double counting.
*/
- percpu_counter_add_batch(&fs_info->ordered_bytes, -num_bytes,
+ percpu_counter_add_batch(&fs_info->ordered_bytes, -len,
fs_info->delalloc_batch);
- if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered->flags)) {
- WARN_ON_ONCE(1);
- ret = btrfs_add_ordered_extent_compress(BTRFS_I(inode),
- file_offset, disk_bytenr, num_bytes,
- disk_num_bytes, compress_type);
- } else if (test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
- ret = btrfs_add_ordered_extent_dio(BTRFS_I(inode), file_offset,
- disk_bytenr, num_bytes, disk_num_bytes, type);
- } else {
- ret = btrfs_add_ordered_extent(BTRFS_I(inode), file_offset,
- disk_bytenr, num_bytes, disk_num_bytes, type);
- }
-
- return ret;
+ WARN_ON_ONCE(flags & (1 << BTRFS_ORDERED_COMPRESSED));
+ return btrfs_add_ordered_extent(BTRFS_I(inode), file_offset, len, len,
+ disk_bytenr, len, 0, flags,
+ ordered->compress_type);
}
int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre,
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 4194e960ff61..ecad67a2c745 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -74,8 +74,18 @@ enum {
BTRFS_ORDERED_LOGGED_CSUM,
/* We wait for this extent to complete in the current transaction */
BTRFS_ORDERED_PENDING,
+ /* BTRFS_IOC_ENCODED_WRITE */
+ BTRFS_ORDERED_ENCODED,
};
+/* BTRFS_ORDERED_* flags that specify the type of the extent. */
+#define BTRFS_ORDERED_TYPE_FLAGS ((1UL << BTRFS_ORDERED_REGULAR) | \
+ (1UL << BTRFS_ORDERED_NOCOW) | \
+ (1UL << BTRFS_ORDERED_PREALLOC) | \
+ (1UL << BTRFS_ORDERED_COMPRESSED) | \
+ (1UL << BTRFS_ORDERED_DIRECT) | \
+ (1UL << BTRFS_ORDERED_ENCODED))
+
struct btrfs_ordered_extent {
/* logical offset in the file */
u64 file_offset;
@@ -84,9 +94,11 @@ struct btrfs_ordered_extent {
* These fields directly correspond to the same fields in
* btrfs_file_extent_item.
*/
- u64 disk_bytenr;
u64 num_bytes;
+ u64 ram_bytes;
+ u64 disk_bytenr;
u64 disk_num_bytes;
+ u64 offset;
/* number of bytes that still need writing */
u64 bytes_left;
@@ -179,14 +191,9 @@ bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
struct btrfs_ordered_extent **cached,
u64 file_offset, u64 io_size);
int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
- u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
- int type);
-int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
- u64 disk_bytenr, u64 num_bytes,
- u64 disk_num_bytes, int type);
-int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
- u64 disk_bytenr, u64 num_bytes,
- u64 disk_num_bytes, int compress_type);
+ u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
+ u64 disk_num_bytes, u64 offset, unsigned flags,
+ int compress_type);
void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum);
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
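With the dio and compress variants folded into one function, callers now pass the full btrfs_file_extent_item-style geometry plus a type bit mask that is validated against BTRFS_ORDERED_TYPE_FLAGS. As a shape sketch (not a quote from the patch), a plain NOCOW direct-IO write that used btrfs_add_ordered_extent_dio() before would now be roughly:

	/* No compression and no unencoded prefix/suffix, hence ram_bytes ==
	 * num_bytes == disk_num_bytes and offset == 0. */
	ret = btrfs_add_ordered_extent(inode, file_offset,
				       num_bytes,	/* num_bytes */
				       num_bytes,	/* ram_bytes */
				       disk_bytenr,
				       num_bytes,	/* disk_num_bytes */
				       0,		/* offset */
				       (1U << BTRFS_ORDERED_NOCOW) |
				       (1U << BTRFS_ORDERED_DIRECT),
				       BTRFS_COMPRESS_NONE);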
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 0775ae9f4419..dd8777872143 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -23,6 +23,7 @@ static const struct root_name_map root_map[] = {
{ BTRFS_QUOTA_TREE_OBJECTID, "QUOTA_TREE" },
{ BTRFS_UUID_TREE_OBJECTID, "UUID_TREE" },
{ BTRFS_FREE_SPACE_TREE_OBJECTID, "FREE_SPACE_TREE" },
+ { BTRFS_BLOCK_GROUP_TREE_OBJECTID, "BLOCK_GROUP_TREE" },
{ BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" },
};
@@ -391,9 +392,9 @@ void btrfs_print_tree(struct extent_buffer *c, bool follow)
btrfs_header_owner(c),
btrfs_node_ptr_generation(c, i),
level - 1, &first_key);
- if (IS_ERR(next)) {
+ if (IS_ERR(next))
continue;
- } else if (!extent_buffer_uptodate(next)) {
+ if (!extent_buffer_uptodate(next)) {
free_extent_buffer(next);
continue;
}
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 8928275823a1..1866b1f0da01 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -25,18 +25,6 @@
#include "sysfs.h"
#include "tree-mod-log.h"
-/* TODO XXX FIXME
- * - subvol delete -> delete when ref goes to 0? delete limits also?
- * - reorganize keys
- * - compressed
- * - sync
- * - copy also limits on subvol creation
- * - limit
- * - caches for ulists
- * - performance benchmarks
- * - check all ioctl parameters
- */
-
/*
* Helpers to access qgroup reservation
*
@@ -258,16 +246,19 @@ static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
return 0;
}
-/* must be called with qgroup_lock held */
-static int add_relation_rb(struct btrfs_fs_info *fs_info,
- u64 memberid, u64 parentid)
+/*
+ * Add a relation specified by two qgroups.
+ *
+ * Must be called with qgroup_lock held.
+ *
+ * Return: 0 on success
+ * -ENOENT if one of the qgroups is NULL
+ * <0 other errors
+ */
+static int __add_relation_rb(struct btrfs_qgroup *member, struct btrfs_qgroup *parent)
{
- struct btrfs_qgroup *member;
- struct btrfs_qgroup *parent;
struct btrfs_qgroup_list *list;
- member = find_qgroup_rb(fs_info, memberid);
- parent = find_qgroup_rb(fs_info, parentid);
if (!member || !parent)
return -ENOENT;
@@ -283,7 +274,27 @@ static int add_relation_rb(struct btrfs_fs_info *fs_info,
return 0;
}
-/* must be called with qgroup_lock held */
+/*
+ * Add a relation specified by two qgroup ids.
+ *
+ * Must be called with qgroup_lock held.
+ *
+ * Return: 0 on success
+ * -ENOENT if one of the ids does not exist
+ * <0 other errors
+ */
+static int add_relation_rb(struct btrfs_fs_info *fs_info, u64 memberid, u64 parentid)
+{
+ struct btrfs_qgroup *member;
+ struct btrfs_qgroup *parent;
+
+ member = find_qgroup_rb(fs_info, memberid);
+ parent = find_qgroup_rb(fs_info, parentid);
+
+ return __add_relation_rb(member, parent);
+}
+
+/* Must be called with qgroup_lock held */
static int del_relation_rb(struct btrfs_fs_info *fs_info,
u64 memberid, u64 parentid)
{
@@ -948,6 +959,12 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
*/
lockdep_assert_held_write(&fs_info->subvol_sem);
+ if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+ btrfs_err(fs_info,
+ "qgroups are currently unsupported in extent tree v2");
+ return -EINVAL;
+ }
+
mutex_lock(&fs_info->qgroup_ioctl_lock);
if (fs_info->quota_root)
goto out;
@@ -1185,12 +1202,34 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
struct btrfs_trans_handle *trans = NULL;
int ret = 0;
+ /*
+ * We need to have subvol_sem write locked, to prevent races between
+ * concurrent tasks trying to disable quotas, because we will unlock
+ * and relock qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes.
+ */
+ lockdep_assert_held_write(&fs_info->subvol_sem);
+
mutex_lock(&fs_info->qgroup_ioctl_lock);
if (!fs_info->quota_root)
goto out;
+
+ /*
+ * Unlock the qgroup_ioctl_lock mutex before waiting for the rescan worker to
+ * complete. Otherwise we can deadlock because btrfs_remove_qgroup() needs
+ * to lock that mutex while holding a transaction handle and the rescan
+ * worker needs to commit a transaction.
+ */
mutex_unlock(&fs_info->qgroup_ioctl_lock);
/*
+ * Request the qgroup rescan worker to complete and wait for it. This wait
+ * must be done before starting a transaction for quota disable, since it may
+ * deadlock with a transaction started by the qgroup rescan worker.
+ */
+ clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
+ btrfs_qgroup_wait_for_completion(fs_info, false);
+
+ /*
* 1 For the root item
*
* We should also reserve enough items for the quota tree deletion in
@@ -1205,14 +1244,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
trans = NULL;
+ set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
goto out;
}
if (!fs_info->quota_root)
goto out;
- clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
- btrfs_qgroup_wait_for_completion(fs_info, false);
spin_lock(&fs_info->qgroup_lock);
quota_root = fs_info->quota_root;
fs_info->quota_root = NULL;
@@ -1430,7 +1468,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
}
spin_lock(&fs_info->qgroup_lock);
- ret = add_relation_rb(fs_info, src, dst);
+ ret = __add_relation_rb(member, parent);
if (ret < 0) {
spin_unlock(&fs_info->qgroup_lock);
goto out;
@@ -3247,7 +3285,8 @@ out:
static bool rescan_should_stop(struct btrfs_fs_info *fs_info)
{
return btrfs_fs_closing(fs_info) ||
- test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
+ test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state) ||
+ !test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
}
static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
@@ -3277,11 +3316,9 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
err = PTR_ERR(trans);
break;
}
- if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
- err = -EINTR;
- } else {
- err = qgroup_rescan_leaf(trans, path);
- }
+
+ err = qgroup_rescan_leaf(trans, path);
+
if (err > 0)
btrfs_commit_transaction(trans);
else
@@ -3295,7 +3332,7 @@ out:
if (err > 0 &&
fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
- } else if (err < 0) {
+ } else if (err < 0 || stopped) {
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
}
mutex_unlock(&fs_info->qgroup_rescan_lock);
@@ -3383,6 +3420,9 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
btrfs_warn(fs_info,
"qgroup rescan init failed, qgroup is not enabled");
ret = -EINVAL;
+ } else if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
+ /* Quota disable is in progress */
+ ret = -EBUSY;
}
if (ret) {
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index a3930da4eb3f..04a88bfe4fcf 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -277,7 +277,7 @@ copy_inline_extent:
path->slots[0]),
size);
btrfs_update_inode_bytes(BTRFS_I(dst), datal, drop_args.bytes_found);
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags);
+ btrfs_set_inode_full_sync(BTRFS_I(dst));
ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end);
out:
if (!ret && !trans) {
@@ -494,7 +494,8 @@ process_slot:
&clone_info, &trans);
if (ret)
goto out;
- } else if (type == BTRFS_FILE_EXTENT_INLINE) {
+ } else {
+ ASSERT(type == BTRFS_FILE_EXTENT_INLINE);
/*
* Inline extents always have to start at file offset 0
* and can never be bigger then the sector size. We can
@@ -505,8 +506,12 @@ process_slot:
*/
ASSERT(key.offset == 0);
ASSERT(datal <= fs_info->sectorsize);
- if (key.offset != 0 || datal > fs_info->sectorsize)
- return -EUCLEAN;
+ if (WARN_ON(type != BTRFS_FILE_EXTENT_INLINE) ||
+ WARN_ON(key.offset != 0) ||
+ WARN_ON(datal > fs_info->sectorsize)) {
+ ret = -EUCLEAN;
+ goto out;
+ }
ret = clone_copy_inline_extent(inode, path, &new_key,
drop_start, datal, size,
@@ -518,17 +523,22 @@ process_slot:
btrfs_release_path(path);
/*
- * If this is a new extent update the last_reflink_trans of both
- * inodes. This is used by fsync to make sure it does not log
- * multiple checksum items with overlapping ranges. For older
- * extents we don't need to do it since inode logging skips the
- * checksums for older extents. Also ignore holes and inline
- * extents because they don't have checksums in the csum tree.
+ * Whenever we share an extent we update the last_reflink_trans
+ * of each inode to the current transaction. This is needed to
+ * make sure fsync does not log multiple checksum items with
+ * overlapping ranges (because some extent items might refer
+ * only to sections of the original extent). For the destination
+ * inode we do this regardless of the generation of the extents
+ * or even if they are inline extents or explicit holes, to make
+ * sure a full fsync does not skip them. For the source inode,
+ * we only need to update last_reflink_trans in case it's a new
+ * extent that is not a hole or an inline extent, to deal with
+ * the checksums problem on fsync.
*/
- if (extent_gen == trans->transid && disko > 0) {
+ if (extent_gen == trans->transid && disko > 0)
BTRFS_I(src)->last_reflink_trans = trans->transid;
- BTRFS_I(inode)->last_reflink_trans = trans->transid;
- }
+
+ BTRFS_I(inode)->last_reflink_trans = trans->transid;
last_dest_end = ALIGN(new_key.offset + datal,
fs_info->sectorsize);
@@ -575,8 +585,7 @@ process_slot:
* replaced file extent items.
*/
if (last_dest_end >= i_size_read(inode))
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags);
+ btrfs_set_inode_full_sync(BTRFS_I(inode));
ret = btrfs_replace_file_extents(BTRFS_I(inode), path,
last_dest_end, destoff + len - 1, NULL, &trans);
@@ -772,9 +781,7 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
if (btrfs_root_readonly(root_out))
return -EROFS;
- if (file_in->f_path.mnt != file_out->f_path.mnt ||
- inode_in->i_sb != inode_out->i_sb)
- return -EXDEV;
+ ASSERT(inode_in->i_sb == inode_out->i_sb);
}
/* Don't make the dst file partly checksummed */
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index f5465197996d..fdc2c4b411f0 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2599,9 +2599,9 @@ static int get_tree_block_key(struct btrfs_fs_info *fs_info,
eb = read_tree_block(fs_info, block->bytenr, block->owner,
block->key.offset, block->level, NULL);
- if (IS_ERR(eb)) {
+ if (IS_ERR(eb))
return PTR_ERR(eb);
- } else if (!extent_buffer_uptodate(eb)) {
+ if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
return -EIO;
}
@@ -2997,7 +2997,7 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
/* Reserve metadata for this range */
ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
- clamped_len);
+ clamped_len, clamped_len);
if (ret)
goto release_page;
@@ -3960,6 +3960,19 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
int rw = 0;
int err = 0;
+ /*
+ * This only gets set if we had a half-deleted snapshot on mount. We
+ * cannot allow relocation to start while we're still trying to clean up
+ * these pending deletions.
+ */
+ ret = wait_on_bit(&fs_info->flags, BTRFS_FS_UNFINISHED_DROPS, TASK_INTERRUPTIBLE);
+ if (ret)
+ return ret;
+
+ /* We may have been woken up by close_ctree, so bail if we're closing. */
+ if (btrfs_fs_closing(fs_info))
+ return -EINTR;
+
bg = btrfs_lookup_block_group(fs_info, group_start);
if (!bg)
return -ENOENT;
@@ -4110,9 +4123,8 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
* this function resumes merging reloc trees with corresponding fs trees.
* this is important for keeping the sharing of tree blocks
*/
-int btrfs_recover_relocation(struct btrfs_root *root)
+int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
LIST_HEAD(reloc_roots);
struct btrfs_key key;
struct btrfs_root *fs_root;
@@ -4153,7 +4165,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
key.type != BTRFS_ROOT_ITEM_KEY)
break;
- reloc_root = btrfs_read_tree_root(root, &key);
+ reloc_root = btrfs_read_tree_root(fs_info->tree_root, &key);
if (IS_ERR(reloc_root)) {
err = PTR_ERR(reloc_root);
goto out;
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 3d68d2dcd83e..ca7426ef61c8 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -278,6 +278,21 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state));
if (btrfs_root_refs(&root->root_item) == 0) {
+ struct btrfs_key drop_key;
+
+ btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
+ /*
+ * If we have a non-zero drop_progress then we know we
+ * made it partly through deleting this snapshot, and
+ * thus we need to make sure we block any balance from
+ * happening until this snapshot is completely dropped.
+ */
+ if (drop_key.objectid != 0 || drop_key.type != 0 ||
+ drop_key.offset != 0) {
+ set_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags);
+ set_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state);
+ }
+
set_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
btrfs_add_dead_root(root);
}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 2e9a322773f2..11089568b287 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3190,7 +3190,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
u64 generation;
int mirror_num;
struct btrfs_key key;
- u64 increment = map->stripe_len;
+ u64 increment;
u64 offset;
u64 extent_logical;
u64 extent_physical;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index d8ccb62aa7d2..7d1642937274 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -528,17 +528,12 @@ out:
static int fs_path_copy(struct fs_path *p, struct fs_path *from)
{
- int ret;
-
p->reversed = from->reversed;
fs_path_reset(p);
- ret = fs_path_add_path(p, from);
-
- return ret;
+ return fs_path_add_path(p, from);
}
-
static void fs_path_unreverse(struct fs_path *p)
{
char *tmp;
@@ -4999,6 +4994,10 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
lock_page(page);
if (!PageUptodate(page)) {
unlock_page(page);
+ btrfs_err(fs_info,
+ "send: IO error at offset %llu for inode %llu root %llu",
+ page_offset(page), sctx->cur_ino,
+ sctx->send_root->root_key.objectid);
put_page(page);
ret = -EIO;
break;
@@ -7473,10 +7472,10 @@ static void dedupe_in_progress_warn(const struct btrfs_root *root)
root->root_key.objectid, root->dedupe_in_progress);
}
-long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
+long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
{
int ret = 0;
- struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root;
+ struct btrfs_root *send_root = BTRFS_I(inode)->root;
struct btrfs_fs_info *fs_info = send_root->fs_info;
struct btrfs_root *clone_root;
struct send_ctx *sctx = NULL;
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 23bcefc84e49..08602fdd600a 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -126,7 +126,7 @@ enum {
#define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)
#ifdef __KERNEL__
-long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg);
+long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg);
#endif
#endif
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 294242c194d8..b87931a458eb 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -737,6 +737,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
u64 thresh = div_factor_fine(space_info->total_bytes, 90);
u64 used;
+ lockdep_assert_held(&space_info->lock);
+
/* If we're just plain full then async reclaim just slows us down. */
if ((space_info->bytes_used + space_info->bytes_reserved +
global_rsv_size) >= thresh)
@@ -1061,7 +1063,6 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
trans_rsv->reserved;
if (block_rsv_size < space_info->bytes_may_use)
delalloc_size = space_info->bytes_may_use - block_rsv_size;
- spin_unlock(&space_info->lock);
/*
* We don't want to include the global_rsv in our calculation,
@@ -1092,6 +1093,8 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
flush = FLUSH_DELAYED_REFS_NR;
}
+ spin_unlock(&space_info->lock);
+
/*
* We don't want to reclaim everything, just a portion, so scale
* down the to_reclaim by 1/4. If it takes us down to 0,
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 29bd8c7a7706..ef7ae20d2b77 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -736,7 +736,7 @@ void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page,
* Since we own the page lock, no one else could touch subpage::writers
* and we are safe to do several atomic operations without spinlock.
*/
- if (atomic_read(&subpage->writers))
+ if (atomic_read(&subpage->writers) == 0)
/* No writers, locked by plain lock_page() */
return unlock_page(page);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4d947ba32da9..b228efe8ab6e 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,6 +66,52 @@ static struct file_system_type btrfs_root_fs_type;
static int btrfs_remount(struct super_block *sb, int *flags, char *data);
+#ifdef CONFIG_PRINTK
+
+#define STATE_STRING_PREFACE ": state "
+#define STATE_STRING_BUF_LEN (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
+
+/*
+ * Characters to print to indicate error conditions or uncommon filesystem state.
+ * RO is not an error.
+ */
+static const char fs_state_chars[] = {
+ [BTRFS_FS_STATE_ERROR] = 'E',
+ [BTRFS_FS_STATE_REMOUNTING] = 'M',
+ [BTRFS_FS_STATE_RO] = 0,
+ [BTRFS_FS_STATE_TRANS_ABORTED] = 'A',
+ [BTRFS_FS_STATE_DEV_REPLACING] = 'R',
+ [BTRFS_FS_STATE_DUMMY_FS_INFO] = 0,
+ [BTRFS_FS_STATE_NO_CSUMS] = 'C',
+ [BTRFS_FS_STATE_LOG_CLEANUP_ERROR] = 'L',
+};
+
+static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
+{
+ unsigned int bit;
+ bool states_printed = false;
+ unsigned long fs_state = READ_ONCE(info->fs_state);
+ char *curr = buf;
+
+ memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
+ curr += sizeof(STATE_STRING_PREFACE) - 1;
+
+ for_each_set_bit(bit, &fs_state, sizeof(fs_state)) {
+ WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
+ if ((bit < BTRFS_FS_STATE_COUNT) && fs_state_chars[bit]) {
+ *curr++ = fs_state_chars[bit];
+ states_printed = true;
+ }
+ }
+
+ /* If no states were printed, reset the buffer */
+ if (!states_printed)
+ curr = buf;
+
+ *curr++ = 0;
+}
+#endif
+
/*
* Generally the error codes correspond to their respective errors, but there
* are a few special cases.
@@ -128,6 +174,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
{
struct super_block *sb = fs_info->sb;
#ifdef CONFIG_PRINTK
+ char statestr[STATE_STRING_BUF_LEN];
const char *errstr;
#endif
@@ -140,6 +187,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
#ifdef CONFIG_PRINTK
errstr = btrfs_decode_error(errno);
+ btrfs_state_to_string(fs_info, statestr);
if (fmt) {
struct va_format vaf;
va_list args;
@@ -148,12 +196,12 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
vaf.fmt = fmt;
vaf.va = &args;
- pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
- sb->s_id, function, line, errno, errstr, &vaf);
+ pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
+ sb->s_id, statestr, function, line, errno, errstr, &vaf);
va_end(args);
} else {
- pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
- sb->s_id, function, line, errno, errstr);
+ pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
+ sb->s_id, statestr, function, line, errno, errstr);
}
#endif
@@ -240,11 +288,15 @@ void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, .
vaf.va = &args;
if (__ratelimit(ratelimit)) {
- if (fs_info)
- printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
- fs_info->sb->s_id, &vaf);
- else
+ if (fs_info) {
+ char statestr[STATE_STRING_BUF_LEN];
+
+ btrfs_state_to_string(fs_info, statestr);
+ printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
+ fs_info->sb->s_id, statestr, &vaf);
+ } else {
printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
+ }
}
va_end(args);
@@ -861,6 +913,14 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
break;
case Opt_space_cache:
case Opt_space_cache_version:
+ /*
+ * We already set FREE_SPACE_TREE above because we have
+ * compat_ro(FREE_SPACE_TREE) set, and we aren't going
+ * to allow v1 to be set for extent tree v2, so simply
+ * ignore this setting if we're using extent tree v2.
+ */
+ if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
+ break;
if (token == Opt_space_cache ||
strcmp(args[0].from, "v1") == 0) {
btrfs_clear_opt(info->mount_opt,
@@ -881,6 +941,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
break;
case Opt_no_space_cache:
+ /*
+ * We cannot operate without the free space tree with
+ * extent tree v2, ignore this option.
+ */
+ if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
+ break;
if (btrfs_test_opt(info, SPACE_CACHE)) {
btrfs_clear_and_info(info, SPACE_CACHE,
"disabling disk space caching");
@@ -896,6 +962,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
"the 'inode_cache' option is deprecated and has no effect since 5.11");
break;
case Opt_clear_cache:
+ /*
+ * We cannot clear the free space tree with extent tree
+ * v2, ignore this option.
+ */
+ if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
+ break;
btrfs_set_and_info(info, CLEAR_CACHE,
"force clearing of disk cache");
break;
@@ -2383,6 +2455,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
{
struct btrfs_ioctl_vol_args *vol;
struct btrfs_device *device = NULL;
+ dev_t devt = 0;
int ret = -ENOTTY;
if (!capable(CAP_SYS_ADMIN))
@@ -2402,7 +2475,12 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
mutex_unlock(&uuid_mutex);
break;
case BTRFS_IOC_FORGET_DEV:
- ret = btrfs_forget_devices(vol->name);
+ if (vol->name[0] != 0) {
+ ret = lookup_bdev(vol->name, &devt);
+ if (ret)
+ break;
+ }
+ ret = btrfs_forget_devices(devt);
break;
case BTRFS_IOC_DEVICES_READY:
mutex_lock(&uuid_mutex);
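For reference, a userspace re-implementation of the state-string encoding added above, assuming the same eight state bits shown in the hunk; the device name and example bits are illustrative only.

#include <stdio.h>
#include <string.h>

#define PREFACE ": state "
#define NSTATES 8

/* Bit position -> printed character; 0 means "not worth printing" (RO). */
static const char state_chars[NSTATES] = {
	'E', 'M', 0, 'A', 'R', 0, 'C', 'L',
};

static void state_to_string(unsigned long state, char *buf)
{
	char *curr = buf;
	int printed = 0;

	memcpy(curr, PREFACE, sizeof(PREFACE));
	curr += sizeof(PREFACE) - 1;
	for (int bit = 0; bit < NSTATES; bit++) {
		if ((state & (1UL << bit)) && state_chars[bit]) {
			*curr++ = state_chars[bit];
			printed = 1;
		}
	}
	if (!printed)	/* nothing notable set: collapse to an empty string */
		curr = buf;
	*curr = '\0';
}

int main(void)
{
	char buf[sizeof(PREFACE) + NSTATES];

	state_to_string((1UL << 0) | (1UL << 3), buf);	/* ERROR + ABORTED */
	printf("BTRFS: error (device sda1%s) ...\n", buf);	/* ": state EA" */
	return 0;
}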
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index beb7f72d50b8..17389a42a3ab 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -283,9 +283,11 @@ BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
BTRFS_FEAT_ATTR_INCOMPAT(metadata_uuid, METADATA_UUID);
BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
BTRFS_FEAT_ATTR_INCOMPAT(raid1c34, RAID1C34);
-/* Remove once support for zoned allocation is feature complete */
#ifdef CONFIG_BTRFS_DEBUG
+/* Remove once support for zoned allocation is feature complete */
BTRFS_FEAT_ATTR_INCOMPAT(zoned, ZONED);
+/* Remove once support for extent tree v2 is feature complete */
+BTRFS_FEAT_ATTR_INCOMPAT(extent_tree_v2, EXTENT_TREE_V2);
#endif
#ifdef CONFIG_FS_VERITY
BTRFS_FEAT_ATTR_COMPAT_RO(verity, VERITY);
@@ -314,6 +316,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(raid1c34),
#ifdef CONFIG_BTRFS_DEBUG
BTRFS_FEAT_ATTR_PTR(zoned),
+ BTRFS_FEAT_ATTR_PTR(extent_tree_v2),
#endif
#ifdef CONFIG_FS_VERITY
BTRFS_FEAT_ATTR_PTR(verity),
@@ -1104,6 +1107,11 @@ static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
static char btrfs_unknown_feature_names[FEAT_MAX][NUM_FEATURE_BITS][BTRFS_FEATURE_NAME_MAX];
static struct btrfs_feature_attr btrfs_feature_attrs[FEAT_MAX][NUM_FEATURE_BITS];
+static_assert(ARRAY_SIZE(btrfs_unknown_feature_names) ==
+ ARRAY_SIZE(btrfs_feature_attrs));
+static_assert(ARRAY_SIZE(btrfs_unknown_feature_names[0]) ==
+ ARRAY_SIZE(btrfs_feature_attrs[0]));
+
static const u64 supported_feature_masks[FEAT_MAX] = {
[FEAT_COMPAT] = BTRFS_FEATURE_COMPAT_SUPP,
[FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP,
@@ -1272,11 +1280,6 @@ static void init_feature_attrs(void)
struct btrfs_feature_attr *fa;
int set, i;
- BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names) !=
- ARRAY_SIZE(btrfs_feature_attrs));
- BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names[0]) !=
- ARRAY_SIZE(btrfs_feature_attrs[0]));
-
memset(btrfs_feature_attrs, 0, sizeof(btrfs_feature_attrs));
memset(btrfs_unknown_feature_names, 0,
sizeof(btrfs_unknown_feature_names));
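A short sketch of why the static_assert conversion above helps: unlike BUILD_BUG_ON, static_assert works at file scope, so the checks no longer have to live inside init_feature_attrs(). Standard C11 shown, with hypothetical arrays.

#include <assert.h>

static char names[4][8];
static int attrs[4][8];

/* Checked at compile time, outside any function. */
static_assert(sizeof(names) / sizeof(names[0]) ==
	      sizeof(attrs) / sizeof(attrs[0]), "array sizes must match");

int main(void)
{
	return names[0][0] + attrs[0][0];	/* 0; keeps the arrays used */
}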
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index 319fed82d741..c5b3a631bf4f 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -15,6 +15,7 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
struct extent_map *em;
struct rb_node *node;
+ write_lock(&em_tree->lock);
while (!RB_EMPTY_ROOT(&em_tree->map.rb_root)) {
node = rb_first_cached(&em_tree->map);
em = rb_entry(node, struct extent_map, rb_node);
@@ -32,6 +33,7 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
#endif
free_extent_map(em);
}
+ write_unlock(&em_tree->lock);
}
/*
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 03de89b45f27..b008c5110958 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -854,7 +854,37 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
static noinline void wait_for_commit(struct btrfs_transaction *commit,
const enum btrfs_trans_state min_state)
{
- wait_event(commit->commit_wait, commit->state >= min_state);
+ struct btrfs_fs_info *fs_info = commit->fs_info;
+ u64 transid = commit->transid;
+ bool put = false;
+
+ while (1) {
+ wait_event(commit->commit_wait, commit->state >= min_state);
+ if (put)
+ btrfs_put_transaction(commit);
+
+ if (min_state < TRANS_STATE_COMPLETED)
+ break;
+
+ /*
+ * A transaction isn't really completed until all of the
+ * previous transactions are completed, but with fsync we can
+ * end up with SUPER_COMMITTED transactions before a COMPLETED
+ * transaction. Wait for those.
+ */
+
+ spin_lock(&fs_info->trans_lock);
+ commit = list_first_entry_or_null(&fs_info->trans_list,
+ struct btrfs_transaction,
+ list);
+ if (!commit || commit->transid > transid) {
+ spin_unlock(&fs_info->trans_lock);
+ break;
+ }
+ refcount_inc(&commit->use_count);
+ put = true;
+ spin_unlock(&fs_info->trans_lock);
+ }
}
int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)
@@ -1320,6 +1350,32 @@ again:
}
/*
+ * If we had a pending drop we need to see if there are any others left in our
+ * dead roots list, and if not, clear our bit and wake any waiters.
+ */
+void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
+{
+ /*
+ * We put the drop-in-progress roots at the front of the list, so if the
+ * first entry doesn't have UNFINISHED_DROP set we can wake everybody
+ * up.
+ */
+ spin_lock(&fs_info->trans_lock);
+ if (!list_empty(&fs_info->dead_roots)) {
+ struct btrfs_root *root = list_first_entry(&fs_info->dead_roots,
+ struct btrfs_root,
+ root_list);
+ if (test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state)) {
+ spin_unlock(&fs_info->trans_lock);
+ return;
+ }
+ }
+ spin_unlock(&fs_info->trans_lock);
+
+ btrfs_wake_unfinished_drop(fs_info);
+}
+
+/*
* dead roots are old snapshots that need to be deleted. This allocates
* a dirty root struct and adds it into the list of dead roots that need to
* be deleted
@@ -1331,7 +1387,12 @@ void btrfs_add_dead_root(struct btrfs_root *root)
spin_lock(&fs_info->trans_lock);
if (list_empty(&root->root_list)) {
btrfs_grab_root(root);
- list_add_tail(&root->root_list, &fs_info->dead_roots);
+
+ /* We want to process the partially complete drops first. */
+ if (test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state))
+ list_add(&root->root_list, &fs_info->dead_roots);
+ else
+ list_add_tail(&root->root_list, &fs_info->dead_roots);
}
spin_unlock(&fs_info->trans_lock);
}
@@ -1850,6 +1911,14 @@ static void update_super_roots(struct btrfs_fs_info *fs_info)
super->cache_generation = 0;
if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
super->uuid_tree_generation = root_item->generation;
+
+ if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+ root_item = &fs_info->block_group_root->root_item;
+
+ super->block_group_root = root_item->bytenr;
+ super->block_group_root_generation = root_item->generation;
+ super->block_group_root_level = root_item->level;
+ }
}
int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@@ -1981,16 +2050,24 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
{
/*
- * We use writeback_inodes_sb here because if we used
+ * We use try_to_writeback_inodes_sb() here because if we used
* btrfs_start_delalloc_roots we would deadlock with fs freeze.
* Currently are holding the fs freeze lock, if we do an async flush
* we'll do btrfs_join_transaction() and deadlock because we need to
* wait for the fs freeze lock. Using the direct flushing we benefit
* from already being in a transaction and our join_transaction doesn't
* have to re-take the fs freeze lock.
+ *
+ * Note that try_to_writeback_inodes_sb() will only trigger writeback
+ * if it can read lock sb->s_umount. It will always be able to lock it,
+ * except when the filesystem is being unmounted or being frozen, but in
+ * those cases sync_filesystem() is called, which results in calling
+ * writeback_inodes_sb() while holding a write lock on sb->s_umount.
+ * Note that we don't call writeback_inodes_sb() directly, because it
+ * will emit a warning if sb->s_umount is not locked.
*/
if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
- writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
+ try_to_writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
return 0;
}
@@ -2000,6 +2077,27 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
}
+/*
+ * Add a pending snapshot associated with the given transaction handle to the
+ * respective transaction. This must be called after the transaction commit
+ * has started and while holding fs_info->trans_lock.
+ * This serves to guarantee a caller of btrfs_commit_transaction() that it can
+ * safely free the pending snapshot pointer in case btrfs_commit_transaction()
+ * returns an error.
+ */
+static void add_pending_snapshot(struct btrfs_trans_handle *trans)
+{
+ struct btrfs_transaction *cur_trans = trans->transaction;
+
+ if (!trans->pending_snapshot)
+ return;
+
+ lockdep_assert_held(&trans->fs_info->trans_lock);
+ ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_START);
+
+ list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots);
+}
+
int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -2073,6 +2171,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
+ add_pending_snapshot(trans);
+
spin_unlock(&fs_info->trans_lock);
refcount_inc(&cur_trans->use_count);
@@ -2163,6 +2263,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* COMMIT_DOING so make sure to wait for num_writers to == 1 again.
*/
spin_lock(&fs_info->trans_lock);
+ add_pending_snapshot(trans);
cur_trans->state = TRANS_STATE_COMMIT_DOING;
spin_unlock(&fs_info->trans_lock);
wait_event(cur_trans->writer_wait,
@@ -2269,6 +2370,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
list_add_tail(&fs_info->chunk_root->dirty_list,
&cur_trans->switch_commits);
+ if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+ btrfs_set_root_node(&fs_info->block_group_root->root_item,
+ fs_info->block_group_root->node);
+ list_add_tail(&fs_info->block_group_root->dirty_list,
+ &cur_trans->switch_commits);
+ }
+
switch_commit_roots(trans);
ASSERT(list_empty(&cur_trans->dirty_bgs));
@@ -2397,10 +2505,10 @@ cleanup_transaction:
* because btrfs_commit_super will poke cleaner thread and it will process it a
* few seconds later.
*/
-int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
+int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info)
{
+ struct btrfs_root *root;
int ret;
- struct btrfs_fs_info *fs_info = root->fs_info;
spin_lock(&fs_info->trans_lock);
if (list_empty(&fs_info->dead_roots)) {
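A sketch of the dead-roots ordering introduced above: roots with an unfinished drop are pushed to the front (list_add()) so the cleaner processes them first, while everything else is appended (list_add_tail()). A plain array stands in for the kernel's linked list; the names are illustrative.

#include <stdbool.h>
#include <stdio.h>

struct root { const char *name; bool unfinished_drop; };

#define MAX 8
static struct root *queue[MAX];
static int count;

static void add_dead_root(struct root *r)
{
	if (r->unfinished_drop) {		/* list_add(): push front */
		for (int i = count; i > 0; i--)
			queue[i] = queue[i - 1];
		queue[0] = r;
	} else {				/* list_add_tail(): append */
		queue[count] = r;
	}
	count++;
}

int main(void)
{
	struct root a = { "snap-a", false };
	struct root b = { "snap-b", true };	/* partially dropped */
	struct root c = { "snap-c", false };

	add_dead_root(&a);
	add_dead_root(&b);
	add_dead_root(&c);
	for (int i = 0; i < count; i++)
		printf("%s\n", queue[i]->name);	/* snap-b, snap-a, snap-c */
	return 0;
}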
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 1852ed9de7fd..970ff316069d 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -123,6 +123,8 @@ struct btrfs_trans_handle {
struct btrfs_transaction *transaction;
struct btrfs_block_rsv *block_rsv;
struct btrfs_block_rsv *orig_rsv;
+ /* Set by a task that wants to create a snapshot. */
+ struct btrfs_pending_snapshot *pending_snapshot;
refcount_t use_count;
unsigned int type;
/*
@@ -214,7 +216,8 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid);
void btrfs_add_dead_root(struct btrfs_root *root);
int btrfs_defrag_root(struct btrfs_root *root);
-int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
+void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info);
+int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans);
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 72e1c942197d..e56c0107eea3 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -639,8 +639,10 @@ static void block_group_err(const struct extent_buffer *eb, int slot,
static int check_block_group_item(struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_block_group_item bgi;
u32 item_size = btrfs_item_size(leaf, slot);
+ u64 chunk_objectid;
u64 flags;
u64 type;
@@ -663,8 +665,23 @@ static int check_block_group_item(struct extent_buffer *leaf,
read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
sizeof(bgi));
- if (unlikely(btrfs_stack_block_group_chunk_objectid(&bgi) !=
- BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
+ chunk_objectid = btrfs_stack_block_group_chunk_objectid(&bgi);
+ if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+ /*
+ * We don't init the nr_global_roots until we load the global
+ * roots, so this could be 0 at mount time. If it's 0 we'll
+ * just assume we're fine, and later we'll check against our
+ * actual value.
+ */
+ if (unlikely(fs_info->nr_global_roots &&
+ chunk_objectid >= fs_info->nr_global_roots)) {
+ block_group_err(leaf, slot,
+ "invalid block group global root id, have %llu, needs to be <= %llu",
+ chunk_objectid,
+ fs_info->nr_global_roots);
+ return -EUCLEAN;
+ }
+ } else if (unlikely(chunk_objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
block_group_err(leaf, slot,
"invalid block group chunk objectid, have %llu expect %llu",
btrfs_stack_block_group_chunk_objectid(&bgi),
@@ -965,6 +982,7 @@ static int check_dev_item(struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
struct btrfs_dev_item *ditem;
+ const u32 item_size = btrfs_item_size(leaf, slot);
if (unlikely(key->objectid != BTRFS_DEV_ITEMS_OBJECTID)) {
dev_item_err(leaf, slot,
@@ -972,6 +990,13 @@ static int check_dev_item(struct extent_buffer *leaf,
key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
return -EUCLEAN;
}
+
+ if (unlikely(item_size != sizeof(*ditem))) {
+ dev_item_err(leaf, slot, "invalid item size: has %u expect %zu",
+ item_size, sizeof(*ditem));
+ return -EUCLEAN;
+ }
+
ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
if (unlikely(btrfs_device_id(leaf, ditem) != key->offset)) {
dev_item_err(leaf, slot,
@@ -1007,6 +1032,7 @@ static int check_inode_item(struct extent_buffer *leaf,
struct btrfs_inode_item *iitem;
u64 super_gen = btrfs_super_generation(fs_info->super_copy);
u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
+ const u32 item_size = btrfs_item_size(leaf, slot);
u32 mode;
int ret;
u32 flags;
@@ -1016,6 +1042,12 @@ static int check_inode_item(struct extent_buffer *leaf,
if (unlikely(ret < 0))
return ret;
+ if (unlikely(item_size != sizeof(*iitem))) {
+ generic_err(leaf, slot, "invalid item size: has %u expect %zu",
+ item_size, sizeof(*iitem));
+ return -EUCLEAN;
+ }
+
iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);
/* Here we use super block generation + 1 to handle log tree */
@@ -1633,7 +1665,6 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
/* These trees must never be empty */
if (unlikely(owner == BTRFS_ROOT_TREE_OBJECTID ||
owner == BTRFS_CHUNK_TREE_OBJECTID ||
- owner == BTRFS_EXTENT_TREE_OBJECTID ||
owner == BTRFS_DEV_TREE_OBJECTID ||
owner == BTRFS_FS_TREE_OBJECTID ||
owner == BTRFS_DATA_RELOC_TREE_OBJECTID)) {
@@ -1642,12 +1673,25 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
owner);
return -EUCLEAN;
}
+
/* Unknown tree */
if (unlikely(owner == 0)) {
generic_err(leaf, 0,
"invalid owner, root 0 is not defined");
return -EUCLEAN;
}
+
+ /* EXTENT_TREE_V2 can have empty extent trees. */
+ if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+ return 0;
+
+ if (unlikely(owner == BTRFS_EXTENT_TREE_OBJECTID)) {
+ generic_err(leaf, 0,
+ "invalid root, root %llu must never be empty",
+ owner);
+ return -EUCLEAN;
+ }
+
return 0;
}
@@ -1667,6 +1711,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
*/
for (slot = 0; slot < nritems; slot++) {
u32 item_end_expected;
+ u64 item_data_end;
int ret;
btrfs_item_key_to_cpu(leaf, &key, slot);
@@ -1681,6 +1726,8 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
return -EUCLEAN;
}
+ item_data_end = (u64)btrfs_item_offset(leaf, slot) +
+ btrfs_item_size(leaf, slot);
/*
* Make sure the offset and ends are right, remember that the
* item data starts at the end of the leaf and grows towards the
@@ -1691,11 +1738,10 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
else
item_end_expected = btrfs_item_offset(leaf,
slot - 1);
- if (unlikely(btrfs_item_data_end(leaf, slot) != item_end_expected)) {
+ if (unlikely(item_data_end != item_end_expected)) {
generic_err(leaf, slot,
- "unexpected item end, have %u expect %u",
- btrfs_item_data_end(leaf, slot),
- item_end_expected);
+ "unexpected item end, have %llu expect %u",
+ item_data_end, item_end_expected);
return -EUCLEAN;
}
@@ -1704,12 +1750,10 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
* just in case all the items are consistent to each other, but
* all point outside of the leaf.
*/
- if (unlikely(btrfs_item_data_end(leaf, slot) >
- BTRFS_LEAF_DATA_SIZE(fs_info))) {
+ if (unlikely(item_data_end > BTRFS_LEAF_DATA_SIZE(fs_info))) {
generic_err(leaf, slot,
- "slot end outside of leaf, have %u expect range [0, %u]",
- btrfs_item_data_end(leaf, slot),
- BTRFS_LEAF_DATA_SIZE(fs_info));
+ "slot end outside of leaf, have %llu expect range [0, %u]",
+ item_data_end, BTRFS_LEAF_DATA_SIZE(fs_info));
return -EUCLEAN;
}
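A sketch of the shape of the new item-size checks: reject an on-disk item whose recorded size differs from the expected structure size before dereferencing any field. Types are simplified and userspace-only; 117 matches the errno value EUCLEAN ("structure needs cleaning") the kernel returns.

#include <stdio.h>

#define EUCLEAN 117	/* "structure needs cleaning" */

struct dev_item { unsigned long long devid; unsigned long long total_bytes; };

static int check_dev_item(size_t item_size)
{
	if (item_size != sizeof(struct dev_item)) {
		fprintf(stderr, "invalid item size: has %zu expect %zu\n",
			item_size, sizeof(struct dev_item));
		return -EUCLEAN;
	}
	return 0;
}

int main(void)
{
	printf("%d\n", check_dev_item(sizeof(struct dev_item)));	/* 0 */
	printf("%d\n", check_dev_item(3));				/* -117 */
	return 0;
}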
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c1ddbe800897..571dae8ad65e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -270,12 +270,6 @@ void btrfs_end_log_trans(struct btrfs_root *root)
}
}
-static int btrfs_write_tree_block(struct extent_buffer *buf)
-{
- return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
- buf->start + buf->len - 1);
-}
-
static void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
{
filemap_fdatawait_range(buf->pages[0]->mapping,
@@ -294,16 +288,6 @@ struct walk_control {
*/
int free;
- /* should we write out the extent buffer? This is used
- * while flushing the log tree to disk during a sync
- */
- int write;
-
- /* should we wait for the extent buffer io to finish? Also used
- * while flushing the log tree to disk for a sync
- */
- int wait;
-
/* pin only walk, we record which extents on disk belong to the
* log trees
*/
@@ -354,17 +338,15 @@ static int process_one_buffer(struct btrfs_root *log,
return ret;
}
- if (wc->pin)
+ if (wc->pin) {
ret = btrfs_pin_extent_for_log_replay(wc->trans, eb->start,
eb->len);
+ if (ret)
+ return ret;
- if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
- if (wc->pin && btrfs_header_level(eb) == 0)
+ if (btrfs_buffer_uptodate(eb, gen, 0) &&
+ btrfs_header_level(eb) == 0)
ret = btrfs_exclude_logged_extents(eb);
- if (wc->write)
- btrfs_write_tree_block(eb);
- if (wc->wait)
- btrfs_wait_tree_block_writeback(eb);
}
return ret;
}
@@ -917,6 +899,26 @@ out:
return ret;
}
+static int unlink_inode_for_log_replay(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *dir,
+ struct btrfs_inode *inode,
+ const char *name,
+ int name_len)
+{
+ int ret;
+
+ ret = btrfs_unlink_inode(trans, dir, inode, name, name_len);
+ if (ret)
+ return ret;
+ /*
+ * Whenever we need to check if a name exists or not, we check the
+ * fs/subvolume tree. So after an unlink we must run delayed items, so
+ * that future checks for a name during log replay see that the name
+ * does not exists anymore.
+ */
+ return btrfs_run_delayed_items(trans);
+}
+
/*
* when cleaning up conflicts between the directory names in the
* subvolume, directory names in the log and directory names in the
@@ -959,12 +961,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
if (ret)
goto out;
- ret = btrfs_unlink_inode(trans, dir, BTRFS_I(inode), name,
+ ret = unlink_inode_for_log_replay(trans, dir, BTRFS_I(inode), name,
name_len);
- if (ret)
- goto out;
- else
- ret = btrfs_run_delayed_items(trans);
out:
kfree(name);
iput(inode);
@@ -1124,14 +1122,11 @@ again:
inc_nlink(&inode->vfs_inode);
btrfs_release_path(path);
- ret = btrfs_unlink_inode(trans, dir, inode,
+ ret = unlink_inode_for_log_replay(trans, dir, inode,
victim_name, victim_name_len);
kfree(victim_name);
if (ret)
return ret;
- ret = btrfs_run_delayed_items(trans);
- if (ret)
- return ret;
*search_done = 1;
goto again;
}
@@ -1196,14 +1191,11 @@ again:
inc_nlink(&inode->vfs_inode);
btrfs_release_path(path);
- ret = btrfs_unlink_inode(trans,
+ ret = unlink_inode_for_log_replay(trans,
BTRFS_I(victim_parent),
inode,
victim_name,
victim_name_len);
- if (!ret)
- ret = btrfs_run_delayed_items(
- trans);
}
iput(victim_parent);
kfree(victim_name);
@@ -1358,7 +1350,7 @@ again:
kfree(name);
goto out;
}
- ret = btrfs_unlink_inode(trans, BTRFS_I(dir),
+ ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir),
inode, name, namelen);
kfree(name);
iput(dir);
@@ -1457,8 +1449,8 @@ static int add_link(struct btrfs_trans_handle *trans,
ret = -ENOENT;
goto out;
}
- ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(other_inode),
- name, namelen);
+ ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), BTRFS_I(other_inode),
+ name, namelen);
if (ret)
goto out;
/*
@@ -1467,10 +1459,6 @@ static int add_link(struct btrfs_trans_handle *trans,
*/
if (other_inode->i_nlink == 0)
inc_nlink(other_inode);
-
- ret = btrfs_run_delayed_items(trans);
- if (ret)
- goto out;
add_link:
ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
name, namelen, 0, ref_index);
@@ -1603,7 +1591,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
ret = btrfs_inode_ref_exists(inode, dir, key->type,
name, namelen);
if (ret > 0) {
- ret = btrfs_unlink_inode(trans,
+ ret = unlink_inode_for_log_replay(trans,
BTRFS_I(dir),
BTRFS_I(inode),
name, namelen);
@@ -2350,15 +2338,8 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
goto out;
inc_nlink(inode);
- ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(inode), name,
- name_len);
- if (ret)
- goto out;
-
- ret = btrfs_run_delayed_items(trans);
- if (ret)
- goto out;
-
+ ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), BTRFS_I(inode),
+ name, name_len);
/*
* Unlike dir item keys, dir index keys can only have one name (entry) in
* them, as there are no key collisions since each key has a unique offset
@@ -3414,6 +3395,29 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
if (log->node) {
ret = walk_log_tree(trans, log, &wc);
if (ret) {
+ /*
+ * We weren't able to traverse the entire log tree, the
+ * typical scenario is getting an -EIO when reading an
+ * extent buffer of the tree, due to an earlier writeback
+ * failure of that buffer.
+ */
+ set_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
+ &log->fs_info->fs_state);
+
+ /*
+ * Some extent buffers of the log tree may still be dirty
+ * and not yet written back to storage, because we may
+ * have updates to a log tree without syncing a log tree,
+ * such as during rename and link operations. So flush
+ * them out and wait for their writeback to complete, so
+ * that we properly cleanup their state and pages.
+ */
+ btrfs_write_marked_extents(log->fs_info,
+ &log->dirty_log_pages,
+ EXTENT_DIRTY | EXTENT_NEW);
+ btrfs_wait_tree_log_extents(log,
+ EXTENT_DIRTY | EXTENT_NEW);
+
if (trans)
btrfs_abort_transaction(trans, ret);
else
@@ -3454,35 +3458,156 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
}
/*
- * Check if an inode was logged in the current transaction. This may often
- * return some false positives, because logged_trans is an in memory only field,
- * not persisted anywhere. This is meant to be used in contexts where a false
- * positive has no functional consequences.
+ * Check if an inode was logged in the current transaction. This correctly deals
+ * with the case where the inode was logged but has a logged_trans of 0, which
+ * happens if the inode is evicted and loaded again, as logged_trans is an in
+ * memory only field (not persisted).
+ *
+ * Returns 1 if the inode was logged before in the current transaction,
+ * 0 if it was not, and < 0 on error.
*/
-static bool inode_logged(struct btrfs_trans_handle *trans,
- struct btrfs_inode *inode)
+static int inode_logged(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode,
+ struct btrfs_path *path_in)
{
+ struct btrfs_path *path = path_in;
+ struct btrfs_key key;
+ int ret;
+
if (inode->logged_trans == trans->transid)
- return true;
+ return 1;
- if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &inode->root->state))
- return false;
+ /*
+ * If logged_trans is not 0, then we know the inode was not logged in this
+ * transaction, so we can return 0 right away.
+ */
+ if (inode->logged_trans > 0)
+ return 0;
/*
- * The inode's logged_trans is always 0 when we load it (because it is
- * not persisted in the inode item or elsewhere). So if it is 0, the
- * inode was last modified in the current transaction then the inode may
- * have been logged before in the current transaction, then evicted and
- * loaded again in the current transaction - or may have never been logged
- * in the current transaction, but since we can not be sure, we have to
- * assume it was, otherwise our callers can leave an inconsistent log.
+ * If no log tree was created for this root in this transaction, then
+ * the inode cannot have been logged in this transaction. In that case
+ * set logged_trans to anything greater than 0 and less than the current
+ * transaction's ID, to avoid the search below in a future call in case
+ * a log tree gets created after this.
*/
- if (inode->logged_trans == 0 &&
- inode->last_trans == trans->transid &&
- !test_bit(BTRFS_FS_LOG_RECOVERING, &trans->fs_info->flags))
- return true;
+ if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &inode->root->state)) {
+ inode->logged_trans = trans->transid - 1;
+ return 0;
+ }
- return false;
+ /*
+ * We have a log tree and the inode's logged_trans is 0. We can't tell
+ * for sure if the inode was logged before in this transaction by looking
+ * only at logged_trans. We could be pessimistic and assume it was, but
+ * that can lead to unnecessarily logging an inode during rename and link
+ * operations, and then further updating the log in follow-up rename and
+ * link operations, especially if it's a directory, which adds latency
+ * visible to applications doing a series of rename or link operations.
+ *
+ * A logged_trans of 0 here can mean several things:
+ *
+ * 1) The inode was never logged since the filesystem was mounted, and may
+ * or may not have been evicted and loaded again;
+ *
+ * 2) The inode was logged in a previous transaction, then evicted and
+ * then loaded again;
+ *
+ * 3) The inode was logged in the current transaction, then evicted and
+ * then loaded again.
+ *
+ * For cases 1) and 2) we don't want to return 1, but we need to detect
+ * case 3) and return 1. So we do a search in the log root for the inode
+ * item.
+ */
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ if (!path) {
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ }
+
+ ret = btrfs_search_slot(NULL, inode->root->log_root, &key, path, 0, 0);
+
+ if (path_in)
+ btrfs_release_path(path);
+ else
+ btrfs_free_path(path);
+
+ /*
+ * Logging an inode always results in logging its inode item. So if we
+ * did not find the item we know the inode was not logged for sure.
+ */
+ if (ret < 0) {
+ return ret;
+ } else if (ret > 0) {
+ /*
+ * Set logged_trans to a value greater than 0 and less than the
+ * current transaction to avoid doing the search in future calls.
+ */
+ inode->logged_trans = trans->transid - 1;
+ return 0;
+ }
+
+ /*
+ * The inode was previously logged and then evicted; set logged_trans to
+ * the current transaction's ID, to avoid future tree searches as long as
+ * the inode is not evicted again.
+ */
+ inode->logged_trans = trans->transid;
+
+ /*
+ * If it's a directory, then we must set last_dir_index_offset to the
+ * maximum possible value, so that the next attempt to log the inode does
+ * not skip checking if dir index keys found in modified subvolume tree
+ * leaves have been logged before, otherwise it would result in attempts
+ * to insert duplicate dir index keys in the log tree. This must be done
+ * because last_dir_index_offset is an in-memory only field, not persisted
+ * in the inode item or any other on-disk structure, so its value is lost
+ * once the inode is evicted.
+ */
+ if (S_ISDIR(inode->vfs_inode.i_mode))
+ inode->last_dir_index_offset = (u64)-1;
+
+ return 1;
+}
+
+/*
+ * Delete a directory entry from the log if it exists.
+ *
+ * Returns < 0 on error
+ * 1 if the entry does not exist
+ * 0 if the entry existed and was successfully deleted
+ */
+static int del_logged_dentry(struct btrfs_trans_handle *trans,
+ struct btrfs_root *log,
+ struct btrfs_path *path,
+ u64 dir_ino,
+ const char *name, int name_len,
+ u64 index)
+{
+ struct btrfs_dir_item *di;
+
+ /*
+ * We only log dir index items of a directory, so we don't need to look
+ * for dir item keys.
+ */
+ di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
+ index, name, name_len, -1);
+ if (IS_ERR(di))
+ return PTR_ERR(di);
+ else if (!di)
+ return 1;
+
+ /*
+ * We do not need to update the size field of the directory's
+ * inode item because on log replay we update the field to reflect
+ * all existing entries in the directory (see overwrite_item()).
+ */
+ return btrfs_delete_one_dir_name(trans, log, path, di);
}
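A userspace sketch of the logged_trans caching scheme described in the comments above: 0 means "unknown" (the inode was loaded fresh), transid - 1 is the "known not logged" sentinel, and transid itself means "logged in the current transaction". The tree search is reduced to a stand-in; field and function names are illustrative.

#include <stdint.h>
#include <stdio.h>

struct inode_sketch { uint64_t logged_trans; };

static int expensive_log_search(void) { return 0; }	/* stand-in */

static int inode_logged_sketch(struct inode_sketch *inode, uint64_t transid)
{
	if (inode->logged_trans == transid)
		return 1;			/* cached: logged */
	if (inode->logged_trans > 0)
		return 0;			/* cached: not logged */
	/* Unknown: search once, then cache the answer either way. */
	if (expensive_log_search()) {
		inode->logged_trans = transid;
		return 1;
	}
	inode->logged_trans = transid - 1;	/* "not logged" sentinel */
	return 0;
}

int main(void)
{
	struct inode_sketch ino = { 0 };

	printf("%d\n", inode_logged_sketch(&ino, 100));	/* 0, does the search */
	printf("%d\n", inode_logged_sketch(&ino, 100));	/* 0, cached */
	return 0;
}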
/*
@@ -3511,15 +3636,16 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
const char *name, int name_len,
struct btrfs_inode *dir, u64 index)
{
- struct btrfs_root *log;
- struct btrfs_dir_item *di;
struct btrfs_path *path;
int ret;
- int err = 0;
- u64 dir_ino = btrfs_ino(dir);
- if (!inode_logged(trans, dir))
+ ret = inode_logged(trans, dir, NULL);
+ if (ret == 0)
return;
+ else if (ret < 0) {
+ btrfs_set_log_full_commit(trans);
+ return;
+ }
ret = join_running_log_trans(root);
if (ret)
@@ -3527,41 +3653,18 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
mutex_lock(&dir->log_mutex);
- log = root->log_root;
path = btrfs_alloc_path();
if (!path) {
- err = -ENOMEM;
+ ret = -ENOMEM;
goto out_unlock;
}
- /*
- * We only log dir index items of a directory, so we don't need to look
- * for dir item keys.
- */
- di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
- index, name, name_len, -1);
- if (IS_ERR(di)) {
- err = PTR_ERR(di);
- goto fail;
- }
- if (di) {
- ret = btrfs_delete_one_dir_name(trans, log, path, di);
- if (ret) {
- err = ret;
- goto fail;
- }
- }
-
- /*
- * We do not need to update the size field of the directory's inode item
- * because on log replay we update the field to reflect all existing
- * entries in the directory (see overwrite_item()).
- */
-fail:
+ ret = del_logged_dentry(trans, root->log_root, path, btrfs_ino(dir),
+ name, name_len, index);
btrfs_free_path(path);
out_unlock:
mutex_unlock(&dir->log_mutex);
- if (err < 0)
+ if (ret < 0)
btrfs_set_log_full_commit(trans);
btrfs_end_log_trans(root);
}
@@ -3576,8 +3679,13 @@ void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
u64 index;
int ret;
- if (!inode_logged(trans, inode))
+ ret = inode_logged(trans, inode, NULL);
+ if (ret == 0)
return;
+ else if (ret < 0) {
+ btrfs_set_log_full_commit(trans);
+ return;
+ }
ret = join_running_log_trans(root);
if (ret)
@@ -3702,19 +3810,20 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_path *path,
struct btrfs_path *dst_path,
- struct btrfs_log_ctx *ctx)
+ struct btrfs_log_ctx *ctx,
+ u64 *last_old_dentry_offset)
{
struct btrfs_root *log = inode->root->log_root;
struct extent_buffer *src = path->nodes[0];
const int nritems = btrfs_header_nritems(src);
const u64 ino = btrfs_ino(inode);
- const bool inode_logged_before = inode_logged(trans, inode);
bool last_found = false;
int batch_start = 0;
int batch_size = 0;
int i;
for (i = path->slots[0]; i < nritems; i++) {
+ struct btrfs_dir_item *di;
struct btrfs_key key;
int ret;
@@ -3725,7 +3834,34 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
break;
}
+ di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
ctx->last_dir_item_offset = key.offset;
+
+ /*
+ * Skip ranges of items that consist only of dir item keys created
+ * in past transactions. However if we find a gap, we must log a
+ * dir index range item for that gap, so that index keys in that
+ * gap are deleted during log replay.
+ */
+ if (btrfs_dir_transid(src, di) < trans->transid) {
+ if (key.offset > *last_old_dentry_offset + 1) {
+ ret = insert_dir_log_key(trans, log, dst_path,
+ ino, *last_old_dentry_offset + 1,
+ key.offset - 1);
+ /*
+ * -EEXIST should never happen because when we
+ * log a directory in full mode (LOG_INODE_ALL)
+ * we drop all BTRFS_DIR_LOG_INDEX_KEY keys from
+ * the log tree.
+ */
+ ASSERT(ret != -EEXIST);
+ if (ret < 0)
+ return ret;
+ }
+
+ *last_old_dentry_offset = key.offset;
+ continue;
+ }
/*
* We must make sure that when we log a directory entry, the
* corresponding inode, after log replay, has a matching link
@@ -3749,25 +3885,23 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
* resulting in -ENOTEMPTY errors.
*/
if (!ctx->log_new_dentries) {
- struct btrfs_dir_item *di;
struct btrfs_key di_key;
- di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
btrfs_dir_item_key_to_cpu(src, di, &di_key);
- if ((btrfs_dir_transid(src, di) == trans->transid ||
- btrfs_dir_type(src, di) == BTRFS_FT_DIR) &&
- di_key.type != BTRFS_ROOT_ITEM_KEY)
+ if (di_key.type != BTRFS_ROOT_ITEM_KEY)
ctx->log_new_dentries = true;
}
- if (!inode_logged_before)
+ if (!ctx->logged_before)
goto add_to_batch;
/*
* If we were logged before and have logged dir items, we can skip
* checking if any item with a key offset larger than the last one
* we logged is in the log tree, saving time and avoiding adding
- * contention on the log tree.
+ * contention on the log tree. We can only rely on the value of
+ * last_dir_index_offset when we know for sure that the inode was
+ * previously logged in the current transaction.
*/
if (key.offset > inode->last_dir_index_offset)
goto add_to_batch;
@@ -3837,7 +3971,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
struct btrfs_root *log = root->log_root;
int err = 0;
int ret;
- u64 first_offset = min_offset;
+ u64 last_old_dentry_offset = min_offset - 1;
u64 last_offset = (u64)-1;
u64 ino = btrfs_ino(inode);
@@ -3871,10 +4005,11 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
*/
if (ret == 0) {
struct btrfs_key tmp;
+
btrfs_item_key_to_cpu(path->nodes[0], &tmp,
path->slots[0]);
if (tmp.type == BTRFS_DIR_INDEX_KEY)
- first_offset = max(min_offset, tmp.offset) + 1;
+ last_old_dentry_offset = tmp.offset;
}
goto done;
}
@@ -3883,17 +4018,18 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
ret = btrfs_previous_item(root, path, ino, BTRFS_DIR_INDEX_KEY);
if (ret == 0) {
struct btrfs_key tmp;
+
btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
- if (tmp.type == BTRFS_DIR_INDEX_KEY) {
- first_offset = tmp.offset;
- ret = overwrite_item(trans, log, dst_path,
- path->nodes[0], path->slots[0],
- &tmp);
- if (ret) {
- err = ret;
- goto done;
- }
- }
+ /*
+ * The dir index key before the first one we found that needs to
+ * be logged might be in a previous leaf, and there might be a
+ * gap between these keys, meaning that we had deletions that
+ * happened. So the key range item we log (key type
+ * BTRFS_DIR_LOG_INDEX_KEY) must cover a range that starts at the
+ * previous key's offset plus 1, so that those deletes are replayed.
+ */
+ if (tmp.type == BTRFS_DIR_INDEX_KEY)
+ last_old_dentry_offset = tmp.offset;
}
btrfs_release_path(path);
@@ -3915,7 +4051,8 @@ search:
* from our directory
*/
while (1) {
- ret = process_dir_items_leaf(trans, inode, path, dst_path, ctx);
+ ret = process_dir_items_leaf(trans, inode, path, dst_path, ctx,
+ &last_old_dentry_offset);
if (ret != 0) {
if (ret < 0)
err = ret;
@@ -3941,14 +4078,16 @@ search:
goto done;
}
if (btrfs_header_generation(path->nodes[0]) != trans->transid) {
- ctx->last_dir_item_offset = min_key.offset;
- ret = overwrite_item(trans, log, dst_path,
- path->nodes[0], path->slots[0],
- &min_key);
- if (ret)
- err = ret;
- else
- last_offset = min_key.offset;
+ /*
+ * The next leaf was not changed in the current transaction
+ * and has at least one dir index key.
+ * We check for the next key because there might have been
+ * one or more deletions between the last key we logged and
+ * that next key. So the key range item we log (key type
+ * BTRFS_DIR_LOG_INDEX_KEY) must end at the next key's
+ * offset minus 1, so that those deletes are replayed.
+ */
+ last_offset = min_key.offset - 1;
goto done;
}
if (need_resched()) {
@@ -3964,13 +4103,21 @@ done:
if (err == 0) {
*last_offset_ret = last_offset;
/*
- * insert the log range keys to indicate where the log
- * is valid
+ * If the leaf was changed in the current transaction but all its
+ * dir items are from a past transaction, the last item in the
+ * leaf is a dir item, and there's no gap between that last dir
+ * item and the first one on the next leaf (which did not change
+ * in the current transaction), then we don't need to log a
+ * range: last_old_dentry_offset is equal to last_offset.
*/
- ret = insert_dir_log_key(trans, log, path, ino, first_offset,
- last_offset);
- if (ret)
- err = ret;
+ ASSERT(last_old_dentry_offset <= last_offset);
+ if (last_old_dentry_offset < last_offset) {
+ ret = insert_dir_log_key(trans, log, path, ino,
+ last_old_dentry_offset + 1,
+ last_offset);
+ if (ret)
+ err = ret;
+ }
}
return err;
}
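A sketch of the gap computation log_dir_items now performs: while walking dir index offsets that all come from past transactions, emit a range item for every gap so that log replay deletes the entries that used to fill it. The offsets below are made up.

#include <stdio.h>

int main(void)
{
	/* Dir index offsets still present, all from past transactions. */
	const unsigned long long offsets[] = { 3, 4, 8, 9, 15 };
	unsigned long long last = 2;	/* min_offset - 1 */

	for (int i = 0; i < 5; i++) {
		if (offsets[i] > last + 1)	/* deletion gap found */
			printf("log range [%llu, %llu] for replay deletion\n",
			       last + 1, offsets[i] - 1);
		last = offsets[i];
	}
	return 0;	/* prints [5, 7] and [10, 14] */
}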
@@ -3997,22 +4144,7 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
u64 max_key;
int ret;
- /*
- * If this is the first time we are being logged in the current
- * transaction, or we were logged before but the inode was evicted and
- * reloaded later, in which case its logged_trans is 0, reset the value
- * of the last logged key offset. Note that we don't use the helper
- * function inode_logged() here - that is because the function returns
- * true after an inode eviction, assuming the worst case as it can not
- * know for sure if the inode was logged before. So we can not skip key
- * searches in the case the inode was evicted, because it may not have
- * been logged in this transaction and may have been logged in a past
- * transaction, so we need to reset the last dir index offset to (u64)-1.
- */
- if (inode->logged_trans != trans->transid)
- inode->last_dir_index_offset = (u64)-1;
-
- min_key = 0;
+ min_key = BTRFS_DIR_START_INDEX;
max_key = 0;
ctx->last_dir_item_offset = inode->last_dir_index_offset;
@@ -4048,9 +4180,6 @@ static int drop_inode_items(struct btrfs_trans_handle *trans,
struct btrfs_key found_key;
int start_slot;
- if (!inode_logged(trans, inode))
- return 0;
-
key.objectid = btrfs_ino(inode);
key.type = max_key_type;
key.offset = (u64)-1;
@@ -4270,23 +4399,18 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
int start_slot, int nr, int inode_only,
u64 logged_isize)
{
- struct btrfs_fs_info *fs_info = trans->fs_info;
- unsigned long src_offset;
- unsigned long dst_offset;
struct btrfs_root *log = inode->root->log_root;
struct btrfs_file_extent_item *extent;
- struct btrfs_inode_item *inode_item;
struct extent_buffer *src = src_path->nodes[0];
- int ret;
+ int ret = 0;
struct btrfs_key *ins_keys;
u32 *ins_sizes;
struct btrfs_item_batch batch;
char *ins_data;
int i;
- struct list_head ordered_sums;
- int skip_csum = inode->flags & BTRFS_INODE_NODATASUM;
-
- INIT_LIST_HEAD(&ordered_sums);
+ int dst_index;
+ const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM);
+ const u64 i_size = i_size_read(&inode->vfs_inode);
ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
nr * sizeof(u32), GFP_NOFS);
@@ -4298,28 +4422,152 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
batch.keys = ins_keys;
batch.data_sizes = ins_sizes;
batch.total_data_size = 0;
- batch.nr = nr;
+ batch.nr = 0;
+ dst_index = 0;
for (i = 0; i < nr; i++) {
- ins_sizes[i] = btrfs_item_size(src, i + start_slot);
- batch.total_data_size += ins_sizes[i];
- btrfs_item_key_to_cpu(src, ins_keys + i, i + start_slot);
+ const int src_slot = start_slot + i;
+ struct btrfs_root *csum_root;
+ struct btrfs_ordered_sum *sums;
+ struct btrfs_ordered_sum *sums_next;
+ LIST_HEAD(ordered_sums);
+ u64 disk_bytenr;
+ u64 disk_num_bytes;
+ u64 extent_offset;
+ u64 extent_num_bytes;
+ bool is_old_extent;
+
+ btrfs_item_key_to_cpu(src, &ins_keys[dst_index], src_slot);
+
+ if (ins_keys[dst_index].type != BTRFS_EXTENT_DATA_KEY)
+ goto add_to_batch;
+
+ extent = btrfs_item_ptr(src, src_slot,
+ struct btrfs_file_extent_item);
+
+ is_old_extent = (btrfs_file_extent_generation(src, extent) <
+ trans->transid);
+
+ /*
+ * Don't copy extents from past generations. That would make us
+ * log a lot more metadata for common cases like doing only a
+ * few random writes into a file and then fsyncing it for the first
+ * time or after the full sync flag is set on the inode. We can
+ * get leaves full of extent items, most of which are from past
+ * generations, so we can skip them - as long as the inode has
+ * not been the target of a reflink operation in this transaction,
+ * as in that case it might have had file extent items with old
+ * generations copied into it. We also must always log prealloc
+ * extents that start at or beyond eof, otherwise we would lose
+ * them on log replay.
+ */
+ if (is_old_extent &&
+ ins_keys[dst_index].offset < i_size &&
+ inode->last_reflink_trans < trans->transid)
+ continue;
+
+ if (skip_csum)
+ goto add_to_batch;
+
+ /* Only regular extents have checksums. */
+ if (btrfs_file_extent_type(src, extent) != BTRFS_FILE_EXTENT_REG)
+ goto add_to_batch;
+
+ /*
+ * If it's an extent created in a past transaction, then its
+ * checksums are already accessible from the committed csum tree,
+ * no need to log them.
+ */
+ if (is_old_extent)
+ goto add_to_batch;
+
+ disk_bytenr = btrfs_file_extent_disk_bytenr(src, extent);
+ /* If it's an explicit hole, there are no checksums. */
+ if (disk_bytenr == 0)
+ goto add_to_batch;
+
+ disk_num_bytes = btrfs_file_extent_disk_num_bytes(src, extent);
+
+ if (btrfs_file_extent_compression(src, extent)) {
+ extent_offset = 0;
+ extent_num_bytes = disk_num_bytes;
+ } else {
+ extent_offset = btrfs_file_extent_offset(src, extent);
+ extent_num_bytes = btrfs_file_extent_num_bytes(src, extent);
+ }
+
+ csum_root = btrfs_csum_root(trans->fs_info, disk_bytenr);
+ disk_bytenr += extent_offset;
+ ret = btrfs_lookup_csums_range(csum_root, disk_bytenr,
+ disk_bytenr + extent_num_bytes - 1,
+ &ordered_sums, 0);
+ if (ret)
+ goto out;
+
+ list_for_each_entry_safe(sums, sums_next, &ordered_sums, list) {
+ if (!ret)
+ ret = log_csums(trans, inode, log, sums);
+ list_del(&sums->list);
+ kfree(sums);
+ }
+ if (ret)
+ goto out;
+
+add_to_batch:
+ ins_sizes[dst_index] = btrfs_item_size(src, src_slot);
+ batch.total_data_size += ins_sizes[dst_index];
+ batch.nr++;
+ dst_index++;
}
+
+ /*
+ * We have a leaf full of old extent items that don't need to be logged,
+ * so we don't need to do anything.
+ */
+ if (batch.nr == 0)
+ goto out;
+
ret = btrfs_insert_empty_items(trans, log, dst_path, &batch);
- if (ret) {
- kfree(ins_data);
- return ret;
- }
+ if (ret)
+ goto out;
+
+ dst_index = 0;
+ for (i = 0; i < nr; i++) {
+ const int src_slot = start_slot + i;
+ const int dst_slot = dst_path->slots[0] + dst_index;
+ struct btrfs_key key;
+ unsigned long src_offset;
+ unsigned long dst_offset;
+
+ /*
+ * We're done; all the remaining items in the source leaf
+ * correspond to old file extent items.
+ */
+ if (dst_index >= batch.nr)
+ break;
+
+ btrfs_item_key_to_cpu(src, &key, src_slot);
- for (i = 0; i < nr; i++, dst_path->slots[0]++) {
- dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0],
- dst_path->slots[0]);
+ if (key.type != BTRFS_EXTENT_DATA_KEY)
+ goto copy_item;
- src_offset = btrfs_item_ptr_offset(src, start_slot + i);
+ extent = btrfs_item_ptr(src, src_slot,
+ struct btrfs_file_extent_item);
- if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
- inode_item = btrfs_item_ptr(dst_path->nodes[0],
- dst_path->slots[0],
+ /* See the comment in the previous loop, same logic. */
+ if (btrfs_file_extent_generation(src, extent) < trans->transid &&
+ key.offset < i_size &&
+ inode->last_reflink_trans < trans->transid)
+ continue;
+
+copy_item:
+ dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], dst_slot);
+ src_offset = btrfs_item_ptr_offset(src, src_slot);
+
+ if (key.type == BTRFS_INODE_ITEM_KEY) {
+ struct btrfs_inode_item *inode_item;
+
+ inode_item = btrfs_item_ptr(dst_path->nodes[0], dst_slot,
struct btrfs_inode_item);
fill_inode_item(trans, dst_path->nodes[0], inode_item,
&inode->vfs_inode,
@@ -4327,71 +4575,17 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
logged_isize);
} else {
copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
- src_offset, ins_sizes[i]);
+ src_offset, ins_sizes[dst_index]);
}
- /* take a reference on file data extents so that truncates
- * or deletes of this inode don't have to relog the inode
- * again
- */
- if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY &&
- !skip_csum) {
- int found_type;
- extent = btrfs_item_ptr(src, start_slot + i,
- struct btrfs_file_extent_item);
-
- if (btrfs_file_extent_generation(src, extent) < trans->transid)
- continue;
-
- found_type = btrfs_file_extent_type(src, extent);
- if (found_type == BTRFS_FILE_EXTENT_REG) {
- struct btrfs_root *csum_root;
- u64 ds, dl, cs, cl;
- ds = btrfs_file_extent_disk_bytenr(src,
- extent);
- /* ds == 0 is a hole */
- if (ds == 0)
- continue;
-
- dl = btrfs_file_extent_disk_num_bytes(src,
- extent);
- cs = btrfs_file_extent_offset(src, extent);
- cl = btrfs_file_extent_num_bytes(src,
- extent);
- if (btrfs_file_extent_compression(src,
- extent)) {
- cs = 0;
- cl = dl;
- }
-
- csum_root = btrfs_csum_root(fs_info, ds);
- ret = btrfs_lookup_csums_range(csum_root,
- ds + cs, ds + cs + cl - 1,
- &ordered_sums, 0);
- if (ret)
- break;
- }
- }
+ dst_index++;
}
btrfs_mark_buffer_dirty(dst_path->nodes[0]);
btrfs_release_path(dst_path);
+out:
kfree(ins_data);
- /*
- * we have to do this after the loop above to avoid changing the
- * log tree while trying to change the log tree.
- */
- while (!list_empty(&ordered_sums)) {
- struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
- struct btrfs_ordered_sum,
- list);
- if (!ret)
- ret = log_csums(trans, inode, log, sums);
- list_del(&sums->list);
- kfree(sums);
- }
-
return ret;
}
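A sketch of the two-pass batching pattern copy_items was restructured into above: the first pass selects which source slots to keep and sizes the batch, the second copies only those slots. The filter is reduced here to "keep current-generation items" for illustration.

#include <stdio.h>

#define NR 5

int main(void)
{
	const unsigned long gen[NR] = { 7, 9, 7, 9, 9 };
	const unsigned long transid = 9;
	int keep[NR], batch_nr = 0;

	/* Pass 1: select items and count the batch. */
	for (int i = 0; i < NR; i++)
		if (gen[i] >= transid)
			keep[batch_nr++] = i;

	/* Pass 2: copy only the selected items. */
	for (int i = 0; i < batch_nr; i++)
		printf("copy src slot %d -> dst slot %d\n", keep[i], i);
	return 0;
}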
@@ -4527,14 +4721,34 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
{
struct btrfs_drop_extents_args drop_args = { 0 };
struct btrfs_root *log = inode->root->log_root;
- struct btrfs_file_extent_item *fi;
+ struct btrfs_file_extent_item fi = { 0 };
struct extent_buffer *leaf;
- struct btrfs_map_token token;
struct btrfs_key key;
u64 extent_offset = em->start - em->orig_start;
u64 block_len;
int ret;
+ btrfs_set_stack_file_extent_generation(&fi, trans->transid);
+ if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+ btrfs_set_stack_file_extent_type(&fi, BTRFS_FILE_EXTENT_PREALLOC);
+ else
+ btrfs_set_stack_file_extent_type(&fi, BTRFS_FILE_EXTENT_REG);
+
+ block_len = max(em->block_len, em->orig_block_len);
+ if (em->compress_type != BTRFS_COMPRESS_NONE) {
+ btrfs_set_stack_file_extent_disk_bytenr(&fi, em->block_start);
+ btrfs_set_stack_file_extent_disk_num_bytes(&fi, block_len);
+ } else if (em->block_start < EXTENT_MAP_LAST_BYTE) {
+ btrfs_set_stack_file_extent_disk_bytenr(&fi, em->block_start -
+ extent_offset);
+ btrfs_set_stack_file_extent_disk_num_bytes(&fi, block_len);
+ }
+
+ btrfs_set_stack_file_extent_offset(&fi, extent_offset);
+ btrfs_set_stack_file_extent_num_bytes(&fi, em->len);
+ btrfs_set_stack_file_extent_ram_bytes(&fi, em->ram_bytes);
+ btrfs_set_stack_file_extent_compression(&fi, em->compress_type);
+
ret = log_extent_csums(trans, inode, log, em, ctx);
if (ret)
return ret;
@@ -4548,12 +4762,12 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
* are small, with a root at level 2 or 3 at most, due to their short
* life span.
*/
- if (inode_logged(trans, inode)) {
+ if (ctx->logged_before) {
drop_args.path = path;
drop_args.start = em->start;
drop_args.end = em->start + em->len;
drop_args.replace_extent = true;
- drop_args.extent_item_size = sizeof(*fi);
+ drop_args.extent_item_size = sizeof(fi);
ret = btrfs_drop_extents(trans, log, inode, &drop_args);
if (ret)
return ret;
@@ -4565,44 +4779,14 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
key.offset = em->start;
ret = btrfs_insert_empty_item(trans, log, path, &key,
- sizeof(*fi));
+ sizeof(fi));
if (ret)
return ret;
}
leaf = path->nodes[0];
- btrfs_init_map_token(&token, leaf);
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
-
- btrfs_set_token_file_extent_generation(&token, fi, trans->transid);
- if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
- btrfs_set_token_file_extent_type(&token, fi,
- BTRFS_FILE_EXTENT_PREALLOC);
- else
- btrfs_set_token_file_extent_type(&token, fi,
- BTRFS_FILE_EXTENT_REG);
-
- block_len = max(em->block_len, em->orig_block_len);
- if (em->compress_type != BTRFS_COMPRESS_NONE) {
- btrfs_set_token_file_extent_disk_bytenr(&token, fi,
- em->block_start);
- btrfs_set_token_file_extent_disk_num_bytes(&token, fi, block_len);
- } else if (em->block_start < EXTENT_MAP_LAST_BYTE) {
- btrfs_set_token_file_extent_disk_bytenr(&token, fi,
- em->block_start -
- extent_offset);
- btrfs_set_token_file_extent_disk_num_bytes(&token, fi, block_len);
- } else {
- btrfs_set_token_file_extent_disk_bytenr(&token, fi, 0);
- btrfs_set_token_file_extent_disk_num_bytes(&token, fi, 0);
- }
-
- btrfs_set_token_file_extent_offset(&token, fi, extent_offset);
- btrfs_set_token_file_extent_num_bytes(&token, fi, em->len);
- btrfs_set_token_file_extent_ram_bytes(&token, fi, em->ram_bytes);
- btrfs_set_token_file_extent_compression(&token, fi, em->compress_type);
- btrfs_set_token_file_extent_encryption(&token, fi, 0);
- btrfs_set_token_file_extent_other_encoding(&token, fi, 0);
+ write_extent_buffer(leaf, &fi,
+ btrfs_item_ptr_offset(leaf, path->slots[0]),
+ sizeof(fi));
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
@@ -4612,7 +4796,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
/*
* Log all prealloc extents beyond the inode's i_size to make sure we do not
- * lose them after doing a fast fsync and replaying the log. We scan the
+ * lose them after doing a full/fast fsync and replaying the log. We scan the
* subvolume's root instead of iterating the inode's extent map tree because
* otherwise we can log incorrect extent items based on extent map conversion.
* That can happen due to the fact that extent maps are merged when they
@@ -4816,7 +5000,6 @@ process:
WARN_ON(!list_empty(&extents));
write_unlock(&tree->lock);
- btrfs_release_path(path);
if (!ret)
ret = btrfs_log_prealloc_extents(trans, inode, path);
if (ret)
@@ -5391,6 +5574,7 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans,
struct btrfs_log_ctx *ctx,
bool *need_log_inode_item)
{
+ const u64 i_size = i_size_read(&inode->vfs_inode);
struct btrfs_root *root = inode->root;
int ins_start_slot = 0;
int ins_nr = 0;
@@ -5411,13 +5595,21 @@ again:
if (min_key->type > max_key->type)
break;
- if (min_key->type == BTRFS_INODE_ITEM_KEY)
+ if (min_key->type == BTRFS_INODE_ITEM_KEY) {
*need_log_inode_item = false;
-
- if ((min_key->type == BTRFS_INODE_REF_KEY ||
- min_key->type == BTRFS_INODE_EXTREF_KEY) &&
- inode->generation == trans->transid &&
- !recursive_logging) {
+ } else if (min_key->type == BTRFS_EXTENT_DATA_KEY &&
+ min_key->offset >= i_size) {
+ /*
+ * Extents at and beyond eof are logged with
+ * btrfs_log_prealloc_extents().
+ * Only regular files have BTRFS_EXTENT_DATA_KEY keys,
+ * and no keys greater than that, so bail out.
+ */
+ break;
+ } else if ((min_key->type == BTRFS_INODE_REF_KEY ||
+ min_key->type == BTRFS_INODE_EXTREF_KEY) &&
+ inode->generation == trans->transid &&
+ !recursive_logging) {
u64 other_ino = 0;
u64 other_parent = 0;
@@ -5448,10 +5640,8 @@ again:
btrfs_release_path(path);
goto next_key;
}
- }
-
- /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
- if (min_key->type == BTRFS_XATTR_ITEM_KEY) {
+ } else if (min_key->type == BTRFS_XATTR_ITEM_KEY) {
+ /* Skip xattrs, logged later with btrfs_log_all_xattrs() */
if (ins_nr == 0)
goto next_slot;
ret = copy_items(trans, inode, dst_path, path,
@@ -5503,10 +5693,29 @@ next_key:
} else {
break;
}
+
+ /*
+ * We may process many leaves full of items for our inode, so
+ * avoid monopolizing a cpu for too long by rescheduling while
+ * not holding locks on any tree.
+ */
+ cond_resched();
}
- if (ins_nr)
+ if (ins_nr) {
ret = copy_items(trans, inode, dst_path, path, ins_start_slot,
ins_nr, inode_only, logged_isize);
+ if (ret)
+ return ret;
+ }
+
+ if (inode_only == LOG_INODE_ALL && S_ISREG(inode->vfs_inode.i_mode)) {
+ /*
+ * Release the path because otherwise we might attempt to double
+ * lock the same leaf with btrfs_log_prealloc_extents() below.
+ */
+ btrfs_release_path(path);
+ ret = btrfs_log_prealloc_extents(trans, inode, dst_path);
+ }
return ret;
}
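The copy_inode_items_to_log() changes above reduce to the following skeleton: stop copying file extent items once they reach i_size, reschedule between leaves, and log the prealloc extents beyond eof at the end with the search path released (a paraphrase of the hunks, not a drop-in):

	const u64 i_size = i_size_read(&inode->vfs_inode);

	while (/* more keys for this inode */) {
		if (min_key->type == BTRFS_EXTENT_DATA_KEY &&
		    min_key->offset >= i_size)
			break;		/* eof+ extents are logged below */
		/* ... copy or skip the current item as before ... */
		cond_resched();		/* may walk many full leaves */
	}

	if (ins_nr) {
		ret = copy_items(trans, inode, dst_path, path, ins_start_slot,
				 ins_nr, inode_only, logged_isize);
		if (ret)
			return ret;
	}

	if (inode_only == LOG_INODE_ALL && S_ISREG(inode->vfs_inode.i_mode)) {
		btrfs_release_path(path);	/* avoid double-locking a leaf */
		ret = btrfs_log_prealloc_extents(trans, inode, dst_path);
	}
	return ret;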
@@ -5535,8 +5744,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_key min_key;
struct btrfs_key max_key;
struct btrfs_root *log = inode->root->log_root;
- int err = 0;
- int ret = 0;
+ int ret;
bool fast_search = false;
u64 ino = btrfs_ino(inode);
struct extent_map_tree *em_tree = &inode->extent_tree;
@@ -5545,6 +5753,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
bool xattrs_logged = false;
bool recursive_logging = false;
bool inode_item_dropped = true;
+ const bool orig_logged_before = ctx->logged_before;
path = btrfs_alloc_path();
if (!path)
@@ -5578,8 +5787,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
* and figure out which index ranges have to be logged.
*/
if (S_ISDIR(inode->vfs_inode.i_mode)) {
- err = btrfs_commit_inode_delayed_items(trans, inode);
- if (err)
+ ret = btrfs_commit_inode_delayed_items(trans, inode);
+ if (ret)
goto out;
}
@@ -5595,6 +5804,17 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
}
/*
+ * Before logging the inode item, cache the value returned by
+	 * inode_logged(), because after that point we still need to know
+	 * whether the inode was previously logged in this transaction.
+ */
+ ret = inode_logged(trans, inode, path);
+ if (ret < 0)
+ goto out_unlock;
+ ctx->logged_before = (ret == 1);
+ ret = 0;
+
+ /*
 	 * This is for cases where logging a directory could result in losing
 	 * a file after replaying the log. For example, if we move a file from a
* directory A to a directory B, then fsync directory A, we have no way
@@ -5605,7 +5825,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
inode_only == LOG_INODE_ALL &&
inode->last_unlink_trans >= trans->transid) {
btrfs_set_log_full_commit(trans);
- err = 1;
+ ret = 1;
goto out_unlock;
}
@@ -5619,9 +5839,11 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
clear_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags);
if (inode_only == LOG_INODE_EXISTS)
max_key_type = BTRFS_XATTR_ITEM_KEY;
- ret = drop_inode_items(trans, log, path, inode, max_key_type);
+ if (ctx->logged_before)
+ ret = drop_inode_items(trans, log, path, inode,
+ max_key_type);
} else {
- if (inode_only == LOG_INODE_EXISTS && inode_logged(trans, inode)) {
+ if (inode_only == LOG_INODE_EXISTS && ctx->logged_before) {
/*
* Make sure the new inode item we write to the log has
* the same isize as the current one (if it exists).
@@ -5635,22 +5857,23 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
* (zeroes), as if an expanding truncate happened,
* instead of getting a file of 4Kb only.
*/
- err = logged_inode_size(log, inode, path, &logged_isize);
- if (err)
+ ret = logged_inode_size(log, inode, path, &logged_isize);
+ if (ret)
goto out_unlock;
}
if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&inode->runtime_flags)) {
if (inode_only == LOG_INODE_EXISTS) {
max_key.type = BTRFS_XATTR_ITEM_KEY;
- ret = drop_inode_items(trans, log, path, inode,
- max_key.type);
+ if (ctx->logged_before)
+ ret = drop_inode_items(trans, log, path,
+ inode, max_key.type);
} else {
clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&inode->runtime_flags);
clear_bit(BTRFS_INODE_COPY_EVERYTHING,
&inode->runtime_flags);
- if (inode_logged(trans, inode))
+ if (ctx->logged_before)
ret = truncate_inode_items(trans, log,
inode, 0, 0);
}
@@ -5660,8 +5883,9 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
if (inode_only == LOG_INODE_ALL)
fast_search = true;
max_key.type = BTRFS_XATTR_ITEM_KEY;
- ret = drop_inode_items(trans, log, path, inode,
- max_key.type);
+ if (ctx->logged_before)
+ ret = drop_inode_items(trans, log, path, inode,
+ max_key.type);
} else {
if (inode_only == LOG_INODE_ALL)
fast_search = true;
@@ -5670,37 +5894,35 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
}
}
- if (ret) {
- err = ret;
+ if (ret)
goto out_unlock;
- }
- err = copy_inode_items_to_log(trans, inode, &min_key, &max_key,
+ ret = copy_inode_items_to_log(trans, inode, &min_key, &max_key,
path, dst_path, logged_isize,
recursive_logging, inode_only, ctx,
&need_log_inode_item);
- if (err)
+ if (ret)
goto out_unlock;
btrfs_release_path(path);
btrfs_release_path(dst_path);
- err = btrfs_log_all_xattrs(trans, inode, path, dst_path);
- if (err)
+ ret = btrfs_log_all_xattrs(trans, inode, path, dst_path);
+ if (ret)
goto out_unlock;
xattrs_logged = true;
if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
btrfs_release_path(path);
btrfs_release_path(dst_path);
- err = btrfs_log_holes(trans, inode, path);
- if (err)
+ ret = btrfs_log_holes(trans, inode, path);
+ if (ret)
goto out_unlock;
}
log_extents:
btrfs_release_path(path);
btrfs_release_path(dst_path);
if (need_log_inode_item) {
- err = log_inode_item(trans, log, dst_path, inode, inode_item_dropped);
- if (err)
+ ret = log_inode_item(trans, log, dst_path, inode, inode_item_dropped);
+ if (ret)
goto out_unlock;
/*
* If we are doing a fast fsync and the inode was logged before
@@ -5711,18 +5933,16 @@ log_extents:
* BTRFS_INODE_COPY_EVERYTHING set.
*/
if (!xattrs_logged && inode->logged_trans < trans->transid) {
- err = btrfs_log_all_xattrs(trans, inode, path, dst_path);
- if (err)
+ ret = btrfs_log_all_xattrs(trans, inode, path, dst_path);
+ if (ret)
goto out_unlock;
btrfs_release_path(path);
}
}
if (fast_search) {
ret = btrfs_log_changed_extents(trans, inode, dst_path, ctx);
- if (ret) {
- err = ret;
+ if (ret)
goto out_unlock;
- }
} else if (inode_only == LOG_INODE_ALL) {
struct extent_map *em, *n;
@@ -5734,10 +5954,8 @@ log_extents:
if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->vfs_inode.i_mode)) {
ret = log_directory_changes(trans, inode, path, dst_path, ctx);
- if (ret) {
- err = ret;
+ if (ret)
goto out_unlock;
- }
}
spin_lock(&inode->lock);
@@ -5776,12 +5994,24 @@ log_extents:
if (inode_only != LOG_INODE_EXISTS)
inode->last_log_commit = inode->last_sub_trans;
spin_unlock(&inode->lock);
+
+ /*
+ * Reset the last_reflink_trans so that the next fsync does not need to
+ * go through the slower path when logging extents and their checksums.
+ */
+ if (inode_only == LOG_INODE_ALL)
+ inode->last_reflink_trans = 0;
+
out_unlock:
mutex_unlock(&inode->log_mutex);
out:
btrfs_free_path(path);
btrfs_free_path(dst_path);
- return err;
+
+ if (recursive_logging)
+ ctx->logged_before = orig_logged_before;
+
+ return ret;
}
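The logged_before plumbing added to btrfs_log_inode() is a save/compute/restore pattern around the new ctx field (declared in the tree-log.h hunk further below); isolated, it looks like this (sketch only):

	const bool orig_logged_before = ctx->logged_before;

	/* inode_logged() now returns < 0 on error, 0 for no, 1 for yes. */
	ret = inode_logged(trans, inode, path);
	if (ret < 0)
		goto out_unlock;
	ctx->logged_before = (ret == 1);
	ret = 0;

	/* ... drop/truncate previously logged items only if logged_before ... */

	/* A recursive call must not clobber the caller's cached value. */
	if (recursive_logging)
		ctx->logged_before = orig_logged_before;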
/*
@@ -5866,7 +6096,6 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
struct btrfs_log_ctx *ctx)
{
struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_root *log = root->log_root;
struct btrfs_path *path;
LIST_HEAD(dir_list);
struct btrfs_dir_list *dir_elem;
@@ -5908,7 +6137,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
min_key.offset = 0;
again:
btrfs_release_path(path);
- ret = btrfs_search_forward(log, &min_key, path, trans->transid);
+ ret = btrfs_search_forward(root, &min_key, path, trans->transid);
if (ret < 0) {
goto next_dir_inode;
} else if (ret > 0) {
@@ -5916,7 +6145,6 @@ again:
goto next_dir_inode;
}
-process_leaf:
leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
for (i = path->slots[0]; i < nritems; i++) {
@@ -5934,8 +6162,7 @@ process_leaf:
di = btrfs_item_ptr(leaf, i, struct btrfs_dir_item);
type = btrfs_dir_type(leaf, di);
- if (btrfs_dir_transid(leaf, di) < trans->transid &&
- type != BTRFS_FT_DIR)
+ if (btrfs_dir_transid(leaf, di) < trans->transid)
continue;
btrfs_dir_item_key_to_cpu(leaf, di, &di_key);
if (di_key.type == BTRFS_ROOT_ITEM_KEY)
@@ -5973,16 +6200,6 @@ process_leaf:
}
break;
}
- if (i == nritems) {
- ret = btrfs_next_leaf(log, path);
- if (ret < 0) {
- goto next_dir_inode;
- } else if (ret > 0) {
- ret = 0;
- goto next_dir_inode;
- }
- goto process_leaf;
- }
if (min_key.offset < (u64)-1) {
min_key.offset++;
goto again;
@@ -6713,15 +6930,32 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
mutex_unlock(&dir->log_mutex);
}
-/*
- * Call this after adding a new name for a file and it will properly
- * update the log to reflect the new name.
+/**
+ * Update the log after adding a new name for an inode.
+ *
+ * @trans: Transaction handle.
+ * @old_dentry: The dentry associated with the old name and the old
+ * parent directory.
+ * @old_dir: The inode of the previous parent directory for the case
+ * of a rename. For a link operation, it must be NULL.
+ * @old_dir_index: The index number associated with the old name, meaningful
+ * only for rename operations (when @old_dir is not NULL).
+ * Ignored for link operations.
+ * @parent: The dentry associated with the directory under which the
+ * new name is located.
+ *
+ * Call this after adding a new name for an inode, as a result of a link or
+ * rename operation, and it will properly update the log to reflect the new name.
*/
void btrfs_log_new_name(struct btrfs_trans_handle *trans,
- struct btrfs_inode *inode, struct btrfs_inode *old_dir,
- struct dentry *parent)
+ struct dentry *old_dentry, struct btrfs_inode *old_dir,
+ u64 old_dir_index, struct dentry *parent)
{
+ struct btrfs_inode *inode = BTRFS_I(d_inode(old_dentry));
+ struct btrfs_root *root = inode->root;
struct btrfs_log_ctx ctx;
+ bool log_pinned = false;
+ int ret;
/*
* this will force the logging code to walk the dentry chain
@@ -6734,26 +6968,83 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
 	 * if this inode hasn't been logged and the directory we're renaming it
* from hasn't been logged, we don't need to log it
*/
- if (!inode_logged(trans, inode) &&
- (!old_dir || !inode_logged(trans, old_dir)))
- return;
+ ret = inode_logged(trans, inode, NULL);
+ if (ret < 0) {
+ goto out;
+ } else if (ret == 0) {
+ if (!old_dir)
+ return;
+ /*
+ * If the inode was not logged and we are doing a rename (old_dir is not
+ * NULL), check if old_dir was logged - if it was not we can return and
+ * do nothing.
+ */
+ ret = inode_logged(trans, old_dir, NULL);
+ if (ret < 0)
+ goto out;
+ else if (ret == 0)
+ return;
+ }
+ ret = 0;
/*
* If we are doing a rename (old_dir is not NULL) from a directory that
- * was previously logged, make sure the next log attempt on the directory
- * is not skipped and logs the inode again. This is because the log may
- * not currently be authoritative for a range including the old
- * BTRFS_DIR_INDEX_KEY key, so we want to make sure after a log replay we
- * do not end up with both the new and old dentries around (in case the
- * inode is a directory we would have a directory with two hard links and
- * 2 inode references for different parents). The next log attempt of
- * old_dir will happen at btrfs_log_all_parents(), called through
- * btrfs_log_inode_parent() below, because we have previously set
- * inode->last_unlink_trans to the current transaction ID, either here or
- * at btrfs_record_unlink_dir() in case the inode is a directory.
+ * was previously logged, make sure that on log replay we get the old
+ * dir entry deleted. This is needed because we will also log the new
+ * name of the renamed inode, so we need to make sure that after log
+ * replay we don't end up with both the new and old dir entries existing.
*/
- if (old_dir)
- old_dir->logged_trans = 0;
+ if (old_dir && old_dir->logged_trans == trans->transid) {
+ struct btrfs_root *log = old_dir->root->log_root;
+ struct btrfs_path *path;
+
+ ASSERT(old_dir_index >= BTRFS_DIR_START_INDEX);
+
+ /*
+ * We have two inodes to update in the log, the old directory and
+ * the inode that got renamed, so we must pin the log to prevent
+ * anyone from syncing the log until we have updated both inodes
+ * in the log.
+ */
+ log_pinned = true;
+ btrfs_pin_log_trans(root);
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /*
+	 * Another concurrent task might be logging the old directory,
+	 * as that can be triggered when logging another inode that had or
+ * still has a dentry in the old directory. So take the old
+ * directory's log_mutex to prevent getting an -EEXIST when
+ * logging a key to record the deletion, or having that other
+ * task logging the old directory get an -EEXIST if it attempts
+ * to log the same key after we just did it. In both cases that
+ * would result in falling back to a transaction commit.
+ */
+ mutex_lock(&old_dir->log_mutex);
+ ret = del_logged_dentry(trans, log, path, btrfs_ino(old_dir),
+ old_dentry->d_name.name,
+ old_dentry->d_name.len, old_dir_index);
+ if (ret > 0) {
+ /*
+ * The dentry does not exist in the log, so record its
+ * deletion.
+ */
+ btrfs_release_path(path);
+ ret = insert_dir_log_key(trans, log, path,
+ btrfs_ino(old_dir),
+ old_dir_index, old_dir_index);
+ }
+ mutex_unlock(&old_dir->log_mutex);
+
+ btrfs_free_path(path);
+ if (ret < 0)
+ goto out;
+ }
btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
ctx.logging_new_name = true;
@@ -6765,5 +7056,16 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
* inconsistent state after a rename operation.
*/
btrfs_log_inode_parent(trans, inode, parent, LOG_INODE_EXISTS, &ctx);
+out:
+ /*
+	 * If an error happened, mark the log for a full commit because it's not
+ * consistent and up to date or we couldn't find out if one of the
+ * inodes was logged before in this transaction. Do it before unpinning
+ * the log, to avoid any races with someone else trying to commit it.
+ */
+ if (ret < 0)
+ btrfs_set_log_full_commit(trans);
+ if (log_pinned)
+ btrfs_end_log_trans(root);
}
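Stripped of error handling, the rename path that btrfs_log_new_name() gains above pins the log, deletes (or records the deletion of) the old dentry under the old directory's log_mutex, and only unpins after deciding whether a full commit is needed; roughly:

	if (old_dir && old_dir->logged_trans == trans->transid) {
		/* Two inodes to update: keep the log from syncing in between. */
		log_pinned = true;
		btrfs_pin_log_trans(root);

		mutex_lock(&old_dir->log_mutex);
		ret = del_logged_dentry(trans, log, path, btrfs_ino(old_dir),
					old_dentry->d_name.name,
					old_dentry->d_name.len, old_dir_index);
		if (ret > 0)
			/* Dentry not in the log: record its deletion instead. */
			ret = insert_dir_log_key(trans, log, path,
						 btrfs_ino(old_dir),
						 old_dir_index, old_dir_index);
		mutex_unlock(&old_dir->log_mutex);
	}
	/* ... log the inode under its new name ... */
	if (ret < 0)
		btrfs_set_log_full_commit(trans);	/* before unpinning */
	if (log_pinned)
		btrfs_end_log_trans(root);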
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index f6811c3df38a..1620f8170629 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -17,6 +17,8 @@ struct btrfs_log_ctx {
int log_transid;
bool log_new_dentries;
bool logging_new_name;
+ /* Indicate if the inode being logged was logged before. */
+ bool logged_before;
/* Tracks the last logged dir item/index key offset. */
u64 last_dir_item_offset;
struct inode *inode;
@@ -32,6 +34,7 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
ctx->log_transid = 0;
ctx->log_new_dentries = false;
ctx->logging_new_name = false;
+ ctx->logged_before = false;
ctx->inode = inode;
INIT_LIST_HEAD(&ctx->list);
INIT_LIST_HEAD(&ctx->ordered_extents);
@@ -86,7 +89,7 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir);
void btrfs_log_new_name(struct btrfs_trans_handle *trans,
- struct btrfs_inode *inode, struct btrfs_inode *old_dir,
- struct dentry *parent);
+ struct dentry *old_dentry, struct btrfs_inode *old_dir,
+ u64 old_dir_index, struct dentry *parent);
#endif
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b07d382d53a8..1be7cb2f955f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -534,30 +534,20 @@ error:
return ret;
}
-static bool device_path_matched(const char *path, struct btrfs_device *device)
-{
- int found;
-
- rcu_read_lock();
- found = strcmp(rcu_str_deref(device->name), path);
- rcu_read_unlock();
-
- return found == 0;
-}
-
-/*
- * Search and remove all stale (devices which are not mounted) devices.
+/**
+ * Search and remove all stale devices (which are not mounted).
* When both inputs are NULL, it will search and release all stale devices.
- * path: Optional. When provided will it release all unmounted devices
- * matching this path only.
- * skip_dev: Optional. Will skip this device when searching for the stale
+ *
+ * @devt: Optional. When provided, it will release all unmounted devices
+ * matching this devt only.
+ * @skip_device: Optional. Will skip this device when searching for the stale
* devices.
- * Return: 0 for success or if @path is NULL.
- * -EBUSY if @path is a mounted device.
- * -ENOENT if @path does not match any device in the list.
+ *
+ * Return: 0 for success or if @devt is 0.
+ * -EBUSY if @devt is a mounted device.
+ * -ENOENT if @devt does not match any device in the list.
*/
-static int btrfs_free_stale_devices(const char *path,
- struct btrfs_device *skip_device)
+static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device)
{
struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
struct btrfs_device *device, *tmp_device;
@@ -565,7 +555,7 @@ static int btrfs_free_stale_devices(const char *path,
lockdep_assert_held(&uuid_mutex);
- if (path)
+ if (devt)
ret = -ENOENT;
list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {
@@ -575,13 +565,11 @@ static int btrfs_free_stale_devices(const char *path,
&fs_devices->devices, dev_list) {
if (skip_device && skip_device == device)
continue;
- if (path && !device->name)
- continue;
- if (path && !device_path_matched(path, device))
+ if (devt && devt != device->devt)
continue;
if (fs_devices->opened) {
/* for an already deleted device return 0 */
- if (path && ret != 0)
+ if (devt && ret != 0)
ret = -EBUSY;
break;
}
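With device matching switched from path strings to device numbers, a caller resolves the path once with lookup_bdev() and then compares plain dev_t values; a minimal sketch of that usage (derived from the device_list_add() and btrfs_forget_devices() hunks in this file):

	dev_t devt;
	int ret;

	/* Resolve the supplied path to its major:minor exactly once. */
	ret = lookup_bdev(path, &devt);
	if (ret)
		return ret;

	/* Stale-device removal is now keyed on the device number alone,
	 * so devices whose path string is unavailable can still match. */
	ret = btrfs_free_stale_devices(devt, NULL);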
@@ -614,7 +602,6 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
struct btrfs_device *device, fmode_t flags,
void *holder)
{
- struct request_queue *q;
struct block_device *bdev;
struct btrfs_super_block *disk_super;
u64 devid;
@@ -656,8 +643,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
}
- q = bdev_get_queue(bdev);
- if (!blk_queue_nonrot(q))
+ if (!blk_queue_nonrot(bdev_get_queue(bdev)))
fs_devices->rotating = true;
device->bdev = bdev;
@@ -781,11 +767,17 @@ static noinline struct btrfs_device *device_list_add(const char *path,
struct rcu_string *name;
u64 found_transid = btrfs_super_generation(disk_super);
u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
+ dev_t path_devt;
+ int error;
bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
bool fsid_change_in_progress = (btrfs_super_flags(disk_super) &
BTRFS_SUPER_FLAG_CHANGING_FSID_V2);
+ error = lookup_bdev(path, &path_devt);
+ if (error)
+ return ERR_PTR(error);
+
if (fsid_change_in_progress) {
if (!has_metadata_uuid)
fs_devices = find_fsid_inprogress(disk_super);
@@ -868,6 +860,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
return ERR_PTR(-ENOMEM);
}
rcu_assign_pointer(device->name, name);
+ device->devt = path_devt;
list_add_rcu(&device->dev_list, &fs_devices->devices);
fs_devices->num_devices++;
@@ -928,25 +921,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
/*
 		 * We are going to replace the device path for a given devid;
 		 * make sure it's the same device if the device is mounted.
+ *
+ * NOTE: the device->fs_info may not be reliable here so pass
+ * in a NULL to message helpers instead. This avoids a possible
+ * use-after-free when the fs_info and fs_info->sb are already
+ * torn down.
*/
if (device->bdev) {
- int error;
- dev_t path_dev;
-
- error = lookup_bdev(path, &path_dev);
- if (error) {
- mutex_unlock(&fs_devices->device_list_mutex);
- return ERR_PTR(error);
- }
-
- if (device->bdev->bd_dev != path_dev) {
+ if (device->devt != path_devt) {
mutex_unlock(&fs_devices->device_list_mutex);
- /*
- * device->fs_info may not be reliable here, so
- * pass in a NULL instead. This avoids a
- * possible use-after-free when the fs_info and
- * fs_info->sb are already torn down.
- */
btrfs_warn_in_rcu(NULL,
"duplicate device %s devid %llu generation %llu scanned by %s (%d)",
path, devid, found_transid,
@@ -954,7 +937,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
task_pid_nr(current));
return ERR_PTR(-EEXIST);
}
- btrfs_info_in_rcu(device->fs_info,
+ btrfs_info_in_rcu(NULL,
"devid %llu device path %s changed to %s scanned by %s (%d)",
devid, rcu_str_deref(device->name),
path, current->comm,
@@ -972,6 +955,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
fs_devices->missing_devices--;
clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
}
+ device->devt = path_devt;
}
/*
@@ -1331,12 +1315,12 @@ static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev
return disk_super;
}
-int btrfs_forget_devices(const char *path)
+int btrfs_forget_devices(dev_t devt)
{
int ret;
mutex_lock(&uuid_mutex);
- ret = btrfs_free_stale_devices(strlen(path) ? path : NULL, NULL);
+ ret = btrfs_free_stale_devices(devt, NULL);
mutex_unlock(&uuid_mutex);
return ret;
@@ -1385,10 +1369,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
}
device = device_list_add(path, disk_super, &new_device_added);
- if (!IS_ERR(device)) {
- if (new_device_added)
- btrfs_free_stale_devices(path, device);
- }
+ if (!IS_ERR(device) && new_device_added)
+ btrfs_free_stale_devices(device->devt, device);
btrfs_release_disk_super(disk_super);
@@ -2102,6 +2084,11 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
u64 num_devices;
int ret = 0;
+ if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+ btrfs_err(fs_info, "device remove not supported on extent tree v2 yet");
+ return -EINVAL;
+ }
+
/*
* The device list in fs_devices is accessed without locks (neither
* uuid_mutex nor device_list_mutex) as it won't change on a mounted
@@ -2606,7 +2593,6 @@ error:
int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
{
struct btrfs_root *root = fs_info->dev_root;
- struct request_queue *q;
struct btrfs_trans_handle *trans;
struct btrfs_device *device;
struct block_device *bdev;
@@ -2668,6 +2654,9 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
device->fs_info = fs_info;
device->bdev = bdev;
+ ret = lookup_bdev(device_path, &device->devt);
+ if (ret)
+ goto error_free_device;
ret = btrfs_get_dev_zone_info(device, false);
if (ret)
@@ -2679,7 +2668,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
goto error_free_zone;
}
- q = bdev_get_queue(bdev);
set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
device->generation = trans->transid;
device->io_width = fs_info->sectorsize;
@@ -2727,7 +2715,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
- if (!blk_queue_nonrot(q))
+ if (!blk_queue_nonrot(bdev_get_queue(bdev)))
fs_devices->rotating = true;
orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
@@ -2814,7 +2802,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
* We can ignore the return value as it typically returns -EINVAL and
* only succeeds if the device was an alien.
*/
- btrfs_forget_devices(device_path);
+ btrfs_forget_devices(device->devt);
/* Update ctime/mtime for blkid or udev */
update_dev_time(device_path);
@@ -3251,6 +3239,12 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
u64 length;
int ret;
+ if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+ btrfs_err(fs_info,
+ "relocate: not supported on extent tree v2 yet");
+ return -EINVAL;
+ }
+
/*
* Prevent races with automatic removal of unused block groups.
* After we relocate and before we remove the chunk with offset
@@ -7060,6 +7054,27 @@ static void warn_32bit_meta_chunk(struct btrfs_fs_info *fs_info,
}
#endif
+static struct btrfs_device *handle_missing_device(struct btrfs_fs_info *fs_info,
+ u64 devid, u8 *uuid)
+{
+ struct btrfs_device *dev;
+
+ if (!btrfs_test_opt(fs_info, DEGRADED)) {
+ btrfs_report_missing_device(fs_info, devid, uuid, true);
+ return ERR_PTR(-ENOENT);
+ }
+
+ dev = add_missing_dev(fs_info->fs_devices, devid, uuid);
+ if (IS_ERR(dev)) {
+ btrfs_err(fs_info, "failed to init missing device %llu: %ld",
+ devid, PTR_ERR(dev));
+ return dev;
+ }
+ btrfs_report_missing_device(fs_info, devid, uuid, false);
+
+ return dev;
+}
+
static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
struct btrfs_chunk *chunk)
{
@@ -7147,28 +7162,17 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
BTRFS_UUID_SIZE);
args.uuid = uuid;
map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices, &args);
- if (!map->stripes[i].dev &&
- !btrfs_test_opt(fs_info, DEGRADED)) {
- free_extent_map(em);
- btrfs_report_missing_device(fs_info, devid, uuid, true);
- return -ENOENT;
- }
if (!map->stripes[i].dev) {
- map->stripes[i].dev =
- add_missing_dev(fs_info->fs_devices, devid,
- uuid);
+ map->stripes[i].dev = handle_missing_device(fs_info,
+ devid, uuid);
if (IS_ERR(map->stripes[i].dev)) {
free_extent_map(em);
- btrfs_err(fs_info,
- "failed to init missing dev %llu: %ld",
- devid, PTR_ERR(map->stripes[i].dev));
return PTR_ERR(map->stripes[i].dev);
}
- btrfs_report_missing_device(fs_info, devid, uuid, false);
}
+
set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
&(map->stripes[i].dev->dev_state));
-
}
write_lock(&map_tree->lock);
@@ -8299,10 +8303,12 @@ static int relocating_repair_kthread(void *data)
target = cache->start;
btrfs_put_block_group(cache);
+ sb_start_write(fs_info->sb);
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
btrfs_info(fs_info,
"zoned: skip relocating block group %llu to repair: EBUSY",
target);
+ sb_end_write(fs_info->sb);
return -EBUSY;
}
@@ -8330,6 +8336,7 @@ out:
btrfs_put_block_group(cache);
mutex_unlock(&fs_info->reclaim_bgs_lock);
btrfs_exclop_finish(fs_info);
+ sb_end_write(fs_info->sb);
return ret;
}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 005c9e2a491a..bd297f23d19e 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -72,6 +72,11 @@ struct btrfs_device {
/* the mode sent to blkdev_get */
fmode_t mode;
+ /*
+ * Device's major-minor number. Must be set even if the device is not
+ * opened (bdev == NULL), unless the device is missing.
+ */
+ dev_t devt;
unsigned long dev_state;
blk_status_t last_flush_error;
@@ -505,7 +510,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fmode_t flags, void *holder);
struct btrfs_device *btrfs_scan_one_device(const char *path,
fmode_t flags, void *holder);
-int btrfs_forget_devices(const char *path);
+int btrfs_forget_devices(dev_t devt);
void btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices);
void btrfs_assign_next_active_device(struct btrfs_device *device,
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index f559d517c7c4..b7b5fac1c779 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -652,8 +652,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
if (model == BLK_ZONED_HM ||
(model == BLK_ZONED_HA && incompat_zoned) ||
(model == BLK_ZONED_NONE && incompat_zoned)) {
- struct btrfs_zoned_device_info *zone_info =
- device->zone_info;
+ struct btrfs_zoned_device_info *zone_info;
zone_info = device->zone_info;
zoned_devices++;
@@ -1215,12 +1214,12 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
struct btrfs_device *device;
u64 logical = cache->start;
u64 length = cache->length;
- u64 physical = 0;
int ret;
int i;
unsigned int nofs_flag;
u64 *alloc_offsets = NULL;
u64 *caps = NULL;
+ u64 *physical = NULL;
unsigned long *active = NULL;
u64 last_alloc = 0;
u32 num_sequential = 0, num_conventional = 0;
@@ -1264,6 +1263,12 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
goto out;
}
+ physical = kcalloc(map->num_stripes, sizeof(*physical), GFP_NOFS);
+ if (!physical) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
active = bitmap_zalloc(map->num_stripes, GFP_NOFS);
if (!active) {
ret = -ENOMEM;
@@ -1277,14 +1282,14 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
int dev_replace_is_ongoing = 0;
device = map->stripes[i].dev;
- physical = map->stripes[i].physical;
+ physical[i] = map->stripes[i].physical;
if (device->bdev == NULL) {
alloc_offsets[i] = WP_MISSING_DEV;
continue;
}
- is_sequential = btrfs_dev_is_sequential(device, physical);
+ is_sequential = btrfs_dev_is_sequential(device, physical[i]);
if (is_sequential)
num_sequential++;
else
@@ -1299,21 +1304,21 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
* This zone will be used for allocation, so mark this zone
* non-empty.
*/
- btrfs_dev_clear_zone_empty(device, physical);
+ btrfs_dev_clear_zone_empty(device, physical[i]);
down_read(&dev_replace->rwsem);
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
- btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical);
+ btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical[i]);
up_read(&dev_replace->rwsem);
/*
* The group is mapped to a sequential zone. Get the zone write
* pointer to determine the allocation offset within the zone.
*/
- WARN_ON(!IS_ALIGNED(physical, fs_info->zone_size));
+ WARN_ON(!IS_ALIGNED(physical[i], fs_info->zone_size));
nofs_flag = memalloc_nofs_save();
- ret = btrfs_get_dev_zone(device, physical, &zone);
+ ret = btrfs_get_dev_zone(device, physical[i], &zone);
memalloc_nofs_restore(nofs_flag);
if (ret == -EIO || ret == -EOPNOTSUPP) {
ret = 0;
@@ -1339,7 +1344,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
case BLK_ZONE_COND_READONLY:
btrfs_err(fs_info,
"zoned: offline/readonly zone %llu on device %s (devid %llu)",
- physical >> device->zone_info->zone_size_shift,
+ physical[i] >> device->zone_info->zone_size_shift,
rcu_str_deref(device->name), device->devid);
alloc_offsets[i] = WP_MISSING_DEV;
break;
@@ -1404,7 +1409,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
if (alloc_offsets[0] == WP_MISSING_DEV) {
btrfs_err(fs_info,
"zoned: cannot recover write pointer for zone %llu",
- physical);
+ physical[0]);
ret = -EIO;
goto out;
}
@@ -1413,6 +1418,42 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
cache->zone_is_active = test_bit(0, active);
break;
case BTRFS_BLOCK_GROUP_DUP:
+ if (map->type & BTRFS_BLOCK_GROUP_DATA) {
+ btrfs_err(fs_info, "zoned: profile DUP not yet supported on data bg");
+ ret = -EINVAL;
+ goto out;
+ }
+ if (alloc_offsets[0] == WP_MISSING_DEV) {
+ btrfs_err(fs_info,
+ "zoned: cannot recover write pointer for zone %llu",
+ physical[0]);
+ ret = -EIO;
+ goto out;
+ }
+ if (alloc_offsets[1] == WP_MISSING_DEV) {
+ btrfs_err(fs_info,
+ "zoned: cannot recover write pointer for zone %llu",
+ physical[1]);
+ ret = -EIO;
+ goto out;
+ }
+ if (alloc_offsets[0] != alloc_offsets[1]) {
+ btrfs_err(fs_info,
+ "zoned: write pointer offset mismatch of zones in DUP profile");
+ ret = -EIO;
+ goto out;
+ }
+ if (test_bit(0, active) != test_bit(1, active)) {
+ if (!btrfs_zone_activate(cache)) {
+ ret = -EIO;
+ goto out;
+ }
+ } else {
+ cache->zone_is_active = test_bit(0, active);
+ }
+ cache->alloc_offset = alloc_offsets[0];
+ cache->zone_capacity = min(caps[0], caps[1]);
+ break;
case BTRFS_BLOCK_GROUP_RAID1:
case BTRFS_BLOCK_GROUP_RAID0:
case BTRFS_BLOCK_GROUP_RAID10:
@@ -1465,6 +1506,7 @@ out:
cache->physical_map = NULL;
}
bitmap_free(active);
+ kfree(physical);
kfree(caps);
kfree(alloc_offsets);
free_extent_map(em);
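The new DUP branch above only trusts a zoned DUP block group when both stripes report consistent state; condensed, the validation it performs is:

	case BTRFS_BLOCK_GROUP_DUP:
		/* Both zones need a recoverable and identical write pointer. */
		if (alloc_offsets[0] == WP_MISSING_DEV ||
		    alloc_offsets[1] == WP_MISSING_DEV ||
		    alloc_offsets[0] != alloc_offsets[1]) {
			ret = -EIO;
			goto out;
		}
		cache->alloc_offset = alloc_offsets[0];
		/* Usable space is bounded by the smaller zone capacity. */
		cache->zone_capacity = min(caps[0], caps[1]);
		break;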
@@ -1781,50 +1823,55 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
struct btrfs_device *device;
u64 physical;
bool ret;
+ int i;
if (!btrfs_is_zoned(block_group->fs_info))
return true;
map = block_group->physical_map;
- /* Currently support SINGLE profile only */
- ASSERT(map->num_stripes == 1);
- device = map->stripes[0].dev;
- physical = map->stripes[0].physical;
-
- if (device->zone_info->max_active_zones == 0)
- return true;
spin_lock(&block_group->lock);
-
if (block_group->zone_is_active) {
ret = true;
goto out_unlock;
}
- /* No space left */
- if (block_group->alloc_offset == block_group->zone_capacity) {
- ret = false;
- goto out_unlock;
- }
+ for (i = 0; i < map->num_stripes; i++) {
+ device = map->stripes[i].dev;
+ physical = map->stripes[i].physical;
- if (!btrfs_dev_set_active_zone(device, physical)) {
- /* Cannot activate the zone */
- ret = false;
- goto out_unlock;
- }
+ if (device->zone_info->max_active_zones == 0)
+ continue;
+
+ /* No space left */
+ if (block_group->alloc_offset == block_group->zone_capacity) {
+ ret = false;
+ goto out_unlock;
+ }
+
+ if (!btrfs_dev_set_active_zone(device, physical)) {
+ /* Cannot activate the zone */
+ ret = false;
+ goto out_unlock;
+ }
+
+ /* Successfully activated all the zones */
+ if (i == map->num_stripes - 1)
+ block_group->zone_is_active = 1;
- /* Successfully activated all the zones */
- block_group->zone_is_active = 1;
+ }
spin_unlock(&block_group->lock);
- /* For the active block group list */
- btrfs_get_block_group(block_group);
+ if (block_group->zone_is_active) {
+ /* For the active block group list */
+ btrfs_get_block_group(block_group);
- spin_lock(&fs_info->zone_active_bgs_lock);
- ASSERT(list_empty(&block_group->active_bg_list));
- list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
- spin_unlock(&fs_info->zone_active_bgs_lock);
+ spin_lock(&fs_info->zone_active_bgs_lock);
+ list_add_tail(&block_group->active_bg_list,
+ &fs_info->zone_active_bgs);
+ spin_unlock(&fs_info->zone_active_bgs_lock);
+ }
return true;
@@ -1840,19 +1887,12 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
struct btrfs_device *device;
u64 physical;
int ret = 0;
+ int i;
if (!btrfs_is_zoned(fs_info))
return 0;
map = block_group->physical_map;
- /* Currently support SINGLE profile only */
- ASSERT(map->num_stripes == 1);
-
- device = map->stripes[0].dev;
- physical = map->stripes[0].physical;
-
- if (device->zone_info->max_active_zones == 0)
- return 0;
spin_lock(&block_group->lock);
if (!block_group->zone_is_active) {
@@ -1904,25 +1944,34 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
btrfs_clear_data_reloc_bg(block_group);
spin_unlock(&block_group->lock);
- ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
- physical >> SECTOR_SHIFT,
- device->zone_info->zone_size >> SECTOR_SHIFT,
- GFP_NOFS);
- btrfs_dec_block_group_ro(block_group);
+ for (i = 0; i < map->num_stripes; i++) {
+ device = map->stripes[i].dev;
+ physical = map->stripes[i].physical;
- if (!ret) {
- btrfs_dev_clear_active_zone(device, physical);
+ if (device->zone_info->max_active_zones == 0)
+ continue;
- spin_lock(&fs_info->zone_active_bgs_lock);
- ASSERT(!list_empty(&block_group->active_bg_list));
- list_del_init(&block_group->active_bg_list);
- spin_unlock(&fs_info->zone_active_bgs_lock);
+ ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
+ physical >> SECTOR_SHIFT,
+ device->zone_info->zone_size >> SECTOR_SHIFT,
+ GFP_NOFS);
- /* For active_bg_list */
- btrfs_put_block_group(block_group);
+ if (ret)
+ return ret;
+
+ btrfs_dev_clear_active_zone(device, physical);
}
+ btrfs_dec_block_group_ro(block_group);
- return ret;
+ spin_lock(&fs_info->zone_active_bgs_lock);
+ ASSERT(!list_empty(&block_group->active_bg_list));
+ list_del_init(&block_group->active_bg_list);
+ spin_unlock(&fs_info->zone_active_bgs_lock);
+
+ /* For active_bg_list */
+ btrfs_put_block_group(block_group);
+
+ return 0;
}
bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
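Both btrfs_zone_activate() and btrfs_zone_finish() drop the SINGLE-profile assumption above and instead iterate every stripe of the chunk mapping; the shared loop shape, reduced to its essentials:

	for (i = 0; i < map->num_stripes; i++) {
		device = map->stripes[i].dev;
		physical = map->stripes[i].physical;

		/* Devices without an active-zone limit need no tracking. */
		if (device->zone_info->max_active_zones == 0)
			continue;

		/* Activate (or finish) this stripe's zone; any failure
		 * aborts before the block group is marked active. */
		if (!btrfs_dev_set_active_zone(device, physical))
			return false;
	}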
diff --git a/fs/buffer.c b/fs/buffer.c
index 8e112b6bd371..ed26cb1d381d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -613,17 +613,14 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
* FIXME: may need to call ->reservepage here as well. That's rather up to the
* address_space though.
*/
-int __set_page_dirty_buffers(struct page *page)
+bool block_dirty_folio(struct address_space *mapping, struct folio *folio)
{
- int newly_dirty;
- struct address_space *mapping = page_mapping(page);
-
- if (unlikely(!mapping))
- return !TestSetPageDirty(page);
+ struct buffer_head *head;
+ bool newly_dirty;
spin_lock(&mapping->private_lock);
- if (page_has_buffers(page)) {
- struct buffer_head *head = page_buffers(page);
+ head = folio_buffers(folio);
+ if (head) {
struct buffer_head *bh = head;
do {
@@ -635,21 +632,21 @@ int __set_page_dirty_buffers(struct page *page)
* Lock out page's memcg migration to keep PageDirty
* synchronized with per-memcg dirty page counters.
*/
- lock_page_memcg(page);
- newly_dirty = !TestSetPageDirty(page);
+ folio_memcg_lock(folio);
+ newly_dirty = !folio_test_set_dirty(folio);
spin_unlock(&mapping->private_lock);
if (newly_dirty)
- __set_page_dirty(page, mapping, 1);
+ __folio_mark_dirty(folio, mapping, 1);
- unlock_page_memcg(page);
+ folio_memcg_unlock(folio);
if (newly_dirty)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
return newly_dirty;
}
-EXPORT_SYMBOL(__set_page_dirty_buffers);
+EXPORT_SYMBOL(block_dirty_folio);
/*
* Write out and wait upon a list of buffers.
@@ -1235,16 +1232,18 @@ static void bh_lru_install(struct buffer_head *bh)
int i;
check_irqs_on();
+ bh_lru_lock();
+
/*
* the refcount of buffer_head in bh_lru prevents dropping the
 	 * attached page (i.e., try_to_free_buffers) so it could cause
 	 * page migration to fail.
* Skip putting upcoming bh into bh_lru until migration is done.
*/
- if (lru_cache_disabled())
+ if (lru_cache_disabled()) {
+ bh_lru_unlock();
return;
-
- bh_lru_lock();
+ }
b = this_cpu_ptr(&bh_lrus);
for (i = 0; i < BH_LRU_SIZE; i++) {
@@ -1482,41 +1481,40 @@ static void discard_buffer(struct buffer_head * bh)
}
/**
- * block_invalidatepage - invalidate part or all of a buffer-backed page
- *
- * @page: the page which is affected
+ * block_invalidate_folio - Invalidate part or all of a buffer-backed folio.
+ * @folio: The folio which is affected.
* @offset: start of the range to invalidate
* @length: length of the range to invalidate
*
- * block_invalidatepage() is called when all or part of the page has become
+ * block_invalidate_folio() is called when all or part of the folio has been
* invalidated by a truncate operation.
*
- * block_invalidatepage() does not have to release all buffers, but it must
+ * block_invalidate_folio() does not have to release all buffers, but it must
* ensure that no dirty buffer is left outside @offset and that no I/O
* is underway against any of the blocks which are outside the truncation
 * point, because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
*/
-void block_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length)
+void block_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
struct buffer_head *head, *bh, *next;
- unsigned int curr_off = 0;
- unsigned int stop = length + offset;
+ size_t curr_off = 0;
+ size_t stop = length + offset;
- BUG_ON(!PageLocked(page));
- if (!page_has_buffers(page))
- goto out;
+ BUG_ON(!folio_test_locked(folio));
/*
* Check for overflow
*/
- BUG_ON(stop > PAGE_SIZE || stop < length);
+ BUG_ON(stop > folio_size(folio) || stop < length);
+
+ head = folio_buffers(folio);
+ if (!head)
+ return;
- head = page_buffers(page);
bh = head;
do {
- unsigned int next_off = curr_off + bh->b_size;
+ size_t next_off = curr_off + bh->b_size;
next = bh->b_this_page;
/*
@@ -1535,21 +1533,21 @@ void block_invalidatepage(struct page *page, unsigned int offset,
} while (bh != head);
/*
- * We release buffers only if the entire page is being invalidated.
+ * We release buffers only if the entire folio is being invalidated.
* The get_block cached value has been unconditionally invalidated,
* so real IO is not possible anymore.
*/
- if (length == PAGE_SIZE)
- try_to_release_page(page, 0);
+ if (length == folio_size(folio))
+ filemap_release_folio(folio, 0);
out:
return;
}
-EXPORT_SYMBOL(block_invalidatepage);
+EXPORT_SYMBOL(block_invalidate_folio);
/*
* We attach and possibly dirty the buffers atomically wrt
- * __set_page_dirty_buffers() via private_lock. try_to_free_buffers
+ * block_dirty_folio() via private_lock. try_to_free_buffers
* is already excluded via the page lock.
*/
void create_empty_buffers(struct page *page,
@@ -1724,12 +1722,12 @@ int __block_write_full_page(struct inode *inode, struct page *page,
(1 << BH_Dirty)|(1 << BH_Uptodate));
/*
- * Be very careful. We have no exclusion from __set_page_dirty_buffers
+ * Be very careful. We have no exclusion from block_dirty_folio
* here, and the (potentially unmapped) buffers may become dirty at
* any time. If a buffer becomes dirty here after we've inspected it
* then we just miss that fact, and the page stays dirty.
*
- * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
+ * Buffers outside i_size may be dirtied by block_dirty_folio;
* handle that here by just cleaning them.
*/
@@ -2206,29 +2204,27 @@ int generic_write_end(struct file *file, struct address_space *mapping,
EXPORT_SYMBOL(generic_write_end);
/*
- * block_is_partially_uptodate checks whether buffers within a page are
+ * block_is_partially_uptodate checks whether buffers within a folio are
* uptodate or not.
*
- * Returns true if all buffers which correspond to a file portion
- * we want to read are uptodate.
+ * Returns true if all buffers which correspond to the specified part
+ * of the folio are uptodate.
*/
-int block_is_partially_uptodate(struct page *page, unsigned long from,
- unsigned long count)
+bool block_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
{
unsigned block_start, block_end, blocksize;
unsigned to;
struct buffer_head *bh, *head;
- int ret = 1;
+ bool ret = true;
- if (!page_has_buffers(page))
- return 0;
-
- head = page_buffers(page);
+ head = folio_buffers(folio);
+ if (!head)
+ return false;
blocksize = head->b_size;
- to = min_t(unsigned, PAGE_SIZE - from, count);
+ to = min_t(unsigned, folio_size(folio) - from, count);
to = from + to;
- if (from < blocksize && to > PAGE_SIZE - blocksize)
- return 0;
+ if (from < blocksize && to > folio_size(folio) - blocksize)
+ return false;
bh = head;
block_start = 0;
@@ -2236,7 +2232,7 @@ int block_is_partially_uptodate(struct page *page, unsigned long from,
block_end = block_start + blocksize;
if (block_end > from && block_start < to) {
if (!buffer_uptodate(bh)) {
- ret = 0;
+ ret = false;
break;
}
if (block_end >= to)
@@ -3024,12 +3020,16 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
clear_buffer_write_io_error(bh);
- bio = bio_alloc(GFP_NOIO, 1);
+ if (buffer_meta(bh))
+ op_flags |= REQ_META;
+ if (buffer_prio(bh))
+ op_flags |= REQ_PRIO;
+
+ bio = bio_alloc(bh->b_bdev, 1, op | op_flags, GFP_NOIO);
fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
- bio_set_dev(bio, bh->b_bdev);
bio->bi_write_hint = write_hint;
bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
@@ -3038,12 +3038,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
bio->bi_end_io = end_bio_bh_io_sync;
bio->bi_private = bh;
- if (buffer_meta(bh))
- op_flags |= REQ_META;
- if (buffer_prio(bh))
- op_flags |= REQ_PRIO;
- bio_set_op_attrs(bio, op, op_flags);
-
/* Take care of bh's that straddle the end of the device */
guard_bio_eod(bio);
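The submit_bh_wbc() hunk tracks the reworked bio_alloc() interface, which now takes the target device and the final operation flags at allocation time, retiring the separate bio_set_dev()/bio_set_op_attrs() calls; the resulting shape is roughly:

	/* op and op_flags must be final before allocating the bio. */
	if (buffer_meta(bh))
		op_flags |= REQ_META;
	if (buffer_prio(bh))
		op_flags |= REQ_PRIO;

	/* Device, vector count, opf and gfp are all passed up front. */
	bio = bio_alloc(bh->b_bdev, 1, op | op_flags, GFP_NOIO);
	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);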
@@ -3185,7 +3179,7 @@ EXPORT_SYMBOL(sync_dirty_buffer);
*
* The same applies to regular filesystem pages: if all the buffers are
* clean then we set the page clean and proceed. To do that, we require
- * total exclusion from __set_page_dirty_buffers(). That is obtained with
+ * total exclusion from block_dirty_folio(). That is obtained with
* private_lock.
*
* try_to_free_buffers() is non-blocking.
@@ -3252,7 +3246,7 @@ int try_to_free_buffers(struct page *page)
* the page also.
*
* private_lock must be held over this entire operation in order
- * to synchronise against __set_page_dirty_buffers and prevent the
+ * to synchronise against block_dirty_folio and prevent the
* dirty bit from being lost.
*/
if (ret)
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 51c968cd00a6..ae93cee9d25d 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -254,7 +254,7 @@ static bool cachefiles_shorten_object(struct cachefiles_object *object,
ret = cachefiles_inject_write_error();
if (ret == 0)
ret = vfs_fallocate(file, FALLOC_FL_ZERO_RANGE,
- new_size, dio_size);
+ new_size, dio_size - new_size);
if (ret < 0) {
trace_cachefiles_io_error(object, file_inode(file), ret,
cachefiles_trace_fallocate_error);
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 04eb52736990..753986ea1583 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -192,6 +192,64 @@ presubmission_error:
}
/*
+ * Query the occupancy of the cache in a region, returning where the next chunk
+ * of data starts and how long it is.
+ */
+static int cachefiles_query_occupancy(struct netfs_cache_resources *cres,
+ loff_t start, size_t len, size_t granularity,
+ loff_t *_data_start, size_t *_data_len)
+{
+ struct cachefiles_object *object;
+ struct file *file;
+ loff_t off, off2;
+
+ *_data_start = -1;
+ *_data_len = 0;
+
+ if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
+ return -ENOBUFS;
+
+ object = cachefiles_cres_object(cres);
+ file = cachefiles_cres_file(cres);
+ granularity = max_t(size_t, object->volume->cache->bsize, granularity);
+
+ _enter("%pD,%li,%llx,%zx/%llx",
+ file, file_inode(file)->i_ino, start, len,
+ i_size_read(file_inode(file)));
+
+ off = cachefiles_inject_read_error();
+ if (off == 0)
+ off = vfs_llseek(file, start, SEEK_DATA);
+ if (off == -ENXIO)
+ return -ENODATA; /* Beyond EOF */
+ if (off < 0 && off >= (loff_t)-MAX_ERRNO)
+ return -ENOBUFS; /* Error. */
+ if (round_up(off, granularity) >= start + len)
+ return -ENODATA; /* No data in range */
+
+ off2 = cachefiles_inject_read_error();
+ if (off2 == 0)
+ off2 = vfs_llseek(file, off, SEEK_HOLE);
+ if (off2 == -ENXIO)
+ return -ENODATA; /* Beyond EOF */
+ if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO)
+ return -ENOBUFS; /* Error. */
+
+ /* Round away partial blocks */
+ off = round_up(off, granularity);
+ off2 = round_down(off2, granularity);
+ if (off2 <= off)
+ return -ENODATA;
+
+ *_data_start = off;
+ if (off2 > start + len)
+ *_data_len = len;
+ else
+ *_data_len = off2 - off;
+ return 0;
+}
+
+/*
* Handle completion of a write to the cache.
*/
static void cachefiles_write_complete(struct kiocb *iocb, long ret)
@@ -545,6 +603,7 @@ static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
.write = cachefiles_write,
.prepare_read = cachefiles_prepare_read,
.prepare_write = cachefiles_prepare_write,
+ .query_occupancy = cachefiles_query_occupancy,
};
/*
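cachefiles_query_occupancy() above is built on SEEK_DATA/SEEK_HOLE pairs against the backing file; the same probing pattern in plain userspace C, for illustration only (the file path is hypothetical):

#define _GNU_SOURCE		/* for SEEK_DATA / SEEK_HOLE */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/sparse.bin", O_RDONLY);	/* hypothetical file */
	off_t data, hole;

	if (fd < 0)
		return 1;
	/* Find the next byte of data at or after offset 0 ... */
	data = lseek(fd, 0, SEEK_DATA);
	if (data < 0)
		return 1;	/* ENXIO: only holes up to EOF */
	/* ... and where that contiguous run of data ends. */
	hole = lseek(fd, data, SEEK_HOLE);
	printf("data: [%lld, %lld)\n", (long long)data, (long long)hole);
	close(fd);
	return 0;
}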
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index 83f41bd0c3a9..35465109d9c4 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -28,6 +28,11 @@ struct cachefiles_xattr {
static const char cachefiles_xattr_cache[] =
XATTR_USER_PREFIX "CacheFiles.cache";
+struct cachefiles_vol_xattr {
+ __be32 reserved; /* Reserved, should be 0 */
+ __u8 data[]; /* netfs volume coherency data */
+} __packed;
+
/*
* set the state xattr on a cache file
*/
@@ -185,6 +190,7 @@ void cachefiles_prepare_to_write(struct fscache_cookie *cookie)
*/
bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume)
{
+ struct cachefiles_vol_xattr *buf;
unsigned int len = volume->vcookie->coherency_len;
const void *p = volume->vcookie->coherency;
struct dentry *dentry = volume->dentry;
@@ -192,10 +198,17 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume)
_enter("%x,#%d", volume->vcookie->debug_id, len);
+ len += sizeof(*buf);
+ buf = kmalloc(len, GFP_KERNEL);
+ if (!buf)
+ return false;
+ buf->reserved = cpu_to_be32(0);
+	/* @len was bumped by sizeof(*buf); @p holds only the coherency data */
+	memcpy(buf->data, p, len - sizeof(*buf));
+
ret = cachefiles_inject_write_error();
if (ret == 0)
ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache,
- p, len, 0);
+ buf, len, 0);
if (ret < 0) {
trace_cachefiles_vfs_error(NULL, d_inode(dentry), ret,
cachefiles_trace_setxattr_error);
@@ -209,6 +222,7 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume)
cachefiles_coherency_vol_set_ok);
}
+ kfree(buf);
_leave(" = %d", ret);
return ret == 0;
}
@@ -218,7 +232,7 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume)
*/
int cachefiles_check_volume_xattr(struct cachefiles_volume *volume)
{
- struct cachefiles_xattr *buf;
+ struct cachefiles_vol_xattr *buf;
struct dentry *dentry = volume->dentry;
unsigned int len = volume->vcookie->coherency_len;
const void *p = volume->vcookie->coherency;
@@ -228,6 +242,7 @@ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume)
_enter("");
+ len += sizeof(*buf);
buf = kmalloc(len, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -245,7 +260,9 @@ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume)
"Failed to read xattr with error %zd", xlen);
}
why = cachefiles_coherency_vol_check_xattr;
- } else if (memcmp(buf->data, p, len) != 0) {
+ } else if (buf->reserved != cpu_to_be32(0)) {
+ why = cachefiles_coherency_vol_check_resv;
+ } else if (memcmp(buf->data, p, len - sizeof(*buf)) != 0) {
why = cachefiles_coherency_vol_check_cmp;
} else {
why = cachefiles_coherency_vol_check_ok;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index c98e5238a1b6..f6135c93ce9d 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -76,18 +76,17 @@ static inline struct ceph_snap_context *page_snap_context(struct page *page)
* Dirty a page. Optimistically adjust accounting, on the assumption
* that we won't race with invalidate. If we do, readjust.
*/
-static int ceph_set_page_dirty(struct page *page)
+static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio)
{
- struct address_space *mapping = page->mapping;
struct inode *inode;
struct ceph_inode_info *ci;
struct ceph_snap_context *snapc;
- if (PageDirty(page)) {
- dout("%p set_page_dirty %p idx %lu -- already dirty\n",
- mapping->host, page, page->index);
- BUG_ON(!PagePrivate(page));
- return 0;
+ if (folio_test_dirty(folio)) {
+ dout("%p dirty_folio %p idx %lu -- already dirty\n",
+ mapping->host, folio, folio->index);
+ BUG_ON(!folio_get_private(folio));
+ return false;
}
inode = mapping->host;
@@ -111,56 +110,56 @@ static int ceph_set_page_dirty(struct page *page)
if (ci->i_wrbuffer_ref == 0)
ihold(inode);
++ci->i_wrbuffer_ref;
- dout("%p set_page_dirty %p idx %lu head %d/%d -> %d/%d "
+ dout("%p dirty_folio %p idx %lu head %d/%d -> %d/%d "
"snapc %p seq %lld (%d snaps)\n",
- mapping->host, page, page->index,
+ mapping->host, folio, folio->index,
ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1,
ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
snapc, snapc->seq, snapc->num_snaps);
spin_unlock(&ci->i_ceph_lock);
/*
- * Reference snap context in page->private. Also set
- * PagePrivate so that we get invalidatepage callback.
+ * Reference snap context in folio->private. Also set
+ * PagePrivate so that we get invalidate_folio callback.
*/
- BUG_ON(PagePrivate(page));
- attach_page_private(page, snapc);
+ BUG_ON(folio_get_private(folio));
+ folio_attach_private(folio, snapc);
- return ceph_fscache_set_page_dirty(page);
+ return ceph_fscache_dirty_folio(mapping, folio);
}
/*
- * If we are truncating the full page (i.e. offset == 0), adjust the
- * dirty page counters appropriately. Only called if there is private
- * data on the page.
+ * If we are truncating the full folio (i.e. offset == 0), adjust the
+ * dirty folio counters appropriately. Only called if there is private
+ * data on the folio.
*/
-static void ceph_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length)
+static void ceph_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length)
{
struct inode *inode;
struct ceph_inode_info *ci;
struct ceph_snap_context *snapc;
- inode = page->mapping->host;
+ inode = folio->mapping->host;
ci = ceph_inode(inode);
- if (offset != 0 || length != thp_size(page)) {
- dout("%p invalidatepage %p idx %lu partial dirty page %u~%u\n",
- inode, page, page->index, offset, length);
+ if (offset != 0 || length != folio_size(folio)) {
+ dout("%p invalidate_folio idx %lu partial dirty page %zu~%zu\n",
+ inode, folio->index, offset, length);
return;
}
- WARN_ON(!PageLocked(page));
- if (PagePrivate(page)) {
- dout("%p invalidatepage %p idx %lu full dirty page\n",
- inode, page, page->index);
+ WARN_ON(!folio_test_locked(folio));
+ if (folio_get_private(folio)) {
+ dout("%p invalidate_folio idx %lu full dirty page\n",
+ inode, folio->index);
- snapc = detach_page_private(page);
+ snapc = folio_detach_private(folio);
ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
ceph_put_snap_context(snapc);
}
- wait_on_page_fscache(page);
+ folio_wait_fscache(folio);
}
static int ceph_releasepage(struct page *page, gfp_t gfp)
@@ -516,6 +515,7 @@ static u64 get_writepages_data_length(struct inode *inode,
*/
static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
{
+ struct folio *folio = page_folio(page);
struct inode *inode = page->mapping->host;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
@@ -550,8 +550,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
/* is this a partial page at end of file? */
if (page_off >= ceph_wbc.i_size) {
- dout("%p page eof %llu\n", page, ceph_wbc.i_size);
- page->mapping->a_ops->invalidatepage(page, 0, thp_size(page));
+ dout("folio at %lu beyond eof %llu\n", folio->index,
+ ceph_wbc.i_size);
+ folio_invalidate(folio, 0, folio_size(folio));
return 0;
}
@@ -563,7 +564,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
if (atomic_long_inc_return(&fsc->writeback_count) >
CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
- set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
+ fsc->write_congested = true;
req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode), page_off, &len, 0, 1,
CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc,
@@ -623,7 +624,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
if (atomic_long_dec_return(&fsc->writeback_count) <
CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb))
- clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
+ fsc->write_congested = false;
return err;
}
@@ -635,6 +636,10 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
BUG_ON(!inode);
ihold(inode);
+ if (wbc->sync_mode == WB_SYNC_NONE &&
+ ceph_inode_to_client(inode)->write_congested)
+ return AOP_WRITEPAGE_ACTIVATE;
+
wait_on_page_fscache(page);
err = writepage_nounlock(page, wbc);
@@ -707,8 +712,7 @@ static void writepages_finish(struct ceph_osd_request *req)
if (atomic_long_dec_return(&fsc->writeback_count) <
CONGESTION_OFF_THRESH(
fsc->mount_options->congestion_kb))
- clear_bdi_congested(inode_to_bdi(inode),
- BLK_RW_ASYNC);
+ fsc->write_congested = false;
ceph_put_snap_context(detach_page_private(page));
end_page_writeback(page);
@@ -760,6 +764,10 @@ static int ceph_writepages_start(struct address_space *mapping,
bool done = false;
bool caching = ceph_is_cache_enabled(inode);
+ if (wbc->sync_mode == WB_SYNC_NONE &&
+ fsc->write_congested)
+ return 0;
+
dout("writepages_start %p (mode=%s)\n", inode,
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
@@ -867,14 +875,16 @@ get_more_pages:
continue;
}
if (page_offset(page) >= ceph_wbc.i_size) {
- dout("%p page eof %llu\n",
- page, ceph_wbc.i_size);
+ struct folio *folio = page_folio(page);
+
+ dout("folio at %lu beyond eof %llu\n",
+ folio->index, ceph_wbc.i_size);
if ((ceph_wbc.size_stable ||
- page_offset(page) >= i_size_read(inode)) &&
- clear_page_dirty_for_io(page))
- mapping->a_ops->invalidatepage(page,
- 0, thp_size(page));
- unlock_page(page);
+ folio_pos(folio) >= i_size_read(inode)) &&
+ folio_clear_dirty_for_io(folio))
+ folio_invalidate(folio, 0,
+ folio_size(folio));
+ folio_unlock(folio);
continue;
}
if (strip_unit_end && (page->index > strip_unit_end)) {
@@ -954,11 +964,8 @@ get_more_pages:
if (atomic_long_inc_return(&fsc->writeback_count) >
CONGESTION_ON_THRESH(
- fsc->mount_options->congestion_kb)) {
- set_bdi_congested(inode_to_bdi(inode),
- BLK_RW_ASYNC);
- }
-
+ fsc->mount_options->congestion_kb))
+ fsc->write_congested = true;
pages[locked_pages++] = page;
pvec.pages[i] = NULL;
@@ -1372,8 +1379,8 @@ const struct address_space_operations ceph_aops = {
.writepages = ceph_writepages_start,
.write_begin = ceph_write_begin,
.write_end = ceph_write_end,
- .set_page_dirty = ceph_set_page_dirty,
- .invalidatepage = ceph_invalidatepage,
+ .dirty_folio = ceph_dirty_folio,
+ .invalidate_folio = ceph_invalidate_folio,
.releasepage = ceph_releasepage,
.direct_IO = noop_direct_IO,
};
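
/*
 * Illustrative sketch, not kernel code: a minimal ->invalidate_folio
 * implementation and its aops wiring, following the pattern the ceph
 * hunks above adopt. The myfs_* names are hypothetical, and the folio
 * private data is assumed to be a kmalloc'd payload.
 */
#include <linux/fs.h>
#include <linux/netfs.h>
#include <linux/pagemap.h>

static void myfs_invalidate_folio(struct folio *folio, size_t offset,
				  size_t length)
{
	/* Partial invalidation: leave any private data attached. */
	if (offset != 0 || length != folio_size(folio))
		return;

	WARN_ON(!folio_test_locked(folio));
	if (folio_get_private(folio))
		kfree(folio_detach_private(folio));	/* assumed kmalloc'd payload */
	folio_wait_fscache(folio);
}

static const struct address_space_operations myfs_aops = {
	.dirty_folio		= filemap_dirty_folio,
	.invalidate_folio	= myfs_invalidate_folio,
};
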
diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h
index 09164389fa66..b90f3016994d 100644
--- a/fs/ceph/cache.h
+++ b/fs/ceph/cache.h
@@ -54,12 +54,12 @@ static inline void ceph_fscache_unpin_writeback(struct inode *inode,
fscache_unpin_writeback(wbc, ceph_fscache_cookie(ceph_inode(inode)));
}
-static inline int ceph_fscache_set_page_dirty(struct page *page)
+static inline int ceph_fscache_dirty_folio(struct address_space *mapping,
+ struct folio *folio)
{
- struct inode *inode = page->mapping->host;
- struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_inode_info *ci = ceph_inode(mapping->host);
- return fscache_set_page_dirty(page, ceph_fscache_cookie(ci));
+ return fscache_dirty_folio(mapping, folio, ceph_fscache_cookie(ci));
}
static inline int ceph_begin_cache_operation(struct netfs_read_request *rreq)
@@ -133,9 +133,10 @@ static inline void ceph_fscache_unpin_writeback(struct inode *inode,
{
}
-static inline int ceph_fscache_set_page_dirty(struct page *page)
+static inline int ceph_fscache_dirty_folio(struct address_space *mapping,
+ struct folio *folio)
{
- return __set_page_dirty_nobuffers(page);
+ return filemap_dirty_folio(mapping, folio);
}
static inline bool ceph_is_cache_enabled(struct inode *inode)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7d305b974824..b472cd066d1c 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2218,6 +2218,7 @@ static int unsafe_request_wait(struct inode *inode)
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_request *req1 = NULL, *req2 = NULL;
+ unsigned int max_sessions;
int ret, err = 0;
spin_lock(&ci->i_unsafe_lock);
@@ -2236,36 +2237,44 @@ static int unsafe_request_wait(struct inode *inode)
spin_unlock(&ci->i_unsafe_lock);
/*
+ * The mdsc->max_sessions is unlikely to change, so take a
+ * snapshot of it here without holding the mdsc->mutex lock;
+ * if it turns out to have grown, retry by reallocating the
+ * sessions array.
+ */
+retry:
+ max_sessions = mdsc->max_sessions;
+
+ /*
* Trigger to flush the journal logs in all the relevant MDSes
 * manually, or in the worst case wait at most 5 seconds for the
 * journal logs to be flushed by the MDSes periodically.
*/
- if (req1 || req2) {
+ if ((req1 || req2) && likely(max_sessions)) {
struct ceph_mds_session **sessions = NULL;
struct ceph_mds_session *s;
struct ceph_mds_request *req;
- unsigned int max;
int i;
- /*
- * The mdsc->max_sessions is unlikely to be changed
- * mostly, here we will retry it by reallocating the
- * sessions arrary memory to get rid of the mdsc->mutex
- * lock.
- */
-retry:
- max = mdsc->max_sessions;
- sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO);
- if (!sessions)
- return -ENOMEM;
+ sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL);
+ if (!sessions) {
+ err = -ENOMEM;
+ goto out;
+ }
spin_lock(&ci->i_unsafe_lock);
if (req1) {
list_for_each_entry(req, &ci->i_unsafe_dirops,
r_unsafe_dir_item) {
s = req->r_session;
- if (unlikely(s->s_mds >= max)) {
+ if (unlikely(s->s_mds >= max_sessions)) {
spin_unlock(&ci->i_unsafe_lock);
+ for (i = 0; i < max_sessions; i++) {
+ s = sessions[i];
+ if (s)
+ ceph_put_mds_session(s);
+ }
+ kfree(sessions);
goto retry;
}
if (!sessions[s->s_mds]) {
@@ -2278,8 +2287,14 @@ retry:
list_for_each_entry(req, &ci->i_unsafe_iops,
r_unsafe_target_item) {
s = req->r_session;
- if (unlikely(s->s_mds >= max)) {
+ if (unlikely(s->s_mds >= max_sessions)) {
spin_unlock(&ci->i_unsafe_lock);
+ for (i = 0; i < max_sessions; i++) {
+ s = sessions[i];
+ if (s)
+ ceph_put_mds_session(s);
+ }
+ kfree(sessions);
goto retry;
}
if (!sessions[s->s_mds]) {
@@ -2300,7 +2315,7 @@ retry:
spin_unlock(&ci->i_ceph_lock);
/* send flush mdlog request to MDSes */
- for (i = 0; i < max; i++) {
+ for (i = 0; i < max_sessions; i++) {
s = sessions[i];
if (s) {
send_flush_mdlog(s);
@@ -2317,15 +2332,19 @@ retry:
ceph_timeout_jiffies(req1->r_timeout));
if (ret)
err = -EIO;
- ceph_mdsc_put_request(req1);
}
if (req2) {
ret = !wait_for_completion_timeout(&req2->r_safe_completion,
ceph_timeout_jiffies(req2->r_timeout));
if (ret)
err = -EIO;
- ceph_mdsc_put_request(req2);
}
+
+out:
+ if (req1)
+ ceph_mdsc_put_request(req1);
+ if (req2)
+ ceph_mdsc_put_request(req2);
return err;
}
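
/*
 * Illustrative sketch (hypothetical names, not kernel API) of the
 * lockless snapshot-and-retry pattern unsafe_request_wait() adopts
 * above: size the array from an unlocked snapshot of the shared
 * maximum, and if an entry's index later exceeds that snapshot, drop
 * all references taken so far and start over.
 */
#include <linux/list.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct mds_session {
	unsigned int id;
	refcount_t ref;
	struct list_head node;
};

struct mds_table {
	spinlock_t lock;
	unsigned int max_sessions;	/* may grow at any time */
	struct list_head sessions;
};

static struct mds_session *session_get(struct mds_session *s)
{
	refcount_inc(&s->ref);
	return s;
}

static void session_put(struct mds_session *s)
{
	if (refcount_dec_and_test(&s->ref))
		kfree(s);
}

static int snapshot_sessions(struct mds_table *t, struct mds_session ***out,
			     unsigned int *out_max)
{
	struct mds_session **arr, *s;
	unsigned int max, i;

retry:
	max = READ_ONCE(t->max_sessions);	/* racy snapshot is fine */
	arr = kcalloc(max, sizeof(*arr), GFP_KERNEL);
	if (!arr)
		return -ENOMEM;

	spin_lock(&t->lock);
	list_for_each_entry(s, &t->sessions, node) {
		if (unlikely(s->id >= max)) {
			/* the table grew after the snapshot: start over */
			spin_unlock(&t->lock);
			for (i = 0; i < max; i++)
				if (arr[i])
					session_put(arr[i]);
			kfree(arr);
			goto retry;
		}
		if (!arr[s->id])
			arr[s->id] = session_get(s);
	}
	spin_unlock(&t->lock);

	*out = arr;
	*out_max = max;
	return 0;
}
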
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 5b9104b8e453..bbed3224ad68 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -583,6 +583,7 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
struct ceph_inode_info *ci = ceph_inode(dir);
struct inode *inode;
struct timespec64 now;
+ struct ceph_string *pool_ns;
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
struct ceph_vino vino = { .ino = req->r_deleg_ino,
.snap = CEPH_NOSNAP };
@@ -632,6 +633,12 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
in.max_size = cpu_to_le64(lo->stripe_unit);
ceph_file_layout_to_legacy(lo, &in.layout);
+ /* lo is private, so pool_ns can't change */
+ pool_ns = rcu_dereference_raw(lo->pool_ns);
+ if (pool_ns) {
+ iinfo.pool_ns_len = pool_ns->len;
+ iinfo.pool_ns_data = pool_ns->str;
+ }
down_read(&mdsc->snap_rwsem);
ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session,
@@ -750,8 +757,10 @@ retry:
restore_deleg_ino(dir, req->r_deleg_ino);
ceph_mdsc_put_request(req);
try_async = false;
+ ceph_put_string(rcu_dereference_raw(lo.pool_ns));
goto retry;
}
+ ceph_put_string(rcu_dereference_raw(lo.pool_ns));
goto out_req;
}
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index ef4a980a7bf3..9cfa6c730519 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -447,7 +447,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
struct ceph_inode_info *ci;
int i;
- ci = kmem_cache_alloc(ceph_inode_cachep, GFP_NOFS);
+ ci = alloc_inode_sb(sb, ceph_inode_cachep, GFP_NOFS);
if (!ci)
return NULL;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index bf79f369aec6..4a3b77d049c7 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -802,6 +802,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
fsc->have_copy_from2 = true;
atomic_long_set(&fsc->writeback_count, 0);
+ fsc->write_congested = false;
err = -ENOMEM;
/*
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 67f145e1ae7a..0bd97aea2319 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -121,6 +121,7 @@ struct ceph_fs_client {
struct ceph_mds_client *mdsc;
atomic_long_t writeback_count;
+ bool write_congested;
struct workqueue_struct *inode_wq;
struct workqueue_struct *cap_wq;
diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
index 463ebe34892b..180c234c2f46 100644
--- a/fs/cifs/cifs_swn.c
+++ b/fs/cifs/cifs_swn.c
@@ -396,11 +396,11 @@ static int cifs_swn_resource_state_changed(struct cifs_swn_reg *swnreg, const ch
switch (state) {
case CIFS_SWN_RESOURCE_STATE_UNAVAILABLE:
cifs_dbg(FYI, "%s: resource name '%s' become unavailable\n", __func__, name);
- cifs_reconnect(swnreg->tcon->ses->server, true);
+ cifs_signal_cifsd_for_reconnect(swnreg->tcon->ses->server, true);
break;
case CIFS_SWN_RESOURCE_STATE_AVAILABLE:
cifs_dbg(FYI, "%s: resource name '%s' become available\n", __func__, name);
- cifs_reconnect(swnreg->tcon->ses->server, true);
+ cifs_signal_cifsd_for_reconnect(swnreg->tcon->ses->server, true);
break;
case CIFS_SWN_RESOURCE_STATE_UNKNOWN:
cifs_dbg(FYI, "%s: resource name '%s' changed to unknown state\n", __func__, name);
@@ -498,7 +498,7 @@ static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *a
goto unlock;
}
- cifs_reconnect(tcon->ses->server, false);
+ cifs_signal_cifsd_for_reconnect(tcon->ses->server, false);
unlock:
mutex_unlock(&tcon->ses->server->srv_mutex);
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index ee3aab3dd4ac..bf861fef2f0c 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -949,6 +949,9 @@ static void populate_new_aces(char *nacl_base,
pnntace = (struct cifs_ace *) (nacl_base + nsize);
nsize += setup_special_mode_ACE(pnntace, nmode);
num_aces++;
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+ nsize += setup_authusers_ACE(pnntace);
+ num_aces++;
goto set_size;
}
@@ -1297,7 +1300,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
if (uid_valid(uid)) { /* chown */
uid_t id;
- nowner_sid_ptr = kmalloc(sizeof(struct cifs_sid),
+ nowner_sid_ptr = kzalloc(sizeof(struct cifs_sid),
GFP_KERNEL);
if (!nowner_sid_ptr) {
rc = -ENOMEM;
@@ -1326,7 +1329,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
}
if (gid_valid(gid)) { /* chgrp */
gid_t id;
- ngroup_sid_ptr = kmalloc(sizeof(struct cifs_sid),
+ ngroup_sid_ptr = kzalloc(sizeof(struct cifs_sid),
GFP_KERNEL);
if (!ngroup_sid_ptr) {
rc = -ENOMEM;
@@ -1613,7 +1616,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
nsecdesclen = secdesclen;
if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */
if (mode_from_sid)
- nsecdesclen += sizeof(struct cifs_ace);
+ nsecdesclen += 2 * sizeof(struct cifs_ace);
else /* cifsacl */
nsecdesclen += 5 * sizeof(struct cifs_ace);
} else { /* chown */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 199edac0cb59..d1211ad4e85b 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -210,6 +210,9 @@ cifs_read_super(struct super_block *sb)
if (rc)
goto out_no_root;
/* tune readahead according to rsize if readahead size not set on mount */
+ if (cifs_sb->ctx->rsize == 0)
+ cifs_sb->ctx->rsize =
+ tcon->ses->server->ops->negotiate_rsize(tcon, cifs_sb->ctx);
if (cifs_sb->ctx->rasize)
sb->s_bdi->ra_pages = cifs_sb->ctx->rasize / PAGE_SIZE;
else
@@ -254,6 +257,9 @@ static void cifs_kill_sb(struct super_block *sb)
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
struct cifs_tcon *tcon;
struct cached_fid *cfid;
+ struct rb_root *root = &cifs_sb->tlink_tree;
+ struct rb_node *node;
+ struct tcon_link *tlink;
/*
 * We need to release all dentries for the cached directories
@@ -263,16 +269,18 @@ static void cifs_kill_sb(struct super_block *sb)
dput(cifs_sb->root);
cifs_sb->root = NULL;
}
- tcon = cifs_sb_master_tcon(cifs_sb);
- if (tcon) {
+ node = rb_first(root);
+ while (node != NULL) {
+ tlink = rb_entry(node, struct tcon_link, tl_rbnode);
+ tcon = tlink_tcon(tlink);
cfid = &tcon->crfid;
mutex_lock(&cfid->fid_mutex);
if (cfid->dentry) {
-
dput(cfid->dentry);
cfid->dentry = NULL;
}
mutex_unlock(&cfid->fid_mutex);
+ node = rb_next(node);
}
kill_anon_super(sb);
@@ -354,7 +362,7 @@ static struct inode *
cifs_alloc_inode(struct super_block *sb)
{
struct cifsInodeInfo *cifs_inode;
- cifs_inode = kmem_cache_alloc(cifs_inode_cachep, GFP_KERNEL);
+ cifs_inode = alloc_inode_sb(sb, cifs_inode_cachep, GFP_KERNEL);
if (!cifs_inode)
return NULL;
cifs_inode->cifsAttrs = 0x20; /* default */
@@ -919,6 +927,7 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,
out_super:
deactivate_locked_super(sb);
+ return root;
out:
if (cifs_sb) {
kfree(cifs_sb->prepath);
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index d3701295402d..0df3b24a0bf4 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -132,6 +132,9 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
struct smb_hdr *out_buf,
int *bytes_returned);
void
+cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
+ bool all_channels);
+void
cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
bool mark_smb_session);
extern int cifs_reconnect(struct TCP_Server_Info *server,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 11a22a30ee14..9964c3634322 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -162,11 +162,51 @@ static void cifs_resolve_server(struct work_struct *work)
mutex_unlock(&server->srv_mutex);
}
-/**
+/*
+ * Update the tcpStatus for the server.
+ * This is used to signal the cifsd thread to call cifs_reconnect.
+ * ONLY the cifsd thread should call cifs_reconnect; any other
+ * thread must use this function instead.
+ *
+ * @server: the tcp ses for which reconnect is needed
+ * @all_channels: if this needs to be done for all channels
+ */
+void
+cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
+ bool all_channels)
+{
+ struct TCP_Server_Info *pserver;
+ struct cifs_ses *ses;
+ int i;
+
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
+ spin_lock(&cifs_tcp_ses_lock);
+ if (!all_channels) {
+ pserver->tcpStatus = CifsNeedReconnect;
+ spin_unlock(&cifs_tcp_ses_lock);
+ return;
+ }
+
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ spin_lock(&ses->chan_lock);
+ for (i = 0; i < ses->chan_count; i++)
+ ses->chans[i].server->tcpStatus = CifsNeedReconnect;
+ spin_unlock(&ses->chan_lock);
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
+}
+
+/*
* Mark all sessions and tcons for reconnect.
+ * IMPORTANT: make sure that this gets called only from
+ * the cifsd thread. Any other thread must use
+ * cifs_signal_cifsd_for_reconnect instead.
*
+ * @server: the tcp ses for which reconnect is needed
* @server needs to be previously set to CifsNeedReconnect.
- *
+ * @mark_smb_session: whether the smb sessions also need to be marked
*/
void
cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
@@ -175,11 +215,6 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
struct TCP_Server_Info *pserver;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
- struct mid_q_entry *mid, *nmid;
- struct list_head retry_list;
-
- server->maxBuf = 0;
- server->max_read = 0;
/*
* before reconnecting the tcp session, mark the smb session (uid) and the tid bad so they
@@ -219,6 +254,16 @@ next_session:
spin_unlock(&ses->chan_lock);
}
spin_unlock(&cifs_tcp_ses_lock);
+}
+
+static void
+cifs_abort_connection(struct TCP_Server_Info *server)
+{
+ struct mid_q_entry *mid, *nmid;
+ struct list_head retry_list;
+
+ server->maxBuf = 0;
+ server->max_read = 0;
/* do not want to be sending data on a socket we are freeing */
cifs_dbg(FYI, "%s: tearing down socket\n", __func__);
@@ -310,6 +355,8 @@ static int __cifs_reconnect(struct TCP_Server_Info *server,
cifs_mark_tcp_ses_conns_for_reconnect(server, mark_smb_session);
+ cifs_abort_connection(server);
+
do {
try_to_freeze();
mutex_lock(&server->srv_mutex);
@@ -434,6 +481,8 @@ reconnect_dfs_server(struct TCP_Server_Info *server,
cifs_mark_tcp_ses_conns_for_reconnect(server, mark_smb_session);
+ cifs_abort_connection(server);
+
do {
try_to_freeze();
mutex_lock(&server->srv_mutex);
@@ -639,6 +688,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
if (server->tcpStatus == CifsNeedReconnect) {
spin_unlock(&cifs_tcp_ses_lock);
+ cifs_reconnect(server, false);
return -ECONNABORTED;
}
spin_unlock(&cifs_tcp_ses_lock);
@@ -1831,13 +1881,9 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
int i;
for (i = 1; i < chan_count; i++) {
- /*
- * note: for now, we're okay accessing ses->chans
- * without chan_lock. But when chans can go away, we'll
- * need to introduce ref counting to make sure that chan
- * is not freed from under us.
- */
+ spin_unlock(&ses->chan_lock);
cifs_put_tcp_session(ses->chans[i].server, 0);
+ spin_lock(&ses->chan_lock);
ses->chans[i].server = NULL;
}
}
@@ -1981,6 +2027,19 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
}
}
+ ctx->workstation_name = kstrdup(ses->workstation_name, GFP_KERNEL);
+ if (!ctx->workstation_name) {
+ cifs_dbg(FYI, "Unable to allocate memory for workstation_name\n");
+ rc = -ENOMEM;
+ kfree(ctx->username);
+ ctx->username = NULL;
+ kfree_sensitive(ctx->password);
+ ctx->password = NULL;
+ kfree(ctx->domainname);
+ ctx->domainname = NULL;
+ goto out_key_put;
+ }
+
out_key_put:
up_read(&key->sem);
key_put(key);
@@ -2331,10 +2390,19 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
if (ses->server->posix_ext_supported) {
tcon->posix_extensions = true;
pr_warn_once("SMB3.11 POSIX Extensions are experimental\n");
- } else {
+ } else if ((ses->server->vals->protocol_id == SMB311_PROT_ID) ||
+ (strcmp(ses->server->vals->version_string,
+ SMB3ANY_VERSION_STRING) == 0) ||
+ (strcmp(ses->server->vals->version_string,
+ SMBDEFAULT_VERSION_STRING) == 0)) {
cifs_dbg(VFS, "Server does not support mounting with posix SMB3.11 extensions\n");
rc = -EOPNOTSUPP;
goto out_fail;
+ } else {
+ cifs_dbg(VFS, "Check vers= mount option. SMB3.11 "
+ "disabled but required for POSIX extensions\n");
+ rc = -EOPNOTSUPP;
+ goto out_fail;
}
}
@@ -3896,7 +3964,8 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
/* only send once per connect */
spin_lock(&cifs_tcp_ses_lock);
- if (server->tcpStatus != CifsNeedSessSetup) {
+ if ((server->tcpStatus != CifsNeedSessSetup) &&
+ (ses->status == CifsGood)) {
spin_unlock(&cifs_tcp_ses_lock);
return 0;
}
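
/*
 * Illustrative sketch of the ownership rule the cifs changes above
 * enforce: any thread may publish "needs reconnect", but only the one
 * owner thread (cifsd) performs the actual teardown and reconnect.
 * All names here are hypothetical.
 */
#include <linux/spinlock.h>

enum conn_state { CONN_GOOD, CONN_NEED_RECONNECT, CONN_EXITING };

struct conn {
	spinlock_t state_lock;
	enum conn_state state;
};

static void do_reconnect(struct conn *c)
{
	/* hypothetical heavy-weight recovery: tear down the socket,
	 * rebuild sessions and tcons */
}

/* Safe from any thread: only signals, never recovers. */
static void conn_signal_reconnect(struct conn *c)
{
	spin_lock(&c->state_lock);
	if (c->state != CONN_EXITING)
		c->state = CONN_NEED_RECONNECT;
	spin_unlock(&c->state_lock);
}

/* Owner thread only: observes the published state and acts on it. */
static void conn_owner_poll(struct conn *c)
{
	bool need;

	spin_lock(&c->state_lock);
	need = (c->state == CONN_NEED_RECONNECT);
	spin_unlock(&c->state_lock);

	if (need)
		do_reconnect(c);
}
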
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index dd9643751671..30e040da4f09 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -1355,7 +1355,7 @@ static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cach
}
cifs_dbg(FYI, "%s: no cached or matched targets. mark dfs share for reconnect.\n", __func__);
- cifs_reconnect(tcon->ses->server, true);
+ cifs_signal_cifsd_for_reconnect(tcon->ses->server, true);
}
/* Refresh dfs referral of tcon and mark it for reconnect if needed */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 59334be9ed3b..60f43bff7ccb 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3740,6 +3740,11 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
break;
}
+ if (cifs_sb->ctx->rsize == 0)
+ cifs_sb->ctx->rsize =
+ server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
+ cifs_sb->ctx);
+
rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
&rsize, credits);
if (rc)
@@ -4269,8 +4274,6 @@ cifs_readv_complete(struct work_struct *work)
for (i = 0; i < rdata->nr_pages; i++) {
struct page *page = rdata->pages[i];
- lru_cache_add(page);
-
if (rdata->result == 0 ||
(rdata->result == -EAGAIN && got_bytes)) {
flush_dcache_page(page);
@@ -4278,12 +4281,12 @@ cifs_readv_complete(struct work_struct *work)
} else
SetPageError(page);
- unlock_page(page);
-
if (rdata->result == 0 ||
(rdata->result == -EAGAIN && got_bytes))
cifs_readpage_to_fscache(rdata->mapping->host, page);
+ unlock_page(page);
+
got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
put_page(page);
@@ -4340,7 +4343,6 @@ readpages_fill_pages(struct TCP_Server_Info *server,
* fill them until the writes are flushed.
*/
zero_user(page, 0, PAGE_SIZE);
- lru_cache_add(page);
flush_dcache_page(page);
SetPageUptodate(page);
unlock_page(page);
@@ -4350,7 +4352,6 @@ readpages_fill_pages(struct TCP_Server_Info *server,
continue;
} else {
/* no need to hold page hostage */
- lru_cache_add(page);
unlock_page(page);
put_page(page);
rdata->pages[i] = NULL;
@@ -4393,92 +4394,20 @@ cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
return readpages_fill_pages(server, rdata, iter, iter->count);
}
-static int
-readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
- unsigned int rsize, struct list_head *tmplist,
- unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
+static void cifs_readahead(struct readahead_control *ractl)
{
- struct page *page, *tpage;
- unsigned int expected_index;
int rc;
- gfp_t gfp = readahead_gfp_mask(mapping);
-
- INIT_LIST_HEAD(tmplist);
-
- page = lru_to_page(page_list);
-
- /*
- * Lock the page and put it in the cache. Since no one else
- * should have access to this page, we're safe to simply set
- * PG_locked without checking it first.
- */
- __SetPageLocked(page);
- rc = add_to_page_cache_locked(page, mapping,
- page->index, gfp);
-
- /* give up if we can't stick it in the cache */
- if (rc) {
- __ClearPageLocked(page);
- return rc;
- }
-
- /* move first page to the tmplist */
- *offset = (loff_t)page->index << PAGE_SHIFT;
- *bytes = PAGE_SIZE;
- *nr_pages = 1;
- list_move_tail(&page->lru, tmplist);
-
- /* now try and add more pages onto the request */
- expected_index = page->index + 1;
- list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
- /* discontinuity ? */
- if (page->index != expected_index)
- break;
-
- /* would this page push the read over the rsize? */
- if (*bytes + PAGE_SIZE > rsize)
- break;
-
- __SetPageLocked(page);
- rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
- if (rc) {
- __ClearPageLocked(page);
- break;
- }
- list_move_tail(&page->lru, tmplist);
- (*bytes) += PAGE_SIZE;
- expected_index++;
- (*nr_pages)++;
- }
- return rc;
-}
-
-static int cifs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *page_list, unsigned num_pages)
-{
- int rc;
- int err = 0;
- struct list_head tmplist;
- struct cifsFileInfo *open_file = file->private_data;
- struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
+ struct cifsFileInfo *open_file = ractl->file->private_data;
+ struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
struct TCP_Server_Info *server;
pid_t pid;
- unsigned int xid;
+ unsigned int xid, nr_pages, last_batch_size = 0, cache_nr_pages = 0;
+ pgoff_t next_cached = ULONG_MAX;
+ bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
+ cifs_inode_cookie(ractl->mapping->host)->cache_priv;
+ bool check_cache = caching;
xid = get_xid();
- /*
- * Reads as many pages as possible from fscache. Returns -ENOBUFS
- * immediately if the cookie is negative
- *
- * After this point, every page in the list might have PG_fscache set,
- * so we will need to clean that up off of every page we don't use.
- */
- rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
- &num_pages);
- if (rc == 0) {
- free_xid(xid);
- return rc;
- }
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
pid = open_file->pid;
@@ -4489,39 +4418,78 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
- __func__, file, mapping, num_pages);
+ __func__, ractl->file, ractl->mapping, readahead_count(ractl));
/*
- * Start with the page at end of list and move it to private
- * list. Do the same with any following pages until we hit
- * the rsize limit, hit an index discontinuity, or run out of
- * pages. Issue the async read and then start the loop again
- * until the list is empty.
- *
- * Note that list order is important. The page_list is in
- * the order of declining indexes. When we put the pages in
- * the rdata->pages, then we want them in increasing order.
+ * Chop the readahead request up into rsize-sized read requests.
*/
- while (!list_empty(page_list) && !err) {
- unsigned int i, nr_pages, bytes, rsize;
- loff_t offset;
- struct page *page, *tpage;
+ while ((nr_pages = readahead_count(ractl) - last_batch_size)) {
+ unsigned int i, got, rsize;
+ struct page *page;
struct cifs_readdata *rdata;
struct cifs_credits credits_on_stack;
struct cifs_credits *credits = &credits_on_stack;
+ pgoff_t index = readahead_index(ractl) + last_batch_size;
+
+ /*
+ * Find out if we have anything cached in the range of
+ * interest, and if so, where the next chunk of cached data is.
+ */
+ if (caching) {
+ if (check_cache) {
+ rc = cifs_fscache_query_occupancy(
+ ractl->mapping->host, index, nr_pages,
+ &next_cached, &cache_nr_pages);
+ if (rc < 0)
+ caching = false;
+ check_cache = false;
+ }
+
+ if (index == next_cached) {
+ /*
+ * TODO: Send a whole batch of pages to be read
+ * by the cache.
+ */
+ page = readahead_page(ractl);
+ last_batch_size = 1 << thp_order(page);
+ if (cifs_readpage_from_fscache(ractl->mapping->host,
+ page) < 0) {
+ /*
+ * TODO: Deal with cache read failure
+ * here, but for the moment, delegate
+ * that to readpage.
+ */
+ caching = false;
+ }
+ unlock_page(page);
+ next_cached++;
+ cache_nr_pages--;
+ if (cache_nr_pages == 0)
+ check_cache = true;
+ continue;
+ }
+ }
if (open_file->invalidHandle) {
rc = cifs_reopen_file(open_file, true);
- if (rc == -EAGAIN)
- continue;
- else if (rc)
+ if (rc) {
+ if (rc == -EAGAIN)
+ continue;
break;
+ }
}
+ if (cifs_sb->ctx->rsize == 0)
+ cifs_sb->ctx->rsize =
+ server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
+ cifs_sb->ctx);
+
rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
&rsize, credits);
if (rc)
break;
+ nr_pages = min_t(size_t, rsize / PAGE_SIZE, readahead_count(ractl));
+ nr_pages = min_t(size_t, nr_pages, next_cached - index);
/*
* Give up immediately if rsize is too small to read an entire
@@ -4529,16 +4497,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
* reach this point however since we set ra_pages to 0 when the
* rsize is smaller than a cache page.
*/
- if (unlikely(rsize < PAGE_SIZE)) {
- add_credits_and_wake_if(server, credits, 0);
- free_xid(xid);
- return 0;
- }
-
- nr_pages = 0;
- err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
- &nr_pages, &offset, &bytes);
- if (!nr_pages) {
+ if (unlikely(!nr_pages)) {
add_credits_and_wake_if(server, credits, 0);
break;
}
@@ -4546,36 +4505,31 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
if (!rdata) {
/* best to give up if we're out of mem */
- list_for_each_entry_safe(page, tpage, &tmplist, lru) {
- list_del(&page->lru);
- lru_cache_add(page);
- unlock_page(page);
- put_page(page);
- }
- rc = -ENOMEM;
add_credits_and_wake_if(server, credits, 0);
break;
}
- rdata->cfile = cifsFileInfo_get(open_file);
- rdata->server = server;
- rdata->mapping = mapping;
- rdata->offset = offset;
- rdata->bytes = bytes;
- rdata->pid = pid;
- rdata->pagesz = PAGE_SIZE;
- rdata->tailsz = PAGE_SIZE;
+ got = __readahead_batch(ractl, rdata->pages, nr_pages);
+ if (got != nr_pages) {
+ pr_warn("__readahead_batch() returned %u/%u\n",
+ got, nr_pages);
+ nr_pages = got;
+ }
+
+ rdata->nr_pages = nr_pages;
+ rdata->bytes = readahead_batch_length(ractl);
+ rdata->cfile = cifsFileInfo_get(open_file);
+ rdata->server = server;
+ rdata->mapping = ractl->mapping;
+ rdata->offset = readahead_pos(ractl);
+ rdata->pid = pid;
+ rdata->pagesz = PAGE_SIZE;
+ rdata->tailsz = PAGE_SIZE;
rdata->read_into_pages = cifs_readpages_read_into_pages;
rdata->copy_into_pages = cifs_readpages_copy_into_pages;
- rdata->credits = credits_on_stack;
-
- list_for_each_entry_safe(page, tpage, &tmplist, lru) {
- list_del(&page->lru);
- rdata->pages[rdata->nr_pages++] = page;
- }
+ rdata->credits = credits_on_stack;
rc = adjust_credits(server, &rdata->credits, rdata->bytes);
-
if (!rc) {
if (rdata->cfile->invalidHandle)
rc = -EAGAIN;
@@ -4587,7 +4541,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
add_credits_and_wake_if(server, &rdata->credits, 0);
for (i = 0; i < rdata->nr_pages; i++) {
page = rdata->pages[i];
- lru_cache_add(page);
unlock_page(page);
put_page(page);
}
@@ -4597,10 +4550,10 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
}
kref_put(&rdata->refcount, cifs_readdata_release);
+ last_batch_size = nr_pages;
}
free_xid(xid);
- return rc;
}
/*
@@ -4811,17 +4764,17 @@ static int cifs_release_page(struct page *page, gfp_t gfp)
return true;
}
-static void cifs_invalidate_page(struct page *page, unsigned int offset,
- unsigned int length)
+static void cifs_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length)
{
- wait_on_page_fscache(page);
+ folio_wait_fscache(folio);
}
-static int cifs_launder_page(struct page *page)
+static int cifs_launder_folio(struct folio *folio)
{
int rc = 0;
- loff_t range_start = page_offset(page);
- loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
+ loff_t range_start = folio_pos(folio);
+ loff_t range_end = range_start + folio_size(folio);
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = 0,
@@ -4829,12 +4782,12 @@ static int cifs_launder_page(struct page *page)
.range_end = range_end,
};
- cifs_dbg(FYI, "Launder page: %p\n", page);
+ cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
- if (clear_page_dirty_for_io(page))
- rc = cifs_writepage_locked(page, &wbc);
+ if (folio_clear_dirty_for_io(folio))
+ rc = cifs_writepage_locked(&folio->page, &wbc);
- wait_on_page_fscache(page);
+ folio_wait_fscache(folio);
return rc;
}
@@ -4924,7 +4877,7 @@ oplock_break_done:
* In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
* so this method should never be called.
*
- * Direct IO is not yet supported in the cached mode.
+ * Direct IO is not yet supported in the cached mode.
*/
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
@@ -4996,26 +4949,27 @@ static void cifs_swap_deactivate(struct file *file)
* need to pin the cache object to write back to.
*/
#ifdef CONFIG_CIFS_FSCACHE
-static int cifs_set_page_dirty(struct page *page)
+static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
{
- return fscache_set_page_dirty(page, cifs_inode_cookie(page->mapping->host));
+ return fscache_dirty_folio(mapping, folio,
+ cifs_inode_cookie(mapping->host));
}
#else
-#define cifs_set_page_dirty __set_page_dirty_nobuffers
+#define cifs_dirty_folio filemap_dirty_folio
#endif
const struct address_space_operations cifs_addr_ops = {
.readpage = cifs_readpage,
- .readpages = cifs_readpages,
+ .readahead = cifs_readahead,
.writepage = cifs_writepage,
.writepages = cifs_writepages,
.write_begin = cifs_write_begin,
.write_end = cifs_write_end,
- .set_page_dirty = cifs_set_page_dirty,
+ .dirty_folio = cifs_dirty_folio,
.releasepage = cifs_release_page,
.direct_IO = cifs_direct_io,
- .invalidatepage = cifs_invalidate_page,
- .launder_page = cifs_launder_page,
+ .invalidate_folio = cifs_invalidate_folio,
+ .launder_folio = cifs_launder_folio,
/*
 * TODO: investigate and if useful we could add a cifs_migratePage
 * helper (under a CONFIG_MIGRATION) in the future, and also
@@ -5036,8 +4990,8 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
.writepages = cifs_writepages,
.write_begin = cifs_write_begin,
.write_end = cifs_write_end,
- .set_page_dirty = cifs_set_page_dirty,
+ .dirty_folio = cifs_dirty_folio,
.releasepage = cifs_release_page,
- .invalidatepage = cifs_invalidate_page,
- .launder_page = cifs_launder_page,
+ .invalidate_folio = cifs_invalidate_folio,
+ .launder_folio = cifs_launder_folio,
};
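
/*
 * Illustrative sketch: the minimal shape of an ->readahead method
 * built on the readahead_control helpers that cifs_readahead() uses
 * above. myfs_read_page() is a hypothetical synchronous reader.
 */
#include <linux/fs.h>
#include <linux/pagemap.h>

static int myfs_read_page(struct inode *inode, struct page *page)
{
	/* hypothetical: fill @page from the server, return 0 or -errno */
	return 0;
}

static void myfs_readahead(struct readahead_control *ractl)
{
	struct page *page;

	pr_debug("readahead of %u pages from index %lu\n",
		 readahead_count(ractl), readahead_index(ractl));

	/*
	 * readahead_page() hands back pages that are already locked and
	 * in the page cache; the method submits I/O, then unlocks and
	 * drops its reference on each page.
	 */
	while ((page = readahead_page(ractl)) != NULL) {
		if (myfs_read_page(ractl->mapping->host, page) == 0)
			SetPageUptodate(page);
		unlock_page(page);
		put_page(page);
	}
}
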
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 7ec35f3f0a5f..a92e9eec521f 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -149,7 +149,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
fsparam_u32("echo_interval", Opt_echo_interval),
fsparam_u32("max_credits", Opt_max_credits),
fsparam_u32("handletimeout", Opt_handletimeout),
- fsparam_u32("snapshot", Opt_snapshot),
+ fsparam_u64("snapshot", Opt_snapshot),
fsparam_u32("max_channels", Opt_max_channels),
/* Mount options which take string value */
@@ -1078,7 +1078,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
ctx->echo_interval = result.uint_32;
break;
case Opt_snapshot:
- ctx->snapshot_time = result.uint_32;
+ ctx->snapshot_time = result.uint_64;
break;
case Opt_max_credits:
if (result.uint_32 < 20 || result.uint_32 > 60000) {
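
/*
 * Sketch of keeping a parameter spec and its parser in sync, which is
 * what the snapshot fix above does: a 64-bit mount option must be both
 * declared with fsparam_u64() and read back via result.uint_64, or the
 * value is silently truncated. Names here are hypothetical.
 */
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/types.h>

struct myfs_ctx {
	u64 stamp;
};

enum { Opt_stamp };

static const struct fs_parameter_spec myfs_parameters[] = {
	fsparam_u64("stamp", Opt_stamp),
	{}
};

static int myfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
	struct myfs_ctx *ctx = fc->fs_private;
	struct fs_parse_result result;
	int opt;

	opt = fs_parse(fc, myfs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_stamp:
		/* reading result.uint_32 here would silently truncate */
		ctx->stamp = result.uint_64;
		break;
	}
	return 0;
}
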
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index efaac4d5ff55..33af72e0ac0c 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -134,37 +134,127 @@ void cifs_fscache_release_inode_cookie(struct inode *inode)
}
}
+static inline void fscache_end_operation(struct netfs_cache_resources *cres)
+{
+ const struct netfs_cache_ops *ops = fscache_operation_valid(cres);
+
+ if (ops)
+ ops->end_operation(cres);
+}
+
/*
- * Retrieve a page from FS-Cache
+ * Fallback page reading interface.
*/
-int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
+static int fscache_fallback_read_page(struct inode *inode, struct page *page)
{
- cifs_dbg(FYI, "%s: (fsc:%p, p:%p, i:0x%p\n",
- __func__, CIFS_I(inode)->fscache, page, inode);
- return -ENOBUFS; // Needs conversion to using netfslib
+ struct netfs_cache_resources cres;
+ struct fscache_cookie *cookie = cifs_inode_cookie(inode);
+ struct iov_iter iter;
+ struct bio_vec bvec[1];
+ int ret;
+
+ memset(&cres, 0, sizeof(cres));
+ bvec[0].bv_page = page;
+ bvec[0].bv_offset = 0;
+ bvec[0].bv_len = PAGE_SIZE;
+ iov_iter_bvec(&iter, READ, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
+
+ ret = fscache_begin_read_operation(&cres, cookie);
+ if (ret < 0)
+ return ret;
+
+ ret = fscache_read(&cres, page_offset(page), &iter, NETFS_READ_HOLE_FAIL,
+ NULL, NULL);
+ fscache_end_operation(&cres);
+ return ret;
}
/*
- * Retrieve a set of pages from FS-Cache
+ * Fallback page writing interface.
*/
-int __cifs_readpages_from_fscache(struct inode *inode,
- struct address_space *mapping,
- struct list_head *pages,
- unsigned *nr_pages)
+static int fscache_fallback_write_page(struct inode *inode, struct page *page,
+ bool no_space_allocated_yet)
{
- cifs_dbg(FYI, "%s: (0x%p/%u/0x%p)\n",
- __func__, CIFS_I(inode)->fscache, *nr_pages, inode);
- return -ENOBUFS; // Needs conversion to using netfslib
+ struct netfs_cache_resources cres;
+ struct fscache_cookie *cookie = cifs_inode_cookie(inode);
+ struct iov_iter iter;
+ struct bio_vec bvec[1];
+ loff_t start = page_offset(page);
+ size_t len = PAGE_SIZE;
+ int ret;
+
+ memset(&cres, 0, sizeof(cres));
+ bvec[0].bv_page = page;
+ bvec[0].bv_offset = 0;
+ bvec[0].bv_len = PAGE_SIZE;
+ iov_iter_bvec(&iter, WRITE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
+
+ ret = fscache_begin_write_operation(&cres, cookie);
+ if (ret < 0)
+ return ret;
+
+ ret = cres.ops->prepare_write(&cres, &start, &len, i_size_read(inode),
+ no_space_allocated_yet);
+ if (ret == 0)
+ ret = fscache_write(&cres, page_offset(page), &iter, NULL, NULL);
+ fscache_end_operation(&cres);
+ return ret;
}
-void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
+/*
+ * Retrieve a page from FS-Cache
+ */
+int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
{
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ int ret;
- WARN_ON(!cifsi->fscache);
+ cifs_dbg(FYI, "%s: (fsc:%p, p:%p, i:0x%p\n",
+ __func__, cifs_inode_cookie(inode), page, inode);
+ ret = fscache_fallback_read_page(inode, page);
+ if (ret < 0)
+ return ret;
+
+ /* Read completed synchronously */
+ SetPageUptodate(page);
+ return 0;
+}
+
+void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
+{
cifs_dbg(FYI, "%s: (fsc: %p, p: %p, i: %p)\n",
- __func__, cifsi->fscache, page, inode);
+ __func__, cifs_inode_cookie(inode), page, inode);
+
+ fscache_fallback_write_page(inode, page, true);
+}
+
+/*
+ * Query the cache occupancy.
+ */
+int __cifs_fscache_query_occupancy(struct inode *inode,
+ pgoff_t first, unsigned int nr_pages,
+ pgoff_t *_data_first,
+ unsigned int *_data_nr_pages)
+{
+ struct netfs_cache_resources cres;
+ struct fscache_cookie *cookie = cifs_inode_cookie(inode);
+ loff_t start, data_start;
+ size_t len, data_len;
+ int ret;
- // Needs conversion to using netfslib
+ ret = fscache_begin_read_operation(&cres, cookie);
+ if (ret < 0)
+ return ret;
+
+ start = first * PAGE_SIZE;
+ len = nr_pages * PAGE_SIZE;
+ ret = cres.ops->query_occupancy(&cres, start, len, PAGE_SIZE,
+ &data_start, &data_len);
+ if (ret == 0) {
+ *_data_first = data_start / PAGE_SIZE;
+ *_data_nr_pages = data_len / PAGE_SIZE;
+ }
+
+ fscache_end_operation(&cres);
+ return ret;
}
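
/*
 * Distilled from the fallback helpers above: the common idiom for
 * handing a single page to an iterator-based I/O API is to wrap it in
 * a one-element bio_vec. Sketch only; @dir is READ or WRITE.
 */
#include <linux/bvec.h>
#include <linux/mm.h>
#include <linux/uio.h>

static void iter_for_one_page(struct iov_iter *iter, struct bio_vec *bv,
			      struct page *page, unsigned int dir)
{
	bv->bv_page = page;
	bv->bv_offset = 0;
	bv->bv_len = PAGE_SIZE;
	iov_iter_bvec(iter, dir, bv, 1, PAGE_SIZE);
}
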
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
index c6ca49ac33d4..55129908e2c1 100644
--- a/fs/cifs/fscache.h
+++ b/fs/cifs/fscache.h
@@ -9,6 +9,7 @@
#ifndef _CIFS_FSCACHE_H
#define _CIFS_FSCACHE_H
+#include <linux/swap.h>
#include <linux/fscache.h>
#include "cifsglob.h"
@@ -58,14 +59,6 @@ void cifs_fscache_fill_coherency(struct inode *inode,
}
-extern int cifs_fscache_release_page(struct page *page, gfp_t gfp);
-extern int __cifs_readpage_from_fscache(struct inode *, struct page *);
-extern int __cifs_readpages_from_fscache(struct inode *,
- struct address_space *,
- struct list_head *,
- unsigned *);
-extern void __cifs_readpage_to_fscache(struct inode *, struct page *);
-
static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode)
{
return CIFS_I(inode)->fscache;
@@ -80,33 +73,52 @@ static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags
i_size_read(inode), flags);
}
-static inline int cifs_readpage_from_fscache(struct inode *inode,
- struct page *page)
-{
- if (CIFS_I(inode)->fscache)
- return __cifs_readpage_from_fscache(inode, page);
+extern int __cifs_fscache_query_occupancy(struct inode *inode,
+ pgoff_t first, unsigned int nr_pages,
+ pgoff_t *_data_first,
+ unsigned int *_data_nr_pages);
- return -ENOBUFS;
+static inline int cifs_fscache_query_occupancy(struct inode *inode,
+ pgoff_t first, unsigned int nr_pages,
+ pgoff_t *_data_first,
+ unsigned int *_data_nr_pages)
+{
+ if (!cifs_inode_cookie(inode))
+ return -ENOBUFS;
+ return __cifs_fscache_query_occupancy(inode, first, nr_pages,
+ _data_first, _data_nr_pages);
}
-static inline int cifs_readpages_from_fscache(struct inode *inode,
- struct address_space *mapping,
- struct list_head *pages,
- unsigned *nr_pages)
+extern int __cifs_readpage_from_fscache(struct inode *pinode, struct page *ppage);
+extern void __cifs_readpage_to_fscache(struct inode *pinode, struct page *ppage);
+
+
+static inline int cifs_readpage_from_fscache(struct inode *inode,
+ struct page *page)
{
- if (CIFS_I(inode)->fscache)
- return __cifs_readpages_from_fscache(inode, mapping, pages,
- nr_pages);
+ if (cifs_inode_cookie(inode))
+ return __cifs_readpage_from_fscache(inode, page);
return -ENOBUFS;
}
static inline void cifs_readpage_to_fscache(struct inode *inode,
struct page *page)
{
- if (PageFsCache(page))
+ if (cifs_inode_cookie(inode))
__cifs_readpage_to_fscache(inode, page);
}
+static inline int cifs_fscache_release_page(struct page *page, gfp_t gfp)
+{
+ if (PageFsCache(page)) {
+ if (current_is_kswapd() || !(gfp & __GFP_FS))
+ return false;
+ wait_on_page_fscache(page);
+ fscache_note_page_release(cifs_inode_cookie(page->mapping->host));
+ }
+ return true;
+}
+
#else /* CONFIG_CIFS_FSCACHE */
static inline
void cifs_fscache_fill_coherency(struct inode *inode,
@@ -123,22 +135,29 @@ static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool upd
static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; }
static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {}
-static inline int
-cifs_readpage_from_fscache(struct inode *inode, struct page *page)
+static inline int cifs_fscache_query_occupancy(struct inode *inode,
+ pgoff_t first, unsigned int nr_pages,
+ pgoff_t *_data_first,
+ unsigned int *_data_nr_pages)
{
+ *_data_first = ULONG_MAX;
+ *_data_nr_pages = 0;
return -ENOBUFS;
}
-static inline int cifs_readpages_from_fscache(struct inode *inode,
- struct address_space *mapping,
- struct list_head *pages,
- unsigned *nr_pages)
+static inline int
+cifs_readpage_from_fscache(struct inode *inode, struct page *page)
{
return -ENOBUFS;
}
-static inline void cifs_readpage_to_fscache(struct inode *inode,
- struct page *page) {}
+static inline
+void cifs_readpage_to_fscache(struct inode *inode, struct page *page) {}
+
+static inline int cifs_fscache_release_page(struct page *page, gfp_t gfp)
+{
+ return true; /* May release page */
+}
#endif /* CONFIG_CIFS_FSCACHE */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 7d8b3ceb2af3..60d853c92f6a 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -83,6 +83,7 @@ static void cifs_set_ops(struct inode *inode)
static void
cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
{
+ struct cifs_fscache_inode_coherency_data cd;
struct cifsInodeInfo *cifs_i = CIFS_I(inode);
cifs_dbg(FYI, "%s: revalidating inode %llu\n",
@@ -113,6 +114,9 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
cifs_dbg(FYI, "%s: invalidating inode %llu mapping\n",
__func__, cifs_i->uniqueid);
set_bit(CIFS_INO_INVALID_MAPPING, &cifs_i->flags);
+ /* Invalidate fscache cookie */
+ cifs_fscache_fill_coherency(&cifs_i->vfs_inode, &cd);
+ fscache_invalidate(cifs_inode_cookie(inode), &cd, i_size_read(inode), 0);
}
/*
@@ -2261,8 +2265,6 @@ cifs_dentry_needs_reval(struct dentry *dentry)
int
cifs_invalidate_mapping(struct inode *inode)
{
- struct cifs_fscache_inode_coherency_data cd;
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
int rc = 0;
if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
@@ -2272,8 +2274,6 @@ cifs_invalidate_mapping(struct inode *inode)
__func__, inode);
}
- cifs_fscache_fill_coherency(&cifsi->vfs_inode, &cd);
- fscache_invalidate(cifs_inode_cookie(inode), &cd, i_size_read(inode), 0);
return rc;
}
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index dc3b16d1be09..32f478c7a66d 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -127,11 +127,6 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
struct cifs_server_iface *ifaces = NULL;
size_t iface_count;
- if (ses->server->dialect < SMB30_PROT_ID) {
- cifs_dbg(VFS, "multichannel is not supported on this protocol version, use 3.0 or above\n");
- return 0;
- }
-
spin_lock(&ses->chan_lock);
new_chan_count = old_chan_count = ses->chan_count;
@@ -145,6 +140,12 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
return 0;
}
+ if (ses->server->dialect < SMB30_PROT_ID) {
+ spin_unlock(&ses->chan_lock);
+ cifs_dbg(VFS, "multichannel is not supported on this protocol version, use 3.0 or above\n");
+ return 0;
+ }
+
if (!(ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
ses->chan_max = 1;
spin_unlock(&ses->chan_lock);
@@ -713,7 +714,11 @@ static int size_of_ntlmssp_blob(struct cifs_ses *ses, int base_size)
else
sz += sizeof(__le16);
- sz += sizeof(__le16) * strnlen(ses->workstation_name, CIFS_MAX_WORKSTATION_LEN);
+ if (ses->workstation_name)
+ sz += sizeof(__le16) * strnlen(ses->workstation_name,
+ CIFS_MAX_WORKSTATION_LEN);
+ else
+ sz += sizeof(__le16);
return sz;
}
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 8272c91e15ef..c71c9a44bef4 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -228,9 +228,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server)
spin_unlock(&GlobalMid_Lock);
if (reconnect) {
- spin_lock(&cifs_tcp_ses_lock);
- server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&cifs_tcp_ses_lock);
+ cifs_signal_cifsd_for_reconnect(server, false);
}
return mid;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index af5d0830bc8a..891b11576e55 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -25,6 +25,7 @@
#include "smb2glob.h"
#include "cifs_ioctl.h"
#include "smbdirect.h"
+#include "fscache.h"
#include "fs_context.h"
/* Change credits for different ops and return the total number of credits */
@@ -3887,29 +3888,38 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon,
{
int rc;
unsigned int xid;
+ struct inode *inode;
struct cifsFileInfo *cfile = file->private_data;
+ struct cifsInodeInfo *cifsi;
__le64 eof;
xid = get_xid();
- if (off >= i_size_read(file->f_inode) ||
- off + len >= i_size_read(file->f_inode)) {
+ inode = d_inode(cfile->dentry);
+ cifsi = CIFS_I(inode);
+
+ if (off >= i_size_read(inode) ||
+ off + len >= i_size_read(inode)) {
rc = -EINVAL;
goto out;
}
rc = smb2_copychunk_range(xid, cfile, cfile, off + len,
- i_size_read(file->f_inode) - off - len, off);
+ i_size_read(inode) - off - len, off);
if (rc < 0)
goto out;
- eof = cpu_to_le64(i_size_read(file->f_inode) - len);
+ eof = cpu_to_le64(i_size_read(inode) - len);
rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, cfile->pid, &eof);
if (rc < 0)
goto out;
rc = 0;
+
+ cifsi->server_eof = i_size_read(inode) - len;
+ truncate_setsize(inode, cifsi->server_eof);
+ fscache_resize_cookie(cifs_inode_cookie(inode), cifsi->server_eof);
out:
free_xid(xid);
return rc;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 8540f7c13eae..eeb1a699bd6f 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -430,10 +430,7 @@ unmask:
* be taken as the remainder of this one. We need to kill the
* socket so the server throws away the partial SMB
*/
- spin_lock(&cifs_tcp_ses_lock);
- if (server->tcpStatus != CifsExiting)
- server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&cifs_tcp_ses_lock);
+ cifs_signal_cifsd_for_reconnect(server, false);
trace_smb3_partial_send_reconnect(server->CurrentMid,
server->conn_id, server->hostname);
}
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 7d8b72d67c80..9d486fbbfbbd 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -175,11 +175,13 @@ static int cifs_xattr_set(const struct xattr_handler *handler,
switch (handler->flags) {
case XATTR_CIFS_NTSD_FULL:
aclflags = (CIFS_ACL_OWNER |
+ CIFS_ACL_GROUP |
CIFS_ACL_DACL |
CIFS_ACL_SACL);
break;
case XATTR_CIFS_NTSD:
aclflags = (CIFS_ACL_OWNER |
+ CIFS_ACL_GROUP |
CIFS_ACL_DACL);
break;
case XATTR_CIFS_ACL:
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 29dd87be2fb8..3f3c81e6b1ab 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -14,6 +14,7 @@
#include <linux/time.h>
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/pagemap.h>
#include <linux/stat.h>
#include <linux/cred.h>
#include <linux/errno.h>
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index d9f1bd7153df..2185328b65c7 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -43,7 +43,7 @@ static struct kmem_cache * coda_inode_cachep;
static struct inode *coda_alloc_inode(struct super_block *sb)
{
struct coda_inode_info *ei;
- ei = kmem_cache_alloc(coda_inode_cachep, GFP_KERNEL);
+ ei = alloc_inode_sb(sb, coda_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
memset(&ei->c_fid, 0, sizeof(struct CodaFid));
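
/*
 * The ceph, cifs and coda hunks above all make the same conversion:
 * ->alloc_inode now goes through alloc_inode_sb(), which ties the
 * allocation to the superblock so the inode is charged to the right
 * memory cgroup for list_lru-based reclaim. A minimal sketch, with
 * hypothetical myfs_* names:
 */
#include <linux/fs.h>
#include <linux/slab.h>

static struct kmem_cache *myfs_inode_cachep;

struct myfs_inode_info {
	unsigned long i_flags;
	struct inode vfs_inode;
};

static struct inode *myfs_alloc_inode(struct super_block *sb)
{
	struct myfs_inode_info *mi;

	mi = alloc_inode_sb(sb, myfs_inode_cachep, GFP_KERNEL);
	if (!mi)
		return NULL;
	mi->i_flags = 0;
	return &mi->vfs_inode;
}
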
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index 95e72d271b95..8f0af4f62631 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -135,6 +135,8 @@
#define elf_format compat_elf_format
#define init_elf_binfmt init_compat_elf_binfmt
#define exit_elf_binfmt exit_compat_elf_binfmt
+#define binfmt_elf_test_cases compat_binfmt_elf_test_cases
+#define binfmt_elf_test_suite compat_binfmt_elf_test_suite
/*
* We share all the actual code with the native (64-bit) version.
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 1466b5d01cbb..d1f9d2632202 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -34,6 +34,14 @@
*/
DEFINE_SPINLOCK(configfs_dirent_lock);
+/*
+ * All of link_obj/unlink_obj/link_group/unlink_group require that
+ * subsys->su_mutex is held.
+ * But parent configfs_subsystem is NULL when config_item is root.
+ * Use this mutex when config_item is root.
+ */
+static DEFINE_MUTEX(configfs_subsystem_mutex);
+
static void configfs_d_iput(struct dentry * dentry,
struct inode * inode)
{
@@ -1780,8 +1788,8 @@ void configfs_unregister_group(struct config_group *group)
configfs_detach_group(&group->cg_item);
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
+ d_drop(dentry);
fsnotify_rmdir(d_inode(parent), dentry);
- d_delete(dentry);
inode_unlock(d_inode(parent));
dput(dentry);
@@ -1859,7 +1867,9 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
group->cg_item.ci_name = group->cg_item.ci_namebuf;
sd = root->d_fsdata;
+ mutex_lock(&configfs_subsystem_mutex);
link_group(to_config_group(sd->s_element), group);
+ mutex_unlock(&configfs_subsystem_mutex);
inode_lock_nested(d_inode(root), I_MUTEX_PARENT);
@@ -1884,7 +1894,9 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
inode_unlock(d_inode(root));
if (err) {
+ mutex_lock(&configfs_subsystem_mutex);
unlink_group(group);
+ mutex_unlock(&configfs_subsystem_mutex);
configfs_release_fs();
}
put_fragment(frag);
@@ -1922,16 +1934,18 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
configfs_detach_group(&group->cg_item);
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
- fsnotify_rmdir(d_inode(root), dentry);
inode_unlock(d_inode(dentry));
- d_delete(dentry);
+ d_drop(dentry);
+ fsnotify_rmdir(d_inode(root), dentry);
inode_unlock(d_inode(root));
dput(dentry);
+ mutex_lock(&configfs_subsystem_mutex);
unlink_group(group);
+ mutex_unlock(&configfs_subsystem_mutex);
configfs_release_fs();
}
diff --git a/fs/coredump.c b/fs/coredump.c
index 1c060c0a2d72..7ed7d601e5e0 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -42,6 +42,7 @@
#include <linux/path.h>
#include <linux/timekeeping.h>
#include <linux/sysctl.h>
+#include <linux/elf.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -53,6 +54,9 @@
#include <trace/events/sched.h>
+static bool dump_vma_snapshot(struct coredump_params *cprm);
+static void free_vma_snapshot(struct coredump_params *cprm);
+
static int core_uses_pid;
static unsigned int core_pipe_limit;
static char core_pattern[CORENAME_MAX_SIZE] = "core";
@@ -531,6 +535,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
* by any locks.
*/
.mm_flags = mm->flags,
+ .vma_meta = NULL,
};
audit_core_dumps(siginfo->si_signo);
@@ -745,6 +750,9 @@ void do_coredump(const kernel_siginfo_t *siginfo)
pr_info("Core dump to |%s disabled\n", cn.corename);
goto close_fail;
}
+ if (!dump_vma_snapshot(&cprm))
+ goto close_fail;
+
file_start_write(cprm.file);
core_dumped = binfmt->core_dump(&cprm);
/*
@@ -758,6 +766,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
dump_emit(&cprm, "", 1);
}
file_end_write(cprm.file);
+ free_vma_snapshot(&cprm);
}
if (ispipe && core_pipe_limit)
wait_for_dump_helpers(cprm.file);
@@ -980,6 +989,8 @@ static bool always_dump_vma(struct vm_area_struct *vma)
return false;
}
+#define DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER 1
+
/*
* Decide how much of @vma's contents should be included in a core dump.
*/
@@ -1039,9 +1050,20 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
* dump the first page to aid in determining what was mapped here.
*/
if (FILTER(ELF_HEADERS) &&
- vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ) &&
- (READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0)
- return PAGE_SIZE;
+ vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
+ if ((READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0)
+ return PAGE_SIZE;
+
+ /*
+ * ELF libraries aren't always executable.
+ * We'll want to check whether the mapping starts with the ELF
+ * magic, but not now - we're holding the mmap lock,
+ * so copy_from_user() doesn't work here.
+ * Use a placeholder instead, and fix it up later in
+ * dump_vma_snapshot().
+ */
+ return DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER;
+ }
#undef FILTER
@@ -1078,18 +1100,29 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
return gate_vma;
}
+static void free_vma_snapshot(struct coredump_params *cprm)
+{
+ if (cprm->vma_meta) {
+ int i;
+ for (i = 0; i < cprm->vma_count; i++) {
+ struct file *file = cprm->vma_meta[i].file;
+ if (file)
+ fput(file);
+ }
+ kvfree(cprm->vma_meta);
+ cprm->vma_meta = NULL;
+ }
+}
+
/*
* Under the mmap_lock, take a snapshot of relevant information about the task's
* VMAs.
*/
-int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
- struct core_vma_metadata **vma_meta,
- size_t *vma_data_size_ptr)
+static bool dump_vma_snapshot(struct coredump_params *cprm)
{
struct vm_area_struct *vma, *gate_vma;
struct mm_struct *mm = current->mm;
int i;
- size_t vma_data_size = 0;
/*
* Once the stack expansion code is fixed to not change VMA bounds
@@ -1097,36 +1130,51 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
* mmap_lock in read mode.
*/
if (mmap_write_lock_killable(mm))
- return -EINTR;
+ return false;
+ cprm->vma_data_size = 0;
gate_vma = get_gate_vma(mm);
- *vma_count = mm->map_count + (gate_vma ? 1 : 0);
+ cprm->vma_count = mm->map_count + (gate_vma ? 1 : 0);
- *vma_meta = kvmalloc_array(*vma_count, sizeof(**vma_meta), GFP_KERNEL);
- if (!*vma_meta) {
+ cprm->vma_meta = kvmalloc_array(cprm->vma_count, sizeof(*cprm->vma_meta), GFP_KERNEL);
+ if (!cprm->vma_meta) {
mmap_write_unlock(mm);
- return -ENOMEM;
+ return false;
}
for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
vma = next_vma(vma, gate_vma), i++) {
- struct core_vma_metadata *m = (*vma_meta) + i;
+ struct core_vma_metadata *m = cprm->vma_meta + i;
m->start = vma->vm_start;
m->end = vma->vm_end;
m->flags = vma->vm_flags;
m->dump_size = vma_dump_size(vma, cprm->mm_flags);
+ m->pgoff = vma->vm_pgoff;
- vma_data_size += m->dump_size;
+ m->file = vma->vm_file;
+ if (m->file)
+ get_file(m->file);
}
mmap_write_unlock(mm);
- if (WARN_ON(i != *vma_count)) {
- kvfree(*vma_meta);
- return -EFAULT;
+ for (i = 0; i < cprm->vma_count; i++) {
+ struct core_vma_metadata *m = cprm->vma_meta + i;
+
+ if (m->dump_size == DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER) {
+ char elfmag[SELFMAG];
+
+ if (copy_from_user(elfmag, (void __user *)m->start, SELFMAG) ||
+ memcmp(elfmag, ELFMAG, SELFMAG) != 0) {
+ m->dump_size = 0;
+ } else {
+ m->dump_size = PAGE_SIZE;
+ }
+ }
+
+ cprm->vma_data_size += m->dump_size;
}
- *vma_data_size_ptr = vma_data_size;
- return 0;
+ return true;
}
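
/*
 * Distilled sketch of the defer-while-locked pattern above: record a
 * sentinel while holding a lock under which page faults are forbidden,
 * then resolve it after the lock is dropped, where copy_from_user()
 * may safely fault. Sketch only; the sentinel name is hypothetical.
 */
#include <linux/coredump.h>
#include <linux/elf.h>
#include <linux/string.h>
#include <linux/uaccess.h>

#define SIZE_CHECK_LATER 1	/* sentinel; never a real dump size */

static void resolve_elfhdr_placeholder(struct core_vma_metadata *m)
{
	char magic[SELFMAG];

	if (m->dump_size != SIZE_CHECK_LATER)
		return;

	/* mmap lock no longer held, so a fault here cannot deadlock */
	if (copy_from_user(magic, (void __user *)m->start, SELFMAG) ||
	    memcmp(magic, ELFMAG, SELFMAG) != 0)
		m->dump_size = 0;
	else
		m->dump_size = PAGE_SIZE;
}
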
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index bfc2a5b74ed3..2217fe5ece6f 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -54,7 +54,8 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
int num_pages = 0;
/* This always succeeds since __GFP_DIRECT_RECLAIM is set. */
- bio = bio_alloc(GFP_NOFS, BIO_MAX_VECS);
+ bio = bio_alloc(inode->i_sb->s_bdev, BIO_MAX_VECS, REQ_OP_WRITE,
+ GFP_NOFS);
while (len) {
unsigned int blocks_this_page = min(len, blocks_per_page);
@@ -62,10 +63,8 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
if (num_pages == 0) {
fscrypt_set_bio_crypt_ctx(bio, inode, lblk, GFP_NOFS);
- bio_set_dev(bio, inode->i_sb->s_bdev);
bio->bi_iter.bi_sector =
pblk << (blockbits - SECTOR_SHIFT);
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
}
ret = bio_add_page(bio, ZERO_PAGE(0), bytes_this_page, 0);
if (WARN_ON(ret != bytes_this_page)) {
@@ -81,7 +80,7 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
err = submit_bio_wait(bio);
if (err)
goto out;
- bio_reset(bio);
+ bio_reset(bio, inode->i_sb->s_bdev, REQ_OP_WRITE);
num_pages = 0;
}
}
@@ -150,12 +149,10 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
return -EINVAL;
/* This always succeeds since __GFP_DIRECT_RECLAIM is set. */
- bio = bio_alloc(GFP_NOFS, nr_pages);
+ bio = bio_alloc(inode->i_sb->s_bdev, nr_pages, REQ_OP_WRITE, GFP_NOFS);
do {
- bio_set_dev(bio, inode->i_sb->s_bdev);
bio->bi_iter.bi_sector = pblk << (blockbits - 9);
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
i = 0;
offset = 0;
@@ -182,7 +179,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
err = submit_bio_wait(bio);
if (err)
goto out;
- bio_reset(bio);
+ bio_reset(bio, inode->i_sb->s_bdev, REQ_OP_WRITE);
} while (len != 0);
err = 0;
out:
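
/*
 * Sketch of the block-layer API change the fscrypt hunks above adopt:
 * the target device and operation are now passed to bio_alloc() and
 * bio_reset() up front instead of being set afterwards. Illustrative
 * helpers only.
 */
#include <linux/bio.h>
#include <linux/blk_types.h>

static struct bio *zeroout_bio_prepare(struct block_device *bdev,
				       sector_t sector)
{
	struct bio *bio;

	/* Previously three calls:
	 *   bio = bio_alloc(GFP_NOFS, BIO_MAX_VECS);
	 *   bio_set_dev(bio, bdev);
	 *   bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
	 */
	bio = bio_alloc(bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOFS);
	bio->bi_iter.bi_sector = sector;
	return bio;
}

/* After submit_bio_wait(), reuse keeps the same shape: */
static void zeroout_bio_recycle(struct bio *bio, struct block_device *bdev)
{
	bio_reset(bio, bdev, REQ_OP_WRITE);
}
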
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 4ef3f714046a..4fcca79f39ae 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -69,6 +69,14 @@ void fscrypt_free_bounce_page(struct page *bounce_page)
}
EXPORT_SYMBOL(fscrypt_free_bounce_page);
+/*
+ * Generate the IV for the given logical block number within the given file.
+ * For filenames encryption, lblk_num == 0.
+ *
+ * Keep this in sync with fscrypt_limit_io_blocks(). fscrypt_limit_io_blocks()
+ * needs to know about any IV generation methods where the low bits of IV don't
+ * simply contain the lblk_num (e.g., IV_INO_LBLK_32).
+ */
void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
const struct fscrypt_info *ci)
{
diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c
index c57bebfa48fe..93c2ca858092 100644
--- a/fs/crypto/inline_crypt.c
+++ b/fs/crypto/inline_crypt.c
@@ -17,6 +17,7 @@
#include <linux/buffer_head.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
+#include <linux/uio.h>
#include "fscrypt_private.h"
@@ -315,6 +316,10 @@ EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx_bh);
*
* fscrypt_set_bio_crypt_ctx() must have already been called on the bio.
*
+ * This function isn't required in cases where crypto-mergeability is ensured in
+ * another way, such as I/O targeting only a single file (and thus a single key)
+ * combined with fscrypt_limit_io_blocks() to ensure DUN contiguity.
+ *
* Return: true iff the I/O is mergeable
*/
bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode,
@@ -363,3 +368,91 @@ bool fscrypt_mergeable_bio_bh(struct bio *bio,
return fscrypt_mergeable_bio(bio, inode, next_lblk);
}
EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio_bh);
+
+/**
+ * fscrypt_dio_supported() - check whether a DIO (direct I/O) request is
+ * supported as far as encryption is concerned
+ * @iocb: the file and position the I/O is targeting
+ * @iter: the I/O data segment(s)
+ *
+ * Return: %true if there are no encryption constraints that prevent DIO from
+ * being supported; %false if DIO is unsupported. (Note that in the
+ * %true case, the filesystem might have other, non-encryption-related
+ * constraints that prevent DIO from actually being supported.)
+ */
+bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
+{
+ const struct inode *inode = file_inode(iocb->ki_filp);
+ const unsigned int blocksize = i_blocksize(inode);
+
+ /* If the file is unencrypted, no veto from us. */
+ if (!fscrypt_needs_contents_encryption(inode))
+ return true;
+
+ /* We only support DIO with inline crypto, not fs-layer crypto. */
+ if (!fscrypt_inode_uses_inline_crypto(inode))
+ return false;
+
+ /*
+ * Since the granularity of encryption is filesystem blocks, the file
+ * position and total I/O length must be aligned to the filesystem block
+ * size -- not just to the block device's logical block size as is
+ * traditionally the case for DIO on many filesystems.
+ *
+ * We require that the user-provided memory buffers be filesystem block
+ * aligned too. It is simpler to have a single alignment value required
+ * for all properties of the I/O, as is normally the case for DIO.
+ * Also, allowing less aligned buffers would imply that data units could
+ * cross bvecs, which would greatly complicate the I/O stack, which
+ * assumes that bios can be split at any bvec boundary.
+ */
+ if (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), blocksize))
+ return false;
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(fscrypt_dio_supported);
+
+/**
+ * fscrypt_limit_io_blocks() - limit I/O blocks to avoid discontiguous DUNs
+ * @inode: the file on which I/O is being done
+ * @lblk: the block at which the I/O is being started
+ * @nr_blocks: the number of blocks we want to submit starting at @lblk
+ *
+ * Determine the limit to the number of blocks that can be submitted in a bio
+ * targeting @lblk without causing a data unit number (DUN) discontiguity.
+ *
+ * This is normally just @nr_blocks, as the DUNs normally just increment along
+ * with the logical blocks. (Or the file is not encrypted.)
+ *
+ * In rare cases, fscrypt can be using an IV generation method that allows the
+ * DUN to wrap around within logically contiguous blocks, and that wraparound
+ * will occur. If this happens, a value less than @nr_blocks will be returned
+ * so that the wraparound doesn't occur in the middle of a bio, which would
+ * cause encryption/decryption to produce wrong results.
+ *
+ * Return: the actual number of blocks that can be submitted
+ */
+u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks)
+{
+ const struct fscrypt_info *ci;
+ u32 dun;
+
+ if (!fscrypt_inode_uses_inline_crypto(inode))
+ return nr_blocks;
+
+ if (nr_blocks <= 1)
+ return nr_blocks;
+
+ ci = inode->i_crypt_info;
+ if (!(fscrypt_policy_flags(&ci->ci_policy) &
+ FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32))
+ return nr_blocks;
+
+ /* With IV_INO_LBLK_32, the DUN can wrap around from U32_MAX to 0. */
+
+ dun = ci->ci_hashed_ino + lblk;
+
+ return min_t(u64, nr_blocks, (u64)U32_MAX + 1 - dun);
+}
+EXPORT_SYMBOL_GPL(fscrypt_limit_io_blocks);
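
A worked example of the wraparound math above: with IV_INO_LBLK_32 the DUN is
the 32-bit sum ci_hashed_ino + lblk, so if that sum is 0xfffffffe only two
more blocks fit before the DUN wraps to 0, and fscrypt_limit_io_blocks()
returns min(nr_blocks, 2). A hedged sketch of a submission loop using it
(submit_blocks() is an assumed helper):

	while (nr_blocks) {
		u64 this_bio = fscrypt_limit_io_blocks(inode, lblk, nr_blocks);

		/* a DUN wraparound now always starts a fresh bio */
		submit_blocks(inode, lblk, this_bio);
		lblk += this_bio;
		nr_blocks -= this_bio;
	}
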
diff --git a/fs/dax.c b/fs/dax.c
index cd03485867a7..ab0978739eaa 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -11,7 +11,6 @@
#include <linux/buffer_head.h>
#include <linux/dax.h>
#include <linux/fs.h>
-#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/memcontrol.h>
#include <linux/mm.h>
diff --git a/fs/dcache.c b/fs/dcache.c
index c84269c6e8bf..93f4f5ee07bf 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1766,7 +1766,8 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
char *dname;
int err;
- dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
+ dentry = kmem_cache_alloc_lru(dentry_cache, &sb->s_dentry_lru,
+ GFP_KERNEL);
if (!dentry)
return NULL;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 42e5a766d33c..4f25015aa534 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -621,8 +621,8 @@ void devpts_pty_kill(struct dentry *dentry)
dentry->d_fsdata = NULL;
drop_nlink(dentry->d_inode);
- fsnotify_unlink(d_inode(dentry->d_parent), dentry);
d_drop(dentry);
+ fsnotify_unlink(d_inode(dentry->d_parent), dentry);
dput(dentry); /* d_alloc_name() in devpts_pty_new() */
}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 654443558047..38bca4980a1c 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -396,11 +396,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
* bio_alloc() is guaranteed to return a bio when allowed to sleep and
* we request a valid number of vectors.
*/
- bio = bio_alloc(GFP_KERNEL, nr_vecs);
-
- bio_set_dev(bio, bdev);
+ bio = bio_alloc(bdev, nr_vecs, dio->op | dio->op_flags, GFP_KERNEL);
bio->bi_iter.bi_sector = first_sector;
- bio_set_op_attrs(bio, dio->op, dio->op_flags);
if (dio->is_async)
bio->bi_end_io = dio_bio_end_aio;
else
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 7d85e64ea62f..9ad61b582f07 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -540,12 +540,13 @@ const struct address_space_operations ecryptfs_aops = {
* XXX: This is pretty broken for multiple reasons: ecryptfs does not
* actually use buffer_heads, and ecryptfs will crash without
* CONFIG_BLOCK. But it matches the behavior before the default for
- * address_space_operations without the ->set_page_dirty method was
+ * address_space_operations without the ->dirty_folio method was
* cleaned up, so this is the best we can do without maintainer
* feedback.
*/
#ifdef CONFIG_BLOCK
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
#endif
.writepage = ecryptfs_writepage,
.readpage = ecryptfs_readpage,
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 39116af0390f..0b1c878317ab 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -38,7 +38,7 @@ static struct inode *ecryptfs_alloc_inode(struct super_block *sb)
struct ecryptfs_inode_info *inode_info;
struct inode *inode = NULL;
- inode_info = kmem_cache_alloc(ecryptfs_inode_info_cache, GFP_KERNEL);
+ inode_info = alloc_inode_sb(sb, ecryptfs_inode_info_cache, GFP_KERNEL);
if (unlikely(!inode_info))
goto out;
if (ecryptfs_init_crypt_stat(&inode_info->crypt_stat)) {
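
The ecryptfs and efs hunks (and the many similar ones below) all follow one
pattern: alloc_inode_sb() replaces a bare kmem_cache_alloc() in ->alloc_inode
so the allocation can be charged to the superblock's inode LRU, matching the
kmem_cache_alloc_lru() change in dcache above. A generic sketch
(fs_inode_cachep is a stand-in name):

	/* Before: slab allocation with no per-sb LRU accounting. */
	ei = kmem_cache_alloc(fs_inode_cachep, GFP_KERNEL);

	/* After: the superblock is passed so LRU/memcg setup can happen. */
	ei = alloc_inode_sb(sb, fs_inode_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;
	return &ei->vfs_inode;
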
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 62b155b9366b..b287f47c165b 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -69,7 +69,7 @@ static struct kmem_cache * efs_inode_cachep;
static struct inode *efs_alloc_inode(struct super_block *sb)
{
struct efs_inode_info *ei;
- ei = kmem_cache_alloc(efs_inode_cachep, GFP_KERNEL);
+ ei = alloc_inode_sb(sb, efs_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
return &ei->vfs_inode;
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index fa7ddb7ad980..780db1e5f4b7 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -28,10 +28,10 @@ void erofs_put_metabuf(struct erofs_buf *buf)
buf->page = NULL;
}
-void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
- erofs_blk_t blkaddr, enum erofs_kmap_type type)
+void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
+ erofs_blk_t blkaddr, enum erofs_kmap_type type)
{
- struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping;
+ struct address_space *const mapping = inode->i_mapping;
erofs_off_t offset = blknr_to_addr(blkaddr);
pgoff_t index = offset >> PAGE_SHIFT;
struct page *page = buf->page;
@@ -60,6 +60,12 @@ void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
return buf->base + (offset & ~PAGE_MASK);
}
+void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
+ erofs_blk_t blkaddr, enum erofs_kmap_type type)
+{
+ return erofs_bread(buf, sb->s_bdev->bd_inode, blkaddr, type);
+}
+
static int erofs_map_blocks_flatmode(struct inode *inode,
struct erofs_map_blocks *map,
int flags)
@@ -252,12 +258,10 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
return ret;
iomap->offset = map.m_la;
- if (flags & IOMAP_DAX) {
+ if (flags & IOMAP_DAX)
iomap->dax_dev = mdev.m_daxdev;
- iomap->offset += mdev.m_dax_part_off;
- } else {
+ else
iomap->bdev = mdev.m_bdev;
- }
iomap->length = map.m_llen;
iomap->flags = 0;
iomap->private = NULL;
@@ -284,6 +288,8 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
} else {
iomap->type = IOMAP_MAPPED;
iomap->addr = mdev.m_pa;
+ if (flags & IOMAP_DAX)
+ iomap->addr += mdev.m_dax_part_off;
}
return 0;
}
diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c
index eee9b0b31b63..18e59821c597 100644
--- a/fs/erofs/dir.c
+++ b/fs/erofs/dir.c
@@ -2,6 +2,7 @@
/*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* https://www.huawei.com/
+ * Copyright (C) 2022, Alibaba Cloud
*/
#include "internal.h"
@@ -67,7 +68,7 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
static int erofs_readdir(struct file *f, struct dir_context *ctx)
{
struct inode *dir = file_inode(f);
- struct address_space *mapping = dir->i_mapping;
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
const size_t dirsize = i_size_read(dir);
unsigned int i = ctx->pos / EROFS_BLKSIZ;
unsigned int ofs = ctx->pos % EROFS_BLKSIZ;
@@ -75,26 +76,19 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
bool initial = true;
while (ctx->pos < dirsize) {
- struct page *dentry_page;
struct erofs_dirent *de;
unsigned int nameoff, maxsize;
- dentry_page = read_mapping_page(mapping, i, NULL);
- if (dentry_page == ERR_PTR(-ENOMEM)) {
- err = -ENOMEM;
- break;
- } else if (IS_ERR(dentry_page)) {
+ de = erofs_bread(&buf, dir, i, EROFS_KMAP);
+ if (IS_ERR(de)) {
erofs_err(dir->i_sb,
"fail to readdir of logical block %u of nid %llu",
i, EROFS_I(dir)->nid);
- err = -EFSCORRUPTED;
+ err = PTR_ERR(de);
break;
}
- de = (struct erofs_dirent *)kmap(dentry_page);
-
nameoff = le16_to_cpu(de->nameoff);
-
if (nameoff < sizeof(struct erofs_dirent) ||
nameoff >= PAGE_SIZE) {
erofs_err(dir->i_sb,
@@ -119,10 +113,6 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
err = erofs_fill_dentries(dir, ctx, de, &ofs,
nameoff, maxsize);
skip_this:
- kunmap(dentry_page);
-
- put_page(dentry_page);
-
ctx->pos = blknr_to_addr(i) + ofs;
if (err)
@@ -130,6 +120,7 @@ skip_this:
++i;
ofs = 0;
}
+ erofs_put_metabuf(&buf);
return err < 0 ? err : 0;
}
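
The metabuf API adopted above has a simple lifecycle: initialize a struct
erofs_buf on the stack, call erofs_bread() (or erofs_read_metabuf()) as often
as needed, then release once with erofs_put_metabuf(). A minimal sketch under
those assumptions:

	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	struct erofs_dirent *de;

	de = erofs_bread(&buf, dir, blkaddr, EROFS_KMAP);
	if (IS_ERR(de))
		return PTR_ERR(de);

	/* ... use the mapped block; further erofs_bread() calls reuse buf ... */

	erofs_put_metabuf(&buf);	/* single release; safe on an empty buf */
	return 0;
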
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index 3ea62c6fb00a..1238ca104f09 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -12,6 +12,7 @@
#define EROFS_SUPER_OFFSET 1024
#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001
+#define EROFS_FEATURE_COMPAT_MTIME 0x00000002
/*
* Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
@@ -186,8 +187,8 @@ struct erofs_inode_extended {
__le32 i_uid;
__le32 i_gid;
- __le64 i_ctime;
- __le32 i_ctime_nsec;
+ __le64 i_mtime;
+ __le32 i_mtime_nsec;
__le32 i_nlink;
__u8 i_reserved2[16];
};
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index ff62f84f47d3..e8b37ba5e9ad 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -113,8 +113,8 @@ static void *erofs_read_inode(struct erofs_buf *buf,
set_nlink(inode, le32_to_cpu(die->i_nlink));
/* extended inode has its own timestamp */
- inode->i_ctime.tv_sec = le64_to_cpu(die->i_ctime);
- inode->i_ctime.tv_nsec = le32_to_cpu(die->i_ctime_nsec);
+ inode->i_ctime.tv_sec = le64_to_cpu(die->i_mtime);
+ inode->i_ctime.tv_nsec = le32_to_cpu(die->i_mtime_nsec);
inode->i_size = le64_to_cpu(die->i_size);
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index b8272fb95fd6..5298c4ee277d 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -325,7 +325,7 @@ struct erofs_inode {
unsigned char z_algorithmtype[2];
unsigned char z_logical_clusterbits;
unsigned long z_tailextent_headlcn;
- unsigned int z_idataoff;
+ erofs_off_t z_idataoff;
unsigned short z_idata_size;
};
#endif /* CONFIG_EROFS_FS_ZIP */
@@ -479,6 +479,8 @@ struct erofs_map_dev {
extern const struct file_operations erofs_file_fops;
void erofs_unmap_metabuf(struct erofs_buf *buf);
void erofs_put_metabuf(struct erofs_buf *buf);
+void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
+ erofs_blk_t blkaddr, enum erofs_kmap_type type);
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
erofs_blk_t blkaddr, enum erofs_kmap_type type);
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev);
diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c
index 8629e616028c..554efa363317 100644
--- a/fs/erofs/namei.c
+++ b/fs/erofs/namei.c
@@ -2,6 +2,7 @@
/*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* https://www.huawei.com/
+ * Copyright (C) 2022, Alibaba Cloud
*/
#include "xattr.h"
@@ -86,14 +87,14 @@ static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name,
return ERR_PTR(-ENOENT);
}
-static struct page *find_target_block_classic(struct inode *dir,
- struct erofs_qstr *name,
- int *_ndirents)
+static void *find_target_block_classic(struct erofs_buf *target,
+ struct inode *dir,
+ struct erofs_qstr *name,
+ int *_ndirents)
{
unsigned int startprfx, endprfx;
int head, back;
- struct address_space *const mapping = dir->i_mapping;
- struct page *candidate = ERR_PTR(-ENOENT);
+ void *candidate = ERR_PTR(-ENOENT);
startprfx = endprfx = 0;
head = 0;
@@ -101,10 +102,11 @@ static struct page *find_target_block_classic(struct inode *dir,
while (head <= back) {
const int mid = head + (back - head) / 2;
- struct page *page = read_mapping_page(mapping, mid, NULL);
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
+ struct erofs_dirent *de;
- if (!IS_ERR(page)) {
- struct erofs_dirent *de = kmap_atomic(page);
+ de = erofs_bread(&buf, dir, mid, EROFS_KMAP);
+ if (!IS_ERR(de)) {
const int nameoff = nameoff_from_disk(de->nameoff,
EROFS_BLKSIZ);
const int ndirents = nameoff / sizeof(*de);
@@ -113,13 +115,12 @@ static struct page *find_target_block_classic(struct inode *dir,
struct erofs_qstr dname;
if (!ndirents) {
- kunmap_atomic(de);
- put_page(page);
+ erofs_put_metabuf(&buf);
erofs_err(dir->i_sb,
"corrupted dir block %d @ nid %llu",
mid, EROFS_I(dir)->nid);
DBG_BUGON(1);
- page = ERR_PTR(-EFSCORRUPTED);
+ de = ERR_PTR(-EFSCORRUPTED);
goto out;
}
@@ -135,7 +136,6 @@ static struct page *find_target_block_classic(struct inode *dir,
/* string comparison without already matched prefix */
diff = erofs_dirnamecmp(name, &dname, &matched);
- kunmap_atomic(de);
if (!diff) {
*_ndirents = 0;
@@ -145,11 +145,12 @@ static struct page *find_target_block_classic(struct inode *dir,
startprfx = matched;
if (!IS_ERR(candidate))
- put_page(candidate);
- candidate = page;
+ erofs_put_metabuf(target);
+ *target = buf;
+ candidate = de;
*_ndirents = ndirents;
} else {
- put_page(page);
+ erofs_put_metabuf(&buf);
back = mid - 1;
endprfx = matched;
@@ -158,8 +159,8 @@ static struct page *find_target_block_classic(struct inode *dir,
}
out: /* free if the candidate is valid */
if (!IS_ERR(candidate))
- put_page(candidate);
- return page;
+ erofs_put_metabuf(target);
+ return de;
}
return candidate;
}
@@ -169,8 +170,7 @@ int erofs_namei(struct inode *dir,
erofs_nid_t *nid, unsigned int *d_type)
{
int ndirents;
- struct page *page;
- void *data;
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct erofs_dirent *de;
struct erofs_qstr qn;
@@ -181,26 +181,20 @@ int erofs_namei(struct inode *dir,
qn.end = name->name + name->len;
ndirents = 0;
- page = find_target_block_classic(dir, &qn, &ndirents);
- if (IS_ERR(page))
- return PTR_ERR(page);
+ de = find_target_block_classic(&buf, dir, &qn, &ndirents);
+ if (IS_ERR(de))
+ return PTR_ERR(de);
- data = kmap_atomic(page);
/* the target page has been mapped */
if (ndirents)
- de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents);
- else
- de = (struct erofs_dirent *)data;
+ de = find_target_dirent(&qn, (u8 *)de, EROFS_BLKSIZ, ndirents);
if (!IS_ERR(de)) {
*nid = le64_to_cpu(de->nid);
*d_type = de->file_type;
}
-
- kunmap_atomic(data);
- put_page(page);
-
+ erofs_put_metabuf(&buf);
return PTR_ERR_OR_ZERO(de);
}
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 915eefe0d7e2..0c4b41130c2f 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -84,7 +84,7 @@ static void erofs_inode_init_once(void *ptr)
static struct inode *erofs_alloc_inode(struct super_block *sb)
{
struct erofs_inode *vi =
- kmem_cache_alloc(erofs_inode_cachep, GFP_KERNEL);
+ alloc_inode_sb(sb, erofs_inode_cachep, GFP_KERNEL);
if (!vi)
return NULL;
@@ -281,21 +281,19 @@ static int erofs_init_devices(struct super_block *sb,
static int erofs_read_superblock(struct super_block *sb)
{
struct erofs_sb_info *sbi;
- struct page *page;
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct erofs_super_block *dsb;
unsigned int blkszbits;
void *data;
int ret;
- page = read_mapping_page(sb->s_bdev->bd_inode->i_mapping, 0, NULL);
- if (IS_ERR(page)) {
+ data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP);
+ if (IS_ERR(data)) {
erofs_err(sb, "cannot read erofs superblock");
- return PTR_ERR(page);
+ return PTR_ERR(data);
}
sbi = EROFS_SB(sb);
-
- data = kmap(page);
dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET);
ret = -EINVAL;
@@ -365,8 +363,7 @@ static int erofs_read_superblock(struct super_block *sb)
if (erofs_sb_has_ztailpacking(sbi))
erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!");
out:
- kunmap(page);
- put_page(page);
+ erofs_put_metabuf(&buf);
return ret;
}
@@ -535,25 +532,29 @@ static int erofs_managed_cache_releasepage(struct page *page, gfp_t gfp_mask)
return ret;
}
-static void erofs_managed_cache_invalidatepage(struct page *page,
- unsigned int offset,
- unsigned int length)
+/*
+ * It will be called only on inode eviction. In case there are still some
+ * decompression requests in progress, wait and reschedule for a bit here.
+ * We could introduce extra locking instead, but it seems unnecessary.
+ */
+static void erofs_managed_cache_invalidate_folio(struct folio *folio,
+ size_t offset, size_t length)
{
- const unsigned int stop = length + offset;
+ const size_t stop = length + offset;
- DBG_BUGON(!PageLocked(page));
+ DBG_BUGON(!folio_test_locked(folio));
/* Check for potential overflow in debug mode */
- DBG_BUGON(stop > PAGE_SIZE || stop < length);
+ DBG_BUGON(stop > folio_size(folio) || stop < length);
- if (offset == 0 && stop == PAGE_SIZE)
- while (!erofs_managed_cache_releasepage(page, GFP_NOFS))
+ if (offset == 0 && stop == folio_size(folio))
+ while (!erofs_managed_cache_releasepage(&folio->page, GFP_NOFS))
cond_resched();
}
static const struct address_space_operations managed_cache_aops = {
.releasepage = erofs_managed_cache_releasepage,
- .invalidatepage = erofs_managed_cache_invalidatepage,
+ .invalidate_folio = erofs_managed_cache_invalidate_folio,
};
static int erofs_init_managed_cache(struct super_block *sb)
@@ -568,8 +569,7 @@ static int erofs_init_managed_cache(struct super_block *sb)
inode->i_size = OFFSET_MAX;
inode->i_mapping->a_ops = &managed_cache_aops;
- mapping_set_gfp_mask(inode->i_mapping,
- GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE);
+ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
sbi->managed_cache = inode;
return 0;
}
diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c
index dac252bc9228..f3babf1e6608 100644
--- a/fs/erofs/sysfs.c
+++ b/fs/erofs/sysfs.c
@@ -221,9 +221,11 @@ void erofs_unregister_sysfs(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
- kobject_del(&sbi->s_kobj);
- kobject_put(&sbi->s_kobj);
- wait_for_completion(&sbi->s_kobj_unregister);
+ if (sbi->s_kobj.state_in_sysfs) {
+ kobject_del(&sbi->s_kobj);
+ kobject_put(&sbi->s_kobj);
+ wait_for_completion(&sbi->s_kobj_unregister);
+ }
}
int __init erofs_init_sysfs(void)
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 498b7666efe8..0ed880f42525 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -192,7 +192,10 @@ enum z_erofs_collectmode {
COLLECT_PRIMARY_FOLLOWED,
};
-struct z_erofs_collector {
+struct z_erofs_decompress_frontend {
+ struct inode *const inode;
+ struct erofs_map_blocks map;
+
struct z_erofs_pagevec_ctor vector;
struct z_erofs_pcluster *pcl, *tailpcl;
@@ -202,13 +205,6 @@ struct z_erofs_collector {
z_erofs_next_pcluster_t owned_head;
enum z_erofs_collectmode mode;
-};
-
-struct z_erofs_decompress_frontend {
- struct inode *const inode;
-
- struct z_erofs_collector clt;
- struct erofs_map_blocks map;
bool readahead;
/* used for applying cache strategy on the fly */
@@ -216,30 +212,30 @@ struct z_erofs_decompress_frontend {
erofs_off_t headoffset;
};
-#define COLLECTOR_INIT() { \
- .owned_head = Z_EROFS_PCLUSTER_TAIL, \
- .mode = COLLECT_PRIMARY_FOLLOWED }
-
#define DECOMPRESS_FRONTEND_INIT(__i) { \
- .inode = __i, .clt = COLLECTOR_INIT(), \
- .backmost = true, }
+ .inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \
+ .mode = COLLECT_PRIMARY_FOLLOWED }
static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES];
static DEFINE_MUTEX(z_pagemap_global_lock);
-static void preload_compressed_pages(struct z_erofs_collector *clt,
- struct address_space *mc,
- enum z_erofs_cache_alloctype type,
- struct page **pagepool)
+static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
+ enum z_erofs_cache_alloctype type,
+ struct page **pagepool)
{
- struct z_erofs_pcluster *pcl = clt->pcl;
+ struct address_space *mc = MNGD_MAPPING(EROFS_I_SB(fe->inode));
+ struct z_erofs_pcluster *pcl = fe->pcl;
bool standalone = true;
+ /*
+ * Optimistic allocation without direct reclaim, since inplace I/O
+ * can be used as a fallback under memory pressure.
+ */
gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
struct page **pages;
pgoff_t index;
- if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
+ if (fe->mode < COLLECT_PRIMARY_FOLLOWED)
return;
pages = pcl->compressed_pages;
@@ -288,7 +284,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
* managed cache since it can be moved to the bypass queue instead.
*/
if (standalone)
- clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
+ fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
}
/* called by erofs_shrinker to get rid of all compressed_pages */
@@ -350,47 +346,47 @@ int erofs_try_to_free_cached_page(struct page *page)
}
/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
-static bool z_erofs_try_inplace_io(struct z_erofs_collector *clt,
+static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe,
struct page *page)
{
- struct z_erofs_pcluster *const pcl = clt->pcl;
+ struct z_erofs_pcluster *const pcl = fe->pcl;
- while (clt->icpage_ptr > pcl->compressed_pages)
- if (!cmpxchg(--clt->icpage_ptr, NULL, page))
+ while (fe->icpage_ptr > pcl->compressed_pages)
+ if (!cmpxchg(--fe->icpage_ptr, NULL, page))
return true;
return false;
}
/* callers must hold the collection lock */
-static int z_erofs_attach_page(struct z_erofs_collector *clt,
+static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
struct page *page, enum z_erofs_page_type type,
bool pvec_safereuse)
{
int ret;
/* give priority to inplace I/O */
- if (clt->mode >= COLLECT_PRIMARY &&
+ if (fe->mode >= COLLECT_PRIMARY &&
type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
- z_erofs_try_inplace_io(clt, page))
+ z_erofs_try_inplace_io(fe, page))
return 0;
- ret = z_erofs_pagevec_enqueue(&clt->vector, page, type,
+ ret = z_erofs_pagevec_enqueue(&fe->vector, page, type,
pvec_safereuse);
- clt->cl->vcnt += (unsigned int)ret;
+ fe->cl->vcnt += (unsigned int)ret;
return ret ? 0 : -EAGAIN;
}
-static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt)
+static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
{
- struct z_erofs_pcluster *pcl = clt->pcl;
- z_erofs_next_pcluster_t *owned_head = &clt->owned_head;
+ struct z_erofs_pcluster *pcl = f->pcl;
+ z_erofs_next_pcluster_t *owned_head = &f->owned_head;
/* type 1, nil pcluster (this pcluster doesn't belong to any chain.) */
if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL,
*owned_head) == Z_EROFS_PCLUSTER_NIL) {
*owned_head = &pcl->next;
/* so we can attach this pcluster to our submission chain. */
- clt->mode = COLLECT_PRIMARY_FOLLOWED;
+ f->mode = COLLECT_PRIMARY_FOLLOWED;
return;
}
@@ -401,24 +397,24 @@ static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt)
if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
*owned_head) == Z_EROFS_PCLUSTER_TAIL) {
*owned_head = Z_EROFS_PCLUSTER_TAIL;
- clt->mode = COLLECT_PRIMARY_HOOKED;
- clt->tailpcl = NULL;
+ f->mode = COLLECT_PRIMARY_HOOKED;
+ f->tailpcl = NULL;
return;
}
/* type 3, it belongs to a chain, but it isn't the end of the chain */
- clt->mode = COLLECT_PRIMARY;
+ f->mode = COLLECT_PRIMARY;
}
-static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
+static int z_erofs_lookup_collection(struct z_erofs_decompress_frontend *fe,
struct inode *inode,
struct erofs_map_blocks *map)
{
- struct z_erofs_pcluster *pcl = clt->pcl;
+ struct z_erofs_pcluster *pcl = fe->pcl;
struct z_erofs_collection *cl;
unsigned int length;
/* to avoid unexpected loop formed by corrupted images */
- if (clt->owned_head == &pcl->next || pcl == clt->tailpcl) {
+ if (fe->owned_head == &pcl->next || pcl == fe->tailpcl) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
@@ -449,15 +445,15 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
}
mutex_lock(&cl->lock);
/* used to check tail merging loop due to corrupted images */
- if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
- clt->tailpcl = pcl;
+ if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL)
+ fe->tailpcl = pcl;
- z_erofs_try_to_claim_pcluster(clt);
- clt->cl = cl;
+ z_erofs_try_to_claim_pcluster(fe);
+ fe->cl = cl;
return 0;
}
-static int z_erofs_register_collection(struct z_erofs_collector *clt,
+static int z_erofs_register_collection(struct z_erofs_decompress_frontend *fe,
struct inode *inode,
struct erofs_map_blocks *map)
{
@@ -485,8 +481,8 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
Z_EROFS_PCLUSTER_FULL_LENGTH : 0);
/* new pclusters should be claimed as type 1, primary and followed */
- pcl->next = clt->owned_head;
- clt->mode = COLLECT_PRIMARY_FOLLOWED;
+ pcl->next = fe->owned_head;
+ fe->mode = COLLECT_PRIMARY_FOLLOWED;
cl = z_erofs_primarycollection(pcl);
cl->pageofs = map->m_la & ~PAGE_MASK;
@@ -512,18 +508,18 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
}
if (grp != &pcl->obj) {
- clt->pcl = container_of(grp,
+ fe->pcl = container_of(grp,
struct z_erofs_pcluster, obj);
err = -EEXIST;
goto err_out;
}
}
/* used to check tail merging loop due to corrupted images */
- if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
- clt->tailpcl = pcl;
- clt->owned_head = &pcl->next;
- clt->pcl = pcl;
- clt->cl = cl;
+ if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL)
+ fe->tailpcl = pcl;
+ fe->owned_head = &pcl->next;
+ fe->pcl = pcl;
+ fe->cl = cl;
return 0;
err_out:
@@ -532,18 +528,18 @@ err_out:
return err;
}
-static int z_erofs_collector_begin(struct z_erofs_collector *clt,
+static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe,
struct inode *inode,
struct erofs_map_blocks *map)
{
struct erofs_workgroup *grp;
int ret;
- DBG_BUGON(clt->cl);
+ DBG_BUGON(fe->cl);
/* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous collection */
- DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL);
- DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
+ DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL);
+ DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
if (map->m_flags & EROFS_MAP_META) {
if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
@@ -555,28 +551,28 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,
grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
if (grp) {
- clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
+ fe->pcl = container_of(grp, struct z_erofs_pcluster, obj);
} else {
tailpacking:
- ret = z_erofs_register_collection(clt, inode, map);
+ ret = z_erofs_register_collection(fe, inode, map);
if (!ret)
goto out;
if (ret != -EEXIST)
return ret;
}
- ret = z_erofs_lookup_collection(clt, inode, map);
+ ret = z_erofs_lookup_collection(fe, inode, map);
if (ret) {
- erofs_workgroup_put(&clt->pcl->obj);
+ erofs_workgroup_put(&fe->pcl->obj);
return ret;
}
out:
- z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS,
- clt->cl->pagevec, clt->cl->vcnt);
+ z_erofs_pagevec_ctor_init(&fe->vector, Z_EROFS_NR_INLINE_PAGEVECS,
+ fe->cl->pagevec, fe->cl->vcnt);
/* since file-backed online pages are traversed in reverse order */
- clt->icpage_ptr = clt->pcl->compressed_pages +
- z_erofs_pclusterpages(clt->pcl);
+ fe->icpage_ptr = fe->pcl->compressed_pages +
+ z_erofs_pclusterpages(fe->pcl);
return 0;
}
@@ -610,24 +606,24 @@ static void z_erofs_collection_put(struct z_erofs_collection *cl)
erofs_workgroup_put(&pcl->obj);
}
-static bool z_erofs_collector_end(struct z_erofs_collector *clt)
+static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe)
{
- struct z_erofs_collection *cl = clt->cl;
+ struct z_erofs_collection *cl = fe->cl;
if (!cl)
return false;
- z_erofs_pagevec_ctor_exit(&clt->vector, false);
+ z_erofs_pagevec_ctor_exit(&fe->vector, false);
mutex_unlock(&cl->lock);
/*
* if all pending pages are added, don't hold its reference
* any longer if the pcluster isn't hosted by ourselves.
*/
- if (clt->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE)
+ if (fe->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE)
z_erofs_collection_put(cl);
- clt->cl = NULL;
+ fe->cl = NULL;
return true;
}
@@ -651,7 +647,6 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
struct inode *const inode = fe->inode;
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
struct erofs_map_blocks *const map = &fe->map;
- struct z_erofs_collector *const clt = &fe->clt;
const loff_t offset = page_offset(page);
bool tight = true;
@@ -672,7 +667,7 @@ repeat:
if (offset + cur >= map->m_la &&
offset + cur < map->m_la + map->m_llen) {
/* didn't get a valid collection previously (very rare) */
- if (!clt->cl)
+ if (!fe->cl)
goto restart_now;
goto hitted;
}
@@ -680,7 +675,7 @@ repeat:
/* go ahead the next map_blocks */
erofs_dbg("%s: [out-of-range] pos %llu", __func__, offset + cur);
- if (z_erofs_collector_end(clt))
+ if (z_erofs_collector_end(fe))
fe->backmost = false;
map->m_la = offset + cur;
@@ -693,11 +688,11 @@ restart_now:
if (!(map->m_flags & EROFS_MAP_MAPPED))
goto hitted;
- err = z_erofs_collector_begin(clt, inode, map);
+ err = z_erofs_collector_begin(fe, inode, map);
if (err)
goto err_out;
- if (z_erofs_is_inline_pcluster(clt->pcl)) {
+ if (z_erofs_is_inline_pcluster(fe->pcl)) {
void *mp;
mp = erofs_read_metabuf(&fe->map.buf, inode->i_sb,
@@ -709,20 +704,18 @@ restart_now:
goto err_out;
}
get_page(fe->map.buf.page);
- WRITE_ONCE(clt->pcl->compressed_pages[0], fe->map.buf.page);
- clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
+ WRITE_ONCE(fe->pcl->compressed_pages[0], fe->map.buf.page);
+ fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
} else {
- /* preload all compressed pages (can change mode if needed) */
+ /* bind cache first when cached decompression is preferred */
if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy,
map->m_la))
cache_strategy = TRYALLOC;
else
cache_strategy = DONTALLOC;
- preload_compressed_pages(clt, MNGD_MAPPING(sbi),
- cache_strategy, pagepool);
+ z_erofs_bind_cache(fe, cache_strategy, pagepool);
}
-
hitted:
/*
* Ensure the current partial page belongs to this submit chain rather
@@ -730,8 +723,8 @@ hitted:
* those chains are handled asynchronously thus the page cannot be used
* for inplace I/O or pagevec (should be processed in strict order.)
*/
- tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED &&
- clt->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE);
+ tight &= (fe->mode >= COLLECT_PRIMARY_HOOKED &&
+ fe->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE);
cur = end - min_t(unsigned int, offset + end - map->m_la, end);
if (!(map->m_flags & EROFS_MAP_MAPPED)) {
@@ -746,18 +739,18 @@ hitted:
Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));
if (cur)
- tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED);
+ tight &= (fe->mode >= COLLECT_PRIMARY_FOLLOWED);
retry:
- err = z_erofs_attach_page(clt, page, page_type,
- clt->mode >= COLLECT_PRIMARY_FOLLOWED);
+ err = z_erofs_attach_page(fe, page, page_type,
+ fe->mode >= COLLECT_PRIMARY_FOLLOWED);
/* should allocate an additional short-lived page for pagevec */
if (err == -EAGAIN) {
struct page *const newpage =
alloc_page(GFP_NOFS | __GFP_NOFAIL);
set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
- err = z_erofs_attach_page(clt, newpage,
+ err = z_erofs_attach_page(fe, newpage,
Z_EROFS_PAGE_TYPE_EXCLUSIVE, true);
if (!err)
goto retry;
@@ -773,7 +766,7 @@ retry:
/* bump up the number of split parts of a page */
++spiltted;
/* also update nr_pages */
- clt->cl->nr_pages = max_t(pgoff_t, clt->cl->nr_pages, index + 1);
+ fe->cl->nr_pages = max_t(pgoff_t, fe->cl->nr_pages, index + 1);
next_part:
/* can be used for verification */
map->m_llen = offset + cur - map->m_la;
@@ -810,68 +803,11 @@ static bool z_erofs_get_sync_decompress_policy(struct erofs_sb_info *sbi,
return false;
}
-static void z_erofs_decompressqueue_work(struct work_struct *work);
-static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
- bool sync, int bios)
-{
- struct erofs_sb_info *const sbi = EROFS_SB(io->sb);
-
- /* wake up the caller thread for sync decompression */
- if (sync) {
- unsigned long flags;
-
- spin_lock_irqsave(&io->u.wait.lock, flags);
- if (!atomic_add_return(bios, &io->pending_bios))
- wake_up_locked(&io->u.wait);
- spin_unlock_irqrestore(&io->u.wait.lock, flags);
- return;
- }
-
- if (atomic_add_return(bios, &io->pending_bios))
- return;
- /* Use workqueue and sync decompression for atomic contexts only */
- if (in_atomic() || irqs_disabled()) {
- queue_work(z_erofs_workqueue, &io->u.work);
- /* enable sync decompression for readahead */
- if (sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO)
- sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON;
- return;
- }
- z_erofs_decompressqueue_work(&io->u.work);
-}
-
static bool z_erofs_page_is_invalidated(struct page *page)
{
return !page->mapping && !z_erofs_is_shortlived_page(page);
}
-static void z_erofs_decompressqueue_endio(struct bio *bio)
-{
- tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
- struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t);
- blk_status_t err = bio->bi_status;
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
-
- bio_for_each_segment_all(bvec, bio, iter_all) {
- struct page *page = bvec->bv_page;
-
- DBG_BUGON(PageUptodate(page));
- DBG_BUGON(z_erofs_page_is_invalidated(page));
-
- if (err)
- SetPageError(page);
-
- if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
- if (!err)
- SetPageUptodate(page);
- unlock_page(page);
- }
- }
- z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1);
- bio_put(bio);
-}
-
static int z_erofs_decompress_pcluster(struct super_block *sb,
struct z_erofs_pcluster *pcl,
struct page **pagepool)
@@ -1123,13 +1059,42 @@ static void z_erofs_decompressqueue_work(struct work_struct *work)
kvfree(bgq);
}
+static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
+ bool sync, int bios)
+{
+ struct erofs_sb_info *const sbi = EROFS_SB(io->sb);
+
+ /* wake up the caller thread for sync decompression */
+ if (sync) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&io->u.wait.lock, flags);
+ if (!atomic_add_return(bios, &io->pending_bios))
+ wake_up_locked(&io->u.wait);
+ spin_unlock_irqrestore(&io->u.wait.lock, flags);
+ return;
+ }
+
+ if (atomic_add_return(bios, &io->pending_bios))
+ return;
+ /* Use workqueue and sync decompression for atomic contexts only */
+ if (in_atomic() || irqs_disabled()) {
+ queue_work(z_erofs_workqueue, &io->u.work);
+ /* enable sync decompression for readahead */
+ if (sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO)
+ sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON;
+ return;
+ }
+ z_erofs_decompressqueue_work(&io->u.work);
+}
+
static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
unsigned int nr,
struct page **pagepool,
- struct address_space *mc,
- gfp_t gfp)
+ struct address_space *mc)
{
const pgoff_t index = pcl->obj.index;
+ gfp_t gfp = mapping_gfp_mask(mc);
bool tocache = false;
struct address_space *mapping;
@@ -1300,6 +1265,33 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
qtail[JQ_BYPASS] = &pcl->next;
}
+static void z_erofs_decompressqueue_endio(struct bio *bio)
+{
+ tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
+ struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t);
+ blk_status_t err = bio->bi_status;
+ struct bio_vec *bvec;
+ struct bvec_iter_all iter_all;
+
+ bio_for_each_segment_all(bvec, bio, iter_all) {
+ struct page *page = bvec->bv_page;
+
+ DBG_BUGON(PageUptodate(page));
+ DBG_BUGON(z_erofs_page_is_invalidated(page));
+
+ if (err)
+ SetPageError(page);
+
+ if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
+ if (!err)
+ SetPageUptodate(page);
+ unlock_page(page);
+ }
+ }
+ z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1);
+ bio_put(bio);
+}
+
static void z_erofs_submit_queue(struct super_block *sb,
struct z_erofs_decompress_frontend *f,
struct page **pagepool,
@@ -1310,7 +1302,7 @@ static void z_erofs_submit_queue(struct super_block *sb,
z_erofs_next_pcluster_t qtail[NR_JOBQUEUES];
struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
void *bi_private;
- z_erofs_next_pcluster_t owned_head = f->clt.owned_head;
+ z_erofs_next_pcluster_t owned_head = f->owned_head;
/* bio is NULL initially, so no need to initialize last_{index,bdev} */
pgoff_t last_index;
struct block_device *last_bdev;
@@ -1358,8 +1350,7 @@ static void z_erofs_submit_queue(struct super_block *sb,
struct page *page;
page = pickup_page_for_submission(pcl, i++, pagepool,
- MNGD_MAPPING(sbi),
- GFP_NOFS);
+ MNGD_MAPPING(sbi));
if (!page)
continue;
@@ -1371,15 +1362,14 @@ submit_bio_retry:
}
if (!bio) {
- bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS);
+ bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS,
+ REQ_OP_READ, GFP_NOIO);
bio->bi_end_io = z_erofs_decompressqueue_endio;
- bio_set_dev(bio, mdev.m_bdev);
last_bdev = mdev.m_bdev;
bio->bi_iter.bi_sector = (sector_t)cur <<
LOG_SECTORS_PER_BLOCK;
bio->bi_private = bi_private;
- bio->bi_opf = REQ_OP_READ;
if (f->readahead)
bio->bi_opf |= REQ_RAHEAD;
++nr_bios;
@@ -1418,7 +1408,7 @@ static void z_erofs_runqueue(struct super_block *sb,
{
struct z_erofs_decompressqueue io[NR_JOBQUEUES];
- if (f->clt.owned_head == Z_EROFS_PCLUSTER_TAIL)
+ if (f->owned_head == Z_EROFS_PCLUSTER_TAIL)
return;
z_erofs_submit_queue(sb, f, pagepool, io, &force_fg);
@@ -1518,7 +1508,7 @@ static int z_erofs_readpage(struct file *file, struct page *page)
err = z_erofs_do_read_page(&f, page, &pagepool);
z_erofs_pcluster_readmore(&f, NULL, 0, &pagepool, false);
- (void)z_erofs_collector_end(&f.clt);
+ (void)z_erofs_collector_end(&f);
/* if some compressed cluster ready, need submit them anyway */
z_erofs_runqueue(inode->i_sb, &f, &pagepool,
@@ -1568,7 +1558,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
put_page(page);
}
z_erofs_pcluster_readmore(&f, rac, 0, &pagepool, false);
- (void)z_erofs_collector_end(&f.clt);
+ (void)z_erofs_collector_end(&f);
z_erofs_runqueue(inode->i_sb, &f, &pagepool,
z_erofs_get_sync_decompress_policy(sbi, nr_pages));
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 18d7fd1a5064..572f0b8151ba 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -431,48 +431,47 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
unsigned int lookback_distance)
{
struct erofs_inode *const vi = EROFS_I(m->inode);
- struct erofs_map_blocks *const map = m->map;
const unsigned int lclusterbits = vi->z_logical_clusterbits;
- unsigned long lcn = m->lcn;
- int err;
- if (lcn < lookback_distance) {
- erofs_err(m->inode->i_sb,
- "bogus lookback distance @ nid %llu", vi->nid);
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
+ while (m->lcn >= lookback_distance) {
+ unsigned long lcn = m->lcn - lookback_distance;
+ int err;
- /* load extent head logical cluster if needed */
- lcn -= lookback_distance;
- err = z_erofs_load_cluster_from_disk(m, lcn, false);
- if (err)
- return err;
+ /* load extent head logical cluster if needed */
+ err = z_erofs_load_cluster_from_disk(m, lcn, false);
+ if (err)
+ return err;
- switch (m->type) {
- case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
- if (!m->delta[0]) {
+ switch (m->type) {
+ case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+ if (!m->delta[0]) {
+ erofs_err(m->inode->i_sb,
+ "invalid lookback distance 0 @ nid %llu",
+ vi->nid);
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+ }
+ lookback_distance = m->delta[0];
+ continue;
+ case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+ case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
+ case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
+ m->headtype = m->type;
+ m->map->m_la = (lcn << lclusterbits) | m->clusterofs;
+ return 0;
+ default:
erofs_err(m->inode->i_sb,
- "invalid lookback distance 0 @ nid %llu",
- vi->nid);
+ "unknown type %u @ lcn %lu of nid %llu",
+ m->type, lcn, vi->nid);
DBG_BUGON(1);
- return -EFSCORRUPTED;
+ return -EOPNOTSUPP;
}
- return z_erofs_extent_lookback(m, m->delta[0]);
- case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
- case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
- case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
- m->headtype = m->type;
- map->m_la = (lcn << lclusterbits) | m->clusterofs;
- break;
- default:
- erofs_err(m->inode->i_sb,
- "unknown type %u @ lcn %lu of nid %llu",
- m->type, lcn, vi->nid);
- DBG_BUGON(1);
- return -EOPNOTSUPP;
}
- return 0;
+
+ erofs_err(m->inode->i_sb, "bogus lookback distance @ nid %llu",
+ vi->nid);
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
}
static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
@@ -494,7 +493,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) ||
((m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) &&
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
- map->m_plen = 1 << lclusterbits;
+ map->m_plen = 1ULL << lclusterbits;
return 0;
}
lcn = m->lcn + 1;
@@ -540,7 +539,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
return -EFSCORRUPTED;
}
out:
- map->m_plen = m->compressedlcs << lclusterbits;
+ map->m_plen = (u64)m->compressedlcs << lclusterbits;
return 0;
err_bonus_cblkcnt:
erofs_err(m->inode->i_sb,
@@ -630,6 +629,13 @@ static int z_erofs_do_map_blocks(struct inode *inode,
if (endoff >= m.clusterofs) {
m.headtype = m.type;
map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
+ /*
+ * For ztailpacking files, in order to inline data more
+ * effectively, special EOF lclusters are now supported
+ * which can have three parts at most.
+ */
+ if (ztailpacking && end > inode->i_size)
+ end = inode->i_size;
break;
}
/* m.lcn should be >= 1 if endoff < m.clusterofs */
diff --git a/fs/exec.c b/fs/exec.c
index 79f2c9483302..8256e8bb9ad3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -118,7 +118,7 @@ bool path_noexec(const struct path *path)
* Note that a shared library must be both readable and executable due to
* security reasons.
*
- * Also note that we take the address to load from from the file itself.
+ * Also note that we take the address to load from the file itself.
*/
SYSCALL_DEFINE1(uselib, const char __user *, library)
{
@@ -495,8 +495,14 @@ static int bprm_stack_limits(struct linux_binprm *bprm)
* the stack. They aren't stored until much later when we can't
* signal to the parent that the child has run out of stack space.
* Instead, calculate it here so it's possible to fail gracefully.
+ *
+ * In the case of argc = 0, make sure there is space for adding an
+ * empty string (which will bump argc to 1), to ensure confused
+ * userspace programs don't start processing from argv[1], thinking
+ * argc can never be 0, to keep them from walking envp by accident.
+ * See do_execveat_common().
*/
- ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+ ptr_size = (max(bprm->argc, 1) + bprm->envc) * sizeof(void *);
if (limit <= ptr_size)
return -E2BIG;
limit -= ptr_size;
@@ -536,7 +542,7 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
if (!valid_arg_len(bprm, len))
goto out;
- /* We're going to work our way backwords. */
+ /* We're going to work our way backwards. */
pos = bprm->p;
str += len;
bprm->p -= len;
@@ -1269,7 +1275,7 @@ int begin_new_exec(struct linux_binprm * bprm)
/*
* Must be called _before_ exec_mmap() as bprm->mm is
- * not visibile until then. This also enables the update
+ * not visible until then. This also enables the update
* to be lockless.
*/
retval = set_mm_exe_file(bprm->mm, bprm->file);
@@ -1897,6 +1903,9 @@ static int do_execveat_common(int fd, struct filename *filename,
}
retval = count(argv, MAX_ARG_STRINGS);
+ if (retval == 0)
+ pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n",
+ current->comm, bprm->filename);
if (retval < 0)
goto out_free;
bprm->argc = retval;
@@ -1923,6 +1932,19 @@ static int do_execveat_common(int fd, struct filename *filename,
if (retval < 0)
goto out_free;
+ /*
+ * When argv is empty, add an empty string ("") as argv[0] to
+ * ensure confused userspace programs that start processing
+ * from argv[1] won't end up walking envp. See also
+ * bprm_stack_limits().
+ */
+ if (bprm->argc == 0) {
+ retval = copy_string_kernel("", bprm);
+ if (retval < 0)
+ goto out_free;
+ bprm->argc = 1;
+ }
+
retval = bprm_execve(bprm, fd, filename, flags);
out_free:
free_bprm(bprm);
@@ -1951,6 +1973,8 @@ int kernel_execve(const char *kernel_filename,
}
retval = count_strings_kernel(argv);
+ if (WARN_ON_ONCE(retval == 0))
+ retval = -EINVAL;
if (retval < 0)
goto out_free;
bprm->argc = retval;
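
From userspace, the observable effect of the argc == 0 handling above is that
an empty argv never reaches the new program: the kernel injects "" as argv[0]
and the child sees argc == 1. A hedged userspace demonstration (not part of
the patch):

	/* Userspace demo: exec a program with an empty argv. */
	#include <unistd.h>

	int main(void)
	{
		char *argv[] = { NULL };	/* argc == 0 as passed to execve() */
		char *envp[] = { NULL };

		/* With this patch the child gets argc == 1, argv[0] == "". */
		execve("/usr/bin/printenv", argv, envp);
		return 1;	/* only reached if execve() failed */
	}
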
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index df805bd05508..fc0ea1684880 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -490,7 +490,8 @@ int exfat_block_truncate_page(struct inode *inode, loff_t from)
}
static const struct address_space_operations exfat_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = exfat_readpage,
.readahead = exfat_readahead,
.writepage = exfat_writepage,
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 8c9fb7dcec16..9f892903419a 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -183,7 +183,7 @@ static struct inode *exfat_alloc_inode(struct super_block *sb)
{
struct exfat_inode_info *ei;
- ei = kmem_cache_alloc(exfat_inode_cachep, GFP_NOFS);
+ ei = alloc_inode_sb(sb, exfat_inode_cachep, GFP_NOFS);
if (!ei)
return NULL;
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index df14e750e9fe..998dd2ac8008 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -170,11 +170,6 @@ static void ext2_preread_inode(struct inode *inode)
unsigned long offset;
unsigned long block;
struct ext2_group_desc * gdp;
- struct backing_dev_info *bdi;
-
- bdi = inode_to_bdi(inode);
- if (bdi_rw_congested(bdi))
- return;
block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
gdp = ext2_get_group_desc(inode->i_sb, block_group, NULL);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 602578b72d8c..52377a0ee735 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -967,7 +967,8 @@ ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc
}
const struct address_space_operations ext2_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = ext2_readpage,
.readahead = ext2_readahead,
.writepage = ext2_writepage,
@@ -982,7 +983,8 @@ const struct address_space_operations ext2_aops = {
};
const struct address_space_operations ext2_nobh_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = ext2_readpage,
.readahead = ext2_readahead,
.writepage = ext2_nobh_writepage,
@@ -998,8 +1000,7 @@ const struct address_space_operations ext2_nobh_aops = {
static const struct address_space_operations ext2_dax_aops = {
.writepages = ext2_dax_writepages,
.direct_IO = noop_direct_IO,
- .set_page_dirty = __set_page_dirty_no_writeback,
- .invalidatepage = noop_invalidatepage,
+ .dirty_folio = noop_dirty_folio,
};
/*
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 94f1fbd7d3ac..e48aaf52ce93 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -180,7 +180,7 @@ static struct kmem_cache * ext2_inode_cachep;
static struct inode *ext2_alloc_inode(struct super_block *sb)
{
struct ext2_inode_info *ei;
- ei = kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL);
+ ei = alloc_inode_sb(sb, ext2_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
ei->i_block_alloc_info = NULL;
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 5a35768d6149..57e82e25f8e2 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -139,7 +139,7 @@ fail:
/*
* Inode operation get_posix_acl().
*
- * inode->i_mutex: don't care
+ * inode->i_rwsem: don't care
*/
struct posix_acl *
ext4_get_acl(struct inode *inode, int type, bool rcu)
@@ -183,7 +183,7 @@ ext4_get_acl(struct inode *inode, int type, bool rcu)
/*
* Set the access or default ACL of an inode.
*
- * inode->i_mutex: down unless called from ext4_new_inode
+ * inode->i_rwsem: down unless called from ext4_new_inode
*/
static int
__ext4_set_acl(handle_t *handle, struct inode *inode, int type,
@@ -271,8 +271,8 @@ out_stop:
/*
* Initialize the ACLs of a new inode. Called from ext4_new_inode.
*
- * dir->i_mutex: down
- * inode->i_mutex: up (access to inode is still exclusive)
+ * dir->i_rwsem: down
+ * inode->i_rwsem: up (access to inode is still exclusive)
*/
int
ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index a0fb0c4bdc7c..78ee3ef795ae 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -411,6 +411,7 @@ verified:
* ext4_read_block_bitmap_nowait()
* @sb: super block
* @block_group: given block group
+ * @ignore_locked: ignore locked buffers
*
* Read the bitmap for a given block_group, and validate the
* bits for block/inode/inode tables are set in the bitmaps
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 4666b55b736e..5504f72bbbbe 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -292,15 +292,10 @@ void ext4_release_system_zone(struct super_block *sb)
call_rcu(&system_blks->rcu, ext4_destroy_system_zone);
}
-/*
- * Returns 1 if the passed-in block region (start_blk,
- * start_blk+count) is valid; 0 if some part of the block region
- * overlaps with some other filesystem metadata blocks.
- */
-int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk,
- unsigned int count)
+int ext4_sb_block_valid(struct super_block *sb, struct inode *inode,
+ ext4_fsblk_t start_blk, unsigned int count)
{
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_system_blocks *system_blks;
struct ext4_system_zone *entry;
struct rb_node *n;
@@ -329,7 +324,9 @@ int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk,
else if (start_blk >= (entry->start_blk + entry->count))
n = n->rb_right;
else {
- ret = (entry->ino == inode->i_ino);
+ ret = 0;
+ if (inode)
+ ret = (entry->ino == inode->i_ino);
break;
}
}
@@ -338,6 +335,17 @@ out_rcu:
return ret;
}
+/*
+ * Returns 1 if the passed-in block region (start_blk,
+ * start_blk+count) is valid; 0 if some part of the block region
+ * overlaps with some other filesystem metadata blocks.
+ */
+int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk,
+ unsigned int count)
+{
+ return ext4_sb_block_valid(inode->i_sb, inode, start_blk, count);
+}
+
int ext4_check_blockref(const char *function, unsigned int line,
struct inode *inode, __le32 *p, unsigned int max)
{
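
With the split above, callers can validate a block range against the system
zone without an inode in hand; passing a NULL inode makes any overlap with
filesystem metadata invalid, since the per-inode ownership check is skipped.
A hedged usage sketch:

	/* Reject a raw block range that overlaps filesystem metadata. */
	if (!ext4_sb_block_valid(sb, NULL, start_blk, count))
		return -EFSCORRUPTED;
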
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 71a3cdceaa03..3f87cca49f0c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1028,7 +1028,7 @@ struct ext4_inode_info {
/*
* Extended attributes can be read independently of the main file
- * data. Taking i_mutex even when reading would cause contention
+ * data. Taking i_rwsem even when reading would cause contention
* between readers of EAs and writers of regular file data, so
* instead we synchronize on xattr_sem when reading or changing
* EAs.
@@ -1046,6 +1046,8 @@ struct ext4_inode_info {
/* Fast commit related info */
+ /* For tracking dentry create updates */
+ struct list_head i_fc_dilist;
struct list_head i_fc_list; /*
* inodes that need fast commit
* protected by sbi->s_fc_lock.
@@ -1279,7 +1281,7 @@ struct ext4_inode_info {
#define ext4_find_next_zero_bit find_next_zero_bit_le
#define ext4_find_next_bit find_next_bit_le
-extern void ext4_set_bits(void *bm, int cur, int len);
+extern void mb_set_bits(void *bm, int cur, int len);
/*
* Maximal mount counts between two filesystem checks
@@ -1750,6 +1752,7 @@ struct ext4_sb_info {
spinlock_t s_fc_lock;
struct buffer_head *s_fc_bh;
struct ext4_fc_stats s_fc_stats;
+ tid_t s_fc_ineligible_tid;
#ifdef CONFIG_EXT4_DEBUG
int s_fc_debug_max_replay;
#endif
@@ -1795,10 +1798,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
enum {
EXT4_MF_MNTDIR_SAMPLED,
EXT4_MF_FS_ABORTED, /* Fatal error detected */
- EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */
- EXT4_MF_FC_COMMITTING /* File system underoing a fast
- * commit.
- */
+ EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */
};
static inline void ext4_set_mount_flag(struct super_block *sb, int bit)
@@ -2485,7 +2485,7 @@ struct ext4_filename {
#ifdef CONFIG_FS_ENCRYPTION
struct fscrypt_str crypto_buf;
#endif
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
struct fscrypt_str cf_name;
#endif
};
@@ -2721,7 +2721,7 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
struct ext4_group_desc *gdp);
ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
extern int ext4_fname_setup_ci_filename(struct inode *dir,
const struct qstr *iname,
struct ext4_filename *fname);
@@ -2754,7 +2754,7 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
ext4_fname_from_fscrypt_name(fname, &name);
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
err = ext4_fname_setup_ci_filename(dir, iname, fname);
#endif
return err;
@@ -2773,7 +2773,7 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
ext4_fname_from_fscrypt_name(fname, &name);
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname);
#endif
return err;
@@ -2790,7 +2790,7 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname)
fname->usr_fname = NULL;
fname->disk_name.name = NULL;
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
kfree(fname->cf_name.name);
fname->cf_name.name = NULL;
#endif
@@ -2806,7 +2806,7 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
fname->disk_name.name = (unsigned char *) iname->name;
fname->disk_name.len = iname->len;
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
err = ext4_fname_setup_ci_filename(dir, iname, fname);
#endif
@@ -2822,7 +2822,7 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
static inline void ext4_fname_free_filename(struct ext4_filename *fname)
{
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
kfree(fname->cf_name.name);
fname->cf_name.name = NULL;
#endif
@@ -2926,7 +2926,7 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
struct dentry *dentry);
void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
-void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
+void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle);
void ext4_fc_start_update(struct inode *inode);
void ext4_fc_stop_update(struct inode *inode);
void ext4_fc_del(struct inode *inode);
@@ -2935,6 +2935,9 @@ void ext4_fc_replay_cleanup(struct super_block *sb);
int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
int __init ext4_fc_init_dentry_cache(void);
void ext4_fc_destroy_dentry_cache(void);
+int ext4_fc_record_regions(struct super_block *sb, int ino,
+ ext4_lblk_t lblk, ext4_fsblk_t pblk,
+ int len, int replay);
/* mballoc.c */
extern const struct seq_operations ext4_mb_seq_groups_ops;
@@ -3407,7 +3410,7 @@ do { \
#define EXT4_FREECLUSTERS_WATERMARK 0
#endif
-/* Update i_disksize. Requires i_mutex to avoid races with truncate */
+/* Update i_disksize. Requires i_rwsem to avoid races with truncate */
static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
{
WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
@@ -3418,7 +3421,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
up_write(&EXT4_I(inode)->i_data_sem);
}
-/* Update i_size, i_disksize. Requires i_mutex to avoid races with truncate */
+/* Update i_size, i_disksize. Requires i_rwsem to avoid races with truncate */
static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
{
int changed = 0;
@@ -3706,6 +3709,9 @@ extern int ext4_inode_block_valid(struct inode *inode,
unsigned int count);
extern int ext4_check_blockref(const char *, unsigned int,
struct inode *, __le32 *, unsigned int);
+extern int ext4_sb_block_valid(struct super_block *sb, struct inode *inode,
+ ext4_fsblk_t start_blk, unsigned int count);
+
/* extents.c */
struct ext4_ext_path;
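
The ext4.h hunks above replace every `#ifdef CONFIG_UNICODE` with `#if IS_ENABLED(CONFIG_UNICODE)`, which evaluates true for both built-in (=y) and modular (=m) unicode support, whereas a plain #ifdef only catches =y. The standalone sketch below reproduces the IS_ENABLED() machinery from include/linux/kconfig.h, slightly simplified (the kernel composes it from an __or() helper rather than ||); the CONFIG_UNICODE_MODULE define is an assumption standing in for what Kconfig would generate for CONFIG_UNICODE=m.

/* Userspace model of IS_ENABLED(); the real macros live in include/linux/kconfig.h. */
#include <stdio.h>

#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
#define __is_defined(x) ___is_defined(x)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)

#define IS_BUILTIN(option) __is_defined(option)
#define IS_MODULE(option) __is_defined(option##_MODULE)
/* Simplified: the kernel uses an __or() macro here instead of ||. */
#define IS_ENABLED(option) (IS_BUILTIN(option) || IS_MODULE(option))

/* Assumption: pretend Kconfig produced CONFIG_UNICODE=m. */
#define CONFIG_UNICODE_MODULE 1

int main(void)
{
#if IS_ENABLED(CONFIG_UNICODE)
	puts("unicode path compiled in");	/* taken for =y and =m */
#endif
#ifdef CONFIG_UNICODE
	puts("plain ifdef");			/* NOT taken for =m */
#endif
	printf("IS_ENABLED(CONFIG_UNICODE) = %d\n", IS_ENABLED(CONFIG_UNICODE));
	return 0;
}
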
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 0e4fa644df01..db2ae4a2b38d 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -491,7 +491,7 @@ static inline int ext4_free_data_revoke_credits(struct inode *inode, int blocks)
/*
* This function controls whether or not we should try to go down the
* dioread_nolock code paths, which makes it safe to avoid taking
- * i_mutex for direct I/O reads. This only works for extent-based
+ * i_rwsem for direct I/O reads. This only works for extent-based
* files, and it doesn't work if data journaling is enabled, since the
* dioread_nolock code uses b_private to pass information back to the
* I/O completion handler, and this conflicts with the jbd's use of
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 74c91da585d7..0d98cf402282 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -97,7 +97,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
* Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
* moment, get_block can be called only for blocks inside i_size since
* page cache has been already dropped and writes are blocked by
- * i_mutex. So we can safely drop the i_data_sem here.
+ * i_rwsem. So we can safely drop the i_data_sem here.
*/
BUG_ON(EXT4_JOURNAL(inode) == NULL);
ext4_discard_preallocations(inode, 0);
@@ -3368,7 +3368,6 @@ static int ext4_split_extent(handle_t *handle,
return -EFSCORRUPTED;
}
unwritten = ext4_ext_is_unwritten(ex);
- split_flag1 = 0;
if (map->m_lblk >= ee_block) {
split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
@@ -4572,7 +4571,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
- /* Wait all existing dio workers, newcomers will block on i_mutex */
+ /* Wait all existing dio workers, newcomers will block on i_rwsem */
inode_dio_wait(inode);
/* Preallocate the range including the unaligned edges */
@@ -4738,7 +4737,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
goto out;
}
- /* Wait all existing dio workers, newcomers will block on i_mutex */
+ /* Wait all existing dio workers, newcomers will block on i_rwsem */
inode_dio_wait(inode);
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
@@ -5334,7 +5333,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
ret = PTR_ERR(handle);
goto out_mmap;
}
- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
+ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode, 0);
@@ -5474,7 +5473,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
ret = PTR_ERR(handle);
goto out_mmap;
}
- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
+ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
/* Expand file to avoid data loss if there is error while shifting */
inode->i_size += len;
@@ -5571,7 +5570,7 @@ out_mutex:
* stuff such as page-cache locking consistency, bh mapping consistency or
* extent's data copying must be performed by caller.
* Locking:
- * i_mutex is held for both inodes
+ * i_rwsem is held for both inodes
* i_data_sem is locked for write for both inodes
* Assumptions:
* All pages from requested range are locked for both inodes
@@ -6091,11 +6090,15 @@ int ext4_ext_clear_bb(struct inode *inode)
ext4_mb_mark_bb(inode->i_sb,
path[j].p_block, 1, 0);
+ ext4_fc_record_regions(inode->i_sb, inode->i_ino,
+ 0, path[j].p_block, 1, 1);
}
ext4_ext_drop_refs(path);
kfree(path);
}
ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
+ ext4_fc_record_regions(inode->i_sb, inode->i_ino,
+ map.m_lblk, map.m_pblk, map.m_len, 1);
}
cur = cur + map.m_len;
}
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 5ae8026a0c56..3d72565ec6e8 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -199,6 +199,7 @@ void ext4_fc_init_inode(struct inode *inode)
ext4_fc_reset_inode(inode);
ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
INIT_LIST_HEAD(&ei->i_fc_list);
+ INIT_LIST_HEAD(&ei->i_fc_dilist);
init_waitqueue_head(&ei->i_fc_wait);
atomic_set(&ei->i_fc_updates, 0);
}
@@ -279,6 +280,8 @@ void ext4_fc_stop_update(struct inode *inode)
void ext4_fc_del(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct ext4_fc_dentry_update *fc_dentry;
if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
@@ -286,7 +289,7 @@ void ext4_fc_del(struct inode *inode)
restart:
spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
- if (list_empty(&ei->i_fc_list)) {
+ if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) {
spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
return;
}
@@ -295,23 +298,62 @@ restart:
ext4_fc_wait_committing_inode(inode);
goto restart;
}
- list_del_init(&ei->i_fc_list);
- spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
+
+ if (!list_empty(&ei->i_fc_list))
+ list_del_init(&ei->i_fc_list);
+
+ /*
+	 * Since this inode is being removed, also drop all of its fast
+	 * commit dentry create references; there is no need to log them.
+ */
+ if (list_empty(&ei->i_fc_dilist)) {
+ spin_unlock(&sbi->s_fc_lock);
+ return;
+ }
+
+ fc_dentry = list_first_entry(&ei->i_fc_dilist, struct ext4_fc_dentry_update, fcd_dilist);
+ WARN_ON(fc_dentry->fcd_op != EXT4_FC_TAG_CREAT);
+ list_del_init(&fc_dentry->fcd_list);
+ list_del_init(&fc_dentry->fcd_dilist);
+
+ WARN_ON(!list_empty(&ei->i_fc_dilist));
+ spin_unlock(&sbi->s_fc_lock);
+
+ if (fc_dentry->fcd_name.name &&
+ fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
+ kfree(fc_dentry->fcd_name.name);
+ kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
+
+ return;
}
/*
- * Mark file system as fast commit ineligible. This means that next commit
- * operation would result in a full jbd2 commit.
+ * Mark the file system as fast commit ineligible and record the latest
+ * ineligible transaction tid. Until that transaction has committed, any
+ * commit operation results in a full jbd2 commit.
*/
-void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
+void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ tid_t tid;
if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
return;
ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ if (handle && !IS_ERR(handle))
+ tid = handle->h_transaction->t_tid;
+ else {
+ read_lock(&sbi->s_journal->j_state_lock);
+ tid = sbi->s_journal->j_running_transaction ?
+ sbi->s_journal->j_running_transaction->t_tid : 0;
+ read_unlock(&sbi->s_journal->j_state_lock);
+ }
+ spin_lock(&sbi->s_fc_lock);
+ if (sbi->s_fc_ineligible_tid < tid)
+ sbi->s_fc_ineligible_tid = tid;
+ spin_unlock(&sbi->s_fc_lock);
WARN_ON(reason >= EXT4_FC_REASON_MAX);
sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
}
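
The reworked ext4_fc_mark_ineligible() above records the tid of the transaction that made the filesystem ineligible, and ext4_fc_cleanup() (later in this diff) only clears the flag once a commit with tid >= s_fc_ineligible_tid completes. Below is a minimal userspace model of that high-water-mark invariant, with locking and journal plumbing elided; the names mirror the patch but the types are simplified.

/* Userspace model of the s_fc_ineligible_tid high-water mark; illustration only. */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned int tid_t;

struct fc_state {
	bool ineligible;	/* models EXT4_MF_FC_INELIGIBLE */
	tid_t ineligible_tid;	/* models sbi->s_fc_ineligible_tid */
};

/* Record that everything up to @tid must take the full-commit path. */
static void mark_ineligible(struct fc_state *s, tid_t tid)
{
	s->ineligible = true;
	if (s->ineligible_tid < tid)
		s->ineligible_tid = tid;
}

/* Called after a commit of @tid completes (models ext4_fc_cleanup()). */
static void cleanup(struct fc_state *s, tid_t tid)
{
	if (tid >= s->ineligible_tid) {
		s->ineligible_tid = 0;
		s->ineligible = false;
	}
}

int main(void)
{
	struct fc_state s = { 0 };

	mark_ineligible(&s, 7);
	cleanup(&s, 5);	/* older commit: flag must stay set */
	printf("after tid 5: ineligible=%d\n", s.ineligible);
	cleanup(&s, 7);	/* the recorded tid committed: flag clears */
	printf("after tid 7: ineligible=%d\n", s.ineligible);
	return 0;
}
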
@@ -337,13 +379,6 @@ static int ext4_fc_track_template(
tid_t tid = 0;
int ret;
- if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
- (sbi->s_mount_state & EXT4_FC_REPLAY))
- return -EOPNOTSUPP;
-
- if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
- return -EINVAL;
-
tid = handle->h_transaction->t_tid;
mutex_lock(&ei->i_fc_lock);
if (tid == ei->i_sync_tid) {
@@ -361,7 +396,8 @@ static int ext4_fc_track_template(
spin_lock(&sbi->s_fc_lock);
if (list_empty(&EXT4_I(inode)->i_fc_list))
list_add_tail(&EXT4_I(inode)->i_fc_list,
- (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
+ (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
+ sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ?
&sbi->s_fc_q[FC_Q_STAGING] :
&sbi->s_fc_q[FC_Q_MAIN]);
spin_unlock(&sbi->s_fc_lock);
@@ -387,7 +423,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
mutex_unlock(&ei->i_fc_lock);
node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
if (!node) {
- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
mutex_lock(&ei->i_fc_lock);
return -ENOMEM;
}
@@ -400,7 +436,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
if (!node->fcd_name.name) {
kmem_cache_free(ext4_fc_dentry_cachep, node);
ext4_fc_mark_ineligible(inode->i_sb,
- EXT4_FC_REASON_NOMEM);
+ EXT4_FC_REASON_NOMEM, NULL);
mutex_lock(&ei->i_fc_lock);
return -ENOMEM;
}
@@ -412,13 +448,28 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
node->fcd_name.name = node->fcd_iname;
}
node->fcd_name.len = dentry->d_name.len;
-
+ INIT_LIST_HEAD(&node->fcd_dilist);
spin_lock(&sbi->s_fc_lock);
- if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
+ if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
+ sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
list_add_tail(&node->fcd_list,
&sbi->s_fc_dentry_q[FC_Q_STAGING]);
else
list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
+
+ /*
+	 * This keeps track of all the fc_dentry updates that belong to
+	 * this ext4 inode. If the inode is unlinked before we ever get a
+	 * chance to fsync it, we can drop all of its fc_dentry references
+	 * while evicting the inode in ext4_fc_del().
+	 * It also means ext4_fc_commit_dentry_updates() no longer needs to
+	 * loop over every inode in sbi->s_fc_q to find the inode that a
+	 * dentry update belongs to.
+ */
+ if (dentry_update->op == EXT4_FC_TAG_CREAT) {
+ WARN_ON(!list_empty(&ei->i_fc_dilist));
+ list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist);
+ }
spin_unlock(&sbi->s_fc_lock);
mutex_lock(&ei->i_fc_lock);
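
The new fcd_dilist link added above ties each EXT4_FC_TAG_CREAT dentry update to its owning inode, so eviction and the commit path can reach one from the other without scanning sbi->s_fc_q. A rough userspace sketch of that two-way membership follows, using <sys/queue.h> lists and an explicit owner pointer as stand-ins for the kernel's intrusive list_head and container_of() arithmetic.

/* Sketch only: models the fcd_dilist <-> inode linkage, not the kernel lists. */
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct fc_dentry_update;

struct inode_info {
	unsigned long ino;
	LIST_HEAD(, fc_dentry_update) dilist;	/* models ei->i_fc_dilist */
};

struct fc_dentry_update {
	struct inode_info *owner;		/* stand-in for container_of() */
	LIST_ENTRY(fc_dentry_update) dilist_link; /* models fcd_dilist */
};

int main(void)
{
	struct inode_info ei = { .ino = 42 };
	struct fc_dentry_update *node = calloc(1, sizeof(*node));

	LIST_INIT(&ei.dilist);
	node->owner = &ei;
	LIST_INSERT_HEAD(&ei.dilist, node, dilist_link);

	/* Commit path: the dentry update yields its inode directly. */
	printf("dentry update belongs to inode %lu\n",
	       LIST_FIRST(&ei.dilist)->owner->ino);

	/* Eviction path (ext4_fc_del()): unlink and free in O(1). */
	LIST_REMOVE(node, dilist_link);
	free(node);
	return 0;
}
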
@@ -436,12 +487,22 @@ void __ext4_fc_track_unlink(handle_t *handle,
ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
(void *)&args, 0);
- trace_ext4_fc_track_unlink(inode, dentry, ret);
+ trace_ext4_fc_track_unlink(handle, inode, dentry, ret);
}
void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
{
- __ext4_fc_track_unlink(handle, d_inode(dentry), dentry);
+ struct inode *inode = d_inode(dentry);
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+ if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
+ (sbi->s_mount_state & EXT4_FC_REPLAY))
+ return;
+
+ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
+ return;
+
+ __ext4_fc_track_unlink(handle, inode, dentry);
}
void __ext4_fc_track_link(handle_t *handle,
@@ -455,12 +516,22 @@ void __ext4_fc_track_link(handle_t *handle,
ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
(void *)&args, 0);
- trace_ext4_fc_track_link(inode, dentry, ret);
+ trace_ext4_fc_track_link(handle, inode, dentry, ret);
}
void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
{
- __ext4_fc_track_link(handle, d_inode(dentry), dentry);
+ struct inode *inode = d_inode(dentry);
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+ if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
+ (sbi->s_mount_state & EXT4_FC_REPLAY))
+ return;
+
+ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
+ return;
+
+ __ext4_fc_track_link(handle, inode, dentry);
}
void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
@@ -474,12 +545,22 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
(void *)&args, 0);
- trace_ext4_fc_track_create(inode, dentry, ret);
+ trace_ext4_fc_track_create(handle, inode, dentry, ret);
}
void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
{
- __ext4_fc_track_create(handle, d_inode(dentry), dentry);
+ struct inode *inode = d_inode(dentry);
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+ if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
+ (sbi->s_mount_state & EXT4_FC_REPLAY))
+ return;
+
+ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
+ return;
+
+ __ext4_fc_track_create(handle, inode, dentry);
}
/* __track_fn for inode tracking */
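
The three wrappers above, and ext4_fc_track_inode()/ext4_fc_track_range() below, now perform the eligibility checks that used to live inside ext4_fc_track_template(). A small sketch of that hoisted-guard shape, under the assumption that the shared template should stay policy-free; the names here are illustrative, not the kernel API.

/* Sketch of guards hoisted from a shared template into each entry point. */
#include <stdbool.h>
#include <stdio.h>

struct fs {
	bool fast_commit;	/* models test_opt2(sb, JOURNAL_FAST_COMMIT) */
	bool replaying;		/* models EXT4_FC_REPLAY */
	bool ineligible;	/* models EXT4_MF_FC_INELIGIBLE */
};

static bool fc_tracking_allowed(const struct fs *fs)
{
	if (!fs->fast_commit || fs->replaying)
		return false;
	if (fs->ineligible)
		return false;
	return true;
}

static void track_template(const char *what)
{
	printf("tracked: %s\n", what);	/* shared bookkeeping lives here */
}

static void track_unlink(const struct fs *fs)
{
	if (!fc_tracking_allowed(fs))
		return;			/* silently skip, no error code */
	track_template("unlink");
}

int main(void)
{
	struct fs fs = { .fast_commit = true };

	track_unlink(&fs);
	fs.ineligible = true;
	track_unlink(&fs);	/* skipped */
	return 0;
}
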
@@ -495,6 +576,7 @@ static int __track_inode(struct inode *inode, void *arg, bool update)
void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
int ret;
if (S_ISDIR(inode->i_mode))
@@ -502,12 +584,19 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
if (ext4_should_journal_data(inode)) {
ext4_fc_mark_ineligible(inode->i_sb,
- EXT4_FC_REASON_INODE_JOURNAL_DATA);
+ EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
return;
}
+ if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
+ (sbi->s_mount_state & EXT4_FC_REPLAY))
+ return;
+
+ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
+ return;
+
ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
- trace_ext4_fc_track_inode(inode, ret);
+ trace_ext4_fc_track_inode(handle, inode, ret);
}
struct __track_range_args {
@@ -545,18 +634,26 @@ static int __track_range(struct inode *inode, void *arg, bool update)
void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
ext4_lblk_t end)
{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct __track_range_args args;
int ret;
if (S_ISDIR(inode->i_mode))
return;
+ if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
+ (sbi->s_mount_state & EXT4_FC_REPLAY))
+ return;
+
+ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
+ return;
+
args.start = start;
args.end = end;
ret = ext4_fc_track_template(handle, inode, __track_range, &args, 1);
- trace_ext4_fc_track_range(inode, start, end, ret);
+ trace_ext4_fc_track_range(handle, inode, start, end, ret);
}
static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
@@ -879,7 +976,6 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)
int ret = 0;
spin_lock(&sbi->s_fc_lock);
- ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
while (atomic_read(&ei->i_fc_updates)) {
@@ -939,7 +1035,7 @@ __releases(&sbi->s_fc_lock)
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n;
struct inode *inode;
- struct ext4_inode_info *ei, *ei_n;
+ struct ext4_inode_info *ei;
int ret;
if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
@@ -955,21 +1051,16 @@ __releases(&sbi->s_fc_lock)
spin_lock(&sbi->s_fc_lock);
continue;
}
-
- inode = NULL;
- list_for_each_entry_safe(ei, ei_n, &sbi->s_fc_q[FC_Q_MAIN],
- i_fc_list) {
- if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) {
- inode = &ei->vfs_inode;
- break;
- }
- }
/*
- * If we don't find inode in our list, then it was deleted,
- * in which case, we don't need to record it's create tag.
+	 * With fcd_dilist we no longer need to loop over sbi->s_fc_q to
+	 * find the corresponding inode pointer.
*/
- if (!inode)
- continue;
+ WARN_ON(list_empty(&fc_dentry->fcd_dilist));
+ ei = list_first_entry(&fc_dentry->fcd_dilist,
+ struct ext4_inode_info, i_fc_dilist);
+ inode = &ei->vfs_inode;
+ WARN_ON(inode->i_ino != fc_dentry->fcd_ino);
+
spin_unlock(&sbi->s_fc_lock);
/*
@@ -1073,11 +1164,12 @@ out:
}
static void ext4_fc_update_stats(struct super_block *sb, int status,
- u64 commit_time, int nblks)
+ u64 commit_time, int nblks, tid_t commit_tid)
{
struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats;
- jbd_debug(1, "Fast commit ended with status = %d", status);
+ jbd_debug(1, "Fast commit ended with status = %d for tid %u",
+ status, commit_tid);
if (status == EXT4_FC_STATUS_OK) {
stats->fc_num_commits++;
stats->fc_numblks += nblks;
@@ -1095,7 +1187,7 @@ static void ext4_fc_update_stats(struct super_block *sb, int status,
} else {
stats->fc_skipped_commits++;
}
- trace_ext4_fc_commit_stop(sb, nblks, status);
+ trace_ext4_fc_commit_stop(sb, nblks, status, commit_tid);
}
/*
@@ -1113,13 +1205,13 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0;
ktime_t start_time, commit_time;
- trace_ext4_fc_commit_start(sb);
-
- start_time = ktime_get();
-
if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
return jbd2_complete_transaction(journal, commit_tid);
+ trace_ext4_fc_commit_start(sb, commit_tid);
+
+ start_time = ktime_get();
+
restart_fc:
ret = jbd2_fc_begin_commit(journal, commit_tid);
if (ret == -EALREADY) {
@@ -1127,14 +1219,16 @@ restart_fc:
if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
commit_tid > journal->j_commit_sequence)
goto restart_fc;
- ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0);
+ ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0,
+ commit_tid);
return 0;
} else if (ret) {
/*
* Commit couldn't start. Just update stats and perform a
* full commit.
*/
- ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0);
+ ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0,
+ commit_tid);
return jbd2_complete_transaction(journal, commit_tid);
}
@@ -1166,12 +1260,12 @@ restart_fc:
* don't react too strongly to vast changes in the commit time
*/
commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
- ext4_fc_update_stats(sb, status, commit_time, nblks);
+ ext4_fc_update_stats(sb, status, commit_time, nblks, commit_tid);
return ret;
fallback:
ret = jbd2_fc_end_commit_fallback(journal);
- ext4_fc_update_stats(sb, status, 0, 0);
+ ext4_fc_update_stats(sb, status, 0, 0, commit_tid);
return ret;
}
@@ -1179,7 +1273,7 @@ fallback:
* Fast commit cleanup routine. This is called after every fast commit and
* full commit. full is true if we are called after a full commit.
*/
-static void ext4_fc_cleanup(journal_t *journal, int full)
+static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
{
struct super_block *sb = journal->j_private;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -1189,6 +1283,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
if (full && sbi->s_fc_bh)
sbi->s_fc_bh = NULL;
+ trace_ext4_fc_cleanup(journal, full, tid);
jbd2_fc_release_bufs(journal);
spin_lock(&sbi->s_fc_lock);
@@ -1197,7 +1292,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
list_del_init(&iter->i_fc_list);
ext4_clear_inode_state(&iter->vfs_inode,
EXT4_STATE_FC_COMMITTING);
- ext4_fc_reset_inode(&iter->vfs_inode);
+ if (iter->i_sync_tid <= tid)
+ ext4_fc_reset_inode(&iter->vfs_inode);
/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
smp_mb();
#if (BITS_PER_LONG < 64)
@@ -1212,6 +1308,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
struct ext4_fc_dentry_update,
fcd_list);
list_del_init(&fc_dentry->fcd_list);
+ list_del_init(&fc_dentry->fcd_dilist);
spin_unlock(&sbi->s_fc_lock);
if (fc_dentry->fcd_name.name &&
@@ -1226,8 +1323,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
&sbi->s_fc_q[FC_Q_MAIN]);
- ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
- ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ if (tid >= sbi->s_fc_ineligible_tid) {
+ sbi->s_fc_ineligible_tid = 0;
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ }
if (full)
sbi->s_fc_bytes = 0;
@@ -1392,14 +1491,15 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
if (state->fc_modified_inodes[i] == ino)
return 0;
if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
- state->fc_modified_inodes_size +=
- EXT4_FC_REPLAY_REALLOC_INCREMENT;
state->fc_modified_inodes = krealloc(
- state->fc_modified_inodes, sizeof(int) *
- state->fc_modified_inodes_size,
- GFP_KERNEL);
+ state->fc_modified_inodes,
+ sizeof(int) * (state->fc_modified_inodes_size +
+ EXT4_FC_REPLAY_REALLOC_INCREMENT),
+ GFP_KERNEL);
if (!state->fc_modified_inodes)
return -ENOMEM;
+ state->fc_modified_inodes_size +=
+ EXT4_FC_REPLAY_REALLOC_INCREMENT;
}
state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
return 0;
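
The hunk above reorders the krealloc() bookkeeping so fc_modified_inodes_size only grows after the allocation succeeds, keeping the {pointer, size} pair consistent on failure. Below is a userspace rendering of the same idea; the temporary pointer is an extra precaution in this model so a failed grow also keeps the old buffer reachable.

/* Userspace model of the reordered realloc bookkeeping; illustration only. */
#include <stdlib.h>

#define REALLOC_INCREMENT 4	/* models EXT4_FC_REPLAY_REALLOC_INCREMENT */

struct ino_array {
	int *data;
	int used;
	int size;
};

static int record_ino(struct ino_array *a, int ino)
{
	if (a->used == a->size) {
		/* Grow first; bump the recorded size only on success. */
		int *tmp = realloc(a->data,
				   sizeof(int) * (a->size + REALLOC_INCREMENT));
		if (!tmp)
			return -1;	/* a->size still describes a->data */
		a->data = tmp;
		a->size += REALLOC_INCREMENT;
	}
	a->data[a->used++] = ino;
	return 0;
}

int main(void)
{
	struct ino_array a = { 0 };

	for (int i = 0; i < 10; i++)
		if (record_ino(&a, 100 + i))
			return 1;
	free(a.data);
	return 0;
}
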
@@ -1431,7 +1531,9 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
}
inode = NULL;
- ext4_fc_record_modified_inode(sb, ino);
+ ret = ext4_fc_record_modified_inode(sb, ino);
+ if (ret)
+ goto out;
raw_fc_inode = (struct ext4_inode *)
(val + offsetof(struct ext4_fc_inode, fc_raw_inode));
@@ -1563,16 +1665,23 @@ out:
}
/*
- * Record physical disk regions which are in use as per fast commit area. Our
- * simple replay phase allocator excludes these regions from allocation.
+ * Record the physical disk regions that the fast commit area marks as
+ * in use, as well as regions used by inodes during the replay phase.
+ * Our simple replay-phase allocator excludes these regions from
+ * allocation.
*/
-static int ext4_fc_record_regions(struct super_block *sb, int ino,
- ext4_lblk_t lblk, ext4_fsblk_t pblk, int len)
+int ext4_fc_record_regions(struct super_block *sb, int ino,
+ ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
{
struct ext4_fc_replay_state *state;
struct ext4_fc_alloc_region *region;
state = &EXT4_SB(sb)->s_fc_replay_state;
+ /*
+	 * During the replay phase, fc_regions_valid may not match
+	 * fc_regions_used; bring fc_regions_used up to date before
+	 * recording new additions.
+ */
+ if (replay && state->fc_regions_used != state->fc_regions_valid)
+ state->fc_regions_used = state->fc_regions_valid;
if (state->fc_regions_used == state->fc_regions_size) {
state->fc_regions_size +=
EXT4_FC_REPLAY_REALLOC_INCREMENT;
@@ -1590,6 +1699,9 @@ static int ext4_fc_record_regions(struct super_block *sb, int ino,
region->pblk = pblk;
region->len = len;
+ if (replay)
+ state->fc_regions_valid++;
+
return 0;
}
@@ -1621,6 +1733,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
}
ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
+ if (ret)
+ goto out;
start = le32_to_cpu(ex->ee_block);
start_pblk = ext4_ext_pblock(ex);
@@ -1638,18 +1752,14 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
map.m_pblk = 0;
ret = ext4_map_blocks(NULL, inode, &map, 0);
- if (ret < 0) {
- iput(inode);
- return 0;
- }
+ if (ret < 0)
+ goto out;
if (ret == 0) {
/* Range is not mapped */
path = ext4_find_extent(inode, cur, NULL, 0);
- if (IS_ERR(path)) {
- iput(inode);
- return 0;
- }
+ if (IS_ERR(path))
+ goto out;
memset(&newex, 0, sizeof(newex));
newex.ee_block = cpu_to_le32(cur);
ext4_ext_store_pblock(
@@ -1663,10 +1773,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
up_write((&EXT4_I(inode)->i_data_sem));
ext4_ext_drop_refs(path);
kfree(path);
- if (ret) {
- iput(inode);
- return 0;
- }
+ if (ret)
+ goto out;
goto next;
}
@@ -1679,10 +1787,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
ext4_ext_is_unwritten(ex),
start_pblk + cur - start);
- if (ret) {
- iput(inode);
- return 0;
- }
+ if (ret)
+ goto out;
/*
* Mark the old blocks as free since they aren't used
* anymore. We maintain an array of all the modified
@@ -1702,10 +1808,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
ext4_ext_is_unwritten(ex), map.m_pblk);
ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
ext4_ext_is_unwritten(ex), map.m_pblk);
- if (ret) {
- iput(inode);
- return 0;
- }
+ if (ret)
+ goto out;
/*
* We may have split the extent tree while toggling the state.
* Try to shrink the extent tree now.
@@ -1717,6 +1821,7 @@ next:
}
ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
sb->s_blocksize_bits);
+out:
iput(inode);
return 0;
}
@@ -1746,6 +1851,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
}
ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
+ if (ret)
+ goto out;
jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",
inode->i_ino, le32_to_cpu(lrange.fc_lblk),
@@ -1755,10 +1862,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
map.m_len = remaining;
ret = ext4_map_blocks(NULL, inode, &map, 0);
- if (ret < 0) {
- iput(inode);
- return 0;
- }
+ if (ret < 0)
+ goto out;
if (ret > 0) {
remaining -= ret;
cur += ret;
@@ -1770,18 +1875,17 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
}
down_write(&EXT4_I(inode)->i_data_sem);
- ret = ext4_ext_remove_space(inode, lrange.fc_lblk,
- lrange.fc_lblk + lrange.fc_len - 1);
+ ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk),
+ le32_to_cpu(lrange.fc_lblk) +
+ le32_to_cpu(lrange.fc_len) - 1);
up_write(&EXT4_I(inode)->i_data_sem);
- if (ret) {
- iput(inode);
- return 0;
- }
+ if (ret)
+ goto out;
ext4_ext_replay_shrink_inode(inode,
i_size_read(inode) >> sb->s_blocksize_bits);
ext4_mark_inode_dirty(NULL, inode);
+out:
iput(inode);
-
return 0;
}
@@ -1852,8 +1956,8 @@ bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
if (state->fc_regions[i].ino == 0 ||
state->fc_regions[i].len == 0)
continue;
- if (blk >= state->fc_regions[i].pblk &&
- blk < state->fc_regions[i].pblk + state->fc_regions[i].len)
+ if (in_range(blk, state->fc_regions[i].pblk,
+ state->fc_regions[i].len))
return true;
}
return false;
@@ -1937,7 +2041,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
ret = ext4_fc_record_regions(sb,
le32_to_cpu(ext.fc_ino),
le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
- ext4_ext_get_actual_len(ex));
+ ext4_ext_get_actual_len(ex), 0);
if (ret < 0)
break;
ret = JBD2_FC_REPLAY_CONTINUE;
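
Two of the hunks above touch the replay exclusion machinery: ext4_fc_record_regions() now also tracks regions added during replay, and ext4_fc_replay_check_excluded() switched to the in_range() helper. Here is a compact userspace model of the exclusion check; in_range() is written out as a function, whereas the kernel uses a macro with the same start <= blk < start + len semantics.

/* Userspace model of the replay-time block exclusion check; illustration only. */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned long long ext4_fsblk_t;

struct region { ext4_fsblk_t pblk; unsigned int len; };

static bool in_range(ext4_fsblk_t blk, ext4_fsblk_t start, unsigned int len)
{
	return blk >= start && blk < start + len;
}

static bool replay_check_excluded(const struct region *r, int nr,
				  ext4_fsblk_t blk)
{
	for (int i = 0; i < nr; i++)
		if (r[i].len && in_range(blk, r[i].pblk, r[i].len))
			return true;
	return false;
}

int main(void)
{
	struct region regions[] = { { 100, 8 }, { 512, 1 } };

	printf("%d\n", replay_check_excluded(regions, 2, 107)); /* 1: inside */
	printf("%d\n", replay_check_excluded(regions, 2, 108)); /* 0: one past end */
	return 0;
}
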
diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h
index 083ad1cb705a..80414dcba6e1 100644
--- a/fs/ext4/fast_commit.h
+++ b/fs/ext4/fast_commit.h
@@ -93,7 +93,6 @@ enum {
EXT4_FC_REASON_RENAME_DIR,
EXT4_FC_REASON_FALLOC_RANGE,
EXT4_FC_REASON_INODE_JOURNAL_DATA,
- EXT4_FC_COMMIT_FAILED,
EXT4_FC_REASON_MAX
};
@@ -109,6 +108,7 @@ struct ext4_fc_dentry_update {
struct qstr fcd_name; /* Dirent name */
unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */
struct list_head fcd_list;
+ struct list_head fcd_dilist;
};
struct ext4_fc_stats {
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8cc11715518a..8bd66cdc41be 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -36,9 +36,11 @@
#include "acl.h"
#include "truncate.h"
-static bool ext4_dio_supported(struct inode *inode)
+static bool ext4_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
{
- if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENCRYPTED(inode))
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ if (!fscrypt_dio_supported(iocb, iter))
return false;
if (fsverity_active(inode))
return false;
@@ -61,7 +63,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
inode_lock_shared(inode);
}
- if (!ext4_dio_supported(inode)) {
+ if (!ext4_dio_supported(iocb, to)) {
inode_unlock_shared(inode);
/*
* Fallback to buffered I/O if the operation being performed on
@@ -509,7 +511,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
/* Fallback to buffered I/O if the inode does not support direct I/O. */
- if (!ext4_dio_supported(inode)) {
+ if (!ext4_dio_supported(iocb, from)) {
if (ilock_shared)
inode_unlock_shared(inode);
else
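
ext4_dio_supported() now takes the iocb and iov_iter so fscrypt can approve direct I/O per request (inline crypto, alignment) instead of rejecting every encrypted file outright; when the gate says no, ext4 falls back to buffered I/O. A toy model of that decision follows; the two bools stand in for fscrypt_dio_supported() and fsverity_active(), and the remaining ext4 conditions are elided.

/* Toy model of the per-I/O direct-I/O gate; illustration only. */
#include <stdbool.h>
#include <stdio.h>

struct io_request {
	bool encrypted;
	bool crypt_ok_for_this_io;	/* models fscrypt_dio_supported() */
	bool verity;			/* models fsverity_active() */
};

static bool dio_supported(const struct io_request *r)
{
	if (r->encrypted && !r->crypt_ok_for_this_io)
		return false;
	if (r->verity)
		return false;
	return true;	/* further checks elided */
}

int main(void)
{
	struct io_request r = { .encrypted = true, .crypt_ok_for_this_io = true };

	/* Before the change, any encrypted file took the buffered path. */
	printf("%s\n", dio_supported(&r) ? "direct I/O" : "buffered fallback");
	return 0;
}
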
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index f34f4176c1e7..147b5241dd94 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -290,7 +290,7 @@ static int __ext4fs_dirhash(const struct inode *dir, const char *name, int len,
int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
struct dx_hash_info *hinfo)
{
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
const struct unicode_map *um = dir->i_sb->s_encoding;
int r, dlen;
unsigned char *buff;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 89efa78ed4b2..07a8c75b65ed 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -696,7 +696,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode,
* Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
* moment, get_block can be called only for blocks inside i_size since
* page cache has been already dropped and writes are blocked by
- * i_mutex. So we can safely drop the i_data_sem here.
+ * i_rwsem. So we can safely drop the i_data_sem here.
*/
BUG_ON(EXT4_JOURNAL(inode) == NULL);
ext4_discard_preallocations(inode, 0);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 635bcf68a67e..9c076262770d 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -911,7 +911,7 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
struct page **pagep,
void **fsdata)
{
- int ret, inline_size;
+ int ret;
handle_t *handle;
struct page *page;
struct ext4_iloc iloc;
@@ -928,14 +928,9 @@ retry_journal:
goto out;
}
- inline_size = ext4_get_max_inline_size(inode);
-
- ret = -ENOSPC;
- if (inline_size >= pos + len) {
- ret = ext4_prepare_inline_data(handle, inode, pos + len);
- if (ret && ret != -ENOSPC)
- goto out_journal;
- }
+ ret = ext4_prepare_inline_data(handle, inode, pos + len);
+ if (ret && ret != -ENOSPC)
+ goto out_journal;
/*
* We cannot recurse into the filesystem as the transaction
@@ -1133,7 +1128,15 @@ static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
struct ext4_iloc *iloc,
void *buf, int inline_size)
{
- ext4_create_inline_data(handle, inode, inline_size);
+ int ret;
+
+ ret = ext4_create_inline_data(handle, inode, inline_size);
+ if (ret) {
+ ext4_msg(inode->i_sb, KERN_EMERG,
+ "error restoring inline_data for inode -- potential data loss! (inode %lu, error %d)",
+ inode->i_ino, ret);
+ return;
+ }
ext4_write_inline_data(inode, iloc, buf, 0, inline_size);
ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
}
@@ -1780,19 +1783,20 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data)
void *inline_pos;
unsigned int offset;
struct ext4_dir_entry_2 *de;
- bool ret = true;
+ bool ret = false;
err = ext4_get_inode_loc(dir, &iloc);
if (err) {
EXT4_ERROR_INODE_ERR(dir, -err,
"error %d getting inode %lu block",
err, dir->i_ino);
- return true;
+ return false;
}
down_read(&EXT4_I(dir)->xattr_sem);
if (!ext4_has_inline_data(dir)) {
*has_inline_data = 0;
+ ret = true;
goto out;
}
@@ -1801,7 +1805,6 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data)
ext4_warning(dir->i_sb,
"bad inline directory (dir #%lu) - no `..'",
dir->i_ino);
- ret = true;
goto out;
}
@@ -1820,16 +1823,15 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data)
dir->i_ino, le32_to_cpu(de->inode),
le16_to_cpu(de->rec_len), de->name_len,
inline_size);
- ret = true;
goto out;
}
if (le32_to_cpu(de->inode)) {
- ret = false;
goto out;
}
offset += ext4_rec_len_from_disk(de->rec_len, inline_size);
}
+ ret = true;
out:
up_read(&EXT4_I(dir)->xattr_sem);
brelse(iloc.bh);
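
empty_inline_dir() above flips its default return to false, so every bail-out path now reports "not empty" — the safe answer when the directory could not be read or parsed. A toy model of that fail-safe default:

/* Fail-safe default: claim "not empty" unless emptiness is proven. */
#include <stdbool.h>
#include <stdio.h>

static bool dir_is_empty(int read_err, int nr_live_entries)
{
	bool ret = false;		/* was "true" before the fix */

	if (read_err)
		goto out;		/* unreadable: refuse to call it empty */
	if (nr_live_entries > 0)
		goto out;
	ret = true;			/* proven empty only on success */
out:
	return ret;
}

int main(void)
{
	printf("I/O error   -> empty=%d (must be 0)\n", dir_is_empty(1, 0));
	printf("has entries -> empty=%d\n", dir_is_empty(0, 2));
	printf("clean/empty -> empty=%d\n", dir_is_empty(0, 0));
	return 0;
}
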
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5f79d265d06a..1ce13f69fbec 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -137,8 +137,6 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
new_size);
}
-static void ext4_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length);
static int __ext4_journalled_writepage(struct page *page, unsigned int len);
static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
int pextents);
@@ -186,7 +184,7 @@ void ext4_evict_inode(struct inode *inode)
* journal. So although mm thinks everything is clean and
* ready for reaping the inode might still have some pages to
* write in the running transaction or waiting to be
- * checkpointed. Thus calling jbd2_journal_invalidatepage()
+ * checkpointed. Thus calling jbd2_journal_invalidate_folio()
* (via truncate_inode_pages()) to discard these buffers can
* cause data loss. Also even if we did not discard these
* buffers, we would have no way to find them after the inode
@@ -338,7 +336,7 @@ stop_handle:
return;
no_delete:
if (!list_empty(&EXT4_I(inode)->i_fc_list))
- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
}
@@ -1224,7 +1222,7 @@ retry_journal:
/*
* __block_write_begin may have instantiated a few blocks
* outside i_size. Trim these off again. Don't need
- * i_size_read because we hold i_mutex.
+ * i_size_read because we hold i_rwsem.
*
* Add inode to orphan list in case we crash before
* truncate finishes
@@ -1571,16 +1569,18 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
+ struct folio *folio = page_folio(page);
- BUG_ON(!PageLocked(page));
- BUG_ON(PageWriteback(page));
+ BUG_ON(!folio_test_locked(folio));
+ BUG_ON(folio_test_writeback(folio));
if (invalidate) {
- if (page_mapped(page))
- clear_page_dirty_for_io(page);
- block_invalidatepage(page, 0, PAGE_SIZE);
- ClearPageUptodate(page);
+ if (folio_mapped(folio))
+ folio_clear_dirty_for_io(folio);
+ block_invalidate_folio(folio, 0,
+ folio_size(folio));
+ folio_clear_uptodate(folio);
}
- unlock_page(page);
+ folio_unlock(folio);
}
pagevec_release(&pvec);
}
@@ -1971,6 +1971,7 @@ out_no_pagelock:
static int ext4_writepage(struct page *page,
struct writeback_control *wbc)
{
+ struct folio *folio = page_folio(page);
int ret = 0;
loff_t size;
unsigned int len;
@@ -1980,8 +1981,8 @@ static int ext4_writepage(struct page *page,
bool keep_towrite = false;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) {
- inode->i_mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
- unlock_page(page);
+ folio_invalidate(folio, 0, folio_size(folio));
+ folio_unlock(folio);
return -EIO;
}
@@ -1993,6 +1994,15 @@ static int ext4_writepage(struct page *page,
else
len = PAGE_SIZE;
+ /* Should never happen but for bugs in other kernel subsystems */
+ if (!page_has_buffers(page)) {
+ ext4_warning_inode(inode,
+ "page %lu does not have buffers attached", page->index);
+ ClearPageDirty(page);
+ unlock_page(page);
+ return 0;
+ }
+
page_bufs = page_buffers(page);
/*
* We cannot do block allocation or other extent handling in this
@@ -2594,6 +2604,22 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
wait_on_page_writeback(page);
BUG_ON(PageWriteback(page));
+ /*
+ * Should never happen but for buggy code in
+ * other subsystems that call
+ * set_page_dirty() without properly warning
+ * the file system first. See [1] for more
+ * information.
+ *
+ * [1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz
+ */
+ if (!page_has_buffers(page)) {
+ ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", page->index);
+ ClearPageDirty(page);
+ unlock_page(page);
+ continue;
+ }
+
if (mpd->map.m_len == 0)
mpd->first_page = page->index;
mpd->next_page = page->index + 1;
@@ -3182,40 +3208,39 @@ static void ext4_readahead(struct readahead_control *rac)
ext4_mpage_readpages(inode, rac, NULL);
}
-static void ext4_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length)
+static void ext4_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length)
{
- trace_ext4_invalidatepage(page, offset, length);
+ trace_ext4_invalidate_folio(folio, offset, length);
/* No journalling happens on data buffers when this function is used */
- WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page)));
+ WARN_ON(folio_buffers(folio) && buffer_jbd(folio_buffers(folio)));
- block_invalidatepage(page, offset, length);
+ block_invalidate_folio(folio, offset, length);
}
-static int __ext4_journalled_invalidatepage(struct page *page,
- unsigned int offset,
- unsigned int length)
+static int __ext4_journalled_invalidate_folio(struct folio *folio,
+ size_t offset, size_t length)
{
- journal_t *journal = EXT4_JOURNAL(page->mapping->host);
+ journal_t *journal = EXT4_JOURNAL(folio->mapping->host);
- trace_ext4_journalled_invalidatepage(page, offset, length);
+ trace_ext4_journalled_invalidate_folio(folio, offset, length);
/*
* If it's a full truncate we just forget about the pending dirtying
*/
- if (offset == 0 && length == PAGE_SIZE)
- ClearPageChecked(page);
+ if (offset == 0 && length == folio_size(folio))
+ folio_clear_checked(folio);
- return jbd2_journal_invalidatepage(journal, page, offset, length);
+ return jbd2_journal_invalidate_folio(journal, folio, offset, length);
}
/* Wrapper for aops... */
-static void ext4_journalled_invalidatepage(struct page *page,
- unsigned int offset,
- unsigned int length)
+static void ext4_journalled_invalidate_folio(struct folio *folio,
+ size_t offset,
+ size_t length)
{
- WARN_ON(__ext4_journalled_invalidatepage(page, offset, length) < 0);
+ WARN_ON(__ext4_journalled_invalidate_folio(folio, offset, length) < 0);
}
static int ext4_releasepage(struct page *page, gfp_t wait)
@@ -3409,6 +3434,13 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
if (ret < 0)
return ret;
out:
+ /*
+ * When inline encryption is enabled, sometimes I/O to an encrypted file
+ * has to be broken up to guarantee DUN contiguity. Handle this by
+ * limiting the length of the mapping returned.
+ */
+ map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);
+
ext4_set_iomap(inode, iomap, &map, offset, length, flags);
return 0;
@@ -3541,29 +3573,32 @@ const struct iomap_ops ext4_iomap_report_ops = {
};
/*
- * Pages can be marked dirty completely asynchronously from ext4's journalling
- * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do
- * much here because ->set_page_dirty is called under VFS locks. The page is
- * not necessarily locked.
+ * Whenever the folio is being dirtied, corresponding buffers should already
+ * be attached to the transaction (we take care of this in ext4_page_mkwrite()
+ * and ext4_write_begin()). However we cannot move buffers to dirty transaction
+ * lists here because ->dirty_folio is called under VFS locks and the folio
+ * is not necessarily locked.
*
- * We cannot just dirty the page and leave attached buffers clean, because the
+ * We cannot just dirty the folio and leave attached buffers clean, because the
* buffers' dirty state is "definitive". We cannot just set the buffers dirty
* or jbddirty because all the journalling code will explode.
*
- * So what we do is to mark the page "pending dirty" and next time writepage
+ * So what we do is to mark the folio "pending dirty" and next time writepage
* is called, propagate that into the buffers appropriately.
*/
-static int ext4_journalled_set_page_dirty(struct page *page)
+static bool ext4_journalled_dirty_folio(struct address_space *mapping,
+ struct folio *folio)
{
- SetPageChecked(page);
- return __set_page_dirty_nobuffers(page);
+ WARN_ON_ONCE(!page_has_buffers(&folio->page));
+ folio_set_checked(folio);
+ return filemap_dirty_folio(mapping, folio);
}
-static int ext4_set_page_dirty(struct page *page)
+static bool ext4_dirty_folio(struct address_space *mapping, struct folio *folio)
{
- WARN_ON_ONCE(!PageLocked(page) && !PageDirty(page));
- WARN_ON_ONCE(!page_has_buffers(page));
- return __set_page_dirty_buffers(page);
+ WARN_ON_ONCE(!folio_test_locked(folio) && !folio_test_dirty(folio));
+ WARN_ON_ONCE(!folio_buffers(folio));
+ return block_dirty_folio(mapping, folio);
}
static int ext4_iomap_swap_activate(struct swap_info_struct *sis,
@@ -3580,9 +3615,9 @@ static const struct address_space_operations ext4_aops = {
.writepages = ext4_writepages,
.write_begin = ext4_write_begin,
.write_end = ext4_write_end,
- .set_page_dirty = ext4_set_page_dirty,
+ .dirty_folio = ext4_dirty_folio,
.bmap = ext4_bmap,
- .invalidatepage = ext4_invalidatepage,
+ .invalidate_folio = ext4_invalidate_folio,
.releasepage = ext4_releasepage,
.direct_IO = noop_direct_IO,
.migratepage = buffer_migrate_page,
@@ -3598,9 +3633,9 @@ static const struct address_space_operations ext4_journalled_aops = {
.writepages = ext4_writepages,
.write_begin = ext4_write_begin,
.write_end = ext4_journalled_write_end,
- .set_page_dirty = ext4_journalled_set_page_dirty,
+ .dirty_folio = ext4_journalled_dirty_folio,
.bmap = ext4_bmap,
- .invalidatepage = ext4_journalled_invalidatepage,
+ .invalidate_folio = ext4_journalled_invalidate_folio,
.releasepage = ext4_releasepage,
.direct_IO = noop_direct_IO,
.is_partially_uptodate = block_is_partially_uptodate,
@@ -3615,9 +3650,9 @@ static const struct address_space_operations ext4_da_aops = {
.writepages = ext4_writepages,
.write_begin = ext4_da_write_begin,
.write_end = ext4_da_write_end,
- .set_page_dirty = ext4_set_page_dirty,
+ .dirty_folio = ext4_dirty_folio,
.bmap = ext4_bmap,
- .invalidatepage = ext4_invalidatepage,
+ .invalidate_folio = ext4_invalidate_folio,
.releasepage = ext4_releasepage,
.direct_IO = noop_direct_IO,
.migratepage = buffer_migrate_page,
@@ -3629,9 +3664,8 @@ static const struct address_space_operations ext4_da_aops = {
static const struct address_space_operations ext4_dax_aops = {
.writepages = ext4_dax_writepages,
.direct_IO = noop_direct_IO,
- .set_page_dirty = __set_page_dirty_no_writeback,
+ .dirty_folio = noop_dirty_folio,
.bmap = ext4_bmap,
- .invalidatepage = noop_invalidatepage,
.swap_activate = ext4_iomap_swap_activate,
};
@@ -3979,7 +4013,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
}
- /* Wait all existing dio workers, newcomers will block on i_mutex */
+ /* Wait all existing dio workers, newcomers will block on i_rwsem */
inode_dio_wait(inode);
/*
@@ -4129,7 +4163,7 @@ int ext4_truncate(struct inode *inode)
/*
* There is a possibility that we're either freeing the inode
* or it's a completely new inode. In those cases we might not
- * have i_mutex locked because it's not necessary.
+ * have i_rwsem locked because it's not necessary.
*/
if (!(inode->i_state & (I_NEW|I_FREEING)))
WARN_ON(!inode_is_locked(inode));
@@ -5204,13 +5238,12 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
}
/*
- * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
- * buffers that are attached to a page stradding i_size and are undergoing
+ * In data=journal mode ext4_journalled_invalidate_folio() may fail to invalidate
+ * buffers that are attached to a folio straddling i_size and are undergoing
* commit. In that case we have to wait for commit to finish and try again.
*/
static void ext4_wait_for_tail_page_commit(struct inode *inode)
{
- struct page *page;
unsigned offset;
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
tid_t commit_tid = 0;
@@ -5218,25 +5251,25 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
offset = inode->i_size & (PAGE_SIZE - 1);
/*
- * If the page is fully truncated, we don't need to wait for any commit
- * (and we even should not as __ext4_journalled_invalidatepage() may
- * strip all buffers from the page but keep the page dirty which can then
- * confuse e.g. concurrent ext4_writepage() seeing dirty page without
+ * If the folio is fully truncated, we don't need to wait for any commit
+ * (and we even should not as __ext4_journalled_invalidate_folio() may
+ * strip all buffers from the folio but keep the folio dirty which can then
+ * confuse e.g. concurrent ext4_writepage() seeing dirty folio without
* buffers). Also we don't need to wait for any commit if all buffers in
- * the page remain valid. This is most beneficial for the common case of
+ * the folio remain valid. This is most beneficial for the common case of
* blocksize == PAGESIZE.
*/
if (!offset || offset > (PAGE_SIZE - i_blocksize(inode)))
return;
while (1) {
- page = find_lock_page(inode->i_mapping,
+ struct folio *folio = filemap_lock_folio(inode->i_mapping,
inode->i_size >> PAGE_SHIFT);
- if (!page)
+ if (!folio)
return;
- ret = __ext4_journalled_invalidatepage(page, offset,
- PAGE_SIZE - offset);
- unlock_page(page);
- put_page(page);
+ ret = __ext4_journalled_invalidate_folio(folio, offset,
+ folio_size(folio) - offset);
+ folio_unlock(folio);
+ folio_put(folio);
if (ret != -EBUSY)
return;
commit_tid = 0;
@@ -5271,7 +5304,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
* transaction are already on disk (truncate waits for pages under
* writeback).
*
- * Called with inode->i_mutex down.
+ * Called with inode->i_rwsem down.
*/
int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
struct iattr *attr)
@@ -5983,7 +6016,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
return PTR_ERR(handle);
ext4_fc_mark_ineligible(inode->i_sb,
- EXT4_FC_REASON_JOURNAL_FLAG_CHANGE);
+ EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, handle);
err = ext4_mark_inode_dirty(handle, inode);
ext4_handle_sync(handle);
ext4_journal_stop(handle);
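
The inode.c conversion above replaces ->set_page_dirty with ->dirty_folio and ->invalidatepage with ->invalidate_folio. For data=journal the dirtying hook still only flags the folio as "checked" (pending dirty) and lets writepage journal the buffers later. A toy model of that handshake, with plain bools standing in for folio flag bits:

/* Toy model of the data=journal "pending dirty" handshake; illustration only. */
#include <stdbool.h>
#include <stdio.h>

struct folio_model {
	bool dirty;
	bool checked;	/* models the bit set by folio_set_checked() */
};

static bool journalled_dirty_folio(struct folio_model *folio)
{
	folio->checked = true;	/* mark "pending dirty" */
	folio->dirty = true;	/* filemap_dirty_folio() equivalent */
	return true;
}

static void writepage(struct folio_model *folio)
{
	if (folio->checked) {
		/* here the real code moves the buffers into the journal */
		folio->checked = false;
	}
	folio->dirty = false;
}

int main(void)
{
	struct folio_model f = { 0 };

	journalled_dirty_folio(&f);
	printf("pending dirty: %d\n", f.checked);
	writepage(&f);
	printf("after writeback: dirty=%d checked=%d\n", f.dirty, f.checked);
	return 0;
}
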
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index bbbedf27b71c..992229ca2d83 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -269,7 +269,7 @@ out:
return err ? err : 0;
}
-/**
+/*
* Swap memory between @a and @b for @len bytes.
*
* @a: pointer to first memory area
@@ -290,7 +290,7 @@ static void memswap(void *a, void *b, size_t len)
}
}
-/**
+/*
* Swap i_data and associated attributes between @inode1 and @inode2.
* This function is used for the primary swap between inode1 and inode2
* and also to revert this primary swap in case of errors.
@@ -344,7 +344,7 @@ void ext4_reset_inode_seed(struct inode *inode)
ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, sizeof(gen));
}
-/**
+/*
* Swap the information from the given @inode and the inode
* EXT4_BOOT_LOADER_INO. It will basically swap i_data and all other
* important fields of the inodes.
@@ -411,7 +411,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
err = -EINVAL;
goto err_out;
}
- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT);
+ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle);
/* Protect extent tree against block allocations via delalloc */
ext4_double_down_write_data_sem(inode, inode_bl);
@@ -1373,7 +1373,7 @@ mext_out:
err = ext4_resize_fs(sb, n_blocks_count);
if (EXT4_SB(sb)->s_journal) {
- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE);
+ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL);
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 9f86dd947032..252c168454c7 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1000,7 +1000,7 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac)
return 0;
if (ac->ac_criteria >= 2)
return 0;
- if (ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS))
+ if (!ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS))
return 0;
return 1;
}
@@ -1689,7 +1689,7 @@ static int mb_test_and_clear_bits(void *bm, int cur, int len)
return zero_bit;
}
-void ext4_set_bits(void *bm, int cur, int len)
+void mb_set_bits(void *bm, int cur, int len)
{
__u32 *addr;
@@ -1996,7 +1996,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
mb_update_avg_fragment_size(e4b->bd_sb, e4b->bd_info);
- ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
+ mb_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
mb_check_buddy(e4b);
return ret;
@@ -3825,7 +3825,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
* We leak some of the blocks here.
*/
ext4_lock_group(sb, ac->ac_b_ex.fe_group);
- ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
+ mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
ac->ac_b_ex.fe_len);
ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -3844,7 +3844,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
}
}
#endif
- ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
+ mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
ac->ac_b_ex.fe_len);
if (ext4_has_group_desc_csum(sb) &&
(gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
@@ -3899,69 +3899,103 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_group_t group;
ext4_grpblk_t blkoff;
- int i, clen, err;
+ int i, err;
int already;
+ unsigned int clen, clen_changed, thisgrp_len;
- clen = EXT4_B2C(sbi, len);
+ while (len > 0) {
+ ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
- ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
- bitmap_bh = ext4_read_block_bitmap(sb, group);
- if (IS_ERR(bitmap_bh)) {
- err = PTR_ERR(bitmap_bh);
- bitmap_bh = NULL;
- goto out_err;
- }
+ /*
+	 * Check whether we are marking blocks across a group
+	 * boundary. With flex_bg, (block, len) may span more than
+	 * one group; in that case we need the metadata of each group
+	 * we touch, which is why we iterate here.
+ */
+ thisgrp_len = min_t(unsigned int, (unsigned int)len,
+ EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff));
+ clen = EXT4_NUM_B2C(sbi, thisgrp_len);
+
+ if (!ext4_sb_block_valid(sb, NULL, block, thisgrp_len)) {
+ ext4_error(sb, "Marking blocks in system zone - "
+ "Block = %llu, len = %u",
+ block, thisgrp_len);
+ bitmap_bh = NULL;
+ break;
+ }
- err = -EIO;
- gdp = ext4_get_group_desc(sb, group, &gdp_bh);
- if (!gdp)
- goto out_err;
+ bitmap_bh = ext4_read_block_bitmap(sb, group);
+ if (IS_ERR(bitmap_bh)) {
+ err = PTR_ERR(bitmap_bh);
+ bitmap_bh = NULL;
+ break;
+ }
- ext4_lock_group(sb, group);
- already = 0;
- for (i = 0; i < clen; i++)
- if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state)
- already++;
+ err = -EIO;
+ gdp = ext4_get_group_desc(sb, group, &gdp_bh);
+ if (!gdp)
+ break;
- if (state)
- ext4_set_bits(bitmap_bh->b_data, blkoff, clen);
- else
- mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen);
- if (ext4_has_group_desc_csum(sb) &&
- (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
- gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
- ext4_free_group_clusters_set(sb, gdp,
- ext4_free_clusters_after_init(sb,
- group, gdp));
- }
- if (state)
- clen = ext4_free_group_clusters(sb, gdp) - clen + already;
- else
- clen = ext4_free_group_clusters(sb, gdp) + clen - already;
+ ext4_lock_group(sb, group);
+ already = 0;
+ for (i = 0; i < clen; i++)
+ if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) ==
+ !state)
+ already++;
+
+ clen_changed = clen - already;
+ if (state)
+ mb_set_bits(bitmap_bh->b_data, blkoff, clen);
+ else
+ mb_clear_bits(bitmap_bh->b_data, blkoff, clen);
+ if (ext4_has_group_desc_csum(sb) &&
+ (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
+ ext4_free_group_clusters_set(sb, gdp,
+ ext4_free_clusters_after_init(sb, group, gdp));
+ }
+ if (state)
+ clen = ext4_free_group_clusters(sb, gdp) - clen_changed;
+ else
+ clen = ext4_free_group_clusters(sb, gdp) + clen_changed;
- ext4_free_group_clusters_set(sb, gdp, clen);
- ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
- ext4_group_desc_csum_set(sb, group, gdp);
+ ext4_free_group_clusters_set(sb, gdp, clen);
+ ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
+ ext4_group_desc_csum_set(sb, group, gdp);
- ext4_unlock_group(sb, group);
+ ext4_unlock_group(sb, group);
- if (sbi->s_log_groups_per_flex) {
- ext4_group_t flex_group = ext4_flex_group(sbi, group);
+ if (sbi->s_log_groups_per_flex) {
+ ext4_group_t flex_group = ext4_flex_group(sbi, group);
+ struct flex_groups *fg = sbi_array_rcu_deref(sbi,
+ s_flex_groups, flex_group);
- atomic64_sub(len,
- &sbi_array_rcu_deref(sbi, s_flex_groups,
- flex_group)->free_clusters);
+ if (state)
+ atomic64_sub(clen_changed, &fg->free_clusters);
+ else
+ atomic64_add(clen_changed, &fg->free_clusters);
+
+ }
+
+ err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
+ if (err)
+ break;
+ sync_dirty_buffer(bitmap_bh);
+ err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
+ sync_dirty_buffer(gdp_bh);
+ if (err)
+ break;
+
+ block += thisgrp_len;
+ len -= thisgrp_len;
+ brelse(bitmap_bh);
+ BUG_ON(len < 0);
}
- err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
if (err)
- goto out_err;
- sync_dirty_buffer(bitmap_bh);
- err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
- sync_dirty_buffer(gdp_bh);
-
-out_err:
- brelse(bitmap_bh);
+ brelse(bitmap_bh);
}
/*
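
ext4_mb_mark_bb() above gains a while loop so a (block, len) range that crosses group boundaries (possible with flex_bg) is processed one group at a time. The sketch below models just the chunking arithmetic, with a fixed group size and the cluster accounting omitted.

/* Userspace model of the per-group chunking in ext4_mb_mark_bb(); illustration only. */
#include <stdio.h>

#define BLOCKS_PER_GROUP 128ULL	/* fixed geometry for the example */

int main(void)
{
	unsigned long long block = 250;	/* spans groups 1 and 2 */
	unsigned int len = 20;

	while (len > 0) {
		unsigned long long group = block / BLOCKS_PER_GROUP;
		unsigned int blkoff = block % BLOCKS_PER_GROUP;
		unsigned int thisgrp_len = len;

		if (thisgrp_len > BLOCKS_PER_GROUP - blkoff)
			thisgrp_len = BLOCKS_PER_GROUP - blkoff;

		/* here the real code loads and updates this group's bitmap */
		printf("group %llu: mark %u blocks at offset %u\n",
		       group, thisgrp_len, blkoff);

		block += thisgrp_len;
		len -= thisgrp_len;
	}
	return 0;
}
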
@@ -4433,7 +4467,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
while (n) {
entry = rb_entry(n, struct ext4_free_data, efd_node);
- ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
+ mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
n = rb_next(n);
}
return;
@@ -4474,7 +4508,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
if (unlikely(len == 0))
continue;
BUG_ON(groupnr != group);
- ext4_set_bits(bitmap, start, len);
+ mb_set_bits(bitmap, start, len);
preallocated += len;
}
mb_debug(sb, "preallocated %d for group %u\n", preallocated, group);
@@ -5753,7 +5787,8 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
struct super_block *sb = ar->inode->i_sb;
ext4_group_t group;
ext4_grpblk_t blkoff;
- int i = sb->s_blocksize;
+ ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
+ ext4_grpblk_t i = 0;
ext4_fsblk_t goal, block;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
@@ -5775,19 +5810,26 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
ext4_get_group_no_and_offset(sb,
max(ext4_group_first_block_no(sb, group), goal),
NULL, &blkoff);
- i = mb_find_next_zero_bit(bitmap_bh->b_data, sb->s_blocksize,
+ while (1) {
+ i = mb_find_next_zero_bit(bitmap_bh->b_data, max,
blkoff);
+ if (i >= max)
+ break;
+ if (ext4_fc_replay_check_excluded(sb,
+ ext4_group_first_block_no(sb, group) + i)) {
+ blkoff = i + 1;
+ } else
+ break;
+ }
brelse(bitmap_bh);
- if (i >= sb->s_blocksize)
- continue;
- if (ext4_fc_replay_check_excluded(sb,
- ext4_group_first_block_no(sb, group) + i))
- continue;
- break;
+ if (i < max)
+ break;
}
- if (group >= ext4_get_groups_count(sb) && i >= sb->s_blocksize)
+ if (group >= ext4_get_groups_count(sb) || i >= max) {
+ *errp = -ENOSPC;
return 0;
+ }
block = ext4_group_first_block_no(sb, group) + i;
ext4_mb_mark_bb(sb, block, 1, 1);
@@ -5838,17 +5880,17 @@ static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block,
}
/**
- * ext4_free_blocks() -- Free given blocks and update quota
+ * ext4_mb_clear_bb() -- helper function for freeing blocks.
+ * Used by ext4_free_blocks()
* @handle: handle for this transaction
* @inode: inode
- * @bh: optional buffer of the block to be freed
* @block: starting physical block to be freed
* @count: number of blocks to be freed
* @flags: flags used by ext4_free_blocks
*/
-void ext4_free_blocks(handle_t *handle, struct inode *inode,
- struct buffer_head *bh, ext4_fsblk_t block,
- unsigned long count, int flags)
+static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode,
+ ext4_fsblk_t block, unsigned long count,
+ int flags)
{
struct buffer_head *bitmap_bh = NULL;
struct super_block *sb = inode->i_sb;
@@ -5865,80 +5907,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
sbi = EXT4_SB(sb);
- if (sbi->s_mount_state & EXT4_FC_REPLAY) {
- ext4_free_blocks_simple(inode, block, count);
- return;
- }
-
- might_sleep();
- if (bh) {
- if (block)
- BUG_ON(block != bh->b_blocknr);
- else
- block = bh->b_blocknr;
- }
-
- if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
- !ext4_inode_block_valid(inode, block, count)) {
- ext4_error(sb, "Freeing blocks not in datazone - "
- "block = %llu, count = %lu", block, count);
- goto error_return;
- }
-
- ext4_debug("freeing block %llu\n", block);
- trace_ext4_free_blocks(inode, block, count, flags);
-
- if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
- BUG_ON(count > 1);
-
- ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
- inode, bh, block);
- }
-
- /*
- * If the extent to be freed does not begin on a cluster
- * boundary, we need to deal with partial clusters at the
- * beginning and end of the extent. Normally we will free
- * blocks at the beginning or the end unless we are explicitly
- * requested to avoid doing so.
- */
- overflow = EXT4_PBLK_COFF(sbi, block);
- if (overflow) {
- if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
- overflow = sbi->s_cluster_ratio - overflow;
- block += overflow;
- if (count > overflow)
- count -= overflow;
- else
- return;
- } else {
- block -= overflow;
- count += overflow;
- }
- }
- overflow = EXT4_LBLK_COFF(sbi, count);
- if (overflow) {
- if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
- if (count > overflow)
- count -= overflow;
- else
- return;
- } else
- count += sbi->s_cluster_ratio - overflow;
- }
-
- if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
- int i;
- int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
-
- for (i = 0; i < count; i++) {
- cond_resched();
- if (is_metadata)
- bh = sb_find_get_block(inode->i_sb, block + i);
- ext4_forget(handle, is_metadata, inode, bh, block + i);
- }
- }
-
do_more:
overflow = 0;
ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -5969,13 +5937,7 @@ do_more:
goto error_return;
}
- if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
- in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
- in_range(block, ext4_inode_table(sb, gdp),
- sbi->s_itb_per_group) ||
- in_range(block + count - 1, ext4_inode_table(sb, gdp),
- sbi->s_itb_per_group)) {
-
+ if (!ext4_inode_block_valid(inode, block, count)) {
ext4_error(sb, "Freeing blocks in system zone - "
"Block = %llu, count = %lu", block, count);
/* err = 0. ext4_std_error should be a no op */
@@ -6046,7 +6008,7 @@ do_more:
NULL);
if (err && err != -EOPNOTSUPP)
ext4_msg(sb, KERN_WARNING, "discard request in"
- " group:%d block:%d count:%lu failed"
+ " group:%u block:%d count:%lu failed"
" with %d", block_group, bit, count,
err);
} else
@@ -6107,6 +6069,103 @@ error_return:
}
/**
+ * ext4_free_blocks() -- Free given blocks and update quota
+ * @handle: handle for this transaction
+ * @inode: inode
+ * @bh: optional buffer of the block to be freed
+ * @block: starting physical block to be freed
+ * @count: number of blocks to be freed
+ * @flags: flags used by ext4_free_blocks
+ */
+void ext4_free_blocks(handle_t *handle, struct inode *inode,
+ struct buffer_head *bh, ext4_fsblk_t block,
+ unsigned long count, int flags)
+{
+ struct super_block *sb = inode->i_sb;
+ unsigned int overflow;
+ struct ext4_sb_info *sbi;
+
+ sbi = EXT4_SB(sb);
+
+ if (sbi->s_mount_state & EXT4_FC_REPLAY) {
+ ext4_free_blocks_simple(inode, block, count);
+ return;
+ }
+
+ might_sleep();
+ if (bh) {
+ if (block)
+ BUG_ON(block != bh->b_blocknr);
+ else
+ block = bh->b_blocknr;
+ }
+
+ if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
+ !ext4_inode_block_valid(inode, block, count)) {
+ ext4_error(sb, "Freeing blocks not in datazone - "
+ "block = %llu, count = %lu", block, count);
+ return;
+ }
+
+ ext4_debug("freeing block %llu\n", block);
+ trace_ext4_free_blocks(inode, block, count, flags);
+
+ if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
+ BUG_ON(count > 1);
+
+ ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
+ inode, bh, block);
+ }
+
+ /*
+ * If the extent to be freed does not begin on a cluster
+ * boundary, we need to deal with partial clusters at the
+ * beginning and end of the extent. Normally we will free
+ * blocks at the beginning or the end unless we are explicitly
+ * requested to avoid doing so.
+ */
+ overflow = EXT4_PBLK_COFF(sbi, block);
+ if (overflow) {
+ if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
+ overflow = sbi->s_cluster_ratio - overflow;
+ block += overflow;
+ if (count > overflow)
+ count -= overflow;
+ else
+ return;
+ } else {
+ block -= overflow;
+ count += overflow;
+ }
+ }
+ overflow = EXT4_LBLK_COFF(sbi, count);
+ if (overflow) {
+ if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
+ if (count > overflow)
+ count -= overflow;
+ else
+ return;
+ } else
+ count += sbi->s_cluster_ratio - overflow;
+ }
+
+ if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
+ int i;
+ int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
+
+ for (i = 0; i < count; i++) {
+ cond_resched();
+ if (is_metadata)
+ bh = sb_find_get_block(inode->i_sb, block + i);
+ ext4_forget(handle, is_metadata, inode, bh, block + i);
+ }
+ }
+
+ ext4_mb_clear_bb(handle, inode, block, count, flags);
+ return;
+}
+
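
The cluster rounding above widens (or trims) the freed range to
cluster boundaries before handing it to ext4_mb_clear_bb(). A worked
example, assuming a hypothetical s_cluster_ratio of 16 blocks per
cluster:

    /* Freeing block 70, count 20, no NOFREE_* flags set:
     * EXT4_PBLK_COFF = 70 % 16 = 6  -> block = 64, count = 26
     * EXT4_LBLK_COFF = 26 % 16 = 10 -> count = 26 + (16 - 10) = 32
     * Result: blocks 64..95 are freed, i.e. exactly two clusters.
     */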
+/**
* ext4_group_add_blocks() -- Add given blocks to an existing group
* @handle: handle to this transaction
* @sb: super block
@@ -6162,11 +6221,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
goto error_return;
}
- if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
- in_range(ext4_inode_bitmap(sb, desc), block, count) ||
- in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
- in_range(block + count - 1, ext4_inode_table(sb, desc),
- sbi->s_itb_per_group)) {
+ if (!ext4_sb_block_valid(sb, NULL, block, count)) {
ext4_error(sb, "Adding blocks in system zones - "
"Block = %llu, count = %lu",
block, count);
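
Both this hunk and the earlier one in ext4_mb_clear_bb() replace four
open-coded in_range() tests with the ext4_sb_block_valid() /
ext4_inode_block_valid() helpers, which consult the system-zone tree.
For contrast, the removed logic reduced to roughly this (variable
names assumed):

    /* in_range(b, s, n) is true when s <= b < s + n. */
    bool hits_group_metadata =
            in_range(block_bitmap_blk, block, count) ||
            in_range(inode_bitmap_blk, block, count) ||
            in_range(block, inode_table_blk, itb_per_group) ||
            in_range(block + count - 1, inode_table_blk, itb_per_group);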
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index ff8916e1d38e..7a5353a8cfd7 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -485,7 +485,7 @@ int ext4_ext_migrate(struct inode *inode)
* when we add extents we extend the journal
*/
/*
- * Even though we take i_mutex we can still cause block
+ * Even though we take i_rwsem we can still cause block
* allocation via mmap write to holes. If we have allocated
* new blocks we fail migrate. New block allocation will
* clear EXT4_STATE_EXT_MIGRATE flag. The flag is updated
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 52c9bd154122..e37da8d5cd0c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1317,7 +1317,7 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
dx_set_count(entries, count + 1);
}
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
/*
* Test whether a case-insensitive directory entry matches the filename
* being searched for. If quick is set, assume the name being looked up
@@ -1428,7 +1428,7 @@ static bool ext4_match(struct inode *parent,
f.crypto_buf = fname->crypto_buf;
#endif
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent) &&
(!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) {
if (fname->cf_name.name) {
@@ -1800,7 +1800,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
}
}
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
if (!inode && IS_CASEFOLDED(dir)) {
/* Eventually we want to call d_add_ci(dentry, NULL)
* for negative dentries in the encoding case as
@@ -2308,7 +2308,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
if (fscrypt_is_nokey_name(dentry))
return -ENOKEY;
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) &&
sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name))
return -EINVAL;
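
All of the CONFIG_UNICODE guards in this series move from #ifdef to
#if IS_ENABLED(), which also fires when the unicode tables are built
as a module (CONFIG_UNICODE=m):

    /* CONFIG_FOO=y: #ifdef yes, IS_ENABLED yes
     * CONFIG_FOO=m: #ifdef no,  IS_ENABLED yes
     * unset:        #ifdef no,  IS_ENABLED no
     */
    #if IS_ENABLED(CONFIG_UNICODE)
    /* compiled when unicode support is built-in or modular */
    #endif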
@@ -2997,14 +2997,14 @@ bool ext4_empty_dir(struct inode *inode)
if (inode->i_size < ext4_dir_rec_len(1, NULL) +
ext4_dir_rec_len(2, NULL)) {
EXT4_ERROR_INODE(inode, "invalid size");
- return true;
+ return false;
}
/* The first directory block must not be a hole,
* so treat it as DIRENT_HTREE
*/
bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
if (IS_ERR(bh))
- return true;
+ return false;
de = (struct ext4_dir_entry_2 *) bh->b_data;
if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
@@ -3012,7 +3012,7 @@ bool ext4_empty_dir(struct inode *inode)
le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) {
ext4_warning_inode(inode, "directory missing '.'");
brelse(bh);
- return true;
+ return false;
}
offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
de = ext4_next_entry(de, sb->s_blocksize);
@@ -3021,7 +3021,7 @@ bool ext4_empty_dir(struct inode *inode)
le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
ext4_warning_inode(inode, "directory missing '..'");
brelse(bh);
- return true;
+ return false;
}
offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
while (offset < inode->i_size) {
@@ -3035,7 +3035,7 @@ bool ext4_empty_dir(struct inode *inode)
continue;
}
if (IS_ERR(bh))
- return true;
+ return false;
}
de = (struct ext4_dir_entry_2 *) (bh->b_data +
(offset & (sb->s_blocksize - 1)));
@@ -3126,7 +3126,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
ext4_fc_track_unlink(handle, dentry);
retval = ext4_mark_inode_dirty(handle, dir);
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want to avoid
* invalidating the dentries here, along with returning the
@@ -3231,7 +3231,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry));
if (!retval)
ext4_fc_track_unlink(handle, dentry);
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want to avoid
* invalidating the dentries here, along with returning the
@@ -3889,14 +3889,21 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
* dirents in directories.
*/
ext4_fc_mark_ineligible(old.inode->i_sb,
- EXT4_FC_REASON_RENAME_DIR);
+ EXT4_FC_REASON_RENAME_DIR, handle);
} else {
+ struct super_block *sb = old.inode->i_sb;
+
if (new.inode)
ext4_fc_track_unlink(handle, new.dentry);
- __ext4_fc_track_link(handle, old.inode, new.dentry);
- __ext4_fc_track_unlink(handle, old.inode, old.dentry);
- if (whiteout)
- __ext4_fc_track_create(handle, whiteout, old.dentry);
+ if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
+ !(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) &&
+ !(ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE))) {
+ __ext4_fc_track_link(handle, old.inode, new.dentry);
+ __ext4_fc_track_unlink(handle, old.inode, old.dentry);
+ if (whiteout)
+ __ext4_fc_track_create(handle, whiteout,
+ old.dentry);
+ }
}
if (new.inode) {
@@ -4049,7 +4056,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
if (unlikely(retval))
goto end_rename;
ext4_fc_mark_ineligible(new.inode->i_sb,
- EXT4_FC_REASON_CROSS_RENAME);
+ EXT4_FC_REASON_CROSS_RENAME, handle);
if (old.dir_bh) {
retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
if (retval)
diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c
index 53adc8f570a3..7de0612eb42d 100644
--- a/fs/ext4/orphan.c
+++ b/fs/ext4/orphan.c
@@ -93,7 +93,7 @@ static int ext4_orphan_file_add(handle_t *handle, struct inode *inode)
* At filesystem recovery time, we walk this list deleting unlinked
* inodes and truncating linked inodes in ext4_orphan_cleanup().
*
- * Orphan list manipulation functions must be called under i_mutex unless
+ * Orphan list manipulation functions must be called under i_rwsem unless
* we are just creating the inode or deleting it.
*/
int ext4_orphan_add(handle_t *handle, struct inode *inode)
@@ -119,7 +119,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
/*
* Orphan handling is only valid for files with data blocks
* being truncated, or files being unlinked. Note that we either
- * hold i_mutex, or the inode can not be referenced from outside,
+ * hold i_rwsem, or the inode can not be referenced from outside,
* so i_nlink should not be bumped due to race
*/
ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 1d370364230e..17bb78ebd784 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -323,10 +323,9 @@ static void ext4_end_bio(struct bio *bio)
{
ext4_io_end_t *io_end = bio->bi_private;
sector_t bi_sector = bio->bi_iter.bi_sector;
- char b[BDEVNAME_SIZE];
- if (WARN_ONCE(!io_end, "io_end is NULL: %s: sector %Lu len %u err %d\n",
- bio_devname(bio, b),
+ if (WARN_ONCE(!io_end, "io_end is NULL: %pg: sector %Lu len %u err %d\n",
+ bio->bi_bdev,
(long long) bio->bi_iter.bi_sector,
(unsigned) bio_sectors(bio),
bio->bi_status)) {
@@ -372,10 +371,9 @@ void ext4_io_submit(struct ext4_io_submit *io)
struct bio *bio = io->io_bio;
if (bio) {
- int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
- REQ_SYNC : 0;
+ if (io->io_wbc->sync_mode == WB_SYNC_ALL)
+ io->io_bio->bi_opf |= REQ_SYNC;
io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint;
- bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
submit_bio(io->io_bio);
}
io->io_bio = NULL;
@@ -398,10 +396,9 @@ static void io_submit_init_bio(struct ext4_io_submit *io,
* bio_alloc will _always_ be able to allocate a bio if
* __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset().
*/
- bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS);
+ bio = bio_alloc(bh->b_bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOIO);
fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
- bio_set_dev(bio, bh->b_bdev);
bio->bi_end_io = ext4_end_bio;
bio->bi_private = ext4_get_io_end(io->io_end);
io->io_bio = bio;
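
The page-io and readpage hunks track the updated bio allocation API,
which takes the target device and operation flags at allocation time
rather than through separate bio_set_dev()/bio_set_op_attrs() calls:

    /* Old pattern: */
    bio = bio_alloc(GFP_NOIO, nr_vecs);
    bio_set_dev(bio, bdev);
    bio_set_op_attrs(bio, REQ_OP_WRITE, op_flags);

    /* New pattern: */
    bio = bio_alloc(bdev, nr_vecs, REQ_OP_WRITE | op_flags, GFP_NOIO);
    bio->bi_iter.bi_sector = sector;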
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 4cd62f1d848c..1aa26d6634fc 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -365,15 +365,15 @@ int ext4_mpage_readpages(struct inode *inode,
* bio_alloc will _always_ be able to allocate a bio if
* __GFP_DIRECT_RECLAIM is set, see bio_alloc_bioset().
*/
- bio = bio_alloc(GFP_KERNEL, bio_max_segs(nr_pages));
+ bio = bio_alloc(bdev, bio_max_segs(nr_pages),
+ REQ_OP_READ, GFP_KERNEL);
fscrypt_set_bio_crypt_ctx(bio, inode, next_block,
GFP_KERNEL);
ext4_set_bio_post_read_ctx(bio, inode, page->index);
- bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
bio->bi_end_io = mpage_end_io;
- bio_set_op_attrs(bio, REQ_OP_READ,
- rac ? REQ_RAHEAD : 0);
+ if (rac)
+ bio->bi_opf |= REQ_RAHEAD;
}
length = first_hole << blkbits;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index ee8f02f406cb..90a941d20dff 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -14,6 +14,7 @@
#include <linux/errno.h>
#include <linux/slab.h>
+#include <linux/jiffies.h>
#include "ext4_jbd2.h"
@@ -483,7 +484,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
}
ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n",
first_cluster, first_cluster - start, count2);
- ext4_set_bits(bh->b_data, first_cluster - start, count2);
+ mb_set_bits(bh->b_data, first_cluster - start, count2);
err = ext4_handle_dirty_metadata(handle, NULL, bh);
brelse(bh);
@@ -632,7 +633,7 @@ handle_bb:
if (overhead != 0) {
ext4_debug("mark backup superblock %#04llx (+0)\n",
start);
- ext4_set_bits(bh->b_data, 0,
+ mb_set_bits(bh->b_data, 0,
EXT4_NUM_B2C(sbi, overhead));
}
ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count),
@@ -2100,7 +2101,7 @@ retry:
*/
while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
flexbg_size)) {
- if (jiffies - last_update_time > HZ * 10) {
+ if (time_is_before_jiffies(last_update_time + HZ * 10)) {
if (last_update_time)
ext4_msg(sb, KERN_INFO,
"resized to %llu blocks",
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index eee0d9ebfa6c..81749eaddf4c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1301,7 +1301,7 @@ static void ext4_put_super(struct super_block *sb)
kfree(sbi->s_blockgroup_lock);
fs_put_dax(sbi->s_daxdev);
fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
utf8_unload(sb->s_encoding);
#endif
kfree(sbi);
@@ -1316,7 +1316,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
{
struct ext4_inode_info *ei;
- ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
+ ei = alloc_inode_sb(sb, ext4_inode_cachep, GFP_NOFS);
if (!ei)
return NULL;
@@ -1961,7 +1961,7 @@ static const struct mount_opts {
{Opt_err, 0, 0}
};
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
static const struct ext4_sb_encodings {
__u16 magic;
char *name;
@@ -2021,12 +2021,12 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, char *arg)
#define EXT4_SPEC_s_commit_interval (1 << 16)
#define EXT4_SPEC_s_fc_debug_max_replay (1 << 17)
#define EXT4_SPEC_s_sb_block (1 << 18)
+#define EXT4_SPEC_mb_optimize_scan (1 << 19)
struct ext4_fs_context {
char *s_qf_names[EXT4_MAXQUOTAS];
char *test_dummy_enc_arg;
int s_jquota_fmt; /* Format of quota to use */
- int mb_optimize_scan;
#ifdef CONFIG_EXT4_DEBUG
int s_fc_debug_max_replay;
#endif
@@ -2045,8 +2045,8 @@ struct ext4_fs_context {
unsigned int mask_s_mount_opt;
unsigned int vals_s_mount_opt2;
unsigned int mask_s_mount_opt2;
- unsigned int vals_s_mount_flags;
- unsigned int mask_s_mount_flags;
+ unsigned long vals_s_mount_flags;
+ unsigned long mask_s_mount_flags;
unsigned int opt_flags; /* MOPT flags */
unsigned int spec;
u32 s_max_batch_time;
@@ -2149,23 +2149,36 @@ static inline void ctx_set_##name(struct ext4_fs_context *ctx, \
{ \
ctx->mask_s_##name |= flag; \
ctx->vals_s_##name |= flag; \
-} \
+}
+
+#define EXT4_CLEAR_CTX(name) \
static inline void ctx_clear_##name(struct ext4_fs_context *ctx, \
unsigned long flag) \
{ \
ctx->mask_s_##name |= flag; \
ctx->vals_s_##name &= ~flag; \
-} \
+}
+
+#define EXT4_TEST_CTX(name) \
static inline unsigned long \
ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag) \
{ \
return (ctx->vals_s_##name & flag); \
-} \
+}
-EXT4_SET_CTX(flags);
+EXT4_SET_CTX(flags); /* set only */
EXT4_SET_CTX(mount_opt);
+EXT4_CLEAR_CTX(mount_opt);
+EXT4_TEST_CTX(mount_opt);
EXT4_SET_CTX(mount_opt2);
-EXT4_SET_CTX(mount_flags);
+EXT4_CLEAR_CTX(mount_opt2);
+EXT4_TEST_CTX(mount_opt2);
+
+static inline void ctx_set_mount_flag(struct ext4_fs_context *ctx, int bit)
+{
+ set_bit(bit, &ctx->mask_s_mount_flags);
+ set_bit(bit, &ctx->vals_s_mount_flags);
+}
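
Splitting the original three-in-one EXT4_SET_CTX() macro lets the
set/clear/test accessors be instantiated independently per field, and
mount_flags switches to set_bit() on an unsigned long because the
EXT4_MF_* values are bit numbers rather than masks. One instantiation
expands to roughly:

    /* EXT4_SET_CTX(mount_opt) generates: */
    static inline void ctx_set_mount_opt(struct ext4_fs_context *ctx,
                                         unsigned long flag)
    {
            ctx->mask_s_mount_opt |= flag;  /* option was specified */
            ctx->vals_s_mount_opt |= flag;  /* and set to 1 */
    }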
static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
@@ -2235,7 +2248,7 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
param->key);
return 0;
case Opt_abort:
- ctx_set_mount_flags(ctx, EXT4_MF_FS_ABORTED);
+ ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED);
return 0;
case Opt_i_version:
ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "5.20");
@@ -2451,12 +2464,17 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
ctx_clear_mount_opt(ctx, m->mount_opt);
return 0;
case Opt_mb_optimize_scan:
- if (result.int_32 != 0 && result.int_32 != 1) {
+ if (result.int_32 == 1) {
+ ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
+ ctx->spec |= EXT4_SPEC_mb_optimize_scan;
+ } else if (result.int_32 == 0) {
+ ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
+ ctx->spec |= EXT4_SPEC_mb_optimize_scan;
+ } else {
ext4_msg(NULL, KERN_WARNING,
"mb_optimize_scan should be set to 0 or 1.");
return -EINVAL;
}
- ctx->mb_optimize_scan = result.int_32;
return 0;
}
@@ -3468,8 +3486,9 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
*/
static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
{
- unsigned long long upper_limit, res = EXT4_NDIR_BLOCKS;
+ loff_t upper_limit, res = EXT4_NDIR_BLOCKS;
int meta_blocks;
+ unsigned int ppb = 1 << (bits - 2);
/*
* This is calculated to be the largest file size for a dense, block
@@ -3501,27 +3520,42 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
}
+ /* Compute how many blocks we can address by block tree */
+ res += ppb;
+ res += ppb * ppb;
+ res += ((loff_t)ppb) * ppb * ppb;
+ /* Compute how many metadata blocks are needed */
+ meta_blocks = 1;
+ meta_blocks += 1 + ppb;
+ meta_blocks += 1 + ppb + ppb * ppb;
+ /* Does block tree limit file size? */
+ if (res + meta_blocks <= upper_limit)
+ goto check_lfs;
+
+ res = upper_limit;
+ /* How many metadata blocks are needed for addressing upper_limit? */
+ upper_limit -= EXT4_NDIR_BLOCKS;
/* indirect blocks */
meta_blocks = 1;
+ upper_limit -= ppb;
/* double indirect blocks */
- meta_blocks += 1 + (1LL << (bits-2));
- /* tripple indirect blocks */
- meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
-
- upper_limit -= meta_blocks;
- upper_limit <<= bits;
-
- res += 1LL << (bits-2);
- res += 1LL << (2*(bits-2));
- res += 1LL << (3*(bits-2));
+ if (upper_limit < ppb * ppb) {
+ meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb);
+ res -= meta_blocks;
+ goto check_lfs;
+ }
+ meta_blocks += 1 + ppb;
+ upper_limit -= ppb * ppb;
+ /* triple indirect blocks for the rest */
+ meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb) +
+ DIV_ROUND_UP_ULL(upper_limit, ppb*ppb);
+ res -= meta_blocks;
+check_lfs:
res <<= bits;
- if (res > upper_limit)
- res = upper_limit;
-
if (res > MAX_LFS_FILESIZE)
res = MAX_LFS_FILESIZE;
- return (loff_t)res;
+ return res;
}
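
With ppb pointers per block (ppb = 1 << (bits - 2), i.e. the block
size divided by 4-byte block numbers), the indirect tree addresses
EXT4_NDIR_BLOCKS + ppb + ppb^2 + ppb^3 data blocks. A worked example:

    /* 4 KiB blocks: bits = 12, ppb = 1 << 10 = 1024.
     * data blocks: 12 + 1024 + 1024^2 + 1024^3 ~= 2^30  (~4 TiB)
     * meta blocks: 1  +  (1 + 1024)  +  (1 + 1024 + 1024^2)
     * The rewrite caps res at upper_limit first, then charges only
     * the metadata actually needed to address that many blocks.
     */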
static ext4_fsblk_t descriptor_loc(struct super_block *sb,
@@ -3606,7 +3640,7 @@ int ext4_feature_set_ok(struct super_block *sb, int readonly)
return 0;
}
-#ifndef CONFIG_UNICODE
+#if !IS_ENABLED(CONFIG_UNICODE)
if (ext4_has_feature_casefold(sb)) {
ext4_msg(sb, KERN_ERR,
"Filesystem with casefold feature cannot be "
@@ -4369,7 +4403,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
/* Set defaults for the variables that will be set during parsing */
ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
- ctx->mb_optimize_scan = DEFAULT_MB_OPTIMIZE_SCAN;
sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
sbi->s_sectors_written_start =
@@ -4610,7 +4643,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (err < 0)
goto failed_mount;
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
if (ext4_has_feature_casefold(sb) && !sb->s_encoding) {
const struct ext4_sb_encodings *encoding_info;
struct unicode_map *encoding;
@@ -5082,7 +5115,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
sbi->s_fc_bytes = 0;
ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
- ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
+ sbi->s_fc_ineligible_tid = 0;
spin_lock_init(&sbi->s_fc_lock);
memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
sbi->s_fc_replay_state.fc_regions = NULL;
@@ -5320,12 +5353,12 @@ no_journal:
* turned off by passing "mb_optimize_scan=0". This can also be
* turned on forcefully by passing "mb_optimize_scan=1".
*/
- if (ctx->mb_optimize_scan == 1)
- set_opt2(sb, MB_OPTIMIZE_SCAN);
- else if (ctx->mb_optimize_scan == 0)
- clear_opt2(sb, MB_OPTIMIZE_SCAN);
- else if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD)
- set_opt2(sb, MB_OPTIMIZE_SCAN);
+ if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) {
+ if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD)
+ set_opt2(sb, MB_OPTIMIZE_SCAN);
+ else
+ clear_opt2(sb, MB_OPTIMIZE_SCAN);
+ }
err = ext4_mb_init(sb);
if (err) {
@@ -5514,7 +5547,7 @@ failed_mount:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
utf8_unload(sb->s_encoding);
#endif
@@ -5540,7 +5573,7 @@ static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
sbi = ext4_alloc_sbi(sb);
if (!sbi)
- ret = -ENOMEM;
+ return -ENOMEM;
fc->s_fs_info = sbi;
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index f61e65ae27d8..d233c24ea342 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -309,7 +309,7 @@ EXT4_ATTR_FEATURE(meta_bg_resize);
EXT4_ATTR_FEATURE(encryption);
EXT4_ATTR_FEATURE(test_dummy_encryption_v2);
#endif
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
EXT4_ATTR_FEATURE(casefold);
#endif
#ifdef CONFIG_FS_VERITY
@@ -317,7 +317,7 @@ EXT4_ATTR_FEATURE(verity);
#endif
EXT4_ATTR_FEATURE(metadata_csum_seed);
EXT4_ATTR_FEATURE(fast_commit);
-#if defined(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION)
+#if IS_ENABLED(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION)
EXT4_ATTR_FEATURE(encrypted_casefold);
#endif
@@ -329,7 +329,7 @@ static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(encryption),
ATTR_LIST(test_dummy_encryption_v2),
#endif
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
ATTR_LIST(casefold),
#endif
#ifdef CONFIG_FS_VERITY
@@ -337,7 +337,7 @@ static struct attribute *ext4_feat_attrs[] = {
#endif
ATTR_LIST(metadata_csum_seed),
ATTR_LIST(fast_commit),
-#if defined(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION)
+#if IS_ENABLED(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION)
ATTR_LIST(encrypted_casefold),
#endif
NULL,
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 1e0fc1ed845b..042325349098 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -2408,7 +2408,7 @@ retry_inode:
if (IS_SYNC(inode))
ext4_handle_sync(handle);
}
- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
cleanup:
brelse(is.iloc.bh);
@@ -2486,7 +2486,7 @@ retry:
if (error == 0)
error = error2;
}
- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, NULL);
return error;
}
@@ -2920,7 +2920,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
error);
goto cleanup;
}
- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
}
error = 0;
cleanup:
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index f46a7339d6cf..03ef087537c7 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -143,3 +143,10 @@ config F2FS_IOSTAT
Support getting IO statistics through sysfs and printing out periodic
IO statistics tracepoint events. You have to turn on "iostat_enable"
sysfs node to enable this feature.
+
+config F2FS_UNFAIR_RWSEM
+ bool "F2FS unfair rw_semaphore"
+ depends on F2FS_FS && BLK_CGROUP
+ help
+ Use an unfair rw_semaphore when the system has I/O priority
+ configured through the block cgroup.
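
The f2fs_down_*/f2fs_up_* calls introduced throughout the f2fs hunks
below go through an f2fs-private rwsem wrapper; with this option the
wrapper can bypass the kernel rwsem's FIFO fairness so that
cgroup-prioritized I/O is not queued behind earlier waiters. A rough
sketch of the unfair read side (illustrative names; see fs/f2fs/f2fs.h
for the real definition):

    struct f2fs_rwsem {
            struct rw_semaphore internal_rwsem;
    #ifdef CONFIG_F2FS_UNFAIR_RWSEM
            wait_queue_head_t read_waiters;
    #endif
    };

    static inline void f2fs_down_read(struct f2fs_rwsem *sem)
    {
    #ifdef CONFIG_F2FS_UNFAIR_RWSEM
            wait_event(sem->read_waiters,
                       down_read_trylock(&sem->internal_rwsem));
    #else
            down_read(&sem->internal_rwsem);
    #endif
    }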
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 16e826e01f09..eaa240b21f07 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -204,8 +204,9 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type, bool rcu)
return __f2fs_get_acl(inode, type, NULL);
}
-static int f2fs_acl_update_mode(struct inode *inode, umode_t *mode_p,
- struct posix_acl **acl)
+static int f2fs_acl_update_mode(struct user_namespace *mnt_userns,
+ struct inode *inode, umode_t *mode_p,
+ struct posix_acl **acl)
{
umode_t mode = inode->i_mode;
int error;
@@ -218,14 +219,15 @@ static int f2fs_acl_update_mode(struct inode *inode, umode_t *mode_p,
return error;
if (error == 0)
*acl = NULL;
- if (!in_group_p(i_gid_into_mnt(&init_user_ns, inode)) &&
- !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID))
+ if (!in_group_p(i_gid_into_mnt(mnt_userns, inode)) &&
+ !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
mode &= ~S_ISGID;
*mode_p = mode;
return 0;
}
-static int __f2fs_set_acl(struct inode *inode, int type,
+static int __f2fs_set_acl(struct user_namespace *mnt_userns,
+ struct inode *inode, int type,
struct posix_acl *acl, struct page *ipage)
{
int name_index;
@@ -238,7 +240,8 @@ static int __f2fs_set_acl(struct inode *inode, int type,
case ACL_TYPE_ACCESS:
name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
if (acl && !ipage) {
- error = f2fs_acl_update_mode(inode, &mode, &acl);
+ error = f2fs_acl_update_mode(mnt_userns, inode,
+ &mode, &acl);
if (error)
return error;
set_acl_inode(inode, mode);
@@ -279,7 +282,7 @@ int f2fs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
return -EIO;
- return __f2fs_set_acl(inode, type, acl, NULL);
+ return __f2fs_set_acl(mnt_userns, inode, type, acl, NULL);
}
/*
@@ -419,7 +422,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
f2fs_mark_inode_dirty_sync(inode, true);
if (default_acl) {
- error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl,
+ error = __f2fs_set_acl(NULL, inode, ACL_TYPE_DEFAULT, default_acl,
ipage);
posix_acl_release(default_acl);
} else {
@@ -427,7 +430,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
}
if (acl) {
if (!error)
- error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl,
+ error = __f2fs_set_acl(NULL, inode, ACL_TYPE_ACCESS, acl,
ipage);
posix_acl_release(acl);
} else {
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 982f0170639f..a8fc4fa511a8 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -98,6 +98,13 @@ repeat:
}
if (unlikely(!PageUptodate(page))) {
+ if (page->index == sbi->metapage_eio_ofs &&
+ sbi->metapage_eio_cnt++ == MAX_RETRY_META_PAGE_EIO) {
+ set_ckpt_flags(sbi, CP_ERROR_FLAG);
+ } else {
+ sbi->metapage_eio_ofs = page->index;
+ sbi->metapage_eio_cnt = 0;
+ }
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
@@ -282,18 +289,22 @@ out:
return blkno - start;
}
-void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
+void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index,
+ unsigned int ra_blocks)
{
struct page *page;
bool readahead = false;
+ if (ra_blocks == RECOVERY_MIN_RA_BLOCKS)
+ return;
+
page = find_get_page(META_MAPPING(sbi), index);
if (!page || !PageUptodate(page))
readahead = true;
f2fs_put_page(page, 0);
if (readahead)
- f2fs_ra_meta_pages(sbi, index, BIO_MAX_VECS, META_POR, true);
+ f2fs_ra_meta_pages(sbi, index, ra_blocks, META_POR, true);
}
static int __f2fs_write_meta_page(struct page *page,
@@ -351,13 +362,13 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
goto skip_write;
/* if locked failed, cp will flush dirty pages instead */
- if (!down_write_trylock(&sbi->cp_global_sem))
+ if (!f2fs_down_write_trylock(&sbi->cp_global_sem))
goto skip_write;
trace_f2fs_writepages(mapping->host, wbc, META);
diff = nr_pages_to_write(sbi, META, wbc);
written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
- up_write(&sbi->cp_global_sem);
+ f2fs_up_write(&sbi->cp_global_sem);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
return 0;
@@ -436,26 +447,27 @@ stop:
return nwritten;
}
-static int f2fs_set_meta_page_dirty(struct page *page)
+static bool f2fs_dirty_meta_folio(struct address_space *mapping,
+ struct folio *folio)
{
- trace_f2fs_set_page_dirty(page, META);
-
- if (!PageUptodate(page))
- SetPageUptodate(page);
- if (!PageDirty(page)) {
- __set_page_dirty_nobuffers(page);
- inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
- set_page_private_reference(page);
- return 1;
+ trace_f2fs_set_page_dirty(&folio->page, META);
+
+ if (!folio_test_uptodate(folio))
+ folio_mark_uptodate(folio);
+ if (!folio_test_dirty(folio)) {
+ filemap_dirty_folio(mapping, folio);
+ inc_page_count(F2FS_P_SB(&folio->page), F2FS_DIRTY_META);
+ set_page_private_reference(&folio->page);
+ return true;
}
- return 0;
+ return false;
}
const struct address_space_operations f2fs_meta_aops = {
.writepage = f2fs_write_meta_page,
.writepages = f2fs_write_meta_pages,
- .set_page_dirty = f2fs_set_meta_page_dirty,
- .invalidatepage = f2fs_invalidate_page,
+ .dirty_folio = f2fs_dirty_meta_folio,
+ .invalidate_folio = f2fs_invalidate_folio,
.releasepage = f2fs_release_page,
#ifdef CONFIG_MIGRATION
.migratepage = f2fs_migrate_page,
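
This and the later f2fs_dirty_data_folio() hunk follow the same
conversion recipe from ->set_page_dirty to the folio-based
->dirty_folio: the mapping is passed explicitly, page flag tests
become folio_test_*(), __set_page_dirty_nobuffers() becomes
filemap_dirty_folio(), and the return type becomes bool. The general
shape:

    static bool example_dirty_folio(struct address_space *mapping,
                                    struct folio *folio)
    {
            if (!folio_test_uptodate(folio))
                    folio_mark_uptodate(folio);
            if (folio_test_dirty(folio))
                    return false;   /* already dirty, nothing to do */
            filemap_dirty_folio(mapping, folio);
            /* fs-private dirty accounting goes here */
            return true;
    }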
@@ -864,6 +876,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
struct page *cp_page_1 = NULL, *cp_page_2 = NULL;
struct f2fs_checkpoint *cp_block = NULL;
unsigned long long cur_version = 0, pre_version = 0;
+ unsigned int cp_blocks;
int err;
err = get_checkpoint_version(sbi, cp_addr, &cp_block,
@@ -871,15 +884,16 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
if (err)
return NULL;
- if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
- sbi->blocks_per_seg) {
+ cp_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count);
+
+ if (cp_blocks > sbi->blocks_per_seg || cp_blocks <= F2FS_CP_PACKS) {
f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u",
le32_to_cpu(cp_block->cp_pack_total_block_count));
goto invalid_cp;
}
pre_version = *version;
- cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
+ cp_addr += cp_blocks - 1;
err = get_checkpoint_version(sbi, cp_addr, &cp_block,
&cp_page_2, version);
if (err)
@@ -1014,7 +1028,7 @@ static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
stat_dec_dirty_inode(F2FS_I_SB(inode), type);
}
-void f2fs_update_dirty_page(struct inode *inode, struct page *page)
+void f2fs_update_dirty_folio(struct inode *inode, struct folio *folio)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
@@ -1029,7 +1043,7 @@ void f2fs_update_dirty_page(struct inode *inode, struct page *page)
inode_inc_dirty_pages(inode);
spin_unlock(&sbi->inode_lock[type]);
- set_page_private_reference(page);
+ set_page_private_reference(&folio->page);
}
void f2fs_remove_dirty_inode(struct inode *inode)
@@ -1159,7 +1173,7 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi)
if (!is_journalled_quota(sbi))
return false;
- if (!down_write_trylock(&sbi->quota_sem))
+ if (!f2fs_down_write_trylock(&sbi->quota_sem))
return true;
if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
ret = false;
@@ -1171,7 +1185,7 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi)
} else if (get_pages(sbi, F2FS_DIRTY_QDATA)) {
ret = true;
}
- up_write(&sbi->quota_sem);
+ f2fs_up_write(&sbi->quota_sem);
return ret;
}
@@ -1228,10 +1242,10 @@ retry_flush_dents:
* POR: we should ensure that there are no dirty node pages
* until finishing nat/sit flush. inode->i_blocks can be updated.
*/
- down_write(&sbi->node_change);
+ f2fs_down_write(&sbi->node_change);
if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
- up_write(&sbi->node_change);
+ f2fs_up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
err = f2fs_sync_inode_meta(sbi);
if (err)
@@ -1241,15 +1255,15 @@ retry_flush_dents:
}
retry_flush_nodes:
- down_write(&sbi->node_write);
+ f2fs_down_write(&sbi->node_write);
if (get_pages(sbi, F2FS_DIRTY_NODES)) {
- up_write(&sbi->node_write);
+ f2fs_up_write(&sbi->node_write);
atomic_inc(&sbi->wb_sync_req[NODE]);
err = f2fs_sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO);
atomic_dec(&sbi->wb_sync_req[NODE]);
if (err) {
- up_write(&sbi->node_change);
+ f2fs_up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
return err;
}
@@ -1262,13 +1276,13 @@ retry_flush_nodes:
* dirty node blocks and some checkpoint values by block allocation.
*/
__prepare_cp_block(sbi);
- up_write(&sbi->node_change);
+ f2fs_up_write(&sbi->node_change);
return err;
}
static void unblock_operations(struct f2fs_sb_info *sbi)
{
- up_write(&sbi->node_write);
+ f2fs_up_write(&sbi->node_write);
f2fs_unlock_all(sbi);
}
@@ -1543,6 +1557,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* update user_block_counts */
sbi->last_valid_block_count = sbi->total_valid_block_count;
percpu_counter_set(&sbi->alloc_valid_block_count, 0);
+ percpu_counter_set(&sbi->rf_node_block_count, 0);
/* Here, we have one bio having CP pack except cp pack 2 page */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
@@ -1612,7 +1627,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_warn(sbi, "Start checkpoint disabled!");
}
if (cpc->reason != CP_RESIZE)
- down_write(&sbi->cp_global_sem);
+ f2fs_down_write(&sbi->cp_global_sem);
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
@@ -1693,7 +1708,7 @@ stop:
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
if (cpc->reason != CP_RESIZE)
- up_write(&sbi->cp_global_sem);
+ f2fs_up_write(&sbi->cp_global_sem);
return err;
}
@@ -1741,9 +1756,9 @@ static int __write_checkpoint_sync(struct f2fs_sb_info *sbi)
struct cp_control cpc = { .reason = CP_SYNC, };
int err;
- down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->gc_lock);
err = f2fs_write_checkpoint(sbi, &cpc);
- up_write(&sbi->gc_lock);
+ f2fs_up_write(&sbi->gc_lock);
return err;
}
@@ -1831,9 +1846,9 @@ int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi)
if (!test_opt(sbi, MERGE_CHECKPOINT) || cpc.reason != CP_SYNC) {
int ret;
- down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->gc_lock);
ret = f2fs_write_checkpoint(sbi, &cpc);
- up_write(&sbi->gc_lock);
+ f2fs_up_write(&sbi->gc_lock);
return ret;
}
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index d0c3aeba5945..12a56f9e1572 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -314,10 +314,9 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic)
}
if (ret != PAGE_SIZE << dic->log_cluster_size) {
- printk_ratelimited("%sF2FS-fs (%s): lz4 invalid rlen:%zu, "
+ printk_ratelimited("%sF2FS-fs (%s): lz4 invalid ret:%d, "
"expected:%lu\n", KERN_ERR,
- F2FS_I_SB(dic->inode)->sb->s_id,
- dic->rlen,
+ F2FS_I_SB(dic->inode)->sb->s_id, ret,
PAGE_SIZE << dic->log_cluster_size);
return -EIO;
}
@@ -1267,7 +1266,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
* checkpoint. This can only happen to quota writes which can cause
* the below discard race condition.
*/
- down_read(&sbi->node_write);
+ f2fs_down_read(&sbi->node_write);
} else if (!f2fs_trylock_op(sbi)) {
goto out_free;
}
@@ -1384,7 +1383,7 @@ unlock_continue:
f2fs_put_dnode(&dn);
if (IS_NOQUOTA(inode))
- up_read(&sbi->node_write);
+ f2fs_up_read(&sbi->node_write);
else
f2fs_unlock_op(sbi);
@@ -1410,7 +1409,7 @@ out_put_dnode:
f2fs_put_dnode(&dn);
out_unlock_op:
if (IS_NOQUOTA(inode))
- up_read(&sbi->node_write);
+ f2fs_up_read(&sbi->node_write);
else
f2fs_unlock_op(sbi);
out_free:
@@ -1505,9 +1504,7 @@ continue_unlock:
if (IS_NOQUOTA(cc->inode))
return 0;
ret = 0;
- cond_resched();
- congestion_wait(BLK_RW_ASYNC,
- DEFAULT_IO_TIMEOUT);
+ f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
goto retry_write;
}
return ret;
@@ -1750,7 +1747,7 @@ unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn)
const struct address_space_operations f2fs_compress_aops = {
.releasepage = f2fs_release_page,
- .invalidatepage = f2fs_invalidate_page,
+ .invalidate_folio = f2fs_invalidate_folio,
};
struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8c417864c66a..b3131fc8762d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -354,7 +354,7 @@ static void f2fs_write_end_io(struct bio *bio)
}
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
- block_t blk_addr, struct bio *bio)
+ block_t blk_addr, sector_t *sector)
{
struct block_device *bdev = sbi->sb->s_bdev;
int i;
@@ -369,10 +369,9 @@ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
}
}
}
- if (bio) {
- bio_set_dev(bio, bdev);
- bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
- }
+
+ if (sector)
+ *sector = SECTOR_FROM_BLOCK(blk_addr);
return bdev;
}
@@ -389,14 +388,40 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
return 0;
}
+static void __attach_io_flag(struct f2fs_io_info *fio, unsigned int io_flag)
+{
+ unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
+ unsigned int fua_flag = io_flag & temp_mask;
+ unsigned int meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
+
+ /*
+ * data/node io flag bits per temp:
+ * REQ_META | REQ_FUA |
+ * 5 | 4 | 3 | 2 | 1 | 0 |
+ * Cold | Warm | Hot | Cold | Warm | Hot |
+ */
+ if ((1 << fio->temp) & meta_flag)
+ fio->op_flags |= REQ_META;
+ if ((1 << fio->temp) & fua_flag)
+ fio->op_flags |= REQ_FUA;
+}
+
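
With NR_TEMP_TYPE == 3, the low three bits of the per-type io_flag
select REQ_FUA per temperature and the next three select REQ_META, as
the table above shows. A worked decode:

    /* io_flag = 0b001101 (0x0d), temp_mask = 0x7:
     *   fua_flag  =  io_flag       & 0x7 = 0b101 -> FUA for Hot, Cold
     *   meta_flag = (io_flag >> 3) & 0x7 = 0b001 -> META for Hot only
     * A write with fio->temp == HOT (bit 0) gets REQ_FUA | REQ_META.
     */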
static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
{
struct f2fs_sb_info *sbi = fio->sbi;
+ struct block_device *bdev;
+ sector_t sector;
struct bio *bio;
- bio = bio_alloc_bioset(GFP_NOIO, npages, &f2fs_bioset);
+ if (fio->type == DATA)
+ __attach_io_flag(fio, sbi->data_io_flag);
+ else if (fio->type == NODE)
+ __attach_io_flag(fio, sbi->node_io_flag);
- f2fs_target_device(sbi, fio->new_blkaddr, bio);
+ bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector);
+ bio = bio_alloc_bioset(bdev, npages, fio->op | fio->op_flags, GFP_NOIO,
+ &f2fs_bioset);
+ bio->bi_iter.bi_sector = sector;
if (is_read_io(fio->op)) {
bio->bi_end_io = f2fs_read_end_io;
bio->bi_private = NULL;
@@ -500,34 +525,6 @@ void f2fs_submit_bio(struct f2fs_sb_info *sbi,
__submit_bio(sbi, bio, type);
}
-static void __attach_io_flag(struct f2fs_io_info *fio)
-{
- struct f2fs_sb_info *sbi = fio->sbi;
- unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
- unsigned int io_flag, fua_flag, meta_flag;
-
- if (fio->type == DATA)
- io_flag = sbi->data_io_flag;
- else if (fio->type == NODE)
- io_flag = sbi->node_io_flag;
- else
- return;
-
- fua_flag = io_flag & temp_mask;
- meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
-
- /*
- * data/node io flag bits per temp:
- * REQ_META | REQ_FUA |
- * 5 | 4 | 3 | 2 | 1 | 0 |
- * Cold | Warm | Hot | Cold | Warm | Hot |
- */
- if ((1 << fio->temp) & meta_flag)
- fio->op_flags |= REQ_META;
- if ((1 << fio->temp) & fua_flag)
- fio->op_flags |= REQ_FUA;
-}
-
static void __submit_merged_bio(struct f2fs_bio_info *io)
{
struct f2fs_io_info *fio = &io->fio;
@@ -535,9 +532,6 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
if (!io->bio)
return;
- __attach_io_flag(fio);
- bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
-
if (is_read_io(fio->op))
trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
else
@@ -590,18 +584,17 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
enum page_type btype = PAGE_TYPE_OF_BIO(type);
struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
- down_write(&io->io_rwsem);
+ f2fs_down_write(&io->io_rwsem);
/* change META to META_FLUSH in the checkpoint procedure */
if (type >= META_FLUSH) {
io->fio.type = META_FLUSH;
- io->fio.op = REQ_OP_WRITE;
- io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC;
+ io->bio->bi_opf |= REQ_META | REQ_PRIO | REQ_SYNC;
if (!test_opt(sbi, NOBARRIER))
- io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
+ io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA;
}
__submit_merged_bio(io);
- up_write(&io->io_rwsem);
+ f2fs_up_write(&io->io_rwsem);
}
static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
@@ -616,9 +609,9 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
enum page_type btype = PAGE_TYPE_OF_BIO(type);
struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
- down_read(&io->io_rwsem);
+ f2fs_down_read(&io->io_rwsem);
ret = __has_merged_page(io->bio, inode, page, ino);
- up_read(&io->io_rwsem);
+ f2fs_up_read(&io->io_rwsem);
}
if (ret)
__f2fs_submit_merged_write(sbi, type, temp);
@@ -679,9 +672,6 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
if (fio->io_wbc && !is_read_io(fio->op))
wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
- __attach_io_flag(fio);
- bio_set_op_attrs(bio, fio->op, fio->op_flags);
-
inc_page_count(fio->sbi, is_read_io(fio->op) ?
__read_io_type(page): WB_DATA_TYPE(fio->page));
@@ -742,9 +732,9 @@ static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
f2fs_bug_on(sbi, 1);
- down_write(&io->bio_list_lock);
+ f2fs_down_write(&io->bio_list_lock);
list_add_tail(&be->list, &io->bio_list);
- up_write(&io->bio_list_lock);
+ f2fs_up_write(&io->bio_list_lock);
}
static void del_bio_entry(struct bio_entry *be)
@@ -766,7 +756,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
struct list_head *head = &io->bio_list;
struct bio_entry *be;
- down_write(&io->bio_list_lock);
+ f2fs_down_write(&io->bio_list_lock);
list_for_each_entry(be, head, list) {
if (be->bio != *bio)
continue;
@@ -790,7 +780,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
__submit_bio(sbi, *bio, DATA);
break;
}
- up_write(&io->bio_list_lock);
+ f2fs_up_write(&io->bio_list_lock);
}
if (ret) {
@@ -816,7 +806,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
if (list_empty(head))
continue;
- down_read(&io->bio_list_lock);
+ f2fs_down_read(&io->bio_list_lock);
list_for_each_entry(be, head, list) {
if (target)
found = (target == be->bio);
@@ -826,14 +816,14 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
if (found)
break;
}
- up_read(&io->bio_list_lock);
+ f2fs_up_read(&io->bio_list_lock);
if (!found)
continue;
found = false;
- down_write(&io->bio_list_lock);
+ f2fs_down_write(&io->bio_list_lock);
list_for_each_entry(be, head, list) {
if (target)
found = (target == be->bio);
@@ -846,7 +836,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
break;
}
}
- up_write(&io->bio_list_lock);
+ f2fs_up_write(&io->bio_list_lock);
}
if (found)
@@ -875,10 +865,8 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
alloc_new:
if (!bio) {
bio = __bio_alloc(fio, BIO_MAX_VECS);
- __attach_io_flag(fio);
f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
fio->page->index, fio, GFP_NOIO);
- bio_set_op_attrs(bio, fio->op, fio->op_flags);
add_bio_entry(fio->sbi, bio, page, fio->temp);
} else {
@@ -906,7 +894,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
f2fs_bug_on(sbi, is_read_io(fio->op));
- down_write(&io->io_rwsem);
+ f2fs_down_write(&io->io_rwsem);
next:
if (fio->in_list) {
spin_lock(&io->io_lock);
@@ -973,7 +961,7 @@ out:
if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
!f2fs_is_checkpoint_ready(sbi))
__submit_merged_bio(io);
- up_write(&io->io_rwsem);
+ f2fs_up_write(&io->io_rwsem);
}
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
@@ -984,17 +972,17 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
struct bio *bio;
struct bio_post_read_ctx *ctx = NULL;
unsigned int post_read_steps = 0;
+ sector_t sector;
+ struct block_device *bdev = f2fs_target_device(sbi, blkaddr, &sector);
- bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL,
- bio_max_segs(nr_pages), &f2fs_bioset);
+ bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages),
+ REQ_OP_READ | op_flag,
+ for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset);
if (!bio)
return ERR_PTR(-ENOMEM);
-
+ bio->bi_iter.bi_sector = sector;
f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
-
- f2fs_target_device(sbi, blkaddr, bio);
bio->bi_end_io = f2fs_read_end_io;
- bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
if (fscrypt_inode_uses_fs_layer_crypto(inode))
post_read_steps |= STEP_DECRYPT;
@@ -1383,9 +1371,9 @@ void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
if (lock)
- down_read(&sbi->node_change);
+ f2fs_down_read(&sbi->node_change);
else
- up_read(&sbi->node_change);
+ f2fs_up_read(&sbi->node_change);
} else {
if (lock)
f2fs_lock_op(sbi);
@@ -2460,6 +2448,9 @@ static inline bool check_inplace_update_policy(struct inode *inode,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
unsigned int policy = SM_I(sbi)->ipu_policy;
+ if (policy & (0x1 << F2FS_IPU_HONOR_OPU_WRITE) &&
+ is_inode_flag_set(inode, FI_OPU_WRITE))
+ return false;
if (policy & (0x1 << F2FS_IPU_FORCE))
return true;
if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
@@ -2530,6 +2521,9 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
return true;
+ if (is_inode_flag_set(inode, FI_OPU_WRITE))
+ return true;
+
if (fio) {
if (page_private_gcing(fio->page))
return true;
@@ -2749,13 +2743,13 @@ write:
* the below discard race condition.
*/
if (IS_NOQUOTA(inode))
- down_read(&sbi->node_write);
+ f2fs_down_read(&sbi->node_write);
fio.need_lock = LOCK_DONE;
err = f2fs_do_write_data_page(&fio);
if (IS_NOQUOTA(inode))
- up_read(&sbi->node_write);
+ f2fs_up_read(&sbi->node_write);
goto done;
}
@@ -3047,8 +3041,7 @@ result:
} else if (ret == -EAGAIN) {
ret = 0;
if (wbc->sync_mode == WB_SYNC_ALL) {
- cond_resched();
- congestion_wait(BLK_RW_ASYNC,
+ f2fs_io_schedule_timeout(
DEFAULT_IO_TIMEOUT);
goto retry_write;
}
@@ -3154,8 +3147,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
f2fs_available_free_memory(sbi, DIRTY_DENTS))
goto skip_write;
- /* skip writing during file defragment */
- if (is_inode_flag_set(inode, FI_DO_DEFRAG))
+ /* skip writing in file defragment preparing stage */
+ if (is_inode_flag_set(inode, FI_SKIP_WRITES))
goto skip_write;
trace_f2fs_writepages(mapping->host, wbc, DATA);
@@ -3163,8 +3156,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
/* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
if (wbc->sync_mode == WB_SYNC_ALL)
atomic_inc(&sbi->wb_sync_req[DATA]);
- else if (atomic_read(&sbi->wb_sync_req[DATA]))
+ else if (atomic_read(&sbi->wb_sync_req[DATA])) {
+ /* to avoid potential deadlock */
+ if (current->plug)
+ blk_finish_plug(current->plug);
goto skip_write;
+ }
if (__should_serialize_io(inode, wbc)) {
mutex_lock(&sbi->writepages);
@@ -3213,14 +3210,14 @@ void f2fs_write_failed(struct inode *inode, loff_t to)
/* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
if (to > i_size && !f2fs_verity_in_progress(inode)) {
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
truncate_pagecache(inode, i_size);
f2fs_truncate_blocks(inode, i_size, true);
filemap_invalidate_unlock(inode->i_mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -3353,7 +3350,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
*fsdata = NULL;
- if (len == PAGE_SIZE)
+ if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode)))
goto repeat;
ret = f2fs_prepare_compress_overwrite(inode, pagep,
@@ -3492,17 +3489,16 @@ unlock_out:
return copied;
}
-void f2fs_invalidate_page(struct page *page, unsigned int offset,
- unsigned int length)
+void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
- (offset % PAGE_SIZE || length != PAGE_SIZE))
+ (offset || length != folio_size(folio)))
return;
- if (PageDirty(page)) {
+ if (folio_test_dirty(folio)) {
if (inode->i_ino == F2FS_META_INO(sbi)) {
dec_page_count(sbi, F2FS_DIRTY_META);
} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
@@ -3513,17 +3509,16 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
}
}
- clear_page_private_gcing(page);
+ clear_page_private_gcing(&folio->page);
if (test_opt(sbi, COMPRESS_CACHE) &&
inode->i_ino == F2FS_COMPRESS_INO(sbi))
- clear_page_private_data(page);
+ clear_page_private_data(&folio->page);
- if (page_private_atomic(page))
- return f2fs_drop_inmem_page(inode, page);
+ if (page_private_atomic(&folio->page))
+ return f2fs_drop_inmem_page(inode, &folio->page);
- detach_page_private(page);
- set_page_private(page, 0);
+ folio_detach_private(folio);
}
int f2fs_release_page(struct page *page, gfp_t wait)
@@ -3550,35 +3545,35 @@ int f2fs_release_page(struct page *page, gfp_t wait)
return 1;
}
-static int f2fs_set_data_page_dirty(struct page *page)
+static bool f2fs_dirty_data_folio(struct address_space *mapping,
+ struct folio *folio)
{
- struct inode *inode = page_file_mapping(page)->host;
+ struct inode *inode = mapping->host;
- trace_f2fs_set_page_dirty(page, DATA);
+ trace_f2fs_set_page_dirty(&folio->page, DATA);
- if (!PageUptodate(page))
- SetPageUptodate(page);
- if (PageSwapCache(page))
- return __set_page_dirty_nobuffers(page);
+ if (!folio_test_uptodate(folio))
+ folio_mark_uptodate(folio);
+ BUG_ON(folio_test_swapcache(folio));
if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
- if (!page_private_atomic(page)) {
- f2fs_register_inmem_page(inode, page);
- return 1;
+ if (!page_private_atomic(&folio->page)) {
+ f2fs_register_inmem_page(inode, &folio->page);
+ return true;
}
/*
* Previously, this page has been registered; we just
* return here.
*/
- return 0;
+ return false;
}
- if (!PageDirty(page)) {
- __set_page_dirty_nobuffers(page);
- f2fs_update_dirty_page(inode, page);
- return 1;
+ if (!folio_test_dirty(folio)) {
+ filemap_dirty_folio(mapping, folio);
+ f2fs_update_dirty_folio(inode, folio);
+ return true;
}
- return 0;
+ return true;
}
@@ -3721,19 +3716,20 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
unsigned int end_sec = secidx + blkcnt / blk_per_sec;
int ret = 0;
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
set_inode_flag(inode, FI_ALIGNED_WRITE);
+ set_inode_flag(inode, FI_OPU_WRITE);
for (; secidx < end_sec; secidx++) {
- down_write(&sbi->pin_sem);
+ f2fs_down_write(&sbi->pin_sem);
f2fs_lock_op(sbi);
f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
f2fs_unlock_op(sbi);
- set_inode_flag(inode, FI_DO_DEFRAG);
+ set_inode_flag(inode, FI_SKIP_WRITES);
for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
struct page *page;
@@ -3741,7 +3737,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
page = f2fs_get_lock_data_page(inode, blkidx, true);
if (IS_ERR(page)) {
- up_write(&sbi->pin_sem);
+ f2fs_up_write(&sbi->pin_sem);
ret = PTR_ERR(page);
goto done;
}
@@ -3750,22 +3746,23 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
f2fs_put_page(page, 1);
}
- clear_inode_flag(inode, FI_DO_DEFRAG);
+ clear_inode_flag(inode, FI_SKIP_WRITES);
ret = filemap_fdatawrite(inode->i_mapping);
- up_write(&sbi->pin_sem);
+ f2fs_up_write(&sbi->pin_sem);
if (ret)
break;
}
done:
- clear_inode_flag(inode, FI_DO_DEFRAG);
+ clear_inode_flag(inode, FI_SKIP_WRITES);
+ clear_inode_flag(inode, FI_OPU_WRITE);
clear_inode_flag(inode, FI_ALIGNED_WRITE);
filemap_invalidate_unlock(inode->i_mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
@@ -3938,8 +3935,8 @@ const struct address_space_operations f2fs_dblock_aops = {
.writepages = f2fs_write_data_pages,
.write_begin = f2fs_write_begin,
.write_end = f2fs_write_end,
- .set_page_dirty = f2fs_set_data_page_dirty,
- .invalidatepage = f2fs_invalidate_page,
+ .dirty_folio = f2fs_dirty_data_folio,
+ .invalidate_folio = f2fs_invalidate_folio,
.releasepage = f2fs_release_page,
.direct_IO = noop_direct_IO,
.bmap = f2fs_bmap,
@@ -4044,6 +4041,13 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
iomap->offset = blks_to_bytes(inode, map.m_lblk);
+ /*
+ * When inline encryption is enabled, sometimes I/O to an encrypted file
+ * has to be broken up to guarantee DUN contiguity. Handle this by
+ * limiting the length of the mapping returned.
+ */
+ map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);
+
if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) {
iomap->length = blks_to_bytes(inode, map.m_len);
if (map.m_flags & F2FS_MAP_MAPPED) {
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 8c50518475a9..fcdf253cd211 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -21,7 +21,7 @@
#include "gc.h"
static LIST_HEAD(f2fs_stat_list);
-static DEFINE_MUTEX(f2fs_stat_mutex);
+static DEFINE_RAW_SPINLOCK(f2fs_stat_lock);
#ifdef CONFIG_DEBUG_FS
static struct dentry *f2fs_debugfs_root;
#endif
@@ -338,14 +338,16 @@ static char *s_flag[] = {
[SBI_QUOTA_SKIP_FLUSH] = " quota_skip_flush",
[SBI_QUOTA_NEED_REPAIR] = " quota_need_repair",
[SBI_IS_RESIZEFS] = " resizefs",
+ [SBI_IS_FREEZING] = " freezefs",
};
static int stat_show(struct seq_file *s, void *v)
{
struct f2fs_stat_info *si;
int i = 0, j = 0;
+ unsigned long flags;
- mutex_lock(&f2fs_stat_mutex);
+ raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
list_for_each_entry(si, &f2fs_stat_list, stat_list) {
update_general_status(si->sbi);
@@ -474,12 +476,14 @@ static int stat_show(struct seq_file *s, void *v)
si->node_segs, si->bg_node_segs);
seq_printf(s, " - Reclaimed segs : Normal (%d), Idle CB (%d), "
"Idle Greedy (%d), Idle AT (%d), "
- "Urgent High (%d), Urgent Low (%d)\n",
+ "Urgent High (%d), Urgent Mid (%d), "
+ "Urgent Low (%d)\n",
si->sbi->gc_reclaimed_segs[GC_NORMAL],
si->sbi->gc_reclaimed_segs[GC_IDLE_CB],
si->sbi->gc_reclaimed_segs[GC_IDLE_GREEDY],
si->sbi->gc_reclaimed_segs[GC_IDLE_AT],
si->sbi->gc_reclaimed_segs[GC_URGENT_HIGH],
+ si->sbi->gc_reclaimed_segs[GC_URGENT_MID],
si->sbi->gc_reclaimed_segs[GC_URGENT_LOW]);
seq_printf(s, "Try to move %d blocks (BG: %d)\n", si->tot_blks,
si->bg_data_blks + si->bg_node_blks);
@@ -532,6 +536,9 @@ static int stat_show(struct seq_file *s, void *v)
si->ndirty_meta, si->meta_pages);
seq_printf(s, " - imeta: %4d\n",
si->ndirty_imeta);
+ seq_printf(s, " - fsync mark: %4lld\n",
+ percpu_counter_sum_positive(
+ &si->sbi->rf_node_block_count));
seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n",
si->dirty_nats, si->nats, si->dirty_sits, si->sits);
seq_printf(s, " - free_nids: %9d/%9d\n - alloc_nids: %9d\n",
@@ -573,7 +580,7 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - paged : %llu KB\n",
si->page_mem >> 10);
}
- mutex_unlock(&f2fs_stat_mutex);
+ raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
return 0;
}
@@ -584,6 +591,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_stat_info *si;
+ unsigned long flags;
int i;
si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL);
@@ -619,9 +627,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
atomic_set(&sbi->max_aw_cnt, 0);
atomic_set(&sbi->max_vw_cnt, 0);
- mutex_lock(&f2fs_stat_mutex);
+ raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
list_add_tail(&si->stat_list, &f2fs_stat_list);
- mutex_unlock(&f2fs_stat_mutex);
+ raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
return 0;
}
@@ -629,10 +637,11 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
+ unsigned long flags;
- mutex_lock(&f2fs_stat_mutex);
+ raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
list_del(&si->stat_list);
- mutex_unlock(&f2fs_stat_mutex);
+ raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
kfree(si);
}
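A pattern worth calling out in the debug.c hunks above: the stat list's sleeping mutex becomes a raw spinlock taken with IRQs saved, so the list stays walkable from contexts that must not sleep (and remains a true spinning lock under PREEMPT_RT). A hedged sketch of the idiom with illustrative names:

#include <linux/list.h>
#include <linux/spinlock.h>

static LIST_HEAD(demo_stat_list);
static DEFINE_RAW_SPINLOCK(demo_stat_lock);

struct demo_stat {
        struct list_head list;
        unsigned long hits;
};

static unsigned long demo_stat_total(void)
{
        struct demo_stat *st;
        unsigned long flags, total = 0;

        /* Raw variant: never converted to a sleeping lock, even on RT. */
        raw_spin_lock_irqsave(&demo_stat_lock, flags);
        list_for_each_entry(st, &demo_stat_list, list)
                total += st->hits;
        raw_spin_unlock_irqrestore(&demo_stat_lock, flags);
        return total;
}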
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 1820e9c106f7..a0e51937d92e 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -16,7 +16,7 @@
#include "xattr.h"
#include <trace/events/f2fs.h>
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
extern struct kmem_cache *f2fs_cf_name_slab;
#endif
@@ -79,7 +79,7 @@ unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de)
int f2fs_init_casefolded_name(const struct inode *dir,
struct f2fs_filename *fname)
{
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
struct super_block *sb = dir->i_sb;
if (IS_CASEFOLDED(dir)) {
@@ -174,7 +174,7 @@ void f2fs_free_filename(struct f2fs_filename *fname)
kfree(fname->crypto_buf.name);
fname->crypto_buf.name = NULL;
#endif
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
if (fname->cf_name.name) {
kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name);
fname->cf_name.name = NULL;
@@ -208,7 +208,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
return f2fs_find_target_dentry(&d, fname, max_slots);
}
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
/*
* Test whether a case-insensitive directory entry matches the filename
* being searched for.
@@ -266,7 +266,7 @@ static inline int f2fs_match_name(const struct inode *dir,
{
struct fscrypt_name f;
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
if (fname->cf_name.name) {
struct qstr cf = FSTR_TO_QSTR(&fname->cf_name);
@@ -766,7 +766,7 @@ add_dentry:
f2fs_wait_on_page_writeback(dentry_page, DATA, true, true);
if (inode) {
- down_write(&F2FS_I(inode)->i_sem);
+ f2fs_down_write(&F2FS_I(inode)->i_sem);
page = f2fs_init_inode_metadata(inode, dir, fname, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -793,7 +793,7 @@ add_dentry:
f2fs_update_parent_metadata(dir, inode, current_depth);
fail:
if (inode)
- up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&F2FS_I(inode)->i_sem);
f2fs_put_page(dentry_page, 1);
@@ -858,7 +858,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
struct page *page;
int err = 0;
- down_write(&F2FS_I(inode)->i_sem);
+ f2fs_down_write(&F2FS_I(inode)->i_sem);
page = f2fs_init_inode_metadata(inode, dir, NULL, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -869,7 +869,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
clear_inode_flag(inode, FI_NEW_INODE);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
fail:
- up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&F2FS_I(inode)->i_sem);
return err;
}
@@ -877,7 +877,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
- down_write(&F2FS_I(inode)->i_sem);
+ f2fs_down_write(&F2FS_I(inode)->i_sem);
if (S_ISDIR(inode->i_mode))
f2fs_i_links_write(dir, false);
@@ -888,7 +888,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode)
f2fs_i_links_write(inode, false);
f2fs_i_size_write(inode, 0);
}
- up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&F2FS_I(inode)->i_sem);
if (inode->i_nlink == 0)
f2fs_add_orphan_inode(inode);
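The repeated #ifdef CONFIG_UNICODE to IS_ENABLED(CONFIG_UNICODE) change in dir.c matters because UTF-8 support can now be built as a module: IS_ENABLED() is true for both =y and =m, while the bare #ifdef only sees =y. A tiny illustration:

#include <linux/kconfig.h>

#if IS_ENABLED(CONFIG_UNICODE)
/* Compiled when UTF-8 support is built in (=y) or modular (=m). */
#endif

#ifdef CONFIG_UNICODE
/* Compiled only for =y; silently drops casefolding when =m. */
#endif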
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index eb22fa91c2b2..cd1e65bcf0b0 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -123,6 +123,20 @@ typedef u32 nid_t;
#define COMPRESS_EXT_NUM 16
+/*
+ * An implementation of an rwsem that is explicitly unfair to readers. This
+ * prevents priority inversion when a low-priority reader acquires the read lock
+ * while sleeping on the write lock but the write lock is needed by
+ * higher-priority clients.
+ */
+
+struct f2fs_rwsem {
+ struct rw_semaphore internal_rwsem;
+#ifdef CONFIG_F2FS_UNFAIR_RWSEM
+ wait_queue_head_t read_waiters;
+#endif
+};
+
struct f2fs_mount_info {
unsigned int opt;
int write_io_size_bits; /* Write IO size bits */
@@ -386,6 +400,10 @@ struct discard_cmd_control {
struct mutex cmd_lock;
unsigned int nr_discards; /* # of discards in the list */
unsigned int max_discards; /* max. discards to be issued */
+ unsigned int max_discard_request; /* max. discard request per round */
+ unsigned int min_discard_issue_time; /* min. interval between discard issue */
+ unsigned int mid_discard_issue_time; /* mid. interval between discard issue */
+ unsigned int max_discard_issue_time; /* max. interval between discard issue */
unsigned int discard_granularity; /* discard granularity */
unsigned int undiscard_blks; /* # of undiscard blocks */
unsigned int next_pos; /* next discard position */
@@ -488,7 +506,7 @@ struct f2fs_filename {
*/
struct fscrypt_str crypto_buf;
#endif
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
/*
* For casefolded directories: the casefolded name, but it's left NULL
* if the original name is not valid Unicode, if the directory is both
@@ -561,6 +579,9 @@ enum {
/* maximum retry quota flush count */
#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8
+/* maximum retry of EIO'ed meta page */
+#define MAX_RETRY_META_PAGE_EIO 100
+
#define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */
#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
@@ -574,6 +595,9 @@ enum {
/* number of extent info in extent cache we try to shrink */
#define EXTENT_CACHE_SHRINK_NUMBER 128
+#define RECOVERY_MAX_RA_BLOCKS BIO_MAX_VECS
+#define RECOVERY_MIN_RA_BLOCKS 1
+
struct rb_entry {
struct rb_node rb_node; /* rb node located in rb-tree */
union {
@@ -721,7 +745,8 @@ enum {
FI_DROP_CACHE, /* drop dirty page cache */
FI_DATA_EXIST, /* indicate data exists */
FI_INLINE_DOTS, /* indicate inline dot dentries */
- FI_DO_DEFRAG, /* indicate defragment is running */
+ FI_SKIP_WRITES, /* should skip data page writeback */
+ FI_OPU_WRITE, /* used for opu per file */
FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
FI_PREALLOCATED_ALL, /* all blocks for write were preallocated */
FI_HOT_DATA, /* indicate file is hot */
@@ -752,7 +777,7 @@ struct f2fs_inode_info {
/* Use below internally in f2fs*/
unsigned long flags[BITS_TO_LONGS(FI_MAX)]; /* use to pass per-file flags */
- struct rw_semaphore i_sem; /* protect fi info */
+ struct f2fs_rwsem i_sem; /* protect fi info */
atomic_t dirty_pages; /* # of dirty pages */
f2fs_hash_t chash; /* hash value of given file name */
unsigned int clevel; /* maximum level of given file name */
@@ -777,8 +802,8 @@ struct f2fs_inode_info {
struct extent_tree *extent_tree; /* cached extent_tree entry */
/* avoid racing between foreground op and gc */
- struct rw_semaphore i_gc_rwsem[2];
- struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */
+ struct f2fs_rwsem i_gc_rwsem[2];
+ struct f2fs_rwsem i_xattr_sem; /* avoid racing between reading and changing EAs */
int i_extra_isize; /* size of extra space located in i_addr */
kprojid_t i_projid; /* id for project quota */
@@ -897,6 +922,7 @@ struct f2fs_nm_info {
nid_t max_nid; /* maximum possible node ids */
nid_t available_nids; /* # of available node ids */
nid_t next_scan_nid; /* the next nid to be scanned */
+ nid_t max_rf_node_blocks; /* max # of nodes for recovery */
unsigned int ram_thresh; /* control the memory footprint */
unsigned int ra_nid_pages; /* # of nid pages to be readaheaded */
unsigned int dirty_nats_ratio; /* control dirty nats ratio threshold */
@@ -904,7 +930,7 @@ struct f2fs_nm_info {
/* NAT cache management */
struct radix_tree_root nat_root;/* root of the nat entry cache */
struct radix_tree_root nat_set_root;/* root of the nat set cache */
- struct rw_semaphore nat_tree_lock; /* protect nat entry tree */
+ struct f2fs_rwsem nat_tree_lock; /* protect nat entry tree */
struct list_head nat_entries; /* cached nat entry list (clean) */
spinlock_t nat_list_lock; /* protect clean nat entry list */
unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */
@@ -1017,7 +1043,7 @@ struct f2fs_sm_info {
struct dirty_seglist_info *dirty_info; /* dirty segment information */
struct curseg_info *curseg_array; /* active segment information */
- struct rw_semaphore curseg_lock; /* for preventing curseg change */
+ struct f2fs_rwsem curseg_lock; /* for preventing curseg change */
block_t seg0_blkaddr; /* block address of 0'th segment */
block_t main_blkaddr; /* start block address of main area */
@@ -1201,11 +1227,11 @@ struct f2fs_bio_info {
struct bio *bio; /* bios to merge */
sector_t last_block_in_bio; /* last block number */
struct f2fs_io_info fio; /* store buffered io info. */
- struct rw_semaphore io_rwsem; /* blocking op for bio */
+ struct f2fs_rwsem io_rwsem; /* blocking op for bio */
spinlock_t io_lock; /* serialize DATA/NODE IOs */
struct list_head io_list; /* track fios */
struct list_head bio_list; /* bio entry list head */
- struct rw_semaphore bio_list_lock; /* lock to protect bio entry list */
+ struct f2fs_rwsem bio_list_lock; /* lock to protect bio entry list */
};
#define FDEV(i) (sbi->devs[i])
@@ -1267,6 +1293,7 @@ enum {
SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */
SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */
SBI_IS_RESIZEFS, /* resizefs is in process */
+ SBI_IS_FREEZING, /* freezefs is in process */
};
enum {
@@ -1286,6 +1313,7 @@ enum {
GC_IDLE_AT,
GC_URGENT_HIGH,
GC_URGENT_LOW,
+ GC_URGENT_MID,
MAX_GC_MODE,
};
@@ -1571,7 +1599,7 @@ struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
struct proc_dir_entry *s_proc; /* proc entry */
struct f2fs_super_block *raw_super; /* raw super block pointer */
- struct rw_semaphore sb_lock; /* lock for raw super block */
+ struct f2fs_rwsem sb_lock; /* lock for raw super block */
int valid_super_block; /* valid super block no */
unsigned long s_flag; /* flags for sbi */
struct mutex writepages; /* mutex for writepages() */
@@ -1591,18 +1619,20 @@ struct f2fs_sb_info {
/* for bio operations */
struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */
/* keep migration IO order for LFS mode */
- struct rw_semaphore io_order_lock;
+ struct f2fs_rwsem io_order_lock;
mempool_t *write_io_dummy; /* Dummy pages */
+ pgoff_t metapage_eio_ofs; /* EIO page offset */
+ int metapage_eio_cnt; /* EIO count */
/* for checkpoint */
struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
int cur_cp_pack; /* remain current cp pack */
spinlock_t cp_lock; /* for flag in ckpt */
struct inode *meta_inode; /* cache meta blocks */
- struct rw_semaphore cp_global_sem; /* checkpoint procedure lock */
- struct rw_semaphore cp_rwsem; /* blocking FS operations */
- struct rw_semaphore node_write; /* locking node writes */
- struct rw_semaphore node_change; /* locking node change */
+ struct f2fs_rwsem cp_global_sem; /* checkpoint procedure lock */
+ struct f2fs_rwsem cp_rwsem; /* blocking FS operations */
+ struct f2fs_rwsem node_write; /* locking node writes */
+ struct f2fs_rwsem node_change; /* locking node change */
wait_queue_head_t cp_wait;
unsigned long last_time[MAX_TIME]; /* to store time in jiffies */
long interval_time[MAX_TIME]; /* to store thresholds */
@@ -1662,12 +1692,14 @@ struct f2fs_sb_info {
block_t unusable_block_count; /* # of blocks saved by last cp */
unsigned int nquota_files; /* # of quota sysfile */
- struct rw_semaphore quota_sem; /* blocking cp for flags */
+ struct f2fs_rwsem quota_sem; /* blocking cp for flags */
/* # of pages, see count_type */
atomic_t nr_pages[NR_COUNT_TYPE];
/* # of allocated blocks */
struct percpu_counter alloc_valid_block_count;
+ /* # of node block writes as roll forward recovery */
+ struct percpu_counter rf_node_block_count;
/* writeback control */
atomic_t wb_sync_req[META]; /* count # of WB_SYNC threads */
@@ -1678,7 +1710,7 @@ struct f2fs_sb_info {
struct f2fs_mount_info mount_opt; /* mount options */
/* for cleaning operations */
- struct rw_semaphore gc_lock; /*
+ struct f2fs_rwsem gc_lock; /*
* semaphore for GC, avoid
* race between GC and GC or CP
*/
@@ -1698,7 +1730,7 @@ struct f2fs_sb_info {
/* threshold for gc trials on pinned files */
u64 gc_pin_file_threshold;
- struct rw_semaphore pin_sem;
+ struct f2fs_rwsem pin_sem;
/* maximum # of trials to find a victim segment for SSR and GC */
unsigned int max_victim_search;
@@ -2092,9 +2124,81 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
spin_unlock_irqrestore(&sbi->cp_lock, flags);
}
+#define init_f2fs_rwsem(sem) \
+do { \
+ static struct lock_class_key __key; \
+ \
+ __init_f2fs_rwsem((sem), #sem, &__key); \
+} while (0)
+
+static inline void __init_f2fs_rwsem(struct f2fs_rwsem *sem,
+ const char *sem_name, struct lock_class_key *key)
+{
+ __init_rwsem(&sem->internal_rwsem, sem_name, key);
+#ifdef CONFIG_F2FS_UNFAIR_RWSEM
+ init_waitqueue_head(&sem->read_waiters);
+#endif
+}
+
+static inline int f2fs_rwsem_is_locked(struct f2fs_rwsem *sem)
+{
+ return rwsem_is_locked(&sem->internal_rwsem);
+}
+
+static inline int f2fs_rwsem_is_contended(struct f2fs_rwsem *sem)
+{
+ return rwsem_is_contended(&sem->internal_rwsem);
+}
+
+static inline void f2fs_down_read(struct f2fs_rwsem *sem)
+{
+#ifdef CONFIG_F2FS_UNFAIR_RWSEM
+ wait_event(sem->read_waiters, down_read_trylock(&sem->internal_rwsem));
+#else
+ down_read(&sem->internal_rwsem);
+#endif
+}
+
+static inline int f2fs_down_read_trylock(struct f2fs_rwsem *sem)
+{
+ return down_read_trylock(&sem->internal_rwsem);
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static inline void f2fs_down_read_nested(struct f2fs_rwsem *sem, int subclass)
+{
+ down_read_nested(&sem->internal_rwsem, subclass);
+}
+#else
+#define f2fs_down_read_nested(sem, subclass) f2fs_down_read(sem)
+#endif
+
+static inline void f2fs_up_read(struct f2fs_rwsem *sem)
+{
+ up_read(&sem->internal_rwsem);
+}
+
+static inline void f2fs_down_write(struct f2fs_rwsem *sem)
+{
+ down_write(&sem->internal_rwsem);
+}
+
+static inline int f2fs_down_write_trylock(struct f2fs_rwsem *sem)
+{
+ return down_write_trylock(&sem->internal_rwsem);
+}
+
+static inline void f2fs_up_write(struct f2fs_rwsem *sem)
+{
+ up_write(&sem->internal_rwsem);
+#ifdef CONFIG_F2FS_UNFAIR_RWSEM
+ wake_up_all(&sem->read_waiters);
+#endif
+}
+
static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
{
- down_read(&sbi->cp_rwsem);
+ f2fs_down_read(&sbi->cp_rwsem);
}
static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi)
@@ -2103,22 +2207,22 @@ static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi)
f2fs_show_injection_info(sbi, FAULT_LOCK_OP);
return 0;
}
- return down_read_trylock(&sbi->cp_rwsem);
+ return f2fs_down_read_trylock(&sbi->cp_rwsem);
}
static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
{
- up_read(&sbi->cp_rwsem);
+ f2fs_up_read(&sbi->cp_rwsem);
}
static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
{
- down_write(&sbi->cp_rwsem);
+ f2fs_down_write(&sbi->cp_rwsem);
}
static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
{
- up_write(&sbi->cp_rwsem);
+ f2fs_up_write(&sbi->cp_rwsem);
}
static inline int __get_cp_reason(struct f2fs_sb_info *sbi)
@@ -2681,6 +2785,9 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
if (is_inflight_io(sbi, type))
return false;
+ if (sbi->gc_mode == GC_URGENT_MID)
+ return true;
+
if (sbi->gc_mode == GC_URGENT_LOW &&
(type == DISCARD_TIME || type == GC_TIME))
return true;
@@ -3579,7 +3686,8 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync);
-void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index);
+void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index,
+ unsigned int ra_blocks);
long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
long nr_to_write, enum iostat_type io_type);
void f2fs_add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
@@ -3597,7 +3705,7 @@ void f2fs_add_orphan_inode(struct inode *inode);
void f2fs_remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino);
int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi);
int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi);
-void f2fs_update_dirty_page(struct inode *inode, struct page *page);
+void f2fs_update_dirty_folio(struct inode *inode, struct folio *folio);
void f2fs_remove_dirty_inode(struct inode *inode);
int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type);
@@ -3631,7 +3739,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio);
int f2fs_merge_page_bio(struct f2fs_io_info *fio);
void f2fs_submit_page_write(struct f2fs_io_info *fio);
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
- block_t blk_addr, struct bio *bio);
+ block_t blk_addr, sector_t *sector);
int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr);
void f2fs_set_data_blkaddr(struct dnode_of_data *dn);
void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr);
@@ -3661,8 +3769,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
enum iostat_type io_type,
int compr_blocks, bool allow_balance);
void f2fs_write_failed(struct inode *inode, loff_t to);
-void f2fs_invalidate_page(struct page *page, unsigned int offset,
- unsigned int length);
+void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length);
int f2fs_release_page(struct page *page, gfp_t wait);
#ifdef CONFIG_MIGRATION
int f2fs_migrate_page(struct address_space *mapping, struct page *newpage,
@@ -4371,7 +4478,11 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int rw = iov_iter_rw(iter);
- if (f2fs_post_read_required(inode))
+ if (!fscrypt_dio_supported(iocb, iter))
+ return true;
+ if (fsverity_active(inode))
+ return true;
+ if (f2fs_compressed_file(inode))
return true;
/* disallow direct IO if any of devices has unaligned blksize */
@@ -4426,6 +4537,12 @@ static inline bool f2fs_block_unit_discard(struct f2fs_sb_info *sbi)
return F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_BLOCK;
}
+static inline void f2fs_io_schedule_timeout(long timeout)
+{
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ io_schedule_timeout(timeout);
+}
+
#define EFSBADCRC EBADMSG /* Bad CRC detected */
#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
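One non-obvious detail in the f2fs_rwsem helpers above: init_f2fs_rwsem() is a macro rather than a function so that each expansion defines its own static lock_class_key, giving every initialization site a distinct lockdep class. A stripped-down sketch of the same idiom, with hypothetical names:

#include <linux/lockdep.h>
#include <linux/rwsem.h>

struct wrapped_rwsem {
        struct rw_semaphore rwsem;
};

/* Must stay a macro: the static key below is unique per call site. */
#define init_wrapped_rwsem(sem)                                 \
do {                                                            \
        static struct lock_class_key __key;                     \
                                                                \
        __init_rwsem(&(sem)->rwsem, #sem, &__key);              \
} while (0)

A plain inline function would share a single key across all users, and lockdep would then conflate unrelated semaphores into one class.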
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 3c98ef6af97d..68ddf4c7ca64 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -237,13 +237,13 @@ static void try_to_fix_pino(struct inode *inode)
struct f2fs_inode_info *fi = F2FS_I(inode);
nid_t pino;
- down_write(&fi->i_sem);
+ f2fs_down_write(&fi->i_sem);
if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
get_parent_ino(inode, &pino)) {
f2fs_i_pino_write(inode, pino);
file_got_pino(inode);
}
- up_write(&fi->i_sem);
+ f2fs_up_write(&fi->i_sem);
}
static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
@@ -318,9 +318,9 @@ go_write:
* Both of fdatasync() and fsync() are able to be recovered from
* sudden-power-off.
*/
- down_read(&F2FS_I(inode)->i_sem);
+ f2fs_down_read(&F2FS_I(inode)->i_sem);
cp_reason = need_do_checkpoint(inode);
- up_read(&F2FS_I(inode)->i_sem);
+ f2fs_up_read(&F2FS_I(inode)->i_sem);
if (cp_reason) {
/* all the dirty node pages should be flushed for POR */
@@ -812,7 +812,7 @@ int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path,
{
struct inode *inode = d_inode(path->dentry);
struct f2fs_inode_info *fi = F2FS_I(inode);
- struct f2fs_inode *ri;
+ struct f2fs_inode *ri = NULL;
unsigned int flags;
if (f2fs_has_extra_attr(inode) &&
@@ -844,7 +844,7 @@ int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path,
STATX_ATTR_NODUMP |
STATX_ATTR_VERITY);
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(mnt_userns, inode, stat);
/* we need to show initial sectors used for inline_data/dentries */
if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
@@ -904,7 +904,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
!f2fs_is_compress_backend_ready(inode))
return -EOPNOTSUPP;
- err = setattr_prepare(&init_user_ns, dentry, attr);
+ err = setattr_prepare(mnt_userns, dentry, attr);
if (err)
return err;
@@ -958,7 +958,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
return err;
}
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
truncate_setsize(inode, attr->ia_size);
@@ -970,7 +970,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
* larger than i_size.
*/
filemap_invalidate_unlock(inode->i_mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (err)
return err;
@@ -980,10 +980,10 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
spin_unlock(&F2FS_I(inode)->i_size_lock);
}
- __setattr_copy(&init_user_ns, inode, attr);
+ __setattr_copy(mnt_userns, inode, attr);
if (attr->ia_valid & ATTR_MODE) {
- err = posix_acl_chmod(&init_user_ns, inode, f2fs_get_inode_mode(inode));
+ err = posix_acl_chmod(mnt_userns, inode, f2fs_get_inode_mode(inode));
if (is_inode_flag_set(inode, FI_ACL_MODE)) {
if (!err)
@@ -1112,7 +1112,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
blk_start = (loff_t)pg_start << PAGE_SHIFT;
blk_end = (loff_t)pg_end << PAGE_SHIFT;
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
truncate_pagecache_range(inode, blk_start, blk_end - 1);
@@ -1122,7 +1122,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
f2fs_unlock_op(sbi);
filemap_invalidate_unlock(inode->i_mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -1355,7 +1355,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
f2fs_balance_fs(sbi, true);
/* avoid gc operation during block exchange */
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
f2fs_lock_op(sbi);
@@ -1365,7 +1365,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
f2fs_unlock_op(sbi);
filemap_invalidate_unlock(inode->i_mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
@@ -1500,7 +1500,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
unsigned int end_offset;
pgoff_t end;
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(mapping);
truncate_pagecache_range(inode,
@@ -1514,7 +1514,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret) {
f2fs_unlock_op(sbi);
filemap_invalidate_unlock(mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
@@ -1526,7 +1526,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
f2fs_unlock_op(sbi);
filemap_invalidate_unlock(mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
f2fs_balance_fs(sbi, dn.node_changed);
@@ -1600,7 +1600,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
/* avoid gc operation during block exchange */
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(mapping);
truncate_pagecache(inode, offset);
@@ -1618,7 +1618,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_unlock_op(sbi);
}
filemap_invalidate_unlock(mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
/* write out all moved pages, if possible */
filemap_invalidate_lock(mapping);
@@ -1674,13 +1674,13 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
next_alloc:
if (has_not_enough_free_secs(sbi, 0,
GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
- down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->gc_lock);
err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
if (err && err != -ENODATA && err != -EAGAIN)
goto out_err;
}
- down_write(&sbi->pin_sem);
+ f2fs_down_write(&sbi->pin_sem);
f2fs_lock_op(sbi);
f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
@@ -1690,7 +1690,7 @@ next_alloc:
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
file_dont_truncate(inode);
- up_write(&sbi->pin_sem);
+ f2fs_up_write(&sbi->pin_sem);
expanded += map.m_len;
sec_len -= map.m_len;
@@ -1989,11 +1989,12 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
static int f2fs_ioc_start_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
+ struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int ret;
- if (!inode_owner_or_capable(&init_user_ns, inode))
+ if (!inode_owner_or_capable(mnt_userns, inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
@@ -2008,7 +2009,10 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
inode_lock(inode);
- f2fs_disable_compressed_file(inode);
+ if (!f2fs_disable_compressed_file(inode)) {
+ ret = -EINVAL;
+ goto out;
+ }
if (f2fs_is_atomic_file(inode)) {
if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST))
@@ -2020,7 +2024,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
if (ret)
goto out;
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
/*
* Should wait end_io to count F2FS_WB_CP_DATA correctly by
@@ -2031,7 +2035,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
if (ret) {
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
@@ -2044,7 +2048,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
/* add inode in inmem_list first and set atomic_file */
set_inode_flag(inode, FI_ATOMIC_FILE);
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
F2FS_I(inode)->inmem_task = current;
@@ -2058,9 +2062,10 @@ out:
static int f2fs_ioc_commit_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
+ struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
int ret;
- if (!inode_owner_or_capable(&init_user_ns, inode))
+ if (!inode_owner_or_capable(mnt_userns, inode))
return -EACCES;
ret = mnt_want_write_file(filp);
@@ -2100,9 +2105,10 @@ err_out:
static int f2fs_ioc_start_volatile_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
+ struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
int ret;
- if (!inode_owner_or_capable(&init_user_ns, inode))
+ if (!inode_owner_or_capable(mnt_userns, inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
@@ -2135,9 +2141,10 @@ out:
static int f2fs_ioc_release_volatile_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
+ struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
int ret;
- if (!inode_owner_or_capable(&init_user_ns, inode))
+ if (!inode_owner_or_capable(mnt_userns, inode))
return -EACCES;
ret = mnt_want_write_file(filp);
@@ -2164,9 +2171,10 @@ out:
static int f2fs_ioc_abort_volatile_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
+ struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
int ret;
- if (!inode_owner_or_capable(&init_user_ns, inode))
+ if (!inode_owner_or_capable(mnt_userns, inode))
return -EACCES;
ret = mnt_want_write_file(filp);
@@ -2351,7 +2359,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
if (err)
return err;
- down_write(&sbi->sb_lock);
+ f2fs_down_write(&sbi->sb_lock);
if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt))
goto got_it;
@@ -2370,7 +2378,7 @@ got_it:
16))
err = -EFAULT;
out_err:
- up_write(&sbi->sb_lock);
+ f2fs_up_write(&sbi->sb_lock);
mnt_drop_write_file(filp);
return err;
}
@@ -2447,12 +2455,12 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
return ret;
if (!sync) {
- if (!down_write_trylock(&sbi->gc_lock)) {
+ if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
ret = -EBUSY;
goto out;
}
} else {
- down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->gc_lock);
}
ret = f2fs_gc(sbi, sync, true, false, NULL_SEGNO);
@@ -2483,12 +2491,12 @@ static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
do_more:
if (!range->sync) {
- if (!down_write_trylock(&sbi->gc_lock)) {
+ if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
ret = -EBUSY;
goto out;
}
} else {
- down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->gc_lock);
}
ret = f2fs_gc(sbi, range->sync, true, false,
@@ -2559,10 +2567,6 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
bool fragmented = false;
int err;
- /* if in-place-update policy is enabled, don't waste time here */
- if (f2fs_should_update_inplace(inode, NULL))
- return -EINVAL;
-
pg_start = range->start >> PAGE_SHIFT;
pg_end = (range->start + range->len) >> PAGE_SHIFT;
@@ -2570,6 +2574,13 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
inode_lock(inode);
+ /* if in-place-update policy is enabled, don't waste time here */
+ set_inode_flag(inode, FI_OPU_WRITE);
+ if (f2fs_should_update_inplace(inode, NULL)) {
+ err = -EINVAL;
+ goto out;
+ }
+
/* writeback all dirty pages in the range */
err = filemap_write_and_wait_range(inode->i_mapping, range->start,
range->start + range->len - 1);
@@ -2651,7 +2662,7 @@ do_map:
goto check;
}
- set_inode_flag(inode, FI_DO_DEFRAG);
+ set_inode_flag(inode, FI_SKIP_WRITES);
idx = map.m_lblk;
while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) {
@@ -2676,15 +2687,16 @@ check:
if (map.m_lblk < pg_end && cnt < blk_per_seg)
goto do_map;
- clear_inode_flag(inode, FI_DO_DEFRAG);
+ clear_inode_flag(inode, FI_SKIP_WRITES);
err = filemap_fdatawrite(inode->i_mapping);
if (err)
goto out;
}
clear_out:
- clear_inode_flag(inode, FI_DO_DEFRAG);
+ clear_inode_flag(inode, FI_SKIP_WRITES);
out:
+ clear_inode_flag(inode, FI_OPU_WRITE);
inode_unlock(inode);
if (!err)
range->len = (u64)total << PAGE_SHIFT;
@@ -2820,10 +2832,10 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
f2fs_balance_fs(sbi, true);
- down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
if (src != dst) {
ret = -EBUSY;
- if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
+ if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
goto out_src;
}
@@ -2841,9 +2853,9 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
f2fs_unlock_op(sbi);
if (src != dst)
- up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
out_src:
- up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
out_unlock:
if (src != dst)
inode_unlock(dst);
@@ -2938,7 +2950,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
end_segno = min(start_segno + range.segments, dev_end_segno);
while (start_segno < end_segno) {
- if (!down_write_trylock(&sbi->gc_lock)) {
+ if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
ret = -EBUSY;
goto out;
}
@@ -2990,7 +3002,7 @@ static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct page *ipage;
+ struct f2fs_inode *ri = NULL;
kprojid_t kprojid;
int err;
@@ -3014,17 +3026,8 @@ static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
if (IS_NOQUOTA(inode))
return err;
- ipage = f2fs_get_node_page(sbi, inode->i_ino);
- if (IS_ERR(ipage))
- return PTR_ERR(ipage);
-
- if (!F2FS_FITS_IN_INODE(F2FS_INODE(ipage), fi->i_extra_isize,
- i_projid)) {
- err = -EOVERFLOW;
- f2fs_put_page(ipage, 1);
- return err;
- }
- f2fs_put_page(ipage, 1);
+ if (!F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid))
+ return -EOVERFLOW;
err = f2fs_dquot_initialize(inode);
if (err)
@@ -3215,9 +3218,9 @@ int f2fs_precache_extents(struct inode *inode)
while (map.m_lblk < end) {
map.m_len = end - map.m_lblk;
- down_write(&fi->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE);
- up_write(&fi->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
if (err)
return err;
@@ -3294,11 +3297,11 @@ static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg)
if (!vbuf)
return -ENOMEM;
- down_read(&sbi->sb_lock);
+ f2fs_down_read(&sbi->sb_lock);
count = utf16s_to_utf8s(sbi->raw_super->volume_name,
ARRAY_SIZE(sbi->raw_super->volume_name),
UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME);
- up_read(&sbi->sb_lock);
+ f2fs_up_read(&sbi->sb_lock);
if (copy_to_user((char __user *)arg, vbuf,
min(FSLABEL_MAX, count)))
@@ -3326,7 +3329,7 @@ static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg)
if (err)
goto out;
- down_write(&sbi->sb_lock);
+ f2fs_down_write(&sbi->sb_lock);
memset(sbi->raw_super->volume_name, 0,
sizeof(sbi->raw_super->volume_name));
@@ -3336,7 +3339,7 @@ static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg)
err = f2fs_commit_super(sbi, false);
- up_write(&sbi->sb_lock);
+ f2fs_up_write(&sbi->sb_lock);
mnt_drop_write_file(filp);
out:
@@ -3462,7 +3465,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
if (!atomic_read(&F2FS_I(inode)->i_compr_blocks))
goto out;
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
@@ -3499,7 +3502,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
}
filemap_invalidate_unlock(inode->i_mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
out:
inode_unlock(inode);
@@ -3615,7 +3618,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
goto unlock_inode;
}
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
@@ -3652,7 +3655,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
}
filemap_invalidate_unlock(inode->i_mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (ret >= 0) {
clear_inode_flag(inode, FI_COMPRESS_RELEASED);
@@ -3770,7 +3773,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
if (ret)
goto err;
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(mapping);
ret = filemap_write_and_wait_range(mapping, range.start,
@@ -3859,7 +3862,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
prev_block, len, range.flags);
out:
filemap_invalidate_unlock(mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
err:
inode_unlock(inode);
file_end_write(filp);
@@ -4291,12 +4294,12 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
trace_f2fs_direct_IO_enter(inode, iocb, count, READ);
if (iocb->ki_flags & IOCB_NOWAIT) {
- if (!down_read_trylock(&fi->i_gc_rwsem[READ])) {
+ if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
ret = -EAGAIN;
goto out;
}
} else {
- down_read(&fi->i_gc_rwsem[READ]);
+ f2fs_down_read(&fi->i_gc_rwsem[READ]);
}
/*
@@ -4315,7 +4318,7 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
ret = iomap_dio_complete(dio);
}
- up_read(&fi->i_gc_rwsem[READ]);
+ f2fs_up_read(&fi->i_gc_rwsem[READ]);
file_accessed(file);
out:
@@ -4497,12 +4500,12 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
goto out;
}
- if (!down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
+ if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
ret = -EAGAIN;
goto out;
}
- if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
- up_read(&fi->i_gc_rwsem[WRITE]);
+ if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
+ f2fs_up_read(&fi->i_gc_rwsem[WRITE]);
ret = -EAGAIN;
goto out;
}
@@ -4511,9 +4514,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
if (ret)
goto out;
- down_read(&fi->i_gc_rwsem[WRITE]);
+ f2fs_down_read(&fi->i_gc_rwsem[WRITE]);
if (do_opu)
- down_read(&fi->i_gc_rwsem[READ]);
+ f2fs_down_read(&fi->i_gc_rwsem[READ]);
}
if (whint_mode == WHINT_MODE_OFF)
iocb->ki_hint = WRITE_LIFE_NOT_SET;
@@ -4542,8 +4545,8 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
if (whint_mode == WHINT_MODE_OFF)
iocb->ki_hint = hint;
if (do_opu)
- up_read(&fi->i_gc_rwsem[READ]);
- up_read(&fi->i_gc_rwsem[WRITE]);
+ f2fs_up_read(&fi->i_gc_rwsem[READ]);
+ f2fs_up_read(&fi->i_gc_rwsem[WRITE]);
if (ret < 0)
goto out;
@@ -4644,12 +4647,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
/* Don't leave any preallocated blocks around past i_size. */
if (preallocated && i_size_read(inode) < target_size) {
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
if (!f2fs_truncate(inode))
file_dont_truncate(inode);
filemap_invalidate_unlock(inode->i_mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
} else {
file_dont_truncate(inode);
}
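All the &init_user_ns to mnt_userns substitutions in file.c serve idmapped mounts: ownership and permission checks must be evaluated through the user namespace attached to the mount the file was opened from, not the initial namespace. A hedged sketch of the pattern, with a hypothetical ioctl body:

#include <linux/fs.h>

static int demo_ioctl_perm_check(struct file *filp)
{
        struct inode *inode = file_inode(filp);
        /* The mount's idmapping, not necessarily &init_user_ns. */
        struct user_namespace *mnt_userns = file_mnt_user_ns(filp);

        if (!inode_owner_or_capable(mnt_userns, inode))
                return -EACCES;
        return 0;
}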
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index ee308a8de432..ea5b93b689cd 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -103,23 +103,26 @@ static int gc_thread_func(void *data)
sbi->gc_urgent_high_remaining--;
}
spin_unlock(&sbi->gc_urgent_high_lock);
+ }
+ if (sbi->gc_mode == GC_URGENT_HIGH ||
+ sbi->gc_mode == GC_URGENT_MID) {
wait_ms = gc_th->urgent_sleep_time;
- down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->gc_lock);
goto do_gc;
}
if (foreground) {
- down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->gc_lock);
goto do_gc;
- } else if (!down_write_trylock(&sbi->gc_lock)) {
+ } else if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
stat_other_skip_bggc_count(sbi);
goto next;
}
if (!is_idle(sbi, GC_TIME)) {
increase_sleep_time(gc_th, &wait_ms);
- up_write(&sbi->gc_lock);
+ f2fs_up_write(&sbi->gc_lock);
stat_io_skip_bggc_count(sbi);
goto next;
}
@@ -1038,8 +1041,10 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
- if (f2fs_check_nid_range(sbi, dni->ino))
+ if (f2fs_check_nid_range(sbi, dni->ino)) {
+ f2fs_put_page(node_page, 1);
return false;
+ }
*nofs = ofs_of_node(node_page);
source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node);
@@ -1230,7 +1235,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
if (lfs_mode)
- down_write(&fio.sbi->io_order_lock);
+ f2fs_down_write(&fio.sbi->io_order_lock);
mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi),
fio.old_blkaddr, false);
@@ -1316,7 +1321,7 @@ recover_block:
true, true, true);
up_out:
if (lfs_mode)
- up_write(&fio.sbi->io_order_lock);
+ f2fs_up_write(&fio.sbi->io_order_lock);
put_out:
f2fs_put_dnode(&dn);
out:
@@ -1475,7 +1480,7 @@ next_step:
special_file(inode->i_mode))
continue;
- if (!down_write_trylock(
+ if (!f2fs_down_write_trylock(
&F2FS_I(inode)->i_gc_rwsem[WRITE])) {
iput(inode);
sbi->skipped_gc_rwsem++;
@@ -1488,7 +1493,7 @@ next_step:
if (f2fs_post_read_required(inode)) {
int err = ra_data_block(inode, start_bidx);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (err) {
iput(inode);
continue;
@@ -1499,7 +1504,7 @@ next_step:
data_page = f2fs_get_read_data_page(inode,
start_bidx, REQ_RAHEAD, true);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (IS_ERR(data_page)) {
iput(inode);
continue;
@@ -1518,14 +1523,14 @@ next_step:
int err;
if (S_ISREG(inode->i_mode)) {
- if (!down_write_trylock(&fi->i_gc_rwsem[READ])) {
+ if (!f2fs_down_write_trylock(&fi->i_gc_rwsem[READ])) {
sbi->skipped_gc_rwsem++;
continue;
}
- if (!down_write_trylock(
+ if (!f2fs_down_write_trylock(
&fi->i_gc_rwsem[WRITE])) {
sbi->skipped_gc_rwsem++;
- up_write(&fi->i_gc_rwsem[READ]);
+ f2fs_up_write(&fi->i_gc_rwsem[READ]);
continue;
}
locked = true;
@@ -1548,8 +1553,8 @@ next_step:
submitted++;
if (locked) {
- up_write(&fi->i_gc_rwsem[WRITE]);
- up_write(&fi->i_gc_rwsem[READ]);
+ f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&fi->i_gc_rwsem[READ]);
}
stat_inc_data_blk_count(sbi, 1, gc_type);
@@ -1807,7 +1812,7 @@ stop:
reserved_segments(sbi),
prefree_segments(sbi));
- up_write(&sbi->gc_lock);
+ f2fs_up_write(&sbi->gc_lock);
put_gc_inode(&gc_list);
@@ -1936,7 +1941,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
long long block_count;
int segs = secs * sbi->segs_per_sec;
- down_write(&sbi->sb_lock);
+ f2fs_down_write(&sbi->sb_lock);
section_count = le32_to_cpu(raw_sb->section_count);
segment_count = le32_to_cpu(raw_sb->segment_count);
@@ -1957,7 +1962,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
cpu_to_le32(dev_segs + segs);
}
- up_write(&sbi->sb_lock);
+ f2fs_up_write(&sbi->sb_lock);
}
static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
@@ -2031,7 +2036,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));
/* stop other GC */
- if (!down_write_trylock(&sbi->gc_lock))
+ if (!f2fs_down_write_trylock(&sbi->gc_lock))
return -EAGAIN;
/* stop CP to protect MAIN_SEC in free_segment_range */
@@ -2051,15 +2056,15 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
out_unlock:
f2fs_unlock_op(sbi);
- up_write(&sbi->gc_lock);
+ f2fs_up_write(&sbi->gc_lock);
if (err)
return err;
set_sbi_flag(sbi, SBI_IS_RESIZEFS);
freeze_super(sbi->sb);
- down_write(&sbi->gc_lock);
- down_write(&sbi->cp_global_sem);
+ f2fs_down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->cp_global_sem);
spin_lock(&sbi->stat_lock);
if (shrunk_blocks + valid_user_blocks(sbi) +
@@ -2104,8 +2109,8 @@ recover_out:
spin_unlock(&sbi->stat_lock);
}
out_err:
- up_write(&sbi->cp_global_sem);
- up_write(&sbi->gc_lock);
+ f2fs_up_write(&sbi->cp_global_sem);
+ f2fs_up_write(&sbi->gc_lock);
thaw_super(sbi->sb);
clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
return err;
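A recurring locking pattern in the gc.c hunks: asynchronous callers take gc_lock with a trylock and bail out with -EBUSY rather than sleep behind a running collection, while synchronous callers block. A condensed sketch, assuming the f2fs_rwsem wrappers introduced in this series:

static int demo_trigger_gc(struct f2fs_sb_info *sbi, bool sync)
{
        if (!sync) {
                /* Background request: never wait behind another GC. */
                if (!f2fs_down_write_trylock(&sbi->gc_lock))
                        return -EBUSY;
        } else {
                f2fs_down_write(&sbi->gc_lock);
        }

        /* ... victim selection and migration would run here ... */

        f2fs_up_write(&sbi->gc_lock);
        return 0;
}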
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index e3beac546c63..3cb1e7a24740 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -105,7 +105,7 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname)
return;
}
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
if (IS_CASEFOLDED(dir)) {
/*
* If the casefolded name is provided, hash it instead of the
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 4b5cefa3f90c..a578bf83b803 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -629,7 +629,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
}
if (inode) {
- down_write(&F2FS_I(inode)->i_sem);
+ f2fs_down_write(&F2FS_I(inode)->i_sem);
page = f2fs_init_inode_metadata(inode, dir, fname, ipage);
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -658,7 +658,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
f2fs_update_parent_metadata(dir, inode, 0);
fail:
if (inode)
- up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&F2FS_I(inode)->i_sem);
out:
f2fs_put_page(ipage, 1);
return err;
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 0ec8e32a00b4..71f232dcf3c2 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -778,7 +778,8 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);
- sb_start_intwrite(inode->i_sb);
+ if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
+ sb_start_intwrite(inode->i_sb);
set_inode_flag(inode, FI_NO_ALLOC);
i_size_write(inode, 0);
retry:
@@ -809,7 +810,8 @@ retry:
if (dquot_initialize_needed(inode))
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
}
- sb_end_intwrite(inode->i_sb);
+ if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
+ sb_end_intwrite(inode->i_sb);
no_delete:
dquot_drop(inode);
@@ -885,6 +887,7 @@ void f2fs_handle_failed_inode(struct inode *inode)
err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
+ set_inode_flag(inode, FI_FREE_NID);
f2fs_warn(sbi, "May lose orphan inode, run fsck to fix.");
goto out;
}
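The SBI_IS_FREEZING checks added to f2fs_evict_inode() exist because the filesystem can evict inodes while freezing itself; taking sb_start_intwrite() at that point would wait on the very freeze in progress. A hedged sketch of the guard, assuming the flag and helpers from this series:

static void demo_evict_intwrite(struct f2fs_sb_info *sbi,
                                struct inode *inode)
{
        bool freezing = is_sbi_flag_set(sbi, SBI_IS_FREEZING);

        if (!freezing)
                sb_start_intwrite(inode->i_sb);

        /* ... truncate blocks, drop xattrs, etc. ... */

        if (!freezing)
                sb_end_intwrite(inode->i_sb);
}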
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index a728a0af9ce0..5ed79b29999f 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -22,7 +22,8 @@
#include "acl.h"
#include <trace/events/f2fs.h>
-static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
+static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns,
+ struct inode *dir, umode_t mode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
nid_t ino;
@@ -46,7 +47,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
nid_free = true;
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode_init_owner(mnt_userns, inode, dir, mode);
inode->i_ino = ino;
inode->i_blocks = 0;
@@ -67,7 +68,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
(F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL))
F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid;
else
- F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns,
+ F2FS_I(inode)->i_projid = make_kprojid(mnt_userns,
F2FS_DEF_PROJID);
err = fscrypt_prepare_new_inode(dir, inode, &encrypt);
@@ -196,7 +197,7 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *
__u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list;
int i, cold_count, hot_count;
- down_read(&sbi->sb_lock);
+ f2fs_down_read(&sbi->sb_lock);
cold_count = le32_to_cpu(sbi->raw_super->extension_count);
hot_count = sbi->raw_super->hot_ext_count;
@@ -206,7 +207,7 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *
break;
}
- up_read(&sbi->sb_lock);
+ f2fs_up_read(&sbi->sb_lock);
if (i == cold_count + hot_count)
return;
@@ -299,19 +300,19 @@ static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode,
(!ext_cnt && !noext_cnt))
return;
- down_read(&sbi->sb_lock);
+ f2fs_down_read(&sbi->sb_lock);
cold_count = le32_to_cpu(sbi->raw_super->extension_count);
hot_count = sbi->raw_super->hot_ext_count;
for (i = cold_count; i < cold_count + hot_count; i++) {
if (is_extension_exist(name, extlist[i], false)) {
- up_read(&sbi->sb_lock);
+ f2fs_up_read(&sbi->sb_lock);
return;
}
}
- up_read(&sbi->sb_lock);
+ f2fs_up_read(&sbi->sb_lock);
for (i = 0; i < noext_cnt; i++) {
if (is_extension_exist(name, noext[i], false)) {
@@ -349,7 +350,7 @@ static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir,
if (err)
return err;
- inode = f2fs_new_inode(dir, mode);
+ inode = f2fs_new_inode(mnt_userns, dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -561,7 +562,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
goto out_iput;
}
out_splice:
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
if (!inode && IS_CASEFOLDED(dir)) {
/* Eventually we want to call d_add_ci(dentry, NULL)
* for negative dentries in the encoding case as
@@ -622,7 +623,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
goto fail;
}
f2fs_delete_entry(de, page, dir, inode);
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want to avoid
* invalidating the dentries here, alongside with returning the
@@ -679,7 +680,7 @@ static int f2fs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
if (err)
return err;
- inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
+ inode = f2fs_new_inode(mnt_userns, dir, S_IFLNK | S_IRWXUGO);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -750,7 +751,7 @@ static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
if (err)
return err;
- inode = f2fs_new_inode(dir, S_IFDIR | mode);
+ inode = f2fs_new_inode(mnt_userns, dir, S_IFDIR | mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -807,7 +808,7 @@ static int f2fs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
if (err)
return err;
- inode = f2fs_new_inode(dir, mode);
+ inode = f2fs_new_inode(mnt_userns, dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -834,8 +835,9 @@ out:
return err;
}
-static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
- umode_t mode, struct inode **whiteout)
+static int __f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode,
+ struct inode **whiteout)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct inode *inode;
@@ -845,7 +847,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
if (err)
return err;
- inode = f2fs_new_inode(dir, mode);
+ inode = f2fs_new_inode(mnt_userns, dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -909,20 +911,22 @@ static int f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
- return __f2fs_tmpfile(dir, dentry, mode, NULL);
+ return __f2fs_tmpfile(mnt_userns, dir, dentry, mode, NULL);
}
-static int f2fs_create_whiteout(struct inode *dir, struct inode **whiteout)
+static int f2fs_create_whiteout(struct user_namespace *mnt_userns,
+ struct inode *dir, struct inode **whiteout)
{
if (unlikely(f2fs_cp_error(F2FS_I_SB(dir))))
return -EIO;
- return __f2fs_tmpfile(dir, NULL, S_IFCHR | WHITEOUT_MODE, whiteout);
+ return __f2fs_tmpfile(mnt_userns, dir, NULL,
+ S_IFCHR | WHITEOUT_MODE, whiteout);
}
-static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+static int f2fs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir);
struct inode *old_inode = d_inode(old_dentry);
@@ -960,7 +964,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
if (flags & RENAME_WHITEOUT) {
- err = f2fs_create_whiteout(old_dir, &whiteout);
+ err = f2fs_create_whiteout(mnt_userns, old_dir, &whiteout);
if (err)
return err;
}
@@ -1023,11 +1027,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_page = NULL;
new_inode->i_ctime = current_time(new_inode);
- down_write(&F2FS_I(new_inode)->i_sem);
+ f2fs_down_write(&F2FS_I(new_inode)->i_sem);
if (old_dir_entry)
f2fs_i_links_write(new_inode, false);
f2fs_i_links_write(new_inode, false);
- up_write(&F2FS_I(new_inode)->i_sem);
+ f2fs_up_write(&F2FS_I(new_inode)->i_sem);
if (!new_inode->i_nlink)
f2fs_add_orphan_inode(new_inode);
@@ -1048,13 +1052,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_i_links_write(new_dir, true);
}
- down_write(&F2FS_I(old_inode)->i_sem);
+ f2fs_down_write(&F2FS_I(old_inode)->i_sem);
if (!old_dir_entry || whiteout)
file_lost_pino(old_inode);
else
/* adjust dir's i_pino to pass fsck check */
f2fs_i_pino_write(old_inode, new_dir->i_ino);
- up_write(&F2FS_I(old_inode)->i_sem);
+ f2fs_up_write(&F2FS_I(old_inode)->i_sem);
old_inode->i_ctime = current_time(old_inode);
f2fs_mark_inode_dirty_sync(old_inode, false);
@@ -1107,8 +1111,7 @@ out_dir:
out_old:
f2fs_put_page(old_page, 0);
out:
- if (whiteout)
- iput(whiteout);
+ iput(whiteout);
return err;
}
@@ -1214,38 +1217,38 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
/* update directory entry info of old dir inode */
f2fs_set_link(old_dir, old_entry, old_page, new_inode);
- down_write(&F2FS_I(old_inode)->i_sem);
+ f2fs_down_write(&F2FS_I(old_inode)->i_sem);
if (!old_dir_entry)
file_lost_pino(old_inode);
else
/* adjust dir's i_pino to pass fsck check */
f2fs_i_pino_write(old_inode, new_dir->i_ino);
- up_write(&F2FS_I(old_inode)->i_sem);
+ f2fs_up_write(&F2FS_I(old_inode)->i_sem);
old_dir->i_ctime = current_time(old_dir);
if (old_nlink) {
- down_write(&F2FS_I(old_dir)->i_sem);
+ f2fs_down_write(&F2FS_I(old_dir)->i_sem);
f2fs_i_links_write(old_dir, old_nlink > 0);
- up_write(&F2FS_I(old_dir)->i_sem);
+ f2fs_up_write(&F2FS_I(old_dir)->i_sem);
}
f2fs_mark_inode_dirty_sync(old_dir, false);
/* update directory entry info of new dir inode */
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
- down_write(&F2FS_I(new_inode)->i_sem);
+ f2fs_down_write(&F2FS_I(new_inode)->i_sem);
if (!new_dir_entry)
file_lost_pino(new_inode);
else
/* adjust dir's i_pino to pass fsck check */
f2fs_i_pino_write(new_inode, old_dir->i_ino);
- up_write(&F2FS_I(new_inode)->i_sem);
+ f2fs_up_write(&F2FS_I(new_inode)->i_sem);
new_dir->i_ctime = current_time(new_dir);
if (new_nlink) {
- down_write(&F2FS_I(new_dir)->i_sem);
+ f2fs_down_write(&F2FS_I(new_dir)->i_sem);
f2fs_i_links_write(new_dir, new_nlink > 0);
- up_write(&F2FS_I(new_dir)->i_sem);
+ f2fs_up_write(&F2FS_I(new_dir)->i_sem);
}
f2fs_mark_inode_dirty_sync(new_dir, false);
@@ -1300,7 +1303,8 @@ static int f2fs_rename2(struct user_namespace *mnt_userns,
* VFS has already handled the new dentry existence case,
* here, we just deal with "RENAME_NOREPLACE" as regular rename.
*/
- return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
+ return f2fs_rename(mnt_userns, old_dir, old_dentry,
+ new_dir, new_dentry, flags);
}
static const char *f2fs_encrypted_get_link(struct dentry *dentry,
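The rename cleanup above, which drops the "if (whiteout)" guard before iput(), relies on iput() being a no-op for NULL, the same convention as kfree(NULL); that keeps shared error paths unconditional. A trivial illustration:

#include <linux/fs.h>

static void demo_put_optional(struct inode *maybe_null)
{
        iput(maybe_null);       /* safe: iput(NULL) does nothing */
}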
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 50b2874e758c..0b6e741e94a0 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -382,14 +382,14 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
struct nat_entry *e;
bool need = false;
- down_read(&nm_i->nat_tree_lock);
+ f2fs_down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (e) {
if (!get_nat_flag(e, IS_CHECKPOINTED) &&
!get_nat_flag(e, HAS_FSYNCED_INODE))
need = true;
}
- up_read(&nm_i->nat_tree_lock);
+ f2fs_up_read(&nm_i->nat_tree_lock);
return need;
}
@@ -399,11 +399,11 @@ bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
struct nat_entry *e;
bool is_cp = true;
- down_read(&nm_i->nat_tree_lock);
+ f2fs_down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (e && !get_nat_flag(e, IS_CHECKPOINTED))
is_cp = false;
- up_read(&nm_i->nat_tree_lock);
+ f2fs_up_read(&nm_i->nat_tree_lock);
return is_cp;
}
@@ -413,13 +413,13 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
struct nat_entry *e;
bool need_update = true;
- down_read(&nm_i->nat_tree_lock);
+ f2fs_down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ino);
if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
(get_nat_flag(e, IS_CHECKPOINTED) ||
get_nat_flag(e, HAS_FSYNCED_INODE)))
need_update = false;
- up_read(&nm_i->nat_tree_lock);
+ f2fs_up_read(&nm_i->nat_tree_lock);
return need_update;
}
@@ -431,14 +431,14 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
struct nat_entry *new, *e;
/* Let's mitigate lock contention of nat_tree_lock during checkpoint */
- if (rwsem_is_locked(&sbi->cp_global_sem))
+ if (f2fs_rwsem_is_locked(&sbi->cp_global_sem))
return;
new = __alloc_nat_entry(sbi, nid, false);
if (!new)
return;
- down_write(&nm_i->nat_tree_lock);
+ f2fs_down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (!e)
e = __init_nat_entry(nm_i, new, ne, false);
@@ -447,7 +447,7 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
nat_get_blkaddr(e) !=
le32_to_cpu(ne->block_addr) ||
nat_get_version(e) != ne->version);
- up_write(&nm_i->nat_tree_lock);
+ f2fs_up_write(&nm_i->nat_tree_lock);
if (e != new)
__free_nat_entry(new);
}
@@ -459,7 +459,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
struct nat_entry *e;
struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true);
- down_write(&nm_i->nat_tree_lock);
+ f2fs_down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ni->nid);
if (!e) {
e = __init_nat_entry(nm_i, new, NULL, true);
@@ -508,7 +508,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
set_nat_flag(e, HAS_FSYNCED_INODE, true);
set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
}
- up_write(&nm_i->nat_tree_lock);
+ f2fs_up_write(&nm_i->nat_tree_lock);
}
int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -516,7 +516,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
struct f2fs_nm_info *nm_i = NM_I(sbi);
int nr = nr_shrink;
- if (!down_write_trylock(&nm_i->nat_tree_lock))
+ if (!f2fs_down_write_trylock(&nm_i->nat_tree_lock))
return 0;
spin_lock(&nm_i->nat_list_lock);
@@ -538,7 +538,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
}
spin_unlock(&nm_i->nat_list_lock);
- up_write(&nm_i->nat_tree_lock);
+ f2fs_up_write(&nm_i->nat_tree_lock);
return nr - nr_shrink;
}
@@ -560,13 +560,13 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
ni->nid = nid;
retry:
/* Check nat cache */
- down_read(&nm_i->nat_tree_lock);
+ f2fs_down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (e) {
ni->ino = nat_get_ino(e);
ni->blk_addr = nat_get_blkaddr(e);
ni->version = nat_get_version(e);
- up_read(&nm_i->nat_tree_lock);
+ f2fs_up_read(&nm_i->nat_tree_lock);
return 0;
}
@@ -576,11 +576,11 @@ retry:
* nat_tree_lock. Therefore, we should retry, if we failed to grab here
* while not bothering checkpoint.
*/
- if (!rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) {
+ if (!f2fs_rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) {
down_read(&curseg->journal_rwsem);
- } else if (rwsem_is_contended(&nm_i->nat_tree_lock) ||
+ } else if (f2fs_rwsem_is_contended(&nm_i->nat_tree_lock) ||
!down_read_trylock(&curseg->journal_rwsem)) {
- up_read(&nm_i->nat_tree_lock);
+ f2fs_up_read(&nm_i->nat_tree_lock);
goto retry;
}
@@ -589,15 +589,15 @@ retry:
ne = nat_in_journal(journal, i);
node_info_from_raw_nat(ni, &ne);
}
- up_read(&curseg->journal_rwsem);
+ up_read(&curseg->journal_rwsem);
if (i >= 0) {
- up_read(&nm_i->nat_tree_lock);
+ f2fs_up_read(&nm_i->nat_tree_lock);
goto cache;
}
/* Fill node_info from nat page */
index = current_nat_addr(sbi, nid);
- up_read(&nm_i->nat_tree_lock);
+ f2fs_up_read(&nm_i->nat_tree_lock);
page = f2fs_get_meta_page(sbi, index);
if (IS_ERR(page))
@@ -1609,17 +1609,17 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
goto redirty_out;
if (wbc->for_reclaim) {
- if (!down_read_trylock(&sbi->node_write))
+ if (!f2fs_down_read_trylock(&sbi->node_write))
goto redirty_out;
} else {
- down_read(&sbi->node_write);
+ f2fs_down_read(&sbi->node_write);
}
/* This page is already truncated */
if (unlikely(ni.blk_addr == NULL_ADDR)) {
ClearPageUptodate(page);
dec_page_count(sbi, F2FS_DIRTY_NODES);
- up_read(&sbi->node_write);
+ f2fs_up_read(&sbi->node_write);
unlock_page(page);
return 0;
}
@@ -1627,7 +1627,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
if (__is_valid_data_blkaddr(ni.blk_addr) &&
!f2fs_is_valid_blkaddr(sbi, ni.blk_addr,
DATA_GENERIC_ENHANCE)) {
- up_read(&sbi->node_write);
+ f2fs_up_read(&sbi->node_write);
goto redirty_out;
}
@@ -1648,7 +1648,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
f2fs_do_write_node_page(nid, &fio);
set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
dec_page_count(sbi, F2FS_DIRTY_NODES);
- up_read(&sbi->node_write);
+ f2fs_up_read(&sbi->node_write);
if (wbc->for_reclaim) {
f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE);
@@ -1782,6 +1782,7 @@ continue_unlock:
if (!atomic || page == last_page) {
set_fsync_mark(page, 1);
+ percpu_counter_inc(&sbi->rf_node_block_count);
if (IS_INODE(page)) {
if (is_inode_flag_set(inode,
FI_DIRTY_INODE))
@@ -2111,8 +2112,12 @@ static int f2fs_write_node_pages(struct address_space *mapping,
if (wbc->sync_mode == WB_SYNC_ALL)
atomic_inc(&sbi->wb_sync_req[NODE]);
- else if (atomic_read(&sbi->wb_sync_req[NODE]))
+ else if (atomic_read(&sbi->wb_sync_req[NODE])) {
+ /* to avoid potential deadlock */
+ if (current->plug)
+ blk_finish_plug(current->plug);
goto skip_write;
+ }
trace_f2fs_writepages(mapping->host, wbc, NODE);
@@ -2132,23 +2137,24 @@ skip_write:
return 0;
}
-static int f2fs_set_node_page_dirty(struct page *page)
+static bool f2fs_dirty_node_folio(struct address_space *mapping,
+ struct folio *folio)
{
- trace_f2fs_set_page_dirty(page, NODE);
+ trace_f2fs_set_page_dirty(&folio->page, NODE);
- if (!PageUptodate(page))
- SetPageUptodate(page);
+ if (!folio_test_uptodate(folio))
+ folio_mark_uptodate(folio);
#ifdef CONFIG_F2FS_CHECK_FS
- if (IS_INODE(page))
- f2fs_inode_chksum_set(F2FS_P_SB(page), page);
+ if (IS_INODE(&folio->page))
+ f2fs_inode_chksum_set(F2FS_P_SB(&folio->page), &folio->page);
#endif
- if (!PageDirty(page)) {
- __set_page_dirty_nobuffers(page);
- inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
- set_page_private_reference(page);
- return 1;
+ if (!folio_test_dirty(folio)) {
+ filemap_dirty_folio(mapping, folio);
+ inc_page_count(F2FS_P_SB(&folio->page), F2FS_DIRTY_NODES);
+ set_page_private_reference(&folio->page);
+ return true;
}
- return 0;
+ return false;
}
/*
@@ -2157,8 +2163,8 @@ static int f2fs_set_node_page_dirty(struct page *page)
const struct address_space_operations f2fs_node_aops = {
.writepage = f2fs_write_node_page,
.writepages = f2fs_write_node_pages,
- .set_page_dirty = f2fs_set_node_page_dirty,
- .invalidatepage = f2fs_invalidate_page,
+ .dirty_folio = f2fs_dirty_node_folio,
+ .invalidate_folio = f2fs_invalidate_folio,
.releasepage = f2fs_release_page,
#ifdef CONFIG_MIGRATION
.migratepage = f2fs_migrate_page,
@@ -2225,14 +2231,14 @@ bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi)
unsigned int i;
bool ret = true;
- down_read(&nm_i->nat_tree_lock);
+ f2fs_down_read(&nm_i->nat_tree_lock);
for (i = 0; i < nm_i->nat_blocks; i++) {
if (!test_bit_le(i, nm_i->nat_block_bitmap)) {
ret = false;
break;
}
}
- up_read(&nm_i->nat_tree_lock);
+ f2fs_up_read(&nm_i->nat_tree_lock);
return ret;
}
@@ -2415,7 +2421,7 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
unsigned int i, idx;
nid_t nid;
- down_read(&nm_i->nat_tree_lock);
+ f2fs_down_read(&nm_i->nat_tree_lock);
for (i = 0; i < nm_i->nat_blocks; i++) {
if (!test_bit_le(i, nm_i->nat_block_bitmap))
@@ -2438,7 +2444,7 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
out:
scan_curseg_cache(sbi);
- up_read(&nm_i->nat_tree_lock);
+ f2fs_up_read(&nm_i->nat_tree_lock);
}
static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
@@ -2473,7 +2479,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
META_NAT, true);
- down_read(&nm_i->nat_tree_lock);
+ f2fs_down_read(&nm_i->nat_tree_lock);
while (1) {
if (!test_bit_le(NAT_BLOCK_OFFSET(nid),
@@ -2488,7 +2494,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
}
if (ret) {
- up_read(&nm_i->nat_tree_lock);
+ f2fs_up_read(&nm_i->nat_tree_lock);
f2fs_err(sbi, "NAT is corrupt, run fsck to fix it");
return ret;
}
@@ -2508,7 +2514,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
/* find free nids from current sum_pages */
scan_curseg_cache(sbi);
- up_read(&nm_i->nat_tree_lock);
+ f2fs_up_read(&nm_i->nat_tree_lock);
f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
nm_i->ra_nid_pages, META_NAT, false);
@@ -2953,7 +2959,7 @@ void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi)
struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned int nat_ofs;
- down_read(&nm_i->nat_tree_lock);
+ f2fs_down_read(&nm_i->nat_tree_lock);
for (nat_ofs = 0; nat_ofs < nm_i->nat_blocks; nat_ofs++) {
unsigned int valid = 0, nid_ofs = 0;
@@ -2973,7 +2979,7 @@ void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi)
__update_nat_bits(nm_i, nat_ofs, valid);
}
- up_read(&nm_i->nat_tree_lock);
+ f2fs_up_read(&nm_i->nat_tree_lock);
}
static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
@@ -3071,15 +3077,15 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
* nat_cnt[DIRTY_NAT].
*/
if (cpc->reason & CP_UMOUNT) {
- down_write(&nm_i->nat_tree_lock);
+ f2fs_down_write(&nm_i->nat_tree_lock);
remove_nats_in_journal(sbi);
- up_write(&nm_i->nat_tree_lock);
+ f2fs_up_write(&nm_i->nat_tree_lock);
}
if (!nm_i->nat_cnt[DIRTY_NAT])
return 0;
- down_write(&nm_i->nat_tree_lock);
+ f2fs_down_write(&nm_i->nat_tree_lock);
/*
* if there are no enough space in journal to store dirty nat
@@ -3108,7 +3114,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
break;
}
- up_write(&nm_i->nat_tree_lock);
+ f2fs_up_write(&nm_i->nat_tree_lock);
/* Allow dirty nats by node block allocation in write_begin */
return err;
@@ -3218,6 +3224,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
nm_i->ram_thresh = DEF_RAM_THRESHOLD;
nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
+ nm_i->max_rf_node_blocks = DEF_RF_NODE_BLOCKS;
INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->free_nid_list);
@@ -3228,7 +3235,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
mutex_init(&nm_i->build_lock);
spin_lock_init(&nm_i->nid_list_lock);
- init_rwsem(&nm_i->nat_tree_lock);
+ init_f2fs_rwsem(&nm_i->nat_tree_lock);
nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
@@ -3334,7 +3341,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
spin_unlock(&nm_i->nid_list_lock);
/* destroy nat cache */
- down_write(&nm_i->nat_tree_lock);
+ f2fs_down_write(&nm_i->nat_tree_lock);
while ((found = __gang_lookup_nat_cache(nm_i,
nid, NATVEC_SIZE, natvec))) {
unsigned idx;
@@ -3364,7 +3371,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
kmem_cache_free(nat_entry_set_slab, setvec[idx]);
}
}
- up_write(&nm_i->nat_tree_lock);
+ f2fs_up_write(&nm_i->nat_tree_lock);
kvfree(nm_i->nat_block_bitmap);
if (nm_i->free_nid_bitmap) {
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 18b98cf0465b..4c1d34bfea78 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -31,6 +31,9 @@
/* control total # of nats */
#define DEF_NAT_CACHE_THRESHOLD 100000
+/* control total # of node writes used for roll-forward recovery */
+#define DEF_RF_NODE_BLOCKS 0
+
/* vector size for gang look-up from nat cache that consists of radix tree */
#define NATVEC_SIZE 64
#define SETVEC_SIZE 32
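
[Review note] DEF_RF_NODE_BLOCKS of 0 keeps the new roll-forward cap disabled by default. The counter it guards is incremented per fsynced node block in the node.c hunk above and consulted in recovery.c below; the matching reset presumably happens at checkpoint time, in a hunk not shown here. A lifecycle sketch under that assumption:

/* lifecycle sketch; the checkpoint-time reset is assumed, not shown here */
percpu_counter_inc(&sbi->rf_node_block_count);      /* fsync writes a node block */

if (NM_I(sbi)->max_rf_node_blocks &&
    percpu_counter_sum_positive(&sbi->rf_node_block_count) >=
				NM_I(sbi)->max_rf_node_blocks)
	return false;	/* roll-forward credit exhausted: force a checkpoint */

percpu_counter_set(&sbi->rf_node_block_count, 0);   /* at checkpoint (assumed) */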
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 9683c80ff8c2..3cb7f8a43b4d 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -46,7 +46,7 @@
static struct kmem_cache *fsync_entry_slab;
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
extern struct kmem_cache *f2fs_cf_name_slab;
#endif
@@ -56,6 +56,10 @@ bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi)
if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
return false;
+ if (NM_I(sbi)->max_rf_node_blocks &&
+ percpu_counter_sum_positive(&sbi->rf_node_block_count) >=
+ NM_I(sbi)->max_rf_node_blocks)
+ return false;
return true;
}
@@ -149,7 +153,7 @@ static int init_recovered_filename(const struct inode *dir,
if (err)
return err;
f2fs_hash_filename(dir, fname);
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
/* Case-sensitive match is fine for recovery */
kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name);
fname->cf_name.name = NULL;
@@ -343,6 +347,19 @@ static int recover_inode(struct inode *inode, struct page *page)
return 0;
}
+static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi,
+ unsigned int ra_blocks, unsigned int blkaddr,
+ unsigned int next_blkaddr)
+{
+ if (blkaddr + 1 == next_blkaddr)
+ ra_blocks = min_t(unsigned int, RECOVERY_MAX_RA_BLOCKS,
+ ra_blocks * 2);
+ else if (next_blkaddr % sbi->blocks_per_seg)
+ ra_blocks = max_t(unsigned int, RECOVERY_MIN_RA_BLOCKS,
+ ra_blocks / 2);
+ return ra_blocks;
+}
+
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
bool check_only)
{
@@ -350,6 +367,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
struct page *page = NULL;
block_t blkaddr;
unsigned int loop_cnt = 0;
+ unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS;
unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg -
valid_user_blocks(sbi);
int err = 0;
@@ -424,11 +442,14 @@ next:
break;
}
+ ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr,
+ next_blkaddr_of_node(page));
+
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
f2fs_put_page(page, 1);
- f2fs_ra_meta_pages_cond(sbi, blkaddr);
+ f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks);
}
return err;
}
@@ -704,6 +725,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
struct page *page = NULL;
int err = 0;
block_t blkaddr;
+ unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS;
/* get node pages in the current segment */
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
@@ -715,8 +737,6 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
break;
- f2fs_ra_meta_pages_cond(sbi, blkaddr);
-
page = f2fs_get_tmp_page(sbi, blkaddr);
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -759,9 +779,14 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
if (entry->blkaddr == blkaddr)
list_move_tail(&entry->list, tmp_inode_list);
next:
+ ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr,
+ next_blkaddr_of_node(page));
+
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
f2fs_put_page(page, 1);
+
+ f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks);
}
if (!err)
f2fs_allocate_new_segments(sbi);
@@ -796,7 +821,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
INIT_LIST_HEAD(&dir_list);
/* prevent checkpoint */
- down_write(&sbi->cp_global_sem);
+ f2fs_down_write(&sbi->cp_global_sem);
/* step #1: find fsynced inode numbers */
err = find_fsync_dnodes(sbi, &inode_list, check_only);
@@ -845,7 +870,7 @@ skip:
if (!err)
clear_sbi_flag(sbi, SBI_POR_DOING);
- up_write(&sbi->cp_global_sem);
+ f2fs_up_write(&sbi->cp_global_sem);
/* let's drop all the directory inodes for clean checkpoint */
destroy_fsync_dnodes(&dir_list, err);
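
[Review note] adjust_por_ra_blocks() above makes recovery readahead adapt multiplicatively. A minimal trace, assuming RECOVERY_MAX_RA_BLOCKS is BIO_MAX_VECS (256) and RECOVERY_MIN_RA_BLOCKS is 1 — both constants are defined outside these hunks, so treat the values as assumptions:

/*
 * Hypothetical walk of the node chain:
 *
 *   start:                          ra_blocks = 256
 *   next_blkaddr == blkaddr + 1  -> min(256, ra_blocks * 2)  (stays capped)
 *   jump inside a segment        -> max(1,   ra_blocks / 2)  (128, 64, ...)
 *   jump to a segment boundary   -> unchanged (likely a log switch,
 *                                   not a scattered chain)
 *
 * Sequential chains keep the full readahead window; scattered chains
 * back off quickly toward single-block reads.
 */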
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 1dabc8244083..22dfeb991529 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -313,8 +313,7 @@ next:
skip:
iput(inode);
}
- congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
- cond_resched();
+ f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
if (gc_failure) {
if (++looped >= count)
return;
@@ -471,7 +470,7 @@ int f2fs_commit_inmem_pages(struct inode *inode)
f2fs_balance_fs(sbi, true);
- down_write(&fi->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
f2fs_lock_op(sbi);
set_inode_flag(inode, FI_ATOMIC_COMMIT);
@@ -483,7 +482,7 @@ int f2fs_commit_inmem_pages(struct inode *inode)
clear_inode_flag(inode, FI_ATOMIC_COMMIT);
f2fs_unlock_op(sbi);
- up_write(&fi->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
return err;
}
@@ -521,7 +520,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
io_schedule();
finish_wait(&sbi->gc_thread->fggc_wq, &wait);
} else {
- down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->gc_lock);
f2fs_gc(sbi, false, false, false, NULL_SEGNO);
}
}
@@ -529,7 +528,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
{
- int factor = rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
+ int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
@@ -570,7 +569,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
/* there is background inflight IO or foreground operation recently */
if (is_inflight_io(sbi, REQ_TIME) ||
- (!f2fs_time_over(sbi, REQ_TIME) && rwsem_is_locked(&sbi->cp_rwsem)))
+ (!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem)))
return;
/* exceed periodical checkpoint timeout threshold */
@@ -803,8 +802,7 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
do {
ret = __submit_flush_wait(sbi, FDEV(i).bdev);
if (ret)
- congestion_wait(BLK_RW_ASYNC,
- DEFAULT_IO_TIMEOUT);
+ f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
} while (ret && --count);
if (ret) {
@@ -1156,14 +1154,14 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
dpolicy->ordered = false;
dpolicy->granularity = granularity;
- dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+ dpolicy->max_requests = dcc->max_discard_request;
dpolicy->io_aware_gran = MAX_PLIST_NUM;
dpolicy->timeout = false;
if (discard_type == DPOLICY_BG) {
- dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
- dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
- dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+ dpolicy->min_interval = dcc->min_discard_issue_time;
+ dpolicy->mid_interval = dcc->mid_discard_issue_time;
+ dpolicy->max_interval = dcc->max_discard_issue_time;
dpolicy->io_aware = true;
dpolicy->sync = false;
dpolicy->ordered = true;
@@ -1171,12 +1169,12 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
dpolicy->granularity = 1;
if (atomic_read(&dcc->discard_cmd_cnt))
dpolicy->max_interval =
- DEF_MIN_DISCARD_ISSUE_TIME;
+ dcc->min_discard_issue_time;
}
} else if (discard_type == DPOLICY_FORCE) {
- dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
- dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
- dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+ dpolicy->min_interval = dcc->min_discard_issue_time;
+ dpolicy->mid_interval = dcc->mid_discard_issue_time;
+ dpolicy->max_interval = dcc->max_discard_issue_time;
dpolicy->io_aware = false;
} else if (discard_type == DPOLICY_FSTRIM) {
dpolicy->io_aware = false;
@@ -1781,7 +1779,7 @@ static int issue_discard_thread(void *data)
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
wait_queue_head_t *q = &dcc->discard_wait_queue;
struct discard_policy dpolicy;
- unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
+ unsigned int wait_ms = dcc->min_discard_issue_time;
int issued;
set_freezable();
@@ -2180,6 +2178,10 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
atomic_set(&dcc->discard_cmd_cnt, 0);
dcc->nr_discards = 0;
dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
+ dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST;
+ dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
+ dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
+ dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME;
dcc->undiscard_blks = 0;
dcc->next_pos = 0;
dcc->root = RB_ROOT_CACHED;
@@ -2821,7 +2823,7 @@ static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
if (!sbi->am.atgc_enabled)
return;
- down_read(&SM_I(sbi)->curseg_lock);
+ f2fs_down_read(&SM_I(sbi)->curseg_lock);
mutex_lock(&curseg->curseg_mutex);
down_write(&SIT_I(sbi)->sentry_lock);
@@ -2831,7 +2833,7 @@ static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
up_write(&SIT_I(sbi)->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
- up_read(&SM_I(sbi)->curseg_lock);
+ f2fs_up_read(&SM_I(sbi)->curseg_lock);
}
void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
@@ -2982,7 +2984,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int segno;
- down_read(&SM_I(sbi)->curseg_lock);
+ f2fs_down_read(&SM_I(sbi)->curseg_lock);
mutex_lock(&curseg->curseg_mutex);
down_write(&SIT_I(sbi)->sentry_lock);
@@ -3006,7 +3008,7 @@ unlock:
type, segno, curseg->segno);
mutex_unlock(&curseg->curseg_mutex);
- up_read(&SM_I(sbi)->curseg_lock);
+ f2fs_up_read(&SM_I(sbi)->curseg_lock);
}
static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
@@ -3038,23 +3040,23 @@ static void __allocate_new_section(struct f2fs_sb_info *sbi,
void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
{
- down_read(&SM_I(sbi)->curseg_lock);
+ f2fs_down_read(&SM_I(sbi)->curseg_lock);
down_write(&SIT_I(sbi)->sentry_lock);
__allocate_new_section(sbi, type, force);
up_write(&SIT_I(sbi)->sentry_lock);
- up_read(&SM_I(sbi)->curseg_lock);
+ f2fs_up_read(&SM_I(sbi)->curseg_lock);
}
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
{
int i;
- down_read(&SM_I(sbi)->curseg_lock);
+ f2fs_down_read(&SM_I(sbi)->curseg_lock);
down_write(&SIT_I(sbi)->sentry_lock);
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
__allocate_new_segment(sbi, i, false, false);
up_write(&SIT_I(sbi)->sentry_lock);
- up_read(&SM_I(sbi)->curseg_lock);
+ f2fs_up_read(&SM_I(sbi)->curseg_lock);
}
static const struct segment_allocation default_salloc_ops = {
@@ -3133,7 +3135,7 @@ next:
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
trimmed += __wait_all_discard_cmd(sbi, NULL);
- congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+ f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
goto next;
}
skip:
@@ -3192,9 +3194,9 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
if (sbi->discard_blks == 0)
goto out;
- down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->gc_lock);
err = f2fs_write_checkpoint(sbi, &cpc);
- up_write(&sbi->gc_lock);
+ f2fs_up_write(&sbi->gc_lock);
if (err)
goto out;
@@ -3431,7 +3433,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
struct seg_entry *se = NULL;
- down_read(&SM_I(sbi)->curseg_lock);
+ f2fs_down_read(&SM_I(sbi)->curseg_lock);
mutex_lock(&curseg->curseg_mutex);
down_write(&sit_i->sentry_lock);
@@ -3514,7 +3516,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
mutex_unlock(&curseg->curseg_mutex);
- up_read(&SM_I(sbi)->curseg_lock);
+ f2fs_up_read(&SM_I(sbi)->curseg_lock);
}
void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
@@ -3550,7 +3552,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA);
if (keep_order)
- down_read(&fio->sbi->io_order_lock);
+ f2fs_down_read(&fio->sbi->io_order_lock);
reallocate:
f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
&fio->new_blkaddr, sum, type, fio);
@@ -3570,7 +3572,7 @@ reallocate:
f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
if (keep_order)
- up_read(&fio->sbi->io_order_lock);
+ f2fs_up_read(&fio->sbi->io_order_lock);
}
void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
@@ -3705,7 +3707,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
se = get_seg_entry(sbi, segno);
type = se->type;
- down_write(&SM_I(sbi)->curseg_lock);
+ f2fs_down_write(&SM_I(sbi)->curseg_lock);
if (!recover_curseg) {
/* for recovery flow */
@@ -3774,7 +3776,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
up_write(&sit_i->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
- up_write(&SM_I(sbi)->curseg_lock);
+ f2fs_up_write(&SM_I(sbi)->curseg_lock);
}
void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
@@ -4789,6 +4791,13 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
sanity_check_seg_type(sbi, curseg->seg_type);
+ if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) {
+ f2fs_err(sbi,
+ "Current segment has invalid alloc_type:%d",
+ curseg->alloc_type);
+ return -EFSCORRUPTED;
+ }
+
if (f2fs_test_bit(blkofs, se->cur_valid_map))
goto out;
@@ -5258,7 +5267,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&sm_info->sit_entry_set);
- init_rwsem(&sm_info->curseg_lock);
+ init_f2fs_rwsem(&sm_info->curseg_lock);
if (!f2fs_readonly(sbi->sb)) {
err = f2fs_create_flush_cmd_control(sbi);
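
[Review note] The congestion_wait(BLK_RW_ASYNC, ...) + cond_resched() pairs in this file become f2fs_io_schedule_timeout(). The helper is added in fs/f2fs/f2fs.h, outside the hunks shown; a sketch of the likely shape:

/* sketch of the replacement helper: an uninterruptible, IO-accounted
 * sleep for the same DEFAULT_IO_TIMEOUT the congestion_wait() calls used;
 * needs <linux/sched.h> */
static inline void f2fs_io_schedule_timeout(long timeout)
{
	set_current_state(TASK_UNINTERRUPTIBLE);
	io_schedule_timeout(timeout);
}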
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 0291cd55cf09..5c94caf0c0a1 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -651,7 +651,9 @@ static inline int utilization(struct f2fs_sb_info *sbi)
* pages over min_fsync_blocks. (=default option)
* F2FS_IPU_ASYNC - do IPU given by asynchronous write requests.
* F2FS_IPU_NOCACHE - disable IPU bio cache.
- * F2FS_IPUT_DISABLE - disable IPU. (=default option in LFS mode)
+ * F2FS_IPU_HONOR_OPU_WRITE - use OPU write prior to IPU write if inode has
+ * FI_OPU_WRITE flag.
+ * F2FS_IPU_DISABLE - disable IPU. (=default option in LFS mode)
*/
#define DEF_MIN_IPU_UTIL 70
#define DEF_MIN_FSYNC_BLOCKS 8
@@ -667,6 +669,7 @@ enum {
F2FS_IPU_FSYNC,
F2FS_IPU_ASYNC,
F2FS_IPU_NOCACHE,
+ F2FS_IPU_HONOR_OPU_WRITE,
};
static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi,
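
[Review note] ipu_policy is a bitmask, which is why the super.c hunk below can OR F2FS_IPU_FORCE and the new F2FS_IPU_HONOR_OPU_WRITE together. A sketch of how a caller would honor the new bit (the actual call site is in fs/f2fs/data.c, outside these hunks; names follow the enum added here):

/* sketch: prefer out-of-place update when the inode asks for it */
if ((SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_HONOR_OPU_WRITE)) &&
    is_inode_flag_set(inode, FI_OPU_WRITE))
	return false;	/* skip in-place update for this write */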
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 76e6a3df9aba..ea939db18f88 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -257,7 +257,7 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
va_end(args);
}
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
static const struct f2fs_sb_encodings {
__u16 magic;
char *name;
@@ -1259,7 +1259,7 @@ default_check:
return -EINVAL;
}
#endif
-#ifndef CONFIG_UNICODE
+#if !IS_ENABLED(CONFIG_UNICODE)
if (f2fs_sb_has_casefold(sbi)) {
f2fs_err(sbi,
"Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE");
@@ -1345,8 +1345,12 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
{
struct f2fs_inode_info *fi;
- fi = f2fs_kmem_cache_alloc(f2fs_inode_cachep,
- GFP_F2FS_ZERO, false, F2FS_SB(sb));
+ if (time_to_inject(F2FS_SB(sb), FAULT_SLAB_ALLOC)) {
+ f2fs_show_injection_info(F2FS_SB(sb), FAULT_SLAB_ALLOC);
+ return NULL;
+ }
+
+ fi = alloc_inode_sb(sb, f2fs_inode_cachep, GFP_F2FS_ZERO);
if (!fi)
return NULL;
@@ -1355,16 +1359,16 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
/* Initialize f2fs-specific inode info */
atomic_set(&fi->dirty_pages, 0);
atomic_set(&fi->i_compr_blocks, 0);
- init_rwsem(&fi->i_sem);
+ init_f2fs_rwsem(&fi->i_sem);
spin_lock_init(&fi->i_size_lock);
INIT_LIST_HEAD(&fi->dirty_list);
INIT_LIST_HEAD(&fi->gdirty_list);
INIT_LIST_HEAD(&fi->inmem_ilist);
INIT_LIST_HEAD(&fi->inmem_pages);
mutex_init(&fi->inmem_lock);
- init_rwsem(&fi->i_gc_rwsem[READ]);
- init_rwsem(&fi->i_gc_rwsem[WRITE]);
- init_rwsem(&fi->i_xattr_sem);
+ init_f2fs_rwsem(&fi->i_gc_rwsem[READ]);
+ init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]);
+ init_f2fs_rwsem(&fi->i_xattr_sem);
/* Will be used by directory only */
fi->i_dir_level = F2FS_SB(sb)->dir_level;
@@ -1501,8 +1505,9 @@ static void f2fs_free_inode(struct inode *inode)
static void destroy_percpu_info(struct f2fs_sb_info *sbi)
{
- percpu_counter_destroy(&sbi->alloc_valid_block_count);
percpu_counter_destroy(&sbi->total_valid_inode_count);
+ percpu_counter_destroy(&sbi->rf_node_block_count);
+ percpu_counter_destroy(&sbi->alloc_valid_block_count);
}
static void destroy_device_list(struct f2fs_sb_info *sbi)
@@ -1619,7 +1624,7 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_destroy_iostat(sbi);
for (i = 0; i < NR_PAGE_TYPE; i++)
kvfree(sbi->write_io[i]);
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
utf8_unload(sb->s_encoding);
#endif
kfree(sbi);
@@ -1662,11 +1667,15 @@ static int f2fs_freeze(struct super_block *sb)
/* ensure no checkpoint required */
if (!llist_empty(&F2FS_SB(sb)->cprc_info.issue_list))
return -EINVAL;
+
+ /* to avoid deadlock on f2fs_evict_inode->SB_FREEZE_FS */
+ set_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
return 0;
}
static int f2fs_unfreeze(struct super_block *sb)
{
+ clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
return 0;
}
@@ -2075,6 +2084,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
{
unsigned int s_flags = sbi->sb->s_flags;
struct cp_control cpc;
+ unsigned int gc_mode;
int err = 0;
int ret;
block_t unusable;
@@ -2087,8 +2097,11 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
f2fs_update_time(sbi, DISABLE_TIME);
+ gc_mode = sbi->gc_mode;
+ sbi->gc_mode = GC_URGENT_HIGH;
+
while (!f2fs_time_over(sbi, DISABLE_TIME)) {
- down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->gc_lock);
err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
if (err == -ENODATA) {
err = 0;
@@ -2110,7 +2123,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
goto restore_flag;
}
- down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->gc_lock);
cpc.reason = CP_PAUSE;
set_sbi_flag(sbi, SBI_CP_DISABLED);
err = f2fs_write_checkpoint(sbi, &cpc);
@@ -2122,8 +2135,9 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
spin_unlock(&sbi->stat_lock);
out_unlock:
- up_write(&sbi->gc_lock);
+ f2fs_up_write(&sbi->gc_lock);
restore_flag:
+ sbi->gc_mode = gc_mode;
sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */
return err;
}
@@ -2135,19 +2149,18 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
/* we should flush all the data to keep data consistency */
do {
sync_inodes_sb(sbi->sb);
- cond_resched();
- congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+ f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
} while (get_pages(sbi, F2FS_DIRTY_DATA) && retry--);
if (unlikely(retry < 0))
f2fs_warn(sbi, "checkpoint=enable has some unwritten data.");
- down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->gc_lock);
f2fs_dirty_to_prefree(sbi);
clear_sbi_flag(sbi, SBI_CP_DISABLED);
set_sbi_flag(sbi, SBI_IS_DIRTY);
- up_write(&sbi->gc_lock);
+ f2fs_up_write(&sbi->gc_lock);
f2fs_sync_fs(sbi->sb, 1);
}
@@ -2504,8 +2517,7 @@ retry:
&page, &fsdata);
if (unlikely(err)) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC,
- DEFAULT_IO_TIMEOUT);
+ f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
goto retry;
}
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
@@ -2688,7 +2700,7 @@ int f2fs_quota_sync(struct super_block *sb, int type)
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct quota_info *dqopt = sb_dqopt(sb);
int cnt;
- int ret;
+ int ret = 0;
/*
* Now when everything is written we can discard the pagecache so
@@ -2699,26 +2711,26 @@ int f2fs_quota_sync(struct super_block *sb, int type)
if (type != -1 && cnt != type)
continue;
- if (!sb_has_quota_active(sb, type))
- return 0;
+ if (!sb_has_quota_active(sb, cnt))
+ continue;
inode_lock(dqopt->files[cnt]);
/*
* do_quotactl
* f2fs_quota_sync
- * down_read(quota_sem)
+ * f2fs_down_read(quota_sem)
* dquot_writeback_dquots()
* f2fs_dquot_commit
* block_operation
- * down_read(quota_sem)
+ * f2fs_down_read(quota_sem)
*/
f2fs_lock_op(sbi);
- down_read(&sbi->quota_sem);
+ f2fs_down_read(&sbi->quota_sem);
ret = f2fs_quota_sync_file(sbi, cnt);
- up_read(&sbi->quota_sem);
+ f2fs_up_read(&sbi->quota_sem);
f2fs_unlock_op(sbi);
inode_unlock(dqopt->files[cnt]);
@@ -2843,11 +2855,11 @@ static int f2fs_dquot_commit(struct dquot *dquot)
struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
int ret;
- down_read_nested(&sbi->quota_sem, SINGLE_DEPTH_NESTING);
+ f2fs_down_read_nested(&sbi->quota_sem, SINGLE_DEPTH_NESTING);
ret = dquot_commit(dquot);
if (ret < 0)
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
- up_read(&sbi->quota_sem);
+ f2fs_up_read(&sbi->quota_sem);
return ret;
}
@@ -2856,11 +2868,11 @@ static int f2fs_dquot_acquire(struct dquot *dquot)
struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
int ret;
- down_read(&sbi->quota_sem);
+ f2fs_down_read(&sbi->quota_sem);
ret = dquot_acquire(dquot);
if (ret < 0)
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
- up_read(&sbi->quota_sem);
+ f2fs_up_read(&sbi->quota_sem);
return ret;
}
@@ -3574,6 +3586,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino);
F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino);
sbi->cur_victim_sec = NULL_SECNO;
+ sbi->gc_mode = GC_NORMAL;
sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
@@ -3601,14 +3614,14 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
- init_rwsem(&sbi->io_order_lock);
+ init_f2fs_rwsem(&sbi->io_order_lock);
spin_lock_init(&sbi->cp_lock);
sbi->dirty_device = 0;
spin_lock_init(&sbi->dev_lock);
- init_rwsem(&sbi->sb_lock);
- init_rwsem(&sbi->pin_sem);
+ init_f2fs_rwsem(&sbi->sb_lock);
+ init_f2fs_rwsem(&sbi->pin_sem);
}
static int init_percpu_info(struct f2fs_sb_info *sbi)
@@ -3619,11 +3632,20 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
if (err)
return err;
+ err = percpu_counter_init(&sbi->rf_node_block_count, 0, GFP_KERNEL);
+ if (err)
+ goto err_valid_block;
+
err = percpu_counter_init(&sbi->total_valid_inode_count, 0,
GFP_KERNEL);
if (err)
- percpu_counter_destroy(&sbi->alloc_valid_block_count);
+ goto err_node_block;
+ return 0;
+err_node_block:
+ percpu_counter_destroy(&sbi->rf_node_block_count);
+err_valid_block:
+ percpu_counter_destroy(&sbi->alloc_valid_block_count);
return err;
}
@@ -3903,7 +3925,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
static int f2fs_setup_casefold(struct f2fs_sb_info *sbi)
{
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
if (f2fs_sb_has_casefold(sbi) && !sbi->sb->s_encoding) {
const struct f2fs_sb_encodings *encoding_info;
struct unicode_map *encoding;
@@ -3957,7 +3979,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
if (f2fs_block_unit_discard(sbi))
sm_i->dcc_info->discard_granularity = 1;
- sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
+ sm_i->ipu_policy = 1 << F2FS_IPU_FORCE |
+ 1 << F2FS_IPU_HONOR_OPU_WRITE;
}
sbi->readdir_ra = 1;
@@ -4067,11 +4090,11 @@ try_onemore:
/* init f2fs-specific super block info */
sbi->valid_super_block = valid_super_block;
- init_rwsem(&sbi->gc_lock);
+ init_f2fs_rwsem(&sbi->gc_lock);
mutex_init(&sbi->writepages);
- init_rwsem(&sbi->cp_global_sem);
- init_rwsem(&sbi->node_write);
- init_rwsem(&sbi->node_change);
+ init_f2fs_rwsem(&sbi->cp_global_sem);
+ init_f2fs_rwsem(&sbi->node_write);
+ init_f2fs_rwsem(&sbi->node_change);
/* disallow all the data/node/meta page writes */
set_sbi_flag(sbi, SBI_POR_DOING);
@@ -4092,18 +4115,18 @@ try_onemore:
}
for (j = HOT; j < n; j++) {
- init_rwsem(&sbi->write_io[i][j].io_rwsem);
+ init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem);
sbi->write_io[i][j].sbi = sbi;
sbi->write_io[i][j].bio = NULL;
spin_lock_init(&sbi->write_io[i][j].io_lock);
INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
- init_rwsem(&sbi->write_io[i][j].bio_list_lock);
+ init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
}
}
- init_rwsem(&sbi->cp_rwsem);
- init_rwsem(&sbi->quota_sem);
+ init_f2fs_rwsem(&sbi->cp_rwsem);
+ init_f2fs_rwsem(&sbi->quota_sem);
init_waitqueue_head(&sbi->cp_wait);
init_sb_info(sbi);
@@ -4458,7 +4481,7 @@ free_bio_info:
for (i = 0; i < NR_PAGE_TYPE; i++)
kvfree(sbi->write_io[i]);
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
utf8_unload(sb->s_encoding);
sb->s_encoding = NULL;
#endif
@@ -4528,7 +4551,7 @@ static struct file_system_type f2fs_fs_type = {
.name = "f2fs",
.mount = f2fs_mount,
.kill_sb = kill_f2fs_super,
- .fs_flags = FS_REQUIRES_DEV,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("f2fs");
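
[Review note] FS_ALLOW_IDMAP pairs with the mnt_userns plumbing added to f2fs_rename() at the top of this series: f2fs mounts may now carry an idmapping. A hypothetical userspace snippet showing what that enables (error handling omitted; on older libcs call syscall(SYS_mount_setattr, ...) directly):

#include <linux/mount.h>

struct mount_attr attr = {
	.attr_set  = MOUNT_ATTR_IDMAP,
	.userns_fd = userns_fd,	/* fd referring to a user namespace */
};

/* tree_fd: a detached mount obtained via open_tree(..., OPEN_TREE_CLONE) */
mount_setattr(tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr));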
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index df406c16b2eb..4c50aedd5144 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -41,6 +41,16 @@ enum {
ATGC_INFO, /* struct atgc_management */
};
+static const char *gc_mode_names[MAX_GC_MODE] = {
+ "GC_NORMAL",
+ "GC_IDLE_CB",
+ "GC_IDLE_GREEDY",
+ "GC_IDLE_AT",
+ "GC_URGENT_HIGH",
+ "GC_URGENT_LOW",
+ "GC_URGENT_MID"
+};
+
struct f2fs_attr {
struct attribute attr;
ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
@@ -201,7 +211,7 @@ static ssize_t unusable_show(struct f2fs_attr *a,
static ssize_t encoding_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
struct super_block *sb = sbi->sb;
if (f2fs_sb_has_casefold(sbi))
@@ -316,8 +326,13 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
return sysfs_emit(buf, "%u\n", sbi->compr_new_inode);
#endif
+ if (!strcmp(a->attr.name, "gc_urgent"))
+ return sysfs_emit(buf, "%s\n",
+ gc_mode_names[sbi->gc_mode]);
+
if (!strcmp(a->attr.name, "gc_segment_mode"))
- return sysfs_emit(buf, "%u\n", sbi->gc_segment_mode);
+ return sysfs_emit(buf, "%s\n",
+ gc_mode_names[sbi->gc_segment_mode]);
if (!strcmp(a->attr.name, "gc_reclaimed_segments")) {
return sysfs_emit(buf, "%u\n",
@@ -363,7 +378,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
if (!strlen(name) || strlen(name) >= F2FS_EXTENSION_LEN)
return -EINVAL;
- down_write(&sbi->sb_lock);
+ f2fs_down_write(&sbi->sb_lock);
ret = f2fs_update_extension_list(sbi, name, hot, set);
if (ret)
@@ -373,7 +388,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
if (ret)
f2fs_update_extension_list(sbi, name, hot, !set);
out:
- up_write(&sbi->sb_lock);
+ f2fs_up_write(&sbi->sb_lock);
return ret ? ret : count;
}
@@ -468,6 +483,13 @@ out:
}
} else if (t == 2) {
sbi->gc_mode = GC_URGENT_LOW;
+ } else if (t == 3) {
+ sbi->gc_mode = GC_URGENT_MID;
+ if (sbi->gc_thread) {
+ sbi->gc_thread->gc_wake = 1;
+ wake_up_interruptible_all(
+ &sbi->gc_thread->gc_wait_queue_head);
+ }
} else {
return -EINVAL;
}
@@ -481,7 +503,7 @@ out:
} else if (t == GC_IDLE_AT) {
if (!sbi->am.atgc_enabled)
return -EINVAL;
- sbi->gc_mode = GC_AT;
+ sbi->gc_mode = GC_IDLE_AT;
} else {
sbi->gc_mode = GC_NORMAL;
}
@@ -716,6 +738,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_request, max_discard_request);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, min_discard_issue_time, min_discard_issue_time);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, mid_discard_issue_time, mid_discard_issue_time);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_issue_time, max_discard_issue_time);
F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity);
F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
@@ -728,6 +754,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, max_roll_forward_node_blocks, max_rf_node_blocks);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
@@ -778,7 +805,7 @@ F2FS_GENERAL_RO_ATTR(avg_vblocks);
#ifdef CONFIG_FS_ENCRYPTION
F2FS_FEATURE_RO_ATTR(encryption);
F2FS_FEATURE_RO_ATTR(test_dummy_encryption_v2);
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
F2FS_FEATURE_RO_ATTR(encrypted_casefold);
#endif
#endif /* CONFIG_FS_ENCRYPTION */
@@ -797,7 +824,7 @@ F2FS_FEATURE_RO_ATTR(lost_found);
F2FS_FEATURE_RO_ATTR(verity);
#endif
F2FS_FEATURE_RO_ATTR(sb_checksum);
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
F2FS_FEATURE_RO_ATTR(casefold);
#endif
F2FS_FEATURE_RO_ATTR(readonly);
@@ -832,6 +859,10 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(reclaim_segments),
ATTR_LIST(main_blkaddr),
ATTR_LIST(max_small_discards),
+ ATTR_LIST(max_discard_request),
+ ATTR_LIST(min_discard_issue_time),
+ ATTR_LIST(mid_discard_issue_time),
+ ATTR_LIST(max_discard_issue_time),
ATTR_LIST(discard_granularity),
ATTR_LIST(pending_discard),
ATTR_LIST(batched_trim_sections),
@@ -847,6 +878,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(ram_thresh),
ATTR_LIST(ra_nid_pages),
ATTR_LIST(dirty_nats_ratio),
+ ATTR_LIST(max_roll_forward_node_blocks),
ATTR_LIST(cp_interval),
ATTR_LIST(idle_interval),
ATTR_LIST(discard_idle_interval),
@@ -910,7 +942,7 @@ static struct attribute *f2fs_feat_attrs[] = {
#ifdef CONFIG_FS_ENCRYPTION
ATTR_LIST(encryption),
ATTR_LIST(test_dummy_encryption_v2),
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
ATTR_LIST(encrypted_casefold),
#endif
#endif /* CONFIG_FS_ENCRYPTION */
@@ -929,7 +961,7 @@ static struct attribute *f2fs_feat_attrs[] = {
ATTR_LIST(verity),
#endif
ATTR_LIST(sb_checksum),
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
ATTR_LIST(casefold),
#endif
ATTR_LIST(readonly),
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
index fe5acdccaae1..3d793202cc9f 100644
--- a/fs/f2fs/verity.c
+++ b/fs/f2fs/verity.c
@@ -208,7 +208,7 @@ cleanup:
* from re-instantiating cached pages we are truncating (since unlike
* normal file accesses, garbage collection isn't limited by i_size).
*/
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
truncate_inode_pages(inode->i_mapping, inode->i_size);
err2 = f2fs_truncate(inode);
if (err2) {
@@ -216,7 +216,7 @@ cleanup:
err2);
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
clear_inode_flag(inode, FI_VERITY_IN_PROGRESS);
return err ?: err2;
}
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 8e5cd9c916ff..c76c15086e5f 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -525,10 +525,10 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
if (len > F2FS_NAME_LEN)
return -ERANGE;
- down_read(&F2FS_I(inode)->i_xattr_sem);
+ f2fs_down_read(&F2FS_I(inode)->i_xattr_sem);
error = lookup_all_xattrs(inode, ipage, index, len, name,
&entry, &base_addr, &base_size, &is_inline);
- up_read(&F2FS_I(inode)->i_xattr_sem);
+ f2fs_up_read(&F2FS_I(inode)->i_xattr_sem);
if (error)
return error;
@@ -562,9 +562,9 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
int error;
size_t rest = buffer_size;
- down_read(&F2FS_I(inode)->i_xattr_sem);
+ f2fs_down_read(&F2FS_I(inode)->i_xattr_sem);
error = read_all_xattrs(inode, NULL, &base_addr);
- up_read(&F2FS_I(inode)->i_xattr_sem);
+ f2fs_up_read(&F2FS_I(inode)->i_xattr_sem);
if (error)
return error;
@@ -786,9 +786,9 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
- down_write(&F2FS_I(inode)->i_xattr_sem);
+ f2fs_down_write(&F2FS_I(inode)->i_xattr_sem);
err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags);
- up_write(&F2FS_I(inode)->i_xattr_sem);
+ f2fs_up_write(&F2FS_I(inode)->i_xattr_sem);
f2fs_unlock_op(sbi);
f2fs_update_time(sbi, REQ_TIME);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a6f1c6d426d1..bf6051bdf1d1 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -342,7 +342,8 @@ int fat_block_truncate_page(struct inode *inode, loff_t from)
}
static const struct address_space_operations fat_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = fat_readpage,
.readahead = fat_readahead,
.writepage = fat_writepage,
@@ -745,7 +746,7 @@ static struct kmem_cache *fat_inode_cachep;
static struct inode *fat_alloc_inode(struct super_block *sb)
{
struct msdos_inode_info *ei;
- ei = kmem_cache_alloc(fat_inode_cachep, GFP_NOFS);
+ ei = alloc_inode_sb(sb, fat_inode_cachep, GFP_NOFS);
if (!ei)
return NULL;
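
[Review note] fat joins f2fs, freevxfs and fuse in this series in allocating inodes through alloc_inode_sb() instead of kmem_cache_alloc(). The helper is introduced elsewhere in the series (include/linux/fs.h); a sketch of its likely shape — the point is to tag the allocation with the superblock so the slab is charged to, and reparented with, the right memcg LRU for shrinking:

/* sketch of the helper these call sites switch to (include/linux/fs.h,
 * not part of this diff) */
static inline void *alloc_inode_sb(struct super_block *sb,
				   struct kmem_cache *cache, gfp_t gfp)
{
	return kmem_cache_alloc_lru(cache, &sb->s_inode_lru, gfp);
}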
diff --git a/fs/file_table.c b/fs/file_table.c
index 57edef16dce4..7d2e692b66a9 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -27,6 +27,7 @@
#include <linux/task_work.h>
#include <linux/ima.h>
#include <linux/swap.h>
+#include <linux/kmemleak.h>
#include <linux/atomic.h>
@@ -119,6 +120,11 @@ static struct ctl_table fs_stat_sysctls[] = {
static int __init init_fs_stat_sysctls(void)
{
register_sysctl_init("fs", fs_stat_sysctls);
+ if (IS_ENABLED(CONFIG_BINFMT_MISC)) {
+ struct ctl_table_header *hdr;
+ hdr = register_sysctl_mount_point("fs/binfmt_misc");
+ kmemleak_not_leak(hdr);
+ }
return 0;
}
fs_initcall(init_fs_stat_sysctls);
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 578a5062706e..22eed5a73ac2 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -124,7 +124,7 @@ static struct inode *vxfs_alloc_inode(struct super_block *sb)
{
struct vxfs_inode_info *vi;
- vi = kmem_cache_alloc(vxfs_inode_cachep, GFP_KERNEL);
+ vi = alloc_inode_sb(sb, vxfs_inode_cachep, GFP_KERNEL);
if (!vi)
return NULL;
inode_init_once(&vi->vfs_inode);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index f8d7fe6db989..591fe9cf1659 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -894,43 +894,6 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner);
/**
- * inode_congested - test whether an inode is congested
- * @inode: inode to test for congestion (may be NULL)
- * @cong_bits: mask of WB_[a]sync_congested bits to test
- *
- * Tests whether @inode is congested. @cong_bits is the mask of congestion
- * bits to test and the return value is the mask of set bits.
- *
- * If cgroup writeback is enabled for @inode, the congestion state is
- * determined by whether the cgwb (cgroup bdi_writeback) for the blkcg
- * associated with @inode is congested; otherwise, the root wb's congestion
- * state is used.
- *
- * @inode is allowed to be NULL as this function is often called on
- * mapping->host which is NULL for the swapper space.
- */
-int inode_congested(struct inode *inode, int cong_bits)
-{
- /*
- * Once set, ->i_wb never becomes NULL while the inode is alive.
- * Start transaction iff ->i_wb is visible.
- */
- if (inode && inode_to_wb_is_valid(inode)) {
- struct bdi_writeback *wb;
- struct wb_lock_cookie lock_cookie = {};
- bool congested;
-
- wb = unlocked_inode_to_wb_begin(inode, &lock_cookie);
- congested = wb_congested(wb, cong_bits);
- unlocked_inode_to_wb_end(inode, &lock_cookie);
- return congested;
- }
-
- return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
-}
-EXPORT_SYMBOL_GPL(inode_congested);
-
-/**
* wb_split_bdi_pages - split nr_pages to write according to bandwidth
* @wb: target bdi_writeback to split @nr_pages to
* @nr_pages: number of pages to write for the whole bdi
@@ -1903,8 +1866,7 @@ static long writeback_sb_inodes(struct super_block *sb,
* unplug, so get our IOs out the door before we
* give up the CPU.
*/
- if (current->plug)
- blk_flush_plug(current->plug, false);
+ blk_flush_plug(current->plug, false);
cond_resched();
}
@@ -2234,7 +2196,6 @@ void wb_workfn(struct work_struct *work)
long pages_written;
set_worker_desc("flush-%s", bdi_dev_name(wb->bdi));
- current->flags |= PF_SWAPWRITE;
if (likely(!current_is_workqueue_rescuer() ||
!test_bit(WB_registered, &wb->state))) {
@@ -2263,8 +2224,6 @@ void wb_workfn(struct work_struct *work)
wb_wakeup(wb);
else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
wb_wakeup_delayed(wb);
-
- current->flags &= ~PF_SWAPWRITE;
}
/*
@@ -2301,8 +2260,7 @@ void wakeup_flusher_threads(enum wb_reason reason)
/*
* If we are expecting writeback progress we must submit plugged IO.
*/
- if (blk_needs_flush_plug(current))
- blk_flush_plug(current->plug, true);
+ blk_flush_plug(current->plug, true);
rcu_read_lock();
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
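
[Review note] The writeback paths above drop their open-coded `if (current->plug)` and blk_needs_flush_plug() guards, which only works if blk_flush_plug() is now safe to call with a NULL plug. A sketch of the guard moving into the helper (the actual definition lives in include/linux/blkdev.h, outside this diff):

/* sketch: the NULL check moves from every caller into the helper itself */
static inline void blk_flush_plug(struct blk_plug *plug, bool async)
{
	if (plug)
		__blk_flush_plug(plug, async);
}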
diff --git a/fs/fscache/io.c b/fs/fscache/io.c
index 7a769ea57720..c8c7fe9e9a6e 100644
--- a/fs/fscache/io.c
+++ b/fs/fscache/io.c
@@ -159,27 +159,29 @@ int __fscache_begin_write_operation(struct netfs_cache_resources *cres,
EXPORT_SYMBOL(__fscache_begin_write_operation);
/**
- * fscache_set_page_dirty - Mark page dirty and pin a cache object for writeback
- * @page: The page being dirtied
+ * fscache_dirty_folio - Mark folio dirty and pin a cache object for writeback
+ * @mapping: The mapping the folio belongs to.
+ * @folio: The folio being dirtied.
* @cookie: The cookie referring to the cache object
*
- * Set the dirty flag on a page and pin an in-use cache object in memory when
- * dirtying a page so that writeback can later write to it. This is intended
- * to be called from the filesystem's ->set_page_dirty() method.
+ * Set the dirty flag on a folio and pin an in-use cache object in memory
+ * so that writeback can later write to it. This is intended
+ * to be called from the filesystem's ->dirty_folio() method.
*
- * Returns 1 if PG_dirty was set on the page, 0 otherwise.
+ * Return: true if the dirty flag was set on the folio, false otherwise.
*/
-int fscache_set_page_dirty(struct page *page, struct fscache_cookie *cookie)
+bool fscache_dirty_folio(struct address_space *mapping, struct folio *folio,
+ struct fscache_cookie *cookie)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = mapping->host;
bool need_use = false;
_enter("");
- if (!__set_page_dirty_nobuffers(page))
- return 0;
+ if (!filemap_dirty_folio(mapping, folio))
+ return false;
if (!fscache_cookie_valid(cookie))
- return 1;
+ return true;
if (!(inode->i_state & I_PINNING_FSCACHE_WB)) {
spin_lock(&inode->i_lock);
@@ -192,9 +194,9 @@ int fscache_set_page_dirty(struct page *page, struct fscache_cookie *cookie)
if (need_use)
fscache_use_cookie(cookie, true);
}
- return 1;
+ return true;
}
-EXPORT_SYMBOL(fscache_set_page_dirty);
+EXPORT_SYMBOL(fscache_dirty_folio);
struct fscache_write_request {
struct netfs_cache_resources cache_resources;
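
[Review note] fscache_dirty_folio() keeps the old helper's contract in folio terms: it returns whatever filemap_dirty_folio() reports and pins the cookie for later writeback. A sketch of how a netfs-style filesystem would wire it up — myfs and myfs_i_cookie() are hypothetical stand-ins for the pattern:

/* hypothetical filesystem gluing ->dirty_folio to fscache */
static bool myfs_dirty_folio(struct address_space *mapping,
			     struct folio *folio)
{
	return fscache_dirty_folio(mapping, folio,
				   myfs_i_cookie(mapping->host));
}

static const struct address_space_operations myfs_aops = {
	.dirty_folio	= myfs_dirty_folio,
	/* ... readpage, writepages, etc. ... */
};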
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 000d2e5627e9..7cede9a3bc96 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -164,7 +164,6 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
{
unsigned val;
struct fuse_conn *fc;
- struct fuse_mount *fm;
ssize_t ret;
ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
@@ -178,22 +177,6 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
down_read(&fc->killsb);
spin_lock(&fc->bg_lock);
fc->congestion_threshold = val;
-
- /*
- * Get any fuse_mount belonging to this fuse_conn; s_bdi is
- * shared between all of them
- */
-
- if (!list_empty(&fc->mounts)) {
- fm = list_first_entry(&fc->mounts, struct fuse_mount, fc_entry);
- if (fc->num_background < fc->congestion_threshold) {
- clear_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
- clear_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
- } else {
- set_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
- set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
- }
- }
spin_unlock(&fc->bg_lock);
up_read(&fc->killsb);
fuse_conn_put(fc);
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index 182b24a14804..d7d3a7f06862 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -1326,8 +1326,7 @@ bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi)
static const struct address_space_operations fuse_dax_file_aops = {
.writepages = fuse_dax_writepages,
.direct_IO = noop_direct_IO,
- .set_page_dirty = __set_page_dirty_no_writeback,
- .invalidatepage = noop_invalidatepage,
+ .dirty_folio = noop_dirty_folio,
};
static bool fuse_should_enable_dax(struct inode *inode, unsigned int flags)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index cd54a529460d..0e537e580dc1 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -315,10 +315,6 @@ void fuse_request_end(struct fuse_req *req)
wake_up(&fc->blocked_waitq);
}
- if (fc->num_background == fc->congestion_threshold && fm->sb) {
- clear_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
- clear_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
- }
fc->num_background--;
fc->active_background--;
flush_bg_queue(fc);
@@ -540,10 +536,6 @@ static bool fuse_request_queue_background(struct fuse_req *req)
fc->num_background++;
if (fc->num_background == fc->max_background)
fc->blocked = 1;
- if (fc->num_background == fc->congestion_threshold && fm->sb) {
- set_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
- set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
- }
list_add_tail(&req->list, &fc->bg_queue);
flush_bg_queue(fc);
queued = true;
@@ -941,7 +933,17 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
while (count) {
if (cs->write && cs->pipebufs && page) {
- return fuse_ref_page(cs, page, offset, count);
+ /*
+ * Can't control lifetime of pipe buffers, so always
+ * copy user pages.
+ */
+ if (cs->req->args->user_pages) {
+ err = fuse_copy_fill(cs);
+ if (err)
+ return err;
+ } else {
+ return fuse_ref_page(cs, page, offset, count);
+ }
} else if (!cs->len) {
if (cs->move_pages && page &&
offset == 0 && count == PAGE_SIZE) {
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 656e921f3506..9ff27b8a9782 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1773,7 +1773,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
/*
* Only call invalidate_inode_pages2() after removing
- * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
+ * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock.
*/
if ((is_truncate || !is_wb) &&
S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 829094451774..f18d14d5fea1 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -966,6 +966,14 @@ static void fuse_readahead(struct readahead_control *rac)
struct fuse_io_args *ia;
struct fuse_args_pages *ap;
+ if (fc->num_background >= fc->congestion_threshold &&
+ rac->ra->async_size >= readahead_count(rac))
+ /*
+ * Congested and only async pages left, so skip the
+ * rest.
+ */
+ break;
+
nr_pages = readahead_count(rac) - nr_pages;
if (nr_pages > max_pages)
nr_pages = max_pages;
@@ -1413,6 +1421,7 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
(PAGE_SIZE - ret) & (PAGE_SIZE - 1);
}
+ ap->args.user_pages = true;
if (write)
ap->args.in_pages = true;
else
@@ -1958,6 +1967,7 @@ err:
static int fuse_writepage(struct page *page, struct writeback_control *wbc)
{
+ struct fuse_conn *fc = get_fuse_conn(page->mapping->host);
int err;
if (fuse_page_is_writeback(page->mapping->host, page->index)) {
@@ -1973,6 +1983,10 @@ static int fuse_writepage(struct page *page, struct writeback_control *wbc)
return 0;
}
+ if (wbc->sync_mode == WB_SYNC_NONE &&
+ fc->num_background >= fc->congestion_threshold)
+ return AOP_WRITEPAGE_ACTIVATE;
+
err = fuse_writepage_locked(page);
unlock_page(page);
@@ -2226,6 +2240,10 @@ static int fuse_writepages(struct address_space *mapping,
if (fuse_is_bad(inode))
goto out;
+ if (wbc->sync_mode == WB_SYNC_NONE &&
+ fc->num_background >= fc->congestion_threshold)
+ return 0;
+
data.inode = inode;
data.wpa = NULL;
data.ff = NULL;
@@ -2330,17 +2348,17 @@ unlock:
return copied;
}
-static int fuse_launder_page(struct page *page)
+static int fuse_launder_folio(struct folio *folio)
{
int err = 0;
- if (clear_page_dirty_for_io(page)) {
- struct inode *inode = page->mapping->host;
+ if (folio_clear_dirty_for_io(folio)) {
+ struct inode *inode = folio->mapping->host;
/* Serialize with pending writeback for the same page */
- fuse_wait_on_page_writeback(inode, page->index);
- err = fuse_writepage_locked(page);
+ fuse_wait_on_page_writeback(inode, folio->index);
+ err = fuse_writepage_locked(&folio->page);
if (!err)
- fuse_wait_on_page_writeback(inode, page->index);
+ fuse_wait_on_page_writeback(inode, folio->index);
}
return err;
}
@@ -3161,8 +3179,8 @@ static const struct address_space_operations fuse_file_aops = {
.readahead = fuse_readahead,
.writepage = fuse_writepage,
.writepages = fuse_writepages,
- .launder_page = fuse_launder_page,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .launder_folio = fuse_launder_folio,
+ .dirty_folio = filemap_dirty_folio,
.bmap = fuse_bmap,
.direct_IO = fuse_direct_IO,
.write_begin = fuse_write_begin,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e8e59fbdefeb..eac4984cc753 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -256,6 +256,7 @@ struct fuse_args {
bool nocreds:1;
bool in_pages:1;
bool out_pages:1;
+ bool user_pages:1;
bool out_argvar:1;
bool page_zeroing:1;
bool page_replace:1;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ee846ce371d8..8c0665c5dff8 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -23,6 +23,7 @@
#include <linux/exportfs.h>
#include <linux/posix_acl.h>
#include <linux/pid_namespace.h>
+#include <uapi/linux/magic.h>
MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -50,8 +51,6 @@ MODULE_PARM_DESC(max_user_congthresh,
"Global limit for the maximum congestion threshold an "
"unprivileged user can set");
-#define FUSE_SUPER_MAGIC 0x65735546
-
#define FUSE_DEFAULT_BLKSIZE 512
/** Maximum number of outstanding background requests */
@@ -73,7 +72,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
{
struct fuse_inode *fi;
- fi = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL);
+ fi = alloc_inode_sb(sb, fuse_inode_cachep, GFP_KERNEL);
if (!fi)
return NULL;
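The kmem_cache_alloc() to alloc_inode_sb() conversion here (repeated in the gfs2, hfs, hfsplus, hostfs, hpfs, hugetlbfs and fs/inode.c hunks below) ties inode allocation to the superblock, so the slab object can be charged to the right memcg and tracked by the sb's inode list_lru. As far as I can tell, alloc_inode_sb() is a thin inline over kmem_cache_alloc_lru(); the mocked sketch below only illustrates that shape and is not the kernel implementation:

/* Mocked sketch: what alloc_inode_sb() adds over plain kmem_cache_alloc().
 * All types are stand-ins; only the call shape mirrors the kernel. */
#include <stdlib.h>

struct list_lru_mock { int entries; };		/* per-sb inode LRU */
struct super_block_mock { struct list_lru_mock s_inode_lru; };
struct kmem_cache_mock { size_t object_size; };

/* stand-in for kmem_cache_alloc_lru(): the real one also lets the slab
 * allocator charge the object against the LRU's memcg */
static void *kmem_cache_alloc_lru_mock(struct kmem_cache_mock *cache,
				       struct list_lru_mock *lru)
{
	(void)lru;
	return malloc(cache->object_size);
}

static void *alloc_inode_sb_mock(struct super_block_mock *sb,
				 struct kmem_cache_mock *cache)
{
	/* the superblock argument is the whole point: it names the LRU */
	return kmem_cache_alloc_lru_mock(cache, &sb->s_inode_lru);
}

int main(void)
{
	struct super_block_mock sb = { { 0 } };
	struct kmem_cache_mock cache = { .object_size = 128 };

	free(alloc_inode_sb_mock(&sb, &cache));
	return 0;
}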
diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c
index fbc09dab1f85..df58966bc874 100644
--- a/fs/fuse/ioctl.c
+++ b/fs/fuse/ioctl.c
@@ -394,9 +394,12 @@ static int fuse_priv_ioctl(struct inode *inode, struct fuse_file *ff,
args.out_args[1].value = ptr;
err = fuse_simple_request(fm, &args);
- if (!err && outarg.flags & FUSE_IOCTL_RETRY)
- err = -EIO;
-
+ if (!err) {
+ if (outarg.result < 0)
+ err = outarg.result;
+ else if (outarg.flags & FUSE_IOCTL_RETRY)
+ err = -EIO;
+ }
return err;
}
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 9d737904d07c..86b7dbb6a0d4 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -8,6 +8,7 @@
#include <linux/dax.h>
#include <linux/pci.h>
#include <linux/pfn_t.h>
+#include <linux/memremap.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_fs.h>
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 005e920f5d4a..72c9f31ce724 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -606,18 +606,12 @@ out:
gfs2_trans_end(sdp);
}
-/**
- * jdata_set_page_dirty - Page dirtying function
- * @page: The page to dirty
- *
- * Returns: 1 if it dirtied the page, or 0 otherwise
- */
-
-static int jdata_set_page_dirty(struct page *page)
+static bool jdata_dirty_folio(struct address_space *mapping,
+ struct folio *folio)
{
if (current->journal_info)
- SetPageChecked(page);
- return __set_page_dirty_buffers(page);
+ folio_set_checked(folio);
+ return block_dirty_folio(mapping, folio);
}
/**
@@ -672,22 +666,23 @@ static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
unlock_buffer(bh);
}
-static void gfs2_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length)
+static void gfs2_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length)
{
- struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
- unsigned int stop = offset + length;
- int partial_page = (offset || length < PAGE_SIZE);
+ struct gfs2_sbd *sdp = GFS2_SB(folio->mapping->host);
+ size_t stop = offset + length;
+ int partial_page = (offset || length < folio_size(folio));
struct buffer_head *bh, *head;
unsigned long pos = 0;
- BUG_ON(!PageLocked(page));
+ BUG_ON(!folio_test_locked(folio));
if (!partial_page)
- ClearPageChecked(page);
- if (!page_has_buffers(page))
+ folio_clear_checked(folio);
+ head = folio_buffers(folio);
+ if (!head)
goto out;
- bh = head = page_buffers(page);
+ bh = head;
do {
if (pos + bh->b_size > stop)
return;
@@ -699,7 +694,7 @@ static void gfs2_invalidatepage(struct page *page, unsigned int offset,
} while (bh != head);
out:
if (!partial_page)
- try_to_release_page(page, 0);
+ filemap_release_folio(folio, 0);
}
/**
@@ -779,9 +774,9 @@ static const struct address_space_operations gfs2_aops = {
.writepages = gfs2_writepages,
.readpage = gfs2_readpage,
.readahead = gfs2_readahead,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .dirty_folio = filemap_dirty_folio,
.releasepage = iomap_releasepage,
- .invalidatepage = iomap_invalidatepage,
+ .invalidate_folio = iomap_invalidate_folio,
.bmap = gfs2_bmap,
.direct_IO = noop_direct_IO,
.migratepage = iomap_migrate_page,
@@ -794,9 +789,9 @@ static const struct address_space_operations gfs2_jdata_aops = {
.writepages = gfs2_jdata_writepages,
.readpage = gfs2_readpage,
.readahead = gfs2_readahead,
- .set_page_dirty = jdata_set_page_dirty,
+ .dirty_folio = jdata_dirty_folio,
.bmap = gfs2_bmap,
- .invalidatepage = gfs2_invalidatepage,
+ .invalidate_folio = gfs2_invalidate_folio,
.releasepage = gfs2_releasepage,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 3e718cfc19a7..8c39a8571b1f 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -704,10 +704,11 @@ static int gfs2_release(struct inode *inode, struct file *file)
kfree(file->private_data);
file->private_data = NULL;
- if (gfs2_rs_active(&ip->i_res))
- gfs2_rs_delete(ip, &inode->i_writecount);
- if (file->f_mode & FMODE_WRITE)
+ if (file->f_mode & FMODE_WRITE) {
+ if (gfs2_rs_active(&ip->i_res))
+ gfs2_rs_delete(ip, &inode->i_writecount);
gfs2_qa_put(ip);
+ }
return 0;
}
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index b7ab8430333c..6b23399eaee0 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -301,9 +301,6 @@ void gfs2_glock_queue_put(struct gfs2_glock *gl)
void gfs2_glock_put(struct gfs2_glock *gl)
{
- /* last put could call sleepable dlm api */
- might_sleep();
-
if (lockref_put_or_lock(&gl->gl_lockref))
return;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index ca0bb3a73912..4ae1eefae616 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -265,10 +265,9 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno,
bio_end_io_t *end_io)
{
struct super_block *sb = sdp->sd_vfs;
- struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS);
+ struct bio *bio = bio_alloc(sb->s_bdev, BIO_MAX_VECS, 0, GFP_NOIO);
bio->bi_iter.bi_sector = blkno << sdp->sd_fsb2bb_shift;
- bio_set_dev(bio, sb->s_bdev);
bio->bi_end_io = end_io;
bio->bi_private = sdp;
@@ -489,10 +488,9 @@ static struct bio *gfs2_chain_bio(struct bio *prev, unsigned int nr_iovecs)
{
struct bio *new;
- new = bio_alloc(GFP_NOIO, nr_iovecs);
- bio_copy_dev(new, prev);
+ new = bio_alloc(prev->bi_bdev, nr_iovecs, prev->bi_opf, GFP_NOIO);
+ bio_clone_blkg_association(new, prev);
new->bi_iter.bi_sector = bio_end_sector(prev);
- new->bi_opf = prev->bi_opf;
new->bi_write_hint = prev->bi_write_hint;
bio_chain(new, prev);
submit_bio(prev);
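Every bio_alloc() conversion in this series follows the same shape: the target block device and the operation flags move into the allocator call, which makes the follow-up bio_set_dev()/bio_set_op_attrs()/bio_copy_dev() calls redundant. A comment-only summary of the pattern (kernel API, so not runnable on its own):

/*
 * bio_alloc() conversion pattern used throughout this series:
 *
 * before:
 *	bio = bio_alloc(GFP_NOIO, nr_vecs);
 *	bio_set_dev(bio, bdev);
 *	bio_set_op_attrs(bio, op, op_flags);
 *
 * after:
 *	bio = bio_alloc(bdev, nr_vecs, op | op_flags, GFP_NOIO);
 *
 * Fixing bdev and opf at allocation time removes two helpers per call
 * site and lets the block layer see the full request context up front.
 */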
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 72d30a682ece..d8bd1d48bd78 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -89,13 +89,15 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
}
const struct address_space_operations gfs2_meta_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.writepage = gfs2_aspace_writepage,
.releasepage = gfs2_releasepage,
};
const struct address_space_operations gfs2_rgrp_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.writepage = gfs2_aspace_writepage,
.releasepage = gfs2_releasepage,
};
@@ -222,9 +224,8 @@ static void gfs2_submit_bhs(int op, int op_flags, struct buffer_head *bhs[],
struct buffer_head *bh = *bhs;
struct bio *bio;
- bio = bio_alloc(GFP_NOIO, num);
+ bio = bio_alloc(bh->b_bdev, num, op | op_flags, GFP_NOIO);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
- bio_set_dev(bio, bh->b_bdev);
while (num > 0) {
bh = *bhs;
if (!bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh))) {
@@ -235,7 +236,6 @@ static void gfs2_submit_bhs(int op, int op_flags, struct buffer_head *bhs[],
num--;
}
bio->bi_end_io = gfs2_meta_read_endio;
- bio_set_op_attrs(bio, op, op_flags);
submit_bio(bio);
}
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 7f8410d8fdc1..c9b423c874a3 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -251,14 +251,12 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
ClearPageDirty(page);
lock_page(page);
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = bio_alloc(sb->s_bdev, 1, REQ_OP_READ | REQ_META, GFP_NOFS);
bio->bi_iter.bi_sector = sector * (sb->s_blocksize >> 9);
- bio_set_dev(bio, sb->s_bdev);
bio_add_page(bio, page, PAGE_SIZE, 0);
bio->bi_end_io = end_bio_io_page;
bio->bi_private = page;
- bio_set_op_attrs(bio, REQ_OP_READ, REQ_META);
submit_bio(bio);
wait_on_page_locked(page);
bio_put(bio);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 64c67090f503..cf9cf66522b3 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1425,7 +1425,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
{
struct gfs2_inode *ip;
- ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
+ ip = alloc_inode_sb(sb, gfs2_inode_cachep, GFP_KERNEL);
if (!ip)
return NULL;
ip->i_flags = 0;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index a6002b2d146d..d87ea98cf535 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -15,7 +15,7 @@
#include <linux/kobject.h>
#include <linux/uaccess.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 2a5143246282..55f45e9b4930 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -159,7 +159,8 @@ static int hfs_writepages(struct address_space *mapping,
}
const struct address_space_operations hfs_btree_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = hfs_readpage,
.writepage = hfs_writepage,
.write_begin = hfs_write_begin,
@@ -169,7 +170,8 @@ const struct address_space_operations hfs_btree_aops = {
};
const struct address_space_operations hfs_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = hfs_readpage,
.writepage = hfs_writepage,
.write_begin = hfs_write_begin,
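The address_space_operations conversions in the hfs hunks above, and in the fuse, gfs2, hfsplus, hostfs, hpfs and hugetlbfs hunks around them, map the old page-based hooks onto their folio equivalents; buffer-head filesystems additionally gain an explicit .invalidate_folio. The correspondence, as a comment-only summary of the kernel symbols used in this series:

/*
 * aops conversion used in this series:
 *
 *	.set_page_dirty = __set_page_dirty_buffers
 *	  -> .dirty_folio = block_dirty_folio
 *	     .invalidate_folio = block_invalidate_folio	(buffer-head fs)
 *
 *	.set_page_dirty = __set_page_dirty_nobuffers
 *	  -> .dirty_folio = filemap_dirty_folio
 *
 *	.set_page_dirty = __set_page_dirty_no_writeback
 *	  -> .dirty_folio = noop_dirty_folio
 *
 *	.invalidatepage = iomap_invalidatepage
 *	  -> .invalidate_folio = iomap_invalidate_folio
 */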
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 5beb82652435..8082eb01127c 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -9,7 +9,7 @@
*/
#include <linux/cdrom.h>
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
#include <linux/nls.h>
#include <linux/slab.h>
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 12d9bae39363..6764afa98a6f 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -162,7 +162,7 @@ static struct inode *hfs_alloc_inode(struct super_block *sb)
{
struct hfs_inode_info *i;
- i = kmem_cache_alloc(hfs_inode_cachep, GFP_KERNEL);
+ i = alloc_inode_sb(sb, hfs_inode_cachep, GFP_KERNEL);
return i ? &i->vfs_inode : NULL;
}
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index d08a8d1d40a4..446a816aa8e1 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -156,7 +156,8 @@ static int hfsplus_writepages(struct address_space *mapping,
}
const struct address_space_operations hfsplus_btree_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = hfsplus_readpage,
.writepage = hfsplus_writepage,
.write_begin = hfsplus_write_begin,
@@ -166,7 +167,8 @@ const struct address_space_operations hfsplus_btree_aops = {
};
const struct address_space_operations hfsplus_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = hfsplus_readpage,
.writepage = hfsplus_writepage,
.write_begin = hfsplus_write_begin,
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index b9e3db3f855f..8479add998b5 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -624,7 +624,7 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb)
{
struct hfsplus_inode_info *i;
- i = kmem_cache_alloc(hfsplus_inode_cachep, GFP_KERNEL);
+ i = alloc_inode_sb(sb, hfsplus_inode_cachep, GFP_KERNEL);
return i ? &i->vfs_inode : NULL;
}
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 51ae6f1eb4a5..0b8ad6586df5 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -12,7 +12,6 @@
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/cdrom.h>
-#include <linux/genhd.h>
#include <asm/unaligned.h>
#include "hfsplus_fs.h"
@@ -64,10 +63,8 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
offset = start & (io_size - 1);
sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1);
- bio = bio_alloc(GFP_NOIO, 1);
+ bio = bio_alloc(sb->s_bdev, 1, op | op_flags, GFP_NOIO);
bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, sb->s_bdev);
- bio_set_op_attrs(bio, op, op_flags);
if (op != WRITE && data)
*data = (u8 *)buf + offset;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index ef481c3d9019..14f9ac973a2e 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -14,6 +14,7 @@
#include <linux/statfs.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
+#include <linux/writeback.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include "hostfs.h"
@@ -222,7 +223,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
{
struct hostfs_inode_info *hi;
- hi = kmem_cache_alloc(hostfs_inode_cache, GFP_KERNEL_ACCOUNT);
+ hi = alloc_inode_sb(sb, hostfs_inode_cache, GFP_KERNEL_ACCOUNT);
if (hi == NULL)
return NULL;
hi->fd = -1;
@@ -504,7 +505,7 @@ static int hostfs_write_end(struct file *file, struct address_space *mapping,
static const struct address_space_operations hostfs_aops = {
.writepage = hostfs_writepage,
.readpage = hostfs_readpage,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .dirty_folio = filemap_dirty_folio,
.write_begin = hostfs_write_begin,
.write_end = hostfs_write_end,
};
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index fb37f57130aa..99493a23c5d0 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -245,7 +245,8 @@ static int hpfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
const struct address_space_operations hpfs_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = hpfs_readpage,
.writepage = hpfs_writepage,
.readahead = hpfs_readahead,
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index a7dbfc892022..1cb89595b875 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -232,7 +232,7 @@ static struct kmem_cache * hpfs_inode_cachep;
static struct inode *hpfs_alloc_inode(struct super_block *sb)
{
struct hpfs_inode_info *ei;
- ei = kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS);
+ ei = alloc_inode_sb(sb, hpfs_inode_cachep, GFP_NOFS);
if (!ei)
return NULL;
return &ei->vfs_inode;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a7c6c7498be0..99c7477cee5c 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1110,7 +1110,7 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo)))
return NULL;
- p = kmem_cache_alloc(hugetlbfs_inode_cachep, GFP_KERNEL);
+ p = alloc_inode_sb(sb, hugetlbfs_inode_cachep, GFP_KERNEL);
if (unlikely(!p)) {
hugetlbfs_inc_free_inodes(sbinfo);
return NULL;
@@ -1144,7 +1144,7 @@ static void hugetlbfs_destroy_inode(struct inode *inode)
static const struct address_space_operations hugetlbfs_aops = {
.write_begin = hugetlbfs_write_begin,
.write_end = hugetlbfs_write_end,
- .set_page_dirty = __set_page_dirty_no_writeback,
+ .dirty_folio = noop_dirty_folio,
.migratepage = hugetlbfs_migrate_page,
.error_remove_page = hugetlbfs_error_remove_page,
};
diff --git a/fs/inode.c b/fs/inode.c
index 63324df6fa27..9d9b422504d1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -259,7 +259,7 @@ static struct inode *alloc_inode(struct super_block *sb)
if (ops->alloc_inode)
inode = ops->alloc_inode(sb);
else
- inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
+ inode = alloc_inode_sb(sb, inode_cachep, GFP_KERNEL);
if (!inode)
return NULL;
diff --git a/fs/internal.h b/fs/internal.h
index 8590c973c2f4..fb2c2ea807d7 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -158,11 +158,6 @@ extern void dput_to_list(struct dentry *, struct list_head *);
extern void shrink_dentry_list(struct list_head *);
/*
- * read_write.c
- */
-extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
-
-/*
* pipe.c
*/
extern const struct file_operations pipefifo_fops;
@@ -184,7 +179,9 @@ int sb_init_dio_done_wq(struct super_block *sb);
/*
* fs/stat.c:
*/
-int do_statx(int dfd, const char __user *filename, unsigned flags,
+
+int getname_statx_lookup_flags(int flags);
+int do_statx(int dfd, struct filename *filename, unsigned int flags,
unsigned int mask, struct statx __user *buffer);
/*
diff --git a/fs/io-wq.c b/fs/io-wq.c
index bb7f161bb19c..5b93fa67d346 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -76,6 +76,7 @@ struct io_wqe_acct {
unsigned max_workers;
int index;
atomic_t nr_running;
+ raw_spinlock_t lock;
struct io_wq_work_list work_list;
unsigned long flags;
};
@@ -91,7 +92,7 @@ enum {
*/
struct io_wqe {
raw_spinlock_t lock;
- struct io_wqe_acct acct[2];
+ struct io_wqe_acct acct[IO_WQ_ACCT_NR];
int node;
@@ -224,12 +225,12 @@ static void io_worker_exit(struct io_worker *worker)
if (worker->flags & IO_WORKER_F_FREE)
hlist_nulls_del_rcu(&worker->nulls_node);
list_del_rcu(&worker->all_list);
- preempt_disable();
+ raw_spin_unlock(&wqe->lock);
io_wqe_dec_running(worker);
worker->flags = 0;
+ preempt_disable();
current->flags &= ~PF_IO_WORKER;
preempt_enable();
- raw_spin_unlock(&wqe->lock);
kfree_rcu(worker, rcu);
io_worker_ref_put(wqe->wq);
@@ -238,10 +239,15 @@ static void io_worker_exit(struct io_worker *worker)
static inline bool io_acct_run_queue(struct io_wqe_acct *acct)
{
+ bool ret = false;
+
+ raw_spin_lock(&acct->lock);
if (!wq_list_empty(&acct->work_list) &&
!test_bit(IO_ACCT_STALLED_BIT, &acct->flags))
- return true;
- return false;
+ ret = true;
+ raw_spin_unlock(&acct->lock);
+
+ return ret;
}
/*
@@ -385,7 +391,6 @@ fail:
}
static void io_wqe_dec_running(struct io_worker *worker)
- __must_hold(wqe->lock)
{
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
struct io_wqe *wqe = worker->wqe;
@@ -393,13 +398,14 @@ static void io_wqe_dec_running(struct io_worker *worker)
if (!(worker->flags & IO_WORKER_F_UP))
return;
- if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) {
- atomic_inc(&acct->nr_running);
- atomic_inc(&wqe->wq->worker_refs);
- raw_spin_unlock(&wqe->lock);
- io_queue_worker_create(worker, acct, create_worker_cb);
- raw_spin_lock(&wqe->lock);
- }
+ if (!atomic_dec_and_test(&acct->nr_running))
+ return;
+ if (!io_acct_run_queue(acct))
+ return;
+
+ atomic_inc(&acct->nr_running);
+ atomic_inc(&wqe->wq->worker_refs);
+ io_queue_worker_create(worker, acct, create_worker_cb);
}
/*
@@ -407,11 +413,12 @@ static void io_wqe_dec_running(struct io_worker *worker)
* it's currently on the freelist
*/
static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker)
- __must_hold(wqe->lock)
{
if (worker->flags & IO_WORKER_F_FREE) {
worker->flags &= ~IO_WORKER_F_FREE;
+ raw_spin_lock(&wqe->lock);
hlist_nulls_del_init_rcu(&worker->nulls_node);
+ raw_spin_unlock(&wqe->lock);
}
}
@@ -456,7 +463,7 @@ static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
struct io_worker *worker)
- __must_hold(wqe->lock)
+ __must_hold(acct->lock)
{
struct io_wq_work_node *node, *prev;
struct io_wq_work *work, *tail;
@@ -498,9 +505,9 @@ static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
* work being added and clearing the stalled bit.
*/
set_bit(IO_ACCT_STALLED_BIT, &acct->flags);
- raw_spin_unlock(&wqe->lock);
+ raw_spin_unlock(&acct->lock);
unstalled = io_wait_on_hash(wqe, stall_hash);
- raw_spin_lock(&wqe->lock);
+ raw_spin_lock(&acct->lock);
if (unstalled) {
clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
if (wq_has_sleeper(&wqe->wq->hash->wait))
@@ -538,7 +545,6 @@ static void io_assign_current_work(struct io_worker *worker,
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
static void io_worker_handle_work(struct io_worker *worker)
- __releases(wqe->lock)
{
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
struct io_wqe *wqe = worker->wqe;
@@ -555,7 +561,9 @@ static void io_worker_handle_work(struct io_worker *worker)
* can't make progress, any work completion or insertion will
* clear the stalled flag.
*/
+ raw_spin_lock(&acct->lock);
work = io_get_next_work(acct, worker);
+ raw_spin_unlock(&acct->lock);
if (work) {
__io_worker_busy(wqe, worker);
@@ -569,10 +577,9 @@ static void io_worker_handle_work(struct io_worker *worker)
raw_spin_lock(&worker->lock);
worker->next_work = work;
raw_spin_unlock(&worker->lock);
- }
- raw_spin_unlock(&wqe->lock);
- if (!work)
+ } else {
break;
+ }
io_assign_current_work(worker, work);
__set_current_state(TASK_RUNNING);
@@ -608,8 +615,6 @@ static void io_worker_handle_work(struct io_worker *worker)
wake_up(&wq->hash->wait);
}
} while (work);
-
- raw_spin_lock(&wqe->lock);
} while (1);
}
@@ -633,12 +638,10 @@ static int io_wqe_worker(void *data)
long ret;
set_current_state(TASK_INTERRUPTIBLE);
-loop:
- raw_spin_lock(&wqe->lock);
- if (io_acct_run_queue(acct)) {
+ while (io_acct_run_queue(acct))
io_worker_handle_work(worker);
- goto loop;
- }
+
+ raw_spin_lock(&wqe->lock);
/* timed out, exit unless we're the last worker */
if (last_timeout && acct->nr_workers > 1) {
acct->nr_workers--;
@@ -662,10 +665,8 @@ loop:
last_timeout = !ret;
}
- if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
- raw_spin_lock(&wqe->lock);
+ if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
io_worker_handle_work(worker);
- }
audit_free(current);
io_worker_exit(worker);
@@ -705,10 +706,7 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
return;
worker->flags &= ~IO_WORKER_F_RUNNING;
-
- raw_spin_lock(&worker->wqe->lock);
io_wqe_dec_running(worker);
- raw_spin_unlock(&worker->wqe->lock);
}
static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker,
@@ -778,10 +776,12 @@ static void create_worker_cont(struct callback_head *cb)
.cancel_all = true,
};
+ raw_spin_unlock(&wqe->lock);
while (io_acct_cancel_pending_work(wqe, acct, &match))
- raw_spin_lock(&wqe->lock);
+ ;
+ } else {
+ raw_spin_unlock(&wqe->lock);
}
- raw_spin_unlock(&wqe->lock);
io_worker_ref_put(wqe->wq);
kfree(worker);
return;
@@ -914,6 +914,7 @@ static bool io_wq_work_match_item(struct io_wq_work *work, void *data)
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
{
struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
+ struct io_cb_cancel_data match;
unsigned work_flags = work->flags;
bool do_create;
@@ -927,10 +928,12 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
return;
}
- raw_spin_lock(&wqe->lock);
+ raw_spin_lock(&acct->lock);
io_wqe_insert_work(wqe, work);
clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+ raw_spin_unlock(&acct->lock);
+ raw_spin_lock(&wqe->lock);
rcu_read_lock();
do_create = !io_wqe_activate_free_worker(wqe, acct);
rcu_read_unlock();
@@ -946,18 +949,18 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
return;
raw_spin_lock(&wqe->lock);
- /* fatal condition, failed to create the first worker */
- if (!acct->nr_workers) {
- struct io_cb_cancel_data match = {
- .fn = io_wq_work_match_item,
- .data = work,
- .cancel_all = false,
- };
-
- if (io_acct_cancel_pending_work(wqe, acct, &match))
- raw_spin_lock(&wqe->lock);
+ if (acct->nr_workers) {
+ raw_spin_unlock(&wqe->lock);
+ return;
}
raw_spin_unlock(&wqe->lock);
+
+ /* fatal condition, failed to create the first worker */
+ match.fn = io_wq_work_match_item;
+ match.data = work;
+ match.cancel_all = false;
+
+ io_acct_cancel_pending_work(wqe, acct, &match);
}
}
@@ -1032,22 +1035,23 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
struct io_wqe_acct *acct,
struct io_cb_cancel_data *match)
- __releases(wqe->lock)
{
struct io_wq_work_node *node, *prev;
struct io_wq_work *work;
+ raw_spin_lock(&acct->lock);
wq_list_for_each(node, prev, &acct->work_list) {
work = container_of(node, struct io_wq_work, list);
if (!match->fn(work, match->data))
continue;
io_wqe_remove_pending(wqe, work, prev);
- raw_spin_unlock(&wqe->lock);
+ raw_spin_unlock(&acct->lock);
io_run_cancel(work, wqe);
match->nr_pending++;
/* not safe to continue after unlock */
return true;
}
+ raw_spin_unlock(&acct->lock);
return false;
}
@@ -1061,7 +1065,6 @@ retry:
struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
if (io_acct_cancel_pending_work(wqe, acct, match)) {
- raw_spin_lock(&wqe->lock);
if (match->cancel_all)
goto retry;
break;
@@ -1103,13 +1106,11 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
- raw_spin_lock(&wqe->lock);
io_wqe_cancel_pending_work(wqe, &match);
- if (match.nr_pending && !match.cancel_all) {
- raw_spin_unlock(&wqe->lock);
+ if (match.nr_pending && !match.cancel_all)
return IO_WQ_CANCEL_OK;
- }
+ raw_spin_lock(&wqe->lock);
io_wqe_cancel_running_work(wqe, &match);
raw_spin_unlock(&wqe->lock);
if (match.nr_running && !match.cancel_all)
@@ -1190,6 +1191,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
acct->index = i;
atomic_set(&acct->nr_running, 0);
INIT_WQ_LIST(&acct->work_list);
+ raw_spin_lock_init(&acct->lock);
}
wqe->wq = wq;
raw_spin_lock_init(&wqe->lock);
@@ -1282,9 +1284,7 @@ static void io_wq_destroy(struct io_wq *wq)
.fn = io_wq_work_match_all,
.cancel_all = true,
};
- raw_spin_lock(&wqe->lock);
io_wqe_cancel_pending_work(wqe, &match);
- raw_spin_unlock(&wqe->lock);
free_cpumask_var(wqe->cpu_mask);
kfree(wqe);
}
@@ -1376,7 +1376,7 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count)
BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND);
BUILD_BUG_ON((int) IO_WQ_ACCT_NR != 2);
- for (i = 0; i < 2; i++) {
+ for (i = 0; i < IO_WQ_ACCT_NR; i++) {
if (new_count[i] > task_rlimit(current, RLIMIT_NPROC))
new_count[i] = task_rlimit(current, RLIMIT_NPROC);
}
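The io-wq rework above splits the single wqe->lock: each io_wqe_acct now carries its own raw spinlock protecting only its work_list, so enqueue, dequeue and cancellation take the per-class lock while wqe->lock shrinks to worker bookkeeping. A standalone pthread sketch of that split (all types mocked; only the field names and IO_WQ_ACCT_NR come from the diff):

/* Mocked sketch of the per-acct lock split; pthread spinlocks stand in
 * for raw_spinlock_t. */
#include <pthread.h>
#include <stdbool.h>

#define IO_WQ_ACCT_NR 2

struct work_node { struct work_node *next; };

struct io_wqe_acct_mock {
	pthread_spinlock_t lock;	/* protects work_list only */
	struct work_node *work_list;
};

struct io_wqe_mock {
	pthread_spinlock_t lock;	/* now just worker bookkeeping */
	struct io_wqe_acct_mock acct[IO_WQ_ACCT_NR];
};

/* mirrors io_acct_run_queue(): peek under the fine-grained lock */
static bool acct_has_work(struct io_wqe_acct_mock *acct)
{
	bool ret;

	pthread_spin_lock(&acct->lock);
	ret = acct->work_list != NULL;
	pthread_spin_unlock(&acct->lock);
	return ret;
}

int main(void)
{
	struct io_wqe_mock wqe;

	pthread_spin_init(&wqe.lock, PTHREAD_PROCESS_PRIVATE);
	for (int i = 0; i < IO_WQ_ACCT_NR; i++) {
		pthread_spin_init(&wqe.acct[i].lock, PTHREAD_PROCESS_PRIVATE);
		wqe.acct[i].work_list = NULL;
	}
	return acct_has_work(&wqe.acct[0]) ? 1 : 0;
}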
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e54c4127422e..496a2af7d12c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -63,6 +63,7 @@
#include <net/sock.h>
#include <net/af_unix.h>
#include <net/scm.h>
+#include <net/busy_poll.h>
#include <linux/anon_inodes.h>
#include <linux/sched/mm.h>
#include <linux/uaccess.h>
@@ -263,11 +264,18 @@ struct io_rsrc_data {
bool quiesce;
};
+struct io_buffer_list {
+ struct list_head list;
+ struct list_head buf_list;
+ __u16 bgid;
+};
+
struct io_buffer {
struct list_head list;
__u64 addr;
__u32 len;
__u16 bid;
+ __u16 bgid;
};
struct io_restriction {
@@ -326,6 +334,14 @@ struct io_submit_state {
struct blk_plug plug;
};
+struct io_ev_fd {
+ struct eventfd_ctx *cq_ev_fd;
+ unsigned int eventfd_async: 1;
+ struct rcu_head rcu;
+};
+
+#define IO_BUFFERS_HASH_BITS 5
+
struct io_ring_ctx {
/* const or read-mostly hot data */
struct {
@@ -335,11 +351,11 @@ struct io_ring_ctx {
unsigned int flags;
unsigned int compat: 1;
unsigned int drain_next: 1;
- unsigned int eventfd_async: 1;
unsigned int restricted: 1;
unsigned int off_timeout_used: 1;
unsigned int drain_active: 1;
unsigned int drain_disabled: 1;
+ unsigned int has_evfd: 1;
} ____cacheline_aligned_in_smp;
/* submission data */
@@ -378,7 +394,9 @@ struct io_ring_ctx {
struct list_head timeout_list;
struct list_head ltimeout_list;
struct list_head cq_overflow_list;
- struct xarray io_buffers;
+ struct list_head *io_buffers;
+ struct list_head io_buffers_cache;
+ struct list_head apoll_cache;
struct xarray personalities;
u32 pers_next;
unsigned sq_thread_idle;
@@ -395,11 +413,16 @@ struct io_ring_ctx {
struct list_head sqd_list;
unsigned long check_cq_overflow;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ /* used to track busy poll napi_id */
+ struct list_head napi_list;
+ spinlock_t napi_lock; /* napi_list lock */
+#endif
struct {
unsigned cached_cq_tail;
unsigned cq_entries;
- struct eventfd_ctx *cq_ev_fd;
+ struct io_ev_fd __rcu *io_ev_fd;
struct wait_queue_head cq_wait;
unsigned cq_extra;
atomic_t cq_timeouts;
@@ -421,6 +444,8 @@ struct io_ring_ctx {
struct hlist_head *cancel_hash;
unsigned cancel_hash_bits;
bool poll_multi_queue;
+
+ struct list_head io_buffers_comp;
} ____cacheline_aligned_in_smp;
struct io_restriction restrictions;
@@ -436,6 +461,8 @@ struct io_ring_ctx {
struct llist_head rsrc_put_llist;
struct list_head rsrc_ref_list;
spinlock_t rsrc_ref_lock;
+
+ struct list_head io_buffers_pages;
};
/* Keep this last, we don't need it for the fast path */
@@ -461,6 +488,11 @@ struct io_ring_ctx {
};
};
+/*
+ * Arbitrary limit, can be raised if need be
+ */
+#define IO_RINGFD_REG_MAX 16
+
struct io_uring_task {
/* submission side */
int cached_refs;
@@ -476,6 +508,7 @@ struct io_uring_task {
struct io_wq_work_list task_list;
struct io_wq_work_list prior_task_list;
struct callback_head task_work;
+ struct file **registered_rings;
bool task_running;
};
@@ -642,7 +675,7 @@ struct io_statx {
int dfd;
unsigned int mask;
unsigned int flags;
- const char __user *filename;
+ struct filename *filename;
struct statx __user *buffer;
};
@@ -690,6 +723,12 @@ struct io_hardlink {
int flags;
};
+struct io_msg {
+ struct file *file;
+ u64 user_data;
+ u32 len;
+};
+
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -741,6 +780,8 @@ enum {
REQ_F_ARM_LTIMEOUT_BIT,
REQ_F_ASYNC_DATA_BIT,
REQ_F_SKIP_LINK_CQES_BIT,
+ REQ_F_SINGLE_POLL_BIT,
+ REQ_F_DOUBLE_POLL_BIT,
/* keep async read/write and isreg together and in order */
REQ_F_SUPPORT_NOWAIT_BIT,
REQ_F_ISREG_BIT,
@@ -799,6 +840,10 @@ enum {
REQ_F_ASYNC_DATA = BIT(REQ_F_ASYNC_DATA_BIT),
/* don't post CQEs while failing linked requests */
REQ_F_SKIP_LINK_CQES = BIT(REQ_F_SKIP_LINK_CQES_BIT),
+ /* single poll may be active */
+ REQ_F_SINGLE_POLL = BIT(REQ_F_SINGLE_POLL_BIT),
+ /* double poll may be active */
+ REQ_F_DOUBLE_POLL = BIT(REQ_F_DOUBLE_POLL_BIT),
};
struct async_poll {
@@ -825,7 +870,7 @@ enum {
* NOTE! Each of the iocb union members has the file pointer
* as the first entry in their struct definition. So you can
* access the file pointer through any of the sub-structs,
- * or directly as just 'ki_filp' in this struct.
+ * or directly as just 'file' in this struct.
*/
struct io_kiocb {
union {
@@ -855,6 +900,7 @@ struct io_kiocb {
struct io_mkdir mkdir;
struct io_symlink symlink;
struct io_hardlink hardlink;
+ struct io_msg msg;
};
u8 opcode;
@@ -877,6 +923,7 @@ struct io_kiocb {
/* used by request caches, completion batching and iopoll */
struct io_wq_work_node comp_list;
atomic_t refs;
+ atomic_t poll_refs;
struct io_kiocb *link;
struct io_task_work io_task_work;
/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
@@ -885,12 +932,11 @@ struct io_kiocb {
struct async_poll *apoll;
/* opcode allocated if it needs to store data for async defer */
void *async_data;
- struct io_wq_work work;
/* custom credentials, valid IFF REQ_F_CREDS is set */
- const struct cred *creds;
/* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
struct io_buffer *kbuf;
- atomic_t poll_refs;
+ const struct cred *creds;
+ struct io_wq_work work;
};
struct io_tctx_node {
@@ -1105,6 +1151,9 @@ static const struct io_op_def io_op_defs[] = {
[IORING_OP_MKDIRAT] = {},
[IORING_OP_SYMLINKAT] = {},
[IORING_OP_LINKAT] = {},
+ [IORING_OP_MSG_RING] = {
+ .needs_file = 1,
+ },
};
/* requests with any of those set should undergo io_disarm_next() */
@@ -1141,6 +1190,7 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
+static void io_eventfd_signal(struct io_ring_ctx *ctx);
static struct kmem_cache *req_cachep;
@@ -1267,36 +1317,88 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req,
}
}
-static unsigned int __io_put_kbuf(struct io_kiocb *req)
+static unsigned int __io_put_kbuf(struct io_kiocb *req, struct list_head *list)
{
struct io_buffer *kbuf = req->kbuf;
unsigned int cflags;
- cflags = kbuf->bid << IORING_CQE_BUFFER_SHIFT;
- cflags |= IORING_CQE_F_BUFFER;
+ cflags = IORING_CQE_F_BUFFER | (kbuf->bid << IORING_CQE_BUFFER_SHIFT);
req->flags &= ~REQ_F_BUFFER_SELECTED;
- kfree(kbuf);
+ list_add(&kbuf->list, list);
req->kbuf = NULL;
return cflags;
}
-static inline unsigned int io_put_kbuf(struct io_kiocb *req)
+static inline unsigned int io_put_kbuf_comp(struct io_kiocb *req)
{
if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
return 0;
- return __io_put_kbuf(req);
+ return __io_put_kbuf(req, &req->ctx->io_buffers_comp);
}
-static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl)
+static inline unsigned int io_put_kbuf(struct io_kiocb *req,
+ unsigned issue_flags)
{
- bool got = percpu_ref_tryget(ref);
+ unsigned int cflags;
- /* already at zero, wait for ->release() */
- if (!got)
- wait_for_completion(compl);
- percpu_ref_resurrect(ref);
- if (got)
- percpu_ref_put(ref);
+ if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+ return 0;
+
+ /*
+ * We can add this buffer back to two lists:
+ *
+ * 1) The io_buffers_cache list. This one is protected by the
+ * ctx->uring_lock. If we already hold this lock, add back to this
+ * list as we can grab it from issue as well.
+ * 2) The io_buffers_comp list. This one is protected by the
+ * ctx->completion_lock.
+ *
+ * We migrate buffers from the comp_list to the issue cache list
+ * when we need one.
+ */
+ if (issue_flags & IO_URING_F_UNLOCKED) {
+ struct io_ring_ctx *ctx = req->ctx;
+
+ spin_lock(&ctx->completion_lock);
+ cflags = __io_put_kbuf(req, &ctx->io_buffers_comp);
+ spin_unlock(&ctx->completion_lock);
+ } else {
+ cflags = __io_put_kbuf(req, &req->ctx->io_buffers_cache);
+ }
+
+ return cflags;
+}
+
+static struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
+ unsigned int bgid)
+{
+ struct list_head *hash_list;
+ struct io_buffer_list *bl;
+
+ hash_list = &ctx->io_buffers[hash_32(bgid, IO_BUFFERS_HASH_BITS)];
+ list_for_each_entry(bl, hash_list, list)
+ if (bl->bgid == bgid || bgid == -1U)
+ return bl;
+
+ return NULL;
+}
+
+static void io_kbuf_recycle(struct io_kiocb *req)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_buffer_list *bl;
+ struct io_buffer *buf;
+
+ if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+ return;
+
+ lockdep_assert_held(&ctx->uring_lock);
+
+ buf = req->kbuf;
+ bl = io_buffer_get_list(ctx, buf->bgid);
+ list_add(&buf->list, &bl->buf_list);
+ req->flags &= ~REQ_F_BUFFER_SELECTED;
+ req->kbuf = NULL;
}
static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
@@ -1409,7 +1511,7 @@ static __cold void io_fallback_req_func(struct work_struct *work)
static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
{
struct io_ring_ctx *ctx;
- int hash_bits;
+ int i, hash_bits;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
@@ -1436,6 +1538,13 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
/* set invalid range, so io_import_fixed() fails meeting it */
ctx->dummy_ubuf->ubuf = -1UL;
+ ctx->io_buffers = kcalloc(1U << IO_BUFFERS_HASH_BITS,
+ sizeof(struct list_head), GFP_KERNEL);
+ if (!ctx->io_buffers)
+ goto err;
+ for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++)
+ INIT_LIST_HEAD(&ctx->io_buffers[i]);
+
if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
goto err;
@@ -1444,14 +1553,17 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
init_waitqueue_head(&ctx->sqo_sq_wait);
INIT_LIST_HEAD(&ctx->sqd_list);
INIT_LIST_HEAD(&ctx->cq_overflow_list);
+ INIT_LIST_HEAD(&ctx->io_buffers_cache);
+ INIT_LIST_HEAD(&ctx->apoll_cache);
init_completion(&ctx->ref_comp);
- xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1);
xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
mutex_init(&ctx->uring_lock);
init_waitqueue_head(&ctx->cq_wait);
spin_lock_init(&ctx->completion_lock);
spin_lock_init(&ctx->timeout_lock);
INIT_WQ_LIST(&ctx->iopoll_list);
+ INIT_LIST_HEAD(&ctx->io_buffers_pages);
+ INIT_LIST_HEAD(&ctx->io_buffers_comp);
INIT_LIST_HEAD(&ctx->defer_list);
INIT_LIST_HEAD(&ctx->timeout_list);
INIT_LIST_HEAD(&ctx->ltimeout_list);
@@ -1464,10 +1576,15 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_WQ_LIST(&ctx->locked_free_list);
INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ INIT_LIST_HEAD(&ctx->napi_list);
+ spin_lock_init(&ctx->napi_lock);
+#endif
return ctx;
err:
kfree(ctx->dummy_ubuf);
kfree(ctx->cancel_hash);
+ kfree(ctx->io_buffers);
kfree(ctx);
return NULL;
}
@@ -1610,8 +1727,8 @@ static void io_queue_async_work(struct io_kiocb *req, bool *dont_use)
if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
req->work.flags |= IO_WQ_WORK_CANCEL;
- trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
- &req->work, req->flags);
+ trace_io_uring_queue_async_work(ctx, req, req->user_data, req->opcode, req->flags,
+ &req->work, io_wq_is_hashed(&req->work));
io_wq_enqueue(tctx->io_wq, &req->work);
if (link)
io_queue_linked_timeout(link);
@@ -1681,22 +1798,27 @@ static __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
spin_unlock_irq(&ctx->timeout_lock);
}
-static __cold void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
-{
- if (ctx->off_timeout_used)
- io_flush_timeouts(ctx);
- if (ctx->drain_active)
- io_queue_deferred(ctx);
-}
-
static inline void io_commit_cqring(struct io_ring_ctx *ctx)
{
- if (unlikely(ctx->off_timeout_used || ctx->drain_active))
- __io_commit_cqring_flush(ctx);
/* order cqe stores with ring update */
smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
}
+static void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
+{
+ if (ctx->off_timeout_used || ctx->drain_active) {
+ spin_lock(&ctx->completion_lock);
+ if (ctx->off_timeout_used)
+ io_flush_timeouts(ctx);
+ if (ctx->drain_active)
+ io_queue_deferred(ctx);
+ io_commit_cqring(ctx);
+ spin_unlock(&ctx->completion_lock);
+ }
+ if (ctx->has_evfd)
+ io_eventfd_signal(ctx);
+}
+
static inline bool io_sqring_full(struct io_ring_ctx *ctx)
{
struct io_rings *r = ctx->rings;
@@ -1726,23 +1848,34 @@ static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
return &rings->cqes[tail & mask];
}
-static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
+static void io_eventfd_signal(struct io_ring_ctx *ctx)
{
- if (likely(!ctx->cq_ev_fd))
- return false;
+ struct io_ev_fd *ev_fd;
+
+ rcu_read_lock();
+ /*
+ * rcu_dereference ctx->io_ev_fd once and use it both for checking
+ * and for eventfd_signal
+ */
+ ev_fd = rcu_dereference(ctx->io_ev_fd);
+
+ /*
+ * Check again if ev_fd exists in case an io_eventfd_unregister call
+ * completed between the NULL check of ctx->io_ev_fd at the start of
+ * the function and rcu_read_lock.
+ */
+ if (unlikely(!ev_fd))
+ goto out;
if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
- return false;
- return !ctx->eventfd_async || io_wq_current_is_worker();
+ goto out;
+
+ if (!ev_fd->eventfd_async || io_wq_current_is_worker())
+ eventfd_signal(ev_fd->cq_ev_fd, 1);
+out:
+ rcu_read_unlock();
}
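The eventfd context now lives behind an RCU-managed pointer, so io_eventfd_signal() can read it locklessly while unregistration frees it after a grace period. A loose userspace analogue follows, using a C11 acquire load where the kernel uses rcu_dereference(); this mock does not model grace periods, so it is illustration only:

/* Mocked reader side of the io_ev_fd RCU pattern. */
#include <stdatomic.h>
#include <stdio.h>

struct io_ev_fd_mock {
	int cq_ev_fd;			/* would be an eventfd_ctx */
	unsigned eventfd_async : 1;
};

/* stands in for ctx->io_ev_fd (__rcu) */
static _Atomic(struct io_ev_fd_mock *) io_ev_fd_ptr;

static void io_eventfd_signal_mock(void)
{
	/* read the pointer once and only use that copy afterwards */
	struct io_ev_fd_mock *ev_fd =
		atomic_load_explicit(&io_ev_fd_ptr, memory_order_acquire);

	if (!ev_fd)			/* raced with unregister */
		return;
	if (!ev_fd->eventfd_async)
		printf("signal eventfd %d\n", ev_fd->cq_ev_fd);
}

int main(void)
{
	struct io_ev_fd_mock ev = { .cq_ev_fd = 5, .eventfd_async = 0 };

	atomic_store_explicit(&io_ev_fd_ptr, &ev, memory_order_release);
	io_eventfd_signal_mock();
	return 0;
}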
-/*
- * This should only get called when at least one event has been posted.
- * Some applications rely on the eventfd notification count only changing
- * IFF a new CQE has been added to the CQ ring. There's no dependency on
- * 1:1 relationship between how many times this function is called (and
- * hence the eventfd count) and number of CQEs posted to the CQ ring.
- */
-static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+static inline void io_cqring_wake(struct io_ring_ctx *ctx)
{
/*
* wake_up_all() may seem excessive, but io_wake_function() and
@@ -1751,21 +1884,32 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
*/
if (wq_has_sleeper(&ctx->cq_wait))
wake_up_all(&ctx->cq_wait);
- if (io_should_trigger_evfd(ctx))
- eventfd_signal(ctx->cq_ev_fd, 1);
+}
+
+/*
+ * This should only get called when at least one event has been posted.
+ * Some applications rely on the eventfd notification count only changing
+ * IFF a new CQE has been added to the CQ ring. There's no dependency on
+ * 1:1 relationship between how many times this function is called (and
+ * hence the eventfd count) and number of CQEs posted to the CQ ring.
+ */
+static inline void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+{
+ if (unlikely(ctx->off_timeout_used || ctx->drain_active ||
+ ctx->has_evfd))
+ __io_commit_cqring_flush(ctx);
+
+ io_cqring_wake(ctx);
}
static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
{
- /* see waitqueue_active() comment */
- smp_mb();
+ if (unlikely(ctx->off_timeout_used || ctx->drain_active ||
+ ctx->has_evfd))
+ __io_commit_cqring_flush(ctx);
- if (ctx->flags & IORING_SETUP_SQPOLL) {
- if (waitqueue_active(&ctx->cq_wait))
- wake_up_all(&ctx->cq_wait);
- }
- if (io_should_trigger_evfd(ctx))
- eventfd_signal(ctx->cq_ev_fd, 1);
+ if (ctx->flags & IORING_SETUP_SQPOLL)
+ io_cqring_wake(ctx);
}
/* Returns true if there are no backlogged entries after the flush */
@@ -1905,8 +2049,6 @@ static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
{
struct io_uring_cqe *cqe;
- trace_io_uring_complete(ctx, user_data, res, cflags);
-
/*
* If we can't get a cq entry, userspace overflowed the
* submission (by quite a lot). Increment the overflow count in
@@ -1922,16 +2064,23 @@ static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
return io_cqring_event_overflow(ctx, user_data, res, cflags);
}
+static inline bool __io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
+{
+ trace_io_uring_complete(req->ctx, req, req->user_data, res, cflags);
+ return __io_fill_cqe(req->ctx, req->user_data, res, cflags);
+}
+
static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
{
if (!(req->flags & REQ_F_CQE_SKIP))
- __io_fill_cqe(req->ctx, req->user_data, res, cflags);
+ __io_fill_cqe_req(req, res, cflags);
}
static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
s32 res, u32 cflags)
{
ctx->cq_extra++;
+ trace_io_uring_complete(ctx, NULL, user_data, res, cflags);
return __io_fill_cqe(ctx, user_data, res, cflags);
}
@@ -1941,7 +2090,7 @@ static void __io_req_complete_post(struct io_kiocb *req, s32 res,
struct io_ring_ctx *ctx = req->ctx;
if (!(req->flags & REQ_F_CQE_SKIP))
- __io_fill_cqe(ctx, req->user_data, res, cflags);
+ __io_fill_cqe_req(req, res, cflags);
/*
* If we're the last reference to this request, add to our locked
* free_list cache.
@@ -2000,7 +2149,7 @@ static inline void io_req_complete(struct io_kiocb *req, s32 res)
static void io_req_complete_failed(struct io_kiocb *req, s32 res)
{
req_set_fail(req);
- io_req_complete_post(req, res, 0);
+ io_req_complete_post(req, res, io_put_kbuf(req, 0));
}
static void io_req_complete_fail_submit(struct io_kiocb *req)
@@ -2183,7 +2332,9 @@ static void io_fail_links(struct io_kiocb *req)
nxt = link->link;
link->link = NULL;
- trace_io_uring_fail_link(req, link);
+ trace_io_uring_fail_link(req->ctx, req, req->user_data,
+ req->opcode, link);
+
if (!ignore_cqes) {
link->flags &= ~REQ_F_CQE_SKIP;
io_fill_cqe_req(link, res, 0);
@@ -2302,7 +2453,8 @@ static void handle_prev_tw_list(struct io_wq_work_node *node,
if (likely(*uring_locked))
req->io_task_work.func(req, uring_locked);
else
- __io_req_complete_post(req, req->result, io_put_kbuf(req));
+ __io_req_complete_post(req, req->result,
+ io_put_kbuf_comp(req));
node = next;
} while (node);
@@ -2530,8 +2682,16 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
comp_list);
if (!(req->flags & REQ_F_CQE_SKIP))
- __io_fill_cqe(ctx, req->user_data, req->result,
- req->cflags);
+ __io_fill_cqe_req(req, req->result, req->cflags);
+ if ((req->flags & REQ_F_POLLED) && req->apoll) {
+ struct async_poll *apoll = req->apoll;
+
+ if (apoll->double_poll)
+ kfree(apoll->double_poll);
+ list_add(&apoll->poll.wait.entry,
+ &ctx->apoll_cache);
+ req->flags &= ~REQ_F_POLLED;
+ }
}
io_commit_cqring(ctx);
@@ -2653,7 +2813,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
if (unlikely(req->flags & REQ_F_CQE_SKIP))
continue;
- __io_fill_cqe(ctx, req->user_data, req->result, io_put_kbuf(req));
+ __io_fill_cqe_req(req, req->result, io_put_kbuf(req, 0));
nr_events++;
}
@@ -2829,14 +2989,14 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
static inline void io_req_task_complete(struct io_kiocb *req, bool *locked)
{
- unsigned int cflags = io_put_kbuf(req);
int res = req->result;
if (*locked) {
- io_req_complete_state(req, res, cflags);
+ io_req_complete_state(req, res, io_put_kbuf(req, 0));
io_req_add_compl_list(req);
} else {
- io_req_complete_post(req, res, cflags);
+ io_req_complete_post(req, res,
+ io_put_kbuf(req, IO_URING_F_UNLOCKED));
}
}
@@ -2845,7 +3005,8 @@ static void __io_complete_rw(struct io_kiocb *req, long res,
{
if (__io_complete_rw_common(req, res))
return;
- __io_req_complete(req, issue_flags, req->result, io_put_kbuf(req));
+ __io_req_complete(req, issue_flags, req->result,
+ io_put_kbuf(req, issue_flags));
}
static void io_complete_rw(struct kiocb *kiocb, long res)
@@ -3000,14 +3161,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT;
kiocb->ki_pos = READ_ONCE(sqe->off);
- if (kiocb->ki_pos == -1) {
- if (!(file->f_mode & FMODE_STREAM)) {
- req->flags |= REQ_F_CUR_POS;
- kiocb->ki_pos = file->f_pos;
- } else {
- kiocb->ki_pos = 0;
- }
- }
kiocb->ki_flags = iocb_flags(file);
ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
if (unlikely(ret))
@@ -3074,6 +3227,24 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
}
}
+static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
+{
+ struct kiocb *kiocb = &req->rw.kiocb;
+ bool is_stream = req->file->f_mode & FMODE_STREAM;
+
+ if (kiocb->ki_pos == -1) {
+ if (!is_stream) {
+ req->flags |= REQ_F_CUR_POS;
+ kiocb->ki_pos = req->file->f_pos;
+ return &kiocb->ki_pos;
+ } else {
+ kiocb->ki_pos = 0;
+ return NULL;
+ }
+ }
+ return is_stream ? NULL : &kiocb->ki_pos;
+}
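io_prep_rw() no longer snapshots the file position at prep time; the new io_kiocb_update_pos() defers that to actual issue, where ki_pos == -1 means "continue from f_pos" for seekable files and streams always get a NULL ppos. A mocked standalone version of that decision:

/* Mocked sketch of io_kiocb_update_pos(); NULL means "no position". */
#include <stdbool.h>
#include <stdio.h>

struct rw_req_mock {
	long long ki_pos;	/* kiocb->ki_pos; -1 == use current f_pos */
	long long f_pos;	/* file->f_pos */
	bool is_stream;		/* FMODE_STREAM */
	bool use_cur_pos;	/* REQ_F_CUR_POS: write f_pos back later */
};

static long long *update_pos(struct rw_req_mock *req)
{
	if (req->ki_pos == -1) {
		if (!req->is_stream) {
			req->use_cur_pos = true;
			req->ki_pos = req->f_pos;
			return &req->ki_pos;
		}
		req->ki_pos = 0;	/* streams ignore position */
		return NULL;
	}
	return req->is_stream ? NULL : &req->ki_pos;
}

int main(void)
{
	struct rw_req_mock req = { .ki_pos = -1, .f_pos = 4096 };
	long long *ppos = update_pos(&req);

	printf("pos=%lld cur_pos=%d\n", ppos ? *ppos : -1, req.use_cur_pos);
	return 0;
}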
+
static void kiocb_done(struct io_kiocb *req, ssize_t ret,
unsigned int issue_flags)
{
@@ -3096,14 +3267,10 @@ static void kiocb_done(struct io_kiocb *req, ssize_t ret,
if (req->flags & REQ_F_REISSUE) {
req->flags &= ~REQ_F_REISSUE;
- if (io_resubmit_prep(req)) {
+ if (io_resubmit_prep(req))
io_req_task_queue_reissue(req);
- } else {
- req_set_fail(req);
- req->result = ret;
- req->io_task_work.func = io_req_task_complete;
- io_req_task_work_add(req, false);
- }
+ else
+ io_req_task_queue_fail(req, ret);
}
}
@@ -3201,30 +3368,36 @@ static void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock)
mutex_lock(&ctx->uring_lock);
}
+static void io_buffer_add_list(struct io_ring_ctx *ctx,
+ struct io_buffer_list *bl, unsigned int bgid)
+{
+ struct list_head *list;
+
+ list = &ctx->io_buffers[hash_32(bgid, IO_BUFFERS_HASH_BITS)];
+ INIT_LIST_HEAD(&bl->buf_list);
+ bl->bgid = bgid;
+ list_add(&bl->list, list);
+}
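Provided-buffer groups move from an xarray to an array of hashed list heads: the bgid is hashed with hash_32() into one of 1 << IO_BUFFERS_HASH_BITS buckets, and io_buffer_get_list() walks only that bucket. A standalone sketch, with a golden-ratio multiply standing in for the kernel's hash_32():

/* Standalone sketch of the hashed bgid lookup above. */
#include <stdint.h>
#include <stdio.h>

#define IO_BUFFERS_HASH_BITS 5

struct buf_list_mock {
	struct buf_list_mock *next;
	uint16_t bgid;
};

static struct buf_list_mock *buckets[1U << IO_BUFFERS_HASH_BITS];

/* same golden-ratio multiply the kernel's hash_32() uses */
static unsigned hash32(uint32_t val, unsigned bits)
{
	return (val * 0x61C88647u) >> (32 - bits);
}

static struct buf_list_mock *buffer_get_list(uint16_t bgid)
{
	struct buf_list_mock *bl = buckets[hash32(bgid, IO_BUFFERS_HASH_BITS)];

	for (; bl; bl = bl->next)	/* short chain within one bucket */
		if (bl->bgid == bgid)
			return bl;
	return NULL;
}

int main(void)
{
	struct buf_list_mock bl = { .next = NULL, .bgid = 7 };

	buckets[hash32(bl.bgid, IO_BUFFERS_HASH_BITS)] = &bl;
	printf("%s\n", buffer_get_list(7) ? "found" : "missing");
	return 0;
}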
+
static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
int bgid, unsigned int issue_flags)
{
struct io_buffer *kbuf = req->kbuf;
- struct io_buffer *head;
bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_buffer_list *bl;
if (req->flags & REQ_F_BUFFER_SELECTED)
return kbuf;
- io_ring_submit_lock(req->ctx, needs_lock);
+ io_ring_submit_lock(ctx, needs_lock);
- lockdep_assert_held(&req->ctx->uring_lock);
+ lockdep_assert_held(&ctx->uring_lock);
- head = xa_load(&req->ctx->io_buffers, bgid);
- if (head) {
- if (!list_empty(&head->list)) {
- kbuf = list_last_entry(&head->list, struct io_buffer,
- list);
- list_del(&kbuf->list);
- } else {
- kbuf = head;
- xa_erase(&req->ctx->io_buffers, bgid);
- }
+ bl = io_buffer_get_list(ctx, bgid);
+ if (bl && !list_empty(&bl->buf_list)) {
+ kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list);
+ list_del(&kbuf->list);
if (*len > kbuf->len)
*len = kbuf->len;
req->flags |= REQ_F_BUFFER_SELECTED;
@@ -3400,6 +3573,7 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
struct kiocb *kiocb = &req->rw.kiocb;
struct file *file = req->file;
ssize_t ret = 0;
+ loff_t *ppos;
/*
* Don't support polled IO through this interface, and we can't
@@ -3412,6 +3586,8 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
!(kiocb->ki_filp->f_flags & O_NONBLOCK))
return -EAGAIN;
+ ppos = io_kiocb_ppos(kiocb);
+
while (iov_iter_count(iter)) {
struct iovec iovec;
ssize_t nr;
@@ -3425,10 +3601,10 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
if (rw == READ) {
nr = file->f_op->read(file, iovec.iov_base,
- iovec.iov_len, io_kiocb_ppos(kiocb));
+ iovec.iov_len, ppos);
} else {
nr = file->f_op->write(file, iovec.iov_base,
- iovec.iov_len, io_kiocb_ppos(kiocb));
+ iovec.iov_len, ppos);
}
if (nr < 0) {
@@ -3436,13 +3612,15 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
ret = nr;
break;
}
+ ret += nr;
if (!iov_iter_is_bvec(iter)) {
iov_iter_advance(iter, nr);
} else {
- req->rw.len -= nr;
req->rw.addr += nr;
+ req->rw.len -= nr;
+ if (!req->rw.len)
+ break;
}
- ret += nr;
if (nr != iovec.iov_len)
break;
}
@@ -3629,12 +3807,23 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
struct io_async_rw *rw;
ssize_t ret, ret2;
+ loff_t *ppos;
if (!req_has_async_data(req)) {
ret = io_import_iovec(READ, req, &iovec, s, issue_flags);
if (unlikely(ret < 0))
return ret;
} else {
+ /*
+ * Safe and required to re-import if we're using provided
+ * buffers, as we dropped the selected one before retry.
+ */
+ if (req->flags & REQ_F_BUFFER_SELECT) {
+ ret = io_import_iovec(READ, req, &iovec, s, issue_flags);
+ if (unlikely(ret < 0))
+ return ret;
+ }
+
rw = req->async_data;
s = &rw->s;
/*
@@ -3659,7 +3848,9 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
kiocb->ki_flags &= ~IOCB_NOWAIT;
}
- ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result);
+ ppos = io_kiocb_update_pos(req);
+
+ ret = rw_verify_area(READ, req->file, ppos, req->result);
if (unlikely(ret)) {
kfree(iovec);
return ret;
@@ -3669,6 +3860,9 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) {
req->flags &= ~REQ_F_REISSUE;
+ /* if we can poll, just do that */
+ if (req->opcode == IORING_OP_READ && file_can_poll(req->file))
+ return -EAGAIN;
/* IOPOLL retry should happen for io-wq threads */
if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
goto done;
@@ -3758,6 +3952,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
struct kiocb *kiocb = &req->rw.kiocb;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
ssize_t ret, ret2;
+ loff_t *ppos;
if (!req_has_async_data(req)) {
ret = io_import_iovec(WRITE, req, &iovec, s, issue_flags);
@@ -3788,7 +3983,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
kiocb->ki_flags &= ~IOCB_NOWAIT;
}
- ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result);
+ ppos = io_kiocb_update_pos(req);
+
+ ret = rw_verify_area(WRITE, req->file, ppos, req->result);
if (unlikely(ret))
goto out_free;
@@ -4235,6 +4432,45 @@ static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
return 0;
}
+static int io_msg_ring_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ if (unlikely(sqe->addr || sqe->ioprio || sqe->rw_flags ||
+ sqe->splice_fd_in || sqe->buf_index || sqe->personality))
+ return -EINVAL;
+
+ if (req->file->f_op != &io_uring_fops)
+ return -EBADFD;
+
+ req->msg.user_data = READ_ONCE(sqe->off);
+ req->msg.len = READ_ONCE(sqe->len);
+ return 0;
+}
+
+static int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_ring_ctx *target_ctx;
+ struct io_msg *msg = &req->msg;
+ int ret = -EOVERFLOW;
+ bool filled;
+
+ target_ctx = req->file->private_data;
+
+ spin_lock(&target_ctx->completion_lock);
+ filled = io_fill_cqe_aux(target_ctx, msg->user_data, msg->len,
+ IORING_CQE_F_MSG);
+ io_commit_cqring(target_ctx);
+ spin_unlock(&target_ctx->completion_lock);
+
+ if (filled) {
+ io_cqring_ev_posted(target_ctx);
+ ret = 0;
+ }
+
+ __io_req_complete(req, issue_flags, ret, 0);
+ return 0;
+}
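io_msg_ring() lets one ring post a CQE into another ring's completion queue, giving threads that each own a ring a cheap message/wakeup channel. Per io_msg_ring_prep() above, the SQE encoding is: opcode IORING_OP_MSG_RING, sqe->fd = the target ring's fd, sqe->off = the user_data to post, sqe->len = the res value. The sketch below assumes liburing grew a matching io_uring_prep_msg_ring() helper (it did around liburing 2.2, but treat the exact signature as an assumption):

/* Hypothetical userspace use of IORING_OP_MSG_RING via liburing; callers
 * must have set up two rings, src and the one behind target_ring_fd. */
#include <liburing.h>
#include <stdio.h>

/* Post user_data=0xcafe, res=42 into the target ring; our own ring also
 * gets a CQE for the MSG_RING op itself. */
static int send_ring_msg(struct io_uring *src, int target_ring_fd)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(src);
	struct io_uring_cqe *cqe;
	int ret;

	if (!sqe)
		return -1;
	io_uring_prep_msg_ring(sqe, target_ring_fd, 42, 0xcafe, 0);
	ret = io_uring_submit(src);
	if (ret < 0)
		return ret;
	ret = io_uring_wait_cqe(src, &cqe);
	if (ret == 0) {
		printf("msg_ring completed: res=%d\n", cqe->res);
		io_uring_cqe_seen(src, cqe);
	}
	return ret;
}

The receiving side just reaps an ordinary CQE carrying the given user_data and res, flagged IORING_CQE_F_MSG as posted by io_fill_cqe_aux() above.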
+
static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -4458,8 +4694,8 @@ static int io_remove_buffers_prep(struct io_kiocb *req,
return 0;
}
-static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
- int bgid, unsigned nbufs)
+static int __io_remove_buffers(struct io_ring_ctx *ctx,
+ struct io_buffer_list *bl, unsigned nbufs)
{
unsigned i = 0;
@@ -4468,19 +4704,16 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
return 0;
/* the head kbuf is the list itself */
- while (!list_empty(&buf->list)) {
+ while (!list_empty(&bl->buf_list)) {
struct io_buffer *nxt;
- nxt = list_first_entry(&buf->list, struct io_buffer, list);
+ nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
list_del(&nxt->list);
- kfree(nxt);
if (++i == nbufs)
return i;
cond_resched();
}
i++;
- kfree(buf);
- xa_erase(&ctx->io_buffers, bgid);
return i;
}
@@ -4489,7 +4722,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_provide_buf *p = &req->pbuf;
struct io_ring_ctx *ctx = req->ctx;
- struct io_buffer *head;
+ struct io_buffer_list *bl;
int ret = 0;
bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
@@ -4498,9 +4731,9 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
lockdep_assert_held(&ctx->uring_lock);
ret = -ENOENT;
- head = xa_load(&ctx->io_buffers, p->bgid);
- if (head)
- ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
+ bl = io_buffer_get_list(ctx, p->bgid);
+ if (bl)
+ ret = __io_remove_buffers(ctx, bl, p->nbufs);
if (ret < 0)
req_set_fail(req);
@@ -4545,38 +4778,80 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
return 0;
}
-static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
+static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
+{
+ struct io_buffer *buf;
+ struct page *page;
+ int bufs_in_page;
+
+ /*
+ * Completions that don't happen inline (eg not under uring_lock) will
+ * add to ->io_buffers_comp. If we don't have any free buffers, check
+ * the completion list and splice those entries first.
+ */
+ if (!list_empty_careful(&ctx->io_buffers_comp)) {
+ spin_lock(&ctx->completion_lock);
+ if (!list_empty(&ctx->io_buffers_comp)) {
+ list_splice_init(&ctx->io_buffers_comp,
+ &ctx->io_buffers_cache);
+ spin_unlock(&ctx->completion_lock);
+ return 0;
+ }
+ spin_unlock(&ctx->completion_lock);
+ }
+
+ /*
+ * No free buffers and no completion entries either. Allocate a new
+ * page worth of buffer entries and add those to our freelist.
+ */
+ page = alloc_page(GFP_KERNEL_ACCOUNT);
+ if (!page)
+ return -ENOMEM;
+
+ list_add(&page->lru, &ctx->io_buffers_pages);
+
+ buf = page_address(page);
+ bufs_in_page = PAGE_SIZE / sizeof(*buf);
+ while (bufs_in_page) {
+ list_add_tail(&buf->list, &ctx->io_buffers_cache);
+ buf++;
+ bufs_in_page--;
+ }
+
+ return 0;
+}
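io_refill_buffer_cache() replaces the old kmalloc-per-buffer scheme: when the cache and the completion list are both empty, it allocates one page and carves it into io_buffer entries for the freelist. A standalone sketch of the page-carving step, with malloc() standing in for alloc_page():

/* Standalone sketch of carving one page into freelist entries. */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE_MOCK 4096

struct io_buffer_mock {
	struct io_buffer_mock *next;	/* list_head in the kernel */
	unsigned long addr;
	unsigned int len;
};

static struct io_buffer_mock *free_cache;

static int refill_buffer_cache(void)
{
	struct io_buffer_mock *buf = malloc(PAGE_SIZE_MOCK);
	size_t n = PAGE_SIZE_MOCK / sizeof(*buf);

	if (!buf)
		return -1;
	/* every slot in the page becomes one free buffer entry */
	for (size_t i = 0; i < n; i++, buf++) {
		buf->next = free_cache;
		free_cache = buf;
	}
	return 0;
}

int main(void)
{
	size_t count = 0;

	if (refill_buffer_cache())
		return 1;
	for (struct io_buffer_mock *b = free_cache; b; b = b->next)
		count++;
	printf("cached %zu entries from one %d-byte page\n", count,
	       PAGE_SIZE_MOCK);
	return 0;
}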
+
+static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
+ struct io_buffer_list *bl)
{
struct io_buffer *buf;
u64 addr = pbuf->addr;
int i, bid = pbuf->bid;
for (i = 0; i < pbuf->nbufs; i++) {
- buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
- if (!buf)
+ if (list_empty(&ctx->io_buffers_cache) &&
+ io_refill_buffer_cache(ctx))
break;
-
+ buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer,
+ list);
+ list_move_tail(&buf->list, &bl->buf_list);
buf->addr = addr;
buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
buf->bid = bid;
+ buf->bgid = pbuf->bgid;
addr += pbuf->len;
bid++;
- if (!*head) {
- INIT_LIST_HEAD(&buf->list);
- *head = buf;
- } else {
- list_add_tail(&buf->list, &(*head)->list);
- }
+ cond_resched();
}
- return i ? i : -ENOMEM;
+ return i ? 0 : -ENOMEM;
}
static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_provide_buf *p = &req->pbuf;
struct io_ring_ctx *ctx = req->ctx;
- struct io_buffer *head, *list;
+ struct io_buffer_list *bl;
int ret = 0;
bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
@@ -4584,14 +4859,18 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
lockdep_assert_held(&ctx->uring_lock);
- list = head = xa_load(&ctx->io_buffers, p->bgid);
-
- ret = io_add_buffers(p, &head);
- if (ret >= 0 && !list) {
- ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL);
- if (ret < 0)
- __io_remove_buffers(ctx, head, p->bgid, -1U);
+ bl = io_buffer_get_list(ctx, p->bgid);
+ if (unlikely(!bl)) {
+ bl = kmalloc(sizeof(*bl), GFP_KERNEL);
+ if (!bl) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ io_buffer_add_list(ctx, bl, p->bgid);
}
+
+ ret = io_add_buffers(ctx, p, bl);
+err:
if (ret < 0)
req_set_fail(req);
/* complete before unlock, IOPOLL may need the lock */
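/*
 * Editor's illustration, not part of the patch: a minimal liburing
 * sketch of how an application feeds a buffer group to
 * IORING_OP_PROVIDE_BUFFERS, which is the path reworked above. The
 * group id, buffer count and sizes are arbitrary example values, and
 * the ring is assumed to be set up already.
 */
#include <liburing.h>
#include <stdlib.h>

static int provide_buffers(struct io_uring *ring)
{
        const int nr_bufs = 64, buf_len = 4096, bgid = 7;
        struct io_uring_sqe *sqe;
        void *base;

        if (posix_memalign(&base, 4096, (size_t)nr_bufs * buf_len))
                return -1;
        sqe = io_uring_get_sqe(ring);
        if (!sqe)
                return -1;
        /* the kernel slices [base, base + nr*len) into nr buffers, bid 0.. */
        io_uring_prep_provide_buffers(sqe, base, buf_len, nr_bufs, bgid, 0);
        return io_uring_submit(ring);
}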
@@ -4721,6 +5000,8 @@ static int io_fadvise(struct io_kiocb *req, unsigned int issue_flags)
static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
+ const char __user *path;
+
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
@@ -4730,10 +5011,22 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
req->statx.dfd = READ_ONCE(sqe->fd);
req->statx.mask = READ_ONCE(sqe->len);
- req->statx.filename = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ path = u64_to_user_ptr(READ_ONCE(sqe->addr));
req->statx.buffer = u64_to_user_ptr(READ_ONCE(sqe->addr2));
req->statx.flags = READ_ONCE(sqe->statx_flags);
+ req->statx.filename = getname_flags(path,
+ getname_statx_lookup_flags(req->statx.flags),
+ NULL);
+
+ if (IS_ERR(req->statx.filename)) {
+ int ret = PTR_ERR(req->statx.filename);
+
+ req->statx.filename = NULL;
+ return ret;
+ }
+
+ req->flags |= REQ_F_NEED_CLEANUP;
return 0;
}
@@ -5183,7 +5476,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
if (kmsg->free_iov)
kfree(kmsg->free_iov);
req->flags &= ~REQ_F_NEED_CLEANUP;
- __io_req_complete(req, issue_flags, ret, io_put_kbuf(req));
+ __io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
return 0;
}
@@ -5228,7 +5521,6 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
min_ret = iov_iter_count(&msg.msg_iter);
ret = sock_recvmsg(sock, &msg, flags);
-out_free:
if (ret < min_ret) {
if (ret == -EAGAIN && force_nonblock)
return -EAGAIN;
@@ -5236,10 +5528,11 @@ out_free:
ret = -EINTR;
req_set_fail(req);
} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
+out_free:
req_set_fail(req);
}
- __io_req_complete(req, issue_flags, ret, io_put_kbuf(req));
+ __io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
return 0;
}
@@ -5258,8 +5551,7 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
accept->nofile = rlimit(RLIMIT_NOFILE);
accept->file_slot = READ_ONCE(sqe->file_index);
- if (accept->file_slot && ((req->open.how.flags & O_CLOEXEC) ||
- (accept->flags & SOCK_CLOEXEC)))
+ if (accept->file_slot && (accept->flags & SOCK_CLOEXEC))
return -EINVAL;
if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
return -EINVAL;
@@ -5399,6 +5691,108 @@ IO_NETOP_FN(send);
IO_NETOP_FN(recv);
#endif /* CONFIG_NET */
+#ifdef CONFIG_NET_RX_BUSY_POLL
+
+#define NAPI_TIMEOUT (60 * SEC_CONVERSION)
+
+struct napi_entry {
+ struct list_head list;
+ unsigned int napi_id;
+ unsigned long timeout;
+};
+
+/*
+ * Add busy poll NAPI ID from sk.
+ */
+static void io_add_napi(struct file *file, struct io_ring_ctx *ctx)
+{
+ unsigned int napi_id;
+ struct socket *sock;
+ struct sock *sk;
+ struct napi_entry *ne;
+
+ if (!net_busy_loop_on())
+ return;
+
+ sock = sock_from_file(file);
+ if (!sock)
+ return;
+
+ sk = sock->sk;
+ if (!sk)
+ return;
+
+ napi_id = READ_ONCE(sk->sk_napi_id);
+
+ /* Non-NAPI IDs can be rejected */
+ if (napi_id < MIN_NAPI_ID)
+ return;
+
+ spin_lock(&ctx->napi_lock);
+ list_for_each_entry(ne, &ctx->napi_list, list) {
+ if (ne->napi_id == napi_id) {
+ ne->timeout = jiffies + NAPI_TIMEOUT;
+ goto out;
+ }
+ }
+
+ ne = kmalloc(sizeof(*ne), GFP_NOWAIT);
+ if (!ne)
+ goto out;
+
+ ne->napi_id = napi_id;
+ ne->timeout = jiffies + NAPI_TIMEOUT;
+ list_add_tail(&ne->list, &ctx->napi_list);
+out:
+ spin_unlock(&ctx->napi_lock);
+}
+
+static inline void io_check_napi_entry_timeout(struct napi_entry *ne)
+{
+ if (time_after(jiffies, ne->timeout)) {
+ list_del(&ne->list);
+ kfree(ne);
+ }
+}
+
+/*
+ * Busy poll if globally on and supporting sockets found
+ */
+static bool io_napi_busy_loop(struct list_head *napi_list)
+{
+ struct napi_entry *ne, *n;
+
+ list_for_each_entry_safe(ne, n, napi_list, list) {
+ napi_busy_loop(ne->napi_id, NULL, NULL, true,
+ BUSY_POLL_BUDGET);
+ io_check_napi_entry_timeout(ne);
+ }
+ return !list_empty(napi_list);
+}
+
+static void io_free_napi_list(struct io_ring_ctx *ctx)
+{
+ spin_lock(&ctx->napi_lock);
+ while (!list_empty(&ctx->napi_list)) {
+ struct napi_entry *ne =
+ list_first_entry(&ctx->napi_list, struct napi_entry,
+ list);
+
+ list_del(&ne->list);
+ kfree(ne);
+ }
+ spin_unlock(&ctx->napi_lock);
+}
+#else
+static inline void io_add_napi(struct file *file, struct io_ring_ctx *ctx)
+{
+}
+
+static inline void io_free_napi_list(struct io_ring_ctx *ctx)
+{
+}
+#endif /* CONFIG_NET_RX_BUSY_POLL */
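/*
 * Editor's illustration, not part of the patch: io_add_napi() is a
 * no-op unless busy polling is enabled system-wide (net_busy_loop_on(),
 * i.e. a non-zero net.core.busy_poll sysctl) and the socket carries a
 * valid NAPI ID. A hypothetical helper to flip the sysctl from C:
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int enable_net_busy_poll(const char *usec)
{
        int fd = open("/proc/sys/net/core/busy_poll", O_WRONLY);
        ssize_t ret;

        if (fd < 0)
                return -1;
        ret = write(fd, usec, strlen(usec));        /* e.g. "200" */
        close(fd);
        return ret < 0 ? -1 : 0;
}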
+
struct io_poll_table {
struct poll_table_struct pt;
struct io_kiocb *req;
@@ -5474,8 +5868,12 @@ static inline void io_poll_remove_entry(struct io_poll_iocb *poll)
static void io_poll_remove_entries(struct io_kiocb *req)
{
- struct io_poll_iocb *poll = io_poll_get_single(req);
- struct io_poll_iocb *poll_double = io_poll_get_double(req);
+ /*
+ * Nothing to do if neither of those flags is set. Avoid dipping
+ * into the poll/apoll/double cachelines if we can.
+ */
+ if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
+ return;
/*
* While we hold the waitqueue lock and the waitqueue is nonempty,
@@ -5493,9 +5891,10 @@ static void io_poll_remove_entries(struct io_kiocb *req)
* In that case, only RCU prevents the queue memory from being freed.
*/
rcu_read_lock();
- io_poll_remove_entry(poll);
- if (poll_double)
- io_poll_remove_entry(poll_double);
+ if (req->flags & REQ_F_SINGLE_POLL)
+ io_poll_remove_entry(io_poll_get_single(req));
+ if (req->flags & REQ_F_DOUBLE_POLL)
+ io_poll_remove_entry(io_poll_get_double(req));
rcu_read_unlock();
}
@@ -5527,13 +5926,13 @@ static int io_poll_check_events(struct io_kiocb *req)
return -ECANCELED;
if (!req->result) {
- struct poll_table_struct pt = { ._key = poll->events };
+ struct poll_table_struct pt = { ._key = req->cflags };
- req->result = vfs_poll(req->file, &pt) & poll->events;
+ req->result = vfs_poll(req->file, &pt) & req->cflags;
}
/* multishot, just fill a CQE and proceed */
- if (req->result && !(poll->events & EPOLLONESHOT)) {
+ if (req->result && !(req->cflags & EPOLLONESHOT)) {
__poll_t mask = mangle_poll(req->result & poll->events);
bool filled;
@@ -5545,6 +5944,7 @@ static int io_poll_check_events(struct io_kiocb *req)
if (unlikely(!filled))
return -ECANCELED;
io_cqring_ev_posted(ctx);
+ io_add_napi(req->file, ctx);
} else if (req->result) {
return 0;
}
@@ -5603,29 +6003,36 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
io_req_complete_failed(req, ret);
}
-static void __io_poll_execute(struct io_kiocb *req, int mask)
+static void __io_poll_execute(struct io_kiocb *req, int mask, int events)
{
req->result = mask;
+ /*
+ * This is useful for a poll that is armed on behalf of another
+ * request, and where the wakeup path could be on a different
+ * CPU. We want to avoid pulling in req->apoll->events for that
+ * case.
+ */
+ req->cflags = events;
if (req->opcode == IORING_OP_POLL_ADD)
req->io_task_work.func = io_poll_task_func;
else
req->io_task_work.func = io_apoll_task_func;
- trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask);
+ trace_io_uring_task_add(req->ctx, req, req->user_data, req->opcode, mask);
io_req_task_work_add(req, false);
}
-static inline void io_poll_execute(struct io_kiocb *req, int res)
+static inline void io_poll_execute(struct io_kiocb *req, int res, int events)
{
if (io_poll_get_ownership(req))
- __io_poll_execute(req, res);
+ __io_poll_execute(req, res, events);
}
static void io_poll_cancel_req(struct io_kiocb *req)
{
io_poll_mark_cancelled(req);
/* kick tw, which should complete the request */
- io_poll_execute(req, 0);
+ io_poll_execute(req, 0, 0);
}
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
@@ -5639,7 +6046,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
if (unlikely(mask & POLLFREE)) {
io_poll_mark_cancelled(req);
/* we have to kick tw in case it's not already */
- io_poll_execute(req, 0);
+ io_poll_execute(req, 0, poll->events);
/*
* If the waitqueue is being freed early but someone is already
@@ -5669,8 +6076,9 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
if (mask && poll->events & EPOLLONESHOT) {
list_del_init(&poll->wait.entry);
poll->head = NULL;
+ req->flags &= ~REQ_F_SINGLE_POLL;
}
- __io_poll_execute(req, mask);
+ __io_poll_execute(req, mask, poll->events);
}
return 1;
}
@@ -5705,12 +6113,14 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
pt->error = -ENOMEM;
return;
}
+ req->flags |= REQ_F_DOUBLE_POLL;
io_init_poll_iocb(poll, first->events, first->wait.func);
*poll_ptr = poll;
if (req->opcode == IORING_OP_POLL_ADD)
req->flags |= REQ_F_ASYNC_DATA;
}
+ req->flags |= REQ_F_SINGLE_POLL;
pt->nr_entries++;
poll->head = head;
poll->wait.private = req;
@@ -5774,9 +6184,10 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
/* can't multishot if failed, just queue the event we've got */
if (unlikely(ipt->error || !ipt->nr_entries))
poll->events |= EPOLLONESHOT;
- __io_poll_execute(req, mask);
+ __io_poll_execute(req, mask, poll->events);
return 0;
}
+ io_add_napi(req->file, req->ctx);
/*
* Release ownership. If someone tried to queue a tw while it was
@@ -5784,7 +6195,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
*/
v = atomic_dec_return(&req->poll_refs);
if (unlikely(v & IO_POLL_REF_MASK))
- __io_poll_execute(req, 0);
+ __io_poll_execute(req, 0, poll->events);
return 0;
}
@@ -5803,7 +6214,7 @@ enum {
IO_APOLL_READY
};
-static int io_arm_poll_handler(struct io_kiocb *req)
+static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
{
const struct io_op_def *def = &io_op_defs[req->opcode];
struct io_ring_ctx *ctx = req->ctx;
@@ -5828,9 +6239,16 @@ static int io_arm_poll_handler(struct io_kiocb *req)
mask |= POLLOUT | POLLWRNORM;
}
- apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
- if (unlikely(!apoll))
- return IO_APOLL_ABORTED;
+ if (!(issue_flags & IO_URING_F_UNLOCKED) &&
+ !list_empty(&ctx->apoll_cache)) {
+ apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
+ poll.wait.entry);
+ list_del_init(&apoll->poll.wait.entry);
+ } else {
+ apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
+ if (unlikely(!apoll))
+ return IO_APOLL_ABORTED;
+ }
apoll->double_poll = NULL;
req->apoll = apoll;
req->flags |= REQ_F_POLLED;
@@ -5840,7 +6258,7 @@ static int io_arm_poll_handler(struct io_kiocb *req)
if (ret || ipt.error)
return ret ? IO_APOLL_READY : IO_APOLL_ABORTED;
- trace_io_uring_poll_arm(ctx, req, req->opcode, req->user_data,
+ trace_io_uring_poll_arm(ctx, req, req->user_data, req->opcode,
mask, apoll->poll.events);
return IO_APOLL_OK;
}
@@ -5975,7 +6393,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
return -EINVAL;
io_req_set_refcount(req);
- poll->events = io_poll_parse_events(sqe, flags);
+ req->cflags = poll->events = io_poll_parse_events(sqe, flags);
return 0;
}
@@ -6092,10 +6510,7 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
if (IS_ERR(req))
return PTR_ERR(req);
-
- req_set_fail(req);
- io_fill_cqe_req(req, -ECANCELED, 0);
- io_put_req_deferred(req);
+ io_req_task_queue_fail(req, -ECANCELED);
return 0;
}
@@ -6568,6 +6983,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return io_symlinkat_prep(req, sqe);
case IORING_OP_LINKAT:
return io_linkat_prep(req, sqe);
+ case IORING_OP_MSG_RING:
+ return io_msg_ring_prep(req, sqe);
}
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -6649,7 +7066,7 @@ fail:
goto queue;
}
- trace_io_uring_defer(ctx, req, req->user_data);
+ trace_io_uring_defer(ctx, req, req->user_data, req->opcode);
de->req = req;
de->seq = seq;
list_add_tail(&de->list, &ctx->defer_list);
@@ -6659,7 +7076,7 @@ fail:
static void io_clean_op(struct io_kiocb *req)
{
if (req->flags & REQ_F_BUFFER_SELECTED)
- io_put_kbuf(req);
+ io_put_kbuf_comp(req);
if (req->flags & REQ_F_NEED_CLEANUP) {
switch (req->opcode) {
@@ -6709,6 +7126,10 @@ static void io_clean_op(struct io_kiocb *req)
putname(req->hardlink.oldpath);
putname(req->hardlink.newpath);
break;
+ case IORING_OP_STATX:
+ if (req->statx.filename)
+ putname(req->statx.filename);
+ break;
}
}
if ((req->flags & REQ_F_POLLED) && req->apoll) {
@@ -6851,6 +7272,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
case IORING_OP_LINKAT:
ret = io_linkat(req, issue_flags);
break;
+ case IORING_OP_MSG_RING:
+ ret = io_msg_ring(req, issue_flags);
+ break;
default:
ret = -EINVAL;
break;
@@ -6926,7 +7350,7 @@ static void io_wq_submit_work(struct io_wq_work *work)
continue;
}
- if (io_arm_poll_handler(req) == IO_APOLL_OK)
+ if (io_arm_poll_handler(req, issue_flags) == IO_APOLL_OK)
return;
/* aborted or ready, in either case retry blocking */
needs_poll = false;
@@ -6983,7 +7407,7 @@ static struct file *io_file_get_normal(struct io_ring_ctx *ctx,
{
struct file *file = fget(fd);
- trace_io_uring_file_get(ctx, fd);
+ trace_io_uring_file_get(ctx, req, req->user_data, fd);
/* we don't allow fixed io_uring files */
if (file && unlikely(file->f_op == &io_uring_fops))
@@ -7072,7 +7496,7 @@ static void io_queue_sqe_arm_apoll(struct io_kiocb *req)
{
struct io_kiocb *linked_timeout = io_prep_linked_timeout(req);
- switch (io_arm_poll_handler(req)) {
+ switch (io_arm_poll_handler(req, 0)) {
case IO_APOLL_READY:
io_req_task_queue(req);
break;
@@ -7081,8 +7505,12 @@ static void io_queue_sqe_arm_apoll(struct io_kiocb *req)
* Queued up for async execution, worker will release
* submit reference when the iocb is actually submitted.
*/
+ io_kbuf_recycle(req);
io_queue_async_work(req, NULL);
break;
+ case IO_APOLL_OK:
+ io_kbuf_recycle(req);
+ break;
}
if (linked_timeout)
@@ -7281,7 +7709,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
ret = io_init_req(ctx, req, sqe);
if (unlikely(ret)) {
- trace_io_uring_req_failed(sqe, ret);
+ trace_io_uring_req_failed(sqe, ctx, req, ret);
/* fail even hard links since we don't submit */
if (link->head) {
@@ -7308,7 +7736,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
}
/* don't need @sqe from now on */
- trace_io_uring_submit_sqe(ctx, req, req->opcode, req->user_data,
+ trace_io_uring_submit_sqe(ctx, req, req->user_data, req->opcode,
req->flags, true,
ctx->flags & IORING_SETUP_SQPOLL);
@@ -7451,8 +7879,14 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
}
/* will complete beyond this point, count as submitted */
submitted++;
- if (io_submit_sqe(ctx, req, sqe))
- break;
+ if (io_submit_sqe(ctx, req, sqe)) {
+ /*
+ * Continue submitting even for sqe failure if the
+ * ring was set up with IORING_SETUP_SUBMIT_ALL
+ */
+ if (!(ctx->flags & IORING_SETUP_SUBMIT_ALL))
+ break;
+ }
} while (submitted < nr);
if (unlikely(submitted != nr)) {
@@ -7519,7 +7953,13 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
!(ctx->flags & IORING_SETUP_R_DISABLED))
ret = io_submit_sqes(ctx, to_submit);
mutex_unlock(&ctx->uring_lock);
-
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ spin_lock(&ctx->napi_lock);
+ if (!list_empty(&ctx->napi_list) &&
+ io_napi_busy_loop(&ctx->napi_list))
+ ++ret;
+ spin_unlock(&ctx->napi_lock);
+#endif
if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait))
wake_up(&ctx->sqo_sq_wait);
if (creds)
@@ -7650,6 +8090,9 @@ struct io_wait_queue {
struct io_ring_ctx *ctx;
unsigned cq_tail;
unsigned nr_timeouts;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ unsigned busy_poll_to;
+#endif
};
static inline bool io_should_wake(struct io_wait_queue *iowq)
@@ -7684,17 +8127,17 @@ static int io_run_task_work_sig(void)
{
if (io_run_task_work())
return 1;
- if (!signal_pending(current))
- return 0;
if (test_thread_flag(TIF_NOTIFY_SIGNAL))
return -ERESTARTSYS;
- return -EINTR;
+ if (task_sigpending(current))
+ return -EINTR;
+ return 0;
}
/* when returns >0, the caller should retry */
static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq,
- signed long *timeout)
+ ktime_t timeout)
{
int ret;
@@ -7706,10 +8149,92 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
if (test_bit(0, &ctx->check_cq_overflow))
return 1;
- *timeout = schedule_timeout(*timeout);
- return !*timeout ? -ETIME : 1;
+ if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS))
+ return -ETIME;
+ return 1;
+}
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+static void io_adjust_busy_loop_timeout(struct timespec64 *ts,
+ struct io_wait_queue *iowq)
+{
+ unsigned busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
+ struct timespec64 pollto = ns_to_timespec64(1000 * (s64)busy_poll_to);
+
+ if (timespec64_compare(ts, &pollto) > 0) {
+ *ts = timespec64_sub(*ts, pollto);
+ iowq->busy_poll_to = busy_poll_to;
+ } else {
+ u64 to = timespec64_to_ns(ts);
+
+ do_div(to, 1000);
+ iowq->busy_poll_to = to;
+ ts->tv_sec = 0;
+ ts->tv_nsec = 0;
+ }
}
+static inline bool io_busy_loop_timeout(unsigned long start_time,
+ unsigned long bp_usec)
+{
+ if (bp_usec) {
+ unsigned long end_time = start_time + bp_usec;
+ unsigned long now = busy_loop_current_time();
+
+ return time_after(now, end_time);
+ }
+ return true;
+}
+
+static bool io_busy_loop_end(void *p, unsigned long start_time)
+{
+ struct io_wait_queue *iowq = p;
+
+ return signal_pending(current) ||
+ io_should_wake(iowq) ||
+ io_busy_loop_timeout(start_time, iowq->busy_poll_to);
+}
+
+static void io_blocking_napi_busy_loop(struct list_head *napi_list,
+ struct io_wait_queue *iowq)
+{
+ unsigned long start_time =
+ list_is_singular(napi_list) ? 0 :
+ busy_loop_current_time();
+
+ do {
+ if (list_is_singular(napi_list)) {
+ struct napi_entry *ne =
+ list_first_entry(napi_list,
+ struct napi_entry, list);
+
+ napi_busy_loop(ne->napi_id, io_busy_loop_end, iowq,
+ true, BUSY_POLL_BUDGET);
+ io_check_napi_entry_timeout(ne);
+ break;
+ }
+ } while (io_napi_busy_loop(napi_list) &&
+ !io_busy_loop_end(iowq, start_time));
+}
+
+static void io_putback_napi_list(struct io_ring_ctx *ctx,
+ struct list_head *napi_list)
+{
+ struct napi_entry *cne, *lne;
+
+ spin_lock(&ctx->napi_lock);
+ list_for_each_entry(cne, &ctx->napi_list, list)
+ list_for_each_entry(lne, napi_list, list)
+ if (cne->napi_id == lne->napi_id) {
+ list_del(&lne->list);
+ kfree(lne);
+ break;
+ }
+ list_splice(napi_list, &ctx->napi_list);
+ spin_unlock(&ctx->napi_lock);
+}
+#endif /* CONFIG_NET_RX_BUSY_POLL */
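/*
 * Editor's note, not part of the patch: a worked example of
 * io_adjust_busy_loop_timeout() with hypothetical numbers. With
 * net.core.busy_poll = 200 (usec) and a 1000us io_uring_enter()
 * timeout, busy_poll_to becomes 200us and the remaining 800us stays
 * in *ts for the normal sleep; with a 100us timeout, all 100us goes
 * to busy polling and *ts is zeroed, so the subsequent hrtimeout
 * expires immediately.
 */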
+
/*
* Wait until events become available, if we don't already have some. The
* application must reap them itself, as they reside on the shared cq ring.
@@ -7720,8 +8245,11 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
{
struct io_wait_queue iowq;
struct io_rings *rings = ctx->rings;
- signed long timeout = MAX_SCHEDULE_TIMEOUT;
+ ktime_t timeout = KTIME_MAX;
int ret;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ LIST_HEAD(local_napi_list);
+#endif
do {
io_cqring_overflow_flush(ctx);
@@ -7731,14 +8259,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
break;
} while (1);
- if (uts) {
- struct timespec64 ts;
-
- if (get_timespec64(&ts, uts))
- return -EFAULT;
- timeout = timespec64_to_jiffies(&ts);
- }
-
if (sig) {
#ifdef CONFIG_COMPAT
if (in_compat_syscall())
@@ -7752,6 +8272,30 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
return ret;
}
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ iowq.busy_poll_to = 0;
+ if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
+ spin_lock(&ctx->napi_lock);
+ list_splice_init(&ctx->napi_list, &local_napi_list);
+ spin_unlock(&ctx->napi_lock);
+ }
+#endif
+ if (uts) {
+ struct timespec64 ts;
+
+ if (get_timespec64(&ts, uts))
+ return -EFAULT;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ if (!list_empty(&local_napi_list))
+ io_adjust_busy_loop_timeout(&ts, &iowq);
+#endif
+ timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
+ }
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ else if (!list_empty(&local_napi_list))
+ iowq.busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
+#endif
+
init_waitqueue_func_entry(&iowq.wq, io_wake_function);
iowq.wq.private = current;
INIT_LIST_HEAD(&iowq.wq.entry);
@@ -7760,6 +8304,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events;
trace_io_uring_cqring_wait(ctx, min_events);
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ if (iowq.busy_poll_to)
+ io_blocking_napi_busy_loop(&local_napi_list, &iowq);
+ if (!list_empty(&local_napi_list))
+ io_putback_napi_list(ctx, &local_napi_list);
+#endif
do {
/* if we can't even flush overflow, don't wait for more */
if (!io_cqring_overflow_flush(ctx)) {
@@ -7768,7 +8318,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
}
prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
TASK_INTERRUPTIBLE);
- ret = io_cqring_wait_schedule(ctx, &iowq, &timeout);
+ ret = io_cqring_wait_schedule(ctx, &iowq, timeout);
finish_wait(&ctx->cq_wait, &iowq.wq);
cond_resched();
} while (ret > 0);
@@ -7822,10 +8372,15 @@ static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref)
struct io_ring_ctx *ctx = node->rsrc_data->ctx;
unsigned long flags;
bool first_add = false;
+ unsigned long delay = HZ;
spin_lock_irqsave(&ctx->rsrc_ref_lock, flags);
node->done = true;
+ /* if we are mid-quiesce then do not delay */
+ if (node->rsrc_data->quiesce)
+ delay = 0;
+
while (!list_empty(&ctx->rsrc_ref_list)) {
node = list_first_entry(&ctx->rsrc_ref_list,
struct io_rsrc_node, node);
@@ -7838,10 +8393,10 @@ static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref)
spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags);
if (first_add)
- mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ);
+ mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay);
}
-static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
+static struct io_rsrc_node *io_rsrc_node_alloc(void)
{
struct io_rsrc_node *ref_node;
@@ -7892,7 +8447,7 @@ static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx)
{
if (ctx->rsrc_backup_node)
return 0;
- ctx->rsrc_backup_node = io_rsrc_node_alloc(ctx);
+ ctx->rsrc_backup_node = io_rsrc_node_alloc();
return ctx->rsrc_backup_node ? 0 : -ENOMEM;
}
@@ -7920,7 +8475,15 @@ static __cold int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
ret = wait_for_completion_interruptible(&data->done);
if (!ret) {
mutex_lock(&ctx->uring_lock);
- break;
+ if (atomic_read(&data->refs) > 0) {
+ /*
+ * it has been revived by another thread while
+ * we were unlocked
+ */
+ mutex_unlock(&ctx->uring_lock);
+ } else {
+ break;
+ }
}
atomic_inc(&data->refs);
@@ -8735,8 +9298,16 @@ static __cold int io_uring_alloc_task_context(struct task_struct *task,
if (unlikely(!tctx))
return -ENOMEM;
+ tctx->registered_rings = kcalloc(IO_RINGFD_REG_MAX,
+ sizeof(struct file *), GFP_KERNEL);
+ if (unlikely(!tctx->registered_rings)) {
+ kfree(tctx);
+ return -ENOMEM;
+ }
+
ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL);
if (unlikely(ret)) {
+ kfree(tctx->registered_rings);
kfree(tctx);
return ret;
}
@@ -8745,6 +9316,7 @@ static __cold int io_uring_alloc_task_context(struct task_struct *task,
if (IS_ERR(tctx->io_wq)) {
ret = PTR_ERR(tctx->io_wq);
percpu_counter_destroy(&tctx->inflight);
+ kfree(tctx->registered_rings);
kfree(tctx);
return ret;
}
@@ -8769,6 +9341,7 @@ void __io_uring_free(struct task_struct *tsk)
WARN_ON_ONCE(tctx->io_wq);
WARN_ON_ONCE(tctx->cached_refs);
+ kfree(tctx->registered_rings);
percpu_counter_destroy(&tctx->inflight);
kfree(tctx);
tsk->io_uring = NULL;
@@ -8928,10 +9501,9 @@ static void io_mem_free(void *ptr)
static void *io_mem_alloc(size_t size)
{
- gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP |
- __GFP_NORETRY | __GFP_ACCOUNT;
+ gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
- return (void *) __get_free_pages(gfp_flags, get_order(size));
+ return (void *) __get_free_pages(gfp, get_order(size));
}
static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
@@ -9346,33 +9918,55 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
return done ? done : err;
}
-static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg)
+static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
+ unsigned int eventfd_async)
{
+ struct io_ev_fd *ev_fd;
__s32 __user *fds = arg;
int fd;
- if (ctx->cq_ev_fd)
+ ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
+ lockdep_is_held(&ctx->uring_lock));
+ if (ev_fd)
return -EBUSY;
if (copy_from_user(&fd, fds, sizeof(*fds)))
return -EFAULT;
- ctx->cq_ev_fd = eventfd_ctx_fdget(fd);
- if (IS_ERR(ctx->cq_ev_fd)) {
- int ret = PTR_ERR(ctx->cq_ev_fd);
+ ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
+ if (!ev_fd)
+ return -ENOMEM;
- ctx->cq_ev_fd = NULL;
+ ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
+ if (IS_ERR(ev_fd->cq_ev_fd)) {
+ int ret = PTR_ERR(ev_fd->cq_ev_fd);
+ kfree(ev_fd);
return ret;
}
-
+ ev_fd->eventfd_async = eventfd_async;
+ ctx->has_evfd = true;
+ rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
return 0;
}
+static void io_eventfd_put(struct rcu_head *rcu)
+{
+ struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
+
+ eventfd_ctx_put(ev_fd->cq_ev_fd);
+ kfree(ev_fd);
+}
+
static int io_eventfd_unregister(struct io_ring_ctx *ctx)
{
- if (ctx->cq_ev_fd) {
- eventfd_ctx_put(ctx->cq_ev_fd);
- ctx->cq_ev_fd = NULL;
+ struct io_ev_fd *ev_fd;
+
+ ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
+ lockdep_is_held(&ctx->uring_lock));
+ if (ev_fd) {
+ ctx->has_evfd = false;
+ rcu_assign_pointer(ctx->io_ev_fd, NULL);
+ call_rcu(&ev_fd->rcu, io_eventfd_put);
return 0;
}
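/*
 * Editor's illustration, not part of the patch: the userspace
 * interface for eventfd registration is unchanged by the RCU rework
 * above. A raw-syscall sketch (error handling trimmed):
 */
#include <sys/eventfd.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/io_uring.h>

static int register_cq_eventfd(int ring_fd)
{
        int efd = eventfd(0, EFD_CLOEXEC);

        if (efd < 0)
                return -1;
        /* IORING_REGISTER_EVENTFD_ASYNC would signal only for async CQEs */
        return syscall(__NR_io_uring_register, ring_fd,
                       IORING_REGISTER_EVENTFD, &efd, 1);
}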
@@ -9381,11 +9975,28 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
static void io_destroy_buffers(struct io_ring_ctx *ctx)
{
- struct io_buffer *buf;
- unsigned long index;
+ int i;
+
+ for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++) {
+ struct list_head *list = &ctx->io_buffers[i];
+
+ while (!list_empty(list)) {
+ struct io_buffer_list *bl;
+
+ bl = list_first_entry(list, struct io_buffer_list, list);
+ __io_remove_buffers(ctx, bl, -1U);
+ list_del(&bl->list);
+ kfree(bl);
+ }
+ }
- xa_for_each(&ctx->io_buffers, index, buf)
- __io_remove_buffers(ctx, buf, index, -1U);
+ while (!list_empty(&ctx->io_buffers_pages)) {
+ struct page *page;
+
+ page = list_first_entry(&ctx->io_buffers_pages, struct page, lru);
+ list_del_init(&page->lru);
+ __free_page(page);
+ }
}
static void io_req_caches_free(struct io_ring_ctx *ctx)
@@ -9416,6 +10027,18 @@ static void io_wait_rsrc_data(struct io_rsrc_data *data)
wait_for_completion(&data->done);
}
+static void io_flush_apoll_cache(struct io_ring_ctx *ctx)
+{
+ struct async_poll *apoll;
+
+ while (!list_empty(&ctx->apoll_cache)) {
+ apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
+ poll.wait.entry);
+ list_del(&apoll->poll.wait.entry);
+ kfree(apoll);
+ }
+}
+
static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
{
io_sq_thread_finish(ctx);
@@ -9437,8 +10060,9 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
__io_sqe_files_unregister(ctx);
if (ctx->rings)
__io_cqring_overflow_flush(ctx, true);
- mutex_unlock(&ctx->uring_lock);
io_eventfd_unregister(ctx);
+ io_flush_apoll_cache(ctx);
+ mutex_unlock(&ctx->uring_lock);
io_destroy_buffers(ctx);
if (ctx->sq_creds)
put_cred(ctx->sq_creds);
@@ -9470,8 +10094,10 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_req_caches_free(ctx);
if (ctx->hash_map)
io_wq_put_hash(ctx->hash_map);
+ io_free_napi_list(ctx);
kfree(ctx->cancel_hash);
kfree(ctx->dummy_ubuf);
+ kfree(ctx->io_buffers);
kfree(ctx);
}
@@ -9970,6 +10596,139 @@ void __io_uring_cancel(bool cancel_all)
io_uring_cancel_generic(cancel_all, NULL);
}
+void io_uring_unreg_ringfd(void)
+{
+ struct io_uring_task *tctx = current->io_uring;
+ int i;
+
+ for (i = 0; i < IO_RINGFD_REG_MAX; i++) {
+ if (tctx->registered_rings[i]) {
+ fput(tctx->registered_rings[i]);
+ tctx->registered_rings[i] = NULL;
+ }
+ }
+}
+
+static int io_ring_add_registered_fd(struct io_uring_task *tctx, int fd,
+ int start, int end)
+{
+ struct file *file;
+ int offset;
+
+ for (offset = start; offset < end; offset++) {
+ offset = array_index_nospec(offset, IO_RINGFD_REG_MAX);
+ if (tctx->registered_rings[offset])
+ continue;
+
+ file = fget(fd);
+ if (!file) {
+ return -EBADF;
+ } else if (file->f_op != &io_uring_fops) {
+ fput(file);
+ return -EOPNOTSUPP;
+ }
+ tctx->registered_rings[offset] = file;
+ return offset;
+ }
+
+ return -EBUSY;
+}
+
+/*
+ * Register a ring fd to avoid fdget/fdput for each io_uring_enter()
+ * invocation. User passes in an array of struct io_uring_rsrc_update
+ * with ->data set to the ring_fd, and ->offset given for the desired
+ * index. If no index is desired, application may set ->offset == -1U
+ * and we'll find an available index. Returns number of entries
+ * successfully processed, or < 0 on error if none were processed.
+ */
+static int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg,
+ unsigned nr_args)
+{
+ struct io_uring_rsrc_update __user *arg = __arg;
+ struct io_uring_rsrc_update reg;
+ struct io_uring_task *tctx;
+ int ret, i;
+
+ if (!nr_args || nr_args > IO_RINGFD_REG_MAX)
+ return -EINVAL;
+
+ mutex_unlock(&ctx->uring_lock);
+ ret = io_uring_add_tctx_node(ctx);
+ mutex_lock(&ctx->uring_lock);
+ if (ret)
+ return ret;
+
+ tctx = current->io_uring;
+ for (i = 0; i < nr_args; i++) {
+ int start, end;
+
+ if (copy_from_user(&reg, &arg[i], sizeof(reg))) {
+ ret = -EFAULT;
+ break;
+ }
+
+ if (reg.offset == -1U) {
+ start = 0;
+ end = IO_RINGFD_REG_MAX;
+ } else {
+ if (reg.offset >= IO_RINGFD_REG_MAX) {
+ ret = -EINVAL;
+ break;
+ }
+ start = reg.offset;
+ end = start + 1;
+ }
+
+ ret = io_ring_add_registered_fd(tctx, reg.data, start, end);
+ if (ret < 0)
+ break;
+
+ reg.offset = ret;
+ if (copy_to_user(&arg[i], &reg, sizeof(reg))) {
+ fput(tctx->registered_rings[reg.offset]);
+ tctx->registered_rings[reg.offset] = NULL;
+ ret = -EFAULT;
+ break;
+ }
+ }
+
+ return i ? i : ret;
+}
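/*
 * Editor's illustration, not part of the patch: a hypothetical
 * raw-syscall sketch of registering a ring fd and entering through
 * the registered index, skipping the per-call fdget()/fdput():
 */
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/io_uring.h>

static int enter_via_registered_fd(int ring_fd, unsigned int to_submit)
{
        struct io_uring_rsrc_update reg = {
                .offset = -1U,          /* let the kernel pick an index */
                .data   = ring_fd,
        };

        /* returns the number of entries processed, 1 on success */
        if (syscall(__NR_io_uring_register, ring_fd,
                    IORING_REGISTER_RING_FDS, &reg, 1) != 1)
                return -1;
        /* reg.offset now holds the registered index */
        return syscall(__NR_io_uring_enter, reg.offset, to_submit, 0,
                       IORING_ENTER_REGISTERED_RING, NULL, 0);
}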
+
+static int io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg,
+ unsigned nr_args)
+{
+ struct io_uring_rsrc_update __user *arg = __arg;
+ struct io_uring_task *tctx = current->io_uring;
+ struct io_uring_rsrc_update reg;
+ int ret = 0, i;
+
+ if (!nr_args || nr_args > IO_RINGFD_REG_MAX)
+ return -EINVAL;
+ if (!tctx)
+ return 0;
+
+ for (i = 0; i < nr_args; i++) {
+ if (copy_from_user(&reg, &arg[i], sizeof(reg))) {
+ ret = -EFAULT;
+ break;
+ }
+ if (reg.offset >= IO_RINGFD_REG_MAX) {
+ ret = -EINVAL;
+ break;
+ }
+
+ reg.offset = array_index_nospec(reg.offset, IO_RINGFD_REG_MAX);
+ if (tctx->registered_rings[reg.offset]) {
+ fput(tctx->registered_rings[reg.offset]);
+ tctx->registered_rings[reg.offset] = NULL;
+ }
+ }
+
+ return i ? i : ret;
+}
+
static void *io_uring_validate_mmap_request(struct file *file,
loff_t pgoff, size_t sz)
{
@@ -10100,12 +10859,28 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
io_run_task_work();
if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
- IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG)))
+ IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG |
+ IORING_ENTER_REGISTERED_RING)))
return -EINVAL;
- f = fdget(fd);
- if (unlikely(!f.file))
- return -EBADF;
+ /*
+ * Ring fd has been registered via IORING_REGISTER_RING_FDS, we
+ * need only dereference our task private array to find it.
+ */
+ if (flags & IORING_ENTER_REGISTERED_RING) {
+ struct io_uring_task *tctx = current->io_uring;
+
+ if (!tctx || fd >= IO_RINGFD_REG_MAX)
+ return -EINVAL;
+ fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
+ f.file = tctx->registered_rings[fd];
+ if (unlikely(!f.file))
+ return -EBADF;
+ } else {
+ f = fdget(fd);
+ if (unlikely(!f.file))
+ return -EBADF;
+ }
ret = -EOPNOTSUPP;
if (unlikely(f.file->f_op != &io_uring_fops))
@@ -10179,7 +10954,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
out:
percpu_ref_put(&ctx->refs);
out_fput:
- fdput(f);
+ if (!(flags & IORING_ENTER_REGISTERED_RING))
+ fdput(f);
return submitted ? submitted : ret;
}
@@ -10597,7 +11373,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
- IORING_SETUP_R_DISABLED))
+ IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL))
return -EINVAL;
return io_uring_create(entries, &p, params);
@@ -10947,61 +11723,6 @@ err:
return ret;
}
-static bool io_register_op_must_quiesce(int op)
-{
- switch (op) {
- case IORING_REGISTER_BUFFERS:
- case IORING_UNREGISTER_BUFFERS:
- case IORING_REGISTER_FILES:
- case IORING_UNREGISTER_FILES:
- case IORING_REGISTER_FILES_UPDATE:
- case IORING_REGISTER_PROBE:
- case IORING_REGISTER_PERSONALITY:
- case IORING_UNREGISTER_PERSONALITY:
- case IORING_REGISTER_FILES2:
- case IORING_REGISTER_FILES_UPDATE2:
- case IORING_REGISTER_BUFFERS2:
- case IORING_REGISTER_BUFFERS_UPDATE:
- case IORING_REGISTER_IOWQ_AFF:
- case IORING_UNREGISTER_IOWQ_AFF:
- case IORING_REGISTER_IOWQ_MAX_WORKERS:
- return false;
- default:
- return true;
- }
-}
-
-static __cold int io_ctx_quiesce(struct io_ring_ctx *ctx)
-{
- long ret;
-
- percpu_ref_kill(&ctx->refs);
-
- /*
- * Drop uring mutex before waiting for references to exit. If another
- * thread is currently inside io_uring_enter() it might need to grab the
- * uring_lock to make progress. If we hold it here across the drain
- * wait, then we can deadlock. It's safe to drop the mutex here, since
- * no new references will come in after we've killed the percpu ref.
- */
- mutex_unlock(&ctx->uring_lock);
- do {
- ret = wait_for_completion_interruptible_timeout(&ctx->ref_comp, HZ);
- if (ret) {
- ret = min(0L, ret);
- break;
- }
-
- ret = io_run_task_work_sig();
- io_req_caches_free(ctx);
- } while (ret >= 0);
- mutex_lock(&ctx->uring_lock);
-
- if (ret)
- io_refs_resurrect(&ctx->refs, &ctx->ref_comp);
- return ret;
-}
-
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
void __user *arg, unsigned nr_args)
__releases(ctx->uring_lock)
@@ -11025,12 +11746,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
return -EACCES;
}
- if (io_register_op_must_quiesce(opcode)) {
- ret = io_ctx_quiesce(ctx);
- if (ret)
- return ret;
- }
-
switch (opcode) {
case IORING_REGISTER_BUFFERS:
ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
@@ -11054,17 +11769,16 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
ret = io_register_files_update(ctx, arg, nr_args);
break;
case IORING_REGISTER_EVENTFD:
- case IORING_REGISTER_EVENTFD_ASYNC:
ret = -EINVAL;
if (nr_args != 1)
break;
- ret = io_eventfd_register(ctx, arg);
- if (ret)
+ ret = io_eventfd_register(ctx, arg, 0);
+ break;
+ case IORING_REGISTER_EVENTFD_ASYNC:
+ ret = -EINVAL;
+ if (nr_args != 1)
break;
- if (opcode == IORING_REGISTER_EVENTFD_ASYNC)
- ctx->eventfd_async = 1;
- else
- ctx->eventfd_async = 0;
+ ret = io_eventfd_register(ctx, arg, 1);
break;
case IORING_UNREGISTER_EVENTFD:
ret = -EINVAL;
@@ -11131,16 +11845,17 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
break;
ret = io_register_iowq_max_workers(ctx, arg);
break;
+ case IORING_REGISTER_RING_FDS:
+ ret = io_ringfd_register(ctx, arg, nr_args);
+ break;
+ case IORING_UNREGISTER_RING_FDS:
+ ret = io_ringfd_unregister(ctx, arg, nr_args);
+ break;
default:
ret = -EINVAL;
break;
}
- if (io_register_op_must_quiesce(opcode)) {
- /* bring the ctx back to life */
- percpu_ref_reinit(&ctx->refs);
- reinit_completion(&ctx->ref_comp);
- }
return ret;
}
@@ -11166,8 +11881,7 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
mutex_lock(&ctx->uring_lock);
ret = __io_uring_register(ctx, opcode, arg, nr_args);
mutex_unlock(&ctx->uring_lock);
- trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs,
- ctx->cq_ev_fd != NULL, ret);
+ trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs, ret);
out_fput:
fdput(f);
return ret;
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 1ed097e94af2..090bf47606ab 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -236,9 +236,6 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
if (!src_file.file)
return -EBADF;
- ret = -EXDEV;
- if (src_file.file->f_path.mnt != dst_file->f_path.mnt)
- goto fdput;
cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff,
olen, 0);
if (cloned < 0)
@@ -247,7 +244,6 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
ret = -EINVAL;
else
ret = 0;
-fdput:
fdput(src_file);
return ret;
}
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index c938bbad075e..dffd2cac8113 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -21,6 +21,8 @@
#include "../internal.h"
+#define IOEND_BATCH_SIZE 4096
+
/*
* Structure allocated for each folio when block size < folio size
* to track sub-folio uptodate status and I/O completions.
@@ -290,19 +292,20 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
if (ctx->rac) /* same as readahead_gfp_mask */
gfp |= __GFP_NORETRY | __GFP_NOWARN;
- ctx->bio = bio_alloc(gfp, bio_max_segs(nr_vecs));
+ ctx->bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs),
+ REQ_OP_READ, gfp);
/*
* If the bio_alloc fails, try it again for a single page to
* avoid having to deal with partial page reads. This emulates
* what do_mpage_readpage does.
*/
- if (!ctx->bio)
- ctx->bio = bio_alloc(orig_gfp, 1);
- ctx->bio->bi_opf = REQ_OP_READ;
+ if (!ctx->bio) {
+ ctx->bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ,
+ orig_gfp);
+ }
if (ctx->rac)
ctx->bio->bi_opf |= REQ_RAHEAD;
ctx->bio->bi_iter.bi_sector = sector;
- bio_set_dev(ctx->bio, iomap->bdev);
ctx->bio->bi_end_io = iomap_read_end_io;
bio_add_folio(ctx->bio, folio, plen, poff);
}
@@ -422,37 +425,33 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
EXPORT_SYMBOL_GPL(iomap_readahead);
/*
- * iomap_is_partially_uptodate checks whether blocks within a page are
+ * iomap_is_partially_uptodate checks whether blocks within a folio are
* uptodate or not.
*
- * Returns true if all blocks which correspond to a file portion
- * we want to read within the page are uptodate.
+ * Returns true if all blocks which correspond to the specified part
+ * of the folio are uptodate.
*/
-int
-iomap_is_partially_uptodate(struct page *page, unsigned long from,
- unsigned long count)
+bool iomap_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
{
- struct folio *folio = page_folio(page);
struct iomap_page *iop = to_iomap_page(folio);
- struct inode *inode = page->mapping->host;
- unsigned len, first, last;
- unsigned i;
+ struct inode *inode = folio->mapping->host;
+ size_t len;
+ unsigned first, last, i;
+
+ if (!iop)
+ return false;
- /* Limit range to one page */
- len = min_t(unsigned, PAGE_SIZE - from, count);
+ /* Limit range to this folio */
+ len = min(folio_size(folio) - from, count);
/* First and last blocks in range within page */
first = from >> inode->i_blkbits;
last = (from + len - 1) >> inode->i_blkbits;
- if (iop) {
- for (i = first; i <= last; i++)
- if (!test_bit(i, iop->uptodate))
- return 0;
- return 1;
- }
-
- return 0;
+ for (i = first; i <= last; i++)
+ if (!test_bit(i, iop->uptodate))
+ return false;
+ return true;
}
EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
@@ -478,7 +477,8 @@ EXPORT_SYMBOL_GPL(iomap_releasepage);
void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len)
{
- trace_iomap_invalidatepage(folio->mapping->host, offset, len);
+ trace_iomap_invalidate_folio(folio->mapping->host,
+ folio_pos(folio) + offset, len);
/*
* If we're invalidating the entire folio, clear the dirty state
@@ -497,13 +497,6 @@ void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len)
}
EXPORT_SYMBOL_GPL(iomap_invalidate_folio);
-void iomap_invalidatepage(struct page *page, unsigned int offset,
- unsigned int len)
-{
- iomap_invalidate_folio(page_folio(page), offset, len);
-}
-EXPORT_SYMBOL_GPL(iomap_invalidatepage);
-
#ifdef CONFIG_MIGRATION
int
iomap_migrate_page(struct address_space *mapping, struct page *newpage,
@@ -548,10 +541,8 @@ static int iomap_read_folio_sync(loff_t block_start, struct folio *folio,
struct bio_vec bvec;
struct bio bio;
- bio_init(&bio, &bvec, 1);
- bio.bi_opf = REQ_OP_READ;
+ bio_init(&bio, iomap->bdev, &bvec, 1, REQ_OP_READ);
bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
- bio_set_dev(&bio, iomap->bdev);
bio_add_folio(&bio, folio, plen, poff);
return submit_bio_wait(&bio);
}
@@ -1039,7 +1030,7 @@ static void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
* state, release holds on bios, and finally free up memory. Do not use the
* ioend after this.
*/
-static void
+static u32
iomap_finish_ioend(struct iomap_ioend *ioend, int error)
{
struct inode *inode = ioend->io_inode;
@@ -1048,6 +1039,7 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
u64 start = bio->bi_iter.bi_sector;
loff_t offset = ioend->io_offset;
bool quiet = bio_flagged(bio, BIO_QUIET);
+ u32 folio_count = 0;
for (bio = &ioend->io_inline_bio; bio; bio = next) {
struct folio_iter fi;
@@ -1062,9 +1054,11 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
next = bio->bi_private;
/* walk all folios in bio, ending page IO on them */
- bio_for_each_folio_all(fi, bio)
+ bio_for_each_folio_all(fi, bio) {
iomap_finish_folio_write(inode, fi.folio, fi.length,
error);
+ folio_count++;
+ }
bio_put(bio);
}
/* The ioend has been freed by bio_put() */
@@ -1074,20 +1068,36 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
"%s: writeback error on inode %lu, offset %lld, sector %llu",
inode->i_sb->s_id, inode->i_ino, offset, start);
}
+ return folio_count;
}
+/*
+ * Ioend completion routine for merged bios. This can only be called from task
+ * contexts as merged ioends can be of unbounded length. Hence we have to break up
+ * the writeback completions into manageable chunks to avoid long scheduler
+ * holdoffs. We aim to keep scheduler holdoffs down below 10ms so that we get
+ * good batch processing throughput without creating adverse scheduler latency
+ * conditions.
+ */
void
iomap_finish_ioends(struct iomap_ioend *ioend, int error)
{
struct list_head tmp;
+ u32 completions;
+
+ might_sleep();
list_replace_init(&ioend->io_list, &tmp);
- iomap_finish_ioend(ioend, error);
+ completions = iomap_finish_ioend(ioend, error);
while (!list_empty(&tmp)) {
+ if (completions > IOEND_BATCH_SIZE * 8) {
+ cond_resched();
+ completions = 0;
+ }
ioend = list_first_entry(&tmp, struct iomap_ioend, io_list);
list_del_init(&ioend->io_list);
- iomap_finish_ioend(ioend, error);
+ completions += iomap_finish_ioend(ioend, error);
}
}
EXPORT_SYMBOL_GPL(iomap_finish_ioends);
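/*
 * Editor's note, not part of the patch: with IOEND_BATCH_SIZE = 4096,
 * each ioend is capped at 4096 folios (see iomap_can_add_to_ioend()
 * below) and the loop above calls cond_resched() roughly every
 * 8 * 4096 = 32768 folio completions, which is how the ~10ms
 * scheduler holdoff target in the comment above is enforced.
 */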
@@ -1108,6 +1118,18 @@ iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next)
return false;
if (ioend->io_offset + ioend->io_size != next->io_offset)
return false;
+ /*
+ * Do not merge physically discontiguous ioends. The filesystem
+ * completion functions will have to iterate the physical
+ * discontiguities even if we merge the ioends at a logical level, so
+ * we don't gain anything by merging physical discontiguities here.
+ *
+ * We cannot use bio->bi_iter.bi_sector here as it is modified during
+ * submission so does not point to the start sector of the bio at
+ * completion.
+ */
+ if (ioend->io_sector + (ioend->io_size >> 9) != next->io_sector)
+ return false;
return true;
}
@@ -1196,10 +1218,10 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
struct iomap_ioend *ioend;
struct bio *bio;
- bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &iomap_ioend_bioset);
- bio_set_dev(bio, wpc->iomap.bdev);
+ bio = bio_alloc_bioset(wpc->iomap.bdev, BIO_MAX_VECS,
+ REQ_OP_WRITE | wbc_to_write_flags(wbc),
+ GFP_NOFS, &iomap_ioend_bioset);
bio->bi_iter.bi_sector = sector;
- bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
bio->bi_write_hint = inode->i_write_hint;
wbc_init_bio(wbc, bio);
@@ -1209,8 +1231,10 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
ioend->io_flags = wpc->iomap.flags;
ioend->io_inode = inode;
ioend->io_size = 0;
+ ioend->io_folios = 0;
ioend->io_offset = offset;
ioend->io_bio = bio;
+ ioend->io_sector = sector;
return ioend;
}
@@ -1226,10 +1250,9 @@ iomap_chain_bio(struct bio *prev)
{
struct bio *new;
- new = bio_alloc(GFP_NOFS, BIO_MAX_VECS);
- bio_copy_dev(new, prev);/* also copies over blkcg information */
+ new = bio_alloc(prev->bi_bdev, BIO_MAX_VECS, prev->bi_opf, GFP_NOFS);
+ bio_clone_blkg_association(new, prev);
new->bi_iter.bi_sector = bio_end_sector(prev);
- new->bi_opf = prev->bi_opf;
new->bi_write_hint = prev->bi_write_hint;
bio_chain(prev, new);
@@ -1251,6 +1274,13 @@ iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset,
return false;
if (sector != bio_end_sector(wpc->ioend->io_bio))
return false;
+ /*
+ * Limit ioend bio chain lengths to minimise IO completion latency. This
+ * also prevents long tight loops ending page writeback on all the
+ * folios in the ioend.
+ */
+ if (wpc->ioend->io_folios >= IOEND_BATCH_SIZE)
+ return false;
return true;
}
@@ -1335,6 +1365,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
&submit_list);
count++;
}
+ if (count)
+ wpc->ioend->io_folios++;
WARN_ON_ONCE(!wpc->ioend && !list_empty(&submit_list));
WARN_ON_ONCE(!folio_test_locked(folio));
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 03ea367df19a..67cf9c16f80c 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -6,6 +6,7 @@
#include <linux/module.h>
#include <linux/compiler.h>
#include <linux/fs.h>
+#include <linux/fscrypt.h>
#include <linux/pagemap.h>
#include <linux/iomap.h>
#include <linux/backing-dev.h>
@@ -179,19 +180,20 @@ static void iomap_dio_bio_end_io(struct bio *bio)
static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
loff_t pos, unsigned len)
{
+ struct inode *inode = file_inode(dio->iocb->ki_filp);
struct page *page = ZERO_PAGE(0);
int flags = REQ_SYNC | REQ_IDLE;
struct bio *bio;
- bio = bio_alloc(GFP_KERNEL, 1);
- bio_set_dev(bio, iter->iomap.bdev);
+ bio = bio_alloc(iter->iomap.bdev, 1, REQ_OP_WRITE | flags, GFP_KERNEL);
+ fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
+ GFP_KERNEL);
bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos);
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
get_page(page);
__bio_add_page(bio, page, len, 0);
- bio_set_op_attrs(bio, REQ_OP_WRITE, flags);
iomap_dio_submit_bio(iter, dio, bio, pos);
}
@@ -309,14 +311,14 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
goto out;
}
- bio = bio_alloc(GFP_KERNEL, nr_pages);
- bio_set_dev(bio, iomap->bdev);
+ bio = bio_alloc(iomap->bdev, nr_pages, bio_opf, GFP_KERNEL);
+ fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
+ GFP_KERNEL);
bio->bi_iter.bi_sector = iomap_sector(iomap, pos);
bio->bi_write_hint = dio->iocb->ki_hint;
bio->bi_ioprio = dio->iocb->ki_ioprio;
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
- bio->bi_opf = bio_opf;
ret = bio_iov_iter_get_pages(bio, dio->submit.iter);
if (unlikely(ret)) {
diff --git a/fs/iomap/fiemap.c b/fs/iomap/fiemap.c
index 66cf267c68ae..610ca6f1ec9b 100644
--- a/fs/iomap/fiemap.c
+++ b/fs/iomap/fiemap.c
@@ -7,6 +7,7 @@
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/fiemap.h>
+#include <linux/pagemap.h>
static int iomap_to_fiemap(struct fiemap_extent_info *fi,
const struct iomap *iomap, u32 flags)
diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
index 65e39785c284..a6689a563c6e 100644
--- a/fs/iomap/trace.h
+++ b/fs/iomap/trace.h
@@ -81,7 +81,7 @@ DEFINE_EVENT(iomap_range_class, name, \
TP_ARGS(inode, off, len))
DEFINE_RANGE_EVENT(iomap_writepage);
DEFINE_RANGE_EVENT(iomap_releasepage);
-DEFINE_RANGE_EVENT(iomap_invalidatepage);
+DEFINE_RANGE_EVENT(iomap_invalidate_folio);
DEFINE_RANGE_EVENT(iomap_dio_invalidate_fail);
#define IOMAP_TYPE_STRINGS \
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 0c6eacfcbeef..d7491692aea3 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -70,7 +70,7 @@ static struct kmem_cache *isofs_inode_cachep;
static struct inode *isofs_alloc_inode(struct super_block *sb)
{
struct iso_inode_info *ei;
- ei = kmem_cache_alloc(isofs_inode_cachep, GFP_KERNEL);
+ ei = alloc_inode_sb(sb, isofs_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
return &ei->vfs_inode;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 3cc4ab2ba7f4..5b9408e3b370 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -484,22 +484,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
stats.run.rs_locked);
- spin_lock(&commit_transaction->t_handle_lock);
- while (atomic_read(&commit_transaction->t_updates)) {
- DEFINE_WAIT(wait);
+ /* waits for any t_updates to finish */
+ jbd2_journal_wait_updates(journal);
- prepare_to_wait(&journal->j_wait_updates, &wait,
- TASK_UNINTERRUPTIBLE);
- if (atomic_read(&commit_transaction->t_updates)) {
- spin_unlock(&commit_transaction->t_handle_lock);
- write_unlock(&journal->j_state_lock);
- schedule();
- write_lock(&journal->j_state_lock);
- spin_lock(&commit_transaction->t_handle_lock);
- }
- finish_wait(&journal->j_wait_updates, &wait);
- }
- spin_unlock(&commit_transaction->t_handle_lock);
commit_transaction->t_state = T_SWITCH;
write_unlock(&journal->j_state_lock);
@@ -817,7 +804,7 @@ start_journal_io:
commit_transaction->t_state = T_COMMIT_DFLUSH;
write_unlock(&journal->j_state_lock);
- /*
+ /*
* If the journal is not located on the file system device,
* then we must flush the file system device before we issue
* the commit record
@@ -1170,7 +1157,7 @@ restart_loop:
if (journal->j_commit_callback)
journal->j_commit_callback(journal, commit_transaction);
if (journal->j_fc_cleanup_callback)
- journal->j_fc_cleanup_callback(journal, 1);
+ journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);
trace_jbd2_end_commit(journal, commit_transaction);
jbd_debug(1, "JBD2: commit %d complete, head %d\n",
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f13d548e4a7f..fcacafa4510d 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -86,7 +86,7 @@ EXPORT_SYMBOL(jbd2_journal_start_commit);
EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
EXPORT_SYMBOL(jbd2_journal_wipe);
EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
-EXPORT_SYMBOL(jbd2_journal_invalidatepage);
+EXPORT_SYMBOL(jbd2_journal_invalidate_folio);
EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
EXPORT_SYMBOL(jbd2_journal_force_commit);
EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
@@ -771,7 +771,7 @@ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
{
jbd2_journal_unlock_updates(journal);
if (journal->j_fc_cleanup_callback)
- journal->j_fc_cleanup_callback(journal, 0);
+ journal->j_fc_cleanup_callback(journal, 0, tid);
write_lock(&journal->j_state_lock);
journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
if (fallback)
@@ -1287,6 +1287,8 @@ static int jbd2_min_tag_size(void)
/**
* jbd2_journal_shrink_scan()
+ * @shrink: shrinker to work on
+ * @sc: reclaim request to process
*
* Scan the checkpointed buffer on the checkpoint list and release the
* journal_head.
@@ -1312,6 +1314,8 @@ static unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink,
/**
* jbd2_journal_shrink_count()
+ * @shrink: shrinker to work on
+ * @sc: reclaim request to process
*
* Count the number of checkpoint buffers on the checkpoint list.
*/
@@ -2972,6 +2976,7 @@ struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh)
jbd_unlock_bh_journal_head(bh);
return jh;
}
+EXPORT_SYMBOL(jbd2_journal_grab_journal_head);
static void __journal_remove_journal_head(struct buffer_head *bh)
{
@@ -3024,6 +3029,7 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
jbd_unlock_bh_journal_head(bh);
}
}
+EXPORT_SYMBOL(jbd2_journal_put_journal_head);
/*
* Initialize jbd inode head
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 6a3caedd2285..fcb9175016a5 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -107,7 +107,6 @@ static void jbd2_get_transaction(journal_t *journal,
transaction->t_start_time = ktime_get();
transaction->t_tid = journal->j_transaction_sequence++;
transaction->t_expires = jiffies + journal->j_commit_interval;
- spin_lock_init(&transaction->t_handle_lock);
atomic_set(&transaction->t_updates, 0);
atomic_set(&transaction->t_outstanding_credits,
jbd2_descriptor_blocks_per_trans(journal) +
@@ -139,26 +138,22 @@ static void jbd2_get_transaction(journal_t *journal,
/*
* Update transaction's maximum wait time, if debugging is enabled.
*
- * In order for t_max_wait to be reliable, it must be protected by a
- * lock. But doing so will mean that start_this_handle() can not be
- * run in parallel on SMP systems, which limits our scalability. So
- * unless debugging is enabled, we no longer update t_max_wait, which
- * means that maximum wait time reported by the jbd2_run_stats
- * tracepoint will always be zero.
+ * t_max_wait is carefully updated here using an atomic compare-and-exchange.
+ * Note that multiple threads may try to update it simultaneously, hence the
+ * cmpxchg loop rather than taking a lock.
+ * With this, t_max_wait can be updated without enabling jbd2_journal_enable_debug.
*/
static inline void update_t_max_wait(transaction_t *transaction,
unsigned long ts)
{
-#ifdef CONFIG_JBD2_DEBUG
- if (jbd2_journal_enable_debug &&
- time_after(transaction->t_start, ts)) {
- ts = jbd2_time_diff(ts, transaction->t_start);
- spin_lock(&transaction->t_handle_lock);
- if (ts > transaction->t_max_wait)
- transaction->t_max_wait = ts;
- spin_unlock(&transaction->t_handle_lock);
+ unsigned long oldts, newts;
+
+ if (time_after(transaction->t_start, ts)) {
+ newts = jbd2_time_diff(ts, transaction->t_start);
+ oldts = READ_ONCE(transaction->t_max_wait);
+ while (oldts < newts)
+ oldts = cmpxchg(&transaction->t_max_wait, oldts, newts);
}
-#endif
}
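/*
 * Editor's note, not part of the patch: the loop above is the usual
 * lock-free "atomic max" idiom. cmpxchg() returns the value it found,
 * so on a race the loop re-checks oldts < newts and either installs
 * newts or exits once a concurrent updater stored something larger.
 * A userspace C11 sketch of the same pattern:
 */
#include <stdatomic.h>

static void atomic_max(_Atomic unsigned long *max, unsigned long val)
{
        unsigned long old = atomic_load_explicit(max, memory_order_relaxed);

        /* CAS failure refreshes 'old' with the current value; retry */
        while (old < val &&
               !atomic_compare_exchange_weak(max, &old, val))
                ;
}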
/*
@@ -449,7 +444,7 @@ repeat:
}
/* OK, account for the buffers that this operation expects to
- * use and add the handle to the running transaction.
+ * use and add the handle to the running transaction.
*/
update_t_max_wait(transaction, ts);
handle->h_transaction = transaction;
@@ -690,7 +685,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
DIV_ROUND_UP(
handle->h_revoke_credits_requested,
journal->j_revoke_records_per_block);
- spin_lock(&transaction->t_handle_lock);
wanted = atomic_add_return(nblocks,
&transaction->t_outstanding_credits);
@@ -698,7 +692,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
jbd_debug(3, "denied handle %p %d blocks: "
"transaction too large\n", handle, nblocks);
atomic_sub(nblocks, &transaction->t_outstanding_credits);
- goto unlock;
+ goto error_out;
}
trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
@@ -714,8 +708,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
result = 0;
jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
-unlock:
- spin_unlock(&transaction->t_handle_lock);
error_out:
read_unlock(&journal->j_state_lock);
return result;
@@ -836,6 +828,43 @@ int jbd2_journal_restart(handle_t *handle, int nblocks)
}
EXPORT_SYMBOL(jbd2_journal_restart);
+/*
+ * Waits for any outstanding t_updates to finish.
+ * This is called with write j_state_lock held.
+ */
+void jbd2_journal_wait_updates(journal_t *journal)
+{
+ DEFINE_WAIT(wait);
+
+ while (1) {
+ /*
+ * Note that the running transaction can get freed under us if
+ * this transaction is getting committed in
+ * jbd2_journal_commit_transaction() ->
+ * jbd2_journal_free_transaction(). This can only happen when we
+ * release j_state_lock -> schedule() -> acquire j_state_lock.
+ * Hence we should everytime retrieve new j_running_transaction
+ * value (after j_state_lock release acquire cycle), else it may
+ * lead to use-after-free of old freed transaction.
+ */
+ transaction_t *transaction = journal->j_running_transaction;
+
+ if (!transaction)
+ break;
+
+ prepare_to_wait(&journal->j_wait_updates, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (!atomic_read(&transaction->t_updates)) {
+ finish_wait(&journal->j_wait_updates, &wait);
+ break;
+ }
+ write_unlock(&journal->j_state_lock);
+ schedule();
+ finish_wait(&journal->j_wait_updates, &wait);
+ write_lock(&journal->j_state_lock);
+ }
+}
+
/**
* jbd2_journal_lock_updates () - establish a transaction barrier.
* @journal: Journal to establish a barrier on.
@@ -848,8 +877,6 @@ EXPORT_SYMBOL(jbd2_journal_restart);
*/
void jbd2_journal_lock_updates(journal_t *journal)
{
- DEFINE_WAIT(wait);
-
jbd2_might_wait_for_commit(journal);
write_lock(&journal->j_state_lock);
@@ -863,27 +890,9 @@ void jbd2_journal_lock_updates(journal_t *journal)
write_lock(&journal->j_state_lock);
}
- /* Wait until there are no running updates */
- while (1) {
- transaction_t *transaction = journal->j_running_transaction;
-
- if (!transaction)
- break;
+ /* Wait until there are no running t_updates */
+ jbd2_journal_wait_updates(journal);
- spin_lock(&transaction->t_handle_lock);
- prepare_to_wait(&journal->j_wait_updates, &wait,
- TASK_UNINTERRUPTIBLE);
- if (!atomic_read(&transaction->t_updates)) {
- spin_unlock(&transaction->t_handle_lock);
- finish_wait(&journal->j_wait_updates, &wait);
- break;
- }
- spin_unlock(&transaction->t_handle_lock);
- write_unlock(&journal->j_state_lock);
- schedule();
- finish_wait(&journal->j_wait_updates, &wait);
- write_lock(&journal->j_state_lock);
- }
write_unlock(&journal->j_state_lock);
/*
@@ -2208,14 +2217,14 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
}
/*
- * jbd2_journal_invalidatepage
+ * jbd2_journal_invalidate_folio
*
* This code is tricky. It has a number of cases to deal with.
*
* There are two invariants which this code relies on:
*
- * i_size must be updated on disk before we start calling invalidatepage on the
- * data.
+ * i_size must be updated on disk before we start calling invalidate_folio
+ * on the data.
*
* This is done in ext3 by defining an ext3_setattr method which
* updates i_size before truncate gets going. By maintaining this
@@ -2417,9 +2426,9 @@ zap_buffer_unlocked:
}
/**
- * jbd2_journal_invalidatepage()
+ * jbd2_journal_invalidate_folio()
* @journal: journal to use for flush...
- * @page: page to flush
+ * @folio: folio to flush
* @offset: start of the range to invalidate
* @length: length of the range to invalidate
*
@@ -2428,30 +2437,29 @@ zap_buffer_unlocked:
* the page is straddling i_size. Caller then has to wait for current commit
* and try again.
*/
-int jbd2_journal_invalidatepage(journal_t *journal,
- struct page *page,
- unsigned int offset,
- unsigned int length)
+int jbd2_journal_invalidate_folio(journal_t *journal, struct folio *folio,
+ size_t offset, size_t length)
{
struct buffer_head *head, *bh, *next;
unsigned int stop = offset + length;
unsigned int curr_off = 0;
- int partial_page = (offset || length < PAGE_SIZE);
+ int partial_page = (offset || length < folio_size(folio));
int may_free = 1;
int ret = 0;
- if (!PageLocked(page))
+ if (!folio_test_locked(folio))
BUG();
- if (!page_has_buffers(page))
+ head = folio_buffers(folio);
+ if (!head)
return 0;
- BUG_ON(stop > PAGE_SIZE || stop < length);
+ BUG_ON(stop > folio_size(folio) || stop < length);
/* We will potentially be playing with lists other than just the
* data lists (especially for journaled data mode), so be
* cautious in our locking. */
- head = bh = page_buffers(page);
+ bh = head;
do {
unsigned int next_off = curr_off + bh->b_size;
next = bh->b_this_page;
@@ -2474,8 +2482,8 @@ int jbd2_journal_invalidatepage(journal_t *journal,
} while (bh != head);
if (!partial_page) {
- if (may_free && try_to_free_buffers(page))
- J_ASSERT(!page_has_buffers(page));
+ if (may_free && try_to_free_buffers(&folio->page))
+ J_ASSERT(!folio_buffers(folio));
}
return 0;
}
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 81ca58c10b72..7ea37f49f1e1 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -39,7 +39,7 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb)
{
struct jffs2_inode_info *f;
- f = kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL);
+ f = alloc_inode_sb(sb, jffs2_inode_cachep, GFP_KERNEL);
if (!f)
return NULL;
return &f->vfs_inode;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 57ab424c05ff..27be2e8ba237 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -357,7 +357,8 @@ static ssize_t jfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
}
const struct address_space_operations jfs_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = jfs_readpage,
.readahead = jfs_readahead,
.writepage = jfs_writepage,
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 78fd136ac13b..997c81fcea34 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1980,17 +1980,13 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
bp->l_flag |= lbmREAD;
- bio = bio_alloc(GFP_NOFS, 1);
-
+ bio = bio_alloc(log->bdev, 1, REQ_OP_READ, GFP_NOFS);
bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
- bio_set_dev(bio, log->bdev);
-
bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
bio->bi_end_io = lbmIODone;
bio->bi_private = bp;
- bio->bi_opf = REQ_OP_READ;
/*check if journaling to disk has been disabled*/
if (log->no_integrity) {
bio->bi_iter.bi_size = 0;
@@ -2125,16 +2121,13 @@ static void lbmStartIO(struct lbuf * bp)
jfs_info("lbmStartIO");
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = bio_alloc(log->bdev, 1, REQ_OP_WRITE | REQ_SYNC, GFP_NOFS);
bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
- bio_set_dev(bio, log->bdev);
-
bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
bio->bi_end_io = lbmIODone;
bio->bi_private = bp;
- bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
/* check if journaling to disk has been disabled */
if (log->no_integrity) {
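
The bio_alloc() conversions in this file (and in jfs_metapage.c and mpage.c further below) all follow one pattern: the block device, vector count and operation flags now travel in the allocation call itself, and the separate bio_set_dev()/bi_opf assignments go away. A minimal sketch of the new shape (kernel context assumed; log_bio_prep is a hypothetical helper):

	#include <linux/bio.h>

	static struct bio *log_bio_prep(struct block_device *bdev, sector_t sect,
					struct page *page, unsigned int len)
	{
		/* device + op flags are fixed at allocation time now */
		struct bio *bio = bio_alloc(bdev, 1, REQ_OP_WRITE | REQ_SYNC,
					    GFP_NOFS);

		bio->bi_iter.bi_sector = sect;	/* still set per-I/O */
		bio_add_page(bio, page, len, 0);
		return bio;
	}
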
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 104ae698443e..c4220ccdedef 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -417,12 +417,10 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
}
len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage);
- bio = bio_alloc(GFP_NOFS, 1);
- bio_set_dev(bio, inode->i_sb->s_bdev);
+ bio = bio_alloc(inode->i_sb->s_bdev, 1, REQ_OP_WRITE, GFP_NOFS);
bio->bi_iter.bi_sector = pblock << (inode->i_blkbits - 9);
bio->bi_end_io = metapage_write_end_io;
bio->bi_private = page;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
/* Don't call bio_add_page yet, we may add to this vec */
bio_offset = offset;
@@ -497,13 +495,12 @@ static int metapage_readpage(struct file *fp, struct page *page)
if (bio)
submit_bio(bio);
- bio = bio_alloc(GFP_NOFS, 1);
- bio_set_dev(bio, inode->i_sb->s_bdev);
+ bio = bio_alloc(inode->i_sb->s_bdev, 1, REQ_OP_READ,
+ GFP_NOFS);
bio->bi_iter.bi_sector =
pblock << (inode->i_blkbits - 9);
bio->bi_end_io = metapage_read_end_io;
bio->bi_private = page;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
len = xlen << inode->i_blkbits;
offset = block_offset << inode->i_blkbits;
if (bio_add_page(bio, page, len, offset) < len)
@@ -555,22 +552,22 @@ static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
return ret;
}
-static void metapage_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length)
+static void metapage_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length)
{
- BUG_ON(offset || length < PAGE_SIZE);
+ BUG_ON(offset || length < folio_size(folio));
- BUG_ON(PageWriteback(page));
+ BUG_ON(folio_test_writeback(folio));
- metapage_releasepage(page, 0);
+ metapage_releasepage(&folio->page, 0);
}
const struct address_space_operations jfs_metapage_aops = {
.readpage = metapage_readpage,
.writepage = metapage_writepage,
.releasepage = metapage_releasepage,
- .invalidatepage = metapage_invalidatepage,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .invalidate_folio = metapage_invalidate_folio,
+ .dirty_folio = filemap_dirty_folio,
};
struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 24cbc9946e01..f1a13a74cddf 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -102,7 +102,7 @@ static struct inode *jfs_alloc_inode(struct super_block *sb)
{
struct jfs_inode_info *jfs_inode;
- jfs_inode = kmem_cache_alloc(jfs_inode_cachep, GFP_NOFS);
+ jfs_inode = alloc_inode_sb(sb, jfs_inode_cachep, GFP_NOFS);
if (!jfs_inode)
return NULL;
#ifdef CONFIG_QUOTA
diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c
index dc3d061edda9..911444d21267 100644
--- a/fs/ksmbd/auth.c
+++ b/fs/ksmbd/auth.c
@@ -29,6 +29,7 @@
#include "mgmt/user_config.h"
#include "crypto_ctx.h"
#include "transport_ipc.h"
+#include "../smbfs_common/arc4.h"
/*
* Fixed format data defining GSS header and fixed string
@@ -336,6 +337,29 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
nt_len - CIFS_ENCPWD_SIZE,
domain_name, conn->ntlmssp.cryptkey);
kfree(domain_name);
+
+ /* The recovered secondary session key */
+ if (conn->ntlmssp.client_flags & NTLMSSP_NEGOTIATE_KEY_XCH) {
+ struct arc4_ctx *ctx_arc4;
+ unsigned int sess_key_off, sess_key_len;
+
+ sess_key_off = le32_to_cpu(authblob->SessionKey.BufferOffset);
+ sess_key_len = le16_to_cpu(authblob->SessionKey.Length);
+
+ if (blob_len < (u64)sess_key_off + sess_key_len)
+ return -EINVAL;
+
+ ctx_arc4 = kmalloc(sizeof(*ctx_arc4), GFP_KERNEL);
+ if (!ctx_arc4)
+ return -ENOMEM;
+
+ cifs_arc4_setkey(ctx_arc4, sess->sess_key,
+ SMB2_NTLMV2_SESSKEY_SIZE);
+ cifs_arc4_crypt(ctx_arc4, sess->sess_key,
+ (char *)authblob + sess_key_off, sess_key_len);
+ kfree_sensitive(ctx_arc4);
+ }
+
return ret;
}
@@ -408,6 +432,9 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
(cflags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
flags |= NTLMSSP_NEGOTIATE_EXTENDED_SEC;
+ if (cflags & NTLMSSP_NEGOTIATE_KEY_XCH)
+ flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
+
chgblob->NegotiateFlags = cpu_to_le32(flags);
len = strlen(ksmbd_netbios_name());
name = kmalloc(2 + UNICODE_LEN(len), GFP_KERNEL);
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index 1866c81c5c99..67e8e28e3fc3 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -2688,7 +2688,7 @@ int smb2_open(struct ksmbd_work *work)
(struct create_posix *)context;
if (le16_to_cpu(context->DataOffset) +
le32_to_cpu(context->DataLength) <
- sizeof(struct create_posix)) {
+ sizeof(struct create_posix) - 4) {
rc = -EINVAL;
goto err_out1;
}
@@ -3422,9 +3422,9 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
goto free_conv_name;
}
- struct_sz = readdir_info_level_struct_sz(info_level);
- next_entry_offset = ALIGN(struct_sz - 1 + conv_len,
- KSMBD_DIR_INFO_ALIGNMENT);
+ struct_sz = readdir_info_level_struct_sz(info_level) - 1 + conv_len;
+ next_entry_offset = ALIGN(struct_sz, KSMBD_DIR_INFO_ALIGNMENT);
+ d_info->last_entry_off_align = next_entry_offset - struct_sz;
if (next_entry_offset > d_info->out_buf_len) {
d_info->out_buf_len = 0;
@@ -3976,6 +3976,7 @@ int smb2_query_dir(struct ksmbd_work *work)
((struct file_directory_info *)
((char *)rsp->Buffer + d_info.last_entry_offset))
->NextEntryOffset = 0;
+ d_info.data_count -= d_info.last_entry_off_align;
rsp->StructureSize = cpu_to_le16(9);
rsp->OutputBufferOffset = cpu_to_le16(72);
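
Worked example of the padding bookkeeping introduced above, assuming KSMBD_DIR_INFO_ALIGNMENT is 8 (the values are illustrative):

	#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

	unsigned int struct_sz = 107;			/* unpadded entry size  */
	unsigned int next = ALIGN_UP(struct_sz, 8);	/* 112                  */
	unsigned int pad  = next - struct_sz;		/* 5 trailing pad bytes */

Those pad bytes are what last_entry_off_align records; since nothing follows the final entry, smb2_query_dir() subtracts them from data_count so the response length matches what was actually written.
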
@@ -6126,13 +6127,26 @@ static int smb2_set_remote_key_for_rdma(struct ksmbd_work *work,
__le16 ChannelInfoOffset,
__le16 ChannelInfoLength)
{
+ unsigned int i, ch_count;
+
if (work->conn->dialect == SMB30_PROT_ID &&
Channel != SMB2_CHANNEL_RDMA_V1)
return -EINVAL;
- if (ChannelInfoOffset == 0 ||
- le16_to_cpu(ChannelInfoLength) < sizeof(*desc))
+ ch_count = le16_to_cpu(ChannelInfoLength) / sizeof(*desc);
+ if (ksmbd_debug_types & KSMBD_DEBUG_RDMA) {
+ for (i = 0; i < ch_count; i++) {
+ pr_info("RDMA r/w request %#x: token %#x, length %#x\n",
+ i,
+ le32_to_cpu(desc[i].token),
+ le32_to_cpu(desc[i].length));
+ }
+ }
+ if (ch_count != 1) {
+ ksmbd_debug(RDMA, "RDMA multiple buffer descriptors %d are not supported yet\n",
+ ch_count);
return -EINVAL;
+ }
work->need_invalidate_rkey =
(Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
@@ -6185,9 +6199,15 @@ int smb2_read(struct ksmbd_work *work)
if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE ||
req->Channel == SMB2_CHANNEL_RDMA_V1) {
+ unsigned int ch_offset = le16_to_cpu(req->ReadChannelInfoOffset);
+
+ if (ch_offset < offsetof(struct smb2_read_req, Buffer)) {
+ err = -EINVAL;
+ goto out;
+ }
err = smb2_set_remote_key_for_rdma(work,
(struct smb2_buffer_desc_v1 *)
- &req->Buffer[0],
+ ((char *)req + ch_offset),
req->Channel,
req->ReadChannelInfoOffset,
req->ReadChannelInfoLength);
@@ -6428,11 +6448,16 @@ int smb2_write(struct ksmbd_work *work)
if (req->Channel == SMB2_CHANNEL_RDMA_V1 ||
req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE) {
- if (req->Length != 0 || req->DataOffset != 0)
- return -EINVAL;
+ unsigned int ch_offset = le16_to_cpu(req->WriteChannelInfoOffset);
+
+ if (req->Length != 0 || req->DataOffset != 0 ||
+ ch_offset < offsetof(struct smb2_write_req, Buffer)) {
+ err = -EINVAL;
+ goto out;
+ }
err = smb2_set_remote_key_for_rdma(work,
(struct smb2_buffer_desc_v1 *)
- &req->Buffer[0],
+ ((char *)req + ch_offset),
req->Channel,
req->WriteChannelInfoOffset,
req->WriteChannelInfoLength);
diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c
index ef7f42b0290a..9a7e211dbf4f 100644
--- a/fs/ksmbd/smb_common.c
+++ b/fs/ksmbd/smb_common.c
@@ -308,14 +308,17 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
for (i = 0; i < 2; i++) {
struct kstat kstat;
struct ksmbd_kstat ksmbd_kstat;
+ struct dentry *dentry;
if (!dir->dot_dotdot[i]) { /* fill dot entry info */
if (i == 0) {
d_info->name = ".";
d_info->name_len = 1;
+ dentry = dir->filp->f_path.dentry;
} else {
d_info->name = "..";
d_info->name_len = 2;
+ dentry = dir->filp->f_path.dentry->d_parent;
}
if (!match_pattern(d_info->name, d_info->name_len,
@@ -327,7 +330,7 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
ksmbd_kstat.kstat = &kstat;
ksmbd_vfs_fill_dentry_attrs(work,
user_ns,
- dir->filp->f_path.dentry->d_parent,
+ dentry,
&ksmbd_kstat);
rc = fn(conn, info_level, d_info, &ksmbd_kstat);
if (rc)
diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c
index 3c1ec1ac0b27..ba5a22bc2e6d 100644
--- a/fs/ksmbd/transport_rdma.c
+++ b/fs/ksmbd/transport_rdma.c
@@ -80,7 +80,7 @@ static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
/* The maximum single-message size which can be received */
static int smb_direct_max_receive_size = 8192;
-static int smb_direct_max_read_write_size = 1048512;
+static int smb_direct_max_read_write_size = 524224;
static int smb_direct_max_outstanding_rw_ops = 8;
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index 19d36393974c..9cebb6ba555b 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -11,7 +11,6 @@
#include <linux/writeback.h>
#include <linux/xattr.h>
#include <linux/falloc.h>
-#include <linux/genhd.h>
#include <linux/fsnotify.h>
#include <linux/dcache.h>
#include <linux/slab.h>
diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h
index adf94a4f22fa..8c37aaf936ab 100644
--- a/fs/ksmbd/vfs.h
+++ b/fs/ksmbd/vfs.h
@@ -47,6 +47,7 @@ struct ksmbd_dir_info {
int last_entry_offset;
bool hide_dot_file;
int flags;
+ int last_entry_off_align;
};
struct ksmbd_readdir_data {
diff --git a/fs/libfs.c b/fs/libfs.c
index ba7438ab9371..e64bdedef168 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -631,7 +631,7 @@ const struct address_space_operations ram_aops = {
.readpage = simple_readpage,
.write_begin = simple_write_begin,
.write_end = simple_write_end,
- .set_page_dirty = __set_page_dirty_no_writeback,
+ .dirty_folio = noop_dirty_folio,
};
EXPORT_SYMBOL(ram_aops);
@@ -1198,17 +1198,6 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
}
EXPORT_SYMBOL(noop_fsync);
-void noop_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length)
-{
- /*
- * There is no page cache to invalidate in the dax case, however
- * we need this callback defined to prevent falling back to
- * block_invalidatepage() in do_invalidatepage().
- */
-}
-EXPORT_SYMBOL_GPL(noop_invalidatepage);
-
ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
/*
@@ -1231,7 +1220,7 @@ EXPORT_SYMBOL(kfree_link);
struct inode *alloc_anon_inode(struct super_block *s)
{
static const struct address_space_operations anon_aops = {
- .set_page_dirty = __set_page_dirty_no_writeback,
+ .dirty_folio = noop_dirty_folio,
};
struct inode *inode = new_inode_pseudo(s);
@@ -1379,7 +1368,7 @@ bool is_empty_dir_inode(struct inode *inode)
(inode->i_op == &empty_dir_inode_operations);
}
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
/*
* Determine if the name of a dentry should be casefolded.
*
@@ -1473,7 +1462,7 @@ static const struct dentry_operations generic_encrypted_dentry_ops = {
};
#endif
-#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE)
+#if defined(CONFIG_FS_ENCRYPTION) && IS_ENABLED(CONFIG_UNICODE)
static const struct dentry_operations generic_encrypted_ci_dentry_ops = {
.d_hash = generic_ci_d_hash,
.d_compare = generic_ci_d_compare,
@@ -1508,10 +1497,10 @@ void generic_set_encrypted_ci_d_ops(struct dentry *dentry)
#ifdef CONFIG_FS_ENCRYPTION
bool needs_encrypt_ops = dentry->d_flags & DCACHE_NOKEY_NAME;
#endif
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
bool needs_ci_ops = dentry->d_sb->s_encoding;
#endif
-#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE)
+#if defined(CONFIG_FS_ENCRYPTION) && IS_ENABLED(CONFIG_UNICODE)
if (needs_encrypt_ops && needs_ci_ops) {
d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops);
return;
@@ -1523,7 +1512,7 @@ void generic_set_encrypted_ci_d_ops(struct dentry *dentry)
return;
}
#endif
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
if (needs_ci_ops) {
d_set_d_op(dentry, &generic_ci_dentry_ops);
return;
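
The #ifdef → IS_ENABLED() conversions above matter once CONFIG_UNICODE can be tristate: a plain #ifdef CONFIG_UNICODE is false for =m builds, which define CONFIG_UNICODE_MODULE instead. The essence of the kernel's kconfig.h trick, reproduced here slightly simplified for reference:

	#define __ARG_PLACEHOLDER_1			0,
	#define __take_second_arg(__ignored, val, ...)	val
	#define ____is_defined(arg1_or_junk)	__take_second_arg(arg1_or_junk 1, 0)
	#define ___is_defined(val)		____is_defined(__ARG_PLACEHOLDER_##val)
	#define __is_defined(x)			___is_defined(x)

	#define IS_BUILTIN(option)	__is_defined(option)		/* =y */
	#define IS_MODULE(option)	__is_defined(option##_MODULE)	/* =m */
	#define IS_ENABLED(option)	(IS_BUILTIN(option) || IS_MODULE(option))
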
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 0475c5a5d061..59ef8a1f843f 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -184,8 +184,7 @@ lockd(void *vrqstp)
dprintk("lockd_down: service stopped\n");
svc_exit_thread(rqstp);
-
- module_put_and_kthread_exit(0);
+ return 0;
}
static int create_lockd_listener(struct svc_serv *serv, const char *name,
@@ -197,8 +196,8 @@ static int create_lockd_listener(struct svc_serv *serv, const char *name,
xprt = svc_find_xprt(serv, name, net, family, 0);
if (xprt == NULL)
- return svc_create_xprt(serv, name, net, family, port,
- SVC_SOCK_DEFAULTS, cred);
+ return svc_xprt_create(serv, name, net, family, port,
+ SVC_SOCK_DEFAULTS, cred);
svc_xprt_put(xprt);
return 0;
}
@@ -248,7 +247,8 @@ out_err:
if (warned++ == 0)
printk(KERN_WARNING
"lockd_up: makesock failed, error=%d\n", err);
- svc_shutdown_net(serv, net);
+ svc_xprt_destroy_all(serv, net);
+ svc_rpcb_cleanup(serv, net);
return err;
}
@@ -286,9 +286,8 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net)
nlm_shutdown_hosts_net(net);
cancel_delayed_work_sync(&ln->grace_period_end);
locks_end_grace(&ln->lockd_manager);
- svc_shutdown_net(serv, net);
- dprintk("%s: per-net data destroyed; net=%x\n",
- __func__, net->ns.inum);
+ svc_xprt_destroy_all(serv, net);
+ svc_rpcb_cleanup(serv, net);
}
} else {
pr_err("%s: no users! net=%x\n",
@@ -350,13 +349,6 @@ static struct notifier_block lockd_inet6addr_notifier = {
};
#endif
-static const struct svc_serv_ops lockd_sv_ops = {
- .svo_shutdown = svc_rpcb_cleanup,
- .svo_function = lockd,
- .svo_enqueue_xprt = svc_xprt_do_enqueue,
- .svo_module = THIS_MODULE,
-};
-
static int lockd_get(void)
{
struct svc_serv *serv;
@@ -380,7 +372,7 @@ static int lockd_get(void)
nlm_timeout = LOCKD_DFLT_TIMEO;
nlmsvc_timeout = nlm_timeout * HZ;
- serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, &lockd_sv_ops);
+ serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, lockd);
if (!serv) {
printk(KERN_WARNING "lockd_up: create service failed\n");
return -ENOMEM;
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index cb3a7512c33e..0a22a2faf552 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -179,19 +179,21 @@ nlm_delete_file(struct nlm_file *file)
static int nlm_unlock_files(struct nlm_file *file)
{
struct file_lock lock;
- struct file *f;
+ locks_init_lock(&lock);
lock.fl_type = F_UNLCK;
lock.fl_start = 0;
lock.fl_end = OFFSET_MAX;
- for (f = file->f_file[0]; f <= file->f_file[1]; f++) {
- if (f && vfs_lock_file(f, F_SETLK, &lock, NULL) < 0) {
- pr_warn("lockd: unlock failure in %s:%d\n",
- __FILE__, __LINE__);
- return 1;
- }
- }
+ if (file->f_file[O_RDONLY] &&
+ vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, &lock, NULL))
+ goto out_err;
+ if (file->f_file[O_WRONLY] &&
+ vfs_lock_file(file->f_file[O_WRONLY], F_SETLK, &lock, NULL))
+ goto out_err;
return 0;
+out_err:
+ pr_warn("lockd: unlock failure in %s:%d\n", __FILE__, __LINE__);
+ return 1;
}
/*
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index a71f1cf894b9..700228c7f38b 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -63,7 +63,7 @@ static struct kmem_cache * minix_inode_cachep;
static struct inode *minix_alloc_inode(struct super_block *sb)
{
struct minix_inode_info *ei;
- ei = kmem_cache_alloc(minix_inode_cachep, GFP_KERNEL);
+ ei = alloc_inode_sb(sb, minix_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
return &ei->vfs_inode;
@@ -442,7 +442,8 @@ static sector_t minix_bmap(struct address_space *mapping, sector_t block)
}
static const struct address_space_operations minix_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = minix_readpage,
.writepage = minix_writepage,
.write_begin = minix_write_begin,
diff --git a/fs/mpage.c b/fs/mpage.c
index 87f5cfef6caa..9ed1e58e8d70 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -57,38 +57,14 @@ static void mpage_end_io(struct bio *bio)
bio_put(bio);
}
-static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio)
+static struct bio *mpage_bio_submit(struct bio *bio)
{
bio->bi_end_io = mpage_end_io;
- bio_set_op_attrs(bio, op, op_flags);
guard_bio_eod(bio);
submit_bio(bio);
return NULL;
}
-static struct bio *
-mpage_alloc(struct block_device *bdev,
- sector_t first_sector, int nr_vecs,
- gfp_t gfp_flags)
-{
- struct bio *bio;
-
- /* Restrict the given (page cache) mask for slab allocations */
- gfp_flags &= GFP_KERNEL;
- bio = bio_alloc(gfp_flags, nr_vecs);
-
- if (bio == NULL && (current->flags & PF_MEMALLOC)) {
- while (!bio && (nr_vecs /= 2))
- bio = bio_alloc(gfp_flags, nr_vecs);
- }
-
- if (bio) {
- bio_set_dev(bio, bdev);
- bio->bi_iter.bi_sector = first_sector;
- }
- return bio;
-}
-
/*
* support function for mpage_readahead. The fs supplied get_block might
* return an up to date buffer. This is used to map that buffer into
@@ -169,16 +145,15 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
struct block_device *bdev = NULL;
int length;
int fully_mapped = 1;
- int op_flags;
+ int op = REQ_OP_READ;
unsigned nblocks;
unsigned relative_block;
gfp_t gfp;
if (args->is_readahead) {
- op_flags = REQ_RAHEAD;
+ op |= REQ_RAHEAD;
gfp = readahead_gfp_mask(page->mapping);
} else {
- op_flags = 0;
gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
}
@@ -287,7 +262,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
* This page will go to BIO. Do we need to send this BIO off first?
*/
if (args->bio && (args->last_block_in_bio != blocks[0] - 1))
- args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
+ args->bio = mpage_bio_submit(args->bio);
alloc_new:
if (args->bio == NULL) {
@@ -296,15 +271,16 @@ alloc_new:
page))
goto out;
}
- args->bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
- bio_max_segs(args->nr_pages), gfp);
+ args->bio = bio_alloc(bdev, bio_max_segs(args->nr_pages), op,
+ gfp);
if (args->bio == NULL)
goto confused;
+ args->bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
}
length = first_hole << blkbits;
if (bio_add_page(args->bio, page, length, 0) < length) {
- args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
+ args->bio = mpage_bio_submit(args->bio);
goto alloc_new;
}
@@ -312,7 +288,7 @@ alloc_new:
nblocks = map_bh->b_size >> blkbits;
if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
(first_hole != blocks_per_page))
- args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
+ args->bio = mpage_bio_submit(args->bio);
else
args->last_block_in_bio = blocks[blocks_per_page - 1];
out:
@@ -320,7 +296,7 @@ out:
confused:
if (args->bio)
- args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
+ args->bio = mpage_bio_submit(args->bio);
if (!PageUptodate(page))
block_read_full_page(page, args->get_block);
else
@@ -383,7 +359,7 @@ void mpage_readahead(struct readahead_control *rac, get_block_t get_block)
put_page(page);
}
if (args.bio)
- mpage_bio_submit(REQ_OP_READ, REQ_RAHEAD, args.bio);
+ mpage_bio_submit(args.bio);
}
EXPORT_SYMBOL(mpage_readahead);
@@ -400,7 +376,7 @@ int mpage_readpage(struct page *page, get_block_t get_block)
args.bio = do_mpage_readpage(&args);
if (args.bio)
- mpage_bio_submit(REQ_OP_READ, 0, args.bio);
+ mpage_bio_submit(args.bio);
return 0;
}
EXPORT_SYMBOL(mpage_readpage);
@@ -491,7 +467,6 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
struct buffer_head map_bh;
loff_t i_size = i_size_read(inode);
int ret = 0;
- int op_flags = wbc_to_write_flags(wbc);
if (page_has_buffers(page)) {
struct buffer_head *head = page_buffers(page);
@@ -504,7 +479,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
if (!buffer_mapped(bh)) {
/*
* unmapped dirty buffers are created by
- * __set_page_dirty_buffers -> mmapped data
+ * block_dirty_folio -> mmapped data
*/
if (buffer_dirty(bh))
goto confused;
@@ -599,7 +574,7 @@ page_is_mapped:
* This page will go to BIO. Do we need to send this BIO off first?
*/
if (bio && mpd->last_block_in_bio != blocks[0] - 1)
- bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
+ bio = mpage_bio_submit(bio);
alloc_new:
if (bio == NULL) {
@@ -608,11 +583,10 @@ alloc_new:
page, wbc))
goto out;
}
- bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
- BIO_MAX_VECS, GFP_NOFS|__GFP_HIGH);
- if (bio == NULL)
- goto confused;
-
+ bio = bio_alloc(bdev, BIO_MAX_VECS,
+ REQ_OP_WRITE | wbc_to_write_flags(wbc),
+ GFP_NOFS);
+ bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
wbc_init_bio(wbc, bio);
bio->bi_write_hint = inode->i_write_hint;
}
@@ -625,7 +599,7 @@ alloc_new:
wbc_account_cgroup_owner(wbc, page, PAGE_SIZE);
length = first_unmapped << blkbits;
if (bio_add_page(bio, page, length, 0) < length) {
- bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
+ bio = mpage_bio_submit(bio);
goto alloc_new;
}
@@ -635,7 +609,7 @@ alloc_new:
set_page_writeback(page);
unlock_page(page);
if (boundary || (first_unmapped != blocks_per_page)) {
- bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
+ bio = mpage_bio_submit(bio);
if (boundary_block) {
write_boundary_block(boundary_bdev,
boundary_block, 1 << blkbits);
@@ -647,7 +621,7 @@ alloc_new:
confused:
if (bio)
- bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
+ bio = mpage_bio_submit(bio);
if (mpd->use_writepage) {
ret = mapping->a_ops->writepage(page, wbc);
@@ -703,11 +677,8 @@ mpage_writepages(struct address_space *mapping,
};
ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
- if (mpd.bio) {
- int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
- REQ_SYNC : 0);
- mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
- }
+ if (mpd.bio)
+ mpage_bio_submit(mpd.bio);
}
blk_finish_plug(&plug);
return ret;
@@ -724,11 +695,8 @@ int mpage_writepage(struct page *page, get_block_t get_block,
.use_writepage = 0,
};
int ret = __mpage_writepage(page, wbc, &mpd);
- if (mpd.bio) {
- int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
- REQ_SYNC : 0);
- mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
- }
+ if (mpd.bio)
+ mpage_bio_submit(mpd.bio);
return ret;
}
EXPORT_SYMBOL(mpage_writepage);
diff --git a/fs/namei.c b/fs/namei.c
index b867a92c078e..3f1829b3ab5b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4024,13 +4024,12 @@ int vfs_rmdir(struct user_namespace *mnt_userns, struct inode *dir,
dentry->d_inode->i_flags |= S_DEAD;
dont_mount(dentry);
detach_mounts(dentry);
- fsnotify_rmdir(dir, dentry);
out:
inode_unlock(dentry->d_inode);
dput(dentry);
if (!error)
- d_delete(dentry);
+ d_delete_notify(dir, dentry);
return error;
}
EXPORT_SYMBOL(vfs_rmdir);
@@ -4152,7 +4151,6 @@ int vfs_unlink(struct user_namespace *mnt_userns, struct inode *dir,
if (!error) {
dont_mount(dentry);
detach_mounts(dentry);
- fsnotify_unlink(dir, dentry);
}
}
}
@@ -4160,9 +4158,11 @@ out:
inode_unlock(target);
/* We don't d_delete() NFS sillyrenamed files--they still exist. */
- if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
+ if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) {
+ fsnotify_unlink(dir, dentry);
+ } else if (!error) {
fsnotify_link_count(target);
- d_delete(dentry);
+ d_delete_notify(dir, dentry);
}
return error;
diff --git a/fs/namespace.c b/fs/namespace.c
index 40b994a29e90..0044feef59d0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -469,6 +469,24 @@ void mnt_drop_write_file(struct file *file)
}
EXPORT_SYMBOL(mnt_drop_write_file);
+/**
+ * mnt_hold_writers - prevent write access to the given mount
+ * @mnt: mnt to prevent write access to
+ *
+ * Prevents write access to @mnt if there are no active writers for @mnt.
+ * This function needs to be called and return successfully before changing
+ * properties of @mnt that need to remain stable for callers with write access
+ * to @mnt.
+ *
+ * After this function has been called successfully, callers must pair it with
+ * a call to mnt_unhold_writers() in order to stop preventing write access to
+ * @mnt.
+ *
+ * Context: This function expects lock_mount_hash() to be held, serializing
+ * the setting of MNT_WRITE_HOLD.
+ * Return: On success 0 is returned.
+ * On error, -EBUSY is returned.
+ */
static inline int mnt_hold_writers(struct mount *mnt)
{
mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
@@ -500,6 +518,18 @@ static inline int mnt_hold_writers(struct mount *mnt)
return 0;
}
+/**
+ * mnt_unhold_writers - stop preventing write access to the given mount
+ * @mnt: mnt to stop preventing write access to
+ *
+ * Stop preventing write access to @mnt, allowing callers to gain write
+ * access to @mnt again.
+ *
+ * This function can only be called after a successful call to
+ * mnt_hold_writers().
+ *
+ * Context: This function expects lock_mount_hash() to be held.
+ */
static inline void mnt_unhold_writers(struct mount *mnt)
{
/*
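
Putting the two kernel-doc comments together, the intended call pattern looks roughly like this (a sketch of fs/namespace.c-internal usage, not a public API):

	lock_mount_hash();
	error = mnt_hold_writers(mnt);		/* -EBUSY if writers are active */
	if (!error) {
		/* change properties writers must not observe mid-flight */
		mnt_unhold_writers(mnt);
	}
	unlock_mount_hash();
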
@@ -2567,6 +2597,7 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *
struct super_block *sb = mnt->mnt_sb;
if (!__mnt_is_readonly(mnt) &&
+ (!(sb->s_iflags & SB_I_TS_EXPIRY_WARNED)) &&
(ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) {
char *buf = (char *)__get_free_page(GFP_KERNEL);
char *mntpath = buf ? d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM);
@@ -2581,6 +2612,7 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *
tm.tm_year+1900, (unsigned long long)sb->s_time_max);
free_page((unsigned long)buf);
+ sb->s_iflags |= SB_I_TS_EXPIRY_WARNED;
}
}
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index fe860c538747..79a8b451791f 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -115,23 +115,6 @@ bl_submit_bio(struct bio *bio)
return NULL;
}
-static struct bio *bl_alloc_init_bio(unsigned int npg,
- struct block_device *bdev, sector_t disk_sector,
- bio_end_io_t end_io, struct parallel_io *par)
-{
- struct bio *bio;
-
- npg = bio_max_segs(npg);
- bio = bio_alloc(GFP_NOIO, npg);
- if (bio) {
- bio->bi_iter.bi_sector = disk_sector;
- bio_set_dev(bio, bdev);
- bio->bi_end_io = end_io;
- bio->bi_private = par;
- }
- return bio;
-}
-
static bool offset_in_map(u64 offset, struct pnfs_block_dev_map *map)
{
return offset >= map->start && offset < map->start + map->len;
@@ -171,11 +154,10 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
retry:
if (!bio) {
- bio = bl_alloc_init_bio(npg, map->bdev,
- disk_addr >> SECTOR_SHIFT, end_io, par);
- if (!bio)
- return ERR_PTR(-ENOMEM);
- bio_set_op_attrs(bio, rw, 0);
+ bio = bio_alloc(map->bdev, bio_max_segs(npg), rw, GFP_NOIO);
+ bio->bi_iter.bi_sector = disk_addr >> SECTOR_SHIFT;
+ bio->bi_end_io = end_io;
+ bio->bi_private = par;
}
if (bio_add_page(bio, page, *len, offset) < *len) {
bio = bl_submit_bio(bio);
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index ef9db135c649..6c977288cc28 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -27,7 +27,6 @@
*/
#include <linux/module.h>
-#include <linux/genhd.h>
#include <linux/blkdev.h>
#include "blocklayout.h"
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 054cc1255fac..456af7d230cf 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -17,7 +17,6 @@
#include <linux/errno.h>
#include <linux/mutex.h>
#include <linux/freezer.h>
-#include <linux/kthread.h>
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/bc_xprt.h>
@@ -45,18 +44,18 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net)
int ret;
struct nfs_net *nn = net_generic(net, nfs_net_id);
- ret = svc_create_xprt(serv, "tcp", net, PF_INET,
- nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
- cred);
+ ret = svc_xprt_create(serv, "tcp", net, PF_INET,
+ nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
+ cred);
if (ret <= 0)
goto out_err;
nn->nfs_callback_tcpport = ret;
dprintk("NFS: Callback listener port = %u (af %u, net %x)\n",
nn->nfs_callback_tcpport, PF_INET, net->ns.inum);
- ret = svc_create_xprt(serv, "tcp", net, PF_INET6,
- nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
- cred);
+ ret = svc_xprt_create(serv, "tcp", net, PF_INET6,
+ nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
+ cred);
if (ret > 0) {
nn->nfs_callback_tcpport6 = ret;
dprintk("NFS: Callback listener port = %u (af %u, net %x)\n",
@@ -92,8 +91,8 @@ nfs4_callback_svc(void *vrqstp)
continue;
svc_process(rqstp);
}
+
svc_exit_thread(rqstp);
- module_put_and_kthread_exit(0);
return 0;
}
@@ -136,8 +135,8 @@ nfs41_callback_svc(void *vrqstp)
finish_wait(&serv->sv_cb_waitq, &wq);
}
}
+
svc_exit_thread(rqstp);
- module_put_and_kthread_exit(0);
return 0;
}
@@ -189,7 +188,7 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc
return;
dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum);
- svc_shutdown_net(serv, net);
+ svc_xprt_destroy_all(serv, net);
}
static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
@@ -232,33 +231,10 @@ err_bind:
return ret;
}
-static const struct svc_serv_ops nfs40_cb_sv_ops = {
- .svo_function = nfs4_callback_svc,
- .svo_enqueue_xprt = svc_xprt_do_enqueue,
- .svo_module = THIS_MODULE,
-};
-#if defined(CONFIG_NFS_V4_1)
-static const struct svc_serv_ops nfs41_cb_sv_ops = {
- .svo_function = nfs41_callback_svc,
- .svo_enqueue_xprt = svc_xprt_do_enqueue,
- .svo_module = THIS_MODULE,
-};
-
-static const struct svc_serv_ops *nfs4_cb_sv_ops[] = {
- [0] = &nfs40_cb_sv_ops,
- [1] = &nfs41_cb_sv_ops,
-};
-#else
-static const struct svc_serv_ops *nfs4_cb_sv_ops[] = {
- [0] = &nfs40_cb_sv_ops,
- [1] = NULL,
-};
-#endif
-
static struct svc_serv *nfs_callback_create_svc(int minorversion)
{
struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
- const struct svc_serv_ops *sv_ops;
+ int (*threadfn)(void *data);
struct svc_serv *serv;
/*
@@ -267,17 +243,6 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
if (cb_info->serv)
return svc_get(cb_info->serv);
- switch (minorversion) {
- case 0:
- sv_ops = nfs4_cb_sv_ops[0];
- break;
- default:
- sv_ops = nfs4_cb_sv_ops[1];
- }
-
- if (sv_ops == NULL)
- return ERR_PTR(-ENOTSUPP);
-
/*
* Sanity check: if there's no task,
* we should be the first user ...
@@ -286,7 +251,16 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n",
cb_info->users);
- serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops);
+ threadfn = nfs4_callback_svc;
+#if defined(CONFIG_NFS_V4_1)
+ if (minorversion)
+ threadfn = nfs41_callback_svc;
+#else
+ if (minorversion)
+ return ERR_PTR(-ENOTSUPP);
+#endif
+ serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE,
+ threadfn);
if (!serv) {
printk(KERN_ERR "nfs_callback_create_svc: create service failed\n");
return ERR_PTR(-ENOMEM);
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 6a2033131c06..ccd4f245cae2 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -170,7 +170,7 @@ struct cb_devicenotifyitem {
};
struct cb_devicenotifyargs {
- int ndevs;
+ uint32_t ndevs;
struct cb_devicenotifyitem *devs;
};
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 09c5b1cb3e07..c343666d9a42 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -358,7 +358,7 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp,
struct cb_process_state *cps)
{
struct cb_devicenotifyargs *args = argp;
- int i;
+ uint32_t i;
__be32 res = 0;
struct nfs_client *clp = cps->clp;
struct nfs_server *server = NULL;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index a67c41ec545f..f90de8043b0f 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -258,11 +258,9 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
void *argp)
{
struct cb_devicenotifyargs *args = argp;
+ uint32_t tmp, n, i;
__be32 *p;
__be32 status = 0;
- u32 tmp;
- int n, i;
- args->ndevs = 0;
/* Num of device notifications */
p = xdr_inline_decode(xdr, sizeof(uint32_t));
@@ -271,7 +269,7 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
goto out;
}
n = ntohl(*p++);
- if (n <= 0)
+ if (n == 0)
goto out;
if (n > ULONG_MAX / sizeof(*args->devs)) {
status = htonl(NFS4ERR_BADXDR);
@@ -330,19 +328,21 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
dev->cbd_immediate = 0;
}
- args->ndevs++;
-
dprintk("%s: type %d layout 0x%x immediate %d\n",
__func__, dev->cbd_notify_type, dev->cbd_layout_type,
dev->cbd_immediate);
}
+ args->ndevs = n;
+ dprintk("%s: ndevs %d\n", __func__, args->ndevs);
+ return 0;
+err:
+ kfree(args->devs);
out:
+ args->devs = NULL;
+ args->ndevs = 0;
dprintk("%s: status %d ndevs %d\n",
__func__, ntohl(status), args->ndevs);
return status;
-err:
- kfree(args->devs);
- goto out;
}
static __be32 decode_sessionid(struct xdr_stream *xdr,
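
The signedness fix above (int → uint32_t, n <= 0 → n == 0) pairs with the existing ULONG_MAX bound to keep the allocation size overflow-safe. The shape of that check as a standalone sketch (check_count is a hypothetical helper):

	#include <stdint.h>
	#include <stddef.h>

	/* Returns 1 and sets *bytes if n items of elem_size fit in size_t,
	 * 0 if there is nothing to decode, -1 on would-be overflow. */
	static int check_count(uint32_t n, size_t elem_size, size_t *bytes)
	{
		if (n == 0)
			return 0;
		if (n > SIZE_MAX / elem_size)
			return -1;
		*bytes = (size_t)n * elem_size;
		return 1;
	}
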
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 8d8b85b5a641..d1f34229e11a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -177,6 +177,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
INIT_LIST_HEAD(&clp->cl_superblocks);
clp->cl_rpcclient = ERR_PTR(-EINVAL);
+ clp->cl_flags = cl_init->init_flags;
clp->cl_proto = cl_init->proto;
clp->cl_nconnect = cl_init->nconnect;
clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1;
@@ -423,7 +424,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
list_add_tail(&new->cl_share_link,
&nn->nfs_client_list);
spin_unlock(&nn->nfs_client_lock);
- new->cl_flags = cl_init->init_flags;
return rpc_ops->init_client(new, cl_init);
}
@@ -856,6 +856,13 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
server->namelen = pathinfo.max_namelen;
}
+ if (clp->rpc_ops->discover_trunking != NULL &&
+ (server->caps & NFS_CAP_FS_LOCATIONS)) {
+ error = clp->rpc_ops->discover_trunking(server, mntfh);
+ if (error < 0)
+ return error;
+ }
+
return 0;
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 347793626f19..75cb1cbe4cde 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -80,6 +80,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
ctx->dir_cookie = 0;
ctx->dup_cookie = 0;
ctx->page_index = 0;
+ ctx->eof = false;
spin_lock(&dir->i_lock);
if (list_empty(&nfsi->open_files) &&
(nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
@@ -168,6 +169,7 @@ struct nfs_readdir_descriptor {
unsigned int cache_entry_index;
signed char duped;
bool plus;
+ bool eob;
bool eof;
};
@@ -867,7 +869,8 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
status = nfs_readdir_page_filler(desc, entry, pages, pglen,
arrays, narrays);
- } while (!status && nfs_readdir_page_needs_filling(page));
+ } while (!status && nfs_readdir_page_needs_filling(page) &&
+ page_mapping(page));
nfs_readdir_free_pages(pages, array_size);
out:
@@ -988,7 +991,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
ent = &array->array[i];
if (!dir_emit(desc->ctx, ent->name, ent->name_len,
nfs_compat_user_ino64(ent->ino), ent->d_type)) {
- desc->eof = true;
+ desc->eob = true;
break;
}
memcpy(desc->verf, verf, sizeof(desc->verf));
@@ -1004,7 +1007,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
desc->duped = 1;
}
if (array->page_is_eof)
- desc->eof = true;
+ desc->eof = !desc->eob;
kunmap(desc->page);
dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n",
@@ -1041,12 +1044,13 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
goto out;
desc->page_index = 0;
+ desc->cache_entry_index = 0;
desc->last_cookie = desc->dir_cookie;
desc->duped = 0;
status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz);
- for (i = 0; !desc->eof && i < sz && arrays[i]; i++) {
+ for (i = 0; !desc->eob && i < sz && arrays[i]; i++) {
desc->page = arrays[i];
nfs_do_filldir(desc, verf);
}
@@ -1105,9 +1109,15 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
desc->duped = dir_ctx->duped;
page_index = dir_ctx->page_index;
desc->attr_gencount = dir_ctx->attr_gencount;
+ desc->eof = dir_ctx->eof;
memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
spin_unlock(&file->f_lock);
+ if (desc->eof) {
+ res = 0;
+ goto out_free;
+ }
+
if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags) &&
list_is_singular(&nfsi->open_files))
invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1);
@@ -1141,7 +1151,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
nfs_do_filldir(desc, nfsi->cookieverf);
nfs_readdir_page_unlock_and_put_cached(desc);
- } while (!desc->eof);
+ } while (!desc->eob && !desc->eof);
spin_lock(&file->f_lock);
dir_ctx->dir_cookie = desc->dir_cookie;
@@ -1149,9 +1159,10 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
dir_ctx->duped = desc->duped;
dir_ctx->attr_gencount = desc->attr_gencount;
dir_ctx->page_index = desc->page_index;
+ dir_ctx->eof = desc->eof;
memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
spin_unlock(&file->f_lock);
-
+out_free:
kfree(desc);
out:
@@ -1193,6 +1204,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
if (offset == 0)
memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf));
dir_ctx->duped = 0;
+ dir_ctx->eof = false;
}
spin_unlock(&filp->f_lock);
return offset;
@@ -1325,6 +1337,14 @@ void nfs_clear_verifier_delegated(struct inode *inode)
EXPORT_SYMBOL_GPL(nfs_clear_verifier_delegated);
#endif /* IS_ENABLED(CONFIG_NFS_V4) */
+static int nfs_dentry_verify_change(struct inode *dir, struct dentry *dentry)
+{
+ if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE) &&
+ d_really_is_negative(dentry))
+ return dentry->d_time == inode_peek_iversion_raw(dir);
+ return nfs_verify_change_attribute(dir, dentry->d_time);
+}
+
/*
* A check for whether or not the parent directory has changed.
* In the case it has, we assume that the dentries are untrustworthy
@@ -1338,7 +1358,7 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
return 1;
if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
return 0;
- if (!nfs_verify_change_attribute(dir, dentry->d_time))
+ if (!nfs_dentry_verify_change(dir, dentry))
return 0;
/* Revalidate nfsi->cache_change_attribute before we declare a match */
if (nfs_mapping_need_revalidate_inode(dir)) {
@@ -1347,7 +1367,7 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
if (__nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
return 0;
}
- if (!nfs_verify_change_attribute(dir, dentry->d_time))
+ if (!nfs_dentry_verify_change(dir, dentry))
return 0;
return 1;
}
@@ -1437,6 +1457,9 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
return 0;
if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
return 1;
+ /* Case insensitive server? Revalidate negative dentries */
+ if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
+ return 1;
return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
}
@@ -1537,7 +1560,7 @@ out:
* If the lookup failed despite the dentry change attribute being
* a match, then we should revalidate the directory cache.
*/
- if (!ret && nfs_verify_change_attribute(dir, dentry->d_time))
+ if (!ret && nfs_dentry_verify_change(dir, dentry))
nfs_mark_dir_for_revalidate(dir);
return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
}
@@ -1776,8 +1799,11 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
dir_verifier = nfs_save_change_attribute(dir);
trace_nfs_lookup_enter(dir, dentry, flags);
error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
- if (error == -ENOENT)
+ if (error == -ENOENT) {
+ if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
+ dir_verifier = inode_peek_iversion_raw(dir);
goto no_entry;
+ }
if (error < 0) {
res = ERR_PTR(error);
goto out;
@@ -1806,6 +1832,14 @@ out:
}
EXPORT_SYMBOL_GPL(nfs_lookup);
+void nfs_d_prune_case_insensitive_aliases(struct inode *inode)
+{
+ /* Case insensitive server? Revalidate dentries */
+ if (inode && nfs_server_capable(inode, NFS_CAP_CASE_INSENSITIVE))
+ d_prune_aliases(inode);
+}
+EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases);
+
#if IS_ENABLED(CONFIG_NFS_V4)
static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
@@ -1867,6 +1901,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
struct iattr attr = { .ia_valid = ATTR_OPEN };
struct inode *inode;
unsigned int lookup_flags = 0;
+ unsigned long dir_verifier;
bool switched = false;
int created = 0;
int err;
@@ -1940,7 +1975,11 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
switch (err) {
case -ENOENT:
d_splice_alias(NULL, dentry);
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
+ dir_verifier = inode_peek_iversion_raw(dir);
+ else
+ dir_verifier = nfs_save_change_attribute(dir);
+ nfs_set_verifier(dentry, dir_verifier);
break;
case -EISDIR:
case -ENOTDIR:
@@ -1968,6 +2007,24 @@ out:
no_open:
res = nfs_lookup(dir, dentry, lookup_flags);
+ if (!res) {
+ inode = d_inode(dentry);
+ if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
+ !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)))
+ res = ERR_PTR(-ENOTDIR);
+ else if (inode && S_ISREG(inode->i_mode))
+ res = ERR_PTR(-EOPENSTALE);
+ } else if (!IS_ERR(res)) {
+ inode = d_inode(res);
+ if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
+ !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) {
+ dput(res);
+ res = ERR_PTR(-ENOTDIR);
+ } else if (inode && S_ISREG(inode->i_mode)) {
+ dput(res);
+ res = ERR_PTR(-EOPENSTALE);
+ }
+ }
if (switched) {
d_lookup_done(dentry);
if (!res)
@@ -2186,8 +2243,10 @@ static void nfs_dentry_remove_handle_error(struct inode *dir,
switch (error) {
case -ENOENT:
d_delete(dentry);
- fallthrough;
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ break;
case 0:
+ nfs_d_prune_case_insensitive_aliases(d_inode(dentry));
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
}
}
@@ -2380,6 +2439,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
trace_nfs_link_enter(inode, dir, dentry);
d_drop(dentry);
+ if (S_ISREG(inode->i_mode))
+ nfs_sync_inode(inode);
error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
if (error == 0) {
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
@@ -2469,6 +2530,8 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
}
}
+ if (S_ISREG(old_inode->i_mode))
+ nfs_sync_inode(old_inode);
task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
if (IS_ERR(task)) {
error = PTR_ERR(task);
@@ -2529,7 +2592,7 @@ MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache lengt
static void nfs_access_free_entry(struct nfs_access_entry *entry)
{
- put_cred(entry->cred);
+ put_group_info(entry->group_info);
kfree_rcu(entry, rcu_head);
smp_mb__before_atomic();
atomic_long_dec(&nfs_access_nr_entries);
@@ -2655,6 +2718,43 @@ void nfs_access_zap_cache(struct inode *inode)
}
EXPORT_SYMBOL_GPL(nfs_access_zap_cache);
+static int access_cmp(const struct cred *a, const struct nfs_access_entry *b)
+{
+ struct group_info *ga, *gb;
+ int g;
+
+ if (uid_lt(a->fsuid, b->fsuid))
+ return -1;
+ if (uid_gt(a->fsuid, b->fsuid))
+ return 1;
+
+ if (gid_lt(a->fsgid, b->fsgid))
+ return -1;
+ if (gid_gt(a->fsgid, b->fsgid))
+ return 1;
+
+ ga = a->group_info;
+ gb = b->group_info;
+ if (ga == gb)
+ return 0;
+ if (ga == NULL)
+ return -1;
+ if (gb == NULL)
+ return 1;
+ if (ga->ngroups < gb->ngroups)
+ return -1;
+ if (ga->ngroups > gb->ngroups)
+ return 1;
+
+ for (g = 0; g < ga->ngroups; g++) {
+ if (gid_lt(ga->gid[g], gb->gid[g]))
+ return -1;
+ if (gid_gt(ga->gid[g], gb->gid[g]))
+ return 1;
+ }
+ return 0;
+}
+
static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, const struct cred *cred)
{
struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
@@ -2662,7 +2762,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, co
while (n != NULL) {
struct nfs_access_entry *entry =
rb_entry(n, struct nfs_access_entry, rb_node);
- int cmp = cred_fscmp(cred, entry->cred);
+ int cmp = access_cmp(cred, entry);
if (cmp < 0)
n = n->rb_left;
@@ -2674,7 +2774,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, co
return NULL;
}
-static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block)
+static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_access_entry *cache;
@@ -2704,8 +2804,7 @@ static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *
spin_lock(&inode->i_lock);
retry = false;
}
- res->cred = cache->cred;
- res->mask = cache->mask;
+ *mask = cache->mask;
list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
err = 0;
out:
@@ -2717,7 +2816,7 @@ out_zap:
return -ENOENT;
}
-static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res)
+static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, u32 *mask)
{
/* Only check the most recently returned cache entry,
* but do it without locking.
@@ -2733,35 +2832,36 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre
lh = rcu_dereference(list_tail_rcu(&nfsi->access_cache_entry_lru));
cache = list_entry(lh, struct nfs_access_entry, lru);
if (lh == &nfsi->access_cache_entry_lru ||
- cred_fscmp(cred, cache->cred) != 0)
+ access_cmp(cred, cache) != 0)
cache = NULL;
if (cache == NULL)
goto out;
if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
goto out;
- res->cred = cache->cred;
- res->mask = cache->mask;
+ *mask = cache->mask;
err = 0;
out:
rcu_read_unlock();
return err;
}
-int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct
-nfs_access_entry *res, bool may_block)
+int nfs_access_get_cached(struct inode *inode, const struct cred *cred,
+ u32 *mask, bool may_block)
{
int status;
- status = nfs_access_get_cached_rcu(inode, cred, res);
+ status = nfs_access_get_cached_rcu(inode, cred, mask);
if (status != 0)
- status = nfs_access_get_cached_locked(inode, cred, res,
+ status = nfs_access_get_cached_locked(inode, cred, mask,
may_block);
return status;
}
EXPORT_SYMBOL_GPL(nfs_access_get_cached);
-static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
+static void nfs_access_add_rbtree(struct inode *inode,
+ struct nfs_access_entry *set,
+ const struct cred *cred)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct rb_root *root_node = &nfsi->access_cache;
@@ -2774,7 +2874,7 @@ static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *
while (*p != NULL) {
parent = *p;
entry = rb_entry(parent, struct nfs_access_entry, rb_node);
- cmp = cred_fscmp(set->cred, entry->cred);
+ cmp = access_cmp(cred, entry);
if (cmp < 0)
p = &parent->rb_left;
@@ -2796,13 +2896,16 @@ found:
nfs_access_free_entry(entry);
}
-void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set,
+ const struct cred *cred)
{
struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
if (cache == NULL)
return;
RB_CLEAR_NODE(&cache->rb_node);
- cache->cred = get_cred(set->cred);
+ cache->fsuid = cred->fsuid;
+ cache->fsgid = cred->fsgid;
+ cache->group_info = get_group_info(cred->group_info);
cache->mask = set->mask;
/* The above field assignments must be visible
@@ -2810,7 +2913,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
* use rcu_assign_pointer, so just force the memory barrier.
*/
smp_wmb();
- nfs_access_add_rbtree(inode, cache);
+ nfs_access_add_rbtree(inode, cache, cred);
/* Update accounting */
smp_mb__before_atomic();
@@ -2875,7 +2978,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
trace_nfs_access_enter(inode);
- status = nfs_access_get_cached(inode, cred, &cache, may_block);
+ status = nfs_access_get_cached(inode, cred, &cache.mask, may_block);
if (status == 0)
goto out_cached;
@@ -2895,8 +2998,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
else
cache.mask |= NFS_ACCESS_EXECUTE;
- cache.cred = cred;
- status = NFS_PROTO(inode)->access(inode, &cache);
+ status = NFS_PROTO(inode)->access(inode, &cache, cred);
if (status != 0) {
if (status == -ESTALE) {
if (!S_ISDIR(inode->i_mode))
@@ -2906,7 +3008,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
}
goto out;
}
- nfs_access_add_cache(inode, &cache);
+ nfs_access_add_cache(inode, &cache, cred);
out_cached:
cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode);
if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 76d76acbc594..2df2a5392737 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -406,17 +406,17 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
* - Called if either PG_private or PG_fscache is set on the page
* - Caller holds page lock
*/
-static void nfs_invalidate_page(struct page *page, unsigned int offset,
- unsigned int length)
+static void nfs_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length)
{
- dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %u, %u)\n",
- page, offset, length);
+ dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n",
+ folio->index, offset, length);
- if (offset != 0 || length < PAGE_SIZE)
+ if (offset != 0 || length < folio_size(folio))
return;
/* Cancel any unstarted writes on this page */
- nfs_wb_page_cancel(page_file_mapping(page)->host, page);
- wait_on_page_fscache(page);
+ nfs_wb_folio_cancel(folio->mapping->host, folio);
+ folio_wait_fscache(folio);
}
/*
@@ -472,15 +472,15 @@ static void nfs_check_dirty_writeback(struct page *page,
* - Caller holds page lock
* - Return 0 if successful, -error otherwise
*/
-static int nfs_launder_page(struct page *page)
+static int nfs_launder_folio(struct folio *folio)
{
- struct inode *inode = page_file_mapping(page)->host;
+ struct inode *inode = folio->mapping->host;
- dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n",
- inode->i_ino, (long long)page_offset(page));
+ dfprintk(PAGECACHE, "NFS: launder_folio(%ld, %llu)\n",
+ inode->i_ino, folio_pos(folio));
- wait_on_page_fscache(page);
- return nfs_wb_page(inode, page);
+ folio_wait_fscache(folio);
+ return nfs_wb_page(inode, &folio->page);
}
static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
@@ -515,18 +515,18 @@ static void nfs_swap_deactivate(struct file *file)
const struct address_space_operations nfs_file_aops = {
.readpage = nfs_readpage,
.readpages = nfs_readpages,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .dirty_folio = filemap_dirty_folio,
.writepage = nfs_writepage,
.writepages = nfs_writepages,
.write_begin = nfs_write_begin,
.write_end = nfs_write_end,
- .invalidatepage = nfs_invalidate_page,
+ .invalidate_folio = nfs_invalidate_folio,
.releasepage = nfs_release_page,
.direct_IO = nfs_direct_IO,
#ifdef CONFIG_MIGRATION
.migratepage = nfs_migrate_page,
#endif
- .launder_page = nfs_launder_page,
+ .launder_folio = nfs_launder_folio,
.is_dirty_writeback = nfs_check_dirty_writeback,
.error_remove_page = generic_error_remove_page,
.swap_activate = nfs_swap_activate,
diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h
index 79323b5dab0c..aed0748fd6ec 100644
--- a/fs/nfs/filelayout/filelayout.h
+++ b/fs/nfs/filelayout/filelayout.h
@@ -51,7 +51,7 @@ struct nfs4_file_layout_dsaddr {
u32 stripe_count;
u8 *stripe_indices;
u32 ds_num;
- struct nfs4_pnfs_ds *ds_list[1];
+ struct nfs4_pnfs_ds *ds_list[];
};
struct nfs4_filelayout_segment {
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 86c3f7e69ec4..acf4b88889dc 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -136,9 +136,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out_err_free_stripe_indices;
}
- dsaddr = kzalloc(sizeof(*dsaddr) +
- (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
- gfp_flags);
+ dsaddr = kzalloc(struct_size(dsaddr, ds_list, num), gfp_flags);
if (!dsaddr)
goto out_err_free_stripe_indices;
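The filelayout hunks above are the stock conversion from a one-element trailing array to a C99 flexible array member, with the allocation size computed by struct_size() from <linux/overflow.h> (which also saturates on multiplication overflow). A minimal userspace sketch of the same sizing arithmetic, with illustrative names:

    #include <stdio.h>
    #include <stdlib.h>

    struct dsaddr {
        unsigned int ds_num;
        void *ds_list[];        /* flexible array member, formerly ds_list[1] */
    };

    int main(void)
    {
        unsigned int num = 4;
        /* Equivalent of struct_size(dsaddr, ds_list, num): the header plus
         * exactly num trailing elements -- no error-prone "num - 1"
         * correction for the old one-element array. The kernel macro
         * additionally guards the multiplication against overflow. */
        size_t sz = sizeof(struct dsaddr) + num * sizeof(void *);
        struct dsaddr *d = calloc(1, sz);

        if (!d)
            return 1;
        d->ds_num = num;
        printf("allocated %zu bytes for %u entries\n", sz, d->ds_num);
        free(d);
        return 0;
    }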
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a918c3a834b6..3351c2de3e08 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -853,12 +853,9 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
}
/* Flush out writes to the server in order to update c/mtime. */
- if ((request_mask & (STATX_CTIME|STATX_MTIME)) &&
- S_ISREG(inode->i_mode)) {
- err = filemap_write_and_wait(inode->i_mapping);
- if (err)
- goto out;
- }
+ if ((request_mask & (STATX_CTIME | STATX_MTIME)) &&
+ S_ISREG(inode->i_mode))
+ filemap_write_and_wait(inode->i_mapping);
/*
* We may force a getattr if the user cares about atime.
@@ -2241,7 +2238,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
struct inode *nfs_alloc_inode(struct super_block *sb)
{
struct nfs_inode *nfsi;
- nfsi = kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL);
+ nfsi = alloc_inode_sb(sb, nfs_inode_cachep, GFP_KERNEL);
if (!nfsi)
return NULL;
nfsi->flags = 0UL;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 12f6acb483bb..2de7c56a1fbe 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -373,6 +373,7 @@ extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
struct shrink_control *sc);
struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
+void nfs_d_prune_case_insensitive_aliases(struct inode *inode);
int nfs_create(struct user_namespace *, struct inode *, struct dentry *,
umode_t, bool);
int nfs_mkdir(struct user_namespace *, struct inode *, struct dentry *,
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7100514d306b..1597eef40d54 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -220,7 +220,8 @@ static int nfs3_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
task_flags);
}
-static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry,
+ const struct cred *cred)
{
struct nfs3_accessargs arg = {
.fh = NFS_FH(inode),
@@ -231,7 +232,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
.rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS],
.rpc_argp = &arg,
.rpc_resp = &res,
- .rpc_cred = entry->cred,
+ .rpc_cred = cred,
};
int status = -ENOMEM;
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 8b21ff1be717..32129446beca 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -46,7 +46,7 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
{
struct inode *inode = file_inode(filep);
struct nfs_server *server = NFS_SERVER(inode);
- u32 bitmask[3];
+ u32 bitmask[NFS_BITMASK_SZ];
struct nfs42_falloc_args args = {
.falloc_fh = NFS_FH(inode),
.falloc_offset = offset,
@@ -69,9 +69,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
return status;
}
- memcpy(bitmask, server->cache_consistency_bitmask, sizeof(bitmask));
- if (server->attr_bitmask[1] & FATTR4_WORD1_SPACE_USED)
- bitmask[1] |= FATTR4_WORD1_SPACE_USED;
+ nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask, inode,
+ NFS_INO_INVALID_BLOCKS);
res.falloc_fattr = nfs_alloc_fattr();
if (!res.falloc_fattr)
@@ -1044,13 +1043,14 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
struct inode *src_inode = file_inode(src_f);
struct inode *dst_inode = file_inode(dst_f);
struct nfs_server *server = NFS_SERVER(dst_inode);
+ __u32 dst_bitmask[NFS_BITMASK_SZ];
struct nfs42_clone_args args = {
.src_fh = NFS_FH(src_inode),
.dst_fh = NFS_FH(dst_inode),
.src_offset = src_offset,
.dst_offset = dst_offset,
.count = count,
- .dst_bitmask = server->cache_consistency_bitmask,
+ .dst_bitmask = dst_bitmask,
};
struct nfs42_clone_res res = {
.server = server,
@@ -1079,6 +1079,9 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
if (!res.dst_fattr)
return -ENOMEM;
+ nfs4_bitmask_set(dst_bitmask, server->cache_consistency_bitmask,
+ dst_inode, NFS_INO_INVALID_BLOCKS);
+
status = nfs4_call_sync(server->client, server, msg,
&args.seq_args, &res.seq_res, 0);
trace_nfs4_clone(src_inode, dst_inode, &args, status);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ed5eaca6801e..84f39b6f1b1e 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -260,8 +260,8 @@ struct nfs4_state_maintenance_ops {
};
struct nfs4_mig_recovery_ops {
- int (*get_locations)(struct inode *, struct nfs4_fs_locations *,
- struct page *, const struct cred *);
+ int (*get_locations)(struct nfs_server *, struct nfs_fh *,
+ struct nfs4_fs_locations *, struct page *, const struct cred *);
int (*fsid_present)(struct inode *, const struct cred *);
};
@@ -280,7 +280,8 @@ struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *,
int nfs4_submount(struct fs_context *, struct nfs_server *);
int nfs4_replace_transport(struct nfs_server *server,
const struct nfs4_fs_locations *locations);
-
+size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa,
+ size_t salen, struct net *net, int port);
/* nfs4proc.c */
extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *);
extern int nfs4_async_handle_error(struct rpc_task *task,
@@ -302,8 +303,9 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait);
extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *,
struct nfs4_fs_locations *, struct page *);
-extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *,
- struct page *page, const struct cred *);
+extern int nfs4_proc_get_locations(struct nfs_server *, struct nfs_fh *,
+ struct nfs4_fs_locations *,
+ struct page *page, const struct cred *);
extern int nfs4_proc_fsid_present(struct inode *, const struct cred *);
extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *,
struct dentry *,
@@ -315,6 +317,8 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid,
const struct nfs_open_context *ctx,
const struct nfs_lock_context *l_ctx,
fmode_t fmode);
+extern void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[],
+ struct inode *inode, unsigned long cache_validity);
extern int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct inode *inode);
extern int update_open_stateid(struct nfs4_state *state,
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index d8b5a250ca05..47a6cf892c95 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1343,8 +1343,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
}
nfs_put_client(clp);
- if (server->nfs_client->cl_hostname == NULL)
+ if (server->nfs_client->cl_hostname == NULL) {
server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL);
+ if (server->nfs_client->cl_hostname == NULL)
+ return -ENOMEM;
+ }
nfs_server_insert_lists(server);
return nfs_probe_server(server, NFS_FH(d_inode(server->super->s_root)));
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 873342308dc0..3680c8da510c 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -164,16 +164,21 @@ static int nfs4_validate_fspath(struct dentry *dentry,
return 0;
}
-static size_t nfs_parse_server_name(char *string, size_t len,
- struct sockaddr *sa, size_t salen, struct net *net)
+size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa,
+ size_t salen, struct net *net, int port)
{
ssize_t ret;
ret = rpc_pton(net, string, len, sa, salen);
if (ret == 0) {
- ret = nfs_dns_resolve_name(net, string, len, sa, salen);
- if (ret < 0)
- ret = 0;
+ ret = rpc_uaddr2sockaddr(net, string, len, sa, salen);
+ if (ret == 0) {
+ ret = nfs_dns_resolve_name(net, string, len, sa, salen);
+ if (ret < 0)
+ ret = 0;
+ }
+ } else if (port) {
+ rpc_set_port(sa, port);
}
return ret;
}
@@ -328,7 +333,7 @@ static int try_location(struct fs_context *fc,
nfs_parse_server_name(buf->data, buf->len,
&ctx->nfs_server.address,
sizeof(ctx->nfs_server._address),
- fc->net_ns);
+ fc->net_ns, 0);
if (ctx->nfs_server.addrlen == 0)
continue;
@@ -496,7 +501,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server,
continue;
salen = nfs_parse_server_name(buf->data, buf->len,
- sap, addr_bufsize, net);
+ sap, addr_bufsize, net, 0);
if (salen == 0)
continue;
rpc_set_port(sap, NFS_PORT);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ee3bc79f6ca3..0e0db6c27619 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -108,10 +108,6 @@ static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
const struct cred *, bool);
#endif
-static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ],
- const __u32 *src, struct inode *inode,
- struct nfs_server *server,
- struct nfs4_label *label);
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
static inline struct nfs4_label *
@@ -1233,8 +1229,7 @@ nfs4_update_changeattr_locked(struct inode *inode,
NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL |
NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER |
NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK |
- NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR |
- NFS_INO_REVAL_PAGECACHE;
+ NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR;
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
}
nfsi->attrtimeo_timestamp = jiffies;
@@ -2653,9 +2648,8 @@ static int nfs4_opendata_access(const struct cred *cred,
} else if ((fmode & FMODE_READ) && !opendata->file_created)
mask = NFS4_ACCESS_READ;
- cache.cred = cred;
nfs_access_set_mask(&cache, opendata->o_res.access_result);
- nfs_access_add_cache(state->inode, &cache);
+ nfs_access_add_cache(state->inode, &cache, cred);
flags = NFS4_ACCESS_READ | NFS4_ACCESS_EXECUTE | NFS4_ACCESS_LOOKUP;
if ((mask & ~cache.mask & flags) == 0)
@@ -3670,7 +3664,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
if (!nfs4_have_delegation(inode, FMODE_READ)) {
nfs4_bitmask_set(calldata->arg.bitmask_store,
server->cache_consistency_bitmask,
- inode, server, NULL);
+ inode, 0);
calldata->arg.bitmask = calldata->arg.bitmask_store;
} else
calldata->arg.bitmask = NULL;
@@ -3841,7 +3835,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
FATTR4_WORD0_FH_EXPIRE_TYPE |
FATTR4_WORD0_LINK_SUPPORT |
FATTR4_WORD0_SYMLINK_SUPPORT |
- FATTR4_WORD0_ACLSUPPORT;
+ FATTR4_WORD0_ACLSUPPORT |
+ FATTR4_WORD0_CASE_INSENSITIVE |
+ FATTR4_WORD0_CASE_PRESERVING;
if (minorversion)
bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT;
@@ -3870,10 +3866,16 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
server->caps |= NFS_CAP_HARDLINKS;
if (res.has_symlinks != 0)
server->caps |= NFS_CAP_SYMLINKS;
+ if (res.case_insensitive)
+ server->caps |= NFS_CAP_CASE_INSENSITIVE;
+ if (res.case_preserving)
+ server->caps |= NFS_CAP_CASE_PRESERVING;
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL)
server->caps |= NFS_CAP_SECURITY_LABEL;
#endif
+ if (res.attr_bitmask[0] & FATTR4_WORD0_FS_LOCATIONS)
+ server->caps |= NFS_CAP_FS_LOCATIONS;
if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID))
server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID;
if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE))
@@ -3932,6 +3934,114 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
return err;
}
+static void test_fs_location_for_trunking(struct nfs4_fs_location *location,
+ struct nfs_client *clp,
+ struct nfs_server *server)
+{
+ int i;
+
+ for (i = 0; i < location->nservers; i++) {
+ struct nfs4_string *srv_loc = &location->servers[i];
+ struct sockaddr addr;
+ size_t addrlen;
+ struct xprt_create xprt_args = {
+ .ident = 0,
+ .net = clp->cl_net,
+ };
+ struct nfs4_add_xprt_data xprtdata = {
+ .clp = clp,
+ };
+ struct rpc_add_xprt_test rpcdata = {
+ .add_xprt_test = clp->cl_mvops->session_trunk,
+ .data = &xprtdata,
+ };
+ char *servername = NULL;
+
+ if (!srv_loc->len)
+ continue;
+
+ addrlen = nfs_parse_server_name(srv_loc->data, srv_loc->len,
+ &addr, sizeof(addr),
+ clp->cl_net, server->port);
+ if (!addrlen)
+ return;
+ xprt_args.dstaddr = &addr;
+ xprt_args.addrlen = addrlen;
+ servername = kmalloc(srv_loc->len + 1, GFP_KERNEL);
+ if (!servername)
+ return;
+ memcpy(servername, srv_loc->data, srv_loc->len);
+ servername[srv_loc->len] = '\0';
+ xprt_args.servername = servername;
+
+ xprtdata.cred = nfs4_get_clid_cred(clp);
+ rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
+ rpc_clnt_setup_test_and_add_xprt,
+ &rpcdata);
+ if (xprtdata.cred)
+ put_cred(xprtdata.cred);
+ kfree(servername);
+ }
+}
+
+static int _nfs4_discover_trunking(struct nfs_server *server,
+ struct nfs_fh *fhandle)
+{
+ struct nfs4_fs_locations *locations = NULL;
+ struct page *page;
+ const struct cred *cred;
+ struct nfs_client *clp = server->nfs_client;
+ const struct nfs4_state_maintenance_ops *ops =
+ clp->cl_mvops->state_renewal_ops;
+ int status = -ENOMEM, i;
+
+ cred = ops->get_state_renewal_cred(clp);
+ if (cred == NULL) {
+ cred = nfs4_get_clid_cred(clp);
+ if (cred == NULL)
+ return -ENOKEY;
+ }
+
+ page = alloc_page(GFP_KERNEL);
+ locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
+ if (page == NULL || locations == NULL)
+ goto out;
+
+ status = nfs4_proc_get_locations(server, fhandle, locations, page,
+ cred);
+ if (status)
+ goto out;
+
+ for (i = 0; i < locations->nlocations; i++)
+ test_fs_location_for_trunking(&locations->locations[i], clp,
+ server);
+out:
+ if (page)
+ __free_page(page);
+ kfree(locations);
+ return status;
+}
+
+static int nfs4_discover_trunking(struct nfs_server *server,
+ struct nfs_fh *fhandle)
+{
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
+ struct nfs_client *clp = server->nfs_client;
+ int err = 0;
+
+ if (!nfs4_has_session(clp))
+ goto out;
+ do {
+ err = nfs4_handle_exception(server,
+ _nfs4_discover_trunking(server, fhandle),
+ &exception);
+ } while (exception.retry);
+out:
+ return err;
+}
+
static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
@@ -4441,7 +4551,8 @@ static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
return err;
}
-static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry,
+ const struct cred *cred)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_accessargs args = {
@@ -4455,7 +4566,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS],
.rpc_argp = &args,
.rpc_resp = &res,
- .rpc_cred = entry->cred,
+ .rpc_cred = cred,
};
int status = 0;
@@ -4475,14 +4586,15 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
return status;
}
-static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry,
+ const struct cred *cred)
{
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
- err = _nfs4_proc_access(inode, entry);
+ err = _nfs4_proc_access(inode, entry, cred);
trace_nfs4_access(inode, err);
err = nfs4_handle_exception(NFS_SERVER(inode), err,
&exception);
@@ -4663,8 +4775,10 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg,
nfs_fattr_init(res->dir_attr);
- if (inode)
+ if (inode) {
nfs4_inode_return_delegation(inode);
+ nfs_d_prune_case_insensitive_aliases(inode);
+ }
}
static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
@@ -4730,6 +4844,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
return 0;
if (task->tk_status == 0) {
+ nfs_d_prune_case_insensitive_aliases(d_inode(data->old_dentry));
if (new_dir != old_dir) {
/* Note: If we moved a directory, nlink will change */
nfs4_update_changeattr(old_dir, &res->old_cinfo,
@@ -5422,14 +5537,14 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
}
-static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src,
- struct inode *inode, struct nfs_server *server,
- struct nfs4_label *label)
+void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[],
+ struct inode *inode, unsigned long cache_validity)
{
- unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+ struct nfs_server *server = NFS_SERVER(inode);
unsigned int i;
memcpy(bitmask, src, sizeof(*bitmask) * NFS4_BITMASK_SZ);
+ cache_validity |= READ_ONCE(NFS_I(inode)->cache_validity);
if (cache_validity & NFS_INO_INVALID_CHANGE)
bitmask[0] |= FATTR4_WORD0_CHANGE;
@@ -5441,8 +5556,6 @@ static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src,
bitmask[1] |= FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP;
if (cache_validity & NFS_INO_INVALID_NLINK)
bitmask[1] |= FATTR4_WORD1_NUMLINKS;
- if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL)
- bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL;
if (cache_validity & NFS_INO_INVALID_CTIME)
bitmask[1] |= FATTR4_WORD1_TIME_METADATA;
if (cache_validity & NFS_INO_INVALID_MTIME)
@@ -5469,7 +5582,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
} else {
nfs4_bitmask_set(hdr->args.bitmask_store,
server->cache_consistency_bitmask,
- hdr->inode, server, NULL);
+ hdr->inode, NFS_INO_INVALID_BLOCKS);
hdr->args.bitmask = hdr->args.bitmask_store;
}
@@ -6507,8 +6620,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
data->args.fhandle = &data->fh;
data->args.stateid = &data->stateid;
nfs4_bitmask_set(data->args.bitmask_store,
- server->cache_consistency_bitmask, inode, server,
- NULL);
+ server->cache_consistency_bitmask, inode, 0);
data->args.bitmask = data->args.bitmask_store;
nfs_copy_fh(&data->fh, NFS_FH(inode));
nfs4_stateid_copy(&data->stateid, stateid);
@@ -7611,7 +7723,7 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler,
const char *key, const void *buf,
size_t buflen, int flags)
{
- struct nfs_access_entry cache;
+ u32 mask;
int ret;
if (!nfs_server_capable(inode, NFS_CAP_XATTR))
@@ -7626,8 +7738,8 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler,
* do a cached access check for the XA* flags to possibly avoid
* doing an RPC and getting EACCES back.
*/
- if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) {
- if (!(cache.mask & NFS_ACCESS_XAWRITE))
+ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) {
+ if (!(mask & NFS_ACCESS_XAWRITE))
return -EACCES;
}
@@ -7648,14 +7760,14 @@ static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
const char *key, void *buf, size_t buflen)
{
- struct nfs_access_entry cache;
+ u32 mask;
ssize_t ret;
if (!nfs_server_capable(inode, NFS_CAP_XATTR))
return -EOPNOTSUPP;
- if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) {
- if (!(cache.mask & NFS_ACCESS_XAREAD))
+ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) {
+ if (!(mask & NFS_ACCESS_XAREAD))
return -EACCES;
}
@@ -7680,13 +7792,13 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len)
ssize_t ret, size;
char *buf;
size_t buflen;
- struct nfs_access_entry cache;
+ u32 mask;
if (!nfs_server_capable(inode, NFS_CAP_XATTR))
return 0;
- if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) {
- if (!(cache.mask & NFS_ACCESS_XALIST))
+ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) {
+ if (!(mask & NFS_ACCESS_XALIST))
return 0;
}
@@ -7818,18 +7930,18 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
* appended to this compound to identify the client ID which is
* performing recovery.
*/
-static int _nfs40_proc_get_locations(struct inode *inode,
+static int _nfs40_proc_get_locations(struct nfs_server *server,
+ struct nfs_fh *fhandle,
struct nfs4_fs_locations *locations,
struct page *page, const struct cred *cred)
{
- struct nfs_server *server = NFS_SERVER(inode);
struct rpc_clnt *clnt = server->client;
u32 bitmask[2] = {
[0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
};
struct nfs4_fs_locations_arg args = {
.clientid = server->nfs_client->cl_clientid,
- .fh = NFS_FH(inode),
+ .fh = fhandle,
.page = page,
.bitmask = bitmask,
.migration = 1, /* skip LOOKUP */
@@ -7875,17 +7987,17 @@ static int _nfs40_proc_get_locations(struct inode *inode,
* When the client supports GETATTR(fs_locations_info), it can
* be plumbed in here.
*/
-static int _nfs41_proc_get_locations(struct inode *inode,
+static int _nfs41_proc_get_locations(struct nfs_server *server,
+ struct nfs_fh *fhandle,
struct nfs4_fs_locations *locations,
struct page *page, const struct cred *cred)
{
- struct nfs_server *server = NFS_SERVER(inode);
struct rpc_clnt *clnt = server->client;
u32 bitmask[2] = {
[0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
};
struct nfs4_fs_locations_arg args = {
- .fh = NFS_FH(inode),
+ .fh = fhandle,
.page = page,
.bitmask = bitmask,
.migration = 1, /* skip LOOKUP */
@@ -7919,7 +8031,8 @@ static int _nfs41_proc_get_locations(struct inode *inode,
/**
* nfs4_proc_get_locations - discover locations for a migrated FSID
- * @inode: inode on FSID that is migrating
+ * @server: pointer to nfs_server to process
+ * @fhandle: pointer to the kernel NFS client file handle
* @locations: result of query
* @page: buffer
* @cred: credential to use for this operation
@@ -7934,11 +8047,11 @@ static int _nfs41_proc_get_locations(struct inode *inode,
* -NFS4ERR_LEASE_MOVED is returned if the server still has leases
* from this client that require migration recovery.
*/
-int nfs4_proc_get_locations(struct inode *inode,
+int nfs4_proc_get_locations(struct nfs_server *server,
+ struct nfs_fh *fhandle,
struct nfs4_fs_locations *locations,
struct page *page, const struct cred *cred)
{
- struct nfs_server *server = NFS_SERVER(inode);
struct nfs_client *clp = server->nfs_client;
const struct nfs4_mig_recovery_ops *ops =
clp->cl_mvops->mig_recovery_ops;
@@ -7951,10 +8064,11 @@ int nfs4_proc_get_locations(struct inode *inode,
(unsigned long long)server->fsid.major,
(unsigned long long)server->fsid.minor,
clp->cl_hostname);
- nfs_display_fhandle(NFS_FH(inode), __func__);
+ nfs_display_fhandle(fhandle, __func__);
do {
- status = ops->get_locations(inode, locations, page, cred);
+ status = ops->get_locations(server, fhandle, locations, page,
+ cred);
if (status != -NFS4ERR_DELAY)
break;
nfs4_handle_exception(server, status, &exception);
@@ -10423,6 +10537,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.free_client = nfs4_free_client,
.create_server = nfs4_create_server,
.clone_server = nfs_clone_server,
+ .discover_trunking = nfs4_discover_trunking,
};
static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
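Several of the nfs4proc.c hunks above (nfs4_proc_access, nfs4_discover_trunking, nfs4_proc_get_locations) wrap their operation in the driver-wide retry idiom: issue the RPC, feed the status to nfs4_handle_exception(), and loop while the exception handler requests a retry (for instance after NFS4ERR_DELAY backoff). A compilable sketch of that loop shape; the handler logic and error values here are illustrative stand-ins, not the kernel's:

    #include <stdbool.h>
    #include <stdio.h>

    struct exception { bool retry; int delay_count; };

    /* Stand-in for nfs4_handle_exception(): a retryable error requests
     * another pass (the real code also sleeps for backoff); anything
     * else passes through to the caller. */
    static int handle_exception(int err, struct exception *exc)
    {
        if (err == -11 /* pretend NFS4ERR_DELAY */ && exc->delay_count++ < 3) {
            exc->retry = true;
            return 0;
        }
        exc->retry = false;
        return err;
    }

    static int do_rpc(int *attempts)
    {
        return ++(*attempts) < 3 ? -11 : 0;  /* fail twice, then succeed */
    }

    int main(void)
    {
        struct exception exc = { 0 };
        int attempts = 0, err;

        do {
            err = handle_exception(do_rpc(&attempts), &exc);
        } while (exc.retry);
        printf("done after %d attempts, err=%d\n", attempts, err);
        return 0;
    }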
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index d88b779f9dd0..02a899e4390f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2098,7 +2098,8 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
}
inode = d_inode(server->super->s_root);
- result = nfs4_proc_get_locations(inode, locations, page, cred);
+ result = nfs4_proc_get_locations(server, NFS_FH(inode), locations,
+ page, cred);
if (result) {
dprintk("<-- %s: failed to retrieve fs_locations: %d\n",
__func__, result);
@@ -2106,6 +2107,9 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
}
result = -NFS4ERR_NXIO;
+ if (!locations->nlocations)
+ goto out;
+
if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) {
dprintk("<-- %s: No fs_locations data, migration skipped\n",
__func__);
@@ -2693,6 +2697,5 @@ static int nfs4_run_state_manager(void *ptr)
allow_signal(SIGKILL);
nfs4_state_manager(clp);
nfs_put_client(clp);
- module_put_and_kthread_exit(0);
return 0;
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 69862bf6db00..8e70b92df4cc 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3533,6 +3533,42 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
return 0;
}
+static int decode_attr_case_insensitive(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+ __be32 *p;
+
+ *res = 0;
+ if (unlikely(bitmap[0] & (FATTR4_WORD0_CASE_INSENSITIVE - 1U)))
+ return -EIO;
+ if (likely(bitmap[0] & FATTR4_WORD0_CASE_INSENSITIVE)) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+ *res = be32_to_cpup(p);
+ bitmap[0] &= ~FATTR4_WORD0_CASE_INSENSITIVE;
+ }
+ dprintk("%s: case_insensitive=%s\n", __func__, *res == 0 ? "false" : "true");
+ return 0;
+}
+
+static int decode_attr_case_preserving(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+ __be32 *p;
+
+ *res = 0;
+ if (unlikely(bitmap[0] & (FATTR4_WORD0_CASE_PRESERVING - 1U)))
+ return -EIO;
+ if (likely(bitmap[0] & FATTR4_WORD0_CASE_PRESERVING)) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+ *res = be32_to_cpup(p);
+ bitmap[0] &= ~FATTR4_WORD0_CASE_PRESERVING;
+ }
+ dprintk("%s: case_preserving=%s\n", __func__, *res == 0 ? "false" : "true");
+ return 0;
+}
+
static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
{
__be32 *p;
@@ -3696,8 +3732,6 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
if (unlikely(!p))
goto out_eio;
n = be32_to_cpup(p);
- if (n <= 0)
- goto out_eio;
for (res->nlocations = 0; res->nlocations < n; res->nlocations++) {
u32 m;
struct nfs4_fs_location *loc;
@@ -4200,10 +4234,11 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap,
} else
printk(KERN_WARNING "%s: label too long (%u)!\n",
__func__, len);
+ if (label && label->label)
+ dprintk("%s: label=%.*s, len=%d, PI=%d, LFS=%d\n",
+ __func__, label->len, (char *)label->label,
+ label->len, label->pi, label->lfs);
}
- if (label && label->label)
- dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__,
- (char *)label->label, label->len, label->pi, label->lfs);
return status;
}
@@ -4412,6 +4447,10 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re
goto xdr_error;
if ((status = decode_attr_aclsupport(xdr, bitmap, &res->acl_bitmask)) != 0)
goto xdr_error;
+ if ((status = decode_attr_case_insensitive(xdr, bitmap, &res->case_insensitive)) != 0)
+ goto xdr_error;
+ if ((status = decode_attr_case_preserving(xdr, bitmap, &res->case_preserving)) != 0)
+ goto xdr_error;
if ((status = decode_attr_exclcreat_supported(xdr, bitmap,
res->exclcreat_bitmask)) != 0)
goto xdr_error;
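The two new decoders in the nfs4xdr.c hunks follow the stock NFSv4 pattern for a boolean attribute: verify that no lower-numbered bit in the same bitmap word is still pending (attributes must be decoded in bit order), consume one 4-byte XDR word, and clear the bit. A minimal userspace sketch of that discipline; the buffer handling is illustrative and stands in for the kernel's xdr_stream API:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Decode one boolean attribute. A lower-numbered bit still set means
     * attributes arrived out of order (the kernel returns -EIO there);
     * otherwise consume one big-endian word and clear our bit. */
    static int decode_bool_attr(const unsigned char **pos, uint32_t *bitmap0,
                                uint32_t attr_bit, uint32_t *res)
    {
        uint32_t word;

        *res = 0;
        if (*bitmap0 & (attr_bit - 1))
            return -1;
        if (*bitmap0 & attr_bit) {
            memcpy(&word, *pos, 4);
            *pos += 4;
            *res = ntohl(word) != 0;
            *bitmap0 &= ~attr_bit;
        }
        return 0;
    }

    int main(void)
    {
        unsigned char buf[4] = { 0, 0, 0, 1 };   /* XDR TRUE */
        const unsigned char *pos = buf;
        uint32_t bitmap0 = 0x10, res;            /* pretend our bit is 0x10 */

        if (decode_bool_attr(&pos, &bitmap0, 0x10, &res) == 0)
            printf("attr=%s\n", res ? "true" : "false");
        return 0;
    }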
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index 8cb70755e3c9..a6f740366963 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -142,10 +142,11 @@ static struct attribute *nfs_netns_client_attrs[] = {
&nfs_netns_client_id.attr,
NULL,
};
+ATTRIBUTE_GROUPS(nfs_netns_client);
static struct kobj_type nfs_netns_client_type = {
.release = nfs_netns_client_release,
- .default_attrs = nfs_netns_client_attrs,
+ .default_groups = nfs_netns_client_groups,
.sysfs_ops = &kobj_sysfs_ops,
.namespace = nfs_netns_client_namespace,
};
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 987a187bd39a..614e2809032e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -417,7 +417,7 @@ static void nfs_set_page_writeback(struct page *page)
if (atomic_long_inc_return(&nfss->writeback) >
NFS_CONGESTION_ON_THRESH)
- set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
+ nfss->write_congested = 1;
}
static void nfs_end_page_writeback(struct nfs_page *req)
@@ -433,7 +433,7 @@ static void nfs_end_page_writeback(struct nfs_page *req)
end_page_writeback(req->wb_page);
if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
- clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
+ nfss->write_congested = 0;
}
/*
@@ -672,6 +672,10 @@ static int nfs_writepage_locked(struct page *page,
struct inode *inode = page_file_mapping(page)->host;
int err;
+ if (wbc->sync_mode == WB_SYNC_NONE &&
+ NFS_SERVER(inode)->write_congested)
+ return AOP_WRITEPAGE_ACTIVATE;
+
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
nfs_pageio_init_write(&pgio, inode, 0,
false, &nfs_async_write_completion_ops);
@@ -719,6 +723,10 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
int priority = 0;
int err;
+ if (wbc->sync_mode == WB_SYNC_NONE &&
+ NFS_SERVER(inode)->write_congested)
+ return 0;
+
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
if (!(mntflags & NFS_MOUNT_WRITE_EAGER) || wbc->for_kupdate ||
@@ -1893,7 +1901,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
}
nfss = NFS_SERVER(data->inode);
if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
- clear_bdi_congested(inode_to_bdi(data->inode), BLK_RW_ASYNC);
+ nfss->write_congested = 0;
nfs_init_cinfo(&cinfo, data->inode, data->dreq);
nfs_commit_end(cinfo.mds);
@@ -2049,21 +2057,21 @@ out:
}
EXPORT_SYMBOL_GPL(nfs_wb_all);
-int nfs_wb_page_cancel(struct inode *inode, struct page *page)
+int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio)
{
struct nfs_page *req;
int ret = 0;
- wait_on_page_writeback(page);
+ folio_wait_writeback(folio);
/* blocking call to cancel all requests and join to a single (head)
* request */
- req = nfs_lock_and_join_requests(page);
+ req = nfs_lock_and_join_requests(&folio->page);
if (IS_ERR(req)) {
ret = PTR_ERR(req);
} else if (req) {
- /* all requests from this page have been cancelled by
+ /* all requests from this folio have been cancelled by
* nfs_lock_and_join_requests, so just remove the head
* request from the inode / page_private pointer and
* release it */
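The write.c hunks replace the removed bdi congestion API with a per-server write_congested flag flipped around two watermarks, so congestion clears only after in-flight writeback falls well below the level at which it was raised. A compilable sketch of that hysteresis pattern; the thresholds are illustrative (the kernel derives NFS_CONGESTION_ON_THRESH and NFS_CONGESTION_OFF_THRESH from nfs_congestion_kb):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define ON_THRESH  256   /* illustrative values only */
    #define OFF_THRESH 192

    static atomic_long in_flight;
    static atomic_bool congested;

    static void page_writeback_start(void)
    {
        if (atomic_fetch_add(&in_flight, 1) + 1 > ON_THRESH)
            atomic_store(&congested, true);
    }

    static void page_writeback_end(void)
    {
        if (atomic_fetch_sub(&in_flight, 1) - 1 < OFF_THRESH)
            atomic_store(&congested, false);
    }

    /* Background (WB_SYNC_NONE) writers back off while congested, as
     * nfs_writepages() and nfs_writepage_locked() now do above. */
    static bool should_skip_background_write(void)
    {
        return atomic_load(&congested);
    }

    int main(void)
    {
        for (int i = 0; i < 300; i++)
            page_writeback_start();
        printf("congested=%d\n", should_skip_background_write());
        for (int i = 0; i < 150; i++)
            page_writeback_end();
        printf("congested=%d\n", should_skip_background_write());
        return 0;
    }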
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 3d1d17256a91..f6a2fd3015e7 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -35,18 +35,9 @@ config NFSD_V2_ACL
bool
depends on NFSD
-config NFSD_V3
- bool "NFS server support for NFS version 3"
- depends on NFSD
- help
- This option enables support in your system's NFS server for
- version 3 of the NFS protocol (RFC 1813).
-
- If unsure, say Y.
-
config NFSD_V3_ACL
bool "NFS server support for the NFSv3 ACL protocol extension"
- depends on NFSD_V3
+ depends on NFSD
select NFSD_V2_ACL
help
Solaris NFS servers support an auxiliary NFSv3 ACL protocol that
@@ -70,7 +61,6 @@ config NFSD_V3_ACL
config NFSD_V4
bool "NFS server support for NFS version 4"
depends on NFSD && PROC_FS
- select NFSD_V3
select FS_POSIX_ACL
select SUNRPC_GSS
select CRYPTO
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 3f0983e93a99..805c06d5f1b4 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -12,9 +12,8 @@ nfsd-y += trace.o
nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
export.o auth.o lockd.o nfscache.o nfsxdr.o \
- stats.o filecache.o
+ stats.o filecache.o nfs3proc.o nfs3xdr.o
nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
-nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
nfs4acl.o nfs4callback.o nfs4recover.o
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index e5c0982a381d..b6d01d51a746 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -4,7 +4,6 @@
*/
#include <linux/exportfs.h>
#include <linux/iomap.h>
-#include <linux/genhd.h>
#include <linux/slab.h>
#include <linux/pr.h>
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 8bc807c5fea4..c08882f5867b 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -7,6 +7,7 @@
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
+#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
@@ -632,7 +633,7 @@ nfsd_file_cache_init(void)
if (!nfsd_filecache_wq)
goto out;
- nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
+ nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE,
sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
if (!nfsd_file_hashtbl) {
pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
@@ -700,7 +701,7 @@ out_err:
nfsd_file_slab = NULL;
kmem_cache_destroy(nfsd_file_mark_slab);
nfsd_file_mark_slab = NULL;
- kfree(nfsd_file_hashtbl);
+ kvfree(nfsd_file_hashtbl);
nfsd_file_hashtbl = NULL;
destroy_workqueue(nfsd_filecache_wq);
nfsd_filecache_wq = NULL;
@@ -811,7 +812,7 @@ nfsd_file_cache_shutdown(void)
fsnotify_wait_marks_destroyed();
kmem_cache_destroy(nfsd_file_mark_slab);
nfsd_file_mark_slab = NULL;
- kfree(nfsd_file_hashtbl);
+ kvfree(nfsd_file_hashtbl);
nfsd_file_hashtbl = NULL;
destroy_workqueue(nfsd_filecache_wq);
nfsd_filecache_wq = NULL;
diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
index 2e2f1d5e9f62..070f90ed09b6 100644
--- a/fs/nfsd/flexfilelayout.c
+++ b/fs/nfsd/flexfilelayout.c
@@ -117,7 +117,7 @@ nfsd4_ff_proc_getdeviceinfo(struct super_block *sb, struct svc_rqst *rqstp,
da->netaddr.addr_len =
snprintf(da->netaddr.addr, FF_ADDR_LEN + 1,
- "%s.%hhu.%hhu", addr, port >> 8, port & 0xff);
+ "%s.%d.%d", addr, port >> 8, port & 0xff);
da->tightly_coupled = false;
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 8ef53f6726ec..936eebd4c56d 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -150,13 +150,17 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
unsigned int len;
int v;
- argp->count = min_t(u32, argp->count, max_blocksize);
-
dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
SVCFH_fmt(&argp->fh),
(unsigned long) argp->count,
(unsigned long long) argp->offset);
+ argp->count = min_t(u32, argp->count, max_blocksize);
+ if (argp->offset > (u64)OFFSET_MAX)
+ argp->offset = (u64)OFFSET_MAX;
+ if (argp->offset + argp->count > (u64)OFFSET_MAX)
+ argp->count = (u64)OFFSET_MAX - argp->offset;
+
v = 0;
len = argp->count;
resp->pages = rqstp->rq_next_page;
@@ -199,6 +203,11 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
(unsigned long long) argp->offset,
argp->stable? " stable" : "");
+ resp->status = nfserr_fbig;
+ if (argp->offset > (u64)OFFSET_MAX ||
+ argp->offset + argp->len > (u64)OFFSET_MAX)
+ return rpc_success;
+
fh_copy(&resp->fh, &argp->fh);
resp->committed = argp->stable;
nvecs = svc_fill_write_vector(rqstp, &argp->payload);
@@ -651,15 +660,9 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
argp->count,
(unsigned long long) argp->offset);
- if (argp->offset > NFS_OFFSET_MAX) {
- resp->status = nfserr_inval;
- goto out;
- }
-
fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset,
argp->count, resp->verf);
-out:
return rpc_success;
}
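The READ and WRITE hunks here (and the matching NFSv4 ones further down) stop rejecting large offsets with an error and instead clamp offset and count so that offset + count never exceeds OFFSET_MAX, the largest loff_t. A short sketch of the clamping and why the arithmetic cannot wrap; the constant name is illustrative:

    #include <stdint.h>
    #include <stdio.h>

    #define SKETCH_OFFSET_MAX ((uint64_t)INT64_MAX)  /* stands in for OFFSET_MAX */

    static void clamp_io_range(uint64_t *offset, uint64_t *count)
    {
        if (*offset > SKETCH_OFFSET_MAX)
            *offset = SKETCH_OFFSET_MAX;
        /* offset is now <= INT64_MAX and count originates from a 32-bit
         * protocol field, so offset + count cannot wrap a u64 here. */
        if (*offset + *count > SKETCH_OFFSET_MAX)
            *count = SKETCH_OFFSET_MAX - *offset;
    }

    int main(void)
    {
        uint64_t off = SKETCH_OFFSET_MAX - 10, cnt = 100;

        clamp_io_range(&off, &cnt);
        printf("offset=%llu count=%llu\n",
               (unsigned long long)off, (unsigned long long)cnt);
        return 0;
    }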
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 7c45ba4db61b..0293b8d65f10 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -254,7 +254,7 @@ svcxdr_decode_sattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr,
if (xdr_stream_decode_u64(xdr, &newsize) < 0)
return false;
iap->ia_valid |= ATTR_SIZE;
- iap->ia_size = min_t(u64, newsize, NFS_OFFSET_MAX);
+ iap->ia_size = newsize;
}
if (xdr_stream_decode_u32(xdr, &set_it) < 0)
return false;
@@ -1060,7 +1060,7 @@ svcxdr_encode_entry3_common(struct nfsd3_readdirres *resp, const char *name,
return false;
/* cookie */
resp->cookie_offset = dirlist->len;
- if (xdr_stream_encode_u64(xdr, NFS_OFFSET_MAX) < 0)
+ if (xdr_stream_encode_u64(xdr, OFFSET_MAX) < 0)
return false;
return true;
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 6d1b5bb051c5..2c05692a9abf 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -422,7 +422,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
new = kmem_cache_alloc(nfs4_layout_cache, GFP_KERNEL);
if (!new)
return nfserr_jukebox;
- memcpy(&new->lo_seg, seg, sizeof(lp->lo_seg));
+ memcpy(&new->lo_seg, seg, sizeof(new->lo_seg));
new->lo_state = ls;
spin_lock(&fp->fi_lock);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ed1ee25647be..b207c76a873f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -782,12 +782,16 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
read->rd_nf = NULL;
- if (read->rd_offset >= OFFSET_MAX)
- return nfserr_inval;
trace_nfsd_read_start(rqstp, &cstate->current_fh,
read->rd_offset, read->rd_length);
+ read->rd_length = min_t(u32, read->rd_length, svc_max_payload(rqstp));
+ if (read->rd_offset > (u64)OFFSET_MAX)
+ read->rd_offset = (u64)OFFSET_MAX;
+ if (read->rd_offset + read->rd_length > (u64)OFFSET_MAX)
+ read->rd_length = (u64)OFFSET_MAX - read->rd_offset;
+
/*
* If we do a zero copy read, then a client will see read data
* that reflects the state of the file *after* performing the
@@ -1018,8 +1022,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
unsigned long cnt;
int nvecs;
- if (write->wr_offset >= OFFSET_MAX)
- return nfserr_inval;
+ if (write->wr_offset > (u64)OFFSET_MAX ||
+ write->wr_offset + write->wr_buflen > (u64)OFFSET_MAX)
+ return nfserr_fbig;
cnt = write->wr_buflen;
trace_nfsd_write_start(rqstp, &cstate->current_fh,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 72900b89cf84..234e852fcdfa 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4130,8 +4130,10 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
status = nfserr_clid_inuse;
if (client_has_state(old)
&& !same_creds(&unconf->cl_cred,
- &old->cl_cred))
+ &old->cl_cred)) {
+ old = NULL;
goto out;
+ }
status = mark_client_expired_locked(old);
if (status) {
old = NULL;
@@ -4709,6 +4711,14 @@ nfsd_break_deleg_cb(struct file_lock *fl)
return ret;
}
+/**
+ * nfsd_breaker_owns_lease - Check if lease conflict was resolved
+ * @fl: Lock state to check
+ *
+ * Return values:
+ * %true: Lease conflict was resolved
+ * %false: Lease conflict was not resolved.
+ */
static bool nfsd_breaker_owns_lease(struct file_lock *fl)
{
struct nfs4_delegation *dl = fl->fl_owner;
@@ -4716,11 +4726,11 @@ static bool nfsd_breaker_owns_lease(struct file_lock *fl)
struct nfs4_client *clp;
if (!i_am_nfsd())
- return NULL;
+ return false;
rqst = kthread_data(current);
/* Note rq_prog == NFS_ACL_PROGRAM is also possible: */
if (rqst->rq_prog != NFS_PROGRAM || rqst->rq_vers < 4)
- return NULL;
+ return false;
clp = *(rqst->rq_lease_breaker);
return dl->dl_stid.sc_client == clp;
}
@@ -6524,7 +6534,7 @@ nfs4_transform_lock_offset(struct file_lock *lock)
}
static fl_owner_t
-nfsd4_fl_get_owner(fl_owner_t owner)
+nfsd4_lm_get_owner(fl_owner_t owner)
{
struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
@@ -6533,7 +6543,7 @@ nfsd4_fl_get_owner(fl_owner_t owner)
}
static void
-nfsd4_fl_put_owner(fl_owner_t owner)
+nfsd4_lm_put_owner(fl_owner_t owner)
{
struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
@@ -6568,8 +6578,8 @@ nfsd4_lm_notify(struct file_lock *fl)
static const struct lock_manager_operations nfsd_posix_mng_ops = {
.lm_notify = nfsd4_lm_notify,
- .lm_get_owner = nfsd4_fl_get_owner,
- .lm_put_owner = nfsd4_fl_put_owner,
+ .lm_get_owner = nfsd4_lm_get_owner,
+ .lm_put_owner = nfsd4_lm_put_owner,
};
static inline void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 899de438e529..da92e7d2ab6a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2854,6 +2854,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
if (err)
goto out_nfserr;
+ if (!(stat.result_mask & STATX_BTIME))
+ /* underlying FS does not offer btime so we can't share it */
+ bmval1 &= ~FATTR4_WORD1_TIME_CREATE;
if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) ||
(bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
@@ -3254,6 +3257,13 @@ out_acl:
p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec);
*p++ = cpu_to_be32(stat.mtime.tv_nsec);
}
+ if (bmval1 & FATTR4_WORD1_TIME_CREATE) {
+ p = xdr_reserve_space(xdr, 12);
+ if (!p)
+ goto out_resource;
+ p = xdr_encode_hyper(p, (s64)stat.btime.tv_sec);
+ *p++ = cpu_to_be32(stat.btime.tv_nsec);
+ }
if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
struct kstat parent_stat;
u64 ino = stat.ino;
@@ -3495,7 +3505,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
p = xdr_reserve_space(xdr, 3*4 + namlen);
if (!p)
goto fail;
- p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */
+ p = xdr_encode_hyper(p, OFFSET_MAX); /* offset of next entry */
p = xdr_encode_array(p, name, namlen); /* name length & name */
nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);
@@ -3986,10 +3996,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
}
xdr_commit_encode(xdr);
- maxcount = svc_max_payload(resp->rqstp);
- maxcount = min_t(unsigned long, maxcount,
+ maxcount = min_t(unsigned long, read->rd_length,
(xdr->buf->buflen - xdr->buf->len));
- maxcount = min_t(unsigned long, maxcount, read->rd_length);
if (file->f_op->splice_read &&
test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
@@ -4826,10 +4834,8 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfserr_resource;
xdr_commit_encode(xdr);
- maxcount = svc_max_payload(resp->rqstp);
- maxcount = min_t(unsigned long, maxcount,
+ maxcount = min_t(unsigned long, read->rd_length,
(xdr->buf->buflen - xdr->buf->len));
- maxcount = min_t(unsigned long, maxcount, read->rd_length);
count = maxcount;
eof = read->rd_offset >= i_size_read(file_inode(file));
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index a4a69ab6ab28..0b3f12aa37ff 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -84,12 +84,6 @@ nfsd_hashsize(unsigned int limit)
return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE);
}
-static u32
-nfsd_cache_hash(__be32 xid, struct nfsd_net *nn)
-{
- return hash_32((__force u32)xid, nn->maskbits);
-}
-
static struct svc_cacherep *
nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum,
struct nfsd_net *nn)
@@ -241,6 +235,14 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
list_move_tail(&rp->c_lru, &b->lru_head);
}
+static noinline struct nfsd_drc_bucket *
+nfsd_cache_bucket_find(__be32 xid, struct nfsd_net *nn)
+{
+ unsigned int hash = hash_32((__force u32)xid, nn->maskbits);
+
+ return &nn->drc_hashtbl[hash];
+}
+
static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn,
unsigned int max)
{
@@ -419,12 +421,10 @@ out:
*/
int nfsd_cache_lookup(struct svc_rqst *rqstp)
{
- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct nfsd_net *nn;
struct svc_cacherep *rp, *found;
- __be32 xid = rqstp->rq_xid;
__wsum csum;
- u32 hash = nfsd_cache_hash(xid, nn);
- struct nfsd_drc_bucket *b = &nn->drc_hashtbl[hash];
+ struct nfsd_drc_bucket *b;
int type = rqstp->rq_cachetype;
int rtn = RC_DOIT;
@@ -440,17 +440,16 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
* Since the common case is a cache miss followed by an insert,
* preallocate an entry.
*/
+ nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
rp = nfsd_reply_cache_alloc(rqstp, csum, nn);
if (!rp)
goto out;
+ b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
spin_lock(&b->cache_lock);
found = nfsd_cache_insert(b, rp, nn);
- if (found != rp) {
- nfsd_reply_cache_free_locked(NULL, rp, nn);
- rp = found;
+ if (found != rp)
goto found_entry;
- }
nfsd_stats_rc_misses_inc();
rqstp->rq_cacherep = rp;
@@ -468,8 +467,10 @@ out:
found_entry:
/* We found a matching entry which is either in progress or done. */
+ nfsd_reply_cache_free_locked(NULL, rp, nn);
nfsd_stats_rc_hits_inc();
rtn = RC_DROPIT;
+ rp = found;
/* Request being processed */
if (rp->c_state == RC_INPROG)
@@ -528,7 +529,6 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_cacherep *rp = rqstp->rq_cacherep;
struct kvec *resv = &rqstp->rq_res.head[0], *cachv;
- u32 hash;
struct nfsd_drc_bucket *b;
int len;
size_t bufsize = 0;
@@ -536,8 +536,7 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
if (!rp)
return;
- hash = nfsd_cache_hash(rp->c_key.k_xid, nn);
- b = &nn->drc_hashtbl[hash];
+ b = nfsd_cache_bucket_find(rp->c_key.k_xid, nn);
len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
len >>= 2;
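The reply-cache rework above keeps the optimistic shape the in-tree comment describes: allocate a fresh entry before taking the bucket lock, attempt the insert, and on finding an existing entry free the preallocation and operate on the one found. A compact sketch of that alloc-then-insert-or-reuse pattern, reduced to a single unlocked bucket for illustration:

    #include <stdio.h>
    #include <stdlib.h>

    struct entry {
        unsigned int key;
        struct entry *next;
    };

    static struct entry *bucket;

    static struct entry *insert_or_find(struct entry *fresh)
    {
        for (struct entry *e = bucket; e; e = e->next)
            if (e->key == fresh->key)
                return e;            /* collision: caller reuses this */
        fresh->next = bucket;
        bucket = fresh;
        return fresh;
    }

    int main(void)
    {
        struct entry *rp = calloc(1, sizeof(*rp));
        struct entry *found;

        if (!rp)
            return 1;
        rp->key = 42;
        found = insert_or_find(rp);
        if (found != rp) {
            /* cache hit: drop the preallocation, keep the found entry,
             * exactly as the found_entry: path above now does */
            free(rp);
            rp = found;
        }
        printf("key=%u\n", rp->key);
        free(bucket);
        return 0;
    }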
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index b9f27fbcd768..16920e4512bd 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -772,13 +772,13 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
if (err != 0)
return err;
- err = svc_create_xprt(nn->nfsd_serv, transport, net,
- PF_INET, port, SVC_SOCK_ANONYMOUS, cred);
+ err = svc_xprt_create(nn->nfsd_serv, transport, net,
+ PF_INET, port, SVC_SOCK_ANONYMOUS, cred);
if (err < 0)
goto out_err;
- err = svc_create_xprt(nn->nfsd_serv, transport, net,
- PF_INET6, port, SVC_SOCK_ANONYMOUS, cred);
+ err = svc_xprt_create(nn->nfsd_serv, transport, net,
+ PF_INET6, port, SVC_SOCK_ANONYMOUS, cred);
if (err < 0 && err != -EAFNOSUPPORT)
goto out_close;
@@ -790,7 +790,7 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
out_close:
xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port);
if (xprt != NULL) {
- svc_close_xprt(xprt);
+ svc_xprt_close(xprt);
svc_xprt_put(xprt);
}
out_err:
@@ -1247,7 +1247,8 @@ static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry)
clear_ncl(d_inode(dentry));
dget(dentry);
ret = simple_unlink(dir, dentry);
- d_delete(dentry);
+ d_drop(dentry);
+ fsnotify_unlink(dir, dentry);
dput(dentry);
WARN_ON_ONCE(ret);
}
@@ -1338,8 +1339,8 @@ void nfsd_client_rmdir(struct dentry *dentry)
dget(dentry);
ret = simple_rmdir(dir, dentry);
WARN_ON_ONCE(ret);
+ d_drop(dentry);
fsnotify_rmdir(dir, dentry);
- d_delete(dentry);
dput(dentry);
inode_unlock(dir);
}
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 3e5008b475ff..4fc1fd639527 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -364,7 +364,7 @@ void nfsd_lockd_shutdown(void);
| FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \
| FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \
| FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_ACCESS_SET \
- | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \
+ | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_CREATE \
| FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID)
#define NFSD4_SUPPORTED_ATTRS_WORD2 0
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 145208bcb9bd..c29baa03dfaf 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -611,8 +611,6 @@ out_negative:
return nfserr_serverfault;
}
-#ifdef CONFIG_NFSD_V3
-
/**
* fh_fill_pre_attrs - Fill in pre-op attributes
* @fhp: file handle to be updated
@@ -673,8 +671,6 @@ void fh_fill_post_attrs(struct svc_fh *fhp)
nfsd4_change_attribute(&fhp->fh_post_attr, inode);
}
-#endif /* CONFIG_NFSD_V3 */
-
/*
* Release a file handle.
*/
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 434930d8a946..fb9d358a267e 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -90,7 +90,6 @@ typedef struct svc_fh {
* operation
*/
int fh_flags; /* FH flags */
-#ifdef CONFIG_NFSD_V3
bool fh_post_saved; /* post-op attrs saved */
bool fh_pre_saved; /* pre-op attrs saved */
@@ -107,7 +106,6 @@ typedef struct svc_fh {
/* Post-op attributes saved in fh_unlock */
struct kstat fh_post_attr; /* full attrs after operation */
u64 fh_post_change; /* nfsv4 change; see above */
-#endif /* CONFIG_NFSD_V3 */
} svc_fh;
#define NFSD4_FH_FOREIGN (1<<0)
#define SET_FH_FLAG(c, f) ((c)->fh_flags |= (f))
@@ -283,8 +281,6 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
}
#endif
-#ifdef CONFIG_NFSD_V3
-
/**
* fh_clear_pre_post_attrs - Reset pre/post attributes
* @fhp: file handle to be updated
@@ -327,22 +323,6 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat,
extern void fh_fill_pre_attrs(struct svc_fh *fhp);
extern void fh_fill_post_attrs(struct svc_fh *fhp);
-#else /* !CONFIG_NFSD_V3 */
-
-static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
-{
-}
-
-static inline void fh_fill_pre_attrs(struct svc_fh *fhp)
-{
-}
-
-static inline void fh_fill_post_attrs(struct svc_fh *fhp)
-{
-}
-
-#endif /* !CONFIG_NFSD_V3 */
-
/*
* Lock a file handle/inode
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 18b8eb43a19b..fcdab8a8a41f 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -230,7 +230,7 @@ nfsd_proc_write(struct svc_rqst *rqstp)
unsigned long cnt = argp->len;
unsigned int nvecs;
- dprintk("nfsd: WRITE %s %d bytes at %d\n",
+ dprintk("nfsd: WRITE %s %u bytes at %d\n",
SVCFH_fmt(&argp->fh),
argp->len, argp->offset);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index b8c682b62d29..4bb5baa17040 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -117,9 +117,7 @@ static struct svc_stat nfsd_acl_svcstats = {
static const struct svc_version *nfsd_version[] = {
[2] = &nfsd_version2,
-#if defined(CONFIG_NFSD_V3)
[3] = &nfsd_version3,
-#endif
#if defined(CONFIG_NFSD_V4)
[4] = &nfsd_version4,
#endif
@@ -293,13 +291,13 @@ static int nfsd_init_socks(struct net *net, const struct cred *cred)
if (!list_empty(&nn->nfsd_serv->sv_permsocks))
return 0;
- error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT,
- SVC_SOCK_DEFAULTS, cred);
+ error = svc_xprt_create(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT,
+ SVC_SOCK_DEFAULTS, cred);
if (error < 0)
return error;
- error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT,
- SVC_SOCK_DEFAULTS, cred);
+ error = svc_xprt_create(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT,
+ SVC_SOCK_DEFAULTS, cred);
if (error < 0)
return error;
@@ -612,13 +610,6 @@ static int nfsd_get_default_max_blksize(void)
return ret;
}
-static const struct svc_serv_ops nfsd_thread_sv_ops = {
- .svo_shutdown = nfsd_last_thread,
- .svo_function = nfsd,
- .svo_enqueue_xprt = svc_xprt_do_enqueue,
- .svo_module = THIS_MODULE,
-};
-
void nfsd_shutdown_threads(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -657,8 +648,7 @@ int nfsd_create_serv(struct net *net)
if (nfsd_max_blksize == 0)
nfsd_max_blksize = nfsd_get_default_max_blksize();
nfsd_reset_versions(nn);
- serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
- &nfsd_thread_sv_ops);
+ serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd);
if (serv == NULL)
return -ENOMEM;
@@ -724,7 +714,8 @@ void nfsd_put(struct net *net)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) {
- svc_shutdown_net(nn->nfsd_serv, net);
+ svc_xprt_destroy_all(nn->nfsd_serv, net);
+ nfsd_last_thread(nn->nfsd_serv, net);
svc_destroy(&nn->nfsd_serv->sv_refcnt);
spin_lock(&nfsd_notifier_lock);
nn->nfsd_serv = NULL;
@@ -1019,8 +1010,6 @@ out:
msleep(20);
}
- /* Release module */
- module_put_and_kthread_exit(0);
return 0;
}
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index c4cf56327843..242fa123e0e9 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -13,22 +13,6 @@
#include "export.h"
#include "nfsfh.h"
-#define NFSD_TRACE_PROC_ARG_FIELDS \
- __field(unsigned int, netns_ino) \
- __field(u32, xid) \
- __array(unsigned char, server, sizeof(struct sockaddr_in6)) \
- __array(unsigned char, client, sizeof(struct sockaddr_in6))
-
-#define NFSD_TRACE_PROC_ARG_ASSIGNMENTS \
- do { \
- __entry->netns_ino = SVC_NET(rqstp)->ns.inum; \
- __entry->xid = be32_to_cpu(rqstp->rq_xid); \
- memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \
- rqstp->rq_xprt->xpt_locallen); \
- memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \
- rqstp->rq_xprt->xpt_remotelen); \
- } while (0);
-
#define NFSD_TRACE_PROC_RES_FIELDS \
__field(unsigned int, netns_ino) \
__field(u32, xid) \
@@ -53,16 +37,22 @@ DECLARE_EVENT_CLASS(nfsd_xdr_err_class,
),
TP_ARGS(rqstp),
TP_STRUCT__entry(
- NFSD_TRACE_PROC_ARG_FIELDS
-
+ __field(unsigned int, netns_ino)
+ __field(u32, xid)
__field(u32, vers)
__field(u32, proc)
+ __sockaddr(server, rqstp->rq_xprt->xpt_locallen)
+ __sockaddr(client, rqstp->rq_xprt->xpt_remotelen)
),
TP_fast_assign(
- NFSD_TRACE_PROC_ARG_ASSIGNMENTS
+ const struct svc_xprt *xprt = rqstp->rq_xprt;
+ __entry->netns_ino = xprt->xpt_net->ns.inum;
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
__entry->vers = rqstp->rq_vers;
__entry->proc = rqstp->rq_proc;
+ __assign_sockaddr(server, &xprt->xpt_local, xprt->xpt_locallen);
+ __assign_sockaddr(client, &xprt->xpt_remote, xprt->xpt_remotelen);
),
TP_printk("xid=0x%08x vers=%u proc=%u",
__entry->xid, __entry->vers, __entry->proc
@@ -306,14 +296,14 @@ TRACE_EVENT(nfsd_export_update,
DECLARE_EVENT_CLASS(nfsd_io_class,
TP_PROTO(struct svc_rqst *rqstp,
struct svc_fh *fhp,
- loff_t offset,
- unsigned long len),
+ u64 offset,
+ u32 len),
TP_ARGS(rqstp, fhp, offset, len),
TP_STRUCT__entry(
__field(u32, xid)
__field(u32, fh_hash)
- __field(loff_t, offset)
- __field(unsigned long, len)
+ __field(u64, offset)
+ __field(u32, len)
),
TP_fast_assign(
__entry->xid = be32_to_cpu(rqstp->rq_xid);
@@ -321,7 +311,7 @@ DECLARE_EVENT_CLASS(nfsd_io_class,
__entry->offset = offset;
__entry->len = len;
),
- TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld len=%lu",
+ TP_printk("xid=0x%08x fh_hash=0x%08x offset=%llu len=%u",
__entry->xid, __entry->fh_hash,
__entry->offset, __entry->len)
)
@@ -330,8 +320,8 @@ DECLARE_EVENT_CLASS(nfsd_io_class,
DEFINE_EVENT(nfsd_io_class, nfsd_##name, \
TP_PROTO(struct svc_rqst *rqstp, \
struct svc_fh *fhp, \
- loff_t offset, \
- unsigned long len), \
+ u64 offset, \
+ u32 len), \
TP_ARGS(rqstp, fhp, offset, len))
DEFINE_NFSD_IO_EVENT(read_start);
@@ -613,20 +603,21 @@ TRACE_EVENT(nfsd_clid_cred_mismatch,
__field(u32, cl_id)
__field(unsigned long, cl_flavor)
__field(unsigned long, new_flavor)
- __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ __sockaddr(addr, rqstp->rq_xprt->xpt_remotelen)
),
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
__entry->cl_id = clp->cl_clientid.cl_id;
__entry->cl_flavor = clp->cl_cred.cr_flavor;
__entry->new_flavor = rqstp->rq_cred.cr_flavor;
- memcpy(__entry->addr, &rqstp->rq_xprt->xpt_remote,
- sizeof(struct sockaddr_in6));
+ __assign_sockaddr(addr, &rqstp->rq_xprt->xpt_remote,
+ rqstp->rq_xprt->xpt_remotelen);
),
TP_printk("client %08x:%08x flavor=%s, conflict=%s from addr=%pISpc",
__entry->cl_boot, __entry->cl_id,
show_nfsd_authflavor(__entry->cl_flavor),
- show_nfsd_authflavor(__entry->new_flavor), __entry->addr
+ show_nfsd_authflavor(__entry->new_flavor),
+ __get_sockaddr(addr)
)
)
@@ -642,7 +633,7 @@ TRACE_EVENT(nfsd_clid_verf_mismatch,
__field(u32, cl_id)
__array(unsigned char, cl_verifier, NFS4_VERIFIER_SIZE)
__array(unsigned char, new_verifier, NFS4_VERIFIER_SIZE)
- __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ __sockaddr(addr, rqstp->rq_xprt->xpt_remotelen)
),
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
@@ -651,14 +642,14 @@ TRACE_EVENT(nfsd_clid_verf_mismatch,
NFS4_VERIFIER_SIZE);
memcpy(__entry->new_verifier, (void *)verf,
NFS4_VERIFIER_SIZE);
- memcpy(__entry->addr, &rqstp->rq_xprt->xpt_remote,
- sizeof(struct sockaddr_in6));
+ __assign_sockaddr(addr, &rqstp->rq_xprt->xpt_remote,
+ rqstp->rq_xprt->xpt_remotelen);
),
TP_printk("client %08x:%08x verf=0x%s, updated=0x%s from addr=%pISpc",
__entry->cl_boot, __entry->cl_id,
__print_hex_str(__entry->cl_verifier, NFS4_VERIFIER_SIZE),
__print_hex_str(__entry->new_verifier, NFS4_VERIFIER_SIZE),
- __entry->addr
+ __get_sockaddr(addr)
)
);
@@ -908,18 +899,17 @@ TRACE_EVENT(nfsd_cb_args,
__field(u32, cl_id)
__field(u32, prog)
__field(u32, ident)
- __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ __sockaddr(addr, conn->cb_addrlen)
),
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
__entry->cl_id = clp->cl_clientid.cl_id;
__entry->prog = conn->cb_prog;
__entry->ident = conn->cb_ident;
- memcpy(__entry->addr, &conn->cb_addr,
- sizeof(struct sockaddr_in6));
+ __assign_sockaddr(addr, &conn->cb_addr, conn->cb_addrlen);
),
TP_printk("addr=%pISpc client %08x:%08x prog=%u ident=%u",
- __entry->addr, __entry->cl_boot, __entry->cl_id,
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
__entry->prog, __entry->ident)
);
@@ -951,17 +941,17 @@ DECLARE_EVENT_CLASS(nfsd_cb_class,
__field(unsigned long, state)
__field(u32, cl_boot)
__field(u32, cl_id)
- __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
),
TP_fast_assign(
__entry->state = clp->cl_cb_state;
__entry->cl_boot = clp->cl_clientid.cl_boot;
__entry->cl_id = clp->cl_clientid.cl_id;
- memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
- sizeof(struct sockaddr_in6));
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen);
),
TP_printk("addr=%pISpc client %08x:%08x state=%s",
- __entry->addr, __entry->cl_boot, __entry->cl_id,
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
show_cb_state(__entry->state))
);
@@ -1001,7 +991,7 @@ TRACE_EVENT(nfsd_cb_setup,
__field(u32, cl_boot)
__field(u32, cl_id)
__field(unsigned long, authflavor)
- __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
__array(unsigned char, netid, 8)
),
TP_fast_assign(
@@ -1009,11 +999,11 @@ TRACE_EVENT(nfsd_cb_setup,
__entry->cl_id = clp->cl_clientid.cl_id;
strlcpy(__entry->netid, netid, sizeof(__entry->netid));
__entry->authflavor = authflavor;
- memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
- sizeof(struct sockaddr_in6));
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen);
),
TP_printk("addr=%pISpc client %08x:%08x proto=%s flavor=%s",
- __entry->addr, __entry->cl_boot, __entry->cl_id,
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
__entry->netid, show_nfsd_authflavor(__entry->authflavor))
);
@@ -1027,30 +1017,32 @@ TRACE_EVENT(nfsd_cb_setup_err,
__field(long, error)
__field(u32, cl_boot)
__field(u32, cl_id)
- __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
),
TP_fast_assign(
__entry->error = error;
__entry->cl_boot = clp->cl_clientid.cl_boot;
__entry->cl_id = clp->cl_clientid.cl_id;
- memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
- sizeof(struct sockaddr_in6));
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen);
),
TP_printk("addr=%pISpc client %08x:%08x error=%ld",
- __entry->addr, __entry->cl_boot, __entry->cl_id, __entry->error)
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->error)
);
-TRACE_EVENT(nfsd_cb_recall,
+TRACE_EVENT_CONDITION(nfsd_cb_recall,
TP_PROTO(
const struct nfs4_stid *stid
),
TP_ARGS(stid),
+ TP_CONDITION(stid->sc_client),
TP_STRUCT__entry(
__field(u32, cl_boot)
__field(u32, cl_id)
__field(u32, si_id)
__field(u32, si_generation)
- __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ __sockaddr(addr, stid->sc_client->cl_cb_conn.cb_addrlen)
),
TP_fast_assign(
const stateid_t *stp = &stid->sc_stateid;
@@ -1060,14 +1052,11 @@ TRACE_EVENT(nfsd_cb_recall,
__entry->cl_id = stp->si_opaque.so_clid.cl_id;
__entry->si_id = stp->si_opaque.so_id;
__entry->si_generation = stp->si_generation;
- if (clp)
- memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
- sizeof(struct sockaddr_in6));
- else
- memset(__entry->addr, 0, sizeof(struct sockaddr_in6));
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen);
),
TP_printk("addr=%pISpc client %08x:%08x stateid %08x:%08x",
- __entry->addr, __entry->cl_boot, __entry->cl_id,
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
__entry->si_id, __entry->si_generation)
);
@@ -1081,7 +1070,7 @@ TRACE_EVENT(nfsd_cb_notify_lock,
__field(u32, cl_boot)
__field(u32, cl_id)
__field(u32, fh_hash)
- __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ __sockaddr(addr, lo->lo_owner.so_client->cl_cb_conn.cb_addrlen)
),
TP_fast_assign(
const struct nfs4_client *clp = lo->lo_owner.so_client;
@@ -1089,11 +1078,11 @@ TRACE_EVENT(nfsd_cb_notify_lock,
__entry->cl_boot = clp->cl_clientid.cl_boot;
__entry->cl_id = clp->cl_clientid.cl_id;
__entry->fh_hash = knfsd_fh_hash(&nbl->nbl_fh);
- memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
- sizeof(struct sockaddr_in6));
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen);
),
TP_printk("addr=%pISpc client %08x:%08x fh_hash=0x%08x",
- __entry->addr, __entry->cl_boot, __entry->cl_id,
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
__entry->fh_hash)
);
@@ -1114,7 +1103,7 @@ TRACE_EVENT(nfsd_cb_offload,
__field(u32, fh_hash)
__field(int, status)
__field(u64, count)
- __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
),
TP_fast_assign(
__entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
@@ -1124,11 +1113,11 @@ TRACE_EVENT(nfsd_cb_offload,
__entry->fh_hash = knfsd_fh_hash(fh);
__entry->status = be32_to_cpu(status);
__entry->count = count;
- memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
- sizeof(struct sockaddr_in6));
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen);
),
TP_printk("addr=%pISpc client %08x:%08x stateid %08x:%08x fh_hash=0x%08x count=%llu status=%d",
- __entry->addr, __entry->cl_boot, __entry->cl_id,
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
__entry->si_id, __entry->si_generation,
__entry->fh_hash, __entry->count, __entry->status)
);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 99c2b9dfbb10..c22ad0532e8e 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -26,15 +26,14 @@
#include <linux/xattr.h>
#include <linux/jhash.h>
#include <linux/ima.h>
+#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/exportfs.h>
#include <linux/writeback.h>
#include <linux/security.h>
-#ifdef CONFIG_NFSD_V3
#include "xdr3.h"
-#endif /* CONFIG_NFSD_V3 */
#ifdef CONFIG_NFSD_V4
#include "../internal.h"
@@ -435,6 +434,10 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
.ia_size = iap->ia_size,
};
+ host_err = -EFBIG;
+ if (iap->ia_size < 0)
+ goto out_unlock;
+
host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL);
if (host_err)
goto out_unlock;
@@ -604,7 +607,6 @@ __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
#endif /* defined(CONFIG_NFSD_V4) */
-#ifdef CONFIG_NFSD_V3
/*
* Check server access rights to a file system object
*/
@@ -716,7 +718,6 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
out:
return error;
}
-#endif /* CONFIG_NFSD_V3 */
int nfsd_open_break_lease(struct inode *inode, int access)
{
@@ -1109,43 +1110,61 @@ out:
return err;
}
-#ifdef CONFIG_NFSD_V3
-/*
- * Commit all pending writes to stable storage.
+/**
+ * nfsd_commit - Commit pending writes to stable storage
+ * @rqstp: RPC request being processed
+ * @fhp: NFS filehandle
+ * @offset: raw offset from beginning of file
+ * @count: raw count of bytes to sync
+ * @verf: filled in with the server's current write verifier
*
- * Note: we only guarantee that data that lies within the range specified
- * by the 'offset' and 'count' parameters will be synced.
+ * Note: we guarantee that data that lies within the range specified
+ * by the 'offset' and 'count' parameters will be synced. The server
+ * is permitted to sync data that lies outside this range at the
+ * same time.
*
* Unfortunately we cannot lock the file to make sure we return full WCC
* data to the client, as locking happens lower down in the filesystem.
+ *
+ * Return values:
+ * An nfsstat value in network byte order.
*/
__be32
-nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
- loff_t offset, unsigned long count, __be32 *verf)
+nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
+ u32 count, __be32 *verf)
{
+ u64 maxbytes;
+ loff_t start, end;
struct nfsd_net *nn;
struct nfsd_file *nf;
- loff_t end = LLONG_MAX;
- __be32 err = nfserr_inval;
-
- if (offset < 0)
- goto out;
- if (count != 0) {
- end = offset + (loff_t)count - 1;
- if (end < offset)
- goto out;
- }
+ __be32 err;
err = nfsd_file_acquire(rqstp, fhp,
NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
if (err)
goto out;
+
+ /*
+ * Convert the client-provided (offset, count) range to a
+ * (start, end) range. If the client-provided range falls
+ * outside the maximum file size of the underlying FS,
+ * clamp the sync range appropriately.
+ */
+ start = 0;
+ end = LLONG_MAX;
+ maxbytes = (u64)fhp->fh_dentry->d_sb->s_maxbytes;
+ if (offset < maxbytes) {
+ start = offset;
+ if (count && (offset + count - 1 < maxbytes))
+ end = offset + count - 1;
+ }
+
nn = net_generic(nf->nf_net, nfsd_net_id);
if (EX_ISSYNC(fhp->fh_export)) {
errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
int err2;
- err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
+ err2 = vfs_fsync_range(nf->nf_file, start, end, 0);
switch (err2) {
case 0:
nfsd_copy_write_verifier(verf, nn);
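
[Editor's note] The clamping rule introduced above can be read as a standalone helper (hypothetical name, not part of the patch): a count of zero, or a range that would run past the filesystem's s_maxbytes, falls back to syncing through LLONG_MAX rather than failing with nfserr_inval as the old code did.

/* Hypothetical helper restating the clamp; mirrors the hunk above. */
static void example_commit_range(u64 offset, u32 count, u64 max_bytes,
				 loff_t *start, loff_t *end)
{
	*start = 0;
	*end = LLONG_MAX;
	if (offset < max_bytes) {
		*start = offset;
		if (count && (offset + count - 1 < max_bytes))
			*end = offset + count - 1;
	}
}
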
@@ -1167,7 +1186,6 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
out:
return err;
}
-#endif /* CONFIG_NFSD_V3 */
static __be32
nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
@@ -1357,8 +1375,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
rdev, resfhp);
}
-#ifdef CONFIG_NFSD_V3
-
/*
* NFSv3 and NFSv4 version of nfsd_create
*/
@@ -1524,7 +1540,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = nfserrno(host_err);
goto out;
}
-#endif /* CONFIG_NFSD_V3 */
/*
* Read a symlink. On entry, *lenp must contain the maximum path length that
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 9f56dcb22ff7..ccb87b2864f6 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -68,15 +68,13 @@ __be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
int type, dev_t rdev, struct svc_fh *res);
-#ifdef CONFIG_NFSD_V3
__be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
__be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
struct svc_fh *res, int createmode,
u32 *verifier, bool *truncp, bool *created);
-__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
- loff_t, unsigned long, __be32 *verf);
-#endif /* CONFIG_NFSD_V3 */
+__be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
+ u64 offset, u32 count, __be32 *verf);
#ifdef CONFIG_NFSD_V4
__be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
char *name, void **bufp, int *lenp);
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 528fb299430e..852f71580bd0 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -32,7 +32,7 @@ struct nfsd_readargs {
struct nfsd_writeargs {
svc_fh fh;
__u32 offset;
- int len;
+ __u32 len;
struct xdr_buf payload;
};
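
[Editor's note] The xdr.h change makes the NFSv2 WRITE length unsigned to match the 32-bit unsigned count carried on the wire; decoding into a signed int could turn large values negative. A hedged sketch of such a decode step (hypothetical helper, assuming the kernel's xdr_stream API):

static bool example_decode_write_len(struct xdr_stream *xdr, __u32 *lenp)
{
	__be32 *p = xdr_inline_decode(xdr, XDR_UNIT);

	if (!p)
		return false;
	*lenp = be32_to_cpup(p);	/* unsigned on the wire, unsigned here */
	return true;
}
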
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index e3d807d5b83a..476a4a649f38 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -199,23 +199,22 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc)
return 0;
}
-static int nilfs_set_page_dirty(struct page *page)
+static bool nilfs_dirty_folio(struct address_space *mapping,
+ struct folio *folio)
{
- struct inode *inode = page->mapping->host;
- int ret = __set_page_dirty_nobuffers(page);
+ struct inode *inode = mapping->host;
+ struct buffer_head *head;
+ unsigned int nr_dirty = 0;
+ bool ret = filemap_dirty_folio(mapping, folio);
- if (page_has_buffers(page)) {
- unsigned int nr_dirty = 0;
- struct buffer_head *bh, *head;
+ /*
+ * The page may not be locked, eg if called from try_to_unmap_one()
+ */
+ spin_lock(&mapping->private_lock);
+ head = folio_buffers(folio);
+ if (head) {
+ struct buffer_head *bh = head;
- /*
- * This page is locked by callers, and no other thread
- * concurrently marks its buffers dirty since they are
- * only dirtied through routines in fs/buffer.c in
- * which call sites of mark_buffer_dirty are protected
- * by page lock.
- */
- bh = head = page_buffers(page);
do {
/* Do not mark hole blocks dirty */
if (buffer_dirty(bh) || !buffer_mapped(bh))
@@ -224,14 +223,13 @@ static int nilfs_set_page_dirty(struct page *page)
set_buffer_dirty(bh);
nr_dirty++;
} while (bh = bh->b_this_page, bh != head);
-
- if (nr_dirty)
- nilfs_set_file_dirty(inode, nr_dirty);
} else if (ret) {
- unsigned int nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits);
+ nr_dirty = 1 << (folio_shift(folio) - inode->i_blkbits);
+ }
+ spin_unlock(&mapping->private_lock);
+ if (nr_dirty)
nilfs_set_file_dirty(inode, nr_dirty);
- }
return ret;
}
@@ -299,12 +297,12 @@ const struct address_space_operations nilfs_aops = {
.writepage = nilfs_writepage,
.readpage = nilfs_readpage,
.writepages = nilfs_writepages,
- .set_page_dirty = nilfs_set_page_dirty,
+ .dirty_folio = nilfs_dirty_folio,
.readahead = nilfs_readahead,
.write_begin = nilfs_write_begin,
.write_end = nilfs_write_end,
/* .releasepage = nilfs_releasepage, */
- .invalidatepage = block_invalidatepage,
+ .invalidate_folio = block_invalidate_folio,
.direct_IO = nilfs_direct_IO,
.is_partially_uptodate = block_is_partially_uptodate,
};
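
[Editor's note] The nilfs2 hunk follows the kernel-wide aops conversion from ->set_page_dirty to ->dirty_folio; because the folio may not be locked by the caller (e.g. try_to_unmap_one()), the buffer walk now happens under mapping->private_lock. A trimmed sketch of the same shape, roughly what block_dirty_folio() provides for a filesystem without per-buffer bookkeeping; everything named example_ is hypothetical:

static bool example_dirty_folio(struct address_space *mapping,
				struct folio *folio)
{
	struct buffer_head *head;
	bool ret = filemap_dirty_folio(mapping, folio);

	/* The folio may not be locked here, so take private_lock. */
	spin_lock(&mapping->private_lock);
	head = folio_buffers(folio);
	if (head) {
		struct buffer_head *bh = head;

		do {
			set_buffer_dirty(bh);
		} while (bh = bh->b_this_page, bh != head);
	}
	spin_unlock(&mapping->private_lock);
	return ret;
}
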
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 4b3d33cf0041..78db33decd72 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -434,7 +434,8 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
static const struct address_space_operations def_mdt_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.writepage = nilfs_mdt_write_page,
};
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 43287b0d3e9b..1362ccb64ec7 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -337,26 +337,12 @@ static void nilfs_end_bio_write(struct bio *bio)
}
static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
- struct nilfs_write_info *wi, int mode,
- int mode_flags)
+ struct nilfs_write_info *wi)
{
struct bio *bio = wi->bio;
- int err;
-
- if (segbuf->sb_nbio > 0 &&
- bdi_write_congested(segbuf->sb_super->s_bdi)) {
- wait_for_completion(&segbuf->sb_bio_event);
- segbuf->sb_nbio--;
- if (unlikely(atomic_read(&segbuf->sb_err))) {
- bio_put(bio);
- err = -EIO;
- goto failed;
- }
- }
bio->bi_end_io = nilfs_end_bio_write;
bio->bi_private = segbuf;
- bio_set_op_attrs(bio, mode, mode_flags);
submit_bio(bio);
segbuf->sb_nbio++;
@@ -365,33 +351,6 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
wi->nr_vecs = min(wi->max_pages, wi->rest_blocks);
wi->start = wi->end;
return 0;
-
- failed:
- wi->bio = NULL;
- return err;
-}
-
-/**
- * nilfs_alloc_seg_bio - allocate a new bio for writing log
- * @nilfs: nilfs object
- * @start: start block number of the bio
- * @nr_vecs: request size of page vector.
- *
- * Return Value: On success, pointer to the struct bio is returned.
- * On error, NULL is returned.
- */
-static struct bio *nilfs_alloc_seg_bio(struct the_nilfs *nilfs, sector_t start,
- int nr_vecs)
-{
- struct bio *bio;
-
- bio = bio_alloc(GFP_NOIO, nr_vecs);
- if (likely(bio)) {
- bio_set_dev(bio, nilfs->ns_bdev);
- bio->bi_iter.bi_sector =
- start << (nilfs->ns_blocksize_bits - 9);
- }
- return bio;
}
static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf,
@@ -407,17 +366,17 @@ static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf,
static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf,
struct nilfs_write_info *wi,
- struct buffer_head *bh, int mode)
+ struct buffer_head *bh)
{
int len, err;
BUG_ON(wi->nr_vecs <= 0);
repeat:
if (!wi->bio) {
- wi->bio = nilfs_alloc_seg_bio(wi->nilfs, wi->blocknr + wi->end,
- wi->nr_vecs);
- if (unlikely(!wi->bio))
- return -ENOMEM;
+ wi->bio = bio_alloc(wi->nilfs->ns_bdev, wi->nr_vecs,
+ REQ_OP_WRITE, GFP_NOIO);
+ wi->bio->bi_iter.bi_sector = (wi->blocknr + wi->end) <<
+ (wi->nilfs->ns_blocksize_bits - 9);
}
len = bio_add_page(wi->bio, bh->b_page, bh->b_size, bh_offset(bh));
@@ -426,7 +385,7 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf,
return 0;
}
/* bio is FULL */
- err = nilfs_segbuf_submit_bio(segbuf, wi, mode, 0);
+ err = nilfs_segbuf_submit_bio(segbuf, wi);
/* never submit current bh */
if (likely(!err))
goto repeat;
@@ -456,13 +415,13 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
nilfs_segbuf_prepare_write(segbuf, &wi);
list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) {
- res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, REQ_OP_WRITE);
+ res = nilfs_segbuf_submit_bh(segbuf, &wi, bh);
if (unlikely(res))
goto failed_bio;
}
list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
- res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, REQ_OP_WRITE);
+ res = nilfs_segbuf_submit_bh(segbuf, &wi, bh);
if (unlikely(res))
goto failed_bio;
}
@@ -472,8 +431,8 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
* Last BIO is always sent through the following
* submission.
*/
- res = nilfs_segbuf_submit_bio(segbuf, &wi, REQ_OP_WRITE,
- REQ_SYNC);
+ wi.bio->bi_opf |= REQ_SYNC;
+ res = nilfs_segbuf_submit_bio(segbuf, &wi);
}
failed_bio:
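
[Editor's note] The segbuf rewrite relies on the newer bio_alloc() signature, which binds the target device and operation at allocation time and, for nr_vecs <= BIO_MAX_VECS with a sleeping GFP mask, does not return NULL, so the -ENOMEM fallback paths disappear (the bdi write-congestion throttle was dropped along with congestion tracking). A minimal sketch under those assumptions; the helper and its completion handler are hypothetical:

static void example_submit_write(struct block_device *bdev, sector_t sector,
				 struct page *page, unsigned int len,
				 bio_end_io_t *end_io, void *private)
{
	/* Cannot fail for nr_vecs <= BIO_MAX_VECS with a sleeping GFP mask. */
	struct bio *bio = bio_alloc(bdev, 1, REQ_OP_WRITE, GFP_NOIO);

	bio->bi_iter.bi_sector = sector;
	bio->bi_end_io = end_io;
	bio->bi_private = private;
	__bio_add_page(bio, page, len, 0);
	submit_bio(bio);
}
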
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 63e5fa74016c..3e05c98631ec 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -151,7 +151,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
{
struct nilfs_inode_info *ii;
- ii = kmem_cache_alloc(nilfs_inode_cachep, GFP_NOFS);
+ ii = alloc_inode_sb(sb, nilfs_inode_cachep, GFP_NOFS);
if (!ii)
return NULL;
ii->i_bh = NULL;
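
[Editor's note] This is the first of several identical conversions in this series from kmem_cache_alloc() to alloc_inode_sb(), which (as introduced by the series) routes the allocation through the superblock so the inode object is charged to the right list_lru/memcg. A sketch of the repeated pattern; the struct, cache, and function names are hypothetical:

struct example_inode {				/* hypothetical per-fs inode */
	struct inode	vfs_inode;
};

static struct kmem_cache *example_inode_cachep;	/* assumed initialized */

static struct inode *example_alloc_inode(struct super_block *sb)
{
	struct example_inode *ei;

	ei = alloc_inode_sb(sb, example_inode_cachep, GFP_NOFS);
	if (!ei)
		return NULL;
	return &ei->vfs_inode;
}
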
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 73b1615f9d96..2ff6bd85ba8f 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -158,7 +158,6 @@ static size_t fanotify_event_len(unsigned int info_mode,
struct fanotify_event *event)
{
size_t event_len = FAN_EVENT_METADATA_LEN;
- struct fanotify_info *info;
int fh_len;
int dot_len = 0;
@@ -168,8 +167,6 @@ static size_t fanotify_event_len(unsigned int info_mode,
if (fanotify_is_error_event(event->mask))
event_len += FANOTIFY_ERROR_INFO_LEN;
- info = fanotify_event_info(event);
-
if (fanotify_event_has_any_dir_fh(event)) {
event_len += fanotify_dir_name_info_len(event);
} else if ((info_mode & FAN_REPORT_NAME) &&
@@ -704,9 +701,6 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
if (fanotify_is_perm_event(event->mask))
FANOTIFY_PERM(event)->fd = fd;
- if (f)
- fd_install(fd, f);
-
if (info_mode) {
ret = copy_info_records_to_user(event, info, info_mode, pidfd,
buf, count);
@@ -714,6 +708,9 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
goto out_close_fd;
}
+ if (f)
+ fd_install(fd, f);
+
return metadata.event_len;
out_close_fd:
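
[Editor's note] The fanotify reorder matters because fd_install() publishes the file into the task's descriptor table and cannot be undone: once userspace can see the fd, the out_close_fd error path would be closing a descriptor the process may already be using. Moving the install after the last fallible step keeps the error path safe. A sketch of the idiom with a hypothetical helper:

static int example_send_fd(struct file *f, void __user *ubuf,
			   const void *rec, size_t len)
{
	int fd = get_unused_fd_flags(O_CLOEXEC);

	if (fd < 0)
		return fd;
	if (copy_to_user(ubuf, rec, len)) {
		put_unused_fd(fd);	/* fd number was never published */
		fput(f);
		return -EFAULT;
	}
	fd_install(fd, f);		/* point of no return */
	return fd;
}
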
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index bb0a43860ad2..d154dcfe06af 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -593,12 +593,12 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
iblock = initialized_size >> blocksize_bits;
/*
- * Be very careful. We have no exclusion from __set_page_dirty_buffers
+ * Be very careful. We have no exclusion from block_dirty_folio
* here, and the (potentially unmapped) buffers may become dirty at
* any time. If a buffer becomes dirty here after we've inspected it
* then we just miss that fact, and the page stays dirty.
*
- * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
+ * Buffers outside i_size may be dirtied by block_dirty_folio;
* handle that here by just cleaning them.
*/
@@ -653,7 +653,7 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
// Update initialized size in the attribute and
// in the inode.
// Again, for each page do:
- // __set_page_dirty_buffers();
+ // block_dirty_folio();
// put_page()
// We don't need to wait on the writes.
// Update iblock.
@@ -1350,12 +1350,13 @@ retry_writepage:
/* Is the page fully outside i_size? (truncate in progress) */
if (unlikely(page->index >= (i_size + PAGE_SIZE - 1) >>
PAGE_SHIFT)) {
+ struct folio *folio = page_folio(page);
/*
* The page may have dirty, unmapped buffers. Make them
* freeable here, so the page does not leak.
*/
- block_invalidatepage(page, 0, PAGE_SIZE);
- unlock_page(page);
+ block_invalidate_folio(folio, 0, folio_size(folio));
+ folio_unlock(folio);
ntfs_debug("Write outside i_size - truncated?");
return 0;
}
@@ -1653,7 +1654,7 @@ const struct address_space_operations ntfs_normal_aops = {
.readpage = ntfs_readpage,
#ifdef NTFS_RW
.writepage = ntfs_writepage,
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
#endif /* NTFS_RW */
.bmap = ntfs_bmap,
.migratepage = buffer_migrate_page,
@@ -1668,7 +1669,7 @@ const struct address_space_operations ntfs_compressed_aops = {
.readpage = ntfs_readpage,
#ifdef NTFS_RW
.writepage = ntfs_writepage,
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
#endif /* NTFS_RW */
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
@@ -1683,9 +1684,7 @@ const struct address_space_operations ntfs_mst_aops = {
.readpage = ntfs_readpage, /* Fill page with data. */
#ifdef NTFS_RW
.writepage = ntfs_writepage, /* Write dirty page to disk. */
- .set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty
- without touching the buffers
- belonging to the page. */
+ .dirty_folio = filemap_dirty_folio,
#endif /* NTFS_RW */
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
@@ -1747,7 +1746,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
set_buffer_dirty(bh);
} while ((bh = bh->b_this_page) != head);
spin_unlock(&mapping->private_lock);
- __set_page_dirty_nobuffers(page);
+ block_dirty_folio(mapping, page_folio(page));
if (unlikely(buffers_to_free)) {
do {
bh = buffers_to_free->b_this_page;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 4474adb393ca..efe0602b4e51 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -310,7 +310,7 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
ntfs_inode *ni;
ntfs_debug("Entering.");
- ni = kmem_cache_alloc(ntfs_big_inode_cache, GFP_NOFS);
+ ni = alloc_inode_sb(sb, ntfs_big_inode_cache, GFP_NOFS);
if (likely(ni != NULL)) {
ni->state = 0;
return VFS_I(ni);
@@ -1881,6 +1881,10 @@ int ntfs_read_inode_mount(struct inode *vi)
}
/* Now allocate memory for the attribute list. */
ni->attr_list_size = (u32)ntfs_attr_size(a);
+ if (!ni->attr_list_size) {
+ ntfs_error(sb, "Attr_list_size is zero");
+ goto put_err_out;
+ }
ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
if (!ni->attr_list) {
ntfs_error(sb, "Not enough memory to allocate buffer "
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
index 4de9acb16968..3de5700a9b83 100644
--- a/fs/ntfs3/fsntfs.c
+++ b/fs/ntfs3/fsntfs.c
@@ -1443,17 +1443,6 @@ int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr,
return err;
}
-static inline struct bio *ntfs_alloc_bio(u32 nr_vecs)
-{
- struct bio *bio = bio_alloc(GFP_NOFS | __GFP_HIGH, nr_vecs);
-
- if (!bio && (current->flags & PF_MEMALLOC)) {
- while (!bio && (nr_vecs /= 2))
- bio = bio_alloc(GFP_NOFS | __GFP_HIGH, nr_vecs);
- }
- return bio;
-}
-
/*
* ntfs_bio_pages - Read/write pages from/to disk.
*/
@@ -1496,19 +1485,13 @@ int ntfs_bio_pages(struct ntfs_sb_info *sbi, const struct runs_tree *run,
lbo = ((u64)lcn << cluster_bits) + off;
len = ((u64)clen << cluster_bits) - off;
new_bio:
- new = ntfs_alloc_bio(nr_pages - page_idx);
- if (!new) {
- err = -ENOMEM;
- goto out;
- }
+ new = bio_alloc(bdev, nr_pages - page_idx, op, GFP_NOFS);
if (bio) {
bio_chain(bio, new);
submit_bio(bio);
}
bio = new;
- bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = lbo >> 9;
- bio->bi_opf = op;
while (len) {
off = vbo & (PAGE_SIZE - 1);
@@ -1599,18 +1582,12 @@ int ntfs_bio_fill_1(struct ntfs_sb_info *sbi, const struct runs_tree *run)
lbo = (u64)lcn << cluster_bits;
len = (u64)clen << cluster_bits;
new_bio:
- new = ntfs_alloc_bio(BIO_MAX_VECS);
- if (!new) {
- err = -ENOMEM;
- break;
- }
+ new = bio_alloc(bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOFS);
if (bio) {
bio_chain(bio, new);
submit_bio(bio);
}
bio = new;
- bio_set_dev(bio, bdev);
- bio->bi_opf = REQ_OP_WRITE;
bio->bi_iter.bi_sector = lbo >> 9;
for (;;) {
@@ -1626,11 +1603,10 @@ new_bio:
}
} while (run_get_entry(run, ++run_idx, NULL, &lcn, &clen));
- if (bio) {
- if (!err)
- err = submit_bio_wait(bio);
- bio_put(bio);
- }
+ if (!err)
+ err = submit_bio_wait(bio);
+ bio_put(bio);
+
blk_finish_plug(&plug);
out:
unlock_page(fill);
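
[Editor's note] The ntfs3 hunks drop the ntfs_alloc_bio() retry wrapper for the same bio_alloc() reasons noted earlier, and the final submit_bio_wait() no longer needs a NULL check because at least one bio is always allocated. The bio_chain() pattern they keep is sketched below (hypothetical helper; assumes nr >= 1): each full bio is chained to its successor and submitted, and waiting on the last bio completes only once the whole chain has finished.

static int example_write_pages_chained(struct block_device *bdev,
				       sector_t sector, struct page **pages,
				       int nr)
{
	struct bio *bio = NULL, *new;
	int i, err;

	for (i = 0; i < nr; i++) {
		new = bio_alloc(bdev, 1, REQ_OP_WRITE, GFP_NOFS);
		new->bi_iter.bi_sector = sector + (i << (PAGE_SHIFT - 9));
		__bio_add_page(new, pages[i], PAGE_SIZE, 0);
		if (bio) {
			bio_chain(bio, new);	/* new completes after bio */
			submit_bio(bio);
		}
		bio = new;
	}
	err = submit_bio_wait(bio);	/* waits for the whole chain */
	bio_put(bio);
	return err;
}
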
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index a87ab3ad3cd3..9eab11e3b034 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -1950,7 +1950,7 @@ const struct address_space_operations ntfs_aops = {
.write_end = ntfs_write_end,
.direct_IO = ntfs_direct_IO,
.bmap = ntfs_bmap,
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
};
const struct address_space_operations ntfs_aops_cmpr = {
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 29813200c7af..278dcf502410 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -399,7 +399,7 @@ static struct kmem_cache *ntfs_inode_cachep;
static struct inode *ntfs_alloc_inode(struct super_block *sb)
{
- struct ntfs_inode *ni = kmem_cache_alloc(ntfs_inode_cachep, GFP_NOFS);
+ struct ntfs_inode *ni = alloc_inode_sb(sb, ntfs_inode_cachep, GFP_NOFS);
if (!ni)
return NULL;
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index bf9357123bc5..49f41074baad 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5981,7 +5981,7 @@ bail:
return status;
}
-/* Expects you to already be holding tl_inode->i_mutex */
+/* Expects you to already be holding tl_inode->i_rwsem */
int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
{
int status;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 498da317580a..4b9af65cb61b 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2311,7 +2311,7 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
down_write(&oi->ip_alloc_sem);
- /* Delete orphan before acquire i_mutex. */
+ /* Delete orphan before acquiring i_rwsem. */
if (dwc->dw_orphaned) {
BUG_ON(dwc->dw_writer_pid != task_pid_nr(current));
@@ -2453,7 +2453,7 @@ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
}
const struct address_space_operations ocfs2_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
.readpage = ocfs2_readpage,
.readahead = ocfs2_readahead,
.writepage = ocfs2_writepage,
@@ -2461,7 +2461,7 @@ const struct address_space_operations ocfs2_aops = {
.write_end = ocfs2_write_end,
.bmap = ocfs2_bmap,
.direct_IO = ocfs2_direct_IO,
- .invalidatepage = block_invalidatepage,
+ .invalidate_folio = block_invalidate_folio,
.releasepage = ocfs2_releasepage,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index a17be1618bf7..ea0e70c0fce0 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -518,7 +518,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
* GFP_KERNEL that the local node can get fenced. It would be
* nicest if we could pre-allocate these bios and avoid this
* all together. */
- bio = bio_alloc(GFP_ATOMIC, 16);
+ bio = bio_alloc(reg->hr_bdev, 16, op | op_flags, GFP_ATOMIC);
if (!bio) {
mlog(ML_ERROR, "Could not alloc slots BIO!\n");
bio = ERR_PTR(-ENOMEM);
@@ -527,10 +527,8 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
/* Must put everything in 512 byte sectors for the bio... */
bio->bi_iter.bi_sector = (reg->hr_start_block + cs) << (bits - 9);
- bio_set_dev(bio, reg->hr_bdev);
bio->bi_private = wc;
bio->bi_end_io = o2hb_bio_end_io;
- bio_set_op_attrs(bio, op, op_flags);
vec_start = (cs << bits) % PAGE_SIZE;
while(cs < max_slots) {
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 625c92521416..27fee68f860a 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -689,7 +689,7 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
struct o2nm_node_group *ns = NULL;
struct config_group *o2hb_group = NULL, *ret = NULL;
- /* this runs under the parent dir's i_mutex; there can be only
+ /* this runs under the parent dir's i_rwsem; there can be only
* one caller in here at a time */
if (o2nm_single_cluster)
return ERR_PTR(-ENOSPC);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index f2cc1ff29e6d..81c3d65d68fe 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1957,7 +1957,7 @@ bail_nolock:
}
/*
- * NOTE: this should always be called with parent dir i_mutex taken.
+ * NOTE: this should always be called with parent dir i_rwsem taken.
*/
int ocfs2_find_files_on_disk(const char *name,
int namelen,
@@ -2003,7 +2003,7 @@ int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
* Return 0 if the name does not exist
* Return -EEXIST if the directory contains the name
*
- * Callers should have i_mutex + a cluster lock on dir
+ * Callers should have i_rwsem + a cluster lock on dir
*/
int ocfs2_check_dir_for_entry(struct inode *dir,
const char *name,
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index fa0a14f199eb..e360543ad7e7 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -280,7 +280,7 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb)
{
struct dlmfs_inode_private *ip;
- ip = kmem_cache_alloc(dlmfs_inode_cache, GFP_NOFS);
+ ip = alloc_inode_sb(sb, dlmfs_inode_cache, GFP_NOFS);
if (!ip)
return NULL;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index fc5f780fa235..01b7407a8893 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -270,7 +270,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
/*
* Don't use ocfs2_mark_inode_dirty() here as we don't always
- * have i_mutex to guard against concurrent changes to other
+ * have i_rwsem to guard against concurrent changes to other
* inode fields.
*/
inode->i_atime = current_time(inode);
@@ -540,15 +540,12 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
struct ocfs2_alloc_context *meta_ac,
enum ocfs2_alloc_restarted *reason_ret)
{
- int ret;
struct ocfs2_extent_tree et;
ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), fe_bh);
- ret = ocfs2_add_clusters_in_btree(handle, &et, logical_offset,
- clusters_to_add, mark_unwritten,
- data_ac, meta_ac, reason_ret);
-
- return ret;
+ return ocfs2_add_clusters_in_btree(handle, &et, logical_offset,
+ clusters_to_add, mark_unwritten,
+ data_ac, meta_ac, reason_ret);
}
static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
@@ -1068,7 +1065,7 @@ static int ocfs2_extend_file(struct inode *inode,
/*
* The alloc sem blocks people in read/write from reading our
* allocation until we're done changing it. We depend on
- * i_mutex to block other extend/truncate calls while we're
+ * i_rwsem to block other extend/truncate calls while we're
* here. We even have to hold it for sparse files because there
* might be some tail zeroing.
*/
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 6c2411c2afcf..5739dc301569 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -713,7 +713,7 @@ bail:
/*
* Serialize with orphan dir recovery. If the process doing
* recovery on this orphan dir does an iget() with the dir
- * i_mutex held, we'll deadlock here. Instead we detect this
+ * i_rwsem held, we'll deadlock here. Instead we detect this
* and exit early - recovery will wipe this inode for us.
*/
static int ocfs2_check_orphan_recovery_state(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 5f6bacbeef6b..c4426d12a2ad 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -606,7 +606,7 @@ out:
/*
* make sure we've got at least bits_wanted contiguous bits in the
- * local alloc. You lose them when you drop i_mutex.
+ * local alloc. You lose them when you drop i_rwsem.
*
* We will add ourselves to the transaction passed in, but may start
* our own in order to shift windows.
@@ -636,7 +636,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
/*
* We must double check state and allocator bits because
- * another process may have changed them while holding i_mutex.
+ * another process may have changed them while holding i_rwsem.
*/
spin_lock(&osb->osb_lock);
if (!ocfs2_la_state_enabled(osb) ||
@@ -1029,7 +1029,7 @@ enum ocfs2_la_event {
/*
* Given an event, calculate the size of our next local alloc window.
*
- * This should always be called under i_mutex of the local alloc inode
+ * This should always be called under i_rwsem of the local alloc inode
* so that local alloc disabling doesn't race with processes trying to
* use the allocator.
*
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 2c46ff6ba4ea..c75fd54b9185 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -476,7 +476,7 @@ leave:
ocfs2_free_alloc_context(meta_ac);
/*
- * We should call iput after the i_mutex of the bitmap been
+ * We should call iput after the i_rwsem of the bitmap has been
* unlocked in ocfs2_free_alloc_context, or the
* ocfs2_delete_inode will mutex_lock again.
*/
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index bb62cc2e0211..337527571461 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -355,7 +355,7 @@ struct ocfs2_super
struct delayed_work la_enable_wq;
/*
- * Must hold local alloc i_mutex and osb->osb_lock to change
+ * Must hold local alloc i_rwsem and osb->osb_lock to change
* local_alloc_bits. Reads can be done under either lock.
*/
unsigned int local_alloc_bits;
@@ -430,7 +430,7 @@ struct ocfs2_super
atomic_t osb_tl_disable;
/*
* How many clusters in our truncate log.
- * It must be protected by osb_tl_inode->i_mutex.
+ * It must be protected by osb_tl_inode->i_rwsem.
*/
unsigned int truncated_clusters;
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index f033de733adb..273f65e0aaba 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -36,7 +36,7 @@
* should be obeyed by all the functions:
* - any write of quota structure (either to local or global file) is protected
* by dqio_sem or dquot->dq_lock.
- * - any modification of global quota file holds inode cluster lock, i_mutex,
+ * - any modification of global quota file holds inode cluster lock, i_rwsem,
* and ip_alloc_sem of the global quota file (achieved by
* ocfs2_lock_global_qf). It also has to hold qinfo_lock.
* - an allocation of new blocks for local quota file is protected by
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 85a47621e0c0..a75e2b7d67f5 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -683,28 +683,22 @@ static int user_dlm_lock(struct ocfs2_cluster_connection *conn,
void *name,
unsigned int namelen)
{
- int ret;
-
if (!lksb->lksb_fsdlm.sb_lvbptr)
lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
sizeof(struct dlm_lksb);
- ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm,
- flags|DLM_LKF_NODLCKWT, name, namelen, 0,
- fsdlm_lock_ast_wrapper, lksb,
- fsdlm_blocking_ast_wrapper);
- return ret;
+ return dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm,
+ flags|DLM_LKF_NODLCKWT, name, namelen, 0,
+ fsdlm_lock_ast_wrapper, lksb,
+ fsdlm_blocking_ast_wrapper);
}
static int user_dlm_unlock(struct ocfs2_cluster_connection *conn,
struct ocfs2_dlm_lksb *lksb,
u32 flags)
{
- int ret;
-
- ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid,
- flags, &lksb->lksb_fsdlm, lksb);
- return ret;
+ return dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid,
+ flags, &lksb->lksb_fsdlm, lksb);
}
static int user_dlm_lock_status(struct ocfs2_dlm_lksb *lksb)
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 731558a6f27d..dd77b7aaabf5 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -661,17 +661,6 @@ static struct ctl_table ocfs2_nm_table[] = {
{ }
};
-static struct ctl_table ocfs2_mod_table[] = {
- {
- .procname = "nm",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = ocfs2_nm_table
- },
- { }
-};
-
static struct ctl_table_header *ocfs2_table_header;
/*
@@ -682,7 +671,7 @@ static int __init ocfs2_stack_glue_init(void)
{
strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB);
- ocfs2_table_header = register_sysctl("fs/ocfs2", ocfs2_mod_table);
+ ocfs2_table_header = register_sysctl("fs/ocfs2/nm", ocfs2_nm_table);
if (!ocfs2_table_header) {
printk(KERN_ERR
"ocfs2 stack glue: unable to register sysctl\n");
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 481017e1dac5..166c8918c825 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1251,26 +1251,23 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
{
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
struct journal_head *jh;
- int ret = 1;
+ int ret;
if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
return 0;
- if (!buffer_jbd(bg_bh))
+ jh = jbd2_journal_grab_journal_head(bg_bh);
+ if (!jh)
return 1;
- jbd_lock_bh_journal_head(bg_bh);
- if (buffer_jbd(bg_bh)) {
- jh = bh2jh(bg_bh);
- spin_lock(&jh->b_state_lock);
- bg = (struct ocfs2_group_desc *) jh->b_committed_data;
- if (bg)
- ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
- else
- ret = 1;
- spin_unlock(&jh->b_state_lock);
- }
- jbd_unlock_bh_journal_head(bg_bh);
+ spin_lock(&jh->b_state_lock);
+ bg = (struct ocfs2_group_desc *) jh->b_committed_data;
+ if (bg)
+ ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
+ else
+ ret = 1;
+ spin_unlock(&jh->b_state_lock);
+ jbd2_journal_put_journal_head(jh);
return ret;
}
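
[Editor's note] The suballoc fix replaces a racy buffer_jbd() check-then-lock sequence with jbd2_journal_grab_journal_head(), which takes the reference atomically and returns NULL if the buffer has no journal head, so b_committed_data can then be inspected safely under b_state_lock. The pattern, as a hypothetical helper:

static bool example_bit_free_in_committed(struct buffer_head *bh, int nr)
{
	struct journal_head *jh;
	bool ret = true;

	jh = jbd2_journal_grab_journal_head(bh);
	if (!jh)
		return true;	/* no journal head: nothing committed */

	spin_lock(&jh->b_state_lock);
	if (jh->b_committed_data)
		ret = !test_bit(nr, (unsigned long *)jh->b_committed_data);
	spin_unlock(&jh->b_state_lock);
	jbd2_journal_put_journal_head(jh);
	return ret;
}
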
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 2772dec9dcea..477cdf94122e 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -548,7 +548,7 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
{
struct ocfs2_inode_info *oi;
- oi = kmem_cache_alloc(ocfs2_inode_cachep, GFP_NOFS);
+ oi = alloc_inode_sb(sb, ocfs2_inode_cachep, GFP_NOFS);
if (!oi)
return NULL;
@@ -1105,17 +1105,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
goto read_super_error;
}
- root = d_make_root(inode);
- if (!root) {
- status = -ENOMEM;
- mlog_errno(status);
- goto read_super_error;
- }
-
- sb->s_root = root;
-
- ocfs2_complete_mount_recovery(osb);
-
osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL,
&ocfs2_kset->kobj);
if (!osb->osb_dev_kset) {
@@ -1133,6 +1122,17 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
goto read_super_error;
}
+ root = d_make_root(inode);
+ if (!root) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto read_super_error;
+ }
+
+ sb->s_root = root;
+
+ ocfs2_complete_mount_recovery(osb);
+
if (ocfs2_mount_local(osb))
snprintf(nodestr, sizeof(nodestr), "local");
else
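
[Editor's note] The ocfs2_fill_super() reorder appears intended to make d_make_root() the last failure point: sb->s_root is published only after the kset/sysfs steps that can still fail, so the error unwind never has to cope with a half-mounted root. A generic sketch of that ordering (all example_ helpers hypothetical; note that d_make_root() consumes the inode even on failure):

static int example_fill_super(struct super_block *sb, void *data, int silent)
{
	struct inode *inode;
	struct dentry *root;

	inode = example_get_root_inode(sb);	/* hypothetical */
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	if (example_register_sysfs(sb)) {	/* hypothetical, can fail */
		iput(inode);
		return -ENOMEM;
	}

	root = d_make_root(inode);	/* consumes inode, even on failure */
	if (!root) {
		example_unregister_sysfs(sb);	/* hypothetical */
		return -ENOMEM;
	}
	sb->s_root = root;	/* published after all fallible steps */
	return 0;
}
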
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index dd784eb0cd7c..95d0611c5fc7 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -7205,7 +7205,7 @@ out:
* Used for reflink a non-preserve-security file.
*
* It uses common api like ocfs2_xattr_set, so the caller
- * must not hold any lock expect i_mutex.
+ * must not hold any lock except i_rwsem.
*/
int ocfs2_init_security_and_acl(struct inode *dir,
struct inode *inode,
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 89725b15a64b..3f297b541713 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -372,7 +372,8 @@ const struct inode_operations omfs_file_inops = {
};
const struct address_space_operations omfs_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = omfs_readpage,
.readahead = omfs_readahead,
.writepage = omfs_writepage,
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index f825176ff4ed..f0b7f4d51a17 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -335,7 +335,7 @@ static struct inode *openprom_alloc_inode(struct super_block *sb)
{
struct op_inode_info *oi;
- oi = kmem_cache_alloc(op_inode_cachep, GFP_KERNEL);
+ oi = alloc_inode_sb(sb, op_inode_cachep, GFP_KERNEL);
if (!oi)
return NULL;
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index e5e3e500ed46..79c1025d18ea 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -46,7 +46,7 @@ static int orangefs_writepage_locked(struct page *page,
else
wlen = PAGE_SIZE;
}
- /* Should've been handled in orangefs_invalidatepage. */
+ /* Should've been handled in orangefs_invalidate_folio. */
WARN_ON(off == len || off + wlen > len);
bv.bv_page = page;
@@ -243,7 +243,7 @@ static int orangefs_writepages(struct address_space *mapping,
return ret;
}
-static int orangefs_launder_page(struct page *);
+static int orangefs_launder_folio(struct folio *);
static void orangefs_readahead(struct readahead_control *rac)
{
@@ -290,14 +290,15 @@ static void orangefs_readahead(struct readahead_control *rac)
static int orangefs_readpage(struct file *file, struct page *page)
{
+ struct folio *folio = page_folio(page);
struct inode *inode = page->mapping->host;
struct iov_iter iter;
struct bio_vec bv;
ssize_t ret;
loff_t off; /* offset into this page */
- if (PageDirty(page))
- orangefs_launder_page(page);
+ if (folio_test_dirty(folio))
+ orangefs_launder_folio(folio);
off = page_offset(page);
bv.bv_page = page;
@@ -330,6 +331,7 @@ static int orangefs_write_begin(struct file *file,
void **fsdata)
{
struct orangefs_write_range *wr;
+ struct folio *folio;
struct page *page;
pgoff_t index;
int ret;
@@ -341,27 +343,28 @@ static int orangefs_write_begin(struct file *file,
return -ENOMEM;
*pagep = page;
+ folio = page_folio(page);
- if (PageDirty(page) && !PagePrivate(page)) {
+ if (folio_test_dirty(folio) && !folio_test_private(folio)) {
/*
* Should be impossible. If it happens, launder the page
* since we don't know what's dirty. This will WARN in
* orangefs_writepage_locked.
*/
- ret = orangefs_launder_page(page);
+ ret = orangefs_launder_folio(folio);
if (ret)
return ret;
}
- if (PagePrivate(page)) {
+ if (folio_test_private(folio)) {
struct orangefs_write_range *wr;
- wr = (struct orangefs_write_range *)page_private(page);
+ wr = folio_get_private(folio);
if (wr->pos + wr->len == pos &&
uid_eq(wr->uid, current_fsuid()) &&
gid_eq(wr->gid, current_fsgid())) {
wr->len += len;
goto okay;
} else {
- ret = orangefs_launder_page(page);
+ ret = orangefs_launder_folio(folio);
if (ret)
return ret;
}
@@ -375,7 +378,7 @@ static int orangefs_write_begin(struct file *file,
wr->len = len;
wr->uid = current_fsuid();
wr->gid = current_fsgid();
- attach_page_private(page, wr);
+ folio_attach_private(folio, wr);
okay:
return 0;
}
@@ -415,47 +418,45 @@ static int orangefs_write_end(struct file *file, struct address_space *mapping,
return copied;
}
-static void orangefs_invalidatepage(struct page *page,
- unsigned int offset,
- unsigned int length)
+static void orangefs_invalidate_folio(struct folio *folio,
+ size_t offset, size_t length)
{
- struct orangefs_write_range *wr;
- wr = (struct orangefs_write_range *)page_private(page);
+ struct orangefs_write_range *wr = folio_get_private(folio);
if (offset == 0 && length == PAGE_SIZE) {
- kfree(detach_page_private(page));
+ kfree(folio_detach_private(folio));
return;
/* write range entirely within invalidate range (or equal) */
- } else if (page_offset(page) + offset <= wr->pos &&
- wr->pos + wr->len <= page_offset(page) + offset + length) {
- kfree(detach_page_private(page));
+ } else if (folio_pos(folio) + offset <= wr->pos &&
+ wr->pos + wr->len <= folio_pos(folio) + offset + length) {
+ kfree(folio_detach_private(folio));
/* XXX is this right? only caller in fs */
- cancel_dirty_page(page);
+ folio_cancel_dirty(folio);
return;
/* invalidate range chops off end of write range */
- } else if (wr->pos < page_offset(page) + offset &&
- wr->pos + wr->len <= page_offset(page) + offset + length &&
- page_offset(page) + offset < wr->pos + wr->len) {
+ } else if (wr->pos < folio_pos(folio) + offset &&
+ wr->pos + wr->len <= folio_pos(folio) + offset + length &&
+ folio_pos(folio) + offset < wr->pos + wr->len) {
size_t x;
- x = wr->pos + wr->len - (page_offset(page) + offset);
+ x = wr->pos + wr->len - (folio_pos(folio) + offset);
WARN_ON(x > wr->len);
wr->len -= x;
wr->uid = current_fsuid();
wr->gid = current_fsgid();
/* invalidate range chops off beginning of write range */
- } else if (page_offset(page) + offset <= wr->pos &&
- page_offset(page) + offset + length < wr->pos + wr->len &&
- wr->pos < page_offset(page) + offset + length) {
+ } else if (folio_pos(folio) + offset <= wr->pos &&
+ folio_pos(folio) + offset + length < wr->pos + wr->len &&
+ wr->pos < folio_pos(folio) + offset + length) {
size_t x;
- x = page_offset(page) + offset + length - wr->pos;
+ x = folio_pos(folio) + offset + length - wr->pos;
WARN_ON(x > wr->len);
wr->pos += x;
wr->len -= x;
wr->uid = current_fsuid();
wr->gid = current_fsgid();
/* invalidate range entirely within write range (punch hole) */
- } else if (wr->pos < page_offset(page) + offset &&
- page_offset(page) + offset + length < wr->pos + wr->len) {
+ } else if (wr->pos < folio_pos(folio) + offset &&
+ folio_pos(folio) + offset + length < wr->pos + wr->len) {
/* XXX what do we do here... should not WARN_ON */
WARN_ON(1);
/* punch hole */
@@ -467,11 +468,11 @@ static void orangefs_invalidatepage(struct page *page,
/* non-overlapping ranges */
} else {
/* WARN if they do overlap */
- if (!((page_offset(page) + offset + length <= wr->pos) ^
- (wr->pos + wr->len <= page_offset(page) + offset))) {
+ if (!((folio_pos(folio) + offset + length <= wr->pos) ^
+ (wr->pos + wr->len <= folio_pos(folio) + offset))) {
WARN_ON(1);
- printk("invalidate range offset %llu length %u\n",
- page_offset(page) + offset, length);
+ printk("invalidate range offset %llu length %zu\n",
+ folio_pos(folio) + offset, length);
printk("write range offset %llu length %zu\n",
wr->pos, wr->len);
}
@@ -483,7 +484,7 @@ static void orangefs_invalidatepage(struct page *page,
* Thus the following runs if wr was modified above.
*/
- orangefs_launder_page(page);
+ orangefs_launder_folio(folio);
}
static int orangefs_releasepage(struct page *page, gfp_t foo)
@@ -496,17 +497,17 @@ static void orangefs_freepage(struct page *page)
kfree(detach_page_private(page));
}
-static int orangefs_launder_page(struct page *page)
+static int orangefs_launder_folio(struct folio *folio)
{
int r = 0;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = 0,
};
- wait_on_page_writeback(page);
- if (clear_page_dirty_for_io(page)) {
- r = orangefs_writepage_locked(page, &wbc);
- end_page_writeback(page);
+ folio_wait_writeback(folio);
+ if (folio_clear_dirty_for_io(folio)) {
+ r = orangefs_writepage_locked(&folio->page, &wbc);
+ folio_end_writeback(folio);
}
return r;
}
@@ -633,19 +634,19 @@ static const struct address_space_operations orangefs_address_operations = {
.readahead = orangefs_readahead,
.readpage = orangefs_readpage,
.writepages = orangefs_writepages,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .dirty_folio = filemap_dirty_folio,
.write_begin = orangefs_write_begin,
.write_end = orangefs_write_end,
- .invalidatepage = orangefs_invalidatepage,
+ .invalidate_folio = orangefs_invalidate_folio,
.releasepage = orangefs_releasepage,
.freepage = orangefs_freepage,
- .launder_page = orangefs_launder_page,
+ .launder_folio = orangefs_launder_folio,
.direct_IO = orangefs_direct_IO,
};
vm_fault_t orangefs_page_mkwrite(struct vm_fault *vmf)
{
- struct page *page = vmf->page;
+ struct folio *folio = page_folio(vmf->page);
struct inode *inode = file_inode(vmf->vma->vm_file);
struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
unsigned long *bitlock = &orangefs_inode->bitlock;
@@ -659,27 +660,27 @@ vm_fault_t orangefs_page_mkwrite(struct vm_fault *vmf)
goto out;
}
- lock_page(page);
- if (PageDirty(page) && !PagePrivate(page)) {
+ folio_lock(folio);
+ if (folio_test_dirty(folio) && !folio_test_private(folio)) {
/*
- * Should be impossible. If it happens, launder the page
+ * Should be impossible. If it happens, launder the folio
* since we don't know what's dirty. This will WARN in
* orangefs_writepage_locked.
*/
- if (orangefs_launder_page(page)) {
+ if (orangefs_launder_folio(folio)) {
ret = VM_FAULT_LOCKED|VM_FAULT_RETRY;
goto out;
}
}
- if (PagePrivate(page)) {
- wr = (struct orangefs_write_range *)page_private(page);
+ if (folio_test_private(folio)) {
+ wr = folio_get_private(folio);
if (uid_eq(wr->uid, current_fsuid()) &&
gid_eq(wr->gid, current_fsgid())) {
- wr->pos = page_offset(page);
+ wr->pos = page_offset(vmf->page);
wr->len = PAGE_SIZE;
goto okay;
} else {
- if (orangefs_launder_page(page)) {
+ if (orangefs_launder_folio(folio)) {
ret = VM_FAULT_LOCKED|VM_FAULT_RETRY;
goto out;
}
@@ -690,27 +691,27 @@ vm_fault_t orangefs_page_mkwrite(struct vm_fault *vmf)
ret = VM_FAULT_LOCKED|VM_FAULT_RETRY;
goto out;
}
- wr->pos = page_offset(page);
+ wr->pos = page_offset(vmf->page);
wr->len = PAGE_SIZE;
wr->uid = current_fsuid();
wr->gid = current_fsgid();
- attach_page_private(page, wr);
+ folio_attach_private(folio, wr);
okay:
file_update_time(vmf->vma->vm_file);
- if (page->mapping != inode->i_mapping) {
- unlock_page(page);
+ if (folio->mapping != inode->i_mapping) {
+ folio_unlock(folio);
ret = VM_FAULT_LOCKED|VM_FAULT_NOPAGE;
goto out;
}
/*
- * We mark the page dirty already here so that when freeze is in
+ * We mark the folio dirty already here so that when freeze is in
* progress, we are guaranteed that writeback during freezing will
- * see the dirty page and writeprotect it again.
+ * see the dirty folio and writeprotect it again.
*/
- set_page_dirty(page);
- wait_for_stable_page(page);
+ folio_mark_dirty(folio);
+ folio_wait_stable(folio);
ret = VM_FAULT_LOCKED;
out:
sb_end_pagefault(inode->i_sb);
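
[Editor's note] The orangefs conversion swaps page APIs for their folio equivalents throughout: folio_lock()/folio_unlock(), folio_test_dirty()/folio_test_private(), folio_attach_private() for the write-range bookkeeping, and folio_mark_dirty() plus folio_wait_stable() in the fault path so writeback during a freeze observes the dirty folio. A trimmed page_mkwrite() skeleton in that style (hypothetical fs; error handling and write-range tracking omitted):

static vm_fault_t example_page_mkwrite(struct vm_fault *vmf)
{
	struct folio *folio = page_folio(vmf->page);
	struct inode *inode = file_inode(vmf->vma->vm_file);

	sb_start_pagefault(inode->i_sb);
	folio_lock(folio);
	if (folio->mapping != inode->i_mapping) {
		/* Truncated or migrated under us; retry the fault. */
		folio_unlock(folio);
		sb_end_pagefault(inode->i_sb);
		return VM_FAULT_NOPAGE;
	}
	file_update_time(vmf->vma->vm_file);
	/* Dirty before returning so writeback during freeze sees it. */
	folio_mark_dirty(folio);
	folio_wait_stable(folio);
	sb_end_pagefault(inode->i_sb);
	return VM_FAULT_LOCKED;
}
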
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index d90d8addbfc2..5254256a224d 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -107,7 +107,7 @@ static struct inode *orangefs_alloc_inode(struct super_block *sb)
{
struct orangefs_inode_s *orangefs_inode;
- orangefs_inode = kmem_cache_alloc(orangefs_inode_cache, GFP_KERNEL);
+ orangefs_inode = alloc_inode_sb(sb, orangefs_inode_cache, GFP_KERNEL);
if (!orangefs_inode)
return NULL;
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index b193d08a3dc3..e040970408d4 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -145,7 +145,7 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old,
if (err == -ENOTTY || err == -EINVAL)
return 0;
pr_warn("failed to retrieve lower fileattr (%pd2, err=%i)\n",
- old, err);
+ old->dentry, err);
return err;
}
@@ -157,7 +157,9 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old,
*/
if (oldfa.flags & OVL_PROT_FS_FLAGS_MASK) {
err = ovl_set_protattr(inode, new->dentry, &oldfa);
- if (err)
+ if (err == -EPERM)
+ pr_warn_once("copying fileattr: no xattr on upper\n");
+ else if (err)
return err;
}
@@ -167,8 +169,16 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old,
err = ovl_real_fileattr_get(new, &newfa);
if (err) {
+ /*
+ * Returning an error if upper doesn't support fileattr will
+ * result in a regression, so revert to the old behavior.
+ */
+ if (err == -ENOTTY || err == -EINVAL) {
+ pr_warn_once("copying fileattr: no support on upper\n");
+ return 0;
+ }
pr_warn("failed to retrieve upper fileattr (%pd2, err=%i)\n",
- new, err);
+ new->dentry, err);
return err;
}
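
[Editor's note] The copy_up changes degrade "upper filesystem does not support fileattr" (-ENOTTY/-EINVAL) and "cannot set protected attributes" (-EPERM) from hard copy-up failures to one-time warnings, restoring the old behavior for filesystems without the ioctl. The pattern, as a hypothetical helper:

static int example_filter_unsupported(int err, const char *what)
{
	if (err == -ENOTTY || err == -EINVAL) {
		/* Feature probe failed; warn once, do not fail copy-up. */
		pr_warn_once("%s not supported on upper fs\n", what);
		return 0;
	}
	return err;
}
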
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 7bb0a47cb615..001cdbb8f015 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -174,7 +174,7 @@ static struct kmem_cache *ovl_inode_cachep;
static struct inode *ovl_alloc_inode(struct super_block *sb)
{
- struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
+ struct ovl_inode *oi = alloc_inode_sb(sb, ovl_inode_cachep, GFP_KERNEL);
if (!oi)
return NULL;
diff --git a/fs/pipe.c b/fs/pipe.c
index cc28623a67b6..2667db9506e2 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -253,7 +253,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
*/
was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
for (;;) {
- unsigned int head = pipe->head;
+ /* Read ->head with a barrier vs post_one_notification() */
+ unsigned int head = smp_load_acquire(&pipe->head);
unsigned int tail = pipe->tail;
unsigned int mask = pipe->ring_size - 1;
@@ -831,10 +832,8 @@ void free_pipe_info(struct pipe_inode_info *pipe)
int i;
#ifdef CONFIG_WATCH_QUEUE
- if (pipe->watch_queue) {
+ if (pipe->watch_queue)
watch_queue_clear(pipe->watch_queue);
- put_watch_queue(pipe->watch_queue);
- }
#endif
(void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
@@ -844,6 +843,10 @@ void free_pipe_info(struct pipe_inode_info *pipe)
if (buf->ops)
pipe_buf_release(pipe, buf);
}
+#ifdef CONFIG_WATCH_QUEUE
+ if (pipe->watch_queue)
+ put_watch_queue(pipe->watch_queue);
+#endif
if (pipe->tmp_page)
__free_page(pipe->tmp_page);
kfree(pipe->bufs);
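
[Editor's note] Two distinct fixes here: pipe_read() now loads pipe->head with smp_load_acquire() to pair with a release store in post_one_notification(), so a reader that observes the advanced head also observes the filled slot; and put_watch_queue() is deferred until after the buffers are released, since releasing a notification buffer can still touch the watch queue. A self-contained sketch of the acquire/release pairing on a ring head (hypothetical ring; mask must be size - 1):

struct example_ring {
	unsigned int	head;
	unsigned int	tail;
	unsigned int	mask;	/* size - 1; size is a power of two */
	void		*slot[16];
};

static void example_ring_publish(struct example_ring *r, void *item)
{
	r->slot[r->head & r->mask] = item;	  /* fill the slot first... */
	smp_store_release(&r->head, r->head + 1); /* ...then publish it */
}

static void *example_ring_consume(struct example_ring *r)
{
	unsigned int head = smp_load_acquire(&r->head);

	if (r->tail == head)
		return NULL;			/* empty */
	return r->slot[r->tail++ & r->mask];	/* slot write now visible */
}
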
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index f84355c5a36d..73aeb4e6d32e 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -66,7 +66,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
{
struct proc_inode *ei;
- ei = kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL);
+ ei = alloc_inode_sb(sb, proc_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
ei->pid = NULL;
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 9f1077d94cde..a2873a617ae8 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -10,6 +10,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/hugetlb.h>
+#include <linux/memremap.h>
#include <linux/memcontrol.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 18f8c3acbb85..f46060eb91b5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -309,7 +309,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
name = arch_vma_name(vma);
if (!name) {
- const char *anon_name;
+ struct anon_vma_name *anon_name;
if (!mm) {
name = "[vdso]";
@@ -327,10 +327,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
goto done;
}
- anon_name = vma_anon_name(vma);
+ anon_name = anon_vma_name(vma);
if (anon_name) {
seq_pad(m, ' ');
- seq_printf(m, "[anon:%s]", anon_name);
+ seq_printf(m, "[anon:%s]", anon_name->name);
}
}
@@ -440,7 +440,8 @@ static void smaps_page_accumulate(struct mem_size_stats *mss,
}
static void smaps_account(struct mem_size_stats *mss, struct page *page,
- bool compound, bool young, bool dirty, bool locked)
+ bool compound, bool young, bool dirty, bool locked,
+ bool migration)
{
int i, nr = compound ? compound_nr(page) : 1;
unsigned long size = nr * PAGE_SIZE;
@@ -467,8 +468,15 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
* page_count(page) == 1 guarantees the page is mapped exactly once.
* If any subpage of the compound page mapped with PTE it would elevate
* page_count().
+ *
+ * The page_mapcount() is called to get a snapshot of the mapcount.
+ * Without holding the page lock this snapshot can be slightly wrong as
+ * we cannot always read the mapcount atomically. It is not safe to
+ * call page_mapcount() even with PTL held if the page is not mapped,
+ * especially for migration entries. Treat regular migration entries
+ * as mapcount == 1.
*/
- if (page_count(page) == 1) {
+ if ((page_count(page) == 1) || migration) {
smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty,
locked, true);
return;
@@ -517,6 +525,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
struct vm_area_struct *vma = walk->vma;
bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
+ bool migration = false;
if (pte_present(*pte)) {
page = vm_normal_page(vma, addr, *pte);
@@ -536,8 +545,11 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
} else {
mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
}
- } else if (is_pfn_swap_entry(swpent))
+ } else if (is_pfn_swap_entry(swpent)) {
+ if (is_migration_entry(swpent))
+ migration = true;
page = pfn_swap_entry_to_page(swpent);
+ }
} else {
smaps_pte_hole_lookup(addr, walk);
return;
@@ -546,7 +558,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
if (!page)
return;
- smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked);
+ smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte),
+ locked, migration);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -557,6 +570,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
struct vm_area_struct *vma = walk->vma;
bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
+ bool migration = false;
if (pmd_present(*pmd)) {
/* FOLL_DUMP will return -EFAULT on huge zero page */
@@ -564,8 +578,10 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
} else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
swp_entry_t entry = pmd_to_swp_entry(*pmd);
- if (is_migration_entry(entry))
+ if (is_migration_entry(entry)) {
+ migration = true;
page = pfn_swap_entry_to_page(entry);
+ }
}
if (IS_ERR_OR_NULL(page))
return;
@@ -577,7 +593,9 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
/* pass */;
else
mss->file_thp += HPAGE_PMD_SIZE;
- smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
+
+ smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd),
+ locked, migration);
}
#else
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
@@ -1378,6 +1396,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
{
u64 frame = 0, flags = 0;
struct page *page = NULL;
+ bool migration = false;
if (pte_present(pte)) {
if (pm->show_pfn)
@@ -1399,13 +1418,14 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
frame = swp_type(entry) |
(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
flags |= PM_SWAP;
+ migration = is_migration_entry(entry);
if (is_pfn_swap_entry(entry))
page = pfn_swap_entry_to_page(entry);
}
if (page && !PageAnon(page))
flags |= PM_FILE;
- if (page && page_mapcount(page) == 1)
+ if (page && !migration && page_mapcount(page) == 1)
flags |= PM_MMAP_EXCLUSIVE;
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
@@ -1421,8 +1441,9 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
spinlock_t *ptl;
pte_t *pte, *orig_pte;
int err = 0;
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ bool migration = false;
+
ptl = pmd_trans_huge_lock(pmdp, vma);
if (ptl) {
u64 flags = 0, frame = 0;
@@ -1461,11 +1482,12 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
if (pmd_swp_uffd_wp(pmd))
flags |= PM_UFFD_WP;
VM_BUG_ON(!is_pmd_migration_entry(pmd));
+ migration = is_migration_entry(entry);
page = pfn_swap_entry_to_page(entry);
}
#endif
- if (page && page_mapcount(page) == 1)
+ if (page && !migration && page_mapcount(page) == 1)
flags |= PM_MMAP_EXCLUSIVE;
for (; addr != end; addr += PAGE_SIZE) {
@@ -1575,7 +1597,8 @@ static const struct mm_walk_ops pagemap_ops = {
* Bits 5-54 swap offset if swapped
* Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst)
* Bit 56 page exclusively mapped
- * Bits 57-60 zero
+ * Bit 57 pte is uffd-wp write-protected
+ * Bits 58-60 zero
* Bit 61 page is file-page or shared-anon
* Bit 62 page swapped
* Bit 63 page present
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index f243cb5e6a4f..e26162f102ff 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -143,21 +143,22 @@ static void pstore_timer_kick(void)
mod_timer(&pstore_timer, jiffies + msecs_to_jiffies(pstore_update_ms));
}
-/*
- * Should pstore_dump() wait for a concurrent pstore_dump()? If
- * not, the current pstore_dump() will report a failure to dump
- * and return.
- */
-static bool pstore_cannot_wait(enum kmsg_dump_reason reason)
+static bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
{
- /* In NMI path, pstore shouldn't block regardless of reason. */
+ /*
+ * In the NMI path, pstore shouldn't be blocked,
+ * regardless of the reason.
+ */
if (in_nmi())
return true;
switch (reason) {
/* In panic case, other cpus are stopped by smp_send_stop(). */
case KMSG_DUMP_PANIC:
- /* Emergency restart shouldn't be blocked. */
+ /*
+ * Emergency restart shouldn't be blocked by spinning on
+ * pstore_info::buf_lock.
+ */
case KMSG_DUMP_EMERG:
return true;
default:
@@ -389,21 +390,19 @@ static void pstore_dump(struct kmsg_dumper *dumper,
unsigned long total = 0;
const char *why;
unsigned int part = 1;
+ unsigned long flags = 0;
int ret;
why = kmsg_dump_reason_str(reason);
- if (down_trylock(&psinfo->buf_lock)) {
- /* Failed to acquire lock: give up if we cannot wait. */
- if (pstore_cannot_wait(reason)) {
- pr_err("dump skipped in %s path: may corrupt error record\n",
- in_nmi() ? "NMI" : why);
- return;
- }
- if (down_interruptible(&psinfo->buf_lock)) {
- pr_err("could not grab semaphore?!\n");
+ if (pstore_cannot_block_path(reason)) {
+ if (!spin_trylock_irqsave(&psinfo->buf_lock, flags)) {
+ pr_err("dump skipped in %s path because of concurrent dump\n",
+ in_nmi() ? "NMI" : why);
return;
}
+ } else {
+ spin_lock_irqsave(&psinfo->buf_lock, flags);
}
kmsg_dump_rewind(&iter);
@@ -467,8 +466,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
total += record.size;
part++;
}
-
- up(&psinfo->buf_lock);
+ spin_unlock_irqrestore(&psinfo->buf_lock, flags);
}
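
The rework above trades the buf_lock semaphore for a spinlock: contexts that must never block (NMI, panic, emergency restart) take it with a trylock and skip the dump on contention, while ordinary contexts spin. A minimal sketch of that pattern, with illustrative names (this is a sketch of the locking idiom, not the pstore code itself):

    /* Sketch only; the lock and function names are illustrative. */
    static DEFINE_SPINLOCK(buf_lock);

    static void dump_record(bool atomic_path)
    {
            unsigned long flags;

            if (atomic_path) {
                    /* NMI/panic: never spin behind a concurrent dumper. */
                    if (!spin_trylock_irqsave(&buf_lock, flags))
                            return; /* skip rather than deadlock */
            } else {
                    spin_lock_irqsave(&buf_lock, flags);
            }

            /* ... copy the record into the backend buffer ... */

            spin_unlock_irqrestore(&buf_lock, flags);
    }
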
static struct kmsg_dumper pstore_dumper = {
@@ -594,7 +592,7 @@ int pstore_register(struct pstore_info *psi)
psi->write_user = pstore_write_user_compat;
psinfo = psi;
mutex_init(&psinfo->read_mutex);
- sema_init(&psinfo->buf_lock, 1);
+ spin_lock_init(&psinfo->buf_lock);
if (psi->flags & PSTORE_FLAGS_DMESG)
allocate_buf_for_compression();
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index fe5305028c6e..a89e33719fcf 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -263,10 +263,10 @@ ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz,
if (prz->corrected_bytes || prz->bad_blocks)
ret = snprintf(str, len, ""
- "\n%d Corrected bytes, %d unrecoverable blocks\n",
+ "\nECC: %d Corrected bytes, %d unrecoverable blocks\n",
prz->corrected_bytes, prz->bad_blocks);
else
- ret = snprintf(str, len, "\nNo errors detected\n");
+ ret = snprintf(str, len, "\nECC: No errors detected\n");
return ret;
}
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 3fb7fc819b4f..a635bb6615e9 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -338,7 +338,7 @@ static struct kmem_cache *qnx4_inode_cachep;
static struct inode *qnx4_alloc_inode(struct super_block *sb)
{
struct qnx4_inode_info *ei;
- ei = kmem_cache_alloc(qnx4_inode_cachep, GFP_KERNEL);
+ ei = alloc_inode_sb(sb, qnx4_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
return &ei->vfs_inode;
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 61191f7bdf62..9d8e7e9788a1 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -597,7 +597,7 @@ static struct kmem_cache *qnx6_inode_cachep;
static struct inode *qnx6_alloc_inode(struct super_block *sb)
{
struct qnx6_inode_info *ei;
- ei = kmem_cache_alloc(qnx6_inode_cachep, GFP_KERNEL);
+ ei = alloc_inode_sb(sb, qnx6_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
return &ei->vfs_inode;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 22d904bde6ab..a74aef99bd3d 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -690,9 +690,14 @@ int dquot_quota_sync(struct super_block *sb, int type)
/* This is not very clever (and fast) but currently I don't know about
* any other simple way of getting quota data to disk and we must get
* them there for userspace to be visible... */
- if (sb->s_op->sync_fs)
- sb->s_op->sync_fs(sb, 1);
- sync_blockdev(sb->s_bdev);
+ if (sb->s_op->sync_fs) {
+ ret = sb->s_op->sync_fs(sb, 1);
+ if (ret)
+ return ret;
+ }
+ ret = sync_blockdev(sb->s_bdev);
+ if (ret)
+ return ret;
/*
* Now when everything is written we can discard the pagecache so
diff --git a/fs/read_write.c b/fs/read_write.c
index 0074afa7ecb3..dc5000173b80 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -385,6 +385,7 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
return security_file_permission(file,
read_write == READ ? MAY_READ : MAY_WRITE);
}
+EXPORT_SYMBOL(rw_verify_area);
static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
@@ -1617,24 +1618,16 @@ int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count)
return 0;
}
-/*
- * Performs necessary checks before doing a write
- *
- * Can adjust writing position or amount of bytes to write.
- * Returns appropriate error code that caller should return or
- * zero in case that write should be allowed.
- */
-ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
+/* Like generic_write_checks(), but takes size of write instead of iter. */
+int generic_write_checks_count(struct kiocb *iocb, loff_t *count)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- loff_t count;
- int ret;
if (IS_SWAPFILE(inode))
return -ETXTBSY;
- if (!iov_iter_count(from))
+ if (!*count)
return 0;
/* FIXME: this is for backwards compatibility with 2.4 */
@@ -1644,8 +1637,23 @@ ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
return -EINVAL;
- count = iov_iter_count(from);
- ret = generic_write_check_limits(file, iocb->ki_pos, &count);
+ return generic_write_check_limits(iocb->ki_filp, iocb->ki_pos, count);
+}
+EXPORT_SYMBOL(generic_write_checks_count);
+
+/*
+ * Performs necessary checks before doing a write
+ *
+ * Can adjust writing position or amount of bytes to write.
+ * Returns appropriate error code that caller should return or
+ * zero in case that write should be allowed.
+ */
+ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+ loff_t count = iov_iter_count(from);
+ int ret;
+
+ ret = generic_write_checks_count(iocb, &count);
if (ret)
return ret;
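
The split lets callers that only know a byte count, rather than holding an iov_iter, run the same limit checks. A hedged sketch of such a caller (illustrative only; example_write is not part of this patch):

    /* Illustrative caller that validates a plain byte count. */
    static ssize_t example_write(struct kiocb *iocb, size_t len)
    {
            loff_t count = len;
            int ret;

            ret = generic_write_checks_count(iocb, &count);
            if (ret)
                    return ret;
            if (!count)
                    return 0; /* nothing to write after clamping */

            /* ... write at most 'count' bytes at iocb->ki_pos ... */
            return count;
    }
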
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index f49b72ccac4c..e4221fa85ea2 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3094,7 +3094,7 @@ void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode)
* decide if this buffer needs to stay around for data logging or ordered
* write purposes
*/
-static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
+static int invalidate_folio_can_drop(struct inode *inode, struct buffer_head *bh)
{
int ret = 1;
struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
@@ -3147,26 +3147,26 @@ free_jh:
return ret;
}
-/* clm -- taken from fs/buffer.c:block_invalidate_page */
-static void reiserfs_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length)
+/* clm -- taken from fs/buffer.c:block_invalidate_folio */
+static void reiserfs_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length)
{
struct buffer_head *head, *bh, *next;
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
unsigned int curr_off = 0;
unsigned int stop = offset + length;
- int partial_page = (offset || length < PAGE_SIZE);
+ int partial_page = (offset || length < folio_size(folio));
int ret = 1;
- BUG_ON(!PageLocked(page));
+ BUG_ON(!folio_test_locked(folio));
if (!partial_page)
- ClearPageChecked(page);
+ folio_clear_checked(folio);
- if (!page_has_buffers(page))
+ head = folio_buffers(folio);
+ if (!head)
goto out;
- head = page_buffers(page);
bh = head;
do {
unsigned int next_off = curr_off + bh->b_size;
@@ -3179,7 +3179,7 @@ static void reiserfs_invalidatepage(struct page *page, unsigned int offset,
* is this block fully invalidated?
*/
if (offset <= curr_off) {
- if (invalidatepage_can_drop(inode, bh))
+ if (invalidate_folio_can_drop(inode, bh))
reiserfs_unmap_buffer(bh);
else
ret = 0;
@@ -3194,21 +3194,21 @@ static void reiserfs_invalidatepage(struct page *page, unsigned int offset,
* so real IO is not possible anymore.
*/
if (!partial_page && ret) {
- ret = try_to_release_page(page, 0);
+ ret = filemap_release_folio(folio, 0);
/* maybe should BUG_ON(!ret); - neilb */
}
out:
return;
}
-static int reiserfs_set_page_dirty(struct page *page)
+static bool reiserfs_dirty_folio(struct address_space *mapping,
+ struct folio *folio)
{
- struct inode *inode = page->mapping->host;
- if (reiserfs_file_data_log(inode)) {
- SetPageChecked(page);
- return __set_page_dirty_nobuffers(page);
+ if (reiserfs_file_data_log(mapping->host)) {
+ folio_set_checked(folio);
+ return filemap_dirty_folio(mapping, folio);
}
- return __set_page_dirty_buffers(page);
+ return block_dirty_folio(mapping, folio);
}
/*
@@ -3430,10 +3430,10 @@ const struct address_space_operations reiserfs_address_space_operations = {
.readpage = reiserfs_readpage,
.readahead = reiserfs_readahead,
.releasepage = reiserfs_releasepage,
- .invalidatepage = reiserfs_invalidatepage,
+ .invalidate_folio = reiserfs_invalidate_folio,
.write_begin = reiserfs_write_begin,
.write_end = reiserfs_write_end,
.bmap = reiserfs_aop_bmap,
.direct_IO = reiserfs_direct_IO,
- .set_page_dirty = reiserfs_set_page_dirty,
+ .dirty_folio = reiserfs_dirty_folio,
};
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index a3e21160b634..b5b6f6201bed 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -858,8 +858,8 @@ loop_next:
ret = -EIO;
}
/*
- * ugly interaction with invalidatepage here.
- * reiserfs_invalidate_page will pin any buffer that has a
+ * ugly interaction with invalidate_folio here.
+ * reiserfs_invalidate_folio will pin any buffer that has a
* valid journal head from an older transaction. If someone
* else sets our buffer dirty after we write it in the first
* loop, and then someone truncates the page away, nobody
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 82e09901462e..756c4916c7ae 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -639,7 +639,7 @@ static struct kmem_cache *reiserfs_inode_cachep;
static struct inode *reiserfs_alloc_inode(struct super_block *sb)
{
struct reiserfs_inode_info *ei;
- ei = kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL);
+ ei = alloc_inode_sb(sb, reiserfs_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
atomic_set(&ei->openers, 0);
diff --git a/fs/remap_range.c b/fs/remap_range.c
index 231159682907..e112b5424cdb 100644
--- a/fs/remap_range.c
+++ b/fs/remap_range.c
@@ -146,11 +146,11 @@ static int generic_remap_check_len(struct inode *inode_in,
}
/* Read a page's worth of file data into the page cache. */
-static struct folio *vfs_dedupe_get_folio(struct inode *inode, loff_t pos)
+static struct folio *vfs_dedupe_get_folio(struct file *file, loff_t pos)
{
struct folio *folio;
- folio = read_mapping_folio(inode->i_mapping, pos >> PAGE_SHIFT, NULL);
+ folio = read_mapping_folio(file->f_mapping, pos >> PAGE_SHIFT, file);
if (IS_ERR(folio))
return folio;
if (!folio_test_uptodate(folio)) {
@@ -187,8 +187,8 @@ static void vfs_unlock_two_folios(struct folio *folio1, struct folio *folio2)
* Compare extents of two files to see if they are the same.
* Caller must have locked both inodes to prevent write races.
*/
-static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
- struct inode *dest, loff_t dstoff,
+static int vfs_dedupe_file_range_compare(struct file *src, loff_t srcoff,
+ struct file *dest, loff_t dstoff,
loff_t len, bool *is_same)
{
bool same = true;
@@ -224,8 +224,8 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
* someone is invalidating pages on us and we lose.
*/
if (!folio_test_uptodate(src_folio) || !folio_test_uptodate(dst_folio) ||
- src_folio->mapping != src->i_mapping ||
- dst_folio->mapping != dest->i_mapping) {
+ src_folio->mapping != src->f_mapping ||
+ dst_folio->mapping != dest->f_mapping) {
same = false;
goto unlock;
}
@@ -333,8 +333,8 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
if (remap_flags & REMAP_FILE_DEDUP) {
bool is_same = false;
- ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
- inode_out, pos_out, *len, &is_same);
+ ret = vfs_dedupe_file_range_compare(file_in, pos_in,
+ file_out, pos_out, *len, &is_same);
if (ret)
return ret;
if (!is_same)
@@ -362,11 +362,6 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP);
- /*
- * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on
- * the same mount. Practically, they only need to be on the same file
- * system.
- */
if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
return -EXDEV;
@@ -458,7 +453,7 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
goto out_drop_write;
ret = -EXDEV;
- if (src_file->f_path.mnt != dst_file->f_path.mnt)
+ if (file_inode(src_file)->i_sb != file_inode(dst_file)->i_sb)
goto out_drop_write;
ret = -EISDIR;
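
With the check relaxed from same-mount to same-superblock, deduplication now works across bind mounts of one filesystem. A hedged userspace sketch of issuing FIDEDUPERANGE for a single destination (dedupe_one is an illustrative helper; error handling trimmed):

    /* Ask the kernel to dedupe 'len' bytes of src into dst at offset 0. */
    #include <fcntl.h>
    #include <linux/fs.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>

    int dedupe_one(int src_fd, int dst_fd, __u64 len)
    {
            struct file_dedupe_range *arg;
            int ret;

            arg = calloc(1, sizeof(*arg) +
                            sizeof(struct file_dedupe_range_info));
            if (!arg)
                    return -1;
            arg->src_length = len;          /* src_offset stays 0 */
            arg->dest_count = 1;
            arg->info[0].dest_fd = dst_fd;  /* dest_offset stays 0 */

            ret = ioctl(src_fd, FIDEDUPERANGE, arg);
            /* On success, inspect arg->info[0].status per destination. */
            free(arg);
            return ret;
    }
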
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 259f684d9236..9e6bbb4219de 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -375,7 +375,7 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
{
struct romfs_inode_info *inode;
- inode = kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL);
+ inode = alloc_inode_sb(sb, romfs_inode_cachep, GFP_KERNEL);
return inode ? &inode->vfs_inode : NULL;
}
diff --git a/fs/splice.c b/fs/splice.c
index 5dbce4dcc1a7..047b79db8eb5 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -46,45 +46,45 @@
static bool page_cache_pipe_buf_try_steal(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- struct page *page = buf->page;
+ struct folio *folio = page_folio(buf->page);
struct address_space *mapping;
- lock_page(page);
+ folio_lock(folio);
- mapping = page_mapping(page);
+ mapping = folio_mapping(folio);
if (mapping) {
- WARN_ON(!PageUptodate(page));
+ WARN_ON(!folio_test_uptodate(folio));
/*
* At least for ext2 with nobh option, we need to wait on
- * writeback completing on this page, since we'll remove it
+ * writeback completing on this folio, since we'll remove it
* from the pagecache. Otherwise truncate won't wait on the
- * page, allowing the disk blocks to be reused by someone else
+ * folio, allowing the disk blocks to be reused by someone else
* before we actually wrote our data to them. fs corruption
* ensues.
*/
- wait_on_page_writeback(page);
+ folio_wait_writeback(folio);
- if (page_has_private(page) &&
- !try_to_release_page(page, GFP_KERNEL))
+ if (folio_has_private(folio) &&
+ !filemap_release_folio(folio, GFP_KERNEL))
goto out_unlock;
/*
* If we succeeded in removing the mapping, set LRU flag
* and return good.
*/
- if (remove_mapping(mapping, page)) {
+ if (remove_mapping(mapping, folio)) {
buf->flags |= PIPE_BUF_FLAG_LRU;
return true;
}
}
/*
- * Raced with truncate or failed to remove page from current
+ * Raced with truncate or failed to remove folio from current
* address space, unlock and return failure.
*/
out_unlock:
- unlock_page(page);
+ folio_unlock(folio);
return false;
}
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 2db8bcf7ff85..622c844f6d11 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -86,16 +86,17 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
int error, i;
struct bio *bio;
- if (page_count <= BIO_MAX_VECS)
- bio = bio_alloc(GFP_NOIO, page_count);
- else
+ if (page_count <= BIO_MAX_VECS) {
+ bio = bio_alloc(sb->s_bdev, page_count, REQ_OP_READ, GFP_NOIO);
+ } else {
bio = bio_kmalloc(GFP_NOIO, page_count);
+ bio_set_dev(bio, sb->s_bdev);
+ bio->bi_opf = REQ_OP_READ;
+ }
if (!bio)
return -ENOMEM;
- bio_set_dev(bio, sb->s_bdev);
- bio->bi_opf = READ;
bio->bi_iter.bi_sector = block * (msblk->devblksize >> SECTOR_SHIFT);
for (i = 0; i < page_count; ++i) {
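
This and several later hunks convert to the reworked block-layer allocators, which now take the target device and operation flags up front instead of requiring separate bio_set_dev()/bi_opf assignments. The two signatures, as read from these hunks (shown for orientation, not as a definitive API reference):

    /* Allocate a bio already bound to a device and operation: */
    bio = bio_alloc(sb->s_bdev, nr_pages, REQ_OP_READ, GFP_NOIO);

    /* Initialise an embedded/on-stack bio the same way: */
    bio_init(&bio, sb->s_bdev, &bio_vec, 1, REQ_OP_READ);
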
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index b1b556dbce12..4f74abbc1a54 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -584,7 +584,7 @@ static void __exit exit_squashfs_fs(void)
static struct inode *squashfs_alloc_inode(struct super_block *sb)
{
struct squashfs_inode_info *ei =
- kmem_cache_alloc(squashfs_inode_cachep, GFP_KERNEL);
+ alloc_inode_sb(sb, squashfs_inode_cachep, GFP_KERNEL);
return ei ? &ei->vfs_inode : NULL;
}
diff --git a/fs/stat.c b/fs/stat.c
index 28d2020ba1f4..7f734be0e57e 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -184,6 +184,20 @@ int vfs_fstat(int fd, struct kstat *stat)
return error;
}
+int getname_statx_lookup_flags(int flags)
+{
+ int lookup_flags = 0;
+
+ if (!(flags & AT_SYMLINK_NOFOLLOW))
+ lookup_flags |= LOOKUP_FOLLOW;
+ if (!(flags & AT_NO_AUTOMOUNT))
+ lookup_flags |= LOOKUP_AUTOMOUNT;
+ if (flags & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+
+ return lookup_flags;
+}
+
/**
* vfs_statx - Get basic and extra attributes by filename
* @dfd: A file descriptor representing the base dir for a relative filename
@@ -199,26 +213,19 @@ int vfs_fstat(int fd, struct kstat *stat)
*
* 0 will be returned on success, and a -ve error code if unsuccessful.
*/
-static int vfs_statx(int dfd, const char __user *filename, int flags,
+static int vfs_statx(int dfd, struct filename *filename, int flags,
struct kstat *stat, u32 request_mask)
{
struct path path;
- unsigned lookup_flags = 0;
+ unsigned int lookup_flags = getname_statx_lookup_flags(flags);
int error;
if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | AT_EMPTY_PATH |
AT_STATX_SYNC_TYPE))
return -EINVAL;
- if (!(flags & AT_SYMLINK_NOFOLLOW))
- lookup_flags |= LOOKUP_FOLLOW;
- if (!(flags & AT_NO_AUTOMOUNT))
- lookup_flags |= LOOKUP_AUTOMOUNT;
- if (flags & AT_EMPTY_PATH)
- lookup_flags |= LOOKUP_EMPTY;
-
retry:
- error = user_path_at(dfd, filename, lookup_flags, &path);
+ error = filename_lookup(dfd, filename, lookup_flags, &path, NULL);
if (error)
goto out;
@@ -240,8 +247,15 @@ out:
int vfs_fstatat(int dfd, const char __user *filename,
struct kstat *stat, int flags)
{
- return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT,
- stat, STATX_BASIC_STATS);
+ int ret;
+ int statx_flags = flags | AT_NO_AUTOMOUNT;
+ struct filename *name;
+
+ name = getname_flags(filename, getname_statx_lookup_flags(statx_flags), NULL);
+ ret = vfs_statx(dfd, name, statx_flags, stat, STATX_BASIC_STATS);
+ putname(name);
+
+ return ret;
}
#ifdef __ARCH_WANT_OLD_STAT
@@ -602,7 +616,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
}
-int do_statx(int dfd, const char __user *filename, unsigned flags,
+int do_statx(int dfd, struct filename *filename, unsigned int flags,
unsigned int mask, struct statx __user *buffer)
{
struct kstat stat;
@@ -636,7 +650,14 @@ SYSCALL_DEFINE5(statx,
unsigned int, mask,
struct statx __user *, buffer)
{
- return do_statx(dfd, filename, flags, mask, buffer);
+ int ret;
+ struct filename *name;
+
+ name = getname_flags(filename, getname_statx_lookup_flags(flags), NULL);
+ ret = do_statx(dfd, name, flags, mask, buffer);
+ putname(name);
+
+ return ret;
}
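
Userspace behaviour is unchanged by this refactor; the filename is simply resolved into a struct filename once, up front. For context, a minimal caller of the syscall being serviced here (hypothetical example program, assuming the glibc >= 2.28 statx() wrapper):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>

    int main(void)
    {
            struct statx stx;

            /* AT_SYMLINK_NOFOLLOW maps to dropping LOOKUP_FOLLOW above. */
            if (statx(AT_FDCWD, "/etc/hostname", AT_SYMLINK_NOFOLLOW,
                      STATX_BASIC_STATS, &stx) != 0) {
                    perror("statx");
                    return 1;
            }
            printf("size=%llu\n", (unsigned long long)stx.stx_size);
            return 0;
    }
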
#ifdef CONFIG_COMPAT
diff --git a/fs/super.c b/fs/super.c
index 7af820ba5ad5..f1d4a193602d 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1616,11 +1616,9 @@ static void lockdep_sb_freeze_acquire(struct super_block *sb)
percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
}
-static void sb_freeze_unlock(struct super_block *sb)
+static void sb_freeze_unlock(struct super_block *sb, int level)
{
- int level;
-
- for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
+ for (level--; level >= 0; level--)
percpu_up_write(sb->s_writers.rw_sem + level);
}
@@ -1691,7 +1689,14 @@ int freeze_super(struct super_block *sb)
sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
/* All writers are done so after syncing there won't be dirty data */
- sync_filesystem(sb);
+ ret = sync_filesystem(sb);
+ if (ret) {
+ sb->s_writers.frozen = SB_UNFROZEN;
+ sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT);
+ wake_up(&sb->s_writers.wait_unfrozen);
+ deactivate_locked_super(sb);
+ return ret;
+ }
/* Now wait for internal filesystem counter */
sb->s_writers.frozen = SB_FREEZE_FS;
@@ -1703,7 +1708,7 @@ int freeze_super(struct super_block *sb)
printk(KERN_ERR
"VFS:Filesystem freeze failed\n");
sb->s_writers.frozen = SB_UNFROZEN;
- sb_freeze_unlock(sb);
+ sb_freeze_unlock(sb, SB_FREEZE_FS);
wake_up(&sb->s_writers.wait_unfrozen);
deactivate_locked_super(sb);
return ret;
@@ -1748,7 +1753,7 @@ static int thaw_super_locked(struct super_block *sb)
}
sb->s_writers.frozen = SB_UNFROZEN;
- sb_freeze_unlock(sb);
+ sb_freeze_unlock(sb, SB_FREEZE_FS);
out:
wake_up(&sb->s_writers.wait_unfrozen);
deactivate_locked_super(sb);
diff --git a/fs/sync.c b/fs/sync.c
index 3ce8e2137f31..c7690016453e 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -29,7 +29,7 @@
*/
int sync_filesystem(struct super_block *sb)
{
- int ret;
+ int ret = 0;
/*
* We need to be protected against the filesystem going from
@@ -52,15 +52,21 @@ int sync_filesystem(struct super_block *sb)
* at a time.
*/
writeback_inodes_sb(sb, WB_REASON_SYNC);
- if (sb->s_op->sync_fs)
- sb->s_op->sync_fs(sb, 0);
+ if (sb->s_op->sync_fs) {
+ ret = sb->s_op->sync_fs(sb, 0);
+ if (ret)
+ return ret;
+ }
ret = sync_blockdev_nowait(sb->s_bdev);
- if (ret < 0)
+ if (ret)
return ret;
sync_inodes_sb(sb);
- if (sb->s_op->sync_fs)
- sb->s_op->sync_fs(sb, 1);
+ if (sb->s_op->sync_fs) {
+ ret = sb->s_op->sync_fs(sb, 1);
+ if (ret)
+ return ret;
+ }
return sync_blockdev(sb->s_bdev);
}
EXPORT_SYMBOL(sync_filesystem);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index be47263b8605..9e8d4a6fb2f3 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -306,7 +306,7 @@ static struct inode *sysv_alloc_inode(struct super_block *sb)
{
struct sysv_inode_info *si;
- si = kmem_cache_alloc(sysv_inode_cachep, GFP_KERNEL);
+ si = alloc_inode_sb(sb, sysv_inode_cachep, GFP_KERNEL);
if (!si)
return NULL;
return &si->vfs_inode;
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 749385015a8d..409ab5e17803 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -495,7 +495,8 @@ static sector_t sysv_bmap(struct address_space *mapping, sector_t block)
}
const struct address_space_operations sysv_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = sysv_readpage,
.writepage = sysv_writepage,
.write_begin = sysv_write_begin,
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index bafc02bf8220..de7252715b12 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -264,7 +264,6 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
if (!gid_valid(gid))
return -EINVAL;
opts->gid = gid;
- set_gid(tracefs_mount->mnt_root, gid);
break;
case Opt_mode:
if (match_octal(&args[0], &option))
@@ -291,7 +290,9 @@ static int tracefs_apply_options(struct super_block *sb)
inode->i_mode |= opts->mode;
inode->i_uid = opts->uid;
- inode->i_gid = opts->gid;
+
+ /* Set all the group ids to the mount option */
+ set_gid(sb->s_root, opts->gid);
return 0;
}
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 5cfa28cd00cd..8a9ffc2d4167 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1287,25 +1287,25 @@ int ubifs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
return err;
}
-static void ubifs_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length)
+static void ubifs_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
- ubifs_assert(c, PagePrivate(page));
- if (offset || length < PAGE_SIZE)
- /* Partial page remains dirty */
+ ubifs_assert(c, folio_test_private(folio));
+ if (offset || length < folio_size(folio))
+ /* Partial folio remains dirty */
return;
- if (PageChecked(page))
+ if (folio_test_checked(folio))
release_new_page_budget(c);
else
release_existing_page_budget(c);
atomic_long_dec(&c->dirty_pg_cnt);
- ClearPagePrivate(page);
- ClearPageChecked(page);
+ folio_clear_private(folio);
+ folio_clear_checked(folio);
}
int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
@@ -1445,18 +1445,18 @@ static ssize_t ubifs_write_iter(struct kiocb *iocb, struct iov_iter *from)
return generic_file_write_iter(iocb, from);
}
-static int ubifs_set_page_dirty(struct page *page)
+static bool ubifs_dirty_folio(struct address_space *mapping,
+ struct folio *folio)
{
- int ret;
- struct inode *inode = page->mapping->host;
- struct ubifs_info *c = inode->i_sb->s_fs_info;
+ bool ret;
+ struct ubifs_info *c = mapping->host->i_sb->s_fs_info;
- ret = __set_page_dirty_nobuffers(page);
+ ret = filemap_dirty_folio(mapping, folio);
/*
* An attempt to dirty a page without budgeting for it - should not
* happen.
*/
- ubifs_assert(c, ret == 0);
+ ubifs_assert(c, ret == false);
return ret;
}
@@ -1646,8 +1646,8 @@ const struct address_space_operations ubifs_file_address_operations = {
.writepage = ubifs_writepage,
.write_begin = ubifs_write_begin,
.write_end = ubifs_write_end,
- .invalidatepage = ubifs_invalidatepage,
- .set_page_dirty = ubifs_set_page_dirty,
+ .invalidate_folio = ubifs_invalidate_folio,
+ .dirty_folio = ubifs_dirty_folio,
#ifdef CONFIG_MIGRATION
.migratepage = ubifs_migrate_page,
#endif
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index aa7a1381c457..bad67455215f 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -268,7 +268,7 @@ static struct inode *ubifs_alloc_inode(struct super_block *sb)
{
struct ubifs_inode *ui;
- ui = kmem_cache_alloc(ubifs_inode_slab, GFP_NOFS);
+ ui = alloc_inode_sb(sb, ubifs_inode_slab, GFP_NOFS);
if (!ui)
return NULL;
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 1baff8ddb754..0f6bf2504437 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -125,7 +125,8 @@ static int udf_adinicb_write_end(struct file *file, struct address_space *mappin
}
const struct address_space_operations udf_adinicb_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = udf_adinicb_readpage,
.writepage = udf_adinicb_writepage,
.write_begin = udf_adinicb_write_begin,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 1d6b7a50736b..ca4fa710e562 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -235,7 +235,8 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block)
}
const struct address_space_operations udf_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = udf_readpage,
.readahead = udf_readahead,
.writepage = udf_writepage,
@@ -258,10 +259,6 @@ int udf_expand_file_adinicb(struct inode *inode)
char *kaddr;
struct udf_inode_info *iinfo = UDF_I(inode);
int err;
- struct writeback_control udf_wbc = {
- .sync_mode = WB_SYNC_NONE,
- .nr_to_write = 1,
- };
WARN_ON_ONCE(!inode_is_locked(inode));
if (!iinfo->i_lenAlloc) {
@@ -305,8 +302,10 @@ int udf_expand_file_adinicb(struct inode *inode)
iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
/* from now on we have normal address_space methods */
inode->i_data.a_ops = &udf_aops;
+ set_page_dirty(page);
+ unlock_page(page);
up_write(&iinfo->i_data_sem);
- err = inode->i_data.a_ops->writepage(page, &udf_wbc);
+ err = filemap_fdatawrite(inode->i_mapping);
if (err) {
/* Restore everything back so that we don't lose data... */
lock_page(page);
@@ -317,6 +316,7 @@ int udf_expand_file_adinicb(struct inode *inode)
unlock_page(page);
iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
inode->i_data.a_ops = &udf_adinicb_aops;
+ iinfo->i_lenAlloc = inode->i_size;
up_write(&iinfo->i_data_sem);
}
put_page(page);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index f26b5e0b84b6..48871615e489 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -136,7 +136,7 @@ static struct kmem_cache *udf_inode_cachep;
static struct inode *udf_alloc_inode(struct super_block *sb)
{
struct udf_inode_info *ei;
- ei = kmem_cache_alloc(udf_inode_cachep, GFP_KERNEL);
+ ei = alloc_inode_sb(sb, udf_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index ac628de69601..d0dda01620f0 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -526,7 +526,8 @@ static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
}
const struct address_space_operations ufs_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
+ .dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
.readpage = ufs_readpage,
.writepage = ufs_writepage,
.write_begin = ufs_write_begin,
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 00a01471ea05..23377c1baed9 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1443,7 +1443,7 @@ static struct inode *ufs_alloc_inode(struct super_block *sb)
{
struct ufs_inode_info *ei;
- ei = kmem_cache_alloc(ufs_inode_cachep, GFP_NOFS);
+ ei = alloc_inode_sb(sb, ufs_inode_cachep, GFP_NOFS);
if (!ei)
return NULL;
diff --git a/fs/unicode/Kconfig b/fs/unicode/Kconfig
index 610d7bc05d6e..da786a687fdc 100644
--- a/fs/unicode/Kconfig
+++ b/fs/unicode/Kconfig
@@ -3,21 +3,13 @@
# UTF-8 normalization
#
config UNICODE
- bool "UTF-8 normalization and casefolding support"
+ tristate "UTF-8 normalization and casefolding support"
help
Say Y here to enable UTF-8 NFD normalization and NFD+CF casefolding
- support.
-
-config UNICODE_UTF8_DATA
- tristate "UTF-8 normalization and casefolding tables"
- depends on UNICODE
- default UNICODE
- help
- This contains a large table of case foldings, which can be loaded as
- a separate module if you say M here. To be on the safe side stick
- to the default of Y. Saying N here makes no sense, if you do not want
- utf8 casefolding support, disable CONFIG_UNICODE instead.
+ support. If you say M here, the large table of case foldings will
+ be a separate loadable module that gets requested only when a file
+ system actually uses it.
config UNICODE_NORMALIZATION_SELFTEST
tristate "Test UTF-8 normalization support"
- depends on UNICODE_UTF8_DATA
+ depends on UNICODE
diff --git a/fs/unicode/Makefile b/fs/unicode/Makefile
index 2f9d9188852b..0cc87423de82 100644
--- a/fs/unicode/Makefile
+++ b/fs/unicode/Makefile
@@ -1,8 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_UNICODE) += unicode.o
+ifneq ($(CONFIG_UNICODE),)
+obj-y += unicode.o
+endif
+obj-$(CONFIG_UNICODE) += utf8data.o
obj-$(CONFIG_UNICODE_NORMALIZATION_SELFTEST) += utf8-selftest.o
-obj-$(CONFIG_UNICODE_UTF8_DATA) += utf8data.o
unicode-y := utf8-norm.o utf8-core.o
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index e26b10132d47..aa0c47cb0d16 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -198,6 +198,9 @@ static inline struct uffd_msg userfault_msg(unsigned long address,
struct uffd_msg msg;
msg_init(&msg);
msg.event = UFFD_EVENT_PAGEFAULT;
+
+ if (!(features & UFFD_FEATURE_EXACT_ADDRESS))
+ address &= PAGE_MASK;
msg.arg.pagefault.address = address;
/*
* These flags indicate why the userfault occurred:
@@ -482,7 +485,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
uwq.wq.private = current;
- uwq.msg = userfault_msg(vmf->address, vmf->flags, reason,
+ uwq.msg = userfault_msg(vmf->real_address, vmf->flags, reason,
ctx->features);
uwq.ctx = ctx;
uwq.waken = false;
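
With UFFD_FEATURE_EXACT_ADDRESS negotiated, the fault address in the message is no longer rounded down to a page boundary. A hedged sketch of requesting the feature during the UFFDIO_API handshake (uffd_open_exact is an illustrative helper; requires headers that define the new feature bit):

    #include <fcntl.h>
    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int uffd_open_exact(void)
    {
            struct uffdio_api api = {
                    .api = UFFD_API,
                    .features = UFFD_FEATURE_EXACT_ADDRESS,
            };
            int fd = syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK);

            if (fd < 0 || ioctl(fd, UFFDIO_API, &api) < 0)
                    return -1;
            /* msg.arg.pagefault.address now carries the exact address. */
            return fd;
    }
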
@@ -878,7 +881,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
new_flags, vma->anon_vma,
vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
- NULL_VM_UFFD_CTX, vma_anon_name(vma));
+ NULL_VM_UFFD_CTX, anon_vma_name(vma));
if (prev)
vma = prev;
else
@@ -1438,7 +1441,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
((struct vm_userfaultfd_ctx){ ctx }),
- vma_anon_name(vma));
+ anon_vma_name(vma));
if (prev) {
vma = prev;
goto next;
@@ -1615,7 +1618,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
prev = vma_merge(mm, prev, start, vma_end, new_flags,
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
- NULL_VM_UFFD_CTX, vma_anon_name(vma));
+ NULL_VM_UFFD_CTX, anon_vma_name(vma));
if (prev) {
vma = prev;
goto next;
diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c
index 864c2fad23be..d74e0d336995 100644
--- a/fs/vboxsf/file.c
+++ b/fs/vboxsf/file.c
@@ -354,7 +354,7 @@ out:
const struct address_space_operations vboxsf_reg_aops = {
.readpage = vboxsf_readpage,
.writepage = vboxsf_writepage,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .dirty_folio = filemap_dirty_folio,
.write_begin = simple_write_begin,
.write_end = vboxsf_write_end,
};
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
index 37dd3fe5b1e9..d2f6df69f611 100644
--- a/fs/vboxsf/super.c
+++ b/fs/vboxsf/super.c
@@ -241,7 +241,7 @@ static struct inode *vboxsf_alloc_inode(struct super_block *sb)
{
struct vboxsf_inode *sf_i;
- sf_i = kmem_cache_alloc(vboxsf_inode_cachep, GFP_NOFS);
+ sf_i = alloc_inode_sb(sb, vboxsf_inode_cachep, GFP_NOFS);
if (!sf_i)
return NULL;
diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c
index aec2ebf7d25a..e1db0f3f7e5e 100644
--- a/fs/vboxsf/utils.c
+++ b/fs/vboxsf/utils.c
@@ -9,6 +9,7 @@
#include <linux/namei.h>
#include <linux/nls.h>
#include <linux/sizes.h>
+#include <linux/pagemap.h>
#include <linux/vfs.h>
#include "vfsmod.h"
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index f18a875f51c6..c1500b238520 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -2818,7 +2818,7 @@ xfs_btree_split_worker(
* in any way.
*/
if (args->kswapd)
- new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
+ new_pflags |= PF_MEMALLOC | PF_KSWAPD;
current_set_flags_nested(&pflags, new_pflags);
xfs_trans_set_context(args->cur->bc_tp);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 2705f91bdd0d..90b7f4d127de 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -136,7 +136,20 @@ done:
memalloc_nofs_restore(nofs_flag);
}
-/* Finish all pending io completions. */
+/*
+ * Finish all pending IO completions that require transactional modifications.
+ *
+ * We try to merge physically and logically contiguous ioends before completion to
+ * minimise the number of transactions we need to perform during IO completion.
+ * Both unwritten extent conversion and COW remapping need to iterate and modify
+ * one physical extent at a time, so we gain nothing by merging physically
+ * discontiguous extents here.
+ *
+ * The ioend chain we can be processing here is largely unbounded in length,
+ * and we may have to perform significant amounts of work on each ioend
+ * to complete it. Hence we have to be careful about holding the CPU for too
+ * long in this loop.
+ */
void
xfs_end_io(
struct work_struct *work)
@@ -157,6 +170,7 @@ xfs_end_io(
list_del_init(&ioend->io_list);
iomap_ioend_try_merge(ioend, &tmp);
xfs_end_ioend(ioend);
+ cond_resched();
}
}
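
The cond_resched() added above is the standard remedy for a work item that walks a potentially unbounded list. A generic sketch of the pattern (complete_one is a hypothetical stand-in for the per-item work):

    /* Yield between items so a long chain cannot monopolise the CPU. */
    while ((ioend = list_first_entry_or_null(&completion_list,
                                             struct iomap_ioend, io_list))) {
            list_del_init(&ioend->io_list);
            complete_one(ioend);    /* potentially expensive */
            cond_resched();
    }
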
@@ -553,9 +567,9 @@ const struct address_space_operations xfs_address_space_operations = {
.readpage = xfs_vm_readpage,
.readahead = xfs_vm_readahead,
.writepages = xfs_vm_writepages,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .dirty_folio = filemap_dirty_folio,
.releasepage = iomap_releasepage,
- .invalidatepage = iomap_invalidatepage,
+ .invalidate_folio = iomap_invalidate_folio,
.bmap = xfs_vm_bmap,
.direct_IO = noop_direct_IO,
.migratepage = iomap_migrate_page,
@@ -567,7 +581,6 @@ const struct address_space_operations xfs_address_space_operations = {
const struct address_space_operations xfs_dax_aops = {
.writepages = xfs_dax_writepages,
.direct_IO = noop_direct_IO,
- .set_page_dirty = __set_page_dirty_no_writeback,
- .invalidatepage = noop_invalidatepage,
+ .dirty_folio = noop_dirty_folio,
.swap_activate = xfs_iomap_swapfile_activate,
};
diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c
index 667e297f59b1..32fa02945f73 100644
--- a/fs/xfs/xfs_bio_io.c
+++ b/fs/xfs/xfs_bio_io.c
@@ -36,9 +36,7 @@ xfs_flush_bdev_async(
return;
}
- bio_init(bio, NULL, 0);
- bio_set_dev(bio, bdev);
- bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
+ bio_init(bio, bdev, NULL, 0, REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC);
bio->bi_private = done;
bio->bi_end_io = xfs_flush_bdev_async_endio;
@@ -61,10 +59,9 @@ xfs_rw_bdev(
if (is_vmalloc && op == REQ_OP_WRITE)
flush_kernel_vmap_range(data, count);
- bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left));
- bio_set_dev(bio, bdev);
+ bio = bio_alloc(bdev, bio_max_vecs(left), op | REQ_META | REQ_SYNC,
+ GFP_KERNEL);
bio->bi_iter.bi_sector = sector;
- bio->bi_opf = op | REQ_META | REQ_SYNC;
do {
struct page *page = kmem_to_page(data);
@@ -74,10 +71,9 @@ xfs_rw_bdev(
while (bio_add_page(bio, page, len, off) != len) {
struct bio *prev = bio;
- bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left));
- bio_copy_dev(bio, prev);
+ bio = bio_alloc(prev->bi_bdev, bio_max_vecs(left),
+ prev->bi_opf, GFP_KERNEL);
bio->bi_iter.bi_sector = bio_end_sector(prev);
- bio->bi_opf = prev->bi_opf;
bio_chain(prev, bio);
submit_bio(prev);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index d4a387d3d0ce..eb2e387ba528 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -850,9 +850,6 @@ xfs_alloc_file_space(
rblocks = 0;
}
- /*
- * Allocate and setup the transaction.
- */
error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write,
dblocks, rblocks, false, &tp);
if (error)
@@ -869,9 +866,9 @@ xfs_alloc_file_space(
if (error)
goto error;
- /*
- * Complete the transaction
- */
+ ip->i_diflags |= XFS_DIFLAG_PREALLOC;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index b45e0d50a405..44795018df1d 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -843,9 +843,6 @@ xfs_buf_readahead_map(
{
struct xfs_buf *bp;
- if (bdi_read_congested(target->bt_bdev->bd_disk->bdi))
- return;
-
xfs_buf_read_map(target, map, nmaps,
XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
__this_address);
@@ -1440,12 +1437,10 @@ next_chunk:
atomic_inc(&bp->b_io_remaining);
nr_pages = bio_max_segs(total_nr_pages);
- bio = bio_alloc(GFP_NOIO, nr_pages);
- bio_set_dev(bio, bp->b_target->bt_bdev);
+ bio = bio_alloc(bp->b_target->bt_bdev, nr_pages, op, GFP_NOIO);
bio->bi_iter.bi_sector = sector;
bio->bi_end_io = xfs_buf_bio_end_io;
bio->bi_private = bp;
- bio->bi_opf = op;
for (; size && nr_pages; nr_pages--, page_index++) {
int rbytes, nbytes = PAGE_SIZE - offset;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 22ad207bedf4..5bddb1e9e0b3 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -66,40 +66,6 @@ xfs_is_falloc_aligned(
return !((pos | len) & mask);
}
-int
-xfs_update_prealloc_flags(
- struct xfs_inode *ip,
- enum xfs_prealloc_flags flags)
-{
- struct xfs_trans *tp;
- int error;
-
- error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid,
- 0, 0, 0, &tp);
- if (error)
- return error;
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-
- if (!(flags & XFS_PREALLOC_INVISIBLE)) {
- VFS_I(ip)->i_mode &= ~S_ISUID;
- if (VFS_I(ip)->i_mode & S_IXGRP)
- VFS_I(ip)->i_mode &= ~S_ISGID;
- xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- }
-
- if (flags & XFS_PREALLOC_SET)
- ip->i_diflags |= XFS_DIFLAG_PREALLOC;
- if (flags & XFS_PREALLOC_CLEAR)
- ip->i_diflags &= ~XFS_DIFLAG_PREALLOC;
-
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- if (flags & XFS_PREALLOC_SYNC)
- xfs_trans_set_sync(tp);
- return xfs_trans_commit(tp);
-}
-
/*
* Fsync operations on directories are much simpler than on regular files,
* as there is no file data to flush, and thus also no need for explicit
@@ -895,6 +861,21 @@ xfs_break_layouts(
return error;
}
+/* Does this file, inode, or mount want synchronous writes? */
+static inline bool xfs_file_sync_writes(struct file *filp)
+{
+ struct xfs_inode *ip = XFS_I(file_inode(filp));
+
+ if (xfs_has_wsync(ip->i_mount))
+ return true;
+ if (filp->f_flags & (__O_SYNC | O_DSYNC))
+ return true;
+ if (IS_SYNC(file_inode(filp)))
+ return true;
+
+ return false;
+}
+
#define XFS_FALLOC_FL_SUPPORTED \
(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
@@ -910,7 +891,6 @@ xfs_file_fallocate(
struct inode *inode = file_inode(file);
struct xfs_inode *ip = XFS_I(inode);
long error;
- enum xfs_prealloc_flags flags = 0;
uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
loff_t new_size = 0;
bool do_file_insert = false;
@@ -955,6 +935,10 @@ xfs_file_fallocate(
goto out_unlock;
}
+ error = file_modified(file);
+ if (error)
+ goto out_unlock;
+
if (mode & FALLOC_FL_PUNCH_HOLE) {
error = xfs_free_file_space(ip, offset, len);
if (error)
@@ -1004,8 +988,6 @@ xfs_file_fallocate(
}
do_file_insert = true;
} else {
- flags |= XFS_PREALLOC_SET;
-
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
offset + len > i_size_read(inode)) {
new_size = offset + len;
@@ -1057,13 +1039,6 @@ xfs_file_fallocate(
}
}
- if (file->f_flags & O_DSYNC)
- flags |= XFS_PREALLOC_SYNC;
-
- error = xfs_update_prealloc_flags(ip, flags);
- if (error)
- goto out_unlock;
-
/* Change file size if needed */
if (new_size) {
struct iattr iattr;
@@ -1082,8 +1057,14 @@ xfs_file_fallocate(
* leave shifted extents past EOF and hence losing access to
* the data that is contained within them.
*/
- if (do_file_insert)
+ if (do_file_insert) {
error = xfs_insert_file_space(ip, offset, len);
+ if (error)
+ goto out_unlock;
+ }
+
+ if (xfs_file_sync_writes(file))
+ error = xfs_log_force_inode(ip);
out_unlock:
xfs_iunlock(ip, iolock);
@@ -1115,21 +1096,6 @@ xfs_file_fadvise(
return ret;
}
-/* Does this file, inode, or mount want synchronous writes? */
-static inline bool xfs_file_sync_writes(struct file *filp)
-{
- struct xfs_inode *ip = XFS_I(file_inode(filp));
-
- if (xfs_has_wsync(ip->i_mount))
- return true;
- if (filp->f_flags & (__O_SYNC | O_DSYNC))
- return true;
- if (IS_SYNC(file_inode(filp)))
- return true;
-
- return false;
-}
-
STATIC loff_t
xfs_file_remap_range(
struct file *file_in,
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 9644f938990c..9e434cb41e48 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -77,7 +77,7 @@ xfs_inode_alloc(
* XXX: If this didn't occur in transactions, we could drop GFP_NOFAIL
* and return NULL here on ENOMEM.
*/
- ip = kmem_cache_alloc(xfs_inode_cache, GFP_KERNEL | __GFP_NOFAIL);
+ ip = alloc_inode_sb(mp->m_super, xfs_inode_cache, GFP_KERNEL | __GFP_NOFAIL);
if (inode_init_always(mp->m_super, VFS_I(ip))) {
kmem_cache_free(xfs_inode_cache, ip);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index c447bf04205a..b7e8f14d9fca 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -462,15 +462,6 @@ xfs_itruncate_extents(
}
/* from xfs_file.c */
-enum xfs_prealloc_flags {
- XFS_PREALLOC_SET = (1 << 1),
- XFS_PREALLOC_CLEAR = (1 << 2),
- XFS_PREALLOC_SYNC = (1 << 3),
- XFS_PREALLOC_INVISIBLE = (1 << 4),
-};
-
-int xfs_update_prealloc_flags(struct xfs_inode *ip,
- enum xfs_prealloc_flags flags);
int xfs_break_layouts(struct inode *inode, uint *iolock,
enum layout_break_reason reason);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 03a6198c97f6..2515fe8299e1 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1464,7 +1464,7 @@ xfs_ioc_getbmap(
if (bmx.bmv_count < 2)
return -EINVAL;
- if (bmx.bmv_count > ULONG_MAX / recsize)
+ if (bmx.bmv_count >= INT_MAX / recsize)
return -ENOMEM;
buf = kvcalloc(bmx.bmv_count, sizeof(*buf), GFP_KERNEL);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 89fec9a18c34..16f9edbda4eb 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1883,19 +1883,19 @@ xlog_write_iclog(
return;
}
- bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE));
- bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev);
- iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno;
- iclog->ic_bio.bi_end_io = xlog_bio_end_io;
- iclog->ic_bio.bi_private = iclog;
-
/*
* We use REQ_SYNC | REQ_IDLE here to tell the block layer there are more
* IOs coming immediately after this one. This prevents the block layer
* writeback throttle from throttling log writes behind background
* metadata writeback and causing priority inversions.
*/
- iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE;
+ bio_init(&iclog->ic_bio, log->l_targ->bt_bdev, iclog->ic_bvec,
+ howmany(count, PAGE_SIZE),
+ REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE);
+ iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno;
+ iclog->ic_bio.bi_end_io = xlog_bio_end_io;
+ iclog->ic_bio.bi_private = iclog;
+
if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) {
iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
/*
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index d6334abbc0b3..4abe17312c2b 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -71,6 +71,40 @@ xfs_fs_get_uuid(
}
/*
+ * We cannot use file-based VFS helpers such as file_modified() to update
+ * inode state as we modify the data/metadata in the inode here. Hence we have
+ * to open code the timestamp updates and SUID/SGID stripping. We also need
+ * to set the inode prealloc flag to ensure that the extents we allocate are
+ * not removed if the inode is reclaimed from memory before
+ * xfs_fs_commit_blocks() is called from the client to indicate that data has
+ * been written and the file size can be extended.
+ */
+static int
+xfs_fs_map_update_inode(
+ struct xfs_inode *ip)
+{
+ struct xfs_trans *tp;
+ int error;
+
+ error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid,
+ 0, 0, 0, &tp);
+ if (error)
+ return error;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
+ VFS_I(ip)->i_mode &= ~S_ISUID;
+ if (VFS_I(ip)->i_mode & S_IXGRP)
+ VFS_I(ip)->i_mode &= ~S_ISGID;
+ xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ ip->i_diflags |= XFS_DIFLAG_PREALLOC;
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ return xfs_trans_commit(tp);
+}
+
+/*
* Get a layout for the pNFS client.
*/
int
@@ -164,10 +198,12 @@ xfs_fs_map_blocks(
* that the blocks allocated and handed out to the client are
* guaranteed to be present even after a server crash.
*/
- error = xfs_update_prealloc_flags(ip,
- XFS_PREALLOC_SET | XFS_PREALLOC_SYNC);
+ error = xfs_fs_map_update_inode(ip);
+ if (!error)
+ error = xfs_log_force_inode(ip);
if (error)
goto out_unlock;
+
} else {
xfs_iunlock(ip, lock_flags);
}
@@ -255,7 +291,7 @@ xfs_fs_commit_blocks(
length = end - start;
if (!length)
continue;
-
+
/*
* Make sure reads through the pagecache see the new data.
*/
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index e8f37bdc8354..d84714e4e46a 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -735,6 +735,7 @@ xfs_fs_sync_fs(
int wait)
{
struct xfs_mount *mp = XFS_M(sb);
+ int error;
trace_xfs_fs_sync_fs(mp, __return_address);
@@ -744,7 +745,10 @@ xfs_fs_sync_fs(
if (!wait)
return 0;
- xfs_log_force(mp, XFS_LOG_SYNC);
+ error = xfs_log_force(mp, XFS_LOG_SYNC);
+ if (error)
+ return error;
+
if (laptop_mode) {
/*
* The disk must be active because we're syncing.
@@ -1749,6 +1753,11 @@ xfs_remount_ro(
};
int error;
+ /* Flush all the dirty data to disk. */
+ error = sync_filesystem(mp->m_super);
+ if (error)
+ return error;
+
/*
* Cancel background eofb scanning so it cannot race with the final
* log force+buftarg wait and deadlock the remount.
@@ -1827,8 +1836,6 @@ xfs_fs_reconfigure(
if (error)
return error;
- sync_filesystem(mp->m_super);
-
/* inode32 -> inode64 */
if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index b76dfb310ab6..804b6e265685 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -185,9 +185,9 @@ static const struct address_space_operations zonefs_file_aops = {
.readahead = zonefs_readahead,
.writepage = zonefs_writepage,
.writepages = zonefs_writepages,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .dirty_folio = filemap_dirty_folio,
.releasepage = iomap_releasepage,
- .invalidatepage = iomap_invalidatepage,
+ .invalidate_folio = iomap_invalidate_folio,
.migratepage = iomap_migrate_page,
.is_partially_uptodate = iomap_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
@@ -692,12 +692,11 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
if (!nr_pages)
return 0;
- bio = bio_alloc(GFP_NOFS, nr_pages);
- bio_set_dev(bio, bdev);
+ bio = bio_alloc(bdev, nr_pages,
+ REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS);
bio->bi_iter.bi_sector = zi->i_zsector;
bio->bi_write_hint = iocb->ki_hint;
bio->bi_ioprio = iocb->ki_ioprio;
- bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
if (iocb->ki_flags & IOCB_DSYNC)
bio->bi_opf |= REQ_FUA;
@@ -1137,7 +1136,7 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb)
{
struct zonefs_inode_info *zi;
- zi = kmem_cache_alloc(zonefs_inode_cachep, GFP_KERNEL);
+ zi = alloc_inode_sb(sb, zonefs_inode_cachep, GFP_KERNEL);
if (!zi)
return NULL;
@@ -1541,10 +1540,8 @@ static int zonefs_read_super(struct super_block *sb)
if (!page)
return -ENOMEM;
- bio_init(&bio, &bio_vec, 1);
+ bio_init(&bio, sb->s_bdev, &bio_vec, 1, REQ_OP_READ);
bio.bi_iter.bi_sector = 0;
- bio.bi_opf = REQ_OP_READ;
- bio_set_dev(&bio, sb->s_bdev);
bio_add_page(&bio, page, PAGE_SIZE, 0);
ret = submit_bio_wait(&bio);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index ca88c4706f2b..3f7f01f03869 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -480,6 +480,8 @@ void acpi_initialize_hp_context(struct acpi_device *adev,
/* acpi_device.dev.bus == &acpi_bus_type */
extern struct bus_type acpi_bus_type;
+int acpi_bus_for_each_dev(int (*fn)(struct device *, void *), void *data);
+
/*
* Events
* ------
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 69e89d572b9e..02c1fa16e638 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -507,8 +507,12 @@ typedef u64 acpi_integer;
/* Pointer/Integer type conversions */
#define ACPI_TO_POINTER(i) ACPI_CAST_PTR (void, (acpi_size) (i))
+#ifndef ACPI_TO_INTEGER
#define ACPI_TO_INTEGER(p) ACPI_PTR_DIFF (p, (void *) 0)
+#endif
+#ifndef ACPI_OFFSET
#define ACPI_OFFSET(d, f) ACPI_PTR_DIFF (&(((d *) 0)->f), (void *) 0)
+#endif
#define ACPI_PTR_TO_PHYSADDR(i) ACPI_TO_INTEGER(i)
/* Optimizations for 4-character (32-bit) acpi_name manipulation */
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index ece0a8af2bae..afaca3a075e8 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -27,14 +27,16 @@ extern int hest_disable;
extern int erst_disable;
#ifdef CONFIG_ACPI_APEI_GHES
extern bool ghes_disable;
+void __init acpi_ghes_init(void);
#else
#define ghes_disable 1
+static inline void acpi_ghes_init(void) { }
#endif
#ifdef CONFIG_ACPI_APEI
void __init acpi_hest_init(void);
#else
-static inline void acpi_hest_init(void) { return; }
+static inline void acpi_hest_init(void) { }
#endif
int erst_write(const struct cper_record_header *record);
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index b3ffb9bbf664..cec41e004ecf 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -114,6 +114,11 @@
#define acpi_raw_spinlock raw_spinlock_t *
#define acpi_cpu_flags unsigned long
+#define acpi_uintptr_t uintptr_t
+
+#define ACPI_TO_INTEGER(p) ((uintptr_t)(p))
+#define ACPI_OFFSET(d, f) offsetof(d, f)
+
/* Use native linux version of acpi_os_allocate_zeroed */
#define USE_NATIVE_ALLOCATE_ZEROED
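
The generic ACPICA fallbacks compute these values by arithmetic on a null pointer constant, which is undefined behaviour that modern compilers flag; Linux now overrides them with well-defined equivalents, which is why the #ifndef guards were added in actypes.h above. The two forms side by side (fallback copied from the hunk above; both yield the same value in practice):

    /* ACPICA fallback (relies on the null-pointer offsetof idiom): */
    #define ACPI_OFFSET(d, f)   ACPI_PTR_DIFF(&(((d *) 0)->f), (void *) 0)

    /* Linux override: same result, defined behaviour via the compiler: */
    #define ACPI_OFFSET(d, f)   offsetof(d, f)
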
diff --git a/include/asm-generic/bitops/instrumented-atomic.h b/include/asm-generic/bitops/instrumented-atomic.h
index c90192b1c755..4225a8ca9c1a 100644
--- a/include/asm-generic/bitops/instrumented-atomic.h
+++ b/include/asm-generic/bitops/instrumented-atomic.h
@@ -23,7 +23,7 @@
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
-static inline void set_bit(long nr, volatile unsigned long *addr)
+static __always_inline void set_bit(long nr, volatile unsigned long *addr)
{
instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
arch_set_bit(nr, addr);
@@ -36,7 +36,7 @@ static inline void set_bit(long nr, volatile unsigned long *addr)
*
* This is a relaxed atomic operation (no implied memory barriers).
*/
-static inline void clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline void clear_bit(long nr, volatile unsigned long *addr)
{
instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
arch_clear_bit(nr, addr);
@@ -52,7 +52,7 @@ static inline void clear_bit(long nr, volatile unsigned long *addr)
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
-static inline void change_bit(long nr, volatile unsigned long *addr)
+static __always_inline void change_bit(long nr, volatile unsigned long *addr)
{
instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
arch_change_bit(nr, addr);
@@ -65,7 +65,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr)
*
* This is an atomic fully-ordered operation (implied full memory barrier).
*/
-static inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
{
kcsan_mb();
instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long));
@@ -79,7 +79,7 @@ static inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
*
* This is an atomic fully-ordered operation (implied full memory barrier).
*/
-static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
{
kcsan_mb();
instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long));
@@ -93,7 +93,7 @@ static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
*
* This is an atomic fully-ordered operation (implied full memory barrier).
*/
-static inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
{
kcsan_mb();
instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long));
diff --git a/include/asm-generic/bitops/instrumented-non-atomic.h b/include/asm-generic/bitops/instrumented-non-atomic.h
index 37363d570b9b..7ab1ecc37782 100644
--- a/include/asm-generic/bitops/instrumented-non-atomic.h
+++ b/include/asm-generic/bitops/instrumented-non-atomic.h
@@ -22,7 +22,7 @@
* region of memory concurrently, the effect may be that only one operation
* succeeds.
*/
-static inline void __set_bit(long nr, volatile unsigned long *addr)
+static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
{
instrument_write(addr + BIT_WORD(nr), sizeof(long));
arch___set_bit(nr, addr);
@@ -37,7 +37,7 @@ static inline void __set_bit(long nr, volatile unsigned long *addr)
* region of memory concurrently, the effect may be that only one operation
* succeeds.
*/
-static inline void __clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
{
instrument_write(addr + BIT_WORD(nr), sizeof(long));
arch___clear_bit(nr, addr);
@@ -52,13 +52,13 @@ static inline void __clear_bit(long nr, volatile unsigned long *addr)
* region of memory concurrently, the effect may be that only one operation
* succeeds.
*/
-static inline void __change_bit(long nr, volatile unsigned long *addr)
+static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
{
instrument_write(addr + BIT_WORD(nr), sizeof(long));
arch___change_bit(nr, addr);
}
-static inline void __instrument_read_write_bitop(long nr, volatile unsigned long *addr)
+static __always_inline void __instrument_read_write_bitop(long nr, volatile unsigned long *addr)
{
if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC)) {
/*
@@ -90,7 +90,7 @@ static inline void __instrument_read_write_bitop(long nr, volatile unsigned long
* This operation is non-atomic. If two instances of this operation race, one
* can appear to succeed but actually fail.
*/
-static inline bool __test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr)
{
__instrument_read_write_bitop(nr, addr);
return arch___test_and_set_bit(nr, addr);
@@ -104,7 +104,7 @@ static inline bool __test_and_set_bit(long nr, volatile unsigned long *addr)
* This operation is non-atomic. If two instances of this operation race, one
* can appear to succeed but actually fail.
*/
-static inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr)
{
__instrument_read_write_bitop(nr, addr);
return arch___test_and_clear_bit(nr, addr);
@@ -118,7 +118,7 @@ static inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr)
* This operation is non-atomic. If two instances of this operation race, one
* can appear to succeed but actually fail.
*/
-static inline bool __test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr)
{
__instrument_read_write_bitop(nr, addr);
return arch___test_and_change_bit(nr, addr);
@@ -129,7 +129,7 @@ static inline bool __test_and_change_bit(long nr, volatile unsigned long *addr)
* @nr: bit number to test
* @addr: Address to start counting from
*/
-static inline bool test_bit(long nr, const volatile unsigned long *addr)
+static __always_inline bool test_bit(long nr, const volatile unsigned long *addr)
{
instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long));
return arch_test_bit(nr, addr);
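[Editor's note] The instrumented bitops above only gain __always_inline (so the KASAN/KCSAN instrumentation cannot be defeated by the compiler declining to inline); their semantics are unchanged. A short sketch of typical call sites, with hypothetical mydev_* names, showing when the atomic and non-atomic forms are appropriate:

#include <linux/bitops.h>
#include <linux/types.h>

#define MYDEV_FLAG_BUSY   0
#define MYDEV_FLAG_ERROR  1

static unsigned long mydev_flags;	/* hypothetical driver state */

static bool mydev_try_claim(void)
{
	/* Atomic RMW: returns the previous value of the bit, so a
	 * false return here means we were the ones who set it. */
	return !test_and_set_bit(MYDEV_FLAG_BUSY, &mydev_flags);
}

static void mydev_mark_error_locked(void)
{
	/* Caller holds the device lock, so the cheaper non-atomic
	 * form is fine; unserialized concurrent use could lose bits. */
	__set_bit(MYDEV_FLAG_ERROR, &mydev_flags);
}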
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 42f3866bca69..2a10db2f0bc5 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -321,16 +321,6 @@
#define THERMAL_TABLE(name)
#endif
-#ifdef CONFIG_DTPM
-#define DTPM_TABLE() \
- . = ALIGN(8); \
- __dtpm_table = .; \
- KEEP(*(__dtpm_table)) \
- __dtpm_table_end = .;
-#else
-#define DTPM_TABLE()
-#endif
-
#define KERNEL_DTB() \
STRUCT_ALIGN(); \
__dtb_start = .; \
@@ -723,7 +713,6 @@
ACPI_PROBE_TABLE(irqchip) \
ACPI_PROBE_TABLE(timer) \
THERMAL_TABLE(governor) \
- DTPM_TABLE() \
EARLYCON_TABLE() \
LSM_TABLE() \
EARLY_LSM_TABLE() \
diff --git a/include/asm-generic/xor.h b/include/asm-generic/xor.h
index b62a2a56a4d4..44509d48fca2 100644
--- a/include/asm-generic/xor.h
+++ b/include/asm-generic/xor.h
@@ -8,7 +8,8 @@
#include <linux/prefetch.h>
static void
-xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_8regs_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
long lines = bytes / (sizeof (long)) / 8;
@@ -27,8 +28,9 @@ xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_8regs_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
long lines = bytes / (sizeof (long)) / 8;
@@ -48,8 +50,10 @@ xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_8regs_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
long lines = bytes / (sizeof (long)) / 8;
@@ -70,8 +74,11 @@ xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_8regs_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
long lines = bytes / (sizeof (long)) / 8;
@@ -93,7 +100,8 @@ xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_32regs_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
long lines = bytes / (sizeof (long)) / 8;
@@ -129,8 +137,9 @@ xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_32regs_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
long lines = bytes / (sizeof (long)) / 8;
@@ -175,8 +184,10 @@ xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_32regs_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
long lines = bytes / (sizeof (long)) / 8;
@@ -230,8 +241,11 @@ xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_32regs_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
long lines = bytes / (sizeof (long)) / 8;
@@ -294,7 +308,8 @@ xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_8regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
long lines = bytes / (sizeof (long)) / 8 - 1;
prefetchw(p1);
@@ -320,8 +335,9 @@ xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_8regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
long lines = bytes / (sizeof (long)) / 8 - 1;
prefetchw(p1);
@@ -350,8 +366,10 @@ xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_8regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
long lines = bytes / (sizeof (long)) / 8 - 1;
@@ -384,8 +402,11 @@ xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_8regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
long lines = bytes / (sizeof (long)) / 8 - 1;
@@ -421,7 +442,8 @@ xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
long lines = bytes / (sizeof (long)) / 8 - 1;
@@ -466,8 +488,9 @@ xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_32regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
long lines = bytes / (sizeof (long)) / 8 - 1;
@@ -523,8 +546,10 @@ xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_32regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
long lines = bytes / (sizeof (long)) / 8 - 1;
@@ -591,8 +616,11 @@ xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_32regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
long lines = bytes / (sizeof (long)) / 8 - 1;
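[Editor's note] The __restrict (and const) qualifiers added throughout promise the compiler that the source and destination buffers never alias, so it can keep the unrolled words in registers and vectorize freely. A runnable user-space sketch of the same pattern (xor_words is a stand-in name; as in the kernel helpers, the length is assumed to be a multiple of eight longs):

#include <stdio.h>

static void
xor_words(unsigned long bytes, unsigned long * __restrict p1,
	  const unsigned long * __restrict p2)
{
	long lines = bytes / sizeof(long) / 8;

	do {
		p1[0] ^= p2[0]; p1[1] ^= p2[1];
		p1[2] ^= p2[2]; p1[3] ^= p2[3];
		p1[4] ^= p2[4]; p1[5] ^= p2[5];
		p1[6] ^= p2[6]; p1[7] ^= p2[7];
		p1 += 8; p2 += 8;
	} while (--lines > 0);
}

int main(void)
{
	unsigned long a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	unsigned long b[8] = { 8, 7, 6, 5, 4, 3, 2, 1 };

	xor_words(sizeof(a), a, b);
	printf("%lu\n", a[0]);	/* 1 ^ 8 == 9 */
	return 0;
}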
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index e715bdb720d5..057c8964aefb 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -56,6 +56,7 @@ enum arch_timer_spi_nr {
#define ARCH_TIMER_EVT_TRIGGER_MASK (0xF << ARCH_TIMER_EVT_TRIGGER_SHIFT)
#define ARCH_TIMER_USR_VT_ACCESS_EN (1 << 8) /* virtual timer registers */
#define ARCH_TIMER_USR_PT_ACCESS_EN (1 << 9) /* physical timer registers */
+#define ARCH_TIMER_EVT_INTERVAL_SCALE (1 << 17) /* EVNTIS in the ARMv8 ARM */
#define ARCH_TIMER_EVT_STREAM_PERIOD_US 100
#define ARCH_TIMER_EVT_STREAM_FREQ \
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index f76ec723ceae..f50c5d1725da 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -13,6 +13,8 @@
#include <linux/list.h>
#include <linux/types.h>
+#include <asm/unaligned.h>
+
/*
* Maximum values for blocksize and alignmask, used to allocate
* static buffers that are big enough for any combination of
@@ -154,9 +156,11 @@ static inline void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
(size % sizeof(unsigned long)) == 0) {
unsigned long *d = (unsigned long *)dst;
unsigned long *s = (unsigned long *)src;
+ unsigned long l;
while (size > 0) {
- *d++ ^= *s++;
+ l = get_unaligned(d) ^ get_unaligned(s++);
+ put_unaligned(l, d++);
size -= sizeof(unsigned long);
}
} else {
@@ -173,9 +177,11 @@ static inline void crypto_xor_cpy(u8 *dst, const u8 *src1, const u8 *src2,
unsigned long *d = (unsigned long *)dst;
unsigned long *s1 = (unsigned long *)src1;
unsigned long *s2 = (unsigned long *)src2;
+ unsigned long l;
while (size > 0) {
- *d++ = *s1++ ^ *s2++;
+ l = get_unaligned(s1++) ^ get_unaligned(s2++);
+ put_unaligned(l, d++);
size -= sizeof(unsigned long);
}
} else {
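[Editor's note] Replacing the direct dereferences with get_unaligned()/put_unaligned() keeps the word-at-a-time fast path well-defined when dst/src are not long-aligned: the accessors compile to plain loads and stores on architectures with efficient unaligned access and to safe byte-wise accesses elsewhere. A reduced sketch of the pattern (xor_words is a stand-in name):

#include <asm/unaligned.h>
#include <linux/types.h>

/* Word-at-a-time XOR with unaligned accessors; assumes size is a
 * multiple of sizeof(unsigned long), as crypto_xor() checks before
 * taking this path. */
static void xor_words(u8 *dst, const u8 *src, unsigned int size)
{
	unsigned long *d = (unsigned long *)dst;
	unsigned long *s = (unsigned long *)src;
	unsigned long l;

	while (size > 0) {
		l = get_unaligned(d) ^ get_unaligned(s++);
		put_unaligned(l, d++);
		size -= sizeof(unsigned long);
	}
}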
diff --git a/include/crypto/asym_tpm_subtype.h b/include/crypto/asym_tpm_subtype.h
deleted file mode 100644
index 48198c36d6b9..000000000000
--- a/include/crypto/asym_tpm_subtype.h
+++ /dev/null
@@ -1,19 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#ifndef _LINUX_ASYM_TPM_SUBTYPE_H
-#define _LINUX_ASYM_TPM_SUBTYPE_H
-
-#include <linux/keyctl.h>
-
-struct tpm_key {
- void *blob;
- u32 blob_len;
- uint16_t key_len; /* Size in bits of the key */
- const void *pub_key; /* pointer inside blob to the public key bytes */
- uint16_t pub_key_len; /* length of the public key */
-};
-
-struct tpm_key *tpm_key_create(const void *blob, uint32_t blob_len);
-
-extern struct asymmetric_key_subtype asym_tpm_subtype;
-
-#endif /* _LINUX_ASYM_TPM_SUBTYPE_H */
diff --git a/include/crypto/dh.h b/include/crypto/dh.h
index d71e9858ab86..7b863e911cb4 100644
--- a/include/crypto/dh.h
+++ b/include/crypto/dh.h
@@ -24,21 +24,17 @@
*
* @key: Private DH key
* @p: Diffie-Hellman parameter P
- * @q: Diffie-Hellman parameter Q
* @g: Diffie-Hellman generator G
* @key_size: Size of the private DH key
* @p_size: Size of DH parameter P
- * @q_size: Size of DH parameter Q
* @g_size: Size of DH generator G
*/
struct dh {
- void *key;
- void *p;
- void *q;
- void *g;
+ const void *key;
+ const void *p;
+ const void *g;
unsigned int key_size;
unsigned int p_size;
- unsigned int q_size;
unsigned int g_size;
};
@@ -83,4 +79,20 @@ int crypto_dh_encode_key(char *buf, unsigned int len, const struct dh *params);
*/
int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params);
+/**
+ * __crypto_dh_decode_key() - decode a private key without parameter checks
+ * @buf: Buffer holding a packed key that should be decoded
+ * @len: Length of the packed private key buffer
+ * @params: Buffer allocated by the caller that is filled with the
+ * unpacked DH private key.
+ *
+ * Internal function providing the same services as the exported
+ * crypto_dh_decode_key(), but without any of those basic parameter
+ * checks conducted by the latter.
+ *
+ * Return: -EINVAL if buffer has insufficient size, 0 on success
+ */
+int __crypto_dh_decode_key(const char *buf, unsigned int len,
+ struct dh *params);
+
#endif
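[Editor's note] With @q/@q_size gone and the buffers const, callers now supply only P, G and the private key. A hedged sketch of packing parameters with the trimmed-down struct, using the existing crypto_dh_key_len()/crypto_dh_encode_key() helpers (pack_dh_key is a stand-in name):

#include <crypto/dh.h>
#include <linux/errno.h>
#include <linux/slab.h>

static int pack_dh_key(const void *key, unsigned int key_size,
		       const void *p, unsigned int p_size,
		       const void *g, unsigned int g_size)
{
	struct dh params = {
		.key = key, .key_size = key_size,
		.p = p, .p_size = p_size,
		.g = g, .g_size = g_size,
	};
	unsigned int len = crypto_dh_key_len(&params);
	char *buf = kmalloc(len, GFP_KERNEL);
	int err;

	if (!buf)
		return -ENOMEM;
	err = crypto_dh_encode_key(buf, len, &params);
	/* ... hand buf to crypto_kpp_set_secret() or similar ... */
	kfree(buf);
	return err;
}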
diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h
index d39cfa0d333e..52363eee2b20 100644
--- a/include/crypto/internal/blake2s.h
+++ b/include/crypto/internal/blake2s.h
@@ -24,14 +24,11 @@ static inline void blake2s_set_lastblock(struct blake2s_state *state)
state->f[0] = -1;
}
-typedef void (*blake2s_compress_t)(struct blake2s_state *state,
- const u8 *block, size_t nblocks, u32 inc);
-
/* Helper functions for BLAKE2s shared by the library and shash APIs */
-static inline void __blake2s_update(struct blake2s_state *state,
- const u8 *in, size_t inlen,
- blake2s_compress_t compress)
+static __always_inline void
+__blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen,
+ bool force_generic)
{
const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
@@ -39,7 +36,12 @@ static inline void __blake2s_update(struct blake2s_state *state,
return;
if (inlen > fill) {
memcpy(state->buf + state->buflen, in, fill);
- (*compress)(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
+ if (force_generic)
+ blake2s_compress_generic(state, state->buf, 1,
+ BLAKE2S_BLOCK_SIZE);
+ else
+ blake2s_compress(state, state->buf, 1,
+ BLAKE2S_BLOCK_SIZE);
state->buflen = 0;
in += fill;
inlen -= fill;
@@ -47,7 +49,12 @@ static inline void __blake2s_update(struct blake2s_state *state,
if (inlen > BLAKE2S_BLOCK_SIZE) {
const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
/* Hash one less (full) block than strictly possible */
- (*compress)(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
+ if (force_generic)
+ blake2s_compress_generic(state, in, nblocks - 1,
+ BLAKE2S_BLOCK_SIZE);
+ else
+ blake2s_compress(state, in, nblocks - 1,
+ BLAKE2S_BLOCK_SIZE);
in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
}
@@ -55,13 +62,16 @@ static inline void __blake2s_update(struct blake2s_state *state,
state->buflen += inlen;
}
-static inline void __blake2s_final(struct blake2s_state *state, u8 *out,
- blake2s_compress_t compress)
+static __always_inline void
+__blake2s_final(struct blake2s_state *state, u8 *out, bool force_generic)
{
blake2s_set_lastblock(state);
memset(state->buf + state->buflen, 0,
BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
- (*compress)(state, state->buf, 1, state->buflen);
+ if (force_generic)
+ blake2s_compress_generic(state, state->buf, 1, state->buflen);
+ else
+ blake2s_compress(state, state->buf, 1, state->buflen);
cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
memcpy(out, state->h, state->outlen);
}
@@ -99,20 +109,20 @@ static inline int crypto_blake2s_init(struct shash_desc *desc)
static inline int crypto_blake2s_update(struct shash_desc *desc,
const u8 *in, unsigned int inlen,
- blake2s_compress_t compress)
+ bool force_generic)
{
struct blake2s_state *state = shash_desc_ctx(desc);
- __blake2s_update(state, in, inlen, compress);
+ __blake2s_update(state, in, inlen, force_generic);
return 0;
}
static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out,
- blake2s_compress_t compress)
+ bool force_generic)
{
struct blake2s_state *state = shash_desc_ctx(desc);
- __blake2s_final(state, out, compress);
+ __blake2s_final(state, out, force_generic);
return 0;
}
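[Editor's note] Replacing the blake2s_compress_t function pointer with a compile-time force_generic flag removes an indirect call and lets each caller collapse to a direct call after inlining. A sketch of how an shash driver is expected to use the new helpers, following the pattern of the generic driver (which passes true; an arch-optimized driver would pass false):

#include <crypto/internal/blake2s.h>
#include <crypto/internal/hash.h>

static int crypto_blake2s_update_generic(struct shash_desc *desc,
					 const u8 *in, unsigned int inlen)
{
	return crypto_blake2s_update(desc, in, inlen, true);
}

static int crypto_blake2s_final_generic(struct shash_desc *desc, u8 *out)
{
	return crypto_blake2s_final(desc, out, true);
}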
diff --git a/include/crypto/internal/kpp.h b/include/crypto/internal/kpp.h
index 659b642efada..9cb0662ebe87 100644
--- a/include/crypto/internal/kpp.h
+++ b/include/crypto/internal/kpp.h
@@ -10,6 +10,38 @@
#include <crypto/kpp.h>
#include <crypto/algapi.h>
+/**
+ * struct kpp_instance - KPP template instance
+ * @free: Callback getting invoked upon instance destruction. Must be set.
+ * @s: Internal. Generic crypto core instance state properly laid out
+ * to alias with @alg as needed.
+ * @alg: The &struct kpp_alg implementation provided by the instance.
+ */
+struct kpp_instance {
+ void (*free)(struct kpp_instance *inst);
+ union {
+ struct {
+ char head[offsetof(struct kpp_alg, base)];
+ struct crypto_instance base;
+ } s;
+ struct kpp_alg alg;
+ };
+};
+
+/**
+ * struct crypto_kpp_spawn - KPP algorithm spawn
+ * @base: Internal. Generic crypto core spawn state.
+ *
+ * Template instances can get hold of some inner KPP algorithm by
+ * binding a &struct crypto_kpp_spawn via
+ * crypto_grab_kpp(). Transforms may subsequently get instantiated
+ * from the referenced inner &struct kpp_alg by means of
+ * crypto_spawn_kpp().
+ */
+struct crypto_kpp_spawn {
+ struct crypto_spawn base;
+};
+
/*
* Transform internal helpers.
*/
@@ -33,6 +65,62 @@ static inline const char *kpp_alg_name(struct crypto_kpp *tfm)
return crypto_kpp_tfm(tfm)->__crt_alg->cra_name;
}
+/*
+ * Template instance internal helpers.
+ */
+/**
+ * kpp_crypto_instance() - Cast a &struct kpp_instance to the corresponding
+ * generic &struct crypto_instance.
+ * @inst: Pointer to the &struct kpp_instance to be cast.
+ * Return: A pointer to the &struct crypto_instance embedded in @inst.
+ */
+static inline struct crypto_instance *kpp_crypto_instance(
+ struct kpp_instance *inst)
+{
+ return &inst->s.base;
+}
+
+/**
+ * kpp_instance() - Cast a generic &struct crypto_instance to the corresponding
+ * &struct kpp_instance.
+ * @inst: Pointer to the &struct crypto_instance to be cast.
+ * Return: A pointer to the &struct kpp_instance @inst is embedded in.
+ */
+static inline struct kpp_instance *kpp_instance(struct crypto_instance *inst)
+{
+ return container_of(inst, struct kpp_instance, s.base);
+}
+
+/**
+ * kpp_alg_instance() - Get the &struct kpp_instance a given KPP transform has
+ * been instantiated from.
+ * @kpp: The KPP transform instantiated from some &struct kpp_instance.
+ * Return: The &struct kpp_instance associated with @kpp.
+ */
+static inline struct kpp_instance *kpp_alg_instance(struct crypto_kpp *kpp)
+{
+ return kpp_instance(crypto_tfm_alg_instance(&kpp->base));
+}
+
+/**
+ * kpp_instance_ctx() - Get a pointer to a &struct kpp_instance's implementation
+ * specific context data.
+ * @inst: The &struct kpp_instance whose context data to access.
+ *
+ * A KPP template implementation may allocate extra memory beyond the
+ * end of a &struct kpp_instance instantiated from &crypto_template.create().
+ * This function provides a means to obtain a pointer to this area.
+ *
+ * Return: A pointer to the implementation specific context data.
+ */
+static inline void *kpp_instance_ctx(struct kpp_instance *inst)
+{
+ return crypto_instance_ctx(kpp_crypto_instance(inst));
+}
+
+/*
+ * KPP algorithm (un)registration functions.
+ */
/**
* crypto_register_kpp() -- Register key-agreement protocol primitives algorithm
*
@@ -56,4 +144,74 @@ int crypto_register_kpp(struct kpp_alg *alg);
*/
void crypto_unregister_kpp(struct kpp_alg *alg);
+/**
+ * kpp_register_instance() - Register a KPP template instance.
+ * @tmpl: The instantiating template.
+ * @inst: The KPP template instance to be registered.
+ * Return: %0 on success, negative error code otherwise.
+ */
+int kpp_register_instance(struct crypto_template *tmpl,
+ struct kpp_instance *inst);
+
+/*
+ * KPP spawn related functions.
+ */
+/**
+ * crypto_grab_kpp() - Look up a KPP algorithm and bind a spawn to it.
+ * @spawn: The KPP spawn to bind.
+ * @inst: The template instance owning @spawn.
+ * @name: The KPP algorithm name to look up.
+ * @type: The type bitset to pass on to the lookup.
+ * @mask: The mask bitmask to pass on to the lookup.
+ * Return: %0 on success, a negative error code otherwise.
+ */
+int crypto_grab_kpp(struct crypto_kpp_spawn *spawn,
+ struct crypto_instance *inst,
+ const char *name, u32 type, u32 mask);
+
+/**
+ * crypto_drop_kpp() - Release a spawn previously bound via crypto_grab_kpp().
+ * @spawn: The spawn to release.
+ */
+static inline void crypto_drop_kpp(struct crypto_kpp_spawn *spawn)
+{
+ crypto_drop_spawn(&spawn->base);
+}
+
+/**
+ * crypto_spawn_kpp_alg() - Get the algorithm a KPP spawn has been bound to.
+ * @spawn: The spawn to get the referenced &struct kpp_alg for.
+ *
+ * This function as well as the returned result are safe to use only
+ * after @spawn has been successfully bound via crypto_grab_kpp() and
+ * up until the template instance owning @spawn has either been
+ * registered successfully or the spawn has been released again via
+ * crypto_drop_spawn().
+ *
+ * Return: A pointer to the &struct kpp_alg referenced from the spawn.
+ */
+static inline struct kpp_alg *crypto_spawn_kpp_alg(
+ struct crypto_kpp_spawn *spawn)
+{
+ return container_of(spawn->base.alg, struct kpp_alg, base);
+}
+
+/**
+ * crypto_spawn_kpp() - Create a transform from a KPP spawn.
+ * @spawn: The spawn previously bound to some &struct kpp_alg via
+ * crypto_grab_kpp().
+ *
+ * Once a &struct crypto_kpp_spawn has been successfully bound to a
+ * &struct kpp_alg via crypto_grab_kpp(), transforms for the latter
+ * may get instantiated from the former by means of this function.
+ *
+ * Return: A pointer to the freshly created KPP transform on success
+ * or an ``ERR_PTR()`` otherwise.
+ */
+static inline struct crypto_kpp *crypto_spawn_kpp(
+ struct crypto_kpp_spawn *spawn)
+{
+ return crypto_spawn_tfm2(&spawn->base);
+}
+
#endif
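[Editor's note] Taken together, these additions give KPP the same template machinery other algorithm types already have. A hedged sketch of a template ->create() wired up with them (demo_* names are placeholders; type/mask handling, algorithm setup and the ->free callback are elided):

#include <crypto/internal/kpp.h>
#include <linux/err.h>
#include <linux/slab.h>

struct demo_instance_ctx {
	struct crypto_kpp_spawn spawn;
};

static int demo_create(struct crypto_template *tmpl, struct rtattr **tb)
{
	struct kpp_instance *inst;
	struct demo_instance_ctx *ctx;
	const char *name;
	int err;

	name = crypto_attr_alg_name(tb[1]);
	if (IS_ERR(name))
		return PTR_ERR(name);

	/* Instance plus implementation-specific context in one go;
	 * kpp_instance_ctx() returns the trailing area. */
	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
	if (!inst)
		return -ENOMEM;
	ctx = kpp_instance_ctx(inst);

	err = crypto_grab_kpp(&ctx->spawn, kpp_crypto_instance(inst),
			      name, 0, 0);
	if (err)
		goto err_free;

	/* ... fill in inst->alg and inst->free here ... */

	err = kpp_register_instance(tmpl, inst);
	if (err)
		goto err_drop;
	return 0;

err_drop:
	crypto_drop_kpp(&ctx->spawn);
err_free:
	kfree(inst);
	return err;
}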
diff --git a/include/crypto/sm3.h b/include/crypto/sm3.h
index 42ea21289ba9..1f021ad0533f 100644
--- a/include/crypto/sm3.h
+++ b/include/crypto/sm3.h
@@ -1,5 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Common values for SM3 algorithm
+ *
+ * Copyright (C) 2017 ARM Limited or its affiliates.
+ * Copyright (C) 2017 Gilad Ben-Yossef <gilad@benyossef.com>
+ * Copyright (C) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#ifndef _CRYPTO_SM3_H
@@ -30,13 +35,30 @@ struct sm3_state {
u8 buffer[SM3_BLOCK_SIZE];
};
-struct shash_desc;
+/*
+ * Stand-alone implementation of the SM3 algorithm. It is designed to
+ * have as few dependencies as possible so it can be used in the
+ * kexec_file purgatory. In other cases you should generally use the
+ * hash APIs from include/crypto/hash.h, especially when hashing large
+ * amounts of data, as those APIs may be hw-accelerated.
+ *
+ * For details see lib/crypto/sm3.c
+ */
-extern int crypto_sm3_update(struct shash_desc *desc, const u8 *data,
- unsigned int len);
+static inline void sm3_init(struct sm3_state *sctx)
+{
+ sctx->state[0] = SM3_IVA;
+ sctx->state[1] = SM3_IVB;
+ sctx->state[2] = SM3_IVC;
+ sctx->state[3] = SM3_IVD;
+ sctx->state[4] = SM3_IVE;
+ sctx->state[5] = SM3_IVF;
+ sctx->state[6] = SM3_IVG;
+ sctx->state[7] = SM3_IVH;
+ sctx->count = 0;
+}
-extern int crypto_sm3_final(struct shash_desc *desc, u8 *out);
+void sm3_update(struct sm3_state *sctx, const u8 *data, unsigned int len);
+void sm3_final(struct sm3_state *sctx, u8 *out);
-extern int crypto_sm3_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *hash);
#endif
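[Editor's note] A short sketch of the new stand-alone interface (demo_sm3 is a stand-in name); unlike the removed crypto_sm3_* shash hooks, it needs no transform allocation, which is what makes it usable from the kexec_file purgatory:

#include <crypto/sm3.h>
#include <linux/types.h>

static void demo_sm3(const u8 *data, unsigned int len,
		     u8 digest[SM3_DIGEST_SIZE])
{
	struct sm3_state sctx;

	sm3_init(&sctx);
	sm3_update(&sctx, data, len);
	sm3_final(&sctx, digest);
}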
diff --git a/include/dt-bindings/clock/dra7.h b/include/dt-bindings/clock/dra7.h
index 7d57063b8a65..29ff6b895848 100644
--- a/include/dt-bindings/clock/dra7.h
+++ b/include/dt-bindings/clock/dra7.h
@@ -84,17 +84,10 @@
#define DRA7_L3_MAIN_2_CLKCTRL DRA7_CLKCTRL_INDEX(0x20)
#define DRA7_L3_INSTR_CLKCTRL DRA7_CLKCTRL_INDEX(0x28)
-/* iva clocks */
-#define DRA7_IVA_CLKCTRL DRA7_CLKCTRL_INDEX(0x20)
-#define DRA7_SL2IF_CLKCTRL DRA7_CLKCTRL_INDEX(0x28)
-
/* dss clocks */
#define DRA7_DSS_CORE_CLKCTRL DRA7_CLKCTRL_INDEX(0x20)
#define DRA7_BB2D_CLKCTRL DRA7_CLKCTRL_INDEX(0x30)
-/* gpu clocks */
-#define DRA7_GPU_CLKCTRL DRA7_CLKCTRL_INDEX(0x20)
-
/* l3init clocks */
#define DRA7_MMC1_CLKCTRL DRA7_CLKCTRL_INDEX(0x28)
#define DRA7_MMC2_CLKCTRL DRA7_CLKCTRL_INDEX(0x30)
@@ -267,10 +260,17 @@
#define DRA7_L3INSTR_L3_MAIN_2_CLKCTRL DRA7_CLKCTRL_INDEX(0x20)
#define DRA7_L3INSTR_L3_INSTR_CLKCTRL DRA7_CLKCTRL_INDEX(0x28)
+/* iva clocks */
+#define DRA7_IVA_CLKCTRL DRA7_CLKCTRL_INDEX(0x20)
+#define DRA7_SL2IF_CLKCTRL DRA7_CLKCTRL_INDEX(0x28)
+
/* dss clocks */
#define DRA7_DSS_DSS_CORE_CLKCTRL DRA7_CLKCTRL_INDEX(0x20)
#define DRA7_DSS_BB2D_CLKCTRL DRA7_CLKCTRL_INDEX(0x30)
+/* gpu clocks */
+#define DRA7_GPU_CLKCTRL DRA7_CLKCTRL_INDEX(0x20)
+
/* l3init clocks */
#define DRA7_L3INIT_MMC1_CLKCTRL DRA7_CLKCTRL_INDEX(0x28)
#define DRA7_L3INIT_MMC2_CLKCTRL DRA7_CLKCTRL_INDEX(0x30)
diff --git a/include/dt-bindings/interrupt-controller/apple-aic.h b/include/dt-bindings/interrupt-controller/apple-aic.h
index 604f2bb30ac0..bf3aac0e5491 100644
--- a/include/dt-bindings/interrupt-controller/apple-aic.h
+++ b/include/dt-bindings/interrupt-controller/apple-aic.h
@@ -11,5 +11,7 @@
#define AIC_TMR_HV_VIRT 1
#define AIC_TMR_GUEST_PHYS 2
#define AIC_TMR_GUEST_VIRT 3
+#define AIC_CPU_PMU_E 4
+#define AIC_CPU_PMU_P 5
#endif
diff --git a/include/dt-bindings/regulator/richtek,rt5190a-regulator.h b/include/dt-bindings/regulator/richtek,rt5190a-regulator.h
new file mode 100644
index 000000000000..63f99d4c1cb3
--- /dev/null
+++ b/include/dt-bindings/regulator/richtek,rt5190a-regulator.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __DT_BINDINGS_RICHTEK_RT5190A_REGULATOR_H__
+#define __DT_BINDINGS_RICHTEK_RT5190A_REGULATOR_H__
+
+/*
+ * BUCK/LDO mode constants which may be used in devicetree properties
+ * (e.g. regulator-allowed-modes).
+ * See the manufacturer's datasheet for more information on these modes.
+ */
+
+#define RT5190A_OPMODE_AUTO 0
+#define RT5190A_OPMODE_FPWM 1
+
+#endif
diff --git a/include/dt-bindings/regulator/ti,tps62864.h b/include/dt-bindings/regulator/ti,tps62864.h
new file mode 100644
index 000000000000..8db31f23d956
--- /dev/null
+++ b/include/dt-bindings/regulator/ti,tps62864.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef _DT_BINDINGS_REGULATOR_TI_TPS62864_H
+#define _DT_BINDINGS_REGULATOR_TI_TPS62864_H
+
+#define TPS62864_MODE_NORMAL 0
+#define TPS62864_MODE_FPWM 1
+
+#endif
diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h
index 6acd3cf13a18..2419a735420f 100644
--- a/include/keys/system_keyring.h
+++ b/include/keys/system_keyring.h
@@ -38,6 +38,20 @@ extern int restrict_link_by_builtin_and_secondary_trusted(
#define restrict_link_by_builtin_and_secondary_trusted restrict_link_by_builtin_trusted
#endif
+#ifdef CONFIG_INTEGRITY_MACHINE_KEYRING
+extern int restrict_link_by_builtin_secondary_and_machine(
+ struct key *dest_keyring,
+ const struct key_type *type,
+ const union key_payload *payload,
+ struct key *restrict_key);
+extern void __init set_machine_trusted_keys(struct key *keyring);
+#else
+#define restrict_link_by_builtin_secondary_and_machine restrict_link_by_builtin_trusted
+static inline void __init set_machine_trusted_keys(struct key *keyring)
+{
+}
+#endif
+
extern struct pkcs7_message *pkcs7;
#ifdef CONFIG_SYSTEM_BLACKLIST_KEYRING
extern int mark_hash_blacklisted(const char *hash);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6274758648e3..35413793a4d4 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -526,7 +526,7 @@ acpi_status acpi_release_memory(acpi_handle handle, struct resource *res,
int acpi_resources_are_enforced(void);
#ifdef CONFIG_HIBERNATION
-void __init acpi_check_s4_hw_signature(int check);
+extern int acpi_check_s4_hw_signature;
#endif
#ifdef CONFIG_PM_SLEEP
@@ -580,6 +580,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
extern bool osc_sb_apei_support_acked;
extern bool osc_pc_lpi_support_confirmed;
extern bool osc_sb_native_usb4_support_confirmed;
+extern bool osc_sb_cppc_not_supported;
/* USB4 Capabilities */
#define OSC_USB_USB3_TUNNELING 0x00000001
@@ -691,7 +692,7 @@ int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *);
int acpi_device_modalias(struct device *, char *, int);
struct platform_device *acpi_create_platform_device(struct acpi_device *,
- struct property_entry *);
+ const struct property_entry *);
#define ACPI_PTR(_ptr) (_ptr)
static inline void acpi_device_set_enumerated(struct acpi_device *adev)
@@ -930,7 +931,7 @@ static inline int acpi_device_modalias(struct device *dev,
static inline struct platform_device *
acpi_create_platform_device(struct acpi_device *adev,
- struct property_entry *properties)
+ const struct property_entry *properties)
{
return NULL;
}
diff --git a/include/linux/acpi_agdi.h b/include/linux/acpi_agdi.h
new file mode 100644
index 000000000000..f477f0b452fa
--- /dev/null
+++ b/include/linux/acpi_agdi.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ACPI_AGDI_H__
+#define __ACPI_AGDI_H__
+
+#include <linux/acpi.h>
+
+#ifdef CONFIG_ACPI_AGDI
+void __init acpi_agdi_init(void);
+#else
+static inline void acpi_agdi_init(void) {}
+#endif
+#endif /* __ACPI_AGDI_H__ */
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index 6c7f47846971..6562f543c3e0 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -117,30 +117,9 @@ void amba_device_put(struct amba_device *);
int amba_device_add(struct amba_device *, struct resource *);
int amba_device_register(struct amba_device *, struct resource *);
void amba_device_unregister(struct amba_device *);
-struct amba_device *amba_find_device(const char *, struct device *, unsigned int, unsigned int);
int amba_request_regions(struct amba_device *, const char *);
void amba_release_regions(struct amba_device *);
-static inline int amba_pclk_enable(struct amba_device *dev)
-{
- return clk_enable(dev->pclk);
-}
-
-static inline void amba_pclk_disable(struct amba_device *dev)
-{
- clk_disable(dev->pclk);
-}
-
-static inline int amba_pclk_prepare(struct amba_device *dev)
-{
- return clk_prepare(dev->pclk);
-}
-
-static inline void amba_pclk_unprepare(struct amba_device *dev)
-{
- clk_unprepare(dev->pclk);
-}
-
/* Some drivers don't use the struct amba_device */
#define AMBA_CONFIG_BITS(a) (((a) >> 24) & 0xff)
#define AMBA_REV_BITS(a) (((a) >> 20) & 0x0f)
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index cce6136b300a..58cbe18d825c 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -11,6 +11,10 @@
void topology_normalize_cpu_scale(void);
int topology_update_cpu_topology(void);
+#ifdef CONFIG_ACPI_CPPC_LIB
+void topology_init_cpu_capacity_cppc(void);
+#endif
+
struct device_node;
bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu);
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 63ccb5252190..220c8c60e021 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -92,6 +92,11 @@
ARM_SMCCC_SMC_32, \
0, 0x7fff)
+#define ARM_SMCCC_ARCH_WORKAROUND_3 \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_32, \
+ 0, 0x3fff)
+
#define ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID \
ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
ARM_SMCCC_SMC_32, \
diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
index 0a241c5c911d..14dc461b0e82 100644
--- a/include/linux/arm_sdei.h
+++ b/include/linux/arm_sdei.h
@@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
/* For use by arch code when CPU hotplug notifiers are not appropriate. */
int sdei_mask_local_cpu(void);
int sdei_unmask_local_cpu(void);
+void __init sdei_init(void);
#else
static inline int sdei_mask_local_cpu(void) { return 0; }
static inline int sdei_unmask_local_cpu(void) { return 0; }
+static inline void sdei_init(void) { }
#endif /* CONFIG_ARM_SDE_INTERFACE */
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 199e47e97d64..21292b5bbb55 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -324,12 +324,12 @@ enum {
ATA_LOG_NCQ_NON_DATA = 0x12,
ATA_LOG_NCQ_SEND_RECV = 0x13,
ATA_LOG_IDENTIFY_DEVICE = 0x30,
+ ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47,
/* Identify device log pages: */
ATA_LOG_SECURITY = 0x06,
ATA_LOG_SATA_SETTINGS = 0x08,
ATA_LOG_ZONED_INFORMATION = 0x09,
- ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47,
/* Identify device SATA settings log:*/
ATA_LOG_DEVSLP_OFFSET = 0x30,
diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h
index a3dba31df01e..6db58d180866 100644
--- a/include/linux/atomic/atomic-arch-fallback.h
+++ b/include/linux/atomic/atomic-arch-fallback.h
@@ -151,7 +151,16 @@
static __always_inline int
arch_atomic_read_acquire(const atomic_t *v)
{
- return smp_load_acquire(&(v)->counter);
+ int ret;
+
+ if (__native_word(atomic_t)) {
+ ret = smp_load_acquire(&(v)->counter);
+ } else {
+ ret = arch_atomic_read(v);
+ __atomic_acquire_fence();
+ }
+
+ return ret;
}
#define arch_atomic_read_acquire arch_atomic_read_acquire
#endif
@@ -160,7 +169,12 @@ arch_atomic_read_acquire(const atomic_t *v)
static __always_inline void
arch_atomic_set_release(atomic_t *v, int i)
{
- smp_store_release(&(v)->counter, i);
+ if (__native_word(atomic_t)) {
+ smp_store_release(&(v)->counter, i);
+ } else {
+ __atomic_release_fence();
+ arch_atomic_set(v, i);
+ }
}
#define arch_atomic_set_release arch_atomic_set_release
#endif
@@ -1258,7 +1272,16 @@ arch_atomic_dec_if_positive(atomic_t *v)
static __always_inline s64
arch_atomic64_read_acquire(const atomic64_t *v)
{
- return smp_load_acquire(&(v)->counter);
+ s64 ret;
+
+ if (__native_word(atomic64_t)) {
+ ret = smp_load_acquire(&(v)->counter);
+ } else {
+ ret = arch_atomic64_read(v);
+ __atomic_acquire_fence();
+ }
+
+ return ret;
}
#define arch_atomic64_read_acquire arch_atomic64_read_acquire
#endif
@@ -1267,7 +1290,12 @@ arch_atomic64_read_acquire(const atomic64_t *v)
static __always_inline void
arch_atomic64_set_release(atomic64_t *v, s64 i)
{
- smp_store_release(&(v)->counter, i);
+ if (__native_word(atomic64_t)) {
+ smp_store_release(&(v)->counter, i);
+ } else {
+ __atomic_release_fence();
+ arch_atomic64_set(v, i);
+ }
}
#define arch_atomic64_set_release arch_atomic64_set_release
#endif
@@ -2358,4 +2386,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v)
#endif
#endif /* _LINUX_ATOMIC_FALLBACK_H */
-// cca554917d7ea73d5e3e7397dd70c484cad9b2c4
+// 8e2cc06bc0d2c0967d2f8424762bd48555ee40ae
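[Editor's note] The new branches matter for types wider than the machine word, e.g. atomic64_t on 32-bit architectures, where smp_load_acquire()/smp_store_release() would not be a single naturally-atomic access. The decision is made at compile time via __native_word(), which at the time of writing is defined in include/linux/compiler_types.h as:

/* True when the type is a size the CPU can load/store atomically. */
#define __native_word(t) \
	(sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \
	 sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))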
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 993c5628a726..e863c88df95f 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -207,14 +207,6 @@ struct backing_dev_info {
#endif
};
-enum {
- BLK_RW_ASYNC = 0,
- BLK_RW_SYNC = 1,
-};
-
-void clear_bdi_congested(struct backing_dev_info *bdi, int sync);
-void set_bdi_congested(struct backing_dev_info *bdi, int sync);
-
struct wb_lock_cookie {
bool locked;
unsigned long flags;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 483979c1b9f4..87ce24d238f3 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -135,13 +135,6 @@ static inline bool writeback_in_progress(struct bdi_writeback *wb)
struct backing_dev_info *inode_to_bdi(struct inode *inode);
-static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
-{
- return wb->congested & cong_bits;
-}
-
-long congestion_wait(int sync, long timeout);
-
static inline bool mapping_can_writeback(struct address_space *mapping)
{
return inode_to_bdi(mapping->host)->capabilities & BDI_CAP_WRITEBACK;
@@ -162,7 +155,6 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
gfp_t gfp);
void wb_memcg_offline(struct mem_cgroup *memcg);
void wb_blkcg_offline(struct blkcg *blkcg);
-int inode_congested(struct inode *inode, int cong_bits);
/**
* inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
@@ -390,50 +382,8 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg)
{
}
-static inline int inode_congested(struct inode *inode, int cong_bits)
-{
- return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
-}
-
#endif /* CONFIG_CGROUP_WRITEBACK */
-static inline int inode_read_congested(struct inode *inode)
-{
- return inode_congested(inode, 1 << WB_sync_congested);
-}
-
-static inline int inode_write_congested(struct inode *inode)
-{
- return inode_congested(inode, 1 << WB_async_congested);
-}
-
-static inline int inode_rw_congested(struct inode *inode)
-{
- return inode_congested(inode, (1 << WB_sync_congested) |
- (1 << WB_async_congested));
-}
-
-static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits)
-{
- return wb_congested(&bdi->wb, cong_bits);
-}
-
-static inline int bdi_read_congested(struct backing_dev_info *bdi)
-{
- return bdi_congested(bdi, 1 << WB_sync_congested);
-}
-
-static inline int bdi_write_congested(struct backing_dev_info *bdi)
-{
- return bdi_congested(bdi, 1 << WB_async_congested);
-}
-
-static inline int bdi_rw_congested(struct backing_dev_info *bdi)
-{
- return bdi_congested(bdi, (1 << WB_sync_congested) |
- (1 << WB_async_congested));
-}
-
const char *bdi_dev_name(struct backing_dev_info *bdi);
#endif /* _LINUX_BACKING_DEV_H */
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 049cf9421d83..3dc20c4f394c 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -8,6 +8,7 @@
#include <uapi/linux/binfmts.h>
struct filename;
+struct coredump_params;
#define CORENAME_MAX_SIZE 128
@@ -77,18 +78,6 @@ struct linux_binprm {
#define BINPRM_FLAGS_PRESERVE_ARGV0_BIT 3
#define BINPRM_FLAGS_PRESERVE_ARGV0 (1 << BINPRM_FLAGS_PRESERVE_ARGV0_BIT)
-/* Function parameter for binfmt->coredump */
-struct coredump_params {
- const kernel_siginfo_t *siginfo;
- struct pt_regs *regs;
- struct file *file;
- unsigned long limit;
- unsigned long mm_flags;
- loff_t written;
- loff_t pos;
- loff_t to_skip;
-};
-
/*
* This structure defines the functions that are used to load the binary formats that
* linux accepts.
@@ -98,8 +87,10 @@ struct linux_binfmt {
struct module *module;
int (*load_binary)(struct linux_binprm *);
int (*load_shlib)(struct file *);
+#ifdef CONFIG_COREDUMP
int (*core_dump)(struct coredump_params *cprm);
unsigned long min_coredump; /* minimal dump size */
+#endif
} __randomize_layout;
extern void __register_binfmt(struct linux_binfmt *fmt, int insert);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 117d7f248ac9..4c21f6e69e18 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -405,21 +405,25 @@ extern void bioset_exit(struct bio_set *);
extern int biovec_init_pool(mempool_t *pool, int pool_entries);
extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src);
-struct bio *bio_alloc_bioset(gfp_t gfp, unsigned short nr_iovecs,
- struct bio_set *bs);
-struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
- struct bio_set *bs);
+struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
+ unsigned int opf, gfp_t gfp_mask,
+ struct bio_set *bs);
+struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
+ unsigned short nr_vecs, unsigned int opf, struct bio_set *bs);
struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs);
extern void bio_put(struct bio *);
-extern void __bio_clone_fast(struct bio *, struct bio *);
-extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
+struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src,
+ gfp_t gfp, struct bio_set *bs);
+int bio_init_clone(struct block_device *bdev, struct bio *bio,
+ struct bio *bio_src, gfp_t gfp);
extern struct bio_set fs_bio_set;
-static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs)
+static inline struct bio *bio_alloc(struct block_device *bdev,
+ unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask)
{
- return bio_alloc_bioset(gfp_mask, nr_iovecs, &fs_bio_set);
+ return bio_alloc_bioset(bdev, nr_vecs, opf, gfp_mask, &fs_bio_set);
}
void submit_bio(struct bio *bio);
@@ -454,10 +458,10 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs)
struct request_queue;
extern int submit_bio_wait(struct bio *bio);
-extern void bio_init(struct bio *bio, struct bio_vec *table,
- unsigned short max_vecs);
+void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
+ unsigned short max_vecs, unsigned int opf);
extern void bio_uninit(struct bio *);
-extern void bio_reset(struct bio *);
+void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf);
void bio_chain(struct bio *, struct bio *);
int bio_add_page(struct bio *, struct page *, unsigned len, unsigned off);
@@ -487,8 +491,6 @@ static inline void bio_release_pages(struct bio *bio, bool mark_dirty)
__bio_release_pages(bio, mark_dirty);
}
-extern const char *bio_devname(struct bio *bio, char *buffer);
-
#define bio_dev(bio) \
disk_devt((bio)->bi_bdev->bd_disk)
@@ -515,13 +517,6 @@ static inline void bio_set_dev(struct bio *bio, struct block_device *bdev)
bio_associate_blkg(bio);
}
-static inline void bio_copy_dev(struct bio *dst, struct bio *src)
-{
- bio_clear_flag(dst, BIO_REMAPPED);
- dst->bi_bdev = src->bi_bdev;
- bio_clone_blkg_association(dst, src);
-}
-
/*
* BIO list management for use by remapping drivers (e.g. DM or MD) and loop.
*
@@ -790,6 +785,7 @@ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
bio->bi_opf |= REQ_NOWAIT;
}
-struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp);
+struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
+ unsigned int nr_pages, unsigned int opf, gfp_t gfp);
#endif /* __LINUX_BIO_H */
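[Editor's note] The allocation interfaces now take the target block device and the op/flags up front instead of having callers set them on the bio afterwards. A sketch of a caller migrated to the new convention (demo_prep_read is a stand-in name):

#include <linux/bio.h>

static struct bio *demo_prep_read(struct block_device *bdev,
				  struct page *page)
{
	struct bio *bio;

	/* bdev and REQ_OP_READ are passed at allocation time now;
	 * the old pattern was bio_alloc() + bio_set_dev() + bi_opf. */
	bio = bio_alloc(bdev, 1, REQ_OP_READ, GFP_KERNEL);
	if (!bio)
		return NULL;
	if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) {
		bio_put(bio);
		return NULL;
	}
	return bio;
}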
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index b4de2010fba5..f2ad8ed8f777 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -25,14 +25,8 @@
#include <linux/kthread.h>
#include <linux/fs.h>
-/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
-#define BLKG_STAT_CPU_BATCH (INT_MAX / 2)
-
-/* Max limits for throttle policy */
-#define THROTL_IOPS_MAX UINT_MAX
#define FC_APPID_LEN 129
-
#ifdef CONFIG_BLK_CGROUP
enum blkg_iostat_type {
@@ -44,6 +38,7 @@ enum blkg_iostat_type {
};
struct blkcg_gq;
+struct blkg_policy_data;
struct blkcg {
struct cgroup_subsys_state css;
@@ -76,36 +71,6 @@ struct blkg_iostat_set {
struct blkg_iostat last;
};
-/*
- * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a
- * request_queue (q). This is used by blkcg policies which need to track
- * information per blkcg - q pair.
- *
- * There can be multiple active blkcg policies and each blkg:policy pair is
- * represented by a blkg_policy_data which is allocated and freed by each
- * policy's pd_alloc/free_fn() methods. A policy can allocate private data
- * area by allocating larger data structure which embeds blkg_policy_data
- * at the beginning.
- */
-struct blkg_policy_data {
- /* the blkg and policy id this per-policy data belongs to */
- struct blkcg_gq *blkg;
- int plid;
-};
-
-/*
- * Policies that need to keep per-blkcg data which is independent from any
- * request_queue associated to it should implement cpd_alloc/free_fn()
- * methods. A policy can allocate private data area by allocating larger
- * data structure which embeds blkcg_policy_data at the beginning.
- * cpd_init() is invoked to let each policy handle per-blkcg data.
- */
-struct blkcg_policy_data {
- /* the blkcg and policy id this per-policy data belongs to */
- struct blkcg *blkcg;
- int plid;
-};
-
/* association between a blk cgroup and a request queue */
struct blkcg_gq {
/* Pointer to the associated request_queue */
@@ -141,93 +106,11 @@ struct blkcg_gq {
struct rcu_head rcu_head;
};
-typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
-typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
-typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
-typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
-typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp,
- struct request_queue *q, struct blkcg *blkcg);
-typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
-typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
-typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
-typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
-typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
-typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
- struct seq_file *s);
-
-struct blkcg_policy {
- int plid;
- /* cgroup files for the policy */
- struct cftype *dfl_cftypes;
- struct cftype *legacy_cftypes;
-
- /* operations */
- blkcg_pol_alloc_cpd_fn *cpd_alloc_fn;
- blkcg_pol_init_cpd_fn *cpd_init_fn;
- blkcg_pol_free_cpd_fn *cpd_free_fn;
- blkcg_pol_bind_cpd_fn *cpd_bind_fn;
-
- blkcg_pol_alloc_pd_fn *pd_alloc_fn;
- blkcg_pol_init_pd_fn *pd_init_fn;
- blkcg_pol_online_pd_fn *pd_online_fn;
- blkcg_pol_offline_pd_fn *pd_offline_fn;
- blkcg_pol_free_pd_fn *pd_free_fn;
- blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn;
- blkcg_pol_stat_pd_fn *pd_stat_fn;
-};
-
-extern struct blkcg blkcg_root;
extern struct cgroup_subsys_state * const blkcg_root_css;
-extern bool blkcg_debug_stats;
-
-struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
- struct request_queue *q, bool update_hint);
-int blkcg_init_queue(struct request_queue *q);
-void blkcg_exit_queue(struct request_queue *q);
-
-/* Blkio controller policy registration */
-int blkcg_policy_register(struct blkcg_policy *pol);
-void blkcg_policy_unregister(struct blkcg_policy *pol);
-int blkcg_activate_policy(struct request_queue *q,
- const struct blkcg_policy *pol);
-void blkcg_deactivate_policy(struct request_queue *q,
- const struct blkcg_policy *pol);
-
-const char *blkg_dev_name(struct blkcg_gq *blkg);
-void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
- u64 (*prfill)(struct seq_file *,
- struct blkg_policy_data *, int),
- const struct blkcg_policy *pol, int data,
- bool show_total);
-u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
-
-struct blkg_conf_ctx {
- struct block_device *bdev;
- struct blkcg_gq *blkg;
- char *body;
-};
-
-struct block_device *blkcg_conf_open_bdev(char **inputp);
-int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
- char *input, struct blkg_conf_ctx *ctx);
-void blkg_conf_finish(struct blkg_conf_ctx *ctx);
-/**
- * blkcg_css - find the current css
- *
- * Find the css associated with either the kthread or the current task.
- * This may return a dying css, so it is up to the caller to use tryget logic
- * to confirm it is alive and well.
- */
-static inline struct cgroup_subsys_state *blkcg_css(void)
-{
- struct cgroup_subsys_state *css;
-
- css = kthread_blkcg();
- if (css)
- return css;
- return task_css(current, io_cgrp_id);
-}
+void blkcg_destroy_blkgs(struct blkcg *blkcg);
+void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
+void blkcg_maybe_throttle_current(void);
static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
@@ -235,27 +118,6 @@ static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
}
/**
- * __bio_blkcg - internal, inconsistent version to get blkcg
- *
- * DO NOT USE.
- * This function is inconsistent and consequently is dangerous to use. The
- * first part of the function returns a blkcg where a reference is owned by the
- * bio. This means it does not need to be rcu protected as it cannot go away
- * with the bio owning a reference to it. However, the latter potentially gets
- * it from task_css(). This can race against task migration and the cgroup
- * dying. It is also semantically different as it must be called rcu protected
- * and is susceptible to failure when trying to get a reference to it.
- * Therefore, it is not ok to assume that *_get() will always succeed on the
- * blkcg returned here.
- */
-static inline struct blkcg *__bio_blkcg(struct bio *bio)
-{
- if (bio && bio->bi_blkg)
- return bio->bi_blkg->blkcg;
- return css_to_blkcg(blkcg_css());
-}
-
-/**
* bio_blkcg - grab the blkcg associated with a bio
* @bio: target bio
*
@@ -291,22 +153,6 @@ static inline bool blk_cgroup_congested(void)
}
/**
- * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
- * @return: true if this bio needs to be submitted with the root blkg context.
- *
- * In order to avoid priority inversions we sometimes need to issue a bio as if
- * it were attached to the root blkg, and then backcharge to the actual owning
- * blkg. The idea is we do bio_blkcg() to look up the actual context for the
- * bio and attach the appropriate blkg to the bio. Then we call this helper and
- * if it is true run with the root blkg for that queue and then do any
- * backcharging to the originating cgroup once the io is complete.
- */
-static inline bool bio_issue_as_root_blkg(struct bio *bio)
-{
- return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
-}
-
-/**
* blkcg_parent - get the parent of a blkcg
* @blkcg: blkcg of interest
*
@@ -318,96 +164,6 @@ static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
}
/**
- * __blkg_lookup - internal version of blkg_lookup()
- * @blkcg: blkcg of interest
- * @q: request_queue of interest
- * @update_hint: whether to update lookup hint with the result or not
- *
- * This is internal version and shouldn't be used by policy
- * implementations. Looks up blkgs for the @blkcg - @q pair regardless of
- * @q's bypass state. If @update_hint is %true, the caller should be
- * holding @q->queue_lock and lookup hint is updated on success.
- */
-static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
- struct request_queue *q,
- bool update_hint)
-{
- struct blkcg_gq *blkg;
-
- if (blkcg == &blkcg_root)
- return q->root_blkg;
-
- blkg = rcu_dereference(blkcg->blkg_hint);
- if (blkg && blkg->q == q)
- return blkg;
-
- return blkg_lookup_slowpath(blkcg, q, update_hint);
-}
-
-/**
- * blkg_lookup - lookup blkg for the specified blkcg - q pair
- * @blkcg: blkcg of interest
- * @q: request_queue of interest
- *
- * Lookup blkg for the @blkcg - @q pair. This function should be called
- * under RCU read lock.
- */
-static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
- struct request_queue *q)
-{
- WARN_ON_ONCE(!rcu_read_lock_held());
- return __blkg_lookup(blkcg, q, false);
-}
-
-/**
- * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
- * @q: request_queue of interest
- *
- * Lookup blkg for @q at the root level. See also blkg_lookup().
- */
-static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
-{
- return q->root_blkg;
-}
-
-/**
- * blkg_to_pdata - get policy private data
- * @blkg: blkg of interest
- * @pol: policy of interest
- *
- * Return pointer to private data associated with the @blkg-@pol pair.
- */
-static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
- struct blkcg_policy *pol)
-{
- return blkg ? blkg->pd[pol->plid] : NULL;
-}
-
-static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
- struct blkcg_policy *pol)
-{
- return blkcg ? blkcg->cpd[pol->plid] : NULL;
-}
-
-/**
- * pdata_to_blkg - get blkg associated with policy private data
- * @pd: policy private data of interest
- *
- * @pd is policy private data. Determine the blkg it's associated with.
- */
-static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
-{
- return pd ? pd->blkg : NULL;
-}
-
-static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
-{
- return cpd ? cpd->blkcg : NULL;
-}
-
-extern void blkcg_destroy_blkgs(struct blkcg *blkcg);
-
-/**
* blkcg_pin_online - pin online state
* @blkcg: blkcg of interest
*
@@ -439,231 +195,24 @@ static inline void blkcg_unpin_online(struct blkcg *blkcg)
} while (blkcg);
}
-/**
- * blkg_path - format cgroup path of blkg
- * @blkg: blkg of interest
- * @buf: target buffer
- * @buflen: target buffer length
- *
- * Format the path of the cgroup of @blkg into @buf.
- */
-static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
-{
- return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
-}
-
-/**
- * blkg_get - get a blkg reference
- * @blkg: blkg to get
- *
- * The caller should be holding an existing reference.
- */
-static inline void blkg_get(struct blkcg_gq *blkg)
-{
- percpu_ref_get(&blkg->refcnt);
-}
-
-/**
- * blkg_tryget - try and get a blkg reference
- * @blkg: blkg to get
- *
- * This is for use when doing an RCU lookup of the blkg. We may be in the midst
- * of freeing this blkg, so we can only use it if the refcnt is not zero.
- */
-static inline bool blkg_tryget(struct blkcg_gq *blkg)
-{
- return blkg && percpu_ref_tryget(&blkg->refcnt);
-}
-
-/**
- * blkg_put - put a blkg reference
- * @blkg: blkg to put
- */
-static inline void blkg_put(struct blkcg_gq *blkg)
-{
- percpu_ref_put(&blkg->refcnt);
-}
-
-/**
- * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
- * @d_blkg: loop cursor pointing to the current descendant
- * @pos_css: used for iteration
- * @p_blkg: target blkg to walk descendants of
- *
- * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU
- * read locked. If called under either blkcg or queue lock, the iteration
- * is guaranteed to include all and only online blkgs. The caller may
- * update @pos_css by calling css_rightmost_descendant() to skip subtree.
- * @p_blkg is included in the iteration and the first node to be visited.
- */
-#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \
- css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \
- if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
- (p_blkg)->q, false)))
-
-/**
- * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
- * @d_blkg: loop cursor pointing to the current descendant
- * @pos_css: used for iteration
- * @p_blkg: target blkg to walk descendants of
- *
- * Similar to blkg_for_each_descendant_pre() but performs post-order
- * traversal instead. Synchronization rules are the same. @p_blkg is
- * included in the iteration and the last node to be visited.
- */
-#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \
- css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \
- if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
- (p_blkg)->q, false)))
-
-bool __blkcg_punt_bio_submit(struct bio *bio);
-
-static inline bool blkcg_punt_bio_submit(struct bio *bio)
-{
- if (bio->bi_opf & REQ_CGROUP_PUNT)
- return __blkcg_punt_bio_submit(bio);
- else
- return false;
-}
-
-static inline void blkcg_bio_issue_init(struct bio *bio)
-{
- bio_issue_init(&bio->bi_issue, bio_sectors(bio));
-}
-
-static inline void blkcg_use_delay(struct blkcg_gq *blkg)
-{
- if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
- return;
- if (atomic_add_return(1, &blkg->use_delay) == 1)
- atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
-}
-
-static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
-{
- int old = atomic_read(&blkg->use_delay);
-
- if (WARN_ON_ONCE(old < 0))
- return 0;
- if (old == 0)
- return 0;
-
- /*
- * We do this song and dance because we can race with somebody else
- * adding or removing delay. If we just did an atomic_dec we'd end up
- * negative and we'd already be in trouble. We need to subtract 1 and
- * then check to see if we were the last delay so we can drop the
- * congestion count on the cgroup.
- */
- while (old) {
- int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
- if (cur == old)
- break;
- old = cur;
- }
-
- if (old == 0)
- return 0;
- if (old == 1)
- atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
- return 1;
-}
-
-/**
- * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount
- * @blkg: target blkg
- * @delay: delay duration in nsecs
- *
- * When enabled with this function, the delay is not decayed and must be
- * explicitly cleared with blkcg_clear_delay(). Must not be mixed with
- * blkcg_[un]use_delay() and blkcg_add_delay() usages.
- */
-static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay)
-{
- int old = atomic_read(&blkg->use_delay);
-
- /* We only want 1 person setting the congestion count for this blkg. */
- if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old)
- atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
-
- atomic64_set(&blkg->delay_nsec, delay);
-}
-
-/**
- * blkcg_clear_delay - Disable allocator delay mechanism
- * @blkg: target blkg
- *
- * Disable use_delay mechanism. See blkcg_set_delay().
- */
-static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
-{
- int old = atomic_read(&blkg->use_delay);
-
- /* We only want 1 person clearing the congestion count for this blkg. */
- if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old)
- atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
-}
-
-void blk_cgroup_bio_start(struct bio *bio);
-void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
-void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
-void blkcg_maybe_throttle_current(void);
#else /* CONFIG_BLK_CGROUP */
struct blkcg {
};
-struct blkg_policy_data {
-};
-
-struct blkcg_policy_data {
-};
-
struct blkcg_gq {
};
-struct blkcg_policy {
-};
-
#define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))
static inline void blkcg_maybe_throttle_current(void) { }
static inline bool blk_cgroup_congested(void) { return false; }
#ifdef CONFIG_BLOCK
-
static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }
-
-static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
-static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
-{ return NULL; }
-static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
-static inline void blkcg_exit_queue(struct request_queue *q) { }
-static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
-static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
-static inline int blkcg_activate_policy(struct request_queue *q,
- const struct blkcg_policy *pol) { return 0; }
-static inline void blkcg_deactivate_policy(struct request_queue *q,
- const struct blkcg_policy *pol) { }
-
-static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
+#endif /* CONFIG_BLOCK */
-static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
- struct blkcg_policy *pol) { return NULL; }
-static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
-static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
-static inline void blkg_get(struct blkcg_gq *blkg) { }
-static inline void blkg_put(struct blkcg_gq *blkg) { }
-
-static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
-static inline void blkcg_bio_issue_init(struct bio *bio) { }
-static inline void blk_cgroup_bio_start(struct bio *bio) { }
-
-#define blk_queue_for_each_rl(rl, q) \
- for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
-
-#endif /* CONFIG_BLOCK */
#endif /* CONFIG_BLK_CGROUP */
#ifdef CONFIG_BLK_CGROUP_FC_APPID
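
The reference-counting helpers removed above are thin wrappers around percpu_ref, and the pattern they enabled survives in the block core. A minimal sketch of the RCU lookup-and-pin idiom, assuming blkg_lookup() keeps its usual form in the now-private block-internal header; pin_blkg() is an illustrative name:

static struct blkcg_gq *pin_blkg(struct blkcg *blkcg, struct request_queue *q)
{
	struct blkcg_gq *blkg;

	rcu_read_lock();
	blkg = blkg_lookup(blkcg, q);
	/* The blkg may be mid-free; use it only if its refcnt is still live. */
	if (blkg && !percpu_ref_tryget(&blkg->refcnt))
		blkg = NULL;
	rcu_read_unlock();

	return blkg;	/* caller drops the reference with percpu_ref_put() */
}
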
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d319ffa59354..7aa5c54901a9 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -917,8 +917,7 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq)
}
#define queue_for_each_hw_ctx(q, hctx, i) \
- for ((i) = 0; (i) < (q)->nr_hw_queues && \
- ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++)
+ xa_for_each(&(q)->hctx_table, (i), (hctx))
#define hctx_for_each_ctx(hctx, ctx, i) \
for ((i) = 0; (i) < (hctx)->nr_ctx && \
@@ -952,8 +951,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
struct bio_set *bs, gfp_t gfp_mask,
int (*bio_ctr)(struct bio *, struct bio *, void *), void *data);
void blk_rq_unprep_clone(struct request *rq);
-blk_status_t blk_insert_cloned_request(struct request_queue *q,
- struct request *rq);
+blk_status_t blk_insert_cloned_request(struct request *rq);
struct rq_map_data {
struct page **pages;
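
With queue_hw_ctx replaced by an xarray (see the request_queue hunk further down), the iterator is now a plain xa_for_each() walk, so cursor variables must be unsigned long rather than int. A minimal sketch; count_hctxs() is an illustrative name:

static unsigned int count_hctxs(struct request_queue *q)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned long i;	/* xa_for_each() requires an unsigned long cursor */
	unsigned int n = 0;

	queue_for_each_hw_ctx(q, hctx, i)
		n++;
	return n;
}
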
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index fe065c394fff..0c3563b45fe9 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -153,6 +153,13 @@ typedef u8 __bitwise blk_status_t;
*/
#define BLK_STS_ZONE_ACTIVE_RESOURCE ((__force blk_status_t)16)
+/*
+ * BLK_STS_OFFLINE is returned from the driver when the target device is offline
+ * or is being taken offline. This could help differentiate the case where a
+ * device is intentionally being shut down from a real I/O error.
+ */
+#define BLK_STS_OFFLINE ((__force blk_status_t)17)
+
/**
* blk_path_error - returns true if error may be path related
* @error: status the request was completed with
@@ -317,7 +324,8 @@ enum {
BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion
* of this bio. */
BIO_CGROUP_ACCT, /* has been accounted to a cgroup */
- BIO_TRACKED, /* set if bio goes through the rq_qos path */
+ BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */
+ BIO_QOS_MERGED, /* bio went through rq_qos merge path */
BIO_REMAPPED,
BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */
BIO_PERCPU_CACHE, /* can participate in per-cpu alloc cache */
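
A sketch of a completion handler that treats the new status specially, logging an intentional shutdown less noisily than a genuine failure; the handler name is illustrative and blk_status_to_errno() is assumed to behave as usual:

static void sketch_end_io(struct bio *bio)
{
	if (bio->bi_status == BLK_STS_OFFLINE)
		/* device deliberately going away, not a media or path failure */
		pr_debug("bio failed: target offline\n");
	else if (bio->bi_status)
		pr_err("bio failed: %d\n", blk_status_to_errno(bio->bi_status));
	bio_put(bio);
}
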
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9c95df26fc26..eb27312a1b8f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1,9 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Portions Copyright (C) 1992 Drew Eckhardt
+ */
#ifndef _LINUX_BLKDEV_H
#define _LINUX_BLKDEV_H
-#include <linux/sched.h>
-#include <linux/genhd.h>
+#include <linux/types.h>
+#include <linux/blk_types.h>
+#include <linux/device.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/minmax.h>
@@ -12,11 +16,15 @@
#include <linux/wait.h>
#include <linux/bio.h>
#include <linux/gfp.h>
+#include <linux/kdev_t.h>
#include <linux/rcupdate.h>
#include <linux/percpu-refcount.h>
#include <linux/blkzoned.h>
+#include <linux/sched.h>
#include <linux/sbitmap.h>
#include <linux/srcu.h>
+#include <linux/uuid.h>
+#include <linux/xarray.h>
struct module;
struct request_queue;
@@ -33,6 +41,10 @@ struct blk_queue_stats;
struct blk_stat_callback;
struct blk_crypto_profile;
+extern const struct device_type disk_type;
+extern struct device_type part_type;
+extern struct class block_class;
+
/* Must be consistent with blk_mq_poll_stats_bkt() */
#define BLK_MQ_POLL_STATS_BKTS 16
@@ -45,6 +57,145 @@ struct blk_crypto_profile;
*/
#define BLKCG_MAX_POLS 6
+#define DISK_MAX_PARTS 256
+#define DISK_NAME_LEN 32
+
+#define PARTITION_META_INFO_VOLNAMELTH 64
+/*
+ * Enough for the string representation of any kind of UUID plus NULL.
+ * EFI UUID is 36 characters. MSDOS UUID is 11 characters.
+ */
+#define PARTITION_META_INFO_UUIDLTH (UUID_STRING_LEN + 1)
+
+struct partition_meta_info {
+ char uuid[PARTITION_META_INFO_UUIDLTH];
+ u8 volname[PARTITION_META_INFO_VOLNAMELTH];
+};
+
+/**
+ * DOC: genhd capability flags
+ *
+ * ``GENHD_FL_REMOVABLE``: indicates that the block device gives access to
+ * removable media. When set, the device remains present even when media is not
+ * inserted. Shall not be set for devices which are removed entirely when the
+ * media is removed.
+ *
+ * ``GENHD_FL_HIDDEN``: the block device is hidden; it doesn't produce events,
+ * doesn't appear in sysfs, and can't be opened from userspace or using
+ * blkdev_get*. Used for the underlying components of multipath devices.
+ *
+ * ``GENHD_FL_NO_PART``: partition support is disabled. The kernel will not
+ * scan for partitions from add_disk, and users can't add partitions manually.
+ *
+ */
+enum {
+ GENHD_FL_REMOVABLE = 1 << 0,
+ GENHD_FL_HIDDEN = 1 << 1,
+ GENHD_FL_NO_PART = 1 << 2,
+};
+
+enum {
+ DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */
+ DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */
+};
+
+enum {
+ /* Poll even if events_poll_msecs is unset */
+ DISK_EVENT_FLAG_POLL = 1 << 0,
+ /* Forward events to udev */
+ DISK_EVENT_FLAG_UEVENT = 1 << 1,
+ /* Block event polling when open for exclusive write */
+ DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE = 1 << 2,
+};
+
+struct disk_events;
+struct badblocks;
+
+struct blk_integrity {
+ const struct blk_integrity_profile *profile;
+ unsigned char flags;
+ unsigned char tuple_size;
+ unsigned char interval_exp;
+ unsigned char tag_size;
+};
+
+struct gendisk {
+ /*
+ * major/first_minor/minors should not be set by any new driver, the
+ * block core will take care of allocating them automatically.
+ */
+ int major;
+ int first_minor;
+ int minors;
+
+ char disk_name[DISK_NAME_LEN]; /* name of major driver */
+
+ unsigned short events; /* supported events */
+ unsigned short event_flags; /* flags related to event processing */
+
+ struct xarray part_tbl;
+ struct block_device *part0;
+
+ const struct block_device_operations *fops;
+ struct request_queue *queue;
+ void *private_data;
+
+ int flags;
+ unsigned long state;
+#define GD_NEED_PART_SCAN 0
+#define GD_READ_ONLY 1
+#define GD_DEAD 2
+#define GD_NATIVE_CAPACITY 3
+#define GD_ADDED 4
+
+ struct mutex open_mutex; /* open/close mutex */
+ unsigned open_partitions; /* number of open partitions */
+
+ struct backing_dev_info *bdi;
+ struct kobject *slave_dir;
+#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
+ struct list_head slave_bdevs;
+#endif
+ struct timer_rand_state *random;
+ atomic_t sync_io; /* RAID */
+ struct disk_events *ev;
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+ struct kobject integrity_kobj;
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
+#if IS_ENABLED(CONFIG_CDROM)
+ struct cdrom_device_info *cdi;
+#endif
+ int node_id;
+ struct badblocks *bb;
+ struct lockdep_map lockdep_map;
+ u64 diskseq;
+};
+
+static inline bool disk_live(struct gendisk *disk)
+{
+ return !inode_unhashed(disk->part0->bd_inode);
+}
+
+/*
+ * The gendisk is refcounted by the part0 block_device, and the bd_device
+ * therein is also used for device model presentation in sysfs.
+ */
+#define dev_to_disk(device) \
+ (dev_to_bdev(device)->bd_disk)
+#define disk_to_dev(disk) \
+ (&((disk)->part0->bd_device))
+
+#if IS_REACHABLE(CONFIG_CDROM)
+#define disk_to_cdi(disk) ((disk)->cdi)
+#else
+#define disk_to_cdi(disk) NULL
+#endif
+
+static inline dev_t disk_devt(struct gendisk *disk)
+{
+ return MKDEV(disk->major, disk->first_minor);
+}
+
static inline int blk_validate_block_size(unsigned long bsize)
{
if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
@@ -204,7 +355,7 @@ struct request_queue {
unsigned int queue_depth;
/* hw dispatch queues */
- struct blk_mq_hw_ctx **queue_hw_ctx;
+ struct xarray hctx_table;
unsigned int nr_hw_queues;
/*
@@ -262,6 +413,7 @@ struct request_queue {
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
struct blk_crypto_profile *crypto_profile;
+ struct kobject *crypto_kobject;
#endif
unsigned int rq_timeout;
@@ -596,6 +748,118 @@ static inline unsigned int blk_queue_depth(struct request_queue *q)
#define for_each_bio(_bio) \
for (; _bio; _bio = _bio->bi_next)
+int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
+ const struct attribute_group **groups);
+static inline int __must_check add_disk(struct gendisk *disk)
+{
+ return device_add_disk(NULL, disk, NULL);
+}
+void del_gendisk(struct gendisk *gp);
+void invalidate_disk(struct gendisk *disk);
+void set_disk_ro(struct gendisk *disk, bool read_only);
+void disk_uevent(struct gendisk *disk, enum kobject_action action);
+
+static inline int get_disk_ro(struct gendisk *disk)
+{
+ return disk->part0->bd_read_only ||
+ test_bit(GD_READ_ONLY, &disk->state);
+}
+
+static inline int bdev_read_only(struct block_device *bdev)
+{
+ return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
+}
+
+bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
+bool disk_force_media_change(struct gendisk *disk, unsigned int events);
+
+void add_disk_randomness(struct gendisk *disk) __latent_entropy;
+void rand_initialize_disk(struct gendisk *disk);
+
+static inline sector_t get_start_sect(struct block_device *bdev)
+{
+ return bdev->bd_start_sect;
+}
+
+static inline sector_t bdev_nr_sectors(struct block_device *bdev)
+{
+ return bdev->bd_nr_sectors;
+}
+
+static inline loff_t bdev_nr_bytes(struct block_device *bdev)
+{
+ return (loff_t)bdev_nr_sectors(bdev) << SECTOR_SHIFT;
+}
+
+static inline sector_t get_capacity(struct gendisk *disk)
+{
+ return bdev_nr_sectors(disk->part0);
+}
+
+static inline u64 sb_bdev_nr_blocks(struct super_block *sb)
+{
+ return bdev_nr_sectors(sb->s_bdev) >>
+ (sb->s_blocksize_bits - SECTOR_SHIFT);
+}
+
+int bdev_disk_changed(struct gendisk *disk, bool invalidate);
+
+struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
+ struct lock_class_key *lkclass);
+void put_disk(struct gendisk *disk);
+struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass);
+
+/**
+ * blk_alloc_disk - allocate a gendisk structure
+ * @node_id: numa node to allocate on
+ *
+ * Allocate and pre-initialize a gendisk structure for use with BIO based
+ * drivers.
+ *
+ * Context: can sleep
+ */
+#define blk_alloc_disk(node_id) \
+({ \
+ static struct lock_class_key __key; \
+ \
+ __blk_alloc_disk(node_id, &__key); \
+})
+void blk_cleanup_disk(struct gendisk *disk);
+
+int __register_blkdev(unsigned int major, const char *name,
+ void (*probe)(dev_t devt));
+#define register_blkdev(major, name) \
+ __register_blkdev(major, name, NULL)
+void unregister_blkdev(unsigned int major, const char *name);
+
+bool bdev_check_media_change(struct block_device *bdev);
+int __invalidate_device(struct block_device *bdev, bool kill_dirty);
+void set_capacity(struct gendisk *disk, sector_t size);
+
+#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
+int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
+void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk);
+int bd_register_pending_holders(struct gendisk *disk);
+#else
+static inline int bd_link_disk_holder(struct block_device *bdev,
+ struct gendisk *disk)
+{
+ return 0;
+}
+static inline void bd_unlink_disk_holder(struct block_device *bdev,
+ struct gendisk *disk)
+{
+}
+static inline int bd_register_pending_holders(struct gendisk *disk)
+{
+ return 0;
+}
+#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */
+
+dev_t part_devt(struct gendisk *disk, u8 partno);
+void inc_diskseq(struct gendisk *disk);
+dev_t blk_lookup_devt(const char *name, int partno);
+void blk_request_module(dev_t devt);
extern int blk_register_queue(struct gendisk *disk);
extern void blk_unregister_queue(struct gendisk *disk);
@@ -748,7 +1012,8 @@ extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
bool __must_check blk_get_queue(struct request_queue *);
extern void blk_put_queue(struct request_queue *);
-extern void blk_set_queue_dying(struct request_queue *);
+
+void blk_mark_disk_dead(struct gendisk *disk);
#ifdef CONFIG_BLOCK
/*
@@ -791,14 +1056,11 @@ extern void blk_start_plug(struct blk_plug *);
extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short);
extern void blk_finish_plug(struct blk_plug *);
-void blk_flush_plug(struct blk_plug *plug, bool from_schedule);
-
-static inline bool blk_needs_flush_plug(struct task_struct *tsk)
+void __blk_flush_plug(struct blk_plug *plug, bool from_schedule);
+static inline void blk_flush_plug(struct blk_plug *plug, bool async)
{
- struct blk_plug *plug = tsk->plug;
-
- return plug &&
- (plug->mq_list || !list_empty(&plug->cb_list));
+ if (plug)
+ __blk_flush_plug(plug, async);
}
int blkdev_issue_flush(struct block_device *bdev);
@@ -824,11 +1086,6 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async)
{
}
-static inline bool blk_needs_flush_plug(struct task_struct *tsk)
-{
- return false;
-}
-
static inline int blkdev_issue_flush(struct block_device *bdev)
{
return 0;
@@ -1210,6 +1467,7 @@ struct block_device_operations {
void (*unlock_native_capacity) (struct gendisk *);
int (*getgeo)(struct block_device *, struct hd_geometry *);
int (*set_read_only)(struct block_device *bdev, bool ro);
+ void (*free_disk)(struct gendisk *disk);
/* this callback is with swap_lock and sometimes page table lock held */
void (*swap_slot_free_notify) (struct block_device *, unsigned long);
int (*report_zones)(struct gendisk *, sector_t sector,
@@ -1258,6 +1516,7 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
void disk_end_io_acct(struct gendisk *disk, unsigned int op,
unsigned long start_time);
+void bio_start_io_acct_time(struct bio *bio, unsigned long start_time);
unsigned long bio_start_io_acct(struct bio *bio);
void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
struct block_device *orig_bdev);
@@ -1265,7 +1524,7 @@ void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
/**
* bio_end_io_acct - end I/O accounting for bio based drivers
* @bio: bio to end account for
- * @start: start time returned by bio_start_io_acct()
+ * @start_time: start time returned by bio_start_io_acct()
*/
static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time)
{
@@ -1310,6 +1569,7 @@ void invalidate_bdev(struct block_device *bdev);
int sync_blockdev(struct block_device *bdev);
int sync_blockdev_nowait(struct block_device *bdev);
void sync_bdevs(bool wait);
+void printk_all_partitions(void);
#else
static inline void invalidate_bdev(struct block_device *bdev)
{
@@ -1325,7 +1585,11 @@ static inline int sync_blockdev_nowait(struct block_device *bdev)
static inline void sync_bdevs(bool wait)
{
}
-#endif
+static inline void printk_all_partitions(void)
+{
+}
+#endif /* CONFIG_BLOCK */
+
int fsync_bdev(struct block_device *bdev);
int freeze_bdev(struct block_device *bdev);
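
Taken together, the allocation and registration interfaces consolidated into this header support a minimal BIO-based driver along the following lines. A sketch only: the sketch_* names are illustrative, the capacity is arbitrary, and a real driver needs a matching del_gendisk()/blk_cleanup_disk() pair on removal:

static void sketch_submit_bio(struct bio *bio)
{
	/* service the I/O here, then complete it */
	bio_endio(bio);
}

static const struct block_device_operations sketch_fops = {
	.owner		= THIS_MODULE,
	.submit_bio	= sketch_submit_bio,
};

static int sketch_add_disk(void)
{
	struct gendisk *disk;
	int ret;

	disk = blk_alloc_disk(NUMA_NO_NODE);	/* may sleep */
	if (!disk)
		return -ENOMEM;

	snprintf(disk->disk_name, DISK_NAME_LEN, "sketch0");
	disk->fops = &sketch_fops;
	disk->flags |= GENHD_FL_NO_PART;	/* skip partition scanning */
	set_capacity(disk, 2048);		/* 1 MiB in 512-byte sectors */

	ret = add_disk(disk);			/* add_disk() is __must_check */
	if (ret)
		blk_cleanup_disk(disk);
	return ret;
}
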
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index fa517ae604ad..3121d1fc8e75 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -209,11 +209,9 @@ static inline bool map_value_has_timer(const struct bpf_map *map)
static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
{
if (unlikely(map_value_has_spin_lock(map)))
- *(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
- (struct bpf_spin_lock){};
+ memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock));
if (unlikely(map_value_has_timer(map)))
- *(struct bpf_timer *)(dst + map->timer_off) =
- (struct bpf_timer){};
+ memset(dst + map->timer_off, 0, sizeof(struct bpf_timer));
}
/* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */
@@ -224,7 +222,8 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
if (unlikely(map_value_has_spin_lock(map))) {
s_off = map->spin_lock_off;
s_sz = sizeof(struct bpf_spin_lock);
- } else if (unlikely(map_value_has_timer(map))) {
+ }
+ if (unlikely(map_value_has_timer(map))) {
t_off = map->timer_off;
t_sz = sizeof(struct bpf_timer);
}
@@ -1793,6 +1792,11 @@ struct bpf_core_ctx {
int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
int relo_idx, void *insn);
+static inline bool unprivileged_ebpf_enabled(void)
+{
+ return !sysctl_unprivileged_bpf_disabled;
+}
+
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -2012,6 +2016,12 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
{
return NULL;
}
+
+static inline bool unprivileged_ebpf_enabled(void)
+{
+ return false;
+}
+
#endif /* CONFIG_BPF_SYSCALL */
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
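
unprivileged_ebpf_enabled() exists in both the CONFIG_BPF_SYSCALL and stub halves, so non-BPF code can test the sysctl unconditionally; the Spectre-mitigation reporting updated elsewhere in this series is the intended consumer. A hedged sketch of such a caller; the function name and message text are illustrative:

#include <linux/bpf.h>

static void sketch_report_mitigation(void)
{
	/* Compiles the same whether or not BPF_SYSCALL is configured. */
	if (unprivileged_ebpf_enabled())
		pr_warn_once("mitigation weakened by unprivileged eBPF\n");
}
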
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 36f33685c8c0..bcb4fe9b8575 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -144,6 +144,7 @@ BUFFER_FNS(Defer_Completion, defer_completion)
((struct buffer_head *)page_private(page)); \
})
#define page_has_buffers(page) PagePrivate(page)
+#define folio_buffers(folio) folio_get_private(folio)
void buffer_check_dirty_writeback(struct page *page,
bool *dirty, bool *writeback);
@@ -216,16 +217,14 @@ extern int buffer_heads_over_limit;
* Generic address_space_operations implementations for buffer_head-backed
* address_spaces.
*/
-void block_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length);
+void block_invalidate_folio(struct folio *folio, size_t offset, size_t length);
int block_write_full_page(struct page *page, get_block_t *get_block,
struct writeback_control *wbc);
int __block_write_full_page(struct inode *inode, struct page *page,
get_block_t *get_block, struct writeback_control *wbc,
bh_end_io_t *handler);
int block_read_full_page(struct page*, get_block_t*);
-int block_is_partially_uptodate(struct page *page, unsigned long from,
- unsigned long count);
+bool block_is_partially_uptodate(struct folio *, size_t from, size_t count);
int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
unsigned flags, struct page **pagep, get_block_t *get_block);
int __block_write_begin(struct page *page, loff_t pos, unsigned len,
@@ -398,7 +397,7 @@ __bread(struct block_device *bdev, sector_t block, unsigned size)
return __bread_gfp(bdev, block, size, __GFP_MOVABLE);
}
-extern int __set_page_dirty_buffers(struct page *page);
+bool block_dirty_folio(struct address_space *mapping, struct folio *folio);
#else /* CONFIG_BLOCK */
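
For a buffer_head-backed filesystem the converted entry points slot directly into its address_space_operations, and folio_buffers() covers code that already holds a folio. A sketch, with sketch_* names illustrative and the remaining hooks elided:

static const struct address_space_operations sketch_aops = {
	.dirty_folio		= block_dirty_folio,
	.invalidate_folio	= block_invalidate_folio,
	.is_partially_uptodate	= block_is_partially_uptodate,
	/* readpage, writepages, etc. elided */
};

static bool sketch_folio_has_buffers(struct folio *folio)
{
	/* folio_get_private() underneath; NULL when no buffers are attached */
	return folio_buffers(folio) != NULL;
}
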
diff --git a/include/linux/cacheflush.h b/include/linux/cacheflush.h
index fef8b607f97e..a6189d21f2ba 100644
--- a/include/linux/cacheflush.h
+++ b/include/linux/cacheflush.h
@@ -4,6 +4,8 @@
#include <asm/cacheflush.h>
+struct folio;
+
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
void flush_dcache_folio(struct folio *folio);
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 6a89ea410e43..edf62eaa6285 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -35,6 +35,7 @@
#define CEPH_OPT_TCP_NODELAY (1<<4) /* TCP_NODELAY on TCP sockets */
#define CEPH_OPT_NOMSGSIGN (1<<5) /* don't sign msgs (msgr1) */
#define CEPH_OPT_ABORT_ON_FULL (1<<6) /* abort w/ ENOSPC when full */
+#define CEPH_OPT_RXBOUNCE (1<<7) /* double-buffer read data */
#define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY)
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index ff99ce094cfa..e7f2fb2fc207 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -383,6 +383,10 @@ struct ceph_connection_v2_info {
struct ceph_gcm_nonce in_gcm_nonce;
struct ceph_gcm_nonce out_gcm_nonce;
+ struct page **in_enc_pages;
+ int in_enc_page_cnt;
+ int in_enc_resid;
+ int in_enc_i;
struct page **out_enc_pages;
int out_enc_page_cnt;
int out_enc_resid;
@@ -457,6 +461,7 @@ struct ceph_connection {
struct ceph_msg *out_msg; /* sending message (== tail of
out_sent) */
+ struct page *bounce_page;
u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */
struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 75c151413fda..0d1ada8968d7 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -450,6 +450,7 @@ extern struct mutex cgroup_mutex;
extern spinlock_t css_set_lock;
#define task_css_set_check(task, __c) \
rcu_dereference_check((task)->cgroups, \
+ rcu_read_lock_sched_held() || \
lockdep_is_held(&cgroup_mutex) || \
lockdep_is_held(&css_set_lock) || \
((task)->flags & PF_EXITING) || (__c))
@@ -791,11 +792,9 @@ static inline void cgroup_account_cputime(struct task_struct *task,
cpuacct_charge(task, delta_exec);
- rcu_read_lock();
cgrp = task_dfl_cgroup(task);
if (cgroup_parent(cgrp))
__cgroup_account_cputime(cgrp, delta_exec);
- rcu_read_unlock();
}
static inline void cgroup_account_cputime_field(struct task_struct *task,
@@ -806,11 +805,9 @@ static inline void cgroup_account_cputime_field(struct task_struct *task,
cpuacct_account_field(task, index, delta_exec);
- rcu_read_lock();
cgrp = task_dfl_cgroup(task);
if (cgroup_parent(cgrp))
__cgroup_account_cputime_field(cgrp, index, delta_exec);
- rcu_read_unlock();
}
#else /* CONFIG_CGROUPS */
diff --git a/include/linux/cgroup_api.h b/include/linux/cgroup_api.h
new file mode 100644
index 000000000000..d0cfe8025111
--- /dev/null
+++ b/include/linux/cgroup_api.h
@@ -0,0 +1 @@
+#include <linux/cgroup.h>
diff --git a/include/linux/cma.h b/include/linux/cma.h
index bd801023504b..90fd742fd1ef 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -20,6 +20,14 @@
#define CMA_MAX_NAME 64
+/*
+ * TODO: once the buddy -- especially pageblock merging and alloc_contig_range()
+ * -- can deal with only some pageblocks of a higher-order page being
+ * MIGRATE_CMA, we can use pageblock_nr_pages.
+ */
+#define CMA_MIN_ALIGNMENT_PAGES MAX_ORDER_NR_PAGES
+#define CMA_MIN_ALIGNMENT_BYTES (PAGE_SIZE * CMA_MIN_ALIGNMENT_PAGES)
+
struct cma;
extern unsigned long totalcma_pages;
@@ -50,4 +58,6 @@ extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned
extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count);
extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data);
+
+extern void cma_reserve_pages_on_error(struct cma *cma);
#endif
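
Callers reserving CMA areas are now expected to honour the alignment constants explicitly rather than relying on per-config bitmap granularity. A sketch of an early-boot reservation that enforces them, assuming cma_init_reserved_mem() keeps its usual signature; the sketch names are illustrative:

static int __init sketch_cma_reserve(phys_addr_t base, phys_addr_t size,
				     struct cma **out)
{
	/* Both ends must satisfy the buddy constraint described above. */
	if (!IS_ALIGNED(base, CMA_MIN_ALIGNMENT_BYTES) ||
	    !IS_ALIGNED(size, CMA_MIN_ALIGNMENT_BYTES))
		return -EINVAL;

	return cma_init_reserved_mem(base, size, 0, "sketch", out);
}
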
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index ccbbd31b3aae..deff5b308470 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -97,6 +97,10 @@
#define KASAN_ABI_VERSION 4
#endif
+#ifdef CONFIG_SHADOW_CALL_STACK
+#define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
+#endif
+
#if __has_attribute(__no_sanitize_address__)
#define __no_sanitize_address __attribute__((no_sanitize_address))
#else
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 429dcebe2b99..0f7fd205ab7e 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -117,14 +117,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
*/
#define __stringify_label(n) #n
-#define __annotate_reachable(c) ({ \
- asm volatile(__stringify_label(c) ":\n\t" \
- ".pushsection .discard.reachable\n\t" \
- ".long " __stringify_label(c) "b - .\n\t" \
- ".popsection\n\t" : : "i" (c)); \
-})
-#define annotate_reachable() __annotate_reachable(__COUNTER__)
-
#define __annotate_unreachable(c) ({ \
asm volatile(__stringify_label(c) ":\n\t" \
".pushsection .discard.unreachable\n\t" \
@@ -133,24 +125,21 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
})
#define annotate_unreachable() __annotate_unreachable(__COUNTER__)
-#define ASM_UNREACHABLE \
- "999:\n\t" \
- ".pushsection .discard.unreachable\n\t" \
- ".long 999b - .\n\t" \
+#define ASM_REACHABLE \
+ "998:\n\t" \
+ ".pushsection .discard.reachable\n\t" \
+ ".long 998b - .\n\t" \
".popsection\n\t"
/* Annotate a C jump table to allow objtool to follow the code flow */
#define __annotate_jump_table __section(".rodata..c_jump_table")
#else
-#define annotate_reachable()
#define annotate_unreachable()
+# define ASM_REACHABLE
#define __annotate_jump_table
#endif
-#ifndef ASM_UNREACHABLE
-# define ASM_UNREACHABLE
-#endif
#ifndef unreachable
# define unreachable() do { \
annotate_unreachable(); \
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 248a68c668b4..08a1d3e7e46d 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -12,22 +12,34 @@ struct core_vma_metadata {
unsigned long start, end;
unsigned long flags;
unsigned long dump_size;
+ unsigned long pgoff;
+ struct file *file;
+};
+
+struct coredump_params {
+ const kernel_siginfo_t *siginfo;
+ struct pt_regs *regs;
+ struct file *file;
+ unsigned long limit;
+ unsigned long mm_flags;
+ loff_t written;
+ loff_t pos;
+ loff_t to_skip;
+ int vma_count;
+ size_t vma_data_size;
+ struct core_vma_metadata *vma_meta;
};
/*
* These are the only things you should do on a core-file: use only these
* functions to write out all the necessary info.
*/
-struct coredump_params;
extern void dump_skip_to(struct coredump_params *cprm, unsigned long to);
extern void dump_skip(struct coredump_params *cprm, size_t nr);
extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
extern int dump_align(struct coredump_params *cprm, int align);
int dump_user_range(struct coredump_params *cprm, unsigned long start,
unsigned long len);
-int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
- struct core_vma_metadata **vma_meta,
- size_t *vma_data_size_ptr);
extern void do_coredump(const kernel_siginfo_t *siginfo);
#else
static inline void do_coredump(const kernel_siginfo_t *siginfo) {}
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 1ab29e61b078..35c7d6db4139 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -382,6 +382,9 @@ struct cpufreq_driver {
int (*suspend)(struct cpufreq_policy *policy);
int (*resume)(struct cpufreq_policy *policy);
+ /* Will be called after the driver is fully initialized */
+ void (*ready)(struct cpufreq_policy *policy);
+
struct freq_attr **attr;
/* platform specific boost support code */
@@ -658,6 +661,11 @@ struct gov_attr_set {
/* sysfs ops for cpufreq governors */
extern const struct sysfs_ops governor_sysfs_ops;
+static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj)
+{
+ return container_of(kobj, struct gov_attr_set, kobj);
+}
+
void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node);
void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node);
unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node);
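
to_gov_attr_set() hoists into the header the container_of() step that governors previously open-coded in their sysfs plumbing. A sketch of a governor embedding gov_attr_set in its tunables and recovering them from a kobject; the sketch_* names and the rate_limit_us attribute are illustrative, not a real governor:

struct sketch_tunables {
	struct gov_attr_set	attr_set;
	unsigned int		rate_limit_us;
};

static struct sketch_tunables *to_sketch_tunables(struct kobject *kobj)
{
	/* kobject -> gov_attr_set -> enclosing tunables */
	return container_of(to_gov_attr_set(kobj), struct sketch_tunables,
			    attr_set);
}

static ssize_t rate_limit_us_show(struct kobject *kobj,
				  struct kobj_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%u\n", to_sketch_tunables(kobj)->rate_limit_us);
}
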
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 411a428ace4d..c7dce7883179 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -100,6 +100,7 @@ enum cpuhp_state {
CPUHP_AP_ARM_CACHE_B15_RAC_DEAD,
CPUHP_PADATA_DEAD,
CPUHP_AP_DTPM_CPU_DEAD,
+ CPUHP_RANDOM_PREPARE,
CPUHP_WORKQUEUE_PREP,
CPUHP_POWER_NUMA_PREPARE,
CPUHP_HRTIMERS_PREPARE,
@@ -231,6 +232,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE,
CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE,
+ CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
@@ -240,6 +242,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_CSKY_ONLINE,
CPUHP_AP_WATCHDOG_ONLINE,
CPUHP_AP_WORKQUEUE_ONLINE,
+ CPUHP_AP_RANDOM_ONLINE,
CPUHP_AP_RCUTREE_ONLINE,
CPUHP_AP_BASE_CACHEINFO_ONLINE,
CPUHP_AP_ONLINE_DYN,
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 64dae70d31f5..fe29ac7cc469 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -102,7 +102,7 @@ extern atomic_t __num_online_cpus;
extern cpumask_t cpus_booted_once_mask;
-static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
+static __always_inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
{
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
WARN_ON_ONCE(cpu >= bits);
@@ -110,7 +110,7 @@ static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
}
/* verify cpu argument to cpumask_* operators */
-static inline unsigned int cpumask_check(unsigned int cpu)
+static __always_inline unsigned int cpumask_check(unsigned int cpu)
{
cpu_max_bits_warn(cpu, nr_cpumask_bits);
return cpu;
@@ -341,12 +341,12 @@ extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool
* @cpu: cpu number (< nr_cpu_ids)
* @dstp: the cpumask pointer
*/
-static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
+static __always_inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
{
set_bit(cpumask_check(cpu), cpumask_bits(dstp));
}
-static inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
+static __always_inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
{
__set_bit(cpumask_check(cpu), cpumask_bits(dstp));
}
@@ -357,12 +357,12 @@ static inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
* @cpu: cpu number (< nr_cpu_ids)
* @dstp: the cpumask pointer
*/
-static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp)
+static __always_inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp)
{
clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
}
-static inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp)
+static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp)
{
__clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
}
@@ -374,7 +374,7 @@ static inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp)
*
* Returns 1 if @cpu is set in @cpumask, else returns 0
*/
-static inline int cpumask_test_cpu(int cpu, const struct cpumask *cpumask)
+static __always_inline int cpumask_test_cpu(int cpu, const struct cpumask *cpumask)
{
return test_bit(cpumask_check(cpu), cpumask_bits((cpumask)));
}
@@ -388,7 +388,7 @@ static inline int cpumask_test_cpu(int cpu, const struct cpumask *cpumask)
*
* test_and_set_bit wrapper for cpumasks.
*/
-static inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask)
+static __always_inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask)
{
return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask));
}
@@ -402,7 +402,7 @@ static inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask)
*
* test_and_clear_bit wrapper for cpumasks.
*/
-static inline int cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask)
+static __always_inline int cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask)
{
return test_and_clear_bit(cpumask_check(cpu), cpumask_bits(cpumask));
}
diff --git a/include/linux/cpumask_api.h b/include/linux/cpumask_api.h
new file mode 100644
index 000000000000..83bd3ebe82b0
--- /dev/null
+++ b/include/linux/cpumask_api.h
@@ -0,0 +1 @@
+#include <linux/cpumask.h>
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 855869e1fd32..2324ab6f1846 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -133,6 +133,15 @@
#define CRYPTO_ALG_ALLOCATES_MEMORY 0x00010000
/*
+ * Mark an algorithm as a service implementation only usable by a
+ * template and never by a normal user of the kernel crypto API.
+ * This is intended to be used by algorithms that are themselves
+ * not FIPS-approved but may instead be used to implement parts of
+ * a FIPS-approved algorithm (e.g., dh vs. ffdhe2048(dh)).
+ */
+#define CRYPTO_ALG_FIPS_INTERNAL 0x00020000
+
+/*
* Transform masks and values (for crt_flags).
*/
#define CRYPTO_TFM_NEED_KEY 0x00000001
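
A sketch of how a service implementation might carry the new flag at registration so that only templates can compose it; the algorithm below is a placeholder, not a real kernel entry:

static struct kpp_alg sketch_dh = {
	.base = {
		.cra_name		= "dh",
		.cra_driver_name	= "dh-sketch",
		.cra_priority		= 100,
		/* usable only as a building block, e.g. ffdhe2048(dh) */
		.cra_flags		= CRYPTO_ALG_FIPS_INTERNAL,
	},
	/* set_secret/generate_public_key/... elided */
};
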
diff --git a/include/linux/damon.h b/include/linux/damon.h
index 5e1e3a128b77..f23cbfa4248d 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -60,19 +60,18 @@ struct damon_region {
/**
* struct damon_target - Represents a monitoring target.
- * @id: Unique identifier for this target.
+ * @pid: The PID of the virtual address space to monitor.
* @nr_regions: Number of monitoring target regions of this target.
* @regions_list: Head of the monitoring target regions of this target.
* @list: List head for siblings.
*
* Each monitoring context could have multiple targets. For example, a context
* for virtual memory address spaces could have multiple target processes. The
- * @id of each target should be unique among the targets of the context. For
- * example, in the virtual address monitoring context, it could be a pidfd or
- * an address of an mm_struct.
+ * @pid should be set for appropriate &struct damon_operations including the
+ * virtual address spaces monitoring operations.
*/
struct damon_target {
- unsigned long id;
+ struct pid *pid;
unsigned int nr_regions;
struct list_head regions_list;
struct list_head list;
@@ -88,6 +87,7 @@ struct damon_target {
* @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE.
* @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
* @DAMOS_STAT: Do nothing but count the stat.
+ * @NR_DAMOS_ACTIONS: Total number of DAMOS actions
*/
enum damos_action {
DAMOS_WILLNEED,
@@ -96,6 +96,7 @@ enum damos_action {
DAMOS_HUGEPAGE,
DAMOS_NOHUGEPAGE,
 DAMOS_STAT, /* Do nothing but record the stat */
+ NR_DAMOS_ACTIONS,
};
/**
@@ -121,9 +122,9 @@ enum damos_action {
* uses smaller one as the effective quota.
*
* For selecting regions within the quota, DAMON prioritizes current scheme's
- * target memory regions using the &struct damon_primitive->get_scheme_score.
+ * target memory regions using the &struct damon_operations->get_scheme_score.
* You could customize the prioritization logic by setting &weight_sz,
- * &weight_nr_accesses, and &weight_age, because monitoring primitives are
+ * &weight_nr_accesses, and &weight_age, because monitoring operations are
* encouraged to respect those.
*/
struct damos_quota {
@@ -158,10 +159,12 @@ struct damos_quota {
*
* @DAMOS_WMARK_NONE: Ignore the watermarks of the given scheme.
* @DAMOS_WMARK_FREE_MEM_RATE: Free memory rate of the system in [0,1000].
+ * @NR_DAMOS_WMARK_METRICS: Total number of DAMOS watermark metrics
*/
enum damos_wmark_metric {
DAMOS_WMARK_NONE,
DAMOS_WMARK_FREE_MEM_RATE,
+ NR_DAMOS_WMARK_METRICS,
};
/**
@@ -254,13 +257,26 @@ struct damos {
struct list_head list;
};
+/**
+ * enum damon_ops_id - Identifier for each monitoring operations implementation
+ *
+ * @DAMON_OPS_VADDR: Monitoring operations for virtual address spaces
+ * @DAMON_OPS_PADDR: Monitoring operations for the physical address space
+ */
+enum damon_ops_id {
+ DAMON_OPS_VADDR,
+ DAMON_OPS_PADDR,
+ NR_DAMON_OPS,
+};
+
struct damon_ctx;
/**
- * struct damon_primitive - Monitoring primitives for given use cases.
+ * struct damon_operations - Monitoring operations for given use cases.
*
- * @init: Initialize primitive-internal data structures.
- * @update: Update primitive-internal data structures.
+ * @id: Identifier of this operations set.
+ * @init: Initialize operations-related data structures.
+ * @update: Update operations-related data structures.
* @prepare_access_checks: Prepare next access check of target regions.
* @check_accesses: Check the accesses to target regions.
* @reset_aggregated: Reset aggregated accesses monitoring results.
@@ -270,18 +286,20 @@ struct damon_ctx;
* @cleanup: Clean up the context.
*
* DAMON can be extended for various address spaces and usages. For this,
- * users should register the low level primitives for their target address
- * space and usecase via the &damon_ctx.primitive. Then, the monitoring thread
+ * users should register the low level operations for their target address
+ * space and usecase via the &damon_ctx.ops. Then, the monitoring thread
* (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting
- * the monitoring, @update after each &damon_ctx.primitive_update_interval, and
+ * the monitoring, @update after each &damon_ctx.ops_update_interval, and
* @check_accesses, @target_valid and @prepare_access_checks after each
* &damon_ctx.sample_interval. Finally, @reset_aggregated is called after each
* &damon_ctx.aggr_interval.
*
- * @init should initialize primitive-internal data structures. For example,
+ * Each &struct damon_operations instance having valid @id can be registered
+ * via damon_register_ops() and selected by damon_select_ops() later.
+ * @init should initialize operations-related data structures. For example,
* this could be used to construct proper monitoring target regions and link
* those to @damon_ctx.adaptive_targets.
- * @update should update the primitive-internal data structures. For example,
+ * @update should update the operations-related data structures. For example,
* this could be used to update monitoring target regions for current status.
* @prepare_access_checks should manipulate the monitoring regions to be
* prepared for the next access check.
@@ -301,7 +319,8 @@ struct damon_ctx;
* monitoring.
* @cleanup is called from @kdamond just before its termination.
*/
-struct damon_primitive {
+struct damon_operations {
+ enum damon_ops_id id;
void (*init)(struct damon_ctx *context);
void (*update)(struct damon_ctx *context);
void (*prepare_access_checks)(struct damon_ctx *context);
@@ -355,15 +374,15 @@ struct damon_callback {
*
* @sample_interval: The time between access samplings.
* @aggr_interval: The time between monitor results aggregations.
- * @primitive_update_interval: The time between monitoring primitive updates.
+ * @ops_update_interval: The time between monitoring operations updates.
*
* For each @sample_interval, DAMON checks whether each region is accessed or
* not. It aggregates and keeps the access information (number of accesses to
* each region) for @aggr_interval time. DAMON also checks whether the target
* memory regions need update (e.g., by ``mmap()`` calls from the application,
* in case of virtual memory monitoring) and applies the changes for each
- * @primitive_update_interval. All time intervals are in micro-seconds.
- * Please refer to &struct damon_primitive and &struct damon_callback for more
+ * @ops_update_interval. All time intervals are in micro-seconds.
+ * Please refer to &struct damon_operations and &struct damon_callback for more
* detail.
*
* @kdamond: Kernel thread who does the monitoring.
@@ -375,7 +394,7 @@ struct damon_callback {
*
* Once started, the monitoring thread runs until explicitly required to be
* terminated or every monitoring target is invalid. The validity of the
- * targets is checked via the &damon_primitive.target_valid of @primitive. The
+ * targets is checked via the &damon_operations.target_valid of @ops. The
* termination can also be explicitly requested by writing non-zero to
* @kdamond_stop. The thread sets @kdamond to NULL when it terminates.
* Therefore, users can know whether the monitoring is ongoing or terminated by
@@ -385,7 +404,7 @@ struct damon_callback {
* Note that the monitoring thread protects only @kdamond and @kdamond_stop via
* @kdamond_lock. Accesses to other fields must be protected by themselves.
*
- * @primitive: Set of monitoring primitives for given use cases.
+ * @ops: Set of monitoring operations for given use cases.
* @callback: Set of callbacks for monitoring events notifications.
*
* @min_nr_regions: The minimum number of adaptive monitoring regions.
@@ -396,17 +415,17 @@ struct damon_callback {
struct damon_ctx {
unsigned long sample_interval;
unsigned long aggr_interval;
- unsigned long primitive_update_interval;
+ unsigned long ops_update_interval;
/* private: internal use only */
struct timespec64 last_aggregation;
- struct timespec64 last_primitive_update;
+ struct timespec64 last_ops_update;
/* public: */
struct task_struct *kdamond;
struct mutex kdamond_lock;
- struct damon_primitive primitive;
+ struct damon_operations ops;
struct damon_callback callback;
unsigned long min_nr_regions;
@@ -475,7 +494,7 @@ struct damos *damon_new_scheme(
void damon_add_scheme(struct damon_ctx *ctx, struct damos *s);
void damon_destroy_scheme(struct damos *s);
-struct damon_target *damon_new_target(unsigned long id);
+struct damon_target *damon_new_target(void);
void damon_add_target(struct damon_ctx *ctx, struct damon_target *t);
bool damon_targets_empty(struct damon_ctx *ctx);
void damon_free_target(struct damon_target *t);
@@ -484,28 +503,18 @@ unsigned int damon_nr_regions(struct damon_target *t);
struct damon_ctx *damon_new_ctx(void);
void damon_destroy_ctx(struct damon_ctx *ctx);
-int damon_set_targets(struct damon_ctx *ctx,
- unsigned long *ids, ssize_t nr_ids);
int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
- unsigned long aggr_int, unsigned long primitive_upd_int,
+ unsigned long aggr_int, unsigned long ops_upd_int,
unsigned long min_nr_reg, unsigned long max_nr_reg);
int damon_set_schemes(struct damon_ctx *ctx,
struct damos **schemes, ssize_t nr_schemes);
int damon_nr_running_ctxs(void);
+int damon_register_ops(struct damon_operations *ops);
+int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id);
-int damon_start(struct damon_ctx **ctxs, int nr_ctxs);
+int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive);
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
#endif /* CONFIG_DAMON */
-#ifdef CONFIG_DAMON_VADDR
-bool damon_va_target_valid(void *t);
-void damon_va_set_primitives(struct damon_ctx *ctx);
-#endif /* CONFIG_DAMON_VADDR */
-
-#ifdef CONFIG_DAMON_PADDR
-bool damon_pa_target_valid(void *t);
-void damon_pa_set_primitives(struct damon_ctx *ctx);
-#endif /* CONFIG_DAMON_PADDR */
-
#endif /* _DAMON_H */
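
Under the reworked interface a backend registers its damon_operations once and every context selects a set by id instead of copying callbacks in. A sketch of the in-kernel flow using only the declarations above, with callbacks elided; sketch_start_vaddr_monitoring() is an illustrative name:

static int sketch_start_vaddr_monitoring(struct pid *pid)
{
	struct damon_ctx *ctx;
	struct damon_target *t;

	ctx = damon_new_ctx();
	if (!ctx)
		return -ENOMEM;

	/* pick the registered virtual-address-space operations set */
	if (damon_select_ops(ctx, DAMON_OPS_VADDR)) {
		damon_destroy_ctx(ctx);
		return -EINVAL;
	}

	t = damon_new_target();
	if (!t) {
		damon_destroy_ctx(ctx);
		return -ENOMEM;
	}
	t->pid = pid;			/* targets now carry a struct pid */
	damon_add_target(ctx, t);

	return damon_start(&ctx, 1, true);	/* exclusive run */
}
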
diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
index d37e5d06a357..a4a13514b730 100644
--- a/include/linux/dtpm.h
+++ b/include/linux/dtpm.h
@@ -32,28 +32,25 @@ struct dtpm_ops {
void (*release)(struct dtpm *);
};
-typedef int (*dtpm_init_t)(void);
+struct device_node;
-struct dtpm_descr {
- dtpm_init_t init;
+struct dtpm_subsys_ops {
+ const char *name;
+ int (*init)(void);
+ void (*exit)(void);
+ int (*setup)(struct dtpm *, struct device_node *);
};
-/* Init section thermal table */
-extern struct dtpm_descr __dtpm_table[];
-extern struct dtpm_descr __dtpm_table_end[];
-
-#define DTPM_TABLE_ENTRY(name, __init) \
- static struct dtpm_descr __dtpm_table_entry_##name \
- __used __section("__dtpm_table") = { \
- .init = __init, \
- }
-
-#define DTPM_DECLARE(name, init) DTPM_TABLE_ENTRY(name, init)
+enum DTPM_NODE_TYPE {
+ DTPM_NODE_VIRTUAL = 0,
+ DTPM_NODE_DT,
+};
-#define for_each_dtpm_table(__dtpm) \
- for (__dtpm = __dtpm_table; \
- __dtpm < __dtpm_table_end; \
- __dtpm++)
+struct dtpm_node {
+ enum DTPM_NODE_TYPE type;
+ const char *name;
+ struct dtpm_node *parent;
+};
static inline struct dtpm *to_dtpm(struct powercap_zone *zone)
{
@@ -70,4 +67,7 @@ void dtpm_unregister(struct dtpm *dtpm);
int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent);
+int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table);
+
+void dtpm_destroy_hierarchy(void);
#endif
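
dtpm_create_hierarchy() replaces the linker-table registration removed above: the platform describes its node tree and hands over an OF match table whose .data is assumed to point at that tree, as the in-tree users appear to do. A hedged sketch; the compatible string and names are illustrative:

static struct dtpm_node sketch_hierarchy[] = {
	[0] = { .name = "soc",     .type = DTPM_NODE_VIRTUAL },
	[1] = { .name = "package", .type = DTPM_NODE_VIRTUAL,
		.parent = &sketch_hierarchy[0] },
	{ },
};

static struct of_device_id sketch_dtpm_match_table[] = {
	{ .compatible = "vendor,sketch-soc", .data = sketch_hierarchy },
	{ },
};

static int __init sketch_dtpm_init(void)
{
	return dtpm_create_hierarchy(sketch_dtpm_match_table);
}
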
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 746e081879a5..f8e206e82476 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -114,7 +114,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg
#endif
}
-#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64)
+#ifdef CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS
/*
* These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out
* extra segments containing the gate DSO contents. Dumping its
@@ -149,6 +149,6 @@ static inline size_t elf_core_extra_data_size(void)
{
return 0;
}
-#endif
+#endif /* CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS */
#endif /* _LINUX_ELFCORE_H */
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index 2e2b8d6140ed..141952f4fee8 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -454,10 +454,21 @@ irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs);
*
* Conditional reschedule with additional sanity checks.
*/
-void irqentry_exit_cond_resched(void);
+void raw_irqentry_exit_cond_resched(void);
#ifdef CONFIG_PREEMPT_DYNAMIC
-DECLARE_STATIC_CALL(irqentry_exit_cond_resched, irqentry_exit_cond_resched);
+#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
+#define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched
+#define irqentry_exit_cond_resched_dynamic_disabled NULL
+DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
+#define irqentry_exit_cond_resched() static_call(irqentry_exit_cond_resched)()
+#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
+DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
+void dynamic_irqentry_exit_cond_resched(void);
+#define irqentry_exit_cond_resched() dynamic_irqentry_exit_cond_resched()
#endif
+#else /* CONFIG_PREEMPT_DYNAMIC */
+#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched()
+#endif /* CONFIG_PREEMPT_DYNAMIC */
/**
* irqentry_exit - Handle return from exception that used irqentry_enter()
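
For the static-key flavour, the out-of-line helper declared above plausibly reduces to a branch in front of the raw call; a sketch under that assumption (the authoritative body lives in kernel/entry/common.c):

void dynamic_irqentry_exit_cond_resched(void)
{
	/* Key defaults to true; the preempt= machinery flips it at runtime. */
	if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
		return;
	raw_irqentry_exit_cond_resched();
}
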
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index a26f37a27167..11efc45de66a 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -111,7 +111,7 @@ struct ethtool_link_ext_state_info {
enum ethtool_link_ext_substate_bad_signal_integrity bad_signal_integrity;
enum ethtool_link_ext_substate_cable_issue cable_issue;
enum ethtool_link_ext_substate_module module;
- u8 __link_ext_substate;
+ u32 __link_ext_substate;
};
};
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index e525f6957c49..2d04f6448cde 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -64,6 +64,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name,
struct kmem_cache;
+bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order);
+
int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
#ifdef CONFIG_FAILSLAB
extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 3da95842b207..02f362c661c8 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -262,7 +262,7 @@ struct fb_ops {
/* Draws a rectangle */
void (*fb_fillrect) (struct fb_info *info, const struct fb_fillrect *rect);
- /* Copy data from area to another. Obsolete. */
+ /* Copy data from one area to another */
void (*fb_copyarea) (struct fb_info *info, const struct fb_copyarea *region);
 /* Draws an image to the display */
void (*fb_imageblit) (struct fb_info *info, const struct fb_image *image);
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 907cb01890cf..f6783f58c64a 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -93,6 +93,7 @@ enum pm_api_id {
PM_FPGA_LOAD = 22,
PM_FPGA_GET_STATUS = 23,
PM_GET_CHIPID = 24,
+ PM_SECURE_SHA = 26,
PM_PINCTRL_REQUEST = 28,
PM_PINCTRL_RELEASE = 29,
PM_PINCTRL_GET_FUNCTION = 30,
@@ -427,6 +428,7 @@ int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities,
const u32 qos,
const enum zynqmp_pm_request_ack ack);
int zynqmp_pm_aes_engine(const u64 address, u32 *out);
+int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags);
int zynqmp_pm_fpga_load(const u64 address, const u32 size, const u32 flags);
int zynqmp_pm_fpga_get_status(u32 *value);
int zynqmp_pm_write_ggs(u32 index, u32 value);
@@ -601,6 +603,12 @@ static inline int zynqmp_pm_aes_engine(const u64 address, u32 *out)
return -ENODEV;
}
+static inline int zynqmp_pm_sha_hash(const u64 address, const u32 size,
+ const u32 flags)
+{
+ return -ENODEV;
+}
+
static inline int zynqmp_pm_fpga_load(const u64 address, const u32 size,
const u32 flags)
{
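
A hedged sketch of driving the new SHA firmware call: the buffer must be firmware-visible (hence a DMA address), and the flags argument is firmware-defined, so the 0 below is a placeholder rather than a documented constant:

static int sketch_fw_sha(dma_addr_t payload, u32 len)
{
	/* flags value is a placeholder; consult the EEMI documentation */
	return zynqmp_pm_sha_hash(payload, len, 0);
}
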
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f3daaea16554..da3fc3cd2428 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -42,6 +42,7 @@
#include <linux/mount.h>
#include <linux/cred.h>
#include <linux/mnt_idmapping.h>
+#include <linux/slab.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -367,8 +368,8 @@ struct address_space_operations {
/* Write back some dirty pages from this mapping. */
int (*writepages)(struct address_space *, struct writeback_control *);
- /* Set a page dirty. Return true if this dirtied it */
- int (*set_page_dirty)(struct page *page);
+ /* Mark a folio dirty. Return true if this dirtied it */
+ bool (*dirty_folio)(struct address_space *, struct folio *);
/*
* Reads in the requested pages. Unlike ->readpage(), this is
@@ -387,7 +388,7 @@ struct address_space_operations {
/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
sector_t (*bmap)(struct address_space *, sector_t);
- void (*invalidatepage) (struct page *, unsigned int, unsigned int);
+ void (*invalidate_folio) (struct folio *, size_t offset, size_t len);
int (*releasepage) (struct page *, gfp_t);
void (*freepage)(struct page *);
ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
@@ -399,9 +400,9 @@ struct address_space_operations {
struct page *, struct page *, enum migrate_mode);
bool (*isolate_page)(struct page *, isolate_mode_t);
void (*putback_page)(struct page *);
- int (*launder_page) (struct page *);
- int (*is_partially_uptodate) (struct page *, unsigned long,
- unsigned long);
+ int (*launder_folio)(struct folio *);
+ bool (*is_partially_uptodate) (struct folio *, size_t from,
+ size_t count);
void (*is_dirty_writeback) (struct page *, bool *, bool *);
int (*error_remove_page)(struct address_space *, struct page *);
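
For most filesystems the conversion above is mechanical: rename the hook, take a folio, and return bool where the type changed. A sketch of a pagecache-backed implementation, assuming filemap_dirty_folio() as the generic helper; the sketch names are illustrative:

static bool sketch_dirty_folio(struct address_space *mapping,
			       struct folio *folio)
{
	/* true only if this call transitioned the folio to dirty */
	return filemap_dirty_folio(mapping, folio);
}

static void sketch_invalidate_folio(struct folio *folio, size_t offset,
				    size_t len)
{
	/* offset/len are byte ranges within the (possibly large) folio */
}

static const struct address_space_operations sketch_fs_aops = {
	.dirty_folio		= sketch_dirty_folio,
	.invalidate_folio	= sketch_invalidate_folio,
	/* remaining hooks elided */
};
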
@@ -930,10 +931,15 @@ struct fown_struct {
* struct file_ra_state - Track a file's readahead state.
* @start: Where the most recent readahead started.
* @size: Number of pages read in the most recent readahead.
- * @async_size: Start next readahead when this many pages are left.
- * @ra_pages: Maximum size of a readahead request.
+ * @async_size: Number of pages that were/are not needed immediately
+ * and so were/are genuinely "ahead". Start next readahead when
+ * the first of these pages is accessed.
+ * @ra_pages: Maximum size of a readahead request, copied from the bdi.
* @mmap_miss: How many mmap accesses missed in the page cache.
* @prev_pos: The last byte in the most recent read request.
+ *
+ * When this structure is passed to ->readahead(), the "most recent"
+ * readahead means the current readahead.
*/
struct file_ra_state {
pgoff_t start;
@@ -1435,6 +1441,7 @@ extern int send_sigurg(struct fown_struct *fown);
#define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */
#define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */
+#define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */
/* Possible states of 'frozen' field */
enum {
@@ -1483,7 +1490,7 @@ struct super_block {
#ifdef CONFIG_FS_VERITY
const struct fsverity_operations *s_vop;
#endif
-#ifdef CONFIG_UNICODE
+#if IS_ENABLED(CONFIG_UNICODE)
struct unicode_map *s_encoding;
__u16 s_encoding_flags;
#endif
@@ -2746,54 +2753,6 @@ extern void init_special_inode(struct inode *, umode_t, dev_t);
extern void make_bad_inode(struct inode *);
extern bool is_bad_inode(struct inode *);
-unsigned long invalidate_mapping_pages(struct address_space *mapping,
- pgoff_t start, pgoff_t end);
-
-void invalidate_mapping_pagevec(struct address_space *mapping,
- pgoff_t start, pgoff_t end,
- unsigned long *nr_pagevec);
-
-static inline void invalidate_remote_inode(struct inode *inode)
-{
- if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- S_ISLNK(inode->i_mode))
- invalidate_mapping_pages(inode->i_mapping, 0, -1);
-}
-extern int invalidate_inode_pages2(struct address_space *mapping);
-extern int invalidate_inode_pages2_range(struct address_space *mapping,
- pgoff_t start, pgoff_t end);
-extern int write_inode_now(struct inode *, int);
-extern int filemap_fdatawrite(struct address_space *);
-extern int filemap_flush(struct address_space *);
-extern int filemap_fdatawait_keep_errors(struct address_space *mapping);
-extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
- loff_t lend);
-extern int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
- loff_t start_byte, loff_t end_byte);
-
-static inline int filemap_fdatawait(struct address_space *mapping)
-{
- return filemap_fdatawait_range(mapping, 0, LLONG_MAX);
-}
-
-extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
- loff_t lend);
-extern int filemap_write_and_wait_range(struct address_space *mapping,
- loff_t lstart, loff_t lend);
-extern int __filemap_fdatawrite_range(struct address_space *mapping,
- loff_t start, loff_t end, int sync_mode);
-extern int filemap_fdatawrite_range(struct address_space *mapping,
- loff_t start, loff_t end);
-extern int filemap_check_errors(struct address_space *mapping);
-extern void __filemap_set_wb_err(struct address_space *mapping, int err);
-int filemap_fdatawrite_wbc(struct address_space *mapping,
- struct writeback_control *wbc);
-
-static inline int filemap_write_and_wait(struct address_space *mapping)
-{
- return filemap_write_and_wait_range(mapping, 0, LLONG_MAX);
-}
-
extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart,
loff_t lend);
extern int __must_check file_check_and_advance_wb_err(struct file *file);
@@ -2805,67 +2764,6 @@ static inline int file_write_and_wait(struct file *file)
return file_write_and_wait_range(file, 0, LLONG_MAX);
}
-/**
- * filemap_set_wb_err - set a writeback error on an address_space
- * @mapping: mapping in which to set writeback error
- * @err: error to be set in mapping
- *
- * When writeback fails in some way, we must record that error so that
- * userspace can be informed when fsync and the like are called. We endeavor
- * to report errors on any file that was open at the time of the error. Some
- * internal callers also need to know when writeback errors have occurred.
- *
- * When a writeback error occurs, most filesystems will want to call
- * filemap_set_wb_err to record the error in the mapping so that it will be
- * automatically reported whenever fsync is called on the file.
- */
-static inline void filemap_set_wb_err(struct address_space *mapping, int err)
-{
- /* Fastpath for common case of no error */
- if (unlikely(err))
- __filemap_set_wb_err(mapping, err);
-}
-
-/**
- * filemap_check_wb_err - has an error occurred since the mark was sampled?
- * @mapping: mapping to check for writeback errors
- * @since: previously-sampled errseq_t
- *
- * Grab the errseq_t value from the mapping, and see if it has changed "since"
- * the given value was sampled.
- *
- * If it has then report the latest error set, otherwise return 0.
- */
-static inline int filemap_check_wb_err(struct address_space *mapping,
- errseq_t since)
-{
- return errseq_check(&mapping->wb_err, since);
-}
-
-/**
- * filemap_sample_wb_err - sample the current errseq_t to test for later errors
- * @mapping: mapping to be sampled
- *
- * Writeback errors are always reported relative to a particular sample point
- * in the past. This function provides those sample points.
- */
-static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
-{
- return errseq_sample(&mapping->wb_err);
-}
-
-/**
- * file_sample_sb_err - sample the current errseq_t to test for later errors
- * @file: file pointer to be sampled
- *
- * Grab the most current superblock-level errseq_t value for the given
- * struct file.
- */
-static inline errseq_t file_sample_sb_err(struct file *file)
-{
- return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
-}
-
extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
int datasync);
extern int vfs_fsync(struct file *file, int datasync);
@@ -3108,6 +3006,16 @@ extern void free_inode_nonrcu(struct inode *inode);
extern int should_remove_suid(struct dentry *);
extern int file_remove_privs(struct file *);
+/*
+ * This must be used for allocating filesystem-specific inodes to set
+ * up the inode reclaim context correctly.
+ */
+static inline void *
+alloc_inode_sb(struct super_block *sb, struct kmem_cache *cache, gfp_t gfp)
+{
+ return kmem_cache_alloc_lru(cache, &sb->s_inode_lru, gfp);
+}
+
extern void __insert_inode_hash(struct inode *, unsigned long hashval);
static inline void insert_inode_hash(struct inode *inode)
{
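
A minimal sketch of a filesystem ->alloc_inode() using the new helper; the foo_* names are hypothetical:

struct foo_inode_info {
	/* filesystem-private fields ... */
	struct inode vfs_inode;
};

static struct kmem_cache *foo_inode_cachep;

static struct inode *foo_alloc_inode(struct super_block *sb)
{
	struct foo_inode_info *fi;

	/* charges the slab object to sb->s_inode_lru so reclaim sees it */
	fi = alloc_inode_sb(sb, foo_inode_cachep, GFP_KERNEL);
	if (!fi)
		return NULL;
	return &fi->vfs_inode;
}
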
@@ -3130,6 +3038,7 @@ extern int sb_min_blocksize(struct super_block *, int);
extern int generic_file_mmap(struct file *, struct vm_area_struct *);
extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
+int generic_write_checks_count(struct kiocb *iocb, loff_t *count);
extern int generic_write_check_limits(struct file *file, loff_t pos,
loff_t *count);
extern int generic_file_rw_checks(struct file *file_in, struct file *file_out);
@@ -3173,6 +3082,7 @@ extern loff_t fixed_size_llseek(struct file *file, loff_t offset,
int whence, loff_t size);
extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t);
extern loff_t no_seek_end_llseek(struct file *, loff_t, int);
+int rw_verify_area(int, struct file *, const loff_t *, size_t);
extern int generic_file_open(struct inode * inode, struct file * filp);
extern int nonseekable_open(struct inode * inode, struct file * filp);
extern int stream_open(struct inode * inode, struct file * filp);
@@ -3322,8 +3232,6 @@ extern int simple_rename(struct user_namespace *, struct inode *,
extern void simple_recursive_removal(struct dentry *,
void (*callback)(struct dentry *));
extern int noop_fsync(struct file *, loff_t, loff_t, int);
-extern void noop_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length);
extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
extern int simple_empty(struct dentry *);
extern int simple_write_begin(struct file *file, struct address_space *mapping,
@@ -3608,15 +3516,4 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
extern int generic_fadvise(struct file *file, loff_t offset, loff_t len,
int advice);
-/*
- * Flush file data before changing attributes. Caller must hold any locks
- * required to prevent further writes to this file until we're done setting
- * flags.
- */
-static inline int inode_drain_writes(struct inode *inode)
-{
- inode_dio_wait(inode);
- return filemap_write_and_wait(inode->i_mapping);
-}
-
#endif /* _LINUX_FS_H */
diff --git a/include/linux/fs_api.h b/include/linux/fs_api.h
new file mode 100644
index 000000000000..83be38d6d413
--- /dev/null
+++ b/include/linux/fs_api.h
@@ -0,0 +1 @@
+#include <linux/fs.h>
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 296c5f1d9f35..d44ff747a657 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -616,9 +616,11 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie,
}
#if __fscache_available
-extern int fscache_set_page_dirty(struct page *page, struct fscache_cookie *cookie);
+bool fscache_dirty_folio(struct address_space *mapping, struct folio *folio,
+ struct fscache_cookie *cookie);
#else
-#define fscache_set_page_dirty(PAGE, COOKIE) (__set_page_dirty_nobuffers((PAGE)))
+#define fscache_dirty_folio(MAPPING, FOLIO, COOKIE) \
+ filemap_dirty_folio(MAPPING, FOLIO)
#endif
/**
@@ -626,7 +628,7 @@ extern int fscache_set_page_dirty(struct page *page, struct fscache_cookie *cook
* @wbc: The writeback control
* @cookie: The cookie referring to the cache object
*
- * Unpin the writeback resources pinned by fscache_set_page_dirty(). This is
+ * Unpin the writeback resources pinned by fscache_dirty_folio(). This is
* intended to be called by the netfs's ->write_inode() method.
*/
static inline void fscache_unpin_writeback(struct writeback_control *wbc,
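
A sketch of how a network filesystem would wire the replacement into its address_space_operations; foo_inode_cookie() is a hypothetical accessor for the inode's fscache cookie:

static bool foo_dirty_folio(struct address_space *mapping, struct folio *folio)
{
	/* marks the folio dirty and pins cache resources for writeback */
	return fscache_dirty_folio(mapping, folio,
				   foo_inode_cookie(mapping->host));
}
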
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 91ea9477e9bd..50d92d805bd8 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -714,6 +714,10 @@ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode,
bool fscrypt_mergeable_bio_bh(struct bio *bio,
const struct buffer_head *next_bh);
+bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter);
+
+u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks);
+
#else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */
static inline bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode)
@@ -742,6 +746,20 @@ static inline bool fscrypt_mergeable_bio_bh(struct bio *bio,
{
return true;
}
+
+static inline bool fscrypt_dio_supported(struct kiocb *iocb,
+ struct iov_iter *iter)
+{
+ const struct inode *inode = file_inode(iocb->ki_filp);
+
+ return !fscrypt_needs_contents_encryption(inode);
+}
+
+static inline u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk,
+ u64 nr_blocks)
+{
+ return nr_blocks;
+}
#endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */
/**
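
As a sketch of the intended call sites (foo_* hypothetical): a filesystem's direct-I/O path gates on fscrypt_dio_supported() and falls back to buffered I/O when inline crypto cannot cover the request, and fscrypt_limit_io_blocks() then clamps an extent so logically contiguous blocks stay contiguous in the ciphertext:

static u64 foo_dio_blocks(struct kiocb *iocb, struct iov_iter *iter,
			  const struct inode *inode, u64 lblk, u64 len)
{
	if (!fscrypt_dio_supported(iocb, iter))
		return 0;	/* caller falls back to buffered I/O */
	return fscrypt_limit_io_blocks(inode, lblk, len);
}
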
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 3a2d7dc3c607..bb8467cd11ae 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -225,16 +225,53 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode,
}
/*
+ * fsnotify_delete - @dentry was unlinked and unhashed
+ *
+ * Caller must make sure that dentry->d_name is stable.
+ *
+ * Note: unlike fsnotify_unlink(), we also have to pass the unlinked inode
+ * as this may be called after d_delete() and old_dentry may be negative.
+ */
+static inline void fsnotify_delete(struct inode *dir, struct inode *inode,
+ struct dentry *dentry)
+{
+ __u32 mask = FS_DELETE;
+
+ if (S_ISDIR(inode->i_mode))
+ mask |= FS_ISDIR;
+
+ fsnotify_name(mask, inode, FSNOTIFY_EVENT_INODE, dir, &dentry->d_name,
+ 0);
+}
+
+/**
+ * d_delete_notify - delete a dentry and call fsnotify_delete()
+ * @dir: The parent directory of @dentry
+ * @dentry: The dentry to delete
+ *
+ * This helper is used to guarantee that the unlinked inode cannot be found
+ * by a lookup of this name after the fsnotify_delete() event has been
+ * delivered.
+ */
+static inline void d_delete_notify(struct inode *dir, struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+
+ ihold(inode);
+ d_delete(dentry);
+ fsnotify_delete(dir, inode, dentry);
+ iput(inode);
+}
+
+/*
* fsnotify_unlink - 'name' was unlinked
*
* Caller must make sure that dentry->d_name is stable.
*/
static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry)
{
- /* Expected to be called before d_delete() */
- WARN_ON_ONCE(d_is_negative(dentry));
+ if (WARN_ON_ONCE(d_is_negative(dentry)))
+ return;
- fsnotify_dirent(dir, dentry, FS_DELETE);
+ fsnotify_delete(dir, d_inode(dentry), dentry);
}
/*
@@ -258,10 +295,10 @@ static inline void fsnotify_mkdir(struct inode *dir, struct dentry *dentry)
*/
static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry)
{
- /* Expected to be called before d_delete() */
- WARN_ON_ONCE(d_is_negative(dentry));
+ if (WARN_ON_ONCE(d_is_negative(dentry)))
+ return;
- fsnotify_dirent(dir, dentry, FS_DELETE | FS_ISDIR);
+ fsnotify_delete(dir, d_inode(dentry), dentry);
}
/*
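
A sketch of the intended caller pattern in a filesystem ->unlink(); foo_unlink() is hypothetical:

static int foo_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);

	/* ... remove the on-disk directory entry ... */
	drop_nlink(inode);
	/*
	 * d_delete_notify() holds an inode reference across d_delete(), so
	 * the FS_DELETE event is delivered before the name can be looked up
	 * again.
	 */
	d_delete_notify(dir, dentry);
	return 0;
}
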
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
deleted file mode 100644
index 6906a45bc761..000000000000
--- a/include/linux/genhd.h
+++ /dev/null
@@ -1,291 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_GENHD_H
-#define _LINUX_GENHD_H
-
-/*
- * genhd.h Copyright (C) 1992 Drew Eckhardt
- * Generic hard disk header file by
- * Drew Eckhardt
- *
- * <drew@colorado.edu>
- */
-
-#include <linux/types.h>
-#include <linux/kdev_t.h>
-#include <linux/uuid.h>
-#include <linux/blk_types.h>
-#include <linux/device.h>
-#include <linux/xarray.h>
-
-extern const struct device_type disk_type;
-extern struct device_type part_type;
-extern struct class block_class;
-
-#define DISK_MAX_PARTS 256
-#define DISK_NAME_LEN 32
-
-#define PARTITION_META_INFO_VOLNAMELTH 64
-/*
- * Enough for the string representation of any kind of UUID plus NULL.
- * EFI UUID is 36 characters. MSDOS UUID is 11 characters.
- */
-#define PARTITION_META_INFO_UUIDLTH (UUID_STRING_LEN + 1)
-
-struct partition_meta_info {
- char uuid[PARTITION_META_INFO_UUIDLTH];
- u8 volname[PARTITION_META_INFO_VOLNAMELTH];
-};
-
-/**
- * DOC: genhd capability flags
- *
- * ``GENHD_FL_REMOVABLE``: indicates that the block device gives access to
- * removable media. When set, the device remains present even when media is not
- * inserted. Shall not be set for devices which are removed entirely when the
- * media is removed.
- *
- * ``GENHD_FL_HIDDEN``: the block device is hidden; it doesn't produce events,
- * doesn't appear in sysfs, and can't be opened from userspace or using
- * blkdev_get*. Used for the underlying components of multipath devices.
- *
- * ``GENHD_FL_NO_PART``: partition support is disabled. The kernel will not
- * scan for partitions from add_disk, and users can't add partitions manually.
- *
- */
-enum {
- GENHD_FL_REMOVABLE = 1 << 0,
- GENHD_FL_HIDDEN = 1 << 1,
- GENHD_FL_NO_PART = 1 << 2,
-};
-
-enum {
- DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */
- DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */
-};
-
-enum {
- /* Poll even if events_poll_msecs is unset */
- DISK_EVENT_FLAG_POLL = 1 << 0,
- /* Forward events to udev */
- DISK_EVENT_FLAG_UEVENT = 1 << 1,
- /* Block event polling when open for exclusive write */
- DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE = 1 << 2,
-};
-
-struct disk_events;
-struct badblocks;
-
-struct blk_integrity {
- const struct blk_integrity_profile *profile;
- unsigned char flags;
- unsigned char tuple_size;
- unsigned char interval_exp;
- unsigned char tag_size;
-};
-
-struct gendisk {
- /*
- * major/first_minor/minors should not be set by any new driver, the
- * block core will take care of allocating them automatically.
- */
- int major;
- int first_minor;
- int minors;
-
- char disk_name[DISK_NAME_LEN]; /* name of major driver */
-
- unsigned short events; /* supported events */
- unsigned short event_flags; /* flags related to event processing */
-
- struct xarray part_tbl;
- struct block_device *part0;
-
- const struct block_device_operations *fops;
- struct request_queue *queue;
- void *private_data;
-
- int flags;
- unsigned long state;
-#define GD_NEED_PART_SCAN 0
-#define GD_READ_ONLY 1
-#define GD_DEAD 2
-#define GD_NATIVE_CAPACITY 3
-
- struct mutex open_mutex; /* open/close mutex */
- unsigned open_partitions; /* number of open partitions */
-
- struct backing_dev_info *bdi;
- struct kobject *slave_dir;
-#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
- struct list_head slave_bdevs;
-#endif
- struct timer_rand_state *random;
- atomic_t sync_io; /* RAID */
- struct disk_events *ev;
-#ifdef CONFIG_BLK_DEV_INTEGRITY
- struct kobject integrity_kobj;
-#endif /* CONFIG_BLK_DEV_INTEGRITY */
-#if IS_ENABLED(CONFIG_CDROM)
- struct cdrom_device_info *cdi;
-#endif
- int node_id;
- struct badblocks *bb;
- struct lockdep_map lockdep_map;
- u64 diskseq;
-};
-
-static inline bool disk_live(struct gendisk *disk)
-{
- return !inode_unhashed(disk->part0->bd_inode);
-}
-
-/*
- * The gendisk is refcounted by the part0 block_device, and the bd_device
- * therein is also used for device model presentation in sysfs.
- */
-#define dev_to_disk(device) \
- (dev_to_bdev(device)->bd_disk)
-#define disk_to_dev(disk) \
- (&((disk)->part0->bd_device))
-
-#if IS_REACHABLE(CONFIG_CDROM)
-#define disk_to_cdi(disk) ((disk)->cdi)
-#else
-#define disk_to_cdi(disk) NULL
-#endif
-
-static inline dev_t disk_devt(struct gendisk *disk)
-{
- return MKDEV(disk->major, disk->first_minor);
-}
-
-void disk_uevent(struct gendisk *disk, enum kobject_action action);
-
-/* block/genhd.c */
-int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
- const struct attribute_group **groups);
-static inline int __must_check add_disk(struct gendisk *disk)
-{
- return device_add_disk(NULL, disk, NULL);
-}
-extern void del_gendisk(struct gendisk *gp);
-
-void invalidate_disk(struct gendisk *disk);
-
-void set_disk_ro(struct gendisk *disk, bool read_only);
-
-static inline int get_disk_ro(struct gendisk *disk)
-{
- return disk->part0->bd_read_only ||
- test_bit(GD_READ_ONLY, &disk->state);
-}
-
-static inline int bdev_read_only(struct block_device *bdev)
-{
- return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
-}
-
-extern void disk_block_events(struct gendisk *disk);
-extern void disk_unblock_events(struct gendisk *disk);
-extern void disk_flush_events(struct gendisk *disk, unsigned int mask);
-bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
-bool disk_force_media_change(struct gendisk *disk, unsigned int events);
-
-/* drivers/char/random.c */
-extern void add_disk_randomness(struct gendisk *disk) __latent_entropy;
-extern void rand_initialize_disk(struct gendisk *disk);
-
-static inline sector_t get_start_sect(struct block_device *bdev)
-{
- return bdev->bd_start_sect;
-}
-
-static inline sector_t bdev_nr_sectors(struct block_device *bdev)
-{
- return bdev->bd_nr_sectors;
-}
-
-static inline loff_t bdev_nr_bytes(struct block_device *bdev)
-{
- return (loff_t)bdev_nr_sectors(bdev) << SECTOR_SHIFT;
-}
-
-static inline sector_t get_capacity(struct gendisk *disk)
-{
- return bdev_nr_sectors(disk->part0);
-}
-
-static inline u64 sb_bdev_nr_blocks(struct super_block *sb)
-{
- return bdev_nr_sectors(sb->s_bdev) >>
- (sb->s_blocksize_bits - SECTOR_SHIFT);
-}
-
-int bdev_disk_changed(struct gendisk *disk, bool invalidate);
-void blk_drop_partitions(struct gendisk *disk);
-
-struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
- struct lock_class_key *lkclass);
-extern void put_disk(struct gendisk *disk);
-struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass);
-
-/**
- * blk_alloc_disk - allocate a gendisk structure
- * @node_id: numa node to allocate on
- *
- * Allocate and pre-initialize a gendisk structure for use with BIO based
- * drivers.
- *
- * Context: can sleep
- */
-#define blk_alloc_disk(node_id) \
-({ \
- static struct lock_class_key __key; \
- \
- __blk_alloc_disk(node_id, &__key); \
-})
-void blk_cleanup_disk(struct gendisk *disk);
-
-int __register_blkdev(unsigned int major, const char *name,
- void (*probe)(dev_t devt));
-#define register_blkdev(major, name) \
- __register_blkdev(major, name, NULL)
-void unregister_blkdev(unsigned int major, const char *name);
-
-bool bdev_check_media_change(struct block_device *bdev);
-int __invalidate_device(struct block_device *bdev, bool kill_dirty);
-void set_capacity(struct gendisk *disk, sector_t size);
-
-#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
-int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
-void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk);
-int bd_register_pending_holders(struct gendisk *disk);
-#else
-static inline int bd_link_disk_holder(struct block_device *bdev,
- struct gendisk *disk)
-{
- return 0;
-}
-static inline void bd_unlink_disk_holder(struct block_device *bdev,
- struct gendisk *disk)
-{
-}
-static inline int bd_register_pending_holders(struct gendisk *disk)
-{
- return 0;
-}
-#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */
-
-dev_t part_devt(struct gendisk *disk, u8 partno);
-void inc_diskseq(struct gendisk *disk);
-dev_t blk_lookup_devt(const char *name, int partno);
-void blk_request_module(dev_t devt);
-#ifdef CONFIG_BLOCK
-void printk_all_partitions(void);
-#else /* CONFIG_BLOCK */
-static inline void printk_all_partitions(void)
-{
-}
-#endif /* CONFIG_BLOCK */
-
-#endif /* _LINUX_GENHD_H */
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 80f63c862be5..20f6fbe12993 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -79,7 +79,7 @@ struct vm_area_struct;
* DOC: Page mobility and placement hints
*
* Page mobility and placement hints
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * ---------------------------------
*
* These flags provide hints about how mobile the page is. Pages with similar
* mobility are placed within the same pageblocks to minimise problems due
@@ -112,7 +112,7 @@ struct vm_area_struct;
* DOC: Watermark modifiers
*
* Watermark modifiers -- controls access to emergency reserves
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * ------------------------------------------------------------
*
* %__GFP_HIGH indicates that the caller is high-priority and that granting
* the request is necessary before the system can make forward progress.
@@ -144,7 +144,7 @@ struct vm_area_struct;
* DOC: Reclaim modifiers
*
* Reclaim modifiers
- * ~~~~~~~~~~~~~~~~~
+ * -----------------
* Please note that all the following flags are only applicable to sleepable
* allocations (e.g. %GFP_NOWAIT and %GFP_ATOMIC will ignore them).
*
@@ -224,7 +224,7 @@ struct vm_area_struct;
* DOC: Action modifiers
*
* Action modifiers
- * ~~~~~~~~~~~~~~~~
+ * ----------------
*
* %__GFP_NOWARN suppresses allocation failure reports.
*
@@ -256,7 +256,7 @@ struct vm_area_struct;
* DOC: Useful GFP flag combinations
*
* Useful GFP flag combinations
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * ----------------------------
*
* Useful GFP flag combinations that are commonly used. It is recommended
* that subsystems start with one of these combinations and then set/clear
diff --git a/include/linux/gfp_api.h b/include/linux/gfp_api.h
new file mode 100644
index 000000000000..5a05a2764a86
--- /dev/null
+++ b/include/linux/gfp_api.h
@@ -0,0 +1 @@
+#include <linux/gfp.h>
diff --git a/include/linux/hashtable_api.h b/include/linux/hashtable_api.h
new file mode 100644
index 000000000000..c268ac2c5c0e
--- /dev/null
+++ b/include/linux/hashtable_api.h
@@ -0,0 +1 @@
+#include <linux/hashtable.h>
diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index 0a0b2b09b1b8..a77be5630209 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -246,6 +246,16 @@ do { \
__kunmap_atomic(__addr); \
} while (0)
+/**
+ * kunmap_local - Unmap a page mapped via kmap_local_page().
+ * @__addr: An address within the page mapped
+ *
+ * @__addr can be any address within the mapped page. Commonly it is the
+ * address returned from kmap_local_page(), but it can also include offsets.
+ *
+ * Unmapping should be done in the reverse order of the mapping. See
+ * kmap_local_page() for details.
+ */
#define kunmap_local(__addr) \
do { \
BUILD_BUG_ON(__same_type((__addr), struct page *)); \
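
A short sketch of the pairing and the reverse-order rule; this mirrors how a highmem page copy would nest the mappings:

static void foo_copy_page(struct page *dst_page, struct page *src_page)
{
	void *src = kmap_local_page(src_page);
	void *dst = kmap_local_page(dst_page);

	memcpy(dst, src, PAGE_SIZE);
	kunmap_local(dst);	/* last mapped is unmapped first */
	kunmap_local(src);
}
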
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 2fd2e91d5107..d5a6f101f843 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -9,14 +9,9 @@
#ifndef LINUX_HMM_H
#define LINUX_HMM_H
-#include <linux/kconfig.h>
-#include <linux/pgtable.h>
+#include <linux/mm.h>
-#include <linux/device.h>
-#include <linux/migrate.h>
-#include <linux/memremap.h>
-#include <linux/completion.h>
-#include <linux/mmu_notifier.h>
+struct mmu_interval_notifier;
/*
* On output:
diff --git a/include/linux/hrtimer_api.h b/include/linux/hrtimer_api.h
new file mode 100644
index 000000000000..8d9700894468
--- /dev/null
+++ b/include/linux/hrtimer_api.h
@@ -0,0 +1 @@
+#include <linux/hrtimer.h>
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index e4c18ba8d3bf..0734aff8fa19 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -185,7 +185,7 @@ void prep_transhuge_page(struct page *page);
void free_transhuge_page(struct page *page);
bool is_transparent_hugepage(struct page *page);
-bool can_split_huge_page(struct page *page, int *pextra_pins);
+bool can_split_folio(struct folio *folio, int *pextra_pins);
int split_huge_page_to_list(struct page *page, struct list_head *list);
static inline int split_huge_page(struct page *page)
{
@@ -194,7 +194,7 @@ static inline int split_huge_page(struct page *page)
void deferred_split_huge_page(struct page *page);
void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long address, bool freeze, struct page *page);
+ unsigned long address, bool freeze, struct folio *folio);
#define split_huge_pmd(__vma, __pmd, __address) \
do { \
@@ -207,7 +207,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
- bool freeze, struct page *page);
+ bool freeze, struct folio *folio);
void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
unsigned long address);
@@ -251,30 +251,6 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
}
/**
- * thp_order - Order of a transparent huge page.
- * @page: Head page of a transparent huge page.
- */
-static inline unsigned int thp_order(struct page *page)
-{
- VM_BUG_ON_PGFLAGS(PageTail(page), page);
- if (PageHead(page))
- return HPAGE_PMD_ORDER;
- return 0;
-}
-
-/**
- * thp_nr_pages - The number of regular pages in this huge page.
- * @page: The head page of a huge page.
- */
-static inline int thp_nr_pages(struct page *page)
-{
- VM_BUG_ON_PGFLAGS(PageTail(page), page);
- if (PageHead(page))
- return HPAGE_PMD_NR;
- return 1;
-}
-
-/**
* folio_test_pmd_mappable - Can we map this folio with a PMD?
* @folio: The folio to test
*/
@@ -336,18 +312,6 @@ static inline struct list_head *page_deferred_list(struct page *page)
#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
-static inline unsigned int thp_order(struct page *page)
-{
- VM_BUG_ON_PGFLAGS(PageTail(page), page);
- return 0;
-}
-
-static inline int thp_nr_pages(struct page *page)
-{
- VM_BUG_ON_PGFLAGS(PageTail(page), page);
- return 1;
-}
-
static inline bool folio_test_pmd_mappable(struct folio *folio)
{
return false;
@@ -387,7 +351,7 @@ static inline bool is_transparent_hugepage(struct page *page)
#define thp_get_unmapped_area NULL
static inline bool
-can_split_huge_page(struct page *page, int *pextra_pins)
+can_split_folio(struct folio *folio, int *pextra_pins)
{
BUILD_BUG();
return false;
@@ -406,9 +370,9 @@ static inline void deferred_split_huge_page(struct page *page) {}
do { } while (0)
static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long address, bool freeze, struct page *page) {}
+ unsigned long address, bool freeze, struct folio *folio) {}
static inline void split_huge_pmd_address(struct vm_area_struct *vma,
- unsigned long address, bool freeze, struct page *page) {}
+ unsigned long address, bool freeze, struct folio *folio) {}
#define split_huge_pud(__vma, __pmd, __address) \
do { } while (0)
@@ -483,15 +447,10 @@ static inline bool thp_migration_supported(void)
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-/**
- * thp_size - Size of a transparent huge page.
- * @page: Head page of a transparent huge page.
- *
- * Return: Number of bytes in this page.
- */
-static inline unsigned long thp_size(struct page *page)
+static inline int split_folio_to_list(struct folio *folio,
+ struct list_head *list)
{
- return PAGE_SIZE << thp_order(page);
+ return split_huge_page_to_list(&folio->page, list);
}
#endif /* _LINUX_HUGE_MM_H */
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d1897a69c540..53c1b6082a4c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -754,7 +754,7 @@ static inline void arch_clear_hugepage_flags(struct page *page) { }
static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift,
vm_flags_t flags)
{
- return entry;
+ return pte_mkhuge(entry);
}
#endif
@@ -970,6 +970,11 @@ static inline struct hstate *page_hstate(struct page *page)
return NULL;
}
+static inline struct hstate *size_to_hstate(unsigned long size)
+{
+ return NULL;
+}
+
static inline unsigned long huge_page_size(struct hstate *h)
{
return PAGE_SIZE;
@@ -1075,12 +1080,6 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr
}
#endif /* CONFIG_HUGETLB_PAGE */
-#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
-extern bool hugetlb_free_vmemmap_enabled;
-#else
-#define hugetlb_free_vmemmap_enabled false
-#endif
-
static inline spinlock_t *huge_pte_lock(struct hstate *h,
struct mm_struct *mm, pte_t *pte)
{
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index 8e6dd908da21..aa1d4da03538 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -60,7 +60,5 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng);
/** Unregister a Hardware Random Number Generator driver. */
extern void hwrng_unregister(struct hwrng *rng);
extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng);
-/** Feed random bits into the pool. */
-extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy);
#endif /* LINUX_HWRANDOM_H_ */
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index fad1f1df26df..eba380b76d15 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -332,12 +332,14 @@ enum hwmon_pwm_attributes {
hwmon_pwm_enable,
hwmon_pwm_mode,
hwmon_pwm_freq,
+ hwmon_pwm_auto_channels_temp,
};
#define HWMON_PWM_INPUT BIT(hwmon_pwm_input)
#define HWMON_PWM_ENABLE BIT(hwmon_pwm_enable)
#define HWMON_PWM_MODE BIT(hwmon_pwm_mode)
#define HWMON_PWM_FREQ BIT(hwmon_pwm_freq)
+#define HWMON_PWM_AUTO_CHANNELS_TEMP BIT(hwmon_pwm_auto_channels_temp)
enum hwmon_intrusion_attributes {
hwmon_intrusion_alarm,
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index f565a8938836..fe2e0179ed51 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1262,6 +1262,7 @@ struct hv_device {
struct vmbus_channel *channel;
struct kset *channels_kset;
struct device_dma_parameters dma_parms;
+ u64 dma_mask;
/* place holder to keep track of the dir for hv device in debugfs */
struct dentry *debug_dir;
diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index b712217f7030..1ed52441972f 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -52,6 +52,7 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev)
case ARPHRD_VOID:
case ARPHRD_NONE:
case ARPHRD_RAWIP:
+ case ARPHRD_PIMREG:
return false;
default:
return true;
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 8420fe504927..2be4dd7e90a9 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -46,8 +46,10 @@ struct vlan_hdr {
* @h_vlan_encapsulated_proto: packet type ID or len
*/
struct vlan_ethhdr {
- unsigned char h_dest[ETH_ALEN];
- unsigned char h_source[ETH_ALEN];
+ struct_group(addrs,
+ unsigned char h_dest[ETH_ALEN];
+ unsigned char h_source[ETH_ALEN];
+ );
__be16 h_vlan_proto;
__be16 h_vlan_TCI;
__be16 h_vlan_encapsulated_proto;
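
The struct_group() lets both MAC addresses be copied with a single memcpy() whose bounds FORTIFY_SOURCE can check against the whole group; a sketch (foo_* hypothetical):

static void foo_copy_vlan_addrs(struct vlan_ethhdr *veth,
				const struct ethhdr *eth)
{
	/* h_dest and h_source of struct ethhdr are laid out the same way */
	BUILD_BUG_ON(sizeof(veth->addrs) != 2 * ETH_ALEN);
	memcpy(&veth->addrs, eth, sizeof(veth->addrs));
}
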
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 9367f1cb2e3c..f40754caaefa 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -579,7 +579,16 @@ enum
NR_SOFTIRQS
};
-#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ))
+/*
+ * The following vectors can be safely ignored after ksoftirqd is parked:
+ *
+ * _ RCU:
+ * 1) rcutree_migrate_callbacks() migrates the queue.
+ * 2) rcu_report_dead() reports the final quiescent states.
+ *
+ * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue
+ */
+#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ))
/* map softirq index to softirq name. update 'softirq_to_name' in
* kernel/softirq.c when adding a new softirq.
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 649a4d7c241b..1814e698d861 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -9,11 +9,14 @@
struct sock *io_uring_get_socket(struct file *file);
void __io_uring_cancel(bool cancel_all);
void __io_uring_free(struct task_struct *tsk);
+void io_uring_unreg_ringfd(void);
static inline void io_uring_files_cancel(void)
{
- if (current->io_uring)
+ if (current->io_uring) {
+ io_uring_unreg_ringfd();
__io_uring_cancel(false);
+ }
}
static inline void io_uring_task_cancel(void)
{
diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h
index e9dacd4b9f6b..af1c9d62e642 100644
--- a/include/linux/ioasid.h
+++ b/include/linux/ioasid.h
@@ -34,13 +34,16 @@ struct ioasid_allocator_ops {
#if IS_ENABLED(CONFIG_IOASID)
ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,
void *private);
-void ioasid_get(ioasid_t ioasid);
-bool ioasid_put(ioasid_t ioasid);
+void ioasid_free(ioasid_t ioasid);
void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
bool (*getter)(void *));
int ioasid_register_allocator(struct ioasid_allocator_ops *allocator);
void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator);
int ioasid_set_data(ioasid_t ioasid, void *data);
+static inline bool pasid_valid(ioasid_t ioasid)
+{
+ return ioasid != INVALID_IOASID;
+}
#else /* !CONFIG_IOASID */
static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min,
@@ -49,14 +52,7 @@ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min,
return INVALID_IOASID;
}
-static inline void ioasid_get(ioasid_t ioasid)
-{
-}
-
-static inline bool ioasid_put(ioasid_t ioasid)
-{
- return false;
-}
+static inline void ioasid_free(ioasid_t ioasid) { }
static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
bool (*getter)(void *))
@@ -78,5 +74,10 @@ static inline int ioasid_set_data(ioasid_t ioasid, void *data)
return -ENOTSUPP;
}
+static inline bool pasid_valid(ioasid_t ioasid)
+{
+ return false;
+}
+
#endif /* CONFIG_IOASID */
#endif /* __LINUX_IOASID_H */
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index b55bd49e55f5..b76f0dd149fb 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -227,12 +227,9 @@ ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
const struct iomap_ops *ops);
int iomap_readpage(struct page *page, const struct iomap_ops *ops);
void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
-int iomap_is_partially_uptodate(struct page *page, unsigned long from,
- unsigned long count);
+bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
int iomap_releasepage(struct page *page, gfp_t gfp_mask);
void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len);
-void iomap_invalidatepage(struct page *page, unsigned int offset,
- unsigned int len);
#ifdef CONFIG_MIGRATION
int iomap_migrate_page(struct address_space *mapping, struct page *newpage,
struct page *page, enum migrate_mode mode);
@@ -263,9 +260,11 @@ struct iomap_ioend {
struct list_head io_list; /* next ioend in chain */
u16 io_type;
u16 io_flags; /* IOMAP_F_* */
+ u32 io_folios; /* folios added to ioend */
struct inode *io_inode; /* file being written to */
size_t io_size; /* size of the extent */
loff_t io_offset; /* offset in the file */
+ sector_t io_sector; /* start sector of ioend */
struct bio *io_bio; /* bio being built */
struct bio io_inline_bio; /* MUST BE LAST! */
};
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 848e1e12c5c6..f92788ccdba2 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -456,7 +456,6 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
/**
* struct irq_chip - hardware interrupt chip descriptor
*
- * @parent_device: pointer to parent device for irqchip
* @name: name for /proc/interrupts
* @irq_startup: start up the interrupt (defaults to ->enable if NULL)
* @irq_shutdown: shut down the interrupt (defaults to ->disable if NULL)
@@ -503,7 +502,6 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
* @flags: chip specific flags
*/
struct irq_chip {
- struct device *parent_device;
const char *name;
unsigned int (*irq_startup)(struct irq_data *data);
void (*irq_shutdown)(struct irq_data *data);
@@ -712,10 +710,11 @@ extern struct irq_chip no_irq_chip;
extern struct irq_chip dummy_irq_chip;
extern void
-irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
+irq_set_chip_and_handler_name(unsigned int irq, const struct irq_chip *chip,
irq_flow_handler_t handle, const char *name);
-static inline void irq_set_chip_and_handler(unsigned int irq, struct irq_chip *chip,
+static inline void irq_set_chip_and_handler(unsigned int irq,
+ const struct irq_chip *chip,
irq_flow_handler_t handle)
{
irq_set_chip_and_handler_name(irq, chip, handle, NULL);
@@ -805,7 +804,7 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq)
}
/* Set/get chip/data for an IRQ: */
-extern int irq_set_chip(unsigned int irq, struct irq_chip *chip);
+extern int irq_set_chip(unsigned int irq, const struct irq_chip *chip);
extern int irq_set_handler_data(unsigned int irq, void *data);
extern int irq_set_chip_data(unsigned int irq, void *data);
extern int irq_set_irq_type(unsigned int irq, unsigned int type);
diff --git a/include/linux/irqchip/versatile-fpga.h b/include/linux/irqchip/versatile-fpga.h
deleted file mode 100644
index a978fc8c7996..000000000000
--- a/include/linux/irqchip/versatile-fpga.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef PLAT_FPGA_IRQ_H
-#define PLAT_FPGA_IRQ_H
-
-struct device_node;
-struct pt_regs;
-
-void fpga_handle_irq(struct pt_regs *regs);
-void fpga_irq_init(void __iomem *, const char *, int, int, u32,
- struct device_node *node);
-int fpga_irq_of_init(struct device_node *node,
- struct device_node *parent);
-
-#endif
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 93d270ca0c56..a77584593f7d 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -160,6 +160,7 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc)
int handle_irq_desc(struct irq_desc *desc);
int generic_handle_irq(unsigned int irq);
+int generic_handle_irq_safe(unsigned int irq);
#ifdef CONFIG_IRQ_DOMAIN
/*
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index d476405802e9..00d577f90883 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -151,6 +151,8 @@ struct irq_domain_chip_generic;
* @gc: Pointer to a list of generic chips. There is a helper function for
* setting up one or more generic chips for interrupt controllers
* drivers using the generic chip library which uses this pointer.
+ * @dev: Pointer to a device that the domain represents, and that will be
+ * used for power management purposes.
* @parent: Pointer to parent irq_domain to support hierarchy irq_domains
*
* Revmap data, used internally by irq_domain
@@ -171,6 +173,7 @@ struct irq_domain {
struct fwnode_handle *fwnode;
enum irq_domain_bus_token bus_token;
struct irq_domain_chip_generic *gc;
+ struct device *dev;
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
struct irq_domain *parent;
#endif
@@ -226,6 +229,13 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d)
return to_of_node(d->fwnode);
}
+static inline void irq_domain_set_pm_device(struct irq_domain *d,
+ struct device *dev)
+{
+ if (d)
+ d->dev = dev;
+}
+
#ifdef CONFIG_IRQ_DOMAIN
struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id,
const char *name, phys_addr_t *pa);
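
A sketch of an irqchip driver attaching its device after creating the domain; foo_domain_ops is hypothetical:

static int foo_irqchip_probe(struct platform_device *pdev)
{
	struct irq_domain *domain;

	domain = irq_domain_add_linear(pdev->dev.of_node, 32,
				       &foo_domain_ops, NULL);
	if (!domain)
		return -ENOMEM;
	/* let the irq core runtime-resume this device around irq operations */
	irq_domain_set_pm_device(domain, &pdev->dev);
	return 0;
}
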
@@ -469,7 +479,8 @@ int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest);
extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
unsigned int virq);
extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
- irq_hw_number_t hwirq, struct irq_chip *chip,
+ irq_hw_number_t hwirq,
+ const struct irq_chip *chip,
void *chip_data, irq_flow_handler_t handler,
void *handler_data, const char *handler_name);
extern void irq_domain_reset_irq_data(struct irq_data *irq_data);
@@ -512,7 +523,7 @@ extern int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain,
unsigned int virq,
irq_hw_number_t hwirq,
- struct irq_chip *chip,
+ const struct irq_chip *chip,
void *chip_data);
extern void irq_domain_free_irqs_common(struct irq_domain *domain,
unsigned int virq,
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index fd933c45281a..de9536680b2b 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -554,9 +554,6 @@ struct transaction_chp_stats_s {
* ->j_list_lock
*
* j_state_lock
- * ->t_handle_lock
- *
- * j_state_lock
* ->j_list_lock (journal_unmap_buffer)
*
*/
@@ -594,7 +591,7 @@ struct transaction_s
*/
unsigned long t_log_start;
- /*
+ /*
* Number of buffers on the t_buffers list [j_list_lock, no locks
* needed for jbd2 thread]
*/
@@ -1295,7 +1292,7 @@ struct journal_s
* Clean-up after fast commit or full commit. JBD2 calls this function
* after every commit operation.
*/
- void (*j_fc_cleanup_callback)(struct journal_s *journal, int);
+ void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid);
/**
* @j_fc_replay_callback:
@@ -1419,9 +1416,7 @@ extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *);
extern bool __jbd2_journal_refile_buffer(struct journal_head *);
extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *);
extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int);
-extern void __journal_free_buffer(struct journal_head *bh);
extern void jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int);
-extern void __journal_clean_data_list(transaction_t *transaction);
static inline void jbd2_file_log_bh(struct list_head *head, struct buffer_head *bh)
{
list_add_tail(&bh->b_assoc_buffers, head);
@@ -1486,9 +1481,6 @@ extern int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
struct buffer_head **bh_out,
sector_t blocknr);
-/* Transaction locking */
-extern void __wait_on_journal (journal_t *);
-
/* Transaction cache support */
extern void jbd2_journal_destroy_transaction_cache(void);
extern int __init jbd2_journal_init_transaction_cache(void);
@@ -1535,14 +1527,16 @@ void jbd2_journal_set_triggers(struct buffer_head *,
struct jbd2_buffer_trigger_type *type);
extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
extern int jbd2_journal_forget (handle_t *, struct buffer_head *);
-extern int jbd2_journal_invalidatepage(journal_t *,
- struct page *, unsigned int, unsigned int);
+int jbd2_journal_invalidate_folio(journal_t *, struct folio *,
+ size_t offset, size_t length);
extern int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page);
extern int jbd2_journal_stop(handle_t *);
extern int jbd2_journal_flush(journal_t *journal, unsigned int flags);
extern void jbd2_journal_lock_updates (journal_t *);
extern void jbd2_journal_unlock_updates (journal_t *);
+void jbd2_journal_wait_updates(journal_t *);
+
extern journal_t * jbd2_journal_init_dev(struct block_device *bdev,
struct block_device *fs_dev,
unsigned long long start, int len, int bsize);
@@ -1774,8 +1768,6 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal)
#define BJ_Reserved 4 /* Buffer is reserved for access by journal */
#define BJ_Types 5
-extern int jbd_blocks_per_page(struct inode *inode);
-
/* JBD uses a CRC32 checksum */
#define JBD_MAX_CHECKSUM_SIZE 4
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 48b9b2a82767..107751cc047b 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -82,10 +82,9 @@ extern bool static_key_initialized;
"%s(): static key '%pS' used before call to jump_label_init()", \
__func__, (key))
-#ifdef CONFIG_JUMP_LABEL
-
struct static_key {
atomic_t enabled;
+#ifdef CONFIG_JUMP_LABEL
/*
* Note:
* To make anonymous unions work with old compilers, the static
@@ -104,13 +103,9 @@ struct static_key {
struct jump_entry *entries;
struct static_key_mod *next;
};
+#endif /* CONFIG_JUMP_LABEL */
};
-#else
-struct static_key {
- atomic_t enabled;
-};
-#endif /* CONFIG_JUMP_LABEL */
#endif /* __ASSEMBLY__ */
#ifdef CONFIG_JUMP_LABEL
@@ -251,10 +246,10 @@ extern void static_key_disable_cpuslocked(struct static_key *key);
*/
#define STATIC_KEY_INIT_TRUE \
{ .enabled = { 1 }, \
- { .entries = (void *)JUMP_TYPE_TRUE } }
+ { .type = JUMP_TYPE_TRUE } }
#define STATIC_KEY_INIT_FALSE \
{ .enabled = { 0 }, \
- { .entries = (void *)JUMP_TYPE_FALSE } }
+ { .type = JUMP_TYPE_FALSE } }
#else /* !CONFIG_JUMP_LABEL */
diff --git a/include/linux/kasan-enabled.h b/include/linux/kasan-enabled.h
new file mode 100644
index 000000000000..6f612d69ea0c
--- /dev/null
+++ b/include/linux/kasan-enabled.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_KASAN_ENABLED_H
+#define _LINUX_KASAN_ENABLED_H
+
+#include <linux/static_key.h>
+
+#ifdef CONFIG_KASAN_HW_TAGS
+
+DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled);
+
+static __always_inline bool kasan_enabled(void)
+{
+ return static_branch_likely(&kasan_flag_enabled);
+}
+
+static inline bool kasan_hw_tags_enabled(void)
+{
+ return kasan_enabled();
+}
+
+#else /* CONFIG_KASAN_HW_TAGS */
+
+static inline bool kasan_enabled(void)
+{
+ return IS_ENABLED(CONFIG_KASAN);
+}
+
+static inline bool kasan_hw_tags_enabled(void)
+{
+ return false;
+}
+
+#endif /* CONFIG_KASAN_HW_TAGS */
+
+#endif /* LINUX_KASAN_ENABLED_H */
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 4a45562d8893..b6a93261c92a 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -3,6 +3,7 @@
#define _LINUX_KASAN_H
#include <linux/bug.h>
+#include <linux/kasan-enabled.h>
#include <linux/kernel.h>
#include <linux/static_key.h>
#include <linux/types.h>
@@ -83,33 +84,11 @@ static inline void kasan_disable_current(void) {}
#ifdef CONFIG_KASAN_HW_TAGS
-DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled);
-
-static __always_inline bool kasan_enabled(void)
-{
- return static_branch_likely(&kasan_flag_enabled);
-}
-
-static inline bool kasan_hw_tags_enabled(void)
-{
- return kasan_enabled();
-}
-
void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags);
void kasan_free_pages(struct page *page, unsigned int order);
#else /* CONFIG_KASAN_HW_TAGS */
-static inline bool kasan_enabled(void)
-{
- return IS_ENABLED(CONFIG_KASAN);
-}
-
-static inline bool kasan_hw_tags_enabled(void)
-{
- return false;
-}
-
static __always_inline void kasan_alloc_pages(struct page *page,
unsigned int order, gfp_t flags)
{
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 33f47a996513..a890428bcc1a 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -99,7 +99,7 @@ struct user;
extern int __cond_resched(void);
# define might_resched() __cond_resched()
-#elif defined(CONFIG_PREEMPT_DYNAMIC)
+#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
extern int __cond_resched(void);
@@ -110,6 +110,11 @@ static __always_inline void might_resched(void)
static_call_mod(might_resched)();
}
+#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
+
+extern int dynamic_might_resched(void);
+# define might_resched() dynamic_might_resched()
+
#else
# define might_resched() do { } while (0)
diff --git a/include/linux/kfence.h b/include/linux/kfence.h
index 4b5e3679a72c..f49e64222628 100644
--- a/include/linux/kfence.h
+++ b/include/linux/kfence.h
@@ -17,6 +17,8 @@
#include <linux/atomic.h>
#include <linux/static_key.h>
+extern unsigned long kfence_sample_interval;
+
/*
* We allocate an even number of pages, as it simplifies calculations to map
* address to metadata indices; effectively, the very first page serves as an
diff --git a/include/linux/kobject_api.h b/include/linux/kobject_api.h
new file mode 100644
index 000000000000..6e36a054c2d6
--- /dev/null
+++ b/include/linux/kobject_api.h
@@ -0,0 +1 @@
+#include <linux/kobject.h>
diff --git a/include/linux/kref_api.h b/include/linux/kref_api.h
new file mode 100644
index 000000000000..d67e554721d2
--- /dev/null
+++ b/include/linux/kref_api.h
@@ -0,0 +1 @@
+#include <linux/kref.h>
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index a38a5bca1ba5..0630e545f4cb 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -51,7 +51,7 @@ static inline void ksm_exit(struct mm_struct *mm)
struct page *ksm_might_need_to_copy(struct page *page,
struct vm_area_struct *vma, unsigned long address);
-void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
+void rmap_walk_ksm(struct folio *folio, const struct rmap_walk_control *rwc);
void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
#else /* !CONFIG_KSM */
@@ -78,8 +78,8 @@ static inline struct page *ksm_might_need_to_copy(struct page *page,
return page;
}
-static inline void rmap_walk_ksm(struct page *page,
- struct rmap_walk_control *rwc)
+static inline void rmap_walk_ksm(struct folio *folio,
+ const struct rmap_walk_control *rwc)
{
}
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 3df4ea04716f..de5d75bafd66 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -141,12 +141,6 @@ struct kthread_delayed_work {
struct timer_list timer;
};
-#define KTHREAD_WORKER_INIT(worker) { \
- .lock = __RAW_SPIN_LOCK_UNLOCKED((worker).lock), \
- .work_list = LIST_HEAD_INIT((worker).work_list), \
- .delayed_work_list = LIST_HEAD_INIT((worker).delayed_work_list),\
- }
-
#define KTHREAD_WORK_INIT(work, fn) { \
.node = LIST_HEAD_INIT((work).node), \
.func = (fn), \
@@ -158,9 +152,6 @@ struct kthread_delayed_work {
TIMER_IRQSAFE), \
}
-#define DEFINE_KTHREAD_WORKER(worker) \
- struct kthread_worker worker = KTHREAD_WORKER_INIT(worker)
-
#define DEFINE_KTHREAD_WORK(work, fn) \
struct kthread_work work = KTHREAD_WORK_INIT(work, fn)
@@ -168,19 +159,6 @@ struct kthread_delayed_work {
struct kthread_delayed_work dwork = \
KTHREAD_DELAYED_WORK_INIT(dwork, fn)
-/*
- * kthread_worker.lock needs its own lockdep class key when defined on
- * stack with lockdep enabled. Use the following macros in such cases.
- */
-#ifdef CONFIG_LOCKDEP
-# define KTHREAD_WORKER_INIT_ONSTACK(worker) \
- ({ kthread_init_worker(&worker); worker; })
-# define DEFINE_KTHREAD_WORKER_ONSTACK(worker) \
- struct kthread_worker worker = KTHREAD_WORKER_INIT_ONSTACK(worker)
-#else
-# define DEFINE_KTHREAD_WORKER_ONSTACK(worker) DEFINE_KTHREAD_WORKER(worker)
-#endif
-
extern void __kthread_init_worker(struct kthread_worker *worker,
const char *name, struct lock_class_key *key);
diff --git a/include/linux/ktime_api.h b/include/linux/ktime_api.h
new file mode 100644
index 000000000000..f697d493960f
--- /dev/null
+++ b/include/linux/ktime_api.h
@@ -0,0 +1 @@
+#include <linux/ktime.h>
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 06912d6b39d0..f11039944c08 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -29,7 +29,9 @@
#include <linux/refcount.h>
#include <linux/nospec.h>
#include <linux/notifier.h>
+#include <linux/ftrace.h>
#include <linux/hashtable.h>
+#include <linux/instrumentation.h>
#include <linux/interval_tree.h>
#include <linux/rbtree.h>
#include <linux/xarray.h>
@@ -368,8 +370,11 @@ struct kvm_vcpu {
u64 last_used_slot_gen;
};
-/* must be called with irqs disabled */
-static __always_inline void guest_enter_irqoff(void)
+/*
+ * Start accounting time towards a guest.
+ * Must be called before entering guest context.
+ */
+static __always_inline void guest_timing_enter_irqoff(void)
{
/*
* This is running in ioctl context so it's safe to assume that it's the
@@ -378,7 +383,18 @@ static __always_inline void guest_enter_irqoff(void)
instrumentation_begin();
vtime_account_guest_enter();
instrumentation_end();
+}
+/*
+ * Enter guest context and enter an RCU extended quiescent state.
+ *
+ * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is
+ * unsafe to use any code which may directly or indirectly use RCU, tracing
+ * (including IRQ flag tracing), or lockdep. All code in this period must be
+ * non-instrumentable.
+ */
+static __always_inline void guest_context_enter_irqoff(void)
+{
/*
* KVM does not hold any references to rcu protected data when it
* switches CPU into a guest mode. In fact switching to a guest mode
@@ -394,16 +410,79 @@ static __always_inline void guest_enter_irqoff(void)
}
}
-static __always_inline void guest_exit_irqoff(void)
+/*
+ * Deprecated. Architectures should move to guest_timing_enter_irqoff() and
+ * guest_state_enter_irqoff().
+ */
+static __always_inline void guest_enter_irqoff(void)
+{
+ guest_timing_enter_irqoff();
+ guest_context_enter_irqoff();
+}
+
+/**
+ * guest_state_enter_irqoff - Fixup state when entering a guest
+ *
+ * Entry to a guest will enable interrupts, but the kernel state is interrupts
+ * disabled when this is invoked. Also tell RCU about it.
+ *
+ * 1) Trace interrupts on state
+ * 2) Invoke context tracking if enabled to adjust RCU state
+ * 3) Tell lockdep that interrupts are enabled
+ *
+ * Invoked from architecture specific code before entering a guest.
+ * Must be called with interrupts disabled and the caller must be
+ * non-instrumentable.
+ * The caller has to invoke guest_timing_enter_irqoff() before this.
+ *
+ * Note: this is analogous to exit_to_user_mode().
+ */
+static __always_inline void guest_state_enter_irqoff(void)
+{
+ instrumentation_begin();
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ instrumentation_end();
+
+ guest_context_enter_irqoff();
+ lockdep_hardirqs_on(CALLER_ADDR0);
+}
+
+/*
+ * Exit guest context and exit an RCU extended quiescent state.
+ *
+ * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is
+ * unsafe to use any code which may directly or indirectly use RCU, tracing
+ * (including IRQ flag tracing), or lockdep. All code in this period must be
+ * non-instrumentable.
+ */
+static __always_inline void guest_context_exit_irqoff(void)
{
context_tracking_guest_exit();
+}
+/*
+ * Stop accounting time towards a guest.
+ * Must be called after exiting guest context.
+ */
+static __always_inline void guest_timing_exit_irqoff(void)
+{
instrumentation_begin();
/* Flush the guest cputime we spent on the guest */
vtime_account_guest_exit();
instrumentation_end();
}
+/*
+ * Deprecated. Architectures should move to guest_state_exit_irqoff() and
+ * guest_timing_exit_irqoff().
+ */
+static __always_inline void guest_exit_irqoff(void)
+{
+ guest_context_exit_irqoff();
+ guest_timing_exit_irqoff();
+}
+
static inline void guest_exit(void)
{
unsigned long flags;
@@ -413,6 +492,33 @@ static inline void guest_exit(void)
local_irq_restore(flags);
}
+/**
+ * guest_state_exit_irqoff - Establish state when returning from guest mode
+ *
+ * Entry from a guest disables interrupts, but guest mode is traced as
+ * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
+ *
+ * 1) Tell lockdep that interrupts are disabled
+ * 2) Invoke context tracking if enabled to reactivate RCU
+ * 3) Trace interrupts off state
+ *
+ * Invoked from architecture specific code after exiting a guest.
+ * Must be invoked with interrupts disabled and the caller must be
+ * non-instrumentable.
+ * The caller has to invoke guest_timing_exit_irqoff() after this.
+ *
+ * Note: this is analogous to enter_from_user_mode().
+ */
+static __always_inline void guest_state_exit_irqoff(void)
+{
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ guest_context_exit_irqoff();
+
+ instrumentation_begin();
+ trace_hardirqs_off_finish();
+ instrumentation_end();
+}
+
static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
{
/*
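
Putting the new helpers together, an architecture's run loop is expected to order them as below; arch_vcpu_run() is a hypothetical stand-in for the hardware entry:

static void foo_vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
	local_irq_disable();
	guest_timing_enter_irqoff();	/* start guest time accounting */
	guest_state_enter_irqoff();	/* enter the non-instrumentable window */
	arch_vcpu_run(vcpu);		/* hardware guest entry/exit */
	guest_state_exit_irqoff();	/* leave the non-instrumentable window */
	guest_timing_exit_irqoff();	/* flush guest cputime */
	local_irq_enable();
}
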
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 605756f645be..7f99b4d78822 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -380,6 +380,7 @@ enum {
ATA_HORKAGE_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */
ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */
ATA_HORKAGE_NO_ID_DEV_LOG = (1 << 28), /* Identify device log missing */
+ ATA_HORKAGE_NO_LOG_DIR = (1 << 29), /* Do not read log directory */
/* DMA mask for user DMA control: User visible values; DO NOT
renumber */
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index dbf8506decca..acb1ad2356f1 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -165,7 +165,18 @@
#ifndef SYM_END
#define SYM_END(name, sym_type) \
.type name sym_type ASM_NL \
- .size name, .-name
+ .set .L__sym_size_##name, .-name ASM_NL \
+ .size name, .L__sym_size_##name
+#endif
+
+/* SYM_ALIAS -- use only if you have to */
+#ifndef SYM_ALIAS
+#define SYM_ALIAS(alias, name, sym_type, linkage) \
+ linkage(alias) ASM_NL \
+ .set alias, name ASM_NL \
+ .type alias sym_type ASM_NL \
+ .set .L__sym_size_##alias, .L__sym_size_##name ASM_NL \
+ .size alias, .L__sym_size_##alias
#endif
/* === code annotations === */
@@ -200,30 +211,8 @@
SYM_ENTRY(name, linkage, SYM_A_NONE)
#endif
-/*
- * SYM_FUNC_START_LOCAL_ALIAS -- use where there are two local names for one
- * function
- */
-#ifndef SYM_FUNC_START_LOCAL_ALIAS
-#define SYM_FUNC_START_LOCAL_ALIAS(name) \
- SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)
-#endif
-
-/*
- * SYM_FUNC_START_ALIAS -- use where there are two global names for one
- * function
- */
-#ifndef SYM_FUNC_START_ALIAS
-#define SYM_FUNC_START_ALIAS(name) \
- SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
-#endif
-
/* SYM_FUNC_START -- use for global functions */
#ifndef SYM_FUNC_START
-/*
- * The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two
- * later.
- */
#define SYM_FUNC_START(name) \
SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
#endif
@@ -236,7 +225,6 @@
/* SYM_FUNC_START_LOCAL -- use for local functions */
#ifndef SYM_FUNC_START_LOCAL
-/* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */
#define SYM_FUNC_START_LOCAL(name) \
SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)
#endif
@@ -259,22 +247,39 @@
SYM_START(name, SYM_L_WEAK, SYM_A_NONE)
#endif
-/* SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function */
-#ifndef SYM_FUNC_END_ALIAS
-#define SYM_FUNC_END_ALIAS(name) \
- SYM_END(name, SYM_T_FUNC)
-#endif
-
/*
* SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START,
* SYM_FUNC_START_WEAK, ...
*/
#ifndef SYM_FUNC_END
-/* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */
#define SYM_FUNC_END(name) \
SYM_END(name, SYM_T_FUNC)
#endif
+/*
+ * SYM_FUNC_ALIAS -- define a global alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS
+#define SYM_FUNC_ALIAS(alias, name) \
+ SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_GLOBAL)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS_LOCAL -- define a local alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS_LOCAL
+#define SYM_FUNC_ALIAS_LOCAL(alias, name) \
+ SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_LOCAL)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS_WEAK -- define a weak global alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS_WEAK
+#define SYM_FUNC_ALIAS_WEAK(alias, name) \
+ SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK)
+#endif
+
/* SYM_CODE_START -- use for non-C (special) functions */
#ifndef SYM_CODE_START
#define SYM_CODE_START(name) \
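To illustrate the conversion this enables in assembly sources: the removed START/END_ALIAS pairs bracketed one body with two names, while the new SYM_FUNC_ALIAS* macros declare the alias after the body. A sketch (memcpy is used purely as a familiar example, not taken from this patch):

	/* Before: two names bracket one body. */
	SYM_FUNC_START_ALIAS(__memcpy)
	SYM_FUNC_START(memcpy)
		...
	SYM_FUNC_END(memcpy)
	SYM_FUNC_END_ALIAS(__memcpy)

	/* After: define the body once, then alias it. The alias must come
	 * after SYM_FUNC_END(), which records the .L__sym_size_* value that
	 * SYM_ALIAS copies for the alias's .size directive. */
	SYM_FUNC_START(memcpy)
		...
	SYM_FUNC_END(memcpy)
	SYM_FUNC_ALIAS(__memcpy, memcpy)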
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index 1b5fceb565df..b35968ee9fb5 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -11,6 +11,7 @@
#include <linux/list.h>
#include <linux/nodemask.h>
#include <linux/shrinker.h>
+#include <linux/xarray.h>
struct mem_cgroup;
@@ -33,8 +34,8 @@ struct list_lru_one {
struct list_lru_memcg {
struct rcu_head rcu;
- /* array of per cgroup lists, indexed by memcg_cache_id */
- struct list_lru_one *lru[];
+ /* array of per cgroup per node lists, indexed by node id */
+ struct list_lru_one node[];
};
struct list_lru_node {
@@ -42,11 +43,7 @@ struct list_lru_node {
spinlock_t lock;
/* global list, used for the root cgroup in cgroup aware lrus */
struct list_lru_one lru;
-#ifdef CONFIG_MEMCG_KMEM
- /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */
- struct list_lru_memcg __rcu *memcg_lrus;
-#endif
- long nr_items;
+ long nr_items;
} ____cacheline_aligned_in_smp;
struct list_lru {
@@ -55,6 +52,7 @@ struct list_lru {
struct list_head list;
int shrinker_id;
bool memcg_aware;
+ struct xarray xa;
#endif
};
@@ -69,8 +67,9 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
#define list_lru_init_memcg(lru, shrinker) \
__list_lru_init((lru), true, NULL, shrinker)
-int memcg_update_all_list_lrus(int num_memcgs);
-void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg);
+int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
+ gfp_t gfp);
+void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent);
/**
* list_lru_add: add an element to the lru list's tail
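A sketch of how a caller is expected to use the replacement API, assuming a GFP_KERNEL-safe context (memcg_list_lru_alloc() and its signature are from this hunk; the surrounding function is made up):

	static int example_track_object(struct list_lru *lru,
					struct mem_cgroup *memcg,
					struct list_head *item)
	{
		int err;

		/* Allocate this memcg's per-node lists on demand... */
		err = memcg_list_lru_alloc(memcg, lru, GFP_KERNEL);
		if (err)
			return err;

		/* ...then queue the object; its memcg/node are resolved internally. */
		list_lru_add(lru, item);
		return 0;
	}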
diff --git a/include/linux/llist_api.h b/include/linux/llist_api.h
new file mode 100644
index 000000000000..625bec0393a1
--- /dev/null
+++ b/include/linux/llist_api.h
@@ -0,0 +1 @@
+#include <linux/llist.h>
diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h
index 975e33b793a7..6d635e8306d6 100644
--- a/include/linux/local_lock_internal.h
+++ b/include/linux/local_lock_internal.h
@@ -44,9 +44,9 @@ static inline void local_lock_debug_init(local_lock_t *l)
}
#else /* CONFIG_DEBUG_LOCK_ALLOC */
# define LOCAL_LOCK_DEBUG_INIT(lockname)
-static inline void local_lock_acquire(local_lock_t *l) { }
-static inline void local_lock_release(local_lock_t *l) { }
-static inline void local_lock_debug_init(local_lock_t *l) { }
+# define local_lock_acquire(__ll) do { typecheck(local_lock_t *, __ll); } while (0)
+# define local_lock_release(__ll) do { typecheck(local_lock_t *, __ll); } while (0)
+# define local_lock_debug_init(__ll) do { typecheck(local_lock_t *, __ll); } while (0)
#endif /* !CONFIG_DEBUG_LOCK_ALLOC */
#define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) }
diff --git a/include/linux/lockdep_api.h b/include/linux/lockdep_api.h
new file mode 100644
index 000000000000..907e66979ab2
--- /dev/null
+++ b/include/linux/lockdep_api.h
@@ -0,0 +1 @@
+#include <linux/lockdep.h>
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index a5a724c308d8..db924fe379c9 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -80,7 +80,7 @@ LSM_HOOK(int, 0, sb_clone_mnt_opts, const struct super_block *oldsb,
unsigned long *set_kern_flags)
LSM_HOOK(int, 0, move_mount, const struct path *from_path,
const struct path *to_path)
-LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry,
+LSM_HOOK(int, -EOPNOTSUPP, dentry_init_security, struct dentry *dentry,
int mode, const struct qstr *name, const char **xattr_name,
void **ctx, u32 *ctxlen)
LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode,
@@ -332,6 +332,8 @@ LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname,
struct sockaddr *address, int addrlen)
LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc,
struct sock *sk, struct sock *newsk)
+LSM_HOOK(int, 0, sctp_assoc_established, struct sctp_association *asoc,
+ struct sk_buff *skb)
#endif /* CONFIG_SECURITY_NETWORK */
#ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 3bf5c658bc44..419b5febc3ca 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1046,6 +1046,11 @@
* @asoc pointer to current sctp association structure.
* @sk pointer to current sock structure.
* @newsk pointer to new sock structure.
+ * @sctp_assoc_established:
+ * Passes the @asoc and @chunk->skb of the association COOKIE_ACK packet
+ * to the security module.
+ * @asoc pointer to sctp association structure.
+ * @skb pointer to skbuff of association packet.
*
* Security hooks for Infiniband
*
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b72d75141e12..a68dce3873fc 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -34,6 +34,7 @@ enum memcg_stat_item {
MEMCG_SOCK,
MEMCG_PERCPU_B,
MEMCG_VMALLOC,
+ MEMCG_KMEM,
MEMCG_NR_STAT,
};
@@ -219,7 +220,7 @@ struct obj_cgroup {
struct mem_cgroup *memcg;
atomic_t nr_charged_bytes;
union {
- struct list_head list;
+ struct list_head list; /* protected by objcg_lock */
struct rcu_head rcu;
};
};
@@ -315,7 +316,8 @@ struct mem_cgroup {
#ifdef CONFIG_MEMCG_KMEM
int kmemcg_id;
struct obj_cgroup __rcu *objcg;
- struct list_head objcg_list; /* list of inherited objcgs */
+ /* list of inherited objcgs, protected by objcg_lock */
+ struct list_head objcg_list;
#endif
MEMCG_PADDING(_pad2_);
@@ -522,6 +524,20 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page)
return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
+static inline struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg)
+{
+ struct mem_cgroup *memcg;
+
+ rcu_read_lock();
+retry:
+ memcg = obj_cgroup_memcg(objcg);
+ if (unlikely(!css_tryget(&memcg->css)))
+ goto retry;
+ rcu_read_unlock();
+
+ return memcg;
+}
+
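The retry loop above exists because obj_cgroup_memcg() is only stable under RCU: the objcg can be reparented while its old memcg is being offlined, in which case css_tryget() fails and the next iteration observes the new parent. A caller sketch (hypothetical usage, not from this hunk):

	struct mem_cgroup *memcg = get_mem_cgroup_from_objcg(objcg);

	/* ... account against memcg ... */

	css_put(&memcg->css);	/* drop the reference css_tryget() took */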
#ifdef CONFIG_MEMCG_KMEM
/*
* folio_memcg_kmem - Check if the folio has the memcg_kmem flag set.
@@ -840,9 +856,7 @@ static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
*/
static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
{
- if (!memcg->memory.parent)
- return NULL;
- return mem_cgroup_from_counter(memcg->memory.parent, memory);
+ return mem_cgroup_from_css(memcg->css.parent);
}
static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg,
@@ -1671,18 +1685,6 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size);
extern struct static_key_false memcg_kmem_enabled_key;
-extern int memcg_nr_cache_ids;
-void memcg_get_cache_ids(void);
-void memcg_put_cache_ids(void);
-
-/*
- * Helper macro to loop through all memcg-specific caches. Callers must still
- * check if the cache is valid (it is either valid or NULL).
- * the slab_mutex must be held when looping through those caches
- */
-#define for_each_memcg_cache_index(_idx) \
- for ((_idx) = 0; (_idx) < memcg_nr_cache_ids; (_idx)++)
-
static inline bool memcg_kmem_enabled(void)
{
return static_branch_likely(&memcg_kmem_enabled_key);
@@ -1706,7 +1708,7 @@ static inline void memcg_kmem_uncharge_page(struct page *page, int order)
* A helper for accessing memcg's kmem_id, used for getting
* corresponding LRU lists.
*/
-static inline int memcg_cache_id(struct mem_cgroup *memcg)
+static inline int memcg_kmem_id(struct mem_cgroup *memcg)
{
return memcg ? memcg->kmemcg_id : -1;
}
@@ -1739,27 +1741,16 @@ static inline void __memcg_kmem_uncharge_page(struct page *page, int order)
{
}
-#define for_each_memcg_cache_index(_idx) \
- for (; NULL; )
-
static inline bool memcg_kmem_enabled(void)
{
return false;
}
-static inline int memcg_cache_id(struct mem_cgroup *memcg)
+static inline int memcg_kmem_id(struct mem_cgroup *memcg)
{
return -1;
}
-static inline void memcg_get_cache_ids(void)
-{
-}
-
-static inline void memcg_put_cache_ids(void)
-{
-}
-
static inline struct mem_cgroup *mem_cgroup_from_obj(void *p)
{
return NULL;
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 88eb587b5143..aa619464a1df 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -70,6 +70,13 @@ struct memory_block {
unsigned long state; /* serialized by the dev->lock */
int online_type; /* for passing data to online routine */
int nid; /* NID for this memory block */
+ /*
+ * The single zone of this memory block, if all of its System RAM PFNs
+ * (i.e. not memory holes, not ZONE_DEVICE ranges) are managed by a
+ * single zone. NULL if multiple zones (including nodes) apply.
+ */
+ struct zone *zone;
struct device dev;
/*
* Number of vmemmap pages. These pages
@@ -161,6 +168,11 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
})
#define register_hotmemory_notifier(nb) register_memory_notifier(nb)
#define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb)
+
+#ifdef CONFIG_NUMA
+void memory_block_add_nid(struct memory_block *mem, int nid,
+ enum meminit_context context);
+#endif /* CONFIG_NUMA */
#endif /* CONFIG_MEMORY_HOTPLUG */
/*
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index be48e003a518..1ce6f8044f1e 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -16,6 +16,62 @@ struct memory_group;
struct resource;
struct vmem_altmap;
+#ifdef CONFIG_HAVE_ARCH_NODEDATA_EXTENSION
+/*
+ * To support node hot-add, a new pgdat has to be allocated.
+ *
+ * If an arch has a generic-style NODE_DATA(), node_data[nid] = kzalloc()
+ * works well, but whether it does depends on the architecture.
+ *
+ * In general, generic_alloc_nodedata() is used.
+ */
+extern pg_data_t *arch_alloc_nodedata(int nid);
+extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat);
+
+#else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
+
+#define arch_alloc_nodedata(nid) generic_alloc_nodedata(nid)
+
+#ifdef CONFIG_NUMA
+/*
+ * XXX: node-aware allocation can't be used for the new node's memory yet,
+ * because the pgdat for the new node is itself not allocated/initialized at
+ * this point. Using the new node's memory will need more consideration.
+ */
+#define generic_alloc_nodedata(nid) \
+({ \
+ memblock_alloc(sizeof(*pgdat), SMP_CACHE_BYTES); \
+})
+/*
+ * This definition is just for the error path of node hot-add.
+ * For node hot-remove, it has to be replaced.
+ */
+#define generic_free_nodedata(pgdat) kfree(pgdat)
+
+extern pg_data_t *node_data[];
+static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
+{
+ node_data[nid] = pgdat;
+}
+
+#else /* !CONFIG_NUMA */
+
+/* never called */
+static inline pg_data_t *generic_alloc_nodedata(int nid)
+{
+ BUG();
+ return NULL;
+}
+static inline void generic_free_nodedata(pg_data_t *pgdat)
+{
+}
+static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
+{
+}
+#endif /* CONFIG_NUMA */
+#endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
+
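For an arch that selects CONFIG_HAVE_ARCH_NODEDATA_EXTENSION, the contract now consists of just these two hooks (arch_free_nodedata() is removed by this patch). A hypothetical implementation, where my_node_data[] is a made-up per-arch array:

	pg_data_t *arch_alloc_nodedata(int nid)
	{
		return kzalloc(sizeof(pg_data_t), GFP_KERNEL);
	}

	void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
	{
		my_node_data[nid] = pgdat;
	}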
#ifdef CONFIG_MEMORY_HOTPLUG
struct page *pfn_to_online_page(unsigned long pfn);
@@ -107,8 +163,6 @@ extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages);
extern int online_pages(unsigned long pfn, unsigned long nr_pages,
struct zone *zone, struct memory_group *group);
-extern struct zone *test_pages_in_a_zone(unsigned long start_pfn,
- unsigned long end_pfn);
extern void __offline_isolated_pages(unsigned long start_pfn,
unsigned long end_pfn);
@@ -154,66 +208,6 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
struct mhp_params *params);
#endif /* ARCH_HAS_ADD_PAGES */
-#ifdef CONFIG_HAVE_ARCH_NODEDATA_EXTENSION
-/*
- * For supporting node-hotadd, we have to allocate a new pgdat.
- *
- * If an arch has generic style NODE_DATA(),
- * node_data[nid] = kzalloc() works well. But it depends on the architecture.
- *
- * In general, generic_alloc_nodedata() is used.
- * Now, arch_free_nodedata() is just defined for error path of node_hot_add.
- *
- */
-extern pg_data_t *arch_alloc_nodedata(int nid);
-extern void arch_free_nodedata(pg_data_t *pgdat);
-extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat);
-
-#else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
-
-#define arch_alloc_nodedata(nid) generic_alloc_nodedata(nid)
-#define arch_free_nodedata(pgdat) generic_free_nodedata(pgdat)
-
-#ifdef CONFIG_NUMA
-/*
- * If ARCH_HAS_NODEDATA_EXTENSION=n, this func is used to allocate pgdat.
- * XXX: kmalloc_node() can't work well to get new node's memory at this time.
- * Because, pgdat for the new node is not allocated/initialized yet itself.
- * To use new node's memory, more consideration will be necessary.
- */
-#define generic_alloc_nodedata(nid) \
-({ \
- kzalloc(sizeof(pg_data_t), GFP_KERNEL); \
-})
-/*
- * This definition is just for error path in node hotadd.
- * For node hotremove, we have to replace this.
- */
-#define generic_free_nodedata(pgdat) kfree(pgdat)
-
-extern pg_data_t *node_data[];
-static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
-{
- node_data[nid] = pgdat;
-}
-
-#else /* !CONFIG_NUMA */
-
-/* never called */
-static inline pg_data_t *generic_alloc_nodedata(int nid)
-{
- BUG();
- return NULL;
-}
-static inline void generic_free_nodedata(pg_data_t *pgdat)
-{
-}
-static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
-{
-}
-#endif /* CONFIG_NUMA */
-#endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
-
void get_online_mems(void);
void put_online_mems(void);
@@ -297,7 +291,7 @@ static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
extern void try_offline_node(int nid);
extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
- struct memory_group *group);
+ struct zone *zone, struct memory_group *group);
extern int remove_memory(u64 start, u64 size);
extern void __remove_memory(u64 start, u64 size);
extern int offline_and_remove_memory(u64 start, u64 size);
@@ -306,7 +300,7 @@ extern int offline_and_remove_memory(u64 start, u64 size);
static inline void try_offline_node(int nid) {}
static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
- struct memory_group *group)
+ struct zone *zone, struct memory_group *group)
{
return -EINVAL;
}
@@ -323,7 +317,7 @@ extern void set_zone_contiguous(struct zone *zone);
extern void clear_zone_contiguous(struct zone *zone);
#ifdef CONFIG_MEMORY_HOTPLUG
-extern void __ref free_area_init_core_hotplug(int nid);
+extern void __ref free_area_init_core_hotplug(struct pglist_data *pgdat);
extern int __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags);
extern int add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags);
extern int add_memory_resource(int nid, struct resource *resource,
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 1fafcc38acba..8af304f6b504 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -1,6 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MEMREMAP_H_
#define _LINUX_MEMREMAP_H_
+
+#include <linux/mm.h>
#include <linux/range.h>
#include <linux/ioport.h>
#include <linux/percpu-refcount.h>
@@ -66,9 +68,9 @@ enum memory_type {
struct dev_pagemap_ops {
/*
- * Called once the page refcount reaches 1. (ZONE_DEVICE pages never
- * reach 0 refcount unless there is a refcount bug. This allows the
- * device driver to implement its own memory management.)
+ * Called once the page refcount reaches 0. The reference count will be
+ * reset to one by the core code after the method is called to prepare
+ * for handing out the page again.
*/
void (*page_free)(struct page *page);
@@ -129,6 +131,25 @@ static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap)
return 1 << pgmap->vmemmap_shift;
}
+static inline bool is_device_private_page(const struct page *page)
+{
+ return IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&
+ is_zone_device_page(page) &&
+ page->pgmap->type == MEMORY_DEVICE_PRIVATE;
+}
+
+static inline bool folio_is_device_private(const struct folio *folio)
+{
+ return is_device_private_page(&folio->page);
+}
+
+static inline bool is_pci_p2pdma_page(const struct page *page)
+{
+ return IS_ENABLED(CONFIG_PCI_P2PDMA) &&
+ is_zone_device_page(page) &&
+ page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
+}
+
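These predicates (relocated here from mm.h later in this patch) let ZONE_DEVICE users branch on the pgmap type without dereferencing page->pgmap themselves. An illustrative classifier, not from this patch:

	static const char *example_devpage_kind(const struct page *page)
	{
		if (is_device_private_page(page))
			return "device-private";	/* CPU-inaccessible driver memory */
		if (is_pci_p2pdma_page(page))
			return "pci-p2pdma";		/* PCI peer-to-peer DMA window */
		return "other";
	}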
#ifdef CONFIG_ZONE_DEVICE
void *memremap_pages(struct dev_pagemap *pgmap, int nid);
void memunmap_pages(struct dev_pagemap *pgmap);
diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h
index 39967a5eca6d..ea4a4b1b246a 100644
--- a/include/linux/mfd/lpc_ich.h
+++ b/include/linux/mfd/lpc_ich.h
@@ -8,7 +8,7 @@
#ifndef LPC_ICH_H
#define LPC_ICH_H
-#include <linux/platform_data/x86/intel-spi.h>
+#include <linux/platform_data/x86/spi-intel.h>
/* GPIO resources */
#define ICH_RES_GPIO 0
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index db96e10eb8da..90e75d5a54d6 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -48,7 +48,15 @@ int folio_migrate_mapping(struct address_space *mapping,
struct folio *newfolio, struct folio *folio, int extra_count);
extern bool numa_demotion_enabled;
+extern void migrate_on_reclaim_init(void);
+#ifdef CONFIG_HOTPLUG_CPU
+extern void set_migration_target_nodes(void);
#else
+static inline void set_migration_target_nodes(void) {}
+#endif
+#else
+
+static inline void set_migration_target_nodes(void) {}
static inline void putback_movable_pages(struct list_head *l) {}
static inline int migrate_pages(struct list_head *l, new_page_t new,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 598ac3bcc901..49a48d7709ac 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -3434,7 +3434,6 @@ enum {
enum {
MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO = BIT(0),
MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO = BIT(1),
- MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO = BIT(2),
};
enum {
@@ -9900,8 +9899,8 @@ struct mlx5_ifc_bufferx_reg_bits {
u8 reserved_at_0[0x6];
u8 lossy[0x1];
u8 epsb[0x1];
- u8 reserved_at_8[0xc];
- u8 size[0xc];
+ u8 reserved_at_8[0x8];
+ u8 size[0x10];
u8 xoff_threshold[0x10];
u8 xon_threshold[0x10];
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e1a84b1e6787..7a3dd7e617e4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3,9 +3,6 @@
#define _LINUX_MM_H
#include <linux/errno.h>
-
-#ifdef __KERNEL__
-
#include <linux/mmdebug.h>
#include <linux/gfp.h>
#include <linux/bug.h>
@@ -26,7 +23,6 @@
#include <linux/err.h>
#include <linux/page-flags.h>
#include <linux/page_ref.h>
-#include <linux/memremap.h>
#include <linux/overflow.h>
#include <linux/sizes.h>
#include <linux/sched.h>
@@ -216,8 +212,10 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
+#define folio_page_idx(folio, p) (page_to_pfn(p) - folio_pfn(folio))
#else
#define nth_page(page,n) ((page) + (n))
+#define folio_page_idx(folio, p) ((p) - &(folio)->page)
#endif
/* to align the pointer to the (next) page boundary */
@@ -227,6 +225,10 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
#define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE)
#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
+static inline struct folio *lru_to_folio(struct list_head *head)
+{
+ return list_entry((head)->prev, struct folio, lru);
+}
void setup_initial_init_mm(void *start_code, void *end_code,
void *end_data, void *brk);
@@ -478,7 +480,8 @@ struct vm_fault {
struct vm_area_struct *vma; /* Target VMA */
gfp_t gfp_mask; /* gfp mask to be used for allocations */
pgoff_t pgoff; /* Logical page offset based on vma */
- unsigned long address; /* Faulting virtual address */
+ unsigned long address; /* Faulting virtual address - masked */
+ unsigned long real_address; /* Faulting virtual address - unmasked */
};
enum fault_flag flags; /* FAULT_FLAG_xxx flags
* XXX: should really be 'const' */
@@ -774,21 +777,26 @@ static inline int is_vmalloc_or_module_addr(const void *x)
}
#endif
-static inline int head_compound_mapcount(struct page *head)
+/*
+ * How many times the entire folio is mapped as a single unit (e.g. by a
+ * PMD or PUD entry). This is probably not what you want, except for
+ * debugging purposes; look at folio_mapcount() or page_mapcount()
+ * instead.
+ */
+static inline int folio_entire_mapcount(struct folio *folio)
{
- return atomic_read(compound_mapcount_ptr(head)) + 1;
+ VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
+ return atomic_read(folio_mapcount_ptr(folio)) + 1;
}
/*
* Mapcount of compound page as a whole, does not include mapped sub-pages.
*
- * Must be called only for compound pages or any their tail sub-pages.
+ * Must be called only for compound pages.
*/
static inline int compound_mapcount(struct page *page)
{
- VM_BUG_ON_PAGE(!PageCompound(page), page);
- page = compound_head(page);
- return head_compound_mapcount(page);
+ return folio_entire_mapcount(page_folio(page));
}
/*
@@ -818,8 +826,14 @@ static inline int page_mapcount(struct page *page)
return atomic_read(&page->_mapcount) + 1;
}
+int folio_mapcount(struct folio *folio);
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-int total_mapcount(struct page *page);
+static inline int total_mapcount(struct page *page)
+{
+ return folio_mapcount(page_folio(page));
+}
+
int page_trans_huge_mapcount(struct page *page);
#else
static inline int total_mapcount(struct page *page)
@@ -889,33 +903,17 @@ static inline void destroy_compound_page(struct page *page)
compound_page_dtors[page[1].compound_dtor](page);
}
-static inline bool hpage_pincount_available(struct page *page)
-{
- /*
- * Can the page->hpage_pinned_refcount field be used? That field is in
- * the 3rd page of the compound page, so the smallest (2-page) compound
- * pages cannot support it.
- */
- page = compound_head(page);
- return PageCompound(page) && compound_order(page) > 1;
-}
-
static inline int head_compound_pincount(struct page *head)
{
return atomic_read(compound_pincount_ptr(head));
}
-static inline int compound_pincount(struct page *page)
-{
- VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
- page = compound_head(page);
- return head_compound_pincount(page);
-}
-
static inline void set_compound_order(struct page *page, unsigned int order)
{
page[1].compound_order = order;
+#ifdef CONFIG_64BIT
page[1].compound_nr = 1U << order;
+#endif
}
/* Returns the number of pages in this potentially compound page. */
@@ -923,7 +921,11 @@ static inline unsigned long compound_nr(struct page *page)
{
if (!PageHead(page))
return 1;
+#ifdef CONFIG_64BIT
return page[1].compound_nr;
+#else
+ return 1UL << compound_order(page);
+#endif
}
/* Returns the number of bytes in this potentially compound page. */
@@ -938,6 +940,37 @@ static inline unsigned int page_shift(struct page *page)
return PAGE_SHIFT + compound_order(page);
}
+/**
+ * thp_order - Order of a transparent huge page.
+ * @page: Head page of a transparent huge page.
+ */
+static inline unsigned int thp_order(struct page *page)
+{
+ VM_BUG_ON_PGFLAGS(PageTail(page), page);
+ return compound_order(page);
+}
+
+/**
+ * thp_nr_pages - The number of regular pages in this huge page.
+ * @page: The head page of a huge page.
+ */
+static inline int thp_nr_pages(struct page *page)
+{
+ VM_BUG_ON_PGFLAGS(PageTail(page), page);
+ return compound_nr(page);
+}
+
+/**
+ * thp_size - Size of a transparent huge page.
+ * @page: Head page of a transparent huge page.
+ *
+ * Return: Number of bytes in this page.
+ */
+static inline unsigned long thp_size(struct page *page)
+{
+ return PAGE_SIZE << thp_order(page);
+}
+
void free_compound_page(struct page *page);
#ifdef CONFIG_MMU
@@ -1089,59 +1122,35 @@ static inline bool is_zone_device_page(const struct page *page)
}
#endif
+static inline bool folio_is_zone_device(const struct folio *folio)
+{
+ return is_zone_device_page(&folio->page);
+}
+
static inline bool is_zone_movable_page(const struct page *page)
{
return page_zonenum(page) == ZONE_MOVABLE;
}
-#ifdef CONFIG_DEV_PAGEMAP_OPS
-void free_devmap_managed_page(struct page *page);
+#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX)
DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
-static inline bool page_is_devmap_managed(struct page *page)
+bool __put_devmap_managed_page(struct page *page);
+static inline bool put_devmap_managed_page(struct page *page)
{
if (!static_branch_unlikely(&devmap_managed_key))
return false;
if (!is_zone_device_page(page))
return false;
- switch (page->pgmap->type) {
- case MEMORY_DEVICE_PRIVATE:
- case MEMORY_DEVICE_FS_DAX:
- return true;
- default:
- break;
- }
- return false;
+ return __put_devmap_managed_page(page);
}
-void put_devmap_managed_page(struct page *page);
-
-#else /* CONFIG_DEV_PAGEMAP_OPS */
-static inline bool page_is_devmap_managed(struct page *page)
+#else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
+static inline bool put_devmap_managed_page(struct page *page)
{
return false;
}
-
-static inline void put_devmap_managed_page(struct page *page)
-{
-}
-#endif /* CONFIG_DEV_PAGEMAP_OPS */
-
-static inline bool is_device_private_page(const struct page *page)
-{
- return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
- IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&
- is_zone_device_page(page) &&
- page->pgmap->type == MEMORY_DEVICE_PRIVATE;
-}
-
-static inline bool is_pci_p2pdma_page(const struct page *page)
-{
- return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
- IS_ENABLED(CONFIG_PCI_P2PDMA) &&
- is_zone_device_page(page) &&
- page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
-}
+#endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
/* 127: arbitrary random number, small enough to assemble well */
#define folio_ref_zero_or_close_to_overflow(folio) \
@@ -1167,9 +1176,6 @@ static inline void get_page(struct page *page)
}
bool __must_check try_grab_page(struct page *page, unsigned int flags);
-struct page *try_grab_compound_head(struct page *page, int refs,
- unsigned int flags);
-
static inline __must_check bool try_get_page(struct page *page)
{
@@ -1224,16 +1230,11 @@ static inline void put_page(struct page *page)
struct folio *folio = page_folio(page);
/*
- * For devmap managed pages we need to catch refcount transition from
- * 2 to 1, when refcount reach one it means the page is free and we
- * need to inform the device driver through callback. See
- * include/linux/memremap.h and HMM for details.
+ * For some devmap managed pages we need to catch refcount transition
+ * from 2 to 1:
*/
- if (page_is_devmap_managed(&folio->page)) {
- put_devmap_managed_page(&folio->page);
+ if (put_devmap_managed_page(&folio->page))
return;
- }
-
folio_put(folio);
}
@@ -1263,10 +1264,9 @@ static inline void put_page(struct page *page)
* applications that don't have huge page reference counts, this won't be an
* issue.
*
- * Locking: the lockless algorithm described in page_cache_get_speculative()
- * and page_cache_gup_pin_speculative() provides safe operation for
- * get_user_pages and page_mkclean and other calls that race to set up page
- * table entries.
+ * Locking: the lockless algorithm described in folio_try_get_rcu()
+ * provides safe operation for get_user_pages(), page_mkclean() and
+ * other calls that race to set up page table entries.
*/
#define GUP_PIN_COUNTING_BIAS (1U << 10)
@@ -1277,70 +1277,11 @@ void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages,
bool make_dirty);
void unpin_user_pages(struct page **pages, unsigned long npages);
-/**
- * page_maybe_dma_pinned - Report if a page is pinned for DMA.
- * @page: The page.
- *
- * This function checks if a page has been pinned via a call to
- * a function in the pin_user_pages() family.
- *
- * For non-huge pages, the return value is partially fuzzy: false is not fuzzy,
- * because it means "definitely not pinned for DMA", but true means "probably
- * pinned for DMA, but possibly a false positive due to having at least
- * GUP_PIN_COUNTING_BIAS worth of normal page references".
- *
- * False positives are OK, because: a) it's unlikely for a page to get that many
- * refcounts, and b) all the callers of this routine are expected to be able to
- * deal gracefully with a false positive.
- *
- * For huge pages, the result will be exactly correct. That's because we have
- * more tracking data available: the 3rd struct page in the compound page is
- * used to track the pincount (instead using of the GUP_PIN_COUNTING_BIAS
- * scheme).
- *
- * For more information, please see Documentation/core-api/pin_user_pages.rst.
- *
- * Return: True, if it is likely that the page has been "dma-pinned".
- * False, if the page is definitely not dma-pinned.
- */
-static inline bool page_maybe_dma_pinned(struct page *page)
-{
- if (hpage_pincount_available(page))
- return compound_pincount(page) > 0;
-
- /*
- * page_ref_count() is signed. If that refcount overflows, then
- * page_ref_count() returns a negative value, and callers will avoid
- * further incrementing the refcount.
- *
- * Here, for that overflow case, use the signed bit to count a little
- * bit higher via unsigned math, and thus still get an accurate result.
- */
- return ((unsigned int)page_ref_count(compound_head(page))) >=
- GUP_PIN_COUNTING_BIAS;
-}
-
static inline bool is_cow_mapping(vm_flags_t flags)
{
return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
}
-/*
- * This should most likely only be called during fork() to see whether we
- * should break the cow immediately for a page on the src mm.
- */
-static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
- struct page *page)
-{
- if (!is_cow_mapping(vma->vm_flags))
- return false;
-
- if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags))
- return false;
-
- return page_maybe_dma_pinned(page);
-}
-
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define SECTION_IN_PAGE_FLAGS
#endif
@@ -1506,11 +1447,18 @@ static inline u8 page_kasan_tag(const struct page *page)
static inline void page_kasan_tag_set(struct page *page, u8 tag)
{
- if (kasan_enabled()) {
- tag ^= 0xff;
- page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT);
- page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT;
- }
+ unsigned long old_flags, flags;
+
+ if (!kasan_enabled())
+ return;
+
+ tag ^= 0xff;
+ old_flags = READ_ONCE(page->flags);
+ do {
+ flags = old_flags;
+ flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT);
+ flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT;
+ } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags)));
}
static inline void page_kasan_tag_reset(struct page *page)
@@ -1578,6 +1526,74 @@ static inline unsigned long folio_pfn(struct folio *folio)
return page_to_pfn(&folio->page);
}
+static inline atomic_t *folio_pincount_ptr(struct folio *folio)
+{
+ return &folio_page(folio, 1)->compound_pincount;
+}
+
+/**
+ * folio_maybe_dma_pinned - Report if a folio may be pinned for DMA.
+ * @folio: The folio.
+ *
+ * This function checks if a folio has been pinned via a call to
+ * a function in the pin_user_pages() family.
+ *
+ * For small folios, the return value is partially fuzzy: false is not fuzzy,
+ * because it means "definitely not pinned for DMA", but true means "probably
+ * pinned for DMA, but possibly a false positive due to having at least
+ * GUP_PIN_COUNTING_BIAS worth of normal folio references".
+ *
+ * False positives are OK, because: a) it's unlikely for a folio to
+ * get that many refcounts, and b) all the callers of this routine are
+ * expected to be able to deal gracefully with a false positive.
+ *
+ * For large folios, the result will be exactly correct. That's because
+ * we have more tracking data available: the compound_pincount is used
+ * instead of the GUP_PIN_COUNTING_BIAS scheme.
+ *
+ * For more information, please see Documentation/core-api/pin_user_pages.rst.
+ *
+ * Return: True, if it is likely that the page has been "dma-pinned".
+ * False, if the page is definitely not dma-pinned.
+ */
+static inline bool folio_maybe_dma_pinned(struct folio *folio)
+{
+ if (folio_test_large(folio))
+ return atomic_read(folio_pincount_ptr(folio)) > 0;
+
+ /*
+ * folio_ref_count() is signed. If that refcount overflows, then
+ * folio_ref_count() returns a negative value, and callers will avoid
+ * further incrementing the refcount.
+ *
+ * Here, for that overflow case, use the sign bit to count a little
+ * bit higher via unsigned math, and thus still get an accurate result.
+ */
+ return ((unsigned int)folio_ref_count(folio)) >=
+ GUP_PIN_COUNTING_BIAS;
+}
+
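A worked example of the bias scheme for a small folio: each pin_user_pages() pin adds GUP_PIN_COUNTING_BIAS (1 << 10 = 1024) to the refcount, so one pin plus a few ordinary references gives roughly 1025..1030 and the check reports true, while ~1024 ordinary references with no pin would be the tolerated false positive described above. A typical (hypothetical) caller:

	static bool example_safe_to_migrate(struct folio *folio)
	{
		/* Don't move memory a driver may be doing DMA to/from. */
		return !folio_maybe_dma_pinned(folio);
	}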
+static inline bool page_maybe_dma_pinned(struct page *page)
+{
+ return folio_maybe_dma_pinned(page_folio(page));
+}
+
+/*
+ * This should most likely only be called during fork() to see whether we
+ * should break the cow immediately for a page on the src mm.
+ */
+static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
+ struct page *page)
+{
+ if (!is_cow_mapping(vma->vm_flags))
+ return false;
+
+ if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags))
+ return false;
+
+ return page_maybe_dma_pinned(page);
+}
+
/* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */
#ifdef CONFIG_MIGRATION
static inline bool is_pinnable_page(struct page *page)
@@ -1592,6 +1608,11 @@ static inline bool is_pinnable_page(struct page *page)
}
#endif
+static inline bool folio_is_pinnable(struct folio *folio)
+{
+ return is_pinnable_page(&folio->page);
+}
+
static inline void set_page_zone(struct page *page, enum zone_type zone)
{
page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
@@ -1741,7 +1762,6 @@ static inline void *folio_address(const struct folio *folio)
}
extern void *page_rmapping(struct page *page);
-extern struct anon_vma *page_anon_vma(struct page *page);
extern pgoff_t __page_file_index(struct page *page);
/*
@@ -1847,7 +1867,6 @@ extern void truncate_setsize(struct inode *inode, loff_t newsize);
void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
int generic_error_remove_page(struct address_space *mapping, struct page *page);
-int invalidate_inode_page(struct page *page);
#ifdef CONFIG_MMU
extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
@@ -1909,10 +1928,6 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
long pin_user_pages(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas);
-long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages, int *locked);
-long pin_user_pages_locked(unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages, int *locked);
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags);
long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
@@ -1932,9 +1947,6 @@ int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
struct page **pages);
struct page *get_dump_page(unsigned long addr);
-extern void do_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length);
-
bool folio_mark_dirty(struct folio *folio);
bool set_page_dirty(struct page *page);
int set_page_dirty_lock(struct page *page);
@@ -2446,7 +2458,6 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
}
extern void __init pagecache_init(void);
-extern void __init free_area_init_memoryless_node(int nid);
extern void free_initmem(void);
/*
@@ -2619,7 +2630,7 @@ static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start,
extern struct vm_area_struct *vma_merge(struct mm_struct *,
struct vm_area_struct *prev, unsigned long addr, unsigned long end,
unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
- struct mempolicy *, struct vm_userfaultfd_ctx, const char *);
+ struct mempolicy *, struct vm_userfaultfd_ctx, struct anon_vma_name *);
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
unsigned long addr, int new_below);
@@ -2918,13 +2929,11 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */
#define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO
* and return without waiting upon it */
-#define FOLL_POPULATE 0x40 /* fault in pages (with FOLL_MLOCK) */
#define FOLL_NOFAULT 0x80 /* do not fault in pages */
#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */
#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */
#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */
#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */
-#define FOLL_MLOCK 0x1000 /* lock present pages */
#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
#define FOLL_COW 0x4000 /* internal GUP flag */
#define FOLL_ANON 0x8000 /* don't do file mappings */
@@ -3144,10 +3153,12 @@ static inline void print_vma_addr(char *prefix, unsigned long rip)
}
#endif
+#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
int vmemmap_remap_free(unsigned long start, unsigned long end,
unsigned long reuse);
int vmemmap_remap_alloc(unsigned long start, unsigned long end,
unsigned long reuse, gfp_t gfp_mask);
+#endif
void *sparse_buffer_alloc(unsigned long size);
struct page * __populate_section_memmap(unsigned long pfn,
@@ -3237,6 +3248,7 @@ enum mf_action_page_type {
MF_MSG_BUDDY,
MF_MSG_DAX,
MF_MSG_UNSPLIT_THP,
+ MF_MSG_DIFFERENT_PAGE_SIZE,
MF_MSG_UNKNOWN,
};
@@ -3365,14 +3377,14 @@ static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
#ifdef CONFIG_ANON_VMA_NAME
int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
- unsigned long len_in, const char *name);
+ unsigned long len_in,
+ struct anon_vma_name *anon_name);
#else
static inline int
madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
- unsigned long len_in, const char *name) {
+ unsigned long len_in, struct anon_vma_name *anon_name) {
return 0;
}
#endif
-#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
diff --git a/include/linux/mm_api.h b/include/linux/mm_api.h
new file mode 100644
index 000000000000..a5ace2b198b8
--- /dev/null
+++ b/include/linux/mm_api.h
@@ -0,0 +1 @@
+#include <linux/mm.h>
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index b725839dfe71..ac32125745ab 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -99,7 +99,8 @@ void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio)
update_lru_size(lruvec, lru, folio_zonenum(folio),
folio_nr_pages(folio));
- list_add(&folio->lru, &lruvec->lists[lru]);
+ if (lru != LRU_UNEVICTABLE)
+ list_add(&folio->lru, &lruvec->lists[lru]);
}
static __always_inline void add_page_to_lru_list(struct page *page,
@@ -115,6 +116,7 @@ void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio)
update_lru_size(lruvec, lru, folio_zonenum(folio),
folio_nr_pages(folio));
+ /* This is not expected to be used on LRU_UNEVICTABLE */
list_add_tail(&folio->lru, &lruvec->lists[lru]);
}
@@ -127,8 +129,11 @@ static __always_inline void add_page_to_lru_list_tail(struct page *page,
static __always_inline
void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio)
{
- list_del(&folio->lru);
- update_lru_size(lruvec, folio_lru_list(folio), folio_zonenum(folio),
+ enum lru_list lru = folio_lru_list(folio);
+
+ if (lru != LRU_UNEVICTABLE)
+ list_del(&folio->lru);
+ update_lru_size(lruvec, lru, folio_zonenum(folio),
-folio_nr_pages(folio));
}
@@ -140,50 +145,91 @@ static __always_inline void del_page_from_lru_list(struct page *page,
#ifdef CONFIG_ANON_VMA_NAME
/*
- * mmap_lock should be read-locked when calling vma_anon_name() and while using
- * the returned pointer.
+ * mmap_lock should be read-locked when calling anon_vma_name(). The caller
+ * should either keep holding the lock while using the returned pointer, or
+ * raise the anon_vma_name refcount before releasing the lock.
*/
-extern const char *vma_anon_name(struct vm_area_struct *vma);
+extern struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma);
+extern struct anon_vma_name *anon_vma_name_alloc(const char *name);
+extern void anon_vma_name_free(struct kref *kref);
-/*
- * mmap_lock should be read-locked for orig_vma->vm_mm.
- * mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be
- * isolated.
- */
-extern void dup_vma_anon_name(struct vm_area_struct *orig_vma,
- struct vm_area_struct *new_vma);
+/* mmap_lock should be read-locked */
+static inline void anon_vma_name_get(struct anon_vma_name *anon_name)
+{
+ if (anon_name)
+ kref_get(&anon_name->kref);
+}
-/*
- * mmap_lock should be write-locked or vma should have been isolated under
- * write-locked mmap_lock protection.
- */
-extern void free_vma_anon_name(struct vm_area_struct *vma);
+static inline void anon_vma_name_put(struct anon_vma_name *anon_name)
+{
+ if (anon_name)
+ kref_put(&anon_name->kref, anon_vma_name_free);
+}
-/* mmap_lock should be read-locked */
-static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
- const char *name)
+static inline
+struct anon_vma_name *anon_vma_name_reuse(struct anon_vma_name *anon_name)
+{
+ /* Prevent anon_name refcount saturation early on */
+ if (kref_read(&anon_name->kref) < REFCOUNT_MAX) {
+ anon_vma_name_get(anon_name);
+ return anon_name;
+ }
+ return anon_vma_name_alloc(anon_name->name);
+}
+
+static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
+ struct vm_area_struct *new_vma)
{
- const char *vma_name = vma_anon_name(vma);
+ struct anon_vma_name *anon_name = anon_vma_name(orig_vma);
- /* either both NULL, or pointers to same string */
- if (vma_name == name)
+ if (anon_name)
+ new_vma->anon_name = anon_vma_name_reuse(anon_name);
+}
+
+static inline void free_anon_vma_name(struct vm_area_struct *vma)
+{
+ /*
+ * Not using anon_vma_name because it generates a warning if mmap_lock
+ * is not held, which might be the case here.
+ */
+ if (!vma->vm_file)
+ anon_vma_name_put(vma->anon_name);
+}
+
+static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
+ struct anon_vma_name *anon_name2)
+{
+ if (anon_name1 == anon_name2)
return true;
- return name && vma_name && !strcmp(name, vma_name);
+ return anon_name1 && anon_name2 &&
+ !strcmp(anon_name1->name, anon_name2->name);
}
+
#else /* CONFIG_ANON_VMA_NAME */
-static inline const char *vma_anon_name(struct vm_area_struct *vma)
+static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
+{
+ return NULL;
+}
+
+static inline struct anon_vma_name *anon_vma_name_alloc(const char *name)
{
return NULL;
}
-static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma,
- struct vm_area_struct *new_vma) {}
-static inline void free_vma_anon_name(struct vm_area_struct *vma) {}
-static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
- const char *name)
+
+static inline void anon_vma_name_get(struct anon_vma_name *anon_name) {}
+static inline void anon_vma_name_put(struct anon_vma_name *anon_name) {}
+static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
+ struct vm_area_struct *new_vma) {}
+static inline void free_anon_vma_name(struct vm_area_struct *vma) {}
+
+static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
+ struct anon_vma_name *anon_name2)
{
return true;
}
+
#endif /* CONFIG_ANON_VMA_NAME */
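Putting the refcounted API together, the lifetime rule from the comment above anon_vma_name() looks like this in practice (sketch; mm and vma are assumed valid, with mmap_read_lock(mm) held on entry):

	struct anon_vma_name *an = anon_vma_name(vma);

	anon_vma_name_get(an);		/* pin before dropping the lock */
	mmap_read_unlock(mm);

	/* ... an->name is safe to use here ... */

	anon_vma_name_put(an);		/* may free it on the last reference */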
static inline void init_tlb_flush_pending(struct mm_struct *mm)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9db36dc5d4cf..8834e38c06a4 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -85,7 +85,16 @@ struct page {
* lruvec->lru_lock. Sometimes used as a generic list
* by the page owner.
*/
- struct list_head lru;
+ union {
+ struct list_head lru;
+ /* Or, for the Unevictable "LRU list" slot */
+ struct {
+ /* Always even, to negate PageTail */
+ void *__filler;
+ /* Count page's or folio's mlocks */
+ unsigned int mlock_count;
+ };
+ };
/* See page-flags.h for PAGE_MAPPING_FLAGS */
struct address_space *mapping;
pgoff_t index; /* Our offset within mapping. */
@@ -126,11 +135,14 @@ struct page {
unsigned char compound_dtor;
unsigned char compound_order;
atomic_t compound_mapcount;
+ atomic_t compound_pincount;
+#ifdef CONFIG_64BIT
unsigned int compound_nr; /* 1 << compound_order */
+#endif
};
struct { /* Second tail page of compound page */
unsigned long _compound_pad_1; /* compound_head */
- atomic_t hpage_pinned_refcount;
+ unsigned long _compound_pad_2;
/* For both global and memcg */
struct list_head deferred_list;
};
@@ -241,7 +253,13 @@ struct folio {
struct {
/* public: */
unsigned long flags;
- struct list_head lru;
+ union {
+ struct list_head lru;
+ struct {
+ void *__filler;
+ unsigned int mlock_count;
+ };
+ };
struct address_space *mapping;
pgoff_t index;
void *private;
@@ -261,6 +279,7 @@ static_assert(sizeof(struct page) == sizeof(struct folio));
static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl))
FOLIO_MATCH(flags, flags);
FOLIO_MATCH(lru, lru);
+FOLIO_MATCH(mapping, mapping);
FOLIO_MATCH(compound_head, lru);
FOLIO_MATCH(index, index);
FOLIO_MATCH(private, private);
@@ -284,7 +303,7 @@ static inline atomic_t *compound_mapcount_ptr(struct page *page)
static inline atomic_t *compound_pincount_ptr(struct page *page)
{
- return &page[2].hpage_pinned_refcount;
+ return &page[1].compound_pincount;
}
/*
@@ -415,7 +434,10 @@ struct vm_area_struct {
struct rb_node rb;
unsigned long rb_subtree_last;
} shared;
- /* Serialized by mmap_sem. */
+ /*
+ * Serialized by mmap_sem. Never use directly because it is
+ * valid only when vm_file is NULL. Use anon_vma_name instead.
+ */
struct anon_vma_name *anon_name;
};
@@ -630,7 +652,7 @@ struct mm_struct {
#endif
struct work_struct async_put_work;
-#ifdef CONFIG_IOMMU_SUPPORT
+#ifdef CONFIG_IOMMU_SVA
u32 pasid;
#endif
} __randomize_layout;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index aed44e9b5d89..962b14d403e8 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -83,6 +83,17 @@ static inline bool is_migrate_movable(int mt)
return is_migrate_cma(mt) || mt == MIGRATE_MOVABLE;
}
+/*
+ * Check whether a migratetype can be merged with another migratetype.
+ *
+ * It is only mergeable when it can fall back to other migratetypes for
+ * allocation. See fallbacks[MIGRATE_TYPES][3] in page_alloc.c.
+ */
+static inline bool migratetype_is_mergeable(int mt)
+{
+ return mt < MIGRATE_PCPTYPES;
+}
+
#define for_each_migratetype_order(order, type) \
for (order = 0; order < MAX_ORDER; order++) \
for (type = 0; type < MIGRATE_TYPES; type++)
@@ -211,6 +222,9 @@ enum node_stat_item {
#ifdef CONFIG_SWAP
NR_SWAPCACHE,
#endif
+#ifdef CONFIG_NUMA_BALANCING
+ PGPROMOTE_SUCCESS, /* promote successfully */
+#endif
NR_VM_NODE_STAT_ITEMS
};
@@ -339,6 +353,7 @@ enum zone_watermarks {
WMARK_MIN,
WMARK_LOW,
WMARK_HIGH,
+ WMARK_PROMO,
NR_WMARK
};
@@ -920,12 +935,6 @@ typedef struct pglist_data {
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages)
-#ifdef CONFIG_FLATMEM
-#define pgdat_page_nr(pgdat, pagenr) ((pgdat)->node_mem_map + (pagenr))
-#else
-#define pgdat_page_nr(pgdat, pagenr) pfn_to_page((pgdat)->node_start_pfn + (pagenr))
-#endif
-#define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr))
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
#define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
@@ -1101,7 +1110,6 @@ static inline struct pglist_data *NODE_DATA(int nid)
{
return &contig_page_data;
}
-#define NODE_MEM_MAP(nid) mem_map
#else /* CONFIG_NUMA */
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 4bb71979a8fd..5da5d990ff58 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -211,7 +211,7 @@ struct css_device_id {
kernel_ulong_t driver_data;
};
-#define ACPI_ID_LEN 9
+#define ACPI_ID_LEN 16
struct acpi_device_id {
__u8 id[ACPI_ID_LEN];
diff --git a/include/linux/mutex_api.h b/include/linux/mutex_api.h
new file mode 100644
index 000000000000..85ab9491e13e
--- /dev/null
+++ b/include/linux/mutex_api.h
@@ -0,0 +1 @@
+#include <linux/mutex.h>
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3213c7227b59..f53ea7038441 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2158,7 +2158,7 @@ struct net_device {
struct netdev_queue *_tx ____cacheline_aligned_in_smp;
unsigned int num_tx_queues;
unsigned int real_num_tx_queues;
- struct Qdisc *qdisc;
+ struct Qdisc __rcu *qdisc;
unsigned int tx_queue_len;
spinlock_t tx_global_lock;
@@ -2548,6 +2548,7 @@ struct packet_type {
struct net_device *);
bool (*id_match)(struct packet_type *ptype,
struct sock *sk);
+ struct net *af_packet_net;
void *af_packet_priv;
struct list_head list;
};
@@ -4601,6 +4602,8 @@ int skb_csum_hwoffload_help(struct sk_buff *skb,
struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
netdev_features_t features, bool tx_path);
+struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb,
+ netdev_features_t features, __be16 type);
struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
netdev_features_t features);
diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h
index b4dd96e4dc8d..e6487a691136 100644
--- a/include/linux/netfilter_netdev.h
+++ b/include/linux/netfilter_netdev.h
@@ -101,7 +101,11 @@ static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc,
nf_hook_state_init(&state, NF_NETDEV_EGRESS,
NFPROTO_NETDEV, dev, NULL, NULL,
dev_net(dev), NULL);
+
+ /* nf assumes rcu_read_lock, not just read_lock_bh */
+ rcu_read_lock();
ret = nf_hook_slow(skb, &state, e, 0);
+ rcu_read_unlock();
if (ret == 1) {
return skb;
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index b46c39d98bbd..614f22213e21 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -244,6 +244,13 @@ struct netfs_cache_ops {
int (*prepare_write)(struct netfs_cache_resources *cres,
loff_t *_start, size_t *_len, loff_t i_size,
bool no_space_allocated_yet);
+
+ /* Query the occupancy of the cache in a region, returning where the
+ * next chunk of data starts and how long it is.
+ */
+ int (*query_occupancy)(struct netfs_cache_resources *cres,
+ loff_t start, size_t len, size_t granularity,
+ loff_t *_data_start, size_t *_data_len);
};
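A sketch of how a filesystem might probe the cache with the new op (hedged: it assumes cres->ops was set up by the cache backend, and the -ENOBUFS fallback is a guess at the convention, not taken from this hunk):

	static int example_next_cached_extent(struct netfs_cache_resources *cres,
					      loff_t start, size_t len,
					      loff_t *data_start, size_t *data_len)
	{
		if (!cres->ops || !cres->ops->query_occupancy)
			return -ENOBUFS;

		return cres->ops->query_occupancy(cres, start, len, PAGE_SIZE,
						  data_start, data_len);
	}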
struct readahead_control;
diff --git a/include/linux/nfs.h b/include/linux/nfs.h
index 0dc7ad38a0da..b06375e88e58 100644
--- a/include/linux/nfs.h
+++ b/include/linux/nfs.h
@@ -36,14 +36,6 @@ static inline void nfs_copy_fh(struct nfs_fh *target, const struct nfs_fh *sourc
memcpy(target->data, source->data, source->size);
}
-
-/*
- * This is really a general kernel constant, but since nothing like
- * this is defined in the kernel headers, I have to do it here.
- */
-#define NFS_OFFSET_MAX ((__s64)((~(__u64)0) >> 1))
-
-
enum nfs3_stable_how {
NFS_UNSTABLE = 0,
NFS_DATA_SYNC = 1,
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 00835bacd236..784120cc217e 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -61,7 +61,9 @@
struct nfs_access_entry {
struct rb_node rb_node;
struct list_head lru;
- const struct cred * cred;
+ kuid_t fsuid;
+ kgid_t fsgid;
+ struct group_info *group_info;
__u32 mask;
struct rcu_head rcu_head;
};
@@ -105,6 +107,7 @@ struct nfs_open_dir_context {
__u64 dup_cookie;
pgoff_t page_index;
signed char duped;
+ bool eof;
};
/*
@@ -395,7 +398,7 @@ extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fa
extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr);
extern int nfs_getattr(struct user_namespace *, const struct path *,
struct kstat *, u32, unsigned int);
-extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
+extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *, const struct cred *);
extern void nfs_access_set_mask(struct nfs_access_entry *, u32);
extern int nfs_permission(struct user_namespace *, struct inode *, int);
extern int nfs_open(struct inode *, struct file *);
@@ -532,8 +535,8 @@ extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh,
struct nfs_fattr *fattr);
extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags);
extern void nfs_access_zap_cache(struct inode *inode);
-extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res,
- bool may_block);
+extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred,
+ u32 *mask, bool may_block);
/*
* linux/fs/nfs/symlink.c
@@ -580,7 +583,7 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned
extern int nfs_sync_inode(struct inode *inode);
extern int nfs_wb_all(struct inode *inode);
extern int nfs_wb_page(struct inode *inode, struct page *page);
-extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
+int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio);
extern int nfs_commit_inode(struct inode *, int);
extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail);
extern void nfs_commit_free(struct nfs_commit_data *data);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 77b2dba27bbb..6aa2a200676a 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -138,6 +138,7 @@ struct nfs_server {
struct nlm_host *nlm_host; /* NLM client handle */
struct nfs_iostats __percpu *io_stats; /* I/O statistics */
atomic_long_t writeback; /* number of writeback pages */
+ unsigned int write_congested; /* flag set when writeback gets too high */
unsigned int flags; /* various flags */
/* The following are for internal use only. Also see uapi/linux/nfs_mount.h */
@@ -266,6 +267,8 @@ struct nfs_server {
#define NFS_CAP_ACLS (1U << 3)
#define NFS_CAP_ATOMIC_OPEN (1U << 4)
#define NFS_CAP_LGOPEN (1U << 5)
+#define NFS_CAP_CASE_INSENSITIVE (1U << 6)
+#define NFS_CAP_CASE_PRESERVING (1U << 7)
#define NFS_CAP_POSIX_LOCK (1U << 14)
#define NFS_CAP_UIDGID_NOMAP (1U << 15)
#define NFS_CAP_STATEID_NFSV41 (1U << 16)
@@ -282,5 +285,5 @@ struct nfs_server {
#define NFS_CAP_COPY_NOTIFY (1U << 27)
#define NFS_CAP_XATTR (1U << 28)
#define NFS_CAP_READ_PLUS (1U << 29)
-
+#define NFS_CAP_FS_LOCATIONS (1U << 30)
#endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 967a0098f0a9..728cb0c1f0b6 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1194,6 +1194,8 @@ struct nfs4_server_caps_res {
u32 has_links;
u32 has_symlinks;
u32 fh_expire_type;
+ u32 case_insensitive;
+ u32 case_preserving;
};
#define NFS4_PATHNAME_MAXCOMPONENTS 512
@@ -1737,7 +1739,7 @@ struct nfs_rpc_ops {
struct nfs_fh *, struct nfs_fattr *);
int (*lookupp) (struct inode *, struct nfs_fh *,
struct nfs_fattr *);
- int (*access) (struct inode *, struct nfs_access_entry *);
+ int (*access) (struct inode *, struct nfs_access_entry *, const struct cred *);
int (*readlink)(struct inode *, struct page *, unsigned int,
unsigned int);
int (*create) (struct inode *, struct dentry *,
@@ -1795,6 +1797,7 @@ struct nfs_rpc_ops {
struct nfs_server *(*create_server)(struct fs_context *);
struct nfs_server *(*clone_server)(struct nfs_server *, struct nfs_fh *,
struct nfs_fattr *, rpc_authflavor_t);
+ int (*discover_trunking)(struct nfs_server *, struct nfs_fh *);
};
/*
diff --git a/include/linux/node.h b/include/linux/node.h
index bb21fd631b16..40d641a8bfb0 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -99,19 +99,20 @@ extern struct node *node_devices[];
typedef void (*node_registration_func_t)(struct node *);
#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA)
-void link_mem_sections(int nid, unsigned long start_pfn,
- unsigned long end_pfn,
- enum meminit_context context);
+void register_memory_blocks_under_node(int nid, unsigned long start_pfn,
+ unsigned long end_pfn,
+ enum meminit_context context);
#else
-static inline void link_mem_sections(int nid, unsigned long start_pfn,
- unsigned long end_pfn,
- enum meminit_context context)
+static inline void register_memory_blocks_under_node(int nid, unsigned long start_pfn,
+ unsigned long end_pfn,
+ enum meminit_context context)
{
}
#endif
extern void unregister_node(struct node *node);
#ifdef CONFIG_NUMA
+extern void node_dev_init(void);
/* Core of the node registration - only memory hotplug should use this */
extern int __register_one_node(int nid);
@@ -128,8 +129,8 @@ static inline int register_one_node(int nid)
error = __register_one_node(nid);
if (error)
return error;
- /* link memory sections under this node */
- link_mem_sections(nid, start_pfn, end_pfn, MEMINIT_EARLY);
+ register_memory_blocks_under_node(nid, start_pfn, end_pfn,
+ MEMINIT_EARLY);
}
return error;
@@ -149,6 +150,9 @@ extern void register_hugetlbfs_with_node(node_registration_func_t doregister,
node_registration_func_t unregister);
#endif
#else
+static inline void node_dev_init(void)
+{
+}
static inline int __register_one_node(int nid)
{
return 0;
@@ -181,4 +185,9 @@ static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
#define to_node(device) container_of(device, struct node, dev)
+static inline bool node_is_toptier(int node)
+{
+ return node_state(node, N_CPU);
+}
+
#endif /* _LINUX_NODE_H_ */
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index cb909edb76c4..5358a5facdee 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -721,7 +721,7 @@ enum {
*
* Fields with static values for the port. Initialized by the
* port_info struct supplied to the registration call.
- * @port_num: NVME-FC transport subsytem port number
+ * @port_num: NVME-FC transport subsystem port number
* @node_name: FC WWNN for the port
* @port_name: FC WWPN for the port
* @private: pointer to memory allocated alongside the local port
diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h
index 959e0bd9a913..75470159a194 100644
--- a/include/linux/nvme-tcp.h
+++ b/include/linux/nvme-tcp.h
@@ -12,6 +12,7 @@
#define NVME_TCP_DISC_PORT 8009
#define NVME_TCP_ADMIN_CCSZ SZ_8K
#define NVME_TCP_DIGEST_LENGTH 4
+#define NVME_TCP_MIN_MAXH2CDATA 4096
enum nvme_tcp_pfv {
NVME_TCP_PFV_1_0 = 0x0,
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 855dd9b3e84b..9dbc3ef4daf7 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -43,6 +43,12 @@ enum nvme_ctrl_type {
NVME_CTRL_ADMIN = 3, /* Administrative controller */
};
+enum nvme_dctype {
+ NVME_DCTYPE_NOT_REPORTED = 0,
+ NVME_DCTYPE_DDC = 1, /* Direct Discovery Controller */
+ NVME_DCTYPE_CDC = 2, /* Central Discovery Controller */
+};
+
/* Address Family codes for Discovery Log Page entry ADRFAM field */
enum {
NVMF_ADDR_FAMILY_PCI = 0, /* PCIe */
@@ -320,7 +326,9 @@ struct nvme_id_ctrl {
__le16 icdoff;
__u8 ctrattr;
__u8 msdbd;
- __u8 rsvd1804[244];
+ __u8 rsvd1804[2];
+ __u8 dctype;
+ __u8 rsvd1807[241];
struct nvme_id_power_state psd[32];
__u8 vs[1024];
};
@@ -1636,6 +1644,7 @@ enum {
NVME_SC_HOST_ABORTED_CMD = 0x371,
NVME_SC_CRD = 0x1800,
+ NVME_SC_MORE = 0x2000,
NVME_SC_DNR = 0x4000,
};
diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h
index 98efb7b5660d..c9a3ac9efeaa 100644
--- a/include/linux/nvmem-provider.h
+++ b/include/linux/nvmem-provider.h
@@ -70,7 +70,8 @@ struct nvmem_keepout {
* @word_size: Minimum read/write access granularity.
* @stride: Minimum read/write access stride.
* @priv: User context passed to read/write callbacks.
- * @wp-gpio: Write protect pin
+ * @wp-gpio: Write protect pin
+ * @ignore_wp: Write Protect pin is managed by the provider.
*
* Note: A default "nvmem<id>" name will be assigned to the device if
* no name is specified in its configuration. In such case "<id>" is
@@ -92,6 +93,7 @@ struct nvmem_config {
enum nvmem_type type;
bool read_only;
bool root_only;
+ bool ignore_wp;
struct device_node *of_node;
bool no_of_node;
nvmem_reg_read_t reg_read;
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 4669632bd72b..f1221d11f8e5 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -4,6 +4,7 @@
#include <linux/compiler.h>
#include <linux/limits.h>
+#include <linux/const.h>
/*
* We need to compute the minimum and maximum values representable in a given
@@ -118,81 +119,94 @@ static inline bool __must_check __must_check_overflow(bool overflow)
}))
/**
- * array_size() - Calculate size of 2-dimensional array.
- *
- * @a: dimension one
- * @b: dimension two
+ * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX
*
- * Calculates size of 2-dimensional array: @a * @b.
+ * @factor1: first factor
+ * @factor2: second factor
*
- * Returns: number of bytes needed to represent the array or SIZE_MAX on
- * overflow.
+ * Returns: @factor1 * @factor2, both promoted to size_t, with any
+ * overflow causing the return value to be SIZE_MAX. The lvalue must
+ * be size_t to avoid implicit type conversion.
*/
-static inline __must_check size_t array_size(size_t a, size_t b)
+static inline size_t __must_check size_mul(size_t factor1, size_t factor2)
{
size_t bytes;
- if (check_mul_overflow(a, b, &bytes))
+ if (check_mul_overflow(factor1, factor2, &bytes))
return SIZE_MAX;
return bytes;
}
/**
- * array3_size() - Calculate size of 3-dimensional array.
+ * size_add() - Calculate size_t addition with saturation at SIZE_MAX
*
- * @a: dimension one
- * @b: dimension two
- * @c: dimension three
- *
- * Calculates size of 3-dimensional array: @a * @b * @c.
+ * @addend1: first addend
+ * @addend2: second addend
*
- * Returns: number of bytes needed to represent the array or SIZE_MAX on
- * overflow.
+ * Returns: @addend1 + @addend2, both promoted to size_t, with any
+ * overflow causing the return value to be SIZE_MAX. The lvalue must
+ * be size_t to avoid implicit type conversion.
*/
-static inline __must_check size_t array3_size(size_t a, size_t b, size_t c)
+static inline size_t __must_check size_add(size_t addend1, size_t addend2)
{
size_t bytes;
- if (check_mul_overflow(a, b, &bytes))
- return SIZE_MAX;
- if (check_mul_overflow(bytes, c, &bytes))
+ if (check_add_overflow(addend1, addend2, &bytes))
return SIZE_MAX;
return bytes;
}
-/*
- * Compute a*b+c, returning SIZE_MAX on overflow. Internal helper for
- * struct_size() below.
+/**
+ * size_sub() - Calculate size_t subtraction with saturation at SIZE_MAX
+ *
+ * @minuend: value to subtract from
+ * @subtrahend: value to subtract from @minuend
+ *
+ * Returns: @minuend - @subtrahend, both promoted to size_t, with any
+ * overflow causing the return value to be SIZE_MAX. For composition
+ * with the size_add() and size_mul() helpers, neither argument may be
+ * SIZE_MAX (or the result will be forced to SIZE_MAX). The lvalue
+ * must be size_t to avoid implicit type conversion.
*/
-static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c)
+static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
{
size_t bytes;
- if (check_mul_overflow(a, b, &bytes))
- return SIZE_MAX;
- if (check_add_overflow(bytes, c, &bytes))
+ if (minuend == SIZE_MAX || subtrahend == SIZE_MAX ||
+ check_sub_overflow(minuend, subtrahend, &bytes))
return SIZE_MAX;
return bytes;
}
/**
- * struct_size() - Calculate size of structure with trailing array.
- * @p: Pointer to the structure.
- * @member: Name of the array member.
- * @count: Number of elements in the array.
+ * array_size() - Calculate size of 2-dimensional array.
*
- * Calculates size of memory needed for structure @p followed by an
- * array of @count number of @member elements.
+ * @a: dimension one
+ * @b: dimension two
*
- * Return: number of bytes needed or SIZE_MAX on overflow.
+ * Calculates size of 2-dimensional array: @a * @b.
+ *
+ * Returns: number of bytes needed to represent the array or SIZE_MAX on
+ * overflow.
*/
-#define struct_size(p, member, count) \
- __ab_c_size(count, \
- sizeof(*(p)->member) + __must_be_array((p)->member),\
- sizeof(*(p)))
+#define array_size(a, b) size_mul(a, b)
+
+/**
+ * array3_size() - Calculate size of 3-dimensional array.
+ *
+ * @a: dimension one
+ * @b: dimension two
+ * @c: dimension three
+ *
+ * Calculates size of 3-dimensional array: @a * @b * @c.
+ *
+ * Returns: number of bytes needed to represent the array or SIZE_MAX on
+ * overflow.
+ */
+#define array3_size(a, b, c) size_mul(size_mul(a, b), c)
/**
* flex_array_size() - Calculate size of a flexible array member
@@ -208,7 +222,25 @@ static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c)
* Return: number of bytes needed or SIZE_MAX on overflow.
*/
#define flex_array_size(p, member, count) \
- array_size(count, \
- sizeof(*(p)->member) + __must_be_array((p)->member))
+ __builtin_choose_expr(__is_constexpr(count), \
+ (count) * sizeof(*(p)->member) + __must_be_array((p)->member), \
+ size_mul(count, sizeof(*(p)->member) + __must_be_array((p)->member)))
+
+/**
+ * struct_size() - Calculate size of structure with trailing flexible array.
+ *
+ * @p: Pointer to the structure.
+ * @member: Name of the array member.
+ * @count: Number of elements in the array.
+ *
+ * Calculates size of memory needed for structure @p followed by an
+ * array of @count number of @member elements.
+ *
+ * Return: number of bytes needed or SIZE_MAX on overflow.
+ */
+#define struct_size(p, member, count) \
+ __builtin_choose_expr(__is_constexpr(count), \
+ sizeof(*(p)) + flex_array_size(p, member, count), \
+ size_add(sizeof(*(p)), flex_array_size(p, member, count)))
#endif /* __LINUX_OVERFLOW_H */
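The helpers above compose: any intermediate overflow saturates the whole expression at SIZE_MAX, which the allocator then refuses, instead of wrapping to a small value that later writes would overrun. A minimal userspace sketch of the same pattern (not kernel code; it assumes only the GCC/Clang __builtin_mul_overflow() that check_mul_overflow() wraps):

	#include <stdint.h>
	#include <stdio.h>

	/* Sketch of size_mul(): saturate at SIZE_MAX instead of wrapping. */
	static size_t size_mul_sketch(size_t factor1, size_t factor2)
	{
		size_t bytes;

		if (__builtin_mul_overflow(factor1, factor2, &bytes))
			return SIZE_MAX;
		return bytes;
	}

	int main(void)
	{
		printf("%zu\n", size_mul_sketch(16, 4096));	/* 65536 */
		printf("%zu\n", size_mul_sketch(SIZE_MAX, 2));	/* SIZE_MAX */
		return 0;
	}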
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 1c3b6e5c8bfd..88fe1d759cdd 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -190,13 +190,81 @@ enum pageflags {
#ifndef __GENERATING_BOUNDS_H
+#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
+DECLARE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON,
+ hugetlb_free_vmemmap_enabled_key);
+
+static __always_inline bool hugetlb_free_vmemmap_enabled(void)
+{
+ return static_branch_maybe(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON,
+ &hugetlb_free_vmemmap_enabled_key);
+}
+
+/*
+ * If the feature of freeing some vmemmap pages associated with each HugeTLB
+ * page is enabled, the head vmemmap page frame is reused and all of the tail
+ * vmemmap addresses map to the head vmemmap page frame (for further
+ * details, refer to the figure at the head of mm/hugetlb_vmemmap.c). In other
+ * words, there is more than one page struct with PG_head associated with each
+ * HugeTLB page. We __know__ that there is only one head page struct, the tail
+ * page structs with PG_head are fake head page structs. We need an approach
+ * to distinguish between those two different types of page structs so that
+ * compound_head() can return the real head page struct when the parameter is
+ * the tail page struct but with PG_head.
+ *
+ * The page_fixed_fake_head() returns the real head page struct if the @page is
+ * fake page head, otherwise, returns @page which can either be a true page
+ * head or tail.
+ */
+static __always_inline const struct page *page_fixed_fake_head(const struct page *page)
+{
+ if (!hugetlb_free_vmemmap_enabled())
+ return page;
+
+ /*
+ * Only addresses aligned with PAGE_SIZE of struct page may be fake head
+	 * struct page. The alignment check aims to avoid accessing the fields
+	 * (e.g. compound_head) of the @page[1]. It can avoid touching a
+	 * (possibly) cold cacheline in some cases.
+ */
+ if (IS_ALIGNED((unsigned long)page, PAGE_SIZE) &&
+ test_bit(PG_head, &page->flags)) {
+ /*
+ * We can safely access the field of the @page[1] with PG_head
+ * because the @page is a compound page composed with at least
+ * two contiguous pages.
+ */
+ unsigned long head = READ_ONCE(page[1].compound_head);
+
+ if (likely(head & 1))
+ return (const struct page *)(head - 1);
+ }
+ return page;
+}
+#else
+static inline const struct page *page_fixed_fake_head(const struct page *page)
+{
+ return page;
+}
+
+static inline bool hugetlb_free_vmemmap_enabled(void)
+{
+ return false;
+}
+#endif
+
+static __always_inline int page_is_fake_head(struct page *page)
+{
+ return page_fixed_fake_head(page) != page;
+}
+
static inline unsigned long _compound_head(const struct page *page)
{
unsigned long head = READ_ONCE(page->compound_head);
if (unlikely(head & 1))
return head - 1;
- return (unsigned long)page;
+ return (unsigned long)page_fixed_fake_head(page);
}
#define compound_head(page) ((typeof(page))_compound_head(page))
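For reference, the tag-bit scheme _compound_head() decodes, and which page_fixed_fake_head() reads out of page[1] above: a tail page stores the head's address with bit 0 set, so a mask-and-subtract recovers the head. A self-contained userspace sketch, with struct page reduced to the single field used here:

	#include <assert.h>
	#include <stdint.h>

	struct page { uintptr_t compound_head; };

	static struct page *compound_head_sketch(struct page *p)
	{
		uintptr_t head = p->compound_head;

		/* Bit 0 set means "tail page; the head is at head - 1". */
		return (head & 1) ? (struct page *)(head - 1) : p;
	}

	int main(void)
	{
		struct page head = { 0 };
		struct page tail = { (uintptr_t)&head | 1 };

		assert(compound_head_sketch(&tail) == &head);
		assert(compound_head_sketch(&head) == &head);
		return 0;
	}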
@@ -231,12 +299,13 @@ static inline unsigned long _compound_head(const struct page *page)
static __always_inline int PageTail(struct page *page)
{
- return READ_ONCE(page->compound_head) & 1;
+ return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page);
}
static __always_inline int PageCompound(struct page *page)
{
- return test_bit(PG_head, &page->flags) || PageTail(page);
+		.nr_pages = compound_nr(_page),				\
+		.pgoff = page_to_pgoff(_page),				\
}
#define PAGE_POISON_PATTERN -1l
@@ -695,7 +764,20 @@ static inline bool test_set_page_writeback(struct page *page)
return set_page_writeback(page);
}
-__PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY)
+static __always_inline bool folio_test_head(struct folio *folio)
+{
+ return test_bit(PG_head, folio_flags(folio, FOLIO_PF_ANY));
+}
+
+static __always_inline int PageHead(struct page *page)
+{
+ PF_POISONED_CHECK(page);
+ return test_bit(PG_head, &page->flags) && !page_is_fake_head(page);
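A hedged usage sketch for the new initializers; my_rmap_one is an illustrative callback, and locking and caller context are elided. The walk state is declared on the stack and fed to page_vma_mapped_walk() until it returns false:

	static bool my_rmap_one(struct folio *folio, struct vm_area_struct *vma,
				unsigned long addr, void *arg)
	{
		DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);

		while (page_vma_mapped_walk(&pvmw)) {
			/* pvmw.pte or pvmw.pmd points at the current mapping */
		}
		return true;
	}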
+}
+
+__SETPAGEFLAG(Head, head, PF_ANY)
+__CLEARPAGEFLAG(Head, head, PF_ANY)
+CLEARPAGEFLAG(Head, head, PF_ANY)
/**
* folio_test_large() - Does this folio contain more than one page?
@@ -918,7 +1000,7 @@ PAGE_TYPE_OPS(Guard, guard)
extern bool is_free_buddy_page(struct page *page);
-__PAGEFLAG(Isolated, isolated, PF_ANY);
+PAGEFLAG(Isolated, isolated, PF_ANY);
#ifdef CONFIG_MMU
#define __PG_MLOCKED (1UL << PG_mlocked)
diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h
index 38cace1da7b6..01e16c7696ec 100644
--- a/include/linux/page_table_check.h
+++ b/include/linux/page_table_check.h
@@ -26,6 +26,9 @@ void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd);
void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pud);
+void __page_table_check_pte_clear_range(struct mm_struct *mm,
+ unsigned long addr,
+ pmd_t pmd);
static inline void page_table_check_alloc(struct page *page, unsigned int order)
{
@@ -100,6 +103,16 @@ static inline void page_table_check_pud_set(struct mm_struct *mm,
__page_table_check_pud_set(mm, addr, pudp, pud);
}
+static inline void page_table_check_pte_clear_range(struct mm_struct *mm,
+ unsigned long addr,
+ pmd_t pmd)
+{
+ if (static_branch_likely(&page_table_check_disabled))
+ return;
+
+ __page_table_check_pte_clear_range(mm, addr, pmd);
+}
+
#else
static inline void page_table_check_alloc(struct page *page, unsigned int order)
@@ -143,5 +156,11 @@ static inline void page_table_check_pud_set(struct mm_struct *mm,
{
}
+static inline void page_table_check_pte_clear_range(struct mm_struct *mm,
+ unsigned long addr,
+ pmd_t pmd)
+{
+}
+
#endif /* CONFIG_PAGE_TABLE_CHECK */
#endif /* __LINUX_PAGE_TABLE_CHECK_H */
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 973fd731a520..83c7248053a1 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -37,8 +37,11 @@ extern unsigned int pageblock_order;
#else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
-/* Huge pages are a constant size */
-#define pageblock_order HUGETLB_PAGE_ORDER
+/*
+ * Huge pages are a constant size, but don't exceed the maximum allocation
+ * granularity.
+ */
+#define pageblock_order min_t(unsigned int, HUGETLB_PAGE_ORDER, MAX_ORDER - 1)
#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 270bf5136c34..58f395f3febe 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -18,6 +18,120 @@
struct folio_batch;
+unsigned long invalidate_mapping_pages(struct address_space *mapping,
+ pgoff_t start, pgoff_t end);
+
+static inline void invalidate_remote_inode(struct inode *inode)
+{
+ if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode))
+ invalidate_mapping_pages(inode->i_mapping, 0, -1);
+}
+int invalidate_inode_pages2(struct address_space *mapping);
+int invalidate_inode_pages2_range(struct address_space *mapping,
+ pgoff_t start, pgoff_t end);
+int write_inode_now(struct inode *, int sync);
+int filemap_fdatawrite(struct address_space *);
+int filemap_flush(struct address_space *);
+int filemap_fdatawait_keep_errors(struct address_space *mapping);
+int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend);
+int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
+ loff_t start_byte, loff_t end_byte);
+
+static inline int filemap_fdatawait(struct address_space *mapping)
+{
+ return filemap_fdatawait_range(mapping, 0, LLONG_MAX);
+}
+
+bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend);
+int filemap_write_and_wait_range(struct address_space *mapping,
+ loff_t lstart, loff_t lend);
+int __filemap_fdatawrite_range(struct address_space *mapping,
+ loff_t start, loff_t end, int sync_mode);
+int filemap_fdatawrite_range(struct address_space *mapping,
+ loff_t start, loff_t end);
+int filemap_check_errors(struct address_space *mapping);
+void __filemap_set_wb_err(struct address_space *mapping, int err);
+int filemap_fdatawrite_wbc(struct address_space *mapping,
+ struct writeback_control *wbc);
+
+static inline int filemap_write_and_wait(struct address_space *mapping)
+{
+ return filemap_write_and_wait_range(mapping, 0, LLONG_MAX);
+}
+
+/**
+ * filemap_set_wb_err - set a writeback error on an address_space
+ * @mapping: mapping in which to set writeback error
+ * @err: error to be set in mapping
+ *
+ * When writeback fails in some way, we must record that error so that
+ * userspace can be informed when fsync and the like are called. We endeavor
+ * to report errors on any file that was open at the time of the error. Some
+ * internal callers also need to know when writeback errors have occurred.
+ *
+ * When a writeback error occurs, most filesystems will want to call
+ * filemap_set_wb_err to record the error in the mapping so that it will be
+ * automatically reported whenever fsync is called on the file.
+ */
+static inline void filemap_set_wb_err(struct address_space *mapping, int err)
+{
+ /* Fastpath for common case of no error */
+ if (unlikely(err))
+ __filemap_set_wb_err(mapping, err);
+}
+
+/**
+ * filemap_check_wb_err - has an error occurred since the mark was sampled?
+ * @mapping: mapping to check for writeback errors
+ * @since: previously-sampled errseq_t
+ *
+ * Grab the errseq_t value from the mapping, and see if it has changed "since"
+ * the given value was sampled.
+ *
+ * If it has then report the latest error set, otherwise return 0.
+ */
+static inline int filemap_check_wb_err(struct address_space *mapping,
+ errseq_t since)
+{
+ return errseq_check(&mapping->wb_err, since);
+}
+
+/**
+ * filemap_sample_wb_err - sample the current errseq_t to test for later errors
+ * @mapping: mapping to be sampled
+ *
+ * Writeback errors are always reported relative to a particular sample point
+ * in the past. This function provides those sample points.
+ */
+static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
+{
+ return errseq_sample(&mapping->wb_err);
+}
+
+/**
+ * file_sample_sb_err - sample the current errseq_t to test for later errors
+ * @file: file pointer to be sampled
+ *
+ * Grab the most current superblock-level errseq_t value for the given
+ * struct file.
+ */
+static inline errseq_t file_sample_sb_err(struct file *file)
+{
+ return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
+}
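Taken together, these helpers implement a sample-then-check protocol: sample the errseq_t before issuing writeback, check it afterwards. A hypothetical fsync path built only from the functions declared above (my_fsync is an illustrative name, not a real kernel function):

	static int my_fsync(struct file *file, loff_t start, loff_t end)
	{
		struct address_space *mapping = file->f_mapping;
		errseq_t since = filemap_sample_wb_err(mapping);
		int err;

		err = filemap_write_and_wait_range(mapping, start, end);
		if (err)
			return err;

		/* Report any error raised since the sample, even one that
		 * another writer triggered and consumed in the meantime. */
		return filemap_check_wb_err(mapping, since);
	}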
+
+/*
+ * Flush file data before changing attributes. Caller must hold any locks
+ * required to prevent further writes to this file until we're done setting
+ * flags.
+ */
+static inline int inode_drain_writes(struct inode *inode)
+{
+ inode_dio_wait(inode);
+ return filemap_write_and_wait(inode->i_mapping);
+}
+
static inline bool mapping_empty(struct address_space *mapping)
{
return xa_empty(&mapping->i_pages);
@@ -192,9 +306,14 @@ static inline void mapping_set_large_folios(struct address_space *mapping)
__set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
}
+/*
+ * Large folio support currently depends on THP. These dependencies are
+ * being worked on but are not yet fixed.
+ */
static inline bool mapping_large_folio_support(struct address_space *mapping)
{
- return test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
+ return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
+ test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
}
static inline int filemap_nr_thps(struct address_space *mapping)
@@ -212,7 +331,7 @@ static inline void filemap_nr_thps_inc(struct address_space *mapping)
if (!mapping_large_folio_support(mapping))
atomic_inc(&mapping->nr_thps);
#else
- WARN_ON_ONCE(1);
+ WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0);
#endif
}
@@ -222,7 +341,7 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping)
if (!mapping_large_folio_support(mapping))
atomic_dec(&mapping->nr_thps);
#else
- WARN_ON_ONCE(1);
+ WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0);
#endif
}
@@ -283,16 +402,6 @@ static inline struct inode *folio_inode(struct folio *folio)
return folio->mapping->host;
}
-static inline bool page_cache_add_speculative(struct page *page, int count)
-{
- return folio_ref_try_add_rcu((struct folio *)page, count);
-}
-
-static inline bool page_cache_get_speculative(struct page *page)
-{
- return page_cache_add_speculative(page, 1);
-}
-
/**
* folio_attach_private - Attach private data to a folio.
* @folio: Folio to attach data to.
@@ -423,6 +532,24 @@ static inline struct folio *filemap_get_folio(struct address_space *mapping,
}
/**
+ * filemap_lock_folio - Find and lock a folio.
+ * @mapping: The address_space to search.
+ * @index: The page index.
+ *
+ * Looks up the page cache entry at @mapping & @index. If a folio is
+ * present, it is returned locked with an increased refcount.
+ *
+ * Context: May sleep.
+ * Return: A folio or %NULL if there is no folio in the cache for this
+ * index. Will not return a shadow, swap or DAX entry.
+ */
+static inline struct folio *filemap_lock_folio(struct address_space *mapping,
+ pgoff_t index)
+{
+ return __filemap_get_folio(mapping, index, FGP_LOCK, 0);
+}
+
+/**
* find_get_page - find and get a page reference
* @mapping: the address_space to search
* @offset: the page index
@@ -594,13 +721,6 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index)
unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
pgoff_t end, unsigned int nr_pages,
struct page **pages);
-static inline unsigned find_get_pages(struct address_space *mapping,
- pgoff_t *start, unsigned int nr_pages,
- struct page **pages)
-{
- return find_get_pages_range(mapping, start, (pgoff_t)-1, nr_pages,
- pages);
-}
unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
unsigned int nr_pages, struct page **pages);
unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
@@ -636,15 +756,15 @@ extern int read_cache_pages(struct address_space *mapping,
struct list_head *pages, filler_t *filler, void *data);
static inline struct page *read_mapping_page(struct address_space *mapping,
- pgoff_t index, void *data)
+ pgoff_t index, struct file *file)
{
- return read_cache_page(mapping, index, NULL, data);
+ return read_cache_page(mapping, index, NULL, file);
}
static inline struct folio *read_mapping_folio(struct address_space *mapping,
- pgoff_t index, void *data)
+ pgoff_t index, struct file *file)
{
- return read_cache_folio(mapping, index, NULL, data);
+ return read_cache_folio(mapping, index, NULL, file);
}
/*
@@ -713,6 +833,17 @@ static inline loff_t folio_file_pos(struct folio *folio)
return page_file_offset(&folio->page);
}
+/*
+ * Get the offset in PAGE_SIZE units (even for hugetlb folios).
+ * (TODO: hugetlb folios should have ->index in PAGE_SIZE)
+ */
+static inline pgoff_t folio_pgoff(struct folio *folio)
+{
+ if (unlikely(folio_test_hugetlb(folio)))
+ return hugetlb_basepage_index(&folio->page);
+ return folio->index;
+}
+
extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
unsigned long address);
@@ -893,6 +1024,7 @@ static inline void cancel_dirty_page(struct page *page)
}
bool folio_clear_dirty_for_io(struct folio *folio);
bool clear_page_dirty_for_io(struct page *page);
+void folio_invalidate(struct folio *folio, size_t offset, size_t length);
int __must_check folio_write_one(struct folio *folio);
static inline int __must_check write_one_page(struct page *page)
{
@@ -900,7 +1032,7 @@ static inline int __must_check write_one_page(struct page *page)
}
int __set_page_dirty_nobuffers(struct page *page);
-int __set_page_dirty_no_writeback(struct page *page);
+bool noop_dirty_folio(struct address_space *mapping, struct folio *folio);
void page_endio(struct page *page, bool is_write, int err);
diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
index 6f7949b2fd8d..abeba356bc3f 100644
--- a/include/linux/part_stat.h
+++ b/include/linux/part_stat.h
@@ -2,7 +2,7 @@
#ifndef _LINUX_PART_STAT_H
#define _LINUX_PART_STAT_H
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
#include <asm/local.h>
struct disk_stats {
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 2512e2f9cd4e..0407a38b470a 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -26,6 +26,8 @@
*/
/* Event uses a 64bit counter */
#define ARMPMU_EVT_64BIT 1
+/* Event uses a 47bit counter */
+#define ARMPMU_EVT_47BIT 2
#define HW_OP_UNSUPPORTED 0xFFFF
#define C(_x) PERF_COUNT_HW_CACHE_##_x
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 117f230bcdfd..733649184b27 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -693,18 +693,6 @@ struct perf_event {
u64 total_time_running;
u64 tstamp;
- /*
- * timestamp shadows the actual context timing but it can
- * be safely used in NMI interrupt context. It reflects the
- * context time as it was when the event was last scheduled in,
- * or when ctx_sched_in failed to schedule the event because we
- * run out of PMC.
- *
- * ctx_time already accounts for ctx->timestamp. Therefore to
- * compute ctx_time for a sample, simply add perf_clock().
- */
- u64 shadow_ctx_time;
-
struct perf_event_attr attr;
u16 header_size;
u16 id_header_size;
@@ -852,6 +840,7 @@ struct perf_event_context {
*/
u64 time;
u64 timestamp;
+ u64 timeoffset;
/*
* These fields let us detect when two contexts have both
@@ -934,6 +923,8 @@ struct bpf_perf_event_data_kern {
struct perf_cgroup_info {
u64 time;
u64 timestamp;
+ u64 timeoffset;
+ int active;
};
struct perf_cgroup {
diff --git a/include/linux/perf_event_api.h b/include/linux/perf_event_api.h
new file mode 100644
index 000000000000..c2fd6048b790
--- /dev/null
+++ b/include/linux/perf_event_api.h
@@ -0,0 +1 @@
+#include <linux/perf_event.h>
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index bc8713a76e03..f4f4077b97aa 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -62,6 +62,7 @@ static inline unsigned long pte_index(unsigned long address)
{
return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
}
+#define pte_index pte_index
#ifndef pmd_index
static inline unsigned long pmd_index(unsigned long address)
diff --git a/include/linux/pgtable_api.h b/include/linux/pgtable_api.h
new file mode 100644
index 000000000000..ff367a4ba8c4
--- /dev/null
+++ b/include/linux/pgtable_api.h
@@ -0,0 +1 @@
+#include <linux/pgtable.h>
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 6de8d7a90d78..8fa70ba063a5 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -87,8 +87,8 @@ extern const int phy_10gbit_features_array[1];
*
* @PHY_INTERFACE_MODE_NA: Not Applicable - don't touch
* @PHY_INTERFACE_MODE_INTERNAL: No interface, MAC and PHY combined
- * @PHY_INTERFACE_MODE_MII: Median-independent interface
- * @PHY_INTERFACE_MODE_GMII: Gigabit median-independent interface
+ * @PHY_INTERFACE_MODE_MII: Media-independent interface
+ * @PHY_INTERFACE_MODE_GMII: Gigabit media-independent interface
* @PHY_INTERFACE_MODE_SGMII: Serial gigabit media-independent interface
* @PHY_INTERFACE_MODE_TBI: Ten Bit Interface
* @PHY_INTERFACE_MODE_REVMII: Reverse Media Independent Interface
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 7c7e627503d2..07481bb87d4e 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -86,4 +86,9 @@ extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
void pidhash_init(void);
void pid_idr_init(void);
+static inline bool task_is_in_init_pid_ns(struct task_struct *tsk)
+{
+ return task_active_pid_ns(tsk) == &init_pid_ns;
+}
+
#endif /* _LINUX_PID_NS_H */
diff --git a/include/linux/platform_data/spi-s3c64xx.h b/include/linux/platform_data/spi-s3c64xx.h
index 773daf7915a3..5df1ace6d2c9 100644
--- a/include/linux/platform_data/spi-s3c64xx.h
+++ b/include/linux/platform_data/spi-s3c64xx.h
@@ -16,7 +16,6 @@ struct platform_device;
* struct s3c64xx_spi_csinfo - ChipSelect description
* @fb_delay: Slave specific feedback delay.
* Refer to FB_CLK_SEL register definition in SPI chapter.
- * @line: Custom 'identity' of the CS line.
*
* This is per SPI-Slave Chipselect information.
* Allocate and initialize one in machine init code and make the
@@ -24,7 +23,6 @@ struct platform_device;
*/
struct s3c64xx_spi_csinfo {
u8 fb_delay;
- unsigned line;
};
/**
@@ -43,26 +41,16 @@ struct s3c64xx_spi_info {
/**
* s3c64xx_spi_set_platdata - SPI Controller configure callback by the board
* initialization code.
- * @cfg_gpio: Pointer to gpio setup function.
* @src_clk_nr: Clock the SPI controller is to use to generate SPI clocks.
* @num_cs: Number of elements in the 'cs' array.
*
* Call this from machine init code for each SPI Controller that
* has some chips attached to it.
*/
-extern void s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
- int num_cs);
-extern void s3c64xx_spi1_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
- int num_cs);
-extern void s3c64xx_spi2_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
- int num_cs);
+extern void s3c64xx_spi0_set_platdata(int src_clk_nr, int num_cs);
/* defined by architecture to configure gpio */
extern int s3c64xx_spi0_cfg_gpio(void);
-extern int s3c64xx_spi1_cfg_gpio(void);
-extern int s3c64xx_spi2_cfg_gpio(void);
extern struct s3c64xx_spi_info s3c64xx_spi0_pdata;
-extern struct s3c64xx_spi_info s3c64xx_spi1_pdata;
-extern struct s3c64xx_spi_info s3c64xx_spi2_pdata;
#endif /*__SPI_S3C64XX_H */
diff --git a/include/linux/platform_data/x86/intel-spi.h b/include/linux/platform_data/x86/spi-intel.h
index 7f53a5c6f35e..a512ec37abbb 100644
--- a/include/linux/platform_data/x86/intel-spi.h
+++ b/include/linux/platform_data/x86/spi-intel.h
@@ -6,8 +6,8 @@
* Author: Mika Westerberg <mika.westerberg@linux.intel.com>
*/
-#ifndef INTEL_SPI_PDATA_H
-#define INTEL_SPI_PDATA_H
+#ifndef SPI_INTEL_PDATA_H
+#define SPI_INTEL_PDATA_H
enum intel_spi_type {
INTEL_SPI_BYT = 1,
@@ -19,11 +19,13 @@ enum intel_spi_type {
/**
* struct intel_spi_boardinfo - Board specific data for Intel SPI driver
* @type: Type which this controller is compatible with
- * @writeable: The chip is writeable
+ * @set_writeable: Try to make the chip writeable (optional)
+ * @data: Data to be passed to @set_writeable, may be %NULL
*/
struct intel_spi_boardinfo {
enum intel_spi_type type;
- bool writeable;
+ bool (*set_writeable)(void __iomem *base, void *data);
+ void *data;
};
-#endif /* INTEL_SPI_PDATA_H */
+#endif /* SPI_INTEL_PDATA_H */
diff --git a/include/linux/pm.h b/include/linux/pm.h
index f7d2be686359..e65b3ab28377 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -770,11 +770,11 @@ extern int dpm_suspend_late(pm_message_t state);
extern int dpm_suspend(pm_message_t state);
extern int dpm_prepare(pm_message_t state);
-extern void __suspend_report_result(const char *function, void *fn, int ret);
+extern void __suspend_report_result(const char *function, struct device *dev, void *fn, int ret);
-#define suspend_report_result(fn, ret) \
+#define suspend_report_result(dev, fn, ret) \
do { \
- __suspend_report_result(__func__, fn, ret); \
+ __suspend_report_result(__func__, dev, fn, ret); \
} while (0)
extern int device_pm_wait_for_dev(struct device *sub, struct device *dev);
@@ -814,7 +814,7 @@ static inline int dpm_suspend_start(pm_message_t state)
return 0;
}
-#define suspend_report_result(fn, ret) do {} while (0)
+#define suspend_report_result(dev, fn, ret) do {} while (0)
static inline int device_pm_wait_for_dev(struct device *a, struct device *b)
{
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 9f09601c465a..2bff6a10095d 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -567,6 +567,10 @@ static inline void pm_runtime_disable(struct device *dev)
* Allow the runtime PM autosuspend mechanism to be used for @dev whenever
* requested (or "autosuspend" will be handled as direct runtime-suspend for
* it).
+ *
+ * NOTE: It's important to undo this with pm_runtime_dont_use_autosuspend()
+ * at driver exit time unless your driver initially enabled pm_runtime
+ * with devm_pm_runtime_enable() (which handles it for you).
*/
static inline void pm_runtime_use_autosuspend(struct device *dev)
{
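A hedged sketch of the pairing this note asks for; my_probe and my_remove are illustrative driver callbacks, and only the autosuspend calls are the point:

	static int my_probe(struct device *dev)
	{
		pm_runtime_set_autosuspend_delay(dev, 1000);	/* 1s idle */
		pm_runtime_use_autosuspend(dev);
		pm_runtime_enable(dev);
		return 0;
	}

	static void my_remove(struct device *dev)
	{
		pm_runtime_disable(dev);
		pm_runtime_dont_use_autosuspend(dev);	/* undo the opt-in */
	}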
diff --git a/include/linux/psi.h b/include/linux/psi.h
index a70ca833c6d7..89784763d19e 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -6,6 +6,7 @@
#include <linux/psi_types.h>
#include <linux/sched.h>
#include <linux/poll.h>
+#include <linux/cgroup-defs.h>
struct seq_file;
struct css_set;
@@ -25,18 +26,17 @@ void psi_memstall_enter(unsigned long *flags);
void psi_memstall_leave(unsigned long *flags);
int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
-
-#ifdef CONFIG_CGROUPS
-int psi_cgroup_alloc(struct cgroup *cgrp);
-void psi_cgroup_free(struct cgroup *cgrp);
-void cgroup_move_task(struct task_struct *p, struct css_set *to);
-
struct psi_trigger *psi_trigger_create(struct psi_group *group,
char *buf, size_t nbytes, enum psi_res res);
-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t);
+void psi_trigger_destroy(struct psi_trigger *t);
__poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
poll_table *wait);
+
+#ifdef CONFIG_CGROUPS
+int psi_cgroup_alloc(struct cgroup *cgrp);
+void psi_cgroup_free(struct cgroup *cgrp);
+void cgroup_move_task(struct task_struct *p, struct css_set *to);
#endif
#else /* CONFIG_PSI */
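With the kref and psi_trigger_replace() machinery gone, a trigger now has a single owner and is freed explicitly. A hypothetical file-backed consumer under the new lifetime rules (my_psi_write and my_psi_release are illustrative names; psi_system and PSI_MEMORY are the existing system group and resource enum):

	static ssize_t my_psi_write(struct file *file, char *buf, size_t nbytes)
	{
		struct psi_trigger *new;

		new = psi_trigger_create(&psi_system, buf, nbytes, PSI_MEMORY);
		if (IS_ERR(new))
			return PTR_ERR(new);

		file->private_data = new;	/* single owner, no kref */
		return nbytes;
	}

	static int my_psi_release(struct inode *inode, struct file *file)
	{
		psi_trigger_destroy(file->private_data);
		return 0;
	}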
diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
index 516c0fe836fd..c7fe7c089718 100644
--- a/include/linux/psi_types.h
+++ b/include/linux/psi_types.h
@@ -142,8 +142,8 @@ struct psi_trigger {
*/
u64 last_event_time;
- /* Refcounting to prevent premature destruction */
- struct kref refcount;
+ /* Deferred event(s) from previous ratelimit window */
+ bool pending_event;
};
struct psi_group {
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index eb93a54cff31..e97a8188f0fd 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -14,7 +14,7 @@
#include <linux/errno.h>
#include <linux/kmsg_dump.h>
#include <linux/mutex.h>
-#include <linux/semaphore.h>
+#include <linux/spinlock.h>
#include <linux/time.h>
#include <linux/types.h>
@@ -87,7 +87,7 @@ struct pstore_record {
* @owner: module which is responsible for this backend driver
* @name: name of the backend driver
*
- * @buf_lock: semaphore to serialize access to @buf
+ * @buf_lock: spinlock to serialize access to @buf
* @buf: preallocated crash dump buffer
* @bufsize: size of @buf available for crash dump bytes (must match
* smallest number of bytes available for writing to a
@@ -178,7 +178,7 @@ struct pstore_info {
struct module *owner;
const char *name;
- struct semaphore buf_lock;
+ spinlock_t buf_lock;
char *buf;
size_t bufsize;
diff --git a/include/linux/ptrace_api.h b/include/linux/ptrace_api.h
new file mode 100644
index 000000000000..26e7d275ad8d
--- /dev/null
+++ b/include/linux/ptrace_api.h
@@ -0,0 +1 @@
+#include <linux/ptrace.h>
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 18ebd39c9487..fd692b4a41d5 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -91,7 +91,7 @@ extern bool qid_valid(struct kqid qid);
*
* When there is no mapping defined for the user-namespace, type,
* qid tuple an invalid kqid is returned. Callers are expected to
- * test for and handle handle invalid kqids being returned.
+ * test for and handle invalid kqids being returned.
* Invalid kqids may be tested for using qid_valid().
*/
static inline struct kqid make_kqid(struct user_namespace *from,
diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h
index 2a9fee8ddae3..51b811b62322 100644
--- a/include/linux/raid/xor.h
+++ b/include/linux/raid/xor.h
@@ -11,13 +11,20 @@ struct xor_block_template {
struct xor_block_template *next;
const char *name;
int speed;
- void (*do_2)(unsigned long, unsigned long *, unsigned long *);
- void (*do_3)(unsigned long, unsigned long *, unsigned long *,
- unsigned long *);
- void (*do_4)(unsigned long, unsigned long *, unsigned long *,
- unsigned long *, unsigned long *);
- void (*do_5)(unsigned long, unsigned long *, unsigned long *,
- unsigned long *, unsigned long *, unsigned long *);
+ void (*do_2)(unsigned long, unsigned long * __restrict,
+ const unsigned long * __restrict);
+ void (*do_3)(unsigned long, unsigned long * __restrict,
+ const unsigned long * __restrict,
+ const unsigned long * __restrict);
+ void (*do_4)(unsigned long, unsigned long * __restrict,
+ const unsigned long * __restrict,
+ const unsigned long * __restrict,
+ const unsigned long * __restrict);
+ void (*do_5)(unsigned long, unsigned long * __restrict,
+ const unsigned long * __restrict,
+ const unsigned long * __restrict,
+ const unsigned long * __restrict,
+ const unsigned long * __restrict);
};
#endif
diff --git a/include/linux/random.h b/include/linux/random.h
index c45b2693e51f..f673fbb838b3 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -1,9 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * include/linux/random.h
- *
- * Include file for the random number generator.
- */
+
#ifndef _LINUX_RANDOM_H
#define _LINUX_RANDOM_H
@@ -14,14 +10,10 @@
#include <uapi/linux/random.h>
-struct random_ready_callback {
- struct list_head list;
- void (*func)(struct random_ready_callback *rdy);
- struct module *owner;
-};
+struct notifier_block;
-extern void add_device_randomness(const void *, unsigned int);
-extern void add_bootloader_randomness(const void *, unsigned int);
+extern void add_device_randomness(const void *, size_t);
+extern void add_bootloader_randomness(const void *, size_t);
#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
static inline void add_latent_entropy(void)
@@ -36,14 +28,24 @@ static inline void add_latent_entropy(void) {}
extern void add_input_randomness(unsigned int type, unsigned int code,
unsigned int value) __latent_entropy;
extern void add_interrupt_randomness(int irq) __latent_entropy;
+extern void add_hwgenerator_randomness(const void *buffer, size_t count,
+ size_t entropy);
+#if IS_ENABLED(CONFIG_VMGENID)
+extern void add_vmfork_randomness(const void *unique_vm_id, size_t size);
+extern int register_random_vmfork_notifier(struct notifier_block *nb);
+extern int unregister_random_vmfork_notifier(struct notifier_block *nb);
+#else
+static inline int register_random_vmfork_notifier(struct notifier_block *nb) { return 0; }
+static inline int unregister_random_vmfork_notifier(struct notifier_block *nb) { return 0; }
+#endif
-extern void get_random_bytes(void *buf, int nbytes);
+extern void get_random_bytes(void *buf, size_t nbytes);
extern int wait_for_random_bytes(void);
extern int __init rand_initialize(void);
extern bool rng_is_initialized(void);
-extern int add_random_ready_callback(struct random_ready_callback *rdy);
-extern void del_random_ready_callback(struct random_ready_callback *rdy);
-extern int __must_check get_random_bytes_arch(void *buf, int nbytes);
+extern int register_random_ready_notifier(struct notifier_block *nb);
+extern int unregister_random_ready_notifier(struct notifier_block *nb);
+extern size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes);
#ifndef MODULE
extern const struct file_operations random_fops, urandom_fops;
@@ -87,7 +89,7 @@ static inline unsigned long get_random_canary(void)
/* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes).
* Returns the result of the call to wait_for_random_bytes. */
-static inline int get_random_bytes_wait(void *buf, int nbytes)
+static inline int get_random_bytes_wait(void *buf, size_t nbytes)
{
int ret = wait_for_random_bytes();
get_random_bytes(buf, nbytes);
@@ -158,4 +160,9 @@ static inline bool __init arch_get_random_long_early(unsigned long *v)
}
#endif
+#ifdef CONFIG_SMP
+extern int random_prepare_cpu(unsigned int cpu);
+extern int random_online_cpu(unsigned int cpu);
+#endif
+
#endif /* _LINUX_RANDOM_H */
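The removed random_ready_callback structure gives way to a standard notifier chain, so a hypothetical client now registers a notifier_block instead (my_rng_ready and my_rng_nb are illustrative names):

	static int my_rng_ready(struct notifier_block *nb, unsigned long action,
				void *data)
	{
		/* The CRNG is initialized; get_random_bytes() is now safe. */
		return NOTIFY_DONE;
	}

	static struct notifier_block my_rng_nb = {
		.notifier_call = my_rng_ready,
	};

	/* init:  register_random_ready_notifier(&my_rng_nb);   */
	/* exit:  unregister_random_ready_notifier(&my_rng_nb); */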
diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
index bebc911161b6..1468caf001c0 100644
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_RANDOMIZE_KSTACK_H
#define _LINUX_RANDOMIZE_KSTACK_H
+#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
#include <linux/kernel.h>
#include <linux/jump_label.h>
#include <linux/percpu-defs.h>
@@ -16,8 +17,20 @@ DECLARE_PER_CPU(u32, kstack_offset);
* alignment. Also, since this use is being explicitly masked to a max of
* 10 bits, stack-clash style attacks are unlikely. For more details see
* "VLAs" in Documentation/process/deprecated.rst
+ *
+ * The normal __builtin_alloca() is initialized with INIT_STACK_ALL (currently
+ * only with Clang and not GCC). Initializing the unused area on each syscall
+ * entry is expensive, and generating an implicit call to memset() may also be
+ * problematic (such as in noinstr functions). Therefore, if the compiler
+ * supports it (which it should if it initializes allocas), always use the
+ * "uninitialized" variant of the builtin.
*/
-void *__builtin_alloca(size_t size);
+#if __has_builtin(__builtin_alloca_uninitialized)
+#define __kstack_alloca __builtin_alloca_uninitialized
+#else
+#define __kstack_alloca __builtin_alloca
+#endif
+
/*
* Use, at most, 10 bits of entropy. We explicitly cap this to keep the
* "VLA" from being unbounded (see above). 10 bits leaves enough room for
@@ -36,7 +49,7 @@ void *__builtin_alloca(size_t size);
if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
&randomize_kstack_offset)) { \
u32 offset = raw_cpu_read(kstack_offset); \
- u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(offset)); \
+ u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset)); \
/* Keep allocation even after "ptr" loses scope. */ \
asm volatile("" :: "r"(ptr) : "memory"); \
} \
@@ -50,5 +63,9 @@ void *__builtin_alloca(size_t size);
raw_cpu_write(kstack_offset, offset); \
} \
} while (0)
+#else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
+#define add_random_kstack_offset() do { } while (0)
+#define choose_random_kstack_offset(rand) do { } while (0)
+#endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
#endif
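The cap described above is a plain mask. A userspace sketch of the arithmetic, assuming the documented 10-bit limit (the kernel keeps the actual bound inside its KSTACK_OFFSET_MAX() macro):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t rand = 0xdeadbeef;	/* stand-in for a cycle counter */
		uint32_t offset = rand & 0x3ff;	/* keep at most 10 bits */

		printf("stack offset: %u bytes (max 1023)\n", offset);
		return 0;
	}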
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 88b42eb46406..e7c39c200e2b 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -84,7 +84,7 @@ static inline int rcu_preempt_depth(void)
/* Internal to kernel */
void rcu_init(void);
-extern int rcu_scheduler_active __read_mostly;
+extern int rcu_scheduler_active;
void rcu_sched_clock_irq(int user);
void rcu_report_dead(unsigned int cpu);
void rcutree_migrate_callbacks(int cpu);
@@ -924,7 +924,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
*
* kvfree_rcu(ptr);
*
- * where @ptr is a pointer to kvfree().
+ * where @ptr is the pointer to be freed by kvfree().
*
* Please note, head-less way of freeing is permitted to
* use from a context that has to follow might_sleep()
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 858f4d429946..5fed476f977f 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -64,9 +64,8 @@ static inline void rcu_softirq_qs(void)
rcu_tasks_qs(current, (preempt)); \
} while (0)
-static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
+static inline int rcu_needs_cpu(void)
{
- *nextevt = KTIME_MAX;
return 0;
}
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 53209d669400..9c6cfb742504 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -19,7 +19,7 @@
void rcu_softirq_qs(void);
void rcu_note_context_switch(bool preempt);
-int rcu_needs_cpu(u64 basem, u64 *nextevt);
+int rcu_needs_cpu(void);
void rcu_cpu_stall_reset(void);
/*
@@ -62,7 +62,7 @@ static inline void rcu_irq_exit_check_preempt(void) { }
void exit_rcu(void);
void rcu_scheduler_starting(void);
-extern int rcu_scheduler_active __read_mostly;
+extern int rcu_scheduler_active;
void rcu_end_inkernel_boot(void);
bool rcu_inkernel_boot_has_ended(void);
bool rcu_is_watching(void);
diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
index 61c56cca95c4..8052d34da782 100644
--- a/include/linux/rcuwait.h
+++ b/include/linux/rcuwait.h
@@ -47,11 +47,7 @@ static inline void prepare_to_rcuwait(struct rcuwait *w)
rcu_assign_pointer(w->task, current);
}
-static inline void finish_rcuwait(struct rcuwait *w)
-{
- rcu_assign_pointer(w->task, NULL);
- __set_current_state(TASK_RUNNING);
-}
+extern void finish_rcuwait(struct rcuwait *w);
#define rcuwait_wait_event(w, condition, state) \
({ \
diff --git a/include/linux/rcuwait_api.h b/include/linux/rcuwait_api.h
new file mode 100644
index 000000000000..f962e28544dd
--- /dev/null
+++ b/include/linux/rcuwait_api.h
@@ -0,0 +1 @@
+#include <linux/rcuwait.h>
diff --git a/include/linux/refcount_api.h b/include/linux/refcount_api.h
new file mode 100644
index 000000000000..5f032589f568
--- /dev/null
+++ b/include/linux/refcount_api.h
@@ -0,0 +1 @@
+#include <linux/refcount.h>
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 22652e5fbc38..de81a94d7b30 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -237,6 +237,10 @@ typedef void (*regmap_unlock)(void *);
* @reg_stride: The register address stride. Valid register addresses are a
* multiple of this value. If set to 0, a value of 1 will be
* used.
+ * @reg_downshift: The number of bits to downshift the register before
+ * performing any operations.
+ * @reg_base: Value to be added to every register address before performing any
+ * operation.
* @pad_bits: Number of bits of padding between register and value.
* @val_bits: Number of bits in a register value, mandatory.
*
@@ -360,6 +364,8 @@ struct regmap_config {
int reg_bits;
int reg_stride;
+ int reg_downshift;
+ unsigned int reg_base;
int pad_bits;
int val_bits;
diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index c35f3962dc4f..373003ace639 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -308,6 +308,11 @@ static inline bool rfkill_blocked(struct rfkill *rfkill)
return false;
}
+static inline bool rfkill_soft_blocked(struct rfkill *rfkill)
+{
+ return false;
+}
+
static inline enum rfkill_type rfkill_find_type(const char *name)
{
return RFKILL_TYPE_ALL;
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index e704b1a4c06c..17230c458341 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -11,6 +11,7 @@
#include <linux/rwsem.h>
#include <linux/memcontrol.h>
#include <linux/highmem.h>
+#include <linux/pagemap.h>
/*
* The anon_vma heads a list of private "related" vmas, to scan if
@@ -167,18 +168,19 @@ struct anon_vma *page_get_anon_vma(struct page *page);
*/
void page_move_anon_rmap(struct page *, struct vm_area_struct *);
void page_add_anon_rmap(struct page *, struct vm_area_struct *,
- unsigned long, bool);
+ unsigned long address, bool compound);
void do_page_add_anon_rmap(struct page *, struct vm_area_struct *,
- unsigned long, int);
+ unsigned long address, int flags);
void page_add_new_anon_rmap(struct page *, struct vm_area_struct *,
- unsigned long, bool);
-void page_add_file_rmap(struct page *, bool);
-void page_remove_rmap(struct page *, bool);
-
+ unsigned long address, bool compound);
+void page_add_file_rmap(struct page *, struct vm_area_struct *,
+ bool compound);
+void page_remove_rmap(struct page *, struct vm_area_struct *,
+ bool compound);
void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
- unsigned long);
+ unsigned long address);
void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *,
- unsigned long);
+ unsigned long address);
static inline void page_dup_rmap(struct page *page, bool compound)
{
@@ -188,11 +190,11 @@ static inline void page_dup_rmap(struct page *page, bool compound)
/*
* Called from mm/vmscan.c to handle paging out
*/
-int page_referenced(struct page *, int is_locked,
+int folio_referenced(struct folio *, int is_locked,
struct mem_cgroup *memcg, unsigned long *vm_flags);
-void try_to_migrate(struct page *page, enum ttu_flags flags);
-void try_to_unmap(struct page *, enum ttu_flags flags);
+void try_to_migrate(struct folio *folio, enum ttu_flags flags);
+void try_to_unmap(struct folio *, enum ttu_flags flags);
int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
unsigned long end, struct page **pages,
@@ -200,11 +202,13 @@ int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
/* Avoid racy checks */
#define PVMW_SYNC (1 << 0)
-/* Look for migarion entries rather than present PTEs */
+/* Look for migration entries rather than present PTEs */
#define PVMW_MIGRATION (1 << 1)
struct page_vma_mapped_walk {
- struct page *page;
+ unsigned long pfn;
+ unsigned long nr_pages;
+ pgoff_t pgoff;
struct vm_area_struct *vma;
unsigned long address;
pmd_t *pmd;
@@ -213,10 +217,30 @@ struct page_vma_mapped_walk {
unsigned int flags;
};
+#define DEFINE_PAGE_VMA_WALK(name, _page, _vma, _address, _flags) \
+ struct page_vma_mapped_walk name = { \
+ .pfn = page_to_pfn(_page), \
+ .nr_pages = compound_nr(page), \
+ .pgoff = page_to_pgoff(page), \
+ .vma = _vma, \
+ .address = _address, \
+ .flags = _flags, \
+ }
+
+#define DEFINE_FOLIO_VMA_WALK(name, _folio, _vma, _address, _flags) \
+ struct page_vma_mapped_walk name = { \
+ .pfn = folio_pfn(_folio), \
+ .nr_pages = folio_nr_pages(_folio), \
+ .pgoff = folio_pgoff(_folio), \
+ .vma = _vma, \
+ .address = _address, \
+ .flags = _flags, \
+ }
+
static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
{
/* HugeTLB pte is set to the relevant page table entry without pte_mapped. */
- if (pvmw->pte && !PageHuge(pvmw->page))
+ if (pvmw->pte && !is_vm_hugetlb_page(pvmw->vma))
pte_unmap(pvmw->pte);
if (pvmw->ptl)
spin_unlock(pvmw->ptl);
@@ -237,18 +261,12 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
*/
int folio_mkclean(struct folio *);
-/*
- * called in munlock()/munmap() path to check for other vmas holding
- * the page mlocked.
- */
-void page_mlock(struct page *page);
-
-void remove_migration_ptes(struct page *old, struct page *new, bool locked);
+void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked);
/*
* Called by memory-failure.c to kill processes.
*/
-struct anon_vma *page_lock_anon_vma_read(struct page *page);
+struct anon_vma *folio_lock_anon_vma_read(struct folio *folio);
void page_unlock_anon_vma_read(struct anon_vma *anon_vma);
int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
@@ -267,15 +285,15 @@ struct rmap_walk_control {
* Return false if page table scanning in rmap_walk should be stopped.
* Otherwise, return true.
*/
- bool (*rmap_one)(struct page *page, struct vm_area_struct *vma,
+ bool (*rmap_one)(struct folio *folio, struct vm_area_struct *vma,
unsigned long addr, void *arg);
- int (*done)(struct page *page);
- struct anon_vma *(*anon_lock)(struct page *page);
+ int (*done)(struct folio *folio);
+ struct anon_vma *(*anon_lock)(struct folio *folio);
bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
};
-void rmap_walk(struct page *page, struct rmap_walk_control *rwc);
-void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc);
+void rmap_walk(struct folio *folio, const struct rmap_walk_control *rwc);
+void rmap_walk_locked(struct folio *folio, const struct rmap_walk_control *rwc);
#else /* !CONFIG_MMU */
@@ -283,7 +301,7 @@ void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc);
#define anon_vma_prepare(vma) (0)
#define anon_vma_link(vma) do {} while (0)
-static inline int page_referenced(struct page *page, int is_locked,
+static inline int folio_referenced(struct folio *folio, int is_locked,
struct mem_cgroup *memcg,
unsigned long *vm_flags)
{
@@ -291,7 +309,7 @@ static inline int page_referenced(struct page *page, int is_locked,
return 0;
}
-static inline void try_to_unmap(struct page *page, enum ttu_flags flags)
+static inline void try_to_unmap(struct folio *folio, enum ttu_flags flags)
{
}
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 95df357ec009..dffeb8281c2d 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -28,14 +28,9 @@ struct seq_file;
*/
struct sbitmap_word {
/**
- * @depth: Number of bits being used in @word/@cleared
- */
- unsigned long depth;
-
- /**
* @word: word holding free bits
*/
- unsigned long word ____cacheline_aligned_in_smp;
+ unsigned long word;
/**
* @cleared: word holding cleared bits
@@ -140,7 +135,7 @@ struct sbitmap_queue {
/**
* @min_shallow_depth: The minimum shallow depth which may be passed to
- * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow().
+ * sbitmap_queue_get_shallow()
*/
unsigned int min_shallow_depth;
};
@@ -164,6 +159,14 @@ struct sbitmap_queue {
int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
gfp_t flags, int node, bool round_robin, bool alloc_hint);
+/* sbitmap internal helper */
+static inline unsigned int __map_depth(const struct sbitmap *sb, int index)
+{
+ if (index == sb->map_nr - 1)
+ return sb->depth - (index << sb->shift);
+ return 1U << sb->shift;
+}
+
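A worked example of the helper as a standalone sketch: with 64-bit words (shift = 6) and depth = 200, the first three words carry 64 bits each and the last carries the remaining 8.

	#include <stdio.h>

	static unsigned int map_depth_sketch(unsigned int depth,
					     unsigned int shift,
					     unsigned int map_nr,
					     unsigned int index)
	{
		if (index == map_nr - 1)
			return depth - (index << shift);
		return 1U << shift;
	}

	int main(void)
	{
		unsigned int depth = 200, shift = 6;
		unsigned int map_nr = (depth + 63) >> shift;	/* 4 words */

		for (unsigned int i = 0; i < map_nr; i++)
			printf("word %u: %u bits\n", i,
			       map_depth_sketch(depth, shift, map_nr, i));
		return 0;
	}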
/**
* sbitmap_free() - Free memory used by a &struct sbitmap.
* @sb: Bitmap to free.
@@ -251,7 +254,7 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb,
while (scanned < sb->depth) {
unsigned long word;
unsigned int depth = min_t(unsigned int,
- sb->map[index].depth - nr,
+ __map_depth(sb, index) - nr,
sb->depth - scanned);
scanned += depth;
@@ -460,7 +463,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
unsigned int *offset);
/**
- * __sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct
+ * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct
* sbitmap_queue, limiting the depth used from each word, with preemption
* already disabled.
* @sbq: Bitmap queue to allocate from.
@@ -472,8 +475,8 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
*
* Return: Non-negative allocated bit number if successful, -1 otherwise.
*/
-int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
- unsigned int shallow_depth);
+int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
+ unsigned int shallow_depth);
/**
* sbitmap_queue_get() - Try to allocate a free bit from a &struct
@@ -496,32 +499,6 @@ static inline int sbitmap_queue_get(struct sbitmap_queue *sbq,
}
/**
- * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct
- * sbitmap_queue, limiting the depth used from each word.
- * @sbq: Bitmap queue to allocate from.
- * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to
- * sbitmap_queue_clear()).
- * @shallow_depth: The maximum number of bits to allocate from a single word.
- * See sbitmap_get_shallow().
- *
- * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after
- * initializing @sbq.
- *
- * Return: Non-negative allocated bit number if successful, -1 otherwise.
- */
-static inline int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
- unsigned int *cpu,
- unsigned int shallow_depth)
-{
- int nr;
-
- *cpu = get_cpu();
- nr = __sbitmap_queue_get_shallow(sbq, shallow_depth);
- put_cpu();
- return nr;
-}
-
-/**
* sbitmap_queue_min_shallow_depth() - Inform a &struct sbitmap_queue of the
* minimum shallow depth that will be used.
* @sbq: Bitmap queue in question.
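
The sbitmap change above drops the cached per-word @depth field and recomputes it on demand: every word covers 1 << shift bits except the last, which holds the remainder of @depth. A minimal user-space sketch of the __map_depth() arithmetic (toy_sb and toy_map_depth are illustrative stand-ins, not kernel API):

    #include <assert.h>

    struct toy_sb { unsigned int depth, shift, map_nr; };

    /* mirrors __map_depth(): full words everywhere but the tail */
    static unsigned int toy_map_depth(const struct toy_sb *sb, int index)
    {
            if (index == (int)sb->map_nr - 1)
                    return sb->depth - (index << sb->shift);
            return 1U << sb->shift;
    }

    int main(void)
    {
            /* 100 bits in 64-bit words: word 0 holds 64 bits, word 1 holds 36 */
            struct toy_sb sb = { .depth = 100, .shift = 6, .map_nr = 2 };

            assert(toy_map_depth(&sb, 0) == 64);
            assert(toy_map_depth(&sb, 1) == 36);
            return 0;
    }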
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 508b91d57470..ff6901dcb06d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -619,10 +619,6 @@ struct sched_dl_entity {
* task has to wait for a replenishment to be performed at the
* next firing of dl_timer.
*
- * @dl_boosted tells if we are boosted due to DI. If so we are
- * outside bandwidth enforcement mechanism (but only until we
- * exit the critical section);
- *
* @dl_yielded tells if task gave up the CPU before consuming
* all its available runtime during the last job.
*
@@ -942,6 +938,9 @@ struct task_struct {
/* Recursion prevention for eventfd_signal() */
unsigned in_eventfd_signal:1;
#endif
+#ifdef CONFIG_IOMMU_SVA
+ unsigned pasid_activated:1;
+#endif
unsigned long atomic_flags; /* Flags requiring atomic access. */
@@ -1091,6 +1090,9 @@ struct task_struct {
/* Restored if set_restore_sigmask() was used: */
sigset_t saved_sigmask;
struct sigpending pending;
+#ifdef CONFIG_RT_DELAYED_SIGNALS
+ struct kernel_siginfo forced_info;
+#endif
unsigned long sas_ss_sp;
size_t sas_ss_size;
unsigned int sas_ss_flags;
@@ -1624,19 +1626,32 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk)
#define TASK_REPORT_IDLE (TASK_REPORT + 1)
#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
-static inline unsigned int task_state_index(struct task_struct *tsk)
+static inline unsigned int __task_state_index(unsigned int tsk_state,
+ unsigned int tsk_exit_state)
{
- unsigned int tsk_state = READ_ONCE(tsk->__state);
- unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT;
+ unsigned int state = (tsk_state | tsk_exit_state) & TASK_REPORT;
BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
if (tsk_state == TASK_IDLE)
state = TASK_REPORT_IDLE;
+ /*
+ * We're lying here, but rather than expose a completely new task state
+ * to userspace, we can make this appear as if the task has gone through
+ * a regular rt_mutex_lock() call.
+ */
+ if (tsk_state == TASK_RTLOCK_WAIT)
+ state = TASK_UNINTERRUPTIBLE;
+
return fls(state);
}
+static inline unsigned int task_state_index(struct task_struct *tsk)
+{
+ return __task_state_index(READ_ONCE(tsk->__state), tsk->exit_state);
+}
+
static inline char task_index_to_char(unsigned int state)
{
static const char state_char[] = "RSDTtXZPI";
@@ -1684,7 +1699,6 @@ extern struct pid *cad_pid;
#define PF_MEMALLOC 0x00000800 /* Allocating memory */
#define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */
#define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */
-#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */
#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */
#define PF_FROZEN 0x00010000 /* Frozen for system suspend */
#define PF_KSWAPD 0x00020000 /* I am kswapd */
@@ -1694,7 +1708,6 @@ extern struct pid *cad_pid;
* I am cleaning dirty pages from some other bdi. */
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
-#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMALLOC_PIN 0x10000000 /* Allocation context constrained to zones which allow long term pinning. */
@@ -2020,7 +2033,7 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
#if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC)
extern int __cond_resched(void);
-#ifdef CONFIG_PREEMPT_DYNAMIC
+#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
DECLARE_STATIC_CALL(cond_resched, __cond_resched);
@@ -2029,6 +2042,14 @@ static __always_inline int _cond_resched(void)
return static_call_mod(cond_resched)();
}
+#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
+extern int dynamic_cond_resched(void);
+
+static __always_inline int _cond_resched(void)
+{
+ return dynamic_cond_resched();
+}
+
#else
static inline int _cond_resched(void)
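
Splitting __task_state_index() out of task_state_index() lets callers decode a state word they snapshotted earlier instead of rereading tsk->__state, and the new TASK_RTLOCK_WAIT branch deliberately reports the task as 'D' so userspace sees a familiar state. A user-space sketch of the index-to-character step, mirroring the kernel's TASK_* values for the three common states (toy_fls is a stand-in for fls()):

    #include <stdio.h>

    /* stand-in for the kernel's fls(): position of the highest set bit */
    static int toy_fls(unsigned int x)
    {
            int r = 0;

            while (x) {
                    r++;
                    x >>= 1;
            }
            return r;
    }

    int main(void)
    {
            static const char state_char[] = "RSDTtXZPI";
            unsigned int task_running = 0x0000;
            unsigned int task_interruptible = 0x0001;
            unsigned int task_uninterruptible = 0x0002;

            printf("%c %c %c\n",
                   state_char[toy_fls(task_running)],          /* R */
                   state_char[toy_fls(task_interruptible)],    /* S */
                   state_char[toy_fls(task_uninterruptible)]); /* D */
            return 0;
    }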
diff --git a/include/linux/sched/affinity.h b/include/linux/sched/affinity.h
new file mode 100644
index 000000000000..227f5be81bcd
--- /dev/null
+++ b/include/linux/sched/affinity.h
@@ -0,0 +1 @@
+#include <linux/sched.h>
diff --git a/include/linux/sched/cond_resched.h b/include/linux/sched/cond_resched.h
new file mode 100644
index 000000000000..227f5be81bcd
--- /dev/null
+++ b/include/linux/sched/cond_resched.h
@@ -0,0 +1 @@
+#include <linux/sched.h>
diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index 1aff00b65f3c..7c83d4d5a971 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -6,6 +6,8 @@
* NORMAL/BATCH tasks.
*/
+#include <linux/sched.h>
+
#define MAX_DL_PRIO 0
static inline int dl_prio(int prio)
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index cc9f393e2a70..8c15abd67aed 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -5,54 +5,55 @@
#include <linux/init.h>
#include <linux/tick.h>
-enum hk_flags {
- HK_FLAG_TIMER = 1,
- HK_FLAG_RCU = (1 << 1),
- HK_FLAG_MISC = (1 << 2),
- HK_FLAG_SCHED = (1 << 3),
- HK_FLAG_TICK = (1 << 4),
- HK_FLAG_DOMAIN = (1 << 5),
- HK_FLAG_WQ = (1 << 6),
- HK_FLAG_MANAGED_IRQ = (1 << 7),
- HK_FLAG_KTHREAD = (1 << 8),
+enum hk_type {
+ HK_TYPE_TIMER,
+ HK_TYPE_RCU,
+ HK_TYPE_MISC,
+ HK_TYPE_SCHED,
+ HK_TYPE_TICK,
+ HK_TYPE_DOMAIN,
+ HK_TYPE_WQ,
+ HK_TYPE_MANAGED_IRQ,
+ HK_TYPE_KTHREAD,
+ HK_TYPE_MAX
};
#ifdef CONFIG_CPU_ISOLATION
DECLARE_STATIC_KEY_FALSE(housekeeping_overridden);
-extern int housekeeping_any_cpu(enum hk_flags flags);
-extern const struct cpumask *housekeeping_cpumask(enum hk_flags flags);
-extern bool housekeeping_enabled(enum hk_flags flags);
-extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags);
-extern bool housekeeping_test_cpu(int cpu, enum hk_flags flags);
+extern int housekeeping_any_cpu(enum hk_type type);
+extern const struct cpumask *housekeeping_cpumask(enum hk_type type);
+extern bool housekeeping_enabled(enum hk_type type);
+extern void housekeeping_affine(struct task_struct *t, enum hk_type type);
+extern bool housekeeping_test_cpu(int cpu, enum hk_type type);
extern void __init housekeeping_init(void);
#else
-static inline int housekeeping_any_cpu(enum hk_flags flags)
+static inline int housekeeping_any_cpu(enum hk_type type)
{
return smp_processor_id();
}
-static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags)
+static inline const struct cpumask *housekeeping_cpumask(enum hk_type type)
{
return cpu_possible_mask;
}
-static inline bool housekeeping_enabled(enum hk_flags flags)
+static inline bool housekeeping_enabled(enum hk_type type)
{
return false;
}
static inline void housekeeping_affine(struct task_struct *t,
- enum hk_flags flags) { }
+ enum hk_type type) { }
static inline void housekeeping_init(void) { }
#endif /* CONFIG_CPU_ISOLATION */
-static inline bool housekeeping_cpu(int cpu, enum hk_flags flags)
+static inline bool housekeeping_cpu(int cpu, enum hk_type type)
{
#ifdef CONFIG_CPU_ISOLATION
if (static_branch_unlikely(&housekeeping_overridden))
- return housekeeping_test_cpu(cpu, flags);
+ return housekeeping_test_cpu(cpu, type);
#endif
return true;
}
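
With hk_flags gone, each housekeeping query names exactly one enum hk_type value rather than OR-ing bit flags, and the new HK_TYPE_MAX terminator suggests per-type bookkeeping behind the API. A hedged kernel-style sketch of callers after the conversion (both function names are illustrative):

    #include <linux/sched/isolation.h>
    #include <linux/types.h>

    static int pick_timer_housekeeping_cpu(void)
    {
            /* old API: housekeeping_any_cpu(HK_FLAG_TIMER) with a bit flag */
            return housekeeping_any_cpu(HK_TYPE_TIMER);
    }

    static bool cpu_runs_unbound_timers(int cpu)
    {
            return housekeeping_cpu(cpu, HK_TYPE_TIMER);
    }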
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index aa5f09ca5bcf..a80356e9dc69 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -8,6 +8,7 @@
#include <linux/mm_types.h>
#include <linux/gfp.h>
#include <linux/sync_core.h>
+#include <linux/ioasid.h>
/*
* Routines for handling mm_structs
@@ -433,4 +434,29 @@ static inline void membarrier_update_current_mm(struct mm_struct *next_mm)
}
#endif
+#ifdef CONFIG_IOMMU_SVA
+static inline void mm_pasid_init(struct mm_struct *mm)
+{
+ mm->pasid = INVALID_IOASID;
+}
+
+/* Associate a PASID with an mm_struct: */
+static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid)
+{
+ mm->pasid = pasid;
+}
+
+static inline void mm_pasid_drop(struct mm_struct *mm)
+{
+ if (pasid_valid(mm->pasid)) {
+ ioasid_free(mm->pasid);
+ mm->pasid = INVALID_IOASID;
+ }
+}
+#else
+static inline void mm_pasid_init(struct mm_struct *mm) {}
+static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {}
+static inline void mm_pasid_drop(struct mm_struct *mm) {}
+#endif
+
#endif /* _LINUX_SCHED_MM_H */
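
The mm_pasid_*() helpers above give the PASID a well-defined lifetime on the mm_struct instead of leaving it to IOMMU driver code. A compressed sketch of the intended call order; in the kernel these three calls live in mm creation, the SVA bind path, and mm teardown respectively, not in one function:

    #include <linux/sched/mm.h>

    static void pasid_lifecycle_sketch(struct mm_struct *mm, u32 pasid)
    {
            mm_pasid_init(mm);       /* mm creation: pasid = INVALID_IOASID */
            mm_pasid_set(mm, pasid); /* SVA bind: record the allocated PASID */
            mm_pasid_drop(mm);       /* teardown: ioasid_free() if still valid */
    }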
diff --git a/include/linux/sched/posix-timers.h b/include/linux/sched/posix-timers.h
new file mode 100644
index 000000000000..523a381d6c88
--- /dev/null
+++ b/include/linux/sched/posix-timers.h
@@ -0,0 +1 @@
+#include <linux/posix-timers.h>
diff --git a/include/linux/sched/rseq_api.h b/include/linux/sched/rseq_api.h
new file mode 100644
index 000000000000..cf2af72693e1
--- /dev/null
+++ b/include/linux/sched/rseq_api.h
@@ -0,0 +1 @@
+#include <linux/rseq.h>
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index c19dd5a2c05c..c1076b5e17fb 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -23,6 +23,16 @@ enum sched_tunable_scaling {
SCHED_TUNABLESCALING_END,
};
+#define NUMA_BALANCING_DISABLED 0x0
+#define NUMA_BALANCING_NORMAL 0x1
+#define NUMA_BALANCING_MEMORY_TIERING 0x2
+
+#ifdef CONFIG_NUMA_BALANCING
+extern int sysctl_numa_balancing_mode;
+#else
+#define sysctl_numa_balancing_mode 0
+#endif
+
/*
* control realtime throttling:
*
@@ -45,10 +55,6 @@ extern unsigned int sysctl_sched_uclamp_util_min_rt_default;
extern unsigned int sysctl_sched_cfs_bandwidth_slice;
#endif
-#ifdef CONFIG_SCHED_AUTOGROUP
-extern unsigned int sysctl_sched_autogroup_enabled;
-#endif
-
extern int sysctl_sched_rr_timeslice;
extern int sched_rr_timeslice;
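
sysctl_numa_balancing_mode turns the old boolean knob into a bitmask, which is why NUMA_BALANCING_NORMAL and NUMA_BALANCING_MEMORY_TIERING are distinct flag bits. A short sketch of testing the new mode (the helper name is illustrative; the !CONFIG_NUMA_BALANCING stub makes this compile away to false):

    #include <linux/sched/sysctl.h>
    #include <linux/types.h>

    static bool numa_tiering_active(void)
    {
            return sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING;
    }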
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index b9198a1b3a84..e84e54d1b490 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -54,8 +54,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
extern void init_idle(struct task_struct *idle, int cpu);
extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
-extern void sched_post_fork(struct task_struct *p,
- struct kernel_clone_args *kargs);
+extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs);
+extern void sched_post_fork(struct task_struct *p);
extern void sched_dead(struct task_struct *p);
void __noreturn do_task_dead(void);
diff --git a/include/linux/sched/task_flags.h b/include/linux/sched/task_flags.h
new file mode 100644
index 000000000000..227f5be81bcd
--- /dev/null
+++ b/include/linux/sched/task_flags.h
@@ -0,0 +1 @@
+#include <linux/sched.h>
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index d10150587d81..892562ebbd3a 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -79,6 +79,8 @@ static inline void *try_get_task_stack(struct task_struct *tsk)
static inline void put_task_stack(struct task_struct *tsk) {}
#endif
+void exit_task_stack_account(struct task_struct *tsk);
+
#define task_stack_end_corrupted(task) \
(*(end_of_stack(task)) != STACK_END_MAGIC)
diff --git a/include/linux/sched/thread_info_api.h b/include/linux/sched/thread_info_api.h
new file mode 100644
index 000000000000..2c60fbc16c08
--- /dev/null
+++ b/include/linux/sched/thread_info_api.h
@@ -0,0 +1 @@
+#include <linux/thread_info.h>
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 8054641c0a7b..56cffe42abbc 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -93,6 +93,7 @@ struct sched_domain {
unsigned int busy_factor; /* less balancing by factor if busy */
unsigned int imbalance_pct; /* No balance until over watermark */
unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */
+ unsigned int imb_numa_nr; /* Nr running tasks that allow a NUMA imbalance */
int nohz_idle; /* NOHZ IDLE status */
int flags; /* See SD_* */
diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
index 835ee87ed792..cb41c5edb4d4 100644
--- a/include/linux/sched_clock.h
+++ b/include/linux/sched_clock.h
@@ -5,6 +5,8 @@
#ifndef LINUX_SCHED_CLOCK
#define LINUX_SCHED_CLOCK
+#include <linux/types.h>
+
#ifdef CONFIG_GENERIC_SCHED_CLOCK
/**
* struct clock_read_data - data required to read from sched_clock()
diff --git a/include/linux/security.h b/include/linux/security.h
index 6d72772182c8..25b3ef71f495 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1422,6 +1422,8 @@ int security_sctp_bind_connect(struct sock *sk, int optname,
struct sockaddr *address, int addrlen);
void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
struct sock *newsk);
+int security_sctp_assoc_established(struct sctp_association *asoc,
+ struct sk_buff *skb);
#else /* CONFIG_SECURITY_NETWORK */
static inline int security_unix_stream_connect(struct sock *sock,
@@ -1641,6 +1643,12 @@ static inline void security_sctp_sk_clone(struct sctp_association *asoc,
struct sock *newsk)
{
}
+
+static inline int security_sctp_assoc_established(struct sctp_association *asoc,
+ struct sk_buff *skb)
+{
+ return 0;
+}
#endif /* CONFIG_SECURITY_NETWORK */
#ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/include/linux/seqlock_api.h b/include/linux/seqlock_api.h
new file mode 100644
index 000000000000..be91e7d3b826
--- /dev/null
+++ b/include/linux/seqlock_api.h
@@ -0,0 +1 @@
+#include <linux/seqlock.h>
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index e65b80ed09e7..ab51d3cd39bd 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -24,6 +24,7 @@ struct shmem_inode_info {
struct shared_policy policy; /* NUMA memory alloc policy */
struct simple_xattrs xattrs; /* list of xattrs */
atomic_t stop_eviction; /* hold when working on inode */
+ struct timespec64 i_crtime; /* file creation time */
struct inode vfs_inode;
};
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bf11e1fbd69b..8a636e678902 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -318,7 +318,7 @@ enum skb_drop_reason {
SKB_DROP_REASON_NO_SOCKET,
SKB_DROP_REASON_PKT_TOO_SMALL,
SKB_DROP_REASON_TCP_CSUM,
- SKB_DROP_REASON_TCP_FILTER,
+ SKB_DROP_REASON_SOCKET_FILTER,
SKB_DROP_REASON_UDP_CSUM,
SKB_DROP_REASON_MAX,
};
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 37bde99b74af..e6addaf91afd 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -135,6 +135,7 @@
#include <linux/kasan.h>
+struct list_lru;
struct mem_cgroup;
/*
* struct kmem_cache related prototypes
@@ -416,6 +417,8 @@ static __always_inline unsigned int __kmalloc_index(size_t size,
void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags) __assume_slab_alignment __malloc;
+void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
+ gfp_t gfpflags) __assume_slab_alignment __malloc;
void kmem_cache_free(struct kmem_cache *s, void *objp);
/*
@@ -660,8 +663,7 @@ static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flag
* allocator where we care about the real place the memory allocation
* request comes from.
*/
-extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
- __alloc_size(1);
+extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller);
#define kmalloc_track_caller(size, flags) \
__kmalloc_track_caller(size, flags, _RET_IP_)
diff --git a/include/linux/softirq.h b/include/linux/softirq.h
new file mode 100644
index 000000000000..c73d7dcb4cb5
--- /dev/null
+++ b/include/linux/softirq.h
@@ -0,0 +1 @@
+#include <linux/interrupt.h>
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index ca74dce36706..4658e7801b42 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -42,7 +42,6 @@ struct pxa2xx_spi_chip {
u8 rx_threshold;
u8 dma_burst_size;
u32 timeout;
- int gpio_cs;
};
#if defined(CONFIG_ARCH_PXA) || defined(CONFIG_ARCH_MMP)
diff --git a/include/linux/spi/s3c24xx.h b/include/linux/spi/s3c24xx.h
index 440a71593162..9b8bb22d5b0c 100644
--- a/include/linux/spi/s3c24xx.h
+++ b/include/linux/spi/s3c24xx.h
@@ -10,14 +10,9 @@
#define __LINUX_SPI_S3C24XX_H __FILE__
struct s3c2410_spi_info {
- int pin_cs; /* simple gpio cs */
unsigned int num_cs; /* total chipselects */
int bus_num; /* bus number to use. */
-
unsigned int use_fiq:1; /* use fiq */
-
- void (*gpio_setup)(struct s3c2410_spi_info *spi, int enable);
- void (*set_cs)(struct s3c2410_spi_info *spi, int cs, int pol);
};
extern int s3c24xx_set_fiq(unsigned int irq, u32 *ack_ptr, bool on);
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 7ab3fed7b804..7d005fa4631c 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -16,6 +16,7 @@
#include <linux/gpio/consumer.h>
#include <uapi/linux/spi/spi.h>
+#include <linux/acpi.h>
struct dma_chan;
struct software_node;
@@ -136,9 +137,6 @@ extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer);
* for driver coldplugging, and in uevents used for hotplugging
* @driver_override: If the name of a driver is written to this attribute, then
* the device will bind to the named driver and only the named driver.
- * @cs_gpio: LEGACY: gpio number of the chipselect line (optional, -ENOENT when
- * not using a GPIO line) use cs_gpiod in new drivers by opting in on
- * the spi_master.
* @cs_gpiod: gpio descriptor of the chipselect line (optional, NULL when
* not using a GPIO line)
* @word_delay: delay to be inserted between consecutive
@@ -185,7 +183,6 @@ struct spi_device {
void *controller_data;
char modalias[SPI_NAME_SIZE];
const char *driver_override;
- int cs_gpio; /* LEGACY: chip select gpio */
struct gpio_desc *cs_gpiod; /* chip select gpio desc */
struct spi_delay word_delay; /* inter-word delay */
/* CS delays */
@@ -280,7 +277,7 @@ struct spi_message;
struct spi_driver {
const struct spi_device_id *id_table;
int (*probe)(struct spi_device *spi);
- int (*remove)(struct spi_device *spi);
+ void (*remove)(struct spi_device *spi);
void (*shutdown)(struct spi_device *spi);
struct device_driver driver;
};
@@ -373,7 +370,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
* @cur_msg_prepared: spi_prepare_message was called for the currently
* in-flight message
* @cur_msg_mapped: message has been mapped for DMA
- * @last_cs_enable: was enable true on the last call to set_cs.
+ * @last_cs: the last chip_select recorded by set_cs; -1 when no chip is
+ * selected
* @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs.
* @xfer_completion: used by core transfer_one_message()
* @busy: message pump is busy
@@ -417,17 +415,12 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
* controller has native support for memory like operations.
* @unprepare_message: undo any work done by prepare_message().
* @slave_abort: abort the ongoing transfer request on an SPI slave controller
- * @cs_gpios: LEGACY: array of GPIO descs to use as chip select lines; one per
- * CS number. Any individual value may be -ENOENT for CS lines that
- * are not GPIOs (driven by the SPI controller itself). Use the cs_gpiods
- * in new drivers.
* @cs_gpiods: Array of GPIO descs to use as chip select lines; one per CS
* number. Any individual value may be NULL for CS lines that
* are not GPIOs (driven by the SPI controller itself).
* @use_gpio_descriptors: Turns on the code in the SPI core to parse and grab
- * GPIO descriptors rather than using global GPIO numbers grabbed by the
- * driver. This will fill in @cs_gpiods and @cs_gpios should not be used,
- * and SPI devices will have the cs_gpiod assigned rather than cs_gpio.
+ * GPIO descriptors. This will fill in @cs_gpiods and SPI devices will have
+ * the cs_gpiod assigned if a GPIO line is found for the chipselect.
* @unused_native_cs: When cs_gpiods is used, spi_register_controller() will
* fill in this field with the first unused native CS, to be used by SPI
* controller drivers that need to drive a native CS when using GPIO CS.
@@ -611,7 +604,7 @@ struct spi_controller {
bool auto_runtime_pm;
bool cur_msg_prepared;
bool cur_msg_mapped;
- bool last_cs_enable;
+ char last_cs;
bool last_cs_mode_high;
bool fallback;
struct completion xfer_completion;
@@ -641,7 +634,6 @@ struct spi_controller {
const struct spi_controller_mem_ops *mem_ops;
/* gpio chip select */
- int *cs_gpios;
struct gpio_desc **cs_gpiods;
bool use_gpio_descriptors;
s8 unused_native_cs;
@@ -759,6 +751,13 @@ extern int devm_spi_register_controller(struct device *dev,
struct spi_controller *ctlr);
extern void spi_unregister_controller(struct spi_controller *ctlr);
+#if IS_ENABLED(CONFIG_ACPI)
+extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
+ struct acpi_device *adev,
+ int index);
+int acpi_spi_count_resources(struct acpi_device *adev);
+#endif
+
/*
* SPI resource management while processing a SPI message
*/
@@ -1452,8 +1451,20 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n)
* use spi_new_device() to describe each device. You can also call
* spi_unregister_device() to start making that device vanish, but
* normally that would be handled by spi_unregister_controller().
+ *
+ * You can also use spi_alloc_device() and spi_add_device() as a two-stage
+ * registration sequence for each spi_device. This gives the caller
+ * some more control over the spi_device structure before it is registered,
+ * but requires the caller to initialize fields that would otherwise
+ * be defined using the board info.
*/
extern struct spi_device *
+spi_alloc_device(struct spi_controller *ctlr);
+
+extern int
+spi_add_device(struct spi_device *spi);
+
+extern struct spi_device *
spi_new_device(struct spi_controller *, struct spi_board_info *);
extern void spi_unregister_device(struct spi_device *spi);
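
A hedged sketch of the two-stage registration the new spi.h comment describes: allocate, fill in the fields board info would normally provide, then add. The modalias string and speed are placeholders, and the sketch assumes the usual put-on-failure convention for spi_add_device():

    #include <linux/spi/spi.h>
    #include <linux/string.h>

    static int register_custom_spi_device(struct spi_controller *ctlr)
    {
            struct spi_device *spi;
            int ret;

            spi = spi_alloc_device(ctlr);
            if (!spi)
                    return -ENOMEM;

            spi->chip_select = 0;
            spi->max_speed_hz = 1000000;
            strscpy(spi->modalias, "my-sensor", sizeof(spi->modalias));

            ret = spi_add_device(spi);
            if (ret)
                    spi_dev_put(spi); /* add failed: drop our reference */
            return ret;
    }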
diff --git a/include/linux/spinlock_api.h b/include/linux/spinlock_api.h
new file mode 100644
index 000000000000..6338b27f98df
--- /dev/null
+++ b/include/linux/spinlock_api.h
@@ -0,0 +1 @@
+#include <linux/spinlock.h>
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index f35c22b3355f..a5dda4987e8b 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -52,24 +52,6 @@ struct svc_pool {
unsigned long sp_flags;
} ____cacheline_aligned_in_smp;
-struct svc_serv;
-
-struct svc_serv_ops {
- /* Callback to use when last thread exits. */
- void (*svo_shutdown)(struct svc_serv *, struct net *);
-
- /* function for service threads to run */
- int (*svo_function)(void *);
-
- /* queue up a transport for servicing */
- void (*svo_enqueue_xprt)(struct svc_xprt *);
-
- /* optional module to count when adding threads.
- * Thread function must call module_put_and_kthread_exit() to exit.
- */
- struct module *svo_module;
-};
-
/*
* RPC service.
*
@@ -102,7 +84,8 @@ struct svc_serv {
unsigned int sv_nrpools; /* number of thread pools */
struct svc_pool * sv_pools; /* array of thread pools */
- const struct svc_serv_ops *sv_ops; /* server operations */
+ int (*sv_threadfn)(void *data);
+
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
struct list_head sv_cb_list; /* queue for callback requests
* that arrive over the same
@@ -503,7 +486,7 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net);
void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net);
int svc_bind(struct svc_serv *serv, struct net *net);
struct svc_serv *svc_create(struct svc_program *, unsigned int,
- const struct svc_serv_ops *);
+ int (*threadfn)(void *data));
struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv,
struct svc_pool *pool, int node);
void svc_rqst_replace_page(struct svc_rqst *rqstp,
@@ -511,10 +494,9 @@ void svc_rqst_replace_page(struct svc_rqst *rqstp,
void svc_rqst_free(struct svc_rqst *);
void svc_exit_thread(struct svc_rqst *);
struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
- const struct svc_serv_ops *);
+ int (*threadfn)(void *data));
int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
int svc_pool_stats_open(struct svc_serv *serv, struct file *file);
-void svc_shutdown_net(struct svc_serv *, struct net *);
int svc_process(struct svc_rqst *);
int bc_svc_process(struct svc_serv *, struct rpc_rqst *,
struct svc_rqst *);
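
With svc_serv_ops gone, the thread function is handed straight to svc_create()/svc_create_pooled(). A hedged sketch of the new shape; the names and the 4096-byte bufsize are placeholders, and the exit sequence follows the rule the deleted svo_module comment stated (thread functions end with module_put_and_kthread_exit()):

    #include <linux/sunrpc/svc.h>
    #include <linux/module.h>

    static int my_service_thread(void *data)
    {
            struct svc_rqst *rqstp = data;

            /* ... receive and process requests ... */

            svc_exit_thread(rqstp);
            module_put_and_kthread_exit(0);
            return 0;
    }

    static struct svc_serv *start_service(struct svc_program *prog)
    {
            /* old: svc_create(prog, bufsize, &ops) with ops->svo_function */
            return svc_create(prog, 4096, my_service_thread);
    }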
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 571f605bc91e..20068ccfd0cc 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -28,6 +28,7 @@ struct svc_xprt_ops {
void (*xpo_free)(struct svc_xprt *);
void (*xpo_secure_port)(struct svc_rqst *rqstp);
void (*xpo_kill_temp_xprt)(struct svc_xprt *);
+ void (*xpo_start_tls)(struct svc_xprt *);
};
struct svc_xprt_class {
@@ -127,15 +128,16 @@ int svc_reg_xprt_class(struct svc_xprt_class *);
void svc_unreg_xprt_class(struct svc_xprt_class *);
void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
struct svc_serv *);
-int svc_create_xprt(struct svc_serv *, const char *, struct net *,
- const int, const unsigned short, int,
- const struct cred *);
+int svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
+ struct net *net, const int family,
+ const unsigned short port, int flags,
+ const struct cred *cred);
+void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net);
void svc_xprt_received(struct svc_xprt *xprt);
-void svc_xprt_do_enqueue(struct svc_xprt *xprt);
void svc_xprt_enqueue(struct svc_xprt *xprt);
void svc_xprt_put(struct svc_xprt *xprt);
void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
-void svc_close_xprt(struct svc_xprt *xprt);
+void svc_xprt_close(struct svc_xprt *xprt);
int svc_port_is_privileged(struct sockaddr *sin);
int svc_print_xprts(char *buf, int maxlen);
struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index b519609af1d0..4417f667c757 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -731,6 +731,8 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr,
if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0))
return -EBADMSG;
+ if (len > SIZE_MAX / sizeof(*p))
+ return -EBADMSG;
p = xdr_inline_decode(xdr, len * sizeof(*p));
if (unlikely(!p))
return -EBADMSG;
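
The one-line xdr guard above is the standard idiom for rejecting a count whose multiplication by the element size would wrap before reaching the decoder. The same check in stand-alone form (alloc_u32_array is just an illustration):

    #include <stdint.h>
    #include <stdlib.h>

    /* Allocate n uint32_t elements, refusing any n where n * sizeof()
     * would overflow size_t — the check added before
     * xdr_inline_decode(xdr, len * sizeof(*p)) above. */
    static uint32_t *alloc_u32_array(size_t n)
    {
            if (n > SIZE_MAX / sizeof(uint32_t))
                    return NULL;
            return malloc(n * sizeof(uint32_t));
    }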
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 5785d909c321..300273ff40cc 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -430,15 +430,7 @@ struct platform_hibernation_ops {
#ifdef CONFIG_HIBERNATION
/* kernel/power/snapshot.c */
-extern void __register_nosave_region(unsigned long b, unsigned long e, int km);
-static inline void __init register_nosave_region(unsigned long b, unsigned long e)
-{
- __register_nosave_region(b, e, 0);
-}
-static inline void __init register_nosave_region_late(unsigned long b, unsigned long e)
-{
- __register_nosave_region(b, e, 1);
-}
+extern void register_nosave_region(unsigned long b, unsigned long e);
extern int swsusp_page_is_forbidden(struct page *);
extern void swsusp_set_page_free(struct page *);
extern void swsusp_unset_page_free(struct page *);
@@ -458,7 +450,6 @@ int pfn_is_nosave(unsigned long pfn);
int hibernate_quiet_exec(int (*func)(void *data), void *data);
#else /* CONFIG_HIBERNATION */
static inline void register_nosave_region(unsigned long b, unsigned long e) {}
-static inline void register_nosave_region_late(unsigned long b, unsigned long e) {}
static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
static inline void swsusp_set_page_free(struct page *p) {}
static inline void swsusp_unset_page_free(struct page *p) {}
@@ -506,14 +497,14 @@ extern void ksys_sync_helper(void);
/* drivers/base/power/wakeup.c */
extern bool events_check_enabled;
-extern unsigned int pm_wakeup_irq;
extern suspend_state_t pm_suspend_target_state;
extern bool pm_wakeup_pending(void);
extern void pm_system_wakeup(void);
extern void pm_system_cancel_wakeup(void);
-extern void pm_wakeup_clear(bool reset);
+extern void pm_wakeup_clear(unsigned int irq_number);
extern void pm_system_irq_wakeup(unsigned int irq_number);
+extern unsigned int pm_wakeup_irq(void);
extern bool pm_get_wakeup_count(unsigned int *count, bool block);
extern bool pm_save_wakeup_count(unsigned int count);
extern void pm_wakep_autosleep_enabled(bool set);
diff --git a/include/linux/swait_api.h b/include/linux/swait_api.h
new file mode 100644
index 000000000000..1eeaaaaa5ea7
--- /dev/null
+++ b/include/linux/swait_api.h
@@ -0,0 +1 @@
+#include <linux/swait.h>
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1d38d9475c4d..f37837c614c5 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -328,15 +328,18 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio)
/* linux/mm/workingset.c */
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
-void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg);
+void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
void workingset_refault(struct folio *folio, void *shadow);
void workingset_activation(struct folio *folio);
/* Only track the nodes of mappings with shadow entries */
void workingset_update_node(struct xa_node *node);
+extern struct list_lru shadow_nodes;
#define mapping_set_update(xas, mapping) do { \
- if (!dax_mapping(mapping) && !shmem_mapping(mapping)) \
+ if (!dax_mapping(mapping) && !shmem_mapping(mapping)) { \
xas_set_update(xas, workingset_update_node); \
+ xas_set_lru(xas, &shadow_nodes); \
+ } \
} while (0)
/* linux/mm/page_alloc.c */
@@ -372,7 +375,6 @@ extern void lru_add_drain(void);
extern void lru_add_drain_cpu(int cpu);
extern void lru_add_drain_cpu_zone(struct zone *zone);
extern void lru_add_drain_all(void);
-extern void deactivate_file_page(struct page *page);
extern void deactivate_page(struct page *page);
extern void mark_page_lazyfree(struct page *page);
extern void swap_setup(void);
@@ -384,7 +386,6 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page,
extern unsigned long zone_reclaimable_pages(struct zone *zone);
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
gfp_t gfp_mask, nodemask_t *mask);
-extern bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode);
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
@@ -395,7 +396,7 @@ extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
unsigned long *nr_scanned);
extern unsigned long shrink_all_memory(unsigned long nr_pages);
extern int vm_swappiness;
-extern int remove_mapping(struct address_space *mapping, struct page *page);
+long remove_mapping(struct address_space *mapping, struct folio *folio);
extern unsigned long reclaim_pages(struct list_head *page_list);
#ifdef CONFIG_NUMA
@@ -427,7 +428,7 @@ extern int swap_writepage(struct page *page, struct writeback_control *wbc);
extern void end_swap_bio_write(struct bio *bio);
extern int __swap_writepage(struct page *page, struct writeback_control *wbc,
bio_end_io_t end_write_func);
-extern int swap_set_page_dirty(struct page *page);
+bool swap_dirty_folio(struct address_space *mapping, struct folio *folio);
int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
unsigned long nr_pages, sector_t start_block);
@@ -741,7 +742,7 @@ static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask)
#endif
#ifdef CONFIG_MEMCG_SWAP
-extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
+void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry);
extern int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry);
static inline int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
{
@@ -761,7 +762,7 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_p
extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg);
extern bool mem_cgroup_swap_full(struct page *page);
#else
-static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
{
}
diff --git a/include/linux/syscalls_api.h b/include/linux/syscalls_api.h
new file mode 100644
index 000000000000..23e012b04db4
--- /dev/null
+++ b/include/linux/syscalls_api.h
@@ -0,0 +1 @@
+#include <linux/syscalls.h>
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 180adf7da785..6353d6db69b2 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -265,7 +265,7 @@ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table *
return NULL;
}
-static inline struct sysctl_header *register_sysctl_mount_point(const char *path)
+static inline struct ctl_table_header *register_sysctl_mount_point(const char *path)
{
return NULL;
}
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 73a6f34b3847..9f392ec76f2b 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -209,9 +209,12 @@ __bad_copy_from(void);
extern void __compiletime_error("copy destination size is too small")
__bad_copy_to(void);
+void __copy_overflow(int size, unsigned long count);
+
static inline void copy_overflow(int size, unsigned long count)
{
- WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
+ if (IS_ENABLED(CONFIG_BUG))
+ __copy_overflow(size, count);
}
static __always_inline __must_check bool
diff --git a/include/linux/topology.h b/include/linux/topology.h
index a6e201758ae9..f19bc3626297 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -211,6 +211,9 @@ static inline int cpu_to_mem(int cpu)
#ifndef topology_drawer_id
#define topology_drawer_id(cpu) ((void)(cpu), -1)
#endif
+#ifndef topology_ppin
+#define topology_ppin(cpu) ((void)(cpu), 0ull)
+#endif
#ifndef topology_sibling_cpumask
#define topology_sibling_cpumask(cpu) cpumask_of(cpu)
#endif
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 9b09fd633d48..6c7ae3c2ba9b 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -706,6 +706,8 @@ event_triggers_post_call(struct trace_event_file *file,
bool trace_event_ignore_this_pid(struct trace_event_file *trace_file);
+bool __trace_trigger_soft_disabled(struct trace_event_file *file);
+
/**
* trace_trigger_soft_disabled - do triggers and test if soft disabled
* @file: The file pointer of the event to test
@@ -715,20 +717,20 @@ bool trace_event_ignore_this_pid(struct trace_event_file *trace_file);
* triggers that require testing the fields, it will return true,
* otherwise false.
*/
-static inline bool
+static __always_inline bool
trace_trigger_soft_disabled(struct trace_event_file *file)
{
unsigned long eflags = file->flags;
- if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) {
- if (eflags & EVENT_FILE_FL_TRIGGER_MODE)
- event_triggers_call(file, NULL, NULL, NULL);
- if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
- return true;
- if (eflags & EVENT_FILE_FL_PID_FILTER)
- return trace_event_ignore_this_pid(file);
- }
- return false;
+ if (likely(!(eflags & (EVENT_FILE_FL_TRIGGER_MODE |
+ EVENT_FILE_FL_SOFT_DISABLED |
+ EVENT_FILE_FL_PID_FILTER))))
+ return false;
+
+ if (likely(eflags & EVENT_FILE_FL_TRIGGER_COND))
+ return false;
+
+ return __trace_trigger_soft_disabled(file);
}
#ifdef CONFIG_BPF_EVENTS
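
The trace_trigger_soft_disabled() rewrite is a classic fast-path/slow-path split: fold every rare-condition flag into one test, and push the remaining logic out of line (__trace_trigger_soft_disabled) so the inlined copy costs a single branch. A generic stand-alone sketch of the shape, with illustrative names throughout:

    #include <stdbool.h>

    #define FLAG_TRIGGER  0x1u
    #define FLAG_DISABLED 0x2u
    #define FLAG_FILTERED 0x4u

    /* rare cases; in the kernel this would live out of line in a .c file */
    static bool handle_slow_path(unsigned int flags)
    {
            return flags & FLAG_DISABLED;
    }

    static inline bool event_soft_disabled(unsigned int flags)
    {
            /* common case: nothing set, one test and done */
            if (!(flags & (FLAG_TRIGGER | FLAG_DISABLED | FLAG_FILTERED)))
                    return false;
            return handle_slow_path(flags);
    }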
diff --git a/include/linux/u64_stats_sync_api.h b/include/linux/u64_stats_sync_api.h
new file mode 100644
index 000000000000..c72ca63da44b
--- /dev/null
+++ b/include/linux/u64_stats_sync_api.h
@@ -0,0 +1 @@
+#include <linux/u64_stats_sync.h>
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index ac0394087f7d..bca27b4e5eb2 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -401,8 +401,6 @@ static inline void user_access_restore(unsigned long flags) { }
#endif
#ifdef CONFIG_HARDENED_USERCOPY
-void usercopy_warn(const char *name, const char *detail, bool to_user,
- unsigned long offset, unsigned long len);
void __noreturn usercopy_abort(const char *name, const char *detail,
bool to_user, unsigned long offset,
unsigned long len);
diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h
index 031f148ab373..b5deafd91f67 100644
--- a/include/linux/usb/role.h
+++ b/include/linux/usb/role.h
@@ -92,6 +92,12 @@ fwnode_usb_role_switch_get(struct fwnode_handle *node)
static inline void usb_role_switch_put(struct usb_role_switch *sw) { }
static inline struct usb_role_switch *
+usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode)
+{
+ return NULL;
+}
+
+static inline struct usb_role_switch *
usb_role_switch_register(struct device *parent,
const struct usb_role_switch_desc *desc)
{
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 2de442ececae..721089bb4c84 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -401,18 +401,24 @@ static inline int vdpa_reset(struct vdpa_device *vdev)
return ret;
}
-static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features, bool locked)
+static inline int vdpa_set_features_unlocked(struct vdpa_device *vdev, u64 features)
{
const struct vdpa_config_ops *ops = vdev->config;
int ret;
- if (!locked)
- mutex_lock(&vdev->cf_mutex);
-
vdev->features_valid = true;
ret = ops->set_driver_features(vdev, features);
- if (!locked)
- mutex_unlock(&vdev->cf_mutex);
+
+ return ret;
+}
+
+static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)
+{
+ int ret;
+
+ mutex_lock(&vdev->cf_mutex);
+ ret = vdpa_set_features_unlocked(vdev, features);
+ mutex_unlock(&vdev->cf_mutex);
return ret;
}
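
The vdpa hunk converts the bool locking parameter into the usual paired convention: vdpa_set_features() takes cf_mutex itself, while vdpa_set_features_unlocked() assumes the caller already holds it. A hedged sketch of one caller on each side (both wrappers are illustrative):

    #include <linux/vdpa.h>

    /* caller does not hold cf_mutex */
    static int negotiate(struct vdpa_device *vdev, u64 features)
    {
            return vdpa_set_features(vdev, features);
    }

    /* caller is already inside a cf_mutex-protected section */
    static int negotiate_locked(struct vdpa_device *vdev, u64 features)
    {
            return vdpa_set_features_unlocked(vdev, features);
    }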
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 72292a62cd90..5464f398912a 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -133,7 +133,6 @@ bool is_virtio_device(struct device *dev);
void virtio_break_device(struct virtio_device *dev);
void virtio_config_changed(struct virtio_device *dev);
-int virtio_finalize_features(struct virtio_device *dev);
#ifdef CONFIG_PM_SLEEP
int virtio_device_freeze(struct virtio_device *dev);
int virtio_device_restore(struct virtio_device *dev);
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 4d107ad31149..dafdc7f48c01 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -64,8 +64,9 @@ struct virtio_shm_region {
* Returns the first 64 feature bits (all we currently need).
* @finalize_features: confirm what device features we'll be using.
* vdev: the virtio_device
- * This gives the final feature bits for the device: it can change
+ * This sends the driver feature bits to the device: it can change
* the dev->feature bits if it wants.
+ * Note: despite the name this can be called any number of times.
* Returns 0 on success or error status
* @bus_name: return the bus name associated with the device (optional)
* vdev: the virtio_device
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 7b2363388bfa..16a0a4fd000b 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -129,6 +129,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
#ifdef CONFIG_SWAP
SWAP_RA,
SWAP_RA_HIT,
+#ifdef CONFIG_KSM
+ KSM_SWPIN_COPY,
+#endif
#endif
#ifdef CONFIG_X86
DIRECT_MAP_LEVEL2_SPLIT,
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 880227b9f044..05065915edd7 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -80,8 +80,8 @@ struct vmap_area {
/*
* The following two variables can be packed, because
* a vmap_area object can be either:
- * 1) in "free" tree (root is vmap_area_root)
- * 2) or "busy" tree (root is free_vmap_area_root)
+ * 1) in "free" tree (root is free_vmap_area_root)
+ * 2) or "busy" tree (root is vmap_area_root)
*/
union {
unsigned long subtree_max_size; /* in "free" tree */
diff --git a/include/linux/wait_api.h b/include/linux/wait_api.h
new file mode 100644
index 000000000000..4e930548935a
--- /dev/null
+++ b/include/linux/wait_api.h
@@ -0,0 +1 @@
+#include <linux/wait.h>
diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h
index c994d1b2cdba..3b9a40ae8bdb 100644
--- a/include/linux/watch_queue.h
+++ b/include/linux/watch_queue.h
@@ -28,7 +28,8 @@ struct watch_type_filter {
struct watch_filter {
union {
struct rcu_head rcu;
- unsigned long type_filter[2]; /* Bitmask of accepted types */
+ /* Bitmask of accepted types */
+ DECLARE_BITMAP(type_filter, WATCH_TYPE__NR);
};
u32 nr_filters; /* Number of filters */
struct watch_type_filter filters[];
diff --git a/include/linux/workqueue_api.h b/include/linux/workqueue_api.h
new file mode 100644
index 000000000000..77debb5d2760
--- /dev/null
+++ b/include/linux/workqueue_api.h
@@ -0,0 +1 @@
+#include <linux/workqueue.h>
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index d6d5da6ed735..bb52b786be1b 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -1317,6 +1317,7 @@ struct xa_state {
struct xa_node *xa_node;
struct xa_node *xa_alloc;
xa_update_node_t xa_update;
+ struct list_lru *xa_lru;
};
/*
@@ -1336,7 +1337,8 @@ struct xa_state {
.xa_pad = 0, \
.xa_node = XAS_RESTART, \
.xa_alloc = NULL, \
- .xa_update = NULL \
+ .xa_update = NULL, \
+ .xa_lru = NULL, \
}
/**
@@ -1631,6 +1633,11 @@ static inline void xas_set_update(struct xa_state *xas, xa_update_node_t update)
xas->xa_update = update;
}
+static inline void xas_set_lru(struct xa_state *xas, struct list_lru *lru)
+{
+ xas->xa_lru = lru;
+}
+
/**
* xas_next_entry() - Advance iterator to next present entry.
* @xas: XArray operation state.
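
xas_set_lru() parallels xas_set_update(): it stashes a list_lru in the xa_state, and the swap.h hunk above uses it so xarray node allocations for shadow entries can be charged to shadow_nodes (kmem_cache_alloc_lru() in the slab hunk is presumably the other half). A sketch with a placeholder lru; the xas_nomem() retry loop is omitted for brevity:

    #include <linux/xarray.h>
    #include <linux/list_lru.h>

    extern struct list_lru my_nodes_lru; /* placeholder, defined elsewhere */

    static void store_with_lru(struct xarray *xa, unsigned long index, void *entry)
    {
            XA_STATE(xas, xa, index);

            xas_set_lru(&xas, &my_nodes_lru);
            xas_lock(&xas);
            xas_store(&xas, entry);
            xas_unlock(&xas);
    }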
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 78ea3e332688..59940e230b78 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -6,6 +6,8 @@
#define RTR_SOLICITATION_INTERVAL (4*HZ)
#define RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */
+#define MIN_VALID_LIFETIME (2*3600) /* 2 hours */
+
#define TEMP_VALID_LIFETIME (7*86400)
#define TEMP_PREFERRED_LIFETIME (86400)
#define REGEN_MAX_RETRY (3)
@@ -107,8 +109,6 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net,
int ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
const struct in6_addr *daddr, unsigned int srcprefs,
struct in6_addr *saddr);
-int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
- u32 banned_flags);
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
u32 banned_flags);
bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index ab207677e0a8..f742e50207fb 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -205,7 +205,8 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
struct sockaddr_vm *dst);
void vsock_remove_sock(struct vsock_sock *vsk);
-void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
+void vsock_for_each_connected_socket(struct vsock_transport *transport,
+ void (*fn)(struct sock *sk));
int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk);
bool vsock_find_cid(unsigned int cid);
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 526e49589197..8221af1811df 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -239,6 +239,7 @@ typedef struct ax25_dev {
#if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER)
ax25_dama_info dama;
#endif
+ refcount_t refcount;
} ax25_dev;
typedef struct ax25_cb {
@@ -293,6 +294,17 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25)
}
}
+static inline void ax25_dev_hold(ax25_dev *ax25_dev)
+{
+ refcount_inc(&ax25_dev->refcount);
+}
+
+static inline void ax25_dev_put(ax25_dev *ax25_dev)
+{
+ if (refcount_dec_and_test(&ax25_dev->refcount)) {
+ kfree(ax25_dev);
+ }
+}
static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev)
{
skb->dev = dev;
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 4b3d0b16c185..a647e5fabdbd 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -506,8 +506,7 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk,
tmp = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom);
if (IS_ERR(tmp)) {
- kfree_skb(skb);
- return tmp;
+ return skb;
}
len -= tmp->len;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 586f69d084a2..e336e9c1dda4 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1489,6 +1489,14 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
/* Extended advertising support */
#define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV))
+/* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 1789:
+ *
+ * C24: Mandatory if the LE Controller supports Connection State and either
+ * LE Feature (LL Privacy) or LE Feature (Extended Advertising) is supported
+ */
+#define use_enhanced_conn_complete(dev) (ll_privacy_capable(dev) || \
+ ext_adv_capable(dev))
+
/* ----- HCI protocols ----- */
#define HCI_PROTO_DEFER 0x01
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index 38785d48baff..184105d68294 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -262,7 +262,7 @@ struct ad_system {
struct ad_bond_info {
struct ad_system system; /* 802.3ad system structure */
struct bond_3ad_stats stats;
- u32 agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */
+ atomic_t agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */
u16 aggregator_identifier;
};
diff --git a/include/net/bonding.h b/include/net/bonding.h
index f6ae3a4baea4..83cfd2d70247 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -346,7 +346,7 @@ static inline bool bond_uses_primary(struct bonding *bond)
static inline struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond)
{
- struct slave *slave = rcu_dereference(bond->curr_active_slave);
+ struct slave *slave = rcu_dereference_rtnl(bond->curr_active_slave);
return bond_uses_primary(bond) && slave ? slave->dev : NULL;
}
diff --git a/include/net/checksum.h b/include/net/checksum.h
index 5218041e5c8f..79c67f14c448 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -22,7 +22,7 @@
#include <asm/checksum.h>
#ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
-static inline
+static __always_inline
__wsum csum_and_copy_from_user (const void __user *src, void *dst,
int len)
{
@@ -33,7 +33,7 @@ __wsum csum_and_copy_from_user (const void __user *src, void *dst,
#endif
#ifndef HAVE_CSUM_COPY_USER
-static __inline__ __wsum csum_and_copy_to_user
+static __always_inline __wsum csum_and_copy_to_user
(const void *src, void __user *dst, int len)
{
__wsum sum = csum_partial(src, len, ~0U);
@@ -45,7 +45,7 @@ static __inline__ __wsum csum_and_copy_to_user
#endif
#ifndef _HAVE_ARCH_CSUM_AND_COPY
-static inline __wsum
+static __always_inline __wsum
csum_partial_copy_nocheck(const void *src, void *dst, int len)
{
memcpy(dst, src, len);
@@ -54,7 +54,7 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len)
#endif
#ifndef HAVE_ARCH_CSUM_ADD
-static inline __wsum csum_add(__wsum csum, __wsum addend)
+static __always_inline __wsum csum_add(__wsum csum, __wsum addend)
{
u32 res = (__force u32)csum;
res += (__force u32)addend;
@@ -62,12 +62,12 @@ static inline __wsum csum_add(__wsum csum, __wsum addend)
}
#endif
-static inline __wsum csum_sub(__wsum csum, __wsum addend)
+static __always_inline __wsum csum_sub(__wsum csum, __wsum addend)
{
return csum_add(csum, ~addend);
}
-static inline __sum16 csum16_add(__sum16 csum, __be16 addend)
+static __always_inline __sum16 csum16_add(__sum16 csum, __be16 addend)
{
u16 res = (__force u16)csum;
@@ -75,12 +75,12 @@ static inline __sum16 csum16_add(__sum16 csum, __be16 addend)
return (__force __sum16)(res + (res < (__force u16)addend));
}
-static inline __sum16 csum16_sub(__sum16 csum, __be16 addend)
+static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend)
{
return csum16_add(csum, ~addend);
}
-static inline __wsum csum_shift(__wsum sum, int offset)
+static __always_inline __wsum csum_shift(__wsum sum, int offset)
{
/* rotate sum to align it with a 16b boundary */
if (offset & 1)
@@ -88,42 +88,43 @@ static inline __wsum csum_shift(__wsum sum, int offset)
return sum;
}
-static inline __wsum
+static __always_inline __wsum
csum_block_add(__wsum csum, __wsum csum2, int offset)
{
return csum_add(csum, csum_shift(csum2, offset));
}
-static inline __wsum
+static __always_inline __wsum
csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len)
{
return csum_block_add(csum, csum2, offset);
}
-static inline __wsum
+static __always_inline __wsum
csum_block_sub(__wsum csum, __wsum csum2, int offset)
{
return csum_block_add(csum, ~csum2, offset);
}
-static inline __wsum csum_unfold(__sum16 n)
+static __always_inline __wsum csum_unfold(__sum16 n)
{
return (__force __wsum)n;
}
-static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum)
+static __always_inline
+__wsum csum_partial_ext(const void *buff, int len, __wsum sum)
{
return csum_partial(buff, len, sum);
}
#define CSUM_MANGLED_0 ((__force __sum16)0xffff)
-static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff)
+static __always_inline void csum_replace_by_diff(__sum16 *sum, __wsum diff)
{
*sum = csum_fold(csum_add(diff, ~csum_unfold(*sum)));
}
-static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
+static __always_inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
{
__wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from);
@@ -136,11 +137,16 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
* m : old value of a 16bit field
* m' : new value of a 16bit field
*/
-static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new)
+static __always_inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new)
{
*sum = ~csum16_add(csum16_sub(~(*sum), old), new);
}
+static inline void csum_replace(__wsum *csum, __wsum old, __wsum new)
+{
+ *csum = csum_add(csum_sub(*csum, old), new);
+}
+
struct sk_buff;
void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
__be32 from, __be32 to, bool pseudohdr);
@@ -150,16 +156,16 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
__wsum diff, bool pseudohdr);
-static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
- __be16 from, __be16 to,
- bool pseudohdr)
+static __always_inline
+void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
+ __be16 from, __be16 to, bool pseudohdr)
{
inet_proto_csum_replace4(sum, skb, (__force __be32)from,
(__force __be32)to, pseudohdr);
}
-static inline __wsum remcsum_adjust(void *ptr, __wsum csum,
- int start, int offset)
+static __always_inline __wsum remcsum_adjust(void *ptr, __wsum csum,
+ int start, int offset)
{
__sum16 *psum = (__sum16 *)(ptr + offset);
__wsum delta;
@@ -175,12 +181,12 @@ static inline __wsum remcsum_adjust(void *ptr, __wsum csum,
return delta;
}
-static inline void remcsum_unadjust(__sum16 *psum, __wsum delta)
+static __always_inline void remcsum_unadjust(__sum16 *psum, __wsum delta)
{
*psum = csum_fold(csum_sub(delta, (__force __wsum)*psum));
}
-static inline __wsum wsum_negate(__wsum val)
+static __always_inline __wsum wsum_negate(__wsum val)
{
return (__force __wsum)-((__force u32)val);
}
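
The new csum_replace() rounds out the incremental-update family: subtract the old 32-bit word from the running sum and add the new one, all in one's-complement arithmetic, so a single field can be patched without re-summing the buffer. A user-space rendering of the csum_add()/csum_sub() carry handling and the replace step (ocadd/ocsub are local names):

    #include <stdint.h>
    #include <stdio.h>

    /* one's-complement add with end-around carry, as csum_add() above */
    static uint32_t ocadd(uint32_t csum, uint32_t addend)
    {
            uint32_t res = csum + addend;

            return res + (res < addend);
    }

    static uint32_t ocsub(uint32_t csum, uint32_t addend)
    {
            return ocadd(csum, ~addend);
    }

    int main(void)
    {
            uint32_t sum = 0x00012345;      /* running sum over some buffer */
            uint32_t old_word = 0x000000ff;
            uint32_t new_word = 0x00000abc;

            /* csum_replace(): sum = add(sub(sum, old), new) */
            sum = ocadd(ocsub(sum, old_word), new_word);
            printf("%08x\n", sum);
            return 0;
    }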
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 57b3e4e7413b..85a5ba3772f5 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -1187,6 +1187,7 @@ void dsa_unregister_switch(struct dsa_switch *ds);
int dsa_register_switch(struct dsa_switch *ds);
void dsa_switch_shutdown(struct dsa_switch *ds);
struct dsa_switch *dsa_switch_find(int tree_index, int sw_index);
+void dsa_flush_workqueue(void);
#ifdef CONFIG_PM_SLEEP
int dsa_switch_suspend(struct dsa_switch *ds);
int dsa_switch_resume(struct dsa_switch *ds);
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 14efa0ded75d..adab27ba1ecb 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -123,8 +123,20 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb)
memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
sizeof(struct ip_tunnel_info) + md_size);
+#ifdef CONFIG_DST_CACHE
+ /* Unclone the dst cache if there is one */
+ if (new_md->u.tun_info.dst_cache.cache) {
+ int ret;
+
+ ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC);
+ if (ret) {
+ metadata_dst_free(new_md);
+ return ERR_PTR(ret);
+ }
+ }
+#endif
+
skb_dst_drop(skb);
- dst_hold(&new_md->dst);
skb_dst_set(skb, &new_md->dst);
return new_md;
}
diff --git a/include/net/esp.h b/include/net/esp.h
index 9c5637d41d95..90cd02ff77ef 100644
--- a/include/net/esp.h
+++ b/include/net/esp.h
@@ -4,6 +4,8 @@
#include <linux/skbuff.h>
+#define ESP_SKB_FRAG_MAXSIZE (PAGE_SIZE << SKB_FRAG_PAGE_ORDER)
+
struct ip_esp_hdr;
static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb)
diff --git a/include/net/ip.h b/include/net/ip.h
index 81e23a102a0d..b51bae43b0dd 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -525,19 +525,18 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb,
{
struct iphdr *iph = ip_hdr(skb);
+ /* We had many attacks based on IPID, use the private
+ * generator as much as we can.
+ */
+ if (sk && inet_sk(sk)->inet_daddr) {
+ iph->id = htons(inet_sk(sk)->inet_id);
+ inet_sk(sk)->inet_id += segs;
+ return;
+ }
if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
- /* This is only to work around buggy Windows95/2000
- * VJ compression implementations. If the ID field
- * does not change, they drop every other packet in
- * a TCP stream using header compression.
- */
- if (sk && inet_sk(sk)->inet_daddr) {
- iph->id = htons(inet_sk(sk)->inet_id);
- inet_sk(sk)->inet_id += segs;
- } else {
- iph->id = 0;
- }
+ iph->id = 0;
} else {
+ /* Unfortunately we need the big hammer to get a suitable IPID */
__ip_select_ident(net, iph, segs);
}
}
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index a9a4ccc0cdb5..2048bc8748cb 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -190,14 +190,16 @@ struct fib6_info {
u32 fib6_metric;
u8 fib6_protocol;
u8 fib6_type;
+
+ u8 offload;
+ u8 trap;
+ u8 offload_failed;
+
u8 should_flush:1,
dst_nocount:1,
dst_nopolicy:1,
fib6_destroying:1,
- offload:1,
- trap:1,
- offload_failed:1,
- unused:1;
+ unused:4;
struct rcu_head rcu;
struct nexthop *nh;
@@ -282,7 +284,7 @@ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i,
fn = rcu_dereference(f6i->fib6_node);
if (fn) {
- *cookie = fn->fn_sernum;
+ *cookie = READ_ONCE(fn->fn_sernum);
/* pairs with smp_wmb() in __fib6_update_sernum_upto_root() */
smp_rmb();
status = true;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 3afcb128e064..92eec13d1693 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -393,17 +393,20 @@ static inline void txopt_put(struct ipv6_txoptions *opt)
kfree_rcu(opt, rcu);
}
+#if IS_ENABLED(CONFIG_IPV6)
struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label);
extern struct static_key_false_deferred ipv6_flowlabel_exclusive;
static inline struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk,
__be32 label)
{
- if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key))
+ if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key) &&
+ READ_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl))
return __fl6_sock_lookup(sk, label) ? : ERR_PTR(-ENOENT);
return NULL;
}
+#endif
struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
struct ip6_flowlabel *fl,
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 53cb8de0e589..47ffb360ddfa 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -475,9 +475,9 @@ int igmp6_late_init(void);
void igmp6_cleanup(void);
void igmp6_late_cleanup(void);
-int igmp6_event_query(struct sk_buff *skb);
+void igmp6_event_query(struct sk_buff *skb);
-int igmp6_event_report(struct sk_buff *skb);
+void igmp6_event_report(struct sk_buff *skb);
#ifdef CONFIG_SYSCTL
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 937389e04c8e..87419f7f5421 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -350,7 +350,8 @@ static inline struct neighbour *neigh_create(struct neigh_table *tbl,
return __neigh_create(tbl, pkey, dev, true);
}
void neigh_destroy(struct neighbour *neigh);
-int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb);
+int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
+ const bool immediate_ok);
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags,
u32 nlmsg_pid);
void __neigh_set_probe_once(struct neighbour *neigh);
@@ -460,17 +461,24 @@ static inline struct neighbour * neigh_clone(struct neighbour *neigh)
#define neigh_hold(n) refcount_inc(&(n)->refcnt)
-static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+static __always_inline int neigh_event_send_probe(struct neighbour *neigh,
+ struct sk_buff *skb,
+ const bool immediate_ok)
{
unsigned long now = jiffies;
-
+
if (READ_ONCE(neigh->used) != now)
WRITE_ONCE(neigh->used, now);
- if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE)))
- return __neigh_event_send(neigh, skb);
+ if (!(neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)))
+ return __neigh_event_send(neigh, skb, immediate_ok);
return 0;
}
+static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+{
+ return neigh_event_send_probe(neigh, skb, true);
+}
+
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
{
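neigh_event_send() keeps its old behaviour by forwarding immediate_ok == true; neigh_event_send_probe() exists for callers that want to feed the state machine without forcing an immediate probe, e.g. periodic management work. A sketch of such a caller:

	static void example_refresh(struct neighbour *n)
	{
		/* no skb to queue, and no instant probe on failed entries */
		neigh_event_send_probe(n, NULL, false);
	}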
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 8731d5bcb47d..b08b70989d2c 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -97,7 +97,6 @@ struct nf_conn {
unsigned long status;
u16 cpu;
- u16 local_origin:1;
possible_net_t ct_net;
#if IS_ENABLED(CONFIG_NF_NAT)
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index a3647fadf1cc..bd59e950f4d6 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -96,6 +96,7 @@ enum flow_offload_xmit_type {
FLOW_OFFLOAD_XMIT_NEIGH,
FLOW_OFFLOAD_XMIT_XFRM,
FLOW_OFFLOAD_XMIT_DIRECT,
+ FLOW_OFFLOAD_XMIT_TC,
};
#define NF_FLOW_TABLE_ENCAP_MAX 2
@@ -127,7 +128,7 @@ struct flow_offload_tuple {
struct { } __hash;
u8 dir:2,
- xmit_type:2,
+ xmit_type:3,
encap_num:2,
in_vlan_ingress:2;
u16 mtu;
@@ -142,6 +143,9 @@ struct flow_offload_tuple {
u8 h_source[ETH_ALEN];
u8 h_dest[ETH_ALEN];
} out;
+ struct {
+ u32 iifidx;
+ } tc;
};
};
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 9eed51e920e8..980daa6e1e3a 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -37,7 +37,7 @@ void nf_register_queue_handler(const struct nf_queue_handler *qh);
void nf_unregister_queue_handler(void);
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
-void nf_queue_entry_get_refs(struct nf_queue_entry *entry);
+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
void nf_queue_entry_free(struct nf_queue_entry *entry);
static inline void init_hashrandom(u32 *jhash_initval)
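With nf_queue_entry_get_refs() now returning bool, callers must cope with the device references no longer being obtainable instead of silently holding stale pointers. A sketch of a duplication path handling the failure, much like the one in nfnetlink_queue:

	static struct nf_queue_entry *example_entry_dup(struct nf_queue_entry *e)
	{
		struct nf_queue_entry *copy = kmemdup(e, e->size, GFP_ATOMIC);

		if (!copy)
			return NULL;
		if (!nf_queue_entry_get_refs(copy)) {
			kfree(copy);	/* refs unavailable, drop the clone */
			return NULL;
		}
		return copy;
	}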
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index eaf55da9a205..c4c0861deac1 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -905,9 +905,9 @@ struct nft_expr_ops {
int (*offload)(struct nft_offload_ctx *ctx,
struct nft_flow_rule *flow,
const struct nft_expr *expr);
+ bool (*offload_action)(const struct nft_expr *expr);
void (*offload_stats)(struct nft_expr *expr,
const struct flow_stats *stats);
- u32 offload_flags;
const struct nft_expr_type *type;
void *data;
};
diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index f9d95ff82df8..797147843958 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -67,8 +67,6 @@ struct nft_flow_rule {
struct flow_rule *rule;
};
-#define NFT_OFFLOAD_F_ACTION (1 << 0)
-
void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
enum flow_dissector_key_id addr_type);
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index a4b550380316..6bd7e5a85ce7 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -77,9 +77,10 @@ struct netns_ipv6 {
spinlock_t fib6_gc_lock;
unsigned int ip6_rt_gc_expire;
unsigned long ip6_rt_last_gc;
+ unsigned char flowlabel_has_excl;
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
- unsigned int fib6_rules_require_fldissect;
bool fib6_has_custom_rules;
+ unsigned int fib6_rules_require_fldissect;
#ifdef CONFIG_IPV6_SUBTREES
unsigned int fib6_routes_require_src;
#endif
diff --git a/include/net/route.h b/include/net/route.h
index 4c858dcf1aa8..25404fc2b483 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -370,7 +370,7 @@ static inline struct neighbour *ip_neigh_gw4(struct net_device *dev,
{
struct neighbour *neigh;
- neigh = __ipv4_neigh_lookup_noref(dev, daddr);
+ neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)daddr);
if (unlikely(!neigh))
neigh = __neigh_create(&arp_tbl, &daddr, dev, false);
diff --git a/include/net/sock.h b/include/net/sock.h
index ff9b508d9c5f..50aecd28b355 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -507,7 +507,7 @@ struct sock {
#endif
u16 sk_tsflags;
u8 sk_shutdown;
- u32 sk_tskey;
+ atomic_t sk_tskey;
atomic_t sk_zckey;
u8 sk_clockid;
@@ -2667,7 +2667,7 @@ static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags,
__sock_tx_timestamp(tsflags, tx_flags);
if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey &&
tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
- *tskey = sk->sk_tskey++;
+ *tskey = atomic_inc_return(&sk->sk_tskey) - 1;
}
if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS)))
*tx_flags |= SKBTX_WIFI_STATUS;
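Turning sk_tskey into an atomic_t closes the race where two senders on the same socket could read and write the counter concurrently; atomic_inc_return() - 1 is a lock-free fetch-and-increment that hands each caller the pre-increment value. Equivalently:

	static u32 example_next_tskey(struct sock *sk)
	{
		/* returns the old key; each concurrent caller gets a unique one */
		return atomic_inc_return(&sk->sk_tskey) - 1;
	}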
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 44e442bf23f9..b9fc978fb2ca 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1369,6 +1369,7 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb)
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
+#ifdef CONFIG_INET
void __sk_defer_free_flush(struct sock *sk);
static inline void sk_defer_free_flush(struct sock *sk)
@@ -1377,6 +1378,9 @@ static inline void sk_defer_free_flush(struct sock *sk)
return;
__sk_defer_free_flush(sk);
}
+#else
+static inline void sk_defer_free_flush(struct sock *sk) {}
+#endif
int tcp_filter(struct sock *sk, struct sk_buff *skb);
void tcp_set_state(struct sock *sk, int state);
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index fdb41e8bb626..76aa6f11a540 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1568,7 +1568,6 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
int xfrm_init_replay(struct xfrm_state *x);
-u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu);
u32 xfrm_state_mtu(struct xfrm_state *x, int mtu);
int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload);
int xfrm_init_state(struct xfrm_state *x);
@@ -1681,14 +1680,15 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
const struct xfrm_migrate *m, int num_bundles,
const struct xfrm_kmaddress *k,
const struct xfrm_encap_tmpl *encap);
-struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net);
+struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net,
+ u32 if_id);
struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
struct xfrm_migrate *m,
struct xfrm_encap_tmpl *encap);
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
struct xfrm_migrate *m, int num_bundles,
struct xfrm_kmaddress *k, struct net *net,
- struct xfrm_encap_tmpl *encap);
+ struct xfrm_encap_tmpl *encap, u32 if_id);
#endif
int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport);
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index d0337a41141c..1e694fd239b9 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -374,6 +374,7 @@ TRACE_EVENT(aer_event,
EM ( MF_MSG_BUDDY, "free buddy page" ) \
EM ( MF_MSG_DAX, "dax page" ) \
EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" ) \
+ EM ( MF_MSG_DIFFERENT_PAGE_SIZE, "different page size" ) \
EMe ( MF_MSG_UNKNOWN, "unknown page" )
/*
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 6794d7322cbd..e3a4c67794b1 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -13,7 +13,6 @@
#include <scsi/scsi_request.h>
struct Scsi_Host;
-struct scsi_driver;
/*
* MAX_COMMAND_SIZE is:
@@ -159,14 +158,6 @@ static inline void *scsi_cmd_priv(struct scsi_cmnd *cmd)
return cmd + 1;
}
-/* make sure not to use it with passthrough commands */
-static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd)
-{
- struct request *rq = scsi_cmd_to_rq(cmd);
-
- return *(struct scsi_driver **)rq->q->disk->private_data;
-}
-
void scsi_done(struct scsi_cmnd *cmd);
extern void scsi_finish_command(struct scsi_cmnd *cmd);
diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h
index 6dffa8555a39..4ce1988b2ba0 100644
--- a/include/scsi/scsi_driver.h
+++ b/include/scsi/scsi_driver.h
@@ -4,11 +4,10 @@
#include <linux/blk_types.h>
#include <linux/device.h>
+#include <scsi/scsi_cmnd.h>
struct module;
struct request;
-struct scsi_cmnd;
-struct scsi_device;
struct scsi_driver {
struct device_driver gendrv;
@@ -31,4 +30,10 @@ extern int scsi_register_interface(struct class_interface *);
#define scsi_unregister_interface(intf) \
class_interface_unregister(intf)
+/* make sure not to use it with passthrough commands */
+static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd)
+{
+ return to_scsi_driver(cmd->device->sdev_gendev.driver);
+}
+
#endif /* _SCSI_SCSI_DRIVER_H */
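Moving scsi_cmd_to_driver() here and basing it on the device's bound driver avoids dereferencing rq->q->disk, which may already be gone while the device is being torn down. An illustrative caller (the done hook exists in struct scsi_driver; the surrounding function is made up):

	static void example_finish(struct scsi_cmnd *cmd)
	{
		struct scsi_driver *drv = scsi_cmd_to_driver(cmd);

		if (drv->done)
			drv->done(cmd);
	}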
diff --git a/include/soc/fsl/dpaa2-fd.h b/include/soc/fsl/dpaa2-fd.h
index 90ae8d191f1a..bae490cac0aa 100644
--- a/include/soc/fsl/dpaa2-fd.h
+++ b/include/soc/fsl/dpaa2-fd.h
@@ -7,7 +7,8 @@
#ifndef __FSL_DPAA2_FD_H
#define __FSL_DPAA2_FD_H
-#include <linux/kernel.h>
+#include <linux/byteorder/generic.h>
+#include <linux/types.h>
/**
* DOC: DPAA2 FD - Frame Descriptor APIs for DPAA2
diff --git a/include/soc/fsl/qe/immap_qe.h b/include/soc/fsl/qe/immap_qe.h
index 7614fee532f1..edd601f53f5d 100644
--- a/include/soc/fsl/qe/immap_qe.h
+++ b/include/soc/fsl/qe/immap_qe.h
@@ -13,7 +13,8 @@
#define _ASM_POWERPC_IMMAP_QE_H
#ifdef __KERNEL__
-#include <linux/kernel.h>
+#include <linux/types.h>
+
#include <asm/io.h>
#define QE_IMMAP_SIZE (1024 * 1024) /* 1MB from 1MB+IMMR */
diff --git a/include/soc/fsl/qe/qe_tdm.h b/include/soc/fsl/qe/qe_tdm.h
index b6febe225071..43ea830cfe1f 100644
--- a/include/soc/fsl/qe/qe_tdm.h
+++ b/include/soc/fsl/qe/qe_tdm.h
@@ -10,8 +10,8 @@
#ifndef _QE_TDM_H_
#define _QE_TDM_H_
-#include <linux/kernel.h>
#include <linux/list.h>
+#include <linux/types.h>
#include <soc/fsl/qe/immap_qe.h>
#include <soc/fsl/qe/qe.h>
@@ -19,6 +19,8 @@
#include <soc/fsl/qe/ucc.h>
#include <soc/fsl/qe/ucc_fast.h>
+struct device_node;
+
/* SI RAM entries */
#define SIR_LAST 0x0001
#define SIR_BYTE 0x0002
diff --git a/include/soc/fsl/qe/ucc_fast.h b/include/soc/fsl/qe/ucc_fast.h
index 9696a5b9b5d1..ad60b87a3c69 100644
--- a/include/soc/fsl/qe/ucc_fast.h
+++ b/include/soc/fsl/qe/ucc_fast.h
@@ -10,7 +10,7 @@
#ifndef __UCC_FAST_H__
#define __UCC_FAST_H__
-#include <linux/kernel.h>
+#include <linux/types.h>
#include <soc/fsl/qe/immap_qe.h>
#include <soc/fsl/qe/qe.h>
diff --git a/include/soc/fsl/qe/ucc_slow.h b/include/soc/fsl/qe/ucc_slow.h
index 11a216e4e919..7548ce8a202d 100644
--- a/include/soc/fsl/qe/ucc_slow.h
+++ b/include/soc/fsl/qe/ucc_slow.h
@@ -11,7 +11,7 @@
#ifndef __UCC_SLOW_H__
#define __UCC_SLOW_H__
-#include <linux/kernel.h>
+#include <linux/types.h>
#include <soc/fsl/qe/immap_qe.h>
#include <soc/fsl/qe/qe.h>
diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index 9b187d86e1bd..36da42cd0774 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -617,6 +617,7 @@ void snd_pcm_stream_unlock(struct snd_pcm_substream *substream);
void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream);
void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream);
unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream);
+unsigned long _snd_pcm_stream_lock_irqsave_nested(struct snd_pcm_substream *substream);
/**
* snd_pcm_stream_lock_irqsave - Lock the PCM stream
@@ -636,6 +637,20 @@ void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream,
unsigned long flags);
/**
+ * snd_pcm_stream_lock_irqsave_nested - Single-nested PCM stream locking
+ * @substream: PCM substream
+ * @flags: irq flags
+ *
+ * This locks the PCM stream like snd_pcm_stream_lock_irqsave() but with
+ * the single-depth lockdep subclass.
+ */
+#define snd_pcm_stream_lock_irqsave_nested(substream, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ flags = _snd_pcm_stream_lock_irqsave_nested(substream); \
+ } while (0)
+
+/**
* snd_pcm_group_for_each_entry - iterate over the linked substreams
* @s: the iterator
* @substream: the substream
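The nested variant matters when two substream locks must be held at once (e.g. linked substreams): taking the second lock with the default lockdep class would look like a self-deadlock. A sketch under that assumption:

	static void example_lock_pair(struct snd_pcm_substream *a,
				      struct snd_pcm_substream *b)
	{
		unsigned long fa, fb;

		snd_pcm_stream_lock_irqsave(a, fa);
		/* second stream: single-depth subclass keeps lockdep quiet */
		snd_pcm_stream_lock_irqsave_nested(b, fb);
		/* ... operate on both streams ... */
		snd_pcm_stream_unlock_irqrestore(b, fb);
		snd_pcm_stream_unlock_irqrestore(a, fa);
	}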
diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
index 7660a7846586..6a13220d2d27 100644
--- a/include/trace/bpf_probe.h
+++ b/include/trace/bpf_probe.h
@@ -21,6 +21,9 @@
#undef __get_bitmask
#define __get_bitmask(field) (char *)__get_dynamic_array(field)
+#undef __get_sockaddr
+#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field))
+
#undef __get_rel_dynamic_array
#define __get_rel_dynamic_array(field) \
((void *)(&__entry->__rel_loc_##field) + \
@@ -37,6 +40,9 @@
#undef __get_rel_bitmask
#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field)
+#undef __get_rel_sockaddr
+#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field))
+
#undef __perf_count
#define __perf_count(c) (c)
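__get_sockaddr()/__get_rel_sockaddr() simply cast the dynamic-array payload, so a tracepoint can store a variable-length sockaddr and print it with the %pISpc vsprintf extension. A hypothetical event using the accessor:

	TRACE_EVENT(example_connect,
		TP_PROTO(struct sockaddr *sa, int len),
		TP_ARGS(sa, len),
		TP_STRUCT__entry(
			__dynamic_array(u8, addr, len)
		),
		TP_fast_assign(
			memcpy(__get_dynamic_array(addr), sa, len);
		),
		TP_printk("peer %pISpc", __get_sockaddr(addr))
	);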
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 27170e40e8c9..7f4dfbdf12a6 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -100,19 +100,7 @@ TRACE_EVENT(block_rq_requeue,
__entry->nr_sector, 0)
);
-/**
- * block_rq_complete - block IO operation completed by device driver
- * @rq: block operations request
- * @error: status code
- * @nr_bytes: number of completed bytes
- *
- * The block_rq_complete tracepoint event indicates that some portion
- * of operation request has been completed by the device driver. If
- * the @rq->bio is %NULL, then there is absolutely no additional work to
- * do for the request. If @rq->bio is non-NULL then there is
- * additional work required to complete the request.
- */
-TRACE_EVENT(block_rq_complete,
+DECLARE_EVENT_CLASS(block_rq_completion,
TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes),
@@ -144,6 +132,41 @@ TRACE_EVENT(block_rq_complete,
__entry->nr_sector, __entry->error)
);
+/**
+ * block_rq_complete - block IO operation completed by device driver
+ * @rq: block operations request
+ * @error: status code
+ * @nr_bytes: number of completed bytes
+ *
+ * The block_rq_complete tracepoint event indicates that some portion
+ * of operation request has been completed by the device driver. If
+ * the @rq->bio is %NULL, then there is absolutely no additional work to
+ * do for the request. If @rq->bio is non-NULL then there is
+ * additional work required to complete the request.
+ */
+DEFINE_EVENT(block_rq_completion, block_rq_complete,
+
+ TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes),
+
+ TP_ARGS(rq, error, nr_bytes)
+);
+
+/**
+ * block_rq_error - block IO operation error reported by device driver
+ * @rq: block operations request
+ * @error: status code
+ * @nr_bytes: number of completed bytes
+ *
+ * The block_rq_error tracepoint event indicates that some portion
+ * of operation request has failed as reported by the device driver.
+ */
+DEFINE_EVENT(block_rq_completion, block_rq_error,
+
+ TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes),
+
+ TP_ARGS(rq, error, nr_bytes)
+);
+
DECLARE_EVENT_CLASS(block_rq,
TP_PROTO(struct request *rq),
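Factoring the record layout into DECLARE_EVENT_CLASS() lets block_rq_complete and block_rq_error share one set of TP_STRUCT__entry/TP_fast_assign definitions; adding another event on the same prototype is a single macro. With a hypothetical event name:

	DEFINE_EVENT(block_rq_completion, block_rq_example,
		TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes),
		TP_ARGS(rq, error, nr_bytes)
	);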
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 0d729664b4b4..f068ff30d654 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -53,6 +53,7 @@ struct btrfs_space_info;
{ BTRFS_TREE_RELOC_OBJECTID, "TREE_RELOC" }, \
{ BTRFS_UUID_TREE_OBJECTID, "UUID_TREE" }, \
{ BTRFS_FREE_SPACE_TREE_OBJECTID, "FREE_SPACE_TREE" }, \
+ { BTRFS_BLOCK_GROUP_TREE_OBJECTID, "BLOCK_GROUP_TREE" },\
{ BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" })
#define show_root_type(obj) \
diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index c6f5aa74db89..2c530637e10a 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -56,6 +56,7 @@ enum cachefiles_coherency_trace {
cachefiles_coherency_set_ok,
cachefiles_coherency_vol_check_cmp,
cachefiles_coherency_vol_check_ok,
+ cachefiles_coherency_vol_check_resv,
cachefiles_coherency_vol_check_xattr,
cachefiles_coherency_vol_set_fail,
cachefiles_coherency_vol_set_ok,
@@ -139,6 +140,7 @@ enum cachefiles_error_trace {
EM(cachefiles_coherency_set_ok, "SET ok ") \
EM(cachefiles_coherency_vol_check_cmp, "VOL BAD cmp ") \
EM(cachefiles_coherency_vol_check_ok, "VOL OK ") \
+ EM(cachefiles_coherency_vol_check_resv, "VOL BAD resv") \
EM(cachefiles_coherency_vol_check_xattr,"VOL BAD xatt") \
EM(cachefiles_coherency_vol_set_fail, "VOL SET fail") \
E_(cachefiles_coherency_vol_set_ok, "VOL SET ok ")
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index 7d48e7079e48..c6d5d70dc7a5 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -67,10 +67,10 @@ DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_freepages,
#ifdef CONFIG_COMPACTION
TRACE_EVENT(mm_compaction_migratepages,
- TP_PROTO(unsigned long nr_all,
+ TP_PROTO(struct compact_control *cc,
unsigned int nr_succeeded),
- TP_ARGS(nr_all, nr_succeeded),
+ TP_ARGS(cc, nr_succeeded),
TP_STRUCT__entry(
__field(unsigned long, nr_migrated)
@@ -79,7 +79,7 @@ TRACE_EVENT(mm_compaction_migratepages,
TP_fast_assign(
__entry->nr_migrated = nr_succeeded;
- __entry->nr_failed = nr_all - nr_succeeded;
+ __entry->nr_failed = cc->nr_migratepages - nr_succeeded;
),
TP_printk("nr_migrated=%lu nr_failed=%lu",
@@ -88,10 +88,10 @@ TRACE_EVENT(mm_compaction_migratepages,
);
TRACE_EVENT(mm_compaction_begin,
- TP_PROTO(unsigned long zone_start, unsigned long migrate_pfn,
- unsigned long free_pfn, unsigned long zone_end, bool sync),
+ TP_PROTO(struct compact_control *cc, unsigned long zone_start,
+ unsigned long zone_end, bool sync),
- TP_ARGS(zone_start, migrate_pfn, free_pfn, zone_end, sync),
+ TP_ARGS(cc, zone_start, zone_end, sync),
TP_STRUCT__entry(
__field(unsigned long, zone_start)
@@ -103,8 +103,8 @@ TRACE_EVENT(mm_compaction_begin,
TP_fast_assign(
__entry->zone_start = zone_start;
- __entry->migrate_pfn = migrate_pfn;
- __entry->free_pfn = free_pfn;
+ __entry->migrate_pfn = cc->migrate_pfn;
+ __entry->free_pfn = cc->free_pfn;
__entry->zone_end = zone_end;
__entry->sync = sync;
),
@@ -118,11 +118,11 @@ TRACE_EVENT(mm_compaction_begin,
);
TRACE_EVENT(mm_compaction_end,
- TP_PROTO(unsigned long zone_start, unsigned long migrate_pfn,
- unsigned long free_pfn, unsigned long zone_end, bool sync,
+ TP_PROTO(struct compact_control *cc, unsigned long zone_start,
+ unsigned long zone_end, bool sync,
int status),
- TP_ARGS(zone_start, migrate_pfn, free_pfn, zone_end, sync, status),
+ TP_ARGS(cc, zone_start, zone_end, sync, status),
TP_STRUCT__entry(
__field(unsigned long, zone_start)
@@ -135,8 +135,8 @@ TRACE_EVENT(mm_compaction_end,
TP_fast_assign(
__entry->zone_start = zone_start;
- __entry->migrate_pfn = migrate_pfn;
- __entry->free_pfn = free_pfn;
+ __entry->migrate_pfn = cc->migrate_pfn;
+ __entry->free_pfn = cc->free_pfn;
__entry->zone_end = zone_end;
__entry->sync = sync;
__entry->status = status;
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 19e957b7f941..d06ffffad434 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -95,6 +95,17 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B);
{ FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \
{ FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"})
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_XATTR);
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_CROSS_RENAME);
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE);
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_NOMEM);
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_SWAP_BOOT);
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_RESIZE);
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_RENAME_DIR);
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_FALLOC_RANGE);
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_INODE_JOURNAL_DATA);
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_MAX);
+
#define show_fc_reason(reason) \
__print_symbolic(reason, \
{ EXT4_FC_REASON_XATTR, "XATTR"}, \
@@ -597,44 +608,44 @@ DEFINE_EVENT(ext4__page_op, ext4_releasepage,
TP_ARGS(page)
);
-DECLARE_EVENT_CLASS(ext4_invalidatepage_op,
- TP_PROTO(struct page *page, unsigned int offset, unsigned int length),
+DECLARE_EVENT_CLASS(ext4_invalidate_folio_op,
+ TP_PROTO(struct folio *folio, size_t offset, size_t length),
- TP_ARGS(page, offset, length),
+ TP_ARGS(folio, offset, length),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( pgoff_t, index )
- __field( unsigned int, offset )
- __field( unsigned int, length )
+ __field( size_t, offset )
+ __field( size_t, length )
),
TP_fast_assign(
- __entry->dev = page->mapping->host->i_sb->s_dev;
- __entry->ino = page->mapping->host->i_ino;
- __entry->index = page->index;
+ __entry->dev = folio->mapping->host->i_sb->s_dev;
+ __entry->ino = folio->mapping->host->i_ino;
+ __entry->index = folio->index;
__entry->offset = offset;
__entry->length = length;
),
- TP_printk("dev %d,%d ino %lu page_index %lu offset %u length %u",
+ TP_printk("dev %d,%d ino %lu folio_index %lu offset %zu length %zu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned long) __entry->index,
__entry->offset, __entry->length)
);
-DEFINE_EVENT(ext4_invalidatepage_op, ext4_invalidatepage,
- TP_PROTO(struct page *page, unsigned int offset, unsigned int length),
+DEFINE_EVENT(ext4_invalidate_folio_op, ext4_invalidate_folio,
+ TP_PROTO(struct folio *folio, size_t offset, size_t length),
- TP_ARGS(page, offset, length)
+ TP_ARGS(folio, offset, length)
);
-DEFINE_EVENT(ext4_invalidatepage_op, ext4_journalled_invalidatepage,
- TP_PROTO(struct page *page, unsigned int offset, unsigned int length),
+DEFINE_EVENT(ext4_invalidate_folio_op, ext4_journalled_invalidate_folio,
+ TP_PROTO(struct folio *folio, size_t offset, size_t length),
- TP_ARGS(page, offset, length)
+ TP_ARGS(folio, offset, length)
);
TRACE_EVENT(ext4_discard_blocks,
@@ -2643,7 +2654,7 @@ TRACE_EVENT(ext4_fc_replay_scan,
__entry->off = off;
),
- TP_printk("FC scan pass on dev %d,%d: error %d, off %d",
+ TP_printk("dev %d,%d error %d, off %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->error, __entry->off)
);
@@ -2669,32 +2680,35 @@ TRACE_EVENT(ext4_fc_replay,
__entry->priv2 = priv2;
),
- TP_printk("FC Replay %d,%d: tag %d, ino %d, data1 %d, data2 %d",
+ TP_printk("dev %d,%d: tag %d, ino %d, data1 %d, data2 %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->tag, __entry->ino, __entry->priv1, __entry->priv2)
);
TRACE_EVENT(ext4_fc_commit_start,
- TP_PROTO(struct super_block *sb),
+ TP_PROTO(struct super_block *sb, tid_t commit_tid),
- TP_ARGS(sb),
+ TP_ARGS(sb, commit_tid),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(tid_t, tid)
),
TP_fast_assign(
__entry->dev = sb->s_dev;
+ __entry->tid = commit_tid;
),
- TP_printk("fast_commit started on dev %d,%d",
- MAJOR(__entry->dev), MINOR(__entry->dev))
+ TP_printk("dev %d,%d tid %u", MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->tid)
);
TRACE_EVENT(ext4_fc_commit_stop,
- TP_PROTO(struct super_block *sb, int nblks, int reason),
+ TP_PROTO(struct super_block *sb, int nblks, int reason,
+ tid_t commit_tid),
- TP_ARGS(sb, nblks, reason),
+ TP_ARGS(sb, nblks, reason, commit_tid),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -2703,6 +2717,7 @@ TRACE_EVENT(ext4_fc_commit_stop,
__field(int, num_fc)
__field(int, num_fc_ineligible)
__field(int, nblks_agg)
+ __field(tid_t, tid)
),
TP_fast_assign(
@@ -2713,128 +2728,193 @@ TRACE_EVENT(ext4_fc_commit_stop,
__entry->num_fc_ineligible =
EXT4_SB(sb)->s_fc_stats.fc_ineligible_commits;
__entry->nblks_agg = EXT4_SB(sb)->s_fc_stats.fc_numblks;
+ __entry->tid = commit_tid;
),
- TP_printk("fc on [%d,%d] nblks %d, reason %d, fc = %d, ineligible = %d, agg_nblks %d",
+ TP_printk("dev %d,%d nblks %d, reason %d, fc = %d, ineligible = %d, agg_nblks %d, tid %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->nblks, __entry->reason, __entry->num_fc,
- __entry->num_fc_ineligible, __entry->nblks_agg)
+ __entry->num_fc_ineligible, __entry->nblks_agg, __entry->tid)
);
#define FC_REASON_NAME_STAT(reason) \
show_fc_reason(reason), \
- __entry->sbi->s_fc_stats.fc_ineligible_reason_count[reason]
+ __entry->fc_ineligible_rc[reason]
TRACE_EVENT(ext4_fc_stats,
- TP_PROTO(struct super_block *sb),
-
- TP_ARGS(sb),
-
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(struct ext4_sb_info *, sbi)
- __field(int, count)
- ),
-
- TP_fast_assign(
- __entry->dev = sb->s_dev;
- __entry->sbi = EXT4_SB(sb);
- ),
-
- TP_printk("dev %d:%d fc ineligible reasons:\n"
- "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; "
- "num_commits:%ld, ineligible: %ld, numblks: %ld",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR),
- FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME),
- FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE),
- FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM),
- FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT),
- FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE),
- FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR),
- FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE),
- FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA),
- __entry->sbi->s_fc_stats.fc_num_commits,
- __entry->sbi->s_fc_stats.fc_ineligible_commits,
- __entry->sbi->s_fc_stats.fc_numblks)
-
-);
-
-#define DEFINE_TRACE_DENTRY_EVENT(__type) \
- TRACE_EVENT(ext4_fc_track_##__type, \
- TP_PROTO(struct inode *inode, struct dentry *dentry, int ret), \
- \
- TP_ARGS(inode, dentry, ret), \
- \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(int, ino) \
- __field(int, error) \
- ), \
- \
- TP_fast_assign( \
- __entry->dev = inode->i_sb->s_dev; \
- __entry->ino = inode->i_ino; \
- __entry->error = ret; \
- ), \
- \
- TP_printk("dev %d:%d, inode %d, error %d, fc_%s", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, __entry->error, \
- #__type) \
+ TP_PROTO(struct super_block *sb),
+
+ TP_ARGS(sb),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __array(unsigned int, fc_ineligible_rc, EXT4_FC_REASON_MAX)
+ __field(unsigned long, fc_commits)
+ __field(unsigned long, fc_ineligible_commits)
+ __field(unsigned long, fc_numblks)
+ ),
+
+ TP_fast_assign(
+ int i;
+
+ __entry->dev = sb->s_dev;
+ for (i = 0; i < EXT4_FC_REASON_MAX; i++) {
+ __entry->fc_ineligible_rc[i] =
+ EXT4_SB(sb)->s_fc_stats.fc_ineligible_reason_count[i];
+ }
+ __entry->fc_commits = EXT4_SB(sb)->s_fc_stats.fc_num_commits;
+ __entry->fc_ineligible_commits =
+ EXT4_SB(sb)->s_fc_stats.fc_ineligible_commits;
+ __entry->fc_numblks = EXT4_SB(sb)->s_fc_stats.fc_numblks;
+ ),
+
+ TP_printk("dev %d,%d fc ineligible reasons:\n"
+ "%s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u "
+ "num_commits:%lu, ineligible: %lu, numblks: %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA),
+ __entry->fc_commits, __entry->fc_ineligible_commits,
+ __entry->fc_numblks)
+);
+
+DECLARE_EVENT_CLASS(ext4_fc_track_dentry,
+
+ TP_PROTO(handle_t *handle, struct inode *inode,
+ struct dentry *dentry, int ret),
+
+ TP_ARGS(handle, inode, dentry, ret),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(tid_t, t_tid)
+ __field(ino_t, i_ino)
+ __field(tid_t, i_sync_tid)
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ struct ext4_inode_info *ei = EXT4_I(inode);
+
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->t_tid = handle->h_transaction->t_tid;
+ __entry->i_ino = inode->i_ino;
+ __entry->i_sync_tid = ei->i_sync_tid;
+ __entry->error = ret;
+ ),
+
+ TP_printk("dev %d,%d, t_tid %u, ino %lu, i_sync_tid %u, error %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->t_tid, __entry->i_ino, __entry->i_sync_tid,
+ __entry->error
)
+);
-DEFINE_TRACE_DENTRY_EVENT(create);
-DEFINE_TRACE_DENTRY_EVENT(link);
-DEFINE_TRACE_DENTRY_EVENT(unlink);
+#define DEFINE_EVENT_CLASS_DENTRY(__type) \
+DEFINE_EVENT(ext4_fc_track_dentry, ext4_fc_track_##__type, \
+ TP_PROTO(handle_t *handle, struct inode *inode, \
+ struct dentry *dentry, int ret), \
+ TP_ARGS(handle, inode, dentry, ret) \
+)
+
+DEFINE_EVENT_CLASS_DENTRY(create);
+DEFINE_EVENT_CLASS_DENTRY(link);
+DEFINE_EVENT_CLASS_DENTRY(unlink);
TRACE_EVENT(ext4_fc_track_inode,
- TP_PROTO(struct inode *inode, int ret),
+ TP_PROTO(handle_t *handle, struct inode *inode, int ret),
- TP_ARGS(inode, ret),
+ TP_ARGS(handle, inode, ret),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(int, ino)
- __field(int, error)
- ),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(tid_t, t_tid)
+ __field(ino_t, i_ino)
+ __field(tid_t, i_sync_tid)
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ struct ext4_inode_info *ei = EXT4_I(inode);
- TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
- __entry->ino = inode->i_ino;
- __entry->error = ret;
- ),
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->t_tid = handle->h_transaction->t_tid;
+ __entry->i_ino = inode->i_ino;
+ __entry->i_sync_tid = ei->i_sync_tid;
+ __entry->error = ret;
+ ),
- TP_printk("dev %d:%d, inode %d, error %d",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino, __entry->error)
+ TP_printk("dev %d:%d, t_tid %u, inode %lu, i_sync_tid %u, error %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->t_tid, __entry->i_ino, __entry->i_sync_tid,
+ __entry->error)
);
TRACE_EVENT(ext4_fc_track_range,
- TP_PROTO(struct inode *inode, long start, long end, int ret),
-
- TP_ARGS(inode, start, end, ret),
-
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(int, ino)
- __field(long, start)
- __field(long, end)
- __field(int, error)
- ),
-
- TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
- __entry->ino = inode->i_ino;
- __entry->start = start;
- __entry->end = end;
- __entry->error = ret;
- ),
-
- TP_printk("dev %d:%d, inode %d, error %d, start %ld, end %ld",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino, __entry->error, __entry->start,
- __entry->end)
+ TP_PROTO(handle_t *handle, struct inode *inode,
+ long start, long end, int ret),
+
+ TP_ARGS(handle, inode, start, end, ret),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(tid_t, t_tid)
+ __field(ino_t, i_ino)
+ __field(tid_t, i_sync_tid)
+ __field(long, start)
+ __field(long, end)
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ struct ext4_inode_info *ei = EXT4_I(inode);
+
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->t_tid = handle->h_transaction->t_tid;
+ __entry->i_ino = inode->i_ino;
+ __entry->i_sync_tid = ei->i_sync_tid;
+ __entry->start = start;
+ __entry->end = end;
+ __entry->error = ret;
+ ),
+
+ TP_printk("dev %d:%d, t_tid %u, inode %lu, i_sync_tid %u, error %d, start %ld, end %ld",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->t_tid, __entry->i_ino, __entry->i_sync_tid,
+ __entry->error, __entry->start, __entry->end)
+ );
+
+TRACE_EVENT(ext4_fc_cleanup,
+ TP_PROTO(journal_t *journal, int full, tid_t tid),
+
+ TP_ARGS(journal, full, tid),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, j_fc_off)
+ __field(int, full)
+ __field(tid_t, tid)
+ ),
+
+ TP_fast_assign(
+ struct super_block *sb = journal->j_private;
+
+ __entry->dev = sb->s_dev;
+ __entry->j_fc_off = journal->j_fc_off;
+ __entry->full = full;
+ __entry->tid = tid;
+ ),
+
+ TP_printk("dev %d,%d, j_fc_off %d, full %d, tid %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->j_fc_off, __entry->full, __entry->tid)
);
TRACE_EVENT(ext4_update_sb,
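The TRACE_DEFINE_ENUM() block added above exists because __print_symbolic() otherwise leaves the bare enum names in the tracefs 'format' file, where userspace parsers cannot resolve them to values. The general pattern, with a hypothetical enum:

	TRACE_DEFINE_ENUM(EXAMPLE_REASON_FOO);

	#define show_example_reason(r) \
		__print_symbolic(r, { EXAMPLE_REASON_FOO, "FOO" })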
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 7346f0164cf4..cddf5b6fbeb4 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -29,22 +29,22 @@ TRACE_EVENT(io_uring_create,
TP_ARGS(fd, ctx, sq_entries, cq_entries, flags),
TP_STRUCT__entry (
- __field( int, fd )
- __field( void *, ctx )
+ __field( int, fd )
+ __field( void *, ctx )
__field( u32, sq_entries )
__field( u32, cq_entries )
__field( u32, flags )
),
TP_fast_assign(
- __entry->fd = fd;
+ __entry->fd = fd;
__entry->ctx = ctx;
__entry->sq_entries = sq_entries;
__entry->cq_entries = cq_entries;
__entry->flags = flags;
),
- TP_printk("ring %p, fd %d sq size %d, cq size %d, flags %d",
+ TP_printk("ring %p, fd %d sq size %d, cq size %d, flags 0x%x",
__entry->ctx, __entry->fd, __entry->sq_entries,
__entry->cq_entries, __entry->flags)
);
@@ -57,10 +57,9 @@ TRACE_EVENT(io_uring_create,
* @opcode: describes which operation to perform
* @nr_user_files: number of registered files
* @nr_user_bufs: number of registered buffers
- * @cq_ev_fd: whether eventfs registered or not
* @ret: return code
*
- * Allows to trace fixed files/buffers/eventfds, that could be registered to
+ * Allows to trace fixed files/buffers, that could be registered to
* avoid an overhead of getting references to them for every operation. This
* event, together with io_uring_file_get, can provide a full picture of how
* much overhead one can reduce via fixing.
@@ -68,17 +67,16 @@ TRACE_EVENT(io_uring_create,
TRACE_EVENT(io_uring_register,
TP_PROTO(void *ctx, unsigned opcode, unsigned nr_files,
- unsigned nr_bufs, bool eventfd, long ret),
+ unsigned nr_bufs, long ret),
- TP_ARGS(ctx, opcode, nr_files, nr_bufs, eventfd, ret),
+ TP_ARGS(ctx, opcode, nr_files, nr_bufs, ret),
TP_STRUCT__entry (
- __field( void *, ctx )
- __field( unsigned, opcode )
- __field( unsigned, nr_files )
- __field( unsigned, nr_bufs )
- __field( bool, eventfd )
- __field( long, ret )
+ __field( void *, ctx )
+ __field( unsigned, opcode )
+ __field( unsigned, nr_files)
+ __field( unsigned, nr_bufs )
+ __field( long, ret )
),
TP_fast_assign(
@@ -86,20 +84,21 @@ TRACE_EVENT(io_uring_register,
__entry->opcode = opcode;
__entry->nr_files = nr_files;
__entry->nr_bufs = nr_bufs;
- __entry->eventfd = eventfd;
__entry->ret = ret;
),
TP_printk("ring %p, opcode %d, nr_user_files %d, nr_user_bufs %d, "
- "eventfd %d, ret %ld",
+ "ret %ld",
__entry->ctx, __entry->opcode, __entry->nr_files,
- __entry->nr_bufs, __entry->eventfd, __entry->ret)
+ __entry->nr_bufs, __entry->ret)
);
/**
* io_uring_file_get - called before getting references to an SQE file
*
* @ctx: pointer to a ring context structure
+ * @req: pointer to a submitted request
+ * @user_data: user data associated with the request
* @fd: SQE file descriptor
*
* Allows to trace out how often an SQE file reference is obtained, which can
@@ -108,59 +107,71 @@ TRACE_EVENT(io_uring_register,
*/
TRACE_EVENT(io_uring_file_get,
- TP_PROTO(void *ctx, int fd),
+ TP_PROTO(void *ctx, void *req, unsigned long long user_data, int fd),
- TP_ARGS(ctx, fd),
+ TP_ARGS(ctx, req, user_data, fd),
TP_STRUCT__entry (
- __field( void *, ctx )
- __field( int, fd )
+ __field( void *, ctx )
+ __field( void *, req )
+ __field( u64, user_data )
+ __field( int, fd )
),
TP_fast_assign(
- __entry->ctx = ctx;
+ __entry->ctx = ctx;
+ __entry->req = req;
+ __entry->user_data = user_data;
__entry->fd = fd;
),
- TP_printk("ring %p, fd %d", __entry->ctx, __entry->fd)
+ TP_printk("ring %p, req %p, user_data 0x%llx, fd %d",
+ __entry->ctx, __entry->req, __entry->user_data, __entry->fd)
);
/**
* io_uring_queue_async_work - called before submitting a new async work
*
* @ctx: pointer to a ring context structure
- * @hashed: type of workqueue, hashed or normal
* @req: pointer to a submitted request
+ * @user_data: user data associated with the request
+ * @opcode: opcode of request
+ * @flags: request flags
* @work: pointer to a submitted io_wq_work
+ * @rw: type of workqueue, hashed or normal
*
* Allows to trace asynchronous work submission.
*/
TRACE_EVENT(io_uring_queue_async_work,
- TP_PROTO(void *ctx, int rw, void * req, struct io_wq_work *work,
- unsigned int flags),
+ TP_PROTO(void *ctx, void * req, unsigned long long user_data, u8 opcode,
+ unsigned int flags, struct io_wq_work *work, int rw),
- TP_ARGS(ctx, rw, req, work, flags),
+ TP_ARGS(ctx, req, user_data, flags, opcode, work, rw),
TP_STRUCT__entry (
- __field( void *, ctx )
- __field( int, rw )
- __field( void *, req )
- __field( struct io_wq_work *, work )
- __field( unsigned int, flags )
+ __field( void *, ctx )
+ __field( void *, req )
+ __field( u64, user_data )
+ __field( u8, opcode )
+ __field( unsigned int, flags )
+ __field( struct io_wq_work *, work )
+ __field( int, rw )
),
TP_fast_assign(
- __entry->ctx = ctx;
- __entry->rw = rw;
- __entry->req = req;
- __entry->work = work;
- __entry->flags = flags;
+ __entry->ctx = ctx;
+ __entry->req = req;
+ __entry->user_data = user_data;
+ __entry->flags = flags;
+ __entry->opcode = opcode;
+ __entry->work = work;
+ __entry->rw = rw;
),
- TP_printk("ring %p, request %p, flags %d, %s queue, work %p",
- __entry->ctx, __entry->req, __entry->flags,
- __entry->rw ? "hashed" : "normal", __entry->work)
+ TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, flags 0x%x, %s queue, work %p",
+ __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+ __entry->flags, __entry->rw ? "hashed" : "normal", __entry->work)
);
/**
@@ -169,30 +180,33 @@ TRACE_EVENT(io_uring_queue_async_work,
* @ctx: pointer to a ring context structure
* @req: pointer to a deferred request
* @user_data: user data associated with the request
+ * @opcode: opcode of request
*
* Allows to track deferred requests, to get an insight about what requests are
* not started immediately.
*/
TRACE_EVENT(io_uring_defer,
- TP_PROTO(void *ctx, void *req, unsigned long long user_data),
+ TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode),
- TP_ARGS(ctx, req, user_data),
+ TP_ARGS(ctx, req, user_data, opcode),
TP_STRUCT__entry (
- __field( void *, ctx )
- __field( void *, req )
- __field( unsigned long long, data )
+ __field( void *, ctx )
+ __field( void *, req )
+ __field( unsigned long long, data )
+ __field( u8, opcode )
),
TP_fast_assign(
__entry->ctx = ctx;
__entry->req = req;
__entry->data = user_data;
+ __entry->opcode = opcode;
),
- TP_printk("ring %p, request %p user_data %llu", __entry->ctx,
- __entry->req, __entry->data)
+ TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d",
+ __entry->ctx, __entry->req, __entry->data, __entry->opcode)
);
/**
@@ -250,7 +264,7 @@ TRACE_EVENT(io_uring_cqring_wait,
),
TP_fast_assign(
- __entry->ctx = ctx;
+ __entry->ctx = ctx;
__entry->min_events = min_events;
),
@@ -260,7 +274,10 @@ TRACE_EVENT(io_uring_cqring_wait,
/**
* io_uring_fail_link - called before failing a linked request
*
+ * @ctx: pointer to a ring context structure
* @req: request, which links were cancelled
+ * @user_data: user data associated with the request
+ * @opcode: opcode of request
* @link: cancelled link
*
* Allows to track linked requests cancellation, to see not only that some work
@@ -268,27 +285,36 @@ TRACE_EVENT(io_uring_cqring_wait,
*/
TRACE_EVENT(io_uring_fail_link,
- TP_PROTO(void *req, void *link),
+ TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode, void *link),
- TP_ARGS(req, link),
+ TP_ARGS(ctx, req, user_data, opcode, link),
TP_STRUCT__entry (
- __field( void *, req )
- __field( void *, link )
+ __field( void *, ctx )
+ __field( void *, req )
+ __field( unsigned long long, user_data )
+ __field( u8, opcode )
+ __field( void *, link )
),
TP_fast_assign(
- __entry->req = req;
- __entry->link = link;
+ __entry->ctx = ctx;
+ __entry->req = req;
+ __entry->user_data = user_data;
+ __entry->opcode = opcode;
+ __entry->link = link;
),
- TP_printk("request %p, link %p", __entry->req, __entry->link)
+ TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, link %p",
+ __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+ __entry->link)
);
/**
* io_uring_complete - called when completing an SQE
*
* @ctx: pointer to a ring context structure
+ * @req: pointer to a submitted request
* @user_data: user data associated with the request
* @res: result of the request
* @cflags: completion flags
@@ -296,12 +322,13 @@ TRACE_EVENT(io_uring_fail_link,
*/
TRACE_EVENT(io_uring_complete,
- TP_PROTO(void *ctx, u64 user_data, int res, unsigned cflags),
+ TP_PROTO(void *ctx, void *req, u64 user_data, int res, unsigned cflags),
- TP_ARGS(ctx, user_data, res, cflags),
+ TP_ARGS(ctx, req, user_data, res, cflags),
TP_STRUCT__entry (
__field( void *, ctx )
+ __field( void *, req )
__field( u64, user_data )
__field( int, res )
__field( unsigned, cflags )
@@ -309,14 +336,16 @@ TRACE_EVENT(io_uring_complete,
TP_fast_assign(
__entry->ctx = ctx;
+ __entry->req = req;
__entry->user_data = user_data;
__entry->res = res;
__entry->cflags = cflags;
),
- TP_printk("ring %p, user_data 0x%llx, result %d, cflags %x",
- __entry->ctx, (unsigned long long)__entry->user_data,
- __entry->res, __entry->cflags)
+ TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x",
+ __entry->ctx, __entry->req,
+ __entry->user_data,
+ __entry->res, __entry->cflags)
);
/**
@@ -324,8 +353,8 @@ TRACE_EVENT(io_uring_complete,
*
* @ctx: pointer to a ring context structure
* @req: pointer to a submitted request
- * @opcode: opcode of request
* @user_data: user data associated with the request
+ * @opcode: opcode of request
* @flags: request flags
* @force_nonblock: whether a context blocking or not
* @sq_thread: true if sq_thread has submitted this SQE
@@ -335,34 +364,34 @@ TRACE_EVENT(io_uring_complete,
*/
TRACE_EVENT(io_uring_submit_sqe,
- TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data, u32 flags,
+ TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode, u32 flags,
bool force_nonblock, bool sq_thread),
- TP_ARGS(ctx, req, opcode, user_data, flags, force_nonblock, sq_thread),
+ TP_ARGS(ctx, req, user_data, opcode, flags, force_nonblock, sq_thread),
TP_STRUCT__entry (
- __field( void *, ctx )
- __field( void *, req )
- __field( u8, opcode )
- __field( u64, user_data )
- __field( u32, flags )
- __field( bool, force_nonblock )
- __field( bool, sq_thread )
+ __field( void *, ctx )
+ __field( void *, req )
+ __field( unsigned long long, user_data )
+ __field( u8, opcode )
+ __field( u32, flags )
+ __field( bool, force_nonblock )
+ __field( bool, sq_thread )
),
TP_fast_assign(
__entry->ctx = ctx;
__entry->req = req;
- __entry->opcode = opcode;
__entry->user_data = user_data;
+ __entry->opcode = opcode;
__entry->flags = flags;
__entry->force_nonblock = force_nonblock;
__entry->sq_thread = sq_thread;
),
- TP_printk("ring %p, req %p, op %d, data 0x%llx, flags %u, "
+ TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, flags 0x%x, "
"non block %d, sq_thread %d", __entry->ctx, __entry->req,
- __entry->opcode, (unsigned long long)__entry->user_data,
+ __entry->user_data, __entry->opcode,
__entry->flags, __entry->force_nonblock, __entry->sq_thread)
);
@@ -371,8 +400,8 @@ TRACE_EVENT(io_uring_submit_sqe,
*
* @ctx: pointer to a ring context structure
* @req: pointer to the armed request
- * @opcode: opcode of request
* @user_data: user data associated with the request
+ * @opcode: opcode of request
* @mask: request poll events mask
* @events: registered events of interest
*
@@ -381,155 +410,110 @@ TRACE_EVENT(io_uring_submit_sqe,
*/
TRACE_EVENT(io_uring_poll_arm,
- TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data,
+ TP_PROTO(void *ctx, void *req, u64 user_data, u8 opcode,
int mask, int events),
- TP_ARGS(ctx, req, opcode, user_data, mask, events),
+ TP_ARGS(ctx, req, user_data, opcode, mask, events),
TP_STRUCT__entry (
- __field( void *, ctx )
- __field( void *, req )
- __field( u8, opcode )
- __field( u64, user_data )
- __field( int, mask )
- __field( int, events )
+ __field( void *, ctx )
+ __field( void *, req )
+ __field( unsigned long long, user_data )
+ __field( u8, opcode )
+ __field( int, mask )
+ __field( int, events )
),
TP_fast_assign(
__entry->ctx = ctx;
__entry->req = req;
- __entry->opcode = opcode;
__entry->user_data = user_data;
+ __entry->opcode = opcode;
__entry->mask = mask;
__entry->events = events;
),
- TP_printk("ring %p, req %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
- __entry->ctx, __entry->req, __entry->opcode,
- (unsigned long long) __entry->user_data,
+ TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask 0x%x, events 0x%x",
+ __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
__entry->mask, __entry->events)
);
-TRACE_EVENT(io_uring_poll_wake,
-
- TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask),
-
- TP_ARGS(ctx, opcode, user_data, mask),
-
- TP_STRUCT__entry (
- __field( void *, ctx )
- __field( u8, opcode )
- __field( u64, user_data )
- __field( int, mask )
- ),
-
- TP_fast_assign(
- __entry->ctx = ctx;
- __entry->opcode = opcode;
- __entry->user_data = user_data;
- __entry->mask = mask;
- ),
-
- TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x",
- __entry->ctx, __entry->opcode,
- (unsigned long long) __entry->user_data,
- __entry->mask)
-);
-
-TRACE_EVENT(io_uring_task_add,
-
- TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask),
-
- TP_ARGS(ctx, opcode, user_data, mask),
-
- TP_STRUCT__entry (
- __field( void *, ctx )
- __field( u8, opcode )
- __field( u64, user_data )
- __field( int, mask )
- ),
-
- TP_fast_assign(
- __entry->ctx = ctx;
- __entry->opcode = opcode;
- __entry->user_data = user_data;
- __entry->mask = mask;
- ),
-
- TP_printk("ring %p, op %d, data 0x%llx, mask %x",
- __entry->ctx, __entry->opcode,
- (unsigned long long) __entry->user_data,
- __entry->mask)
-);
-
/*
- * io_uring_task_run - called when task_work_run() executes the poll events
- * notification callbacks
+ * io_uring_task_add - called after adding a task
*
* @ctx: pointer to a ring context structure
- * @req: pointer to the armed request
- * @opcode: opcode of request
+ * @req: pointer to request
* @user_data: user data associated with the request
+ * @opcode: opcode of request
+ * @mask: request poll events mask
*
- * Allows to track when notified poll events are processed
*/
-TRACE_EVENT(io_uring_task_run,
+TRACE_EVENT(io_uring_task_add,
- TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data),
+ TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode, int mask),
- TP_ARGS(ctx, req, opcode, user_data),
+ TP_ARGS(ctx, req, user_data, opcode, mask),
TP_STRUCT__entry (
- __field( void *, ctx )
- __field( void *, req )
- __field( u8, opcode )
- __field( u64, user_data )
+ __field( void *, ctx )
+ __field( void *, req )
+ __field( unsigned long long, user_data )
+ __field( u8, opcode )
+ __field( int, mask )
),
TP_fast_assign(
__entry->ctx = ctx;
__entry->req = req;
- __entry->opcode = opcode;
__entry->user_data = user_data;
+ __entry->opcode = opcode;
+ __entry->mask = mask;
),
- TP_printk("ring %p, req %p, op %d, data 0x%llx",
- __entry->ctx, __entry->req, __entry->opcode,
- (unsigned long long) __entry->user_data)
+ TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask %x",
+ __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+ __entry->mask)
);
/*
* io_uring_req_failed - called when an sqe is errored during submission
*
* @sqe: pointer to the io_uring_sqe that failed
+ * @ctx: pointer to a ring context structure
+ * @req: pointer to request
* @error: error it failed with
*
* Allows easier diagnosing of malformed requests in production systems.
*/
TRACE_EVENT(io_uring_req_failed,
- TP_PROTO(const struct io_uring_sqe *sqe, int error),
+ TP_PROTO(const struct io_uring_sqe *sqe, void *ctx, void *req, int error),
- TP_ARGS(sqe, error),
+ TP_ARGS(sqe, ctx, req, error),
TP_STRUCT__entry (
- __field( u8, opcode )
- __field( u8, flags )
- __field( u8, ioprio )
- __field( u64, off )
- __field( u64, addr )
- __field( u32, len )
- __field( u32, op_flags )
- __field( u64, user_data )
- __field( u16, buf_index )
- __field( u16, personality )
- __field( u32, file_index )
- __field( u64, pad1 )
- __field( u64, pad2 )
- __field( int, error )
+ __field( void *, ctx )
+ __field( void *, req )
+ __field( unsigned long long, user_data )
+ __field( u8, opcode )
+ __field( u8, flags )
+ __field( u8, ioprio )
+ __field( u64, off )
+ __field( u64, addr )
+ __field( u32, len )
+ __field( u32, op_flags )
+ __field( u16, buf_index )
+ __field( u16, personality )
+ __field( u32, file_index )
+ __field( u64, pad1 )
+ __field( u64, pad2 )
+ __field( int, error )
),
TP_fast_assign(
+ __entry->ctx = ctx;
+ __entry->req = req;
+ __entry->user_data = sqe->user_data;
__entry->opcode = sqe->opcode;
__entry->flags = sqe->flags;
__entry->ioprio = sqe->ioprio;
@@ -537,7 +521,6 @@ TRACE_EVENT(io_uring_req_failed,
__entry->addr = sqe->addr;
__entry->len = sqe->len;
__entry->op_flags = sqe->rw_flags;
- __entry->user_data = sqe->user_data;
__entry->buf_index = sqe->buf_index;
__entry->personality = sqe->personality;
__entry->file_index = sqe->file_index;
@@ -546,13 +529,15 @@ TRACE_EVENT(io_uring_req_failed,
__entry->error = error;
),
- TP_printk("op %d, flags=0x%x, prio=%d, off=%llu, addr=%llu, "
- "len=%u, rw_flags=0x%x, user_data=0x%llx, buf_index=%d, "
+ TP_printk("ring %p, req %p, user_data 0x%llx, "
+ "op %d, flags 0x%x, prio=%d, off=%llu, addr=%llu, "
+ "len=%u, rw_flags=0x%x, buf_index=%d, "
"personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d",
+ __entry->ctx, __entry->req, __entry->user_data,
__entry->opcode, __entry->flags, __entry->ioprio,
(unsigned long long)__entry->off,
(unsigned long long) __entry->addr, __entry->len,
- __entry->op_flags, (unsigned long long) __entry->user_data,
+ __entry->op_flags,
__entry->buf_index, __entry->personality, __entry->file_index,
(unsigned long long) __entry->pad1,
(unsigned long long) __entry->pad2, __entry->error)
diff --git a/include/trace/events/random.h b/include/trace/events/random.h
deleted file mode 100644
index a2d9aa16a5d7..000000000000
--- a/include/trace/events/random.h
+++ /dev/null
@@ -1,233 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM random
-
-#if !defined(_TRACE_RANDOM_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_RANDOM_H
-
-#include <linux/writeback.h>
-#include <linux/tracepoint.h>
-
-TRACE_EVENT(add_device_randomness,
- TP_PROTO(int bytes, unsigned long IP),
-
- TP_ARGS(bytes, IP),
-
- TP_STRUCT__entry(
- __field( int, bytes )
- __field(unsigned long, IP )
- ),
-
- TP_fast_assign(
- __entry->bytes = bytes;
- __entry->IP = IP;
- ),
-
- TP_printk("bytes %d caller %pS",
- __entry->bytes, (void *)__entry->IP)
-);
-
-DECLARE_EVENT_CLASS(random__mix_pool_bytes,
- TP_PROTO(int bytes, unsigned long IP),
-
- TP_ARGS(bytes, IP),
-
- TP_STRUCT__entry(
- __field( int, bytes )
- __field(unsigned long, IP )
- ),
-
- TP_fast_assign(
- __entry->bytes = bytes;
- __entry->IP = IP;
- ),
-
- TP_printk("input pool: bytes %d caller %pS",
- __entry->bytes, (void *)__entry->IP)
-);
-
-DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes,
- TP_PROTO(int bytes, unsigned long IP),
-
- TP_ARGS(bytes, IP)
-);
-
-DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock,
- TP_PROTO(int bytes, unsigned long IP),
-
- TP_ARGS(bytes, IP)
-);
-
-TRACE_EVENT(credit_entropy_bits,
- TP_PROTO(int bits, int entropy_count, unsigned long IP),
-
- TP_ARGS(bits, entropy_count, IP),
-
- TP_STRUCT__entry(
- __field( int, bits )
- __field( int, entropy_count )
- __field(unsigned long, IP )
- ),
-
- TP_fast_assign(
- __entry->bits = bits;
- __entry->entropy_count = entropy_count;
- __entry->IP = IP;
- ),
-
- TP_printk("input pool: bits %d entropy_count %d caller %pS",
- __entry->bits, __entry->entropy_count, (void *)__entry->IP)
-);
-
-TRACE_EVENT(debit_entropy,
- TP_PROTO(int debit_bits),
-
- TP_ARGS( debit_bits),
-
- TP_STRUCT__entry(
- __field( int, debit_bits )
- ),
-
- TP_fast_assign(
- __entry->debit_bits = debit_bits;
- ),
-
- TP_printk("input pool: debit_bits %d", __entry->debit_bits)
-);
-
-TRACE_EVENT(add_input_randomness,
- TP_PROTO(int input_bits),
-
- TP_ARGS(input_bits),
-
- TP_STRUCT__entry(
- __field( int, input_bits )
- ),
-
- TP_fast_assign(
- __entry->input_bits = input_bits;
- ),
-
- TP_printk("input_pool_bits %d", __entry->input_bits)
-);
-
-TRACE_EVENT(add_disk_randomness,
- TP_PROTO(dev_t dev, int input_bits),
-
- TP_ARGS(dev, input_bits),
-
- TP_STRUCT__entry(
- __field( dev_t, dev )
- __field( int, input_bits )
- ),
-
- TP_fast_assign(
- __entry->dev = dev;
- __entry->input_bits = input_bits;
- ),
-
- TP_printk("dev %d,%d input_pool_bits %d", MAJOR(__entry->dev),
- MINOR(__entry->dev), __entry->input_bits)
-);
-
-DECLARE_EVENT_CLASS(random__get_random_bytes,
- TP_PROTO(int nbytes, unsigned long IP),
-
- TP_ARGS(nbytes, IP),
-
- TP_STRUCT__entry(
- __field( int, nbytes )
- __field(unsigned long, IP )
- ),
-
- TP_fast_assign(
- __entry->nbytes = nbytes;
- __entry->IP = IP;
- ),
-
- TP_printk("nbytes %d caller %pS", __entry->nbytes, (void *)__entry->IP)
-);
-
-DEFINE_EVENT(random__get_random_bytes, get_random_bytes,
- TP_PROTO(int nbytes, unsigned long IP),
-
- TP_ARGS(nbytes, IP)
-);
-
-DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch,
- TP_PROTO(int nbytes, unsigned long IP),
-
- TP_ARGS(nbytes, IP)
-);
-
-DECLARE_EVENT_CLASS(random__extract_entropy,
- TP_PROTO(int nbytes, int entropy_count, unsigned long IP),
-
- TP_ARGS(nbytes, entropy_count, IP),
-
- TP_STRUCT__entry(
- __field( int, nbytes )
- __field( int, entropy_count )
- __field(unsigned long, IP )
- ),
-
- TP_fast_assign(
- __entry->nbytes = nbytes;
- __entry->entropy_count = entropy_count;
- __entry->IP = IP;
- ),
-
- TP_printk("input pool: nbytes %d entropy_count %d caller %pS",
- __entry->nbytes, __entry->entropy_count, (void *)__entry->IP)
-);
-
-
-DEFINE_EVENT(random__extract_entropy, extract_entropy,
- TP_PROTO(int nbytes, int entropy_count, unsigned long IP),
-
- TP_ARGS(nbytes, entropy_count, IP)
-);
-
-TRACE_EVENT(urandom_read,
- TP_PROTO(int got_bits, int pool_left, int input_left),
-
- TP_ARGS(got_bits, pool_left, input_left),
-
- TP_STRUCT__entry(
- __field( int, got_bits )
- __field( int, pool_left )
- __field( int, input_left )
- ),
-
- TP_fast_assign(
- __entry->got_bits = got_bits;
- __entry->pool_left = pool_left;
- __entry->input_left = input_left;
- ),
-
- TP_printk("got_bits %d nonblocking_pool_entropy_left %d "
- "input_entropy_left %d", __entry->got_bits,
- __entry->pool_left, __entry->input_left)
-);
-
-TRACE_EVENT(prandom_u32,
-
- TP_PROTO(unsigned int ret),
-
- TP_ARGS(ret),
-
- TP_STRUCT__entry(
- __field( unsigned int, ret)
- ),
-
- TP_fast_assign(
- __entry->ret = ret;
- ),
-
- TP_printk("ret=%u" , __entry->ret)
-);
-
-#endif /* _TRACE_RANDOM_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 670e41783edd..90b2fb0292cb 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -794,16 +794,15 @@ TRACE_EVENT_RCU(rcu_torture_read,
* Tracepoint for rcu_barrier() execution. The string "s" describes
* the rcu_barrier phase:
* "Begin": rcu_barrier() started.
+ * "CB": An rcu_barrier_callback() invoked a callback, not the last.
* "EarlyExit": rcu_barrier() piggybacked, thus early exit.
* "Inc1": rcu_barrier() piggyback check counter incremented.
- * "OfflineNoCBQ": rcu_barrier() found offline no-CBs CPU with callbacks.
- * "OnlineQ": rcu_barrier() found online CPU with callbacks.
- * "OnlineNQ": rcu_barrier() found online CPU, no callbacks.
+ * "Inc2": rcu_barrier() piggyback check counter incremented.
* "IRQ": An rcu_barrier_callback() callback posted on remote CPU.
* "IRQNQ": An rcu_barrier_callback() callback found no callbacks.
- * "CB": An rcu_barrier_callback() invoked a callback, not the last.
* "LastCB": An rcu_barrier_callback() invoked the last callback.
- * "Inc2": rcu_barrier() piggyback check counter incremented.
+ * "NQ": rcu_barrier() found a CPU with no callbacks.
+ * "OnlineQ": rcu_barrier() found online CPU with callbacks.
* The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument
* is the count of remaining callbacks, and "done" is the piggybacking count.
*/
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 94640482cfe7..65e786756321 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -187,7 +187,9 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
TP_ARGS(p));
#ifdef CREATE_TRACE_POINTS
-static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
+static inline long __trace_sched_switch_state(bool preempt,
+ unsigned int prev_state,
+ struct task_struct *p)
{
unsigned int state;
@@ -208,7 +210,7 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
* it for left shift operation to get the correct task->state
* mapping.
*/
- state = task_state_index(p);
+ state = __task_state_index(prev_state, p->exit_state);
return state ? (1 << (state - 1)) : state;
}
@@ -220,10 +222,11 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
TRACE_EVENT(sched_switch,
TP_PROTO(bool preempt,
+ unsigned int prev_state,
struct task_struct *prev,
struct task_struct *next),
- TP_ARGS(preempt, prev, next),
+ TP_ARGS(preempt, prev_state, prev, next),
TP_STRUCT__entry(
__array( char, prev_comm, TASK_COMM_LEN )
@@ -239,7 +242,7 @@ TRACE_EVENT(sched_switch,
memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
__entry->prev_pid = prev->pid;
__entry->prev_prio = prev->prio;
- __entry->prev_state = __trace_sched_switch_state(preempt, prev);
+ __entry->prev_state = __trace_sched_switch_state(preempt, prev_state, prev);
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
__entry->next_pid = next->pid;
__entry->next_prio = next->prio;
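A note on the new prototype: probes now receive the state snapshot taken before the switch instead of re-reading prev->__state, which may already have changed by the time the tracepoint fires. Below is a minimal sketch of a module probe against the updated signature, assuming CONFIG_TRACEPOINTS; the probe body itself is illustrative.

#include <linux/module.h>
#include <trace/events/sched.h>

static void probe_sched_switch(void *data, bool preempt,
			       unsigned int prev_state,
			       struct task_struct *prev,
			       struct task_struct *next)
{
	/* prev_state is the pre-switch snapshot passed by the caller */
	pr_debug("switch: %s(%d) state=%u -> %s(%d)\n",
		 prev->comm, prev->pid, prev_state,
		 next->comm, next->pid);
}

static int __init probe_init(void)
{
	return register_trace_sched_switch(probe_sched_switch, NULL);
}

static void __exit probe_exit(void)
{
	unregister_trace_sched_switch(probe_sched_switch, NULL);
	tracepoint_synchronize_unregister();
}

module_init(probe_init);
module_exit(probe_exit);
MODULE_LICENSE("GPL");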
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 3e042ca2cedb..a8a64b97504d 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -14,7 +14,7 @@
EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \
EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \
EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \
- EM(SKB_DROP_REASON_TCP_FILTER, TCP_FILTER) \
+ EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \
EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \
EMe(SKB_DROP_REASON_MAX, MAX)
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 1e566ac4b812..ab8ae1f6ba84 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -794,6 +794,9 @@ RPC_SHOW_SOCKET
RPC_SHOW_SOCK
+
+#include <trace/events/net_probe_common.h>
+
/*
* Now redefine the EM() and EMe() macros to map the enums to the strings
* that will be printed in the output.
@@ -816,27 +819,32 @@ DECLARE_EVENT_CLASS(xs_socket_event,
__field(unsigned int, socket_state)
__field(unsigned int, sock_state)
__field(unsigned long long, ino)
- __string(dstaddr,
- xprt->address_strings[RPC_DISPLAY_ADDR])
- __string(dstport,
- xprt->address_strings[RPC_DISPLAY_PORT])
+ __array(__u8, saddr, sizeof(struct sockaddr_in6))
+ __array(__u8, daddr, sizeof(struct sockaddr_in6))
),
TP_fast_assign(
struct inode *inode = SOCK_INODE(socket);
+ const struct sock *sk = socket->sk;
+ const struct inet_sock *inet = inet_sk(sk);
+
+ memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+ memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
+
+ TP_STORE_ADDR_PORTS(__entry, inet, sk);
+
__entry->socket_state = socket->state;
__entry->sock_state = socket->sk->sk_state;
__entry->ino = (unsigned long long)inode->i_ino;
- __assign_str(dstaddr,
- xprt->address_strings[RPC_DISPLAY_ADDR]);
- __assign_str(dstport,
- xprt->address_strings[RPC_DISPLAY_PORT]);
+
),
TP_printk(
- "socket:[%llu] dstaddr=%s/%s "
+ "socket:[%llu] srcaddr=%pISpc dstaddr=%pISpc "
"state=%u (%s) sk_state=%u (%s)",
- __entry->ino, __get_str(dstaddr), __get_str(dstport),
+ __entry->ino,
+ __entry->saddr,
+ __entry->daddr,
__entry->socket_state,
rpc_show_socket_state(__entry->socket_state),
__entry->sock_state,
@@ -866,29 +874,33 @@ DECLARE_EVENT_CLASS(xs_socket_event_done,
__field(unsigned int, socket_state)
__field(unsigned int, sock_state)
__field(unsigned long long, ino)
- __string(dstaddr,
- xprt->address_strings[RPC_DISPLAY_ADDR])
- __string(dstport,
- xprt->address_strings[RPC_DISPLAY_PORT])
+ __array(__u8, saddr, sizeof(struct sockaddr_in6))
+ __array(__u8, daddr, sizeof(struct sockaddr_in6))
),
TP_fast_assign(
struct inode *inode = SOCK_INODE(socket);
+ const struct sock *sk = socket->sk;
+ const struct inet_sock *inet = inet_sk(sk);
+
+ memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+ memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
+
+ TP_STORE_ADDR_PORTS(__entry, inet, sk);
+
__entry->socket_state = socket->state;
__entry->sock_state = socket->sk->sk_state;
__entry->ino = (unsigned long long)inode->i_ino;
__entry->error = error;
- __assign_str(dstaddr,
- xprt->address_strings[RPC_DISPLAY_ADDR]);
- __assign_str(dstport,
- xprt->address_strings[RPC_DISPLAY_PORT]);
),
TP_printk(
- "error=%d socket:[%llu] dstaddr=%s/%s "
+ "error=%d socket:[%llu] srcaddr=%pISpc dstaddr=%pISpc "
"state=%u (%s) sk_state=%u (%s)",
__entry->error,
- __entry->ino, __get_str(dstaddr), __get_str(dstport),
+ __entry->ino,
+ __entry->saddr,
+ __entry->daddr,
__entry->socket_state,
rpc_show_socket_state(__entry->socket_state),
__entry->sock_state,
@@ -953,7 +965,8 @@ TRACE_EVENT(rpc_socket_nospace,
{ BIT(XPRT_REMOVE), "REMOVE" }, \
{ BIT(XPRT_CONGESTED), "CONGESTED" }, \
{ BIT(XPRT_CWND_WAIT), "CWND_WAIT" }, \
- { BIT(XPRT_WRITE_SPACE), "WRITE_SPACE" })
+ { BIT(XPRT_WRITE_SPACE), "WRITE_SPACE" }, \
+ { BIT(XPRT_SND_IS_COOKIE), "SND_IS_COOKIE" })
DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class,
TP_PROTO(
@@ -1150,8 +1163,11 @@ DECLARE_EVENT_CLASS(xprt_writelock_event,
__entry->task_id = -1;
__entry->client_id = -1;
}
- __entry->snd_task_id = xprt->snd_task ?
- xprt->snd_task->tk_pid : -1;
+ if (xprt->snd_task &&
+ !test_bit(XPRT_SND_IS_COOKIE, &xprt->state))
+ __entry->snd_task_id = xprt->snd_task->tk_pid;
+ else
+ __entry->snd_task_id = -1;
),
TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
@@ -1196,8 +1212,12 @@ DECLARE_EVENT_CLASS(xprt_cong_event,
__entry->task_id = -1;
__entry->client_id = -1;
}
- __entry->snd_task_id = xprt->snd_task ?
- xprt->snd_task->tk_pid : -1;
+ if (xprt->snd_task &&
+ !test_bit(XPRT_SND_IS_COOKIE, &xprt->state))
+ __entry->snd_task_id = xprt->snd_task->tk_pid;
+ else
+ __entry->snd_task_id = -1;
+
__entry->cong = xprt->cong;
__entry->cwnd = xprt->cwnd;
__entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state);
@@ -1605,26 +1625,53 @@ TRACE_DEFINE_ENUM(SVC_COMPLETE);
{ SVC_PENDING, "SVC_PENDING" }, \
{ SVC_COMPLETE, "SVC_COMPLETE" })
+#define SVC_RQST_ENDPOINT_FIELDS(r) \
+ __sockaddr(server, (r)->rq_xprt->xpt_locallen) \
+ __sockaddr(client, (r)->rq_xprt->xpt_remotelen) \
+ __field(unsigned int, netns_ino) \
+ __field(u32, xid)
+
+#define SVC_RQST_ENDPOINT_ASSIGNMENTS(r) \
+ do { \
+ struct svc_xprt *xprt = (r)->rq_xprt; \
+ __assign_sockaddr(server, &xprt->xpt_local, \
+ xprt->xpt_locallen); \
+ __assign_sockaddr(client, &xprt->xpt_remote, \
+ xprt->xpt_remotelen); \
+ __entry->netns_ino = xprt->xpt_net->ns.inum; \
+ __entry->xid = be32_to_cpu((r)->rq_xid); \
+ } while (0)
+
+#define SVC_RQST_ENDPOINT_FORMAT \
+ "xid=0x%08x server=%pISpc client=%pISpc"
+
+#define SVC_RQST_ENDPOINT_VARARGS \
+ __entry->xid, __get_sockaddr(server), __get_sockaddr(client)
+
TRACE_EVENT(svc_authenticate,
TP_PROTO(const struct svc_rqst *rqst, int auth_res),
TP_ARGS(rqst, auth_res),
TP_STRUCT__entry(
- __field(u32, xid)
+ SVC_RQST_ENDPOINT_FIELDS(rqst)
+
__field(unsigned long, svc_status)
__field(unsigned long, auth_stat)
),
TP_fast_assign(
- __entry->xid = be32_to_cpu(rqst->rq_xid);
+ SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst);
+
__entry->svc_status = auth_res;
__entry->auth_stat = be32_to_cpu(rqst->rq_auth_stat);
),
- TP_printk("xid=0x%08x auth_res=%s auth_stat=%s",
- __entry->xid, svc_show_status(__entry->svc_status),
- rpc_show_auth_stat(__entry->auth_stat))
+ TP_printk(SVC_RQST_ENDPOINT_FORMAT
+ " auth_res=%s auth_stat=%s",
+ SVC_RQST_ENDPOINT_VARARGS,
+ svc_show_status(__entry->svc_status),
+ rpc_show_auth_stat(__entry->auth_stat))
);
TRACE_EVENT(svc_process,
@@ -1660,7 +1707,6 @@ TRACE_EVENT(svc_process,
);
DECLARE_EVENT_CLASS(svc_rqst_event,
-
TP_PROTO(
const struct svc_rqst *rqst
),
@@ -1668,20 +1714,20 @@ DECLARE_EVENT_CLASS(svc_rqst_event,
TP_ARGS(rqst),
TP_STRUCT__entry(
- __field(u32, xid)
+ SVC_RQST_ENDPOINT_FIELDS(rqst)
+
__field(unsigned long, flags)
- __string(addr, rqst->rq_xprt->xpt_remotebuf)
),
TP_fast_assign(
- __entry->xid = be32_to_cpu(rqst->rq_xid);
+ SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst);
+
__entry->flags = rqst->rq_flags;
- __assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
),
- TP_printk("addr=%s xid=0x%08x flags=%s",
- __get_str(addr), __entry->xid,
- show_rqstp_flags(__entry->flags))
+ TP_printk(SVC_RQST_ENDPOINT_FORMAT " flags=%s",
+ SVC_RQST_ENDPOINT_VARARGS,
+ show_rqstp_flags(__entry->flags))
);
#define DEFINE_SVC_RQST_EVENT(name) \
DEFINE_EVENT(svc_rqst_event, svc_##name, \
@@ -1694,34 +1740,63 @@ DEFINE_SVC_RQST_EVENT(defer);
DEFINE_SVC_RQST_EVENT(drop);
DECLARE_EVENT_CLASS(svc_rqst_status,
-
- TP_PROTO(struct svc_rqst *rqst, int status),
+ TP_PROTO(
+ const struct svc_rqst *rqst,
+ int status
+ ),
TP_ARGS(rqst, status),
TP_STRUCT__entry(
- __field(u32, xid)
+ SVC_RQST_ENDPOINT_FIELDS(rqst)
+
__field(int, status)
__field(unsigned long, flags)
- __string(addr, rqst->rq_xprt->xpt_remotebuf)
),
TP_fast_assign(
- __entry->xid = be32_to_cpu(rqst->rq_xid);
+ SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst);
+
__entry->status = status;
__entry->flags = rqst->rq_flags;
- __assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
),
- TP_printk("addr=%s xid=0x%08x status=%d flags=%s",
- __get_str(addr), __entry->xid,
- __entry->status, show_rqstp_flags(__entry->flags))
+ TP_printk(SVC_RQST_ENDPOINT_FORMAT " status=%d flags=%s",
+ SVC_RQST_ENDPOINT_VARARGS,
+ __entry->status, show_rqstp_flags(__entry->flags))
);
DEFINE_EVENT(svc_rqst_status, svc_send,
- TP_PROTO(struct svc_rqst *rqst, int status),
+ TP_PROTO(const struct svc_rqst *rqst, int status),
TP_ARGS(rqst, status));
+TRACE_EVENT(svc_stats_latency,
+ TP_PROTO(
+ const struct svc_rqst *rqst
+ ),
+
+ TP_ARGS(rqst),
+
+ TP_STRUCT__entry(
+ SVC_RQST_ENDPOINT_FIELDS(rqst)
+
+ __field(unsigned long, execute)
+ __string(procedure, svc_proc_name(rqst))
+ ),
+
+ TP_fast_assign(
+ SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst);
+
+ __entry->execute = ktime_to_us(ktime_sub(ktime_get(),
+ rqst->rq_stime));
+ __assign_str(procedure, svc_proc_name(rqst));
+ ),
+
+ TP_printk(SVC_RQST_ENDPOINT_FORMAT " proc=%s execute-us=%lu",
+ SVC_RQST_ENDPOINT_VARARGS,
+ __get_str(procedure), __entry->execute)
+);
+
#define show_svc_xprt_flags(flags) \
__print_flags(flags, "|", \
{ (1UL << XPT_BUSY), "XPT_BUSY"}, \
@@ -1754,65 +1829,114 @@ TRACE_EVENT(svc_xprt_create_err,
__field(long, error)
__string(program, program)
__string(protocol, protocol)
- __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ __sockaddr(addr, salen)
),
TP_fast_assign(
__entry->error = PTR_ERR(xprt);
__assign_str(program, program);
__assign_str(protocol, protocol);
- memcpy(__entry->addr, sap, min(salen, sizeof(__entry->addr)));
+ __assign_sockaddr(addr, sap, salen);
),
TP_printk("addr=%pISpc program=%s protocol=%s error=%ld",
- __entry->addr, __get_str(program), __get_str(protocol),
+ __get_sockaddr(addr), __get_str(program), __get_str(protocol),
__entry->error)
);
+#define SVC_XPRT_ENDPOINT_FIELDS(x) \
+ __sockaddr(server, (x)->xpt_locallen) \
+ __sockaddr(client, (x)->xpt_remotelen) \
+ __field(unsigned long, flags) \
+ __field(unsigned int, netns_ino)
+
+#define SVC_XPRT_ENDPOINT_ASSIGNMENTS(x) \
+ do { \
+ __assign_sockaddr(server, &(x)->xpt_local, \
+ (x)->xpt_locallen); \
+ __assign_sockaddr(client, &(x)->xpt_remote, \
+ (x)->xpt_remotelen); \
+ __entry->flags = (x)->xpt_flags; \
+ __entry->netns_ino = (x)->xpt_net->ns.inum; \
+ } while (0)
+
+#define SVC_XPRT_ENDPOINT_FORMAT \
+ "server=%pISpc client=%pISpc flags=%s"
+
+#define SVC_XPRT_ENDPOINT_VARARGS \
+ __get_sockaddr(server), __get_sockaddr(client), \
+ show_svc_xprt_flags(__entry->flags)
+
TRACE_EVENT(svc_xprt_enqueue,
- TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst),
+ TP_PROTO(
+ const struct svc_xprt *xprt,
+ const struct svc_rqst *rqst
+ ),
TP_ARGS(xprt, rqst),
TP_STRUCT__entry(
+ SVC_XPRT_ENDPOINT_FIELDS(xprt)
+
__field(int, pid)
- __field(unsigned long, flags)
- __string(addr, xprt->xpt_remotebuf)
),
TP_fast_assign(
+ SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt);
+
__entry->pid = rqst? rqst->rq_task->pid : 0;
- __entry->flags = xprt->xpt_flags;
- __assign_str(addr, xprt->xpt_remotebuf);
),
- TP_printk("addr=%s pid=%d flags=%s", __get_str(addr),
- __entry->pid, show_svc_xprt_flags(__entry->flags))
+ TP_printk(SVC_XPRT_ENDPOINT_FORMAT " pid=%d",
+ SVC_XPRT_ENDPOINT_VARARGS, __entry->pid)
+);
+
+TRACE_EVENT(svc_xprt_dequeue,
+ TP_PROTO(
+ const struct svc_rqst *rqst
+ ),
+
+ TP_ARGS(rqst),
+
+ TP_STRUCT__entry(
+ SVC_XPRT_ENDPOINT_FIELDS(rqst->rq_xprt)
+
+ __field(unsigned long, wakeup)
+ ),
+
+ TP_fast_assign(
+ SVC_XPRT_ENDPOINT_ASSIGNMENTS(rqst->rq_xprt);
+
+ __entry->wakeup = ktime_to_us(ktime_sub(ktime_get(),
+ rqst->rq_qtime));
+ ),
+
+ TP_printk(SVC_XPRT_ENDPOINT_FORMAT " wakeup-us=%lu",
+ SVC_XPRT_ENDPOINT_VARARGS, __entry->wakeup)
);
DECLARE_EVENT_CLASS(svc_xprt_event,
- TP_PROTO(struct svc_xprt *xprt),
+ TP_PROTO(
+ const struct svc_xprt *xprt
+ ),
TP_ARGS(xprt),
TP_STRUCT__entry(
- __field(unsigned long, flags)
- __string(addr, xprt->xpt_remotebuf)
+ SVC_XPRT_ENDPOINT_FIELDS(xprt)
),
TP_fast_assign(
- __entry->flags = xprt->xpt_flags;
- __assign_str(addr, xprt->xpt_remotebuf);
+ SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt);
),
- TP_printk("addr=%s flags=%s", __get_str(addr),
- show_svc_xprt_flags(__entry->flags))
+ TP_printk(SVC_XPRT_ENDPOINT_FORMAT, SVC_XPRT_ENDPOINT_VARARGS)
);
#define DEFINE_SVC_XPRT_EVENT(name) \
DEFINE_EVENT(svc_xprt_event, svc_xprt_##name, \
TP_PROTO( \
- struct svc_xprt *xprt \
+ const struct svc_xprt *xprt \
), \
TP_ARGS(xprt))
@@ -1830,44 +1954,25 @@ TRACE_EVENT(svc_xprt_accept,
TP_ARGS(xprt, service),
TP_STRUCT__entry(
- __string(addr, xprt->xpt_remotebuf)
+ SVC_XPRT_ENDPOINT_FIELDS(xprt)
+
__string(protocol, xprt->xpt_class->xcl_name)
__string(service, service)
),
TP_fast_assign(
- __assign_str(addr, xprt->xpt_remotebuf);
+ SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt);
+
__assign_str(protocol, xprt->xpt_class->xcl_name);
__assign_str(service, service);
),
- TP_printk("addr=%s protocol=%s service=%s",
- __get_str(addr), __get_str(protocol), __get_str(service)
+ TP_printk(SVC_XPRT_ENDPOINT_FORMAT " protocol=%s service=%s",
+ SVC_XPRT_ENDPOINT_VARARGS,
+ __get_str(protocol), __get_str(service)
)
);
-TRACE_EVENT(svc_xprt_dequeue,
- TP_PROTO(struct svc_rqst *rqst),
-
- TP_ARGS(rqst),
-
- TP_STRUCT__entry(
- __field(unsigned long, flags)
- __field(unsigned long, wakeup)
- __string(addr, rqst->rq_xprt->xpt_remotebuf)
- ),
-
- TP_fast_assign(
- __entry->flags = rqst->rq_xprt->xpt_flags;
- __entry->wakeup = ktime_to_us(ktime_sub(ktime_get(),
- rqst->rq_qtime));
- __assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
- ),
-
- TP_printk("addr=%s flags=%s wakeup-us=%lu", __get_str(addr),
- show_svc_xprt_flags(__entry->flags), __entry->wakeup)
-);
-
TRACE_EVENT(svc_wake_up,
TP_PROTO(int pid),
@@ -1902,31 +2007,6 @@ TRACE_EVENT(svc_alloc_arg_err,
TP_printk("pages=%u", __entry->pages)
);
-TRACE_EVENT(svc_stats_latency,
- TP_PROTO(const struct svc_rqst *rqst),
-
- TP_ARGS(rqst),
-
- TP_STRUCT__entry(
- __field(u32, xid)
- __field(unsigned long, execute)
- __string(procedure, svc_proc_name(rqst))
- __string(addr, rqst->rq_xprt->xpt_remotebuf)
- ),
-
- TP_fast_assign(
- __entry->xid = be32_to_cpu(rqst->rq_xid);
- __entry->execute = ktime_to_us(ktime_sub(ktime_get(),
- rqst->rq_stime));
- __assign_str(procedure, svc_proc_name(rqst));
- __assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
- ),
-
- TP_printk("addr=%s xid=0x%08x proc=%s execute-us=%lu",
- __get_str(addr), __entry->xid, __get_str(procedure),
- __entry->execute)
-);
-
DECLARE_EVENT_CLASS(svc_deferred_event,
TP_PROTO(
const struct svc_deferred_req *dr
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index ca2e9009a651..de136dbd623a 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -327,11 +327,11 @@ TRACE_EVENT(mm_vmscan_lru_isolate,
__print_symbolic(__entry->lru, LRU_NAMES))
);
-TRACE_EVENT(mm_vmscan_writepage,
+TRACE_EVENT(mm_vmscan_write_folio,
- TP_PROTO(struct page *page),
+ TP_PROTO(struct folio *folio),
- TP_ARGS(page),
+ TP_ARGS(folio),
TP_STRUCT__entry(
__field(unsigned long, pfn)
@@ -339,9 +339,9 @@ TRACE_EVENT(mm_vmscan_writepage,
),
TP_fast_assign(
- __entry->pfn = page_to_pfn(page);
+ __entry->pfn = folio_pfn(folio);
__entry->reclaim_flags = trace_reclaim_flags(
- page_is_file_lru(page));
+ folio_is_file_lru(folio));
),
TP_printk("page=%p pfn=0x%lx flags=%s",
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index a345b1e12daf..86b2a82da546 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -735,34 +735,6 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
)
);
-DECLARE_EVENT_CLASS(writeback_congest_waited_template,
-
- TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed),
-
- TP_ARGS(usec_timeout, usec_delayed),
-
- TP_STRUCT__entry(
- __field( unsigned int, usec_timeout )
- __field( unsigned int, usec_delayed )
- ),
-
- TP_fast_assign(
- __entry->usec_timeout = usec_timeout;
- __entry->usec_delayed = usec_delayed;
- ),
-
- TP_printk("usec_timeout=%u usec_delayed=%u",
- __entry->usec_timeout,
- __entry->usec_delayed)
-);
-
-DEFINE_EVENT(writeback_congest_waited_template, writeback_congestion_wait,
-
- TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed),
-
- TP_ARGS(usec_timeout, usec_delayed)
-);
-
DECLARE_EVENT_CLASS(writeback_single_inode_template,
TP_PROTO(struct inode *inode,
diff --git a/include/trace/perf.h b/include/trace/perf.h
index 5d48c46a3008..5800d13146c3 100644
--- a/include/trace/perf.h
+++ b/include/trace/perf.h
@@ -21,6 +21,9 @@
#undef __get_bitmask
#define __get_bitmask(field) (char *)__get_dynamic_array(field)
+#undef __get_sockaddr
+#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field))
+
#undef __get_rel_dynamic_array
#define __get_rel_dynamic_array(field) \
((void *)__entry + \
@@ -38,6 +41,9 @@
#undef __get_rel_bitmask
#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field)
+#undef __get_rel_sockaddr
+#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field))
+
#undef __perf_count
#define __perf_count(c) (__count = (c))
diff --git a/include/trace/stages/stage1_defines.h b/include/trace/stages/stage1_defines.h
index 8ab88c766d2b..a16783419687 100644
--- a/include/trace/stages/stage1_defines.h
+++ b/include/trace/stages/stage1_defines.h
@@ -29,6 +29,9 @@
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(char, item, -1)
+#undef __sockaddr
+#define __sockaddr(field, len) __dynamic_array(u8, field, len)
+
#undef __rel_dynamic_array
#define __rel_dynamic_array(type, item, len) u32 __rel_loc_##item;
@@ -41,5 +44,8 @@
#undef __rel_bitmask
#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1)
+#undef __rel_sockaddr
+#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
+
#undef TP_STRUCT__entry
#define TP_STRUCT__entry(args...) args
diff --git a/include/trace/stages/stage2_defines.h b/include/trace/stages/stage2_defines.h
index 9f2341df40da..42fd1e8813ec 100644
--- a/include/trace/stages/stage2_defines.h
+++ b/include/trace/stages/stage2_defines.h
@@ -29,11 +29,14 @@
#undef __string
#define __string(item, src) __dynamic_array(char, item, -1)
+#undef __string_len
+#define __string_len(item, src, len) __dynamic_array(char, item, -1)
+
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
-#undef __string_len
-#define __string_len(item, src, len) __dynamic_array(char, item, -1)
+#undef __sockaddr
+#define __sockaddr(field, len) __dynamic_array(u8, field, len)
#undef __rel_dynamic_array
#define __rel_dynamic_array(type, item, len) u32 item;
@@ -46,3 +49,6 @@
#undef __rel_bitmask
#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1)
+
+#undef __rel_sockaddr
+#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
diff --git a/include/trace/stages/stage3_defines.h b/include/trace/stages/stage3_defines.h
index 0bc131993b7a..e3b183e9d18e 100644
--- a/include/trace/stages/stage3_defines.h
+++ b/include/trace/stages/stage3_defines.h
@@ -51,6 +51,12 @@
trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \
})
+#undef __get_sockaddr
+#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field))
+
+#undef __get_rel_sockaddr
+#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field))
+
#undef __print_flags
#define __print_flags(flag, delim, flag_array...) \
({ \
diff --git a/include/trace/stages/stage4_defines.h b/include/trace/stages/stage4_defines.h
index 780a10fa5279..e80cdc397a43 100644
--- a/include/trace/stages/stage4_defines.h
+++ b/include/trace/stages/stage4_defines.h
@@ -41,6 +41,9 @@
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+#undef __sockaddr
+#define __sockaddr(field, len) __dynamic_array(u8, field, len)
+
#undef __rel_dynamic_array
#define __rel_dynamic_array(_type, _item, _len) { \
.type = "__rel_loc " #_type "[]", .name = #_item, \
@@ -55,3 +58,6 @@
#undef __rel_bitmask
#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1)
+
+#undef __rel_sockaddr
+#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
diff --git a/include/trace/stages/stage5_defines.h b/include/trace/stages/stage5_defines.h
index fb15394aae31..7ee5931300e6 100644
--- a/include/trace/stages/stage5_defines.h
+++ b/include/trace/stages/stage5_defines.h
@@ -81,3 +81,9 @@
#undef __rel_bitmask
#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, \
__bitmask_size_in_longs(nr_bits))
+
+#undef __sockaddr
+#define __sockaddr(field, len) __dynamic_array(u8, field, len)
+
+#undef __rel_sockaddr
+#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
diff --git a/include/trace/stages/stage6_defines.h b/include/trace/stages/stage6_defines.h
index b3a1f26026be..e1724f73594b 100644
--- a/include/trace/stages/stage6_defines.h
+++ b/include/trace/stages/stage6_defines.h
@@ -45,6 +45,16 @@
#define __assign_bitmask(dst, src, nr_bits) \
memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits))
+#undef __sockaddr
+#define __sockaddr(field, len) __dynamic_array(u8, field, len)
+
+#undef __get_sockaddr
+#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field))
+
+#undef __assign_sockaddr
+#define __assign_sockaddr(dest, src, len) \
+ memcpy(__get_dynamic_array(dest), src, len)
+
#undef __rel_dynamic_array
#define __rel_dynamic_array(type, item, len) \
__entry->__rel_loc_##item = __data_offsets.item;
@@ -76,6 +86,16 @@
#define __assign_rel_bitmask(dst, src, nr_bits) \
memcpy(__get_rel_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits))
+#undef __rel_sockaddr
+#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
+
+#undef __get_rel_sockaddr
+#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field))
+
+#undef __assign_rel_sockaddr
+#define __assign_rel_sockaddr(dest, src, len) \
+ memcpy(__get_rel_dynamic_array(dest), src, len)
+
#undef TP_fast_assign
#define TP_fast_assign(args...) args
diff --git a/include/trace/stages/stage7_defines.h b/include/trace/stages/stage7_defines.h
index d65445328f18..8a7ec24c246d 100644
--- a/include/trace/stages/stage7_defines.h
+++ b/include/trace/stages/stage7_defines.h
@@ -13,10 +13,12 @@
#undef __get_dynamic_array_len
#undef __get_str
#undef __get_bitmask
+#undef __get_sockaddr
#undef __get_rel_dynamic_array
#undef __get_rel_dynamic_array_len
#undef __get_rel_str
#undef __get_rel_bitmask
+#undef __get_rel_sockaddr
#undef __print_array
#undef __print_hex_dump
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 738619994e26..d956b2993970 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -309,6 +309,7 @@ struct btrfs_ioctl_fs_info_args {
#define BTRFS_FEATURE_INCOMPAT_METADATA_UUID (1ULL << 10)
#define BTRFS_FEATURE_INCOMPAT_RAID1C34 (1ULL << 11)
#define BTRFS_FEATURE_INCOMPAT_ZONED (1ULL << 12)
+#define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 (1ULL << 13)
struct btrfs_ioctl_feature_flags {
__u64 compat_flags;
@@ -868,6 +869,134 @@ struct btrfs_ioctl_get_subvol_rootref_args {
__u8 align[7];
};
+/*
+ * Data and metadata for an encoded read or write.
+ *
+ * Encoded I/O bypasses any encoding automatically done by the filesystem (e.g.,
+ * compression). This can be used to read the compressed contents of a file or
+ * write pre-compressed data directly to a file.
+ *
+ * BTRFS_IOC_ENCODED_READ and BTRFS_IOC_ENCODED_WRITE are essentially
+ * preadv/pwritev with additional metadata about how the data is encoded and the
+ * size of the unencoded data.
+ *
+ * BTRFS_IOC_ENCODED_READ fills the given iovecs with the encoded data, fills
+ * the metadata fields, and returns the size of the encoded data. It reads one
+ * extent per call. It can also read data which is not encoded.
+ *
+ * BTRFS_IOC_ENCODED_WRITE uses the metadata fields, writes the encoded data
+ * from the iovecs, and returns the size of the encoded data. Note that the
+ * encoded data is not validated when it is written; if it is not valid (e.g.,
+ * it cannot be decompressed), then a subsequent read may return an error.
+ *
+ * Since the filesystem page cache contains decoded data, encoded I/O bypasses
+ * the page cache. Encoded I/O requires CAP_SYS_ADMIN.
+ */
+struct btrfs_ioctl_encoded_io_args {
+ /* Input parameters for both reads and writes. */
+
+ /*
+ * iovecs containing encoded data.
+ *
+ * For reads, if the size of the encoded data is larger than the sum of
+ * iov[n].iov_len for 0 <= n < iovcnt, then the ioctl fails with
+ * ENOBUFS.
+ *
+ * For writes, the size of the encoded data is the sum of iov[n].iov_len
+ * for 0 <= n < iovcnt. This must be less than 128 KiB (this limit may
+ * increase in the future). This must also be less than or equal to
+ * unencoded_len.
+ */
+ const struct iovec __user *iov;
+ /* Number of iovecs. */
+ unsigned long iovcnt;
+ /*
+ * Offset in file.
+ *
+ * For writes, must be aligned to the sector size of the filesystem.
+ */
+ __s64 offset;
+ /* Currently must be zero. */
+ __u64 flags;
+
+ /*
+ * For reads, the following members are output parameters that will
+ * contain the returned metadata for the encoded data.
+ * For writes, the following members must be set to the metadata for the
+ * encoded data.
+ */
+
+ /*
+ * Length of the data in the file.
+ *
+ * Must be less than or equal to unencoded_len - unencoded_offset. For
+ * writes, must be aligned to the sector size of the filesystem unless
+ * the data ends at or beyond the current end of the file.
+ */
+ __u64 len;
+ /*
+ * Length of the unencoded (i.e., decrypted and decompressed) data.
+ *
+ * For writes, must be no more than 128 KiB (this limit may increase in
+ * the future). If the unencoded data is actually longer than
+ * unencoded_len, then it is truncated; if it is shorter, then it is
+ * extended with zeroes.
+ */
+ __u64 unencoded_len;
+ /*
+ * Offset from the first byte of the unencoded data to the first byte of
+ * logical data in the file.
+ *
+ * Must be less than unencoded_len.
+ */
+ __u64 unencoded_offset;
+ /*
+ * BTRFS_ENCODED_IO_COMPRESSION_* type.
+ *
+ * For writes, must not be BTRFS_ENCODED_IO_COMPRESSION_NONE.
+ */
+ __u32 compression;
+ /* Currently always BTRFS_ENCODED_IO_ENCRYPTION_NONE. */
+ __u32 encryption;
+ /*
+ * Reserved for future expansion.
+ *
+ * For reads, always returned as zero. Users should check for non-zero
+ * bytes. If there are any, then the kernel has a newer version of this
+ * structure with additional information that the user definition is
+ * missing.
+ *
+ * For writes, must be zeroed.
+ */
+ __u8 reserved[64];
+};
+
+/* Data is not compressed. */
+#define BTRFS_ENCODED_IO_COMPRESSION_NONE 0
+/* Data is compressed as a single zlib stream. */
+#define BTRFS_ENCODED_IO_COMPRESSION_ZLIB 1
+/*
+ * Data is compressed as a single zstd frame with the windowLog compression
+ * parameter set to no more than 17.
+ */
+#define BTRFS_ENCODED_IO_COMPRESSION_ZSTD 2
+/*
+ * Data is compressed sector by sector (using the sector size indicated by the
+ * name of the constant) with LZO1X and wrapped in the format documented in
+ * fs/btrfs/lzo.c. For writes, the compression sector size must match the
+ * filesystem sector size.
+ */
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_4K 3
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_8K 4
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_16K 5
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_32K 6
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_64K 7
+#define BTRFS_ENCODED_IO_COMPRESSION_TYPES 8
+
+/* Data is not encrypted. */
+#define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0
+#define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1
+
/* Error codes as returned by the kernel */
enum btrfs_err_code {
BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
@@ -996,5 +1125,9 @@ enum btrfs_err_code {
struct btrfs_ioctl_ino_lookup_user_args)
#define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \
struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_ENCODED_READ _IOR(BTRFS_IOCTL_MAGIC, 64, \
+ struct btrfs_ioctl_encoded_io_args)
+#define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \
+ struct btrfs_ioctl_encoded_io_args)
#endif /* _UAPI_LINUX_BTRFS_H */
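Taken together, the new ioctls behave like preadv()/pwritev() plus the metadata described above. A hedged userspace sketch of a single-extent encoded read, under the constraints stated in the comments (CAP_SYS_ADMIN, flags zero); the helper name and buffer size are illustrative:

#include <linux/btrfs.h>
#include <sys/ioctl.h>
#include <sys/uio.h>
#include <stdio.h>
#include <string.h>

int read_encoded_extent(int fd, long long file_offset)
{
	static char buf[128 * 1024];	/* landing area for encoded bytes */
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct btrfs_ioctl_encoded_io_args args;
	int ret;

	memset(&args, 0, sizeof(args));
	args.iov = &iov;
	args.iovcnt = 1;
	args.offset = file_offset;	/* flags stays zero */

	ret = ioctl(fd, BTRFS_IOC_ENCODED_READ, &args);
	if (ret < 0)
		return -1;
	/* On success the metadata fields describe what was read. */
	printf("encoded=%d len=%llu unencoded_len=%llu compression=%u\n",
	       ret, (unsigned long long)args.len,
	       (unsigned long long)args.unencoded_len, args.compression);
	return 0;
}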
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index 5416f1f1a77a..b069752a8ecf 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -53,6 +53,9 @@
/* tracks free space in block groups. */
#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
+/* Holds the block group items for extent tree v2. */
+#define BTRFS_BLOCK_GROUP_TREE_OBJECTID 11ULL
+
/* device stats in the device tree */
#define BTRFS_DEV_STATS_OBJECTID 0ULL
diff --git a/include/uapi/linux/cyclades.h b/include/uapi/linux/cyclades.h
new file mode 100644
index 000000000000..6225c5aebe06
--- /dev/null
+++ b/include/uapi/linux/cyclades.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _UAPI_LINUX_CYCLADES_H
+#define _UAPI_LINUX_CYCLADES_H
+
+#warning "Support for features provided by this header has been removed"
+#warning "Please consider updating your code"
+
+struct cyclades_monitor {
+ unsigned long int_count;
+ unsigned long char_count;
+ unsigned long char_max;
+ unsigned long char_last;
+};
+
+#define CYGETMON 0x435901
+#define CYGETTHRESH 0x435902
+#define CYSETTHRESH 0x435903
+#define CYGETDEFTHRESH 0x435904
+#define CYSETDEFTHRESH 0x435905
+#define CYGETTIMEOUT 0x435906
+#define CYSETTIMEOUT 0x435907
+#define CYGETDEFTIMEOUT 0x435908
+#define CYSETDEFTIMEOUT 0x435909
+#define CYSETRFLOW 0x43590a
+#define CYGETRFLOW 0x43590b
+#define CYSETRTSDTR_INV 0x43590c
+#define CYGETRTSDTR_INV 0x43590d
+#define CYZSETPOLLCYCLE 0x43590e
+#define CYZGETPOLLCYCLE 0x43590f
+#define CYGETCD1400VER 0x435910
+#define CYSETWAIT 0x435912
+#define CYGETWAIT 0x435913
+
+#endif /* _UAPI_LINUX_CYCLADES_H */
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 61bf4774b8f2..787c657bfae8 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -35,10 +35,14 @@ typedef __s64 Elf64_Sxword;
#define PT_HIOS 0x6fffffff /* OS-specific */
#define PT_LOPROC 0x70000000
#define PT_HIPROC 0x7fffffff
-#define PT_GNU_EH_FRAME 0x6474e550
-#define PT_GNU_PROPERTY 0x6474e553
-
+#define PT_GNU_EH_FRAME (PT_LOOS + 0x474e550)
#define PT_GNU_STACK (PT_LOOS + 0x474e551)
+#define PT_GNU_RELRO (PT_LOOS + 0x474e552)
+#define PT_GNU_PROPERTY (PT_LOOS + 0x474e553)
+
+
+/* ARM MTE memory tag segment type */
+#define PT_ARM_MEMTAG_MTE (PT_LOPROC + 0x1)
/*
* Extended Numbering
diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h
index 225ec87d4f22..7989d9483ea7 100644
--- a/include/uapi/linux/input-event-codes.h
+++ b/include/uapi/linux/input-event-codes.h
@@ -278,7 +278,8 @@
#define KEY_PAUSECD 201
#define KEY_PROG3 202
#define KEY_PROG4 203
-#define KEY_DASHBOARD 204 /* AL Dashboard */
+#define KEY_ALL_APPLICATIONS 204 /* AC Desktop Show All Applications */
+#define KEY_DASHBOARD KEY_ALL_APPLICATIONS
#define KEY_SUSPEND 205
#define KEY_CLOSE 206 /* AC Close */
#define KEY_PLAY 207
@@ -612,6 +613,7 @@
#define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */
#define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */
#define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */
+#define KEY_DICTATE 0x24a /* Start or Stop Voice Dictation Session (HUTRR99) */
#define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */
#define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 787f491f0d2a..d2be4eb22008 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -101,6 +101,7 @@ enum {
#define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */
#define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */
#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */
+#define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */
enum {
IORING_OP_NOP,
@@ -143,6 +144,7 @@ enum {
IORING_OP_MKDIRAT,
IORING_OP_SYMLINKAT,
IORING_OP_LINKAT,
+ IORING_OP_MSG_RING,
/* this goes last, obviously */
IORING_OP_LAST,
@@ -199,9 +201,11 @@ struct io_uring_cqe {
*
* IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
* IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries
+ * IORING_CQE_F_MSG If set, CQE was generated with IORING_OP_MSG_RING
*/
#define IORING_CQE_F_BUFFER (1U << 0)
#define IORING_CQE_F_MORE (1U << 1)
+#define IORING_CQE_F_MSG (1U << 2)
enum {
IORING_CQE_BUFFER_SHIFT = 16,
@@ -257,10 +261,11 @@ struct io_cqring_offsets {
/*
* io_uring_enter(2) flags
*/
-#define IORING_ENTER_GETEVENTS (1U << 0)
-#define IORING_ENTER_SQ_WAKEUP (1U << 1)
-#define IORING_ENTER_SQ_WAIT (1U << 2)
-#define IORING_ENTER_EXT_ARG (1U << 3)
+#define IORING_ENTER_GETEVENTS (1U << 0)
+#define IORING_ENTER_SQ_WAKEUP (1U << 1)
+#define IORING_ENTER_SQ_WAIT (1U << 2)
+#define IORING_ENTER_EXT_ARG (1U << 3)
+#define IORING_ENTER_REGISTERED_RING (1U << 4)
/*
* Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -325,6 +330,10 @@ enum {
/* set/get max number of io-wq workers */
IORING_REGISTER_IOWQ_MAX_WORKERS = 19,
+ /* register/unregister io_uring fd with the ring */
+ IORING_REGISTER_RING_FDS = 20,
+ IORING_UNREGISTER_RING_FDS = 21,
+
/* this goes last */
IORING_REGISTER_LAST
};
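IORING_REGISTER_RING_FDS registers the ring file descriptor itself in a per-task table, so io_uring_enter() can skip the fdget()/fdput() pair on every call; the registered slot is then passed in place of the fd together with IORING_ENTER_REGISTERED_RING. A hedged raw-syscall sketch follows (liburing grew io_uring_register_ring_fd() for the same flow); the struct io_uring_rsrc_update usage reflects one reading of the interface and should be treated as an assumption:

#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <unistd.h>

static unsigned int ring_slot;	/* index assigned at registration */

int register_ring_fd(int ring_fd)
{
	struct io_uring_rsrc_update upd = {
		.offset = -1U,		/* let the kernel pick a slot */
		.data	= ring_fd,
	};

	if (syscall(__NR_io_uring_register, ring_fd,
		    IORING_REGISTER_RING_FDS, &upd, 1) != 1)
		return -1;
	ring_slot = upd.offset;		/* chosen slot is written back */
	return 0;
}

int enter_registered(unsigned int to_submit)
{
	/* The first argument is now the slot, not the fd. */
	return syscall(__NR_io_uring_enter, ring_slot, to_submit, 0,
		       IORING_ENTER_REGISTERED_RING, NULL, 0);
}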
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 9563d294f181..507ee1f2aa96 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1133,6 +1133,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
#define KVM_CAP_VM_GPA_BITS 207
#define KVM_CAP_XSAVE2 208
+#define KVM_CAP_SYS_ATTRIBUTES 209
+#define KVM_CAP_PPC_AIL_MODE_3 210
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1623,9 +1625,6 @@ struct kvm_enc_region {
#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
-/* Available with KVM_CAP_XSAVE2 */
-#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
-
struct kvm_s390_pv_sec_parm {
__u64 origin;
__u64 length;
@@ -2047,4 +2046,7 @@ struct kvm_stats_desc {
#define KVM_GET_STATS_FD _IO(KVMIO, 0xce)
+/* Available with KVM_CAP_XSAVE2 */
+#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
+
#endif /* __LINUX_KVM_H */
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 0425cd79af9a..f724129c0425 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -36,6 +36,7 @@
#define EFIVARFS_MAGIC 0xde5e81e4
#define HOSTFS_SUPER_MAGIC 0x00c0ffee
#define OVERLAYFS_SUPER_MAGIC 0x794c7630
+#define FUSE_SUPER_MAGIC 0x65735546
#define MINIX_SUPER_MAGIC 0x137F /* minix v1 fs, 14 char names */
#define MINIX_SUPER_MAGIC2 0x138F /* minix v1 fs, 30 char names */
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 4b3395082d15..26071021e986 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -106,7 +106,7 @@ enum ip_conntrack_status {
IPS_NAT_CLASH = IPS_UNTRACKED,
#endif
- /* Conntrack got a helper explicitly attached via CT target. */
+ /* Conntrack got a helper explicitly attached (ruleset, ctnetlink). */
IPS_HELPER_BIT = 13,
IPS_HELPER = (1 << IPS_HELPER_BIT),
diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h
index d99b5a772698..b2e43185e3b5 100644
--- a/include/uapi/linux/nvme_ioctl.h
+++ b/include/uapi/linux/nvme_ioctl.h
@@ -55,7 +55,10 @@ struct nvme_passthru_cmd64 {
__u64 metadata;
__u64 addr;
__u32 metadata_len;
- __u32 data_len;
+ union {
+ __u32 data_len; /* for non-vectored io */
+ __u32 vec_cnt; /* for vectored io */
+ };
__u32 cdw10;
__u32 cdw11;
__u32 cdw12;
@@ -78,5 +81,6 @@ struct nvme_passthru_cmd64 {
#define NVME_IOCTL_RESCAN _IO('N', 0x46)
#define NVME_IOCTL_ADMIN64_CMD _IOWR('N', 0x47, struct nvme_passthru_cmd64)
#define NVME_IOCTL_IO64_CMD _IOWR('N', 0x48, struct nvme_passthru_cmd64)
+#define NVME_IOCTL_IO64_CMD_VEC _IOWR('N', 0x49, struct nvme_passthru_cmd64)
#endif /* _UAPI_LINUX_NVME_IOCTL_H */
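With the union in place, a vectored passthrough points addr at an iovec array and carries the element count in vec_cnt. A hedged sketch of a vectored NVMe read; the opcode and cdw10-12 packing follow the NVMe I/O command format, and the nsid value is illustrative:

#include <linux/nvme_ioctl.h>
#include <sys/ioctl.h>
#include <sys/uio.h>
#include <stdint.h>
#include <string.h>

int nvme_read_vectored(int fd, struct iovec *iov, int iovcnt,
		       unsigned long long slba, unsigned int nlb)
{
	struct nvme_passthru_cmd64 cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.opcode  = 0x02;			/* NVMe Read */
	cmd.nsid    = 1;			/* illustrative namespace */
	cmd.addr    = (uintptr_t)iov;
	cmd.vec_cnt = iovcnt;			/* unioned with data_len */
	cmd.cdw10   = slba & 0xffffffff;
	cmd.cdw11   = slba >> 32;
	cmd.cdw12   = nlb - 1;			/* zero-based block count */

	return ioctl(fd, NVME_IOCTL_IO64_CMD_VEC, &cmd);
}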
diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h
index 87b55755f4ff..d9db7ad43890 100644
--- a/include/uapi/linux/omap3isp.h
+++ b/include/uapi/linux/omap3isp.h
@@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config {
* struct omap3isp_stat_data - Statistic data sent to or received from user
* @ts: Timestamp of returned framestats.
* @buf: Pointer to pass to user.
+ * @buf_size: Size of buffer.
* @frame_number: Frame number of requested stats.
* @cur_frame: Current frame number being processed.
* @config_counter: Number of the configuration associated with the data.
@@ -176,10 +177,12 @@ struct omap3isp_stat_data {
struct timeval ts;
#endif
void __user *buf;
- __u32 buf_size;
- __u16 frame_number;
- __u16 cur_frame;
- __u16 config_counter;
+ __struct_group(/* no tag */, frame, /* no attrs */,
+ __u32 buf_size;
+ __u16 frame_number;
+ __u16 cur_frame;
+ __u16 config_counter;
+ );
};
#ifdef __KERNEL__
@@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 {
__s32 tv_usec;
} ts;
__u32 buf;
- __u32 buf_size;
- __u16 frame_number;
- __u16 cur_frame;
- __u16 config_counter;
+ __struct_group(/* no tag */, frame, /* no attrs */,
+ __u32 buf_size;
+ __u16 frame_number;
+ __u16 cur_frame;
+ __u16 config_counter;
+ );
};
#endif
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 1b65042ab1db..d37629dbad72 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -251,6 +251,8 @@ enum {
PERF_BR_SYSRET = 8, /* syscall return */
PERF_BR_COND_CALL = 9, /* conditional function call */
PERF_BR_COND_RET = 10, /* conditional function return */
+ PERF_BR_ERET = 11, /* exception return */
+ PERF_BR_IRQ = 12, /* irq */
PERF_BR_MAX,
};
@@ -465,6 +467,8 @@ struct perf_event_attr {
/*
* User provided data if sigtrap=1, passed back to user via
* siginfo_t::si_perf_data, e.g. to permit user to identify the event.
+ * Note, siginfo_t::si_perf_data is long-sized, and sig_data will be
+ * truncated accordingly on 32 bit architectures.
*/
__u64 sig_data;
};
diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index 9a402fdb60e9..77ee207623a9 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -105,23 +105,11 @@ struct rseq {
* Read and set by the kernel. Set by user-space with single-copy
* atomicity semantics. This field should only be updated by the
* thread which registered this data structure. Aligned on 64-bit.
+ *
+ * 32-bit architectures should update the low order bits of the
+ * rseq_cs field, leaving the high order bits initialized to 0.
*/
- union {
- __u64 ptr64;
-#ifdef __LP64__
- __u64 ptr;
-#else
- struct {
-#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN)
- __u32 padding; /* Initialized to zero. */
- __u32 ptr32;
-#else /* LITTLE */
- __u32 ptr32;
- __u32 padding; /* Initialized to zero. */
-#endif /* ENDIAN */
- } ptr;
-#endif
- } rseq_cs;
+ __u64 rseq_cs;
/*
* Restartable sequences flags field.
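User space now stores the descriptor address into rseq_cs directly. A minimal sketch of arming a critical section, assuming a registered TLS struct rseq named __rseq_abi; the symbol name and the store macro are assumptions, not part of this ABI change:

#include <linux/rseq.h>
#include <stdint.h>

/* Stand-in for a single-copy (volatile) store. */
#define RSEQ_WRITE_ONCE(x, v)	(*(volatile __typeof__(x) *)&(x) = (v))

extern __thread struct rseq __rseq_abi;

static inline void rseq_set_cs(const struct rseq_cs *cs)
{
	/* 64-bit shown; a 32-bit build would store only the low order
	 * 32 bits (whose offset depends on endianness), leaving the
	 * high bits zero as required above. */
	RSEQ_WRITE_ONCE(__rseq_abi.rseq_cs, (uint64_t)(uintptr_t)cs);
}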
diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h
index c7008d87f1a4..8cb3a6fef553 100644
--- a/include/uapi/linux/smc_diag.h
+++ b/include/uapi/linux/smc_diag.h
@@ -84,12 +84,11 @@ struct smc_diag_conninfo {
/* SMC_DIAG_LINKINFO */
struct smc_diag_linkinfo {
- __u8 link_id; /* link identifier */
- __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */
- __u8 ibport; /* RDMA device port number */
- __u8 gid[40]; /* local GID */
- __u8 peer_gid[40]; /* peer GID */
- __aligned_u64 net_cookie; /* RDMA device net namespace */
+ __u8 link_id; /* link identifier */
+ __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */
+ __u8 ibport; /* RDMA device port number */
+ __u8 gid[40]; /* local GID */
+ __u8 peer_gid[40]; /* peer GID */
};
struct smc_diag_lgrinfo {
diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h
index 9aa2fedfa309..fc78bf3aead7 100644
--- a/include/uapi/linux/thermal.h
+++ b/include/uapi/linux/thermal.h
@@ -44,7 +44,10 @@ enum thermal_genl_attr {
THERMAL_GENL_ATTR_CDEV_MAX_STATE,
THERMAL_GENL_ATTR_CDEV_NAME,
THERMAL_GENL_ATTR_GOV_NAME,
-
+ THERMAL_GENL_ATTR_CPU_CAPABILITY,
+ THERMAL_GENL_ATTR_CPU_CAPABILITY_ID,
+ THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE,
+ THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY,
__THERMAL_GENL_ATTR_MAX,
};
#define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1)
@@ -71,6 +74,7 @@ enum thermal_genl_event {
THERMAL_GENL_EVENT_CDEV_DELETE, /* Cdev unbound */
THERMAL_GENL_EVENT_CDEV_STATE_UPDATE, /* Cdev state updated */
THERMAL_GENL_EVENT_TZ_GOV_CHANGE, /* Governor policy changed */
+ THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE, /* CPU capability changed */
__THERMAL_GENL_EVENT_MAX,
};
#define THERMAL_GENL_EVENT_MAX (__THERMAL_GENL_EVENT_MAX - 1)
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 05b31d60acf6..ef739054cb1c 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -32,7 +32,8 @@
UFFD_FEATURE_SIGBUS | \
UFFD_FEATURE_THREAD_ID | \
UFFD_FEATURE_MINOR_HUGETLBFS | \
- UFFD_FEATURE_MINOR_SHMEM)
+ UFFD_FEATURE_MINOR_SHMEM | \
+ UFFD_FEATURE_EXACT_ADDRESS)
#define UFFD_API_IOCTLS \
((__u64)1 << _UFFDIO_REGISTER | \
(__u64)1 << _UFFDIO_UNREGISTER | \
@@ -189,6 +190,10 @@ struct uffdio_api {
*
* UFFD_FEATURE_MINOR_SHMEM indicates the same support as
* UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead.
+ *
+ * UFFD_FEATURE_EXACT_ADDRESS indicates that the exact address of page
+ * faults is provided and the offset within the page is not masked.
*/
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
#define UFFD_FEATURE_EVENT_FORK (1<<1)
@@ -201,6 +206,7 @@ struct uffdio_api {
#define UFFD_FEATURE_THREAD_ID (1<<8)
#define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9)
#define UFFD_FEATURE_MINOR_SHMEM (1<<10)
+#define UFFD_FEATURE_EXACT_ADDRESS (1<<11)
__u64 features;
__u64 ioctls;
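The feature is opt-in via the UFFDIO_API handshake. A hedged sketch that requests exact fault addresses when creating the descriptor, with error handling trimmed:

#include <linux/userfaultfd.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>

int uffd_open_exact(void)
{
	struct uffdio_api api = {
		.api = UFFD_API,
		.features = UFFD_FEATURE_EXACT_ADDRESS,
	};
	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

	if (uffd < 0)
		return -1;
	/* Fails with EINVAL on kernels without the feature. */
	if (ioctl(uffd, UFFDIO_API, &api) < 0) {
		close(uffd);
		return -1;
	}
	/* uffd_msg.arg.pagefault.address now keeps the sub-page bits. */
	return uffd;
}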
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 4e29d7851890..65e13a099b1a 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -511,6 +511,12 @@ struct xfrm_user_offload {
int ifindex;
__u8 flags;
};
+/* This flag was exposed without any kernel code supporting it.
+ * Unfortunately, strongswan has code that sets this flag,
+ * which makes it impossible to reuse this bit.
+ *
+ * So leave it here to make sure that it won't be reused by mistake.
+ */
#define XFRM_OFFLOAD_IPV6 1
#define XFRM_OFFLOAD_INBOUND 2
diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
index ef0cafe295b2..2d3e5df39a59 100644
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -56,8 +56,10 @@
* *
****************************************************************************/
+#define AES_IEC958_STATUS_SIZE 24
+
struct snd_aes_iec958 {
- unsigned char status[24]; /* AES/IEC958 channel status bits */
+ unsigned char status[AES_IEC958_STATUS_SIZE]; /* AES/IEC958 channel status bits */
unsigned char subcode[147]; /* AES/IEC958 subcode bits */
unsigned char pad; /* nothing */
unsigned char dig_subframe[4]; /* AES/IEC958 subframe bits */
diff --git a/include/uapi/xen/gntdev.h b/include/uapi/xen/gntdev.h
index 9ac5515b9bc2..7a7145395c09 100644
--- a/include/uapi/xen/gntdev.h
+++ b/include/uapi/xen/gntdev.h
@@ -47,7 +47,13 @@ struct ioctl_gntdev_grant_ref {
/*
* Inserts the grant references into the mapping table of an instance
* of gntdev. N.B. This does not perform the mapping, which is deferred
- * until mmap() is called with @index as the offset.
+ * until mmap() is called with @index as the offset. @index should be
+ * considered opaque to userspace, with one exception: if no grant
+ * references have ever been inserted into the mapping table of this
+ * instance, @index will be set to 0. This is necessary to use gntdev
+ * with userspace APIs that expect a file descriptor that can be
+ * mmap()'d at offset 0, such as Wayland. If @count is set to 0, this
+ * ioctl will fail.
*/
#define IOCTL_GNTDEV_MAP_GRANT_REF \
_IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref))
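A hedged userspace sketch of the flow this comment describes: insert a single grant reference, then mmap() the instance at the returned @index (0 for a fresh instance, which is what offset-0 consumers such as Wayland rely on). The include path is an assumption and the caller must check for MAP_FAILED:

#include <xen/gntdev.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

void *map_one_grant(int fd, unsigned int domid, unsigned int ref)
{
	struct ioctl_gntdev_map_grant_ref map = {
		.count = 1,
		.refs[0] = { .domid = domid, .ref = ref },
	};

	if (ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &map) < 0)
		return MAP_FAILED;
	/* The actual mapping happens here, not in the ioctl. */
	return mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
		    MAP_SHARED, fd, map.index);
}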
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index cb854df031ce..c9fea9389ebe 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -104,17 +104,32 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly);
* access has been ended, free the given page too. Access will be ended
* immediately iff the grant entry is not in use, otherwise it will happen
* some time later. page may be 0, in which case no freeing will occur.
+ * Note that the granted page might still be accessed (read or write) by the
+ * other side after gnttab_end_foreign_access() returns, so even if page was
+ * specified as 0 the page must not simply be reused for other purposes
+ * immediately. gnttab_end_foreign_access() will take an additional
+ * reference to the granted page in this case, which is dropped only after
+ * the grant is no longer in use.
+ * This requires that multi page allocations for areas subject to
+ * gnttab_end_foreign_access() are done via alloc_pages_exact() (and freeing
+ * via free_pages_exact()) in order to avoid high order pages.
*/
void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
unsigned long page);
+/*
+ * End access through the given grant reference, iff the grant entry is
+ * no longer in use. If access is successfully ended, the grant
+ * reference is also deallocated.
+ * Return 1 if the grant entry was freed, 0 if it is still in use.
+ */
+int gnttab_try_end_foreign_access(grant_ref_t ref);
+
int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn);
unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref);
unsigned long gnttab_end_foreign_transfer(grant_ref_t ref);
-int gnttab_query_foreign_access(grant_ref_t ref);
-
/*
* operations on reserved batches of grant references
*/
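A kernel-side sketch of the allocation rule spelled out above: a multi-page area whose grants may outlive gnttab_end_foreign_access() is allocated with alloc_pages_exact(), so every page is order-0 and individually freeable. Error handling is trimmed and all names apart from the gnttab and allocator calls are illustrative:

#include <linux/gfp.h>
#include <xen/grant_table.h>
#include <xen/page.h>

static void *shared_area;

static int share_area(domid_t domid, grant_ref_t *refs, unsigned int nr)
{
	unsigned int i;

	shared_area = alloc_pages_exact(nr * PAGE_SIZE, GFP_KERNEL);
	if (!shared_area)
		return -ENOMEM;
	for (i = 0; i < nr; i++)
		refs[i] = gnttab_grant_foreign_access(domid,
				virt_to_gfn(shared_area + i * PAGE_SIZE), 0);
	return 0;
}

static void unshare_area(grant_ref_t *refs, unsigned int nr)
{
	unsigned int i;

	/* Freeing is delegated to gnttab_end_foreign_access(), which
	 * defers it while the remote side still holds the grant. */
	for (i = 0; i < nr; i++)
		gnttab_end_foreign_access(refs[i], 0,
				(unsigned long)shared_area + i * PAGE_SIZE);
}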
diff --git a/include/xen/xenbus_dev.h b/include/xen/xenbus_dev.h
index bbee8c6a349d..4dc45a51c044 100644
--- a/include/xen/xenbus_dev.h
+++ b/include/xen/xenbus_dev.h
@@ -1,6 +1,4 @@
/******************************************************************************
- * evtchn.h
- *
* Interface to /dev/xen/xenbus_backend.
*
* Copyright (c) 2011 Bastian Blank <waldi@debian.org>
diff --git a/init/Kconfig b/init/Kconfig
index e9119bf54b1f..beb5b866c318 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -2054,6 +2054,7 @@ source "arch/Kconfig"
config RT_MUTEXES
bool
+ default y if PREEMPT_RT
config BASE_SMALL
int
diff --git a/init/Makefile b/init/Makefile
index 06326e304384..d82623d7fc8e 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -31,7 +31,8 @@ quiet_cmd_compile.h = CHK $@
cmd_compile.h = \
$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
"$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT_BUILD)" \
- "$(CONFIG_PREEMPT_RT)" "$(CONFIG_CC_VERSION_TEXT)" "$(LD)"
+ "$(CONFIG_PREEMPT_DYNAMIC)" "$(CONFIG_PREEMPT_RT)" \
+ "$(CONFIG_CC_VERSION_TEXT)" "$(LD)"
include/generated/compile.h: FORCE
$(call cmd,compile.h)
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 762b534978d9..7058e14ad5f7 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -8,7 +8,6 @@
#include <linux/root_dev.h>
#include <linux/security.h>
#include <linux/delay.h>
-#include <linux/genhd.h>
#include <linux/mount.h>
#include <linux/device.h>
#include <linux/init.h>
diff --git a/init/main.c b/init/main.c
index 65fa2e41a9c0..560f45c27ffe 100644
--- a/init/main.c
+++ b/init/main.c
@@ -853,7 +853,7 @@ static void __init mm_init(void)
pti_init();
}
-#ifdef CONFIG_HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
+#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
randomize_kstack_offset);
DEFINE_PER_CPU(u32, kstack_offset);
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 5becca9be867..7c08eb3c258d 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -486,7 +486,7 @@ static struct inode *mqueue_alloc_inode(struct super_block *sb)
{
struct mqueue_inode_info *ei;
- ei = kmem_cache_alloc(mqueue_inode_cachep, GFP_KERNEL);
+ ei = alloc_inode_sb(sb, mqueue_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
return &ei->vfs_inode;
diff --git a/ipc/sem.c b/ipc/sem.c
index 6693daf4fe11..0dbdb98fdf2d 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1964,6 +1964,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
*/
un = lookup_undo(ulp, semid);
if (un) {
+ spin_unlock(&ulp->lock);
kvfree(new);
goto success;
}
@@ -1976,9 +1977,8 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
ipc_assert_locked_object(&sma->sem_perm);
list_add(&new->list_id, &sma->list_id);
un = new;
-
-success:
spin_unlock(&ulp->lock);
+success:
sem_unlock(sma, -1);
out:
return un;
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index ce77f0265660..8c6de5a9ecc4 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -96,8 +96,9 @@ config PREEMPTION
config PREEMPT_DYNAMIC
bool "Preemption behaviour defined on boot"
depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT
+ select JUMP_LABEL if HAVE_PREEMPT_DYNAMIC_KEY
select PREEMPT_BUILD
- default y
+ default y if HAVE_PREEMPT_DYNAMIC_CALL
help
This option allows to define the preemption model on the kernel
command line parameter and thus override the default preemption
@@ -132,4 +133,14 @@ config SCHED_CORE
which is the likely usage by Linux distributions, there should
be no measurable impact on performance.
-
+config ARCH_WANTS_RT_DELAYED_SIGNALS
+ bool
+ help
+ This option is selected by architectures where raising signals
+ can happen in atomic contexts on PREEMPT_RT enabled kernels. This
+ option delays raising the signal until the return to user space
+ loop where it is also delivered. X86 requires this to deliver
+ signals from trap handlers which run on IST stacks.
+
+config RT_DELAYED_SIGNALS
+ def_bool PREEMPT_RT && ARCH_WANTS_RT_DELAYED_SIGNALS
diff --git a/kernel/async.c b/kernel/async.c
index b8d7a663497f..b2c4ba5686ee 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -205,9 +205,6 @@ async_cookie_t async_schedule_node_domain(async_func_t func, void *data,
atomic_inc(&entry_count);
spin_unlock_irqrestore(&async_lock, flags);
- /* mark that this task has queued an async job, used by module init */
- current->flags |= PF_USED_ASYNC;
-
/* schedule for execution */
queue_work_node(node, system_unbound_wq, &entry->work);
diff --git a/kernel/audit.c b/kernel/audit.c
index e4bbe2c70c26..7690c29d4ee4 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -541,20 +541,22 @@ static void kauditd_printk_skb(struct sk_buff *skb)
/**
 * kauditd_rehold_skb - Handle an audit record send failure in the hold queue
* @skb: audit record
+ * @error: error code (unused)
*
* Description:
* This should only be used by the kauditd_thread when it fails to flush the
* hold queue.
*/
-static void kauditd_rehold_skb(struct sk_buff *skb)
+static void kauditd_rehold_skb(struct sk_buff *skb, __always_unused int error)
{
- /* put the record back in the queue at the same place */
- skb_queue_head(&audit_hold_queue, skb);
+ /* put the record back in the queue */
+ skb_queue_tail(&audit_hold_queue, skb);
}
/**
* kauditd_hold_skb - Queue an audit record, waiting for auditd
* @skb: audit record
+ * @error: error code
*
* Description:
* Queue the audit record, waiting for an instance of auditd. When this
@@ -564,19 +566,31 @@ static void kauditd_rehold_skb(struct sk_buff *skb)
* and queue it, if we have room. If we want to hold on to the record, but we
* don't have room, record a record lost message.
*/
-static void kauditd_hold_skb(struct sk_buff *skb)
+static void kauditd_hold_skb(struct sk_buff *skb, int error)
{
/* at this point it is uncertain if we will ever send this to auditd so
* try to send the message via printk before we go any further */
kauditd_printk_skb(skb);
/* can we just silently drop the message? */
- if (!audit_default) {
- kfree_skb(skb);
- return;
+ if (!audit_default)
+ goto drop;
+
+ /* the hold queue is only for when the daemon goes away completely,
+ * not -EAGAIN failures; if we are in a -EAGAIN state requeue the
+ * record on the retry queue unless it's full, in which case drop it
+ */
+ if (error == -EAGAIN) {
+ if (!audit_backlog_limit ||
+ skb_queue_len(&audit_retry_queue) < audit_backlog_limit) {
+ skb_queue_tail(&audit_retry_queue, skb);
+ return;
+ }
+ audit_log_lost("kauditd retry queue overflow");
+ goto drop;
}
- /* if we have room, queue the message */
+ /* if we have room in the hold queue, queue the message */
if (!audit_backlog_limit ||
skb_queue_len(&audit_hold_queue) < audit_backlog_limit) {
skb_queue_tail(&audit_hold_queue, skb);
@@ -585,24 +599,32 @@ static void kauditd_hold_skb(struct sk_buff *skb)
/* we have no other options - drop the message */
audit_log_lost("kauditd hold queue overflow");
+drop:
kfree_skb(skb);
}
/**
* kauditd_retry_skb - Queue an audit record, attempt to send again to auditd
* @skb: audit record
+ * @error: error code (unused)
*
* Description:
* Not as serious as kauditd_hold_skb() as we still have a connected auditd,
* but for some reason we are having problems sending it audit records so
* queue the given record and attempt to resend.
*/
-static void kauditd_retry_skb(struct sk_buff *skb)
+static void kauditd_retry_skb(struct sk_buff *skb, __always_unused int error)
{
- /* NOTE: because records should only live in the retry queue for a
- * short period of time, before either being sent or moved to the hold
- * queue, we don't currently enforce a limit on this queue */
- skb_queue_tail(&audit_retry_queue, skb);
+ if (!audit_backlog_limit ||
+ skb_queue_len(&audit_retry_queue) < audit_backlog_limit) {
+ skb_queue_tail(&audit_retry_queue, skb);
+ return;
+ }
+
+ /* we have to drop the record, send it via printk as a last effort */
+ kauditd_printk_skb(skb);
+ audit_log_lost("kauditd retry queue overflow");
+ kfree_skb(skb);
}
/**
@@ -640,7 +662,7 @@ static void auditd_reset(const struct auditd_connection *ac)
/* flush the retry queue to the hold queue, but don't touch the main
* queue since we need to process that normally for multicast */
while ((skb = skb_dequeue(&audit_retry_queue)))
- kauditd_hold_skb(skb);
+ kauditd_hold_skb(skb, -ECONNREFUSED);
}
/**
@@ -714,16 +736,18 @@ static int kauditd_send_queue(struct sock *sk, u32 portid,
struct sk_buff_head *queue,
unsigned int retry_limit,
void (*skb_hook)(struct sk_buff *skb),
- void (*err_hook)(struct sk_buff *skb))
+ void (*err_hook)(struct sk_buff *skb, int error))
{
int rc = 0;
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
+ struct sk_buff *skb_tail;
unsigned int failed = 0;
/* NOTE: kauditd_thread takes care of all our locking, we just use
* the netlink info passed to us (e.g. sk and portid) */
- while ((skb = skb_dequeue(queue))) {
+ skb_tail = skb_peek_tail(queue);
+ while ((skb != skb_tail) && (skb = skb_dequeue(queue))) {
/* call the skb_hook for each skb we touch */
if (skb_hook)
(*skb_hook)(skb);
@@ -731,7 +755,7 @@ static int kauditd_send_queue(struct sock *sk, u32 portid,
/* can we send to anyone via unicast? */
if (!sk) {
if (err_hook)
- (*err_hook)(skb);
+ (*err_hook)(skb, -ECONNREFUSED);
continue;
}
@@ -745,7 +769,7 @@ retry:
rc == -ECONNREFUSED || rc == -EPERM) {
sk = NULL;
if (err_hook)
- (*err_hook)(skb);
+ (*err_hook)(skb, rc);
if (rc == -EAGAIN)
rc = 0;
/* continue to drain the queue */
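
A note on the loop bound above: snapshotting the tail with skb_peek_tail()
keeps kauditd_send_queue() from spinning forever when the error hook requeues
records onto the very queue being drained, since only records present at
entry are visited. Below is a minimal userspace sketch of the same
bounded-drain idea; the queue type and helper names are hypothetical
stand-ins for the kernel's sk_buff lists, not the audit code itself.

#include <stddef.h>

struct item { struct item *next; int payload; };
struct queue { struct item *head, *tail; };

static struct item *dequeue(struct queue *q)
{
	struct item *it = q->head;

	if (it) {
		q->head = it->next;
		if (!q->head)
			q->tail = NULL;
		it->next = NULL;
	}
	return it;
}

static void enqueue(struct queue *q, struct item *it)
{
	if (q->tail)
		q->tail->next = it;
	else
		q->head = it;
	q->tail = it;
}

/* Drain only what was queued when we started; a failed send requeues the
 * item behind the snapshot, so it is not revisited on this pass. */
static int drain(struct queue *q, int (*send)(struct item *))
{
	struct item *stop = q->tail;	/* like skb_peek_tail() */
	struct item *it;
	int handled = 0;

	while ((it = dequeue(q))) {
		handled++;
		if (send(it) < 0)
			enqueue(q, it);
		if (it == stop)
			break;
	}
	return handled;
}

static int always_fail(struct item *it) { (void)it; return -1; }

int main(void)
{
	static struct item a, b;
	struct queue q = { NULL, NULL };

	enqueue(&q, &a);
	enqueue(&q, &b);
	/* terminates after two items even though both were requeued */
	return drain(&q, always_fail) == 2 ? 0 : 1;
}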
diff --git a/kernel/audit.h b/kernel/audit.h
index c4498090a5bd..58b66543b4d5 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -201,6 +201,10 @@ struct audit_context {
struct {
char *name;
} module;
+ struct {
+ struct audit_ntp_data ntp_data;
+ struct timespec64 tk_injoffset;
+ } time;
};
int fds[2];
struct audit_proctitle proctitle;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index fce5d43a933f..ea2ee1181921 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -185,7 +185,7 @@ static int audit_match_perm(struct audit_context *ctx, int mask)
case AUDITSC_EXECVE:
return mask & AUDIT_PERM_EXEC;
case AUDITSC_OPENAT2:
- return mask & ACC_MODE((u32)((struct open_how *)ctx->argv[2])->flags);
+ return mask & ACC_MODE((u32)ctx->openat2.flags);
default:
return 0;
}
@@ -1340,6 +1340,53 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
from_kuid(&init_user_ns, name->fcap.rootid));
}
+static void audit_log_time(struct audit_context *context, struct audit_buffer **ab)
+{
+ const struct audit_ntp_data *ntp = &context->time.ntp_data;
+ const struct timespec64 *tk = &context->time.tk_injoffset;
+ static const char * const ntp_name[] = {
+ "offset",
+ "freq",
+ "status",
+ "tai",
+ "tick",
+ "adjust",
+ };
+ int type;
+
+ if (context->type == AUDIT_TIME_ADJNTPVAL) {
+ for (type = 0; type < AUDIT_NTP_NVALS; type++) {
+ if (ntp->vals[type].newval != ntp->vals[type].oldval) {
+ if (!*ab) {
+ *ab = audit_log_start(context,
+ GFP_KERNEL,
+ AUDIT_TIME_ADJNTPVAL);
+ if (!*ab)
+ return;
+ }
+ audit_log_format(*ab, "op=%s old=%lli new=%lli",
+ ntp_name[type],
+ ntp->vals[type].oldval,
+ ntp->vals[type].newval);
+ audit_log_end(*ab);
+ *ab = NULL;
+ }
+ }
+ }
+ if (tk->tv_sec != 0 || tk->tv_nsec != 0) {
+ if (!*ab) {
+ *ab = audit_log_start(context, GFP_KERNEL,
+ AUDIT_TIME_INJOFFSET);
+ if (!*ab)
+ return;
+ }
+ audit_log_format(*ab, "sec=%lli nsec=%li",
+ (long long)tk->tv_sec, tk->tv_nsec);
+ audit_log_end(*ab);
+ *ab = NULL;
+ }
+}
+
static void show_special(struct audit_context *context, int *call_panic)
{
struct audit_buffer *ab;
@@ -1454,6 +1501,11 @@ static void show_special(struct audit_context *context, int *call_panic)
audit_log_format(ab, "(null)");
break;
+ case AUDIT_TIME_ADJNTPVAL:
+ case AUDIT_TIME_INJOFFSET:
+ /* this call deviates from the rest, eating the buffer */
+ audit_log_time(context, &ab);
+ break;
}
audit_log_end(ab);
}
@@ -2849,31 +2901,26 @@ void __audit_fanotify(unsigned int response)
void __audit_tk_injoffset(struct timespec64 offset)
{
- audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_INJOFFSET,
- "sec=%lli nsec=%li",
- (long long)offset.tv_sec, offset.tv_nsec);
-}
-
-static void audit_log_ntp_val(const struct audit_ntp_data *ad,
- const char *op, enum audit_ntp_type type)
-{
- const struct audit_ntp_val *val = &ad->vals[type];
-
- if (val->newval == val->oldval)
- return;
+ struct audit_context *context = audit_context();
- audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_ADJNTPVAL,
- "op=%s old=%lli new=%lli", op, val->oldval, val->newval);
+ /* only set type if not already set by NTP */
+ if (!context->type)
+ context->type = AUDIT_TIME_INJOFFSET;
+ memcpy(&context->time.tk_injoffset, &offset, sizeof(offset));
}
void __audit_ntp_log(const struct audit_ntp_data *ad)
{
- audit_log_ntp_val(ad, "offset", AUDIT_NTP_OFFSET);
- audit_log_ntp_val(ad, "freq", AUDIT_NTP_FREQ);
- audit_log_ntp_val(ad, "status", AUDIT_NTP_STATUS);
- audit_log_ntp_val(ad, "tai", AUDIT_NTP_TAI);
- audit_log_ntp_val(ad, "tick", AUDIT_NTP_TICK);
- audit_log_ntp_val(ad, "adjust", AUDIT_NTP_ADJUST);
+ struct audit_context *context = audit_context();
+ int type;
+
+ for (type = 0; type < AUDIT_NTP_NVALS; type++)
+ if (ad->vals[type].newval != ad->vals[type].oldval) {
+ /* unconditionally set type, overwriting TK */
+ context->type = AUDIT_TIME_ADJNTPVAL;
+ memcpy(&context->time.ntp_data, ad, sizeof(*ad));
+ break;
+ }
}
void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries,
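
The auditsc.c hunks above stop emitting one audit record per changed NTP
value at the call site and instead stash the old/new pairs in the
audit_context, flushing them from show_special() at syscall exit. A small
self-contained sketch of that accumulate-then-flush shape follows; the
struct, names, and printf output are plain stand-ins, not the kernel's
record format.

#include <stdio.h>

enum { NVALS = 6 };
static const char * const names[NVALS] =
	{ "offset", "freq", "status", "tai", "tick", "adjust" };

struct pending {
	long long oldval[NVALS], newval[NVALS];
	int dirty;
};

/* call site: remember the change, emit nothing yet */
static void record_change(struct pending *p, int type,
			  long long oldv, long long newv)
{
	p->oldval[type] = oldv;
	p->newval[type] = newv;
	p->dirty = 1;
}

/* syscall exit: one pass emits every value that actually changed */
static void flush_at_exit(struct pending *p)
{
	int t;

	if (!p->dirty)
		return;
	for (t = 0; t < NVALS; t++)
		if (p->newval[t] != p->oldval[t])
			printf("op=%s old=%lli new=%lli\n",
			       names[t], p->oldval[t], p->newval[t]);
	p->dirty = 0;
}

int main(void)
{
	struct pending p = { {0}, {0}, 0 };

	record_change(&p, 1, 1000, 1024);	/* freq adjusted */
	record_change(&p, 5, 0, -3);		/* adjust changed */
	flush_at_exit(&p);			/* two lines, one flush point */
	return 0;
}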
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 06062370c3b8..9e4ecc990647 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -207,7 +207,7 @@ BTF_ID(func, bpf_lsm_socket_socketpair)
BTF_ID(func, bpf_lsm_syslog)
BTF_ID(func, bpf_lsm_task_alloc)
-BTF_ID(func, bpf_lsm_task_getsecid_subj)
+BTF_ID(func, bpf_lsm_current_getsecid_subj)
BTF_ID(func, bpf_lsm_task_getsecid_obj)
BTF_ID(func, bpf_lsm_task_prctl)
BTF_ID(func, bpf_lsm_task_setscheduler)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index e16dafeb2450..3e23b3fa79ff 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5688,7 +5688,8 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
}
if (check_ptr_off_reg(env, reg, regno))
return -EINVAL;
- } else if (is_kfunc && (reg->type == PTR_TO_BTF_ID || reg2btf_ids[reg->type])) {
+ } else if (is_kfunc && (reg->type == PTR_TO_BTF_ID ||
+ (reg2btf_ids[base_type(reg->type)] && !type_flag(reg->type)))) {
const struct btf_type *reg_ref_t;
const struct btf *reg_btf;
const char *reg_ref_tname;
@@ -5706,7 +5707,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
reg_ref_id = reg->btf_id;
} else {
reg_btf = btf_vmlinux;
- reg_ref_id = *reg2btf_ids[reg->type];
+ reg_ref_id = *reg2btf_ids[base_type(reg->type)];
}
reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id,
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 01cfdf40c838..55c084251fab 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
*/
#include <linux/bpf.h>
+#include <linux/btf.h>
#include <linux/bpf-cgroup.h>
#include <linux/rcupdate.h>
#include <linux/random.h>
@@ -1075,6 +1076,7 @@ static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
void *key;
u32 idx;
+ BTF_TYPE_EMIT(struct bpf_timer);
callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held());
if (!callback_fn)
goto out;
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 638d7fd7b375..710ba9de12ce 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -104,7 +104,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node)
}
rb = vmap(pages, nr_meta_pages + 2 * nr_data_pages,
- VM_ALLOC | VM_USERMAP, PAGE_KERNEL);
+ VM_MAP | VM_USERMAP, PAGE_KERNEL);
if (rb) {
kmemleak_not_leak(pages);
rb->pages = pages;
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 49e567209c6b..22c8ae94e4c1 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -472,13 +472,14 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
u32, size, u64, flags)
{
struct pt_regs *regs;
- long res;
+ long res = -EINVAL;
if (!try_get_task_stack(task))
return -EFAULT;
regs = task_pt_regs(task);
- res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
+ if (regs)
+ res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
put_task_stack(task);
return res;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index fa4505f9b611..ca70fe6fba38 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1355,6 +1355,7 @@ int generic_map_delete_batch(struct bpf_map *map,
maybe_wait_bpf_programs(map);
if (err)
break;
+ cond_resched();
}
if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
err = -EFAULT;
@@ -1412,6 +1413,7 @@ int generic_map_update_batch(struct bpf_map *map,
if (err)
break;
+ cond_resched();
}
if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
@@ -1509,6 +1511,7 @@ int generic_map_lookup_batch(struct bpf_map *map,
swap(prev_key, key);
retry = MAP_LOOKUP_RETRIES;
cp++;
+ cond_resched();
}
if (err == -EFAULT)
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 4b6974a195c1..5e7edf913060 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -550,11 +550,12 @@ static __always_inline u64 notrace bpf_prog_start_time(void)
static void notrace inc_misses_counter(struct bpf_prog *prog)
{
struct bpf_prog_stats *stats;
+ unsigned int flags;
stats = this_cpu_ptr(prog->stats);
- u64_stats_update_begin(&stats->syncp);
+ flags = u64_stats_update_begin_irqsave(&stats->syncp);
u64_stats_inc(&stats->misses);
- u64_stats_update_end(&stats->syncp);
+ u64_stats_update_end_irqrestore(&stats->syncp, flags);
}
/* The logic is similar to bpf_prog_run(), but with an explicit
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 41e0837a5a0b..afc6c0e9c966 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -546,9 +546,19 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
struct cgroup *cgrp;
+ struct cgroup_file_ctx *ctx;
BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+ /*
+ * Release agent gets called with all capabilities,
+ * require capabilities to set release agent.
+ */
+ ctx = of->priv;
+ if ((ctx->ns->user_ns != &init_user_ns) ||
+ !file_ns_capable(of->file, &init_user_ns, CAP_SYS_ADMIN))
+ return -EPERM;
+
cgrp = cgroup_kn_lock_live(of->kn, false);
if (!cgrp)
return -ENODEV;
@@ -954,6 +964,12 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
/* Specifying two release agents is forbidden */
if (ctx->release_agent)
return invalfc(fc, "release_agent respecified");
+ /*
+ * Release agent gets called with all capabilities,
+ * require capabilities to set release agent.
+ */
+ if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN))
+ return invalfc(fc, "Setting release_agent not allowed");
ctx->release_agent = param->string;
param->string = NULL;
break;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index b31e1465868a..a557eea7166f 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3643,6 +3643,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
cgroup_get(cgrp);
cgroup_kn_unlock(of->kn);
+ /* Allow only one trigger per file descriptor */
+ if (ctx->psi.trigger) {
+ cgroup_put(cgrp);
+ return -EBUSY;
+ }
+
psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
new = psi_trigger_create(psi, buf, nbytes, res);
if (IS_ERR(new)) {
@@ -3650,8 +3656,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
return PTR_ERR(new);
}
- psi_trigger_replace(&ctx->psi.trigger, new);
-
+ smp_store_release(&ctx->psi.trigger, new);
cgroup_put(cgrp);
return nbytes;
@@ -3690,7 +3695,7 @@ static void cgroup_pressure_release(struct kernfs_open_file *of)
{
struct cgroup_file_ctx *ctx = of->priv;
- psi_trigger_replace(&ctx->psi.trigger, NULL);
+ psi_trigger_destroy(ctx->psi.trigger);
}
bool cgroup_psi_enabled(void)
@@ -6161,6 +6166,20 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
if (ret)
goto err;
+ /*
+ * Spawning a task directly into a cgroup works by passing a file
+ * descriptor to the target cgroup directory. This can even be an O_PATH
+ * file descriptor. But it can never be a cgroup.procs file descriptor.
+ * This was done on purpose so spawning into a cgroup could be
+ * conceptualized as an atomic
+ *
+ * fd = openat(dfd_cgroup, "cgroup.procs", ...);
+ * write(fd, <child-pid>, ...);
+ *
+ * sequence, i.e. it's a shorthand for the caller opening and writing
+ * cgroup.procs of the cgroup indicated by @dfd_cgroup. This allows us
+ * to always use the caller's credentials.
+ */
ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb,
!(kargs->flags & CLONE_THREAD),
current->nsproxy->cgroup_ns);
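
The comment above describes the fd-based spawning model that
CLONE_INTO_CGROUP exposes to userspace. As a rough illustration, a child can
be cloned directly into a cgroup by handing clone3() an fd for the target
cgroup directory; /sys/fs/cgroup/mygroup below is a made-up path and error
handling is trimmed to the minimum.

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/sched.h>	/* struct clone_args, CLONE_INTO_CGROUP */
#include <signal.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	int cgfd = open("/sys/fs/cgroup/mygroup", O_DIRECTORY | O_CLOEXEC);
	struct clone_args args;
	long pid;

	if (cgfd < 0)
		return 1;

	memset(&args, 0, sizeof(args));
	args.flags = CLONE_INTO_CGROUP;
	args.exit_signal = SIGCHLD;
	args.cgroup = cgfd;	/* fd of the cgroup directory itself */

	pid = syscall(SYS_clone3, &args, sizeof(args));
	if (pid == 0) {		/* child: already attached to the cgroup */
		execlp("true", "true", (char *)NULL);
		_exit(127);
	}
	return pid > 0 ? 0 : 1;
}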
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index dc653ab26e50..ef88cc366bb8 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -591,6 +591,35 @@ static inline void free_cpuset(struct cpuset *cs)
}
/*
+ * validate_change_legacy() - Validate conditions specific to legacy (v1)
+ * behavior.
+ */
+static int validate_change_legacy(struct cpuset *cur, struct cpuset *trial)
+{
+ struct cgroup_subsys_state *css;
+ struct cpuset *c, *par;
+ int ret;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ /* Each of our child cpusets must be a subset of us */
+ ret = -EBUSY;
+ cpuset_for_each_child(c, css, cur)
+ if (!is_cpuset_subset(c, trial))
+ goto out;
+
+ /* On legacy hierarchy, we must be a subset of our parent cpuset. */
+ ret = -EACCES;
+ par = parent_cs(cur);
+ if (par && !is_cpuset_subset(trial, par))
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/*
* validate_change() - Used to validate that any proposed cpuset change
* follows the structural rules for cpusets.
*
@@ -614,20 +643,21 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
{
struct cgroup_subsys_state *css;
struct cpuset *c, *par;
- int ret;
-
- /* The checks don't apply to root cpuset */
- if (cur == &top_cpuset)
- return 0;
+ int ret = 0;
rcu_read_lock();
- par = parent_cs(cur);
- /* On legacy hierarchy, we must be a subset of our parent cpuset. */
- ret = -EACCES;
- if (!is_in_v2_mode() && !is_cpuset_subset(trial, par))
+ if (!is_in_v2_mode())
+ ret = validate_change_legacy(cur, trial);
+ if (ret)
+ goto out;
+
+ /* Remaining checks don't apply to root cpuset */
+ if (cur == &top_cpuset)
goto out;
+ par = parent_cs(cur);
+
/*
* If either I or some sibling (!= me) is exclusive, we can't
* overlap
@@ -803,7 +833,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
update_domain_attr_tree(dattr, &top_cpuset);
}
cpumask_and(doms[0], top_cpuset.effective_cpus,
- housekeeping_cpumask(HK_FLAG_DOMAIN));
+ housekeeping_cpumask(HK_TYPE_DOMAIN));
goto done;
}
@@ -833,7 +863,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
if (!cpumask_empty(cp->cpus_allowed) &&
!(is_sched_load_balance(cp) &&
cpumask_intersects(cp->cpus_allowed,
- housekeeping_cpumask(HK_FLAG_DOMAIN))))
+ housekeeping_cpumask(HK_TYPE_DOMAIN))))
continue;
if (root_load_balance &&
@@ -922,7 +952,7 @@ restart:
if (apn == b->pn) {
cpumask_or(dp, dp, b->effective_cpus);
- cpumask_and(dp, dp, housekeeping_cpumask(HK_FLAG_DOMAIN));
+ cpumask_and(dp, dp, housekeeping_cpumask(HK_TYPE_DOMAIN));
if (dattr)
update_domain_attr_tree(dattr + nslot, b);
@@ -1175,9 +1205,7 @@ enum subparts_cmd {
*
* Because of the implicit cpu exclusive nature of a partition root,
* cpumask changes that violates the cpu exclusivity rule will not be
- * permitted when checked by validate_change(). The validate_change()
- * function will also prevent any changes to the cpu list if it is not
- * a superset of children's cpu lists.
+ * permitted when checked by validate_change().
*/
static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
struct cpumask *newmask,
@@ -1522,10 +1550,15 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
struct cpuset *sibling;
struct cgroup_subsys_state *pos_css;
+ percpu_rwsem_assert_held(&cpuset_rwsem);
+
/*
* Check all its siblings and call update_cpumasks_hier()
* if their use_parent_ecpus flag is set in order for them
* to use the right effective_cpus value.
+ *
+ * The update_cpumasks_hier() function may sleep. So we have to
+ * release the RCU read lock before calling it.
*/
rcu_read_lock();
cpuset_for_each_child(sibling, pos_css, parent) {
@@ -1533,8 +1566,13 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
continue;
if (!sibling->use_parent_ecpus)
continue;
+ if (!css_tryget_online(&sibling->css))
+ continue;
+ rcu_read_unlock();
update_cpumasks_hier(sibling, tmp);
+ rcu_read_lock();
+ css_put(&sibling->css);
}
rcu_read_unlock();
}
@@ -1607,8 +1645,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
* Make sure that subparts_cpus is a subset of cpus_allowed.
*/
if (cs->nr_subparts_cpus) {
- cpumask_andnot(cs->subparts_cpus, cs->subparts_cpus,
- cs->cpus_allowed);
+ cpumask_and(cs->subparts_cpus, cs->subparts_cpus, cs->cpus_allowed);
cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus);
}
spin_unlock_irq(&callback_lock);
@@ -2252,6 +2289,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
cgroup_taskset_first(tset, &css);
cs = css_cs(css);
+ cpus_read_lock();
percpu_down_write(&cpuset_rwsem);
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
@@ -2305,6 +2343,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
wake_up(&cpuset_attach_wq);
percpu_up_write(&cpuset_rwsem);
+ cpus_read_unlock();
}
/* The various types of files and directories in a cpuset file system */
@@ -3485,8 +3524,8 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
return cs;
}
-/**
- * cpuset_node_allowed - Can we allocate on a memory node?
+/*
+ * __cpuset_node_allowed - Can we allocate on a memory node?
* @node: is this an allowed node?
* @gfp_mask: memory allocation flags
*
@@ -3657,8 +3696,8 @@ void cpuset_print_current_mems_allowed(void)
int cpuset_memory_pressure_enabled __read_mostly;
-/**
- * cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims.
+/*
+ * __cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims.
*
* Keep a running average of the rate of synchronous (direct)
* page reclaim efforts initiated by tasks in each cpuset.
@@ -3673,7 +3712,7 @@ int cpuset_memory_pressure_enabled __read_mostly;
* "memory_pressure". Value displayed is an integer
* representing the recent rate of entry into the synchronous
* (direct) page reclaim by any task attached to the cpuset.
- **/
+ */
void __cpuset_memory_pressure_bump(void)
{
diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config
index e9ffb0cc1eec..e8db8d938661 100644
--- a/kernel/configs/debug.config
+++ b/kernel/configs/debug.config
@@ -17,6 +17,7 @@ CONFIG_SYMBOLIC_ERRNAME=y
# Compile-time checks and compiler options
#
CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_FRAME_WARN=2048
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 407a2568f35e..5797c2a7a93f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -34,6 +34,7 @@
#include <linux/scs.h>
#include <linux/percpu-rwsem.h>
#include <linux/cpuset.h>
+#include <linux/random.h>
#include <trace/events/power.h>
#define CREATE_TRACE_POINTS
@@ -1488,8 +1489,8 @@ int freeze_secondary_cpus(int primary)
cpu_maps_update_begin();
if (primary == -1) {
primary = cpumask_first(cpu_online_mask);
- if (!housekeeping_cpu(primary, HK_FLAG_TIMER))
- primary = housekeeping_any_cpu(HK_FLAG_TIMER);
+ if (!housekeeping_cpu(primary, HK_TYPE_TIMER))
+ primary = housekeeping_any_cpu(HK_TYPE_TIMER);
} else {
if (!cpu_online(primary))
primary = cpumask_first(cpu_online_mask);
@@ -1659,6 +1660,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = perf_event_init_cpu,
.teardown.single = perf_event_exit_cpu,
},
+ [CPUHP_RANDOM_PREPARE] = {
+ .name = "random:prepare",
+ .startup.single = random_prepare_cpu,
+ .teardown.single = NULL,
+ },
[CPUHP_WORKQUEUE_PREP] = {
.name = "workqueue:prepare",
.startup.single = workqueue_prepare_cpu,
@@ -1782,6 +1788,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = workqueue_online_cpu,
.teardown.single = workqueue_offline_cpu,
},
+ [CPUHP_AP_RANDOM_ONLINE] = {
+ .name = "random:online",
+ .startup.single = random_online_cpu,
+ .teardown.single = NULL,
+ },
[CPUHP_AP_RCUTREE_ONLINE] = {
.name = "RCU/tree:online",
.startup.single = rcutree_online_cpu,
diff --git a/kernel/cred.c b/kernel/cred.c
index 473d17c431f3..933155c96922 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -665,21 +665,16 @@ EXPORT_SYMBOL(cred_fscmp);
int set_cred_ucounts(struct cred *new)
{
- struct task_struct *task = current;
- const struct cred *old = task->real_cred;
struct ucounts *new_ucounts, *old_ucounts = new->ucounts;
- if (new->user == old->user && new->user_ns == old->user_ns)
- return 0;
-
/*
* This optimization is needed because alloc_ucounts() uses locks
* for table lookups.
*/
- if (old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid))
+ if (old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->uid))
return 0;
- if (!(new_ucounts = alloc_ucounts(new->user_ns, new->euid)))
+ if (!(new_ucounts = alloc_ucounts(new->user_ns, new->uid)))
return -EAGAIN;
new->ucounts = new_ucounts;
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 3d63d91cba5c..6ea80ae42622 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -399,8 +399,6 @@ static const struct reserved_mem_ops rmem_cma_ops = {
static int __init rmem_cma_setup(struct reserved_mem *rmem)
{
- phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
- phys_addr_t mask = align - 1;
unsigned long node = rmem->fdt_node;
bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
struct cma *cma;
@@ -416,7 +414,7 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem)
of_get_flat_dt_prop(node, "no-map", NULL))
return -EINVAL;
- if ((rmem->base & mask) || (rmem->size & mask)) {
+ if (!IS_ALIGNED(rmem->base | rmem->size, CMA_MIN_ALIGNMENT_BYTES)) {
pr_err("Reserved memory: incorrect alignment of CMA region\n");
return -EINVAL;
}
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index f1e7ea160b43..6db1c475ec82 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -627,9 +627,14 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
for (i = 0; i < nr_slots(alloc_size + offset); i++)
mem->slots[index + i].orig_addr = slot_addr(orig_addr, i);
tlb_addr = slot_addr(mem->start, index) + offset;
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
- (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
+ /*
+ * When dir == DMA_FROM_DEVICE we could omit the copy from the orig
+ * to the tlb buffer, if we knew for sure the device will
+ * overwrite the entire current content. But we don't. Thus
+ * unconditional bounce may prevent leaking swiotlb content (i.e.
+ * kernel memory) to user-space.
+ */
+ swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
return tlb_addr;
}
@@ -696,10 +701,13 @@ void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr,
void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir)
{
- if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
- swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE);
- else
- BUG_ON(dir != DMA_FROM_DEVICE);
+ /*
+ * Unconditional bounce is necessary to avoid corruption on
+ * sync_*_for_cpu or dma_unmap_* when the device didn't overwrite
+ * the whole length of the bounce buffer.
+ */
+ swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE);
+ BUG_ON(!valid_dma_direction(dir));
}
void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index bad713684c2e..ed10a95a6b1d 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -3,6 +3,7 @@
#include <linux/context_tracking.h>
#include <linux/entry-common.h>
#include <linux/highmem.h>
+#include <linux/jump_label.h>
#include <linux/livepatch.h>
#include <linux/audit.h>
#include <linux/tick.h>
@@ -148,6 +149,18 @@ static void handle_signal_work(struct pt_regs *regs, unsigned long ti_work)
arch_do_signal_or_restart(regs, ti_work & _TIF_SIGPENDING);
}
+#ifdef CONFIG_RT_DELAYED_SIGNALS
+static inline void raise_delayed_signal(void)
+{
+ if (unlikely(current->forced_info.si_signo)) {
+ force_sig_info(&current->forced_info);
+ current->forced_info.si_signo = 0;
+ }
+}
+#else
+static inline void raise_delayed_signal(void) { }
+#endif
+
static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
unsigned long ti_work)
{
@@ -162,6 +175,8 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
if (ti_work & _TIF_NEED_RESCHED)
schedule();
+ raise_delayed_signal();
+
if (ti_work & _TIF_UPROBE)
uprobe_notify_resume(regs);
@@ -380,7 +395,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
return ret;
}
-void irqentry_exit_cond_resched(void)
+void raw_irqentry_exit_cond_resched(void)
{
if (!preempt_count()) {
/* Sanity check RCU and thread stack */
@@ -392,7 +407,17 @@ void irqentry_exit_cond_resched(void)
}
}
#ifdef CONFIG_PREEMPT_DYNAMIC
-DEFINE_STATIC_CALL(irqentry_exit_cond_resched, irqentry_exit_cond_resched);
+#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
+DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
+#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
+DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
+void dynamic_irqentry_exit_cond_resched(void)
+{
+ if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
+ return;
+ raw_irqentry_exit_cond_resched();
+}
+#endif
#endif
noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
@@ -420,13 +445,9 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
}
instrumentation_begin();
- if (IS_ENABLED(CONFIG_PREEMPTION)) {
-#ifdef CONFIG_PREEMPT_DYNAMIC
- static_call(irqentry_exit_cond_resched)();
-#else
+ if (IS_ENABLED(CONFIG_PREEMPTION))
irqentry_exit_cond_resched();
-#endif
- }
+
/* Covers both tracing and lockdep */
trace_hardirqs_on();
instrumentation_end();
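
For the two CONFIG_PREEMPT_DYNAMIC flavours above, a loose userspace analogue
may help: the static-call variant behaves like a retargetable call site, the
static-key variant like a fixed wrapper gated by a branch. The kernel patches
the instruction stream at runtime; the sketch below only mimics the dispatch
shape with ordinary variables.

#include <stdbool.h>
#include <stdio.h>

static void raw_cond_resched(void) { puts("resched check"); }
static void no_op(void) { }

/* "static call" flavour: one retargetable call site */
static void (*cond_resched_call)(void) = raw_cond_resched;

/* "static key" flavour: fixed call site behind a branch */
static bool cond_resched_enabled = true;
static void dynamic_cond_resched(void)
{
	if (!cond_resched_enabled)
		return;
	raw_cond_resched();
}

int main(void)
{
	cond_resched_call();		/* dispatches to raw_cond_resched() */
	dynamic_cond_resched();		/* gate open: runs */

	cond_resched_call = no_op;	/* switching preemption mode: retarget */
	cond_resched_enabled = false;	/* switching preemption mode: close gate */

	cond_resched_call();		/* now a no-op */
	dynamic_cond_resched();		/* now returns immediately */
	return 0;
}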
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fc18664f49b0..e1461f99881f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -674,6 +674,23 @@ perf_event_set_state(struct perf_event *event, enum perf_event_state state)
WRITE_ONCE(event->state, state);
}
+/*
+ * UP store-release, load-acquire
+ */
+
+#define __store_release(ptr, val) \
+do { \
+ barrier(); \
+ WRITE_ONCE(*(ptr), (val)); \
+} while (0)
+
+#define __load_acquire(ptr) \
+({ \
+ __unqual_scalar_typeof(*(ptr)) ___p = READ_ONCE(*(ptr)); \
+ barrier(); \
+ ___p; \
+})
+
#ifdef CONFIG_CGROUP_PERF
static inline bool
@@ -719,34 +736,51 @@ static inline u64 perf_cgroup_event_time(struct perf_event *event)
return t->time;
}
-static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
+static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now)
{
- struct perf_cgroup_info *info;
- u64 now;
-
- now = perf_clock();
+ struct perf_cgroup_info *t;
- info = this_cpu_ptr(cgrp->info);
+ t = per_cpu_ptr(event->cgrp->info, event->cpu);
+ if (!__load_acquire(&t->active))
+ return t->time;
+ now += READ_ONCE(t->timeoffset);
+ return now;
+}
- info->time += now - info->timestamp;
+static inline void __update_cgrp_time(struct perf_cgroup_info *info, u64 now, bool adv)
+{
+ if (adv)
+ info->time += now - info->timestamp;
info->timestamp = now;
+ /*
+ * see update_context_time()
+ */
+ WRITE_ONCE(info->timeoffset, info->time - info->timestamp);
}
-static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
+static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, bool final)
{
struct perf_cgroup *cgrp = cpuctx->cgrp;
struct cgroup_subsys_state *css;
+ struct perf_cgroup_info *info;
if (cgrp) {
+ u64 now = perf_clock();
+
for (css = &cgrp->css; css; css = css->parent) {
cgrp = container_of(css, struct perf_cgroup, css);
- __update_cgrp_time(cgrp);
+ info = this_cpu_ptr(cgrp->info);
+
+ __update_cgrp_time(info, now, true);
+ if (final)
+ __store_release(&info->active, 0);
}
}
}
static inline void update_cgrp_time_from_event(struct perf_event *event)
{
+ struct perf_cgroup_info *info;
struct perf_cgroup *cgrp;
/*
@@ -760,8 +794,10 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
/*
* Do not update time when cgroup is not active
*/
- if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
- __update_cgrp_time(event->cgrp);
+ if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) {
+ info = this_cpu_ptr(event->cgrp->info);
+ __update_cgrp_time(info, perf_clock(), true);
+ }
}
static inline void
@@ -785,7 +821,8 @@ perf_cgroup_set_timestamp(struct task_struct *task,
for (css = &cgrp->css; css; css = css->parent) {
cgrp = container_of(css, struct perf_cgroup, css);
info = this_cpu_ptr(cgrp->info);
- info->timestamp = ctx->timestamp;
+ __update_cgrp_time(info, ctx->timestamp, false);
+ __store_release(&info->active, 1);
}
}
@@ -802,7 +839,7 @@ static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list);
*/
static void perf_cgroup_switch(struct task_struct *task, int mode)
{
- struct perf_cpu_context *cpuctx;
+ struct perf_cpu_context *cpuctx, *tmp;
struct list_head *list;
unsigned long flags;
@@ -813,7 +850,7 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
local_irq_save(flags);
list = this_cpu_ptr(&cgrp_cpuctx_list);
- list_for_each_entry(cpuctx, list, cgrp_cpuctx_entry) {
+ list_for_each_entry_safe(cpuctx, tmp, list, cgrp_cpuctx_entry) {
WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
@@ -982,14 +1019,6 @@ out:
}
static inline void
-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
-{
- struct perf_cgroup_info *t;
- t = per_cpu_ptr(event->cgrp->info, event->cpu);
- event->shadow_ctx_time = now - t->timestamp;
-}
-
-static inline void
perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
{
struct perf_cpu_context *cpuctx;
@@ -1066,7 +1095,8 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
{
}
-static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
+static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx,
+ bool final)
{
}
@@ -1098,12 +1128,12 @@ perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
{
}
-static inline void
-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
+static inline u64 perf_cgroup_event_time(struct perf_event *event)
{
+ return 0;
}
-static inline u64 perf_cgroup_event_time(struct perf_event *event)
+static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now)
{
return 0;
}
@@ -1525,22 +1555,59 @@ static void perf_unpin_context(struct perf_event_context *ctx)
/*
* Update the record of the current time in a context.
*/
-static void update_context_time(struct perf_event_context *ctx)
+static void __update_context_time(struct perf_event_context *ctx, bool adv)
{
u64 now = perf_clock();
- ctx->time += now - ctx->timestamp;
+ if (adv)
+ ctx->time += now - ctx->timestamp;
ctx->timestamp = now;
+
+ /*
+ * The above: time' = time + (now - timestamp), can be re-arranged
+ * into: time' = now + (time - timestamp), which gives a single value
+ * offset to compute future time without taking locks.
+ *
+ * See perf_event_time_now(), which can be used from NMI context where
+ * it's (obviously) not possible to acquire ctx->lock in order to read
+ * both the above values in a consistent manner.
+ */
+ WRITE_ONCE(ctx->timeoffset, ctx->time - ctx->timestamp);
+}
+
+static void update_context_time(struct perf_event_context *ctx)
+{
+ __update_context_time(ctx, true);
}
static u64 perf_event_time(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
+ if (unlikely(!ctx))
+ return 0;
+
if (is_cgroup_event(event))
return perf_cgroup_event_time(event);
- return ctx ? ctx->time : 0;
+ return ctx->time;
+}
+
+static u64 perf_event_time_now(struct perf_event *event, u64 now)
+{
+ struct perf_event_context *ctx = event->ctx;
+
+ if (unlikely(!ctx))
+ return 0;
+
+ if (is_cgroup_event(event))
+ return perf_cgroup_event_time_now(event, now);
+
+ if (!(__load_acquire(&ctx->is_active) & EVENT_TIME))
+ return ctx->time;
+
+ now += READ_ONCE(ctx->timeoffset);
+ return now;
}
static enum event_type_t get_event_type(struct perf_event *event)
@@ -2350,7 +2417,7 @@ __perf_remove_from_context(struct perf_event *event,
if (ctx->is_active & EVENT_TIME) {
update_context_time(ctx);
- update_cgrp_time_from_cpuctx(cpuctx);
+ update_cgrp_time_from_cpuctx(cpuctx, false);
}
event_sched_out(event, cpuctx, ctx);
@@ -2361,6 +2428,9 @@ __perf_remove_from_context(struct perf_event *event,
list_del_event(event, ctx);
if (!ctx->nr_events && ctx->is_active) {
+ if (ctx == &cpuctx->ctx)
+ update_cgrp_time_from_cpuctx(cpuctx, true);
+
ctx->is_active = 0;
ctx->rotate_necessary = 0;
if (ctx->task) {
@@ -2392,7 +2462,11 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla
* event_function_call() user.
*/
raw_spin_lock_irq(&ctx->lock);
- if (!ctx->is_active) {
+ /*
+ * Cgroup events are per-cpu events, and must IPI because of
+ * cgrp_cpuctx_list.
+ */
+ if (!ctx->is_active && !is_cgroup_event(event)) {
__perf_remove_from_context(event, __get_cpu_context(ctx),
ctx, (void *)flags);
raw_spin_unlock_irq(&ctx->lock);
@@ -2482,40 +2556,6 @@ void perf_event_disable_inatomic(struct perf_event *event)
irq_work_queue(&event->pending);
}
-static void perf_set_shadow_time(struct perf_event *event,
- struct perf_event_context *ctx)
-{
- /*
- * use the correct time source for the time snapshot
- *
- * We could get by without this by leveraging the
- * fact that to get to this function, the caller
- * has most likely already called update_context_time()
- * and update_cgrp_time_xx() and thus both timestamp
- * are identical (or very close). Given that tstamp is,
- * already adjusted for cgroup, we could say that:
- * tstamp - ctx->timestamp
- * is equivalent to
- * tstamp - cgrp->timestamp.
- *
- * Then, in perf_output_read(), the calculation would
- * work with no changes because:
- * - event is guaranteed scheduled in
- * - no scheduled out in between
- * - thus the timestamp would be the same
- *
- * But this is a bit hairy.
- *
- * So instead, we have an explicit cgroup call to remain
- * within the time source all along. We believe it
- * is cleaner and simpler to understand.
- */
- if (is_cgroup_event(event))
- perf_cgroup_set_shadow_time(event, event->tstamp);
- else
- event->shadow_ctx_time = event->tstamp - ctx->timestamp;
-}
-
#define MAX_INTERRUPTS (~0ULL)
static void perf_log_throttle(struct perf_event *event, int enable);
@@ -2556,8 +2596,6 @@ event_sched_in(struct perf_event *event,
perf_pmu_disable(event->pmu);
- perf_set_shadow_time(event, ctx);
-
perf_log_itrace_start(event);
if (event->pmu->add(event, PERF_EF_START)) {
@@ -2861,11 +2899,14 @@ perf_install_in_context(struct perf_event_context *ctx,
* perf_event_attr::disabled events will not run and can be initialized
* without IPI. Except when this is the first event for the context, in
* that case we need the magic of the IPI to set ctx->is_active.
+ * Similarly, cgroup events for the context also needs the IPI to
+ * manipulate the cgrp_cpuctx_list.
*
* The IOC_ENABLE that is sure to follow the creation of a disabled
* event will issue the IPI and reprogram the hardware.
*/
- if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events) {
+ if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF &&
+ ctx->nr_events && !is_cgroup_event(event)) {
raw_spin_lock_irq(&ctx->lock);
if (ctx->task == TASK_TOMBSTONE) {
raw_spin_unlock_irq(&ctx->lock);
@@ -3197,6 +3238,15 @@ static int perf_event_modify_breakpoint(struct perf_event *bp,
return err;
}
+/*
+ * Copy event-type-independent attributes that may be modified.
+ */
+static void perf_event_modify_copy_attr(struct perf_event_attr *to,
+ const struct perf_event_attr *from)
+{
+ to->sig_data = from->sig_data;
+}
+
static int perf_event_modify_attr(struct perf_event *event,
struct perf_event_attr *attr)
{
@@ -3219,10 +3269,17 @@ static int perf_event_modify_attr(struct perf_event *event,
WARN_ON_ONCE(event->ctx->parent_ctx);
mutex_lock(&event->child_mutex);
+ /*
+ * Event-type-independent attributes must be copied before event-type
+ * modification, which will validate that final attributes match the
+ * source attributes after all relevant attributes have been copied.
+ */
+ perf_event_modify_copy_attr(&event->attr, attr);
err = func(event, attr);
if (err)
goto out;
list_for_each_entry(child, &event->child_list, child_list) {
+ perf_event_modify_copy_attr(&child->attr, attr);
err = func(child, attr);
if (err)
goto out;
@@ -3251,16 +3308,6 @@ static void ctx_sched_out(struct perf_event_context *ctx,
return;
}
- ctx->is_active &= ~event_type;
- if (!(ctx->is_active & EVENT_ALL))
- ctx->is_active = 0;
-
- if (ctx->task) {
- WARN_ON_ONCE(cpuctx->task_ctx != ctx);
- if (!ctx->is_active)
- cpuctx->task_ctx = NULL;
- }
-
/*
* Always update time if it was set; not only when it changes.
* Otherwise we can 'forget' to update time for any but the last
@@ -3274,7 +3321,22 @@ static void ctx_sched_out(struct perf_event_context *ctx,
if (is_active & EVENT_TIME) {
/* update (and stop) ctx time */
update_context_time(ctx);
- update_cgrp_time_from_cpuctx(cpuctx);
+ update_cgrp_time_from_cpuctx(cpuctx, ctx == &cpuctx->ctx);
+ /*
+ * CPU-release for the below ->is_active store,
+ * see __load_acquire() in perf_event_time_now()
+ */
+ barrier();
+ }
+
+ ctx->is_active &= ~event_type;
+ if (!(ctx->is_active & EVENT_ALL))
+ ctx->is_active = 0;
+
+ if (ctx->task) {
+ WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+ if (!ctx->is_active)
+ cpuctx->task_ctx = NULL;
}
is_active ^= ctx->is_active; /* changed bits */
@@ -3711,13 +3773,19 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
return 0;
}
+/*
+ * Because the userpage is strictly per-event (there is no concept of context,
+ * so there cannot be a context indirection), every userpage must be updated
+ * when context time starts :-(
+ *
+ * IOW, we must not miss EVENT_TIME edges.
+ */
static inline bool event_update_userpage(struct perf_event *event)
{
if (likely(!atomic_read(&event->mmap_count)))
return false;
perf_event_update_time(event);
- perf_set_shadow_time(event, event->ctx);
perf_event_update_userpage(event);
return true;
@@ -3801,13 +3869,23 @@ ctx_sched_in(struct perf_event_context *ctx,
struct task_struct *task)
{
int is_active = ctx->is_active;
- u64 now;
lockdep_assert_held(&ctx->lock);
if (likely(!ctx->nr_events))
return;
+ if (is_active ^ EVENT_TIME) {
+ /* start ctx time */
+ __update_context_time(ctx, false);
+ perf_cgroup_set_timestamp(task, ctx);
+ /*
+ * CPU-release for the below ->is_active store,
+ * see __load_acquire() in perf_event_time_now()
+ */
+ barrier();
+ }
+
ctx->is_active |= (event_type | EVENT_TIME);
if (ctx->task) {
if (!is_active)
@@ -3818,13 +3896,6 @@ ctx_sched_in(struct perf_event_context *ctx,
is_active ^= ctx->is_active; /* changed bits */
- if (is_active & EVENT_TIME) {
- /* start ctx time */
- now = perf_clock();
- ctx->timestamp = now;
- perf_cgroup_set_timestamp(task, ctx);
- }
-
/*
* First go through the list and put on any pinned groups
* in order to give them the best chance of going on.
@@ -4418,6 +4489,18 @@ static inline u64 perf_event_count(struct perf_event *event)
return local64_read(&event->count) + atomic64_read(&event->child_count);
}
+static void calc_timer_values(struct perf_event *event,
+ u64 *now,
+ u64 *enabled,
+ u64 *running)
+{
+ u64 ctx_time;
+
+ *now = perf_clock();
+ ctx_time = perf_event_time_now(event, *now);
+ __perf_update_times(event, ctx_time, enabled, running);
+}
+
/*
* NMI-safe method to read a local event, that is an event that
* is:
@@ -4477,10 +4560,9 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
*value = local64_read(&event->count);
if (enabled || running) {
- u64 now = event->shadow_ctx_time + perf_clock();
- u64 __enabled, __running;
+ u64 __enabled, __running, __now;
- __perf_update_times(event, now, &__enabled, &__running);
+ calc_timer_values(event, &__now, &__enabled, &__running);
if (enabled)
*enabled = __enabled;
if (running)
@@ -5802,18 +5884,6 @@ static int perf_event_index(struct perf_event *event)
return event->pmu->event_idx(event);
}
-static void calc_timer_values(struct perf_event *event,
- u64 *now,
- u64 *enabled,
- u64 *running)
-{
- u64 ctx_time;
-
- *now = perf_clock();
- ctx_time = event->shadow_ctx_time + *now;
- __perf_update_times(event, ctx_time, enabled, running);
-}
-
static void perf_event_init_userpage(struct perf_event *event)
{
struct perf_event_mmap_page *userpg;
@@ -5938,6 +6008,8 @@ static void ring_buffer_attach(struct perf_event *event,
struct perf_buffer *old_rb = NULL;
unsigned long flags;
+ WARN_ON_ONCE(event->parent);
+
if (event->rb) {
/*
* Should be impossible, we set this when removing
@@ -5995,6 +6067,9 @@ static void ring_buffer_wakeup(struct perf_event *event)
{
struct perf_buffer *rb;
+ if (event->parent)
+ event = event->parent;
+
rcu_read_lock();
rb = rcu_dereference(event->rb);
if (rb) {
@@ -6008,6 +6083,9 @@ struct perf_buffer *ring_buffer_get(struct perf_event *event)
{
struct perf_buffer *rb;
+ if (event->parent)
+ event = event->parent;
+
rcu_read_lock();
rb = rcu_dereference(event->rb);
if (rb) {
@@ -6353,7 +6431,6 @@ accounting:
ring_buffer_attach(event, rb);
perf_event_update_time(event);
- perf_set_shadow_time(event, event->ctx);
perf_event_init_userpage(event);
perf_event_update_userpage(event);
} else {
@@ -6717,7 +6794,7 @@ static unsigned long perf_prepare_sample_aux(struct perf_event *event,
if (WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id()))
goto out;
- rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
+ rb = ring_buffer_get(sampler);
if (!rb)
goto out;
@@ -6783,7 +6860,7 @@ static void perf_aux_sample_output(struct perf_event *event,
if (WARN_ON_ONCE(!sampler || !data->aux_size))
return;
- rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
+ rb = ring_buffer_get(sampler);
if (!rb)
return;
@@ -10454,8 +10531,6 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
*/
if (state == IF_STATE_END) {
ret = -EINVAL;
- if (kernel && event->attr.exclude_kernel)
- goto fail;
/*
* ACTION "filter" must have a non-zero length region
@@ -10497,8 +10572,11 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
}
/* ready to consume more filters */
+ kfree(filename);
+ filename = NULL;
state = IF_STATE_ACTION;
filter = NULL;
+ kernel = 0;
}
}
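
The timeoffset scheme threaded through the perf hunks above rests on the
rearrangement noted in __update_context_time(): publishing time - timestamp
as a single value lets a reader compute now + timeoffset without taking
ctx->lock. A simplified, single-threaded sketch of that arithmetic follows;
unlike the real code it has no barriers or READ_ONCE, so it demonstrates the
algebra only.

#include <stdint.h>
#include <stdio.h>

struct ctx_time {
	uint64_t time;		/* accumulated context time */
	uint64_t timestamp;	/* clock value at last update */
	int64_t  timeoffset;	/* time - timestamp, published for readers */
};

static void update(struct ctx_time *c, uint64_t now)
{
	c->time += now - c->timestamp;
	c->timestamp = now;
	c->timeoffset = (int64_t)(c->time - c->timestamp);
}

/* lock-free read: time + (now - timestamp) == now + (time - timestamp) */
static uint64_t read_time(const struct ctx_time *c, uint64_t now)
{
	return now + c->timeoffset;
}

int main(void)
{
	struct ctx_time c = { 0, 100, 0 };

	update(&c, 150);	/* 50 units accumulated */
	printf("%llu\n", (unsigned long long)read_time(&c, 170));
	/* prints 70: the 50 accumulated plus 20 since the last update */
	return 0;
}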
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6357c3580d07..6418083901d4 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -155,11 +155,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
struct page *old_page, struct page *new_page)
{
struct mm_struct *mm = vma->vm_mm;
- struct page_vma_mapped_walk pvmw = {
- .page = compound_head(old_page),
- .vma = vma,
- .address = addr,
- };
+ DEFINE_FOLIO_VMA_WALK(pvmw, page_folio(old_page), vma, addr, 0);
int err;
struct mmu_notifier_range range;
@@ -173,7 +169,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
return err;
}
- /* For try_to_free_swap() and munlock_vma_page() below */
+ /* For try_to_free_swap() below */
lock_page(old_page);
mmu_notifier_invalidate_range_start(&range);
@@ -201,13 +197,10 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
set_pte_at_notify(mm, addr, pvmw.pte,
mk_pte(new_page, vma->vm_page_prot));
- page_remove_rmap(old_page, false);
+ page_remove_rmap(old_page, vma, false);
if (!page_mapped(old_page))
try_to_free_swap(old_page);
page_vma_mapped_walk_done(&pvmw);
-
- if ((vma->vm_flags & VM_LOCKED) && !PageCompound(old_page))
- munlock_vma_page(old_page);
put_page(old_page);
err = 0;
diff --git a/kernel/exit.c b/kernel/exit.c
index b00a25bb4ab9..192b90a9ce16 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -735,7 +735,7 @@ void __noreturn do_exit(long code)
struct task_struct *tsk = current;
int group_dead;
- WARN_ON(blk_needs_flush_plug(tsk));
+ WARN_ON(tsk->plug);
/*
* If do_dead is called because this processes oopsed, it's possible
@@ -845,6 +845,7 @@ void __noreturn do_exit(long code)
put_page(tsk->task_frag.page);
validate_creds_for_do_exit(tsk);
+ exit_task_stack_account(tsk);
check_stack_usage();
preempt_disable();
diff --git a/kernel/fork.c b/kernel/fork.c
index d75a528f7b21..d74c30b6733b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -97,6 +97,7 @@
#include <linux/scs.h>
#include <linux/io_uring.h>
#include <linux/bpf.h>
+#include <linux/sched/mm.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -185,7 +186,7 @@ static inline void free_task_struct(struct task_struct *tsk)
*/
# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
-#ifdef CONFIG_VMAP_STACK
+# ifdef CONFIG_VMAP_STACK
/*
* vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
* flush. Try to minimize the number of calls by caching stacks.
@@ -193,6 +194,41 @@ static inline void free_task_struct(struct task_struct *tsk)
#define NR_CACHED_STACKS 2
static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
+struct vm_stack {
+ struct rcu_head rcu;
+ struct vm_struct *stack_vm_area;
+};
+
+static bool try_release_thread_stack_to_cache(struct vm_struct *vm)
+{
+ unsigned int i;
+
+ for (i = 0; i < NR_CACHED_STACKS; i++) {
+ if (this_cpu_cmpxchg(cached_stacks[i], NULL, vm) != NULL)
+ continue;
+ return true;
+ }
+ return false;
+}
+
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+ struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu);
+
+ if (try_release_thread_stack_to_cache(vm_stack->stack_vm_area))
+ return;
+
+ vfree(vm_stack);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+ struct vm_stack *vm_stack = tsk->stack;
+
+ vm_stack->stack_vm_area = tsk->stack_vm_area;
+ call_rcu(&vm_stack->rcu, thread_stack_free_rcu);
+}
+
static int free_vm_stack_cache(unsigned int cpu)
{
struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu);
@@ -210,11 +246,35 @@ static int free_vm_stack_cache(unsigned int cpu)
return 0;
}
-#endif
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+static int memcg_charge_kernel_stack(struct vm_struct *vm)
{
-#ifdef CONFIG_VMAP_STACK
+ int i;
+ int ret;
+
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
+ BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
+
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+ ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0);
+ if (ret)
+ goto err;
+ }
+ return 0;
+err:
+ /*
+ * If memcg_kmem_charge_page() fails, page's memory cgroup pointer is
+ * NULL, and memcg_kmem_uncharge_page() in free_thread_stack() will
+ * ignore this page.
+ */
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+ memcg_kmem_uncharge_page(vm->pages[i], 0);
+ return ret;
+}
+
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
+{
+ struct vm_struct *vm;
void *stack;
int i;
@@ -232,9 +292,14 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
/* Clear stale pointers from reused stack. */
memset(s->addr, 0, THREAD_SIZE);
+ if (memcg_charge_kernel_stack(s)) {
+ vfree(s->addr);
+ return -ENOMEM;
+ }
+
tsk->stack_vm_area = s;
tsk->stack = s->addr;
- return s->addr;
+ return 0;
}
/*
@@ -247,71 +312,95 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
THREADINFO_GFP & ~__GFP_ACCOUNT,
PAGE_KERNEL,
0, node, __builtin_return_address(0));
+ if (!stack)
+ return -ENOMEM;
+ vm = find_vm_area(stack);
+ if (memcg_charge_kernel_stack(vm)) {
+ vfree(stack);
+ return -ENOMEM;
+ }
/*
* We can't call find_vm_area() in interrupt context, and
* free_thread_stack() can be called in interrupt context,
* so cache the vm_struct.
*/
- if (stack) {
- tsk->stack_vm_area = find_vm_area(stack);
- tsk->stack = stack;
- }
- return stack;
-#else
+ tsk->stack_vm_area = vm;
+ tsk->stack = stack;
+ return 0;
+}
+
+static void free_thread_stack(struct task_struct *tsk)
+{
+ if (!try_release_thread_stack_to_cache(tsk->stack_vm_area))
+ thread_stack_delayed_free(tsk);
+
+ tsk->stack = NULL;
+ tsk->stack_vm_area = NULL;
+}
+
+# else /* !CONFIG_VMAP_STACK */
+
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+ __free_pages(virt_to_page(rh), THREAD_SIZE_ORDER);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+ struct rcu_head *rh = tsk->stack;
+
+ call_rcu(rh, thread_stack_free_rcu);
+}
+
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
+{
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
THREAD_SIZE_ORDER);
if (likely(page)) {
tsk->stack = kasan_reset_tag(page_address(page));
- return tsk->stack;
+ return 0;
}
- return NULL;
-#endif
+ return -ENOMEM;
}
-static inline void free_thread_stack(struct task_struct *tsk)
+static void free_thread_stack(struct task_struct *tsk)
{
-#ifdef CONFIG_VMAP_STACK
- struct vm_struct *vm = task_stack_vm_area(tsk);
-
- if (vm) {
- int i;
+ thread_stack_delayed_free(tsk);
+ tsk->stack = NULL;
+}
- for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
- memcg_kmem_uncharge_page(vm->pages[i], 0);
+# endif /* CONFIG_VMAP_STACK */
+# else /* !(THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)) */
- for (i = 0; i < NR_CACHED_STACKS; i++) {
- if (this_cpu_cmpxchg(cached_stacks[i],
- NULL, tsk->stack_vm_area) != NULL)
- continue;
+static struct kmem_cache *thread_stack_cache;
- return;
- }
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+ kmem_cache_free(thread_stack_cache, rh);
+}
- vfree_atomic(tsk->stack);
- return;
- }
-#endif
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+ struct rcu_head *rh = tsk->stack;
- __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
+ call_rcu(rh, thread_stack_free_rcu);
}
-# else
-static struct kmem_cache *thread_stack_cache;
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
- int node)
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
unsigned long *stack;
stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
stack = kasan_reset_tag(stack);
tsk->stack = stack;
- return stack;
+ return stack ? 0 : -ENOMEM;
}
static void free_thread_stack(struct task_struct *tsk)
{
- kmem_cache_free(thread_stack_cache, tsk->stack);
+ thread_stack_delayed_free(tsk);
+ tsk->stack = NULL;
}
void thread_stack_cache_init(void)
@@ -321,8 +410,26 @@ void thread_stack_cache_init(void)
THREAD_SIZE, NULL);
BUG_ON(thread_stack_cache == NULL);
}
-# endif
-#endif
+
+# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */
+#else /* CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
+
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
+{
+ unsigned long *stack;
+
+ stack = arch_alloc_thread_stack_node(tsk, node);
+ tsk->stack = stack;
+ return stack ? 0 : -ENOMEM;
+}
+
+static void free_thread_stack(struct task_struct *tsk)
+{
+ arch_free_thread_stack(tsk);
+ tsk->stack = NULL;
+}
+
+#endif /* !CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
/* SLAB cache for signal_struct structures (tsk->signal) */
static struct kmem_cache *signal_cachep;
@@ -366,63 +473,47 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
*new = data_race(*orig);
INIT_LIST_HEAD(&new->anon_vma_chain);
new->vm_next = new->vm_prev = NULL;
- dup_vma_anon_name(orig, new);
+ dup_anon_vma_name(orig, new);
}
return new;
}
void vm_area_free(struct vm_area_struct *vma)
{
- free_vma_anon_name(vma);
+ free_anon_vma_name(vma);
kmem_cache_free(vm_area_cachep, vma);
}
static void account_kernel_stack(struct task_struct *tsk, int account)
{
- void *stack = task_stack_page(tsk);
- struct vm_struct *vm = task_stack_vm_area(tsk);
-
- if (vm) {
+ if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+ struct vm_struct *vm = task_stack_vm_area(tsk);
int i;
for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB,
account * (PAGE_SIZE / 1024));
} else {
+ void *stack = task_stack_page(tsk);
+
/* All stack pages are in the same node. */
mod_lruvec_kmem_state(stack, NR_KERNEL_STACK_KB,
account * (THREAD_SIZE / 1024));
}
}
-static int memcg_charge_kernel_stack(struct task_struct *tsk)
+void exit_task_stack_account(struct task_struct *tsk)
{
-#ifdef CONFIG_VMAP_STACK
- struct vm_struct *vm = task_stack_vm_area(tsk);
- int ret;
-
- BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
+ account_kernel_stack(tsk, -1);
- if (vm) {
+ if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+ struct vm_struct *vm;
int i;
- BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
-
- for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
- /*
- * If memcg_kmem_charge_page() fails, page's
- * memory cgroup pointer is NULL, and
- * memcg_kmem_uncharge_page() in free_thread_stack()
- * will ignore this page.
- */
- ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL,
- 0);
- if (ret)
- return ret;
- }
+ vm = task_stack_vm_area(tsk);
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+ memcg_kmem_uncharge_page(vm->pages[i], 0);
}
-#endif
- return 0;
}
static void release_task_stack(struct task_struct *tsk)
@@ -430,12 +521,7 @@ static void release_task_stack(struct task_struct *tsk)
if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD))
return; /* Better to leak the stack than to free prematurely */
- account_kernel_stack(tsk, -1);
free_thread_stack(tsk);
- tsk->stack = NULL;
-#ifdef CONFIG_VMAP_STACK
- tsk->stack_vm_area = NULL;
-#endif
}
#ifdef CONFIG_THREAD_INFO_IN_TASK
@@ -874,8 +960,6 @@ void set_task_stack_end_magic(struct task_struct *tsk)
static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
{
struct task_struct *tsk;
- unsigned long *stack;
- struct vm_struct *stack_vm_area __maybe_unused;
int err;
if (node == NUMA_NO_NODE)
@@ -884,32 +968,18 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (!tsk)
return NULL;
- stack = alloc_thread_stack_node(tsk, node);
- if (!stack)
+ err = arch_dup_task_struct(tsk, orig);
+ if (err)
goto free_tsk;
- if (memcg_charge_kernel_stack(tsk))
- goto free_stack;
-
- stack_vm_area = task_stack_vm_area(tsk);
-
- err = arch_dup_task_struct(tsk, orig);
+ err = alloc_thread_stack_node(tsk, node);
+ if (err)
+ goto free_tsk;
- /*
- * arch_dup_task_struct() clobbers the stack-related fields. Make
- * sure they're properly initialized before using any stack-related
- * functions again.
- */
- tsk->stack = stack;
-#ifdef CONFIG_VMAP_STACK
- tsk->stack_vm_area = stack_vm_area;
-#endif
#ifdef CONFIG_THREAD_INFO_IN_TASK
refcount_set(&tsk->stack_refcount, 1);
#endif
-
- if (err)
- goto free_stack;
+ account_kernel_stack(tsk, 1);
err = scs_prepare(tsk, node);
if (err)
@@ -953,8 +1023,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->wake_q.next = NULL;
tsk->worker_private = NULL;
- account_kernel_stack(tsk, 1);
-
kcov_task_init(tsk);
kmap_local_fork(tsk);
@@ -967,12 +1035,17 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->use_memdelay = 0;
#endif
+#ifdef CONFIG_IOMMU_SVA
+ tsk->pasid_activated = 0;
+#endif
+
#ifdef CONFIG_MEMCG
tsk->active_memcg = NULL;
#endif
return tsk;
free_stack:
+ exit_task_stack_account(tsk);
free_thread_stack(tsk);
free_tsk:
free_task_struct(tsk);
@@ -1019,13 +1092,6 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
#endif
}
-static void mm_init_pasid(struct mm_struct *mm)
-{
-#ifdef CONFIG_IOMMU_SUPPORT
- mm->pasid = INIT_PASID;
-#endif
-}
-
static void mm_init_uprobes_state(struct mm_struct *mm)
{
#ifdef CONFIG_UPROBES
@@ -1054,7 +1120,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
mm_init_cpumask(mm);
mm_init_aio(mm);
mm_init_owner(mm, p);
- mm_init_pasid(mm);
+ mm_pasid_init(mm);
RCU_INIT_POINTER(mm->exe_file, NULL);
mmu_notifier_subscriptions_init(mm);
init_tlb_flush_pending(mm);
@@ -1121,6 +1187,7 @@ static inline void __mmput(struct mm_struct *mm)
}
if (mm->binfmt)
module_put(mm->binfmt->module);
+ mm_pasid_drop(mm);
mmdrop(mm);
}
@@ -2021,18 +2088,18 @@ static __latent_entropy struct task_struct *copy_process(
#ifdef CONFIG_PROVE_LOCKING
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
+ retval = copy_creds(p, clone_flags);
+ if (retval < 0)
+ goto bad_fork_free;
+
retval = -EAGAIN;
if (is_ucounts_overlimit(task_ucounts(p), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
if (p->real_cred->user != INIT_USER &&
!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
- goto bad_fork_free;
+ goto bad_fork_cleanup_count;
}
current->flags &= ~PF_NPROC_EXCEEDED;
- retval = copy_creds(p, clone_flags);
- if (retval < 0)
- goto bad_fork_free;
-
/*
* If multiple threads are within copy_process(), then this check
* triggers too late. This doesn't hurt, the check is only there
@@ -2267,6 +2334,17 @@ static __latent_entropy struct task_struct *copy_process(
goto bad_fork_put_pidfd;
/*
+ * Now that the cgroups are pinned, re-clone the parent cgroup and put
+ * the new task on the correct runqueue. All this *before* the task
+ * becomes visible.
+ *
+ * This isn't part of ->can_fork() because while the re-cloning is
+ * cgroup specific, it unconditionally needs to place the task on a
+ * runqueue.
+ */
+ sched_cgroup_fork(p, args);
+
+ /*
* From this point on we must avoid any synchronous user-space
* communication until we take the tasklist-lock. In particular, we do
* not want user-space to be able to predict the process start-time by
@@ -2323,10 +2401,6 @@ static __latent_entropy struct task_struct *copy_process(
goto bad_fork_cancel_cgroup;
}
- /* past the last point of failure */
- if (pidfile)
- fd_install(pidfd, pidfile);
-
init_task_pid_links(p);
if (likely(p->pid)) {
ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
@@ -2375,8 +2449,11 @@ static __latent_entropy struct task_struct *copy_process(
syscall_tracepoint_update(p);
write_unlock_irq(&tasklist_lock);
+ if (pidfile)
+ fd_install(pidfd, pidfile);
+
proc_fork_connector(p);
- sched_post_fork(p, args);
+ sched_post_fork(p);
cgroup_post_fork(p, args);
perf_event_fork(p);
@@ -2441,6 +2518,7 @@ bad_fork_cleanup_count:
exit_creds(p);
bad_fork_free:
WRITE_ONCE(p->__state, TASK_DEAD);
+ exit_task_stack_account(p);
put_task_stack(p);
delayed_free_task(p);
fork_out:
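The dup_task_struct() hunks above move arch_dup_task_struct() ahead of the stack allocation and fold the stack accounting into dedicated helpers, so each failure point can unwind everything acquired before it. A minimal standalone sketch of that goto-unwind ordering, with purely illustrative names and plain malloc/free standing in for the kernel allocators:

#include <stdio.h>
#include <stdlib.h>

static int setup_task(int fail_at)
{
	void *tsk, *stack = NULL;

	tsk = malloc(64);                  /* alloc_task_struct_node()  */
	if (!tsk || fail_at == 1)          /* arch_dup_task_struct()    */
		goto free_tsk;
	stack = malloc(4096);              /* alloc_thread_stack_node() */
	if (!stack)
		goto free_tsk;
	if (fail_at == 2)                  /* e.g. scs_prepare() fails  */
		goto free_stack;
	free(stack);
	free(tsk);
	return 0;

free_stack:                                /* exit_task_stack_account() + free */
	free(stack);
free_tsk:
	free(tsk);                         /* free(NULL) is a no-op     */
	return -1;
}

int main(void)
{
	printf("%d %d %d\n", setup_task(0), setup_task(1), setup_task(2));
	return 0;
}

The key property, mirrored from the patch: a step that fails only jumps to labels that release resources acquired strictly before it, in reverse order.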
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 51dd822a8060..b22ef1efe751 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -302,7 +302,7 @@ again:
* found it, but truncated or holepunched or subjected to
* invalidate_complete_page2 before we got the page lock (also
* cases which we are happy to fail). And we hold a reference,
- * so refcount care in invalidate_complete_page's remove_mapping
+ * so refcount care in invalidate_inode_page's remove_mapping
* prevents drop_caches from setting mapping to NULL beneath us.
*
* The case we do have to guard against is when memory pressure made
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index c09324663088..54af0deb239b 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -38,7 +38,7 @@ struct irqaction chained_action = {
* @irq: irq number
* @chip: pointer to irq chip description structure
*/
-int irq_set_chip(unsigned int irq, struct irq_chip *chip)
+int irq_set_chip(unsigned int irq, const struct irq_chip *chip)
{
unsigned long flags;
struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0);
@@ -46,10 +46,7 @@ int irq_set_chip(unsigned int irq, struct irq_chip *chip)
if (!desc)
return -EINVAL;
- if (!chip)
- chip = &no_irq_chip;
-
- desc->irq_data.chip = chip;
+ desc->irq_data.chip = (struct irq_chip *)(chip ?: &no_irq_chip);
irq_put_desc_unlock(desc, flags);
/*
* For !CONFIG_SPARSE_IRQ make the irq show up in
@@ -1073,7 +1070,7 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle,
EXPORT_SYMBOL_GPL(irq_set_chained_handler_and_data);
void
-irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
+irq_set_chip_and_handler_name(unsigned int irq, const struct irq_chip *chip,
irq_flow_handler_t handle, const char *name)
{
irq_set_chip(irq, chip);
@@ -1558,6 +1555,14 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
return 0;
}
+static struct device *irq_get_parent_device(struct irq_data *data)
+{
+ if (data->domain)
+ return data->domain->dev;
+
+ return NULL;
+}
+
/**
* irq_chip_pm_get - Enable power for an IRQ chip
* @data: Pointer to interrupt specific data
@@ -1567,12 +1572,13 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
*/
int irq_chip_pm_get(struct irq_data *data)
{
+ struct device *dev = irq_get_parent_device(data);
int retval;
- if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) {
- retval = pm_runtime_get_sync(data->chip->parent_device);
+ if (IS_ENABLED(CONFIG_PM) && dev) {
+ retval = pm_runtime_get_sync(dev);
if (retval < 0) {
- pm_runtime_put_noidle(data->chip->parent_device);
+ pm_runtime_put_noidle(dev);
return retval;
}
}
@@ -1590,10 +1596,11 @@ int irq_chip_pm_get(struct irq_data *data)
*/
int irq_chip_pm_put(struct irq_data *data)
{
+ struct device *dev = irq_get_parent_device(data);
int retval = 0;
- if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device)
- retval = pm_runtime_put(data->chip->parent_device);
+ if (IS_ENABLED(CONFIG_PM) && dev)
+ retval = pm_runtime_put(dev);
return (retval < 0) ? retval : 0;
}
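The constified irq_set_chip() above uses the GNU "Elvis" form a ?: b to fall back to no_irq_chip. A standalone demo of that operator (a GNU C extension, accepted by gcc and clang), with made-up names:

#include <stdio.h>

static const char no_irq_chip_name[] = "none";

static const char *chip_name(const char *requested)
{
	/* a ?: b yields a when a is non-NULL (evaluating a once), else b */
	return requested ?: no_irq_chip_name;
}

int main(void)
{
	printf("%s\n", chip_name("gpio-chip"));	/* gpio-chip */
	printf("%s\n", chip_name(NULL));	/* none      */
	return 0;
}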
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 39a41c56ad4f..1ed2b1739363 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -176,10 +176,10 @@ static bool hk_should_isolate(struct irq_data *data, unsigned int cpu)
{
const struct cpumask *hk_mask;
- if (!housekeeping_enabled(HK_FLAG_MANAGED_IRQ))
+ if (!housekeeping_enabled(HK_TYPE_MANAGED_IRQ))
return false;
- hk_mask = housekeeping_cpumask(HK_FLAG_MANAGED_IRQ);
+ hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
if (cpumask_subset(irq_data_get_effective_affinity_mask(data), hk_mask))
return false;
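The hk_should_isolate() logic above skips migration when the IRQ's effective affinity already lies within the housekeeping mask. A userspace model of the subset test, using a single word in place of a cpumask:

#include <stdbool.h>
#include <stdio.h>

static bool mask_subset(unsigned long a, unsigned long b)
{
	/* every bit set in a is also set in b */
	return (a & ~b) == 0;
}

int main(void)
{
	unsigned long hk = 0x0f;		/* housekeeping CPUs 0-3  */

	printf("%d\n", mask_subset(0x03, hk));	/* 1: already confined    */
	printf("%d\n", mask_subset(0x30, hk));	/* 0: needs migration     */
	return 0;
}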
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index e4cff358b437..2b43f5f5033d 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -69,8 +69,12 @@ irq_debug_show_chip(struct seq_file *m, struct irq_data *data, int ind)
seq_printf(m, "chip: None\n");
return;
}
- seq_printf(m, "%*schip: %s\n", ind, "", chip->name);
- seq_printf(m, "%*sflags: 0x%lx\n", ind + 1, "", chip->flags);
+ seq_printf(m, "%*schip: ", ind, "");
+ if (chip->irq_print_chip)
+ chip->irq_print_chip(data, m);
+ else
+ seq_printf(m, "%s", chip->name);
+ seq_printf(m, "\n%*sflags: 0x%lx\n", ind + 1, "", chip->flags);
irq_debug_show_bits(m, ind, chip->flags, irqchip_flags,
ARRAY_SIZE(irqchip_flags));
}
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 2267e6527db3..939d21cd55c3 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -640,7 +640,7 @@ int handle_irq_desc(struct irq_desc *desc)
return -EINVAL;
data = irq_desc_get_irq_data(desc);
- if (WARN_ON_ONCE(!in_irq() && handle_enforce_irqctx(data)))
+ if (WARN_ON_ONCE(!in_hardirq() && handle_enforce_irqctx(data)))
return -EPERM;
generic_handle_irq_desc(desc);
@@ -662,6 +662,29 @@ int generic_handle_irq(unsigned int irq)
}
EXPORT_SYMBOL_GPL(generic_handle_irq);
+/**
+ * generic_handle_irq_safe - Invoke the handler for a particular irq from any
+ * context.
+ * @irq: The irq number to handle
+ *
+ * Returns: 0 on success, a negative value on error.
+ *
+ * This function can be called from any context (IRQ or process context). It
+ * will report an error if not invoked from IRQ context and the irq has been
+ * marked to enforce IRQ-context only.
+ */
+int generic_handle_irq_safe(unsigned int irq)
+{
+ unsigned long flags;
+ int ret;
+
+ local_irq_save(flags);
+ ret = handle_irq_desc(irq_to_desc(irq));
+ local_irq_restore(flags);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(generic_handle_irq_safe);
+
#ifdef CONFIG_IRQ_DOMAIN
/**
* generic_handle_domain_irq - Invoke the handler for a HW irq belonging
@@ -676,7 +699,7 @@ EXPORT_SYMBOL_GPL(generic_handle_irq);
*/
int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq)
{
- WARN_ON_ONCE(!in_irq());
+ WARN_ON_ONCE(!in_hardirq());
return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
}
EXPORT_SYMBOL_GPL(generic_handle_domain_irq);
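The new generic_handle_irq_safe() exported above exists for callers outside hard-IRQ context. A hypothetical driver fragment (a sketch, not from this patch set; every identifier except generic_handle_irq_safe() itself is made up) resending a demultiplexed interrupt from a threaded handler, where plain generic_handle_irq() would trip the in_hardirq() enforcement:

static irqreturn_t demux_thread_fn(int irq, void *data)
{
	struct demux_priv *priv = data;		/* hypothetical private data */
	int err = generic_handle_irq_safe(priv->child_irq);

	return err ? IRQ_NONE : IRQ_HANDLED;
}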
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index bf38c546aa25..d5ce96510549 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1319,7 +1319,8 @@ EXPORT_SYMBOL_GPL(irq_domain_get_irq_data);
* @chip_data: The associated chip data
*/
int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq,
- irq_hw_number_t hwirq, struct irq_chip *chip,
+ irq_hw_number_t hwirq,
+ const struct irq_chip *chip,
void *chip_data)
{
struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
@@ -1328,7 +1329,7 @@ int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq,
return -ENOENT;
irq_data->hwirq = hwirq;
- irq_data->chip = chip ? chip : &no_irq_chip;
+ irq_data->chip = (struct irq_chip *)(chip ? chip : &no_irq_chip);
irq_data->chip_data = chip_data;
return 0;
@@ -1347,7 +1348,7 @@ EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip);
* @handler_name: The interrupt handler name
*/
void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
- irq_hw_number_t hwirq, struct irq_chip *chip,
+ irq_hw_number_t hwirq, const struct irq_chip *chip,
void *chip_data, irq_flow_handler_t handler,
void *handler_data, const char *handler_name)
{
@@ -1853,7 +1854,7 @@ EXPORT_SYMBOL_GPL(irq_domain_get_irq_data);
* @handler_name: The interrupt handler name
*/
void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
- irq_hw_number_t hwirq, struct irq_chip *chip,
+ irq_hw_number_t hwirq, const struct irq_chip *chip,
void *chip_data, irq_flow_handler_t handler,
void *handler_data, const char *handler_name)
{
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index f23ffd30385b..c03f71d5ec10 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -247,13 +247,13 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
* online.
*/
if (irqd_affinity_is_managed(data) &&
- housekeeping_enabled(HK_FLAG_MANAGED_IRQ)) {
+ housekeeping_enabled(HK_TYPE_MANAGED_IRQ)) {
const struct cpumask *hk_mask, *prog_mask;
static DEFINE_RAW_SPINLOCK(tmp_mask_lock);
static struct cpumask tmp_mask;
- hk_mask = housekeeping_cpumask(HK_FLAG_MANAGED_IRQ);
+ hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
raw_spin_lock(&tmp_mask_lock);
cpumask_and(&tmp_mask, mask, hk_mask);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 38c6dd822da8..d100d5a15b38 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -356,7 +356,7 @@ static int kthread(void *_create)
* back to default in case they have been changed.
*/
sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
- set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_KTHREAD));
+ set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_KTHREAD));
/* OK, tell user we're spawned, wait for stop or wakeup */
__set_current_state(TASK_UNINTERRUPTIBLE);
@@ -722,7 +722,7 @@ int kthreadd(void *unused)
/* Setup a clean context for our children to inherit. */
set_task_comm(tsk, "kthreadd");
ignore_signals(tsk);
- set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_FLAG_KTHREAD));
+ set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_TYPE_KTHREAD));
set_mems_allowed(node_states[N_MEMORY]);
current->flags |= PF_NOFREEZE;
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 4a882f83aeb9..c06cab6546ed 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -183,11 +183,9 @@ static DECLARE_BITMAP(list_entries_in_use, MAX_LOCKDEP_ENTRIES);
static struct hlist_head lock_keys_hash[KEYHASH_SIZE];
unsigned long nr_lock_classes;
unsigned long nr_zapped_classes;
-#ifndef CONFIG_DEBUG_LOCKDEP
-static
-#endif
+unsigned long max_lock_class_idx;
struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
-static DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS);
+DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS);
static inline struct lock_class *hlock_class(struct held_lock *hlock)
{
@@ -338,7 +336,7 @@ static inline void lock_release_holdtime(struct held_lock *hlock)
* elements. These elements are linked together by the lock_entry member in
* struct lock_class.
*/
-LIST_HEAD(all_lock_classes);
+static LIST_HEAD(all_lock_classes);
static LIST_HEAD(free_lock_classes);
/**
@@ -1252,6 +1250,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
struct lockdep_subclass_key *key;
struct hlist_head *hash_head;
struct lock_class *class;
+ int idx;
DEBUG_LOCKS_WARN_ON(!irqs_disabled());
@@ -1317,6 +1316,9 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
* of classes.
*/
list_move_tail(&class->lock_entry, &all_lock_classes);
+ idx = class - lock_classes;
+ if (idx > max_lock_class_idx)
+ max_lock_class_idx = idx;
if (verbose(class)) {
graph_unlock();
@@ -3462,7 +3464,7 @@ struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i)
u16 chain_hlock = chain_hlocks[chain->base + i];
unsigned int class_idx = chain_hlock_class_idx(chain_hlock);
- return lock_classes + class_idx - 1;
+ return lock_classes + class_idx;
}
/*
@@ -3530,7 +3532,7 @@ static void print_chain_keys_chain(struct lock_chain *chain)
hlock_id = chain_hlocks[chain->base + i];
chain_key = print_chain_key_iteration(hlock_id, chain_key);
- print_lock_name(lock_classes + chain_hlock_class_idx(hlock_id) - 1);
+ print_lock_name(lock_classes + chain_hlock_class_idx(hlock_id));
printk("\n");
}
}
@@ -6000,6 +6002,8 @@ static void zap_class(struct pending_free *pf, struct lock_class *class)
WRITE_ONCE(class->name, NULL);
nr_lock_classes--;
__clear_bit(class - lock_classes, lock_classes_in_use);
+ if (class - lock_classes == max_lock_class_idx)
+ max_lock_class_idx--;
} else {
WARN_ONCE(true, "%s() failed for class %s\n", __func__,
class->name);
@@ -6011,13 +6015,10 @@ static void zap_class(struct pending_free *pf, struct lock_class *class)
static void reinit_class(struct lock_class *class)
{
- void *const p = class;
- const unsigned int offset = offsetof(struct lock_class, key);
-
WARN_ON_ONCE(!class->lock_entry.next);
WARN_ON_ONCE(!list_empty(&class->locks_after));
WARN_ON_ONCE(!list_empty(&class->locks_before));
- memset(p + offset, 0, sizeof(*class) - offset);
+ memset_startat(class, 0, key);
WARN_ON_ONCE(!class->lock_entry.next);
WARN_ON_ONCE(!list_empty(&class->locks_after));
WARN_ON_ONCE(!list_empty(&class->locks_before));
@@ -6290,7 +6291,13 @@ void lockdep_reset_lock(struct lockdep_map *lock)
lockdep_reset_lock_reg(lock);
}
-/* Unregister a dynamically allocated key. */
+/*
+ * Unregister a dynamically allocated key.
+ *
+ * Unlike lockdep_register_key(), a search is always done to find a matching
+ * key, irrespective of debug_locks, to avoid a potential invalid access to
+ * freed memory in a lock_class entry.
+ */
void lockdep_unregister_key(struct lock_class_key *key)
{
struct hlist_head *hash_head = keyhashentry(key);
@@ -6305,10 +6312,8 @@ void lockdep_unregister_key(struct lock_class_key *key)
return;
raw_local_irq_save(flags);
- if (!graph_lock())
- goto out_irq;
+ lockdep_lock();
- pf = get_pending_free();
hlist_for_each_entry_rcu(k, hash_head, hash_entry) {
if (k == key) {
hlist_del_rcu(&k->hash_entry);
@@ -6316,11 +6321,13 @@ void lockdep_unregister_key(struct lock_class_key *key)
break;
}
}
- WARN_ON_ONCE(!found);
- __lockdep_free_key_range(pf, key, 1);
- call_rcu_zapped(pf);
- graph_unlock();
-out_irq:
+ WARN_ON_ONCE(!found && debug_locks);
+ if (found) {
+ pf = get_pending_free();
+ __lockdep_free_key_range(pf, key, 1);
+ call_rcu_zapped(pf);
+ }
+ lockdep_unlock();
raw_local_irq_restore(flags);
/* Wait until is_dynamic_key() has finished accessing k->hash_entry. */
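The reinit_class() hunk above replaces open-coded offsetof() arithmetic with memset_startat(). A standalone equivalent of that helper (a sketch; requires GNU C typeof, so gcc or clang), zeroing an object from a named member through to its end:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define memset_startat(obj, v, member)					\
	memset((char *)(obj) + offsetof(typeof(*(obj)), member), (v),	\
	       sizeof(*(obj)) - offsetof(typeof(*(obj)), member))

struct lock_class_demo {
	int lock_entry;		/* preserved                          */
	int key;		/* first field cleared                */
	int usage_mask;		/* ... through the end of the struct  */
};

int main(void)
{
	struct lock_class_demo c = { 1, 2, 3 };

	memset_startat(&c, 0, key);
	printf("%d %d %d\n", c.lock_entry, c.key, c.usage_mask);  /* 1 0 0 */
	return 0;
}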
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index ecb8662e7a4e..bbe9000260d0 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -121,7 +121,6 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ =
#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5)
-extern struct list_head all_lock_classes;
extern struct lock_chain lock_chains[];
#define LOCK_USAGE_CHARS (2*XXX_LOCK_USAGE_STATES + 1)
@@ -151,6 +150,10 @@ extern unsigned int nr_large_chain_blocks;
extern unsigned int max_lockdep_depth;
extern unsigned int max_bfs_queue_depth;
+extern unsigned long max_lock_class_idx;
+
+extern struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
+extern unsigned long lock_classes_in_use[];
#ifdef CONFIG_PROVE_LOCKING
extern unsigned long lockdep_count_forward_deps(struct lock_class *);
@@ -205,7 +208,6 @@ struct lockdep_stats {
};
DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);
-extern struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
#define __debug_atomic_inc(ptr) \
this_cpu_inc(lockdep_stats.ptr);
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index b8d9a050c337..15fdc7fa5c68 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -24,14 +24,33 @@
#include "lockdep_internals.h"
+/*
+ * Since iteration of lock_classes is done without holding the lockdep lock,
+ * it is not safe to iterate the all_lock_classes list directly, as the
+ * iteration may branch off to free_lock_classes or the zapped list. Instead,
+ * iteration is done
+ * directly on the lock_classes array by checking the lock_classes_in_use
+ * bitmap and max_lock_class_idx.
+ */
+#define iterate_lock_classes(idx, class) \
+ for (idx = 0, class = lock_classes; idx <= max_lock_class_idx; \
+ idx++, class++)
+
static void *l_next(struct seq_file *m, void *v, loff_t *pos)
{
- return seq_list_next(v, &all_lock_classes, pos);
+ struct lock_class *class = v;
+
+ ++class;
+ *pos = class - lock_classes;
+ return (*pos > max_lock_class_idx) ? NULL : class;
}
static void *l_start(struct seq_file *m, loff_t *pos)
{
- return seq_list_start_head(&all_lock_classes, *pos);
+ unsigned long idx = *pos;
+
+ if (idx > max_lock_class_idx)
+ return NULL;
+ return lock_classes + idx;
}
static void l_stop(struct seq_file *m, void *v)
@@ -57,14 +76,16 @@ static void print_name(struct seq_file *m, struct lock_class *class)
static int l_show(struct seq_file *m, void *v)
{
- struct lock_class *class = list_entry(v, struct lock_class, lock_entry);
+ struct lock_class *class = v;
struct lock_list *entry;
char usage[LOCK_USAGE_CHARS];
+ int idx = class - lock_classes;
- if (v == &all_lock_classes) {
+ if (v == lock_classes)
seq_printf(m, "all lock classes:\n");
+
+ if (!test_bit(idx, lock_classes_in_use))
return 0;
- }
seq_printf(m, "%p", class->key);
#ifdef CONFIG_DEBUG_LOCKDEP
@@ -220,8 +241,11 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
#ifdef CONFIG_PROVE_LOCKING
struct lock_class *class;
+ unsigned long idx;
- list_for_each_entry(class, &all_lock_classes, lock_entry) {
+ iterate_lock_classes(idx, class) {
+ if (!test_bit(idx, lock_classes_in_use))
+ continue;
if (class->usage_mask == 0)
nr_unused++;
@@ -254,6 +278,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
sum_forward_deps += lockdep_count_forward_deps(class);
}
+
#ifdef CONFIG_DEBUG_LOCKDEP
DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused);
#endif
@@ -345,6 +370,8 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
seq_printf(m, " max bfs queue depth: %11u\n",
max_bfs_queue_depth);
#endif
+ seq_printf(m, " max lock class index: %11lu\n",
+ max_lock_class_idx);
lockdep_stats_debug_show(m);
seq_printf(m, " debug_locks: %11u\n",
debug_locks);
@@ -622,12 +649,16 @@ static int lock_stat_open(struct inode *inode, struct file *file)
if (!res) {
struct lock_stat_data *iter = data->stats;
struct seq_file *m = file->private_data;
+ unsigned long idx;
- list_for_each_entry(class, &all_lock_classes, lock_entry) {
+ iterate_lock_classes(idx, class) {
+ if (!test_bit(idx, lock_classes_in_use))
+ continue;
iter->class = class;
iter->stats = lock_stats(class);
iter++;
}
+
data->iter_end = iter;
sort(data->stats, data->iter_end - data->stats,
@@ -645,6 +676,7 @@ static ssize_t lock_stat_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct lock_class *class;
+ unsigned long idx;
char c;
if (count) {
@@ -654,8 +686,11 @@ static ssize_t lock_stat_write(struct file *file, const char __user *buf,
if (c != '0')
return count;
- list_for_each_entry(class, &all_lock_classes, lock_entry)
+ iterate_lock_classes(idx, class) {
+ if (!test_bit(idx, lock_classes_in_use))
+ continue;
clear_lock_stats(class);
+ }
}
return count;
}
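The iterate_lock_classes() conversion above trades list walking for an indexed scan guarded by a bitmap. A standalone model of that scheme: walk a fixed array up to the highest index ever used and skip slots whose in-use bit is clear, instead of following a linked list that can mutate underfoot:

#include <stdio.h>

#define MAX_CLASSES 64

static unsigned long in_use;		/* bit n set => classes[n] valid */
static int classes[MAX_CLASSES];
static unsigned int max_idx;

int main(void)
{
	classes[0] = 10;
	classes[2] = 30;
	in_use = (1UL << 0) | (1UL << 2);
	max_idx = 2;

	for (unsigned int i = 0; i <= max_idx; i++) {
		if (!(in_use & (1UL << i)))
			continue;	/* freed or never registered */
		printf("class %u = %d\n", i, classes[i]);
	}
	return 0;
}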
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 70a32a576f3f..c9fdae94e098 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -7,6 +7,7 @@
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
+#include <linux/sched/debug.h>
#include <linux/errno.h>
int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
@@ -162,7 +163,7 @@ static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
__set_current_state(TASK_RUNNING);
}
-bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
+bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
{
if (__percpu_down_read_trylock(sem))
return true;
@@ -211,7 +212,7 @@ static bool readers_active_check(struct percpu_rw_semaphore *sem)
return true;
}
-void percpu_down_write(struct percpu_rw_semaphore *sem)
+void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
{
might_sleep();
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 69aba4abe104..acde5d6f1254 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1048,7 +1048,7 @@ out_nolock:
/*
* Wait until we successfully acquire the write lock
*/
-static struct rw_semaphore *
+static struct rw_semaphore __sched *
rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
{
long count;
diff --git a/kernel/module.c b/kernel/module.c
index 24dab046e16c..6cea788fd965 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -335,7 +335,7 @@ static inline void add_taint_module(struct module *mod, unsigned flag,
/*
* A thread that wants to hold a reference to a module only while it
- * is running can call this to safely exit. nfsd and lockd use this.
+ * is running can call this to safely exit.
*/
void __noreturn __module_put_and_kthread_exit(struct module *mod, long code)
{
@@ -3725,12 +3725,6 @@ static noinline int do_init_module(struct module *mod)
}
freeinit->module_init = mod->init_layout.base;
- /*
- * We want to find out whether @mod uses async during init. Clear
- * PF_USED_ASYNC. async_schedule*() will set it.
- */
- current->flags &= ~PF_USED_ASYNC;
-
do_mod_ctors(mod);
/* Start the module */
if (mod->init != NULL)
@@ -3756,22 +3750,13 @@ static noinline int do_init_module(struct module *mod)
/*
* We need to finish all async code before the module init sequence
- * is done. This has potential to deadlock. For example, a newly
- * detected block device can trigger request_module() of the
- * default iosched from async probing task. Once userland helper
- * reaches here, async_synchronize_full() will wait on the async
- * task waiting on request_module() and deadlock.
- *
- * This deadlock is avoided by performing async_synchronize_full()
- * iff module init queued any async jobs. This isn't a full
- * solution as it will deadlock the same if module loading from
- * async jobs nests more than once; however, due to the various
- * constraints, this hack seems to be the best option for now.
- * Please refer to the following thread for details.
+ * is done. This has the potential to deadlock if synchronous module
+ * loading is requested from async context (which is not allowed!).
*
- * http://thread.gmane.org/gmane.linux.kernel/1420814
+ * See commit 0fdff3ec6d87 ("async, kmod: warn on synchronous
+ * request_module() from async workers") for more details.
*/
- if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC))
+ if (!mod->async_probe_requested)
async_synchronize_full();
ftrace_free_mem(mod, mod->init_layout.base, mod->init_layout.base +
diff --git a/kernel/module_decompress.c b/kernel/module_decompress.c
index b01c69c2ff99..ffef98a20320 100644
--- a/kernel/module_decompress.c
+++ b/kernel/module_decompress.c
@@ -250,6 +250,7 @@ void module_decompress_cleanup(struct load_info *info)
info->max_pages = info->used_pages = 0;
}
+#ifdef CONFIG_SYSFS
static ssize_t compression_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -269,3 +270,4 @@ static int __init module_decompress_sysfs_init(void)
return 0;
}
late_initcall(module_decompress_sysfs_init);
+#endif
diff --git a/kernel/padata.c b/kernel/padata.c
index 18d3a5c699d8..e5819bb8bd1d 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -181,7 +181,7 @@ int padata_do_parallel(struct padata_shell *ps,
goto out;
if (!cpumask_test_cpu(*cb_cpu, pd->cpumask.cbcpu)) {
- if (!cpumask_weight(pd->cpumask.cbcpu))
+ if (cpumask_empty(pd->cpumask.cbcpu))
goto out;
/* Select an alternate fallback CPU and notify the caller. */
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index e6af502c2fd7..938d5c78b421 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -28,7 +28,6 @@
#include <linux/gfp.h>
#include <linux/syscore_ops.h>
#include <linux/ctype.h>
-#include <linux/genhd.h>
#include <linux/ktime.h>
#include <linux/security.h>
#include <linux/secretmem.h>
@@ -689,8 +688,10 @@ static int load_image_and_restore(void)
lock_device_hotplug();
error = create_basic_memory_bitmaps();
- if (error)
+ if (error) {
+ swsusp_close(FMODE_READ | FMODE_EXCL);
goto Unlock;
+ }
error = swsusp_read(&flags);
swsusp_close(FMODE_READ | FMODE_EXCL);
@@ -1328,7 +1329,7 @@ static int __init resumedelay_setup(char *str)
int rc = kstrtouint(str, 0, &resume_delay);
if (rc)
- return rc;
+ pr_warn("resumedelay: bad option string '%s'\n", str);
return 1;
}
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 44169f3081fd..7e646079fbeb 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -504,7 +504,10 @@ static ssize_t pm_wakeup_irq_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buf)
{
- return pm_wakeup_irq ? sprintf(buf, "%u\n", pm_wakeup_irq) : -ENODATA;
+ if (!pm_wakeup_irq())
+ return -ENODATA;
+
+ return sprintf(buf, "%u\n", pm_wakeup_irq());
}
power_attr_ro(pm_wakeup_irq);
diff --git a/kernel/power/process.c b/kernel/power/process.c
index b7e7798637b8..11b570fcf049 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -134,7 +134,7 @@ int freeze_processes(void)
if (!pm_freezing)
atomic_inc(&system_freezing_cnt);
- pm_wakeup_clear(true);
+ pm_wakeup_clear(0);
pr_info("Freezing user space processes ... ");
pm_freezing = true;
error = try_to_freeze_tasks(true);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index f7a986078213..330d49937692 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -978,8 +978,7 @@ static void memory_bm_recycle(struct memory_bitmap *bm)
* Register a range of page frames the contents of which should not be saved
* during hibernation (to be used in the early initialization code).
*/
-void __init __register_nosave_region(unsigned long start_pfn,
- unsigned long end_pfn, int use_kmalloc)
+void __init register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
{
struct nosave_region *region;
@@ -995,18 +994,12 @@ void __init __register_nosave_region(unsigned long start_pfn,
goto Report;
}
}
- if (use_kmalloc) {
- /* During init, this shouldn't fail */
- region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
- BUG_ON(!region);
- } else {
- /* This allocation cannot fail */
- region = memblock_alloc(sizeof(struct nosave_region),
- SMP_CACHE_BYTES);
- if (!region)
- panic("%s: Failed to allocate %zu bytes\n", __func__,
- sizeof(struct nosave_region));
- }
+ /* This allocation cannot fail */
+ region = memblock_alloc(sizeof(struct nosave_region),
+ SMP_CACHE_BYTES);
+ if (!region)
+ panic("%s: Failed to allocate %zu bytes\n", __func__,
+ sizeof(struct nosave_region));
region->start_pfn = start_pfn;
region->end_pfn = end_pfn;
list_add_tail(&region->list, &nosave_regions);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 80cc1f0f502b..6fcdee7e87a5 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -136,8 +136,6 @@ static void s2idle_loop(void)
break;
}
- pm_wakeup_clear(false);
-
s2idle_enter();
}
diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c
index d20526c5be15..b663a97f5867 100644
--- a/kernel/power/suspend_test.c
+++ b/kernel/power/suspend_test.c
@@ -157,22 +157,22 @@ static int __init setup_test_suspend(char *value)
value++;
suspend_type = strsep(&value, ",");
if (!suspend_type)
- return 0;
+ return 1;
repeat = strsep(&value, ",");
if (repeat) {
if (kstrtou32(repeat, 0, &test_repeat_count_max))
- return 0;
+ return 1;
}
for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++)
if (!strcmp(pm_labels[i], suspend_type)) {
test_state_label = pm_labels[i];
- return 0;
+ return 1;
}
printk(warn_bad_state, suspend_type);
- return 0;
+ return 1;
}
__setup("test_suspend", setup_test_suspend);
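Both resumedelay_setup() and setup_test_suspend() above now return 1 on every path. That follows the __setup() convention: returning 1 marks "name=value" as consumed by the kernel, while returning 0 causes the unrecognized string to be passed on to init as an argv/envp entry, so a parse failure should warn and still return 1. A hedged kernel-style sketch of the pattern ("foo=" and foo_val are made up for illustration):

static unsigned int foo_val;

static int __init foo_setup(char *str)
{
	if (kstrtouint(str, 0, &foo_val))
		pr_warn("foo: bad option string '%s'\n", str);
	return 1;	/* consume the option even when it is malformed */
}
__setup("foo=", foo_setup);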
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index ad10359030a4..91fffdd2c7fb 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -16,7 +16,6 @@
#include <linux/file.h>
#include <linux/delay.h>
#include <linux/bitops.h>
-#include <linux/genhd.h>
#include <linux/device.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
@@ -89,7 +88,7 @@ struct swap_map_page_list {
struct swap_map_page_list *next;
};
-/**
+/*
* The swap_map_handle structure is used for handling swap in
* a file-alike way
*/
@@ -117,7 +116,7 @@ struct swsusp_header {
static struct swsusp_header *swsusp_header;
-/**
+/*
* The following functions are used for tracing the allocated
* swap pages, so that they can be freed in case of an error.
*/
@@ -171,7 +170,7 @@ static int swsusp_extents_insert(unsigned long swap_offset)
return 0;
}
-/**
+/*
* alloc_swapdev_block - allocate a swap page and register that it has
* been allocated, so that it can be freed in case of an error.
*/
@@ -190,7 +189,7 @@ sector_t alloc_swapdev_block(int swap)
return 0;
}
-/**
+/*
* free_all_swap_pages - free swap pages allocated for saving image data.
* It also frees the extents used to register which swap entries had been
* allocated.
@@ -277,10 +276,9 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr,
struct bio *bio;
int error = 0;
- bio = bio_alloc(GFP_NOIO | __GFP_HIGH, 1);
+ bio = bio_alloc(hib_resume_bdev, 1, op | op_flags,
+ GFP_NOIO | __GFP_HIGH);
bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9);
- bio_set_dev(bio, hib_resume_bdev);
- bio_set_op_attrs(bio, op, op_flags);
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
pr_err("Adding page to bio failed at %llu\n",
diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c
index 105df4dfc783..52571dcad768 100644
--- a/kernel/power/wakelock.c
+++ b/kernel/power/wakelock.c
@@ -39,23 +39,20 @@ ssize_t pm_show_wakelocks(char *buf, bool show_active)
{
struct rb_node *node;
struct wakelock *wl;
- char *str = buf;
- char *end = buf + PAGE_SIZE;
+ int len = 0;
mutex_lock(&wakelocks_lock);
for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) {
wl = rb_entry(node, struct wakelock, node);
if (wl->ws->active == show_active)
- str += scnprintf(str, end - str, "%s ", wl->name);
+ len += sysfs_emit_at(buf, len, "%s ", wl->name);
}
- if (str > buf)
- str--;
- str += scnprintf(str, end - str, "\n");
+ len += sysfs_emit_at(buf, len, "\n");
mutex_unlock(&wakelocks_lock);
- return (str - buf);
+ return len;
}
#if CONFIG_PM_WAKELOCKS_LIMIT > 0
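The pm_show_wakelocks() change above replaces manual scnprintf() pointer arithmetic with sysfs_emit_at(), which bounds writes to the page-sized sysfs buffer and returns the number of characters added, so callers accumulate a plain integer offset. A hypothetical show() callback using the same pattern (illustrative names only):

static ssize_t items_show(struct kobject *kobj,
			  struct kobj_attribute *attr, char *buf)
{
	static const char * const names[] = { "alpha", "beta", "gamma" };
	int len = 0;
	int i;

	for (i = 0; i < ARRAY_SIZE(names); i++)
		len += sysfs_emit_at(buf, len, "%s ", names[i]);
	len += sysfs_emit_at(buf, len, "\n");
	return len;
}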
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 82abfaf3c2aa..da03c15ecc89 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -93,6 +93,12 @@ EXPORT_SYMBOL_GPL(console_drivers);
*/
int __read_mostly suppress_printk;
+/*
+ * During panic, heavy printk by other CPUs can delay the
+ * panic and risk deadlock on console resources.
+ */
+static int __read_mostly suppress_panic_printk;
+
#ifdef CONFIG_LOCKDEP
static struct lockdep_map console_lock_dep_map = {
.name = "console_lock"
@@ -146,8 +152,10 @@ static int __control_devkmsg(char *str)
static int __init control_devkmsg(char *str)
{
- if (__control_devkmsg(str) < 0)
+ if (__control_devkmsg(str) < 0) {
+ pr_warn("printk.devkmsg: bad option string '%s'\n", str);
return 1;
+ }
/*
* Set sysctl string accordingly:
@@ -166,7 +174,7 @@ static int __init control_devkmsg(char *str)
*/
devkmsg_log |= DEVKMSG_LOG_MASK_LOCK;
- return 0;
+ return 1;
}
__setup("printk.devkmsg=", control_devkmsg);
@@ -257,6 +265,11 @@ static void __up_console_sem(unsigned long ip)
}
#define up_console_sem() __up_console_sem(_RET_IP_)
+static bool panic_in_progress(void)
+{
+ return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID);
+}
+
/*
* This is used for debugging the mess that is the VT code by
* keeping track if we have the console semaphore held. It's
@@ -1843,6 +1856,16 @@ static int console_trylock_spinning(void)
if (console_trylock())
return 1;
+ /*
+ * It's unsafe to spin once a panic has begun. If we are the
+ * panic CPU, we may have already halted the owner of the
+ * console_sem. If we are not the panic CPU, then we should
+ * avoid taking console_sem, so the panic CPU has a better
+ * chance of cleanly acquiring it later.
+ */
+ if (panic_in_progress())
+ return 0;
+
printk_safe_enter_irqsave(flags);
raw_spin_lock(&console_owner_lock);
@@ -2218,6 +2241,10 @@ asmlinkage int vprintk_emit(int facility, int level,
if (unlikely(suppress_printk))
return 0;
+ if (unlikely(suppress_panic_printk) &&
+ atomic_read(&panic_cpu) != raw_smp_processor_id())
+ return 0;
+
if (level == LOGLEVEL_SCHED) {
level = LOGLEVEL_DEFAULT;
in_sched = true;
@@ -2324,6 +2351,20 @@ asmlinkage __visible void early_printk(const char *fmt, ...)
}
#endif
+static void set_user_specified(struct console_cmdline *c, bool user_specified)
+{
+ if (!user_specified)
+ return;
+
+ /*
+ * @c console was defined by the user on the command line.
+ * Do not clear the flag when the same console is later added again
+ * by SPCR or the device tree.
+ */
+ c->user_specified = true;
+ /* At least one console defined by the user on the command line. */
+ console_set_on_cmdline = 1;
+}
+
static int __add_preferred_console(char *name, int idx, char *options,
char *brl_options, bool user_specified)
{
@@ -2340,8 +2381,7 @@ static int __add_preferred_console(char *name, int idx, char *options,
if (strcmp(c->name, name) == 0 && c->index == idx) {
if (!brl_options)
preferred_console = i;
- if (user_specified)
- c->user_specified = true;
+ set_user_specified(c, user_specified);
return 0;
}
}
@@ -2351,7 +2391,7 @@ static int __add_preferred_console(char *name, int idx, char *options,
preferred_console = i;
strlcpy(c->name, name, sizeof(c->name));
c->options = options;
- c->user_specified = user_specified;
+ set_user_specified(c, user_specified);
braille_set_options(c, brl_options);
c->index = idx;
@@ -2417,7 +2457,6 @@ static int __init console_setup(char *str)
*s = 0;
__add_preferred_console(buf, idx, options, brl_options, true);
- console_set_on_cmdline = 1;
return 1;
}
__setup("console=", console_setup);
@@ -2574,6 +2613,25 @@ static int have_callable_console(void)
}
/*
+ * Return true when this CPU should unlock console_sem without pushing all
+ * messages to the console. This reduces the chance that the console is
+ * locked when the panic CPU tries to use it.
+ */
+static bool abandon_console_lock_in_panic(void)
+{
+ if (!panic_in_progress())
+ return false;
+
+ /*
+ * We can use raw_smp_processor_id() here because it is impossible for
+ * the task to be migrated to the panic_cpu, or away from it. If
+ * panic_cpu has already been set, and we're not currently executing on
+ * that CPU, then we never will be.
+ */
+ return atomic_read(&panic_cpu) != raw_smp_processor_id();
+}
+
+/*
* Can we actually use the console at this time on this cpu?
*
* Console drivers may assume that per-cpu resources have been allocated. So
@@ -2603,6 +2661,7 @@ void console_unlock(void)
{
static char ext_text[CONSOLE_EXT_LOG_MAX];
static char text[CONSOLE_LOG_MAX];
+ static int panic_console_dropped;
unsigned long flags;
bool do_cond_resched, retry;
struct printk_info info;
@@ -2657,6 +2716,10 @@ skip:
if (console_seq != r.info->seq) {
console_dropped += r.info->seq - console_seq;
console_seq = r.info->seq;
+ if (panic_in_progress() && panic_console_dropped++ > 10) {
+ suppress_panic_printk = 1;
+ pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n");
+ }
}
if (suppress_message_printing(r.info->level)) {
@@ -2716,6 +2779,10 @@ skip:
if (handover)
return;
+ /* Allow panic_cpu to take over the consoles safely */
+ if (abandon_console_lock_in_panic())
+ break;
+
if (do_cond_resched)
cond_resched();
}
@@ -2733,7 +2800,7 @@ skip:
* flush, no worries.
*/
retry = prb_read_valid(prb, next_seq, NULL);
- if (retry && console_trylock())
+ if (retry && !abandon_console_lock_in_panic() && console_trylock())
goto again;
}
EXPORT_SYMBOL(console_unlock);
@@ -3228,7 +3295,7 @@ static DEFINE_PER_CPU(int, printk_pending);
static void wake_up_klogd_work_func(struct irq_work *irq_work)
{
- int pending = __this_cpu_xchg(printk_pending, 0);
+ int pending = this_cpu_xchg(printk_pending, 0);
if (pending & PRINTK_PENDING_OUTPUT) {
/* If trylock fails, someone else is doing the printing */
@@ -3262,7 +3329,7 @@ void defer_console_output(void)
return;
preempt_disable();
- __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
+ this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
preempt_enable();
}
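The printk changes above hinge on two predicates: panic_in_progress(), true once any CPU has claimed panic_cpu, and abandon_console_lock_in_panic(), true on every CPU except the panicking one. A standalone model of that decision logic (C11 atomics stand in for the kernel's atomic_t; behavior only, no console locking):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define PANIC_CPU_INVALID (-1)

static atomic_int panic_cpu = PANIC_CPU_INVALID;

static bool panic_in_progress(void)
{
	return atomic_load(&panic_cpu) != PANIC_CPU_INVALID;
}

static bool abandon_console_lock_in_panic(int this_cpu)
{
	/* only the panic CPU should keep pushing messages */
	return panic_in_progress() && atomic_load(&panic_cpu) != this_cpu;
}

int main(void)
{
	atomic_store(&panic_cpu, 2);	/* CPU 2 starts panicking */
	printf("cpu1 backs off: %d, cpu2 keeps printing: %d\n",
	       abandon_console_lock_in_panic(1),
	       !abandon_console_lock_in_panic(2));
	return 0;
}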
diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
index 8a7b7362c0dd..2b7b6ddab4f7 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -474,8 +474,10 @@ static enum desc_state desc_read(struct prb_desc_ring *desc_ring,
* state has been re-checked. A memcpy() for all of @desc
* cannot be used because of the atomic_t @state_var field.
*/
- memcpy(&desc_out->text_blk_lpos, &desc->text_blk_lpos,
- sizeof(desc_out->text_blk_lpos)); /* LMM(desc_read:C) */
+ if (desc_out) {
+ memcpy(&desc_out->text_blk_lpos, &desc->text_blk_lpos,
+ sizeof(desc_out->text_blk_lpos)); /* LMM(desc_read:C) */
+ }
if (seq_out)
*seq_out = info->seq; /* also part of desc_read:C */
if (caller_id_out)
@@ -528,7 +530,8 @@ static enum desc_state desc_read(struct prb_desc_ring *desc_ring,
state_val = atomic_long_read(state_var); /* LMM(desc_read:E) */
d_state = get_desc_state(id, state_val);
out:
- atomic_long_set(&desc_out->state_var, state_val);
+ if (desc_out)
+ atomic_long_set(&desc_out->state_var, state_val);
return d_state;
}
@@ -1449,6 +1452,9 @@ static void desc_make_final(struct prb_desc_ring *desc_ring, unsigned long id)
atomic_long_cmpxchg_relaxed(&d->state_var, prev_state_val,
DESC_SV(id, desc_finalized)); /* LMM(desc_make_final:A) */
+
+ /* Best effort to remember the last finalized @id. */
+ atomic_long_set(&desc_ring->last_finalized_id, id);
}
/**
@@ -1657,7 +1663,12 @@ void prb_commit(struct prb_reserved_entry *e)
*/
void prb_final_commit(struct prb_reserved_entry *e)
{
+ struct prb_desc_ring *desc_ring = &e->rb->desc_ring;
+
_prb_commit(e, desc_finalized);
+
+ /* Best effort to remember the last finalized @id. */
+ atomic_long_set(&desc_ring->last_finalized_id, e->id);
}
/*
@@ -2005,9 +2016,39 @@ u64 prb_first_valid_seq(struct printk_ringbuffer *rb)
*/
u64 prb_next_seq(struct printk_ringbuffer *rb)
{
- u64 seq = 0;
+ struct prb_desc_ring *desc_ring = &rb->desc_ring;
+ enum desc_state d_state;
+ unsigned long id;
+ u64 seq;
+
+ /* Check if the cached @id still points to a valid @seq. */
+ id = atomic_long_read(&desc_ring->last_finalized_id);
+ d_state = desc_read(desc_ring, id, NULL, &seq, NULL);
- /* Search forward from the oldest descriptor. */
+ if (d_state == desc_finalized || d_state == desc_reusable) {
+ /*
+ * Begin searching after the last finalized record.
+ *
+ * On 0, the search must begin at 0 because, due to hack#2
+ * of the bootstrapping phase, it is not known whether a
+ * record at index 0 exists.
+ */
+ if (seq != 0)
+ seq++;
+ } else {
+ /*
+ * The information about the last finalized sequence number
+ * is gone. This should happen only when there is a flood of
+ * new messages and the ringbuffer is rapidly recycled.
+ * Give up and start from the beginning.
+ */
+ seq = 0;
+ }
+
+ /*
+ * The information about the last finalized @seq might be inaccurate.
+ * Search forward to find the current one.
+ */
while (_prb_read_valid(rb, &seq, NULL, NULL))
seq++;
@@ -2044,6 +2085,7 @@ void prb_init(struct printk_ringbuffer *rb,
rb->desc_ring.infos = infos;
atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits));
atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits));
+ atomic_long_set(&rb->desc_ring.last_finalized_id, DESC0_ID(descbits));
rb->text_data_ring.size_bits = textbits;
rb->text_data_ring.data = text_buf;
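The prb_next_seq() rework above caches the last finalized descriptor id so each call scans forward from there instead of from sequence 0. A standalone model of the idea, with a boolean array standing in for the ring and the hack#2 special case (a cache of 0 forces the scan to start at 0) preserved:

#include <stdbool.h>
#include <stdio.h>

#define NRECS 16

static bool finalized[NRECS];
static unsigned long last_finalized;	/* best-effort cache */

static unsigned long next_seq(void)
{
	unsigned long seq = last_finalized ? last_finalized + 1 : 0;

	while (seq < NRECS && finalized[seq]) {
		last_finalized = seq;	/* remember progress */
		seq++;
	}
	return seq;
}

int main(void)
{
	unsigned long n;

	for (unsigned long i = 0; i < 5; i++)
		finalized[i] = true;
	n = next_seq();
	printf("next=%lu (cached last finalized=%lu)\n", n, last_finalized);
	return 0;
}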
diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h
index 73cc80e01cef..18cd25e489b8 100644
--- a/kernel/printk/printk_ringbuffer.h
+++ b/kernel/printk/printk_ringbuffer.h
@@ -75,6 +75,7 @@ struct prb_desc_ring {
struct printk_info *infos;
atomic_long_t head_id;
atomic_long_t tail_id;
+ atomic_long_t last_finalized_id;
};
/*
@@ -258,6 +259,7 @@ static struct printk_ringbuffer name = { \
.infos = &_##name##_infos[0], \
.head_id = ATOMIC_INIT(DESC0_ID(descbits)), \
.tail_id = ATOMIC_INIT(DESC0_ID(descbits)), \
+ .last_finalized_id = ATOMIC_INIT(DESC0_ID(descbits)), \
}, \
.text_data_ring = { \
.size_bits = (avgtextbits) + (descbits), \
diff --git a/kernel/printk/sysctl.c b/kernel/printk/sysctl.c
index 653ae04aab7f..c228343eeb97 100644
--- a/kernel/printk/sysctl.c
+++ b/kernel/printk/sysctl.c
@@ -12,7 +12,7 @@
static const int ten_thousand = 10000;
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
if (write && !capable(CAP_SYS_ADMIN))
return -EPERM;
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index e373fbe44da5..431cee212467 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -56,13 +56,13 @@ static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
static inline void rcu_segcblist_set_flags(struct rcu_segcblist *rsclp,
int flags)
{
- rsclp->flags |= flags;
+ WRITE_ONCE(rsclp->flags, rsclp->flags | flags);
}
static inline void rcu_segcblist_clear_flags(struct rcu_segcblist *rsclp,
int flags)
{
- rsclp->flags &= ~flags;
+ WRITE_ONCE(rsclp->flags, rsclp->flags & ~flags);
}
static inline bool rcu_segcblist_test_flags(struct rcu_segcblist *rsclp,
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 422f7e4cc08d..55d049c39608 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -284,7 +284,7 @@ static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */
static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */
static DECLARE_WAIT_QUEUE_HEAD(barrier_wq);
-static bool rcu_fwd_cb_nodelay; /* Short rcu_torture_delay() delays. */
+static atomic_t rcu_fwd_cb_nodelay; /* Short rcu_torture_delay() delays. */
/*
* Allocate an element from the rcu_tortures pool.
@@ -387,7 +387,7 @@ rcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
* period, and we want a long delay occasionally to trigger
* force_quiescent_state. */
- if (!READ_ONCE(rcu_fwd_cb_nodelay) &&
+ if (!atomic_read(&rcu_fwd_cb_nodelay) &&
!(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) {
started = cur_ops->get_gp_seq();
ts = rcu_trace_clock_local();
@@ -674,6 +674,7 @@ static struct rcu_torture_ops srcu_ops = {
.call = srcu_torture_call,
.cb_barrier = srcu_torture_barrier,
.stats = srcu_torture_stats,
+ .cbflood_max = 50000,
.irq_capable = 1,
.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
.name = "srcu"
@@ -708,6 +709,7 @@ static struct rcu_torture_ops srcud_ops = {
.call = srcu_torture_call,
.cb_barrier = srcu_torture_barrier,
.stats = srcu_torture_stats,
+ .cbflood_max = 50000,
.irq_capable = 1,
.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
.name = "srcud"
@@ -997,7 +999,7 @@ static int rcu_torture_boost(void *arg)
goto checkwait;
/* Wait for the next test interval. */
- oldstarttime = boost_starttime;
+ oldstarttime = READ_ONCE(boost_starttime);
while (time_before(jiffies, oldstarttime)) {
schedule_timeout_interruptible(oldstarttime - jiffies);
if (stutter_wait("rcu_torture_boost"))
@@ -1041,10 +1043,11 @@ static int rcu_torture_boost(void *arg)
* interval. Besides, we are running at RT priority,
* so delays should be relatively rare.
*/
- while (oldstarttime == boost_starttime && !kthread_should_stop()) {
+ while (oldstarttime == READ_ONCE(boost_starttime) && !kthread_should_stop()) {
if (mutex_trylock(&boost_mutex)) {
if (oldstarttime == boost_starttime) {
- boost_starttime = jiffies + test_boost_interval * HZ;
+ WRITE_ONCE(boost_starttime,
+ jiffies + test_boost_interval * HZ);
n_rcu_torture_boosts++;
}
mutex_unlock(&boost_mutex);
@@ -1276,7 +1279,7 @@ rcu_torture_writer(void *arg)
boot_ended = rcu_inkernel_boot_has_ended();
stutter_waited = stutter_wait("rcu_torture_writer");
if (stutter_waited &&
- !READ_ONCE(rcu_fwd_cb_nodelay) &&
+ !atomic_read(&rcu_fwd_cb_nodelay) &&
!cur_ops->slow_gps &&
!torture_must_stop() &&
boot_ended)
@@ -2180,7 +2183,6 @@ static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
for (i = ARRAY_SIZE(rfp->n_launders_hist) - 1; i > 0; i--)
if (rfp->n_launders_hist[i].n_launders > 0)
break;
- mutex_lock(&rcu_fwd_mutex); // Serialize histograms.
pr_alert("%s: Callback-invocation histogram %d (duration %lu jiffies):",
__func__, rfp->rcu_fwd_id, jiffies - rfp->rcu_fwd_startat);
gps_old = rfp->rcu_launder_gp_seq_start;
@@ -2193,7 +2195,6 @@ static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
gps_old = gps;
}
pr_cont("\n");
- mutex_unlock(&rcu_fwd_mutex);
}
/* Callback function for continuous-flood RCU callbacks. */
@@ -2281,6 +2282,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
unsigned long stopat;
static DEFINE_TORTURE_RANDOM(trs);
+ pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id);
if (!cur_ops->sync)
return; // Cannot do need_resched() forward progress testing without ->sync.
if (cur_ops->call && cur_ops->cb_barrier) {
@@ -2289,7 +2291,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
}
/* Tight loop containing cond_resched(). */
- WRITE_ONCE(rcu_fwd_cb_nodelay, true);
+ atomic_inc(&rcu_fwd_cb_nodelay);
cur_ops->sync(); /* Later readers see above write. */
if (selfpropcb) {
WRITE_ONCE(fcs.stop, 0);
@@ -2325,6 +2327,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
if (selfpropcb) {
WRITE_ONCE(fcs.stop, 1);
cur_ops->sync(); /* Wait for running CB to complete. */
+ pr_alert("%s: Waiting for CBs: %pS() %d\n", __func__, cur_ops->cb_barrier, rfp->rcu_fwd_id);
cur_ops->cb_barrier(); /* Wait for queued callbacks. */
}
@@ -2333,7 +2336,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
destroy_rcu_head_on_stack(&fcs.rh);
}
schedule_timeout_uninterruptible(HZ / 10); /* Let kthreads recover. */
- WRITE_ONCE(rcu_fwd_cb_nodelay, false);
+ atomic_dec(&rcu_fwd_cb_nodelay);
}
/* Carry out call_rcu() forward-progress testing. */
@@ -2353,13 +2356,14 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
unsigned long stopat;
unsigned long stoppedat;
+ pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id);
if (READ_ONCE(rcu_fwd_emergency_stop))
return; /* Get out of the way quickly, no GP wait! */
if (!cur_ops->call)
return; /* Can't do call_rcu() fwd prog without ->call. */
/* Loop continuously posting RCU callbacks. */
- WRITE_ONCE(rcu_fwd_cb_nodelay, true);
+ atomic_inc(&rcu_fwd_cb_nodelay);
cur_ops->sync(); /* Later readers see above write. */
WRITE_ONCE(rfp->rcu_fwd_startat, jiffies);
stopat = rfp->rcu_fwd_startat + MAX_FWD_CB_JIFFIES;
@@ -2414,6 +2418,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
n_launders_cb_snap = READ_ONCE(rfp->n_launders_cb);
cver = READ_ONCE(rcu_torture_current_version) - cver;
gps = rcutorture_seq_diff(cur_ops->get_gp_seq(), gps);
+ pr_alert("%s: Waiting for CBs: %pS() %d\n", __func__, cur_ops->cb_barrier, rfp->rcu_fwd_id);
cur_ops->cb_barrier(); /* Wait for callbacks to be invoked. */
(void)rcu_torture_fwd_prog_cbfree(rfp);
@@ -2427,11 +2432,13 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
n_launders, n_launders_sa,
n_max_gps, n_max_cbs, cver, gps);
atomic_long_add(n_max_cbs, &rcu_fwd_max_cbs);
+ mutex_lock(&rcu_fwd_mutex); // Serialize histograms.
rcu_torture_fwd_cb_hist(rfp);
+ mutex_unlock(&rcu_fwd_mutex);
}
schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */
tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
- WRITE_ONCE(rcu_fwd_cb_nodelay, false);
+ atomic_dec(&rcu_fwd_cb_nodelay);
}
@@ -2511,7 +2518,7 @@ static int rcu_torture_fwd_prog(void *args)
firsttime = false;
WRITE_ONCE(rcu_fwd_seq, rcu_fwd_seq + 1);
} else {
- while (READ_ONCE(rcu_fwd_seq) == oldseq)
+ while (READ_ONCE(rcu_fwd_seq) == oldseq && !torture_must_stop())
schedule_timeout_interruptible(1);
oldseq = READ_ONCE(rcu_fwd_seq);
}
@@ -2905,8 +2912,10 @@ rcu_torture_cleanup(void)
int i;
if (torture_cleanup_begin()) {
- if (cur_ops->cb_barrier != NULL)
+ if (cur_ops->cb_barrier != NULL) {
+ pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier);
cur_ops->cb_barrier();
+ }
return;
}
if (!cur_ops) {
@@ -2961,8 +2970,10 @@ rcu_torture_cleanup(void)
* Wait for all RCU callbacks to fire, then do torture-type-specific
* cleanup operations.
*/
- if (cur_ops->cb_barrier != NULL)
+ if (cur_ops->cb_barrier != NULL) {
+ pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier);
cur_ops->cb_barrier();
+ }
if (cur_ops->cleanup != NULL)
cur_ops->cleanup();
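The rcu_fwd_cb_nodelay conversion above turns a bool into a counter because several forward-progress kthreads now run concurrently: with a plain bool, whichever instance finished first cleared the flag for all of them. A standalone sketch of the inc/dec discipline (C11 atomics in place of the kernel's atomic_t):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int fwd_cb_nodelay;

static void fwd_prog_begin(void) { atomic_fetch_add(&fwd_cb_nodelay, 1); }
static void fwd_prog_end(void)   { atomic_fetch_sub(&fwd_cb_nodelay, 1); }

static bool delays_suppressed(void)
{
	return atomic_load(&fwd_cb_nodelay) > 0;
}

int main(void)
{
	fwd_prog_begin();			/* kthread A starts */
	fwd_prog_begin();			/* kthread B starts */
	fwd_prog_end();				/* A finishes first */
	printf("%d\n", delays_suppressed());	/* still 1          */
	fwd_prog_end();
	printf("%d\n", delays_suppressed());	/* now 0            */
	return 0;
}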
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 84f1d91604cc..99cf3a13954c 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -123,7 +123,7 @@ static struct rcu_tasks rt_name = \
.call_func = call, \
.rtpcpu = &rt_name ## __percpu, \
.name = n, \
- .percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS), \
+ .percpu_enqueue_shift = order_base_2(CONFIG_NR_CPUS), \
.percpu_enqueue_lim = 1, \
.percpu_dequeue_lim = 1, \
.barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex), \
@@ -216,6 +216,7 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
int cpu;
unsigned long flags;
int lim;
+ int shift;
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
if (rcu_task_enqueue_lim < 0) {
@@ -229,7 +230,10 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
if (lim > nr_cpu_ids)
lim = nr_cpu_ids;
- WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids / lim));
+ shift = ilog2(nr_cpu_ids / lim);
+ if (((nr_cpu_ids - 1) >> shift) >= lim)
+ shift++;
+ WRITE_ONCE(rtp->percpu_enqueue_shift, shift);
WRITE_ONCE(rtp->percpu_dequeue_lim, lim);
smp_store_release(&rtp->percpu_enqueue_lim, lim);
for_each_possible_cpu(cpu) {
@@ -298,7 +302,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
if (unlikely(needadjust)) {
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
if (rtp->percpu_enqueue_lim != nr_cpu_ids) {
- WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids));
+ WRITE_ONCE(rtp->percpu_enqueue_shift, 0);
WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids);
smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids);
pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name);
@@ -413,7 +417,7 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
if (rcu_task_cb_adjust && ncbs <= rcu_task_collapse_lim) {
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
if (rtp->percpu_enqueue_lim > 1) {
- WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids));
+ WRITE_ONCE(rtp->percpu_enqueue_shift, order_base_2(nr_cpu_ids));
smp_store_release(&rtp->percpu_enqueue_lim, 1);
rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu();
pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name);
@@ -492,7 +496,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
struct rcu_tasks *rtp = arg;
/* Run on housekeeping CPUs by default. Sysadm can move if desired. */
- housekeeping_affine(current, HK_FLAG_RCU);
+ housekeeping_affine(current, HK_TYPE_RCU);
WRITE_ONCE(rtp->kthread_ptr, current); // Let GPs start!
/*
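The cblist_init_generic() fix above addresses ilog2() rounding down: with nr_cpu_ids = 6 and lim = 2 queues, shift = ilog2(6 / 2) = 1 maps CPU 5 to queue 5 >> 1 = 2, one past the limit, so the shift is bumped whenever the highest CPU would land out of range. A standalone check of that arithmetic (ilog2_demo is a local stand-in for the kernel's ilog2()):

#include <stdio.h>

static int ilog2_demo(unsigned long v)
{
	int r = -1;

	while (v) {
		v >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	unsigned long nr_cpu_ids = 6, lim = 2;
	int shift = ilog2_demo(nr_cpu_ids / lim);

	if (((nr_cpu_ids - 1) >> shift) >= lim)
		shift++;	/* the fix: keep every CPU below lim */
	printf("shift=%d, highest CPU maps to queue %lu (< %lu)\n",
	       shift, (nr_cpu_ids - 1) >> shift, lim);
	return 0;
}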
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a4c25a6283b0..a4b8189455d5 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -87,11 +87,12 @@ static struct rcu_state rcu_state = {
.gp_state = RCU_GP_IDLE,
.gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT,
.barrier_mutex = __MUTEX_INITIALIZER(rcu_state.barrier_mutex),
+ .barrier_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.barrier_lock),
.name = RCU_NAME,
.abbr = RCU_ABBR,
.exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex),
.exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex),
- .ofl_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.ofl_lock),
+ .ofl_lock = __ARCH_SPIN_LOCK_UNLOCKED,
};
/* Dump rcu_node combining tree at boot to verify correct setup. */
@@ -153,7 +154,7 @@ static void sync_sched_exp_online_cleanup(int cpu);
static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp);
static bool rcu_rdp_is_offloaded(struct rcu_data *rdp);
-/* rcuc/rcub kthread realtime priority */
+/* rcuc/rcub/rcuop kthread realtime priority */
static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0;
module_param(kthread_prio, int, 0444);
@@ -222,6 +223,16 @@ static unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp)
}
/*
+ * Is the CPU corresponding to the specified rcu_data structure online
+ * from RCU's perspective? This perspective is given by that structure's
+ * ->qsmaskinitnext field rather than by the global cpu_online_mask.
+ */
+static bool rcu_rdp_cpu_online(struct rcu_data *rdp)
+{
+ return !!(rdp->grpmask & rcu_rnp_online_cpus(rdp->mynode));
+}
+
+/*
* Return true if an RCU grace period is in progress. The READ_ONCE()s
* permit this function to be invoked without holding the root rcu_node
* structure's ->lock, but of course results can be subject to change.
@@ -1086,9 +1097,8 @@ void rcu_irq_enter_irqson(void)
* Just check whether or not this CPU has non-offloaded RCU callbacks
* queued.
*/
-int rcu_needs_cpu(u64 basemono, u64 *nextevt)
+int rcu_needs_cpu(void)
{
- *nextevt = KTIME_MAX;
return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
}
@@ -1167,15 +1177,20 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
bool rcu_lockdep_current_cpu_online(void)
{
struct rcu_data *rdp;
- struct rcu_node *rnp;
bool ret = false;
if (in_nmi() || !rcu_scheduler_fully_active)
return true;
preempt_disable_notrace();
rdp = this_cpu_ptr(&rcu_data);
- rnp = rdp->mynode;
- if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || READ_ONCE(rnp->ofl_seq) & 0x1)
+ /*
+ * Strictly, we care here about the case where the current CPU is
+ * in rcu_cpu_starting() and thus has an excuse for rdp->grpmask
+ * not being up to date. So arch_spin_is_locked() might have a
+ * false positive if it's held by some *other* CPU, but that's
+ * OK because that just means a false *negative* on the warning.
+ */
+ if (rcu_rdp_cpu_online(rdp) || arch_spin_is_locked(&rcu_state.ofl_lock))
ret = true;
preempt_enable_notrace();
return ret;
@@ -1260,8 +1275,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
* For more detail, please refer to the "Hotplug CPU" section
* of RCU's Requirements documentation.
*/
- if (WARN_ON_ONCE(!(rdp->grpmask & rcu_rnp_online_cpus(rnp)))) {
- bool onl;
+ if (WARN_ON_ONCE(!rcu_rdp_cpu_online(rdp))) {
struct rcu_node *rnp1;
pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
@@ -1270,9 +1284,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx ->rcu_gp_init_mask %#lx\n",
__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext, rnp1->rcu_gp_init_mask);
- onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
pr_info("%s %d: %c online: %ld(%d) offline: %ld(%d)\n",
- __func__, rdp->cpu, ".o"[onl],
+ __func__, rdp->cpu, ".o"[rcu_rdp_cpu_online(rdp)],
(long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
(long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
return 1; /* Break things loose after complaining. */
@@ -1739,7 +1752,6 @@ static void rcu_strict_gp_boundary(void *unused)
*/
static noinline_for_stack bool rcu_gp_init(void)
{
- unsigned long firstseq;
unsigned long flags;
unsigned long oldmask;
unsigned long mask;
@@ -1782,22 +1794,17 @@ static noinline_for_stack bool rcu_gp_init(void)
* of RCU's Requirements documentation.
*/
WRITE_ONCE(rcu_state.gp_state, RCU_GP_ONOFF);
+ /* Exclude CPU hotplug operations. */
rcu_for_each_leaf_node(rnp) {
- // Wait for CPU-hotplug operations that might have
- // started before this grace period did.
- smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
- firstseq = READ_ONCE(rnp->ofl_seq);
- if (firstseq & 0x1)
- while (firstseq == READ_ONCE(rnp->ofl_seq))
- schedule_timeout_idle(1); // Can't wake unless RCU is watching.
- smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
- raw_spin_lock(&rcu_state.ofl_lock);
- raw_spin_lock_irq_rcu_node(rnp);
+ local_irq_save(flags);
+ arch_spin_lock(&rcu_state.ofl_lock);
+ raw_spin_lock_rcu_node(rnp);
if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
!rnp->wait_blkd_tasks) {
/* Nothing to do on this leaf rcu_node structure. */
- raw_spin_unlock_irq_rcu_node(rnp);
- raw_spin_unlock(&rcu_state.ofl_lock);
+ raw_spin_unlock_rcu_node(rnp);
+ arch_spin_unlock(&rcu_state.ofl_lock);
+ local_irq_restore(flags);
continue;
}
@@ -1832,8 +1839,9 @@ static noinline_for_stack bool rcu_gp_init(void)
rcu_cleanup_dead_rnp(rnp);
}
- raw_spin_unlock_irq_rcu_node(rnp);
- raw_spin_unlock(&rcu_state.ofl_lock);
+ raw_spin_unlock_rcu_node(rnp);
+ arch_spin_unlock(&rcu_state.ofl_lock);
+ local_irq_restore(flags);
}
rcu_gp_slow(gp_preinit_delay); /* Races with CPU hotplug. */
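[Editor's note: the hunks above retire the old ->ofl_seq protocol. rcu_gp_init() used to poll that per-node counter for an odd value before trusting ->qsmaskinitnext — a seqcount-style parity handshake — and now simply excludes hotplug via rcu_state.ofl_lock. For contrast, a minimal sketch of the retired parity idiom, reconstructed from the deleted lines (not a current API):

    /* Updater: bump to odd on entry, even on exit. */
    WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);    /* now odd: update in progress */
    /* ... reconfigure the online masks ... */
    WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);    /* now even: idle again */

    /* Reader: wait out any in-flight update. */
    seq = READ_ONCE(rnp->ofl_seq);
    if (seq & 0x1)                                 /* odd: update running */
        while (seq == READ_ONCE(rnp->ofl_seq))
            schedule_timeout_idle(1);              /* can't wake unless RCU is watching */

The lock-based replacement trades that lock-free dance for a plain critical section, which is also what makes the arch_spin_is_locked() heuristic in rcu_lockdep_current_cpu_online() possible.]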
@@ -2850,10 +2858,12 @@ static void rcu_cpu_kthread(unsigned int cpu)
{
unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
+ unsigned long *j = this_cpu_ptr(&rcu_data.rcuc_activity);
int spincnt;
trace_rcu_utilization(TPS("Start CPU kthread@rcu_run"));
for (spincnt = 0; spincnt < 10; spincnt++) {
+ WRITE_ONCE(*j, jiffies);
local_bh_disable();
*statusp = RCU_KTHREAD_RUNNING;
local_irq_disable();
@@ -2874,6 +2884,7 @@ static void rcu_cpu_kthread(unsigned int cpu)
schedule_timeout_idle(2);
trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
*statusp = RCU_KTHREAD_WAITING;
+ WRITE_ONCE(*j, jiffies);
}
static struct smp_hotplug_thread rcu_cpu_thread_spec = {
@@ -2894,7 +2905,7 @@ static int __init rcu_spawn_core_kthreads(void)
for_each_possible_cpu(cpu)
per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
- if (!IS_ENABLED(CONFIG_RCU_BOOST) && use_softirq)
+ if (use_softirq)
return 0;
WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),
"%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
@@ -2995,9 +3006,47 @@ static void check_cb_ovld(struct rcu_data *rdp)
raw_spin_unlock_rcu_node(rnp);
}
-/* Helper function for call_rcu() and friends. */
-static void
-__call_rcu(struct rcu_head *head, rcu_callback_t func)
+/**
+ * call_rcu() - Queue an RCU callback for invocation after a grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed. However, the callback function
+ * might well execute concurrently with RCU read-side critical sections
+ * that started after call_rcu() was invoked.
+ *
+ * RCU read-side critical sections are delimited by rcu_read_lock()
+ * and rcu_read_unlock(), and may be nested. In addition, but only in
+ * v5.0 and later, regions of code across which interrupts, preemption,
+ * or softirqs have been disabled also serve as RCU read-side critical
+ * sections. This includes hardware interrupt handlers, softirq handlers,
+ * and NMI handlers.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing RCU read-side critical sections. On systems with more
+ * than one CPU, this means that when "func()" is invoked, each CPU is
+ * guaranteed to have executed a full memory barrier since the end of its
+ * last RCU read-side critical section whose beginning preceded the call
+ * to call_rcu(). It also means that each CPU executing an RCU read-side
+ * critical section that continues beyond the start of "func()" must have
+ * executed a memory barrier after the call_rcu() but before the beginning
+ * of that RCU read-side critical section. Note that these guarantees
+ * include CPUs that are offline, idle, or executing in user mode, as
+ * well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting RCU callback function "func()", then both CPU A and CPU B are
+ * guaranteed to execute a full memory barrier during the time interval
+ * between the call to call_rcu() and the invocation of "func()" -- even
+ * if CPU A and CPU B are the same CPU (but again only if the system has
+ * more than one CPU).
+ *
+ * Implementation of these memory-ordering guarantees is described here:
+ * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
+ */
+void call_rcu(struct rcu_head *head, rcu_callback_t func)
{
static atomic_t doublefrees;
unsigned long flags;
@@ -3011,7 +3060,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
/*
* Probable double call_rcu(), so leak the callback.
* Use rcu:rcu_callback trace event to find the previous
- * time callback was passed to __call_rcu().
+ * time callback was passed to call_rcu().
*/
if (atomic_inc_return(&doublefrees) < 4) {
pr_err("%s(): Double-freed CB %p->%pS()!!! ", __func__, head, head->func);
@@ -3022,8 +3071,8 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
}
head->func = func;
head->next = NULL;
- local_irq_save(flags);
kasan_record_aux_stack_noalloc(head);
+ local_irq_save(flags);
rdp = this_cpu_ptr(&rcu_data);
/* Add the callback to our list. */
@@ -3060,51 +3109,6 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
local_irq_restore(flags);
}
}
-
-/**
- * call_rcu() - Queue an RCU callback for invocation after a grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all pre-existing RCU read-side
- * critical sections have completed. However, the callback function
- * might well execute concurrently with RCU read-side critical sections
- * that started after call_rcu() was invoked.
- *
- * RCU read-side critical sections are delimited by rcu_read_lock()
- * and rcu_read_unlock(), and may be nested. In addition, but only in
- * v5.0 and later, regions of code across which interrupts, preemption,
- * or softirqs have been disabled also serve as RCU read-side critical
- * sections. This includes hardware interrupt handlers, softirq handlers,
- * and NMI handlers.
- *
- * Note that all CPUs must agree that the grace period extended beyond
- * all pre-existing RCU read-side critical section. On systems with more
- * than one CPU, this means that when "func()" is invoked, each CPU is
- * guaranteed to have executed a full memory barrier since the end of its
- * last RCU read-side critical section whose beginning preceded the call
- * to call_rcu(). It also means that each CPU executing an RCU read-side
- * critical section that continues beyond the start of "func()" must have
- * executed a memory barrier after the call_rcu() but before the beginning
- * of that RCU read-side critical section. Note that these guarantees
- * include CPUs that are offline, idle, or executing in user mode, as
- * well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
- * resulting RCU callback function "func()", then both CPU A and CPU B are
- * guaranteed to execute a full memory barrier during the time interval
- * between the call to call_rcu() and the invocation of "func()" -- even
- * if CPU A and CPU B are the same CPU (but again only if the system has
- * more than one CPU).
- *
- * Implementation of these memory-ordering guarantees is described here:
- * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
- */
-void call_rcu(struct rcu_head *head, rcu_callback_t func)
-{
- __call_rcu(head, func);
-}
EXPORT_SYMBOL_GPL(call_rcu);
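[Editor's note: with __call_rcu() folded away, the kerneldoc now lives directly on call_rcu(). The calling convention itself is unchanged; a minimal usage sketch, where struct foo and free_foo_cb() are illustrative names rather than anything from this patch:

    struct foo {
        int data;
        struct rcu_head rcu;       /* embedded callback handle */
    };

    static void free_foo_cb(struct rcu_head *head)
    {
        /* Runs after a grace period; recover the enclosing object. */
        kfree(container_of(head, struct foo, rcu));
    }

    /* Updater: unpublish the old object, then defer its free past all readers. */
    rcu_assign_pointer(global_foo, new);
    call_rcu(&old->rcu, free_foo_cb);
]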
@@ -3984,13 +3988,16 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
}
/*
- * Called with preemption disabled, and from cross-cpu IRQ context.
+ * If needed, entrain an rcu_barrier() callback on rdp->cblist.
*/
-static void rcu_barrier_func(void *cpu_in)
+static void rcu_barrier_entrain(struct rcu_data *rdp)
{
- uintptr_t cpu = (uintptr_t)cpu_in;
- struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+ unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence);
+ unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap);
+ lockdep_assert_held(&rcu_state.barrier_lock);
+ if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq))
+ return;
rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence);
rdp->barrier_head.func = rcu_barrier_callback;
debug_rcu_head_queue(&rdp->barrier_head);
@@ -4000,10 +4007,26 @@ static void rcu_barrier_func(void *cpu_in)
atomic_inc(&rcu_state.barrier_cpu_count);
} else {
debug_rcu_head_unqueue(&rdp->barrier_head);
- rcu_barrier_trace(TPS("IRQNQ"), -1,
- rcu_state.barrier_sequence);
+ rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence);
}
rcu_nocb_unlock(rdp);
+ smp_store_release(&rdp->barrier_seq_snap, gseq);
+}
+
+/*
+ * Called with preemption disabled, and from cross-cpu IRQ context.
+ */
+static void rcu_barrier_handler(void *cpu_in)
+{
+ uintptr_t cpu = (uintptr_t)cpu_in;
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+ lockdep_assert_irqs_disabled();
+ WARN_ON_ONCE(cpu != rdp->cpu);
+ WARN_ON_ONCE(cpu != smp_processor_id());
+ raw_spin_lock(&rcu_state.barrier_lock);
+ rcu_barrier_entrain(rdp);
+ raw_spin_unlock(&rcu_state.barrier_lock);
}
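[Editor's note: rcu_barrier_entrain()'s early return compares the CPU's snapshot against the global barrier sequence. Assuming the rcu_seq helpers keep a state field in the low-order bits and a counter above it (their usual encoding in kernel/rcu/rcu.h, not restated in this hunk), the three disjuncts read:

    if (rcu_seq_state(lseq) ||                   /* snap already taken during this instance, */
        !rcu_seq_state(gseq) ||                  /* no rcu_barrier() currently in flight, or  */
        rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq))  /* snap belongs to a different instance:     */
        return;                                  /* nothing to entrain in any of these cases. */

The smp_store_release() of gseq at the end publishes "this CPU is handled" to the lockless smp_load_acquire() check in rcu_barrier()'s main loop.]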
/**
@@ -4017,6 +4040,8 @@ static void rcu_barrier_func(void *cpu_in)
void rcu_barrier(void)
{
uintptr_t cpu;
+ unsigned long flags;
+ unsigned long gseq;
struct rcu_data *rdp;
unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence);
@@ -4027,15 +4052,16 @@ void rcu_barrier(void)
/* Did someone else do our work for us? */
if (rcu_seq_done(&rcu_state.barrier_sequence, s)) {
- rcu_barrier_trace(TPS("EarlyExit"), -1,
- rcu_state.barrier_sequence);
+ rcu_barrier_trace(TPS("EarlyExit"), -1, rcu_state.barrier_sequence);
smp_mb(); /* caller's subsequent code after above check. */
mutex_unlock(&rcu_state.barrier_mutex);
return;
}
/* Mark the start of the barrier operation. */
+ raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
rcu_seq_start(&rcu_state.barrier_sequence);
+ gseq = rcu_state.barrier_sequence;
rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence);
/*
@@ -4047,7 +4073,7 @@ void rcu_barrier(void)
*/
init_completion(&rcu_state.barrier_completion);
atomic_set(&rcu_state.barrier_cpu_count, 2);
- cpus_read_lock();
+ raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
/*
* Force each CPU with callbacks to register a new callback.
@@ -4056,29 +4082,31 @@ void rcu_barrier(void)
*/
for_each_possible_cpu(cpu) {
rdp = per_cpu_ptr(&rcu_data, cpu);
- if (cpu_is_offline(cpu) &&
- !rcu_rdp_is_offloaded(rdp))
+retry:
+ if (smp_load_acquire(&rdp->barrier_seq_snap) == gseq)
continue;
- if (rcu_segcblist_n_cbs(&rdp->cblist) && cpu_online(cpu)) {
- rcu_barrier_trace(TPS("OnlineQ"), cpu,
- rcu_state.barrier_sequence);
- smp_call_function_single(cpu, rcu_barrier_func, (void *)cpu, 1);
- } else if (rcu_segcblist_n_cbs(&rdp->cblist) &&
- cpu_is_offline(cpu)) {
- rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu,
- rcu_state.barrier_sequence);
- local_irq_disable();
- rcu_barrier_func((void *)cpu);
- local_irq_enable();
- } else if (cpu_is_offline(cpu)) {
- rcu_barrier_trace(TPS("OfflineNoCBNoQ"), cpu,
- rcu_state.barrier_sequence);
- } else {
- rcu_barrier_trace(TPS("OnlineNQ"), cpu,
- rcu_state.barrier_sequence);
+ raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
+ if (!rcu_segcblist_n_cbs(&rdp->cblist)) {
+ WRITE_ONCE(rdp->barrier_seq_snap, gseq);
+ raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
+ rcu_barrier_trace(TPS("NQ"), cpu, rcu_state.barrier_sequence);
+ continue;
+ }
+ if (!rcu_rdp_cpu_online(rdp)) {
+ rcu_barrier_entrain(rdp);
+ WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq);
+ raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
+ rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, rcu_state.barrier_sequence);
+ continue;
+ }
+ raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
+ if (smp_call_function_single(cpu, rcu_barrier_handler, (void *)cpu, 1)) {
+ schedule_timeout_uninterruptible(1);
+ goto retry;
}
+ WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq);
+ rcu_barrier_trace(TPS("OnlineQ"), cpu, rcu_state.barrier_sequence);
}
- cpus_read_unlock();
/*
* Now that we have an rcu_barrier_callback() callback on each
@@ -4093,6 +4121,12 @@ void rcu_barrier(void)
/* Mark the end of the barrier operation. */
rcu_barrier_trace(TPS("Inc2"), -1, rcu_state.barrier_sequence);
rcu_seq_end(&rcu_state.barrier_sequence);
+ gseq = rcu_state.barrier_sequence;
+ for_each_possible_cpu(cpu) {
+ rdp = per_cpu_ptr(&rcu_data, cpu);
+
+ WRITE_ONCE(rdp->barrier_seq_snap, gseq);
+ }
/* Other rcu_barrier() invocations can now safely proceed. */
mutex_unlock(&rcu_state.barrier_mutex);
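[Editor's note: this is what barrier_lock plus the per-CPU snapshots buy — rcu_barrier() drops cpus_read_lock(), so a target CPU can now go offline between the snapshot check and the IPI. smp_call_function_single() reports that race as an error, and the retry label re-evaluates the CPU from the top. Unrolled into loop form, and eliding the locking and the no-callbacks fast path:

    for (;;) {
        if (smp_load_acquire(&rdp->barrier_seq_snap) == gseq)
            break;                               /* already handled */
        if (!smp_call_function_single(cpu, rcu_barrier_handler, (void *)cpu, 1))
            break;                               /* IPI ran to completion */
        /* CPU raced offline between check and IPI; wait a tick, look again. */
        schedule_timeout_uninterruptible(1);
    }
]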
@@ -4140,6 +4174,7 @@ rcu_boot_init_percpu_data(int cpu)
INIT_WORK(&rdp->strict_work, strict_work_handler);
WARN_ON_ONCE(rdp->dynticks_nesting != 1);
WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
+ rdp->barrier_seq_snap = rcu_state.barrier_sequence;
rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
rdp->rcu_onl_gp_seq = rcu_state.gp_seq;
@@ -4287,12 +4322,13 @@ void rcu_cpu_starting(unsigned int cpu)
rnp = rdp->mynode;
mask = rdp->grpmask;
- WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
- WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+ local_irq_save(flags);
+ arch_spin_lock(&rcu_state.ofl_lock);
rcu_dynticks_eqs_online();
- smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
- raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ raw_spin_lock(&rcu_state.barrier_lock);
+ raw_spin_lock_rcu_node(rnp);
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
+ raw_spin_unlock(&rcu_state.barrier_lock);
newcpu = !(rnp->expmaskinitnext & mask);
rnp->expmaskinitnext |= mask;
/* Allow lockless access for expedited grace periods. */
@@ -4304,15 +4340,18 @@ void rcu_cpu_starting(unsigned int cpu)
/* An incoming CPU should never be blocking a grace period. */
if (WARN_ON_ONCE(rnp->qsmask & mask)) { /* RCU waiting on incoming CPU? */
+ /* rcu_report_qs_rnp() *really* wants some flags to restore */
+ unsigned long flags2;
+
+ local_irq_save(flags2);
rcu_disable_urgency_upon_qs(rdp);
/* Report QS -after- changing ->qsmaskinitnext! */
- rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
+ rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags2);
} else {
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ raw_spin_unlock_rcu_node(rnp);
}
- smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
- WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
- WARN_ON_ONCE(rnp->ofl_seq & 0x1);
+ arch_spin_unlock(&rcu_state.ofl_lock);
+ local_irq_restore(flags);
smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
}
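[Editor's note: ofl_lock also changes type here, from raw_spinlock_t to arch_spinlock_t (see the tree.h hunk below). The arch-level primitive skips lockdep, tracing, and irq bookkeeping — presumably the point on these very-early online and very-late offline paths, where RCU itself may not yet (or no longer) be watching — and the cost is that callers manage interrupts by hand:

    unsigned long flags;

    local_irq_save(flags);                   /* arch_spin_lock() does no irq handling itself */
    arch_spin_lock(&rcu_state.ofl_lock);     /* no lockdep, no tracing hooks */
    /* ... update ->qsmaskinitnext and friends ... */
    arch_spin_unlock(&rcu_state.ofl_lock);
    local_irq_restore(flags);
]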
@@ -4326,7 +4365,7 @@ void rcu_cpu_starting(unsigned int cpu)
*/
void rcu_report_dead(unsigned int cpu)
{
- unsigned long flags;
+ unsigned long flags, seq_flags;
unsigned long mask;
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
@@ -4340,10 +4379,8 @@ void rcu_report_dead(unsigned int cpu)
/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
mask = rdp->grpmask;
- WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
- WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
- smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
- raw_spin_lock(&rcu_state.ofl_lock);
+ local_irq_save(seq_flags);
+ arch_spin_lock(&rcu_state.ofl_lock);
raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
rdp->rcu_ofl_gp_flags = READ_ONCE(rcu_state.gp_flags);
@@ -4354,10 +4391,8 @@ void rcu_report_dead(unsigned int cpu)
}
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- raw_spin_unlock(&rcu_state.ofl_lock);
- smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
- WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
- WARN_ON_ONCE(rnp->ofl_seq & 0x1);
+ arch_spin_unlock(&rcu_state.ofl_lock);
+ local_irq_restore(seq_flags);
rdp->cpu_started = false;
}
@@ -4380,7 +4415,9 @@ void rcutree_migrate_callbacks(int cpu)
rcu_segcblist_empty(&rdp->cblist))
return; /* No callbacks to migrate. */
- local_irq_save(flags);
+ raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
+ WARN_ON_ONCE(rcu_rdp_cpu_online(rdp));
+ rcu_barrier_entrain(rdp);
my_rdp = this_cpu_ptr(&rcu_data);
my_rnp = my_rdp->mynode;
rcu_nocb_lock(my_rdp); /* irqs already disabled. */
@@ -4390,10 +4427,10 @@ void rcutree_migrate_callbacks(int cpu)
needwake = rcu_advance_cbs(my_rnp, rdp) ||
rcu_advance_cbs(my_rnp, my_rdp);
rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
+ raw_spin_unlock(&rcu_state.barrier_lock); /* irqs remain disabled. */
needwake = needwake || rcu_advance_cbs(my_rnp, my_rdp);
rcu_segcblist_disable(&rdp->cblist);
- WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
- !rcu_segcblist_n_cbs(&my_rdp->cblist));
+ WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != !rcu_segcblist_n_cbs(&my_rdp->cblist));
if (rcu_rdp_is_offloaded(my_rdp)) {
raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. */
__call_rcu_nocb_wake(my_rdp, true, flags);
@@ -4440,26 +4477,10 @@ static int rcu_pm_notify(struct notifier_block *self,
static int __init rcu_spawn_gp_kthread(void)
{
unsigned long flags;
- int kthread_prio_in = kthread_prio;
struct rcu_node *rnp;
struct sched_param sp;
struct task_struct *t;
- /* Force priority into range. */
- if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 2
- && IS_BUILTIN(CONFIG_RCU_TORTURE_TEST))
- kthread_prio = 2;
- else if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1)
- kthread_prio = 1;
- else if (kthread_prio < 0)
- kthread_prio = 0;
- else if (kthread_prio > 99)
- kthread_prio = 99;
-
- if (kthread_prio != kthread_prio_in)
- pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n",
- kthread_prio, kthread_prio_in);
-
rcu_scheduler_fully_active = 1;
t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name);
if (WARN_ONCE(IS_ERR(t), "%s: Could not start grace-period kthread, OOM is now expected behavior\n", __func__))
@@ -4570,6 +4591,7 @@ static void __init rcu_init_one(void)
init_waitqueue_head(&rnp->exp_wq[2]);
init_waitqueue_head(&rnp->exp_wq[3]);
spin_lock_init(&rnp->exp_lock);
+ mutex_init(&rnp->boost_kthread_mutex);
}
}
@@ -4585,6 +4607,28 @@ static void __init rcu_init_one(void)
}
/*
+ * Force priority from the kernel command-line into range.
+ */
+static void __init sanitize_kthread_prio(void)
+{
+ int kthread_prio_in = kthread_prio;
+
+ if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 2
+ && IS_BUILTIN(CONFIG_RCU_TORTURE_TEST))
+ kthread_prio = 2;
+ else if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1)
+ kthread_prio = 1;
+ else if (kthread_prio < 0)
+ kthread_prio = 0;
+ else if (kthread_prio > 99)
+ kthread_prio = 99;
+
+ if (kthread_prio != kthread_prio_in)
+ pr_alert("%s: Limited prio to %d from %d\n",
+ __func__, kthread_prio, kthread_prio_in);
+}
+
+/*
* Compute the rcu_node tree geometry from kernel parameters. This cannot
* replace the definitions in tree.h because those are needed to size
* the ->node array in the rcu_state structure.
@@ -4744,6 +4788,7 @@ void __init rcu_init(void)
kfree_rcu_batch_init();
rcu_bootup_announce();
+ sanitize_kthread_prio();
rcu_init_geometry();
rcu_init_one();
if (dump_tree)
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 486fc901bd08..926673ebe355 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -56,8 +56,6 @@ struct rcu_node {
/* Initialized from ->qsmaskinitnext at the */
/* beginning of each grace period. */
unsigned long qsmaskinitnext;
- unsigned long ofl_seq; /* CPU-hotplug operation sequence count. */
- /* Online CPUs for next grace period. */
unsigned long expmask; /* CPUs or groups that need to check in */
/* to allow the current expedited GP */
/* to complete. */
@@ -110,6 +108,9 @@ struct rcu_node {
/* side effect, not as a lock. */
unsigned long boost_time;
/* When to start boosting (jiffies). */
+ struct mutex boost_kthread_mutex;
+ /* Exclusion for thread spawning and affinity */
+ /* manipulation. */
struct task_struct *boost_kthread_task;
/* kthread that takes care of priority */
/* boosting for this rcu_node structure. */
@@ -190,6 +191,7 @@ struct rcu_data {
bool rcu_forced_tick_exp; /* ... provide QS to expedited GP. */
/* 4) rcu_barrier(), OOM callbacks, and expediting. */
+ unsigned long barrier_seq_snap; /* Snap of rcu_state.barrier_sequence. */
struct rcu_head barrier_head;
int exp_dynticks_snap; /* Double-check need for IPI. */
@@ -203,6 +205,8 @@ struct rcu_data {
int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
struct timer_list nocb_timer; /* Enforce finite deferral. */
unsigned long nocb_gp_adv_time; /* Last call_rcu() CB adv (jiffies). */
+ struct mutex nocb_gp_kthread_mutex; /* Exclusion for nocb gp kthread */
+ /* spawning */
/* The following fields are used by call_rcu, hence own cacheline. */
raw_spinlock_t nocb_bypass_lock ____cacheline_internodealigned_in_smp;
@@ -237,6 +241,7 @@ struct rcu_data {
/* rcuc per-CPU kthread or NULL. */
unsigned int rcu_cpu_kthread_status;
char rcu_cpu_has_work;
+ unsigned long rcuc_activity;
/* 7) Diagnostic data, including RCU CPU stall warnings. */
unsigned int softirq_snap; /* Snapshot of softirq activity. */
@@ -302,9 +307,8 @@ struct rcu_state {
/* The following fields are guarded by the root rcu_node's lock. */
- u8 boost ____cacheline_internodealigned_in_smp;
- /* Subject to priority boost. */
- unsigned long gp_seq; /* Grace-period sequence #. */
+ unsigned long gp_seq ____cacheline_internodealigned_in_smp;
+ /* Grace-period sequence #. */
unsigned long gp_max; /* Maximum GP duration in */
/* jiffies. */
struct task_struct *gp_kthread; /* Task for grace periods. */
@@ -323,6 +327,8 @@ struct rcu_state {
/* rcu_barrier(). */
/* End of fields guarded by barrier_mutex. */
+ raw_spinlock_t barrier_lock; /* Protects ->barrier_seq_snap. */
+
struct mutex exp_mutex; /* Serialize expedited GP. */
struct mutex exp_wake_mutex; /* Serialize wakeup. */
unsigned long expedited_sequence; /* Take a ticket. */
@@ -355,7 +361,7 @@ struct rcu_state {
const char *name; /* Name of structure. */
char abbr; /* Abbreviated name. */
- raw_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
+ arch_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
/* Synchronize offline with */
/* GP pre-initialization. */
};
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 237a79989aba..60197ea24ceb 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -502,7 +502,8 @@ static void synchronize_rcu_expedited_wait(void)
if (synchronize_rcu_expedited_wait_once(1))
return;
rcu_for_each_leaf_node(rnp) {
- for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
+ mask = READ_ONCE(rnp->expmask);
+ for_each_leaf_node_cpu_mask(rnp, cpu, mask) {
rdp = per_cpu_ptr(&rcu_data, cpu);
if (rdp->rcu_forced_tick_exp)
continue;
@@ -656,7 +657,7 @@ static void rcu_exp_handler(void *unused)
*/
if (!depth) {
if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
- rcu_dynticks_curr_cpu_in_eqs()) {
+ rcu_is_cpu_rrupt_from_idle()) {
rcu_report_exp_rdp(rdp);
} else {
WRITE_ONCE(rdp->cpu_no_qs.b.exp, true);
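[Editor's note: the first tree_exp.h hunk is a data-race fix. for_each_leaf_node_cpu_mask() evaluates its mask argument on every iteration, so passing rnp->expmask directly lets concurrent clearing tear the walk. Snapshotting once gives the loop a stable view:

    /* Racy: the macro re-reads rnp->expmask each trip around the loop. */
    for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask)
        /* ... */;

    /* Stable: iterate a one-time snapshot instead. */
    mask = READ_ONCE(rnp->expmask);
    for_each_leaf_node_cpu_mask(rnp, cpu, mask)
        /* ... */;
]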
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index eeafb546a7a0..636d0546a4e9 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1169,7 +1169,7 @@ void __init rcu_init_nohz(void)
struct rcu_data *rdp;
#if defined(CONFIG_NO_HZ_FULL)
- if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
+ if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask))
need_rcu_nocb_mask = true;
#endif /* #if defined(CONFIG_NO_HZ_FULL) */
@@ -1226,6 +1226,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
raw_spin_lock_init(&rdp->nocb_gp_lock);
timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0);
rcu_cblist_init(&rdp->nocb_bypass);
+ mutex_init(&rdp->nocb_gp_kthread_mutex);
}
/*
@@ -1238,6 +1239,7 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
struct rcu_data *rdp_gp;
struct task_struct *t;
+ struct sched_param sp;
if (!rcu_scheduler_fully_active || !rcu_nocb_is_setup)
return;
@@ -1247,20 +1249,30 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
return;
/* If we didn't spawn the GP kthread first, reorganize! */
+ sp.sched_priority = kthread_prio;
rdp_gp = rdp->nocb_gp_rdp;
+ mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
if (!rdp_gp->nocb_gp_kthread) {
t = kthread_run(rcu_nocb_gp_kthread, rdp_gp,
"rcuog/%d", rdp_gp->cpu);
- if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__))
+ if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__)) {
+ mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
return;
+ }
WRITE_ONCE(rdp_gp->nocb_gp_kthread, t);
+ if (kthread_prio)
+ sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
}
+ mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
/* Spawn the kthread for this CPU. */
t = kthread_run(rcu_nocb_cb_kthread, rdp,
"rcuo%c/%d", rcu_state.abbr, cpu);
if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__))
return;
+
+ if (kthread_prio)
+ sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
WRITE_ONCE(rdp->nocb_cb_kthread, t);
WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);
}
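[Editor's note: the new per-GP-leader mutex closes a spawn race — several CPUs share one rcuog kthread, and previously two of them could both observe ->nocb_gp_kthread == NULL and start it twice. Distilled, the fix is the standard check-spawn-publish pattern under a mutex:

    mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
    if (!rdp_gp->nocb_gp_kthread) {          /* first caller spawns... */
        t = kthread_run(rcu_nocb_gp_kthread, rdp_gp, "rcuog/%d", rdp_gp->cpu);
        if (!IS_ERR(t))
            WRITE_ONCE(rdp_gp->nocb_gp_kthread, t);  /* ...and publishes */
    }
    mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);    /* later callers see non-NULL */

The hunk also applies kthread_prio to the rcuog/rcuo kthreads via sched_setscheduler_nocheck(), matching the policy already used for RCU's other kthreads.]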
@@ -1348,7 +1360,7 @@ static void __init rcu_organize_nocb_kthreads(void)
*/
void rcu_bind_current_to_nocb(void)
{
- if (cpumask_available(rcu_nocb_mask) && cpumask_weight(rcu_nocb_mask))
+ if (cpumask_available(rcu_nocb_mask) && !cpumask_empty(rcu_nocb_mask))
WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask));
}
EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index c5b45c2f68a1..8360d86db1c0 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -330,7 +330,7 @@ void rcu_note_context_switch(bool preempt)
* then queue the task as required based on the states
* of any ongoing and expedited grace periods.
*/
- WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0);
+ WARN_ON_ONCE(!rcu_rdp_cpu_online(rdp));
WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
trace_rcu_preempt_task(rcu_state.name,
t->pid,
@@ -556,16 +556,16 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
- /* Unboost if we were boosted. */
- if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
- rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex);
-
/*
* If this was the last task on the expedited lists,
* then we need to report up the rcu_node hierarchy.
*/
if (!empty_exp && empty_exp_now)
rcu_report_exp_rnp(rnp, true);
+
+ /* Unboost if we were boosted. */
+ if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
+ rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex);
} else {
local_irq_restore(flags);
}
@@ -773,7 +773,6 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
int cpu;
int i;
struct list_head *lhp;
- bool onl;
struct rcu_data *rdp;
struct rcu_node *rnp1;
@@ -797,9 +796,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
pr_cont("\n");
for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) {
rdp = per_cpu_ptr(&rcu_data, cpu);
- onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
pr_info("\t%d: %c online: %ld(%d) offline: %ld(%d)\n",
- cpu, ".o"[onl],
+ cpu, ".o"[rcu_rdp_cpu_online(rdp)],
(long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
(long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
}
@@ -996,12 +994,15 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
*/
static void rcu_cpu_kthread_setup(unsigned int cpu)
{
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
#ifdef CONFIG_RCU_BOOST
struct sched_param sp;
sp.sched_priority = kthread_prio;
sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
#endif /* #ifdef CONFIG_RCU_BOOST */
+
+ WRITE_ONCE(rdp->rcuc_activity, jiffies);
}
#ifdef CONFIG_RCU_BOOST
@@ -1172,15 +1173,14 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
struct sched_param sp;
struct task_struct *t;
+ mutex_lock(&rnp->boost_kthread_mutex);
if (rnp->boost_kthread_task || !rcu_scheduler_fully_active)
- return;
-
- rcu_state.boost = 1;
+ goto out;
t = kthread_create(rcu_boost_kthread, (void *)rnp,
"rcub/%d", rnp_index);
if (WARN_ON_ONCE(IS_ERR(t)))
- return;
+ goto out;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
rnp->boost_kthread_task = t;
@@ -1188,6 +1188,9 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
sp.sched_priority = kthread_prio;
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
+
+ out:
+ mutex_unlock(&rnp->boost_kthread_mutex);
}
/*
@@ -1210,14 +1213,16 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
return;
if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
return;
+ mutex_lock(&rnp->boost_kthread_mutex);
for_each_leaf_node_possible_cpu(rnp, cpu)
if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
cpu != outgoingcpu)
cpumask_set_cpu(cpu, cm);
- cpumask_and(cm, cm, housekeeping_cpumask(HK_FLAG_RCU));
- if (cpumask_weight(cm) == 0)
- cpumask_copy(cm, housekeeping_cpumask(HK_FLAG_RCU));
+ cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU));
+ if (cpumask_empty(cm))
+ cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU));
set_cpus_allowed_ptr(t, cm);
+ mutex_unlock(&rnp->boost_kthread_mutex);
free_cpumask_var(cm);
}
@@ -1291,7 +1296,7 @@ static void rcu_bind_gp_kthread(void)
{
if (!tick_nohz_full_enabled())
return;
- housekeeping_affine(current, HK_FLAG_RCU);
+ housekeeping_affine(current, HK_TYPE_RCU);
}
/* Record the current task on dyntick-idle entry. */
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 21bebf7c9030..0c5d8516516a 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -379,6 +379,15 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp)
return j > 2 * HZ;
}
+static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp)
+{
+ unsigned long j = jiffies - READ_ONCE(rdp->rcuc_activity);
+
+ if (jp)
+ *jp = j;
+ return j > 2 * HZ;
+}
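[Editor's note: this is the consumer of the ->rcuc_activity stamp written in the tree.c hunk earlier — the rcuc kthread heartbeats jiffies on each pass, and the stall code flags starvation once the stamp goes more than two seconds stale. The unsigned subtraction stays correct across jiffies wraparound; in miniature:

    /* Writer (the kthread), once per loop: */
    WRITE_ONCE(*heartbeat, jiffies);

    /* Checker: the unsigned delta is wrap-safe. */
    unsigned long age = jiffies - READ_ONCE(*heartbeat);
    if (age > 2 * HZ)
        pr_err("kthread starved for %ld jiffies\n", age);
]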
+
/*
* Print out diagnostic information for the specified stalled CPU.
*
@@ -430,6 +439,29 @@ static void print_cpu_stall_info(int cpu)
falsepositive ? " (false positive?)" : "");
}
+static void rcuc_kthread_dump(struct rcu_data *rdp)
+{
+ int cpu;
+ unsigned long j;
+ struct task_struct *rcuc;
+
+ rcuc = rdp->rcu_cpu_kthread_task;
+ if (!rcuc)
+ return;
+
+ cpu = task_cpu(rcuc);
+ if (cpu_is_offline(cpu) || idle_cpu(cpu))
+ return;
+
+ if (!rcu_is_rcuc_kthread_starving(rdp, &j))
+ return;
+
+ pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j);
+ sched_show_task(rcuc);
+ if (!trigger_single_cpu_backtrace(cpu))
+ dump_cpu_task(cpu);
+}
+
/* Complain about starvation of grace-period kthread. */
static void rcu_check_gp_kthread_starvation(void)
{
@@ -601,6 +633,9 @@ static void print_cpu_stall(unsigned long gps)
rcu_check_gp_kthread_expired_fqs_timer();
rcu_check_gp_kthread_starvation();
+ if (!use_softirq)
+ rcuc_kthread_dump(rdp);
+
rcu_dump_cpu_stacks();
raw_spin_lock_irqsave_rcu_node(rnp, flags);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 156892c22bb5..180ff9c41fa8 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -407,6 +407,13 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
}
EXPORT_SYMBOL_GPL(__wait_rcu_gp);
+void finish_rcuwait(struct rcuwait *w)
+{
+ rcu_assign_pointer(w->task, NULL);
+ __set_current_state(TASK_RUNNING);
+}
+EXPORT_SYMBOL_GPL(finish_rcuwait);
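[Editor's note: finish_rcuwait() moves out of line and gains an export so modules can use it. For context, a hedged sketch of how the <linux/rcuwait.h> primitives compose — my reading of that API, not part of this hunk:

    struct rcuwait w;
    int done;

    rcuwait_init(&w);

    /* Waiter (at most one): sleeps until the condition holds. */
    rcuwait_wait_event(&w, READ_ONCE(done), TASK_UNINTERRUPTIBLE);

    /* Waker: publish the condition, then wake the waiter if any. */
    WRITE_ONCE(done, 1);
    rcuwait_wake_up(&w);
]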
+
#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
void init_rcu_head(struct rcu_head *head)
{
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 6d45ac3dae7f..97ac20b4f738 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -128,10 +128,10 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
int ret;
#ifdef CONFIG_64BIT
- if (get_user(ptr, &t->rseq->rseq_cs.ptr64))
+ if (get_user(ptr, &t->rseq->rseq_cs))
return -EFAULT;
#else
- if (copy_from_user(&ptr, &t->rseq->rseq_cs.ptr64, sizeof(ptr)))
+ if (copy_from_user(&ptr, &t->rseq->rseq_cs, sizeof(ptr)))
return -EFAULT;
#endif
if (!ptr) {
@@ -217,9 +217,9 @@ static int clear_rseq_cs(struct task_struct *t)
* Set rseq_cs to NULL.
*/
#ifdef CONFIG_64BIT
- return put_user(0UL, &t->rseq->rseq_cs.ptr64);
+ return put_user(0UL, &t->rseq->rseq_cs);
#else
- if (clear_user(&t->rseq->rseq_cs.ptr64, sizeof(t->rseq->rseq_cs.ptr64)))
+ if (clear_user(&t->rseq->rseq_cs, sizeof(t->rseq->rseq_cs)))
return -EFAULT;
return 0;
#endif
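[Editor's note: both rseq hunks track a uapi simplification — struct rseq's rseq_cs field stops being a union of per-width views and becomes a flat __u64, so the kernel can address it uniformly on 32- and 64-bit. Schematically (abridged; the endian-dependent padded 32-bit view is omitted):

    /* Before: a union with a 64-bit view and a padded 32-bit view. */
    union {
        __u64 ptr64;
        /* ... struct with padding for 32-bit userspace ... */
    } rseq_cs;

    /* After: one flat field; 32-bit userspace zero-extends into it. */
    __u64 rseq_cs;
]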
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index c83b37af155b..976092b7bd45 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -1,7 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-ifdef CONFIG_FUNCTION_TRACER
-CFLAGS_REMOVE_clock.o = $(CC_FLAGS_FTRACE)
-endif
# The compilers are complaining about unused variables inside an if(0) scope
# block. This is daft, shut them up.
@@ -25,18 +22,13 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
endif
-obj-y += core.o loadavg.o clock.o cputime.o
-obj-y += idle.o fair.o rt.o deadline.o
-obj-y += wait.o wait_bit.o swait.o completion.o
-
-obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
-obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
-obj-$(CONFIG_SCHEDSTATS) += stats.o
-obj-$(CONFIG_SCHED_DEBUG) += debug.o
-obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
-obj-$(CONFIG_CPU_FREQ) += cpufreq.o
-obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
-obj-$(CONFIG_MEMBARRIER) += membarrier.o
-obj-$(CONFIG_CPU_ISOLATION) += isolation.o
-obj-$(CONFIG_PSI) += psi.o
-obj-$(CONFIG_SCHED_CORE) += core_sched.o
+#
+# Build efficiency:
+#
+# These compilation units have roughly the same size and complexity, so their
+# builds parallelize well and finish at roughly the same time:
+#
+obj-y += core.o
+obj-y += fair.o
+obj-y += build_policy.o
+obj-y += build_utility.o
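[Editor's note: eighteen-odd objects collapse into four. The mechanism, shown by the two new build_*.c files below, is compilation-unit coalescing: one .c file #includes the others, so the large shared scheduler headers are parsed once per unit rather than once per source file. The skeleton of the trick, with hypothetical module names:

    /* combined.c -- one object built from several source modules */
    #include <linux/kernel.h>      /* shared headers, parsed once */

    #include "alpha.c"             /* hypothetical modules */
    #include "beta.c"
    #ifdef CONFIG_FEATURE_GAMMA
    # include "gamma.c"            /* config-gated, as obj-$(CONFIG_...) was */
    #endif
]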
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index 8629b37d118e..16092b49ff6a 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -1,14 +1,35 @@
// SPDX-License-Identifier: GPL-2.0
+
/*
* Auto-group scheduling implementation:
*/
-#include <linux/nospec.h>
-#include "sched.h"
unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
static struct autogroup autogroup_default;
static atomic_t autogroup_seq_nr;
+#ifdef CONFIG_SYSCTL
+static struct ctl_table sched_autogroup_sysctls[] = {
+ {
+ .procname = "sched_autogroup_enabled",
+ .data = &sysctl_sched_autogroup_enabled,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {}
+};
+
+static void __init sched_autogroup_sysctl_init(void)
+{
+ register_sysctl_init("kernel", sched_autogroup_sysctls);
+}
+#else
+#define sched_autogroup_sysctl_init() do { } while (0)
+#endif
+
void __init autogroup_init(struct task_struct *init_task)
{
autogroup_default.tg = &root_task_group;
@@ -198,6 +219,7 @@ void sched_autogroup_exit(struct signal_struct *sig)
static int __init setup_autogroup(char *str)
{
sysctl_sched_autogroup_enabled = 0;
+ sched_autogroup_sysctl_init();
return 1;
}
diff --git a/kernel/sched/autogroup.h b/kernel/sched/autogroup.h
index b96419974a1f..90d69f2c5eaf 100644
--- a/kernel/sched/autogroup.h
+++ b/kernel/sched/autogroup.h
@@ -1,4 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _KERNEL_SCHED_AUTOGROUP_H
+#define _KERNEL_SCHED_AUTOGROUP_H
+
#ifdef CONFIG_SCHED_AUTOGROUP
struct autogroup {
@@ -27,6 +30,7 @@ extern bool task_wants_autogroup(struct task_struct *p, struct task_group *tg);
static inline struct task_group *
autogroup_task_group(struct task_struct *p, struct task_group *tg)
{
+ extern unsigned int sysctl_sched_autogroup_enabled;
int enabled = READ_ONCE(sysctl_sched_autogroup_enabled);
if (enabled && task_wants_autogroup(p, tg))
@@ -58,3 +62,5 @@ static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
}
#endif /* CONFIG_SCHED_AUTOGROUP */
+
+#endif /* _KERNEL_SCHED_AUTOGROUP_H */
diff --git a/kernel/sched/build_policy.c b/kernel/sched/build_policy.c
new file mode 100644
index 000000000000..e0104b45029a
--- /dev/null
+++ b/kernel/sched/build_policy.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * These are the scheduling policy related scheduler files, built
+ * in a single compilation unit for build efficiency reasons.
+ *
+ * ( Incidentally, the size of the compilation unit is roughly
+ * comparable to core.c and fair.c, the other two big
+ * compilation units. This helps balance build time, while
+ * coalescing source files to amortize header inclusion
+ * cost. )
+ *
+ * core.c and fair.c are built separately.
+ */
+
+/* Headers: */
+#include <linux/sched/clock.h>
+#include <linux/sched/cputime.h>
+#include <linux/sched/posix-timers.h>
+#include <linux/sched/rt.h>
+
+#include <linux/cpuidle.h>
+#include <linux/jiffies.h>
+#include <linux/livepatch.h>
+#include <linux/psi.h>
+#include <linux/seqlock_api.h>
+#include <linux/slab.h>
+#include <linux/suspend.h>
+#include <linux/tsacct_kern.h>
+#include <linux/vtime.h>
+
+#include <uapi/linux/sched/types.h>
+
+#include "sched.h"
+
+#include "autogroup.h"
+#include "stats.h"
+#include "pelt.h"
+
+/* Source code modules: */
+
+#include "idle.c"
+
+#include "rt.c"
+
+#ifdef CONFIG_SMP
+# include "cpudeadline.c"
+# include "pelt.c"
+#endif
+
+#include "cputime.c"
+#include "deadline.c"
+
diff --git a/kernel/sched/build_utility.c b/kernel/sched/build_utility.c
new file mode 100644
index 000000000000..eec0849b2aae
--- /dev/null
+++ b/kernel/sched/build_utility.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * These are various utility functions of the scheduler,
+ * built in a single compilation unit for build efficiency reasons.
+ *
+ * ( Incidentally, the size of the compilation unit is roughly
+ * comparable to core.c, fair.c, smp.c and policy.c, the other
+ * big compilation units. This helps balance build time, while
+ * coalescing source files to amortize header inclusion
+ * cost. )
+ */
+#include <linux/sched/clock.h>
+#include <linux/sched/cputime.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/isolation.h>
+#include <linux/sched/loadavg.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/rseq_api.h>
+#include <linux/sched/task_stack.h>
+
+#include <linux/cpufreq.h>
+#include <linux/cpumask_api.h>
+#include <linux/cpuset.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/energy_model.h>
+#include <linux/hashtable_api.h>
+#include <linux/irq.h>
+#include <linux/kobject_api.h>
+#include <linux/membarrier.h>
+#include <linux/mempolicy.h>
+#include <linux/nmi.h>
+#include <linux/nospec.h>
+#include <linux/proc_fs.h>
+#include <linux/psi.h>
+#include <linux/ptrace_api.h>
+#include <linux/sched_clock.h>
+#include <linux/security.h>
+#include <linux/spinlock_api.h>
+#include <linux/swait_api.h>
+#include <linux/timex.h>
+#include <linux/utsname.h>
+#include <linux/wait_api.h>
+#include <linux/workqueue_api.h>
+
+#include <uapi/linux/prctl.h>
+#include <uapi/linux/sched/types.h>
+
+#include <asm/switch_to.h>
+
+#include "sched.h"
+#include "sched-pelt.h"
+#include "stats.h"
+#include "autogroup.h"
+
+#include "clock.c"
+
+#ifdef CONFIG_CGROUP_CPUACCT
+# include "cpuacct.c"
+#endif
+
+#ifdef CONFIG_CPU_FREQ
+# include "cpufreq.c"
+#endif
+
+#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
+# include "cpufreq_schedutil.c"
+#endif
+
+#ifdef CONFIG_SCHED_DEBUG
+# include "debug.c"
+#endif
+
+#ifdef CONFIG_SCHEDSTATS
+# include "stats.c"
+#endif
+
+#include "loadavg.c"
+#include "completion.c"
+#include "swait.c"
+#include "wait_bit.c"
+#include "wait.c"
+
+#ifdef CONFIG_SMP
+# include "cpupri.c"
+# include "stop_task.c"
+# include "topology.c"
+#endif
+
+#ifdef CONFIG_SCHED_CORE
+# include "core_sched.c"
+#endif
+
+#ifdef CONFIG_PSI
+# include "psi.c"
+#endif
+
+#ifdef CONFIG_MEMBARRIER
+# include "membarrier.c"
+#endif
+
+#ifdef CONFIG_CPU_ISOLATION
+# include "isolation.c"
+#endif
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+# include "autogroup.c"
+#endif
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index c2b2859ddd82..d9272d9061a3 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -53,15 +53,13 @@
* that is otherwise invisible (TSC gets stopped).
*
*/
-#include "sched.h"
-#include <linux/sched_clock.h>
/*
* Scheduler clock - returns current time in nanosec units.
* This is default implementation.
* Architectures and sub-architectures can override this.
*/
-unsigned long long __weak sched_clock(void)
+notrace unsigned long long __weak sched_clock(void)
{
return (unsigned long long)(jiffies - INITIAL_JIFFIES)
* (NSEC_PER_SEC / HZ);
@@ -95,28 +93,28 @@ struct sched_clock_data {
static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
-static inline struct sched_clock_data *this_scd(void)
+notrace static inline struct sched_clock_data *this_scd(void)
{
return this_cpu_ptr(&sched_clock_data);
}
-static inline struct sched_clock_data *cpu_sdc(int cpu)
+notrace static inline struct sched_clock_data *cpu_sdc(int cpu)
{
return &per_cpu(sched_clock_data, cpu);
}
-int sched_clock_stable(void)
+notrace int sched_clock_stable(void)
{
return static_branch_likely(&__sched_clock_stable);
}
-static void __scd_stamp(struct sched_clock_data *scd)
+notrace static void __scd_stamp(struct sched_clock_data *scd)
{
scd->tick_gtod = ktime_get_ns();
scd->tick_raw = sched_clock();
}
-static void __set_sched_clock_stable(void)
+notrace static void __set_sched_clock_stable(void)
{
struct sched_clock_data *scd;
@@ -151,7 +149,7 @@ static void __set_sched_clock_stable(void)
* The only way to fully avoid random clock jumps is to boot with:
* "tsc=unstable".
*/
-static void __sched_clock_work(struct work_struct *work)
+notrace static void __sched_clock_work(struct work_struct *work)
{
struct sched_clock_data *scd;
int cpu;
@@ -177,7 +175,7 @@ static void __sched_clock_work(struct work_struct *work)
static DECLARE_WORK(sched_clock_work, __sched_clock_work);
-static void __clear_sched_clock_stable(void)
+notrace static void __clear_sched_clock_stable(void)
{
if (!sched_clock_stable())
return;
@@ -186,7 +184,7 @@ static void __clear_sched_clock_stable(void)
schedule_work(&sched_clock_work);
}
-void clear_sched_clock_stable(void)
+notrace void clear_sched_clock_stable(void)
{
__sched_clock_stable_early = 0;
@@ -196,7 +194,7 @@ void clear_sched_clock_stable(void)
__clear_sched_clock_stable();
}
-static void __sched_clock_gtod_offset(void)
+notrace static void __sched_clock_gtod_offset(void)
{
struct sched_clock_data *scd = this_scd();
@@ -246,12 +244,12 @@ late_initcall(sched_clock_init_late);
* min, max except they take wrapping into account
*/
-static inline u64 wrap_min(u64 x, u64 y)
+notrace static inline u64 wrap_min(u64 x, u64 y)
{
return (s64)(x - y) < 0 ? x : y;
}
-static inline u64 wrap_max(u64 x, u64 y)
+notrace static inline u64 wrap_max(u64 x, u64 y)
{
return (s64)(x - y) > 0 ? x : y;
}
@@ -262,7 +260,7 @@ static inline u64 wrap_max(u64 x, u64 y)
* - filter out backward motion
* - use the GTOD tick value to create a window to filter crazy TSC values
*/
-static u64 sched_clock_local(struct sched_clock_data *scd)
+notrace static u64 sched_clock_local(struct sched_clock_data *scd)
{
u64 now, clock, old_clock, min_clock, max_clock, gtod;
s64 delta;
@@ -295,7 +293,7 @@ again:
return clock;
}
-static u64 sched_clock_remote(struct sched_clock_data *scd)
+notrace static u64 sched_clock_remote(struct sched_clock_data *scd)
{
struct sched_clock_data *my_scd = this_scd();
u64 this_clock, remote_clock;
@@ -362,7 +360,7 @@ again:
*
* See cpu_clock().
*/
-u64 sched_clock_cpu(int cpu)
+notrace u64 sched_clock_cpu(int cpu)
{
struct sched_clock_data *scd;
u64 clock;
@@ -386,7 +384,7 @@ u64 sched_clock_cpu(int cpu)
}
EXPORT_SYMBOL_GPL(sched_clock_cpu);
-void sched_clock_tick(void)
+notrace void sched_clock_tick(void)
{
struct sched_clock_data *scd;
@@ -403,7 +401,7 @@ void sched_clock_tick(void)
sched_clock_local(scd);
}
-void sched_clock_tick_stable(void)
+notrace void sched_clock_tick_stable(void)
{
if (!sched_clock_stable())
return;
@@ -423,7 +421,7 @@ void sched_clock_tick_stable(void)
/*
* We are going deep-idle (irqs are disabled):
*/
-void sched_clock_idle_sleep_event(void)
+notrace void sched_clock_idle_sleep_event(void)
{
sched_clock_cpu(smp_processor_id());
}
@@ -432,7 +430,7 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
/*
* We just idled; resync with ktime.
*/
-void sched_clock_idle_wakeup_event(void)
+notrace void sched_clock_idle_wakeup_event(void)
{
unsigned long flags;
@@ -458,7 +456,7 @@ void __init sched_clock_init(void)
local_irq_enable();
}
-u64 sched_clock_cpu(int cpu)
+notrace u64 sched_clock_cpu(int cpu)
{
if (!static_branch_likely(&sched_clock_running))
return 0;
@@ -476,7 +474,7 @@ u64 sched_clock_cpu(int cpu)
* On bare metal this function should return the same as local_clock.
* Architectures and sub-architectures can override this.
*/
-u64 __weak running_clock(void)
+notrace u64 __weak running_clock(void)
{
return local_clock();
}
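[Editor's note: every function in clock.c is annotated notrace, replacing the Makefile-wide CFLAGS_REMOVE_clock.o deleted earlier — now that clock.c is folded into build_utility.o, per-object flag stripping would wrongly exempt its unit-mates from tracing, so the opt-out becomes per-function. The annotation expands to a compiler attribute, roughly:

    /* From include/linux/compiler_types.h (paraphrased): */
    #define notrace __attribute__((__no_instrument_function__))

    /* Needed because the function tracer itself takes timestamps
     * through these paths; instrumenting them would recurse. */
    notrace u64 sched_clock_cpu(int cpu);
]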
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index a778554f9dad..35f15c26ed54 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+
/*
* Generic wait-for-completion handler;
*
@@ -11,7 +12,6 @@
* typically be used for exclusion which gives rise to priority inversion.
* Waiting for completion is a typically sync point, but not an exclusion point.
*/
-#include "sched.h"
/**
* complete: - signals a single thread waiting on this completion
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2e4ae00e52d1..d575b4914925 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6,26 +6,90 @@
*
* Copyright (C) 1991-2002 Linus Torvalds
*/
-#define CREATE_TRACE_POINTS
-#include <trace/events/sched.h>
-#undef CREATE_TRACE_POINTS
-
-#include "sched.h"
+#include <linux/highmem.h>
+#include <linux/hrtimer_api.h>
+#include <linux/ktime_api.h>
+#include <linux/sched/signal.h>
+#include <linux/syscalls_api.h>
+#include <linux/debug_locks.h>
+#include <linux/prefetch.h>
+#include <linux/capability.h>
+#include <linux/pgtable_api.h>
+#include <linux/wait_bit.h>
+#include <linux/jiffies.h>
+#include <linux/spinlock_api.h>
+#include <linux/cpumask_api.h>
+#include <linux/lockdep_api.h>
+#include <linux/hardirq.h>
+#include <linux/softirq.h>
+#include <linux/refcount_api.h>
+#include <linux/topology.h>
+#include <linux/sched/clock.h>
+#include <linux/sched/cond_resched.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/isolation.h>
+#include <linux/sched/loadavg.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/nohz.h>
+#include <linux/sched/rseq_api.h>
+#include <linux/sched/rt.h>
-#include <linux/nospec.h>
#include <linux/blkdev.h>
+#include <linux/context_tracking.h>
+#include <linux/cpuset.h>
+#include <linux/delayacct.h>
+#include <linux/init_task.h>
+#include <linux/interrupt.h>
+#include <linux/ioprio.h>
+#include <linux/kallsyms.h>
#include <linux/kcov.h>
+#include <linux/kprobes.h>
+#include <linux/llist_api.h>
+#include <linux/mmu_context.h>
+#include <linux/mmzone.h>
+#include <linux/mutex_api.h>
+#include <linux/nmi.h>
+#include <linux/nospec.h>
+#include <linux/perf_event_api.h>
+#include <linux/profile.h>
+#include <linux/psi.h>
+#include <linux/rcuwait_api.h>
+#include <linux/sched/wake_q.h>
#include <linux/scs.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/vtime.h>
+#include <linux/wait_api.h>
+#include <linux/workqueue_api.h>
+
+#ifdef CONFIG_PREEMPT_DYNAMIC
+# ifdef CONFIG_GENERIC_ENTRY
+# include <linux/entry-common.h>
+# endif
+#endif
+
+#include <uapi/linux/sched/types.h>
#include <asm/switch_to.h>
#include <asm/tlb.h>
-#include "../workqueue_internal.h"
-#include "../../fs/io-wq.h"
-#include "../smpboot.h"
+#define CREATE_TRACE_POINTS
+#include <linux/sched/rseq_api.h>
+#include <trace/events/sched.h>
+#undef CREATE_TRACE_POINTS
+#include "sched.h"
+#include "stats.h"
+#include "autogroup.h"
+
+#include "autogroup.h"
#include "pelt.h"
#include "smp.h"
+#include "stats.h"
+
+#include "../workqueue_internal.h"
+#include "../../fs/io-wq.h"
+#include "../smpboot.h"
/*
* Export tracepoints that act as a bare tracehook (ie: have no trace event
@@ -36,6 +100,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp);
+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_thermal_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_cpu_capacity_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_cfs_tp);
@@ -1024,13 +1089,13 @@ int get_nohz_timer_target(void)
struct sched_domain *sd;
const struct cpumask *hk_mask;
- if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) {
+ if (housekeeping_cpu(cpu, HK_TYPE_TIMER)) {
if (!idle_cpu(cpu))
return cpu;
default_cpu = cpu;
}
- hk_mask = housekeeping_cpumask(HK_FLAG_TIMER);
+ hk_mask = housekeeping_cpumask(HK_TYPE_TIMER);
rcu_read_lock();
for_each_domain(cpu, sd) {
@@ -1046,7 +1111,7 @@ int get_nohz_timer_target(void)
}
if (default_cpu == -1)
- default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
+ default_cpu = housekeeping_any_cpu(HK_TYPE_TIMER);
cpu = default_cpu;
unlock:
rcu_read_unlock();
@@ -4279,7 +4344,9 @@ DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
#ifdef CONFIG_NUMA_BALANCING
-void set_numabalancing_state(bool enabled)
+int sysctl_numa_balancing_mode;
+
+static void __set_numabalancing_state(bool enabled)
{
if (enabled)
static_branch_enable(&sched_numa_balancing);
@@ -4287,13 +4354,22 @@ void set_numabalancing_state(bool enabled)
static_branch_disable(&sched_numa_balancing);
}
+void set_numabalancing_state(bool enabled)
+{
+ if (enabled)
+ sysctl_numa_balancing_mode = NUMA_BALANCING_NORMAL;
+ else
+ sysctl_numa_balancing_mode = NUMA_BALANCING_DISABLED;
+ __set_numabalancing_state(enabled);
+}
+
#ifdef CONFIG_PROC_SYSCTL
int sysctl_numa_balancing(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table t;
int err;
- int state = static_branch_likely(&sched_numa_balancing);
+ int state = sysctl_numa_balancing_mode;
if (write && !capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -4303,8 +4379,10 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
if (err < 0)
return err;
- if (write)
- set_numabalancing_state(state);
+ if (write) {
+ sysctl_numa_balancing_mode = state;
+ __set_numabalancing_state(state);
+ }
return err;
}
#endif
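[Editor's note: the handler keeps the usual proc-sysctl shape — run the generic parser on a stack copy of the ctl_table so the parsed value can be validated and post-processed before it takes effect. Distilled:

    struct ctl_table t = *table;             /* writable copy */
    int state = sysctl_numa_balancing_mode;  /* current value as the default */

    t.data = &state;
    err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
    if (err >= 0 && write) {
        sysctl_numa_balancing_mode = state;  /* record the mode... */
        __set_numabalancing_state(state);    /* ...and flip the static branch */
    }
]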
@@ -4424,6 +4502,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
init_entity_runnable_average(&p->se);
+
#ifdef CONFIG_SCHED_INFO
if (likely(sched_info_on()))
memset(&p->sched_info, 0, sizeof(p->sched_info));
@@ -4439,18 +4518,23 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
return 0;
}
-void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs)
+void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
{
unsigned long flags;
-#ifdef CONFIG_CGROUP_SCHED
- struct task_group *tg;
-#endif
+ /*
+ * Because we're not yet on the pid-hash, p->pi_lock isn't strictly
+ * required yet, but lockdep gets upset if rules are violated.
+ */
raw_spin_lock_irqsave(&p->pi_lock, flags);
#ifdef CONFIG_CGROUP_SCHED
- tg = container_of(kargs->cset->subsys[cpu_cgrp_id],
- struct task_group, css);
- p->sched_task_group = autogroup_task_group(p, tg);
+ if (1) {
+ struct task_group *tg;
+ tg = container_of(kargs->cset->subsys[cpu_cgrp_id],
+ struct task_group, css);
+ tg = autogroup_task_group(p, tg);
+ p->sched_task_group = tg;
+ }
#endif
rseq_migrate(p);
/*
@@ -4461,7 +4545,10 @@ void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs)
if (p->sched_class->task_fork)
p->sched_class->task_fork(p);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+}
+void sched_post_fork(struct task_struct *p)
+{
uclamp_post_fork(p);
}
@@ -4825,7 +4912,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
{
struct rq *rq = this_rq();
struct mm_struct *mm = rq->prev_mm;
- long prev_state;
+ unsigned int prev_state;
/*
* The previous task will have left us with a preempt_count of 2
@@ -5370,7 +5457,7 @@ static void sched_tick_start(int cpu)
int os;
struct tick_work *twork;
- if (housekeeping_cpu(cpu, HK_FLAG_TICK))
+ if (housekeeping_cpu(cpu, HK_TYPE_TICK))
return;
WARN_ON_ONCE(!tick_work_cpu);
@@ -5391,7 +5478,7 @@ static void sched_tick_stop(int cpu)
struct tick_work *twork;
int os;
- if (housekeeping_cpu(cpu, HK_FLAG_TICK))
+ if (housekeeping_cpu(cpu, HK_TYPE_TICK))
return;
WARN_ON_ONCE(!tick_work_cpu);
@@ -5822,8 +5909,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
}
if (schedstat_enabled() && rq->core->core_forceidle_count) {
- if (cookie)
- rq->core->core_forceidle_start = rq_clock(rq->core);
+ rq->core->core_forceidle_start = rq_clock(rq->core);
rq->core->core_forceidle_occupation = occ;
}
@@ -6290,7 +6376,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
migrate_disable_switch(rq, prev);
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
- trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next);
+ trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev_state, prev, next);
/* Also unlocks the rq: */
rq = context_switch(rq, prev, next, &rf);
@@ -6345,8 +6431,7 @@ static inline void sched_submit_work(struct task_struct *tsk)
* If we are going to sleep and we have plugged IO queued,
* make sure to submit it to avoid deadlocks.
*/
- if (blk_needs_flush_plug(tsk))
- blk_flush_plug(tsk->plug, true);
+ blk_flush_plug(tsk->plug, true);
}
static void sched_update_worker(struct task_struct *tsk)
@@ -6483,17 +6568,31 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
*/
if (likely(!preemptible()))
return;
-
preempt_schedule_common();
}
NOKPROBE_SYMBOL(preempt_schedule);
EXPORT_SYMBOL(preempt_schedule);
#ifdef CONFIG_PREEMPT_DYNAMIC
-DEFINE_STATIC_CALL(preempt_schedule, __preempt_schedule_func);
+#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
+#ifndef preempt_schedule_dynamic_enabled
+#define preempt_schedule_dynamic_enabled preempt_schedule
+#define preempt_schedule_dynamic_disabled NULL
+#endif
+DEFINE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled);
EXPORT_STATIC_CALL_TRAMP(preempt_schedule);
+#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
+static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule);
+void __sched notrace dynamic_preempt_schedule(void)
+{
+ if (!static_branch_unlikely(&sk_dynamic_preempt_schedule))
+ return;
+ preempt_schedule();
+}
+NOKPROBE_SYMBOL(dynamic_preempt_schedule);
+EXPORT_SYMBOL(dynamic_preempt_schedule);
+#endif
#endif
-
/**
* preempt_schedule_notrace - preempt_schedule called by tracing
@@ -6548,147 +6647,27 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
#ifdef CONFIG_PREEMPT_DYNAMIC
-DEFINE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func);
-EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace);
+#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
+#ifndef preempt_schedule_notrace_dynamic_enabled
+#define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace
+#define preempt_schedule_notrace_dynamic_disabled NULL
#endif
-
-#endif /* CONFIG_PREEMPTION */
-
-#ifdef CONFIG_PREEMPT_DYNAMIC
-
-#include <linux/entry-common.h>
-
-/*
- * SC:cond_resched
- * SC:might_resched
- * SC:preempt_schedule
- * SC:preempt_schedule_notrace
- * SC:irqentry_exit_cond_resched
- *
- *
- * NONE:
- * cond_resched <- __cond_resched
- * might_resched <- RET0
- * preempt_schedule <- NOP
- * preempt_schedule_notrace <- NOP
- * irqentry_exit_cond_resched <- NOP
- *
- * VOLUNTARY:
- * cond_resched <- __cond_resched
- * might_resched <- __cond_resched
- * preempt_schedule <- NOP
- * preempt_schedule_notrace <- NOP
- * irqentry_exit_cond_resched <- NOP
- *
- * FULL:
- * cond_resched <- RET0
- * might_resched <- RET0
- * preempt_schedule <- preempt_schedule
- * preempt_schedule_notrace <- preempt_schedule_notrace
- * irqentry_exit_cond_resched <- irqentry_exit_cond_resched
- */
-
-enum {
- preempt_dynamic_undefined = -1,
- preempt_dynamic_none,
- preempt_dynamic_voluntary,
- preempt_dynamic_full,
-};
-
-int preempt_dynamic_mode = preempt_dynamic_undefined;
-
-int sched_dynamic_mode(const char *str)
-{
- if (!strcmp(str, "none"))
- return preempt_dynamic_none;
-
- if (!strcmp(str, "voluntary"))
- return preempt_dynamic_voluntary;
-
- if (!strcmp(str, "full"))
- return preempt_dynamic_full;
-
- return -EINVAL;
-}
-
-void sched_dynamic_update(int mode)
-{
- /*
- * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in
- * the ZERO state, which is invalid.
- */
- static_call_update(cond_resched, __cond_resched);
- static_call_update(might_resched, __cond_resched);
- static_call_update(preempt_schedule, __preempt_schedule_func);
- static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func);
- static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched);
-
- switch (mode) {
- case preempt_dynamic_none:
- static_call_update(cond_resched, __cond_resched);
- static_call_update(might_resched, (void *)&__static_call_return0);
- static_call_update(preempt_schedule, NULL);
- static_call_update(preempt_schedule_notrace, NULL);
- static_call_update(irqentry_exit_cond_resched, NULL);
- pr_info("Dynamic Preempt: none\n");
- break;
-
- case preempt_dynamic_voluntary:
- static_call_update(cond_resched, __cond_resched);
- static_call_update(might_resched, __cond_resched);
- static_call_update(preempt_schedule, NULL);
- static_call_update(preempt_schedule_notrace, NULL);
- static_call_update(irqentry_exit_cond_resched, NULL);
- pr_info("Dynamic Preempt: voluntary\n");
- break;
-
- case preempt_dynamic_full:
- static_call_update(cond_resched, (void *)&__static_call_return0);
- static_call_update(might_resched, (void *)&__static_call_return0);
- static_call_update(preempt_schedule, __preempt_schedule_func);
- static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func);
- static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched);
- pr_info("Dynamic Preempt: full\n");
- break;
- }
-
- preempt_dynamic_mode = mode;
-}
-
-static int __init setup_preempt_mode(char *str)
-{
- int mode = sched_dynamic_mode(str);
- if (mode < 0) {
- pr_warn("Dynamic Preempt: unsupported mode: %s\n", str);
- return 0;
- }
-
- sched_dynamic_update(mode);
- return 1;
-}
-__setup("preempt=", setup_preempt_mode);
-
-static void __init preempt_dynamic_init(void)
+DEFINE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled);
+EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace);
+#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
+static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule_notrace);
+void __sched notrace dynamic_preempt_schedule_notrace(void)
{
- if (preempt_dynamic_mode == preempt_dynamic_undefined) {
- if (IS_ENABLED(CONFIG_PREEMPT_NONE)) {
- sched_dynamic_update(preempt_dynamic_none);
- } else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
- sched_dynamic_update(preempt_dynamic_voluntary);
- } else {
- /* Default static call setting, nothing to do */
- WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT));
- preempt_dynamic_mode = preempt_dynamic_full;
- pr_info("Dynamic Preempt: full\n");
- }
- }
+ if (!static_branch_unlikely(&sk_dynamic_preempt_schedule_notrace))
+ return;
+ preempt_schedule_notrace();
}
+NOKPROBE_SYMBOL(dynamic_preempt_schedule_notrace);
+EXPORT_SYMBOL(dynamic_preempt_schedule_notrace);
+#endif
+#endif
-#else /* !CONFIG_PREEMPT_DYNAMIC */
-
-static inline void preempt_dynamic_init(void) { }
-
-#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */
+#endif /* CONFIG_PREEMPTION */
/*
* This is the entry point to schedule() from kernel preemption
@@ -8195,11 +8174,35 @@ EXPORT_SYMBOL(__cond_resched);
#endif
#ifdef CONFIG_PREEMPT_DYNAMIC
+#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
+#define cond_resched_dynamic_enabled __cond_resched
+#define cond_resched_dynamic_disabled ((void *)&__static_call_return0)
DEFINE_STATIC_CALL_RET0(cond_resched, __cond_resched);
EXPORT_STATIC_CALL_TRAMP(cond_resched);
+#define might_resched_dynamic_enabled __cond_resched
+#define might_resched_dynamic_disabled ((void *)&__static_call_return0)
DEFINE_STATIC_CALL_RET0(might_resched, __cond_resched);
EXPORT_STATIC_CALL_TRAMP(might_resched);
+#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
+static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched);
+int __sched dynamic_cond_resched(void)
+{
+ if (!static_branch_unlikely(&sk_dynamic_cond_resched))
+ return 0;
+ return __cond_resched();
+}
+EXPORT_SYMBOL(dynamic_cond_resched);
+
+static DEFINE_STATIC_KEY_FALSE(sk_dynamic_might_resched);
+int __sched dynamic_might_resched(void)
+{
+ if (!static_branch_unlikely(&sk_dynamic_might_resched))
+ return 0;
+ return __cond_resched();
+}
+EXPORT_SYMBOL(dynamic_might_resched);
+#endif
#endif
/*
@@ -8219,9 +8222,7 @@ int __cond_resched_lock(spinlock_t *lock)
if (spin_needbreak(lock) || resched) {
spin_unlock(lock);
- if (resched)
- preempt_schedule_common();
- else
+ if (!_cond_resched())
cpu_relax();
ret = 1;
spin_lock(lock);
@@ -8239,9 +8240,7 @@ int __cond_resched_rwlock_read(rwlock_t *lock)
if (rwlock_needbreak(lock) || resched) {
read_unlock(lock);
- if (resched)
- preempt_schedule_common();
- else
+ if (!_cond_resched())
cpu_relax();
ret = 1;
read_lock(lock);
@@ -8259,9 +8258,7 @@ int __cond_resched_rwlock_write(rwlock_t *lock)
if (rwlock_needbreak(lock) || resched) {
write_unlock(lock);
- if (resched)
- preempt_schedule_common();
- else
+ if (!_cond_resched())
cpu_relax();
ret = 1;
write_lock(lock);
@@ -8270,6 +8267,154 @@ int __cond_resched_rwlock_write(rwlock_t *lock)
}
EXPORT_SYMBOL(__cond_resched_rwlock_write);
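
All three lock-break helpers above now funnel through _cond_resched() instead of calling preempt_schedule_common() directly, so the PREEMPT_DYNAMIC patching applies to them as well; when _cond_resched() has been patched out and does nothing, the helpers fall back to cpu_relax(). A minimal sketch of the shared shape (hypothetical name, lockdep assertion elided):

int cond_resched_lock_sketch(spinlock_t *lock)
{
	int resched = should_resched(PREEMPT_LOCK_OFFSET);
	int ret = 0;

	if (spin_needbreak(lock) || resched) {
		spin_unlock(lock);
		if (!_cond_resched())	/* reschedule if the dynamic mode allows */
			cpu_relax();	/* otherwise just ease off the lock cacheline */
		ret = 1;
		spin_lock(lock);
	}
	return ret;
}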
+#ifdef CONFIG_PREEMPT_DYNAMIC
+
+#ifdef CONFIG_GENERIC_ENTRY
+#include <linux/entry-common.h>
+#endif
+
+/*
+ * SC:cond_resched
+ * SC:might_resched
+ * SC:preempt_schedule
+ * SC:preempt_schedule_notrace
+ * SC:irqentry_exit_cond_resched
+ *
+ *
+ * NONE:
+ * cond_resched <- __cond_resched
+ * might_resched <- RET0
+ * preempt_schedule <- NOP
+ * preempt_schedule_notrace <- NOP
+ * irqentry_exit_cond_resched <- NOP
+ *
+ * VOLUNTARY:
+ * cond_resched <- __cond_resched
+ * might_resched <- __cond_resched
+ * preempt_schedule <- NOP
+ * preempt_schedule_notrace <- NOP
+ * irqentry_exit_cond_resched <- NOP
+ *
+ * FULL:
+ * cond_resched <- RET0
+ * might_resched <- RET0
+ * preempt_schedule <- preempt_schedule
+ * preempt_schedule_notrace <- preempt_schedule_notrace
+ * irqentry_exit_cond_resched <- irqentry_exit_cond_resched
+ */
+
+enum {
+ preempt_dynamic_undefined = -1,
+ preempt_dynamic_none,
+ preempt_dynamic_voluntary,
+ preempt_dynamic_full,
+};
+
+int preempt_dynamic_mode = preempt_dynamic_undefined;
+
+int sched_dynamic_mode(const char *str)
+{
+ if (!strcmp(str, "none"))
+ return preempt_dynamic_none;
+
+ if (!strcmp(str, "voluntary"))
+ return preempt_dynamic_voluntary;
+
+ if (!strcmp(str, "full"))
+ return preempt_dynamic_full;
+
+ return -EINVAL;
+}
+
+#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
+#define preempt_dynamic_enable(f) static_call_update(f, f##_dynamic_enabled)
+#define preempt_dynamic_disable(f) static_call_update(f, f##_dynamic_disabled)
+#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
+#define preempt_dynamic_enable(f) static_key_enable(&sk_dynamic_##f.key)
+#define preempt_dynamic_disable(f) static_key_disable(&sk_dynamic_##f.key)
+#else
+#error "Unsupported PREEMPT_DYNAMIC mechanism"
+#endif
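
Taking preempt_schedule as the example, the helpers above expand as follows (both spellings are visible in the definitions earlier in this diff):

/*
 * HAVE_PREEMPT_DYNAMIC_CALL:
 *   preempt_dynamic_enable(preempt_schedule)
 *     -> static_call_update(preempt_schedule, preempt_schedule_dynamic_enabled)
 *   preempt_dynamic_disable(preempt_schedule)
 *     -> static_call_update(preempt_schedule, preempt_schedule_dynamic_disabled)
 *        i.e. NULL, which patches the call sites into NOPs
 *
 * HAVE_PREEMPT_DYNAMIC_KEY:
 *   preempt_dynamic_enable(preempt_schedule)
 *     -> static_key_enable(&sk_dynamic_preempt_schedule.key)
 *   preempt_dynamic_disable(preempt_schedule)
 *     -> static_key_disable(&sk_dynamic_preempt_schedule.key)
 */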
+
+void sched_dynamic_update(int mode)
+{
+ /*
+ * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in
+ * the ZERO state, which is invalid.
+ */
+ preempt_dynamic_enable(cond_resched);
+ preempt_dynamic_enable(might_resched);
+ preempt_dynamic_enable(preempt_schedule);
+ preempt_dynamic_enable(preempt_schedule_notrace);
+ preempt_dynamic_enable(irqentry_exit_cond_resched);
+
+ switch (mode) {
+ case preempt_dynamic_none:
+ preempt_dynamic_enable(cond_resched);
+ preempt_dynamic_disable(might_resched);
+ preempt_dynamic_disable(preempt_schedule);
+ preempt_dynamic_disable(preempt_schedule_notrace);
+ preempt_dynamic_disable(irqentry_exit_cond_resched);
+ pr_info("Dynamic Preempt: none\n");
+ break;
+
+ case preempt_dynamic_voluntary:
+ preempt_dynamic_enable(cond_resched);
+ preempt_dynamic_enable(might_resched);
+ preempt_dynamic_disable(preempt_schedule);
+ preempt_dynamic_disable(preempt_schedule_notrace);
+ preempt_dynamic_disable(irqentry_exit_cond_resched);
+ pr_info("Dynamic Preempt: voluntary\n");
+ break;
+
+ case preempt_dynamic_full:
+ preempt_dynamic_disable(cond_resched);
+ preempt_dynamic_disable(might_resched);
+ preempt_dynamic_enable(preempt_schedule);
+ preempt_dynamic_enable(preempt_schedule_notrace);
+ preempt_dynamic_enable(irqentry_exit_cond_resched);
+ pr_info("Dynamic Preempt: full\n");
+ break;
+ }
+
+ preempt_dynamic_mode = mode;
+}
+
+static int __init setup_preempt_mode(char *str)
+{
+ int mode = sched_dynamic_mode(str);
+ if (mode < 0) {
+ pr_warn("Dynamic Preempt: unsupported mode: %s\n", str);
+ return 0;
+ }
+
+ sched_dynamic_update(mode);
+ return 1;
+}
+__setup("preempt=", setup_preempt_mode);
+
+static void __init preempt_dynamic_init(void)
+{
+ if (preempt_dynamic_mode == preempt_dynamic_undefined) {
+ if (IS_ENABLED(CONFIG_PREEMPT_NONE)) {
+ sched_dynamic_update(preempt_dynamic_none);
+ } else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
+ sched_dynamic_update(preempt_dynamic_voluntary);
+ } else {
+ /* Default static call setting, nothing to do */
+ WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT));
+ preempt_dynamic_mode = preempt_dynamic_full;
+ pr_info("Dynamic Preempt: full\n");
+ }
+ }
+}
+
+#else /* !CONFIG_PREEMPT_DYNAMIC */
+
+static inline void preempt_dynamic_init(void) { }
+
+#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */
+
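A hedged sketch of how the two entry points compose for a runtime mode switch (the wrapper name is hypothetical; the kernel's own callers are the "preempt=" handler above and its debugfs counterpart):

static int set_preempt_mode_sketch(const char *buf)
{
	int mode = sched_dynamic_mode(buf);	/* "none" / "voluntary" / "full" */

	if (mode < 0)
		return mode;			/* -EINVAL for anything else */
	sched_dynamic_update(mode);		/* repatch all five call sites */
	return 0;
}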
/**
* yield - yield the current processor to other threads.
*
@@ -8378,9 +8523,7 @@ int io_schedule_prepare(void)
int old_iowait = current->in_iowait;
current->in_iowait = 1;
- if (current->plug)
- blk_flush_plug(current->plug, true);
-
+ blk_flush_plug(current->plug, true);
return old_iowait;
}
@@ -8707,7 +8850,7 @@ int cpuset_cpumask_can_shrink(const struct cpumask *cur,
{
int ret = 1;
- if (!cpumask_weight(cur))
+ if (cpumask_empty(cur))
return ret;
ret = dl_cpuset_cpumask_can_shrink(cur, trial);
@@ -8735,8 +8878,11 @@ int task_can_attach(struct task_struct *p,
}
if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
- cs_cpus_allowed))
- ret = dl_task_can_attach(p, cs_cpus_allowed);
+ cs_cpus_allowed)) {
+ int cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
+
+ ret = dl_cpu_busy(cpu, p);
+ }
out:
return ret;
@@ -9020,8 +9166,10 @@ static void cpuset_cpu_active(void)
static int cpuset_cpu_inactive(unsigned int cpu)
{
if (!cpuhp_tasks_frozen) {
- if (dl_cpu_busy(cpu))
- return -EBUSY;
+ int ret = dl_cpu_busy(cpu, NULL);
+
+ if (ret)
+ return ret;
cpuset_update_active_cpus();
} else {
num_cpus_frozen++;
@@ -9051,6 +9199,7 @@ int sched_cpu_activate(unsigned int cpu)
set_cpu_active(cpu, true);
if (sched_smp_initialized) {
+ sched_update_numa(cpu, true);
sched_domains_numa_masks_set(cpu);
cpuset_cpu_active();
}
@@ -9129,10 +9278,12 @@ int sched_cpu_deactivate(unsigned int cpu)
if (!sched_smp_initialized)
return 0;
+ sched_update_numa(cpu, false);
ret = cpuset_cpu_inactive(cpu);
if (ret) {
balance_push_set(cpu, false);
set_cpu_active(cpu, true);
+ sched_update_numa(cpu, true);
return ret;
}
sched_domains_numa_masks_clear(cpu);
@@ -9235,7 +9386,7 @@ int sched_cpu_dying(unsigned int cpu)
void __init sched_init_smp(void)
{
- sched_init_numa();
+ sched_init_numa(NUMA_NO_NODE);
/*
* There's no userspace yet to cause hotplug operations; hence all the
@@ -9247,7 +9398,7 @@ void __init sched_init_smp(void)
mutex_unlock(&sched_domains_mutex);
/* Move init over to a non-isolated CPU */
- if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_DOMAIN)) < 0)
BUG();
current->flags &= ~PF_NO_SETAFFINITY;
sched_init_granularity();
@@ -9347,7 +9498,6 @@ void __init sched_init(void)
#endif /* CONFIG_CPUMASK_OFFSTACK */
init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());
- init_dl_bandwidth(&def_dl_bandwidth, global_rt_period(), global_rt_runtime());
#ifdef CONFIG_SMP
init_defrootdomain();
diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c
index 1fb45672ec85..38a2cec21014 100644
--- a/kernel/sched/core_sched.c
+++ b/kernel/sched/core_sched.c
@@ -1,8 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/prctl.h>
-#include "sched.h"
-
/*
* A simple wrapper around refcount. An allocated sched_core_cookie's
* address is used to compute the cookie of the task.
@@ -277,7 +274,7 @@ void __sched_core_account_forceidle(struct rq *rq)
rq_i = cpu_rq(i);
p = rq_i->core_pick ?: rq_i->curr;
- if (!p->core_cookie)
+ if (p == rq_i->idle)
continue;
__schedstat_add(p->stats.core_forceidle_sum, delta);
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 3d06c5e4220d..0de9dda09949 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -1,12 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
+
/*
* CPU accounting code for task groups.
*
* Based on the work by Paul Menage (menage@google.com) and Balbir Singh
* (balbir@in.ibm.com).
*/
-#include <asm/irq_regs.h>
-#include "sched.h"
/* Time spent by the tasks of the CPU accounting group executing in ... */
enum cpuacct_stat_index {
@@ -334,14 +333,13 @@ static struct cftype files[] = {
*/
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
+ unsigned int cpu = task_cpu(tsk);
struct cpuacct *ca;
- rcu_read_lock();
+ lockdep_assert_rq_held(cpu_rq(cpu));
for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
- __this_cpu_add(*ca->cpuusage, cputime);
-
- rcu_read_unlock();
+ *per_cpu_ptr(ca->cpuusage, cpu) += cputime;
}
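
The addressing change here is subtle; a side-by-side sketch of the two forms:

/*
 *   __this_cpu_add(*ca->cpuusage, cputime);       // old: always the local CPU
 *   *per_cpu_ptr(ca->cpuusage, cpu) += cputime;   // new: the CPU the task runs on
 *
 * The two diverge whenever cpuacct_charge() runs on a different CPU than
 * task_cpu(tsk). lockdep_assert_rq_held() documents the runqueue lock
 * that now covers both the hierarchy walk, which the removed RCU read
 * section used to protect, and the non-atomic read-modify-write.
 */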
/*
@@ -353,10 +351,8 @@ void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
{
struct cpuacct *ca;
- rcu_read_lock();
for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
__this_cpu_add(ca->cpustat->cpustat[index], val);
- rcu_read_unlock();
}
struct cgroup_subsys cpuacct_cgrp_subsys = {
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index ceb03d76c0cc..02d970a879ed 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -1,12 +1,11 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * kernel/sched/cpudl.c
+ * kernel/sched/cpudeadline.c
*
* Global CPU deadline management
*
* Author: Juri Lelli <j.lelli@sssup.it>
*/
-#include "sched.h"
static inline int parent(int i)
{
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index 7c2fe50fd76d..5252fb191fae 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -5,9 +5,6 @@
* Copyright (C) 2016, Intel Corporation
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
*/
-#include <linux/cpufreq.h>
-
-#include "sched.h"
DEFINE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 26778884d9ab..3dbf351d12d5 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -6,13 +6,6 @@
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include "sched.h"
-
-#include <linux/sched/cpufreq.h>
-#include <trace/events/power.h>
-
#define IOWAIT_BOOST_MIN (SCHED_CAPACITY_SCALE / 8)
struct sugov_tunables {
@@ -289,6 +282,7 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
* into the same scale so we can compare.
*/
boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT;
+ boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL);
if (sg_cpu->util < boost)
sg_cpu->util = boost;
}
@@ -348,8 +342,11 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
/*
* Do not reduce the frequency if the CPU has not been idle
* recently, as the reduction is likely to be premature then.
+ *
+ * Except when the rq is capped by uclamp_max.
*/
- if (sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
+ if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
+ sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
next_f = sg_policy->next_freq;
/* Restore cached freq as next_freq has changed */
@@ -395,8 +392,11 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
/*
* Do not reduce the target performance level if the CPU has not been
* idle recently, as the reduction is likely to be premature then.
+ *
+ * Except when the rq is capped by uclamp_max.
*/
- if (sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
+ if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
+ sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
sg_cpu->util = prev_util;
cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl),
@@ -539,7 +539,7 @@ ATTRIBUTE_GROUPS(sugov);
static void sugov_tunables_free(struct kobject *kobj)
{
- struct gov_attr_set *attr_set = container_of(kobj, struct gov_attr_set, kobj);
+ struct gov_attr_set *attr_set = to_gov_attr_set(kobj);
kfree(to_sugov_tunables(attr_set));
}
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index d583f2aa744e..fa9ce9d83683 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -22,7 +22,6 @@
* worst case complexity of O(min(101, nr_domcpus)), though the scenario that
* yields the worst case search is fairly contrived.
*/
-#include "sched.h"
/*
* p->rt_priority p->prio newpri cpupri
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index b7ec42732b28..78a233d43757 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -2,7 +2,6 @@
/*
* Simple CPU accounting cgroup controller
*/
-#include "sched.h"
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index d2c072b0ef01..fb4255ae0b2c 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -15,10 +15,6 @@
* Michael Trimarchi <michael@amarulasolutions.com>,
* Fabio Checconi <fchecconi@gmail.com>
*/
-#include "sched.h"
-#include "pelt.h"
-
-struct dl_bandwidth def_dl_bandwidth;
static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
{
@@ -130,6 +126,21 @@ static inline bool dl_bw_visited(int cpu, u64 gen)
rd->visit_gen = gen;
return false;
}
+
+static inline
+void __dl_update(struct dl_bw *dl_b, s64 bw)
+{
+ struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
+ int i;
+
+ RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+ "sched RCU must be held");
+ for_each_cpu_and(i, rd->span, cpu_active_mask) {
+ struct rq *rq = cpu_rq(i);
+
+ rq->dl.extra_bw += bw;
+ }
+}
#else
static inline struct dl_bw *dl_bw_of(int i)
{
@@ -150,9 +161,38 @@ static inline bool dl_bw_visited(int cpu, u64 gen)
{
return false;
}
+
+static inline
+void __dl_update(struct dl_bw *dl_b, s64 bw)
+{
+ struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
+
+ dl->extra_bw += bw;
+}
#endif
static inline
+void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
+{
+ dl_b->total_bw -= tsk_bw;
+ __dl_update(dl_b, (s32)tsk_bw / cpus);
+}
+
+static inline
+void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
+{
+ dl_b->total_bw += tsk_bw;
+ __dl_update(dl_b, -((s32)tsk_bw / cpus));
+}
+
+static inline bool
+__dl_overflow(struct dl_bw *dl_b, unsigned long cap, u64 old_bw, u64 new_bw)
+{
+ return dl_b->bw != -1 &&
+ cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw;
+}
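
__dl_overflow() is the deadline admission test; a worked instance (fixed-point details elided, values assumed), with dl_b->bw representing 95% and cap one full-capacity CPU, so that cap_scale(dl_b->bw, cap) is also 95%:

/*
 *   total_bw 50%, old_bw 0, new_bw 40%  ->  90% <= 95%: admit
 *   total_bw 80%, old_bw 0, new_bw 40%  -> 120% >  95%: overflow, reject
 *
 * dl_b->bw == -1 (RUNTIME_INF, see init_dl_bw() below) disables the
 * check entirely.
 */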
+
+static inline
void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
{
u64 old = dl_rq->running_bw;
@@ -408,7 +448,7 @@ static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
{
struct sched_dl_entity *dl_se = &p->dl;
- return dl_rq->root.rb_leftmost == &dl_se->rb_node;
+ return rb_first_cached(&dl_rq->root) == &dl_se->rb_node;
}
static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
@@ -423,12 +463,10 @@ void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
void init_dl_bw(struct dl_bw *dl_b)
{
raw_spin_lock_init(&dl_b->lock);
- raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);
if (global_rt_runtime() == RUNTIME_INF)
dl_b->bw = -1;
else
dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
- raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);
dl_b->total_bw = 0;
}
@@ -683,15 +721,6 @@ void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
}
-static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
-{
- return false;
-}
-
-static inline void pull_dl_task(struct rq *rq)
-{
-}
-
static inline void deadline_queue_push_tasks(struct rq *rq)
{
}
@@ -1393,6 +1422,9 @@ void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
timer->function = inactive_task_timer;
}
+#define __node_2_dle(node) \
+ rb_entry((node), struct sched_dl_entity, rb_node)
+
#ifdef CONFIG_SMP
static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
@@ -1422,10 +1454,9 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
cpudl_clear(&rq->rd->cpudl, rq->cpu);
cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
} else {
- struct rb_node *leftmost = dl_rq->root.rb_leftmost;
- struct sched_dl_entity *entry;
+ struct rb_node *leftmost = rb_first_cached(&dl_rq->root);
+ struct sched_dl_entity *entry = __node_2_dle(leftmost);
- entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
dl_rq->earliest_dl.curr = entry->deadline;
cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline);
}
@@ -1466,9 +1497,6 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
dec_dl_migration(dl_se, dl_rq);
}
-#define __node_2_dle(node) \
- rb_entry((node), struct sched_dl_entity, rb_node)
-
static inline bool __dl_less(struct rb_node *a, const struct rb_node *b)
{
return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline);
@@ -1931,15 +1959,14 @@ static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
deadline_queue_push_tasks(rq);
}
-static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
- struct dl_rq *dl_rq)
+static struct sched_dl_entity *pick_next_dl_entity(struct dl_rq *dl_rq)
{
struct rb_node *left = rb_first_cached(&dl_rq->root);
if (!left)
return NULL;
- return rb_entry(left, struct sched_dl_entity, rb_node);
+ return __node_2_dle(left);
}
static struct task_struct *pick_task_dl(struct rq *rq)
@@ -1951,7 +1978,7 @@ static struct task_struct *pick_task_dl(struct rq *rq)
if (!sched_dl_runnable(rq))
return NULL;
- dl_se = pick_next_dl_entity(rq, dl_rq);
+ dl_se = pick_next_dl_entity(dl_rq);
BUG_ON(!dl_se);
p = dl_task_of(dl_se);
@@ -2034,15 +2061,17 @@ static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
*/
static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu)
{
- struct rb_node *next_node = rq->dl.pushable_dl_tasks_root.rb_leftmost;
struct task_struct *p = NULL;
+ struct rb_node *next_node;
if (!has_pushable_dl_tasks(rq))
return NULL;
+ next_node = rb_first_cached(&rq->dl.pushable_dl_tasks_root);
+
next_node:
if (next_node) {
- p = rb_entry(next_node, struct task_struct, pushable_dl_tasks);
+ p = __node_2_pdl(next_node);
if (pick_dl_task(rq, p, cpu))
return p;
@@ -2208,8 +2237,7 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
if (!has_pushable_dl_tasks(rq))
return NULL;
- p = rb_entry(rq->dl.pushable_dl_tasks_root.rb_leftmost,
- struct task_struct, pushable_dl_tasks);
+ p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
BUG_ON(rq->cpu != task_cpu(p));
BUG_ON(task_current(rq, p));
@@ -2240,12 +2268,6 @@ static int push_dl_task(struct rq *rq)
return 0;
retry:
- if (is_migration_disabled(next_task))
- return 0;
-
- if (WARN_ON(next_task == rq->curr))
- return 0;
-
/*
* If next_task preempts rq->curr, and rq->curr
* can move away, it makes sense to just reschedule
@@ -2258,6 +2280,12 @@ retry:
return 0;
}
+ if (is_migration_disabled(next_task))
+ return 0;
+
+ if (WARN_ON(next_task == rq->curr))
+ return 0;
+
/* We might release rq lock */
get_task_struct(next_task);
@@ -2731,9 +2759,6 @@ void sched_dl_do_global(void)
int cpu;
unsigned long flags;
- def_dl_bandwidth.dl_period = global_rt_period();
- def_dl_bandwidth.dl_runtime = global_rt_runtime();
-
if (global_rt_runtime() != RUNTIME_INF)
new_bw = to_ratio(global_rt_period(), global_rt_runtime());
@@ -2955,41 +2980,6 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
}
#ifdef CONFIG_SMP
-int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
-{
- unsigned long flags, cap;
- unsigned int dest_cpu;
- struct dl_bw *dl_b;
- bool overflow;
- int ret;
-
- dest_cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
-
- rcu_read_lock_sched();
- dl_b = dl_bw_of(dest_cpu);
- raw_spin_lock_irqsave(&dl_b->lock, flags);
- cap = dl_bw_capacity(dest_cpu);
- overflow = __dl_overflow(dl_b, cap, 0, p->dl.dl_bw);
- if (overflow) {
- ret = -EBUSY;
- } else {
- /*
- * We reserve space for this task in the destination
- * root_domain, as we can't fail after this point.
- * We will free resources in the source root_domain
- * later on (see set_cpus_allowed_dl()).
- */
- int cpus = dl_bw_cpus(dest_cpu);
-
- __dl_add(dl_b, p->dl.dl_bw, cpus);
- ret = 0;
- }
- raw_spin_unlock_irqrestore(&dl_b->lock, flags);
- rcu_read_unlock_sched();
-
- return ret;
-}
-
int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
const struct cpumask *trial)
{
@@ -3011,7 +3001,7 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
return ret;
}
-bool dl_cpu_busy(unsigned int cpu)
+int dl_cpu_busy(int cpu, struct task_struct *p)
{
unsigned long flags, cap;
struct dl_bw *dl_b;
@@ -3021,11 +3011,22 @@ bool dl_cpu_busy(unsigned int cpu)
dl_b = dl_bw_of(cpu);
raw_spin_lock_irqsave(&dl_b->lock, flags);
cap = dl_bw_capacity(cpu);
- overflow = __dl_overflow(dl_b, cap, 0, 0);
+ overflow = __dl_overflow(dl_b, cap, 0, p ? p->dl.dl_bw : 0);
+
+ if (!overflow && p) {
+ /*
+ * We reserve space for this task in the destination
+ * root_domain, as we can't fail after this point.
+ * We will free resources in the source root_domain
+ * later on (see set_cpus_allowed_dl()).
+ */
+ __dl_add(dl_b, p->dl.dl_bw, dl_bw_cpus(cpu));
+ }
+
raw_spin_unlock_irqrestore(&dl_b->lock, flags);
rcu_read_unlock_sched();
- return overflow;
+ return overflow ? -EBUSY : 0;
}
#endif
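
Both call patterns of the reworked helper appear in the core.c hunks earlier in this diff:

/*
 *   ret = dl_cpu_busy(cpu, NULL);  // hotplug: would offlining this CPU
 *                                  // overflow the remaining bandwidth?
 *   ret = dl_cpu_busy(cpu, p);     // cpuset attach: admit p and reserve
 *                                  // its bandwidth in one locked step
 *
 * Returning 0 / -EBUSY instead of bool lets callers propagate the error
 * code directly, which is what cpuset_cpu_inactive() now does.
 */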
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index aa29211de1bf..bb3d63bdf4ae 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -6,7 +6,6 @@
*
* Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
*/
-#include "sched.h"
/*
* This allows printing both to /proc/sched_debug and
@@ -931,25 +930,15 @@ void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
static void sched_show_numa(struct task_struct *p, struct seq_file *m)
{
#ifdef CONFIG_NUMA_BALANCING
- struct mempolicy *pol;
-
if (p->mm)
P(mm->numa_scan_seq);
- task_lock(p);
- pol = p->mempolicy;
- if (pol && !(pol->flags & MPOL_F_MORON))
- pol = NULL;
- mpol_get(pol);
- task_unlock(p);
-
P(numa_pages_migrated);
P(numa_preferred_nid);
P(total_numa_faults);
SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
task_node(p), task_numa_group_id(p));
show_numa_stats(p, m);
- mpol_put(pol);
#endif
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 095b0aa378df..ee0664c9d291 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -20,7 +20,38 @@
* Adaptive scheduling granularity, math enhancements by Peter Zijlstra
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
*/
+#include <linux/energy_model.h>
+#include <linux/mmap_lock.h>
+#include <linux/hugetlb_inline.h>
+#include <linux/jiffies.h>
+#include <linux/mm_api.h>
+#include <linux/highmem.h>
+#include <linux/spinlock_api.h>
+#include <linux/cpumask_api.h>
+#include <linux/lockdep_api.h>
+#include <linux/softirq.h>
+#include <linux/refcount_api.h>
+#include <linux/topology.h>
+#include <linux/sched/clock.h>
+#include <linux/sched/cond_resched.h>
+#include <linux/sched/cputime.h>
+#include <linux/sched/isolation.h>
+
+#include <linux/cpuidle.h>
+#include <linux/interrupt.h>
+#include <linux/mempolicy.h>
+#include <linux/mutex_api.h>
+#include <linux/profile.h>
+#include <linux/psi.h>
+#include <linux/ratelimit.h>
+
+#include <asm/switch_to.h>
+
#include "sched.h"
+#include "stats.h"
+#include "autogroup.h"
/*
* Targeted preemption latency for CPU-bound tasks:
@@ -1259,10 +1290,10 @@ static bool numa_is_active_node(int nid, struct numa_group *ng)
/* Handle placement on systems where not all nodes are directly connected. */
static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
- int maxdist, bool task)
+ int lim_dist, bool task)
{
unsigned long score = 0;
- int node;
+ int node, max_dist;
/*
* All nodes are directly connected, and the same distance
@@ -1271,6 +1302,8 @@ static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
if (sched_numa_topology_type == NUMA_DIRECT)
return 0;
+ /* sched_max_numa_distance may be changed in parallel. */
+ max_dist = READ_ONCE(sched_max_numa_distance);
/*
* This code is called for each node, introducing N^2 complexity,
* which should be ok given the number of nodes rarely exceeds 8.
@@ -1283,7 +1316,7 @@ static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
* The furthest away nodes in the system are not interesting
* for placement; nid was already counted.
*/
- if (dist == sched_max_numa_distance || node == nid)
+ if (dist >= max_dist || node == nid)
continue;
/*
@@ -1293,8 +1326,7 @@ static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
* "hoplimit", only nodes closer by than "hoplimit" are part
* of each group. Skip other nodes.
*/
- if (sched_numa_topology_type == NUMA_BACKPLANE &&
- dist >= maxdist)
+ if (sched_numa_topology_type == NUMA_BACKPLANE && dist >= lim_dist)
continue;
/* Add up the faults from nearby nodes. */
@@ -1312,8 +1344,8 @@ static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
* This seems to result in good task placement.
*/
if (sched_numa_topology_type == NUMA_GLUELESS_MESH) {
- faults *= (sched_max_numa_distance - dist);
- faults /= (sched_max_numa_distance - LOCAL_DISTANCE);
+ faults *= (max_dist - dist);
+ faults /= (max_dist - LOCAL_DISTANCE);
}
score += faults;
@@ -1489,6 +1521,7 @@ struct task_numa_env {
int src_cpu, src_nid;
int dst_cpu, dst_nid;
+ int imb_numa_nr;
struct numa_stats src_stats, dst_stats;
@@ -1503,7 +1536,7 @@ struct task_numa_env {
static unsigned long cpu_load(struct rq *rq);
static unsigned long cpu_runnable(struct rq *rq);
static inline long adjust_numa_imbalance(int imbalance,
- int dst_running, int dst_weight);
+ int dst_running, int imb_numa_nr);
static inline enum
numa_type numa_classify(unsigned int imbalance_pct,
@@ -1884,7 +1917,7 @@ static void task_numa_find_cpu(struct task_numa_env *env,
dst_running = env->dst_stats.nr_running + 1;
imbalance = max(0, dst_running - src_running);
imbalance = adjust_numa_imbalance(imbalance, dst_running,
- env->dst_stats.weight);
+ env->imb_numa_nr);
/* Use idle CPU if there is no imbalance */
if (!imbalance) {
@@ -1949,8 +1982,10 @@ static int task_numa_migrate(struct task_struct *p)
*/
rcu_read_lock();
sd = rcu_dereference(per_cpu(sd_numa, env.src_cpu));
- if (sd)
+ if (sd) {
env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
+ env.imb_numa_nr = sd->imb_numa_nr;
+ }
rcu_read_unlock();
/*
@@ -1985,7 +2020,7 @@ static int task_numa_migrate(struct task_struct *p)
*/
ng = deref_curr_numa_group(p);
if (env.best_cpu == -1 || (ng && ng->active_nodes > 1)) {
- for_each_online_node(nid) {
+ for_each_node_state(nid, N_CPU) {
if (nid == env.src_nid || nid == p->numa_preferred_nid)
continue;
@@ -2083,13 +2118,13 @@ static void numa_group_count_active_nodes(struct numa_group *numa_group)
unsigned long faults, max_faults = 0;
int nid, active_nodes = 0;
- for_each_online_node(nid) {
+ for_each_node_state(nid, N_CPU) {
faults = group_faults_cpu(numa_group, nid);
if (faults > max_faults)
max_faults = faults;
}
- for_each_online_node(nid) {
+ for_each_node_state(nid, N_CPU) {
faults = group_faults_cpu(numa_group, nid);
if (faults * ACTIVE_NODE_FRACTION > max_faults)
active_nodes++;
@@ -2243,7 +2278,7 @@ static int preferred_group_nid(struct task_struct *p, int nid)
dist = sched_max_numa_distance;
- for_each_online_node(node) {
+ for_each_node_state(node, N_CPU) {
score = group_weight(p, node, dist);
if (score > max_score) {
max_score = score;
@@ -2262,7 +2297,7 @@ static int preferred_group_nid(struct task_struct *p, int nid)
* inside the highest scoring group of nodes. The nodemask tricks
* keep the complexity of the search down.
*/
- nodes = node_online_map;
+ nodes = node_states[N_CPU];
for (dist = sched_max_numa_distance; dist > LOCAL_DISTANCE; dist--) {
unsigned long max_faults = 0;
nodemask_t max_group = NODE_MASK_NONE;
@@ -2401,6 +2436,21 @@ static void task_numa_placement(struct task_struct *p)
}
}
+ /* Cannot migrate task to CPU-less node */
+ if (max_nid != NUMA_NO_NODE && !node_state(max_nid, N_CPU)) {
+ int near_nid = max_nid;
+ int distance, near_distance = INT_MAX;
+
+ for_each_node_state(nid, N_CPU) {
+ distance = node_distance(max_nid, nid);
+ if (distance < near_distance) {
+ near_nid = nid;
+ near_distance = distance;
+ }
+ }
+ max_nid = near_nid;
+ }
+
if (ng) {
numa_group_count_active_nodes(ng);
spin_unlock_irq(group_lock);
@@ -2825,6 +2875,8 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
/* Protect against double add, see task_tick_numa and task_numa_work */
p->numa_work.next = &p->numa_work;
p->numa_faults = NULL;
+ p->numa_pages_migrated = 0;
+ p->total_numa_faults = 0;
RCU_INIT_POINTER(p->numa_group, NULL);
p->last_task_numa_placement = 0;
p->last_sum_exec_runtime = 0;
@@ -3028,9 +3080,11 @@ enqueue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
static inline void
dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- u32 divider = get_pelt_divider(&se->avg);
sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
- cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
+ sub_positive(&cfs_rq->avg.load_sum, se_weight(se) * se->avg.load_sum);
+ /* See update_cfs_rq_load_avg() */
+ cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum,
+ cfs_rq->avg.load_avg * PELT_MIN_DIVIDER);
}
#else
static inline void
@@ -3381,7 +3435,6 @@ void set_task_rq_fair(struct sched_entity *se,
se->avg.last_update_time = n_last_update_time;
}
-
/*
* When on migration a sched_entity joins/leaves the PELT hierarchy, we need to
* propagate its contribution. The key to this propagation is the invariant
@@ -3449,15 +3502,14 @@ void set_task_rq_fair(struct sched_entity *se,
* XXX: only do this for the part of runnable > running ?
*
*/
-
static inline void
update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
{
- long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
- u32 divider;
+ long delta_sum, delta_avg = gcfs_rq->avg.util_avg - se->avg.util_avg;
+ u32 new_sum, divider;
/* Nothing to update */
- if (!delta)
+ if (!delta_avg)
return;
/*
@@ -3466,23 +3518,30 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
*/
divider = get_pelt_divider(&cfs_rq->avg);
+
/* Set new sched_entity's utilization */
se->avg.util_avg = gcfs_rq->avg.util_avg;
- se->avg.util_sum = se->avg.util_avg * divider;
+ new_sum = se->avg.util_avg * divider;
+ delta_sum = (long)new_sum - (long)se->avg.util_sum;
+ se->avg.util_sum = new_sum;
/* Update parent cfs_rq utilization */
- add_positive(&cfs_rq->avg.util_avg, delta);
- cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
+ add_positive(&cfs_rq->avg.util_avg, delta_avg);
+ add_positive(&cfs_rq->avg.util_sum, delta_sum);
+
+ /* See update_cfs_rq_load_avg() */
+ cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum,
+ cfs_rq->avg.util_avg * PELT_MIN_DIVIDER);
}
static inline void
update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
{
- long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
- u32 divider;
+ long delta_sum, delta_avg = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
+ u32 new_sum, divider;
/* Nothing to update */
- if (!delta)
+ if (!delta_avg)
return;
/*
@@ -3493,19 +3552,25 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf
/* Set new sched_entity's runnable */
se->avg.runnable_avg = gcfs_rq->avg.runnable_avg;
- se->avg.runnable_sum = se->avg.runnable_avg * divider;
+ new_sum = se->avg.runnable_avg * divider;
+ delta_sum = (long)new_sum - (long)se->avg.runnable_sum;
+ se->avg.runnable_sum = new_sum;
/* Update parent cfs_rq runnable */
- add_positive(&cfs_rq->avg.runnable_avg, delta);
- cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
+ add_positive(&cfs_rq->avg.runnable_avg, delta_avg);
+ add_positive(&cfs_rq->avg.runnable_sum, delta_sum);
+ /* See update_cfs_rq_load_avg() */
+ cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum,
+ cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER);
}
static inline void
update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
{
- long delta, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
+ long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
unsigned long load_avg;
u64 load_sum = 0;
+ s64 delta_sum;
u32 divider;
if (!runnable_sum)
@@ -3532,7 +3597,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
* assuming all tasks are equally runnable.
*/
if (scale_load_down(gcfs_rq->load.weight)) {
- load_sum = div_s64(gcfs_rq->avg.load_sum,
+ load_sum = div_u64(gcfs_rq->avg.load_sum,
scale_load_down(gcfs_rq->load.weight));
}
@@ -3549,19 +3614,22 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
running_sum = se->avg.util_sum >> SCHED_CAPACITY_SHIFT;
runnable_sum = max(runnable_sum, running_sum);
- load_sum = (s64)se_weight(se) * runnable_sum;
- load_avg = div_s64(load_sum, divider);
+ load_sum = se_weight(se) * runnable_sum;
+ load_avg = div_u64(load_sum, divider);
- se->avg.load_sum = runnable_sum;
-
- delta = load_avg - se->avg.load_avg;
- if (!delta)
+ delta_avg = load_avg - se->avg.load_avg;
+ if (!delta_avg)
return;
- se->avg.load_avg = load_avg;
+ delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
- add_positive(&cfs_rq->avg.load_avg, delta);
- cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
+ se->avg.load_sum = runnable_sum;
+ se->avg.load_avg = load_avg;
+ add_positive(&cfs_rq->avg.load_avg, delta_avg);
+ add_positive(&cfs_rq->avg.load_sum, delta_sum);
+ /* See update_cfs_rq_load_avg() */
+ cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum,
+ cfs_rq->avg.load_avg * PELT_MIN_DIVIDER);
}
static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum)
@@ -3652,7 +3720,7 @@ static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum
*
* cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
*
- * Returns true if the load decayed or we removed load.
+ * Return: true if the load decayed or we removed load.
*
* Since both these conditions indicate a changed cfs_rq->avg.load we should
* call update_tg_load_avg() when this function returns true.
@@ -3677,15 +3745,32 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
r = removed_load;
sub_positive(&sa->load_avg, r);
- sa->load_sum = sa->load_avg * divider;
+ sub_positive(&sa->load_sum, r * divider);
+ /* See sa->util_sum below */
+ sa->load_sum = max_t(u32, sa->load_sum, sa->load_avg * PELT_MIN_DIVIDER);
r = removed_util;
sub_positive(&sa->util_avg, r);
- sa->util_sum = sa->util_avg * divider;
+ sub_positive(&sa->util_sum, r * divider);
+ /*
+ * Because of rounding, se->util_sum might end up being +1 more than
+ * cfs->util_sum. Although this is not a problem by itself, detaching
+ * a lot of tasks with this rounding error between two updates of
+ * util_avg (~1ms) can make cfs->util_sum become zero while
+ * cfs->util_avg is not.
+ * Check that util_sum is still above its lower bound for the new
+ * util_avg. Given that period_contrib might have moved since the last
+ * sync, we are only sure that util_sum must be above or equal to
+ * util_avg * the minimum possible divider.
+ */
+ sa->util_sum = max_t(u32, sa->util_sum, sa->util_avg * PELT_MIN_DIVIDER);
r = removed_runnable;
sub_positive(&sa->runnable_avg, r);
- sa->runnable_sum = sa->runnable_avg * divider;
+ sub_positive(&sa->runnable_sum, r * divider);
+ /* See sa->util_sum above */
+ sa->runnable_sum = max_t(u32, sa->runnable_sum,
+ sa->runnable_avg * PELT_MIN_DIVIDER);
/*
* removed_runnable is the unweighted version of removed_load so we
@@ -3772,17 +3857,18 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
*/
static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- /*
- * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
- * See ___update_load_avg() for details.
- */
- u32 divider = get_pelt_divider(&cfs_rq->avg);
-
dequeue_load_avg(cfs_rq, se);
sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
- cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
+ sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
+ /* See update_cfs_rq_load_avg() */
+ cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum,
+ cfs_rq->avg.util_avg * PELT_MIN_DIVIDER);
+
sub_positive(&cfs_rq->avg.runnable_avg, se->avg.runnable_avg);
- cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
+ sub_positive(&cfs_rq->avg.runnable_sum, se->avg.runnable_sum);
+ /* See update_cfs_rq_load_avg() */
+ cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum,
+ cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER);
add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum);
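
The subtract-then-clamp pattern used throughout this hunk replaces the old "recompute the sum from the avg" approach; a numeric sketch (values assumed): suppose the cfs_rq has util_avg = 100 and util_sum = 4,600,000 after rounding drift, and we detach an entity with util_avg = 50, util_sum = 2,400,000:

/*
 *   util_avg: 100 - 50 = 50
 *   util_sum: 4,600,000 - 2,400,000 = 2,200,000
 *   clamp:    max(2,200,000, 50 * PELT_MIN_DIVIDER = 2,335,900)
 *             -> 2,335,900
 *
 * i.e. the clamp restores the invariant util_sum >= util_avg *
 * PELT_MIN_DIVIDER that rounding had violated, so a nonzero util_avg
 * can never sit on a zero util_sum.
 */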
@@ -8539,6 +8625,8 @@ group_type group_classify(unsigned int imbalance_pct,
*
* If @sg does not have SMT siblings, only pull tasks if all of the SMT siblings
* of @dst_cpu are idle and @sg has lower priority.
+ *
+ * Return: true if @dst_cpu can pull tasks, false otherwise.
*/
static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
struct sg_lb_stats *sgs,
@@ -8614,6 +8702,7 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs
/**
* update_sg_lb_stats - Update sched_group's statistics for load balancing.
* @env: The load balancing environment.
+ * @sds: Load-balancing data with statistics of the local group.
* @group: sched_group whose statistics are to be updated.
* @sgs: variable to hold the statistics for this group.
* @sg_status: Holds flag indicating the status of the sched_group
@@ -9003,9 +9092,9 @@ static bool update_pick_idlest(struct sched_group *idlest,
* This is an approximation as the number of running tasks may not be
* related to the number of busy CPUs due to sched_setaffinity.
*/
-static inline bool allow_numa_imbalance(int dst_running, int dst_weight)
+static inline bool allow_numa_imbalance(int running, int imb_numa_nr)
{
- return (dst_running < (dst_weight >> 2));
+ return running <= imb_numa_nr;
}
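
A worked instance, assuming imb_numa_nr = 4 for the domain (it is computed per-domain at topology build time elsewhere in this series):

/*
 *   allow_numa_imbalance(3, 4) -> true   (keep the task local)
 *   allow_numa_imbalance(5, 4) -> false  (spread across nodes)
 *
 * The old test compared against a quarter of the domain span
 * (dst_weight >> 2), which grew with machine size rather than with the
 * domain's topology.
 */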
/*
@@ -9139,12 +9228,13 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
return idlest;
#endif
/*
- * Otherwise, keep the task on this node to stay close
- * its wakeup source and improve locality. If there is
- * a real need of migration, periodic load balance will
- * take care of it.
+ * Otherwise, keep the task close to the wakeup source
+ * and improve locality if the number of running tasks
+ * would remain below threshold where an imbalance is
+ * allowed. If there is a real need of migration,
+ * periodic load balance will take care of it.
*/
- if (allow_numa_imbalance(local_sgs.sum_nr_running, sd->span_weight))
+ if (allow_numa_imbalance(local_sgs.sum_nr_running + 1, sd->imb_numa_nr))
return NULL;
}
@@ -9236,9 +9326,9 @@ next_group:
#define NUMA_IMBALANCE_MIN 2
static inline long adjust_numa_imbalance(int imbalance,
- int dst_running, int dst_weight)
+ int dst_running, int imb_numa_nr)
{
- if (!allow_numa_imbalance(dst_running, dst_weight))
+ if (!allow_numa_imbalance(dst_running, imb_numa_nr))
return imbalance;
/*
@@ -9350,7 +9440,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
/* Consider allowing a small imbalance between NUMA groups */
if (env->sd->flags & SD_NUMA) {
env->imbalance = adjust_numa_imbalance(env->imbalance,
- busiest->sum_nr_running, busiest->group_weight);
+ local->sum_nr_running + 1, env->sd->imb_numa_nr);
}
return;
@@ -9421,12 +9511,11 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
/**
* find_busiest_group - Returns the busiest group within the sched_domain
* if there is an imbalance.
+ * @env: The load balancing environment.
*
* Also calculates the amount of runnable load which should be moved
* to restore balance.
*
- * @env: The load balancing environment.
- *
* Return: - The busiest group if imbalance exists.
*/
static struct sched_group *find_busiest_group(struct lb_env *env)
@@ -10315,7 +10404,7 @@ static inline int on_null_domain(struct rq *rq)
* - When one of the busy CPUs notice that there may be an idle rebalancing
* needed, they will kick the idle load balancer, which then does idle
* load balancing for all the idle CPUs.
- * - HK_FLAG_MISC CPUs are used for this task, because HK_FLAG_SCHED not set
+ * - HK_TYPE_MISC CPUs are used for this task, because HK_TYPE_SCHED is not set
* anywhere yet.
*/
@@ -10324,7 +10413,7 @@ static inline int find_new_ilb(void)
int ilb;
const struct cpumask *hk_mask;
- hk_mask = housekeeping_cpumask(HK_FLAG_MISC);
+ hk_mask = housekeeping_cpumask(HK_TYPE_MISC);
for_each_cpu_and(ilb, nohz.idle_cpus_mask, hk_mask) {
@@ -10340,7 +10429,7 @@ static inline int find_new_ilb(void)
/*
* Kick a CPU to do the nohz balancing, if it is time for it. We pick any
- * idle CPU in the HK_FLAG_MISC housekeeping set (if there is one).
+ * idle CPU in the HK_TYPE_MISC housekeeping set (if there is one).
*/
static void kick_ilb(unsigned int flags)
{
@@ -10553,7 +10642,7 @@ void nohz_balance_enter_idle(int cpu)
return;
/* Spare idle load balancing on CPUs that don't want to be disturbed: */
- if (!housekeeping_cpu(cpu, HK_FLAG_SCHED))
+ if (!housekeeping_cpu(cpu, HK_TYPE_SCHED))
return;
/*
@@ -10769,7 +10858,7 @@ static void nohz_newidle_balance(struct rq *this_rq)
* This CPU doesn't want to be disturbed by scheduler
* housekeeping
*/
- if (!housekeeping_cpu(this_cpu, HK_FLAG_SCHED))
+ if (!housekeeping_cpu(this_cpu, HK_TYPE_SCHED))
return;
/* Will wake up very soon. No time for doing anything else */
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index d17b0a5ce6ac..8f8b5020e76a 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -6,9 +6,6 @@
* (NOTE: these are not related to SCHED_IDLE batch scheduled
* tasks which are handled in sched/fair.c )
*/
-#include "sched.h"
-
-#include <trace/events/power.h>
/* Linker adds these: start and end of __cpuidle functions */
extern char __cpuidle_text_start[], __cpuidle_text_end[];
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 7f06eaf12818..373d42c707bc 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -7,136 +7,179 @@
* Copyright (C) 2017-2018 SUSE, Frederic Weisbecker
*
*/
-#include "sched.h"
+
+enum hk_flags {
+ HK_FLAG_TIMER = BIT(HK_TYPE_TIMER),
+ HK_FLAG_RCU = BIT(HK_TYPE_RCU),
+ HK_FLAG_MISC = BIT(HK_TYPE_MISC),
+ HK_FLAG_SCHED = BIT(HK_TYPE_SCHED),
+ HK_FLAG_TICK = BIT(HK_TYPE_TICK),
+ HK_FLAG_DOMAIN = BIT(HK_TYPE_DOMAIN),
+ HK_FLAG_WQ = BIT(HK_TYPE_WQ),
+ HK_FLAG_MANAGED_IRQ = BIT(HK_TYPE_MANAGED_IRQ),
+ HK_FLAG_KTHREAD = BIT(HK_TYPE_KTHREAD),
+};
DEFINE_STATIC_KEY_FALSE(housekeeping_overridden);
EXPORT_SYMBOL_GPL(housekeeping_overridden);
-static cpumask_var_t housekeeping_mask;
-static unsigned int housekeeping_flags;
-bool housekeeping_enabled(enum hk_flags flags)
+struct housekeeping {
+ cpumask_var_t cpumasks[HK_TYPE_MAX];
+ unsigned long flags;
+};
+
+static struct housekeeping housekeeping;
+
+bool housekeeping_enabled(enum hk_type type)
{
- return !!(housekeeping_flags & flags);
+ return !!(housekeeping.flags & BIT(type));
}
EXPORT_SYMBOL_GPL(housekeeping_enabled);
-int housekeeping_any_cpu(enum hk_flags flags)
+int housekeeping_any_cpu(enum hk_type type)
{
int cpu;
if (static_branch_unlikely(&housekeeping_overridden)) {
- if (housekeeping_flags & flags) {
- cpu = sched_numa_find_closest(housekeeping_mask, smp_processor_id());
+ if (housekeeping.flags & BIT(type)) {
+ cpu = sched_numa_find_closest(housekeeping.cpumasks[type], smp_processor_id());
if (cpu < nr_cpu_ids)
return cpu;
- return cpumask_any_and(housekeeping_mask, cpu_online_mask);
+ return cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask);
}
}
return smp_processor_id();
}
EXPORT_SYMBOL_GPL(housekeeping_any_cpu);
-const struct cpumask *housekeeping_cpumask(enum hk_flags flags)
+const struct cpumask *housekeeping_cpumask(enum hk_type type)
{
if (static_branch_unlikely(&housekeeping_overridden))
- if (housekeeping_flags & flags)
- return housekeeping_mask;
+ if (housekeeping.flags & BIT(type))
+ return housekeeping.cpumasks[type];
return cpu_possible_mask;
}
EXPORT_SYMBOL_GPL(housekeeping_cpumask);
-void housekeeping_affine(struct task_struct *t, enum hk_flags flags)
+void housekeeping_affine(struct task_struct *t, enum hk_type type)
{
if (static_branch_unlikely(&housekeeping_overridden))
- if (housekeeping_flags & flags)
- set_cpus_allowed_ptr(t, housekeeping_mask);
+ if (housekeeping.flags & BIT(type))
+ set_cpus_allowed_ptr(t, housekeeping.cpumasks[type]);
}
EXPORT_SYMBOL_GPL(housekeeping_affine);
-bool housekeeping_test_cpu(int cpu, enum hk_flags flags)
+bool housekeeping_test_cpu(int cpu, enum hk_type type)
{
if (static_branch_unlikely(&housekeeping_overridden))
- if (housekeeping_flags & flags)
- return cpumask_test_cpu(cpu, housekeeping_mask);
+ if (housekeeping.flags & BIT(type))
+ return cpumask_test_cpu(cpu, housekeeping.cpumasks[type]);
return true;
}
EXPORT_SYMBOL_GPL(housekeeping_test_cpu);
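
With per-type masks, each subsystem queries its own set; the calls below match the conversions in the core.c and fair.c hunks earlier in this diff:

/*
 *   housekeeping_cpumask(HK_TYPE_DOMAIN);  // CPUs kept in sched domains
 *   housekeeping_cpumask(HK_TYPE_MISC);    // CPUs eligible for nohz ILB
 *   housekeeping_cpu(cpu, HK_TYPE_SCHED);  // may this CPU run scheduler
 *                                          // housekeeping work?
 *
 * When neither "nohz_full=" nor "isolcpus=" was given, housekeeping.flags
 * is 0 and every query falls back to cpu_possible_mask / true.
 */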
void __init housekeeping_init(void)
{
- if (!housekeeping_flags)
+ enum hk_type type;
+
+ if (!housekeeping.flags)
return;
static_branch_enable(&housekeeping_overridden);
- if (housekeeping_flags & HK_FLAG_TICK)
+ if (housekeeping.flags & HK_FLAG_TICK)
sched_tick_offload_init();
- /* We need at least one CPU to handle housekeeping work */
- WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
+ for_each_set_bit(type, &housekeeping.flags, HK_TYPE_MAX) {
+ /* We need at least one CPU to handle housekeeping work */
+ WARN_ON_ONCE(cpumask_empty(housekeeping.cpumasks[type]));
+ }
}
-static int __init housekeeping_setup(char *str, enum hk_flags flags)
+static void __init housekeeping_setup_type(enum hk_type type,
+ cpumask_var_t housekeeping_staging)
{
- cpumask_var_t non_housekeeping_mask;
- cpumask_var_t tmp;
+
+ alloc_bootmem_cpumask_var(&housekeeping.cpumasks[type]);
+ cpumask_copy(housekeeping.cpumasks[type],
+ housekeeping_staging);
+}
+
+static int __init housekeeping_setup(char *str, unsigned long flags)
+{
+ cpumask_var_t non_housekeeping_mask, housekeeping_staging;
+ int err = 0;
+
+ if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK)) {
+ if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) {
+ pr_warn("Housekeeping: nohz unsupported."
+ " Build with CONFIG_NO_HZ_FULL\n");
+ return 0;
+ }
+ }
alloc_bootmem_cpumask_var(&non_housekeeping_mask);
if (cpulist_parse(str, non_housekeeping_mask) < 0) {
pr_warn("Housekeeping: nohz_full= or isolcpus= incorrect CPU range\n");
- free_bootmem_cpumask_var(non_housekeeping_mask);
- return 0;
+ goto free_non_housekeeping_mask;
}
- alloc_bootmem_cpumask_var(&tmp);
- if (!housekeeping_flags) {
- alloc_bootmem_cpumask_var(&housekeeping_mask);
- cpumask_andnot(housekeeping_mask,
- cpu_possible_mask, non_housekeeping_mask);
+ alloc_bootmem_cpumask_var(&housekeeping_staging);
+ cpumask_andnot(housekeeping_staging,
+ cpu_possible_mask, non_housekeeping_mask);
- cpumask_andnot(tmp, cpu_present_mask, non_housekeeping_mask);
- if (cpumask_empty(tmp)) {
+ if (!cpumask_intersects(cpu_present_mask, housekeeping_staging)) {
+ __cpumask_set_cpu(smp_processor_id(), housekeeping_staging);
+ __cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
+ if (!housekeeping.flags) {
pr_warn("Housekeeping: must include one present CPU, "
"using boot CPU:%d\n", smp_processor_id());
- __cpumask_set_cpu(smp_processor_id(), housekeeping_mask);
- __cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
- }
- } else {
- cpumask_andnot(tmp, cpu_present_mask, non_housekeeping_mask);
- if (cpumask_empty(tmp))
- __cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
- cpumask_andnot(tmp, cpu_possible_mask, non_housekeeping_mask);
- if (!cpumask_equal(tmp, housekeeping_mask)) {
- pr_warn("Housekeeping: nohz_full= must match isolcpus=\n");
- free_bootmem_cpumask_var(tmp);
- free_bootmem_cpumask_var(non_housekeeping_mask);
- return 0;
}
}
- free_bootmem_cpumask_var(tmp);
- if ((flags & HK_FLAG_TICK) && !(housekeeping_flags & HK_FLAG_TICK)) {
- if (IS_ENABLED(CONFIG_NO_HZ_FULL)) {
- tick_nohz_full_setup(non_housekeeping_mask);
- } else {
- pr_warn("Housekeeping: nohz unsupported."
- " Build with CONFIG_NO_HZ_FULL\n");
- free_bootmem_cpumask_var(non_housekeeping_mask);
- return 0;
+ if (!housekeeping.flags) {
+ /* First setup call ("nohz_full=" or "isolcpus=") */
+ enum hk_type type;
+
+ for_each_set_bit(type, &flags, HK_TYPE_MAX)
+ housekeeping_setup_type(type, housekeeping_staging);
+ } else {
+ /* Second setup call ("nohz_full=" after "isolcpus=" or the reverse) */
+ enum hk_type type;
+ unsigned long iter_flags = flags & housekeeping.flags;
+
+ for_each_set_bit(type, &iter_flags, HK_TYPE_MAX) {
+ if (!cpumask_equal(housekeeping_staging,
+ housekeeping.cpumasks[type])) {
+ pr_warn("Housekeeping: nohz_full= must match isolcpus=\n");
+ goto free_housekeeping_staging;
+ }
}
+
+ iter_flags = flags & ~housekeeping.flags;
+
+ for_each_set_bit(type, &iter_flags, HK_TYPE_MAX)
+ housekeeping_setup_type(type, housekeeping_staging);
}
- housekeeping_flags |= flags;
+ if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK))
+ tick_nohz_full_setup(non_housekeeping_mask);
+
+ housekeeping.flags |= flags;
+ err = 1;
+free_housekeeping_staging:
+ free_bootmem_cpumask_var(housekeeping_staging);
+free_non_housekeeping_mask:
free_bootmem_cpumask_var(non_housekeeping_mask);
- return 1;
+ return err;
}
static int __init housekeeping_nohz_full_setup(char *str)
{
- unsigned int flags;
+ unsigned long flags;
flags = HK_FLAG_TICK | HK_FLAG_WQ | HK_FLAG_TIMER | HK_FLAG_RCU |
HK_FLAG_MISC | HK_FLAG_KTHREAD;
@@ -147,7 +190,7 @@ __setup("nohz_full=", housekeeping_nohz_full_setup);
static int __init housekeeping_isolcpus_setup(char *str)
{
- unsigned int flags = 0;
+ unsigned long flags = 0;
bool illegal = false;
char *par;
int len;
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index 954b229868d9..52c8f8226b0d 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -6,7 +6,6 @@
* figure. Its a silly number but people think its important. We go through
* great pains to make it work on big machines and tickless kernels.
*/
-#include "sched.h"
/*
* Global load-average calculations
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index b5add64d9698..0c5be7ebb1dc 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -4,7 +4,6 @@
*
* membarrier system call
*/
-#include "sched.h"
/*
* For documentation purposes, here are some membarrier ordering
@@ -147,11 +146,11 @@
#endif
#ifdef CONFIG_RSEQ
-#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK \
+#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK \
(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ \
- | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ_BITMASK)
+ | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ)
#else
-#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK 0
+#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK 0
#endif
#define MEMBARRIER_CMD_BITMASK \
@@ -159,7 +158,8 @@
| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \
| MEMBARRIER_CMD_PRIVATE_EXPEDITED \
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \
- | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK)
+ | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \
+ | MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK)
static void ipi_mb(void *info)
{
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index a554e3bbab2b..0f310768260c 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -24,10 +24,6 @@
* Author: Vincent Guittot <vincent.guittot@linaro.org>
*/
-#include <linux/sched.h>
-#include "sched.h"
-#include "pelt.h"
-
/*
* Approximate:
* val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
index e06071bf3472..c336f5f481bc 100644
--- a/kernel/sched/pelt.h
+++ b/kernel/sched/pelt.h
@@ -37,9 +37,11 @@ update_irq_load_avg(struct rq *rq, u64 running)
}
#endif
+#define PELT_MIN_DIVIDER (LOAD_AVG_MAX - 1024)
+
static inline u32 get_pelt_divider(struct sched_avg *avg)
{
- return LOAD_AVG_MAX - 1024 + avg->period_contrib;
+ return PELT_MIN_DIVIDER + avg->period_contrib;
}
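
A worked instance of the bound this factoring makes explicit, assuming LOAD_AVG_MAX = 47742 (the maximum of the geometric series with y^32 = 0.5): PELT_MIN_DIVIDER = 47742 - 1024 = 46718, and since 0 <= avg->period_contrib < 1024,

/*
 *   46718 <= get_pelt_divider(avg) <= 47741
 *
 * which is why *_avg * PELT_MIN_DIVIDER is a safe lower bound for the
 * corresponding *_sum in the fair.c clamps above.
 */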
static inline void cfs_se_util_change(struct sched_avg *avg)
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index a679613a7cb7..a4fa3aadfcba 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -137,21 +137,6 @@
* sampling of the aggregate task states would be.
*/
-#include "../workqueue_internal.h"
-#include <linux/sched/loadavg.h>
-#include <linux/seq_file.h>
-#include <linux/proc_fs.h>
-#include <linux/seqlock.h>
-#include <linux/uaccess.h>
-#include <linux/cgroup.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/ctype.h>
-#include <linux/file.h>
-#include <linux/poll.h>
-#include <linux/psi.h>
-#include "sched.h"
-
static int psi_bug __read_mostly;
DEFINE_STATIC_KEY_FALSE(psi_disabled);
@@ -523,7 +508,7 @@ static void init_triggers(struct psi_group *group, u64 now)
static u64 update_triggers(struct psi_group *group, u64 now)
{
struct psi_trigger *t;
- bool new_stall = false;
+ bool update_total = false;
u64 *total = group->total[PSI_POLL];
/*
@@ -532,24 +517,35 @@ static u64 update_triggers(struct psi_group *group, u64 now)
*/
list_for_each_entry(t, &group->triggers, node) {
u64 growth;
+ bool new_stall;
- /* Check for stall activity */
- if (group->polling_total[t->state] == total[t->state])
- continue;
+ new_stall = group->polling_total[t->state] != total[t->state];
+ /* Check for stall activity or a previous threshold breach */
+ if (!new_stall && !t->pending_event)
+ continue;
/*
- * Multiple triggers might be looking at the same state,
- * remember to update group->polling_total[] once we've
- * been through all of them. Also remember to extend the
- * polling time if we see new stall activity.
+ * Check for new stall activity, as well as deferred
+ * events that occurred in the last window after the
+ * trigger had already fired (we want to ratelimit
+ * events without dropping any).
*/
- new_stall = true;
-
- /* Calculate growth since last update */
- growth = window_update(&t->win, now, total[t->state]);
- if (growth < t->threshold)
- continue;
-
+ if (new_stall) {
+ /*
+ * Multiple triggers might be looking at the same state,
+ * remember to update group->polling_total[] once we've
+ * been through all of them. Also remember to extend the
+ * polling time if we see new stall activity.
+ */
+ update_total = true;
+
+ /* Calculate growth since last update */
+ growth = window_update(&t->win, now, total[t->state]);
+ if (growth < t->threshold)
+ continue;
+
+ t->pending_event = true;
+ }
/* Limit event signaling to once per window */
if (now < t->last_event_time + t->win.size)
continue;
@@ -558,9 +554,11 @@ static u64 update_triggers(struct psi_group *group, u64 now)
if (cmpxchg(&t->event, 0, 1) == 0)
wake_up_interruptible(&t->event_wait);
t->last_event_time = now;
+ /* Reset threshold breach flag once event got generated */
+ t->pending_event = false;
}
- if (new_stall)
+ if (update_total)
memcpy(group->polling_total, total,
sizeof(group->polling_total));
@@ -1082,44 +1080,6 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
return 0;
}
-static int psi_io_show(struct seq_file *m, void *v)
-{
- return psi_show(m, &psi_system, PSI_IO);
-}
-
-static int psi_memory_show(struct seq_file *m, void *v)
-{
- return psi_show(m, &psi_system, PSI_MEM);
-}
-
-static int psi_cpu_show(struct seq_file *m, void *v)
-{
- return psi_show(m, &psi_system, PSI_CPU);
-}
-
-static int psi_open(struct file *file, int (*psi_show)(struct seq_file *, void *))
-{
- if (file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE))
- return -EPERM;
-
- return single_open(file, psi_show, NULL);
-}
-
-static int psi_io_open(struct inode *inode, struct file *file)
-{
- return psi_open(file, psi_io_show);
-}
-
-static int psi_memory_open(struct inode *inode, struct file *file)
-{
- return psi_open(file, psi_memory_show);
-}
-
-static int psi_cpu_open(struct inode *inode, struct file *file)
-{
- return psi_open(file, psi_cpu_show);
-}
-
struct psi_trigger *psi_trigger_create(struct psi_group *group,
char *buf, size_t nbytes, enum psi_res res)
{
@@ -1162,7 +1122,7 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
t->event = 0;
t->last_event_time = 0;
init_waitqueue_head(&t->event_wait);
- kref_init(&t->refcount);
+ t->pending_event = false;
mutex_lock(&group->trigger_lock);
@@ -1191,15 +1151,19 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
return t;
}
-static void psi_trigger_destroy(struct kref *ref)
+void psi_trigger_destroy(struct psi_trigger *t)
{
- struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
- struct psi_group *group = t->group;
+ struct psi_group *group;
struct task_struct *task_to_destroy = NULL;
- if (static_branch_likely(&psi_disabled))
+ /*
+ * We do not check psi_disabled since it might have been disabled after
+ * the trigger got created.
+ */
+ if (!t)
return;
+ group = t->group;
/*
* Wakeup waiters to stop polling. Can happen if cgroup is deleted
* from under a polling process.
@@ -1235,9 +1199,9 @@ static void psi_trigger_destroy(struct kref *ref)
mutex_unlock(&group->trigger_lock);
/*
- * Wait for both *trigger_ptr from psi_trigger_replace and
- * poll_task RCUs to complete their read-side critical sections
- * before destroying the trigger and optionally the poll_task
+ * Wait for psi_schedule_poll_work RCU to complete its read-side
+ * critical section before destroying the trigger and optionally the
+ * poll_task.
*/
synchronize_rcu();
/*
@@ -1254,18 +1218,6 @@ static void psi_trigger_destroy(struct kref *ref)
kfree(t);
}
-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new)
-{
- struct psi_trigger *old = *trigger_ptr;
-
- if (static_branch_likely(&psi_disabled))
- return;
-
- rcu_assign_pointer(*trigger_ptr, new);
- if (old)
- kref_put(&old->refcount, psi_trigger_destroy);
-}
-
__poll_t psi_trigger_poll(void **trigger_ptr,
struct file *file, poll_table *wait)
{
@@ -1275,27 +1227,57 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
if (static_branch_likely(&psi_disabled))
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
- rcu_read_lock();
-
- t = rcu_dereference(*(void __rcu __force **)trigger_ptr);
- if (!t) {
- rcu_read_unlock();
+ t = smp_load_acquire(trigger_ptr);
+ if (!t)
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
- }
- kref_get(&t->refcount);
-
- rcu_read_unlock();
poll_wait(file, &t->event_wait, wait);
if (cmpxchg(&t->event, 1, 0) == 1)
ret |= EPOLLPRI;
- kref_put(&t->refcount, psi_trigger_destroy);
-
return ret;
}
+#ifdef CONFIG_PROC_FS
+static int psi_io_show(struct seq_file *m, void *v)
+{
+ return psi_show(m, &psi_system, PSI_IO);
+}
+
+static int psi_memory_show(struct seq_file *m, void *v)
+{
+ return psi_show(m, &psi_system, PSI_MEM);
+}
+
+static int psi_cpu_show(struct seq_file *m, void *v)
+{
+ return psi_show(m, &psi_system, PSI_CPU);
+}
+
+static int psi_open(struct file *file, int (*psi_show)(struct seq_file *, void *))
+{
+ if (file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE))
+ return -EPERM;
+
+ return single_open(file, psi_show, NULL);
+}
+
+static int psi_io_open(struct inode *inode, struct file *file)
+{
+ return psi_open(file, psi_io_show);
+}
+
+static int psi_memory_open(struct inode *inode, struct file *file)
+{
+ return psi_open(file, psi_memory_show);
+}
+
+static int psi_cpu_open(struct inode *inode, struct file *file)
+{
+ return psi_open(file, psi_cpu_show);
+}
+
static ssize_t psi_write(struct file *file, const char __user *user_buf,
size_t nbytes, enum psi_res res)
{
@@ -1316,14 +1298,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
buf[buf_size - 1] = '\0';
- new = psi_trigger_create(&psi_system, buf, nbytes, res);
- if (IS_ERR(new))
- return PTR_ERR(new);
-
seq = file->private_data;
+
/* Take seq->lock to protect seq->private from concurrent writes */
mutex_lock(&seq->lock);
- psi_trigger_replace(&seq->private, new);
+
+ /* Allow only one trigger per file descriptor */
+ if (seq->private) {
+ mutex_unlock(&seq->lock);
+ return -EBUSY;
+ }
+
+ new = psi_trigger_create(&psi_system, buf, nbytes, res);
+ if (IS_ERR(new)) {
+ mutex_unlock(&seq->lock);
+ return PTR_ERR(new);
+ }
+
+ smp_store_release(&seq->private, new);
mutex_unlock(&seq->lock);
return nbytes;
@@ -1358,7 +1350,7 @@ static int psi_fop_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;
- psi_trigger_replace(&seq->private, NULL);
+ psi_trigger_destroy(seq->private);
return single_release(inode, file);
}
@@ -1400,3 +1392,5 @@ static int __init psi_proc_init(void)
return 0;
}
module_init(psi_proc_init);
+
+#endif /* CONFIG_PROC_FS */
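For context, the reworked trigger lifecycle is visible from the documented userspace ABI: open a /proc/pressure file read-write, write one trigger spec, then poll for POLLPRI. With this change a second write to the same descriptor fails with EBUSY. A minimal sketch; the 150ms-stall-in-1s-window threshold is arbitrary:

/* Minimal PSI trigger consumer; one trigger per file descriptor. */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char trig[] = "some 150000 1000000";
	struct pollfd pfd;
	int fd = open("/proc/pressure/memory", O_RDWR);

	if (fd < 0 || write(fd, trig, strlen(trig) + 1) < 0) {
		perror("psi trigger");
		return 1;
	}
	pfd.fd = fd;
	pfd.events = POLLPRI;
	while (poll(&pfd, 1, -1) > 0)
		if (pfd.revents & POLLPRI)
			puts("memory pressure event");
	close(fd);
	return 0;
}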
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 7b4f4fbbb404..a32c46889af8 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -3,9 +3,6 @@
* Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
* policies)
*/
-#include "sched.h"
-
-#include "pelt.h"
int sched_rr_timeslice = RR_TIMESLICE;
int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
@@ -271,8 +268,6 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
#ifdef CONFIG_SMP
-static void pull_rt_task(struct rq *this_rq);
-
static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
/* Try to pull RT tasks here if we lower this rq's prio */
@@ -429,15 +424,6 @@ void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}
-static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
-{
- return false;
-}
-
-static inline void pull_rt_task(struct rq *this_rq)
-{
-}
-
static inline void rt_queue_push_tasks(struct rq *rq)
{
}
@@ -1730,8 +1716,7 @@ static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool f
rt_queue_push_tasks(rq);
}
-static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
- struct rt_rq *rt_rq)
+static struct sched_rt_entity *pick_next_rt_entity(struct rt_rq *rt_rq)
{
struct rt_prio_array *array = &rt_rq->active;
struct sched_rt_entity *next = NULL;
@@ -1753,7 +1738,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
struct rt_rq *rt_rq = &rq->rt;
do {
- rt_se = pick_next_rt_entity(rq, rt_rq);
+ rt_se = pick_next_rt_entity(rt_rq);
BUG_ON(!rt_se);
rt_rq = group_rt_rq(rt_se);
} while (rt_rq);
@@ -2026,6 +2011,16 @@ static int push_rt_task(struct rq *rq, bool pull)
return 0;
retry:
+ /*
+	 * It's possible that the next_task slipped in with a
+ * higher priority than current. If that's the case
+ * just reschedule current.
+ */
+ if (unlikely(next_task->prio < rq->curr->prio)) {
+ resched_curr(rq);
+ return 0;
+ }
+
if (is_migration_disabled(next_task)) {
struct task_struct *push_task = NULL;
int cpu;
@@ -2033,6 +2028,18 @@ retry:
if (!pull || rq->push_busy)
return 0;
+ /*
+ * Invoking find_lowest_rq() on anything but an RT task doesn't
+ * make sense. Per the above priority check, curr has to
+ * be of higher priority than next_task, so no need to
+ * reschedule when bailing out.
+ *
+ * Note that the stoppers are masqueraded as SCHED_FIFO
+ * (cf. sched_set_stop_task()), so we can't rely on rt_task().
+ */
+ if (rq->curr->sched_class != &rt_sched_class)
+ return 0;
+
cpu = find_lowest_rq(rq->curr);
if (cpu == -1 || cpu == rq->cpu)
return 0;
@@ -2057,16 +2064,6 @@ retry:
if (WARN_ON(next_task == rq->curr))
return 0;
- /*
- * It's possible that the next_task slipped in of
- * higher priority than current. If that's the case
- * just reschedule current.
- */
- if (unlikely(next_task->prio < rq->curr->prio)) {
- resched_curr(rq);
- return 0;
- }
-
/* We might release rq lock */
get_task_struct(next_task);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index de53be905739..58263f90c559 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2,86 +2,98 @@
/*
* Scheduler internal types and methods:
*/
-#include <linux/sched.h>
+#ifndef _KERNEL_SCHED_SCHED_H
+#define _KERNEL_SCHED_SCHED_H
+#include <linux/sched/affinity.h>
#include <linux/sched/autogroup.h>
-#include <linux/sched/clock.h>
-#include <linux/sched/coredump.h>
#include <linux/sched/cpufreq.h>
-#include <linux/sched/cputime.h>
#include <linux/sched/deadline.h>
-#include <linux/sched/debug.h>
-#include <linux/sched/hotplug.h>
-#include <linux/sched/idle.h>
-#include <linux/sched/init.h>
-#include <linux/sched/isolation.h>
-#include <linux/sched/jobctl.h>
+#include <linux/sched.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/mm.h>
-#include <linux/sched/nohz.h>
-#include <linux/sched/numa_balancing.h>
-#include <linux/sched/prio.h>
-#include <linux/sched/rt.h>
+#include <linux/sched/rseq_api.h>
#include <linux/sched/signal.h>
#include <linux/sched/smt.h>
#include <linux/sched/stat.h>
#include <linux/sched/sysctl.h>
+#include <linux/sched/task_flags.h>
#include <linux/sched/task.h>
-#include <linux/sched/task_stack.h>
#include <linux/sched/topology.h>
-#include <linux/sched/user.h>
-#include <linux/sched/wake_q.h>
-#include <linux/sched/xacct.h>
-#include <uapi/linux/sched/types.h>
-
-#include <linux/binfmts.h>
-#include <linux/bitops.h>
-#include <linux/compat.h>
-#include <linux/context_tracking.h>
+#include <linux/atomic.h>
+#include <linux/bitmap.h>
+#include <linux/bug.h>
+#include <linux/capability.h>
+#include <linux/cgroup_api.h>
+#include <linux/cgroup.h>
#include <linux/cpufreq.h>
-#include <linux/cpuidle.h>
-#include <linux/cpuset.h>
+#include <linux/cpumask_api.h>
#include <linux/ctype.h>
-#include <linux/debugfs.h>
-#include <linux/delayacct.h>
-#include <linux/energy_model.h>
-#include <linux/init_task.h>
-#include <linux/kprobes.h>
+#include <linux/file.h>
+#include <linux/fs_api.h>
+#include <linux/hrtimer_api.h>
+#include <linux/interrupt.h>
+#include <linux/irq_work.h>
+#include <linux/jiffies.h>
+#include <linux/kref_api.h>
#include <linux/kthread.h>
-#include <linux/membarrier.h>
-#include <linux/migrate.h>
-#include <linux/mmu_context.h>
-#include <linux/nmi.h>
+#include <linux/ktime_api.h>
+#include <linux/lockdep_api.h>
+#include <linux/lockdep.h>
+#include <linux/minmax.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/mutex_api.h>
+#include <linux/plist.h>
+#include <linux/poll.h>
#include <linux/proc_fs.h>
-#include <linux/prefetch.h>
#include <linux/profile.h>
#include <linux/psi.h>
-#include <linux/ratelimit.h>
-#include <linux/rcupdate_wait.h>
-#include <linux/security.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/seqlock.h>
+#include <linux/softirq.h>
+#include <linux/spinlock_api.h>
+#include <linux/static_key.h>
#include <linux/stop_machine.h>
-#include <linux/suspend.h>
-#include <linux/swait.h>
+#include <linux/syscalls_api.h>
#include <linux/syscalls.h>
-#include <linux/task_work.h>
-#include <linux/tsacct_kern.h>
+#include <linux/tick.h>
+#include <linux/topology.h>
+#include <linux/types.h>
+#include <linux/u64_stats_sync_api.h>
+#include <linux/uaccess.h>
+#include <linux/wait_api.h>
+#include <linux/wait_bit.h>
+#include <linux/workqueue_api.h>
+
+#include <trace/events/power.h>
+#include <trace/events/sched.h>
+
+#include "../workqueue_internal.h"
+
+#ifdef CONFIG_CGROUP_SCHED
+#include <linux/cgroup.h>
+#include <linux/psi.h>
+#endif
-#include <asm/tlb.h>
+#ifdef CONFIG_SCHED_DEBUG
+# include <linux/static_key.h>
+#endif
#ifdef CONFIG_PARAVIRT
# include <asm/paravirt.h>
+# include <asm/paravirt_api_clock.h>
#endif
#include "cpupri.h"
#include "cpudeadline.h"
-#include <trace/events/sched.h>
-
#ifdef CONFIG_SCHED_DEBUG
-# define SCHED_WARN_ON(x) WARN_ONCE(x, #x)
+# define SCHED_WARN_ON(x) WARN_ONCE(x, #x)
#else
-# define SCHED_WARN_ON(x) ({ (void)(x), 0; })
+# define SCHED_WARN_ON(x) ({ (void)(x), 0; })
#endif
struct rq;
@@ -301,29 +313,6 @@ struct dl_bw {
u64 total_bw;
};
-static inline void __dl_update(struct dl_bw *dl_b, s64 bw);
-
-static inline
-void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
-{
- dl_b->total_bw -= tsk_bw;
- __dl_update(dl_b, (s32)tsk_bw / cpus);
-}
-
-static inline
-void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
-{
- dl_b->total_bw += tsk_bw;
- __dl_update(dl_b, -((s32)tsk_bw / cpus));
-}
-
-static inline bool __dl_overflow(struct dl_bw *dl_b, unsigned long cap,
- u64 old_bw, u64 new_bw)
-{
- return dl_b->bw != -1 &&
- cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw;
-}
-
/*
* Verify the fitness of task @p to run on @cpu taking into account the
* CPU original capacity and the runtime/deadline ratio of the task.
@@ -347,15 +336,11 @@ extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
extern bool __checkparam_dl(const struct sched_attr *attr);
extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
-extern int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed);
extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
-extern bool dl_cpu_busy(unsigned int cpu);
+extern int dl_cpu_busy(int cpu, struct task_struct *p);
#ifdef CONFIG_CGROUP_SCHED
-#include <linux/cgroup.h>
-#include <linux/psi.h>
-
struct cfs_rq;
struct rt_rq;
@@ -1662,12 +1647,14 @@ enum numa_topology_type {
extern enum numa_topology_type sched_numa_topology_type;
extern int sched_max_numa_distance;
extern bool find_numa_distance(int distance);
-extern void sched_init_numa(void);
+extern void sched_init_numa(int offline_node);
+extern void sched_update_numa(int cpu, bool online);
extern void sched_domains_numa_masks_set(unsigned int cpu);
extern void sched_domains_numa_masks_clear(unsigned int cpu);
extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
#else
-static inline void sched_init_numa(void) { }
+static inline void sched_init_numa(int offline_node) { }
+static inline void sched_update_numa(int cpu, bool online) { }
static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
@@ -1854,7 +1841,6 @@ static inline void flush_smp_call_function_from_idle(void) { }
#endif
#include "stats.h"
-#include "autogroup.h"
#if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS)
@@ -1950,7 +1936,6 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
* Tunables that become constants when CONFIG_SCHED_DEBUG is off:
*/
#ifdef CONFIG_SCHED_DEBUG
-# include <linux/static_key.h>
# define const_debug __read_mostly
#else
# define const_debug const
@@ -2331,7 +2316,6 @@ extern void resched_cpu(int cpu);
extern struct rt_bandwidth def_rt_bandwidth;
extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
-extern struct dl_bandwidth def_dl_bandwidth;
extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
@@ -2747,32 +2731,6 @@ extern void nohz_run_idle_balance(int cpu);
static inline void nohz_run_idle_balance(int cpu) { }
#endif
-#ifdef CONFIG_SMP
-static inline
-void __dl_update(struct dl_bw *dl_b, s64 bw)
-{
- struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
- int i;
-
- RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
- "sched RCU must be held");
- for_each_cpu_and(i, rd->span, cpu_active_mask) {
- struct rq *rq = cpu_rq(i);
-
- rq->dl.extra_bw += bw;
- }
-}
-#else
-static inline
-void __dl_update(struct dl_bw *dl_b, s64 bw)
-{
- struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
-
- dl->extra_bw += bw;
-}
-#endif
-
-
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
struct irqtime {
u64 total;
@@ -2841,88 +2799,6 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
#endif /* CONFIG_CPU_FREQ */
-#ifdef CONFIG_UCLAMP_TASK
-unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
-
-/**
- * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
- * @rq: The rq to clamp against. Must not be NULL.
- * @util: The util value to clamp.
- * @p: The task to clamp against. Can be NULL if you want to clamp
- * against @rq only.
- *
- * Clamps the passed @util to the max(@rq, @p) effective uclamp values.
- *
- * If sched_uclamp_used static key is disabled, then just return the util
- * without any clamping since uclamp aggregation at the rq level in the fast
- * path is disabled, rendering this operation a NOP.
- *
- * Use uclamp_eff_value() if you don't care about uclamp values at rq level. It
- * will return the correct effective uclamp value of the task even if the
- * static key is disabled.
- */
-static __always_inline
-unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
- struct task_struct *p)
-{
- unsigned long min_util = 0;
- unsigned long max_util = 0;
-
- if (!static_branch_likely(&sched_uclamp_used))
- return util;
-
- if (p) {
- min_util = uclamp_eff_value(p, UCLAMP_MIN);
- max_util = uclamp_eff_value(p, UCLAMP_MAX);
-
- /*
- * Ignore last runnable task's max clamp, as this task will
- * reset it. Similarly, no need to read the rq's min clamp.
- */
- if (rq->uclamp_flags & UCLAMP_FLAG_IDLE)
- goto out;
- }
-
- min_util = max_t(unsigned long, min_util, READ_ONCE(rq->uclamp[UCLAMP_MIN].value));
- max_util = max_t(unsigned long, max_util, READ_ONCE(rq->uclamp[UCLAMP_MAX].value));
-out:
- /*
- * Since CPU's {min,max}_util clamps are MAX aggregated considering
- * RUNNABLE tasks with _different_ clamps, we can end up with an
- * inversion. Fix it now when the clamps are applied.
- */
- if (unlikely(min_util >= max_util))
- return min_util;
-
- return clamp(util, min_util, max_util);
-}
-
-/*
- * When uclamp is compiled in, the aggregation at rq level is 'turned off'
- * by default in the fast path and only gets turned on once userspace performs
- * an operation that requires it.
- *
- * Returns true if userspace opted-in to use uclamp and aggregation at rq level
- * hence is active.
- */
-static inline bool uclamp_is_used(void)
-{
- return static_branch_likely(&sched_uclamp_used);
-}
-#else /* CONFIG_UCLAMP_TASK */
-static inline
-unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
- struct task_struct *p)
-{
- return util;
-}
-
-static inline bool uclamp_is_used(void)
-{
- return false;
-}
-#endif /* CONFIG_UCLAMP_TASK */
-
#ifdef arch_scale_freq_capacity
# ifndef arch_scale_freq_invariant
# define arch_scale_freq_invariant() true
@@ -3020,6 +2896,105 @@ static inline unsigned long cpu_util_rt(struct rq *rq)
}
#endif
+#ifdef CONFIG_UCLAMP_TASK
+unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
+
+/**
+ * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
+ * @rq: The rq to clamp against. Must not be NULL.
+ * @util: The util value to clamp.
+ * @p: The task to clamp against. Can be NULL if you want to clamp
+ * against @rq only.
+ *
+ * Clamps the passed @util to the max(@rq, @p) effective uclamp values.
+ *
+ * If sched_uclamp_used static key is disabled, then just return the util
+ * without any clamping since uclamp aggregation at the rq level in the fast
+ * path is disabled, rendering this operation a NOP.
+ *
+ * Use uclamp_eff_value() if you don't care about uclamp values at rq level. It
+ * will return the correct effective uclamp value of the task even if the
+ * static key is disabled.
+ */
+static __always_inline
+unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
+ struct task_struct *p)
+{
+ unsigned long min_util = 0;
+ unsigned long max_util = 0;
+
+ if (!static_branch_likely(&sched_uclamp_used))
+ return util;
+
+ if (p) {
+ min_util = uclamp_eff_value(p, UCLAMP_MIN);
+ max_util = uclamp_eff_value(p, UCLAMP_MAX);
+
+ /*
+ * Ignore last runnable task's max clamp, as this task will
+ * reset it. Similarly, no need to read the rq's min clamp.
+ */
+ if (rq->uclamp_flags & UCLAMP_FLAG_IDLE)
+ goto out;
+ }
+
+ min_util = max_t(unsigned long, min_util, READ_ONCE(rq->uclamp[UCLAMP_MIN].value));
+ max_util = max_t(unsigned long, max_util, READ_ONCE(rq->uclamp[UCLAMP_MAX].value));
+out:
+ /*
+ * Since CPU's {min,max}_util clamps are MAX aggregated considering
+ * RUNNABLE tasks with _different_ clamps, we can end up with an
+ * inversion. Fix it now when the clamps are applied.
+ */
+ if (unlikely(min_util >= max_util))
+ return min_util;
+
+ return clamp(util, min_util, max_util);
+}
+
+/* Is the rq being capped/throttled by uclamp_max? */
+static inline bool uclamp_rq_is_capped(struct rq *rq)
+{
+ unsigned long rq_util;
+ unsigned long max_util;
+
+ if (!static_branch_likely(&sched_uclamp_used))
+ return false;
+
+ rq_util = cpu_util_cfs(cpu_of(rq)) + cpu_util_rt(rq);
+ max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
+
+ return max_util != SCHED_CAPACITY_SCALE && rq_util >= max_util;
+}
+
+/*
+ * When uclamp is compiled in, the aggregation at rq level is 'turned off'
+ * by default in the fast path and only gets turned on once userspace performs
+ * an operation that requires it.
+ *
+ * Returns true if userspace opted-in to use uclamp and aggregation at rq level
+ * hence is active.
+ */
+static inline bool uclamp_is_used(void)
+{
+ return static_branch_likely(&sched_uclamp_used);
+}
+#else /* CONFIG_UCLAMP_TASK */
+static inline
+unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
+ struct task_struct *p)
+{
+ return util;
+}
+
+static inline bool uclamp_rq_is_capped(struct rq *rq) { return false; }
+
+static inline bool uclamp_is_used(void)
+{
+ return false;
+}
+#endif /* CONFIG_UCLAMP_TASK */
+
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
static inline unsigned long cpu_util_irq(struct rq *rq)
{
@@ -3118,3 +3093,4 @@ extern int sched_dynamic_mode(const char *str);
extern void sched_dynamic_update(int mode);
#endif
+#endif /* _KERNEL_SCHED_SCHED_H */
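The newly added uclamp_rq_is_capped() lets callers tell apparent idleness from a uclamp_max throttle. A hypothetical kernel-style caller, not part of this patch, might use it like this:

/* Hypothetical caller sketch: a CPU whose rq is capped by
 * uclamp_max looks under-utilized but should not attract more
 * load, since its reported utilization is clamped down. */
static bool cpu_genuinely_idle_enough(struct rq *rq, unsigned long threshold)
{
	if (uclamp_rq_is_capped(rq))
		return false;	/* util is clamped, not genuinely low */

	return cpu_util_cfs(cpu_of(rq)) < threshold;
}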
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index 07dde2928c79..857f837f52cb 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -2,7 +2,6 @@
/*
* /proc/schedstat implementation
*/
-#include "sched.h"
void __update_stats_wait_start(struct rq *rq, struct task_struct *p,
struct sched_statistics *stats)
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 3a3c826dd83a..baa839c1ba96 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -1,4 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _KERNEL_STATS_H
+#define _KERNEL_STATS_H
#ifdef CONFIG_SCHEDSTATS
@@ -298,3 +300,5 @@ sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *n
# define sched_info_dequeue(rq, t) do { } while (0)
# define sched_info_switch(rq, t, next) do { } while (0)
#endif /* CONFIG_SCHED_INFO */
+
+#endif /* _KERNEL_STATS_H */
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 0b165a25f22f..d04073a93eb4 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -7,7 +7,6 @@
*
* See kernel/stop_machine.c
*/
-#include "sched.h"
#ifdef CONFIG_SMP
static int
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index e1c655f928c7..76b9b796e695 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -2,7 +2,6 @@
/*
 * <linux/swait.h> (simple wait queues) implementation:
*/
-#include "sched.h"
void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
struct lock_class_key *key)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index d201a7052a29..810750e62118 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2,7 +2,6 @@
/*
* Scheduler topology setup/handling methods
*/
-#include "sched.h"
DEFINE_MUTEX(sched_domains_mutex);
@@ -74,7 +73,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
break;
}
- if (!cpumask_weight(sched_group_span(group))) {
+ if (cpumask_empty(sched_group_span(group))) {
printk(KERN_CONT "\n");
printk(KERN_ERR "ERROR: empty group\n");
break;
@@ -1366,7 +1365,7 @@ static void asym_cpu_capacity_scan(void)
list_for_each_entry(entry, &asym_cap_list, link)
cpumask_clear(cpu_capacity_span(entry));
- for_each_cpu_and(cpu, cpu_possible_mask, housekeeping_cpumask(HK_FLAG_DOMAIN))
+ for_each_cpu_and(cpu, cpu_possible_mask, housekeeping_cpumask(HK_TYPE_DOMAIN))
asym_cpu_capacity_update_data(cpu);
list_for_each_entry_safe(entry, next, &asym_cap_list, link) {
@@ -1492,8 +1491,6 @@ static int sched_domains_curr_level;
int sched_max_numa_distance;
static int *sched_domains_numa_distance;
static struct cpumask ***sched_domains_numa_masks;
-
-static unsigned long __read_mostly *sched_numa_onlined_nodes;
#endif
/*
@@ -1651,6 +1648,7 @@ static struct sched_domain_topology_level default_topology[] = {
static struct sched_domain_topology_level *sched_domain_topology =
default_topology;
+static struct sched_domain_topology_level *sched_domain_topology_saved;
#define for_each_sd_topology(tl) \
for (tl = sched_domain_topology; tl->mask; tl++)
@@ -1661,6 +1659,7 @@ void set_sched_topology(struct sched_domain_topology_level *tl)
return;
sched_domain_topology = tl;
+ sched_domain_topology_saved = NULL;
}
#ifdef CONFIG_NUMA
@@ -1684,8 +1683,12 @@ static void sched_numa_warn(const char *str)
for (i = 0; i < nr_node_ids; i++) {
printk(KERN_WARNING " ");
- for (j = 0; j < nr_node_ids; j++)
- printk(KERN_CONT "%02d ", node_distance(i,j));
+ for (j = 0; j < nr_node_ids; j++) {
+ if (!node_state(i, N_CPU) || !node_state(j, N_CPU))
+ printk(KERN_CONT "(%02d) ", node_distance(i,j));
+ else
+ printk(KERN_CONT " %02d ", node_distance(i,j));
+ }
printk(KERN_CONT "\n");
}
printk(KERN_WARNING "\n");
@@ -1693,19 +1696,34 @@ static void sched_numa_warn(const char *str)
bool find_numa_distance(int distance)
{
- int i;
+ bool found = false;
+ int i, *distances;
if (distance == node_distance(0, 0))
return true;
+ rcu_read_lock();
+ distances = rcu_dereference(sched_domains_numa_distance);
+ if (!distances)
+ goto unlock;
for (i = 0; i < sched_domains_numa_levels; i++) {
- if (sched_domains_numa_distance[i] == distance)
- return true;
+ if (distances[i] == distance) {
+ found = true;
+ break;
+ }
}
+unlock:
+ rcu_read_unlock();
- return false;
+ return found;
}
+#define for_each_cpu_node_but(n, nbut) \
+ for_each_node_state(n, N_CPU) \
+ if (n == nbut) \
+ continue; \
+ else
+
/*
* A system can have three types of NUMA topology:
* NUMA_DIRECT: all nodes are directly connected, or not a NUMA system
@@ -1725,7 +1743,7 @@ bool find_numa_distance(int distance)
* there is an intermediary node C, which is < N hops away from both
* nodes A and B, the system is a glueless mesh.
*/
-static void init_numa_topology_type(void)
+static void init_numa_topology_type(int offline_node)
{
int a, b, c, n;
@@ -1736,14 +1754,14 @@ static void init_numa_topology_type(void)
return;
}
- for_each_online_node(a) {
- for_each_online_node(b) {
+ for_each_cpu_node_but(a, offline_node) {
+ for_each_cpu_node_but(b, offline_node) {
/* Find two nodes furthest removed from each other. */
if (node_distance(a, b) < n)
continue;
/* Is there an intermediary node between a and b? */
- for_each_online_node(c) {
+ for_each_cpu_node_but(c, offline_node) {
if (node_distance(a, c) < n &&
node_distance(b, c) < n) {
sched_numa_topology_type =
@@ -1756,17 +1774,22 @@ static void init_numa_topology_type(void)
return;
}
}
+
+ pr_err("Failed to find a NUMA topology type, defaulting to DIRECT\n");
+ sched_numa_topology_type = NUMA_DIRECT;
}
#define NR_DISTANCE_VALUES (1 << DISTANCE_BITS)
-void sched_init_numa(void)
+void sched_init_numa(int offline_node)
{
struct sched_domain_topology_level *tl;
unsigned long *distance_map;
int nr_levels = 0;
int i, j;
+ int *distances;
+ struct cpumask ***masks;
/*
* O(nr_nodes^2) deduplicating selection sort -- in order to find the
@@ -1777,12 +1800,13 @@ void sched_init_numa(void)
return;
bitmap_zero(distance_map, NR_DISTANCE_VALUES);
- for (i = 0; i < nr_node_ids; i++) {
- for (j = 0; j < nr_node_ids; j++) {
+ for_each_cpu_node_but(i, offline_node) {
+ for_each_cpu_node_but(j, offline_node) {
int distance = node_distance(i, j);
if (distance < LOCAL_DISTANCE || distance >= NR_DISTANCE_VALUES) {
sched_numa_warn("Invalid distance value range");
+ bitmap_free(distance_map);
return;
}
@@ -1795,16 +1819,17 @@ void sched_init_numa(void)
*/
nr_levels = bitmap_weight(distance_map, NR_DISTANCE_VALUES);
- sched_domains_numa_distance = kcalloc(nr_levels, sizeof(int), GFP_KERNEL);
- if (!sched_domains_numa_distance) {
+ distances = kcalloc(nr_levels, sizeof(int), GFP_KERNEL);
+ if (!distances) {
bitmap_free(distance_map);
return;
}
for (i = 0, j = 0; i < nr_levels; i++, j++) {
j = find_next_bit(distance_map, NR_DISTANCE_VALUES, j);
- sched_domains_numa_distance[i] = j;
+ distances[i] = j;
}
+ rcu_assign_pointer(sched_domains_numa_distance, distances);
bitmap_free(distance_map);
@@ -1826,8 +1851,8 @@ void sched_init_numa(void)
*/
sched_domains_numa_levels = 0;
- sched_domains_numa_masks = kzalloc(sizeof(void *) * nr_levels, GFP_KERNEL);
- if (!sched_domains_numa_masks)
+ masks = kzalloc(sizeof(void *) * nr_levels, GFP_KERNEL);
+ if (!masks)
return;
/*
@@ -1835,31 +1860,20 @@ void sched_init_numa(void)
* CPUs of nodes that are that many hops away from us.
*/
for (i = 0; i < nr_levels; i++) {
- sched_domains_numa_masks[i] =
- kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL);
- if (!sched_domains_numa_masks[i])
+ masks[i] = kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL);
+ if (!masks[i])
return;
- for (j = 0; j < nr_node_ids; j++) {
+ for_each_cpu_node_but(j, offline_node) {
struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL);
int k;
if (!mask)
return;
- sched_domains_numa_masks[i][j] = mask;
-
- for_each_node(k) {
- /*
- * Distance information can be unreliable for
- * offline nodes, defer building the node
- * masks to its bringup.
- * This relies on all unique distance values
- * still being visible at init time.
- */
- if (!node_online(j))
- continue;
+ masks[i][j] = mask;
+ for_each_cpu_node_but(k, offline_node) {
if (sched_debug() && (node_distance(j, k) != node_distance(k, j)))
sched_numa_warn("Node-distance not symmetric");
@@ -1870,6 +1884,7 @@ void sched_init_numa(void)
}
}
}
+ rcu_assign_pointer(sched_domains_numa_masks, masks);
/* Compute default topology size */
for (i = 0; sched_domain_topology[i].mask; i++);
@@ -1907,59 +1922,67 @@ void sched_init_numa(void)
};
}
+ sched_domain_topology_saved = sched_domain_topology;
sched_domain_topology = tl;
sched_domains_numa_levels = nr_levels;
- sched_max_numa_distance = sched_domains_numa_distance[nr_levels - 1];
+ WRITE_ONCE(sched_max_numa_distance, sched_domains_numa_distance[nr_levels - 1]);
- init_numa_topology_type();
-
- sched_numa_onlined_nodes = bitmap_alloc(nr_node_ids, GFP_KERNEL);
- if (!sched_numa_onlined_nodes)
- return;
-
- bitmap_zero(sched_numa_onlined_nodes, nr_node_ids);
- for_each_online_node(i)
- bitmap_set(sched_numa_onlined_nodes, i, 1);
+ init_numa_topology_type(offline_node);
}
-static void __sched_domains_numa_masks_set(unsigned int node)
-{
- int i, j;
-
- /*
- * NUMA masks are not built for offline nodes in sched_init_numa().
- * Thus, when a CPU of a never-onlined-before node gets plugged in,
- * adding that new CPU to the right NUMA masks is not sufficient: the
- * masks of that CPU's node must also be updated.
- */
- if (test_bit(node, sched_numa_onlined_nodes))
- return;
- bitmap_set(sched_numa_onlined_nodes, node, 1);
-
- for (i = 0; i < sched_domains_numa_levels; i++) {
- for (j = 0; j < nr_node_ids; j++) {
- if (!node_online(j) || node == j)
- continue;
+static void sched_reset_numa(void)
+{
+ int nr_levels, *distances;
+ struct cpumask ***masks;
- if (node_distance(j, node) > sched_domains_numa_distance[i])
+ nr_levels = sched_domains_numa_levels;
+ sched_domains_numa_levels = 0;
+ sched_max_numa_distance = 0;
+ sched_numa_topology_type = NUMA_DIRECT;
+ distances = sched_domains_numa_distance;
+ rcu_assign_pointer(sched_domains_numa_distance, NULL);
+ masks = sched_domains_numa_masks;
+ rcu_assign_pointer(sched_domains_numa_masks, NULL);
+ if (distances || masks) {
+ int i, j;
+
+ synchronize_rcu();
+ kfree(distances);
+ for (i = 0; i < nr_levels && masks; i++) {
+ if (!masks[i])
continue;
-
- /* Add remote nodes in our masks */
- cpumask_or(sched_domains_numa_masks[i][node],
- sched_domains_numa_masks[i][node],
- sched_domains_numa_masks[0][j]);
+ for_each_node(j)
+ kfree(masks[i][j]);
+ kfree(masks[i]);
}
+ kfree(masks);
+ }
+ if (sched_domain_topology_saved) {
+ kfree(sched_domain_topology);
+ sched_domain_topology = sched_domain_topology_saved;
+ sched_domain_topology_saved = NULL;
}
+}
+/*
+ * Call with hotplug lock held
+ */
+void sched_update_numa(int cpu, bool online)
+{
+ int node;
+
+ node = cpu_to_node(cpu);
/*
- * A new node has been brought up, potentially changing the topology
- * classification.
- *
- * Note that this is racy vs any use of sched_numa_topology_type :/
+ * Scheduler NUMA topology is updated when the first CPU of a
+ * node is onlined or the last CPU of a node is offlined.
*/
- init_numa_topology_type();
+ if (cpumask_weight(cpumask_of_node(node)) != 1)
+ return;
+
+ sched_reset_numa();
+ sched_init_numa(online ? NUMA_NO_NODE : node);
}
void sched_domains_numa_masks_set(unsigned int cpu)
@@ -1967,11 +1990,9 @@ void sched_domains_numa_masks_set(unsigned int cpu)
int node = cpu_to_node(cpu);
int i, j;
- __sched_domains_numa_masks_set(node);
-
for (i = 0; i < sched_domains_numa_levels; i++) {
for (j = 0; j < nr_node_ids; j++) {
- if (!node_online(j))
+ if (!node_state(j, N_CPU))
continue;
/* Set ourselves in the remote node's masks */
@@ -1986,8 +2007,10 @@ void sched_domains_numa_masks_clear(unsigned int cpu)
int i, j;
for (i = 0; i < sched_domains_numa_levels; i++) {
- for (j = 0; j < nr_node_ids; j++)
- cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
+ for (j = 0; j < nr_node_ids; j++) {
+ if (sched_domains_numa_masks[i][j])
+ cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
+ }
}
}
@@ -2001,14 +2024,26 @@ void sched_domains_numa_masks_clear(unsigned int cpu)
*/
int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
{
- int i, j = cpu_to_node(cpu);
+ int i, j = cpu_to_node(cpu), found = nr_cpu_ids;
+ struct cpumask ***masks;
+ rcu_read_lock();
+ masks = rcu_dereference(sched_domains_numa_masks);
+ if (!masks)
+ goto unlock;
for (i = 0; i < sched_domains_numa_levels; i++) {
- cpu = cpumask_any_and(cpus, sched_domains_numa_masks[i][j]);
- if (cpu < nr_cpu_ids)
- return cpu;
+ if (!masks[i][j])
+ break;
+ cpu = cpumask_any_and(cpus, masks[i][j]);
+ if (cpu < nr_cpu_ids) {
+ found = cpu;
+ break;
+ }
}
- return nr_cpu_ids;
+unlock:
+ rcu_read_unlock();
+
+ return found;
}
#endif /* CONFIG_NUMA */
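The for_each_cpu_node_but() helper introduced above leans on the dangling-else trick so the macro still behaves as a single statement after the loop body is attached. A standalone sketch of the same construction:

/* Standalone sketch of the skip-one-item iterator trick: the
 * trailing 'else' swallows the loop body, and 'continue' skips
 * the excluded value. */
#include <stdio.h>

#define NR_NODES 4
#define for_each_node_but(n, nbut)		\
	for ((n) = 0; (n) < NR_NODES; (n)++)	\
		if ((n) == (nbut))		\
			continue;		\
		else

int main(void)
{
	int n;

	for_each_node_but(n, 2)
		printf("node %d\n", n);	/* prints 0 1 3 */
	return 0;
}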
@@ -2242,6 +2277,57 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
}
}
+ /*
+ * Calculate an allowed NUMA imbalance such that LLCs do not get
+ * imbalanced.
+ */
+ for_each_cpu(i, cpu_map) {
+ unsigned int imb = 0;
+ unsigned int imb_span = 1;
+
+ for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+ struct sched_domain *child = sd->child;
+
+ if (!(sd->flags & SD_SHARE_PKG_RESOURCES) && child &&
+ (child->flags & SD_SHARE_PKG_RESOURCES)) {
+ struct sched_domain __rcu *top_p;
+ unsigned int nr_llcs;
+
+ /*
+ * For a single LLC per node, allow an
+ * imbalance up to 25% of the node. This is an
+ * arbitrary cutoff based on SMT-2 to balance
+ * between memory bandwidth and avoiding
+ * premature sharing of HT resources and SMT-4
+ * or SMT-8 *may* benefit from a different
+ * cutoff.
+ *
+ * For multiple LLCs, allow an imbalance
+ * until multiple tasks would share an LLC
+ * on one node while LLCs on another node
+ * remain idle.
+ */
+ nr_llcs = sd->span_weight / child->span_weight;
+ if (nr_llcs == 1)
+ imb = sd->span_weight >> 2;
+ else
+ imb = nr_llcs;
+ sd->imb_numa_nr = imb;
+
+ /* Set span based on the first NUMA domain. */
+ top_p = sd->parent;
+ while (top_p && !(top_p->flags & SD_NUMA)) {
+ top_p = top_p->parent;
+ }
+ imb_span = top_p ? top_p->span_weight : sd->span_weight;
+ } else {
+ int factor = max(1U, (sd->span_weight / imb_span));
+
+ sd->imb_numa_nr = imb * factor;
+ }
+ }
+ }
+
/* Calculate CPU capacity for physical packages and nodes */
for (i = nr_cpumask_bits-1; i >= 0; i--) {
if (!cpumask_test_cpu(i, cpu_map))
@@ -2351,7 +2437,7 @@ int sched_init_domains(const struct cpumask *cpu_map)
doms_cur = alloc_sched_domains(ndoms_cur);
if (!doms_cur)
doms_cur = &fallback_doms;
- cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN));
+ cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_TYPE_DOMAIN));
err = build_sched_domains(doms_cur[0], NULL);
return err;
@@ -2440,7 +2526,7 @@ void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
if (doms_new) {
n = 1;
cpumask_and(doms_new[0], cpu_active_mask,
- housekeeping_cpumask(HK_FLAG_DOMAIN));
+ housekeeping_cpumask(HK_TYPE_DOMAIN));
}
} else {
n = ndoms_new;
@@ -2475,7 +2561,7 @@ match1:
n = 0;
doms_new = &fallback_doms;
cpumask_and(doms_new[0], cpu_active_mask,
- housekeeping_cpumask(HK_FLAG_DOMAIN));
+ housekeeping_cpumask(HK_TYPE_DOMAIN));
}
/* Build new domains: */
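A worked example of the imb_numa_nr calculation above, under assumed topologies: with a single LLC spanning a 128-CPU node, nr_llcs == 1 and imb = 128 >> 2 = 32, so up to 32 tasks may accumulate on one node before balancing intervenes; with eight 16-CPU LLCs per node, imb = nr_llcs = 8 instead, kicking in once an LLC would be shared while LLCs on another node sit idle. At NUMA levels above the first, the allowance scales by span_weight / imb_span.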
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index eca38107b32f..9860bb9a847c 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -4,7 +4,6 @@
*
* (C) 2004 Nadia Yvette Chambers, Oracle
*/
-#include "sched.h"
void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *key)
{
diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c
index 02ce292b9bc0..d4788f810b55 100644
--- a/kernel/sched/wait_bit.c
+++ b/kernel/sched/wait_bit.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0-only
+
/*
* The implementation of the wait_bit*() and related waiting APIs:
*/
-#include "sched.h"
#define WAIT_TABLE_BITS 8
#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 4d8f44a17727..db10e73d06e0 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -29,6 +29,9 @@
#include <linux/syscalls.h>
#include <linux/sysctl.h>
+/* Not exposed in headers: strictly internal use only. */
+#define SECCOMP_MODE_DEAD (SECCOMP_MODE_FILTER + 1)
+
#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
#include <asm/syscall.h>
#endif
@@ -1010,6 +1013,7 @@ static void __secure_computing_strict(int this_syscall)
#ifdef SECCOMP_DEBUG
dump_stack();
#endif
+ current->seccomp.mode = SECCOMP_MODE_DEAD;
seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
do_exit(SIGKILL);
}
@@ -1261,6 +1265,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
case SECCOMP_RET_KILL_THREAD:
case SECCOMP_RET_KILL_PROCESS:
default:
+ current->seccomp.mode = SECCOMP_MODE_DEAD;
seccomp_log(this_syscall, SIGSYS, action, true);
/* Dump core only if this is the last remaining thread. */
if (action != SECCOMP_RET_KILL_THREAD ||
@@ -1309,6 +1314,11 @@ int __secure_computing(const struct seccomp_data *sd)
return 0;
case SECCOMP_MODE_FILTER:
return __seccomp_filter(this_syscall, sd, false);
+ /* Surviving SECCOMP_RET_KILL_* must be proactively impossible. */
+ case SECCOMP_MODE_DEAD:
+ WARN_ON_ONCE(1);
+ do_exit(SIGKILL);
+ return -1;
default:
BUG();
}
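The new SECCOMP_MODE_DEAD state guards the window after a fatal verdict. For reference, a minimal strict-mode user, which exercises the same kill path if it strays beyond read/write/exit/sigreturn; this is a sketch, not kernel test code:

/* Strict-mode demo: after prctl(), only read/write/_exit/sigreturn
 * survive; note the raw exit syscall, since glibc's _exit() uses
 * exit_group(), which strict mode kills. */
#include <linux/seccomp.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0))
		return 1;
	write(1, "still alive\n", 12);
	syscall(SYS_exit, 0);	/* raw exit; exit_group() is fatal here */
	return 0;		/* not reached */
}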
diff --git a/kernel/signal.c b/kernel/signal.c
index 38602738866e..e93de6daa188 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1308,6 +1308,43 @@ enum sig_handler {
};
/*
+ * On some architectures, PREEMPT_RT has to delay sending a signal from a
+ * trap since it cannot enable preemption, and the signal code's
+ * spin_locks turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME
+ * which will send the signal on exit of the trap.
+ */
+#ifdef CONFIG_RT_DELAYED_SIGNALS
+static inline bool force_sig_delayed(struct kernel_siginfo *info,
+ struct task_struct *t)
+{
+ if (!in_atomic())
+ return false;
+
+ if (WARN_ON_ONCE(t->forced_info.si_signo))
+ return true;
+
+ if (is_si_special(info)) {
+ WARN_ON_ONCE(info != SEND_SIG_PRIV);
+ t->forced_info.si_signo = info->si_signo;
+ t->forced_info.si_errno = 0;
+ t->forced_info.si_code = SI_KERNEL;
+ t->forced_info.si_pid = 0;
+ t->forced_info.si_uid = 0;
+ } else {
+ t->forced_info = *info;
+ }
+ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+ return true;
+}
+#else
+static inline bool force_sig_delayed(struct kernel_siginfo *info,
+ struct task_struct *t)
+{
+ return false;
+}
+#endif
+
+/*
* Force a signal that the process can't ignore: if necessary
* we unblock the signal and change any SIG_IGN to SIG_DFL.
*
@@ -1327,6 +1364,9 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t,
struct k_sigaction *action;
int sig = info->si_signo;
+ if (force_sig_delayed(info, t))
+ return 0;
+
spin_lock_irqsave(&t->sighand->siglock, flags);
action = &t->sighand->action[sig-1];
ignored = action->sa.sa_handler == SIG_IGN;
@@ -1342,9 +1382,10 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t,
}
/*
* Don't clear SIGNAL_UNKILLABLE for traced tasks, users won't expect
- * debugging to leave init killable.
+ * debugging to leave init killable. But HANDLER_EXIT is always fatal.
*/
- if (action->sa.sa_handler == SIG_DFL && !t->ptrace)
+ if (action->sa.sa_handler == SIG_DFL &&
+ (!t->ptrace || (handler == HANDLER_EXIT)))
t->signal->flags &= ~SIGNAL_UNKILLABLE;
ret = send_signal(sig, info, t, PIDTYPE_PID);
spin_unlock_irqrestore(&t->sighand->siglock, flags);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 41f470929e99..fac801815554 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -222,7 +222,7 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
u32 pending;
int curcnt;
- WARN_ON_ONCE(in_irq());
+ WARN_ON_ONCE(in_hardirq());
lockdep_assert_irqs_enabled();
local_irq_save(flags);
@@ -305,7 +305,7 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
unsigned long flags;
- WARN_ON_ONCE(in_irq());
+ WARN_ON_ONCE(in_hardirq());
raw_local_irq_save(flags);
/*
@@ -352,14 +352,14 @@ static void __local_bh_enable(unsigned int cnt)
*/
void _local_bh_enable(void)
{
- WARN_ON_ONCE(in_irq());
+ WARN_ON_ONCE(in_hardirq());
__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}
EXPORT_SYMBOL(_local_bh_enable);
void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
- WARN_ON_ONCE(in_irq());
+ WARN_ON_ONCE(in_hardirq());
lockdep_assert_irqs_enabled();
#ifdef CONFIG_TRACE_IRQFLAGS
local_irq_disable();
@@ -618,7 +618,7 @@ static inline void tick_irq_exit(void)
/* Make sure that timer wheel updates are propagated */
if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
- if (!in_irq())
+ if (!in_hardirq())
tick_nohz_irq_exit();
}
#endif
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index 66b8af394e58..ddb5a7f48d69 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -70,7 +70,7 @@ late_initcall(stackleak_sysctls_init);
#define skip_erasing() false
#endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
-asmlinkage void notrace stackleak_erase(void)
+asmlinkage void noinstr stackleak_erase(void)
{
/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
unsigned long kstack_ptr = current->lowest_stack;
@@ -124,9 +124,8 @@ asmlinkage void notrace stackleak_erase(void)
/* Reset the 'lowest_stack' value for the next syscall */
current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
}
-NOKPROBE_SYMBOL(stackleak_erase);
-void __used __no_caller_saved_registers notrace stackleak_track_stack(void)
+void __used __no_caller_saved_registers noinstr stackleak_track_stack(void)
{
unsigned long sp = current_stack_pointer;
diff --git a/kernel/sys.c b/kernel/sys.c
index ecc4cf019242..5b0e172c4d47 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -7,6 +7,7 @@
#include <linux/export.h>
#include <linux/mm.h>
+#include <linux/mm_inline.h>
#include <linux/utsname.h>
#include <linux/mman.h>
#include <linux/reboot.h>
@@ -472,6 +473,16 @@ static int set_user(struct cred *new)
if (!new_user)
return -EAGAIN;
+ free_uid(new->user);
+ new->user = new_user;
+ return 0;
+}
+
+static void flag_nproc_exceeded(struct cred *new)
+{
+ if (new->ucounts == current_ucounts())
+ return;
+
/*
* We don't fail in case of NPROC limit excess here because too many
* poorly written programs don't check set*uid() return code, assuming
@@ -480,15 +491,10 @@ static int set_user(struct cred *new)
* failure to the execve() stage.
*/
if (is_ucounts_overlimit(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) &&
- new_user != INIT_USER &&
- !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
+ new->user != INIT_USER)
current->flags |= PF_NPROC_EXCEEDED;
else
current->flags &= ~PF_NPROC_EXCEEDED;
-
- free_uid(new->user);
- new->user = new_user;
- return 0;
}
/*
@@ -563,6 +569,7 @@ long __sys_setreuid(uid_t ruid, uid_t euid)
if (retval < 0)
goto error;
+ flag_nproc_exceeded(new);
return commit_creds(new);
error:
@@ -625,6 +632,7 @@ long __sys_setuid(uid_t uid)
if (retval < 0)
goto error;
+ flag_nproc_exceeded(new);
return commit_creds(new);
error:
@@ -704,6 +712,7 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
if (retval < 0)
goto error;
+ flag_nproc_exceeded(new);
return commit_creds(new);
error:
@@ -2278,15 +2287,16 @@ static int prctl_set_vma(unsigned long opt, unsigned long addr,
{
struct mm_struct *mm = current->mm;
const char __user *uname;
- char *name, *pch;
+ struct anon_vma_name *anon_name = NULL;
int error;
switch (opt) {
case PR_SET_VMA_ANON_NAME:
uname = (const char __user *)arg;
if (uname) {
- name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN);
+ char *name, *pch;
+ name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN);
if (IS_ERR(name))
return PTR_ERR(name);
@@ -2296,15 +2306,18 @@ static int prctl_set_vma(unsigned long opt, unsigned long addr,
return -EINVAL;
}
}
- } else {
- /* Reset the name */
- name = NULL;
+ /* anon_vma has its own copy */
+ anon_name = anon_vma_name_alloc(name);
+ kfree(name);
+ if (!anon_name)
+ return -ENOMEM;
+
}
mmap_write_lock(mm);
- error = madvise_set_anon_name(mm, addr, size, name);
+ error = madvise_set_anon_name(mm, addr, size, anon_name);
mmap_write_unlock(mm);
- kfree(name);
+ anon_vma_name_put(anon_name);
break;
default:
error = -EINVAL;
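The prctl() path reworked above is driven from userspace as below: a minimal sketch that names an anonymous mapping so it appears as [anon:my buffer] in /proc/self/maps. It assumes a kernel with CONFIG_ANON_VMA_NAME; the fallback defines cover older headers.

/* Name an anonymous mapping via PR_SET_VMA_ANON_NAME. */
#include <stdio.h>
#include <sys/mman.h>
#include <sys/prctl.h>

#ifndef PR_SET_VMA
#define PR_SET_VMA		0x53564d41
#define PR_SET_VMA_ANON_NAME	0
#endif

int main(void)
{
	size_t len = 1 << 20;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
		  (unsigned long)p, len, "my buffer"))
		perror("prctl");	/* kernel may lack CONFIG_ANON_VMA_NAME */
	return 0;
}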
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5ae443b2882e..830aaf8ca08e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -180,6 +180,10 @@ static int bpf_stats_handler(struct ctl_table *table, int write,
return ret;
}
+void __weak unpriv_ebpf_notify(int new_state)
+{
+}
+
static int bpf_unpriv_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
@@ -197,6 +201,9 @@ static int bpf_unpriv_handler(struct ctl_table *table, int write,
return -EPERM;
*(int *)table->data = unpriv_enable;
}
+
+ unpriv_ebpf_notify(unpriv_enable);
+
return ret;
}
#endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */
@@ -1689,7 +1696,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = sysctl_numa_balancing,
.extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
+ .extra2 = SYSCTL_FOUR,
},
#endif /* CONFIG_NUMA_BALANCING */
{
@@ -1750,17 +1757,6 @@ static struct ctl_table kern_table[] = {
.proc_handler = sysctl_sched_uclamp_handler,
},
#endif
-#ifdef CONFIG_SCHED_AUTOGROUP
- {
- .procname = "sched_autogroup_enabled",
- .data = &sysctl_sched_autogroup_enabled,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-#endif
#ifdef CONFIG_CFS_BANDWIDTH
{
.procname = "sched_cfs_bandwidth_slice_us",
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 04bfd62f5e5c..27b7868b5c30 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -181,5 +181,14 @@ config HIGH_RES_TIMERS
hardware is not capable then this option only increases
the size of the kernel image.
+config CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
+ int "Clocksource watchdog maximum allowable skew (in μs)"
+ depends on CLOCKSOURCE_WATCHDOG
+ range 50 1000
+ default 100
+ help
+ Specify the maximum amount of allowable watchdog skew in
+ microseconds before reporting the clocksource to be unstable.
+
endmenu
endif
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 1cf73807b450..95d7ca35bdf2 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -107,7 +107,13 @@ static u64 suspend_start;
* This delay could be due to SMIs, NMIs, or to VCPU preemptions. Used as
* a lower bound for cs->uncertainty_margin values when registering clocks.
*/
-#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC)
+#ifdef CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
+#define MAX_SKEW_USEC CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
+#else
+#define MAX_SKEW_USEC 100
+#endif
+
+#define WATCHDOG_MAX_SKEW (MAX_SKEW_USEC * NSEC_PER_USEC)
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 17a283ce2b20..2d76c91b85de 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -169,6 +169,8 @@ static ktime_t tick_init_jiffy_update(void)
return period;
}
+#define MAX_STALLED_JIFFIES 5
+
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
int cpu = smp_processor_id();
@@ -196,6 +198,21 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
if (tick_do_timer_cpu == cpu)
tick_do_update_jiffies64(now);
+ /*
+ * If jiffies update stalled for too long (timekeeper in stop_machine()
+ * or VMEXIT'ed for several msecs), force an update.
+ */
+ if (ts->last_tick_jiffies != jiffies) {
+ ts->stalled_jiffies = 0;
+ ts->last_tick_jiffies = READ_ONCE(jiffies);
+ } else {
+ if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) {
+ tick_do_update_jiffies64(now);
+ ts->stalled_jiffies = 0;
+ ts->last_tick_jiffies = READ_ONCE(jiffies);
+ }
+ }
+
if (ts->inidle)
ts->got_idle_tick = 1;
}
@@ -768,7 +785,7 @@ static inline bool local_timer_softirq_pending(void)
static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
{
- u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
+ u64 basemono, next_tick, delta, expires;
unsigned long basejiff;
unsigned int seq;
@@ -791,7 +808,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
* minimal delta which brings us back to this place
* immediately. Lather, rinse and repeat...
*/
- if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
+ if (rcu_needs_cpu() || arch_needs_cpu() ||
irq_work_needs_cpu() || local_timer_softirq_pending()) {
next_tick = basemono + TICK_NSEC;
} else {
@@ -802,10 +819,8 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
* disabled this also looks at the next expiring
* hrtimer.
*/
- next_tmr = get_next_timer_interrupt(basejiff, basemono);
- ts->next_timer = next_tmr;
- /* Take the next rcu event into account */
- next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
+ next_tick = get_next_timer_interrupt(basejiff, basemono);
+ ts->next_timer = next_tick;
}
/*
@@ -984,6 +999,45 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
__tick_nohz_full_update_tick(ts, ktime_get());
}
+/*
+ * A pending softirq outside an IRQ (or softirq disabled section) context
+ * should be waiting for ksoftirqd to handle it. Therefore we shouldn't
+ * reach here due to the need_resched() early check in can_stop_idle_tick().
+ *
+ * However if we are between CPUHP_AP_SMPBOOT_THREADS and CPUHP_TEARDOWN_CPU in the
+ * cpu_down() process, softirqs can still be raised while ksoftirqd is parked,
+ * triggering the below since wakeup_softirqd() is ignored.
+ *
+ */
+static bool report_idle_softirq(void)
+{
+ static int ratelimit;
+ unsigned int pending = local_softirq_pending();
+
+ if (likely(!pending))
+ return false;
+
+ /* Some softirqs claim to be safe against hotplug and ksoftirqd parking */
+ if (!cpu_active(smp_processor_id())) {
+ pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK;
+ if (!pending)
+ return false;
+ }
+
+ if (ratelimit < 10)
+ return false;
+
+ /* On RT, softirqs handling may be waiting on some lock */
+ if (!local_bh_blocked())
+ return false;
+
+ pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n",
+ pending);
+ ratelimit++;
+
+ return true;
+}
+
static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
{
/*
@@ -1010,17 +1064,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
if (need_resched())
return false;
- if (unlikely(local_softirq_pending())) {
- static int ratelimit;
-
- if (ratelimit < 10 && !local_bh_blocked() &&
- (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
- pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n",
- (unsigned int) local_softirq_pending());
- ratelimit++;
- }
+ if (unlikely(report_idle_softirq()))
return false;
- }
if (tick_nohz_full_enabled()) {
/*
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index d952ae393423..504649513399 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -49,6 +49,8 @@ enum tick_nohz_mode {
* @timer_expires_base: Base time clock monotonic for @timer_expires
* @next_timer: Expiry time of next expiring timer for debugging purpose only
* @tick_dep_mask: Tick dependency mask - is set, if someone needs the tick
+ * @last_tick_jiffies: Value of jiffies seen on last tick
+ * @stalled_jiffies: Number of stalled jiffies detected across ticks
*/
struct tick_sched {
struct hrtimer sched_timer;
@@ -77,6 +79,8 @@ struct tick_sched {
u64 next_timer;
ktime_t idle_expires;
atomic_t tick_dep_mask;
+ unsigned long last_tick_jiffies;
+ unsigned int stalled_jiffies;
};
extern struct tick_sched *tick_get_tick_sched(int cpu);
diff --git a/kernel/torture.c b/kernel/torture.c
index ef27a6c82451..789aeb0e1159 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -911,7 +911,7 @@ void torture_kthread_stopping(char *title)
{
char buf[128];
- snprintf(buf, sizeof(buf), "Stopping %s", title);
+ snprintf(buf, sizeof(buf), "%s is stopping", title);
VERBOSE_TOROUT_STRING(buf);
while (!kthread_should_stop()) {
torture_shutdown_absorb(title);
@@ -931,12 +931,14 @@ int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
int ret = 0;
VERBOSE_TOROUT_STRING(m);
- *tp = kthread_run(fn, arg, "%s", s);
+ *tp = kthread_create(fn, arg, "%s", s);
if (IS_ERR(*tp)) {
ret = PTR_ERR(*tp);
TOROUT_ERRSTRING(f);
*tp = NULL;
+ return ret;
}
+ wake_up_process(*tp); // Process is sleeping, so ordering provided.
torture_shuffle_task_register(*tp);
return ret;
}
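The torture change above splits kthread_run() into kthread_create() plus an explicit wake_up_process(), so the error path can return before the thread has ever executed and the wakeup doubles as an ordering point. The pattern in isolation (a sketch; start_worker and the thread name are illustrative):

    #include <linux/err.h>
    #include <linux/kthread.h>

    static struct task_struct *start_worker(int (*fn)(void *), void *arg)
    {
            struct task_struct *tp;

            tp = kthread_create(fn, arg, "my-worker");      /* created sleeping */
            if (IS_ERR(tp))
                    return tp;      /* nothing ran yet, nothing to unwind */

            /* Any registration that must precede execution goes here. */
            wake_up_process(tp);    /* process was sleeping, so ordering provided */
            return tp;
    }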
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index af68a67179b4..21dea90eaa93 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -310,10 +310,20 @@ record_it:
local_irq_restore(flags);
}
-static void blk_trace_free(struct blk_trace *bt)
+static void blk_trace_free(struct request_queue *q, struct blk_trace *bt)
{
relay_close(bt->rchan);
- debugfs_remove(bt->dir);
+
+ /*
+ * If 'bt->dir' is not set, then both 'dropped' and 'msg' are created
+ * under 'q->debugfs_dir', thus lookup and remove them.
+ */
+ if (!bt->dir) {
+ debugfs_remove(debugfs_lookup("dropped", q->debugfs_dir));
+ debugfs_remove(debugfs_lookup("msg", q->debugfs_dir));
+ } else {
+ debugfs_remove(bt->dir);
+ }
free_percpu(bt->sequence);
free_percpu(bt->msg_data);
kfree(bt);
@@ -335,10 +345,10 @@ static void put_probe_ref(void)
mutex_unlock(&blk_probe_mutex);
}
-static void blk_trace_cleanup(struct blk_trace *bt)
+static void blk_trace_cleanup(struct request_queue *q, struct blk_trace *bt)
{
synchronize_rcu();
- blk_trace_free(bt);
+ blk_trace_free(q, bt);
put_probe_ref();
}
@@ -352,7 +362,7 @@ static int __blk_trace_remove(struct request_queue *q)
return -EINVAL;
if (bt->trace_state != Blktrace_running)
- blk_trace_cleanup(bt);
+ blk_trace_cleanup(q, bt);
return 0;
}
@@ -572,7 +582,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
ret = 0;
err:
if (ret)
- blk_trace_free(bt);
+ blk_trace_free(q, bt);
return ret;
}
@@ -1616,7 +1626,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
put_probe_ref();
synchronize_rcu();
- blk_trace_free(bt);
+ blk_trace_free(q, bt);
return 0;
}
@@ -1647,7 +1657,7 @@ static int blk_trace_setup_queue(struct request_queue *q,
return 0;
free_bt:
- blk_trace_free(bt);
+ blk_trace_free(q, bt);
return ret;
}
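blk_trace_free() now needs the queue because, when bt->dir was never set, the "dropped" and "msg" files live directly under q->debugfs_dir and must be found by name before removal. The lookup-then-remove idiom in isolation (a sketch; the parent argument is whatever directory the files were created under):

    #include <linux/debugfs.h>

    static void remove_shared_debugfs_files(struct dentry *parent)
    {
            /* debugfs_remove() tolerates NULL/error pointers from a failed lookup. */
            debugfs_remove(debugfs_lookup("dropped", parent));
            debugfs_remove(debugfs_lookup("msg", parent));
    }

Later kernels add debugfs_lookup_and_remove() for this pattern, which also drops the reference that debugfs_lookup() takes.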
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 22061d38fc00..19028e072cdb 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -415,7 +415,9 @@ free:
static void
ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
- struct task_struct *prev, struct task_struct *next)
+ unsigned int prev_state,
+ struct task_struct *prev,
+ struct task_struct *next)
{
unsigned long long timestamp;
int index;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4e29bd1cf151..e59d80c9afd7 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -7193,7 +7193,6 @@ static int __init ftrace_nodyn_init(void)
core_initcall(ftrace_nodyn_init);
static inline int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { return 0; }
-static inline void ftrace_startup_enable(int command) { }
static inline void ftrace_startup_all(int command) { }
# define ftrace_startup_sysctl() do { } while (0)
@@ -7349,7 +7348,9 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
static void
ftrace_filter_pid_sched_switch_probe(void *data, bool preempt,
- struct task_struct *prev, struct task_struct *next)
+ unsigned int prev_state,
+ struct task_struct *prev,
+ struct task_struct *next)
{
struct trace_array *tr = data;
struct trace_pid_list *pid_list;
@@ -7793,7 +7794,7 @@ int ftrace_is_dead(void)
/**
* register_ftrace_function - register a function for profiling
- * @ops - ops structure that holds the function for profiling.
+ * @ops: ops structure that holds the function for profiling.
*
* Register a function to be called by all functions in the
* kernel.
@@ -7820,7 +7821,7 @@ EXPORT_SYMBOL_GPL(register_ftrace_function);
/**
* unregister_ftrace_function - unregister a function for profiling.
- * @ops - ops structure that holds the function to unregister
+ * @ops: ops structure that holds the function to unregister
*
* Unregister a function that was added to be called by ftrace profiling.
*/
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index eaf7d30ca6f1..08ea781540b5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -245,7 +245,7 @@ static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
static int __init set_trace_boot_options(char *str)
{
strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
- return 0;
+ return 1;
}
__setup("trace_options=", set_trace_boot_options);
@@ -256,7 +256,7 @@ static int __init set_trace_boot_clock(char *str)
{
strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
trace_boot_clock = trace_boot_clock_buf;
- return 0;
+ return 1;
}
__setup("trace_clock=", set_trace_boot_clock);
@@ -1484,10 +1484,12 @@ static int __init set_buf_size(char *str)
if (!str)
return 0;
buf_size = memparse(str, &str);
- /* nr_entries can not be zero */
- if (buf_size == 0)
- return 0;
- trace_buf_size = buf_size;
+ /*
+ * nr_entries can not be zero and the startup
+ * tests require some buffer space. Therefore
+ * ensure we have at least 4096 bytes of buffer.
+ */
+ trace_buf_size = max(4096UL, buf_size);
return 1;
}
__setup("trace_buf_size=", set_buf_size);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 0f5e22238cd2..07d990270e2a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -136,7 +136,6 @@ struct kprobe_trace_entry_head {
struct eprobe_trace_entry_head {
struct trace_entry ent;
- unsigned int type;
};
struct kretprobe_trace_entry_head {
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index 191db32dec46..541aa13581b9 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -242,7 +242,6 @@ static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i)
static int eprobe_event_define_fields(struct trace_event_call *event_call)
{
- int ret;
struct eprobe_trace_entry_head field;
struct trace_probe *tp;
@@ -250,8 +249,6 @@ static int eprobe_event_define_fields(struct trace_event_call *event_call)
if (WARN_ON_ONCE(!tp))
return -ENOENT;
- DEFINE_FIELD(unsigned int, type, FIELD_STRING_TYPE, 0);
-
return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
}
@@ -270,7 +267,9 @@ print_eprobe_event(struct trace_iterator *iter, int flags,
struct trace_event_call *pevent;
struct trace_event *probed_event;
struct trace_seq *s = &iter->seq;
+ struct trace_eprobe *ep;
struct trace_probe *tp;
+ unsigned int type;
field = (struct eprobe_trace_entry_head *)iter->ent;
tp = trace_probe_primary_from_call(
@@ -278,15 +277,18 @@ print_eprobe_event(struct trace_iterator *iter, int flags,
if (WARN_ON_ONCE(!tp))
goto out;
+ ep = container_of(tp, struct trace_eprobe, tp);
+ type = ep->event->event.type;
+
trace_seq_printf(s, "%s: (", trace_probe_name(tp));
- probed_event = ftrace_find_event(field->type);
+ probed_event = ftrace_find_event(type);
if (probed_event) {
pevent = container_of(probed_event, struct trace_event_call, event);
trace_seq_printf(s, "%s.%s", pevent->class->system,
trace_event_name(pevent));
} else {
- trace_seq_printf(s, "%u", field->type);
+ trace_seq_printf(s, "%u", type);
}
trace_seq_putc(s, ')');
@@ -498,10 +500,6 @@ __eprobe_trace_func(struct eprobe_data *edata, void *rec)
return;
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
- if (edata->ep->event)
- entry->type = edata->ep->event->event.type;
- else
- entry->type = 0;
store_trace_args(&entry[1], &edata->ep->tp, rec, sizeof(*entry), dsize);
trace_event_buffer_commit(&fbuffer);
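Removing the type field from eprobe entries works because print time can recover the owning trace_eprobe, and with it the event type, from the embedded trace_probe via container_of(). The pointer arithmetic behind that, reduced to a runnable toy:

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct trace_probe { int dummy; };
    struct trace_eprobe {
            unsigned int event_type;
            struct trace_probe tp;          /* embedded member */
    };

    int main(void)
    {
            struct trace_eprobe ep = { .event_type = 42 };
            struct trace_probe *tp = &ep.tp;

            /* Recover the outer structure from a pointer to the member. */
            struct trace_eprobe *back = container_of(tp, struct trace_eprobe, tp);
            printf("type=%u\n", back->event_type);  /* prints type=42 */
            return 0;
    }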
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 0d91152172c9..e11e167b7809 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -392,6 +392,12 @@ static void test_event_printk(struct trace_event_call *call)
if (!(dereference_flags & (1ULL << arg)))
goto next_arg;
+ /* Check for __get_sockaddr */
+ if (str_has_prefix(fmt + i, "__get_sockaddr(")) {
+ dereference_flags &= ~(1ULL << arg);
+ goto next_arg;
+ }
+
/* Find the REC-> in the argument */
c = strchr(fmt + i, ',');
r = strstr(fmt + i, "REC->");
@@ -767,7 +773,9 @@ void trace_event_follow_fork(struct trace_array *tr, bool enable)
static void
event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
- struct task_struct *prev, struct task_struct *next)
+ unsigned int prev_state,
+ struct task_struct *prev,
+ struct task_struct *next)
{
struct trace_array *tr = data;
struct trace_pid_list *no_pid_list;
@@ -791,7 +799,9 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
static void
event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
- struct task_struct *prev, struct task_struct *next)
+ unsigned int prev_state,
+ struct task_struct *prev,
+ struct task_struct *next)
{
struct trace_array *tr = data;
struct trace_pid_list *no_pid_list;
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 954b19e2f196..44db5ba9cabb 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -2308,9 +2308,9 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
/*
* For backward compatibility, if field_name
* was "cpu", then we treat this the same as
- * common_cpu.
+ * common_cpu. This also works for "CPU".
*/
- if (strcmp(field_name, "cpu") == 0) {
+ if (field && field->filter_type == FILTER_CPU) {
*flags |= HIST_FIELD_FL_CPU;
} else {
hist_err(tr, HIST_ERR_FIELD_NOT_FOUND,
@@ -4851,7 +4851,7 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data)
if (hist_field->flags & HIST_FIELD_FL_STACKTRACE)
cmp_fn = tracing_map_cmp_none;
- else if (!field)
+ else if (!field || hist_field->flags & HIST_FIELD_FL_CPU)
cmp_fn = tracing_map_cmp_num(hist_field->size,
hist_field->is_signed);
else if (is_string_field(field))
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index d00fee705f9c..7eb9d04f1c2e 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -84,6 +84,20 @@ event_triggers_call(struct trace_event_file *file,
}
EXPORT_SYMBOL_GPL(event_triggers_call);
+bool __trace_trigger_soft_disabled(struct trace_event_file *file)
+{
+ unsigned long eflags = file->flags;
+
+ if (eflags & EVENT_FILE_FL_TRIGGER_MODE)
+ event_triggers_call(file, NULL, NULL, NULL);
+ if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
+ return true;
+ if (eflags & EVENT_FILE_FL_PID_FILTER)
+ return trace_event_ignore_this_pid(file);
+ return false;
+}
+EXPORT_SYMBOL_GPL(__trace_trigger_soft_disabled);
+
/**
* event_triggers_post_call - Call 'post_triggers' for a trace event
* @file: The trace_event_file associated with the event
@@ -1295,6 +1309,16 @@ traceon_trigger(struct event_trigger_data *data,
struct trace_buffer *buffer, void *rec,
struct ring_buffer_event *event)
{
+ struct trace_event_file *file = data->private_data;
+
+ if (file) {
+ if (tracer_tracing_is_on(file->tr))
+ return;
+
+ tracer_tracing_on(file->tr);
+ return;
+ }
+
if (tracing_is_on())
return;
@@ -1306,8 +1330,15 @@ traceon_count_trigger(struct event_trigger_data *data,
struct trace_buffer *buffer, void *rec,
struct ring_buffer_event *event)
{
- if (tracing_is_on())
- return;
+ struct trace_event_file *file = data->private_data;
+
+ if (file) {
+ if (tracer_tracing_is_on(file->tr))
+ return;
+ } else {
+ if (tracing_is_on())
+ return;
+ }
if (!data->count)
return;
@@ -1315,7 +1346,10 @@ traceon_count_trigger(struct event_trigger_data *data,
if (data->count != -1)
(data->count)--;
- tracing_on();
+ if (file)
+ tracer_tracing_on(file->tr);
+ else
+ tracing_on();
}
static void
@@ -1323,6 +1357,16 @@ traceoff_trigger(struct event_trigger_data *data,
struct trace_buffer *buffer, void *rec,
struct ring_buffer_event *event)
{
+ struct trace_event_file *file = data->private_data;
+
+ if (file) {
+ if (!tracer_tracing_is_on(file->tr))
+ return;
+
+ tracer_tracing_off(file->tr);
+ return;
+ }
+
if (!tracing_is_on())
return;
@@ -1334,8 +1378,15 @@ traceoff_count_trigger(struct event_trigger_data *data,
struct trace_buffer *buffer, void *rec,
struct ring_buffer_event *event)
{
- if (!tracing_is_on())
- return;
+ struct trace_event_file *file = data->private_data;
+
+ if (file) {
+ if (!tracer_tracing_is_on(file->tr))
+ return;
+ } else {
+ if (!tracing_is_on())
+ return;
+ }
if (!data->count)
return;
@@ -1343,7 +1394,10 @@ traceoff_count_trigger(struct event_trigger_data *data,
if (data->count != -1)
(data->count)--;
- tracing_off();
+ if (file)
+ tracer_tracing_off(file->tr);
+ else
+ tracing_off();
}
static int
@@ -1540,7 +1594,12 @@ stacktrace_trigger(struct event_trigger_data *data,
struct trace_buffer *buffer, void *rec,
struct ring_buffer_event *event)
{
- trace_dump_stack(STACK_SKIP);
+ struct trace_event_file *file = data->private_data;
+
+ if (file)
+ __trace_stack(file->tr, tracing_gen_ctx(), STACK_SKIP);
+ else
+ trace_dump_stack(STACK_SKIP);
}
static void
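All of these trigger changes follow one rule: if the trigger carries a trace_event_file in data->private_data, act on that instance's buffer through file->tr; only fall back to the global tracing_on()/tracing_off() when no file is attached. Distilled (a sketch using the same kernel functions as the hunks above):

    static void trigger_tracing_off(struct event_trigger_data *data)
    {
            struct trace_event_file *file = data->private_data;

            if (file)
                    tracer_tracing_off(file->tr);   /* this trace instance only */
            else
                    tracing_off();                  /* legacy: global top-level buffer */
    }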
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 508f14af4f2c..b62fd785b599 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -32,7 +32,7 @@ static int __init set_kprobe_boot_events(char *str)
strlcpy(kprobe_boot_events_buf, str, COMMAND_LINE_SIZE);
disable_tracing_selftest("running kprobe events");
- return 0;
+ return 1;
}
__setup("kprobe_event=", set_kprobe_boot_events);
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 870a08da5b48..e9ae1f33a7f0 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -1167,7 +1167,9 @@ thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
* used to record the beginning and to report the end of a thread noise window.
*/
static void
-trace_sched_switch_callback(void *data, bool preempt, struct task_struct *p,
+trace_sched_switch_callback(void *data, bool preempt,
+ unsigned int prev_state,
+ struct task_struct *p,
struct task_struct *n)
{
struct osnoise_variables *osn_var = this_cpu_osn_var();
@@ -1387,6 +1389,26 @@ static int run_osnoise(void)
}
/*
+ * In some cases, notably when running on a nohz_full CPU with
+ * a stopped tick, PREEMPT_RCU has no way to account for QSs.
+ * This will eventually cause unwarranted noise as PREEMPT_RCU
+ * will force preemption as the means of ending the current
+ * grace period. We avoid this problem by calling
+ * rcu_momentary_dyntick_idle(), which performs a zero duration
+ * EQS allowing PREEMPT_RCU to end the current grace period.
+ * This call shouldn't be wrapped inside an RCU critical
+ * section.
+ *
+ * Note that in non-PREEMPT_RCU kernels, QSs are handled through
+ * cond_resched().
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
+ local_irq_disable();
+ rcu_momentary_dyntick_idle();
+ local_irq_enable();
+ }
+
+ /*
* For the non-preemptive kernel config: let threads runs, if
* they so wish.
*/
@@ -1437,6 +1459,37 @@ static struct cpumask osnoise_cpumask;
static struct cpumask save_cpumask;
/*
+ * osnoise_sleep - sleep until the next period
+ */
+static void osnoise_sleep(void)
+{
+ u64 interval;
+ ktime_t wake_time;
+
+ mutex_lock(&interface_lock);
+ interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
+ mutex_unlock(&interface_lock);
+
+ /*
+ * Unlike hwlat_detector, the osnoise tracer can run
+ * without a pause because preemption is on.
+ */
+ if (!interval) {
+ /* Let synchronize_rcu_tasks() make progress */
+ cond_resched_tasks_rcu_qs();
+ return;
+ }
+
+ wake_time = ktime_add_us(ktime_get(), interval);
+ __set_current_state(TASK_INTERRUPTIBLE);
+
+ while (schedule_hrtimeout_range(&wake_time, 0, HRTIMER_MODE_ABS)) {
+ if (kthread_should_stop())
+ break;
+ }
+}
+
+/*
* osnoise_main - The osnoise detection kernel thread
*
* Calls run_osnoise() function to measure the osnoise for the configured runtime,
@@ -1444,30 +1497,10 @@ static struct cpumask save_cpumask;
*/
static int osnoise_main(void *data)
{
- u64 interval;
while (!kthread_should_stop()) {
-
run_osnoise();
-
- mutex_lock(&interface_lock);
- interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
- mutex_unlock(&interface_lock);
-
- do_div(interval, USEC_PER_MSEC);
-
- /*
- * differently from hwlat_detector, the osnoise tracer can run
- * without a pause because preemption is on.
- */
- if (interval < 1) {
- /* Let synchronize_rcu_tasks() make progress */
- cond_resched_tasks_rcu_qs();
- continue;
- }
-
- if (msleep_interruptible(interval))
- break;
+ osnoise_sleep();
}
return 0;
@@ -2189,6 +2222,17 @@ static void osnoise_workload_stop(void)
if (osnoise_has_registered_instances())
return;
+ /*
+ * If callbacks were already disabled in a previous stop
+ * call, there is no need to disable them again.
+ *
+ * For instance, this happens when tracing is stopped via:
+ * echo 0 > tracing_on
+ * echo nop > current_tracer.
+ */
+ if (!trace_osnoise_callback_enabled)
+ return;
+
trace_osnoise_callback_enabled = false;
/*
* Make sure that ftrace_nmi_enter/exit() see
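osnoise_sleep() relies on schedule_hrtimeout_range() returning nonzero when the task was woken before the absolute deadline, so the loop re-sleeps until either the deadline passes or the kthread is asked to stop. The sleep-until idiom on its own (a sketch mirroring the loop added above; sleep_until_us is illustrative):

    static void sleep_until_us(u64 delay_us)
    {
            ktime_t wake_time = ktime_add_us(ktime_get(), delay_us);

            __set_current_state(TASK_INTERRUPTIBLE);
            /* Nonzero return means an early wakeup: sleep again unless stopping. */
            while (schedule_hrtimeout_range(&wake_time, 0, HRTIMER_MODE_ABS)) {
                    if (kthread_should_stop())
                            break;
            }
    }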
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 73d90179b51b..80863c6508e5 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -871,15 +871,15 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
switch (ptype) {
case PROBE_PRINT_NORMAL:
fmt = "(%lx)";
- arg = "REC->" FIELD_STRING_IP;
+ arg = ", REC->" FIELD_STRING_IP;
break;
case PROBE_PRINT_RETURN:
fmt = "(%lx <- %lx)";
- arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
+ arg = ", REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
break;
case PROBE_PRINT_EVENT:
- fmt = "(%u)";
- arg = "REC->" FIELD_STRING_TYPE;
+ fmt = "";
+ arg = "";
break;
default:
WARN_ON_ONCE(1);
@@ -903,7 +903,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
parg->type->fmt);
}
- pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", arg);
for (i = 0; i < tp->nr_args; i++) {
parg = tp->args + i;
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 99e7a5df025e..92cc149af0fd 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -38,7 +38,6 @@
#define FIELD_STRING_IP "__probe_ip"
#define FIELD_STRING_RETIP "__probe_ret_ip"
#define FIELD_STRING_FUNC "__probe_func"
-#define FIELD_STRING_TYPE "__probe_type"
#undef DEFINE_FIELD
#define DEFINE_FIELD(type, item, name, is_signed) \
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 5cd33be2031b..45796d8bd4b2 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -22,6 +22,7 @@ static DEFINE_MUTEX(sched_register_mutex);
static void
probe_sched_switch(void *ignore, bool preempt,
+ unsigned int prev_state,
struct task_struct *prev, struct task_struct *next)
{
int flags;
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 2402de520eca..46429f9a96fa 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -426,6 +426,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
static void notrace
probe_wakeup_sched_switch(void *ignore, bool preempt,
+ unsigned int prev_state,
struct task_struct *prev, struct task_struct *next)
{
struct trace_array_cpu *data;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index afd937a46496..abcadbe933bb 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -784,9 +784,7 @@ static struct fgraph_ops fgraph_ops __initdata = {
.retfunc = &trace_graph_return,
};
-#if defined(CONFIG_DYNAMIC_FTRACE) && \
- defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS)
-#define TEST_DIRECT_TRAMP
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
noinline __noclone static void trace_direct_tramp(void) { }
#endif
@@ -849,7 +847,7 @@ trace_selftest_startup_function_graph(struct tracer *trace,
goto out;
}
-#ifdef TEST_DIRECT_TRAMP
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
tracing_reset_online_cpus(&tr->array_buffer);
set_graph_array(tr);
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 7b32c356ebc5..06ea04d44685 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -190,6 +190,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
kfree(new);
} else {
hlist_add_head(&new->node, hashent);
+ get_user_ns(new->ns);
spin_unlock_irq(&ucounts_lock);
return new;
}
@@ -210,6 +211,7 @@ void put_ucounts(struct ucounts *ucounts)
if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) {
hlist_del_init(&ucounts->node);
spin_unlock_irqrestore(&ucounts_lock, flags);
+ put_user_ns(ucounts->ns);
kfree(ucounts);
}
}
@@ -348,7 +350,8 @@ bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsign
if (rlimit > LONG_MAX)
max = LONG_MAX;
for (iter = ucounts; iter; iter = iter->ns->ucounts) {
- if (get_ucounts_value(iter, type) > max)
+ long val = get_ucounts_value(iter, type);
+ if (val < 0 || val > max)
return true;
max = READ_ONCE(iter->ns->ucount_max[type]);
}
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 6b2e3ca7ee99..5481ba44a8d6 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -58,6 +58,18 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
cred->user_ns = user_ns;
}
+static unsigned long enforced_nproc_rlimit(void)
+{
+ unsigned long limit = RLIM_INFINITY;
+
+ /* Is RLIMIT_NPROC currently enforced? */
+ if (!uid_eq(current_uid(), GLOBAL_ROOT_UID) ||
+ (current_user_ns() != &init_user_ns))
+ limit = rlimit(RLIMIT_NPROC);
+
+ return limit;
+}
+
/*
* Create a new user namespace, deriving the creator from the user in the
* passed credentials, and replacing that user with the new root user for the
@@ -122,7 +134,7 @@ int create_user_ns(struct cred *new)
for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++) {
ns->ucount_max[i] = INT_MAX;
}
- set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC));
+ set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_NPROC, enforced_nproc_rlimit());
set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MSGQUEUE, rlimit(RLIMIT_MSGQUEUE));
set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_SIGPENDING, rlimit(RLIMIT_SIGPENDING));
set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MEMLOCK, rlimit(RLIMIT_MEMLOCK));
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index 9c9eb20dd2c5..3990e4df3d7b 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -54,6 +54,7 @@ static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe,
bit += page->index;
set_bit(bit, wqueue->notes_bitmap);
+ generic_pipe_buf_release(pipe, buf);
}
// No try_steal function => no stealing
@@ -112,7 +113,7 @@ static bool post_one_notification(struct watch_queue *wqueue,
buf->offset = offset;
buf->len = len;
buf->flags = PIPE_BUF_FLAG_WHOLE;
- pipe->head = head + 1;
+ smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */
if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
spin_unlock_irq(&pipe->rd_wait.lock);
@@ -219,7 +220,6 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
struct page **pages;
unsigned long *bitmap;
unsigned long user_bufs;
- unsigned int bmsize;
int ret, i, nr_pages;
if (!wqueue)
@@ -243,7 +243,8 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
goto error;
}
- ret = pipe_resize_ring(pipe, nr_notes);
+ nr_notes = nr_pages * WATCH_QUEUE_NOTES_PER_PAGE;
+ ret = pipe_resize_ring(pipe, roundup_pow_of_two(nr_notes));
if (ret < 0)
goto error;
@@ -258,21 +259,19 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
pages[i]->index = i * WATCH_QUEUE_NOTES_PER_PAGE;
}
- bmsize = (nr_notes + BITS_PER_LONG - 1) / BITS_PER_LONG;
- bmsize *= sizeof(unsigned long);
- bitmap = kmalloc(bmsize, GFP_KERNEL);
+ bitmap = bitmap_alloc(nr_notes, GFP_KERNEL);
if (!bitmap)
goto error_p;
- memset(bitmap, 0xff, bmsize);
+ bitmap_fill(bitmap, nr_notes);
wqueue->notes = pages;
wqueue->notes_bitmap = bitmap;
wqueue->nr_pages = nr_pages;
- wqueue->nr_notes = nr_pages * WATCH_QUEUE_NOTES_PER_PAGE;
+ wqueue->nr_notes = nr_notes;
return 0;
error_p:
- for (i = 0; i < nr_pages; i++)
+ while (--i >= 0)
__free_page(pages[i]);
kfree(pages);
error:
@@ -320,7 +319,7 @@ long watch_queue_set_filter(struct pipe_inode_info *pipe,
tf[i].info_mask & WATCH_INFO_LENGTH)
goto err_filter;
/* Ignore any unknown types */
- if (tf[i].type >= sizeof(wfilter->type_filter) * 8)
+ if (tf[i].type >= WATCH_TYPE__NR)
continue;
nr_filter++;
}
@@ -336,7 +335,7 @@ long watch_queue_set_filter(struct pipe_inode_info *pipe,
q = wfilter->filters;
for (i = 0; i < filter.nr_filters; i++) {
- if (tf[i].type >= sizeof(wfilter->type_filter) * BITS_PER_LONG)
+ if (tf[i].type >= WATCH_TYPE__NR)
continue;
q->type = tf[i].type;
@@ -371,6 +370,7 @@ static void __put_watch_queue(struct kref *kref)
for (i = 0; i < wqueue->nr_pages; i++)
__free_page(wqueue->notes[i]);
+ bitmap_free(wqueue->notes_bitmap);
wfilter = rcu_access_pointer(wqueue->filter);
if (wfilter)
@@ -395,6 +395,7 @@ static void free_watch(struct rcu_head *rcu)
put_watch_queue(rcu_access_pointer(watch->queue));
atomic_dec(&watch->cred->user->nr_watches);
put_cred(watch->cred);
+ kfree(watch);
}
static void __put_watch(struct kref *kref)
@@ -566,7 +567,7 @@ void watch_queue_clear(struct watch_queue *wqueue)
rcu_read_lock();
spin_lock_bh(&wqueue->lock);
- /* Prevent new additions and prevent notifications from happening */
+ /* Prevent new notifications from being stored. */
wqueue->defunct = true;
while (!hlist_empty(&wqueue->watches)) {
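The watch_queue sizing fix also swaps the open-coded bitmap allocation for the dedicated helpers, which take a size in bits and remove the manual word-rounding arithmetic. The new idiom in isolation (a sketch; alloc_full_bitmap is illustrative):

    #include <linux/bitmap.h>
    #include <linux/slab.h>

    static unsigned long *alloc_full_bitmap(unsigned int nr_bits)
    {
            unsigned long *bitmap = bitmap_alloc(nr_bits, GFP_KERNEL); /* sized in bits */

            if (bitmap)
                    bitmap_fill(bitmap, nr_bits);   /* set all nr_bits bits */
            return bitmap;                          /* release with bitmap_free() */
    }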
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 99afb88d2e85..9166220457bc 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -848,7 +848,7 @@ void __init lockup_detector_init(void)
pr_info("Disabling watchdog on nohz_full cores by default\n");
cpumask_copy(&watchdog_cpumask,
- housekeeping_cpumask(HK_FLAG_TIMER));
+ housekeeping_cpumask(HK_TYPE_TIMER));
if (!watchdog_nmi_probe())
nmi_watchdog_available = true;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 33f1106b4f99..52e9abbb7759 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -6006,13 +6006,13 @@ static void __init wq_numa_init(void)
void __init workqueue_init_early(void)
{
int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
- int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
int i, cpu;
BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
- cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(hk_flags));
+ cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_WQ));
+ cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_DOMAIN));
pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
diff --git a/lib/Kconfig b/lib/Kconfig
index c80fde816a7e..9b5a692ce00c 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -45,7 +45,6 @@ config BITREVERSE
config HAVE_ARCH_BITREVERSE
bool
default n
- depends on BITREVERSE
help
This option enables the use of hardware bit-reversal instructions on
architectures which support such operations.
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 14b89aa37c5c..a5556ab05240 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2214,9 +2214,6 @@ config TEST_UUID
config TEST_XARRAY
tristate "Test the XArray code at runtime"
-config TEST_OVERFLOW
- tristate "Test check_*_overflow() functions at runtime"
-
config TEST_RHASHTABLE
tristate "Perform selftest on resizable hash table"
help
@@ -2501,6 +2498,30 @@ config MEMCPY_KUNIT_TEST
If unsure, say N.
+config OVERFLOW_KUNIT_TEST
+ tristate "Test check_*_overflow() functions at runtime" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ Builds unit tests for the check_*_overflow(), size_*(), allocation, and
+ related functions.
+
+ For more information on KUnit and unit tests in general please refer
+ to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+ If unsure, say N.
+
+config STACKINIT_KUNIT_TEST
+ tristate "Test level of stack variable initialization" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ Test if the kernel is zero-initializing stack variables and
+ padding. Coverage is controlled by compiler flags,
+ CONFIG_INIT_STACK_ALL_PATTERN, CONFIG_INIT_STACK_ALL_ZERO,
+ CONFIG_GCC_PLUGIN_STRUCTLEAK, CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF,
+ or CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL.
+
config TEST_UDELAY
tristate "udelay test driver"
help
@@ -2592,17 +2613,6 @@ config TEST_OBJAGG
Enable this option to test object aggregation manager on boot
(or module load).
-
-config TEST_STACKINIT
- tristate "Test level of stack variable initialization"
- help
- Test if the kernel is zero-initializing stack variables and
- padding. Coverage is controlled by compiler flags,
- CONFIG_GCC_PLUGIN_STRUCTLEAK, CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF,
- or CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL.
-
- If unsure, say N.
-
config TEST_MEMINIT
tristate "Test heap/page initialization"
help
diff --git a/lib/Kconfig.kfence b/lib/Kconfig.kfence
index 912f252a41fc..459dda9ef619 100644
--- a/lib/Kconfig.kfence
+++ b/lib/Kconfig.kfence
@@ -45,6 +45,18 @@ config KFENCE_NUM_OBJECTS
pages are required; with one containing the object and two adjacent
ones used as guard pages.
+config KFENCE_DEFERRABLE
+ bool "Use a deferrable timer to trigger allocations"
+ help
+ Use a deferrable timer to trigger allocations. This avoids forcing
+ CPU wake-ups if the system is idle, at the risk of a less predictable
+ sample interval.
+
+ Warning: The KUnit test suite fails with this option enabled, due to
+ the unpredictability of the sample interval!
+
+ Say N if you are unsure.
+
config KFENCE_STATIC_KEYS
bool "Use static keys to set up allocations" if EXPERT
depends on JUMP_LABEL
diff --git a/lib/Makefile b/lib/Makefile
index 300f569c626b..353bc09ce38d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -77,7 +77,6 @@ obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o
obj-$(CONFIG_TEST_MIN_HEAP) += test_min_heap.o
obj-$(CONFIG_TEST_LKM) += test_module.o
obj-$(CONFIG_TEST_VMALLOC) += test_vmalloc.o
-obj-$(CONFIG_TEST_OVERFLOW) += test_overflow.o
obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o
obj-$(CONFIG_TEST_SORT) += test_sort.o
obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
@@ -94,8 +93,6 @@ obj-$(CONFIG_TEST_KMOD) += test_kmod.o
obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o
obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o
obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o
-CFLAGS_test_stackinit.o += $(call cc-disable-warning, switch-unreachable)
-obj-$(CONFIG_TEST_STACKINIT) += test_stackinit.o
obj-$(CONFIG_TEST_BLACKHOLE_DEV) += test_blackhole_dev.o
obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o
obj-$(CONFIG_TEST_LOCKUP) += test_lockup.o
@@ -363,6 +360,9 @@ obj-$(CONFIG_BITS_TEST) += test_bits.o
obj-$(CONFIG_CMDLINE_KUNIT_TEST) += cmdline_kunit.o
obj-$(CONFIG_SLUB_KUNIT_TEST) += slub_kunit.o
obj-$(CONFIG_MEMCPY_KUNIT_TEST) += memcpy_kunit.o
+obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o
+CFLAGS_stackinit_kunit.o += $(call cc-disable-warning, switch-unreachable)
+obj-$(CONFIG_STACKINIT_KUNIT_TEST) += stackinit_kunit.o
obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o
diff --git a/lib/crc32.c b/lib/crc32.c
index 2a68dfd3b96c..5649847d0a8d 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -194,13 +194,11 @@ u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
#else
u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
{
- return crc32_le_generic(crc, p, len,
- (const u32 (*)[256])crc32table_le, CRC32_POLY_LE);
+ return crc32_le_generic(crc, p, len, crc32table_le, CRC32_POLY_LE);
}
u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
{
- return crc32_le_generic(crc, p, len,
- (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE);
+ return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE);
}
#endif
EXPORT_SYMBOL(crc32_le);
@@ -208,6 +206,7 @@ EXPORT_SYMBOL(__crc32c_le);
u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le);
u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le);
+u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32_be);
/*
* This multiplies the polynomials x and y modulo the given modulus.
@@ -332,15 +331,14 @@ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p,
}
#if CRC_BE_BITS == 1
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
+u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len)
{
return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE);
}
#else
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
+u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len)
{
- return crc32_be_generic(crc, p, len,
- (const u32 (*)[256])crc32table_be, CRC32_POLY_BE);
+ return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE);
}
#endif
EXPORT_SYMBOL(crc32_be);
diff --git a/lib/crc32test.c b/lib/crc32test.c
index 61ddce2cff77..9b4af79412c4 100644
--- a/lib/crc32test.c
+++ b/lib/crc32test.c
@@ -675,7 +675,7 @@ static int __init crc32c_test(void)
/* pre-warm the cache */
for (i = 0; i < 100; i++) {
- bytes += 2*test[i].length;
+ bytes += test[i].length;
crc ^= __crc32c_le(test[i].crc, test_buf +
test[i].start, test[i].length);
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index e8e525650cf2..379a66d7f504 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -123,6 +123,9 @@ config CRYPTO_LIB_CHACHA20POLY1305
config CRYPTO_LIB_SHA256
tristate
+config CRYPTO_LIB_SM3
+ tristate
+
config CRYPTO_LIB_SM4
tristate
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index ed43a41f2dcc..6c872d05d1e6 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -37,6 +37,9 @@ libpoly1305-y += poly1305.o
obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
libsha256-y := sha256.o
+obj-$(CONFIG_CRYPTO_LIB_SM3) += libsm3.o
+libsm3-y := sm3.o
+
obj-$(CONFIG_CRYPTO_LIB_SM4) += libsm4.o
libsm4-y := sm4.o
diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c
index 9364f79937b8..c71c09621c09 100644
--- a/lib/crypto/blake2s.c
+++ b/lib/crypto/blake2s.c
@@ -18,14 +18,14 @@
void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
{
- __blake2s_update(state, in, inlen, blake2s_compress);
+ __blake2s_update(state, in, inlen, false);
}
EXPORT_SYMBOL(blake2s_update);
void blake2s_final(struct blake2s_state *state, u8 *out)
{
WARN_ON(IS_ENABLED(DEBUG) && !out);
- __blake2s_final(state, out, blake2s_compress);
+ __blake2s_final(state, out, false);
memzero_explicit(state, sizeof(*state));
}
EXPORT_SYMBOL(blake2s_final);
diff --git a/lib/crypto/sm3.c b/lib/crypto/sm3.c
new file mode 100644
index 000000000000..d473e358a873
--- /dev/null
+++ b/lib/crypto/sm3.c
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * SM3 secure hash, as specified by OSCCA GM/T 0004-2012 SM3 and described
+ * at https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02
+ *
+ * Copyright (C) 2017 ARM Limited or its affiliates.
+ * Copyright (C) 2017 Gilad Ben-Yossef <gilad@benyossef.com>
+ * Copyright (C) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/module.h>
+#include <asm/unaligned.h>
+#include <crypto/sm3.h>
+
+static const u32 ____cacheline_aligned K[64] = {
+ 0x79cc4519, 0xf3988a32, 0xe7311465, 0xce6228cb,
+ 0x9cc45197, 0x3988a32f, 0x7311465e, 0xe6228cbc,
+ 0xcc451979, 0x988a32f3, 0x311465e7, 0x6228cbce,
+ 0xc451979c, 0x88a32f39, 0x11465e73, 0x228cbce6,
+ 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c,
+ 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce,
+ 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec,
+ 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5,
+ 0x7a879d8a, 0xf50f3b14, 0xea1e7629, 0xd43cec53,
+ 0xa879d8a7, 0x50f3b14f, 0xa1e7629e, 0x43cec53d,
+ 0x879d8a7a, 0x0f3b14f5, 0x1e7629ea, 0x3cec53d4,
+ 0x79d8a7a8, 0xf3b14f50, 0xe7629ea1, 0xcec53d43,
+ 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c,
+ 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce,
+ 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec,
+ 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5
+};
+
+/*
+ * Transform the message X which consists of 16 32-bit-words. See
+ * GM/T 004-2012 for details.
+ */
+#define R(i, a, b, c, d, e, f, g, h, t, w1, w2) \
+ do { \
+ ss1 = rol32((rol32((a), 12) + (e) + (t)), 7); \
+ ss2 = ss1 ^ rol32((a), 12); \
+ d += FF ## i(a, b, c) + ss2 + ((w1) ^ (w2)); \
+ h += GG ## i(e, f, g) + ss1 + (w1); \
+ b = rol32((b), 9); \
+ f = rol32((f), 19); \
+ h = P0((h)); \
+ } while (0)
+
+#define R1(a, b, c, d, e, f, g, h, t, w1, w2) \
+ R(1, a, b, c, d, e, f, g, h, t, w1, w2)
+#define R2(a, b, c, d, e, f, g, h, t, w1, w2) \
+ R(2, a, b, c, d, e, f, g, h, t, w1, w2)
+
+#define FF1(x, y, z) (x ^ y ^ z)
+#define FF2(x, y, z) ((x & y) | (x & z) | (y & z))
+
+#define GG1(x, y, z) FF1(x, y, z)
+#define GG2(x, y, z) ((x & y) | (~x & z))
+
+/* Message expansion */
+#define P0(x) ((x) ^ rol32((x), 9) ^ rol32((x), 17))
+#define P1(x) ((x) ^ rol32((x), 15) ^ rol32((x), 23))
+#define I(i) (W[i] = get_unaligned_be32(data + i * 4))
+#define W1(i) (W[i & 0x0f])
+#define W2(i) (W[i & 0x0f] = \
+ P1(W[i & 0x0f] \
+ ^ W[(i-9) & 0x0f] \
+ ^ rol32(W[(i-3) & 0x0f], 15)) \
+ ^ rol32(W[(i-13) & 0x0f], 7) \
+ ^ W[(i-6) & 0x0f])
+
+static void sm3_transform(struct sm3_state *sctx, u8 const *data, u32 W[16])
+{
+ u32 a, b, c, d, e, f, g, h, ss1, ss2;
+
+ a = sctx->state[0];
+ b = sctx->state[1];
+ c = sctx->state[2];
+ d = sctx->state[3];
+ e = sctx->state[4];
+ f = sctx->state[5];
+ g = sctx->state[6];
+ h = sctx->state[7];
+
+ R1(a, b, c, d, e, f, g, h, K[0], I(0), I(4));
+ R1(d, a, b, c, h, e, f, g, K[1], I(1), I(5));
+ R1(c, d, a, b, g, h, e, f, K[2], I(2), I(6));
+ R1(b, c, d, a, f, g, h, e, K[3], I(3), I(7));
+ R1(a, b, c, d, e, f, g, h, K[4], W1(4), I(8));
+ R1(d, a, b, c, h, e, f, g, K[5], W1(5), I(9));
+ R1(c, d, a, b, g, h, e, f, K[6], W1(6), I(10));
+ R1(b, c, d, a, f, g, h, e, K[7], W1(7), I(11));
+ R1(a, b, c, d, e, f, g, h, K[8], W1(8), I(12));
+ R1(d, a, b, c, h, e, f, g, K[9], W1(9), I(13));
+ R1(c, d, a, b, g, h, e, f, K[10], W1(10), I(14));
+ R1(b, c, d, a, f, g, h, e, K[11], W1(11), I(15));
+ R1(a, b, c, d, e, f, g, h, K[12], W1(12), W2(16));
+ R1(d, a, b, c, h, e, f, g, K[13], W1(13), W2(17));
+ R1(c, d, a, b, g, h, e, f, K[14], W1(14), W2(18));
+ R1(b, c, d, a, f, g, h, e, K[15], W1(15), W2(19));
+
+ R2(a, b, c, d, e, f, g, h, K[16], W1(16), W2(20));
+ R2(d, a, b, c, h, e, f, g, K[17], W1(17), W2(21));
+ R2(c, d, a, b, g, h, e, f, K[18], W1(18), W2(22));
+ R2(b, c, d, a, f, g, h, e, K[19], W1(19), W2(23));
+ R2(a, b, c, d, e, f, g, h, K[20], W1(20), W2(24));
+ R2(d, a, b, c, h, e, f, g, K[21], W1(21), W2(25));
+ R2(c, d, a, b, g, h, e, f, K[22], W1(22), W2(26));
+ R2(b, c, d, a, f, g, h, e, K[23], W1(23), W2(27));
+ R2(a, b, c, d, e, f, g, h, K[24], W1(24), W2(28));
+ R2(d, a, b, c, h, e, f, g, K[25], W1(25), W2(29));
+ R2(c, d, a, b, g, h, e, f, K[26], W1(26), W2(30));
+ R2(b, c, d, a, f, g, h, e, K[27], W1(27), W2(31));
+ R2(a, b, c, d, e, f, g, h, K[28], W1(28), W2(32));
+ R2(d, a, b, c, h, e, f, g, K[29], W1(29), W2(33));
+ R2(c, d, a, b, g, h, e, f, K[30], W1(30), W2(34));
+ R2(b, c, d, a, f, g, h, e, K[31], W1(31), W2(35));
+
+ R2(a, b, c, d, e, f, g, h, K[32], W1(32), W2(36));
+ R2(d, a, b, c, h, e, f, g, K[33], W1(33), W2(37));
+ R2(c, d, a, b, g, h, e, f, K[34], W1(34), W2(38));
+ R2(b, c, d, a, f, g, h, e, K[35], W1(35), W2(39));
+ R2(a, b, c, d, e, f, g, h, K[36], W1(36), W2(40));
+ R2(d, a, b, c, h, e, f, g, K[37], W1(37), W2(41));
+ R2(c, d, a, b, g, h, e, f, K[38], W1(38), W2(42));
+ R2(b, c, d, a, f, g, h, e, K[39], W1(39), W2(43));
+ R2(a, b, c, d, e, f, g, h, K[40], W1(40), W2(44));
+ R2(d, a, b, c, h, e, f, g, K[41], W1(41), W2(45));
+ R2(c, d, a, b, g, h, e, f, K[42], W1(42), W2(46));
+ R2(b, c, d, a, f, g, h, e, K[43], W1(43), W2(47));
+ R2(a, b, c, d, e, f, g, h, K[44], W1(44), W2(48));
+ R2(d, a, b, c, h, e, f, g, K[45], W1(45), W2(49));
+ R2(c, d, a, b, g, h, e, f, K[46], W1(46), W2(50));
+ R2(b, c, d, a, f, g, h, e, K[47], W1(47), W2(51));
+
+ R2(a, b, c, d, e, f, g, h, K[48], W1(48), W2(52));
+ R2(d, a, b, c, h, e, f, g, K[49], W1(49), W2(53));
+ R2(c, d, a, b, g, h, e, f, K[50], W1(50), W2(54));
+ R2(b, c, d, a, f, g, h, e, K[51], W1(51), W2(55));
+ R2(a, b, c, d, e, f, g, h, K[52], W1(52), W2(56));
+ R2(d, a, b, c, h, e, f, g, K[53], W1(53), W2(57));
+ R2(c, d, a, b, g, h, e, f, K[54], W1(54), W2(58));
+ R2(b, c, d, a, f, g, h, e, K[55], W1(55), W2(59));
+ R2(a, b, c, d, e, f, g, h, K[56], W1(56), W2(60));
+ R2(d, a, b, c, h, e, f, g, K[57], W1(57), W2(61));
+ R2(c, d, a, b, g, h, e, f, K[58], W1(58), W2(62));
+ R2(b, c, d, a, f, g, h, e, K[59], W1(59), W2(63));
+ R2(a, b, c, d, e, f, g, h, K[60], W1(60), W2(64));
+ R2(d, a, b, c, h, e, f, g, K[61], W1(61), W2(65));
+ R2(c, d, a, b, g, h, e, f, K[62], W1(62), W2(66));
+ R2(b, c, d, a, f, g, h, e, K[63], W1(63), W2(67));
+
+ sctx->state[0] ^= a;
+ sctx->state[1] ^= b;
+ sctx->state[2] ^= c;
+ sctx->state[3] ^= d;
+ sctx->state[4] ^= e;
+ sctx->state[5] ^= f;
+ sctx->state[6] ^= g;
+ sctx->state[7] ^= h;
+}
+#undef R
+#undef R1
+#undef R2
+#undef I
+#undef W1
+#undef W2
+
+static inline void sm3_block(struct sm3_state *sctx,
+ u8 const *data, int blocks, u32 W[16])
+{
+ while (blocks--) {
+ sm3_transform(sctx, data, W);
+ data += SM3_BLOCK_SIZE;
+ }
+}
+
+void sm3_update(struct sm3_state *sctx, const u8 *data, unsigned int len)
+{
+ unsigned int partial = sctx->count % SM3_BLOCK_SIZE;
+ u32 W[16];
+
+ sctx->count += len;
+
+ if ((partial + len) >= SM3_BLOCK_SIZE) {
+ int blocks;
+
+ if (partial) {
+ int p = SM3_BLOCK_SIZE - partial;
+
+ memcpy(sctx->buffer + partial, data, p);
+ data += p;
+ len -= p;
+
+ sm3_block(sctx, sctx->buffer, 1, W);
+ }
+
+ blocks = len / SM3_BLOCK_SIZE;
+ len %= SM3_BLOCK_SIZE;
+
+ if (blocks) {
+ sm3_block(sctx, data, blocks, W);
+ data += blocks * SM3_BLOCK_SIZE;
+ }
+
+ memzero_explicit(W, sizeof(W));
+
+ partial = 0;
+ }
+ if (len)
+ memcpy(sctx->buffer + partial, data, len);
+}
+EXPORT_SYMBOL_GPL(sm3_update);
+
+void sm3_final(struct sm3_state *sctx, u8 *out)
+{
+ const int bit_offset = SM3_BLOCK_SIZE - sizeof(u64);
+ __be64 *bits = (__be64 *)(sctx->buffer + bit_offset);
+ __be32 *digest = (__be32 *)out;
+ unsigned int partial = sctx->count % SM3_BLOCK_SIZE;
+ u32 W[16];
+ int i;
+
+ sctx->buffer[partial++] = 0x80;
+ if (partial > bit_offset) {
+ memset(sctx->buffer + partial, 0, SM3_BLOCK_SIZE - partial);
+ partial = 0;
+
+ sm3_block(sctx, sctx->buffer, 1, W);
+ }
+
+ memset(sctx->buffer + partial, 0, bit_offset - partial);
+ *bits = cpu_to_be64(sctx->count << 3);
+ sm3_block(sctx, sctx->buffer, 1, W);
+
+ for (i = 0; i < 8; i++)
+ put_unaligned_be32(sctx->state[i], digest++);
+
+ /* Zeroize sensitive information. */
+ memzero_explicit(W, sizeof(W));
+ memzero_explicit(sctx, sizeof(*sctx));
+}
+EXPORT_SYMBOL_GPL(sm3_final);
+
+MODULE_DESCRIPTION("Generic SM3 library");
+MODULE_LICENSE("GPL v2");
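The new library exports follow the usual init/update/final hash lifecycle. Assuming the sm3_init() inline that crypto/sm3.h provides alongside these functions, a one-shot digest would look like:

    #include <crypto/sm3.h>

    static void sm3_digest(const u8 *data, unsigned int len,
                           u8 out[SM3_DIGEST_SIZE])
    {
            struct sm3_state sctx;

            sm3_init(&sctx);                /* assumed inline from crypto/sm3.h */
            sm3_update(&sctx, data, len);
            sm3_final(&sctx, out);          /* writes 32 bytes and zeroizes sctx */
    }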
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index b0e0acdf96c1..6dd5330f7a99 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -414,6 +414,7 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by
return 0;
buf->ops = &page_cache_pipe_buf_ops;
+ buf->flags = 0;
get_page(page);
buf->page = page;
buf->offset = offset;
@@ -577,6 +578,7 @@ static size_t push_pipe(struct iov_iter *i, size_t size,
break;
buf->ops = &default_pipe_buf_ops;
+ buf->flags = 0;
buf->page = page;
buf->offset = 0;
buf->len = min_t(ssize_t, left, PAGE_SIZE);
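These two one-line hunks are the fix for CVE-2022-0847 ("Dirty Pipe"): pipe_buffer slots in the ring are recycled, and a freshly claimed slot that does not clear flags can inherit PIPE_BUF_FLAG_CAN_MERGE from a previous occupant, letting later pipe writes merge into a page cache page they were never allowed to modify. The stale-field hazard, reduced to a runnable toy:

    #include <stdio.h>

    #define FLAG_CAN_MERGE 0x10     /* stands in for PIPE_BUF_FLAG_CAN_MERGE */

    struct buf { unsigned int flags; const char *page; };

    int main(void)
    {
            struct buf slot = { .flags = FLAG_CAN_MERGE }; /* left over from prior use */

            /* Claiming the slot while forgetting to reset flags: */
            slot.page = "fresh page";
            /* slot.flags = 0;   <-- the line the patch adds */

            if (slot.flags & FLAG_CAN_MERGE)
                    printf("stale CAN_MERGE leaked into the new buffer\n");
            return 0;
    }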
diff --git a/lib/kunit/try-catch.c b/lib/kunit/try-catch.c
index be38a2c5ecc2..f7825991d576 100644
--- a/lib/kunit/try-catch.c
+++ b/lib/kunit/try-catch.c
@@ -52,7 +52,7 @@ static unsigned long kunit_test_timeout(void)
* If tests timeout due to exceeding sysctl_hung_task_timeout_secs,
* the task will be killed and an oops generated.
*/
- return 300 * MSEC_PER_SEC; /* 5 min */
+ return 300 * msecs_to_jiffies(MSEC_PER_SEC); /* 5 min */
}
void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context)
@@ -78,6 +78,7 @@ void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context)
if (time_remaining == 0) {
kunit_err(test, "try timed out\n");
try_catch->try_result = -ETIMEDOUT;
+ kthread_stop(task_struct);
}
exit_code = try_catch->try_result;
diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c
index 142b680835df..070ba784c9f1 100644
--- a/lib/mpi/mpi-bit.c
+++ b/lib/mpi/mpi-bit.c
@@ -242,6 +242,7 @@ void mpi_rshift(MPI x, MPI a, unsigned int n)
}
MPN_NORMALIZE(x->d, x->nlimbs);
}
+EXPORT_SYMBOL_GPL(mpi_rshift);
/****************
* Shift A by COUNT limbs to the left
diff --git a/lib/test_overflow.c b/lib/overflow_kunit.c
index 7a4b6f6c5473..475f0c064bf6 100644
--- a/lib/test_overflow.c
+++ b/lib/overflow_kunit.c
@@ -1,11 +1,13 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Test cases for arithmetic overflow checks.
+ * Test cases for arithmetic overflow checks. See:
+ * https://www.kernel.org/doc/html/latest/dev-tools/kunit/kunit-tool.html#configuring-building-and-running-tests
+ * ./tools/testing/kunit/kunit.py run overflow [--raw_output]
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <kunit/test.h>
#include <linux/device.h>
-#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
@@ -19,7 +21,7 @@
t a, b; \
t sum, diff, prod; \
bool s_of, d_of, p_of; \
- } t ## _tests[] __initconst
+ } t ## _tests[]
DEFINE_TEST_ARRAY(u8) = {
{0, 0, 0, 0, 0, false, false, false},
@@ -220,43 +222,31 @@ DEFINE_TEST_ARRAY(s64) = {
bool _of; \
\
_of = check_ ## op ## _overflow(a, b, &_r); \
- if (_of != of) { \
- pr_warn("expected "fmt" "sym" "fmt \
- " to%s overflow (type %s)\n", \
- a, b, of ? "" : " not", #t); \
- err = 1; \
- } \
- if (_r != r) { \
- pr_warn("expected "fmt" "sym" "fmt" == " \
- fmt", got "fmt" (type %s)\n", \
- a, b, r, _r, #t); \
- err = 1; \
- } \
+ KUNIT_EXPECT_EQ_MSG(test, _of, of, \
+ "expected "fmt" "sym" "fmt" to%s overflow (type %s)\n", \
+ a, b, of ? "" : " not", #t); \
+ KUNIT_EXPECT_EQ_MSG(test, _r, r, \
+ "expected "fmt" "sym" "fmt" == "fmt", got "fmt" (type %s)\n", \
+ a, b, r, _r, #t); \
} while (0)
#define DEFINE_TEST_FUNC(t, fmt) \
-static int __init do_test_ ## t(const struct test_ ## t *p) \
+static void do_test_ ## t(struct kunit *test, const struct test_ ## t *p) \
{ \
- int err = 0; \
- \
check_one_op(t, fmt, add, "+", p->a, p->b, p->sum, p->s_of); \
check_one_op(t, fmt, add, "+", p->b, p->a, p->sum, p->s_of); \
check_one_op(t, fmt, sub, "-", p->a, p->b, p->diff, p->d_of); \
check_one_op(t, fmt, mul, "*", p->a, p->b, p->prod, p->p_of); \
check_one_op(t, fmt, mul, "*", p->b, p->a, p->prod, p->p_of); \
- \
- return err; \
} \
\
-static int __init test_ ## t ## _overflow(void) { \
- int err = 0; \
+static void t ## _overflow_test(struct kunit *test) { \
unsigned i; \
\
- pr_info("%-3s: %zu arithmetic tests\n", #t, \
- ARRAY_SIZE(t ## _tests)); \
for (i = 0; i < ARRAY_SIZE(t ## _tests); ++i) \
- err |= do_test_ ## t(&t ## _tests[i]); \
- return err; \
+ do_test_ ## t(test, &t ## _tests[i]); \
+ kunit_info(test, "%zu %s arithmetic tests finished\n", \
+ ARRAY_SIZE(t ## _tests), #t); \
}
DEFINE_TEST_FUNC(u8, "%d");
@@ -270,199 +260,176 @@ DEFINE_TEST_FUNC(u64, "%llu");
DEFINE_TEST_FUNC(s64, "%lld");
#endif
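The conversion replaces hand-rolled pr_warn()-plus-error-flag bookkeeping with KUnit expectations, which record a failure and let the test keep running. A minimal standalone test in the same shape (suite and case names are illustrative):

    #include <kunit/test.h>
    #include <linux/overflow.h>

    static void add_overflow_example(struct kunit *test)
    {
            u8 result;

            /* 255 + 1 must be reported as a u8 overflow. */
            KUNIT_EXPECT_TRUE(test, check_add_overflow((u8)255, (u8)1, &result));
            KUNIT_EXPECT_EQ(test, result, (u8)0);   /* wrapped value */
    }

    static struct kunit_case example_cases[] = {
            KUNIT_CASE(add_overflow_example),
            {}
    };

    static struct kunit_suite example_suite = {
            .name = "overflow-example",
            .test_cases = example_cases,
    };
    kunit_test_suite(example_suite);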
-static int __init test_overflow_calculation(void)
-{
- int err = 0;
-
- err |= test_u8_overflow();
- err |= test_s8_overflow();
- err |= test_u16_overflow();
- err |= test_s16_overflow();
- err |= test_u32_overflow();
- err |= test_s32_overflow();
-#if BITS_PER_LONG == 64
- err |= test_u64_overflow();
- err |= test_s64_overflow();
-#endif
-
- return err;
-}
-
-static int __init test_overflow_shift(void)
+static void overflow_shift_test(struct kunit *test)
{
- int err = 0;
+ int count = 0;
/* Args are: value, shift, type, expected result, overflow expected */
-#define TEST_ONE_SHIFT(a, s, t, expect, of) ({ \
- int __failed = 0; \
+#define TEST_ONE_SHIFT(a, s, t, expect, of) do { \
typeof(a) __a = (a); \
typeof(s) __s = (s); \
t __e = (expect); \
t __d; \
bool __of = check_shl_overflow(__a, __s, &__d); \
if (__of != of) { \
- pr_warn("expected (%s)(%s << %s) to%s overflow\n", \
+ KUNIT_EXPECT_EQ_MSG(test, __of, of, \
+ "expected (%s)(%s << %s) to%s overflow\n", \
#t, #a, #s, of ? "" : " not"); \
- __failed = 1; \
} else if (!__of && __d != __e) { \
- pr_warn("expected (%s)(%s << %s) == %s\n", \
+ KUNIT_EXPECT_EQ_MSG(test, __d, __e, \
+ "expected (%s)(%s << %s) == %s\n", \
#t, #a, #s, #expect); \
if ((t)-1 < 0) \
- pr_warn("got %lld\n", (s64)__d); \
+ kunit_info(test, "got %lld\n", (s64)__d); \
else \
- pr_warn("got %llu\n", (u64)__d); \
- __failed = 1; \
+ kunit_info(test, "got %llu\n", (u64)__d); \
} \
- if (!__failed) \
- pr_info("ok: (%s)(%s << %s) == %s\n", #t, #a, #s, \
- of ? "overflow" : #expect); \
- __failed; \
-})
+ count++; \
+} while (0)
/* Sane shifts. */
- err |= TEST_ONE_SHIFT(1, 0, u8, 1 << 0, false);
- err |= TEST_ONE_SHIFT(1, 4, u8, 1 << 4, false);
- err |= TEST_ONE_SHIFT(1, 7, u8, 1 << 7, false);
- err |= TEST_ONE_SHIFT(0xF, 4, u8, 0xF << 4, false);
- err |= TEST_ONE_SHIFT(1, 0, u16, 1 << 0, false);
- err |= TEST_ONE_SHIFT(1, 10, u16, 1 << 10, false);
- err |= TEST_ONE_SHIFT(1, 15, u16, 1 << 15, false);
- err |= TEST_ONE_SHIFT(0xFF, 8, u16, 0xFF << 8, false);
- err |= TEST_ONE_SHIFT(1, 0, int, 1 << 0, false);
- err |= TEST_ONE_SHIFT(1, 16, int, 1 << 16, false);
- err |= TEST_ONE_SHIFT(1, 30, int, 1 << 30, false);
- err |= TEST_ONE_SHIFT(1, 0, s32, 1 << 0, false);
- err |= TEST_ONE_SHIFT(1, 16, s32, 1 << 16, false);
- err |= TEST_ONE_SHIFT(1, 30, s32, 1 << 30, false);
- err |= TEST_ONE_SHIFT(1, 0, unsigned int, 1U << 0, false);
- err |= TEST_ONE_SHIFT(1, 20, unsigned int, 1U << 20, false);
- err |= TEST_ONE_SHIFT(1, 31, unsigned int, 1U << 31, false);
- err |= TEST_ONE_SHIFT(0xFFFFU, 16, unsigned int, 0xFFFFU << 16, false);
- err |= TEST_ONE_SHIFT(1, 0, u32, 1U << 0, false);
- err |= TEST_ONE_SHIFT(1, 20, u32, 1U << 20, false);
- err |= TEST_ONE_SHIFT(1, 31, u32, 1U << 31, false);
- err |= TEST_ONE_SHIFT(0xFFFFU, 16, u32, 0xFFFFU << 16, false);
- err |= TEST_ONE_SHIFT(1, 0, u64, 1ULL << 0, false);
- err |= TEST_ONE_SHIFT(1, 40, u64, 1ULL << 40, false);
- err |= TEST_ONE_SHIFT(1, 63, u64, 1ULL << 63, false);
- err |= TEST_ONE_SHIFT(0xFFFFFFFFULL, 32, u64,
- 0xFFFFFFFFULL << 32, false);
+ TEST_ONE_SHIFT(1, 0, u8, 1 << 0, false);
+ TEST_ONE_SHIFT(1, 4, u8, 1 << 4, false);
+ TEST_ONE_SHIFT(1, 7, u8, 1 << 7, false);
+ TEST_ONE_SHIFT(0xF, 4, u8, 0xF << 4, false);
+ TEST_ONE_SHIFT(1, 0, u16, 1 << 0, false);
+ TEST_ONE_SHIFT(1, 10, u16, 1 << 10, false);
+ TEST_ONE_SHIFT(1, 15, u16, 1 << 15, false);
+ TEST_ONE_SHIFT(0xFF, 8, u16, 0xFF << 8, false);
+ TEST_ONE_SHIFT(1, 0, int, 1 << 0, false);
+ TEST_ONE_SHIFT(1, 16, int, 1 << 16, false);
+ TEST_ONE_SHIFT(1, 30, int, 1 << 30, false);
+ TEST_ONE_SHIFT(1, 0, s32, 1 << 0, false);
+ TEST_ONE_SHIFT(1, 16, s32, 1 << 16, false);
+ TEST_ONE_SHIFT(1, 30, s32, 1 << 30, false);
+ TEST_ONE_SHIFT(1, 0, unsigned int, 1U << 0, false);
+ TEST_ONE_SHIFT(1, 20, unsigned int, 1U << 20, false);
+ TEST_ONE_SHIFT(1, 31, unsigned int, 1U << 31, false);
+ TEST_ONE_SHIFT(0xFFFFU, 16, unsigned int, 0xFFFFU << 16, false);
+ TEST_ONE_SHIFT(1, 0, u32, 1U << 0, false);
+ TEST_ONE_SHIFT(1, 20, u32, 1U << 20, false);
+ TEST_ONE_SHIFT(1, 31, u32, 1U << 31, false);
+ TEST_ONE_SHIFT(0xFFFFU, 16, u32, 0xFFFFU << 16, false);
+ TEST_ONE_SHIFT(1, 0, u64, 1ULL << 0, false);
+ TEST_ONE_SHIFT(1, 40, u64, 1ULL << 40, false);
+ TEST_ONE_SHIFT(1, 63, u64, 1ULL << 63, false);
+ TEST_ONE_SHIFT(0xFFFFFFFFULL, 32, u64, 0xFFFFFFFFULL << 32, false);
/* Sane shift: start and end with 0, without a too-wide shift. */
- err |= TEST_ONE_SHIFT(0, 7, u8, 0, false);
- err |= TEST_ONE_SHIFT(0, 15, u16, 0, false);
- err |= TEST_ONE_SHIFT(0, 31, unsigned int, 0, false);
- err |= TEST_ONE_SHIFT(0, 31, u32, 0, false);
- err |= TEST_ONE_SHIFT(0, 63, u64, 0, false);
+ TEST_ONE_SHIFT(0, 7, u8, 0, false);
+ TEST_ONE_SHIFT(0, 15, u16, 0, false);
+ TEST_ONE_SHIFT(0, 31, unsigned int, 0, false);
+ TEST_ONE_SHIFT(0, 31, u32, 0, false);
+ TEST_ONE_SHIFT(0, 63, u64, 0, false);
/* Sane shift: start and end with 0, without reaching signed bit. */
- err |= TEST_ONE_SHIFT(0, 6, s8, 0, false);
- err |= TEST_ONE_SHIFT(0, 14, s16, 0, false);
- err |= TEST_ONE_SHIFT(0, 30, int, 0, false);
- err |= TEST_ONE_SHIFT(0, 30, s32, 0, false);
- err |= TEST_ONE_SHIFT(0, 62, s64, 0, false);
+ TEST_ONE_SHIFT(0, 6, s8, 0, false);
+ TEST_ONE_SHIFT(0, 14, s16, 0, false);
+ TEST_ONE_SHIFT(0, 30, int, 0, false);
+ TEST_ONE_SHIFT(0, 30, s32, 0, false);
+ TEST_ONE_SHIFT(0, 62, s64, 0, false);
/* Overflow: shifted the bit off the end. */
- err |= TEST_ONE_SHIFT(1, 8, u8, 0, true);
- err |= TEST_ONE_SHIFT(1, 16, u16, 0, true);
- err |= TEST_ONE_SHIFT(1, 32, unsigned int, 0, true);
- err |= TEST_ONE_SHIFT(1, 32, u32, 0, true);
- err |= TEST_ONE_SHIFT(1, 64, u64, 0, true);
+ TEST_ONE_SHIFT(1, 8, u8, 0, true);
+ TEST_ONE_SHIFT(1, 16, u16, 0, true);
+ TEST_ONE_SHIFT(1, 32, unsigned int, 0, true);
+ TEST_ONE_SHIFT(1, 32, u32, 0, true);
+ TEST_ONE_SHIFT(1, 64, u64, 0, true);
/* Overflow: shifted into the signed bit. */
- err |= TEST_ONE_SHIFT(1, 7, s8, 0, true);
- err |= TEST_ONE_SHIFT(1, 15, s16, 0, true);
- err |= TEST_ONE_SHIFT(1, 31, int, 0, true);
- err |= TEST_ONE_SHIFT(1, 31, s32, 0, true);
- err |= TEST_ONE_SHIFT(1, 63, s64, 0, true);
+ TEST_ONE_SHIFT(1, 7, s8, 0, true);
+ TEST_ONE_SHIFT(1, 15, s16, 0, true);
+ TEST_ONE_SHIFT(1, 31, int, 0, true);
+ TEST_ONE_SHIFT(1, 31, s32, 0, true);
+ TEST_ONE_SHIFT(1, 63, s64, 0, true);
/* Overflow: high bit falls off unsigned types. */
/* 10010110 */
- err |= TEST_ONE_SHIFT(150, 1, u8, 0, true);
+ TEST_ONE_SHIFT(150, 1, u8, 0, true);
/* 1000100010010110 */
- err |= TEST_ONE_SHIFT(34966, 1, u16, 0, true);
+ TEST_ONE_SHIFT(34966, 1, u16, 0, true);
/* 10000100000010001000100010010110 */
- err |= TEST_ONE_SHIFT(2215151766U, 1, u32, 0, true);
- err |= TEST_ONE_SHIFT(2215151766U, 1, unsigned int, 0, true);
+ TEST_ONE_SHIFT(2215151766U, 1, u32, 0, true);
+ TEST_ONE_SHIFT(2215151766U, 1, unsigned int, 0, true);
/* 1000001000010000010000000100000010000100000010001000100010010110 */
- err |= TEST_ONE_SHIFT(9372061470395238550ULL, 1, u64, 0, true);
+ TEST_ONE_SHIFT(9372061470395238550ULL, 1, u64, 0, true);
/* Overflow: bit shifted into signed bit on signed types. */
/* 01001011 */
- err |= TEST_ONE_SHIFT(75, 1, s8, 0, true);
+ TEST_ONE_SHIFT(75, 1, s8, 0, true);
/* 0100010001001011 */
- err |= TEST_ONE_SHIFT(17483, 1, s16, 0, true);
+ TEST_ONE_SHIFT(17483, 1, s16, 0, true);
/* 01000010000001000100010001001011 */
- err |= TEST_ONE_SHIFT(1107575883, 1, s32, 0, true);
- err |= TEST_ONE_SHIFT(1107575883, 1, int, 0, true);
+ TEST_ONE_SHIFT(1107575883, 1, s32, 0, true);
+ TEST_ONE_SHIFT(1107575883, 1, int, 0, true);
/* 0100000100001000001000000010000001000010000001000100010001001011 */
- err |= TEST_ONE_SHIFT(4686030735197619275LL, 1, s64, 0, true);
+ TEST_ONE_SHIFT(4686030735197619275LL, 1, s64, 0, true);
/* Overflow: bit shifted past signed bit on signed types. */
/* 01001011 */
- err |= TEST_ONE_SHIFT(75, 2, s8, 0, true);
+ TEST_ONE_SHIFT(75, 2, s8, 0, true);
/* 0100010001001011 */
- err |= TEST_ONE_SHIFT(17483, 2, s16, 0, true);
+ TEST_ONE_SHIFT(17483, 2, s16, 0, true);
/* 01000010000001000100010001001011 */
- err |= TEST_ONE_SHIFT(1107575883, 2, s32, 0, true);
- err |= TEST_ONE_SHIFT(1107575883, 2, int, 0, true);
+ TEST_ONE_SHIFT(1107575883, 2, s32, 0, true);
+ TEST_ONE_SHIFT(1107575883, 2, int, 0, true);
/* 0100000100001000001000000010000001000010000001000100010001001011 */
- err |= TEST_ONE_SHIFT(4686030735197619275LL, 2, s64, 0, true);
+ TEST_ONE_SHIFT(4686030735197619275LL, 2, s64, 0, true);
/* Overflow: values larger than destination type. */
- err |= TEST_ONE_SHIFT(0x100, 0, u8, 0, true);
- err |= TEST_ONE_SHIFT(0xFF, 0, s8, 0, true);
- err |= TEST_ONE_SHIFT(0x10000U, 0, u16, 0, true);
- err |= TEST_ONE_SHIFT(0xFFFFU, 0, s16, 0, true);
- err |= TEST_ONE_SHIFT(0x100000000ULL, 0, u32, 0, true);
- err |= TEST_ONE_SHIFT(0x100000000ULL, 0, unsigned int, 0, true);
- err |= TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, s32, 0, true);
- err |= TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, int, 0, true);
- err |= TEST_ONE_SHIFT(0xFFFFFFFFFFFFFFFFULL, 0, s64, 0, true);
+ TEST_ONE_SHIFT(0x100, 0, u8, 0, true);
+ TEST_ONE_SHIFT(0xFF, 0, s8, 0, true);
+ TEST_ONE_SHIFT(0x10000U, 0, u16, 0, true);
+ TEST_ONE_SHIFT(0xFFFFU, 0, s16, 0, true);
+ TEST_ONE_SHIFT(0x100000000ULL, 0, u32, 0, true);
+ TEST_ONE_SHIFT(0x100000000ULL, 0, unsigned int, 0, true);
+ TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, s32, 0, true);
+ TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, int, 0, true);
+ TEST_ONE_SHIFT(0xFFFFFFFFFFFFFFFFULL, 0, s64, 0, true);
/* Nonsense: negative initial value. */
- err |= TEST_ONE_SHIFT(-1, 0, s8, 0, true);
- err |= TEST_ONE_SHIFT(-1, 0, u8, 0, true);
- err |= TEST_ONE_SHIFT(-5, 0, s16, 0, true);
- err |= TEST_ONE_SHIFT(-5, 0, u16, 0, true);
- err |= TEST_ONE_SHIFT(-10, 0, int, 0, true);
- err |= TEST_ONE_SHIFT(-10, 0, unsigned int, 0, true);
- err |= TEST_ONE_SHIFT(-100, 0, s32, 0, true);
- err |= TEST_ONE_SHIFT(-100, 0, u32, 0, true);
- err |= TEST_ONE_SHIFT(-10000, 0, s64, 0, true);
- err |= TEST_ONE_SHIFT(-10000, 0, u64, 0, true);
+ TEST_ONE_SHIFT(-1, 0, s8, 0, true);
+ TEST_ONE_SHIFT(-1, 0, u8, 0, true);
+ TEST_ONE_SHIFT(-5, 0, s16, 0, true);
+ TEST_ONE_SHIFT(-5, 0, u16, 0, true);
+ TEST_ONE_SHIFT(-10, 0, int, 0, true);
+ TEST_ONE_SHIFT(-10, 0, unsigned int, 0, true);
+ TEST_ONE_SHIFT(-100, 0, s32, 0, true);
+ TEST_ONE_SHIFT(-100, 0, u32, 0, true);
+ TEST_ONE_SHIFT(-10000, 0, s64, 0, true);
+ TEST_ONE_SHIFT(-10000, 0, u64, 0, true);
/* Nonsense: negative shift values. */
- err |= TEST_ONE_SHIFT(0, -5, s8, 0, true);
- err |= TEST_ONE_SHIFT(0, -5, u8, 0, true);
- err |= TEST_ONE_SHIFT(0, -10, s16, 0, true);
- err |= TEST_ONE_SHIFT(0, -10, u16, 0, true);
- err |= TEST_ONE_SHIFT(0, -15, int, 0, true);
- err |= TEST_ONE_SHIFT(0, -15, unsigned int, 0, true);
- err |= TEST_ONE_SHIFT(0, -20, s32, 0, true);
- err |= TEST_ONE_SHIFT(0, -20, u32, 0, true);
- err |= TEST_ONE_SHIFT(0, -30, s64, 0, true);
- err |= TEST_ONE_SHIFT(0, -30, u64, 0, true);
+ TEST_ONE_SHIFT(0, -5, s8, 0, true);
+ TEST_ONE_SHIFT(0, -5, u8, 0, true);
+ TEST_ONE_SHIFT(0, -10, s16, 0, true);
+ TEST_ONE_SHIFT(0, -10, u16, 0, true);
+ TEST_ONE_SHIFT(0, -15, int, 0, true);
+ TEST_ONE_SHIFT(0, -15, unsigned int, 0, true);
+ TEST_ONE_SHIFT(0, -20, s32, 0, true);
+ TEST_ONE_SHIFT(0, -20, u32, 0, true);
+ TEST_ONE_SHIFT(0, -30, s64, 0, true);
+ TEST_ONE_SHIFT(0, -30, u64, 0, true);
/* Overflow: shifted at or beyond entire type's bit width. */
- err |= TEST_ONE_SHIFT(0, 8, u8, 0, true);
- err |= TEST_ONE_SHIFT(0, 9, u8, 0, true);
- err |= TEST_ONE_SHIFT(0, 8, s8, 0, true);
- err |= TEST_ONE_SHIFT(0, 9, s8, 0, true);
- err |= TEST_ONE_SHIFT(0, 16, u16, 0, true);
- err |= TEST_ONE_SHIFT(0, 17, u16, 0, true);
- err |= TEST_ONE_SHIFT(0, 16, s16, 0, true);
- err |= TEST_ONE_SHIFT(0, 17, s16, 0, true);
- err |= TEST_ONE_SHIFT(0, 32, u32, 0, true);
- err |= TEST_ONE_SHIFT(0, 33, u32, 0, true);
- err |= TEST_ONE_SHIFT(0, 32, int, 0, true);
- err |= TEST_ONE_SHIFT(0, 33, int, 0, true);
- err |= TEST_ONE_SHIFT(0, 32, s32, 0, true);
- err |= TEST_ONE_SHIFT(0, 33, s32, 0, true);
- err |= TEST_ONE_SHIFT(0, 64, u64, 0, true);
- err |= TEST_ONE_SHIFT(0, 65, u64, 0, true);
- err |= TEST_ONE_SHIFT(0, 64, s64, 0, true);
- err |= TEST_ONE_SHIFT(0, 65, s64, 0, true);
+ TEST_ONE_SHIFT(0, 8, u8, 0, true);
+ TEST_ONE_SHIFT(0, 9, u8, 0, true);
+ TEST_ONE_SHIFT(0, 8, s8, 0, true);
+ TEST_ONE_SHIFT(0, 9, s8, 0, true);
+ TEST_ONE_SHIFT(0, 16, u16, 0, true);
+ TEST_ONE_SHIFT(0, 17, u16, 0, true);
+ TEST_ONE_SHIFT(0, 16, s16, 0, true);
+ TEST_ONE_SHIFT(0, 17, s16, 0, true);
+ TEST_ONE_SHIFT(0, 32, u32, 0, true);
+ TEST_ONE_SHIFT(0, 33, u32, 0, true);
+ TEST_ONE_SHIFT(0, 32, int, 0, true);
+ TEST_ONE_SHIFT(0, 33, int, 0, true);
+ TEST_ONE_SHIFT(0, 32, s32, 0, true);
+ TEST_ONE_SHIFT(0, 33, s32, 0, true);
+ TEST_ONE_SHIFT(0, 64, u64, 0, true);
+ TEST_ONE_SHIFT(0, 65, u64, 0, true);
+ TEST_ONE_SHIFT(0, 64, s64, 0, true);
+ TEST_ONE_SHIFT(0, 65, s64, 0, true);
/*
* Corner case: for unsigned types, we fail when we've shifted
@@ -473,13 +440,14 @@ static int __init test_overflow_shift(void)
* signed bit). So, for now, we will test this condition but
* mark it as not expected to overflow.
*/
- err |= TEST_ONE_SHIFT(0, 7, s8, 0, false);
- err |= TEST_ONE_SHIFT(0, 15, s16, 0, false);
- err |= TEST_ONE_SHIFT(0, 31, int, 0, false);
- err |= TEST_ONE_SHIFT(0, 31, s32, 0, false);
- err |= TEST_ONE_SHIFT(0, 63, s64, 0, false);
-
- return err;
+ TEST_ONE_SHIFT(0, 7, s8, 0, false);
+ TEST_ONE_SHIFT(0, 15, s16, 0, false);
+ TEST_ONE_SHIFT(0, 31, int, 0, false);
+ TEST_ONE_SHIFT(0, 31, s32, 0, false);
+ TEST_ONE_SHIFT(0, 63, s64, 0, false);
+
+ kunit_info(test, "%d shift tests finished\n", count);
+#undef TEST_ONE_SHIFT
}
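What each TEST_ONE_SHIFT() row asserts, reduced to one hedged example: check_shl_overflow() returns true when the shifted value cannot be represented in the destination type, and the expected-value column is only compared when no overflow was reported, so the 0 in the overflow rows is effectively a don't-care:

    /* TEST_ONE_SHIFT(150, 1, u8, 0, true) boils down to: */
    u8 dest;

    /* 150 << 1 == 300 does not fit in a u8, so overflow is reported. */
    KUNIT_EXPECT_TRUE(test, check_shl_overflow(150, 1, &dest));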
/*
@@ -499,7 +467,7 @@ static int __init test_overflow_shift(void)
#define TEST_SIZE (5 * 4096)
#define DEFINE_TEST_ALLOC(func, free_func, want_arg, want_gfp, want_node)\
-static int __init test_ ## func (void *arg) \
+static void test_ ## func (struct kunit *test, void *arg) \
{ \
volatile size_t a = TEST_SIZE; \
volatile size_t b = (SIZE_MAX / TEST_SIZE) + 1; \
@@ -507,31 +475,24 @@ static int __init test_ ## func (void *arg) \
\
/* Tiny allocation test. */ \
ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg, 1);\
- if (!ptr) { \
- pr_warn(#func " failed regular allocation?!\n"); \
- return 1; \
- } \
+ KUNIT_ASSERT_NOT_ERR_OR_NULL_MSG(test, ptr, \
+ #func " failed regular allocation?!\n"); \
free ## want_arg (free_func, arg, ptr); \
\
/* Wrapped allocation test. */ \
ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg, \
a * b); \
- if (!ptr) { \
- pr_warn(#func " unexpectedly failed bad wrapping?!\n"); \
- return 1; \
- } \
+ KUNIT_ASSERT_NOT_ERR_OR_NULL_MSG(test, ptr, \
+ #func " unexpectedly failed bad wrapping?!\n"); \
free ## want_arg (free_func, arg, ptr); \
\
/* Saturated allocation test. */ \
ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg, \
array_size(a, b)); \
if (ptr) { \
- pr_warn(#func " missed saturation!\n"); \
+ KUNIT_FAIL(test, #func " missed saturation!\n"); \
free ## want_arg (free_func, arg, ptr); \
- return 1; \
} \
- pr_info(#func " detected saturation\n"); \
- return 0; \
}
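The macro's three phases lean on array_size() saturating instead of wrapping; a minimal sketch of the arithmetic involved, using the values defined just above:

    size_t a = TEST_SIZE;                     /* 5 * 4096 */
    size_t b = (SIZE_MAX / TEST_SIZE) + 1;

    size_t wrapped   = a * b;                 /* wraps to a small value */
    size_t saturated = array_size(a, b);      /* pins to SIZE_MAX       */

    /* kmalloc(wrapped) can "succeed" with a dangerously small buffer;
     * kmalloc(saturated) must fail -- the "missed saturation" check. */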
/*
@@ -544,10 +505,7 @@ DEFINE_TEST_ALLOC(kmalloc, kfree, 0, 1, 0);
DEFINE_TEST_ALLOC(kmalloc_node, kfree, 0, 1, 1);
DEFINE_TEST_ALLOC(kzalloc, kfree, 0, 1, 0);
DEFINE_TEST_ALLOC(kzalloc_node, kfree, 0, 1, 1);
-DEFINE_TEST_ALLOC(vmalloc, vfree, 0, 0, 0);
-DEFINE_TEST_ALLOC(vmalloc_node, vfree, 0, 0, 1);
-DEFINE_TEST_ALLOC(vzalloc, vfree, 0, 0, 0);
-DEFINE_TEST_ALLOC(vzalloc_node, vfree, 0, 0, 1);
+DEFINE_TEST_ALLOC(__vmalloc, vfree, 0, 1, 0);
DEFINE_TEST_ALLOC(kvmalloc, kvfree, 0, 1, 0);
DEFINE_TEST_ALLOC(kvmalloc_node, kvfree, 0, 1, 1);
DEFINE_TEST_ALLOC(kvzalloc, kvfree, 0, 1, 0);
@@ -555,60 +513,158 @@ DEFINE_TEST_ALLOC(kvzalloc_node, kvfree, 0, 1, 1);
DEFINE_TEST_ALLOC(devm_kmalloc, devm_kfree, 1, 1, 0);
DEFINE_TEST_ALLOC(devm_kzalloc, devm_kfree, 1, 1, 0);
-static int __init test_overflow_allocation(void)
+static void overflow_allocation_test(struct kunit *test)
{
const char device_name[] = "overflow-test";
struct device *dev;
- int err = 0;
+ int count = 0;
+
+#define check_allocation_overflow(alloc) do { \
+ count++; \
+ test_ ## alloc(test, dev); \
+} while (0)
/* Create dummy device for devm_kmalloc()-family tests. */
dev = root_device_register(device_name);
- if (IS_ERR(dev)) {
- pr_warn("Cannot register test device\n");
- return 1;
- }
-
- err |= test_kmalloc(NULL);
- err |= test_kmalloc_node(NULL);
- err |= test_kzalloc(NULL);
- err |= test_kzalloc_node(NULL);
- err |= test_kvmalloc(NULL);
- err |= test_kvmalloc_node(NULL);
- err |= test_kvzalloc(NULL);
- err |= test_kvzalloc_node(NULL);
- err |= test_vmalloc(NULL);
- err |= test_vmalloc_node(NULL);
- err |= test_vzalloc(NULL);
- err |= test_vzalloc_node(NULL);
- err |= test_devm_kmalloc(dev);
- err |= test_devm_kzalloc(dev);
+ KUNIT_ASSERT_FALSE_MSG(test, IS_ERR(dev),
+ "Cannot register test device\n");
+
+ check_allocation_overflow(kmalloc);
+ check_allocation_overflow(kmalloc_node);
+ check_allocation_overflow(kzalloc);
+ check_allocation_overflow(kzalloc_node);
+ check_allocation_overflow(__vmalloc);
+ check_allocation_overflow(kvmalloc);
+ check_allocation_overflow(kvmalloc_node);
+ check_allocation_overflow(kvzalloc);
+ check_allocation_overflow(kvzalloc_node);
+ check_allocation_overflow(devm_kmalloc);
+ check_allocation_overflow(devm_kzalloc);
device_unregister(dev);
- return err;
+ kunit_info(test, "%d allocation overflow tests finished\n", count);
+#undef check_allocation_overflow
}
-static int __init test_module_init(void)
+struct __test_flex_array {
+ unsigned long flags;
+ size_t count;
+ unsigned long data[];
+};
+
+static void overflow_size_helpers_test(struct kunit *test)
{
- int err = 0;
+ /* Make sure struct_size() can be used in a constant expression. */
+ u8 ce_array[struct_size((struct __test_flex_array *)0, data, 55)];
+ struct __test_flex_array *obj;
+ int count = 0;
+ int var;
+ volatile int unconst = 0;
+
+ /* Verify constant expression against runtime version. */
+ var = 55;
+ OPTIMIZER_HIDE_VAR(var);
+ KUNIT_EXPECT_EQ(test, sizeof(ce_array), struct_size(obj, data, var));
+
+#define check_one_size_helper(expected, func, args...) do { \
+ size_t _r = func(args); \
+ KUNIT_EXPECT_EQ_MSG(test, _r, expected, \
+ "expected " #func "(" #args ") to return %zu but got %zu instead\n", \
+ (size_t)(expected), _r); \
+ count++; \
+} while (0)
- err |= test_overflow_calculation();
- err |= test_overflow_shift();
- err |= test_overflow_allocation();
+ var = 4;
+ check_one_size_helper(20, size_mul, var++, 5);
+ check_one_size_helper(20, size_mul, 4, var++);
+ check_one_size_helper(0, size_mul, 0, 3);
+ check_one_size_helper(0, size_mul, 3, 0);
+ check_one_size_helper(6, size_mul, 2, 3);
+ check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, 1);
+ check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, 3);
+ check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, -3);
+
+ var = 4;
+ check_one_size_helper(9, size_add, var++, 5);
+ check_one_size_helper(9, size_add, 4, var++);
+ check_one_size_helper(9, size_add, 9, 0);
+ check_one_size_helper(9, size_add, 0, 9);
+ check_one_size_helper(5, size_add, 2, 3);
+ check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, 1);
+ check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, 3);
+ check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, -3);
+
+ var = 4;
+ check_one_size_helper(1, size_sub, var--, 3);
+ check_one_size_helper(1, size_sub, 4, var--);
+ check_one_size_helper(1, size_sub, 3, 2);
+ check_one_size_helper(9, size_sub, 9, 0);
+ check_one_size_helper(SIZE_MAX, size_sub, 9, -3);
+ check_one_size_helper(SIZE_MAX, size_sub, 0, 9);
+ check_one_size_helper(SIZE_MAX, size_sub, 2, 3);
+ check_one_size_helper(SIZE_MAX, size_sub, SIZE_MAX, 0);
+ check_one_size_helper(SIZE_MAX, size_sub, SIZE_MAX, 10);
+ check_one_size_helper(SIZE_MAX, size_sub, 0, SIZE_MAX);
+ check_one_size_helper(SIZE_MAX, size_sub, 14, SIZE_MAX);
+ check_one_size_helper(SIZE_MAX - 2, size_sub, SIZE_MAX - 1, 1);
+ check_one_size_helper(SIZE_MAX - 4, size_sub, SIZE_MAX - 1, 3);
+ check_one_size_helper(1, size_sub, SIZE_MAX - 1, -3);
+
+ var = 4;
+ check_one_size_helper(4 * sizeof(*obj->data),
+ flex_array_size, obj, data, var++);
+ check_one_size_helper(5 * sizeof(*obj->data),
+ flex_array_size, obj, data, var++);
+ check_one_size_helper(0, flex_array_size, obj, data, 0 + unconst);
+ check_one_size_helper(sizeof(*obj->data),
+ flex_array_size, obj, data, 1 + unconst);
+ check_one_size_helper(7 * sizeof(*obj->data),
+ flex_array_size, obj, data, 7 + unconst);
+ check_one_size_helper(SIZE_MAX,
+ flex_array_size, obj, data, -1 + unconst);
+ check_one_size_helper(SIZE_MAX,
+ flex_array_size, obj, data, SIZE_MAX - 4 + unconst);
+
+ var = 4;
+ check_one_size_helper(sizeof(*obj) + (4 * sizeof(*obj->data)),
+ struct_size, obj, data, var++);
+ check_one_size_helper(sizeof(*obj) + (5 * sizeof(*obj->data)),
+ struct_size, obj, data, var++);
+ check_one_size_helper(sizeof(*obj), struct_size, obj, data, 0 + unconst);
+ check_one_size_helper(sizeof(*obj) + sizeof(*obj->data),
+ struct_size, obj, data, 1 + unconst);
+ check_one_size_helper(SIZE_MAX,
+ struct_size, obj, data, -3 + unconst);
+ check_one_size_helper(SIZE_MAX,
+ struct_size, obj, data, SIZE_MAX - 3 + unconst);
+
+ kunit_info(test, "%d overflow size helper tests finished\n", count);
+#undef check_one_size_helper
+}
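The table above encodes the saturating contract of the size_*() helpers: once a step overflows, underflows, or receives a negative value, the result pins to SIZE_MAX and stays pinned, so a downstream allocator sees an impossible request rather than a small one. In sequence form:

    size_t n;

    n = size_mul(SIZE_MAX, 3);      /* overflow     -> SIZE_MAX     */
    n = size_add(n, 1);             /* stays pinned -> SIZE_MAX     */
    n = size_sub(2, 3);             /* underflow    -> SIZE_MAX     */
    n = size_sub(SIZE_MAX - 1, 1);  /* normal path  -> SIZE_MAX - 2 */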
- if (err) {
- pr_warn("FAIL!\n");
- err = -EINVAL;
- } else {
- pr_info("all tests passed\n");
- }
+static struct kunit_case overflow_test_cases[] = {
+ KUNIT_CASE(u8_overflow_test),
+ KUNIT_CASE(s8_overflow_test),
+ KUNIT_CASE(u16_overflow_test),
+ KUNIT_CASE(s16_overflow_test),
+ KUNIT_CASE(u32_overflow_test),
+ KUNIT_CASE(s32_overflow_test),
+#if BITS_PER_LONG == 64
+ KUNIT_CASE(u64_overflow_test),
+ KUNIT_CASE(s64_overflow_test),
+#endif
+ KUNIT_CASE(overflow_shift_test),
+ KUNIT_CASE(overflow_allocation_test),
+ KUNIT_CASE(overflow_size_helpers_test),
+ {}
+};
- return err;
-}
+static struct kunit_suite overflow_test_suite = {
+ .name = "overflow",
+ .test_cases = overflow_test_cases,
+};
-static void __exit test_module_exit(void)
-{ }
+kunit_test_suite(overflow_test_suite);
-module_init(test_module_init);
-module_exit(test_module_exit);
MODULE_LICENSE("Dual MIT/GPL");
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index a4c7cd74cff5..4fb7700a741b 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -4,6 +4,8 @@
# from userspace.
#
+pound := \#
+
CC = gcc
OPTFLAGS = -O2 # Adjust as desired
CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS)
@@ -42,7 +44,7 @@ else ifeq ($(HAS_NEON),yes)
OBJS += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
else
- HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
+ HAS_ALTIVEC := $(shell printf '$(pound)include <altivec.h>\nvector int a;\n' |\
gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
ifeq ($(HAS_ALTIVEC),yes)
CFLAGS += -I../../../arch/powerpc/include
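Context for the $(pound) indirection: GNU Make 4.3 changed how a literal '#' behaves inside function calls, so the portable idiom is to hide it behind a variable. A reduced standalone illustration (hypothetical variable names):

    # Portable across Make < 4.3 and >= 4.3:
    pound := \#
    HAS_HDR := $(shell printf '$(pound)include <stdio.h>\n' >/dev/null && echo yes)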
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
index a3cf071941ab..841a55242aba 100644
--- a/lib/raid6/test/test.c
+++ b/lib/raid6/test/test.c
@@ -19,7 +19,6 @@
#define NDISKS 16 /* Including P and Q */
const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
-struct raid6_calls raid6_call;
char *dataptrs[NDISKS];
char data[NDISKS][PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
diff --git a/lib/raid6/vpermxor.uc b/lib/raid6/vpermxor.uc
index 10475dc423c1..1bfb127fbfe8 100644
--- a/lib/raid6/vpermxor.uc
+++ b/lib/raid6/vpermxor.uc
@@ -24,9 +24,9 @@
#ifdef CONFIG_ALTIVEC
#include <altivec.h>
+#include <asm/ppc-opcode.h>
#ifdef __KERNEL__
#include <asm/cputable.h>
-#include <asm/ppc-opcode.h>
#include <asm/switch_to.h>
#endif
diff --git a/lib/random32.c b/lib/random32.c
index a57a0e18819d..976632003ec6 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -41,7 +41,6 @@
#include <linux/bitops.h>
#include <linux/slab.h>
#include <asm/unaligned.h>
-#include <trace/events/random.h>
/**
* prandom_u32_state - seeded pseudo-random number generator.
@@ -387,7 +386,6 @@ u32 prandom_u32(void)
struct siprand_state *state = get_cpu_ptr(&net_rand_state);
u32 res = siprand_u32(state);
- trace_prandom_u32(res);
put_cpu_ptr(&net_rand_state);
return res;
}
@@ -553,9 +551,11 @@ static void prandom_reseed(struct timer_list *unused)
* To avoid worrying about whether it's safe to delay that interrupt
* long enough to seed all CPUs, just schedule an immediate timer event.
*/
-static void prandom_timer_start(struct random_ready_callback *unused)
+static int prandom_timer_start(struct notifier_block *nb,
+ unsigned long action, void *data)
{
mod_timer(&seed_timer, jiffies);
+ return 0;
}
#ifdef CONFIG_RANDOM32_SELFTEST
@@ -619,13 +619,13 @@ core_initcall(prandom32_state_selftest);
*/
static int __init prandom_init_late(void)
{
- static struct random_ready_callback random_ready = {
- .func = prandom_timer_start
+ static struct notifier_block random_ready = {
+ .notifier_call = prandom_timer_start
};
- int ret = add_random_ready_callback(&random_ready);
+ int ret = register_random_ready_notifier(&random_ready);
if (ret == -EALREADY) {
- prandom_timer_start(&random_ready);
+ prandom_timer_start(&random_ready, 0, NULL);
ret = 0;
}
return ret;
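The conversion follows the generic notifier pattern: the callback gains (nb, action, data) parameters and returns a notifier code, where 0 means NOTIFY_DONE. Sketched shape of such a callback:

    static int my_ready(struct notifier_block *nb, unsigned long action,
                        void *data)
    {
            /* react to the event */
            return 0;   /* NOTIFY_DONE: no opinion, continue the chain */
    }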
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 6220fa67fb7e..2eb3de18ded3 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -85,7 +85,6 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
bool alloc_hint)
{
unsigned int bits_per_word;
- unsigned int i;
if (shift < 0)
shift = sbitmap_calculate_shift(depth);
@@ -117,10 +116,6 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
return -ENOMEM;
}
- for (i = 0; i < sb->map_nr; i++) {
- sb->map[i].depth = min(depth, bits_per_word);
- depth -= sb->map[i].depth;
- }
return 0;
}
EXPORT_SYMBOL_GPL(sbitmap_init_node);
@@ -135,11 +130,6 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
sb->depth = depth;
sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
-
- for (i = 0; i < sb->map_nr; i++) {
- sb->map[i].depth = min(depth, bits_per_word);
- depth -= sb->map[i].depth;
- }
}
EXPORT_SYMBOL_GPL(sbitmap_resize);
@@ -184,8 +174,8 @@ static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
int nr;
do {
- nr = __sbitmap_get_word(&map->word, map->depth, alloc_hint,
- !sb->round_robin);
+ nr = __sbitmap_get_word(&map->word, __map_depth(sb, index),
+ alloc_hint, !sb->round_robin);
if (nr != -1)
break;
if (!sbitmap_deferred_clear(map))
@@ -257,7 +247,9 @@ static int __sbitmap_get_shallow(struct sbitmap *sb,
for (i = 0; i < sb->map_nr; i++) {
again:
nr = __sbitmap_get_word(&sb->map[index].word,
- min(sb->map[index].depth, shallow_depth),
+ min_t(unsigned int,
+ __map_depth(sb, index),
+ shallow_depth),
SB_NR_TO_BIT(sb, alloc_hint), true);
if (nr != -1) {
nr += index << sb->shift;
@@ -315,11 +307,12 @@ static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set)
for (i = 0; i < sb->map_nr; i++) {
const struct sbitmap_word *word = &sb->map[i];
+ unsigned int word_depth = __map_depth(sb, i);
if (set)
- weight += bitmap_weight(&word->word, word->depth);
+ weight += bitmap_weight(&word->word, word_depth);
else
- weight += bitmap_weight(&word->cleared, word->depth);
+ weight += bitmap_weight(&word->cleared, word_depth);
}
return weight;
}
@@ -367,7 +360,7 @@ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
for (i = 0; i < sb->map_nr; i++) {
unsigned long word = READ_ONCE(sb->map[i].word);
unsigned long cleared = READ_ONCE(sb->map[i].cleared);
- unsigned int word_bits = READ_ONCE(sb->map[i].depth);
+ unsigned int word_bits = __map_depth(sb, i);
word &= ~cleared;
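With the per-word ->depth field removed, depth is recomputed from sb->shift: every word except the last holds a full 1 << sb->shift bits, and the last word holds the remainder. A worked example, assuming __map_depth() implements exactly that:

    /* depth = 70, shift = 6  =>  bits_per_word = 64, map_nr = 2 */
    __map_depth(sb, 0);   /* == 64, a full word        */
    __map_depth(sb, 1);   /* == 70 - 64 == 6, the tail */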
@@ -488,9 +481,13 @@ void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
unsigned int users)
{
unsigned int wake_batch;
+ unsigned int min_batch;
+ unsigned int depth = (sbq->sb.depth + users - 1) / users;
+
+ min_batch = sbq->sb.depth >= (4 * SBQ_WAIT_QUEUES) ? 4 : 1;
- wake_batch = clamp_val((sbq->sb.depth + users - 1) /
- users, 4, SBQ_WAKE_BATCH);
+ wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES,
+ min_batch, SBQ_WAKE_BATCH);
__sbitmap_queue_update_wake_batch(sbq, wake_batch);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);
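The recalculation in numbers (assuming SBQ_WAIT_QUEUES == 8 and SBQ_WAKE_BATCH == 8):

    /* sb.depth = 256, users = 4:
     *   depth      = (256 + 3) / 4       = 64
     *   min_batch  = 4                     (256 >= 4 * 8)
     *   wake_batch = clamp(64 / 8, 4, 8) = 8
     * sb.depth = 16, users = 16:
     *   depth      = (16 + 15) / 16      = 1
     *   min_batch  = 1                     (16 < 4 * 8)
     *   wake_batch = clamp(1 / 8, 1, 8)  = 1
     */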
@@ -527,15 +524,16 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
for (i = 0; i < sb->map_nr; i++) {
struct sbitmap_word *map = &sb->map[index];
unsigned long get_mask;
+ unsigned int map_depth = __map_depth(sb, index);
sbitmap_deferred_clear(map);
- if (map->word == (1UL << (map->depth - 1)) - 1)
+ if (map->word == (1UL << (map_depth - 1)) - 1)
continue;
- nr = find_first_zero_bit(&map->word, map->depth);
- if (nr + nr_tags <= map->depth) {
+ nr = find_first_zero_bit(&map->word, map_depth);
+ if (nr + nr_tags <= map_depth) {
atomic_long_t *ptr = (atomic_long_t *) &map->word;
- int map_tags = min_t(int, nr_tags, map->depth);
+ int map_tags = min_t(int, nr_tags, map_depth);
unsigned long val, ret;
get_mask = ((1UL << map_tags) - 1) << nr;
@@ -559,14 +557,14 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
return 0;
}
-int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
- unsigned int shallow_depth)
+int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
+ unsigned int shallow_depth)
{
WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth);
return sbitmap_get_shallow(&sbq->sb, shallow_depth);
}
-EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow);
+EXPORT_SYMBOL_GPL(sbitmap_queue_get_shallow);
void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
unsigned int min_shallow_depth)
diff --git a/lib/test_stackinit.c b/lib/stackinit_kunit.c
index a3c74e6a21ff..35c69aa425b2 100644
--- a/lib/test_stackinit.c
+++ b/lib/stackinit_kunit.c
@@ -2,76 +2,21 @@
/*
* Test cases for compiler-based stack variable zeroing via
* -ftrivial-auto-var-init={zero,pattern} or CONFIG_GCC_PLUGIN_STRUCTLEAK*.
+ * For example, see:
+ * https://www.kernel.org/doc/html/latest/dev-tools/kunit/kunit-tool.html#configuring-building-and-running-tests
+ * ./tools/testing/kunit/kunit.py run stackinit [--raw_output] \
+ * --make_option LLVM=1 \
+ * --kconfig_add CONFIG_INIT_STACK_ALL_ZERO=y
*
- * External build example:
- * clang -O2 -Wall -ftrivial-auto-var-init=pattern \
- * -o test_stackinit test_stackinit.c
*/
-#ifdef __KERNEL__
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <kunit/test.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
-#else
-
-/* Userspace headers. */
-#include <stdio.h>
-#include <stdint.h>
-#include <string.h>
-#include <stdbool.h>
-#include <errno.h>
-#include <sys/types.h>
-
-/* Linux kernel-ism stubs for stand-alone userspace build. */
-#define KBUILD_MODNAME "stackinit"
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#define pr_err(fmt, ...) fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_warn(fmt, ...) fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_info(fmt, ...) fprintf(stdout, pr_fmt(fmt), ##__VA_ARGS__)
-#define __init /**/
-#define __exit /**/
-#define __user /**/
-#define noinline __attribute__((__noinline__))
-#define __aligned(x) __attribute__((__aligned__(x)))
-#ifdef __clang__
-# define __compiletime_error(message) /**/
-#else
-# define __compiletime_error(message) __attribute__((__error__(message)))
-#endif
-#define __compiletime_assert(condition, msg, prefix, suffix) \
- do { \
- extern void prefix ## suffix(void) __compiletime_error(msg); \
- if (!(condition)) \
- prefix ## suffix(); \
- } while (0)
-#define _compiletime_assert(condition, msg, prefix, suffix) \
- __compiletime_assert(condition, msg, prefix, suffix)
-#define compiletime_assert(condition, msg) \
- _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
-#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
-#define BUILD_BUG_ON(condition) \
- BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
-typedef uint8_t u8;
-typedef uint16_t u16;
-typedef uint32_t u32;
-typedef uint64_t u64;
-
-#define module_init(func) static int (*do_init)(void) = func
-#define module_exit(func) static void (*do_exit)(void) = func
-#define MODULE_LICENSE(str) int main(void) { \
- int rc; \
- /* License: str */ \
- rc = do_init(); \
- if (rc == 0) \
- do_exit(); \
- return rc; \
- }
-
-#endif /* __KERNEL__ */
-
/* Exfiltration buffer. */
#define MAX_VAR_SIZE 128
static u8 check_buf[MAX_VAR_SIZE];
@@ -201,7 +146,7 @@ static bool range_contains(char *haystack_start, size_t haystack_size,
*/
#define DEFINE_TEST_DRIVER(name, var_type, which, xfail) \
/* Returns 0 on success, 1 on failure. */ \
-static noinline __init int test_ ## name (void) \
+static noinline void test_ ## name (struct kunit *test) \
{ \
var_type zero INIT_CLONE_ ## which; \
int ignored; \
@@ -220,10 +165,8 @@ static noinline __init int test_ ## name (void) \
/* Verify all bytes overwritten with 0xFF. */ \
for (sum = 0, i = 0; i < target_size; i++) \
sum += (check_buf[i] != 0xFF); \
- if (sum) { \
- pr_err(#name ": leaf fill was not 0xFF!?\n"); \
- return 1; \
- } \
+ KUNIT_ASSERT_EQ_MSG(test, sum, 0, \
+ "leaf fill was not 0xFF!?\n"); \
/* Clear entire check buffer for later bit tests. */ \
memset(check_buf, 0x00, sizeof(check_buf)); \
/* Extract stack-defined variable contents. */ \
@@ -231,32 +174,29 @@ static noinline __init int test_ ## name (void) \
FETCH_ARG_ ## which(zero)); \
\
/* Validate that compiler lined up fill and target. */ \
- if (!range_contains(fill_start, fill_size, \
- target_start, target_size)) { \
- pr_err(#name ": stack fill missed target!?\n"); \
- pr_err(#name ": fill %zu wide\n", fill_size); \
- pr_err(#name ": target offset by %d\n", \
- (int)((ssize_t)(uintptr_t)fill_start - \
- (ssize_t)(uintptr_t)target_start)); \
- return 1; \
- } \
+ KUNIT_ASSERT_TRUE_MSG(test, \
+ range_contains(fill_start, fill_size, \
+ target_start, target_size), \
+ "stack fill missed target!? " \
+ "(fill %zu wide, target offset by %d)\n", \
+ fill_size, \
+ (int)((ssize_t)(uintptr_t)fill_start - \
+ (ssize_t)(uintptr_t)target_start)); \
\
/* Look for any bytes still 0xFF in check region. */ \
for (sum = 0, i = 0; i < target_size; i++) \
sum += (check_buf[i] == 0xFF); \
\
- if (sum == 0) { \
- pr_info(#name " ok\n"); \
- return 0; \
- } else { \
- pr_warn(#name " %sFAIL (uninit bytes: %d)\n", \
- (xfail) ? "X" : "", sum); \
- return (xfail) ? 0 : 1; \
- } \
+ if (sum != 0 && xfail) \
+ kunit_skip(test, \
+ "XFAIL uninit bytes: %d\n", \
+ sum); \
+ KUNIT_ASSERT_EQ_MSG(test, sum, 0, \
+ "uninit bytes: %d\n", sum); \
}
#define DEFINE_TEST(name, var_type, which, init_level, xfail) \
/* no-op to force compiler into ignoring "uninitialized" vars */\
-static noinline __init DO_NOTHING_TYPE_ ## which(var_type) \
+static noinline DO_NOTHING_TYPE_ ## which(var_type) \
do_nothing_ ## name(var_type *ptr) \
{ \
/* Will always be true, but compiler doesn't know. */ \
@@ -265,9 +205,8 @@ do_nothing_ ## name(var_type *ptr) \
else \
return DO_NOTHING_RETURN_ ## which(ptr + 1); \
} \
-static noinline __init int leaf_ ## name(unsigned long sp, \
- bool fill, \
- var_type *arg) \
+static noinline int leaf_ ## name(unsigned long sp, bool fill, \
+ var_type *arg) \
{ \
char buf[VAR_BUFFER]; \
var_type var \
@@ -341,6 +280,27 @@ struct test_user {
unsigned long four;
};
+#define ALWAYS_PASS WANT_SUCCESS
+#define ALWAYS_FAIL XFAIL
+
+#ifdef CONFIG_INIT_STACK_NONE
+# define USER_PASS XFAIL
+# define BYREF_PASS XFAIL
+# define STRONG_PASS XFAIL
+#elif defined(CONFIG_GCC_PLUGIN_STRUCTLEAK_USER)
+# define USER_PASS WANT_SUCCESS
+# define BYREF_PASS XFAIL
+# define STRONG_PASS XFAIL
+#elif defined(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF)
+# define USER_PASS WANT_SUCCESS
+# define BYREF_PASS WANT_SUCCESS
+# define STRONG_PASS XFAIL
+#else
+# define USER_PASS WANT_SUCCESS
+# define BYREF_PASS WANT_SUCCESS
+# define STRONG_PASS WANT_SUCCESS
+#endif
+
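How the ladder resolves for one representative configuration (illustration only):

    /* CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF=y:
     *   USER_PASS   -> WANT_SUCCESS  (__user structs are zeroed)
     *   BYREF_PASS  -> WANT_SUCCESS  (by-reference structs are zeroed)
     *   STRONG_PASS -> XFAIL         (scalars and "none" not covered)
     * Tests tagged XFAIL that do leak bytes get kunit_skip()ed
     * instead of failing the suite.
     */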
#define DEFINE_SCALAR_TEST(name, init, xfail) \
DEFINE_TEST(name ## _ ## init, name, SCALAR, \
init, xfail)
@@ -364,27 +324,26 @@ struct test_user {
DEFINE_STRUCT_TEST(trailing_hole, init, xfail); \
DEFINE_STRUCT_TEST(packed, init, xfail)
-#define DEFINE_STRUCT_INITIALIZER_TESTS(base) \
+#define DEFINE_STRUCT_INITIALIZER_TESTS(base, xfail) \
DEFINE_STRUCT_TESTS(base ## _ ## partial, \
- WANT_SUCCESS); \
- DEFINE_STRUCT_TESTS(base ## _ ## all, \
- WANT_SUCCESS)
+ xfail); \
+ DEFINE_STRUCT_TESTS(base ## _ ## all, xfail)
/* These should be fully initialized all the time! */
-DEFINE_SCALAR_TESTS(zero, WANT_SUCCESS);
-DEFINE_STRUCT_TESTS(zero, WANT_SUCCESS);
+DEFINE_SCALAR_TESTS(zero, ALWAYS_PASS);
+DEFINE_STRUCT_TESTS(zero, ALWAYS_PASS);
/* Struct initializers: padding may be left uninitialized. */
-DEFINE_STRUCT_INITIALIZER_TESTS(static);
-DEFINE_STRUCT_INITIALIZER_TESTS(dynamic);
-DEFINE_STRUCT_INITIALIZER_TESTS(runtime);
-DEFINE_STRUCT_INITIALIZER_TESTS(assigned_static);
-DEFINE_STRUCT_INITIALIZER_TESTS(assigned_dynamic);
-DEFINE_STRUCT_TESTS(assigned_copy, XFAIL);
+DEFINE_STRUCT_INITIALIZER_TESTS(static, STRONG_PASS);
+DEFINE_STRUCT_INITIALIZER_TESTS(dynamic, STRONG_PASS);
+DEFINE_STRUCT_INITIALIZER_TESTS(runtime, STRONG_PASS);
+DEFINE_STRUCT_INITIALIZER_TESTS(assigned_static, STRONG_PASS);
+DEFINE_STRUCT_INITIALIZER_TESTS(assigned_dynamic, STRONG_PASS);
+DEFINE_STRUCT_TESTS(assigned_copy, ALWAYS_FAIL);
/* No initialization without compiler instrumentation. */
-DEFINE_SCALAR_TESTS(none, WANT_SUCCESS);
-DEFINE_STRUCT_TESTS(none, WANT_SUCCESS);
+DEFINE_SCALAR_TESTS(none, STRONG_PASS);
+DEFINE_STRUCT_TESTS(none, BYREF_PASS);
/* Initialization of members with __user attribute. */
-DEFINE_TEST(user, struct test_user, STRUCT, none, WANT_SUCCESS);
+DEFINE_TEST(user, struct test_user, STRUCT, none, USER_PASS);
/*
* Check two uses through a variable declaration outside either path,
@@ -398,7 +357,7 @@ static int noinline __leaf_switch_none(int path, bool fill)
* This is intentionally unreachable. To silence the
* warning, build with -Wno-switch-unreachable
*/
- uint64_t var;
+ uint64_t var[10];
case 1:
target_start = &var;
@@ -423,19 +382,19 @@ static int noinline __leaf_switch_none(int path, bool fill)
memcpy(check_buf, target_start, target_size);
break;
default:
- var = 5;
- return var & forced_mask;
+ var[1] = 5;
+ return var[1] & forced_mask;
}
return 0;
}
-static noinline __init int leaf_switch_1_none(unsigned long sp, bool fill,
+static noinline int leaf_switch_1_none(unsigned long sp, bool fill,
uint64_t *arg)
{
return __leaf_switch_none(1, fill);
}
-static noinline __init int leaf_switch_2_none(unsigned long sp, bool fill,
+static noinline int leaf_switch_2_none(unsigned long sp, bool fill,
uint64_t *arg)
{
return __leaf_switch_none(2, fill);
@@ -447,68 +406,56 @@ static noinline __init int leaf_switch_2_none(unsigned long sp, bool fill,
* non-code areas (i.e. in a switch statement before the first "case").
* https://bugs.llvm.org/show_bug.cgi?id=44916
*/
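The pattern at issue, reduced (illustrative): a variable declared in switch scope before the first case label has storage, but Clang emits no instrumentation for it, so -ftrivial-auto-var-init never zeroes it:

    switch (path) {
            uint64_t var[10];  /* in scope, but unreachable as code */
    case 1:
            /* var is live here yet possibly uninitialized */
            ...
    }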
-DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR, XFAIL);
-DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR, XFAIL);
-
-static int __init test_stackinit_init(void)
-{
- unsigned int failures = 0;
-
-#define test_scalars(init) do { \
- failures += test_u8_ ## init (); \
- failures += test_u16_ ## init (); \
- failures += test_u32_ ## init (); \
- failures += test_u64_ ## init (); \
- failures += test_char_array_ ## init (); \
- } while (0)
-
-#define test_structs(init) do { \
- failures += test_small_hole_ ## init (); \
- failures += test_big_hole_ ## init (); \
- failures += test_trailing_hole_ ## init (); \
- failures += test_packed_ ## init (); \
- } while (0)
-
+DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR, ALWAYS_FAIL);
+DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR, ALWAYS_FAIL);
+
+#define KUNIT_test_scalars(init) \
+ KUNIT_CASE(test_u8_ ## init), \
+ KUNIT_CASE(test_u16_ ## init), \
+ KUNIT_CASE(test_u32_ ## init), \
+ KUNIT_CASE(test_u64_ ## init), \
+ KUNIT_CASE(test_char_array_ ## init)
+
+#define KUNIT_test_structs(init) \
+ KUNIT_CASE(test_small_hole_ ## init), \
+ KUNIT_CASE(test_big_hole_ ## init), \
+ KUNIT_CASE(test_trailing_hole_ ## init),\
+ KUNIT_CASE(test_packed_ ## init) \
+
+static struct kunit_case stackinit_test_cases[] = {
/* These are explicitly initialized and should always pass. */
- test_scalars(zero);
- test_structs(zero);
+ KUNIT_test_scalars(zero),
+ KUNIT_test_structs(zero),
/* Padding here appears to be accidentally always initialized? */
- test_structs(dynamic_partial);
- test_structs(assigned_dynamic_partial);
+ KUNIT_test_structs(dynamic_partial),
+ KUNIT_test_structs(assigned_dynamic_partial),
/* Padding initialization depends on compiler behaviors. */
- test_structs(static_partial);
- test_structs(static_all);
- test_structs(dynamic_all);
- test_structs(runtime_partial);
- test_structs(runtime_all);
- test_structs(assigned_static_partial);
- test_structs(assigned_static_all);
- test_structs(assigned_dynamic_all);
+ KUNIT_test_structs(static_partial),
+ KUNIT_test_structs(static_all),
+ KUNIT_test_structs(dynamic_all),
+ KUNIT_test_structs(runtime_partial),
+ KUNIT_test_structs(runtime_all),
+ KUNIT_test_structs(assigned_static_partial),
+ KUNIT_test_structs(assigned_static_all),
+ KUNIT_test_structs(assigned_dynamic_all),
/* Everything fails this since it effectively performs a memcpy(). */
- test_structs(assigned_copy);
-
+ KUNIT_test_structs(assigned_copy),
/* STRUCTLEAK_BYREF_ALL should cover everything from here down. */
- test_scalars(none);
- failures += test_switch_1_none();
- failures += test_switch_2_none();
-
+ KUNIT_test_scalars(none),
+ KUNIT_CASE(test_switch_1_none),
+ KUNIT_CASE(test_switch_2_none),
/* STRUCTLEAK_BYREF should cover from here down. */
- test_structs(none);
-
+ KUNIT_test_structs(none),
/* STRUCTLEAK will only cover this. */
- failures += test_user();
-
- if (failures == 0)
- pr_info("all tests passed!\n");
- else
- pr_err("failures: %u\n", failures);
+ KUNIT_CASE(test_user),
+ {}
+};
- return failures ? -EINVAL : 0;
-}
-module_init(test_stackinit_init);
+static struct kunit_suite stackinit_test_suite = {
+ .name = "stackinit",
+ .test_cases = stackinit_test_cases,
+};
-static void __exit test_stackinit_exit(void)
-{ }
-module_exit(test_stackinit_exit);
+kunit_test_suites(&stackinit_test_suite);
MODULE_LICENSE("GPL");
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 767538089a62..cfe632047839 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <linux/cdev.h>
#include <linux/device.h>
+#include <linux/memremap.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
@@ -26,6 +27,8 @@
#include <linux/sched/mm.h>
#include <linux/platform_device.h>
#include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
+#include <linux/migrate.h>
#include "test_hmm_uapi.h"
@@ -563,7 +566,6 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
}
dpage->zone_device_data = rpage;
- get_page(dpage);
lock_page(dpage);
return dpage;
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 847cdbefab46..3b413f8c8a71 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -492,6 +492,7 @@ static void kmalloc_oob_in_memset(struct kunit *test)
ptr = kmalloc(size, GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+ OPTIMIZER_HIDE_VAR(ptr);
OPTIMIZER_HIDE_VAR(size);
KUNIT_EXPECT_KASAN_FAIL(test,
memset(ptr, 0, size + KASAN_GRANULE_SIZE));
@@ -515,6 +516,7 @@ static void kmalloc_memmove_negative_size(struct kunit *test)
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
memset((char *)ptr, 0, 64);
+ OPTIMIZER_HIDE_VAR(ptr);
OPTIMIZER_HIDE_VAR(invalid_size);
KUNIT_EXPECT_KASAN_FAIL(test,
memmove((char *)ptr, (char *)ptr + 4, invalid_size));
@@ -531,6 +533,7 @@ static void kmalloc_memmove_invalid_size(struct kunit *test)
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
memset((char *)ptr, 0, 64);
+ OPTIMIZER_HIDE_VAR(ptr);
KUNIT_EXPECT_KASAN_FAIL(test,
memmove((char *)ptr, (char *)ptr + 4, invalid_size));
kfree(ptr);
@@ -866,11 +869,14 @@ static void kmem_cache_invalid_free(struct kunit *test)
kmem_cache_destroy(cache);
}
+static void empty_cache_ctor(void *object) { }
+
static void kmem_cache_double_destroy(struct kunit *test)
{
struct kmem_cache *cache;
- cache = kmem_cache_create("test_cache", 200, 0, 0, NULL);
+ /* Provide a constructor to prevent cache merging. */
+ cache = kmem_cache_create("test_cache", 200, 0, 0, empty_cache_ctor);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);
kmem_cache_destroy(cache);
KUNIT_EXPECT_KASAN_FAIL(test, kmem_cache_destroy(cache));
@@ -893,6 +899,7 @@ static void kasan_memchr(struct kunit *test)
ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+ OPTIMIZER_HIDE_VAR(ptr);
OPTIMIZER_HIDE_VAR(size);
KUNIT_EXPECT_KASAN_FAIL(test,
kasan_ptr_result = memchr(ptr, '1', size + 1));
@@ -919,6 +926,7 @@ static void kasan_memcmp(struct kunit *test)
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
memset(arr, 0, sizeof(arr));
+ OPTIMIZER_HIDE_VAR(ptr);
OPTIMIZER_HIDE_VAR(size);
KUNIT_EXPECT_KASAN_FAIL(test,
kasan_int_result = memcmp(ptr, arr, size+1));
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 3b8129dd374c..53fe73a48adf 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -49,10 +49,15 @@
#include <asm/page.h> /* for PAGE_SIZE */
#include <asm/byteorder.h> /* cpu_to_le16 */
+#include <asm/unaligned.h>
#include <linux/string_helpers.h>
#include "kstrtox.h"
+/* Disable pointer hashing if requested */
+bool no_hash_pointers __ro_after_init;
+EXPORT_SYMBOL_GPL(no_hash_pointers);
+
static noinline unsigned long long simple_strntoull(const char *startp, size_t max_chars, char **endp, unsigned int base)
{
const char *cp;
@@ -757,14 +762,16 @@ static void enable_ptr_key_workfn(struct work_struct *work)
static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
-static void fill_random_ptr_key(struct random_ready_callback *unused)
+static int fill_random_ptr_key(struct notifier_block *nb,
+ unsigned long action, void *data)
{
/* This may be in an interrupt handler. */
queue_work(system_unbound_wq, &enable_ptr_key_work);
+ return 0;
}
-static struct random_ready_callback random_ready = {
- .func = fill_random_ptr_key
+static struct notifier_block random_ready = {
+ .notifier_call = fill_random_ptr_key
};
static int __init initialize_ptr_random(void)
@@ -778,7 +785,7 @@ static int __init initialize_ptr_random(void)
return 0;
}
- ret = add_random_ready_callback(&random_ready);
+ ret = register_random_ready_notifier(&random_ready);
if (!ret) {
return 0;
} else if (ret == -EALREADY) {
@@ -848,6 +855,19 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr,
return pointer_string(buf, end, (const void *)hashval, spec);
}
+static char *default_pointer(char *buf, char *end, const void *ptr,
+ struct printf_spec spec)
+{
+ /*
+ * default is to _not_ leak addresses, so hash before printing,
+ * unless no_hash_pointers is specified on the command line.
+ */
+ if (unlikely(no_hash_pointers))
+ return pointer_string(buf, end, ptr, spec);
+
+ return ptr_to_id(buf, end, ptr, spec);
+}
+
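Semantics after the refactor, sketched (the behaviour is unchanged; only the plumbing moved into default_pointer()):

    pr_info("%p\n",  ptr);   /* hashed id by default                  */
    pr_info("%px\n", ptr);   /* always the raw address, use with care */
    /* Booting with "no_hash_pointers" makes plain %p print the raw
     * address too -- the unlikely() branch above. */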
int kptr_restrict __read_mostly;
static noinline_for_stack
@@ -857,7 +877,7 @@ char *restricted_pointer(char *buf, char *end, const void *ptr,
switch (kptr_restrict) {
case 0:
/* Handle as %p, hash and do _not_ leak addresses. */
- return ptr_to_id(buf, end, ptr, spec);
+ return default_pointer(buf, end, ptr, spec);
case 1: {
const struct cred *cred;
@@ -1761,7 +1781,7 @@ char *fourcc_string(char *buf, char *end, const u32 *fourcc,
char output[sizeof("0123 little-endian (0x01234567)")];
char *p = output;
unsigned int i;
- u32 val;
+ u32 orig, val;
if (fmt[1] != 'c' || fmt[2] != 'c')
return error_string(buf, end, "(%p4?)", spec);
@@ -1769,21 +1789,23 @@ char *fourcc_string(char *buf, char *end, const u32 *fourcc,
if (check_pointer(&buf, end, fourcc, spec))
return buf;
- val = *fourcc & ~BIT(31);
+ orig = get_unaligned(fourcc);
+ val = orig & ~BIT(31);
- for (i = 0; i < sizeof(*fourcc); i++) {
+ for (i = 0; i < sizeof(u32); i++) {
unsigned char c = val >> (i * 8);
/* Print non-control ASCII characters as-is, dot otherwise */
*p++ = isascii(c) && isprint(c) ? c : '.';
}
- strcpy(p, *fourcc & BIT(31) ? " big-endian" : " little-endian");
+ *p++ = ' ';
+ strcpy(p, orig & BIT(31) ? "big-endian" : "little-endian");
p += strlen(p);
*p++ = ' ';
*p++ = '(';
- p = special_hex_number(p, output + sizeof(output) - 2, *fourcc, sizeof(u32));
+ p = special_hex_number(p, output + sizeof(output) - 2, orig, sizeof(u32));
*p++ = ')';
*p = '\0';
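A concrete rendering, using the common DRM XRGB8888 fourcc as input (illustrative values):

    u32 xr24 = 0x34325258;  /* fourcc_code('X', 'R', '2', '4') */

    printk("%p4cc", &xr24);
    /* -> "XR24 little-endian (0x34325258)"; with BIT(31) set the
     * suffix reads "big-endian". get_unaligned() additionally makes
     * a misaligned *fourcc safe to read. */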
@@ -2223,10 +2245,6 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode,
return widen_string(buf, buf - buf_start, end, spec);
}
-/* Disable pointer hashing if requested */
-bool no_hash_pointers __ro_after_init;
-EXPORT_SYMBOL_GPL(no_hash_pointers);
-
int __init no_hash_pointers_enable(char *str)
{
if (no_hash_pointers)
@@ -2455,7 +2473,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
case 'e':
/* %pe with a non-ERR_PTR gets treated as plain %p */
if (!IS_ERR(ptr))
- break;
+ return default_pointer(buf, end, ptr, spec);
return err_ptr(buf, end, ptr, spec);
case 'u':
case 'k':
@@ -2465,16 +2483,9 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
default:
return error_string(buf, end, "(einval)", spec);
}
+ default:
+ return default_pointer(buf, end, ptr, spec);
}
-
- /*
- * default is to _not_ leak addresses, so hash before printing,
- * unless no_hash_pointers is specified on the command line.
- */
- if (unlikely(no_hash_pointers))
- return pointer_string(buf, end, ptr, spec);
- else
- return ptr_to_id(buf, end, ptr, spec);
}
/*
diff --git a/lib/xarray.c b/lib/xarray.c
index 6f47f6375808..b95e92598b9c 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -302,7 +302,7 @@ bool xas_nomem(struct xa_state *xas, gfp_t gfp)
}
if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
gfp |= __GFP_ACCOUNT;
- xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+ xas->xa_alloc = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp);
if (!xas->xa_alloc)
return false;
xas->xa_alloc->parent = NULL;
@@ -334,10 +334,10 @@ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp)
gfp |= __GFP_ACCOUNT;
if (gfpflags_allow_blocking(gfp)) {
xas_unlock_type(xas, lock_type);
- xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+ xas->xa_alloc = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp);
xas_lock_type(xas, lock_type);
} else {
- xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+ xas->xa_alloc = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp);
}
if (!xas->xa_alloc)
return false;
@@ -371,7 +371,7 @@ static void *xas_alloc(struct xa_state *xas, unsigned int shift)
if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
gfp |= __GFP_ACCOUNT;
- node = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+ node = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp);
if (!node) {
xas_set_err(xas, -ENOMEM);
return NULL;
@@ -1014,7 +1014,7 @@ void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order,
void *sibling = NULL;
struct xa_node *node;
- node = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+ node = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp);
if (!node)
goto nomem;
node->array = xas->xa;
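Callers opt in through the xa_lru field of the xa_state; a hedged usage sketch, assuming the xas_set_lru() helper introduced alongside this change:

    XA_STATE(xas, &mapping->i_pages, index);

    xas_set_lru(&xas, &my_list_lru);  /* hypothetical lru; xa_node
                                       * allocations are now charged
                                       * to this list_lru */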
diff --git a/mm/Kconfig b/mm/Kconfig
index 3326ee3903f3..761f5021ba51 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -249,6 +249,9 @@ config MIGRATION
pages as migration can relocate pages to satisfy a huge page
allocation instead of reclaiming.
+config DEVICE_MIGRATION
+ def_bool MIGRATION && ZONE_DEVICE
+
config ARCH_ENABLE_HUGEPAGE_MIGRATION
bool
@@ -262,6 +265,9 @@ config HUGETLB_PAGE_SIZE_VARIABLE
HUGETLB_PAGE_ORDER when there are multiple HugeTLB page sizes available
on a platform.
+ Note that the pageblock_order cannot exceed MAX_ORDER - 1 and will be
+ clamped down to MAX_ORDER - 1.
+
config CONTIG_ALLOC
def_bool (MEMORY_ISOLATION && COMPACTION) || CMA
@@ -411,6 +417,9 @@ choice
benefit.
endchoice
+config ARCH_WANT_GENERAL_HUGETLB
+ bool
+
config ARCH_WANTS_THP_SWAP
def_bool n
@@ -744,6 +753,15 @@ config IDLE_PAGE_TRACKING
config ARCH_HAS_CACHE_LINE_SIZE
bool
+config ARCH_HAS_CURRENT_STACK_POINTER
+ bool
+ help
+ In support of HARDENED_USERCOPY performing stack variable lifetime
+ checking, an architecture-agnostic way to find the stack pointer
+ is needed. Once an architecture defines an unsigned long global
+ register alias named "current_stack_pointer", this config can be
+ selected.
+
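What selecting this looks like on an architecture, sketched (arm64-style; the exact register name in the asm() constraint varies per architecture):

    /* e.g. in an arch header such as asm/stack_pointer.h: */
    register unsigned long current_stack_pointer asm ("sp");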
config ARCH_HAS_PTE_DEVMAP
bool
@@ -776,9 +794,6 @@ config ZONE_DEVICE
If FS_DAX is enabled, then say Y.
-config DEV_PAGEMAP_OPS
- bool
-
#
# Helpers to mirror range of the CPU page tables of a process into device page
# tables.
@@ -790,7 +805,6 @@ config HMM_MIRROR
config DEVICE_PRIVATE
bool "Unaddressable device memory (GPU memory, ...)"
depends on ZONE_DEVICE
- select DEV_PAGEMAP_OPS
help
Allows creation of struct pages to represent unaddressable device
diff --git a/mm/Makefile b/mm/Makefile
index 70d4309c9ce3..4cc13f3179a5 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -92,6 +92,7 @@ obj-$(CONFIG_KFENCE) += kfence/
obj-$(CONFIG_FAILSLAB) += failslab.o
obj-$(CONFIG_MEMTEST) += memtest.o
obj-$(CONFIG_MIGRATION) += migrate.o
+obj-$(CONFIG_DEVICE_MIGRATION) += migrate_device.o
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index eae96dfe0261..7176af65b103 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -1005,60 +1005,3 @@ const char *bdi_dev_name(struct backing_dev_info *bdi)
return bdi->dev_name;
}
EXPORT_SYMBOL_GPL(bdi_dev_name);
-
-static wait_queue_head_t congestion_wqh[2] = {
- __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
- __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
- };
-static atomic_t nr_wb_congested[2];
-
-void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
-{
- wait_queue_head_t *wqh = &congestion_wqh[sync];
- enum wb_congested_state bit;
-
- bit = sync ? WB_sync_congested : WB_async_congested;
- if (test_and_clear_bit(bit, &bdi->wb.congested))
- atomic_dec(&nr_wb_congested[sync]);
- smp_mb__after_atomic();
- if (waitqueue_active(wqh))
- wake_up(wqh);
-}
-EXPORT_SYMBOL(clear_bdi_congested);
-
-void set_bdi_congested(struct backing_dev_info *bdi, int sync)
-{
- enum wb_congested_state bit;
-
- bit = sync ? WB_sync_congested : WB_async_congested;
- if (!test_and_set_bit(bit, &bdi->wb.congested))
- atomic_inc(&nr_wb_congested[sync]);
-}
-EXPORT_SYMBOL(set_bdi_congested);
-
-/**
- * congestion_wait - wait for a backing_dev to become uncongested
- * @sync: SYNC or ASYNC IO
- * @timeout: timeout in jiffies
- *
- * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
- * write congestion. If no backing_devs are congested then just wait for the
- * next write to be completed.
- */
-long congestion_wait(int sync, long timeout)
-{
- long ret;
- unsigned long start = jiffies;
- DEFINE_WAIT(wait);
- wait_queue_head_t *wqh = &congestion_wqh[sync];
-
- prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
- ret = io_schedule_timeout(timeout);
- finish_wait(wqh, &wait);
-
- trace_writeback_congestion_wait(jiffies_to_usecs(timeout),
- jiffies_to_usecs(jiffies - start));
-
- return ret;
-}
-EXPORT_SYMBOL(congestion_wait);
diff --git a/mm/cma.c b/mm/cma.c
index bc9ca8f3c487..eaa4b5c920a2 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -131,8 +131,10 @@ not_in_zone:
bitmap_free(cma->bitmap);
out_error:
/* Expose all pages to the buddy, they are useless for CMA. */
- for (pfn = base_pfn; pfn < base_pfn + cma->count; pfn++)
- free_reserved_page(pfn_to_page(pfn));
+ if (!cma->reserve_pages_on_error) {
+ for (pfn = base_pfn; pfn < base_pfn + cma->count; pfn++)
+ free_reserved_page(pfn_to_page(pfn));
+ }
totalcma_pages -= cma->count;
cma->count = 0;
pr_err("CMA area %s could not be activated\n", cma->name);
@@ -150,6 +152,11 @@ static int __init cma_init_reserved_areas(void)
}
core_initcall(cma_init_reserved_areas);
+void __init cma_reserve_pages_on_error(struct cma *cma)
+{
+ cma->reserve_pages_on_error = true;
+}
+
/**
* cma_init_reserved_mem() - create custom contiguous area from reserved memory
* @base: Base address of the reserved area
@@ -168,7 +175,6 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
struct cma **res_cma)
{
struct cma *cma;
- phys_addr_t alignment;
/* Sanity checks */
if (cma_area_count == ARRAY_SIZE(cma_areas)) {
@@ -179,15 +185,12 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
if (!size || !memblock_is_region_reserved(base, size))
return -EINVAL;
- /* ensure minimal alignment required by mm core */
- alignment = PAGE_SIZE <<
- max_t(unsigned long, MAX_ORDER - 1, pageblock_order);
-
/* alignment should be aligned with order_per_bit */
- if (!IS_ALIGNED(alignment >> PAGE_SHIFT, 1 << order_per_bit))
+ if (!IS_ALIGNED(CMA_MIN_ALIGNMENT_PAGES, 1 << order_per_bit))
return -EINVAL;
- if (ALIGN(base, alignment) != base || ALIGN(size, alignment) != size)
+ /* ensure minimal alignment required by mm core */
+ if (!IS_ALIGNED(base | size, CMA_MIN_ALIGNMENT_BYTES))
return -EINVAL;
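The base | size trick works because OR preserves every set low-order bit from either operand, so the OR is aligned iff both inputs are. With a hypothetical 4 MiB (2^22) minimum alignment:

    /* base = 0x00400000, size = 0x00800000:
     *   base | size = 0x00C00000, low 22 bits clear -> accepted
     * base = 0x00401000 leaves bit 12 set in the OR -> -EINVAL
     */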
/*
@@ -262,14 +265,8 @@ int __init cma_declare_contiguous_nid(phys_addr_t base,
if (alignment && !is_power_of_2(alignment))
return -EINVAL;
- /*
- * Sanitise input arguments.
- * Pages both ends in CMA area could be merged into adjacent unmovable
- * migratetype page by page allocator's buddy algorithm. In the case,
- * you couldn't get a contiguous memory, which is not what we want.
- */
- alignment = max(alignment, (phys_addr_t)PAGE_SIZE <<
- max_t(unsigned long, MAX_ORDER - 1, pageblock_order));
+ /* Sanitise input arguments. */
+ alignment = max_t(phys_addr_t, alignment, CMA_MIN_ALIGNMENT_BYTES);
if (fixed && base & (alignment - 1)) {
ret = -EINVAL;
pr_err("Region at %pa must be aligned to %pa bytes\n",
diff --git a/mm/cma.h b/mm/cma.h
index 2c775877eae2..88a0595670b7 100644
--- a/mm/cma.h
+++ b/mm/cma.h
@@ -30,6 +30,7 @@ struct cma {
/* kobject requires dynamic object */
struct cma_kobject *cma_kobj;
#endif
+ bool reserve_pages_on_error;
};
extern struct cma cma_areas[MAX_CMA_AREAS];
diff --git a/mm/compaction.c b/mm/compaction.c
index b4e94cda3019..c3e37aa9ff9e 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -785,7 +785,7 @@ static bool too_many_isolated(pg_data_t *pgdat)
* @cc: Compaction control structure.
* @low_pfn: The first PFN to isolate
* @end_pfn: The one-past-the-last PFN to isolate, within same pageblock
- * @isolate_mode: Isolation mode to be used.
+ * @mode: Isolation mode to be used.
*
* Isolate all pages that can be migrated from the range specified by
* [low_pfn, end_pfn). The range is expected to be within same pageblock.
@@ -798,7 +798,7 @@ static bool too_many_isolated(pg_data_t *pgdat)
*/
static int
isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
- unsigned long end_pfn, isolate_mode_t isolate_mode)
+ unsigned long end_pfn, isolate_mode_t mode)
{
pg_data_t *pgdat = cc->zone->zone_pgdat;
unsigned long nr_scanned = 0, nr_isolated = 0;
@@ -806,6 +806,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
unsigned long flags = 0;
struct lruvec *locked = NULL;
struct page *page = NULL, *valid_page = NULL;
+ struct address_space *mapping;
unsigned long start_pfn = low_pfn;
bool skip_on_failure = false;
unsigned long next_skip_pfn = 0;
@@ -990,7 +991,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
locked = NULL;
}
- if (!isolate_movable_page(page, isolate_mode))
+ if (!isolate_movable_page(page, mode))
goto isolate_success;
}
@@ -1002,15 +1003,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
* so avoid taking lru_lock and isolating it unnecessarily in an
* admittedly racy check.
*/
- if (!page_mapping(page) &&
- page_count(page) > page_mapcount(page))
+ mapping = page_mapping(page);
+ if (!mapping && page_count(page) > page_mapcount(page))
goto isolate_fail;
/*
* Only allow to migrate anonymous pages in GFP_NOFS context
* because those do not depend on fs locks.
*/
- if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
+ if (!(cc->gfp_mask & __GFP_FS) && mapping)
goto isolate_fail;
/*
@@ -1021,9 +1022,45 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
if (unlikely(!get_page_unless_zero(page)))
goto isolate_fail;
- if (!__isolate_lru_page_prepare(page, isolate_mode))
+ /* Only take pages on LRU: a check now makes later tests safe */
+ if (!PageLRU(page))
+ goto isolate_fail_put;
+
+ /* Compaction might skip unevictable pages but CMA takes them */
+ if (!(mode & ISOLATE_UNEVICTABLE) && PageUnevictable(page))
+ goto isolate_fail_put;
+
+ /*
+ * To minimise LRU disruption, the caller can indicate with
+ * ISOLATE_ASYNC_MIGRATE that it only wants to isolate pages
+ * it will be able to migrate without blocking - clean pages
+ * for the most part. PageWriteback would require blocking.
+ */
+ if ((mode & ISOLATE_ASYNC_MIGRATE) && PageWriteback(page))
goto isolate_fail_put;
+ if ((mode & ISOLATE_ASYNC_MIGRATE) && PageDirty(page)) {
+ bool migrate_dirty;
+
+ /*
+ * Only pages without mappings or that have a
+ * ->migratepage callback are possible to migrate
+ * without blocking. However, we can be racing with
+ * truncation so it's necessary to lock the page
+ * to stabilise the mapping as truncation holds
+ * the page lock until after the page is removed
+ * from the page cache.
+ */
+ if (!trylock_page(page))
+ goto isolate_fail_put;
+
+ mapping = page_mapping(page);
+ migrate_dirty = !mapping || mapping->a_ops->migratepage;
+ unlock_page(page);
+ if (!migrate_dirty)
+ goto isolate_fail_put;
+ }
+
/* Try isolate the page */
if (!TestClearPageLRU(page))
goto isolate_fail_put;
@@ -2350,8 +2387,7 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
update_cached = !sync &&
cc->zone->compact_cached_migrate_pfn[0] == cc->zone->compact_cached_migrate_pfn[1];
- trace_mm_compaction_begin(start_pfn, cc->migrate_pfn,
- cc->free_pfn, end_pfn, sync);
+ trace_mm_compaction_begin(cc, start_pfn, end_pfn, sync);
/* lru_add_drain_all could be expensive with involving other CPUs */
lru_add_drain();
@@ -2401,8 +2437,7 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
compaction_free, (unsigned long)cc, cc->mode,
MR_COMPACTION, &nr_succeeded);
- trace_mm_compaction_migratepages(cc->nr_migratepages,
- nr_succeeded);
+ trace_mm_compaction_migratepages(cc, nr_succeeded);
/* All pages were either migrated or will be released */
cc->nr_migratepages = 0;
@@ -2478,8 +2513,7 @@ out:
count_compact_events(COMPACTMIGRATE_SCANNED, cc->total_migrate_scanned);
count_compact_events(COMPACTFREE_SCANNED, cc->total_free_scanned);
- trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
- cc->free_pfn, end_pfn, sync, ret);
+ trace_mm_compaction_end(cc, start_pfn, end_pfn, sync, ret);
return ret;
}
diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig
index 5bcf05851ad0..9b559c76d6dd 100644
--- a/mm/damon/Kconfig
+++ b/mm/damon/Kconfig
@@ -25,33 +25,40 @@ config DAMON_KUNIT_TEST
If unsure, say N.
config DAMON_VADDR
- bool "Data access monitoring primitives for virtual address spaces"
+ bool "Data access monitoring operations for virtual address spaces"
depends on DAMON && MMU
select PAGE_IDLE_FLAG
help
- This builds the default data access monitoring primitives for DAMON
+ This builds the default data access monitoring operations for DAMON
that work for virtual address spaces.
config DAMON_PADDR
- bool "Data access monitoring primitives for the physical address space"
+ bool "Data access monitoring operations for the physical address space"
depends on DAMON && MMU
select PAGE_IDLE_FLAG
help
- This builds the default data access monitoring primitives for DAMON
+ This builds the default data access monitoring operations for DAMON
that works for the physical address space.
config DAMON_VADDR_KUNIT_TEST
- bool "Test for DAMON primitives" if !KUNIT_ALL_TESTS
+ bool "Test for DAMON operations" if !KUNIT_ALL_TESTS
depends on DAMON_VADDR && KUNIT=y
default KUNIT_ALL_TESTS
help
- This builds the DAMON virtual addresses primitives Kunit test suite.
+ This builds the DAMON virtual address operations KUnit test suite.
For more information on KUnit and unit tests in general, please refer
to the KUnit documentation.
If unsure, say N.
+config DAMON_SYSFS
+ bool "DAMON sysfs interface"
+ depends on DAMON && SYSFS
+ help
+ This builds the sysfs interface for DAMON. User space can use
+ the interface for arbitrary data access monitoring.
+
config DAMON_DBGFS
bool "DAMON debugfs interface"
depends on DAMON_VADDR && DAMON_PADDR && DEBUG_FS
diff --git a/mm/damon/Makefile b/mm/damon/Makefile
index f7d5ac377a2b..dbf7190b4144 100644
--- a/mm/damon/Makefile
+++ b/mm/damon/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_DAMON) := core.o
-obj-$(CONFIG_DAMON_VADDR) += prmtv-common.o vaddr.o
-obj-$(CONFIG_DAMON_PADDR) += prmtv-common.o paddr.o
+obj-y := core.o
+obj-$(CONFIG_DAMON_VADDR) += ops-common.o vaddr.o
+obj-$(CONFIG_DAMON_PADDR) += ops-common.o paddr.o
+obj-$(CONFIG_DAMON_SYSFS) += sysfs.o
obj-$(CONFIG_DAMON_DBGFS) += dbgfs.o
obj-$(CONFIG_DAMON_RECLAIM) += reclaim.o
diff --git a/mm/damon/core-test.h b/mm/damon/core-test.h
index 7008c3735e99..b4085deb9fa0 100644
--- a/mm/damon/core-test.h
+++ b/mm/damon/core-test.h
@@ -24,7 +24,7 @@ static void damon_test_regions(struct kunit *test)
KUNIT_EXPECT_EQ(test, 2ul, r->ar.end);
KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses);
- t = damon_new_target(42);
+ t = damon_new_target();
KUNIT_EXPECT_EQ(test, 0u, damon_nr_regions(t));
damon_add_region(r, t);
@@ -52,8 +52,7 @@ static void damon_test_target(struct kunit *test)
struct damon_ctx *c = damon_new_ctx();
struct damon_target *t;
- t = damon_new_target(42);
- KUNIT_EXPECT_EQ(test, 42ul, t->id);
+ t = damon_new_target();
KUNIT_EXPECT_EQ(test, 0u, nr_damon_targets(c));
damon_add_target(c, t);
@@ -78,7 +77,6 @@ static void damon_test_target(struct kunit *test)
static void damon_test_aggregate(struct kunit *test)
{
struct damon_ctx *ctx = damon_new_ctx();
- unsigned long target_ids[] = {1, 2, 3};
unsigned long saddr[][3] = {{10, 20, 30}, {5, 42, 49}, {13, 33, 55} };
unsigned long eaddr[][3] = {{15, 27, 40}, {31, 45, 55}, {23, 44, 66} };
unsigned long accesses[][3] = {{42, 95, 84}, {10, 20, 30}, {0, 1, 2} };
@@ -86,7 +84,10 @@ static void damon_test_aggregate(struct kunit *test)
struct damon_region *r;
int it, ir;
- damon_set_targets(ctx, target_ids, 3);
+ for (it = 0; it < 3; it++) {
+ t = damon_new_target();
+ damon_add_target(ctx, t);
+ }
it = 0;
damon_for_each_target(t, ctx) {
@@ -122,7 +123,7 @@ static void damon_test_split_at(struct kunit *test)
struct damon_target *t;
struct damon_region *r;
- t = damon_new_target(42);
+ t = damon_new_target();
r = damon_new_region(0, 100);
damon_add_region(r, t);
damon_split_region_at(c, t, r, 25);
@@ -143,7 +144,7 @@ static void damon_test_merge_two(struct kunit *test)
struct damon_region *r, *r2, *r3;
int i;
- t = damon_new_target(42);
+ t = damon_new_target();
r = damon_new_region(0, 100);
r->nr_accesses = 10;
damon_add_region(r, t);
@@ -191,7 +192,7 @@ static void damon_test_merge_regions_of(struct kunit *test)
unsigned long eaddrs[] = {112, 130, 156, 170, 230};
int i;
- t = damon_new_target(42);
+ t = damon_new_target();
for (i = 0; i < ARRAY_SIZE(sa); i++) {
r = damon_new_region(sa[i], ea[i]);
r->nr_accesses = nrs[i];
@@ -215,14 +216,14 @@ static void damon_test_split_regions_of(struct kunit *test)
struct damon_target *t;
struct damon_region *r;
- t = damon_new_target(42);
+ t = damon_new_target();
r = damon_new_region(0, 22);
damon_add_region(r, t);
damon_split_regions_of(c, t, 2);
KUNIT_EXPECT_LE(test, damon_nr_regions(t), 2u);
damon_free_target(t);
- t = damon_new_target(42);
+ t = damon_new_target();
r = damon_new_region(0, 220);
damon_add_region(r, t);
damon_split_regions_of(c, t, 4);
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 1dd153c31c9e..c1e0fed4e877 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -24,6 +24,73 @@
static DEFINE_MUTEX(damon_lock);
static int nr_running_ctxs;
+static bool running_exclusive_ctxs;
+
+static DEFINE_MUTEX(damon_ops_lock);
+static struct damon_operations damon_registered_ops[NR_DAMON_OPS];
+
+/* Should be called under damon_ops_lock with id smaller than NR_DAMON_OPS */
+static bool damon_registered_ops_id(enum damon_ops_id id)
+{
+ struct damon_operations empty_ops = {};
+
+ if (!memcmp(&empty_ops, &damon_registered_ops[id], sizeof(empty_ops)))
+ return false;
+ return true;
+}
+
+/**
+ * damon_register_ops() - Register a monitoring operations set to DAMON.
+ * @ops: monitoring operations set to register.
+ *
+ * This function registers a monitoring operations set having a valid &struct
+ * damon_operations->id, so that others can find and use it later.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_register_ops(struct damon_operations *ops)
+{
+ int err = 0;
+
+ if (ops->id >= NR_DAMON_OPS)
+ return -EINVAL;
+ mutex_lock(&damon_ops_lock);
+ /* Fail for already registered ops */
+ if (damon_registered_ops_id(ops->id)) {
+ err = -EINVAL;
+ goto out;
+ }
+ damon_registered_ops[ops->id] = *ops;
+out:
+ mutex_unlock(&damon_ops_lock);
+ return err;
+}
+
+/**
+ * damon_select_ops() - Select a monitoring operations set to use with the context.
+ * @ctx: monitoring context to use the operations.
+ * @id: id of the registered monitoring operations to select.
+ *
+ * This function finds the registered monitoring operations set of @id and
+ * makes @ctx use it.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id)
+{
+ int err = 0;
+
+ if (id >= NR_DAMON_OPS)
+ return -EINVAL;
+
+ mutex_lock(&damon_ops_lock);
+ if (!damon_registered_ops_id(id))
+ err = -EINVAL;
+ else
+ ctx->ops = damon_registered_ops[id];
+ mutex_unlock(&damon_ops_lock);
+ return err;
+}
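
The two helpers above carry the whole life cycle of the pluggable model: an operations set is registered once under its id, and contexts later attach to it by that id. A minimal sketch of a caller, assuming a hypothetical set that only implements check_accesses (damon_register_ops() copies the struct, so a stack-local ops is fine):

	/* Sketch only: registering and selecting a hypothetical ops set. */
	static unsigned int my_check_accesses(struct damon_ctx *ctx)
	{
		return 0;	/* stub: report no accesses */
	}

	static int my_setup(struct damon_ctx *ctx)
	{
		struct damon_operations ops = {
			.id = DAMON_OPS_PADDR,	/* any valid, still-free id */
			.check_accesses = my_check_accesses,
		};
		int err = damon_register_ops(&ops);	/* -EINVAL if id taken */

		if (err)
			return err;
		return damon_select_ops(ctx, ops.id);	/* ctx->ops = copy */
	}
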
/*
* Construct a damon_region struct
@@ -144,7 +211,7 @@ void damon_destroy_scheme(struct damos *s)
*
* Returns the pointer to the new struct if success, or NULL otherwise
*/
-struct damon_target *damon_new_target(unsigned long id)
+struct damon_target *damon_new_target(void)
{
struct damon_target *t;
@@ -152,7 +219,7 @@ struct damon_target *damon_new_target(unsigned long id)
if (!t)
return NULL;
- t->id = id;
+ t->pid = NULL;
t->nr_regions = 0;
INIT_LIST_HEAD(&t->regions_list);
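
With the id field gone, a target is created empty and the caller that knows the address space fills in the pid, or leaves it NULL for the physical address space, as dbgfs_set_targets() later in this series does. A hedged sketch (the pid number is assumed):

	/* Sketch: building a vaddr-style target after this change. */
	struct damon_target *t = damon_new_target();

	if (t) {
		t->pid = find_get_pid(1234);	/* 1234: assumed target pid */
		damon_add_target(ctx, t);	/* NULL pid would mean paddr */
	}
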
@@ -204,10 +271,10 @@ struct damon_ctx *damon_new_ctx(void)
ctx->sample_interval = 5 * 1000;
ctx->aggr_interval = 100 * 1000;
- ctx->primitive_update_interval = 60 * 1000 * 1000;
+ ctx->ops_update_interval = 60 * 1000 * 1000;
ktime_get_coarse_ts64(&ctx->last_aggregation);
- ctx->last_primitive_update = ctx->last_aggregation;
+ ctx->last_ops_update = ctx->last_aggregation;
mutex_init(&ctx->kdamond_lock);
@@ -224,8 +291,8 @@ static void damon_destroy_targets(struct damon_ctx *ctx)
{
struct damon_target *t, *next_t;
- if (ctx->primitive.cleanup) {
- ctx->primitive.cleanup(ctx);
+ if (ctx->ops.cleanup) {
+ ctx->ops.cleanup(ctx);
return;
}
@@ -246,43 +313,11 @@ void damon_destroy_ctx(struct damon_ctx *ctx)
}
/**
- * damon_set_targets() - Set monitoring targets.
- * @ctx: monitoring context
- * @ids: array of target ids
- * @nr_ids: number of entries in @ids
- *
- * This function should not be called while the kdamond is running.
- *
- * Return: 0 on success, negative error code otherwise.
- */
-int damon_set_targets(struct damon_ctx *ctx,
- unsigned long *ids, ssize_t nr_ids)
-{
- ssize_t i;
- struct damon_target *t, *next;
-
- damon_destroy_targets(ctx);
-
- for (i = 0; i < nr_ids; i++) {
- t = damon_new_target(ids[i]);
- if (!t) {
- /* The caller should do cleanup of the ids itself */
- damon_for_each_target_safe(t, next, ctx)
- damon_destroy_target(t);
- return -ENOMEM;
- }
- damon_add_target(ctx, t);
- }
-
- return 0;
-}
-
-/**
* damon_set_attrs() - Set attributes for the monitoring.
* @ctx: monitoring context
* @sample_int: time interval between samplings
* @aggr_int: time interval between aggregations
- * @primitive_upd_int: time interval between monitoring primitive updates
+ * @ops_upd_int: time interval between monitoring operations updates
* @min_nr_reg: minimal number of regions
* @max_nr_reg: maximum number of regions
*
@@ -292,7 +327,7 @@ int damon_set_targets(struct damon_ctx *ctx,
* Return: 0 on success, negative error code otherwise.
*/
int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
- unsigned long aggr_int, unsigned long primitive_upd_int,
+ unsigned long aggr_int, unsigned long ops_upd_int,
unsigned long min_nr_reg, unsigned long max_nr_reg)
{
if (min_nr_reg < 3)
@@ -302,7 +337,7 @@ int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
ctx->sample_interval = sample_int;
ctx->aggr_interval = aggr_int;
- ctx->primitive_update_interval = primitive_upd_int;
+ ctx->ops_update_interval = ops_upd_int;
ctx->min_nr_regions = min_nr_reg;
ctx->max_nr_regions = max_nr_reg;
@@ -400,22 +435,25 @@ static int __damon_start(struct damon_ctx *ctx)
* damon_start() - Starts the monitorings for a given group of contexts.
* @ctxs: an array of the pointers for contexts to start monitoring
* @nr_ctxs: size of @ctxs
+ * @exclusive:	exclusiveness of this group of contexts
*
* This function starts a group of monitoring threads for a group of monitoring
* contexts. One thread per each context is created and run in parallel. The
- * caller should handle synchronization between the threads by itself. If a
- * group of threads that created by other 'damon_start()' call is currently
- * running, this function does nothing but returns -EBUSY.
+ * caller should handle synchronization between the threads by itself. If
+ * @exclusive is true and a group of threads created by another
+ * 'damon_start()' call is currently running, this function does nothing but
+ * returns -EBUSY.
*
* Return: 0 on success, negative error code otherwise.
*/
-int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
+int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive)
{
int i;
int err = 0;
mutex_lock(&damon_lock);
- if (nr_running_ctxs) {
+ if ((exclusive && nr_running_ctxs) ||
+ (!exclusive && running_exclusive_ctxs)) {
mutex_unlock(&damon_lock);
return -EBUSY;
}
@@ -426,13 +464,15 @@ int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
break;
nr_running_ctxs++;
}
+ if (exclusive && nr_running_ctxs)
+ running_exclusive_ctxs = true;
mutex_unlock(&damon_lock);
return err;
}
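
In effect: an exclusive group (the debugfs path below passes true) refuses to start while any context group runs, while a non-exclusive group may coexist with other non-exclusive groups but not with an exclusive one. Both call shapes, sketched:

	/* Sketch: the two start modes under the new exclusiveness rule. */
	struct damon_ctx *group[1] = { ctx };
	int err;

	/* exclusive: -EBUSY if any other context group is running */
	err = damon_start(group, 1, true);

	/* non-exclusive: -EBUSY only while an exclusive group runs */
	err = damon_start(group, 1, false);
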
/*
- * __damon_stop() - Stops monitoring of given context.
+ * __damon_stop() - Stops monitoring of a given context.
* @ctx: monitoring context
*
* Return: 0 on success, negative error code otherwise.
@@ -470,9 +510,8 @@ int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
/* nr_running_ctxs is decremented in kdamond_fn */
err = __damon_stop(ctxs[i]);
if (err)
- return err;
+ break;
}
-
return err;
}
@@ -548,10 +587,10 @@ static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
{
bool ret = __damos_valid_target(r, s);
- if (!ret || !s->quota.esz || !c->primitive.get_scheme_score)
+ if (!ret || !s->quota.esz || !c->ops.get_scheme_score)
return ret;
- return c->primitive.get_scheme_score(c, t, r, s) >= s->quota.min_score;
+ return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score;
}
static void damon_do_apply_schemes(struct damon_ctx *c,
@@ -608,7 +647,7 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
continue;
/* Apply the scheme */
- if (c->primitive.apply_scheme) {
+ if (c->ops.apply_scheme) {
if (quota->esz &&
quota->charged_sz + sz > quota->esz) {
sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
@@ -618,7 +657,7 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
damon_split_region_at(c, t, r, sz);
}
ktime_get_coarse_ts64(&begin);
- sz_applied = c->primitive.apply_scheme(c, t, r, s);
+ sz_applied = c->ops.apply_scheme(c, t, r, s);
ktime_get_coarse_ts64(&end);
quota->total_charged_ns += timespec64_to_ns(&end) -
timespec64_to_ns(&begin);
@@ -692,7 +731,7 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
damos_set_effective_quota(quota);
}
- if (!c->primitive.get_scheme_score)
+ if (!c->ops.get_scheme_score)
continue;
/* Fill up the score histogram */
@@ -701,7 +740,7 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
damon_for_each_region(r, t) {
if (!__damos_valid_target(r, s))
continue;
- score = c->primitive.get_scheme_score(
+ score = c->ops.get_scheme_score(
c, t, r, s);
quota->histogram[score] +=
r->ar.end - r->ar.start;
@@ -880,14 +919,15 @@ static void kdamond_split_regions(struct damon_ctx *ctx)
}
/*
- * Check whether it is time to check and apply the target monitoring regions
+ * Check whether it is time to check and apply the operations-related data
+ * structures.
*
* Returns true if it is.
*/
-static bool kdamond_need_update_primitive(struct damon_ctx *ctx)
+static bool kdamond_need_update_operations(struct damon_ctx *ctx)
{
- return damon_check_reset_time_interval(&ctx->last_primitive_update,
- ctx->primitive_update_interval);
+ return damon_check_reset_time_interval(&ctx->last_ops_update,
+ ctx->ops_update_interval);
}
/*
@@ -905,11 +945,11 @@ static bool kdamond_need_stop(struct damon_ctx *ctx)
if (kthread_should_stop())
return true;
- if (!ctx->primitive.target_valid)
+ if (!ctx->ops.target_valid)
return false;
damon_for_each_target(t, ctx) {
- if (ctx->primitive.target_valid(t))
+ if (ctx->ops.target_valid(t))
return false;
}
@@ -1008,8 +1048,8 @@ static int kdamond_fn(void *data)
pr_debug("kdamond (%d) starts\n", current->pid);
- if (ctx->primitive.init)
- ctx->primitive.init(ctx);
+ if (ctx->ops.init)
+ ctx->ops.init(ctx);
if (ctx->callback.before_start && ctx->callback.before_start(ctx))
done = true;
@@ -1019,16 +1059,16 @@ static int kdamond_fn(void *data)
if (kdamond_wait_activation(ctx))
continue;
- if (ctx->primitive.prepare_access_checks)
- ctx->primitive.prepare_access_checks(ctx);
+ if (ctx->ops.prepare_access_checks)
+ ctx->ops.prepare_access_checks(ctx);
if (ctx->callback.after_sampling &&
ctx->callback.after_sampling(ctx))
done = true;
kdamond_usleep(ctx->sample_interval);
- if (ctx->primitive.check_accesses)
- max_nr_accesses = ctx->primitive.check_accesses(ctx);
+ if (ctx->ops.check_accesses)
+ max_nr_accesses = ctx->ops.check_accesses(ctx);
if (kdamond_aggregate_interval_passed(ctx)) {
kdamond_merge_regions(ctx,
@@ -1040,13 +1080,13 @@ static int kdamond_fn(void *data)
kdamond_apply_schemes(ctx);
kdamond_reset_aggregated(ctx);
kdamond_split_regions(ctx);
- if (ctx->primitive.reset_aggregated)
- ctx->primitive.reset_aggregated(ctx);
+ if (ctx->ops.reset_aggregated)
+ ctx->ops.reset_aggregated(ctx);
}
- if (kdamond_need_update_primitive(ctx)) {
- if (ctx->primitive.update)
- ctx->primitive.update(ctx);
+ if (kdamond_need_update_operations(ctx)) {
+ if (ctx->ops.update)
+ ctx->ops.update(ctx);
sz_limit = damon_region_sz_limit(ctx);
}
}
@@ -1057,8 +1097,8 @@ static int kdamond_fn(void *data)
if (ctx->callback.before_terminate)
ctx->callback.before_terminate(ctx);
- if (ctx->primitive.cleanup)
- ctx->primitive.cleanup(ctx);
+ if (ctx->ops.cleanup)
+ ctx->ops.cleanup(ctx);
pr_debug("kdamond (%d) finishes\n", current->pid);
mutex_lock(&ctx->kdamond_lock);
@@ -1067,6 +1107,8 @@ static int kdamond_fn(void *data)
mutex_lock(&damon_lock);
nr_running_ctxs--;
+ if (!nr_running_ctxs && running_exclusive_ctxs)
+ running_exclusive_ctxs = false;
mutex_unlock(&damon_lock);
return 0;
diff --git a/mm/damon/dbgfs-test.h b/mm/damon/dbgfs-test.h
index 86b9f9528231..0bb0d532b159 100644
--- a/mm/damon/dbgfs-test.h
+++ b/mm/damon/dbgfs-test.h
@@ -12,66 +12,58 @@
#include <kunit/test.h>
-static void damon_dbgfs_test_str_to_target_ids(struct kunit *test)
+static void damon_dbgfs_test_str_to_ints(struct kunit *test)
{
char *question;
- unsigned long *answers;
- unsigned long expected[] = {12, 35, 46};
+ int *answers;
+ int expected[] = {12, 35, 46};
ssize_t nr_integers = 0, i;
question = "123";
- answers = str_to_target_ids(question, strlen(question),
- &nr_integers);
+ answers = str_to_ints(question, strlen(question), &nr_integers);
KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers);
- KUNIT_EXPECT_EQ(test, 123ul, answers[0]);
+ KUNIT_EXPECT_EQ(test, 123, answers[0]);
kfree(answers);
question = "123abc";
- answers = str_to_target_ids(question, strlen(question),
- &nr_integers);
+ answers = str_to_ints(question, strlen(question), &nr_integers);
KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers);
- KUNIT_EXPECT_EQ(test, 123ul, answers[0]);
+ KUNIT_EXPECT_EQ(test, 123, answers[0]);
kfree(answers);
question = "a123";
- answers = str_to_target_ids(question, strlen(question),
- &nr_integers);
+ answers = str_to_ints(question, strlen(question), &nr_integers);
KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers);
kfree(answers);
question = "12 35";
- answers = str_to_target_ids(question, strlen(question),
- &nr_integers);
+ answers = str_to_ints(question, strlen(question), &nr_integers);
KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers);
for (i = 0; i < nr_integers; i++)
KUNIT_EXPECT_EQ(test, expected[i], answers[i]);
kfree(answers);
question = "12 35 46";
- answers = str_to_target_ids(question, strlen(question),
- &nr_integers);
+ answers = str_to_ints(question, strlen(question), &nr_integers);
KUNIT_EXPECT_EQ(test, (ssize_t)3, nr_integers);
for (i = 0; i < nr_integers; i++)
KUNIT_EXPECT_EQ(test, expected[i], answers[i]);
kfree(answers);
question = "12 35 abc 46";
- answers = str_to_target_ids(question, strlen(question),
- &nr_integers);
+ answers = str_to_ints(question, strlen(question), &nr_integers);
KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers);
for (i = 0; i < 2; i++)
KUNIT_EXPECT_EQ(test, expected[i], answers[i]);
kfree(answers);
question = "";
- answers = str_to_target_ids(question, strlen(question),
- &nr_integers);
+ answers = str_to_ints(question, strlen(question), &nr_integers);
KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers);
kfree(answers);
question = "\n";
- answers = str_to_target_ids(question, strlen(question),
- &nr_integers);
+ answers = str_to_ints(question, strlen(question), &nr_integers);
KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers);
kfree(answers);
}
@@ -79,30 +71,20 @@ static void damon_dbgfs_test_str_to_target_ids(struct kunit *test)
static void damon_dbgfs_test_set_targets(struct kunit *test)
{
struct damon_ctx *ctx = dbgfs_new_ctx();
- unsigned long ids[] = {1, 2, 3};
char buf[64];
- /* Make DAMON consider target id as plain number */
- ctx->primitive.target_valid = NULL;
- ctx->primitive.cleanup = NULL;
+ /* Make DAMON consider target has no pid */
+ damon_select_ops(ctx, DAMON_OPS_PADDR);
- damon_set_targets(ctx, ids, 3);
- sprint_target_ids(ctx, buf, 64);
- KUNIT_EXPECT_STREQ(test, (char *)buf, "1 2 3\n");
-
- damon_set_targets(ctx, NULL, 0);
+ dbgfs_set_targets(ctx, 0, NULL);
sprint_target_ids(ctx, buf, 64);
KUNIT_EXPECT_STREQ(test, (char *)buf, "\n");
- damon_set_targets(ctx, (unsigned long []){1, 2}, 2);
+ dbgfs_set_targets(ctx, 1, NULL);
sprint_target_ids(ctx, buf, 64);
- KUNIT_EXPECT_STREQ(test, (char *)buf, "1 2\n");
+ KUNIT_EXPECT_STREQ(test, (char *)buf, "42\n");
- damon_set_targets(ctx, (unsigned long []){2}, 1);
- sprint_target_ids(ctx, buf, 64);
- KUNIT_EXPECT_STREQ(test, (char *)buf, "2\n");
-
- damon_set_targets(ctx, NULL, 0);
+ dbgfs_set_targets(ctx, 0, NULL);
sprint_target_ids(ctx, buf, 64);
KUNIT_EXPECT_STREQ(test, (char *)buf, "\n");
@@ -112,25 +94,26 @@ static void damon_dbgfs_test_set_targets(struct kunit *test)
static void damon_dbgfs_test_set_init_regions(struct kunit *test)
{
struct damon_ctx *ctx = damon_new_ctx();
- unsigned long ids[] = {1, 2, 3};
- /* Each line represents one region in ``<target id> <start> <end>`` */
- char * const valid_inputs[] = {"2 10 20\n 2 20 30\n2 35 45",
- "2 10 20\n",
- "2 10 20\n1 39 59\n1 70 134\n 2 20 25\n",
+ /* Each line represents one region in ``<target idx> <start> <end>`` */
+ char * const valid_inputs[] = {"1 10 20\n 1 20 30\n1 35 45",
+ "1 10 20\n",
+ "1 10 20\n0 39 59\n0 70 134\n 1 20 25\n",
""};
/* Reading the file again will show sorted, clean output */
- char * const valid_expects[] = {"2 10 20\n2 20 30\n2 35 45\n",
- "2 10 20\n",
- "1 39 59\n1 70 134\n2 10 20\n2 20 25\n",
+ char * const valid_expects[] = {"1 10 20\n1 20 30\n1 35 45\n",
+ "1 10 20\n",
+ "0 39 59\n0 70 134\n1 10 20\n1 20 25\n",
""};
- char * const invalid_inputs[] = {"4 10 20\n", /* target not exists */
- "2 10 20\n 2 14 26\n", /* regions overlap */
- "1 10 20\n2 30 40\n 1 5 8"}; /* not sorted by address */
+ char * const invalid_inputs[] = {"3 10 20\n", /* target does not exist */
+ "1 10 20\n 1 14 26\n", /* regions overlap */
+ "0 10 20\n1 30 40\n 0 5 8"}; /* not sorted by address */
char *input, *expect;
int i, rc;
char buf[256];
- damon_set_targets(ctx, ids, 3);
+ damon_select_ops(ctx, DAMON_OPS_PADDR);
+
+ dbgfs_set_targets(ctx, 3, NULL);
/* Put valid inputs and check the results */
for (i = 0; i < ARRAY_SIZE(valid_inputs); i++) {
@@ -158,12 +141,12 @@ static void damon_dbgfs_test_set_init_regions(struct kunit *test)
KUNIT_EXPECT_STREQ(test, (char *)buf, "");
}
- damon_set_targets(ctx, NULL, 0);
+ dbgfs_set_targets(ctx, 0, NULL);
damon_destroy_ctx(ctx);
}
static struct kunit_case damon_test_cases[] = {
- KUNIT_CASE(damon_dbgfs_test_str_to_target_ids),
+ KUNIT_CASE(damon_dbgfs_test_str_to_ints),
KUNIT_CASE(damon_dbgfs_test_set_targets),
KUNIT_CASE(damon_dbgfs_test_set_init_regions),
{},
diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
index 5b899601e56c..a0dab8b5e45f 100644
--- a/mm/damon/dbgfs.c
+++ b/mm/damon/dbgfs.c
@@ -56,7 +56,7 @@ static ssize_t dbgfs_attrs_read(struct file *file,
mutex_lock(&ctx->kdamond_lock);
ret = scnprintf(kbuf, ARRAY_SIZE(kbuf), "%lu %lu %lu %lu %lu\n",
ctx->sample_interval, ctx->aggr_interval,
- ctx->primitive_update_interval, ctx->min_nr_regions,
+ ctx->ops_update_interval, ctx->min_nr_regions,
ctx->max_nr_regions);
mutex_unlock(&ctx->kdamond_lock);
@@ -275,25 +275,27 @@ out:
return ret;
}
-static inline bool targetid_is_pid(const struct damon_ctx *ctx)
+static inline bool target_has_pid(const struct damon_ctx *ctx)
{
- return ctx->primitive.target_valid == damon_va_target_valid;
+ return ctx->ops.id == DAMON_OPS_VADDR;
}
static ssize_t sprint_target_ids(struct damon_ctx *ctx, char *buf, ssize_t len)
{
struct damon_target *t;
- unsigned long id;
+ int id;
int written = 0;
int rc;
damon_for_each_target(t, ctx) {
- id = t->id;
- if (targetid_is_pid(ctx))
+ if (target_has_pid(ctx))
/* Show pid numbers to debugfs users */
- id = (unsigned long)pid_vnr((struct pid *)id);
+ id = pid_vnr(t->pid);
+ else
+ /* Show 42 for physical address space, just for fun */
+ id = 42;
- rc = scnprintf(&buf[written], len - written, "%lu ", id);
+ rc = scnprintf(&buf[written], len - written, "%d ", id);
if (!rc)
return -ENOMEM;
written += rc;
@@ -321,54 +323,129 @@ static ssize_t dbgfs_target_ids_read(struct file *file,
}
/*
- * Converts a string into an array of unsigned long integers
+ * Converts a string into an array of integers
*
- * Returns an array of unsigned long integers if the conversion success, or
- * NULL otherwise.
+ * Returns an array of integers if the conversion succeeds, or NULL
+ * otherwise.
*/
-static unsigned long *str_to_target_ids(const char *str, ssize_t len,
- ssize_t *nr_ids)
+static int *str_to_ints(const char *str, ssize_t len, ssize_t *nr_ints)
{
- unsigned long *ids;
- const int max_nr_ids = 32;
- unsigned long id;
+ int *array;
+ const int max_nr_ints = 32;
+ int nr;
int pos = 0, parsed, ret;
- *nr_ids = 0;
- ids = kmalloc_array(max_nr_ids, sizeof(id), GFP_KERNEL);
- if (!ids)
+ *nr_ints = 0;
+ array = kmalloc_array(max_nr_ints, sizeof(*array), GFP_KERNEL);
+ if (!array)
return NULL;
- while (*nr_ids < max_nr_ids && pos < len) {
- ret = sscanf(&str[pos], "%lu%n", &id, &parsed);
+ while (*nr_ints < max_nr_ints && pos < len) {
+ ret = sscanf(&str[pos], "%d%n", &nr, &parsed);
pos += parsed;
if (ret != 1)
break;
- ids[*nr_ids] = id;
- *nr_ids += 1;
+ array[*nr_ints] = nr;
+ *nr_ints += 1;
}
- return ids;
+ return array;
}
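
The parser stops at the first non-numeric token and caps the result at 32 entries; the returned array is owned by the caller. A quick usage sketch:

	/* Sketch: str_to_ints() on a typical debugfs input. */
	ssize_t nr_ints;
	int *ints = str_to_ints("12 35 46", 8, &nr_ints);

	if (ints) {
		/* here nr_ints == 3 and ints[] == {12, 35, 46} */
		kfree(ints);
	}
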
-static void dbgfs_put_pids(unsigned long *ids, int nr_ids)
+static void dbgfs_put_pids(struct pid **pids, int nr_pids)
{
int i;
- for (i = 0; i < nr_ids; i++)
- put_pid((struct pid *)ids[i]);
+ for (i = 0; i < nr_pids; i++)
+ put_pid(pids[i]);
+}
+
+/*
+ * Converts a string into an array of struct pid pointers
+ *
+ * Returns an array of struct pid pointers if the conversion succeeds, or NULL
+ * otherwise.
+ */
+static struct pid **str_to_pids(const char *str, ssize_t len, ssize_t *nr_pids)
+{
+ int *ints;
+ ssize_t nr_ints;
+ struct pid **pids;
+
+ *nr_pids = 0;
+
+ ints = str_to_ints(str, len, &nr_ints);
+ if (!ints)
+ return NULL;
+
+ pids = kmalloc_array(nr_ints, sizeof(*pids), GFP_KERNEL);
+ if (!pids)
+ goto out;
+
+ for (; *nr_pids < nr_ints; (*nr_pids)++) {
+ pids[*nr_pids] = find_get_pid(ints[*nr_pids]);
+ if (!pids[*nr_pids]) {
+ dbgfs_put_pids(pids, *nr_pids);
+ kfree(ints);
+ kfree(pids);
+ return NULL;
+ }
+ }
+
+out:
+ kfree(ints);
+ return pids;
+}
+
+/*
+ * dbgfs_set_targets() - Set monitoring targets.
+ * @ctx: monitoring context
+ * @nr_targets: number of targets
+ * @pids:	array of target pids (size is same as @nr_targets)
+ *
+ * This function should not be called while the kdamond is running. @pids is
+ * ignored if the context is not configured to have a pid in each target. On
+ * failure, reference counts of all pids in @pids are decremented.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int dbgfs_set_targets(struct damon_ctx *ctx, ssize_t nr_targets,
+ struct pid **pids)
+{
+ ssize_t i;
+ struct damon_target *t, *next;
+
+ damon_for_each_target_safe(t, next, ctx) {
+ if (target_has_pid(ctx))
+ put_pid(t->pid);
+ damon_destroy_target(t);
+ }
+
+ for (i = 0; i < nr_targets; i++) {
+ t = damon_new_target();
+ if (!t) {
+ damon_for_each_target_safe(t, next, ctx)
+ damon_destroy_target(t);
+ if (target_has_pid(ctx))
+ dbgfs_put_pids(pids, nr_targets);
+ return -ENOMEM;
+ }
+ if (target_has_pid(ctx))
+ t->pid = pids[i];
+ damon_add_target(ctx, t);
+ }
+
+ return 0;
}
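
Together, str_to_pids() and dbgfs_set_targets() split the old damon_set_targets() into parsing and installation, with clear ownership: pid references transfer to the targets on success and are dropped on failure, while the pids array itself always stays with, and is freed by, the caller. The pairing as the write handler below uses it, sketched:

	/* Sketch: parse user input into pids, then install as targets. */
	ssize_t nr_pids;
	int err;
	struct pid **pids = str_to_pids(kbuf, count, &nr_pids);

	if (pids) {
		err = dbgfs_set_targets(ctx, nr_pids, pids);
		/* on error, the pid references were already put */
		kfree(pids);	/* the array is always caller-owned */
	}
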
static ssize_t dbgfs_target_ids_write(struct file *file,
const char __user *buf, size_t count, loff_t *ppos)
{
struct damon_ctx *ctx = file->private_data;
- struct damon_target *t, *next_t;
bool id_is_pid = true;
char *kbuf;
- unsigned long *targets;
+ struct pid **target_pids = NULL;
ssize_t nr_targets;
ssize_t ret;
- int i;
kbuf = user_input_str(buf, count, ppos);
if (IS_ERR(kbuf))
@@ -376,61 +453,47 @@ static ssize_t dbgfs_target_ids_write(struct file *file,
if (!strncmp(kbuf, "paddr\n", count)) {
id_is_pid = false;
- /* target id is meaningless here, but we set it just for fun */
- scnprintf(kbuf, count, "42 ");
- }
-
- targets = str_to_target_ids(kbuf, count, &nr_targets);
- if (!targets) {
- ret = -ENOMEM;
- goto out;
+ nr_targets = 1;
}
if (id_is_pid) {
- for (i = 0; i < nr_targets; i++) {
- targets[i] = (unsigned long)find_get_pid(
- (int)targets[i]);
- if (!targets[i]) {
- dbgfs_put_pids(targets, i);
- ret = -EINVAL;
- goto free_targets_out;
- }
+ target_pids = str_to_pids(kbuf, count, &nr_targets);
+ if (!target_pids) {
+ ret = -ENOMEM;
+ goto out;
}
}
mutex_lock(&ctx->kdamond_lock);
if (ctx->kdamond) {
if (id_is_pid)
- dbgfs_put_pids(targets, nr_targets);
+ dbgfs_put_pids(target_pids, nr_targets);
ret = -EBUSY;
goto unlock_out;
}
/* remove previously set targets */
- damon_for_each_target_safe(t, next_t, ctx) {
- if (targetid_is_pid(ctx))
- put_pid((struct pid *)t->id);
- damon_destroy_target(t);
+ dbgfs_set_targets(ctx, 0, NULL);
+ if (!nr_targets) {
+ ret = count;
+ goto unlock_out;
}
/* Configure the context for the address space type */
if (id_is_pid)
- damon_va_set_primitives(ctx);
+ ret = damon_select_ops(ctx, DAMON_OPS_VADDR);
else
- damon_pa_set_primitives(ctx);
+ ret = damon_select_ops(ctx, DAMON_OPS_PADDR);
+ if (ret)
+ goto unlock_out;
- ret = damon_set_targets(ctx, targets, nr_targets);
- if (ret) {
- if (id_is_pid)
- dbgfs_put_pids(targets, nr_targets);
- } else {
+ ret = dbgfs_set_targets(ctx, nr_targets, target_pids);
+ if (!ret)
ret = count;
- }
unlock_out:
mutex_unlock(&ctx->kdamond_lock);
-free_targets_out:
- kfree(targets);
+ kfree(target_pids);
out:
kfree(kbuf);
return ret;
@@ -440,18 +503,20 @@ static ssize_t sprint_init_regions(struct damon_ctx *c, char *buf, ssize_t len)
{
struct damon_target *t;
struct damon_region *r;
+ int target_idx = 0;
int written = 0;
int rc;
damon_for_each_target(t, c) {
damon_for_each_region(r, t) {
rc = scnprintf(&buf[written], len - written,
- "%lu %lu %lu\n",
- t->id, r->ar.start, r->ar.end);
+ "%d %lu %lu\n",
+ target_idx, r->ar.start, r->ar.end);
if (!rc)
return -ENOMEM;
written += rc;
}
+ target_idx++;
}
return written;
}
@@ -485,22 +550,19 @@ out:
return len;
}
-static int add_init_region(struct damon_ctx *c,
- unsigned long target_id, struct damon_addr_range *ar)
+static int add_init_region(struct damon_ctx *c, int target_idx,
+ struct damon_addr_range *ar)
{
struct damon_target *t;
struct damon_region *r, *prev;
- unsigned long id;
+ unsigned long idx = 0;
int rc = -EINVAL;
if (ar->start >= ar->end)
return -EINVAL;
damon_for_each_target(t, c) {
- id = t->id;
- if (targetid_is_pid(c))
- id = (unsigned long)pid_vnr((struct pid *)id);
- if (id == target_id) {
+ if (idx++ == target_idx) {
r = damon_new_region(ar->start, ar->end);
if (!r)
return -ENOMEM;
@@ -523,7 +585,7 @@ static int set_init_regions(struct damon_ctx *c, const char *str, ssize_t len)
struct damon_target *t;
struct damon_region *r, *next;
int pos = 0, parsed, ret;
- unsigned long target_id;
+ int target_idx;
struct damon_addr_range ar;
int err;
@@ -533,11 +595,11 @@ static int set_init_regions(struct damon_ctx *c, const char *str, ssize_t len)
}
while (pos < len) {
- ret = sscanf(&str[pos], "%lu %lu %lu%n",
- &target_id, &ar.start, &ar.end, &parsed);
+ ret = sscanf(&str[pos], "%d %lu %lu%n",
+ &target_idx, &ar.start, &ar.end, &parsed);
if (ret != 3)
break;
- err = add_init_region(c, target_id, &ar);
+ err = add_init_region(c, target_idx, &ar);
if (err)
goto fail;
pos += parsed;
@@ -660,12 +722,12 @@ static void dbgfs_before_terminate(struct damon_ctx *ctx)
{
struct damon_target *t, *next;
- if (!targetid_is_pid(ctx))
+ if (!target_has_pid(ctx))
return;
mutex_lock(&ctx->kdamond_lock);
damon_for_each_target_safe(t, next, ctx) {
- put_pid((struct pid *)t->id);
+ put_pid(t->pid);
damon_destroy_target(t);
}
mutex_unlock(&ctx->kdamond_lock);
@@ -679,7 +741,11 @@ static struct damon_ctx *dbgfs_new_ctx(void)
if (!ctx)
return NULL;
- damon_va_set_primitives(ctx);
+ if (damon_select_ops(ctx, DAMON_OPS_VADDR) &&
+ damon_select_ops(ctx, DAMON_OPS_PADDR)) {
+ damon_destroy_ctx(ctx);
+ return NULL;
+ }
ctx->callback.before_terminate = dbgfs_before_terminate;
return ctx;
}
@@ -901,7 +967,7 @@ static ssize_t dbgfs_monitor_on_write(struct file *file,
return -EINVAL;
}
}
- ret = damon_start(dbgfs_ctxs, dbgfs_nr_ctxs);
+ ret = damon_start(dbgfs_ctxs, dbgfs_nr_ctxs, true);
} else if (!strncmp(kbuf, "off", count)) {
ret = damon_stop(dbgfs_ctxs, dbgfs_nr_ctxs);
} else {
diff --git a/mm/damon/prmtv-common.c b/mm/damon/ops-common.c
index 92a04f5831d6..e346cc10d143 100644
--- a/mm/damon/prmtv-common.c
+++ b/mm/damon/ops-common.c
@@ -10,7 +10,7 @@
#include <linux/pagemap.h>
#include <linux/rmap.h>
-#include "prmtv-common.h"
+#include "ops-common.h"
/*
* Get an online page for a pfn if it's in the LRU list. Otherwise, returns
diff --git a/mm/damon/prmtv-common.h b/mm/damon/ops-common.h
index e790cb5f8fe0..e790cb5f8fe0 100644
--- a/mm/damon/prmtv-common.h
+++ b/mm/damon/ops-common.h
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 5e8244f65a1a..21474ae63bc7 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -14,16 +14,12 @@
#include <linux/swap.h>
#include "../internal.h"
-#include "prmtv-common.h"
+#include "ops-common.h"
-static bool __damon_pa_mkold(struct page *page, struct vm_area_struct *vma,
+static bool __damon_pa_mkold(struct folio *folio, struct vm_area_struct *vma,
unsigned long addr, void *arg)
{
- struct page_vma_mapped_walk pvmw = {
- .page = page,
- .vma = vma,
- .address = addr,
- };
+ DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
while (page_vma_mapped_walk(&pvmw)) {
addr = pvmw.address;
@@ -37,32 +33,34 @@ static bool __damon_pa_mkold(struct page *page, struct vm_area_struct *vma,
static void damon_pa_mkold(unsigned long paddr)
{
+ struct folio *folio;
struct page *page = damon_get_page(PHYS_PFN(paddr));
struct rmap_walk_control rwc = {
.rmap_one = __damon_pa_mkold,
- .anon_lock = page_lock_anon_vma_read,
+ .anon_lock = folio_lock_anon_vma_read,
};
bool need_lock;
if (!page)
return;
+ folio = page_folio(page);
- if (!page_mapped(page) || !page_rmapping(page)) {
- set_page_idle(page);
+ if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
+ folio_set_idle(folio);
goto out;
}
- need_lock = !PageAnon(page) || PageKsm(page);
- if (need_lock && !trylock_page(page))
+ need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
+ if (need_lock && !folio_trylock(folio))
goto out;
- rmap_walk(page, &rwc);
+ rmap_walk(folio, &rwc);
if (need_lock)
- unlock_page(page);
+ folio_unlock(folio);
out:
- put_page(page);
+ folio_put(folio);
}
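
The conversion settles on one folio idiom that damon_pa_young() repeats below: resolve the page to its folio, take the folio lock only for file or KSM folios (trylock, since this runs from kdamond context), walk the rmap, then drop the reference damon_get_page() took. In outline:

	/* Sketch: the folio rmap-walk pattern used by the paddr ops. */
	struct folio *folio = page_folio(page);
	bool need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);

	if (!need_lock || folio_trylock(folio)) {
		rmap_walk(folio, &rwc);
		if (need_lock)
			folio_unlock(folio);
	}
	folio_put(folio);	/* pairs with damon_get_page() */
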
static void __damon_pa_prepare_access_check(struct damon_ctx *ctx,
@@ -89,15 +87,11 @@ struct damon_pa_access_chk_result {
bool accessed;
};
-static bool __damon_pa_young(struct page *page, struct vm_area_struct *vma,
+static bool __damon_pa_young(struct folio *folio, struct vm_area_struct *vma,
unsigned long addr, void *arg)
{
struct damon_pa_access_chk_result *result = arg;
- struct page_vma_mapped_walk pvmw = {
- .page = page,
- .vma = vma,
- .address = addr,
- };
+ DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
result->accessed = false;
result->page_sz = PAGE_SIZE;
@@ -105,12 +99,12 @@ static bool __damon_pa_young(struct page *page, struct vm_area_struct *vma,
addr = pvmw.address;
if (pvmw.pte) {
result->accessed = pte_young(*pvmw.pte) ||
- !page_is_idle(page) ||
+ !folio_test_idle(folio) ||
mmu_notifier_test_young(vma->vm_mm, addr);
} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
result->accessed = pmd_young(*pvmw.pmd) ||
- !page_is_idle(page) ||
+ !folio_test_idle(folio) ||
mmu_notifier_test_young(vma->vm_mm, addr);
result->page_sz = ((1UL) << HPAGE_PMD_SHIFT);
#else
@@ -129,6 +123,7 @@ static bool __damon_pa_young(struct page *page, struct vm_area_struct *vma,
static bool damon_pa_young(unsigned long paddr, unsigned long *page_sz)
{
+ struct folio *folio;
struct page *page = damon_get_page(PHYS_PFN(paddr));
struct damon_pa_access_chk_result result = {
.page_sz = PAGE_SIZE,
@@ -137,33 +132,34 @@ static bool damon_pa_young(unsigned long paddr, unsigned long *page_sz)
struct rmap_walk_control rwc = {
.arg = &result,
.rmap_one = __damon_pa_young,
- .anon_lock = page_lock_anon_vma_read,
+ .anon_lock = folio_lock_anon_vma_read,
};
bool need_lock;
if (!page)
return false;
+ folio = page_folio(page);
- if (!page_mapped(page) || !page_rmapping(page)) {
- if (page_is_idle(page))
+ if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
+ if (folio_test_idle(folio))
result.accessed = false;
else
result.accessed = true;
- put_page(page);
+ folio_put(folio);
goto out;
}
- need_lock = !PageAnon(page) || PageKsm(page);
- if (need_lock && !trylock_page(page)) {
- put_page(page);
- return NULL;
+ need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
+ if (need_lock && !folio_trylock(folio)) {
+ folio_put(folio);
+ return false;
}
- rmap_walk(page, &rwc);
+ rmap_walk(folio, &rwc);
if (need_lock)
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
out:
*page_sz = result.page_sz;
@@ -208,11 +204,6 @@ static unsigned int damon_pa_check_accesses(struct damon_ctx *ctx)
return max_nr_accesses;
}
-bool damon_pa_target_valid(void *t)
-{
- return true;
-}
-
static unsigned long damon_pa_apply_scheme(struct damon_ctx *ctx,
struct damon_target *t, struct damon_region *r,
struct damos *scheme)
@@ -261,15 +252,22 @@ static int damon_pa_scheme_score(struct damon_ctx *context,
return DAMOS_MAX_SCORE;
}
-void damon_pa_set_primitives(struct damon_ctx *ctx)
+static int __init damon_pa_initcall(void)
{
- ctx->primitive.init = NULL;
- ctx->primitive.update = NULL;
- ctx->primitive.prepare_access_checks = damon_pa_prepare_access_checks;
- ctx->primitive.check_accesses = damon_pa_check_accesses;
- ctx->primitive.reset_aggregated = NULL;
- ctx->primitive.target_valid = damon_pa_target_valid;
- ctx->primitive.cleanup = NULL;
- ctx->primitive.apply_scheme = damon_pa_apply_scheme;
- ctx->primitive.get_scheme_score = damon_pa_scheme_score;
-}
+ struct damon_operations ops = {
+ .id = DAMON_OPS_PADDR,
+ .init = NULL,
+ .update = NULL,
+ .prepare_access_checks = damon_pa_prepare_access_checks,
+ .check_accesses = damon_pa_check_accesses,
+ .reset_aggregated = NULL,
+ .target_valid = NULL,
+ .cleanup = NULL,
+ .apply_scheme = damon_pa_apply_scheme,
+ .get_scheme_score = damon_pa_scheme_score,
+ };
+
+ return damon_register_ops(&ops);
+}
+
+subsys_initcall(damon_pa_initcall);
diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
index bc476cef688e..e34c4d0c4d93 100644
--- a/mm/damon/reclaim.c
+++ b/mm/damon/reclaim.c
@@ -330,7 +330,7 @@ static int damon_reclaim_turn(bool on)
if (err)
goto free_scheme_out;
- err = damon_start(&ctx, 1);
+ err = damon_start(&ctx, 1, true);
if (!err) {
kdamond_pid = ctx->kdamond->pid;
return 0;
@@ -384,11 +384,12 @@ static int __init damon_reclaim_init(void)
if (!ctx)
return -ENOMEM;
- damon_pa_set_primitives(ctx);
+ if (damon_select_ops(ctx, DAMON_OPS_PADDR))
+ return -EINVAL;
+
ctx->callback.after_aggregation = damon_reclaim_after_aggregation;
- /* 4242 means nothing but fun */
- target = damon_new_target(4242);
+ target = damon_new_target();
if (!target) {
damon_destroy_ctx(ctx);
return -ENOMEM;
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
new file mode 100644
index 000000000000..48e434cd43d8
--- /dev/null
+++ b/mm/damon/sysfs.c
@@ -0,0 +1,2596 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON sysfs Interface
+ *
+ * Copyright (c) 2022 SeongJae Park <sj@kernel.org>
+ */
+
+#include <linux/damon.h>
+#include <linux/kobject.h>
+#include <linux/pid.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+static DEFINE_MUTEX(damon_sysfs_lock);
+
+/*
+ * unsigned long range directory
+ */
+
+struct damon_sysfs_ul_range {
+ struct kobject kobj;
+ unsigned long min;
+ unsigned long max;
+};
+
+static struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc(
+ unsigned long min,
+ unsigned long max)
+{
+ struct damon_sysfs_ul_range *range = kmalloc(sizeof(*range),
+ GFP_KERNEL);
+
+ if (!range)
+ return NULL;
+ range->kobj = (struct kobject){};
+ range->min = min;
+ range->max = max;
+
+ return range;
+}
+
+static ssize_t min_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_ul_range *range = container_of(kobj,
+ struct damon_sysfs_ul_range, kobj);
+
+ return sysfs_emit(buf, "%lu\n", range->min);
+}
+
+static ssize_t min_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_ul_range *range = container_of(kobj,
+ struct damon_sysfs_ul_range, kobj);
+ unsigned long min;
+ int err;
+
+ err = kstrtoul(buf, 0, &min);
+ if (err)
+ return -EINVAL;
+
+ range->min = min;
+ return count;
+}
+
+static ssize_t max_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_ul_range *range = container_of(kobj,
+ struct damon_sysfs_ul_range, kobj);
+
+ return sysfs_emit(buf, "%lu\n", range->max);
+}
+
+static ssize_t max_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_ul_range *range = container_of(kobj,
+ struct damon_sysfs_ul_range, kobj);
+ unsigned long max;
+ int err;
+
+ err = kstrtoul(buf, 0, &max);
+ if (err)
+ return -EINVAL;
+
+ range->max = max;
+ return count;
+}
+
+static void damon_sysfs_ul_range_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_ul_range, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_ul_range_min_attr =
+ __ATTR_RW_MODE(min, 0600);
+
+static struct kobj_attribute damon_sysfs_ul_range_max_attr =
+ __ATTR_RW_MODE(max, 0600);
+
+static struct attribute *damon_sysfs_ul_range_attrs[] = {
+ &damon_sysfs_ul_range_min_attr.attr,
+ &damon_sysfs_ul_range_max_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_ul_range);
+
+static struct kobj_type damon_sysfs_ul_range_ktype = {
+ .release = damon_sysfs_ul_range_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_ul_range_groups,
+};
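
This ul_range directory is the template every other directory in the file follows: a struct embedding a kobject, show/store handlers that recover the container via container_of(), and a ktype whose release() frees it. Instantiating one looks like the access_pattern helper further down; a reduced sketch, with the parent kobject assumed:

	/* Sketch: creating one ul_range directory under a parent kobject. */
	struct damon_sysfs_ul_range *range = damon_sysfs_ul_range_alloc(0, 1000);
	int err;

	if (!range)
		return -ENOMEM;
	err = kobject_init_and_add(&range->kobj, &damon_sysfs_ul_range_ktype,
			parent, "nr_regions");	/* 'parent' is assumed */
	if (err)
		kobject_put(&range->kobj);	/* release() kfrees range */
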
+
+/*
+ * schemes/stats directory
+ */
+
+struct damon_sysfs_stats {
+ struct kobject kobj;
+ unsigned long nr_tried;
+ unsigned long sz_tried;
+ unsigned long nr_applied;
+ unsigned long sz_applied;
+ unsigned long qt_exceeds;
+};
+
+static struct damon_sysfs_stats *damon_sysfs_stats_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_stats), GFP_KERNEL);
+}
+
+static ssize_t nr_tried_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_stats *stats = container_of(kobj,
+ struct damon_sysfs_stats, kobj);
+
+ return sysfs_emit(buf, "%lu\n", stats->nr_tried);
+}
+
+static ssize_t sz_tried_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_stats *stats = container_of(kobj,
+ struct damon_sysfs_stats, kobj);
+
+ return sysfs_emit(buf, "%lu\n", stats->sz_tried);
+}
+
+static ssize_t nr_applied_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_stats *stats = container_of(kobj,
+ struct damon_sysfs_stats, kobj);
+
+ return sysfs_emit(buf, "%lu\n", stats->nr_applied);
+}
+
+static ssize_t sz_applied_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_stats *stats = container_of(kobj,
+ struct damon_sysfs_stats, kobj);
+
+ return sysfs_emit(buf, "%lu\n", stats->sz_applied);
+}
+
+static ssize_t qt_exceeds_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_stats *stats = container_of(kobj,
+ struct damon_sysfs_stats, kobj);
+
+ return sysfs_emit(buf, "%lu\n", stats->qt_exceeds);
+}
+
+static void damon_sysfs_stats_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_stats, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_stats_nr_tried_attr =
+ __ATTR_RO_MODE(nr_tried, 0400);
+
+static struct kobj_attribute damon_sysfs_stats_sz_tried_attr =
+ __ATTR_RO_MODE(sz_tried, 0400);
+
+static struct kobj_attribute damon_sysfs_stats_nr_applied_attr =
+ __ATTR_RO_MODE(nr_applied, 0400);
+
+static struct kobj_attribute damon_sysfs_stats_sz_applied_attr =
+ __ATTR_RO_MODE(sz_applied, 0400);
+
+static struct kobj_attribute damon_sysfs_stats_qt_exceeds_attr =
+ __ATTR_RO_MODE(qt_exceeds, 0400);
+
+static struct attribute *damon_sysfs_stats_attrs[] = {
+ &damon_sysfs_stats_nr_tried_attr.attr,
+ &damon_sysfs_stats_sz_tried_attr.attr,
+ &damon_sysfs_stats_nr_applied_attr.attr,
+ &damon_sysfs_stats_sz_applied_attr.attr,
+ &damon_sysfs_stats_qt_exceeds_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_stats);
+
+static struct kobj_type damon_sysfs_stats_ktype = {
+ .release = damon_sysfs_stats_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_stats_groups,
+};
+
+/*
+ * watermarks directory
+ */
+
+struct damon_sysfs_watermarks {
+ struct kobject kobj;
+ enum damos_wmark_metric metric;
+ unsigned long interval_us;
+ unsigned long high;
+ unsigned long mid;
+ unsigned long low;
+};
+
+static struct damon_sysfs_watermarks *damon_sysfs_watermarks_alloc(
+ enum damos_wmark_metric metric, unsigned long interval_us,
+ unsigned long high, unsigned long mid, unsigned long low)
+{
+ struct damon_sysfs_watermarks *watermarks = kmalloc(
+ sizeof(*watermarks), GFP_KERNEL);
+
+ if (!watermarks)
+ return NULL;
+ watermarks->kobj = (struct kobject){};
+ watermarks->metric = metric;
+ watermarks->interval_us = interval_us;
+ watermarks->high = high;
+ watermarks->mid = mid;
+ watermarks->low = low;
+ return watermarks;
+}
+
+/* Should match enum damos_wmark_metric */
+static const char * const damon_sysfs_wmark_metric_strs[] = {
+ "none",
+ "free_mem_rate",
+};
+
+static ssize_t metric_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+
+ return sysfs_emit(buf, "%s\n",
+ damon_sysfs_wmark_metric_strs[watermarks->metric]);
+}
+
+static ssize_t metric_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+ enum damos_wmark_metric metric;
+
+ for (metric = 0; metric < NR_DAMOS_WMARK_METRICS; metric++) {
+ if (sysfs_streq(buf, damon_sysfs_wmark_metric_strs[metric])) {
+ watermarks->metric = metric;
+ return count;
+ }
+ }
+ return -EINVAL;
+}
+
+static ssize_t interval_us_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+
+ return sysfs_emit(buf, "%lu\n", watermarks->interval_us);
+}
+
+static ssize_t interval_us_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+ int err = kstrtoul(buf, 0, &watermarks->interval_us);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t high_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+
+ return sysfs_emit(buf, "%lu\n", watermarks->high);
+}
+
+static ssize_t high_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+ int err = kstrtoul(buf, 0, &watermarks->high);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t mid_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+
+ return sysfs_emit(buf, "%lu\n", watermarks->mid);
+}
+
+static ssize_t mid_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+ int err = kstrtoul(buf, 0, &watermarks->mid);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t low_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+
+ return sysfs_emit(buf, "%lu\n", watermarks->low);
+}
+
+static ssize_t low_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+ int err = kstrtoul(buf, 0, &watermarks->low);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static void damon_sysfs_watermarks_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_watermarks, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_watermarks_metric_attr =
+ __ATTR_RW_MODE(metric, 0600);
+
+static struct kobj_attribute damon_sysfs_watermarks_interval_us_attr =
+ __ATTR_RW_MODE(interval_us, 0600);
+
+static struct kobj_attribute damon_sysfs_watermarks_high_attr =
+ __ATTR_RW_MODE(high, 0600);
+
+static struct kobj_attribute damon_sysfs_watermarks_mid_attr =
+ __ATTR_RW_MODE(mid, 0600);
+
+static struct kobj_attribute damon_sysfs_watermarks_low_attr =
+ __ATTR_RW_MODE(low, 0600);
+
+static struct attribute *damon_sysfs_watermarks_attrs[] = {
+ &damon_sysfs_watermarks_metric_attr.attr,
+ &damon_sysfs_watermarks_interval_us_attr.attr,
+ &damon_sysfs_watermarks_high_attr.attr,
+ &damon_sysfs_watermarks_mid_attr.attr,
+ &damon_sysfs_watermarks_low_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_watermarks);
+
+static struct kobj_type damon_sysfs_watermarks_ktype = {
+ .release = damon_sysfs_watermarks_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_watermarks_groups,
+};
+
+/*
+ * scheme/weights directory
+ */
+
+struct damon_sysfs_weights {
+ struct kobject kobj;
+ unsigned int sz;
+ unsigned int nr_accesses;
+ unsigned int age;
+};
+
+static struct damon_sysfs_weights *damon_sysfs_weights_alloc(unsigned int sz,
+ unsigned int nr_accesses, unsigned int age)
+{
+ struct damon_sysfs_weights *weights = kmalloc(sizeof(*weights),
+ GFP_KERNEL);
+
+ if (!weights)
+ return NULL;
+ weights->kobj = (struct kobject){};
+ weights->sz = sz;
+ weights->nr_accesses = nr_accesses;
+ weights->age = age;
+ return weights;
+}
+
+static ssize_t sz_permil_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_weights *weights = container_of(kobj,
+ struct damon_sysfs_weights, kobj);
+
+ return sysfs_emit(buf, "%u\n", weights->sz);
+}
+
+static ssize_t sz_permil_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_weights *weights = container_of(kobj,
+ struct damon_sysfs_weights, kobj);
+ int err = kstrtouint(buf, 0, &weights->sz);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t nr_accesses_permil_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_weights *weights = container_of(kobj,
+ struct damon_sysfs_weights, kobj);
+
+ return sysfs_emit(buf, "%u\n", weights->nr_accesses);
+}
+
+static ssize_t nr_accesses_permil_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_weights *weights = container_of(kobj,
+ struct damon_sysfs_weights, kobj);
+ int err = kstrtouint(buf, 0, &weights->nr_accesses);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t age_permil_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_weights *weights = container_of(kobj,
+ struct damon_sysfs_weights, kobj);
+
+ return sysfs_emit(buf, "%u\n", weights->age);
+}
+
+static ssize_t age_permil_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_weights *weights = container_of(kobj,
+ struct damon_sysfs_weights, kobj);
+ int err = kstrtouint(buf, 0, &weights->age);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static void damon_sysfs_weights_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_weights, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_weights_sz_attr =
+ __ATTR_RW_MODE(sz_permil, 0600);
+
+static struct kobj_attribute damon_sysfs_weights_nr_accesses_attr =
+ __ATTR_RW_MODE(nr_accesses_permil, 0600);
+
+static struct kobj_attribute damon_sysfs_weights_age_attr =
+ __ATTR_RW_MODE(age_permil, 0600);
+
+static struct attribute *damon_sysfs_weights_attrs[] = {
+ &damon_sysfs_weights_sz_attr.attr,
+ &damon_sysfs_weights_nr_accesses_attr.attr,
+ &damon_sysfs_weights_age_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_weights);
+
+static struct kobj_type damon_sysfs_weights_ktype = {
+ .release = damon_sysfs_weights_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_weights_groups,
+};
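
Note the naming indirection: __ATTR_RW_MODE(sz_permil, 0600) token-pastes to find sz_permil_show()/sz_permil_store(), so the sysfs file is named sz_permil even though the backing field is plain weights->sz. Roughly, the macro expands to:

	/* Approximate expansion of __ATTR_RW_MODE(sz_permil, 0600). */
	static struct kobj_attribute damon_sysfs_weights_sz_attr = {
		.attr	= { .name = "sz_permil", .mode = 0600 },
		.show	= sz_permil_show,
		.store	= sz_permil_store,
	};
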
+
+/*
+ * quotas directory
+ */
+
+struct damon_sysfs_quotas {
+ struct kobject kobj;
+ struct damon_sysfs_weights *weights;
+ unsigned long ms;
+ unsigned long sz;
+ unsigned long reset_interval_ms;
+};
+
+static struct damon_sysfs_quotas *damon_sysfs_quotas_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_quotas), GFP_KERNEL);
+}
+
+static int damon_sysfs_quotas_add_dirs(struct damon_sysfs_quotas *quotas)
+{
+ struct damon_sysfs_weights *weights;
+ int err;
+
+ weights = damon_sysfs_weights_alloc(0, 0, 0);
+ if (!weights)
+ return -ENOMEM;
+
+ err = kobject_init_and_add(&weights->kobj, &damon_sysfs_weights_ktype,
+ &quotas->kobj, "weights");
+ if (err)
+ kobject_put(&weights->kobj);
+ else
+ quotas->weights = weights;
+ return err;
+}
+
+static void damon_sysfs_quotas_rm_dirs(struct damon_sysfs_quotas *quotas)
+{
+ kobject_put(&quotas->weights->kobj);
+}
+
+static ssize_t ms_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+
+ return sysfs_emit(buf, "%lu\n", quotas->ms);
+}
+
+static ssize_t ms_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+ int err = kstrtoul(buf, 0, &quotas->ms);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t bytes_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+
+ return sysfs_emit(buf, "%lu\n", quotas->sz);
+}
+
+static ssize_t bytes_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+ int err = kstrtoul(buf, 0, &quotas->sz);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t reset_interval_ms_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+
+ return sysfs_emit(buf, "%lu\n", quotas->reset_interval_ms);
+}
+
+static ssize_t reset_interval_ms_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+ int err = kstrtoul(buf, 0, &quotas->reset_interval_ms);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static void damon_sysfs_quotas_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_quotas, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_quotas_ms_attr =
+ __ATTR_RW_MODE(ms, 0600);
+
+static struct kobj_attribute damon_sysfs_quotas_sz_attr =
+ __ATTR_RW_MODE(bytes, 0600);
+
+static struct kobj_attribute damon_sysfs_quotas_reset_interval_ms_attr =
+ __ATTR_RW_MODE(reset_interval_ms, 0600);
+
+static struct attribute *damon_sysfs_quotas_attrs[] = {
+ &damon_sysfs_quotas_ms_attr.attr,
+ &damon_sysfs_quotas_sz_attr.attr,
+ &damon_sysfs_quotas_reset_interval_ms_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_quotas);
+
+static struct kobj_type damon_sysfs_quotas_ktype = {
+ .release = damon_sysfs_quotas_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_quotas_groups,
+};
+
+/*
+ * access_pattern directory
+ */
+
+struct damon_sysfs_access_pattern {
+ struct kobject kobj;
+ struct damon_sysfs_ul_range *sz;
+ struct damon_sysfs_ul_range *nr_accesses;
+ struct damon_sysfs_ul_range *age;
+};
+
+static
+struct damon_sysfs_access_pattern *damon_sysfs_access_pattern_alloc(void)
+{
+ struct damon_sysfs_access_pattern *access_pattern =
+ kmalloc(sizeof(*access_pattern), GFP_KERNEL);
+
+ if (!access_pattern)
+ return NULL;
+ access_pattern->kobj = (struct kobject){};
+ return access_pattern;
+}
+
+static int damon_sysfs_access_pattern_add_range_dir(
+ struct damon_sysfs_access_pattern *access_pattern,
+ struct damon_sysfs_ul_range **range_dir_ptr,
+ char *name)
+{
+ struct damon_sysfs_ul_range *range = damon_sysfs_ul_range_alloc(0, 0);
+ int err;
+
+ if (!range)
+ return -ENOMEM;
+ err = kobject_init_and_add(&range->kobj, &damon_sysfs_ul_range_ktype,
+ &access_pattern->kobj, name);
+ if (err)
+ kobject_put(&range->kobj);
+ else
+ *range_dir_ptr = range;
+ return err;
+}
+
+static int damon_sysfs_access_pattern_add_dirs(
+ struct damon_sysfs_access_pattern *access_pattern)
+{
+ int err;
+
+ err = damon_sysfs_access_pattern_add_range_dir(access_pattern,
+ &access_pattern->sz, "sz");
+ if (err)
+ return err;
+
+ err = damon_sysfs_access_pattern_add_range_dir(access_pattern,
+ &access_pattern->nr_accesses, "nr_accesses");
+ if (err)
+ goto put_sz_out;
+
+ err = damon_sysfs_access_pattern_add_range_dir(access_pattern,
+ &access_pattern->age, "age");
+ if (err)
+ goto put_nr_accesses_sz_out;
+ return 0;
+
+put_nr_accesses_sz_out:
+ kobject_put(&access_pattern->nr_accesses->kobj);
+ access_pattern->nr_accesses = NULL;
+put_sz_out:
+ kobject_put(&access_pattern->sz->kobj);
+ access_pattern->sz = NULL;
+ return err;
+}
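
The error path is the cumulative-unwind idiom the *_add_dirs() helpers share: a failing add_range_dir() has already put its half-made child and never set the pointer, so each label must undo only the directories created in full before the failing step, NULLing the pointers so a later *_rm_dirs() cannot double-put. Reduced to its shape (make_a/make_b are hypothetical):

	/* Sketch of the shared unwind shape; not real DAMON code. */
	err = make_a(obj);		/* on failure: nothing to undo */
	if (err)
		return err;
	err = make_b(obj);		/* on failure: undo only a */
	if (err)
		goto put_a_out;
	return 0;

	put_a_out:
		kobject_put(&obj->a->kobj);
		obj->a = NULL;
		return err;
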
+
+static void damon_sysfs_access_pattern_rm_dirs(
+ struct damon_sysfs_access_pattern *access_pattern)
+{
+ kobject_put(&access_pattern->sz->kobj);
+ kobject_put(&access_pattern->nr_accesses->kobj);
+ kobject_put(&access_pattern->age->kobj);
+}
+
+static void damon_sysfs_access_pattern_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_access_pattern, kobj));
+}
+
+static struct attribute *damon_sysfs_access_pattern_attrs[] = {
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_access_pattern);
+
+static struct kobj_type damon_sysfs_access_pattern_ktype = {
+ .release = damon_sysfs_access_pattern_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_access_pattern_groups,
+};
+
+/*
+ * scheme directory
+ */
+
+struct damon_sysfs_scheme {
+ struct kobject kobj;
+ enum damos_action action;
+ struct damon_sysfs_access_pattern *access_pattern;
+ struct damon_sysfs_quotas *quotas;
+ struct damon_sysfs_watermarks *watermarks;
+ struct damon_sysfs_stats *stats;
+};
+
+/* This should match enum damos_action */
+static const char * const damon_sysfs_damos_action_strs[] = {
+ "willneed",
+ "cold",
+ "pageout",
+ "hugepage",
+ "nohugepage",
+ "stat",
+};
+
+static struct damon_sysfs_scheme *damon_sysfs_scheme_alloc(
+ enum damos_action action)
+{
+ struct damon_sysfs_scheme *scheme = kmalloc(sizeof(*scheme),
+ GFP_KERNEL);
+
+ if (!scheme)
+ return NULL;
+ scheme->kobj = (struct kobject){};
+ scheme->action = action;
+ return scheme;
+}
+
+static int damon_sysfs_scheme_set_access_pattern(
+ struct damon_sysfs_scheme *scheme)
+{
+ struct damon_sysfs_access_pattern *access_pattern;
+ int err;
+
+ access_pattern = damon_sysfs_access_pattern_alloc();
+ if (!access_pattern)
+ return -ENOMEM;
+ err = kobject_init_and_add(&access_pattern->kobj,
+ &damon_sysfs_access_pattern_ktype, &scheme->kobj,
+ "access_pattern");
+ if (err)
+ goto out;
+ err = damon_sysfs_access_pattern_add_dirs(access_pattern);
+ if (err)
+ goto out;
+ scheme->access_pattern = access_pattern;
+ return 0;
+
+out:
+ kobject_put(&access_pattern->kobj);
+ return err;
+}
+
+static int damon_sysfs_scheme_set_quotas(struct damon_sysfs_scheme *scheme)
+{
+ struct damon_sysfs_quotas *quotas = damon_sysfs_quotas_alloc();
+ int err;
+
+ if (!quotas)
+ return -ENOMEM;
+ err = kobject_init_and_add(&quotas->kobj, &damon_sysfs_quotas_ktype,
+ &scheme->kobj, "quotas");
+ if (err)
+ goto out;
+ err = damon_sysfs_quotas_add_dirs(quotas);
+ if (err)
+ goto out;
+ scheme->quotas = quotas;
+ return 0;
+
+out:
+ kobject_put(&quotas->kobj);
+ return err;
+}
+
+static int damon_sysfs_scheme_set_watermarks(struct damon_sysfs_scheme *scheme)
+{
+ struct damon_sysfs_watermarks *watermarks =
+ damon_sysfs_watermarks_alloc(DAMOS_WMARK_NONE, 0, 0, 0, 0);
+ int err;
+
+ if (!watermarks)
+ return -ENOMEM;
+ err = kobject_init_and_add(&watermarks->kobj,
+ &damon_sysfs_watermarks_ktype, &scheme->kobj,
+ "watermarks");
+ if (err)
+ kobject_put(&watermarks->kobj);
+ else
+ scheme->watermarks = watermarks;
+ return err;
+}
+
+static int damon_sysfs_scheme_set_stats(struct damon_sysfs_scheme *scheme)
+{
+ struct damon_sysfs_stats *stats = damon_sysfs_stats_alloc();
+ int err;
+
+ if (!stats)
+ return -ENOMEM;
+ err = kobject_init_and_add(&stats->kobj, &damon_sysfs_stats_ktype,
+ &scheme->kobj, "stats");
+ if (err)
+ kobject_put(&stats->kobj);
+ else
+ scheme->stats = stats;
+ return err;
+}
+
+static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme)
+{
+ int err;
+
+ err = damon_sysfs_scheme_set_access_pattern(scheme);
+ if (err)
+ return err;
+ err = damon_sysfs_scheme_set_quotas(scheme);
+ if (err)
+ goto put_access_pattern_out;
+ err = damon_sysfs_scheme_set_watermarks(scheme);
+ if (err)
+ goto put_quotas_access_pattern_out;
+ err = damon_sysfs_scheme_set_stats(scheme);
+ if (err)
+ goto put_watermarks_quotas_access_pattern_out;
+ return 0;
+
+put_watermarks_quotas_access_pattern_out:
+ kobject_put(&scheme->watermarks->kobj);
+ scheme->watermarks = NULL;
+put_quotas_access_pattern_out:
+ kobject_put(&scheme->quotas->kobj);
+ scheme->quotas = NULL;
+put_access_pattern_out:
+ kobject_put(&scheme->access_pattern->kobj);
+ scheme->access_pattern = NULL;
+ return err;
+}
+
+static void damon_sysfs_scheme_rm_dirs(struct damon_sysfs_scheme *scheme)
+{
+ damon_sysfs_access_pattern_rm_dirs(scheme->access_pattern);
+ kobject_put(&scheme->access_pattern->kobj);
+ damon_sysfs_quotas_rm_dirs(scheme->quotas);
+ kobject_put(&scheme->quotas->kobj);
+ kobject_put(&scheme->watermarks->kobj);
+ kobject_put(&scheme->stats->kobj);
+}
+
+static ssize_t action_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_scheme *scheme = container_of(kobj,
+ struct damon_sysfs_scheme, kobj);
+
+ return sysfs_emit(buf, "%s\n",
+ damon_sysfs_damos_action_strs[scheme->action]);
+}
+
+static ssize_t action_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme *scheme = container_of(kobj,
+ struct damon_sysfs_scheme, kobj);
+ enum damos_action action;
+
+ for (action = 0; action < NR_DAMOS_ACTIONS; action++) {
+ if (sysfs_streq(buf, damon_sysfs_damos_action_strs[action])) {
+ scheme->action = action;
+ return count;
+ }
+ }
+ return -EINVAL;
+}
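+
+/*
+ * Example of reading and writing the 'action' file of a scheme directory
+ * (the path is illustrative, assuming the hierarchy built by this file):
+ *
+ *   # cd /sys/kernel/mm/damon/admin/kdamonds/0/contexts/0/schemes/0
+ *   # cat action
+ *   stat
+ *   # echo pageout > action
+ *
+ * Writing a string that is not in damon_sysfs_damos_action_strs[] fails
+ * with -EINVAL.
+ */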
+
+static void damon_sysfs_scheme_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_scheme, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_scheme_action_attr =
+ __ATTR_RW_MODE(action, 0600);
+
+static struct attribute *damon_sysfs_scheme_attrs[] = {
+ &damon_sysfs_scheme_action_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_scheme);
+
+static struct kobj_type damon_sysfs_scheme_ktype = {
+ .release = damon_sysfs_scheme_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_scheme_groups,
+};
+
+/*
+ * schemes directory
+ */
+
+struct damon_sysfs_schemes {
+ struct kobject kobj;
+ struct damon_sysfs_scheme **schemes_arr;
+ int nr;
+};
+
+static struct damon_sysfs_schemes *damon_sysfs_schemes_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_schemes), GFP_KERNEL);
+}
+
+static void damon_sysfs_schemes_rm_dirs(struct damon_sysfs_schemes *schemes)
+{
+ struct damon_sysfs_scheme **schemes_arr = schemes->schemes_arr;
+ int i;
+
+ for (i = 0; i < schemes->nr; i++) {
+ damon_sysfs_scheme_rm_dirs(schemes_arr[i]);
+ kobject_put(&schemes_arr[i]->kobj);
+ }
+ schemes->nr = 0;
+ kfree(schemes_arr);
+ schemes->schemes_arr = NULL;
+}
+
+static int damon_sysfs_schemes_add_dirs(struct damon_sysfs_schemes *schemes,
+ int nr_schemes)
+{
+ struct damon_sysfs_scheme **schemes_arr, *scheme;
+ int err, i;
+
+ damon_sysfs_schemes_rm_dirs(schemes);
+ if (!nr_schemes)
+ return 0;
+
+ schemes_arr = kmalloc_array(nr_schemes, sizeof(*schemes_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!schemes_arr)
+ return -ENOMEM;
+ schemes->schemes_arr = schemes_arr;
+
+ for (i = 0; i < nr_schemes; i++) {
+ scheme = damon_sysfs_scheme_alloc(DAMOS_STAT);
+ if (!scheme) {
+ damon_sysfs_schemes_rm_dirs(schemes);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&scheme->kobj,
+ &damon_sysfs_scheme_ktype, &schemes->kobj,
+ "%d", i);
+ if (err)
+ goto out;
+ err = damon_sysfs_scheme_add_dirs(scheme);
+ if (err)
+ goto out;
+
+ schemes_arr[i] = scheme;
+ schemes->nr++;
+ }
+ return 0;
+
+out:
+ damon_sysfs_schemes_rm_dirs(schemes);
+ kobject_put(&scheme->kobj);
+ return err;
+}
+
+static ssize_t nr_schemes_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_schemes *schemes = container_of(kobj,
+ struct damon_sysfs_schemes, kobj);
+
+ return sysfs_emit(buf, "%d\n", schemes->nr);
+}
+
+static ssize_t nr_schemes_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_schemes *schemes = container_of(kobj,
+ struct damon_sysfs_schemes, kobj);
+ int nr, err = kstrtoint(buf, 0, &nr);
+
+ if (err)
+ return err;
+ if (nr < 0)
+ return -EINVAL;
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_schemes_add_dirs(schemes, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+ return count;
+}
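+
+/*
+ * A sketch of the intended 'nr_schemes' usage (directory names follow the
+ * kobject names used above; the listing is illustrative):
+ *
+ *   # cd /sys/kernel/mm/damon/admin/kdamonds/0/contexts/0/schemes
+ *   # echo 2 > nr_schemes
+ *   # ls
+ *   0  1  nr_schemes
+ *
+ * Each numbered directory is one scheme, initialized to DAMOS_STAT.
+ */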
+
+static void damon_sysfs_schemes_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_schemes, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_schemes_nr_attr =
+ __ATTR_RW_MODE(nr_schemes, 0600);
+
+static struct attribute *damon_sysfs_schemes_attrs[] = {
+ &damon_sysfs_schemes_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_schemes);
+
+static struct kobj_type damon_sysfs_schemes_ktype = {
+ .release = damon_sysfs_schemes_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_schemes_groups,
+};
+
+/*
+ * init region directory
+ */
+
+struct damon_sysfs_region {
+ struct kobject kobj;
+ unsigned long start;
+ unsigned long end;
+};
+
+static struct damon_sysfs_region *damon_sysfs_region_alloc(
+ unsigned long start,
+ unsigned long end)
+{
+ struct damon_sysfs_region *region = kmalloc(sizeof(*region),
+ GFP_KERNEL);
+
+ if (!region)
+ return NULL;
+ region->kobj = (struct kobject){};
+ region->start = start;
+ region->end = end;
+ return region;
+}
+
+static ssize_t start_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_region *region = container_of(kobj,
+ struct damon_sysfs_region, kobj);
+
+ return sysfs_emit(buf, "%lu\n", region->start);
+}
+
+static ssize_t start_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_region *region = container_of(kobj,
+ struct damon_sysfs_region, kobj);
+ int err = kstrtoul(buf, 0, &region->start);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t end_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_region *region = container_of(kobj,
+ struct damon_sysfs_region, kobj);
+
+ return sysfs_emit(buf, "%lu\n", region->end);
+}
+
+static ssize_t end_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_region *region = container_of(kobj,
+ struct damon_sysfs_region, kobj);
+ int err = kstrtoul(buf, 0, &region->end);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static void damon_sysfs_region_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_region, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_region_start_attr =
+ __ATTR_RW_MODE(start, 0600);
+
+static struct kobj_attribute damon_sysfs_region_end_attr =
+ __ATTR_RW_MODE(end, 0600);
+
+static struct attribute *damon_sysfs_region_attrs[] = {
+ &damon_sysfs_region_start_attr.attr,
+ &damon_sysfs_region_end_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_region);
+
+static struct kobj_type damon_sysfs_region_ktype = {
+ .release = damon_sysfs_region_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_region_groups,
+};
+
+/*
+ * init_regions directory
+ */
+
+struct damon_sysfs_regions {
+ struct kobject kobj;
+ struct damon_sysfs_region **regions_arr;
+ int nr;
+};
+
+static struct damon_sysfs_regions *damon_sysfs_regions_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_regions), GFP_KERNEL);
+}
+
+static void damon_sysfs_regions_rm_dirs(struct damon_sysfs_regions *regions)
+{
+ struct damon_sysfs_region **regions_arr = regions->regions_arr;
+ int i;
+
+ for (i = 0; i < regions->nr; i++)
+ kobject_put(&regions_arr[i]->kobj);
+ regions->nr = 0;
+ kfree(regions_arr);
+ regions->regions_arr = NULL;
+}
+
+static int damon_sysfs_regions_add_dirs(struct damon_sysfs_regions *regions,
+ int nr_regions)
+{
+ struct damon_sysfs_region **regions_arr, *region;
+ int err, i;
+
+ damon_sysfs_regions_rm_dirs(regions);
+ if (!nr_regions)
+ return 0;
+
+ regions_arr = kmalloc_array(nr_regions, sizeof(*regions_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!regions_arr)
+ return -ENOMEM;
+ regions->regions_arr = regions_arr;
+
+ for (i = 0; i < nr_regions; i++) {
+ region = damon_sysfs_region_alloc(0, 0);
+ if (!region) {
+ damon_sysfs_regions_rm_dirs(regions);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&region->kobj,
+ &damon_sysfs_region_ktype, &regions->kobj,
+ "%d", i);
+ if (err) {
+ kobject_put(&region->kobj);
+ damon_sysfs_regions_rm_dirs(regions);
+ return err;
+ }
+
+ regions_arr[i] = region;
+ regions->nr++;
+ }
+ return 0;
+}
+
+static ssize_t nr_regions_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_regions *regions = container_of(kobj,
+ struct damon_sysfs_regions, kobj);
+
+ return sysfs_emit(buf, "%d\n", regions->nr);
+}
+
+static ssize_t nr_regions_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_regions *regions = container_of(kobj,
+ struct damon_sysfs_regions, kobj);
+ int nr, err = kstrtoint(buf, 0, &nr);
+
+ if (err)
+ return err;
+ if (nr < 0)
+ return -EINVAL;
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_regions_add_dirs(regions, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+
+ return count;
+}
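+
+/*
+ * Example of setting one initial monitoring region (the path and the
+ * addresses are illustrative):
+ *
+ *   # cd /sys/kernel/mm/damon/admin/kdamonds/0/contexts/0/targets/0/regions
+ *   # echo 1 > nr_regions
+ *   # echo 0x7f0000000000 > 0/start
+ *   # echo 0x7f0000100000 > 0/end
+ */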
+
+static void damon_sysfs_regions_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_regions, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_regions_nr_attr =
+ __ATTR_RW_MODE(nr_regions, 0600);
+
+static struct attribute *damon_sysfs_regions_attrs[] = {
+ &damon_sysfs_regions_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_regions);
+
+static struct kobj_type damon_sysfs_regions_ktype = {
+ .release = damon_sysfs_regions_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_regions_groups,
+};
+
+/*
+ * target directory
+ */
+
+struct damon_sysfs_target {
+ struct kobject kobj;
+ struct damon_sysfs_regions *regions;
+ int pid;
+};
+
+static struct damon_sysfs_target *damon_sysfs_target_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_target), GFP_KERNEL);
+}
+
+static int damon_sysfs_target_add_dirs(struct damon_sysfs_target *target)
+{
+ struct damon_sysfs_regions *regions = damon_sysfs_regions_alloc();
+ int err;
+
+ if (!regions)
+ return -ENOMEM;
+
+ err = kobject_init_and_add(&regions->kobj, &damon_sysfs_regions_ktype,
+ &target->kobj, "regions");
+ if (err)
+ kobject_put(&regions->kobj);
+ else
+ target->regions = regions;
+ return err;
+}
+
+static void damon_sysfs_target_rm_dirs(struct damon_sysfs_target *target)
+{
+ damon_sysfs_regions_rm_dirs(target->regions);
+ kobject_put(&target->regions->kobj);
+}
+
+static ssize_t pid_target_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_target *target = container_of(kobj,
+ struct damon_sysfs_target, kobj);
+
+ return sysfs_emit(buf, "%d\n", target->pid);
+}
+
+static ssize_t pid_target_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_target *target = container_of(kobj,
+ struct damon_sysfs_target, kobj);
+ int err = kstrtoint(buf, 0, &target->pid);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
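+
+/*
+ * Example 'pid_target' usage for virtual address space monitoring ('vaddr'
+ * operations); 'my_workload' is a hypothetical process name:
+ *
+ *   # cd /sys/kernel/mm/damon/admin/kdamonds/0/contexts/0/targets/0
+ *   # echo $(pidof my_workload) > pid_target
+ *
+ * Operations sets that do not monitor per-process address spaces (e.g.,
+ * 'paddr') ignore this value.
+ */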
+
+static void damon_sysfs_target_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_target, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_target_pid_attr =
+ __ATTR_RW_MODE(pid_target, 0600);
+
+static struct attribute *damon_sysfs_target_attrs[] = {
+ &damon_sysfs_target_pid_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_target);
+
+static struct kobj_type damon_sysfs_target_ktype = {
+ .release = damon_sysfs_target_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_target_groups,
+};
+
+/*
+ * targets directory
+ */
+
+struct damon_sysfs_targets {
+ struct kobject kobj;
+ struct damon_sysfs_target **targets_arr;
+ int nr;
+};
+
+static struct damon_sysfs_targets *damon_sysfs_targets_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_targets), GFP_KERNEL);
+}
+
+static void damon_sysfs_targets_rm_dirs(struct damon_sysfs_targets *targets)
+{
+ struct damon_sysfs_target **targets_arr = targets->targets_arr;
+ int i;
+
+ for (i = 0; i < targets->nr; i++) {
+ damon_sysfs_target_rm_dirs(targets_arr[i]);
+ kobject_put(&targets_arr[i]->kobj);
+ }
+ targets->nr = 0;
+ kfree(targets_arr);
+ targets->targets_arr = NULL;
+}
+
+static int damon_sysfs_targets_add_dirs(struct damon_sysfs_targets *targets,
+ int nr_targets)
+{
+ struct damon_sysfs_target **targets_arr, *target;
+ int err, i;
+
+ damon_sysfs_targets_rm_dirs(targets);
+ if (!nr_targets)
+ return 0;
+
+ targets_arr = kmalloc_array(nr_targets, sizeof(*targets_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!targets_arr)
+ return -ENOMEM;
+ targets->targets_arr = targets_arr;
+
+ for (i = 0; i < nr_targets; i++) {
+ target = damon_sysfs_target_alloc();
+ if (!target) {
+ damon_sysfs_targets_rm_dirs(targets);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&target->kobj,
+ &damon_sysfs_target_ktype, &targets->kobj,
+ "%d", i);
+ if (err)
+ goto out;
+
+ err = damon_sysfs_target_add_dirs(target);
+ if (err)
+ goto out;
+
+ targets_arr[i] = target;
+ targets->nr++;
+ }
+ return 0;
+
+out:
+ damon_sysfs_targets_rm_dirs(targets);
+ kobject_put(&target->kobj);
+ return err;
+}
+
+static ssize_t nr_targets_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_targets *targets = container_of(kobj,
+ struct damon_sysfs_targets, kobj);
+
+ return sysfs_emit(buf, "%d\n", targets->nr);
+}
+
+static ssize_t nr_targets_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_targets *targets = container_of(kobj,
+ struct damon_sysfs_targets, kobj);
+ int nr, err = kstrtoint(buf, 0, &nr);
+
+ if (err)
+ return err;
+ if (nr < 0)
+ return -EINVAL;
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_targets_add_dirs(targets, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+
+ return count;
+}
+
+static void damon_sysfs_targets_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_targets, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_targets_nr_attr =
+ __ATTR_RW_MODE(nr_targets, 0600);
+
+static struct attribute *damon_sysfs_targets_attrs[] = {
+ &damon_sysfs_targets_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_targets);
+
+static struct kobj_type damon_sysfs_targets_ktype = {
+ .release = damon_sysfs_targets_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_targets_groups,
+};
+
+/*
+ * intervals directory
+ */
+
+struct damon_sysfs_intervals {
+ struct kobject kobj;
+ unsigned long sample_us;
+ unsigned long aggr_us;
+ unsigned long update_us;
+};
+
+static struct damon_sysfs_intervals *damon_sysfs_intervals_alloc(
+ unsigned long sample_us, unsigned long aggr_us,
+ unsigned long update_us)
+{
+ struct damon_sysfs_intervals *intervals = kmalloc(sizeof(*intervals),
+ GFP_KERNEL);
+
+ if (!intervals)
+ return NULL;
+
+ intervals->kobj = (struct kobject){};
+ intervals->sample_us = sample_us;
+ intervals->aggr_us = aggr_us;
+ intervals->update_us = update_us;
+ return intervals;
+}
+
+static ssize_t sample_us_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_intervals *intervals = container_of(kobj,
+ struct damon_sysfs_intervals, kobj);
+
+ return sysfs_emit(buf, "%lu\n", intervals->sample_us);
+}
+
+static ssize_t sample_us_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_intervals *intervals = container_of(kobj,
+ struct damon_sysfs_intervals, kobj);
+ unsigned long us;
+ int err = kstrtoul(buf, 0, &us);
+
+ if (err)
+ return -EINVAL;
+
+ intervals->sample_us = us;
+ return count;
+}
+
+static ssize_t aggr_us_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_intervals *intervals = container_of(kobj,
+ struct damon_sysfs_intervals, kobj);
+
+ return sysfs_emit(buf, "%lu\n", intervals->aggr_us);
+}
+
+static ssize_t aggr_us_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_intervals *intervals = container_of(kobj,
+ struct damon_sysfs_intervals, kobj);
+ unsigned long us;
+ int err = kstrtoul(buf, 0, &us);
+
+ if (err)
+ return -EINVAL;
+
+ intervals->aggr_us = us;
+ return count;
+}
+
+static ssize_t update_us_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_intervals *intervals = container_of(kobj,
+ struct damon_sysfs_intervals, kobj);
+
+ return sysfs_emit(buf, "%lu\n", intervals->update_us);
+}
+
+static ssize_t update_us_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_intervals *intervals = container_of(kobj,
+ struct damon_sysfs_intervals, kobj);
+ unsigned long us;
+ int err = kstrtoul(buf, 0, &us);
+
+ if (err)
+ return -EINVAL;
+
+ intervals->update_us = us;
+ return count;
+}
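+
+/*
+ * Example of tuning the monitoring intervals (values are illustrative; the
+ * defaults set in damon_sysfs_attrs_add_dirs() below are 5 ms sampling,
+ * 100 ms aggregation, and 60 s operations update):
+ *
+ *   # cd /sys/kernel/mm/damon/admin/kdamonds/0/contexts/0/monitoring_attrs
+ *   # echo 10000 > intervals/sample_us
+ *   # echo 200000 > intervals/aggr_us
+ */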
+
+static void damon_sysfs_intervals_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_intervals, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_intervals_sample_us_attr =
+ __ATTR_RW_MODE(sample_us, 0600);
+
+static struct kobj_attribute damon_sysfs_intervals_aggr_us_attr =
+ __ATTR_RW_MODE(aggr_us, 0600);
+
+static struct kobj_attribute damon_sysfs_intervals_update_us_attr =
+ __ATTR_RW_MODE(update_us, 0600);
+
+static struct attribute *damon_sysfs_intervals_attrs[] = {
+ &damon_sysfs_intervals_sample_us_attr.attr,
+ &damon_sysfs_intervals_aggr_us_attr.attr,
+ &damon_sysfs_intervals_update_us_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_intervals);
+
+static struct kobj_type damon_sysfs_intervals_ktype = {
+ .release = damon_sysfs_intervals_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_intervals_groups,
+};
+
+/*
+ * monitoring_attrs directory
+ */
+
+struct damon_sysfs_attrs {
+ struct kobject kobj;
+ struct damon_sysfs_intervals *intervals;
+ struct damon_sysfs_ul_range *nr_regions_range;
+};
+
+static struct damon_sysfs_attrs *damon_sysfs_attrs_alloc(void)
+{
+ struct damon_sysfs_attrs *attrs = kmalloc(sizeof(*attrs), GFP_KERNEL);
+
+ if (!attrs)
+ return NULL;
+ attrs->kobj = (struct kobject){};
+ return attrs;
+}
+
+static int damon_sysfs_attrs_add_dirs(struct damon_sysfs_attrs *attrs)
+{
+ struct damon_sysfs_intervals *intervals;
+ struct damon_sysfs_ul_range *nr_regions_range;
+ int err;
+
+ intervals = damon_sysfs_intervals_alloc(5000, 100000, 60000000);
+ if (!intervals)
+ return -ENOMEM;
+
+ err = kobject_init_and_add(&intervals->kobj,
+ &damon_sysfs_intervals_ktype, &attrs->kobj,
+ "intervals");
+ if (err)
+ goto put_intervals_out;
+ attrs->intervals = intervals;
+
+ nr_regions_range = damon_sysfs_ul_range_alloc(10, 1000);
+ if (!nr_regions_range) {
+ err = -ENOMEM;
+ goto put_intervals_out;
+ }
+
+ err = kobject_init_and_add(&nr_regions_range->kobj,
+ &damon_sysfs_ul_range_ktype, &attrs->kobj,
+ "nr_regions");
+ if (err)
+ goto put_nr_regions_intervals_out;
+ attrs->nr_regions_range = nr_regions_range;
+ return 0;
+
+put_nr_regions_intervals_out:
+ kobject_put(&nr_regions_range->kobj);
+ attrs->nr_regions_range = NULL;
+put_intervals_out:
+ kobject_put(&intervals->kobj);
+ attrs->intervals = NULL;
+ return err;
+}
+
+static void damon_sysfs_attrs_rm_dirs(struct damon_sysfs_attrs *attrs)
+{
+ kobject_put(&attrs->nr_regions_range->kobj);
+ kobject_put(&attrs->intervals->kobj);
+}
+
+static void damon_sysfs_attrs_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_attrs, kobj));
+}
+
+static struct attribute *damon_sysfs_attrs_attrs[] = {
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_attrs);
+
+static struct kobj_type damon_sysfs_attrs_ktype = {
+ .release = damon_sysfs_attrs_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_attrs_groups,
+};
+
+/*
+ * context directory
+ */
+
+/* This should be kept in sync with enum damon_ops_id */
+static const char * const damon_sysfs_ops_strs[] = {
+ "vaddr",
+ "paddr",
+};
+
+struct damon_sysfs_context {
+ struct kobject kobj;
+ enum damon_ops_id ops_id;
+ struct damon_sysfs_attrs *attrs;
+ struct damon_sysfs_targets *targets;
+ struct damon_sysfs_schemes *schemes;
+};
+
+static struct damon_sysfs_context *damon_sysfs_context_alloc(
+ enum damon_ops_id ops_id)
+{
+ struct damon_sysfs_context *context = kmalloc(sizeof(*context),
+ GFP_KERNEL);
+
+ if (!context)
+ return NULL;
+ context->kobj = (struct kobject){};
+ context->ops_id = ops_id;
+ return context;
+}
+
+static int damon_sysfs_context_set_attrs(struct damon_sysfs_context *context)
+{
+ struct damon_sysfs_attrs *attrs = damon_sysfs_attrs_alloc();
+ int err;
+
+ if (!attrs)
+ return -ENOMEM;
+ err = kobject_init_and_add(&attrs->kobj, &damon_sysfs_attrs_ktype,
+ &context->kobj, "monitoring_attrs");
+ if (err)
+ goto out;
+ err = damon_sysfs_attrs_add_dirs(attrs);
+ if (err)
+ goto out;
+ context->attrs = attrs;
+ return 0;
+
+out:
+ kobject_put(&attrs->kobj);
+ return err;
+}
+
+static int damon_sysfs_context_set_targets(struct damon_sysfs_context *context)
+{
+ struct damon_sysfs_targets *targets = damon_sysfs_targets_alloc();
+ int err;
+
+ if (!targets)
+ return -ENOMEM;
+ err = kobject_init_and_add(&targets->kobj, &damon_sysfs_targets_ktype,
+ &context->kobj, "targets");
+ if (err) {
+ kobject_put(&targets->kobj);
+ return err;
+ }
+ context->targets = targets;
+ return 0;
+}
+
+static int damon_sysfs_context_set_schemes(struct damon_sysfs_context *context)
+{
+ struct damon_sysfs_schemes *schemes = damon_sysfs_schemes_alloc();
+ int err;
+
+ if (!schemes)
+ return -ENOMEM;
+ err = kobject_init_and_add(&schemes->kobj, &damon_sysfs_schemes_ktype,
+ &context->kobj, "schemes");
+ if (err) {
+ kobject_put(&schemes->kobj);
+ return err;
+ }
+ context->schemes = schemes;
+ return 0;
+}
+
+static int damon_sysfs_context_add_dirs(struct damon_sysfs_context *context)
+{
+ int err;
+
+ err = damon_sysfs_context_set_attrs(context);
+ if (err)
+ return err;
+
+ err = damon_sysfs_context_set_targets(context);
+ if (err)
+ goto put_attrs_out;
+
+ err = damon_sysfs_context_set_schemes(context);
+ if (err)
+ goto put_targets_attrs_out;
+ return 0;
+
+put_targets_attrs_out:
+ kobject_put(&context->targets->kobj);
+ context->targets = NULL;
+put_attrs_out:
+ kobject_put(&context->attrs->kobj);
+ context->attrs = NULL;
+ return err;
+}
+
+static void damon_sysfs_context_rm_dirs(struct damon_sysfs_context *context)
+{
+ damon_sysfs_attrs_rm_dirs(context->attrs);
+ kobject_put(&context->attrs->kobj);
+ damon_sysfs_targets_rm_dirs(context->targets);
+ kobject_put(&context->targets->kobj);
+ damon_sysfs_schemes_rm_dirs(context->schemes);
+ kobject_put(&context->schemes->kobj);
+}
+
+static ssize_t operations_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_context *context = container_of(kobj,
+ struct damon_sysfs_context, kobj);
+
+ return sysfs_emit(buf, "%s\n", damon_sysfs_ops_strs[context->ops_id]);
+}
+
+static ssize_t operations_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_context *context = container_of(kobj,
+ struct damon_sysfs_context, kobj);
+ enum damon_ops_id id;
+
+ for (id = 0; id < NR_DAMON_OPS; id++) {
+ if (sysfs_streq(buf, damon_sysfs_ops_strs[id])) {
+ context->ops_id = id;
+ return count;
+ }
+ }
+ return -EINVAL;
+}
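+
+/*
+ * Example 'operations' usage; the valid inputs are the strings in
+ * damon_sysfs_ops_strs[]:
+ *
+ *   # echo vaddr > /sys/kernel/mm/damon/admin/kdamonds/0/contexts/0/operations
+ */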
+
+static void damon_sysfs_context_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_context, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_context_operations_attr =
+ __ATTR_RW_MODE(operations, 0600);
+
+static struct attribute *damon_sysfs_context_attrs[] = {
+ &damon_sysfs_context_operations_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_context);
+
+static struct kobj_type damon_sysfs_context_ktype = {
+ .release = damon_sysfs_context_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_context_groups,
+};
+
+/*
+ * contexts directory
+ */
+
+struct damon_sysfs_contexts {
+ struct kobject kobj;
+ struct damon_sysfs_context **contexts_arr;
+ int nr;
+};
+
+static struct damon_sysfs_contexts *damon_sysfs_contexts_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_contexts), GFP_KERNEL);
+}
+
+static void damon_sysfs_contexts_rm_dirs(struct damon_sysfs_contexts *contexts)
+{
+ struct damon_sysfs_context **contexts_arr = contexts->contexts_arr;
+ int i;
+
+ for (i = 0; i < contexts->nr; i++) {
+ damon_sysfs_context_rm_dirs(contexts_arr[i]);
+ kobject_put(&contexts_arr[i]->kobj);
+ }
+ contexts->nr = 0;
+ kfree(contexts_arr);
+ contexts->contexts_arr = NULL;
+}
+
+static int damon_sysfs_contexts_add_dirs(struct damon_sysfs_contexts *contexts,
+ int nr_contexts)
+{
+ struct damon_sysfs_context **contexts_arr, *context;
+ int err, i;
+
+ damon_sysfs_contexts_rm_dirs(contexts);
+ if (!nr_contexts)
+ return 0;
+
+ contexts_arr = kmalloc_array(nr_contexts, sizeof(*contexts_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!contexts_arr)
+ return -ENOMEM;
+ contexts->contexts_arr = contexts_arr;
+
+ for (i = 0; i < nr_contexts; i++) {
+ context = damon_sysfs_context_alloc(DAMON_OPS_VADDR);
+ if (!context) {
+ damon_sysfs_contexts_rm_dirs(contexts);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&context->kobj,
+ &damon_sysfs_context_ktype, &contexts->kobj,
+ "%d", i);
+ if (err)
+ goto out;
+
+ err = damon_sysfs_context_add_dirs(context);
+ if (err)
+ goto out;
+
+ contexts_arr[i] = context;
+ contexts->nr++;
+ }
+ return 0;
+
+out:
+ damon_sysfs_contexts_rm_dirs(contexts);
+ kobject_put(&context->kobj);
+ return err;
+}
+
+static ssize_t nr_contexts_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_contexts *contexts = container_of(kobj,
+ struct damon_sysfs_contexts, kobj);
+
+ return sysfs_emit(buf, "%d\n", contexts->nr);
+}
+
+static ssize_t nr_contexts_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_contexts *contexts = container_of(kobj,
+ struct damon_sysfs_contexts, kobj);
+ int nr, err;
+
+ err = kstrtoint(buf, 0, &nr);
+ if (err)
+ return err;
+ /* TODO: support multiple contexts per kdamond */
+ if (nr < 0 || 1 < nr)
+ return -EINVAL;
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_contexts_add_dirs(contexts, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+
+ return count;
+}
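+
+/*
+ * Per the TODO above, only zero or one context per kdamond is accepted for
+ * now; larger values are rejected (illustrative shell output):
+ *
+ *   # echo 1 > /sys/kernel/mm/damon/admin/kdamonds/0/contexts/nr_contexts
+ *   # echo 2 > /sys/kernel/mm/damon/admin/kdamonds/0/contexts/nr_contexts
+ *   bash: echo: write error: Invalid argument
+ */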
+
+static void damon_sysfs_contexts_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_contexts, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_contexts_nr_attr =
+ __ATTR_RW_MODE(nr_contexts, 0600);
+
+static struct attribute *damon_sysfs_contexts_attrs[] = {
+ &damon_sysfs_contexts_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_contexts);
+
+static struct kobj_type damon_sysfs_contexts_ktype = {
+ .release = damon_sysfs_contexts_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_contexts_groups,
+};
+
+/*
+ * kdamond directory
+ */
+
+struct damon_sysfs_kdamond {
+ struct kobject kobj;
+ struct damon_sysfs_contexts *contexts;
+ struct damon_ctx *damon_ctx;
+};
+
+static struct damon_sysfs_kdamond *damon_sysfs_kdamond_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_kdamond), GFP_KERNEL);
+}
+
+static int damon_sysfs_kdamond_add_dirs(struct damon_sysfs_kdamond *kdamond)
+{
+ struct damon_sysfs_contexts *contexts;
+ int err;
+
+ contexts = damon_sysfs_contexts_alloc();
+ if (!contexts)
+ return -ENOMEM;
+
+ err = kobject_init_and_add(&contexts->kobj,
+ &damon_sysfs_contexts_ktype, &kdamond->kobj,
+ "contexts");
+ if (err) {
+ kobject_put(&contexts->kobj);
+ return err;
+ }
+ kdamond->contexts = contexts;
+
+ return err;
+}
+
+static void damon_sysfs_kdamond_rm_dirs(struct damon_sysfs_kdamond *kdamond)
+{
+ damon_sysfs_contexts_rm_dirs(kdamond->contexts);
+ kobject_put(&kdamond->contexts->kobj);
+}
+
+static bool damon_sysfs_ctx_running(struct damon_ctx *ctx)
+{
+ bool running;
+
+ mutex_lock(&ctx->kdamond_lock);
+ running = ctx->kdamond != NULL;
+ mutex_unlock(&ctx->kdamond_lock);
+ return running;
+}
+
+static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_kdamond *kdamond = container_of(kobj,
+ struct damon_sysfs_kdamond, kobj);
+ struct damon_ctx *ctx = kdamond->damon_ctx;
+ bool running;
+
+ if (!ctx)
+ running = false;
+ else
+ running = damon_sysfs_ctx_running(ctx);
+
+ return sysfs_emit(buf, "%s\n", running ? "on" : "off");
+}
+
+static int damon_sysfs_set_attrs(struct damon_ctx *ctx,
+ struct damon_sysfs_attrs *sys_attrs)
+{
+ struct damon_sysfs_intervals *sys_intervals = sys_attrs->intervals;
+ struct damon_sysfs_ul_range *sys_nr_regions =
+ sys_attrs->nr_regions_range;
+
+ return damon_set_attrs(ctx, sys_intervals->sample_us,
+ sys_intervals->aggr_us, sys_intervals->update_us,
+ sys_nr_regions->min, sys_nr_regions->max);
+}
+
+static void damon_sysfs_destroy_targets(struct damon_ctx *ctx)
+{
+ struct damon_target *t, *next;
+
+ damon_for_each_target_safe(t, next, ctx) {
+ if (ctx->ops.id == DAMON_OPS_VADDR)
+ put_pid(t->pid);
+ damon_destroy_target(t);
+ }
+}
+
+static int damon_sysfs_set_regions(struct damon_target *t,
+ struct damon_sysfs_regions *sysfs_regions)
+{
+ int i;
+
+ for (i = 0; i < sysfs_regions->nr; i++) {
+ struct damon_sysfs_region *sys_region =
+ sysfs_regions->regions_arr[i];
+ struct damon_region *prev, *r;
+
+ if (sys_region->start > sys_region->end)
+ return -EINVAL;
+ r = damon_new_region(sys_region->start, sys_region->end);
+ if (!r)
+ return -ENOMEM;
+ damon_add_region(r, t);
+ if (damon_nr_regions(t) > 1) {
+ prev = damon_prev_region(r);
+ if (prev->ar.end > r->ar.start) {
+ damon_destroy_region(r, t);
+ return -EINVAL;
+ }
+ }
+ }
+ return 0;
+}
+
+static int damon_sysfs_set_targets(struct damon_ctx *ctx,
+ struct damon_sysfs_targets *sysfs_targets)
+{
+ int i, err;
+
+ for (i = 0; i < sysfs_targets->nr; i++) {
+ struct damon_sysfs_target *sys_target =
+ sysfs_targets->targets_arr[i];
+ struct damon_target *t = damon_new_target();
+
+ if (!t) {
+ damon_sysfs_destroy_targets(ctx);
+ return -ENOMEM;
+ }
+ if (ctx->ops.id == DAMON_OPS_VADDR) {
+ t->pid = find_get_pid(sys_target->pid);
+ if (!t->pid) {
+ damon_sysfs_destroy_targets(ctx);
+ return -EINVAL;
+ }
+ }
+ damon_add_target(ctx, t);
+ err = damon_sysfs_set_regions(t, sys_target->regions);
+ if (err) {
+ damon_sysfs_destroy_targets(ctx);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static struct damos *damon_sysfs_mk_scheme(
+ struct damon_sysfs_scheme *sysfs_scheme)
+{
+ struct damon_sysfs_access_pattern *pattern =
+ sysfs_scheme->access_pattern;
+ struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas;
+ struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights;
+ struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks;
+ struct damos_quota quota = {
+ .ms = sysfs_quotas->ms,
+ .sz = sysfs_quotas->sz,
+ .reset_interval = sysfs_quotas->reset_interval_ms,
+ .weight_sz = sysfs_weights->sz,
+ .weight_nr_accesses = sysfs_weights->nr_accesses,
+ .weight_age = sysfs_weights->age,
+ };
+ struct damos_watermarks wmarks = {
+ .metric = sysfs_wmarks->metric,
+ .interval = sysfs_wmarks->interval_us,
+ .high = sysfs_wmarks->high,
+ .mid = sysfs_wmarks->mid,
+ .low = sysfs_wmarks->low,
+ };
+
+ return damon_new_scheme(pattern->sz->min, pattern->sz->max,
+ pattern->nr_accesses->min, pattern->nr_accesses->max,
+ pattern->age->min, pattern->age->max,
+ sysfs_scheme->action, &quota, &wmarks);
+}
+
+static int damon_sysfs_set_schemes(struct damon_ctx *ctx,
+ struct damon_sysfs_schemes *sysfs_schemes)
+{
+ int i;
+
+ for (i = 0; i < sysfs_schemes->nr; i++) {
+ struct damos *scheme, *next;
+
+ scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]);
+ if (!scheme) {
+ damon_for_each_scheme_safe(scheme, next, ctx)
+ damon_destroy_scheme(scheme);
+ return -ENOMEM;
+ }
+ damon_add_scheme(ctx, scheme);
+ }
+ return 0;
+}
+
+static void damon_sysfs_before_terminate(struct damon_ctx *ctx)
+{
+ struct damon_target *t, *next;
+
+ if (ctx->ops.id != DAMON_OPS_VADDR)
+ return;
+
+ mutex_lock(&ctx->kdamond_lock);
+ damon_for_each_target_safe(t, next, ctx) {
+ put_pid(t->pid);
+ damon_destroy_target(t);
+ }
+ mutex_unlock(&ctx->kdamond_lock);
+}
+
+static struct damon_ctx *damon_sysfs_build_ctx(
+ struct damon_sysfs_context *sys_ctx)
+{
+ struct damon_ctx *ctx = damon_new_ctx();
+ int err;
+
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+
+ err = damon_select_ops(ctx, sys_ctx->ops_id);
+ if (err)
+ goto out;
+ err = damon_sysfs_set_attrs(ctx, sys_ctx->attrs);
+ if (err)
+ goto out;
+ err = damon_sysfs_set_targets(ctx, sys_ctx->targets);
+ if (err)
+ goto out;
+ err = damon_sysfs_set_schemes(ctx, sys_ctx->schemes);
+ if (err)
+ goto out;
+
+ ctx->callback.before_terminate = damon_sysfs_before_terminate;
+ return ctx;
+
+out:
+ damon_destroy_ctx(ctx);
+ return ERR_PTR(err);
+}
+
+static int damon_sysfs_turn_damon_on(struct damon_sysfs_kdamond *kdamond)
+{
+ struct damon_ctx *ctx;
+ int err;
+
+ if (kdamond->damon_ctx &&
+ damon_sysfs_ctx_running(kdamond->damon_ctx))
+ return -EBUSY;
+ /* TODO: support multiple contexts per kdamond */
+ if (kdamond->contexts->nr != 1)
+ return -EINVAL;
+
+ if (kdamond->damon_ctx)
+ damon_destroy_ctx(kdamond->damon_ctx);
+ kdamond->damon_ctx = NULL;
+
+ ctx = damon_sysfs_build_ctx(kdamond->contexts->contexts_arr[0]);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+ err = damon_start(&ctx, 1, false);
+ if (err) {
+ damon_destroy_ctx(ctx);
+ return err;
+ }
+ kdamond->damon_ctx = ctx;
+ return err;
+}
+
+static int damon_sysfs_turn_damon_off(struct damon_sysfs_kdamond *kdamond)
+{
+ if (!kdamond->damon_ctx)
+ return -EINVAL;
+ /*
+ * To let users read the final monitoring results of already turned-off
+ * DAMON, we free kdamond->damon_ctx not here but in the next
+ * damon_sysfs_turn_damon_on() or kdamonds_nr_store() call.
+ */
+ return damon_stop(&kdamond->damon_ctx, 1);
+}
+
+static int damon_sysfs_update_schemes_stats(struct damon_sysfs_kdamond *kdamond)
+{
+ struct damon_ctx *ctx = kdamond->damon_ctx;
+ struct damos *scheme;
+ int schemes_idx = 0;
+
+ if (!ctx)
+ return -EINVAL;
+ mutex_lock(&ctx->kdamond_lock);
+ damon_for_each_scheme(scheme, ctx) {
+ struct damon_sysfs_schemes *sysfs_schemes;
+ struct damon_sysfs_stats *sysfs_stats;
+
+ sysfs_schemes = kdamond->contexts->contexts_arr[0]->schemes;
+ sysfs_stats = sysfs_schemes->schemes_arr[schemes_idx++]->stats;
+ sysfs_stats->nr_tried = scheme->stat.nr_tried;
+ sysfs_stats->sz_tried = scheme->stat.sz_tried;
+ sysfs_stats->nr_applied = scheme->stat.nr_applied;
+ sysfs_stats->sz_applied = scheme->stat.sz_applied;
+ sysfs_stats->qt_exceeds = scheme->stat.qt_exceeds;
+ }
+ mutex_unlock(&ctx->kdamond_lock);
+ return 0;
+}
+
+static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_kdamond *kdamond = container_of(kobj,
+ struct damon_sysfs_kdamond, kobj);
+ ssize_t ret;
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ if (sysfs_streq(buf, "on"))
+ ret = damon_sysfs_turn_damon_on(kdamond);
+ else if (sysfs_streq(buf, "off"))
+ ret = damon_sysfs_turn_damon_off(kdamond);
+ else if (sysfs_streq(buf, "update_schemes_stats"))
+ ret = damon_sysfs_update_schemes_stats(kdamond);
+ else
+ ret = -EINVAL;
+ mutex_unlock(&damon_sysfs_lock);
+ if (!ret)
+ ret = count;
+ return ret;
+}
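+
+/*
+ * Example 'state' transitions (illustrative; assumes the kdamond has one
+ * fully configured context):
+ *
+ *   # echo on > /sys/kernel/mm/damon/admin/kdamonds/0/state
+ *   # cat /sys/kernel/mm/damon/admin/kdamonds/0/state
+ *   on
+ *   # echo update_schemes_stats > /sys/kernel/mm/damon/admin/kdamonds/0/state
+ *   # echo off > /sys/kernel/mm/damon/admin/kdamonds/0/state
+ */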
+
+static ssize_t pid_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_kdamond *kdamond = container_of(kobj,
+ struct damon_sysfs_kdamond, kobj);
+ struct damon_ctx *ctx;
+ int pid;
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ ctx = kdamond->damon_ctx;
+ if (!ctx) {
+ pid = -1;
+ goto out;
+ }
+ mutex_lock(&ctx->kdamond_lock);
+ if (!ctx->kdamond)
+ pid = -1;
+ else
+ pid = ctx->kdamond->pid;
+ mutex_unlock(&ctx->kdamond_lock);
+out:
+ mutex_unlock(&damon_sysfs_lock);
+ return sysfs_emit(buf, "%d\n", pid);
+}
+
+static void damon_sysfs_kdamond_release(struct kobject *kobj)
+{
+ struct damon_sysfs_kdamond *kdamond = container_of(kobj,
+ struct damon_sysfs_kdamond, kobj);
+
+ if (kdamond->damon_ctx)
+ damon_destroy_ctx(kdamond->damon_ctx);
+ kfree(kdamond);
+}
+
+static struct kobj_attribute damon_sysfs_kdamond_state_attr =
+ __ATTR_RW_MODE(state, 0600);
+
+static struct kobj_attribute damon_sysfs_kdamond_pid_attr =
+ __ATTR_RO_MODE(pid, 0400);
+
+static struct attribute *damon_sysfs_kdamond_attrs[] = {
+ &damon_sysfs_kdamond_state_attr.attr,
+ &damon_sysfs_kdamond_pid_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_kdamond);
+
+static struct kobj_type damon_sysfs_kdamond_ktype = {
+ .release = damon_sysfs_kdamond_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_kdamond_groups,
+};
+
+/*
+ * kdamonds directory
+ */
+
+struct damon_sysfs_kdamonds {
+ struct kobject kobj;
+ struct damon_sysfs_kdamond **kdamonds_arr;
+ int nr;
+};
+
+static struct damon_sysfs_kdamonds *damon_sysfs_kdamonds_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_kdamonds), GFP_KERNEL);
+}
+
+static void damon_sysfs_kdamonds_rm_dirs(struct damon_sysfs_kdamonds *kdamonds)
+{
+ struct damon_sysfs_kdamond **kdamonds_arr = kdamonds->kdamonds_arr;
+ int i;
+
+ for (i = 0; i < kdamonds->nr; i++) {
+ damon_sysfs_kdamond_rm_dirs(kdamonds_arr[i]);
+ kobject_put(&kdamonds_arr[i]->kobj);
+ }
+ kdamonds->nr = 0;
+ kfree(kdamonds_arr);
+ kdamonds->kdamonds_arr = NULL;
+}
+
+static int damon_sysfs_nr_running_ctxs(struct damon_sysfs_kdamond **kdamonds,
+ int nr_kdamonds)
+{
+ int nr_running_ctxs = 0;
+ int i;
+
+ for (i = 0; i < nr_kdamonds; i++) {
+ struct damon_ctx *ctx = kdamonds[i]->damon_ctx;
+
+ if (!ctx)
+ continue;
+ mutex_lock(&ctx->kdamond_lock);
+ if (ctx->kdamond)
+ nr_running_ctxs++;
+ mutex_unlock(&ctx->kdamond_lock);
+ }
+ return nr_running_ctxs;
+}
+
+static int damon_sysfs_kdamonds_add_dirs(struct damon_sysfs_kdamonds *kdamonds,
+ int nr_kdamonds)
+{
+ struct damon_sysfs_kdamond **kdamonds_arr, *kdamond;
+ int err, i;
+
+ if (damon_sysfs_nr_running_ctxs(kdamonds->kdamonds_arr, kdamonds->nr))
+ return -EBUSY;
+
+ damon_sysfs_kdamonds_rm_dirs(kdamonds);
+ if (!nr_kdamonds)
+ return 0;
+
+ kdamonds_arr = kmalloc_array(nr_kdamonds, sizeof(*kdamonds_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!kdamonds_arr)
+ return -ENOMEM;
+ kdamonds->kdamonds_arr = kdamonds_arr;
+
+ for (i = 0; i < nr_kdamonds; i++) {
+ kdamond = damon_sysfs_kdamond_alloc();
+ if (!kdamond) {
+ damon_sysfs_kdamonds_rm_dirs(kdamonds);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&kdamond->kobj,
+ &damon_sysfs_kdamond_ktype, &kdamonds->kobj,
+ "%d", i);
+ if (err)
+ goto out;
+
+ err = damon_sysfs_kdamond_add_dirs(kdamond);
+ if (err)
+ goto out;
+
+ kdamonds_arr[i] = kdamond;
+ kdamonds->nr++;
+ }
+ return 0;
+
+out:
+ damon_sysfs_kdamonds_rm_dirs(kdamonds);
+ kobject_put(&kdamond->kobj);
+ return err;
+}
+
+static ssize_t nr_kdamonds_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_kdamonds *kdamonds = container_of(kobj,
+ struct damon_sysfs_kdamonds, kobj);
+
+ return sysfs_emit(buf, "%d\n", kdamonds->nr);
+}
+
+static ssize_t nr_kdamonds_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_kdamonds *kdamonds = container_of(kobj,
+ struct damon_sysfs_kdamonds, kobj);
+ int nr, err;
+
+ err = kstrtoint(buf, 0, &nr);
+ if (err)
+ return err;
+ if (nr < 0)
+ return -EINVAL;
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_kdamonds_add_dirs(kdamonds, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+
+ return count;
+}
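+
+/*
+ * Example of creating two kdamond directories (illustrative):
+ *
+ *   # echo 2 > /sys/kernel/mm/damon/admin/kdamonds/nr_kdamonds
+ *   # ls /sys/kernel/mm/damon/admin/kdamonds
+ *   0  1  nr_kdamonds
+ *
+ * The write fails with -EBUSY if any already-created kdamond is running.
+ */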
+
+static void damon_sysfs_kdamonds_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_kdamonds, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_kdamonds_nr_attr =
+ __ATTR_RW_MODE(nr_kdamonds, 0600);
+
+static struct attribute *damon_sysfs_kdamonds_attrs[] = {
+ &damon_sysfs_kdamonds_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_kdamonds);
+
+static struct kobj_type damon_sysfs_kdamonds_ktype = {
+ .release = damon_sysfs_kdamonds_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_kdamonds_groups,
+};
+
+/*
+ * damon user interface directory
+ */
+
+struct damon_sysfs_ui_dir {
+ struct kobject kobj;
+ struct damon_sysfs_kdamonds *kdamonds;
+};
+
+static struct damon_sysfs_ui_dir *damon_sysfs_ui_dir_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_ui_dir), GFP_KERNEL);
+}
+
+static int damon_sysfs_ui_dir_add_dirs(struct damon_sysfs_ui_dir *ui_dir)
+{
+ struct damon_sysfs_kdamonds *kdamonds;
+ int err;
+
+ kdamonds = damon_sysfs_kdamonds_alloc();
+ if (!kdamonds)
+ return -ENOMEM;
+
+ err = kobject_init_and_add(&kdamonds->kobj,
+ &damon_sysfs_kdamonds_ktype, &ui_dir->kobj,
+ "kdamonds");
+ if (err) {
+ kobject_put(&kdamonds->kobj);
+ return err;
+ }
+ ui_dir->kdamonds = kdamonds;
+ return err;
+}
+
+static void damon_sysfs_ui_dir_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_ui_dir, kobj));
+}
+
+static struct attribute *damon_sysfs_ui_dir_attrs[] = {
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_ui_dir);
+
+static struct kobj_type damon_sysfs_ui_dir_ktype = {
+ .release = damon_sysfs_ui_dir_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_ui_dir_groups,
+};
+
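+/*
+ * A minimal end-to-end sketch of using the resulting interface, assuming
+ * sysfs is mounted at /sys and 'my_workload' is a hypothetical process:
+ *
+ *   # cd /sys/kernel/mm/damon/admin
+ *   # echo 1 > kdamonds/nr_kdamonds
+ *   # echo 1 > kdamonds/0/contexts/nr_contexts
+ *   # echo vaddr > kdamonds/0/contexts/0/operations
+ *   # echo 1 > kdamonds/0/contexts/0/targets/nr_targets
+ *   # echo $(pidof my_workload) > kdamonds/0/contexts/0/targets/0/pid_target
+ *   # echo on > kdamonds/0/state
+ */
+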
+static int __init damon_sysfs_init(void)
+{
+ struct kobject *damon_sysfs_root;
+ struct damon_sysfs_ui_dir *admin;
+ int err;
+
+ damon_sysfs_root = kobject_create_and_add("damon", mm_kobj);
+ if (!damon_sysfs_root)
+ return -ENOMEM;
+
+ admin = damon_sysfs_ui_dir_alloc();
+ if (!admin) {
+ kobject_put(damon_sysfs_root);
+ return -ENOMEM;
+ }
+ err = kobject_init_and_add(&admin->kobj, &damon_sysfs_ui_dir_ktype,
+ damon_sysfs_root, "admin");
+ if (err)
+ goto out;
+ err = damon_sysfs_ui_dir_add_dirs(admin);
+ if (err)
+ goto out;
+ return 0;
+
+out:
+ kobject_put(&admin->kobj);
+ kobject_put(damon_sysfs_root);
+ return err;
+}
+subsys_initcall(damon_sysfs_init);
diff --git a/mm/damon/vaddr-test.h b/mm/damon/vaddr-test.h
index 6a1b9272ea12..1a55bb6c36c3 100644
--- a/mm/damon/vaddr-test.h
+++ b/mm/damon/vaddr-test.h
@@ -139,7 +139,7 @@ static void damon_do_test_apply_three_regions(struct kunit *test,
struct damon_region *r;
int i;
- t = damon_new_target(42);
+ t = damon_new_target();
for (i = 0; i < nr_regions / 2; i++) {
r = damon_new_region(regions[i * 2], regions[i * 2 + 1]);
damon_add_region(r, t);
@@ -251,7 +251,7 @@ static void damon_test_apply_three_regions4(struct kunit *test)
static void damon_test_split_evenly_fail(struct kunit *test,
unsigned long start, unsigned long end, unsigned int nr_pieces)
{
- struct damon_target *t = damon_new_target(42);
+ struct damon_target *t = damon_new_target();
struct damon_region *r = damon_new_region(start, end);
damon_add_region(r, t);
@@ -270,7 +270,7 @@ static void damon_test_split_evenly_fail(struct kunit *test,
static void damon_test_split_evenly_succ(struct kunit *test,
unsigned long start, unsigned long end, unsigned int nr_pieces)
{
- struct damon_target *t = damon_new_target(42);
+ struct damon_target *t = damon_new_target();
struct damon_region *r = damon_new_region(start, end);
unsigned long expected_width = (end - start) / nr_pieces;
unsigned long i = 0;
@@ -314,7 +314,7 @@ static struct kunit_case damon_test_cases[] = {
};
static struct kunit_suite damon_test_suite = {
- .name = "damon-primitives",
+ .name = "damon-operations",
.test_cases = damon_test_cases,
};
kunit_test_suite(damon_test_suite);
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 89b6468da2b9..b2ec0aa1ff45 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -15,7 +15,7 @@
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
-#include "prmtv-common.h"
+#include "ops-common.h"
#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
#undef DAMON_MIN_REGION
@@ -23,12 +23,12 @@
#endif
/*
- * 't->id' should be the pointer to the relevant 'struct pid' having reference
+ * 't->pid' should be the pointer to the relevant 'struct pid' having reference
* count. Caller must put the returned task, unless it is NULL.
*/
static inline struct task_struct *damon_get_task_struct(struct damon_target *t)
{
- return get_pid_task((struct pid *)t->id, PIDTYPE_PID);
+ return get_pid_task(t->pid, PIDTYPE_PID);
}
/*
@@ -402,9 +402,6 @@ static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
pte_t entry = huge_ptep_get(pte);
struct page *page = pte_page(entry);
- if (!page)
- return;
-
get_page(page);
if (pte_young(entry)) {
@@ -564,9 +561,6 @@ static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
goto out;
page = pte_page(entry);
- if (!page)
- goto out;
-
get_page(page);
if (pte_young(entry) || !page_is_idle(page) ||
@@ -659,7 +653,7 @@ static unsigned int damon_va_check_accesses(struct damon_ctx *ctx)
* Functions for the target validity check and cleanup
*/
-bool damon_va_target_valid(void *target)
+static bool damon_va_target_valid(void *target)
{
struct damon_target *t = target;
struct task_struct *task;
@@ -745,17 +739,24 @@ static int damon_va_scheme_score(struct damon_ctx *context,
return DAMOS_MAX_SCORE;
}
-void damon_va_set_primitives(struct damon_ctx *ctx)
+static int __init damon_va_initcall(void)
{
- ctx->primitive.init = damon_va_init;
- ctx->primitive.update = damon_va_update;
- ctx->primitive.prepare_access_checks = damon_va_prepare_access_checks;
- ctx->primitive.check_accesses = damon_va_check_accesses;
- ctx->primitive.reset_aggregated = NULL;
- ctx->primitive.target_valid = damon_va_target_valid;
- ctx->primitive.cleanup = NULL;
- ctx->primitive.apply_scheme = damon_va_apply_scheme;
- ctx->primitive.get_scheme_score = damon_va_scheme_score;
-}
+ struct damon_operations ops = {
+ .id = DAMON_OPS_VADDR,
+ .init = damon_va_init,
+ .update = damon_va_update,
+ .prepare_access_checks = damon_va_prepare_access_checks,
+ .check_accesses = damon_va_check_accesses,
+ .reset_aggregated = NULL,
+ .target_valid = damon_va_target_valid,
+ .cleanup = NULL,
+ .apply_scheme = damon_va_apply_scheme,
+ .get_scheme_score = damon_va_scheme_score,
+ };
+
+ return damon_register_ops(&ops);
+}
+
+subsys_initcall(damon_va_initcall);
#include "vaddr-test.h"
diff --git a/mm/debug.c b/mm/debug.c
index bc9ac87f0e08..eeb7ea3ca292 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -48,7 +48,8 @@ const struct trace_print_flags vmaflag_names[] = {
static void __dump_page(struct page *page)
{
- struct page *head = compound_head(page);
+ struct folio *folio = page_folio(page);
+ struct page *head = &folio->page;
struct address_space *mapping;
bool compound = PageCompound(page);
/*
@@ -76,6 +77,7 @@ static void __dump_page(struct page *page)
else
mapping = (void *)(tmp & ~PAGE_MAPPING_FLAGS);
head = page;
+ folio = (struct folio *)page;
compound = false;
} else {
mapping = page_mapping(page);
@@ -92,16 +94,10 @@ static void __dump_page(struct page *page)
page, page_ref_count(head), mapcount, mapping,
page_to_pgoff(page), page_to_pfn(page));
if (compound) {
- if (hpage_pincount_available(page)) {
- pr_warn("head:%p order:%u compound_mapcount:%d compound_pincount:%d\n",
- head, compound_order(head),
- head_compound_mapcount(head),
- head_compound_pincount(head));
- } else {
- pr_warn("head:%p order:%u compound_mapcount:%d\n",
- head, compound_order(head),
- head_compound_mapcount(head));
- }
+ pr_warn("head:%p order:%u compound_mapcount:%d compound_pincount:%d\n",
+ head, compound_order(head),
+ folio_entire_mapcount(folio),
+ head_compound_pincount(head));
}
#ifdef CONFIG_MEMCG
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index a7ac97c76762..db2abd9e415b 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -171,6 +171,8 @@ static void __init pte_advanced_tests(struct pgtable_debug_args *args)
ptep_test_and_clear_young(args->vma, args->vaddr, args->ptep);
pte = ptep_get(args->ptep);
WARN_ON(pte_young(pte));
+
+ ptep_get_and_clear_full(args->mm, args->vaddr, args->ptep, 1);
}
static void __init pte_savedwrite_tests(struct pgtable_debug_args *args)
diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c
index 74984c23a87e..9bc12e526ed0 100644
--- a/mm/early_ioremap.c
+++ b/mm/early_ioremap.c
@@ -17,6 +17,7 @@
#include <linux/vmalloc.h>
#include <asm/fixmap.h>
#include <asm/early_ioremap.h>
+#include "internal.h"
#ifdef CONFIG_MMU
static int early_ioremap_debug __initdata;
diff --git a/mm/fadvise.c b/mm/fadvise.c
index d6baa4f451c5..338f16022012 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -109,9 +109,8 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
case POSIX_FADV_NOREUSE:
break;
case POSIX_FADV_DONTNEED:
- if (!inode_write_congested(mapping->host))
- __filemap_fdatawrite_range(mapping, offset, endbyte,
- WB_SYNC_NONE);
+ __filemap_fdatawrite_range(mapping, offset, endbyte,
+ WB_SYNC_NONE);
/*
* First and last FULL page! Partial pages are deliberately
diff --git a/mm/filemap.c b/mm/filemap.c
index ad8c39d90bf9..d2e6a79fe69d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -72,7 +72,7 @@
* Lock ordering:
*
* ->i_mmap_rwsem (truncate_pagecache)
- * ->private_lock (__free_pte->__set_page_dirty_buffers)
+ * ->private_lock (__free_pte->block_dirty_folio)
* ->swap_lock (exclusive_swap_page, others)
* ->i_pages lock
*
@@ -115,7 +115,7 @@
* ->memcg->move_lock (page_remove_rmap->lock_page_memcg)
* bdi.wb->list_lock (zap_pte_range->set_page_dirty)
* ->inode->i_lock (zap_pte_range->set_page_dirty)
- * ->private_lock (zap_pte_range->__set_page_dirty_buffers)
+ * ->private_lock (zap_pte_range->block_dirty_folio)
*
* ->i_mmap_rwsem
* ->tasklist_lock (memory_failure, collect_procs_ao)
@@ -842,26 +842,27 @@ noinline int __filemap_add_folio(struct address_space *mapping,
{
XA_STATE(xas, &mapping->i_pages, index);
int huge = folio_test_hugetlb(folio);
- int error;
bool charged = false;
+ long nr = 1;
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
mapping_set_update(&xas, mapping);
- folio_get(folio);
- folio->mapping = mapping;
- folio->index = index;
-
if (!huge) {
- error = mem_cgroup_charge(folio, NULL, gfp);
+ int error = mem_cgroup_charge(folio, NULL, gfp);
VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
if (error)
- goto error;
+ return error;
charged = true;
+ xas_set_order(&xas, index, folio_order(folio));
+ nr = folio_nr_pages(folio);
}
gfp &= GFP_RECLAIM_MASK;
+ folio_ref_add(folio, nr);
+ folio->mapping = mapping;
+ folio->index = xas.xa_index;
do {
unsigned int order = xa_get_order(xas.xa, xas.xa_index);
@@ -885,6 +886,8 @@ noinline int __filemap_add_folio(struct address_space *mapping,
/* entry may have been split before we acquired lock */
order = xa_get_order(xas.xa, xas.xa_index);
if (order > folio_order(folio)) {
+ /* How to handle large swap entries? */
+ BUG_ON(shmem_mapping(mapping));
xas_split(&xas, old, order);
xas_reset(&xas);
}
@@ -894,29 +897,31 @@ noinline int __filemap_add_folio(struct address_space *mapping,
if (xas_error(&xas))
goto unlock;
- mapping->nrpages++;
+ mapping->nrpages += nr;
/* hugetlb pages do not participate in page cache accounting */
- if (!huge)
- __lruvec_stat_add_folio(folio, NR_FILE_PAGES);
+ if (!huge) {
+ __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
+ if (folio_test_pmd_mappable(folio))
+ __lruvec_stat_mod_folio(folio,
+ NR_FILE_THPS, nr);
+ }
unlock:
xas_unlock_irq(&xas);
} while (xas_nomem(&xas, gfp));
- if (xas_error(&xas)) {
- error = xas_error(&xas);
- if (charged)
- mem_cgroup_uncharge(folio);
+ if (xas_error(&xas))
goto error;
- }
trace_mm_filemap_add_to_page_cache(folio);
return 0;
error:
+ if (charged)
+ mem_cgroup_uncharge(folio);
folio->mapping = NULL;
/* Leave page->index set: truncation relies upon it */
- folio_put(folio);
- return error;
+ folio_put_refs(folio, nr);
+ return xas_error(&xas);
}
ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO);
@@ -1054,6 +1059,12 @@ void __init pagecache_init(void)
init_waitqueue_head(&folio_wait_table[i]);
page_writeback_init();
+
+ /*
+ * tmpfs uses the ZERO_PAGE for reading holes: it is up-to-date,
+ * and splice's page_cache_pipe_buf_confirm() needs to see that.
+ */
+ SetPageUptodate(ZERO_PAGE(0));
}
/*
@@ -2229,8 +2240,9 @@ out:
* @nr_pages: The maximum number of pages
* @pages: Where the resulting pages are placed
*
- * find_get_pages_contig() works exactly like find_get_pages(), except
- * that the returned number of pages are guaranteed to be contiguous.
+ * find_get_pages_contig() works exactly like find_get_pages_range(),
+ * except that the returned pages are guaranteed to be contiguous.
*
* Return: the number of pages which were found.
*/
@@ -2290,9 +2302,9 @@ EXPORT_SYMBOL(find_get_pages_contig);
* @nr_pages: the maximum number of pages
* @pages: where the resulting pages are placed
*
- * Like find_get_pages(), except we only return head pages which are tagged
- * with @tag. @index is updated to the index immediately after the last
- * page we return, ready for the next iteration.
+ * Like find_get_pages_range(), except we only return head pages which are
+ * tagged with @tag. @index is updated to the index immediately after the
+ * last page we return, ready for the next iteration.
*
* Return: the number of pages which were found.
*/
@@ -2452,7 +2464,7 @@ static bool filemap_range_uptodate(struct address_space *mapping,
pos -= folio_pos(folio);
}
- return mapping->a_ops->is_partially_uptodate(&folio->page, pos, count);
+ return mapping->a_ops->is_partially_uptodate(folio, pos, count);
}
static int filemap_update_page(struct kiocb *iocb,
@@ -2844,7 +2856,7 @@ static inline loff_t folio_seek_hole_data(struct xa_state *xas,
offset = offset_in_folio(folio, start) & ~(bsz - 1);
do {
- if (ops->is_partially_uptodate(&folio->page, offset, bsz) ==
+ if (ops->is_partially_uptodate(folio, offset, bsz) ==
seek_data)
break;
start = (start + bsz) & ~(bsz - 1);
@@ -2990,6 +3002,24 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
struct file *fpin = NULL;
unsigned int mmap_miss;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ /* Use the readahead code, even if readahead is disabled */
+ if (vmf->vma->vm_flags & VM_HUGEPAGE) {
+ fpin = maybe_unlock_mmap_for_io(vmf, fpin);
+ ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1);
+ ra->size = HPAGE_PMD_NR;
+ /*
+ * Fetch two PMD folios, so we get the chance to actually
+ * readahead, unless we've been told not to.
+ */
+ if (!(vmf->vma->vm_flags & VM_RAND_READ))
+ ra->size *= 2;
+ ra->async_size = HPAGE_PMD_NR;
+ page_cache_ra_order(&ractl, ra, HPAGE_PMD_ORDER);
+ return fpin;
+ }
+#endif
+
/* If we don't want any read-ahead, don't bother */
if (vmf->vma->vm_flags & VM_RAND_READ)
return fpin;
@@ -3022,7 +3052,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
ra->size = ra->ra_pages;
ra->async_size = ra->ra_pages / 4;
ractl._index = ra->start;
- do_page_cache_ra(&ractl, ra->size, ra->async_size);
+ page_cache_ra_order(&ractl, ra, 0);
return fpin;
}
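/*
 * Illustrative sketch: a worked example for the VM_HUGEPAGE branch
 * above, assuming 4KiB base pages and 2MiB PMDs (HPAGE_PMD_NR == 512,
 * HPAGE_PMD_ORDER == 9). A fault at index 1000 is aligned down to the
 * PMD boundary at index 512, ra->size becomes 512 (1024 unless
 * VM_RAND_READ is set), and page_cache_ra_order() is asked for
 * order-9 folios.
 */
static unsigned long pmd_round_down_sketch(unsigned long index)
{
	/* HPAGE_PMD_NR is a power of two, so masking rounds down. */
	return index & ~((unsigned long)HPAGE_PMD_NR - 1);
}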
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index 749555a232a8..46fa179e32fb 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -7,6 +7,7 @@
#include <linux/migrate.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
+#include "internal.h"
struct address_space *page_mapping(struct page *page)
{
@@ -151,3 +152,15 @@ int try_to_release_page(struct page *page, gfp_t gfp)
return filemap_release_folio(page_folio(page), gfp);
}
EXPORT_SYMBOL(try_to_release_page);
+
+int isolate_lru_page(struct page *page)
+{
+ if (WARN_RATELIMIT(PageTail(page), "trying to isolate tail page"))
+ return -EBUSY;
+ return folio_isolate_lru((struct folio *)page);
+}
+
+void putback_lru_page(struct page *page)
+{
+ folio_putback_lru(page_folio(page));
+}
diff --git a/mm/gup.c b/mm/gup.c
index f0af462ac1e2..271fbe8195d7 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -29,107 +29,71 @@ struct follow_page_context {
unsigned int page_mask;
};
-static void hpage_pincount_add(struct page *page, int refs)
-{
- VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
- VM_BUG_ON_PAGE(page != compound_head(page), page);
-
- atomic_add(refs, compound_pincount_ptr(page));
-}
-
-static void hpage_pincount_sub(struct page *page, int refs)
-{
- VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
- VM_BUG_ON_PAGE(page != compound_head(page), page);
-
- atomic_sub(refs, compound_pincount_ptr(page));
-}
-
-/* Equivalent to calling put_page() @refs times. */
-static void put_page_refs(struct page *page, int refs)
-{
-#ifdef CONFIG_DEBUG_VM
- if (VM_WARN_ON_ONCE_PAGE(page_ref_count(page) < refs, page))
- return;
-#endif
-
- /*
- * Calling put_page() for each ref is unnecessarily slow. Only the last
- * ref needs a put_page().
- */
- if (refs > 1)
- page_ref_sub(page, refs - 1);
- put_page(page);
-}
-
/*
- * Return the compound head page with ref appropriately incremented,
+ * Return the folio with ref appropriately incremented,
* or NULL if that failed.
*/
-static inline struct page *try_get_compound_head(struct page *page, int refs)
+static inline struct folio *try_get_folio(struct page *page, int refs)
{
- struct page *head = compound_head(page);
+ struct folio *folio;
- if (WARN_ON_ONCE(page_ref_count(head) < 0))
+retry:
+ folio = page_folio(page);
+ if (WARN_ON_ONCE(folio_ref_count(folio) < 0))
return NULL;
- if (unlikely(!page_cache_add_speculative(head, refs)))
+ if (unlikely(!folio_ref_try_add_rcu(folio, refs)))
return NULL;
/*
- * At this point we have a stable reference to the head page; but it
- * could be that between the compound_head() lookup and the refcount
- * increment, the compound page was split, in which case we'd end up
- * holding a reference on a page that has nothing to do with the page
+ * At this point we have a stable reference to the folio; but it
+ * could be that between calling page_folio() and the refcount
+ * increment, the folio was split, in which case we'd end up
+ * holding a reference on a folio that has nothing to do with the page
* we were given anymore.
- * So now that the head page is stable, recheck that the pages still
- * belong together.
+ * So now that the folio is stable, recheck that the page still
+ * belongs to this folio.
*/
- if (unlikely(compound_head(page) != head)) {
- put_page_refs(head, refs);
- return NULL;
+ if (unlikely(page_folio(page) != folio)) {
+ folio_put_refs(folio, refs);
+ goto retry;
}
- return head;
+ return folio;
}
/**
- * try_grab_compound_head() - attempt to elevate a page's refcount, by a
- * flags-dependent amount.
- *
- * Even though the name includes "compound_head", this function is still
- * appropriate for callers that have a non-compound @page to get.
- *
+ * try_grab_folio() - Attempt to get or pin a folio.
* @page: pointer to page to be grabbed
- * @refs: the value to (effectively) add to the page's refcount
+ * @refs: the value to (effectively) add to the folio's refcount
* @flags: gup flags: these are the FOLL_* flag values.
*
* "grab" names in this file mean, "look at flags to decide whether to use
- * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount.
+ * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.
*
* Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the
* same time. (That's true throughout the get_user_pages*() and
* pin_user_pages*() APIs.) Cases:
*
- * FOLL_GET: page's refcount will be incremented by @refs.
+ * FOLL_GET: folio's refcount will be incremented by @refs.
*
- * FOLL_PIN on compound pages that are > two pages long: page's refcount will
- * be incremented by @refs, and page[2].hpage_pinned_refcount will be
- * incremented by @refs * GUP_PIN_COUNTING_BIAS.
+ * FOLL_PIN on large folios: folio's refcount will be incremented by
+ * @refs, and its compound_pincount will be incremented by @refs.
*
- * FOLL_PIN on normal pages, or compound pages that are two pages long:
- * page's refcount will be incremented by @refs * GUP_PIN_COUNTING_BIAS.
+ * FOLL_PIN on single-page folios: folio's refcount will be incremented by
+ * @refs * GUP_PIN_COUNTING_BIAS.
*
- * Return: head page (with refcount appropriately incremented) for success, or
- * NULL upon failure. If neither FOLL_GET nor FOLL_PIN was set, that's
- * considered failure, and furthermore, a likely bug in the caller, so a warning
- * is also emitted.
+ * Return: The folio containing @page (with refcount appropriately
+ * incremented) for success, or NULL upon failure. If neither FOLL_GET
+ * nor FOLL_PIN was set, that's considered failure, and furthermore,
+ * a likely bug in the caller, so a warning is also emitted.
*/
-struct page *try_grab_compound_head(struct page *page,
- int refs, unsigned int flags)
+struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
{
if (flags & FOLL_GET)
- return try_get_compound_head(page, refs);
+ return try_get_folio(page, refs);
else if (flags & FOLL_PIN) {
+ struct folio *folio;
+
/*
* Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
* right zone, so fail and let the caller fall back to the slow
@@ -143,63 +107,57 @@ struct page *try_grab_compound_head(struct page *page,
* CAUTION: Don't use compound_head() on the page before this
* point, the result won't be stable.
*/
- page = try_get_compound_head(page, refs);
- if (!page)
+ folio = try_get_folio(page, refs);
+ if (!folio)
return NULL;
/*
- * When pinning a compound page of order > 1 (which is what
- * hpage_pincount_available() checks for), use an exact count to
- * track it, via hpage_pincount_add/_sub().
+ * When pinning a large folio, use an exact count to track it.
*
- * However, be sure to *also* increment the normal page refcount
- * field at least once, so that the page really is pinned.
- * That's why the refcount from the earlier
- * try_get_compound_head() is left intact.
+ * However, be sure to *also* increment the normal folio
+ * refcount field at least once, so that the folio really
+ * is pinned. That's why the refcount from the earlier
+ * try_get_folio() is left intact.
*/
- if (hpage_pincount_available(page))
- hpage_pincount_add(page, refs);
+ if (folio_test_large(folio))
+ atomic_add(refs, folio_pincount_ptr(folio));
else
- page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1));
-
- mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED,
- refs);
+ folio_ref_add(folio,
+ refs * (GUP_PIN_COUNTING_BIAS - 1));
+ node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
- return page;
+ return folio;
}
WARN_ON_ONCE(1);
return NULL;
}
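/*
 * Illustrative sketch, not part of the patch: how a FOLL_PIN taken by
 * try_grab_folio() can later be observed. This mirrors the
 * folio_maybe_dma_pinned() style of check; GUP_PIN_COUNTING_BIAS is
 * 1024 upstream.
 */
static inline bool folio_maybe_pinned_sketch(struct folio *folio)
{
	/* Large folios keep an exact pin count beside the refcount. */
	if (folio_test_large(folio))
		return atomic_read(folio_pincount_ptr(folio)) > 0;
	/* Single-page folios overload the refcount by the bias. */
	return folio_ref_count(folio) >= GUP_PIN_COUNTING_BIAS;
}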
-static void put_compound_head(struct page *page, int refs, unsigned int flags)
+static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
{
if (flags & FOLL_PIN) {
- mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_RELEASED,
- refs);
-
- if (hpage_pincount_available(page))
- hpage_pincount_sub(page, refs);
+ node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs);
+ if (folio_test_large(folio))
+ atomic_sub(refs, folio_pincount_ptr(folio));
else
refs *= GUP_PIN_COUNTING_BIAS;
}
- put_page_refs(page, refs);
+ folio_put_refs(folio, refs);
}
/**
* try_grab_page() - elevate a page's refcount by a flag-dependent amount
+ * @page: pointer to page to be grabbed
+ * @flags: gup flags: these are the FOLL_* flag values.
*
* This might not do anything at all, depending on the flags argument.
*
* "grab" names in this file mean, "look at flags to decide whether to use
* FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount.
*
- * @page: pointer to page to be grabbed
- * @flags: gup flags: these are the FOLL_* flag values.
- *
* Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same
- * time. Cases: please see the try_grab_compound_head() documentation, with
+ * time. Cases: please see the try_grab_folio() documentation, with
* "refs=1".
*
* Return: true for success, or if no action was required (if neither FOLL_PIN
@@ -208,10 +166,31 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags)
*/
bool __must_check try_grab_page(struct page *page, unsigned int flags)
{
- if (!(flags & (FOLL_GET | FOLL_PIN)))
- return true;
+ struct folio *folio = page_folio(page);
+
+ WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN));
+ if (WARN_ON_ONCE(folio_ref_count(folio) <= 0))
+ return false;
+
+ if (flags & FOLL_GET)
+ folio_ref_inc(folio);
+ else if (flags & FOLL_PIN) {
+ /*
+ * Similar to try_grab_folio(): be sure to *also*
+ * increment the normal page refcount field at least once,
+ * so that the page really is pinned.
+ */
+ if (folio_test_large(folio)) {
+ folio_ref_add(folio, 1);
+ atomic_add(1, folio_pincount_ptr(folio));
+ } else {
+ folio_ref_add(folio, GUP_PIN_COUNTING_BIAS);
+ }
- return try_grab_compound_head(page, 1, flags);
+ node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1);
+ }
+
+ return true;
}
/**
@@ -225,62 +204,40 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags)
*/
void unpin_user_page(struct page *page)
{
- put_compound_head(compound_head(page), 1, FOLL_PIN);
+ gup_put_folio(page_folio(page), 1, FOLL_PIN);
}
EXPORT_SYMBOL(unpin_user_page);
-static inline void compound_range_next(unsigned long i, unsigned long npages,
- struct page **list, struct page **head,
- unsigned int *ntails)
+static inline struct folio *gup_folio_range_next(struct page *start,
+ unsigned long npages, unsigned long i, unsigned int *ntails)
{
- struct page *next, *page;
+ struct page *next = nth_page(start, i);
+ struct folio *folio = page_folio(next);
unsigned int nr = 1;
- if (i >= npages)
- return;
-
- next = *list + i;
- page = compound_head(next);
- if (PageCompound(page) && compound_order(page) >= 1)
- nr = min_t(unsigned int,
- page + compound_nr(page) - next, npages - i);
+ if (folio_test_large(folio))
+ nr = min_t(unsigned int, npages - i,
+ folio_nr_pages(folio) - folio_page_idx(folio, next));
- *head = page;
*ntails = nr;
+ return folio;
}
-#define for_each_compound_range(__i, __list, __npages, __head, __ntails) \
- for (__i = 0, \
- compound_range_next(__i, __npages, __list, &(__head), &(__ntails)); \
- __i < __npages; __i += __ntails, \
- compound_range_next(__i, __npages, __list, &(__head), &(__ntails)))
-
-static inline void compound_next(unsigned long i, unsigned long npages,
- struct page **list, struct page **head,
- unsigned int *ntails)
+static inline struct folio *gup_folio_next(struct page **list,
+ unsigned long npages, unsigned long i, unsigned int *ntails)
{
- struct page *page;
+ struct folio *folio = page_folio(list[i]);
unsigned int nr;
- if (i >= npages)
- return;
-
- page = compound_head(list[i]);
for (nr = i + 1; nr < npages; nr++) {
- if (compound_head(list[nr]) != page)
+ if (page_folio(list[nr]) != folio)
break;
}
- *head = page;
*ntails = nr - i;
+ return folio;
}
-#define for_each_compound_head(__i, __list, __npages, __head, __ntails) \
- for (__i = 0, \
- compound_next(__i, __npages, __list, &(__head), &(__ntails)); \
- __i < __npages; __i += __ntails, \
- compound_next(__i, __npages, __list, &(__head), &(__ntails)))
-
/**
* unpin_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages
* @pages: array of pages to be maybe marked dirty, and definitely released.
@@ -306,16 +263,17 @@ static inline void compound_next(unsigned long i, unsigned long npages,
void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
bool make_dirty)
{
- unsigned long index;
- struct page *head;
- unsigned int ntails;
+ unsigned long i;
+ struct folio *folio;
+ unsigned int nr;
if (!make_dirty) {
unpin_user_pages(pages, npages);
return;
}
- for_each_compound_head(index, pages, npages, head, ntails) {
+ for (i = 0; i < npages; i += nr) {
+ folio = gup_folio_next(pages, npages, i, &nr);
/*
* Checking PageDirty at this point may race with
* clear_page_dirty_for_io(), but that's OK. Two key
@@ -336,9 +294,12 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
* written back, so it gets written back again in the
* next writeback cycle. This is harmless.
*/
- if (!PageDirty(head))
- set_page_dirty_lock(head);
- put_compound_head(head, ntails, FOLL_PIN);
+ if (!folio_test_dirty(folio)) {
+ folio_lock(folio);
+ folio_mark_dirty(folio);
+ folio_unlock(folio);
+ }
+ gup_put_folio(folio, nr, FOLL_PIN);
}
}
EXPORT_SYMBOL(unpin_user_pages_dirty_lock);
@@ -367,14 +328,18 @@ EXPORT_SYMBOL(unpin_user_pages_dirty_lock);
void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages,
bool make_dirty)
{
- unsigned long index;
- struct page *head;
- unsigned int ntails;
+ unsigned long i;
+ struct folio *folio;
+ unsigned int nr;
- for_each_compound_range(index, &page, npages, head, ntails) {
- if (make_dirty && !PageDirty(head))
- set_page_dirty_lock(head);
- put_compound_head(head, ntails, FOLL_PIN);
+ for (i = 0; i < npages; i += nr) {
+ folio = gup_folio_range_next(page, npages, i, &nr);
+ if (make_dirty && !folio_test_dirty(folio)) {
+ folio_lock(folio);
+ folio_mark_dirty(folio);
+ folio_unlock(folio);
+ }
+ gup_put_folio(folio, nr, FOLL_PIN);
}
}
EXPORT_SYMBOL(unpin_user_page_range_dirty_lock);
@@ -390,9 +355,9 @@ EXPORT_SYMBOL(unpin_user_page_range_dirty_lock);
*/
void unpin_user_pages(struct page **pages, unsigned long npages)
{
- unsigned long index;
- struct page *head;
- unsigned int ntails;
+ unsigned long i;
+ struct folio *folio;
+ unsigned int nr;
/*
* If this WARN_ON() fires, then the system *might* be leaking pages (by
@@ -402,8 +367,10 @@ void unpin_user_pages(struct page **pages, unsigned long npages)
if (WARN_ON(IS_ERR_VALUE(npages)))
return;
- for_each_compound_head(index, pages, npages, head, ntails)
- put_compound_head(head, ntails, FOLL_PIN);
+ for (i = 0; i < npages; i += nr) {
+ folio = gup_folio_next(pages, npages, i, &nr);
+ gup_put_folio(folio, nr, FOLL_PIN);
+ }
}
EXPORT_SYMBOL(unpin_user_pages);
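/*
 * Illustrative sketch (caller and buffer names are hypothetical):
 * typical pairing for the unpin APIs above. Every page returned by a
 * successful pin_user_pages*() call must eventually be released via
 * unpin_user_pages() or one of the dirtying variants.
 */
static int pin_for_dma_sketch(unsigned long uaddr, struct page **pages,
			      unsigned long want)
{
	long nr = pin_user_pages(uaddr, want, FOLL_WRITE | FOLL_LONGTERM,
				 pages, NULL);

	if (nr <= 0)
		return nr ? (int)nr : -EFAULT;
	/* ... program and complete DMA into the pinned pages ... */
	unpin_user_pages(pages, nr);
	return 0;
}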
@@ -439,10 +406,6 @@ static struct page *no_page_table(struct vm_area_struct *vma,
static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
pte_t *pte, unsigned int flags)
{
- /* No page to get reference */
- if (flags & FOLL_GET)
- return -EFAULT;
-
if (flags & FOLL_TOUCH) {
pte_t entry = *pte;
@@ -572,32 +535,6 @@ retry:
*/
mark_page_accessed(page);
}
- if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
- /* Do not mlock pte-mapped THP */
- if (PageTransCompound(page))
- goto out;
-
- /*
- * The preliminary mapping check is mainly to avoid the
- * pointless overhead of lock_page on the ZERO_PAGE
- * which might bounce very badly if there is contention.
- *
- * If the page is already locked, we don't need to
- * handle it now - vmscan will handle it later if and
- * when it attempts to reclaim the page.
- */
- if (page->mapping && trylock_page(page)) {
- lru_add_drain(); /* push cached pages to LRU */
- /*
- * Because we lock page here, and migration is
- * blocked by the pte's page reference, and we
- * know the page is still mapped, we don't even
- * need to check for file-cache page truncation.
- */
- mlock_vma_page(page);
- unlock_page(page);
- }
- }
out:
pte_unmap_unlock(ptep, ptl);
return page;
@@ -920,9 +857,6 @@ static int faultin_page(struct vm_area_struct *vma,
unsigned int fault_flags = 0;
vm_fault_t ret;
- /* mlock all present pages, but do not fault in new pages */
- if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
- return -ENOENT;
if (*flags & FOLL_NOFAULT)
return -EFAULT;
if (*flags & FOLL_WRITE)
@@ -1173,15 +1107,20 @@ retry:
case -ENOMEM:
case -EHWPOISON:
goto out;
- case -ENOENT:
- goto next_page;
}
BUG();
} else if (PTR_ERR(page) == -EEXIST) {
/*
* Proper page table entry exists, but no corresponding
- * struct page.
+ * struct page. If the caller expects **pages to be
+ * filled in, bail out now, because that can't be done
+ * for this page.
*/
+ if (pages) {
+ ret = PTR_ERR(page);
+ goto out;
+ }
+
goto next_page;
} else if (IS_ERR(page)) {
ret = PTR_ERR(page);
@@ -1472,9 +1411,14 @@ long populate_vma_page_range(struct vm_area_struct *vma,
VM_BUG_ON_VMA(end > vma->vm_end, vma);
mmap_assert_locked(mm);
- gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
+ /*
+ * Rightly or wrongly, the VM_LOCKONFAULT case has never used
+ * faultin_page() to break COW, so it has no work to do here.
+ */
if (vma->vm_flags & VM_LOCKONFAULT)
- gup_flags &= ~FOLL_POPULATE;
+ return nr_pages;
+
+ gup_flags = FOLL_TOUCH;
/*
* We want to touch writable mappings with a write fault in order
* to break COW, except for shared mappings because these don't COW
@@ -1541,10 +1485,9 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
* in the page table.
* FOLL_HWPOISON: Return -EHWPOISON instead of -EFAULT when we hit
* a poisoned page.
- * FOLL_POPULATE: Always populate memory with VM_LOCKONFAULT.
* !FOLL_FORCE: Require proper access permissions.
*/
- gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK | FOLL_HWPOISON;
+ gup_flags = FOLL_TOUCH | FOLL_HWPOISON;
if (write)
gup_flags |= FOLL_WRITE;
@@ -1704,11 +1647,11 @@ EXPORT_SYMBOL(fault_in_writeable);
* @uaddr: start of address range
* @size: length of address range
*
- * Faults in an address range using get_user_pages, i.e., without triggering
- * hardware page faults. This is primarily useful when we already know that
- * some or all of the pages in the address range aren't in memory.
+ * Faults in an address range for writing. This is primarily useful when we
+ * already know that some or all of the pages in the address range aren't in
+ * memory.
*
- * Other than fault_in_writeable(), this function is non-destructive.
+ * Unlike fault_in_writeable(), this function is non-destructive.
*
* Note that we don't pin or otherwise hold the pages referenced that we fault
* in. There's no guarantee that they'll stay in memory for any duration of
@@ -1719,46 +1662,27 @@ EXPORT_SYMBOL(fault_in_writeable);
*/
size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
{
- unsigned long start = (unsigned long)untagged_addr(uaddr);
- unsigned long end, nstart, nend;
+ unsigned long start = (unsigned long)uaddr, end;
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma = NULL;
- int locked = 0;
+ bool unlocked = false;
- nstart = start & PAGE_MASK;
+ if (unlikely(size == 0))
+ return 0;
end = PAGE_ALIGN(start + size);
- if (end < nstart)
+ if (end < start)
end = 0;
- for (; nstart != end; nstart = nend) {
- unsigned long nr_pages;
- long ret;
- if (!locked) {
- locked = 1;
- mmap_read_lock(mm);
- vma = find_vma(mm, nstart);
- } else if (nstart >= vma->vm_end)
- vma = vma->vm_next;
- if (!vma || vma->vm_start >= end)
- break;
- nend = end ? min(end, vma->vm_end) : vma->vm_end;
- if (vma->vm_flags & (VM_IO | VM_PFNMAP))
- continue;
- if (nstart < vma->vm_start)
- nstart = vma->vm_start;
- nr_pages = (nend - nstart) / PAGE_SIZE;
- ret = __get_user_pages_locked(mm, nstart, nr_pages,
- NULL, NULL, &locked,
- FOLL_TOUCH | FOLL_WRITE);
- if (ret <= 0)
+ mmap_read_lock(mm);
+ do {
+ if (fixup_user_fault(mm, start, FAULT_FLAG_WRITE, &unlocked))
break;
- nend = nstart + ret * PAGE_SIZE;
- }
- if (locked)
- mmap_read_unlock(mm);
- if (nstart == end)
- return 0;
- return size - min_t(size_t, nstart - start, size);
+ start = (start + PAGE_SIZE) & PAGE_MASK;
+ } while (start != end);
+ mmap_read_unlock(mm);
+
+ if (size > (unsigned long)uaddr - start)
+ return size - ((unsigned long)uaddr - start);
+ return 0;
}
EXPORT_SYMBOL(fault_in_safe_writeable);
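/*
 * Illustrative sketch (the caller shape is hypothetical): the usage
 * pattern the rewritten helper serves. Pre-fault the destination, then
 * retry an operation that must not take page faults while holding
 * locks.
 */
static bool prefault_for_write_sketch(char __user *ubuf, size_t len)
{
	/* The return value is the number of bytes NOT faulted in. */
	return fault_in_safe_writeable(ubuf, len) == 0;
}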
@@ -1843,72 +1767,80 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
struct page **pages,
unsigned int gup_flags)
{
- unsigned long i;
- unsigned long isolation_error_count = 0;
- bool drain_allow = true;
+ unsigned long isolation_error_count = 0, i;
+ struct folio *prev_folio = NULL;
LIST_HEAD(movable_page_list);
- long ret = 0;
- struct page *prev_head = NULL;
- struct page *head;
- struct migration_target_control mtc = {
- .nid = NUMA_NO_NODE,
- .gfp_mask = GFP_USER | __GFP_NOWARN,
- };
+ bool drain_allow = true;
+ int ret = 0;
for (i = 0; i < nr_pages; i++) {
- head = compound_head(pages[i]);
- if (head == prev_head)
+ struct folio *folio = page_folio(pages[i]);
+
+ if (folio == prev_folio)
+ continue;
+ prev_folio = folio;
+
+ if (folio_is_pinnable(folio))
continue;
- prev_head = head;
+
/*
- * If we get a movable page, since we are going to be pinning
- * these entries, try to move them out if possible.
+ * Try to move out any movable page before pinning the range.
*/
- if (!is_pinnable_page(head)) {
- if (PageHuge(head)) {
- if (!isolate_huge_page(head, &movable_page_list))
- isolation_error_count++;
- } else {
- if (!PageLRU(head) && drain_allow) {
- lru_add_drain_all();
- drain_allow = false;
- }
+ if (folio_test_hugetlb(folio)) {
+ if (!isolate_huge_page(&folio->page,
+ &movable_page_list))
+ isolation_error_count++;
+ continue;
+ }
- if (isolate_lru_page(head)) {
- isolation_error_count++;
- continue;
- }
- list_add_tail(&head->lru, &movable_page_list);
- mod_node_page_state(page_pgdat(head),
- NR_ISOLATED_ANON +
- page_is_file_lru(head),
- thp_nr_pages(head));
- }
+ if (!folio_test_lru(folio) && drain_allow) {
+ lru_add_drain_all();
+ drain_allow = false;
+ }
+
+ if (folio_isolate_lru(folio)) {
+ isolation_error_count++;
+ continue;
}
+ list_add_tail(&folio->lru, &movable_page_list);
+ node_stat_mod_folio(folio,
+ NR_ISOLATED_ANON + folio_is_file_lru(folio),
+ folio_nr_pages(folio));
}
+ if (!list_empty(&movable_page_list) || isolation_error_count)
+ goto unpin_pages;
+
/*
 * If the list is empty and there were no isolation errors, then all
 * pages are in the correct zone.
*/
- if (list_empty(&movable_page_list) && !isolation_error_count)
- return nr_pages;
+ return nr_pages;
+unpin_pages:
if (gup_flags & FOLL_PIN) {
unpin_user_pages(pages, nr_pages);
} else {
for (i = 0; i < nr_pages; i++)
put_page(pages[i]);
}
+
if (!list_empty(&movable_page_list)) {
+ struct migration_target_control mtc = {
+ .nid = NUMA_NO_NODE,
+ .gfp_mask = GFP_USER | __GFP_NOWARN,
+ };
+
ret = migrate_pages(&movable_page_list, alloc_migration_target,
NULL, (unsigned long)&mtc, MIGRATE_SYNC,
MR_LONGTERM_PIN, NULL);
- if (ret && !list_empty(&movable_page_list))
- putback_movable_pages(&movable_page_list);
+ if (ret > 0) /* number of pages not migrated */
+ ret = -ENOMEM;
}
- return ret > 0 ? -ENOMEM : ret;
+ if (ret && !list_empty(&movable_page_list))
+ putback_movable_pages(&movable_page_list);
+ return ret;
}
#else
static long check_and_migrate_movable_pages(unsigned long nr_pages,
@@ -2117,65 +2049,6 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
}
EXPORT_SYMBOL(get_user_pages);
-/**
- * get_user_pages_locked() - variant of get_user_pages()
- *
- * @start: starting user address
- * @nr_pages: number of pages from start to pin
- * @gup_flags: flags modifying lookup behaviour
- * @pages: array that receives pointers to the pages pinned.
- * Should be at least nr_pages long. Or NULL, if caller
- * only intends to ensure the pages are faulted in.
- * @locked: pointer to lock flag indicating whether lock is held and
- * subsequently whether VM_FAULT_RETRY functionality can be
- * utilised. Lock must initially be held.
- *
- * It is suitable to replace the form:
- *
- * mmap_read_lock(mm);
- * do_something()
- * get_user_pages(mm, ..., pages, NULL);
- * mmap_read_unlock(mm);
- *
- * to:
- *
- * int locked = 1;
- * mmap_read_lock(mm);
- * do_something()
- * get_user_pages_locked(mm, ..., pages, &locked);
- * if (locked)
- * mmap_read_unlock(mm);
- *
- * We can leverage the VM_FAULT_RETRY functionality in the page fault
- * paths better by using either get_user_pages_locked() or
- * get_user_pages_unlocked().
- *
- */
-long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages,
- int *locked)
-{
- /*
- * FIXME: Current FOLL_LONGTERM behavior is incompatible with
- * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
- * vmas. As there are no users of this flag in this call we simply
- * disallow this option for now.
- */
- if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
- return -EINVAL;
- /*
- * FOLL_PIN must only be set internally by the pin_user_pages*() APIs,
- * never directly by the caller, so enforce that:
- */
- if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
- return -EINVAL;
-
- return __get_user_pages_locked(current->mm, start, nr_pages,
- pages, NULL, locked,
- gup_flags | FOLL_TOUCH);
-}
-EXPORT_SYMBOL(get_user_pages_locked);
-
/*
* get_user_pages_unlocked() is suitable to replace the form:
*
@@ -2277,7 +2150,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
ptem = ptep = pte_offset_map(&pmd, addr);
do {
pte_t pte = ptep_get_lockless(ptep);
- struct page *head, *page;
+ struct page *page;
+ struct folio *folio;
/*
* Similar to the PMD case below, NUMA hinting must take slow
@@ -2304,22 +2178,20 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
- head = try_grab_compound_head(page, 1, flags);
- if (!head)
+ folio = try_grab_folio(page, 1, flags);
+ if (!folio)
goto pte_unmap;
if (unlikely(page_is_secretmem(page))) {
- put_compound_head(head, 1, flags);
+ gup_put_folio(folio, 1, flags);
goto pte_unmap;
}
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
- put_compound_head(head, 1, flags);
+ gup_put_folio(folio, 1, flags);
goto pte_unmap;
}
- VM_BUG_ON_PAGE(compound_head(page) != head, page);
-
/*
* We need to make the page accessible if and only if we are
* going to access its content (the FOLL_PIN case). Please
@@ -2329,14 +2201,13 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
if (flags & FOLL_PIN) {
ret = arch_make_page_accessible(page);
if (ret) {
- unpin_user_page(page);
+ gup_put_folio(folio, 1, flags);
goto pte_unmap;
}
}
- SetPageReferenced(page);
+ folio_set_referenced(folio);
pages[*nr] = page;
(*nr)++;
-
} while (ptep++, addr += PAGE_SIZE, addr != end);
ret = 1;
@@ -2453,8 +2324,8 @@ static int record_subpages(struct page *page, unsigned long addr,
{
int nr;
- for (nr = 0; addr != end; addr += PAGE_SIZE)
- pages[nr++] = page++;
+ for (nr = 0; addr != end; nr++, addr += PAGE_SIZE)
+ pages[nr] = nth_page(page, nr);
return nr;
}
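/*
 * Illustrative sketch: why record_subpages() now steps with nth_page()
 * instead of page++. With SPARSEMEM and no vmemmap, the struct pages
 * backing one large folio need not be virtually contiguous, so the
 * step must go via the pfn. Simplified from the kernel's definition:
 */
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define nth_page_sketch(page, n) pfn_to_page(page_to_pfn(page) + (n))
#else
#define nth_page_sketch(page, n) ((page) + (n))	/* contiguous memmap */
#endif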
@@ -2472,7 +2343,8 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
struct page **pages, int *nr)
{
unsigned long pte_end;
- struct page *head, *page;
+ struct page *page;
+ struct folio *folio;
pte_t pte;
int refs;
@@ -2488,21 +2360,20 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
/* hugepages are never "special" */
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
- head = pte_page(pte);
- page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
+ page = nth_page(pte_page(pte), (addr & (sz - 1)) >> PAGE_SHIFT);
refs = record_subpages(page, addr, end, pages + *nr);
- head = try_grab_compound_head(head, refs, flags);
- if (!head)
+ folio = try_grab_folio(page, refs, flags);
+ if (!folio)
return 0;
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
- put_compound_head(head, refs, flags);
+ gup_put_folio(folio, refs, flags);
return 0;
}
*nr += refs;
- SetPageReferenced(head);
+ folio_set_referenced(folio);
return 1;
}
@@ -2536,7 +2407,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
unsigned long end, unsigned int flags,
struct page **pages, int *nr)
{
- struct page *head, *page;
+ struct page *page;
+ struct folio *folio;
int refs;
if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
@@ -2549,20 +2421,20 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
pages, nr);
}
- page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ page = nth_page(pmd_page(orig), (addr & ~PMD_MASK) >> PAGE_SHIFT);
refs = record_subpages(page, addr, end, pages + *nr);
- head = try_grab_compound_head(pmd_page(orig), refs, flags);
- if (!head)
+ folio = try_grab_folio(page, refs, flags);
+ if (!folio)
return 0;
if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
- put_compound_head(head, refs, flags);
+ gup_put_folio(folio, refs, flags);
return 0;
}
*nr += refs;
- SetPageReferenced(head);
+ folio_set_referenced(folio);
return 1;
}
@@ -2570,7 +2442,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
unsigned long end, unsigned int flags,
struct page **pages, int *nr)
{
- struct page *head, *page;
+ struct page *page;
+ struct folio *folio;
int refs;
if (!pud_access_permitted(orig, flags & FOLL_WRITE))
@@ -2583,20 +2456,20 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
pages, nr);
}
- page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+ page = nth_page(pud_page(orig), (addr & ~PUD_MASK) >> PAGE_SHIFT);
refs = record_subpages(page, addr, end, pages + *nr);
- head = try_grab_compound_head(pud_page(orig), refs, flags);
- if (!head)
+ folio = try_grab_folio(page, refs, flags);
+ if (!folio)
return 0;
if (unlikely(pud_val(orig) != pud_val(*pudp))) {
- put_compound_head(head, refs, flags);
+ gup_put_folio(folio, refs, flags);
return 0;
}
*nr += refs;
- SetPageReferenced(head);
+ folio_set_referenced(folio);
return 1;
}
@@ -2605,27 +2478,28 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
struct page **pages, int *nr)
{
int refs;
- struct page *head, *page;
+ struct page *page;
+ struct folio *folio;
if (!pgd_access_permitted(orig, flags & FOLL_WRITE))
return 0;
BUILD_BUG_ON(pgd_devmap(orig));
- page = pgd_page(orig) + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
+ page = nth_page(pgd_page(orig), (addr & ~PGDIR_MASK) >> PAGE_SHIFT);
refs = record_subpages(page, addr, end, pages + *nr);
- head = try_grab_compound_head(pgd_page(orig), refs, flags);
- if (!head)
+ folio = try_grab_folio(page, refs, flags);
+ if (!folio)
return 0;
if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
- put_compound_head(head, refs, flags);
+ gup_put_folio(folio, refs, flags);
return 0;
}
*nr += refs;
- SetPageReferenced(head);
+ folio_set_referenced(folio);
return 1;
}
@@ -3118,32 +2992,3 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
return get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
}
EXPORT_SYMBOL(pin_user_pages_unlocked);
-
-/*
- * pin_user_pages_locked() is the FOLL_PIN variant of get_user_pages_locked().
- * Behavior is the same, except that this one sets FOLL_PIN and rejects
- * FOLL_GET.
- */
-long pin_user_pages_locked(unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages,
- int *locked)
-{
- /*
- * FIXME: Current FOLL_LONGTERM behavior is incompatible with
- * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
- * vmas. As there are no users of this flag in this call we simply
- * disallow this option for now.
- */
- if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
- return -EINVAL;
-
- /* FOLL_GET and FOLL_PIN are mutually exclusive. */
- if (WARN_ON_ONCE(gup_flags & FOLL_GET))
- return -EINVAL;
-
- gup_flags |= FOLL_PIN;
- return __get_user_pages_locked(current->mm, start, nr_pages,
- pages, NULL, locked,
- gup_flags | FOLL_TOUCH);
-}
-EXPORT_SYMBOL(pin_user_pages_locked);
diff --git a/mm/highmem.c b/mm/highmem.c
index 762679050c9a..0cc0c4da7ed9 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -736,11 +736,11 @@ void *page_address(const struct page *page)
list_for_each_entry(pam, &pas->lh, list) {
if (pam->page == page) {
ret = pam->virtual;
- goto done;
+ break;
}
}
}
-done:
+
spin_unlock_irqrestore(&pas->lock, flags);
return ret;
}
@@ -773,13 +773,12 @@ void set_page_address(struct page *page, void *virtual)
list_for_each_entry(pam, &pas->lh, list) {
if (pam->page == page) {
list_del(&pam->list);
- spin_unlock_irqrestore(&pas->lock, flags);
- goto done;
+ break;
}
}
spin_unlock_irqrestore(&pas->lock, flags);
}
-done:
+
return;
}
diff --git a/mm/hmm.c b/mm/hmm.c
index bd56641c79d4..af71aac3140e 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -417,7 +417,6 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
struct hmm_range *range = hmm_vma_walk->range;
unsigned long addr = start;
pud_t pud;
- int ret = 0;
spinlock_t *ptl = pud_trans_huge_lock(pudp, walk->vma);
if (!ptl)
@@ -466,7 +465,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
out_unlock:
spin_unlock(ptl);
- return ret;
+ return 0;
}
#else
#define hmm_vma_walk_pud NULL
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 406a3c28c026..005fab2f3b73 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -34,6 +34,7 @@
#include <linux/oom.h>
#include <linux/numa.h>
#include <linux/page_owner.h>
+#include <linux/sched/sysctl.h>
#include <asm/tlb.h>
#include <asm/pgalloc.h>
@@ -582,13 +583,10 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long ret;
loff_t off = (loff_t)pgoff << PAGE_SHIFT;
- if (!IS_DAX(filp->f_mapping->host) || !IS_ENABLED(CONFIG_FS_DAX_PMD))
- goto out;
-
ret = __thp_get_unmapped_area(filp, addr, len, off, flags, PMD_SIZE);
if (ret)
return ret;
-out:
+
return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
}
EXPORT_SYMBOL_GPL(thp_get_unmapped_area);
@@ -1380,39 +1378,6 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
if (flags & FOLL_TOUCH)
touch_pmd(vma, addr, pmd, flags);
- if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
- /*
- * We don't mlock() pte-mapped THPs. This way we can avoid
- * leaking mlocked pages into non-VM_LOCKED VMAs.
- *
- * For anon THP:
- *
- * In most cases the pmd is the only mapping of the page as we
- * break COW for the mlock() -- see gup_flags |= FOLL_WRITE for
- * writable private mappings in populate_vma_page_range().
- *
- * The only scenario when we have the page shared here is if we
- * mlocking read-only mapping shared over fork(). We skip
- * mlocking such pages.
- *
- * For file THP:
- *
- * We can expect PageDoubleMap() to be stable under page lock:
- * for file pages we set it in page_add_file_rmap(), which
- * requires page to be locked.
- */
-
- if (PageAnon(page) && compound_mapcount(page) != 1)
- goto skip_mlock;
- if (PageDoubleMap(page) || !page->mapping)
- goto skip_mlock;
- if (!trylock_page(page))
- goto skip_mlock;
- if (page->mapping && !PageDoubleMap(page))
- mlock_vma_page(page);
- unlock_page(page);
- }
-skip_mlock:
page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page);
@@ -1610,7 +1575,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
if (pmd_present(orig_pmd)) {
page = pmd_page(orig_pmd);
- page_remove_rmap(page, true);
+ page_remove_rmap(page, vma, true);
VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
VM_BUG_ON_PAGE(!PageHead(page), page);
} else if (thp_migration_supported()) {
@@ -1766,17 +1731,28 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
}
#endif
- /*
- * Avoid trapping faults against the zero page. The read-only
- * data is likely to be read-cached on the local CPU and
- * local/remote hits to the zero page are not interesting.
- */
- if (prot_numa && is_huge_zero_pmd(*pmd))
- goto unlock;
+ if (prot_numa) {
+ struct page *page;
+ /*
+ * Avoid trapping faults against the zero page. The read-only
+ * data is likely to be read-cached on the local CPU and
+ * local/remote hits to the zero page are not interesting.
+ */
+ if (is_huge_zero_pmd(*pmd))
+ goto unlock;
- if (prot_numa && pmd_protnone(*pmd))
- goto unlock;
+ if (pmd_protnone(*pmd))
+ goto unlock;
+ page = pmd_page(*pmd);
+ /*
+			 * Skip scanning the top-tier node if normal NUMA
+			 * balancing is disabled.
+ */
+ if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
+ node_is_toptier(page_to_nid(page)))
+ goto unlock;
+ }
/*
* In case prot_numa, we are under mmap_read_lock(mm). It's critical
* to not clear pmd intermittently to avoid race with MADV_DONTNEED
@@ -1995,7 +1971,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
set_page_dirty(page);
if (!PageReferenced(page) && pmd_young(old_pmd))
SetPageReferenced(page);
- page_remove_rmap(page, true);
+ page_remove_rmap(page, vma, true);
put_page(page);
}
add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
@@ -2055,9 +2031,9 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
young = pmd_young(old_pmd);
soft_dirty = pmd_soft_dirty(old_pmd);
uffd_wp = pmd_uffd_wp(old_pmd);
+ VM_BUG_ON_PAGE(!page_count(page), page);
+ page_ref_add(page, HPAGE_PMD_NR - 1);
}
- VM_BUG_ON_PAGE(!page_count(page), page);
- page_ref_add(page, HPAGE_PMD_NR - 1);
/*
* Withdraw the table only after we mark the pmd entry invalid.
@@ -2129,6 +2105,9 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
}
}
unlock_page_memcg(page);
+
+ /* Above is effectively page_remove_rmap(page, vma, true) */
+ munlock_vma_page(page, vma, true);
}
smp_wmb(); /* make pte visible before pmd */
@@ -2136,18 +2115,18 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
if (freeze) {
for (i = 0; i < HPAGE_PMD_NR; i++) {
- page_remove_rmap(page + i, false);
+ page_remove_rmap(page + i, vma, false);
put_page(page + i);
}
}
}
void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long address, bool freeze, struct page *page)
+ unsigned long address, bool freeze, struct folio *folio)
{
spinlock_t *ptl;
struct mmu_notifier_range range;
- bool do_unlock_page = false;
+ bool do_unlock_folio = false;
pmd_t _pmd;
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
@@ -2157,20 +2136,20 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
ptl = pmd_lock(vma->vm_mm, pmd);
/*
- * If caller asks to setup a migration entries, we need a page to check
- * pmd against. Otherwise we can end up replacing wrong page.
+	 * If the caller asks to set up a migration entry, we need a folio to
+	 * check the pmd against. Otherwise we can end up replacing the wrong folio.
*/
- VM_BUG_ON(freeze && !page);
- if (page) {
- VM_WARN_ON_ONCE(!PageLocked(page));
- if (page != pmd_page(*pmd))
+ VM_BUG_ON(freeze && !folio);
+ if (folio) {
+ VM_WARN_ON_ONCE(!folio_test_locked(folio));
+ if (folio != page_folio(pmd_page(*pmd)))
goto out;
}
repeat:
if (pmd_trans_huge(*pmd)) {
- if (!page) {
- page = pmd_page(*pmd);
+ if (!folio) {
+ folio = page_folio(pmd_page(*pmd));
/*
* An anonymous page must be locked, to ensure that a
* concurrent reuse_swap_page() sees stable mapcount;
@@ -2178,33 +2157,31 @@ repeat:
* and page lock must not be taken when zap_pmd_range()
* calls __split_huge_pmd() while i_mmap_lock is held.
*/
- if (PageAnon(page)) {
- if (unlikely(!trylock_page(page))) {
- get_page(page);
+ if (folio_test_anon(folio)) {
+ if (unlikely(!folio_trylock(folio))) {
+ folio_get(folio);
_pmd = *pmd;
spin_unlock(ptl);
- lock_page(page);
+ folio_lock(folio);
spin_lock(ptl);
if (unlikely(!pmd_same(*pmd, _pmd))) {
- unlock_page(page);
- put_page(page);
- page = NULL;
+ folio_unlock(folio);
+ folio_put(folio);
+ folio = NULL;
goto repeat;
}
- put_page(page);
+ folio_put(folio);
}
- do_unlock_page = true;
+ do_unlock_folio = true;
}
}
- if (PageMlocked(page))
- clear_page_mlock(page);
} else if (!(pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd)))
goto out;
__split_huge_pmd_locked(vma, pmd, range.start, freeze);
out:
spin_unlock(ptl);
- if (do_unlock_page)
- unlock_page(page);
+ if (do_unlock_folio)
+ folio_unlock(folio);
/*
* No need to double call mmu_notifier->invalidate_range() callback.
* They are 3 cases to consider inside __split_huge_pmd_locked():
@@ -2222,7 +2199,7 @@ out:
}
void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
- bool freeze, struct page *page)
+ bool freeze, struct folio *folio)
{
pgd_t *pgd;
p4d_t *p4d;
@@ -2243,7 +2220,7 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
pmd = pmd_offset(pud, address);
- __split_huge_pmd(vma, pmd, address, freeze, page);
+ __split_huge_pmd(vma, pmd, address, freeze, folio);
}
static inline void split_huge_pmd_if_needed(struct vm_area_struct *vma, unsigned long address)
@@ -2283,6 +2260,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
static void unmap_page(struct page *page)
{
+ struct folio *folio = page_folio(page);
enum ttu_flags ttu_flags = TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD |
TTU_SYNC;
@@ -2293,26 +2271,27 @@ static void unmap_page(struct page *page)
* pages can simply be left unmapped, then faulted back on demand.
* If that is ever changed (perhaps for mlock), update remap_page().
*/
- if (PageAnon(page))
- try_to_migrate(page, ttu_flags);
+ if (folio_test_anon(folio))
+ try_to_migrate(folio, ttu_flags);
else
- try_to_unmap(page, ttu_flags | TTU_IGNORE_MLOCK);
+ try_to_unmap(folio, ttu_flags | TTU_IGNORE_MLOCK);
VM_WARN_ON_ONCE_PAGE(page_mapped(page), page);
}
-static void remap_page(struct page *page, unsigned int nr)
+static void remap_page(struct folio *folio, unsigned long nr)
{
- int i;
+ int i = 0;
/* If unmap_page() uses try_to_migrate() on file, remove this check */
- if (!PageAnon(page))
+ if (!folio_test_anon(folio))
return;
- if (PageTransHuge(page)) {
- remove_migration_ptes(page, page, true);
- } else {
- for (i = 0; i < nr; i++)
- remove_migration_ptes(page + i, page + i, true);
+ for (;;) {
+ remove_migration_ptes(folio, folio, true);
+ i += folio_nr_pages(folio);
+ if (i >= nr)
+ break;
+ folio = folio_next(folio);
}
}
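/*
 * Illustrative sketch: remap_page() above advances one folio at a
 * time. folio_next() is, essentially verbatim from the headers, the
 * folio that starts immediately after this one in the memmap:
 */
static inline struct folio *folio_next_sketch(struct folio *folio)
{
	return (struct folio *)folio_page(folio, folio_nr_pages(folio));
}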
@@ -2332,8 +2311,11 @@ static void lru_add_page_tail(struct page *head, struct page *tail,
} else {
/* head is still on lru (and we have it frozen) */
VM_WARN_ON(!PageLRU(head));
+ if (PageUnevictable(tail))
+ tail->mlock_count = 0;
+ else
+ list_add_tail(&tail->lru, &head->lru);
SetPageLRU(tail);
- list_add_tail(&tail->lru, &head->lru);
}
}
@@ -2469,7 +2451,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
}
local_irq_enable();
- remap_page(head, nr);
+ remap_page(folio, nr);
if (PageSwapCache(head)) {
swp_entry_t entry = { .val = page_private(head) };
@@ -2494,30 +2476,6 @@ static void __split_huge_page(struct page *page, struct list_head *list,
}
}
-int total_mapcount(struct page *page)
-{
- int i, compound, nr, ret;
-
- VM_BUG_ON_PAGE(PageTail(page), page);
-
- if (likely(!PageCompound(page)))
- return atomic_read(&page->_mapcount) + 1;
-
- compound = compound_mapcount(page);
- nr = compound_nr(page);
- if (PageHuge(page))
- return compound;
- ret = compound;
- for (i = 0; i < nr; i++)
- ret += atomic_read(&page[i]._mapcount) + 1;
- /* File pages has compound_mapcount included in _mapcount */
- if (!PageAnon(page))
- return ret - compound * nr;
- if (PageDoubleMap(page))
- ret -= nr;
- return ret;
-}
-
/*
* This calculates accurately how many mappings a transparent hugepage
* has (unlike page_mapcount() which isn't fully accurate). This full
@@ -2567,18 +2525,19 @@ int page_trans_huge_mapcount(struct page *page)
}
/* Racy check whether the huge page can be split */
-bool can_split_huge_page(struct page *page, int *pextra_pins)
+bool can_split_folio(struct folio *folio, int *pextra_pins)
{
int extra_pins;
/* Additional pins from page cache */
- if (PageAnon(page))
- extra_pins = PageSwapCache(page) ? thp_nr_pages(page) : 0;
+ if (folio_test_anon(folio))
+ extra_pins = folio_test_swapcache(folio) ?
+ folio_nr_pages(folio) : 0;
else
- extra_pins = thp_nr_pages(page);
+ extra_pins = folio_nr_pages(folio);
if (pextra_pins)
*pextra_pins = extra_pins;
- return total_mapcount(page) == page_count(page) - extra_pins - 1;
+ return folio_mapcount(folio) == folio_ref_count(folio) - extra_pins - 1;
}
/*
@@ -2602,7 +2561,8 @@ bool can_split_huge_page(struct page *page, int *pextra_pins)
*/
int split_huge_page_to_list(struct page *page, struct list_head *list)
{
- struct page *head = compound_head(page);
+ struct folio *folio = page_folio(page);
+ struct page *head = &folio->page;
struct deferred_split *ds_queue = get_deferred_split_queue(head);
XA_STATE(xas, &head->mapping->i_pages, head->index);
struct anon_vma *anon_vma = NULL;
@@ -2622,7 +2582,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
* The caller does not necessarily hold an mmap_lock that would
* prevent the anon_vma disappearing so we first we take a
* reference to it and then lock the anon_vma for write. This
- * is similar to page_lock_anon_vma_read except the write lock
+ * is similar to folio_lock_anon_vma_read except the write lock
* is taken to serialise against parallel split or collapse
* operations.
*/
@@ -2669,7 +2629,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
* Racy check if we can split the page, before unmap_page() will
* split PMDs
*/
- if (!can_split_huge_page(head, &extra_pins)) {
+ if (!can_split_folio(folio, &extra_pins)) {
ret = -EBUSY;
goto out_unlock;
}
@@ -2719,7 +2679,7 @@ fail:
if (mapping)
xas_unlock(&xas);
local_irq_enable();
- remap_page(head, thp_nr_pages(head));
+ remap_page(folio, folio_nr_pages(folio));
ret = -EBUSY;
}
@@ -2953,7 +2913,6 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
*/
for (addr = vaddr_start; addr < vaddr_end; addr += PAGE_SIZE) {
struct vm_area_struct *vma = find_vma(mm, addr);
- unsigned int follflags;
struct page *page;
if (!vma || addr < vma->vm_start)
@@ -2966,8 +2925,7 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
}
/* FOLL_DUMP to ignore special (like zero) pages */
- follflags = FOLL_GET | FOLL_DUMP;
- page = follow_page(vma, addr, follflags);
+ page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
if (IS_ERR(page))
continue;
@@ -2978,7 +2936,7 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
goto next;
total++;
- if (!can_split_huge_page(compound_head(page), NULL))
+ if (!can_split_folio(page_folio(page), NULL))
goto next;
if (!trylock_page(page))
@@ -3171,7 +3129,7 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
if (pmd_soft_dirty(pmdval))
pmdswp = pmd_swp_mksoft_dirty(pmdswp);
set_pmd_at(mm, address, pvmw->pmd, pmdswp);
- page_remove_rmap(page, true);
+ page_remove_rmap(page, vma, true);
put_page(page);
}
@@ -3197,14 +3155,13 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
if (pmd_swp_uffd_wp(*pvmw->pmd))
pmde = pmd_wrprotect(pmd_mkuffd_wp(pmde));
- flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE);
if (PageAnon(new))
page_add_anon_rmap(new, vma, mmun_start, true);
else
- page_add_file_rmap(new, true);
+ page_add_file_rmap(new, vma, true);
set_pmd_at(mm, mmun_start, pvmw->pmd, pmde);
- if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new))
- mlock_vma_page(new);
+
+ /* No need to invalidate - it was non-present before */
update_mmu_cache_pmd(vma, address, pvmw->pmd);
}
#endif
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 61895cc01d09..b34f50156f7e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -31,6 +31,7 @@
#include <linux/llist.h>
#include <linux/cma.h>
#include <linux/migrate.h>
+#include <linux/nospec.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
@@ -1320,7 +1321,9 @@ static void __destroy_compound_gigantic_page(struct page *page,
}
set_compound_order(page, 0);
+#ifdef CONFIG_64BIT
page[1].compound_nr = 0;
+#endif
__ClearPageHead(page);
}
@@ -1812,7 +1815,9 @@ out_error:
for (; j < nr_pages; j++, p = mem_map_next(p, page, j))
__ClearPageReserved(p);
set_compound_order(page, 0);
+#ifdef CONFIG_64BIT
page[1].compound_nr = 0;
+#endif
__ClearPageHead(page);
return false;
}
@@ -1854,6 +1859,7 @@ int PageHeadHuge(struct page *page_head)
return page_head[1].compound_dtor == HUGETLB_PAGE_DTOR;
}
+EXPORT_SYMBOL_GPL(PageHeadHuge);
/*
* Find and lock address space (mapping) in write mode.
@@ -3498,8 +3504,7 @@ static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
static struct kobj_attribute _name##_attr = __ATTR_WO(_name)
#define HSTATE_ATTR(_name) \
- static struct kobj_attribute _name##_attr = \
- __ATTR(_name, 0644, _name##_show, _name##_store)
+ static struct kobj_attribute _name##_attr = __ATTR_RW(_name)
static struct kobject *hugepages_kobj;
static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE];
@@ -4159,10 +4164,10 @@ static int __init hugepages_setup(char *s)
pr_warn("HugeTLB: architecture can't support node specific alloc, ignoring!\n");
return 0;
}
- node = tmp;
- p += count + 1;
- if (node < 0 || node >= nr_online_nodes)
+ if (tmp >= nr_online_nodes)
goto invalid;
+ node = array_index_nospec(tmp, nr_online_nodes);
+ p += count + 1;
/* Parse hugepages */
if (sscanf(p, "%lu%n", &tmp, &count) != 1)
goto invalid;
@@ -4637,7 +4642,6 @@ static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
vma->vm_page_prot));
}
entry = pte_mkyoung(entry);
- entry = pte_mkhuge(entry);
entry = arch_make_huge_pte(entry, shift, vma->vm_flags);
return entry;
@@ -4851,14 +4855,13 @@ again:
}
static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
- unsigned long new_addr, pte_t *src_pte)
+ unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte)
{
struct hstate *h = hstate_vma(vma);
struct mm_struct *mm = vma->vm_mm;
- pte_t *dst_pte, pte;
spinlock_t *src_ptl, *dst_ptl;
+ pte_t pte;
- dst_pte = huge_pte_offset(mm, new_addr, huge_page_size(h));
dst_ptl = huge_pte_lock(h, mm, dst_pte);
src_ptl = huge_pte_lockptr(h, mm, src_pte);
@@ -4917,7 +4920,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
if (!dst_pte)
break;
- move_huge_pte(vma, old_addr, new_addr, src_pte);
+ move_huge_pte(vma, old_addr, new_addr, src_pte, dst_pte);
}
flush_tlb_range(vma, old_end - len, old_end);
mmu_notifier_invalidate_range_end(&range);
@@ -5014,7 +5017,7 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
set_page_dirty(page);
hugetlb_count_sub(pages_per_huge_page(h), mm);
- page_remove_rmap(page, true);
+ page_remove_rmap(page, vma, true);
spin_unlock(ptl);
tlb_remove_page_size(tlb, page, huge_page_size(h));
@@ -5259,7 +5262,7 @@ retry_avoidcopy:
/* Break COW */
huge_ptep_clear_flush(vma, haddr, ptep);
mmu_notifier_invalidate_range(mm, range.start, range.end);
- page_remove_rmap(old_page, true);
+ page_remove_rmap(old_page, vma, true);
hugepage_add_new_anon_rmap(new_page, vma, haddr);
set_huge_pte_at(mm, haddr, ptep,
make_huge_pte(vma, new_page, 1));
@@ -5342,6 +5345,7 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_area_struct *vma,
pgoff_t idx,
unsigned int flags,
unsigned long haddr,
+ unsigned long addr,
unsigned long reason)
{
vm_fault_t ret;
@@ -5349,6 +5353,7 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_area_struct *vma,
struct vm_fault vmf = {
.vma = vma,
.address = haddr,
+ .real_address = addr,
.flags = flags,
/*
@@ -5417,7 +5422,7 @@ retry:
/* Check for page in userfault range */
if (userfaultfd_missing(vma)) {
ret = hugetlb_handle_userfault(vma, mapping, idx,
- flags, haddr,
+ flags, haddr, address,
VM_UFFD_MISSING);
goto out;
}
@@ -5481,7 +5486,7 @@ retry:
unlock_page(page);
put_page(page);
ret = hugetlb_handle_userfault(vma, mapping, idx,
- flags, haddr,
+ flags, haddr, address,
VM_UFFD_MINOR);
goto out;
}
@@ -5818,7 +5823,8 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
*pagep = NULL;
goto out;
}
- folio_copy(page_folio(page), page_folio(*pagep));
+ copy_user_huge_page(page, *pagep, dst_addr, dst_vma,
+ pages_per_huge_page(h));
put_page(*pagep);
*pagep = NULL;
}
@@ -6072,7 +6078,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (pages) {
/*
- * try_grab_compound_head() should always succeed here,
+ * try_grab_folio() should always succeed here,
* because: a) we hold the ptl lock, and b) we've just
* checked that the huge page is present in the page
* tables. If the huge page is present, then the tail
@@ -6081,9 +6087,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
* any way. So this page must be available at this
* point, unless the page refcount overflowed:
*/
- if (WARN_ON_ONCE(!try_grab_compound_head(pages[i],
- refs,
- flags))) {
+ if (WARN_ON_ONCE(!try_grab_folio(pages[i], refs,
+ flags))) {
spin_unlock(ptl);
remainder = 0;
err = -ENOMEM;
@@ -6172,7 +6177,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned int shift = huge_page_shift(hstate_vma(vma));
old_pte = huge_ptep_modify_prot_start(vma, address, ptep);
- pte = pte_mkhuge(huge_pte_modify(old_pte, newprot));
+ pte = huge_pte_modify(old_pte, newprot);
pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte);
pages++;
@@ -6890,9 +6895,9 @@ static int __init cmdline_parse_hugetlb_cma(char *p)
break;
if (s[count] == ':') {
- nid = tmp;
- if (nid < 0 || nid >= MAX_NUMNODES)
+ if (tmp >= MAX_NUMNODES)
break;
+ nid = array_index_nospec(tmp, MAX_NUMNODES);
s += count + 1;
tmp = memparse(s, &s);
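/*
 * Illustrative sketch (hypothetical helper): the array_index_nospec()
 * pattern used by both parsers above. After the architectural bounds
 * check, clamp the index so it cannot be used speculatively out of
 * range (Spectre v1 hardening).
 */
static int pick_node_sketch(unsigned long tmp, unsigned long nr_nodes)
{
	if (tmp >= nr_nodes)
		return -EINVAL;			/* architectural check */
	/* Under speculation, tmp is forced into [0, nr_nodes). */
	return array_index_nospec(tmp, nr_nodes);
}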
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index c540c21e26f5..791626983c2e 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -124,9 +124,9 @@
* page of page structs (page 0) associated with the HugeTLB page contains the 4
* page structs necessary to describe the HugeTLB. The only use of the remaining
* pages of page structs (page 1 to page 7) is to point to page->compound_head.
- * Therefore, we can remap pages 2 to 7 to page 1. Only 2 pages of page structs
+ * Therefore, we can remap pages 1 to 7 to page 0. Only 1 page of page structs
* will be used for each HugeTLB page. This will allow us to free the remaining
- * 6 pages to the buddy allocator.
+ * 7 pages to the buddy allocator.
*
* Here is how things look after remapping.
*
@@ -134,30 +134,30 @@
* +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+
* | | | 0 | -------------> | 0 |
* | | +-----------+ +-----------+
- * | | | 1 | -------------> | 1 |
- * | | +-----------+ +-----------+
- * | | | 2 | ----------------^ ^ ^ ^ ^ ^
- * | | +-----------+ | | | | |
- * | | | 3 | ------------------+ | | | |
- * | | +-----------+ | | | |
- * | | | 4 | --------------------+ | | |
- * | PMD | +-----------+ | | |
- * | level | | 5 | ----------------------+ | |
- * | mapping | +-----------+ | |
- * | | | 6 | ------------------------+ |
- * | | +-----------+ |
- * | | | 7 | --------------------------+
+ * | | | 1 | ---------------^ ^ ^ ^ ^ ^ ^
+ * | | +-----------+ | | | | | |
+ * | | | 2 | -----------------+ | | | | |
+ * | | +-----------+ | | | | |
+ * | | | 3 | -------------------+ | | | |
+ * | | +-----------+ | | | |
+ * | | | 4 | ---------------------+ | | |
+ * | PMD | +-----------+ | | |
+ * | level | | 5 | -----------------------+ | |
+ * | mapping | +-----------+ | |
+ * | | | 6 | -------------------------+ |
+ * | | +-----------+ |
+ * | | | 7 | ---------------------------+
* | | +-----------+
* | |
* | |
* | |
* +-----------+
*
- * When a HugeTLB is freed to the buddy system, we should allocate 6 pages for
+ * When a HugeTLB page is freed to the buddy system, we should allocate 7 pages
* vmemmap pages and restore the previous mapping relationship.
*
* For the HugeTLB page of the pud level mapping. It is similar to the former.
- * We also can use this approach to free (PAGE_SIZE - 2) vmemmap pages.
+ * We can also use this approach to free (PAGE_SIZE - 1) vmemmap pages.
*
* Apart from the HugeTLB page of the pmd/pud level mapping, some architectures
* (e.g. aarch64) provides a contiguous bit in the translation table entries
@@ -166,7 +166,13 @@
*
* The contiguous bit is used to increase the mapping size at the pmd and pte
* (last) level. So this type of HugeTLB page can be optimized only when its
- * size of the struct page structs is greater than 2 pages.
+ * size of the struct page structs is greater than 1 page.
+ *
+ * Notice: The head vmemmap page is not freed to the buddy allocator and all
+ * tail vmemmap pages are mapped to the head vmemmap page frame. So we can see
+ * more than one struct page struct with PG_head (e.g. 8 per 2 MB HugeTLB page)
+ * associated with each HugeTLB page. compound_head() handles this
+ * correctly (see the comment above compound_head() for more details).
*/
#define pr_fmt(fmt) "HugeTLB: " fmt
@@ -175,19 +181,21 @@
/*
* There are a lot of struct page structures associated with each HugeTLB page.
* For tail pages, the value of compound_head is the same. So we can reuse first
- * page of tail page structures. We map the virtual addresses of the remaining
- * pages of tail page structures to the first tail page struct, and then free
- * these page frames. Therefore, we need to reserve two pages as vmemmap areas.
+ * page of head page structures. We map the virtual addresses of all the pages
+ * of tail page structures to the head page struct, and then free these page
+ * frames. Therefore, we need to reserve one page as the vmemmap area.
*/
-#define RESERVE_VMEMMAP_NR 2U
+#define RESERVE_VMEMMAP_NR 1U
#define RESERVE_VMEMMAP_SIZE (RESERVE_VMEMMAP_NR << PAGE_SHIFT)
-bool hugetlb_free_vmemmap_enabled = IS_ENABLED(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON);
+DEFINE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON,
+ hugetlb_free_vmemmap_enabled_key);
+EXPORT_SYMBOL(hugetlb_free_vmemmap_enabled_key);
static int __init early_hugetlb_free_vmemmap_param(char *buf)
{
/* We cannot optimize if a "struct page" crosses page boundaries. */
- if ((!is_power_of_2(sizeof(struct page)))) {
+ if (!is_power_of_2(sizeof(struct page))) {
pr_warn("cannot free vmemmap pages because \"struct page\" crosses page boundaries\n");
return 0;
}
@@ -196,9 +204,9 @@ static int __init early_hugetlb_free_vmemmap_param(char *buf)
return -EINVAL;
if (!strcmp(buf, "on"))
- hugetlb_free_vmemmap_enabled = true;
+ static_branch_enable(&hugetlb_free_vmemmap_enabled_key);
else if (!strcmp(buf, "off"))
- hugetlb_free_vmemmap_enabled = false;
+ static_branch_disable(&hugetlb_free_vmemmap_enabled_key);
else
return -EINVAL;
@@ -236,7 +244,6 @@ int alloc_huge_page_vmemmap(struct hstate *h, struct page *head)
*/
ret = vmemmap_remap_alloc(vmemmap_addr, vmemmap_end, vmemmap_reuse,
GFP_KERNEL | __GFP_NORETRY | __GFP_THISNODE);
-
if (!ret)
ClearHPageVmemmapOptimized(head);
@@ -277,14 +284,13 @@ void __init hugetlb_vmemmap_init(struct hstate *h)
BUILD_BUG_ON(__NR_USED_SUBPAGE >=
RESERVE_VMEMMAP_SIZE / sizeof(struct page));
- if (!hugetlb_free_vmemmap_enabled)
+ if (!hugetlb_free_vmemmap_enabled())
return;
vmemmap_pages = (nr_pages * sizeof(struct page)) >> PAGE_SHIFT;
/*
- * The head page and the first tail page are not to be freed to buddy
- * allocator, the other pages will map to the first tail page, so they
- * can be freed.
+ * The head page is not to be freed to the buddy allocator; the other tail
+ * pages will map to the head page, so they can be freed.
*
* Could RESERVE_VMEMMAP_NR be greater than @vmemmap_pages? It is true
* on some architectures (e.g. aarch64). See Documentation/arm64/
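
To make the new numbers in the comment block concrete: with 4 KiB base pages and a 64-byte struct page, a 2 MiB HugeTLB page is described by 512 struct pages, i.e. 8 pages of vmemmap; reserving RESERVE_VMEMMAP_NR = 1 head page now frees the other 7. A standalone back-of-the-envelope sketch (plain userspace C, illustrative sizes only):

	#include <stdio.h>

	int main(void)
	{
		unsigned long page_size   = 4096;        /* base page size */
		unsigned long hpage_size  = 2UL << 20;   /* 2 MiB HugeTLB page */
		unsigned long sizeof_page = 64;          /* typical sizeof(struct page) */

		unsigned long nr_struct_pages = hpage_size / page_size;                  /* 512 */
		unsigned long vmemmap_pages = nr_struct_pages * sizeof_page / page_size; /* 8 */
		unsigned long reserved = 1;              /* RESERVE_VMEMMAP_NR */

		printf("vmemmap pages per HugeTLB page: %lu, freed: %lu\n",
		       vmemmap_pages, vmemmap_pages - reserved);   /* 8, 7 */
		return 0;
	}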
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index aff4d27ec235..bb0cea5468cb 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -32,9 +32,9 @@ static int hwpoison_inject(void *data, u64 val)
shake_page(hpage);
/*
- * This implies unable to support non-LRU pages.
+ * This implies we cannot support non-LRU pages, except for free pages.
*/
- if (!PageLRU(hpage) && !PageHuge(p))
+ if (!PageLRU(hpage) && !PageHuge(p) && !is_free_buddy_page(p))
return 0;
/*
@@ -48,7 +48,8 @@ static int hwpoison_inject(void *data, u64 val)
inject:
pr_info("Injecting memory failure at pfn %#lx\n", pfn);
- return memory_failure(pfn, 0);
+ err = memory_failure(pfn, 0);
+ return (err == -EOPNOTSUPP) ? 0 : err;
}
static int hwpoison_unpoison(void *data, u64 val)
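
Note the pairing with the madvise_inject_error() change further down: both injectors now treat -EOPNOTSUPP from memory_failure() as "injection declined", not as a failure. Condensed, the new contract is roughly:

	/* Sketch: unsupported page types are a no-op for error injection. */
	err = memory_failure(pfn, 0);
	if (err == -EOPNOTSUPP)
		err = 0;	/* page type not handled; do not fail the injector */
	return err;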
diff --git a/mm/init-mm.c b/mm/init-mm.c
index b4a6f38fb51d..fbe7844d0912 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -10,6 +10,7 @@
#include <linux/atomic.h>
#include <linux/user_namespace.h>
+#include <linux/ioasid.h>
#include <asm/mmu.h>
#ifndef INIT_MM_CONTEXT
@@ -38,6 +39,9 @@ struct mm_struct init_mm = {
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
.user_ns = &init_user_ns,
.cpu_bitmap = CPU_BITS_NONE,
+#ifdef CONFIG_IOMMU_SVA
+ .pasid = INVALID_IOASID,
+#endif
INIT_MM_CONTEXT(init_mm)
};
diff --git a/mm/internal.h b/mm/internal.h
index d80300392a19..58dc6adc19c5 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -10,6 +10,7 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
+#include <linux/rmap.h>
#include <linux/tracepoint-defs.h>
struct folio_batch;
@@ -66,24 +67,20 @@ static inline void wake_throttle_isolated(pg_data_t *pgdat)
vm_fault_t do_swap_page(struct vm_fault *vmf);
void folio_rotate_reclaimable(struct folio *folio);
bool __folio_end_writeback(struct folio *folio);
+void deactivate_file_folio(struct folio *folio);
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
unsigned long floor, unsigned long ceiling);
void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);
-static inline bool can_madv_lru_vma(struct vm_area_struct *vma)
-{
- return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP));
-}
-
struct zap_details;
void unmap_page_range(struct mmu_gather *tlb,
struct vm_area_struct *vma,
unsigned long addr, unsigned long end,
struct zap_details *details);
-void do_page_cache_ra(struct readahead_control *, unsigned long nr_to_read,
- unsigned long lookahead_size);
+void page_cache_ra_order(struct readahead_control *, struct file_ra_state *,
+ unsigned int order);
void force_page_cache_ra(struct readahead_control *, unsigned long nr);
static inline void force_page_cache_readahead(struct address_space *mapping,
struct file *file, pgoff_t index, unsigned long nr_to_read)
@@ -100,6 +97,9 @@ void filemap_free_folio(struct address_space *mapping, struct folio *folio);
int truncate_inode_folio(struct address_space *mapping, struct folio *folio);
bool truncate_inode_partial_folio(struct folio *folio, loff_t start,
loff_t end);
+long invalidate_inode_page(struct page *page);
+unsigned long invalidate_mapping_pagevec(struct address_space *mapping,
+ pgoff_t start, pgoff_t end, unsigned long *nr_pagevec);
/**
* folio_evictable - Test whether a folio is evictable.
@@ -155,10 +155,18 @@ extern unsigned long highest_memmap_pfn;
#define MAX_RECLAIM_RETRIES 16
/*
+ * in mm/early_ioremap.c
+ */
+pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
+ unsigned long size, pgprot_t prot);
+
+/*
* in mm/vmscan.c:
*/
-extern int isolate_lru_page(struct page *page);
-extern void putback_lru_page(struct page *page);
+int isolate_lru_page(struct page *page);
+int folio_isolate_lru(struct folio *folio);
+void putback_lru_page(struct page *page);
+void folio_putback_lru(struct folio *folio);
extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason);
/*
@@ -390,6 +398,7 @@ static inline bool is_data_mapping(vm_flags_t flags)
void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
struct vm_area_struct *prev);
void __vma_unlink_list(struct mm_struct *mm, struct vm_area_struct *vma);
+struct anon_vma *folio_anon_vma(struct folio *folio);
#ifdef CONFIG_MMU
void unmap_mapping_folio(struct folio *folio);
@@ -398,32 +407,56 @@ extern long populate_vma_page_range(struct vm_area_struct *vma,
extern long faultin_vma_page_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
bool write, int *locked);
-extern void munlock_vma_pages_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end);
-static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
-{
- munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end);
-}
-
-/*
- * must be called with vma's mmap_lock held for read or write, and page locked.
- */
-extern void mlock_vma_page(struct page *page);
-extern unsigned int munlock_vma_page(struct page *page);
-
extern int mlock_future_check(struct mm_struct *mm, unsigned long flags,
unsigned long len);
-
/*
- * Clear the page's PageMlocked(). This can be useful in a situation where
- * we want to unconditionally remove a page from the pagecache -- e.g.,
- * on truncation or freeing.
+ * mlock_vma_page() and munlock_vma_page():
+ * should be called with vma's mmap_lock held for read or write,
+ * under page table lock for the pte/pmd being added or removed.
*
- * It is legal to call this function for any page, mlocked or not.
- * If called for a page that is still mapped by mlocked vmas, all we do
- * is revert to lazy LRU behaviour -- semantics are not broken.
+ * mlock is usually called at the end of page_add_*_rmap(),
+ * munlock at the end of page_remove_rmap(); but new anon
+ * pages are managed by lru_cache_add_inactive_or_unevictable()
+ * calling mlock_new_page().
+ *
+ * @compound is used to include pmd mappings of THPs, but filter out
+ * pte mappings of THPs, which cannot be consistently counted: a pte
+ * mapping of the THP head cannot be distinguished by the page alone.
*/
-extern void clear_page_mlock(struct page *page);
+void mlock_folio(struct folio *folio);
+static inline void mlock_vma_folio(struct folio *folio,
+ struct vm_area_struct *vma, bool compound)
+{
+ /*
+ * The VM_SPECIAL check here serves two purposes.
+ * 1) VM_IO check prevents migration from double-counting during mlock.
+ * 2) Although mmap_region() and mlock_fixup() take care that VM_LOCKED
+ * is never left set on a VM_SPECIAL vma, there is an interval while
+ * file->f_op->mmap() is using vm_insert_page(s), when VM_LOCKED may
+ * still be set while VM_SPECIAL bits are added: so ignore it then.
+ */
+ if (unlikely((vma->vm_flags & (VM_LOCKED|VM_SPECIAL)) == VM_LOCKED) &&
+ (compound || !folio_test_large(folio)))
+ mlock_folio(folio);
+}
+
+static inline void mlock_vma_page(struct page *page,
+ struct vm_area_struct *vma, bool compound)
+{
+ mlock_vma_folio(page_folio(page), vma, compound);
+}
+
+void munlock_page(struct page *page);
+static inline void munlock_vma_page(struct page *page,
+ struct vm_area_struct *vma, bool compound)
+{
+ if (unlikely(vma->vm_flags & VM_LOCKED) &&
+ (compound || !PageTransCompound(page)))
+ munlock_page(page);
+}
+void mlock_new_page(struct page *page);
+bool need_mlock_page_drain(int cpu);
+void mlock_page_drain(int cpu);
extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
@@ -457,18 +490,20 @@ vma_address(struct page *page, struct vm_area_struct *vma)
}
/*
- * Then at what user virtual address will none of the page be found in vma?
+ * Then at what user virtual address will none of the range be found in vma?
* Assumes that vma_address() already returned a good starting address.
- * If page is a compound head, the entire compound page is considered.
*/
-static inline unsigned long
-vma_address_end(struct page *page, struct vm_area_struct *vma)
+static inline unsigned long vma_address_end(struct page_vma_mapped_walk *pvmw)
{
+ struct vm_area_struct *vma = pvmw->vma;
pgoff_t pgoff;
unsigned long address;
- VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */
- pgoff = page_to_pgoff(page) + compound_nr(page);
+ /* Common case, plus ->pgoff is invalid for KSM */
+ if (pvmw->nr_pages == 1)
+ return pvmw->address + PAGE_SIZE;
+
+ pgoff = pvmw->pgoff + pvmw->nr_pages;
address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
/* Check for address beyond vma (or wrapped through 0?) */
if (address < vma->vm_start || address > vma->vm_end)
@@ -498,8 +533,13 @@ static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
}
#else /* !CONFIG_MMU */
static inline void unmap_mapping_folio(struct folio *folio) { }
-static inline void clear_page_mlock(struct page *page) { }
-static inline void mlock_vma_page(struct page *page) { }
+static inline void mlock_vma_page(struct page *page,
+ struct vm_area_struct *vma, bool compound) { }
+static inline void munlock_vma_page(struct page *page,
+ struct vm_area_struct *vma, bool compound) { }
+static inline void mlock_new_page(struct page *page) { }
+static inline bool need_mlock_page_drain(int cpu) { return false; }
+static inline void mlock_page_drain(int cpu) { }
static inline void vunmap_range_noflush(unsigned long start, unsigned long end)
{
}
@@ -572,17 +612,6 @@ static inline void mminit_verify_zonelist(void)
}
#endif /* CONFIG_DEBUG_MEMORY_INIT */
-/* mminit_validate_memmodel_limits is independent of CONFIG_DEBUG_MEMORY_INIT */
-#if defined(CONFIG_SPARSEMEM)
-extern void mminit_validate_memmodel_limits(unsigned long *start_pfn,
- unsigned long *end_pfn);
-#else
-static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
- unsigned long *end_pfn)
-{
-}
-#endif /* CONFIG_SPARSEMEM */
-
#define NODE_RECLAIM_NOSCAN -2
#define NODE_RECLAIM_FULL -1
#define NODE_RECLAIM_SOME 0
@@ -718,4 +747,13 @@ void vunmap_range_noflush(unsigned long start, unsigned long end);
int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
unsigned long addr, int page_nid, int *flags);
+void free_zone_device_page(struct page *page);
+
+/*
+ * mm/gup.c
+ */
+struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
+
+DECLARE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
+
#endif /* __MM_INTERNAL_H */
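
The reworked vma_address_end() drops the per-page computation in favour of the walk state: a single-page walk (also the only legal case for KSM, whose ->pgoff is unusable) short-circuits to address + PAGE_SIZE, while larger folios derive the end from pgoff + nr_pages and clamp to the vma. A hedged userspace sketch of the same arithmetic, with illustrative types in place of the kernel structs:

	#define PAGE_SHIFT 12
	#define PAGE_SIZE  (1UL << PAGE_SHIFT)

	struct demo_walk {
		unsigned long pgoff;	/* folio's file offset, in pages */
		unsigned long nr_pages;	/* folio size, in pages */
		unsigned long address;	/* current user virtual address */
	};

	static unsigned long demo_vma_address_end(const struct demo_walk *w,
						  unsigned long vm_start,
						  unsigned long vm_end,
						  unsigned long vm_pgoff)
	{
		unsigned long address;

		if (w->nr_pages == 1)	/* common case; KSM never gets past here */
			return w->address + PAGE_SIZE;

		address = vm_start + ((w->pgoff + w->nr_pages - vm_pgoff) << PAGE_SHIFT);
		/* Clamp if the folio extends beyond the vma (or wrapped through 0). */
		if (address < vm_start || address > vm_end)
			address = vm_end;
		return address;
	}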
diff --git a/mm/kfence/Makefile b/mm/kfence/Makefile
index 6872cd5e5390..0bb95728a784 100644
--- a/mm/kfence/Makefile
+++ b/mm/kfence/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_KFENCE) := core.o report.o
+obj-y := core.o report.o
CFLAGS_kfence_test.o := -g -fno-omit-frame-pointer -fno-optimize-sibling-calls
obj-$(CONFIG_KFENCE_KUNIT_TEST) += kfence_test.o
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 5ad40e3add45..2f9fdfde1941 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -38,22 +38,27 @@
#define KFENCE_WARN_ON(cond) \
({ \
const bool __cond = WARN_ON(cond); \
- if (unlikely(__cond)) \
+ if (unlikely(__cond)) { \
WRITE_ONCE(kfence_enabled, false); \
+ disabled_by_warn = true; \
+ } \
__cond; \
})
/* === Data ================================================================= */
static bool kfence_enabled __read_mostly;
+static bool disabled_by_warn __read_mostly;
-static unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL;
+unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL;
+EXPORT_SYMBOL_GPL(kfence_sample_interval); /* Export for test modules. */
#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "kfence."
+static int kfence_enable_late(void);
static int param_set_sample_interval(const char *val, const struct kernel_param *kp)
{
unsigned long num;
@@ -64,10 +69,11 @@ static int param_set_sample_interval(const char *val, const struct kernel_param
if (!num) /* Using 0 to indicate KFENCE is disabled. */
WRITE_ONCE(kfence_enabled, false);
- else if (!READ_ONCE(kfence_enabled) && system_state != SYSTEM_BOOTING)
- return -EINVAL; /* Cannot (re-)enable KFENCE on-the-fly. */
*((unsigned long *)kp->arg) = num;
+
+ if (num && !READ_ONCE(kfence_enabled) && system_state != SYSTEM_BOOTING)
+ return disabled_by_warn ? -EINVAL : kfence_enable_late();
return 0;
}
@@ -89,8 +95,12 @@ module_param_cb(sample_interval, &sample_interval_param_ops, &kfence_sample_inte
static unsigned long kfence_skip_covered_thresh __read_mostly = 75;
module_param_named(skip_covered_thresh, kfence_skip_covered_thresh, ulong, 0644);
+/* If true, use a deferrable timer. */
+static bool kfence_deferrable __read_mostly = IS_ENABLED(CONFIG_KFENCE_DEFERRABLE);
+module_param_named(deferrable, kfence_deferrable, bool, 0444);
+
/* The pool of pages used for guard pages and objects. */
-char *__kfence_pool __ro_after_init;
+char *__kfence_pool __read_mostly;
EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */
/*
@@ -531,17 +541,19 @@ static void rcu_guarded_free(struct rcu_head *h)
kfence_guarded_free((void *)meta->addr, meta, false);
}
-static bool __init kfence_init_pool(void)
+/*
+ * Initialization of the KFENCE pool after its allocation.
+ * Returns 0 on success; otherwise returns the address up to
+ * which partial initialization succeeded.
+ */
+static unsigned long kfence_init_pool(void)
{
unsigned long addr = (unsigned long)__kfence_pool;
struct page *pages;
int i;
- if (!__kfence_pool)
- return false;
-
if (!arch_kfence_init_pool())
- goto err;
+ return addr;
pages = virt_to_page(addr);
@@ -559,7 +571,7 @@ static bool __init kfence_init_pool(void)
/* Verify we do not have a compound head page. */
if (WARN_ON(compound_head(&pages[i]) != &pages[i]))
- goto err;
+ return addr;
__SetPageSlab(&pages[i]);
}
@@ -572,7 +584,7 @@ static bool __init kfence_init_pool(void)
*/
for (i = 0; i < 2; i++) {
if (unlikely(!kfence_protect(addr)))
- goto err;
+ return addr;
addr += PAGE_SIZE;
}
@@ -589,7 +601,7 @@ static bool __init kfence_init_pool(void)
/* Protect the right redzone. */
if (unlikely(!kfence_protect(addr + PAGE_SIZE)))
- goto err;
+ return addr;
addr += 2 * PAGE_SIZE;
}
@@ -602,9 +614,21 @@ static bool __init kfence_init_pool(void)
*/
kmemleak_free(__kfence_pool);
- return true;
+ return 0;
+}
+
+static bool __init kfence_init_pool_early(void)
+{
+ unsigned long addr;
+
+ if (!__kfence_pool)
+ return false;
+
+ addr = kfence_init_pool();
+
+ if (!addr)
+ return true;
-err:
/*
* Only release unprotected pages, and do not try to go back and change
* page attributes due to risk of failing to do so as well. If changing
@@ -617,6 +641,26 @@ err:
return false;
}
+static bool kfence_init_pool_late(void)
+{
+ unsigned long addr, free_size;
+
+ addr = kfence_init_pool();
+
+ if (!addr)
+ return true;
+
+ /* Same as above. */
+ free_size = KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool);
+#ifdef CONFIG_CONTIG_ALLOC
+ free_contig_range(page_to_pfn(virt_to_page(addr)), free_size / PAGE_SIZE);
+#else
+ free_pages_exact((void *)addr, free_size);
+#endif
+ __kfence_pool = NULL;
+ return false;
+}
+
/* === DebugFS Interface ==================================================== */
static int stats_show(struct seq_file *seq, void *v)
@@ -700,6 +744,8 @@ late_initcall(kfence_debugfs_init);
/* === Allocation Gate Timer ================================================ */
+static struct delayed_work kfence_timer;
+
#ifdef CONFIG_KFENCE_STATIC_KEYS
/* Wait queue to wake up allocation-gate timer task. */
static DECLARE_WAIT_QUEUE_HEAD(allocation_wait);
@@ -722,7 +768,6 @@ static DEFINE_IRQ_WORK(wake_up_kfence_timer_work, wake_up_kfence_timer);
* avoids IPIs, at the cost of not immediately capturing allocations if the
* instructions remain cached.
*/
-static struct delayed_work kfence_timer;
static void toggle_allocation_gate(struct work_struct *work)
{
if (!READ_ONCE(kfence_enabled))
@@ -750,7 +795,6 @@ static void toggle_allocation_gate(struct work_struct *work)
queue_delayed_work(system_unbound_wq, &kfence_timer,
msecs_to_jiffies(kfence_sample_interval));
}
-static DECLARE_DELAYED_WORK(kfence_timer, toggle_allocation_gate);
/* === Public interface ===================================================== */
@@ -765,25 +809,77 @@ void __init kfence_alloc_pool(void)
pr_err("failed to allocate pool\n");
}
+static void kfence_init_enable(void)
+{
+ if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))
+ static_branch_enable(&kfence_allocation_key);
+
+ if (kfence_deferrable)
+ INIT_DEFERRABLE_WORK(&kfence_timer, toggle_allocation_gate);
+ else
+ INIT_DELAYED_WORK(&kfence_timer, toggle_allocation_gate);
+
+ WRITE_ONCE(kfence_enabled, true);
+ queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
+
+ pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
+ CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool,
+ (void *)(__kfence_pool + KFENCE_POOL_SIZE));
+}
+
void __init kfence_init(void)
{
+ stack_hash_seed = (u32)random_get_entropy();
+
/* Setting kfence_sample_interval to 0 on boot disables KFENCE. */
if (!kfence_sample_interval)
return;
- stack_hash_seed = (u32)random_get_entropy();
- if (!kfence_init_pool()) {
+ if (!kfence_init_pool_early()) {
pr_err("%s failed\n", __func__);
return;
}
- if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))
- static_branch_enable(&kfence_allocation_key);
+ kfence_init_enable();
+}
+
+static int kfence_init_late(void)
+{
+ const unsigned long nr_pages = KFENCE_POOL_SIZE / PAGE_SIZE;
+#ifdef CONFIG_CONTIG_ALLOC
+ struct page *pages;
+
+ pages = alloc_contig_pages(nr_pages, GFP_KERNEL, first_online_node, NULL);
+ if (!pages)
+ return -ENOMEM;
+ __kfence_pool = page_to_virt(pages);
+#else
+ if (nr_pages > MAX_ORDER_NR_PAGES) {
+ pr_warn("KFENCE_NUM_OBJECTS too large for buddy allocator\n");
+ return -EINVAL;
+ }
+ __kfence_pool = alloc_pages_exact(KFENCE_POOL_SIZE, GFP_KERNEL);
+ if (!__kfence_pool)
+ return -ENOMEM;
+#endif
+
+ if (!kfence_init_pool_late()) {
+ pr_err("%s failed\n", __func__);
+ return -EBUSY;
+ }
+
+ kfence_init_enable();
+ return 0;
+}
+
+static int kfence_enable_late(void)
+{
+ if (!__kfence_pool)
+ return kfence_init_late();
+
WRITE_ONCE(kfence_enabled, true);
queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
- pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
- CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool,
- (void *)(__kfence_pool + KFENCE_POOL_SIZE));
+ return 0;
}
void kfence_shutdown_cache(struct kmem_cache *s)
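
Taken together, the core.c changes let KFENCE be enabled after boot: writing a non-zero interval to the sample_interval module parameter (on a typical build, /sys/module/kfence/parameters/sample_interval) now reaches kfence_enable_late() instead of failing with -EINVAL, unless a KFENCE_WARN_ON already set disabled_by_warn. A condensed restatement of the dispatch; this is a sketch, the real logic lives in param_set_sample_interval() and kfence_enable_late() above:

	/* Sketch: sample_interval transitions at runtime. */
	if (!num) {					/* N -> 0: disable sampling */
		kfence_enabled = false;
	} else if (!kfence_enabled && !booting) {	/* 0 -> N after boot */
		if (disabled_by_warn)
			return -EINVAL;			/* a prior warning is terminal */
		if (!__kfence_pool)
			return kfence_init_late();	/* allocate + init the pool now */
		/* Pool already exists: just re-enable and re-arm the timer. */
		kfence_enabled = true;
		queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
	}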
diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c
index a22b1af85577..1b50f70a4c0f 100644
--- a/mm/kfence/kfence_test.c
+++ b/mm/kfence/kfence_test.c
@@ -268,13 +268,13 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
* 100x the sample interval should be more than enough to ensure we get
* a KFENCE allocation eventually.
*/
- timeout = jiffies + msecs_to_jiffies(100 * CONFIG_KFENCE_SAMPLE_INTERVAL);
+ timeout = jiffies + msecs_to_jiffies(100 * kfence_sample_interval);
/*
* Especially for non-preemption kernels, ensure the allocation-gate
* timer can catch up: after @resched_after, every failed allocation
* attempt yields, to ensure the allocation-gate timer is scheduled.
*/
- resched_after = jiffies + msecs_to_jiffies(CONFIG_KFENCE_SAMPLE_INTERVAL);
+ resched_after = jiffies + msecs_to_jiffies(kfence_sample_interval);
do {
if (test_cache)
alloc = kmem_cache_alloc(test_cache, gfp);
@@ -608,7 +608,7 @@ static void test_gfpzero(struct kunit *test)
int i;
/* Skip if we think it'd take too long. */
- KFENCE_TEST_REQUIRES(test, CONFIG_KFENCE_SAMPLE_INTERVAL <= 100);
+ KFENCE_TEST_REQUIRES(test, kfence_sample_interval <= 100);
setup_test_cache(test, size, 0, NULL);
buf1 = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
@@ -623,10 +623,11 @@ static void test_gfpzero(struct kunit *test)
break;
test_free(buf2);
- if (i == CONFIG_KFENCE_NUM_OBJECTS) {
+ if (kthread_should_stop() || (i == CONFIG_KFENCE_NUM_OBJECTS)) {
kunit_warn(test, "giving up ... cannot get same object back\n");
return;
}
+ cond_resched();
}
for (i = 0; i < size; i++)
@@ -739,7 +740,7 @@ static void test_memcache_alloc_bulk(struct kunit *test)
* 100x the sample interval should be more than enough to ensure we get
* a KFENCE allocation eventually.
*/
- timeout = jiffies + msecs_to_jiffies(100 * CONFIG_KFENCE_SAMPLE_INTERVAL);
+ timeout = jiffies + msecs_to_jiffies(100 * kfence_sample_interval);
do {
void *objects[100];
int i, num = kmem_cache_alloc_bulk(test_cache, GFP_ATOMIC, ARRAY_SIZE(objects),
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 35f14d0a00a6..1cdf7c38b9e5 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -16,6 +16,7 @@
#include <linux/hashtable.h>
#include <linux/userfaultfd_k.h>
#include <linux/page_idle.h>
+#include <linux/page_table_check.h>
#include <linux/swapops.h>
#include <linux/shmem_fs.h>
@@ -773,7 +774,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
*/
spin_lock(ptl);
ptep_clear(vma->vm_mm, address, _pte);
- page_remove_rmap(src_page, false);
+ page_remove_rmap(src_page, vma, false);
spin_unlock(ptl);
free_page_and_swap_cache(src_page);
}
@@ -1416,6 +1417,21 @@ static int khugepaged_add_pte_mapped_thp(struct mm_struct *mm,
return 0;
}
+static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+ spinlock_t *ptl;
+ pmd_t pmd;
+
+ mmap_assert_write_locked(mm);
+ ptl = pmd_lock(vma->vm_mm, pmdp);
+ pmd = pmdp_collapse_flush(vma, addr, pmdp);
+ spin_unlock(ptl);
+ mm_dec_nr_ptes(mm);
+ page_table_check_pte_clear_range(mm, addr, pmd);
+ pte_free(mm, pmd_pgtable(pmd));
+}
+
/**
* collapse_pte_mapped_thp - Try to collapse a pte-mapped THP for mm at
* address haddr.
@@ -1433,7 +1449,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
struct vm_area_struct *vma = find_vma(mm, haddr);
struct page *hpage;
pte_t *start_pte, *pte;
- pmd_t *pmd, _pmd;
+ pmd_t *pmd;
spinlock_t *ptl;
int count = 0;
int i;
@@ -1497,7 +1513,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
if (pte_none(*pte))
continue;
page = vm_normal_page(vma, addr, *pte);
- page_remove_rmap(page, false);
+ page_remove_rmap(page, vma, false);
}
pte_unmap_unlock(start_pte, ptl);
@@ -1509,12 +1525,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
}
/* step 4: collapse pmd */
- ptl = pmd_lock(vma->vm_mm, pmd);
- _pmd = pmdp_collapse_flush(vma, haddr, pmd);
- spin_unlock(ptl);
- mm_dec_nr_ptes(mm);
- pte_free(mm, pmd_pgtable(_pmd));
-
+ collapse_and_free_pmd(mm, vma, haddr, pmd);
drop_hpage:
unlock_page(hpage);
put_page(hpage);
@@ -1552,7 +1563,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
struct vm_area_struct *vma;
struct mm_struct *mm;
unsigned long addr;
- pmd_t *pmd, _pmd;
+ pmd_t *pmd;
i_mmap_lock_write(mapping);
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
@@ -1591,14 +1602,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
* reverse order. Trylock is a way to avoid deadlock.
*/
if (mmap_write_trylock(mm)) {
- if (!khugepaged_test_exit(mm)) {
- spinlock_t *ptl = pmd_lock(mm, pmd);
- /* assume page table is clear */
- _pmd = pmdp_collapse_flush(vma, addr, pmd);
- spin_unlock(ptl);
- mm_dec_nr_ptes(mm);
- pte_free(mm, pmd_pgtable(_pmd));
- }
+ if (!khugepaged_test_exit(mm))
+ collapse_and_free_pmd(mm, vma, addr, pmd);
mmap_write_unlock(mm);
} else {
/* Try again later */
@@ -1829,13 +1834,13 @@ static void collapse_file(struct mm_struct *mm,
}
if (page_mapped(page))
- unmap_mapping_pages(mapping, index, 1, false);
+ try_to_unmap(page_folio(page),
+ TTU_IGNORE_MLOCK | TTU_BATCH_FLUSH);
xas_lock_irq(&xas);
xas_set(&xas, index);
VM_BUG_ON_PAGE(page != xas_load(&xas), page);
- VM_BUG_ON_PAGE(page_mapped(page), page);
/*
* The page is expected to have page_count() == 3:
@@ -1899,6 +1904,13 @@ xa_locked:
xas_unlock_irq(&xas);
xa_unlocked:
+ /*
+ * If collapse is successful, flush must be done now before copying.
+ * If collapse is unsuccessful, does flush actually need to be done?
+ * Do it anyway, to clear the state.
+ */
+ try_to_unmap_flush();
+
if (result == SCAN_SUCCEED) {
struct page *page, *tmp;
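
collapse_file() now unmaps mapped pages with TTU_BATCH_FLUSH and issues one try_to_unmap_flush() after the xarray is unlocked, instead of flushing per page via unmap_mapping_pages(). The general shape of the batched pattern, sketched with placeholder inputs:

	/*
	 * Sketch: queue TLB invalidations while unmapping many folios,
	 * then pay for a single flush at the end.
	 */
	static void sketch_batched_unmap(struct folio **folios, int nr)
	{
		int i;

		for (i = 0; i < nr; i++)
			try_to_unmap(folios[i], TTU_IGNORE_MLOCK | TTU_BATCH_FLUSH);

		try_to_unmap_flush();	/* covers every deferred invalidation above */
	}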
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index dc3758fdba68..7580baa76af1 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1410,7 +1410,8 @@ static void kmemleak_scan(void)
{
unsigned long flags;
struct kmemleak_object *object;
- int i;
+ struct zone *zone;
+ int __maybe_unused i;
int new_leaks = 0;
jiffies_last_scan = jiffies;
@@ -1450,9 +1451,9 @@ static void kmemleak_scan(void)
* Struct page scanning for each node.
*/
get_online_mems();
- for_each_online_node(i) {
- unsigned long start_pfn = node_start_pfn(i);
- unsigned long end_pfn = node_end_pfn(i);
+ for_each_populated_zone(zone) {
+ unsigned long start_pfn = zone->zone_start_pfn;
+ unsigned long end_pfn = zone_end_pfn(zone);
unsigned long pfn;
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
@@ -1461,8 +1462,8 @@ static void kmemleak_scan(void)
if (!page)
continue;
- /* only scan pages belonging to this node */
- if (page_to_nid(page) != i)
+ /* only scan pages belonging to this zone */
+ if (page_zone(page) != zone)
continue;
/* only scan if page is in use */
if (page_count(page) == 0)
diff --git a/mm/ksm.c b/mm/ksm.c
index c20bd4d9a0d9..063a48eeb5ee 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1034,10 +1034,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
pte_t *orig_pte)
{
struct mm_struct *mm = vma->vm_mm;
- struct page_vma_mapped_walk pvmw = {
- .page = page,
- .vma = vma,
- };
+ DEFINE_PAGE_VMA_WALK(pvmw, page, vma, 0, 0);
int swapped;
int err = -EFAULT;
struct mmu_notifier_range range;
@@ -1177,7 +1174,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
ptep_clear_flush(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, newpte);
- page_remove_rmap(page, false);
+ page_remove_rmap(page, vma, false);
if (!page_mapped(page))
try_to_free_swap(page);
put_page(page);
@@ -1252,16 +1249,6 @@ static int try_to_merge_one_page(struct vm_area_struct *vma,
err = replace_page(vma, page, kpage, orig_pte);
}
- if ((vma->vm_flags & VM_LOCKED) && kpage && !err) {
- munlock_vma_page(page);
- if (!PageMlocked(kpage)) {
- unlock_page(page);
- lock_page(kpage);
- mlock_vma_page(kpage);
- page = kpage; /* for final unlock */
- }
- }
-
out_unlock:
unlock_page(page);
out:
@@ -2567,7 +2554,8 @@ void __ksm_exit(struct mm_struct *mm)
struct page *ksm_might_need_to_copy(struct page *page,
struct vm_area_struct *vma, unsigned long address)
{
- struct anon_vma *anon_vma = page_anon_vma(page);
+ struct folio *folio = page_folio(page);
+ struct anon_vma *anon_vma = folio_anon_vma(folio);
struct page *new_page;
if (PageKsm(page)) {
@@ -2595,26 +2583,29 @@ struct page *ksm_might_need_to_copy(struct page *page,
SetPageDirty(new_page);
__SetPageUptodate(new_page);
__SetPageLocked(new_page);
+#ifdef CONFIG_SWAP
+ count_vm_event(KSM_SWPIN_COPY);
+#endif
}
return new_page;
}
-void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
+void rmap_walk_ksm(struct folio *folio, const struct rmap_walk_control *rwc)
{
struct stable_node *stable_node;
struct rmap_item *rmap_item;
int search_new_forks = 0;
- VM_BUG_ON_PAGE(!PageKsm(page), page);
+ VM_BUG_ON_FOLIO(!folio_test_ksm(folio), folio);
/*
* Rely on the page lock to protect against concurrent modifications
* to that page's node of the stable tree.
*/
- VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
- stable_node = page_stable_node(page);
+ stable_node = folio_stable_node(folio);
if (!stable_node)
return;
again:
@@ -2649,11 +2640,11 @@ again:
if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
continue;
- if (!rwc->rmap_one(page, vma, addr, rwc->arg)) {
+ if (!rwc->rmap_one(folio, vma, addr, rwc->arg)) {
anon_vma_unlock_read(anon_vma);
return;
}
- if (rwc->done && rwc->done(page)) {
+ if (rwc->done && rwc->done(folio)) {
anon_vma_unlock_read(anon_vma);
return;
}
@@ -2826,8 +2817,7 @@ static void wait_while_offlining(void)
#define KSM_ATTR_RO(_name) \
static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
#define KSM_ATTR(_name) \
- static struct kobj_attribute _name##_attr = \
- __ATTR(_name, 0644, _name##_show, _name##_store)
+ static struct kobj_attribute _name##_attr = __ATTR_RW(_name)
static ssize_t sleep_millisecs_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
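
The KSM_ATTR change is purely cosmetic: __ATTR_RW(_name) from <linux/sysfs.h> expands to __ATTR(_name, 0644, _name##_show, _name##_store), so the macro keeps the exact mode and callbacks it spelled out before. For instance (expansion shown for illustration; check your tree's sysfs.h for drift):

	KSM_ATTR(sleep_millisecs);
	/* ... expands to roughly: */
	static struct kobj_attribute sleep_millisecs_attr =
		__ATTR(sleep_millisecs, 0644, sleep_millisecs_show,
		       sleep_millisecs_store);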
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 0cd5e89ca063..c669d87001a6 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -13,6 +13,7 @@
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"
+#include "internal.h"
#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(memcg_list_lrus);
@@ -49,35 +50,32 @@ static int lru_shrinker_id(struct list_lru *lru)
}
static inline struct list_lru_one *
-list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
+list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
- struct list_lru_memcg *memcg_lrus;
- /*
- * Either lock or RCU protects the array of per cgroup lists
- * from relocation (see memcg_update_list_lru_node).
- */
- memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
- lockdep_is_held(&nlru->lock));
- if (memcg_lrus && idx >= 0)
- return memcg_lrus->lru[idx];
- return &nlru->lru;
+ if (list_lru_memcg_aware(lru) && idx >= 0) {
+ struct list_lru_memcg *mlru = xa_load(&lru->xa, idx);
+
+ return mlru ? &mlru->node[nid] : NULL;
+ }
+ return &lru->node[nid].lru;
}
static inline struct list_lru_one *
-list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
+list_lru_from_kmem(struct list_lru *lru, int nid, void *ptr,
struct mem_cgroup **memcg_ptr)
{
+ struct list_lru_node *nlru = &lru->node[nid];
struct list_lru_one *l = &nlru->lru;
struct mem_cgroup *memcg = NULL;
- if (!nlru->memcg_lrus)
+ if (!list_lru_memcg_aware(lru))
goto out;
memcg = mem_cgroup_from_obj(ptr);
if (!memcg)
goto out;
- l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
+ l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
out:
if (memcg_ptr)
*memcg_ptr = memcg;
@@ -103,18 +101,18 @@ static inline bool list_lru_memcg_aware(struct list_lru *lru)
}
static inline struct list_lru_one *
-list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
+list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
- return &nlru->lru;
+ return &lru->node[nid].lru;
}
static inline struct list_lru_one *
-list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
+list_lru_from_kmem(struct list_lru *lru, int nid, void *ptr,
struct mem_cgroup **memcg_ptr)
{
if (memcg_ptr)
*memcg_ptr = NULL;
- return &nlru->lru;
+ return &lru->node[nid].lru;
}
#endif /* CONFIG_MEMCG_KMEM */
@@ -127,7 +125,7 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item)
spin_lock(&nlru->lock);
if (list_empty(item)) {
- l = list_lru_from_kmem(nlru, item, &memcg);
+ l = list_lru_from_kmem(lru, nid, item, &memcg);
list_add_tail(item, &l->list);
/* Set shrinker bit if the first element was added */
if (!l->nr_items++)
@@ -150,7 +148,7 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item)
spin_lock(&nlru->lock);
if (!list_empty(item)) {
- l = list_lru_from_kmem(nlru, item, NULL);
+ l = list_lru_from_kmem(lru, nid, item, NULL);
list_del_init(item);
l->nr_items--;
nlru->nr_items--;
@@ -180,13 +178,12 @@ EXPORT_SYMBOL_GPL(list_lru_isolate_move);
unsigned long list_lru_count_one(struct list_lru *lru,
int nid, struct mem_cgroup *memcg)
{
- struct list_lru_node *nlru = &lru->node[nid];
struct list_lru_one *l;
long count;
rcu_read_lock();
- l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
- count = READ_ONCE(l->nr_items);
+ l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
+ count = l ? READ_ONCE(l->nr_items) : 0;
rcu_read_unlock();
if (unlikely(count < 0))
@@ -206,17 +203,20 @@ unsigned long list_lru_count_node(struct list_lru *lru, int nid)
EXPORT_SYMBOL_GPL(list_lru_count_node);
static unsigned long
-__list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
+__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
list_lru_walk_cb isolate, void *cb_arg,
unsigned long *nr_to_walk)
{
-
+ struct list_lru_node *nlru = &lru->node[nid];
struct list_lru_one *l;
struct list_head *item, *n;
unsigned long isolated = 0;
- l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
+ l = list_lru_from_memcg_idx(lru, nid, memcg_idx);
+ if (!l)
+ goto out;
+
list_for_each_safe(item, n, &l->list) {
enum lru_status ret;
@@ -260,6 +260,7 @@ restart:
BUG();
}
}
+out:
return isolated;
}
@@ -272,8 +273,8 @@ list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
unsigned long ret;
spin_lock(&nlru->lock);
- ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
- nr_to_walk);
+ ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate,
+ cb_arg, nr_to_walk);
spin_unlock(&nlru->lock);
return ret;
}
@@ -288,8 +289,8 @@ list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
unsigned long ret;
spin_lock_irq(&nlru->lock);
- ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
- nr_to_walk);
+ ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate,
+ cb_arg, nr_to_walk);
spin_unlock_irq(&nlru->lock);
return ret;
}
@@ -299,16 +300,20 @@ unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
unsigned long *nr_to_walk)
{
long isolated = 0;
- int memcg_idx;
isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
nr_to_walk);
+
+#ifdef CONFIG_MEMCG_KMEM
if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
- for_each_memcg_cache_index(memcg_idx) {
+ struct list_lru_memcg *mlru;
+ unsigned long index;
+
+ xa_for_each(&lru->xa, index, mlru) {
struct list_lru_node *nlru = &lru->node[nid];
spin_lock(&nlru->lock);
- isolated += __list_lru_walk_one(nlru, memcg_idx,
+ isolated += __list_lru_walk_one(lru, nid, index,
isolate, cb_arg,
nr_to_walk);
spin_unlock(&nlru->lock);
@@ -317,6 +322,8 @@ unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
break;
}
}
+#endif
+
return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);
@@ -328,204 +335,81 @@ static void init_one_lru(struct list_lru_one *l)
}
#ifdef CONFIG_MEMCG_KMEM
-static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
- int begin, int end)
+static struct list_lru_memcg *memcg_init_list_lru_one(gfp_t gfp)
{
- int i;
-
- for (i = begin; i < end; i++)
- kfree(memcg_lrus->lru[i]);
-}
-
-static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
- int begin, int end)
-{
- int i;
+ int nid;
+ struct list_lru_memcg *mlru;
- for (i = begin; i < end; i++) {
- struct list_lru_one *l;
+ mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp);
+ if (!mlru)
+ return NULL;
- l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
- if (!l)
- goto fail;
+ for_each_node(nid)
+ init_one_lru(&mlru->node[nid]);
- init_one_lru(l);
- memcg_lrus->lru[i] = l;
- }
- return 0;
-fail:
- __memcg_destroy_list_lru_node(memcg_lrus, begin, i);
- return -ENOMEM;
+ return mlru;
}
-static int memcg_init_list_lru_node(struct list_lru_node *nlru)
+static void memcg_list_lru_free(struct list_lru *lru, int src_idx)
{
- struct list_lru_memcg *memcg_lrus;
- int size = memcg_nr_cache_ids;
-
- memcg_lrus = kvmalloc(struct_size(memcg_lrus, lru, size), GFP_KERNEL);
- if (!memcg_lrus)
- return -ENOMEM;
-
- if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
- kvfree(memcg_lrus);
- return -ENOMEM;
- }
- RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);
-
- return 0;
-}
+ struct list_lru_memcg *mlru = xa_erase_irq(&lru->xa, src_idx);
-static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
-{
- struct list_lru_memcg *memcg_lrus;
/*
- * This is called when shrinker has already been unregistered,
- * and nobody can use it. So, there is no need to use kvfree_rcu().
+ * __list_lru_walk_one() can walk the list on this node, so we
+ * need kvfree_rcu() here. The walk runs under lru->node[nid].lock,
+ * which can serve as an RCU read-side critical section.
*/
- memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
- __memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
- kvfree(memcg_lrus);
+ if (mlru)
+ kvfree_rcu(mlru, rcu);
}
-static int memcg_update_list_lru_node(struct list_lru_node *nlru,
- int old_size, int new_size)
+static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
- struct list_lru_memcg *old, *new;
-
- BUG_ON(old_size > new_size);
-
- old = rcu_dereference_protected(nlru->memcg_lrus,
- lockdep_is_held(&list_lrus_mutex));
- new = kvmalloc(struct_size(new, lru, new_size), GFP_KERNEL);
- if (!new)
- return -ENOMEM;
-
- if (__memcg_init_list_lru_node(new, old_size, new_size)) {
- kvfree(new);
- return -ENOMEM;
- }
-
- memcpy(&new->lru, &old->lru, flex_array_size(new, lru, old_size));
- rcu_assign_pointer(nlru->memcg_lrus, new);
- kvfree_rcu(old, rcu);
- return 0;
-}
-
-static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
- int old_size, int new_size)
-{
- struct list_lru_memcg *memcg_lrus;
-
- memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
- lockdep_is_held(&list_lrus_mutex));
- /* do not bother shrinking the array back to the old size, because we
- * cannot handle allocation failures here */
- __memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
-}
-
-static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
-{
- int i;
-
+ if (memcg_aware)
+ xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ);
lru->memcg_aware = memcg_aware;
-
- if (!memcg_aware)
- return 0;
-
- for_each_node(i) {
- if (memcg_init_list_lru_node(&lru->node[i]))
- goto fail;
- }
- return 0;
-fail:
- for (i = i - 1; i >= 0; i--) {
- if (!lru->node[i].memcg_lrus)
- continue;
- memcg_destroy_list_lru_node(&lru->node[i]);
- }
- return -ENOMEM;
}
static void memcg_destroy_list_lru(struct list_lru *lru)
{
- int i;
+ XA_STATE(xas, &lru->xa, 0);
+ struct list_lru_memcg *mlru;
if (!list_lru_memcg_aware(lru))
return;
- for_each_node(i)
- memcg_destroy_list_lru_node(&lru->node[i]);
-}
-
-static int memcg_update_list_lru(struct list_lru *lru,
- int old_size, int new_size)
-{
- int i;
-
- for_each_node(i) {
- if (memcg_update_list_lru_node(&lru->node[i],
- old_size, new_size))
- goto fail;
- }
- return 0;
-fail:
- for (i = i - 1; i >= 0; i--) {
- if (!lru->node[i].memcg_lrus)
- continue;
-
- memcg_cancel_update_list_lru_node(&lru->node[i],
- old_size, new_size);
+ xas_lock_irq(&xas);
+ xas_for_each(&xas, mlru, ULONG_MAX) {
+ kfree(mlru);
+ xas_store(&xas, NULL);
}
- return -ENOMEM;
-}
-
-static void memcg_cancel_update_list_lru(struct list_lru *lru,
- int old_size, int new_size)
-{
- int i;
-
- for_each_node(i)
- memcg_cancel_update_list_lru_node(&lru->node[i],
- old_size, new_size);
+ xas_unlock_irq(&xas);
}
-int memcg_update_all_list_lrus(int new_size)
-{
- int ret = 0;
- struct list_lru *lru;
- int old_size = memcg_nr_cache_ids;
-
- mutex_lock(&list_lrus_mutex);
- list_for_each_entry(lru, &memcg_list_lrus, list) {
- ret = memcg_update_list_lru(lru, old_size, new_size);
- if (ret)
- goto fail;
- }
-out:
- mutex_unlock(&list_lrus_mutex);
- return ret;
-fail:
- list_for_each_entry_continue_reverse(lru, &memcg_list_lrus, list)
- memcg_cancel_update_list_lru(lru, old_size, new_size);
- goto out;
-}
-
-static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
- int src_idx, struct mem_cgroup *dst_memcg)
+static void memcg_reparent_list_lru_node(struct list_lru *lru, int nid,
+ int src_idx, struct mem_cgroup *dst_memcg)
{
struct list_lru_node *nlru = &lru->node[nid];
int dst_idx = dst_memcg->kmemcg_id;
struct list_lru_one *src, *dst;
/*
+ * If there is no lru entry in this nlru, we can skip it immediately.
+ */
+ if (!READ_ONCE(nlru->nr_items))
+ return;
+
+ /*
* Since list_lru_{add,del} may be called under an IRQ-safe lock,
* we have to use IRQ-safe primitives here to avoid deadlock.
*/
spin_lock_irq(&nlru->lock);
- src = list_lru_from_memcg_idx(nlru, src_idx);
- dst = list_lru_from_memcg_idx(nlru, dst_idx);
+ src = list_lru_from_memcg_idx(lru, nid, src_idx);
+ if (!src)
+ goto out;
+ dst = list_lru_from_memcg_idx(lru, nid, dst_idx);
list_splice_init(&src->list, &dst->list);
@@ -534,32 +418,143 @@ static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
src->nr_items = 0;
}
-
+out:
spin_unlock_irq(&nlru->lock);
}
-static void memcg_drain_list_lru(struct list_lru *lru,
- int src_idx, struct mem_cgroup *dst_memcg)
+static void memcg_reparent_list_lru(struct list_lru *lru,
+ int src_idx, struct mem_cgroup *dst_memcg)
{
int i;
for_each_node(i)
- memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
+ memcg_reparent_list_lru_node(lru, i, src_idx, dst_memcg);
+
+ memcg_list_lru_free(lru, src_idx);
}
-void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
+void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
{
+ struct cgroup_subsys_state *css;
struct list_lru *lru;
+ int src_idx = memcg->kmemcg_id;
+
+ /*
+ * Change kmemcg_id of this cgroup and all its descendants to the
+ * parent's id, and then move all entries from this cgroup's list_lrus
+ * to ones of the parent.
+ *
+ * After we have finished, all list_lrus corresponding to this cgroup
+ * are guaranteed to remain empty. So we can safely free this cgroup's
+ * list lrus in memcg_list_lru_free().
+ *
+ * Changing ->kmemcg_id to the parent can prevent memcg_list_lru_alloc()
+ * from allocating list lrus for this cgroup after the
+ * memcg_list_lru_free() call.
+ */
+ rcu_read_lock();
+ css_for_each_descendant_pre(css, &memcg->css) {
+ struct mem_cgroup *child;
+
+ child = mem_cgroup_from_css(css);
+ WRITE_ONCE(child->kmemcg_id, parent->kmemcg_id);
+ }
+ rcu_read_unlock();
mutex_lock(&list_lrus_mutex);
list_for_each_entry(lru, &memcg_list_lrus, list)
- memcg_drain_list_lru(lru, src_idx, dst_memcg);
+ memcg_reparent_list_lru(lru, src_idx, parent);
mutex_unlock(&list_lrus_mutex);
}
+
+static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
+ struct list_lru *lru)
+{
+ int idx = memcg->kmemcg_id;
+
+ return idx < 0 || xa_load(&lru->xa, idx);
+}
+
+int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
+ gfp_t gfp)
+{
+ int i;
+ unsigned long flags;
+ struct list_lru_memcg_table {
+ struct list_lru_memcg *mlru;
+ struct mem_cgroup *memcg;
+ } *table;
+ XA_STATE(xas, &lru->xa, 0);
+
+ if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
+ return 0;
+
+ gfp &= GFP_RECLAIM_MASK;
+ table = kmalloc_array(memcg->css.cgroup->level, sizeof(*table), gfp);
+ if (!table)
+ return -ENOMEM;
+
+ /*
+ * Because the list_lru can be reparented to the parent cgroup's
+ * list_lru, we should make sure that this cgroup and all its
+ * ancestors have allocated list_lru_memcg.
+ */
+ for (i = 0; memcg; memcg = parent_mem_cgroup(memcg), i++) {
+ if (memcg_list_lru_allocated(memcg, lru))
+ break;
+
+ table[i].memcg = memcg;
+ table[i].mlru = memcg_init_list_lru_one(gfp);
+ if (!table[i].mlru) {
+ while (i--)
+ kfree(table[i].mlru);
+ kfree(table);
+ return -ENOMEM;
+ }
+ }
+
+ xas_lock_irqsave(&xas, flags);
+ while (i--) {
+ int index = READ_ONCE(table[i].memcg->kmemcg_id);
+ struct list_lru_memcg *mlru = table[i].mlru;
+
+ xas_set(&xas, index);
+retry:
+ if (unlikely(index < 0 || xas_error(&xas) || xas_load(&xas))) {
+ kfree(mlru);
+ } else {
+ xas_store(&xas, mlru);
+ if (xas_error(&xas) == -ENOMEM) {
+ xas_unlock_irqrestore(&xas, flags);
+ if (xas_nomem(&xas, gfp))
+ xas_set_err(&xas, 0);
+ xas_lock_irqsave(&xas, flags);
+ /*
+ * The xas lock has been released, this memcg
+ * can be reparented before us. So reload
+ * memcg id. More details see the comments
+ * in memcg_reparent_list_lrus().
+ */
+ index = READ_ONCE(table[i].memcg->kmemcg_id);
+ if (index < 0)
+ xas_set_err(&xas, 0);
+ else if (!xas_error(&xas) && index != xas.xa_index)
+ xas_set(&xas, index);
+ goto retry;
+ }
+ }
+ }
+ /* Here xas_nomem() is used to free memory rather than to allocate it. */
+ if (xas.xa_alloc)
+ xas_nomem(&xas, gfp);
+ xas_unlock_irqrestore(&xas, flags);
+ kfree(table);
+
+ return xas_error(&xas);
+}
#else
-static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
+static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
- return 0;
}
static void memcg_destroy_list_lru(struct list_lru *lru)
@@ -571,7 +566,6 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
struct lock_class_key *key, struct shrinker *shrinker)
{
int i;
- int err = -ENOMEM;
#ifdef CONFIG_MEMCG_KMEM
if (shrinker)
@@ -579,11 +573,10 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
else
lru->shrinker_id = -1;
#endif
- memcg_get_cache_ids();
lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
if (!lru->node)
- goto out;
+ return -ENOMEM;
for_each_node(i) {
spin_lock_init(&lru->node[i].lock);
@@ -592,18 +585,10 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
init_one_lru(&lru->node[i].lru);
}
- err = memcg_init_list_lru(lru, memcg_aware);
- if (err) {
- kfree(lru->node);
- /* Do this so a list_lru_destroy() doesn't crash: */
- lru->node = NULL;
- goto out;
- }
-
+ memcg_init_list_lru(lru, memcg_aware);
list_lru_register(lru);
-out:
- memcg_put_cache_ids();
- return err;
+
+ return 0;
}
EXPORT_SYMBOL_GPL(__list_lru_init);
@@ -613,8 +598,6 @@ void list_lru_destroy(struct list_lru *lru)
if (!lru->node)
return;
- memcg_get_cache_ids();
-
list_lru_unregister(lru);
memcg_destroy_list_lru(lru);
@@ -624,6 +607,5 @@ void list_lru_destroy(struct list_lru *lru)
#ifdef CONFIG_MEMCG_KMEM
lru->shrinker_id = -1;
#endif
- memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);
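
The upshot of the list_lru rework: per-memcg lists live in an xarray keyed by kmemcg_id and are allocated lazily, so the old grow-every-lru memcg_update_all_list_lrus() path disappears. A caller charging objects to a memcg on a memcg-aware lru is expected to materialize the per-memcg list first, roughly:

	/* Sketch: ensure the per-memcg list exists before the first add. */
	static int sketch_lru_add(struct list_lru *lru, struct mem_cgroup *memcg,
				  struct list_head *item, gfp_t gfp)
	{
		int err;

		err = memcg_list_lru_alloc(memcg, lru, gfp);	/* no-op if present */
		if (err)
			return err;

		list_lru_add(lru, item);	/* finds the list via xa_load() */
		return 0;
	}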
diff --git a/mm/maccess.c b/mm/maccess.c
index d3f1a1f0b1c1..3fed2b876539 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -335,3 +335,9 @@ long strnlen_user_nofault(const void __user *unsafe_addr, long count)
return ret;
}
+
+void __copy_overflow(int size, unsigned long count)
+{
+ WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
+}
+EXPORT_SYMBOL(__copy_overflow);
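
__copy_overflow() is the out-of-line body for the copy_{to,from}_user() size checks; as of this series the inline copy_overflow() in <linux/thread_info.h> is, to the best of my reading, reduced to roughly:

	static inline void copy_overflow(int size, unsigned long count)
	{
		if (IS_ENABLED(CONFIG_BUG))
			__copy_overflow(size, count);
	}

which keeps the WARN string and call out of every inlined check site.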
diff --git a/mm/madvise.c b/mm/madvise.c
index 5604064df464..39b712fd8300 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -65,7 +65,7 @@ static int madvise_need_mmap_write(int behavior)
}
#ifdef CONFIG_ANON_VMA_NAME
-static struct anon_vma_name *anon_vma_name_alloc(const char *name)
+struct anon_vma_name *anon_vma_name_alloc(const char *name)
{
struct anon_vma_name *anon_name;
size_t count;
@@ -81,78 +81,48 @@ static struct anon_vma_name *anon_vma_name_alloc(const char *name)
return anon_name;
}
-static void vma_anon_name_free(struct kref *kref)
+void anon_vma_name_free(struct kref *kref)
{
struct anon_vma_name *anon_name =
container_of(kref, struct anon_vma_name, kref);
kfree(anon_name);
}
-static inline bool has_vma_anon_name(struct vm_area_struct *vma)
+struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
{
- return !vma->vm_file && vma->anon_name;
-}
-
-const char *vma_anon_name(struct vm_area_struct *vma)
-{
- if (!has_vma_anon_name(vma))
- return NULL;
-
mmap_assert_locked(vma->vm_mm);
- return vma->anon_name->name;
-}
-
-void dup_vma_anon_name(struct vm_area_struct *orig_vma,
- struct vm_area_struct *new_vma)
-{
- if (!has_vma_anon_name(orig_vma))
- return;
-
- kref_get(&orig_vma->anon_name->kref);
- new_vma->anon_name = orig_vma->anon_name;
-}
-
-void free_vma_anon_name(struct vm_area_struct *vma)
-{
- struct anon_vma_name *anon_name;
-
- if (!has_vma_anon_name(vma))
- return;
+ if (vma->vm_file)
+ return NULL;
- anon_name = vma->anon_name;
- vma->anon_name = NULL;
- kref_put(&anon_name->kref, vma_anon_name_free);
+ return vma->anon_name;
}
/* mmap_lock should be write-locked */
-static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
+static int replace_anon_vma_name(struct vm_area_struct *vma,
+ struct anon_vma_name *anon_name)
{
- const char *anon_name;
+ struct anon_vma_name *orig_name = anon_vma_name(vma);
- if (!name) {
- free_vma_anon_name(vma);
+ if (!anon_name) {
+ vma->anon_name = NULL;
+ anon_vma_name_put(orig_name);
return 0;
}
- anon_name = vma_anon_name(vma);
- if (anon_name) {
- /* Same name, nothing to do here */
- if (!strcmp(name, anon_name))
- return 0;
+ if (anon_vma_name_eq(orig_name, anon_name))
+ return 0;
- free_vma_anon_name(vma);
- }
- vma->anon_name = anon_vma_name_alloc(name);
- if (!vma->anon_name)
- return -ENOMEM;
+ vma->anon_name = anon_vma_name_reuse(anon_name);
+ anon_vma_name_put(orig_name);
return 0;
}
#else /* CONFIG_ANON_VMA_NAME */
-static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
+static int replace_anon_vma_name(struct vm_area_struct *vma,
+ struct anon_vma_name *anon_name)
{
- if (name)
+ if (anon_name)
return -EINVAL;
return 0;
@@ -161,17 +131,19 @@ static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
/*
* Update the vm_flags on region of a vma, splitting it or merging it as
* necessary. Must be called with mmap_sem held for writing;
+ * The caller should ensure anon_name stability by raising its refcount, even
+ * when anon_name belongs to a valid vma, because this function might free
+ * that vma.
*/
static int madvise_update_vma(struct vm_area_struct *vma,
struct vm_area_struct **prev, unsigned long start,
unsigned long end, unsigned long new_flags,
- const char *name)
+ struct anon_vma_name *anon_name)
{
struct mm_struct *mm = vma->vm_mm;
int error;
pgoff_t pgoff;
- if (new_flags == vma->vm_flags && is_same_vma_anon_name(vma, name)) {
+ if (new_flags == vma->vm_flags && anon_vma_name_eq(anon_vma_name(vma), anon_name)) {
*prev = vma;
return 0;
}
@@ -179,7 +151,7 @@ static int madvise_update_vma(struct vm_area_struct *vma,
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
vma->vm_file, pgoff, vma_policy(vma),
- vma->vm_userfaultfd_ctx, name);
+ vma->vm_userfaultfd_ctx, anon_name);
if (*prev) {
vma = *prev;
goto success;
@@ -209,7 +181,7 @@ success:
*/
vma->vm_flags = new_flags;
if (!vma->vm_file) {
- error = replace_vma_anon_name(vma, name);
+ error = replace_anon_vma_name(vma, anon_name);
if (error)
return error;
}
@@ -530,6 +502,11 @@ static void madvise_cold_page_range(struct mmu_gather *tlb,
tlb_end_vma(tlb, vma);
}
+static inline bool can_madv_lru_vma(struct vm_area_struct *vma)
+{
+ return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP));
+}
+
static long madvise_cold(struct vm_area_struct *vma,
struct vm_area_struct **prev,
unsigned long start_addr, unsigned long end_addr)
@@ -877,8 +854,8 @@ static long madvise_populate(struct vm_area_struct *vma,
* our VMA might have been split.
*/
if (!vma || start >= vma->vm_end) {
- vma = find_vma(mm, start);
- if (!vma || start < vma->vm_start)
+ vma = vma_lookup(mm, start);
+ if (!vma)
return -ENOMEM;
}
@@ -975,6 +952,7 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
unsigned long behavior)
{
int error;
+ struct anon_vma_name *anon_name;
unsigned long new_flags = vma->vm_flags;
switch (behavior) {
@@ -1040,8 +1018,11 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
break;
}
+ anon_name = anon_vma_name(vma);
+ anon_vma_name_get(anon_name);
error = madvise_update_vma(vma, prev, start, end, new_flags,
- vma_anon_name(vma));
+ anon_name);
+ anon_vma_name_put(anon_name);
out:
/*
@@ -1091,6 +1072,8 @@ static int madvise_inject_error(int behavior,
pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n",
pfn, start);
ret = memory_failure(pfn, MF_COUNT_INCREASED);
+ if (ret == -EOPNOTSUPP)
+ ret = 0;
}
if (ret)
@@ -1225,7 +1208,7 @@ int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
static int madvise_vma_anon_name(struct vm_area_struct *vma,
struct vm_area_struct **prev,
unsigned long start, unsigned long end,
- unsigned long name)
+ unsigned long anon_name)
{
int error;
@@ -1234,7 +1217,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma,
return -EBADF;
error = madvise_update_vma(vma, prev, start, end, vma->vm_flags,
- (const char *)name);
+ (struct anon_vma_name *)anon_name);
/*
* madvise() returns EAGAIN if kernel resources, such as
@@ -1246,7 +1229,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma,
}
int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
- unsigned long len_in, const char *name)
+ unsigned long len_in, struct anon_vma_name *anon_name)
{
unsigned long end;
unsigned long len;
@@ -1266,7 +1249,7 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
if (end == start)
return 0;
- return madvise_walk_vmas(mm, start, end, (unsigned long)name,
+ return madvise_walk_vmas(mm, start, end, (unsigned long)anon_name,
madvise_vma_anon_name);
}
#endif /* CONFIG_ANON_VMA_NAME */
@@ -1450,15 +1433,21 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
while (iov_iter_count(&iter)) {
iovec = iov_iter_iovec(&iter);
+ /*
+ * do_madvise returns ENOMEM if unmapped holes are present
+ * in the passed VMA. process_madvise() is expected to skip
+ * unmapped holes passed to it in the 'struct iovec' list
+ * and not fail because of them. Thus treat -ENOMEM return
+ * from do_madvise as valid and continue processing.
+ */
ret = do_madvise(mm, (unsigned long)iovec.iov_base,
iovec.iov_len, behavior);
- if (ret < 0)
+ if (ret < 0 && ret != -ENOMEM)
break;
iov_iter_advance(&iter, iovec.iov_len);
}
- if (ret == 0)
- ret = total_len - iov_iter_count(&iter);
+ ret = (total_len - iov_iter_count(&iter)) ? : ret;
release_mm:
mmput(mm);
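
Two details of the process_madvise() return path are easy to miss. First, -ENOMEM from do_madvise() marks an unmapped hole in the target's address space and is deliberately skipped rather than aborting the loop. Second, the final assignment uses the GNU "elvis" operator: a ?: b yields a when a is non-zero, else b, with a evaluated once. So:

	ret = (total_len - iov_iter_count(&iter)) ? : ret;
	/*
	 * e.g. 2 of 4 iovecs processed, then -EINTR: the call reports the
	 * bytes advanced, not the error; if nothing was processed at all,
	 * the last error (or 0) is returned unchanged.
	 */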
diff --git a/mm/memblock.c b/mm/memblock.c
index 1018e50566f3..b12a364f2766 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -366,14 +366,20 @@ void __init memblock_discard(void)
addr = __pa(memblock.reserved.regions);
size = PAGE_ALIGN(sizeof(struct memblock_region) *
memblock.reserved.max);
- memblock_free_late(addr, size);
+ if (memblock_reserved_in_slab)
+ kfree(memblock.reserved.regions);
+ else
+ memblock_free_late(addr, size);
}
if (memblock.memory.regions != memblock_memory_init_regions) {
addr = __pa(memblock.memory.regions);
size = PAGE_ALIGN(sizeof(struct memblock_region) *
memblock.memory.max);
- memblock_free_late(addr, size);
+ if (memblock_memory_in_slab)
+ kfree(memblock.memory.regions);
+ else
+ memblock_free_late(addr, size);
}
memblock_memory = NULL;
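
The rule this memblock hunk enforces — return memory through the allocator that produced it — in a self-contained userspace analogy, with malloc()/munmap() standing in for the slab and memblock cases:

	#include <stdlib.h>
	#include <sys/mman.h>

	struct region_array {
		void *regions;
		size_t size;
		int from_heap;	/* set once the array was grown via malloc() */
	};

	static void region_array_discard(struct region_array *a)
	{
		if (a->from_heap)
			free(a->regions);		/* kfree() in the hunk */
		else
			munmap(a->regions, a->size);	/* memblock_free_late() */
	}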
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 09d342c7cbd0..d495c2acb9f0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -53,6 +53,7 @@
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/vmpressure.h>
+#include <linux/memremap.h>
#include <linux/mm_inline.h>
#include <linux/swap_cgroup.h>
#include <linux/cpu.h>
@@ -254,7 +255,7 @@ struct mem_cgroup *vmpressure_to_memcg(struct vmpressure *vmpr)
}
#ifdef CONFIG_MEMCG_KMEM
-extern spinlock_t css_set_lock;
+static DEFINE_SPINLOCK(objcg_lock);
bool mem_cgroup_kmem_disabled(void)
{
@@ -298,9 +299,9 @@ static void obj_cgroup_release(struct percpu_ref *ref)
if (nr_pages)
obj_cgroup_uncharge_pages(objcg, nr_pages);
- spin_lock_irqsave(&css_set_lock, flags);
+ spin_lock_irqsave(&objcg_lock, flags);
list_del(&objcg->list);
- spin_unlock_irqrestore(&css_set_lock, flags);
+ spin_unlock_irqrestore(&objcg_lock, flags);
percpu_ref_exit(ref);
kfree_rcu(objcg, rcu);
@@ -332,7 +333,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
objcg = rcu_replace_pointer(memcg->objcg, NULL, true);
- spin_lock_irq(&css_set_lock);
+ spin_lock_irq(&objcg_lock);
/* 1) Ready to reparent active objcg. */
list_add(&objcg->list, &memcg->objcg_list);
@@ -342,54 +343,12 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
/* 3) Move already reparented objcgs to the parent's list */
list_splice(&memcg->objcg_list, &parent->objcg_list);
- spin_unlock_irq(&css_set_lock);
+ spin_unlock_irq(&objcg_lock);
percpu_ref_kill(&objcg->refcnt);
}
/*
- * This will be used as a shrinker list's index.
- * The main reason for not using cgroup id for this:
- * this works better in sparse environments, where we have a lot of memcgs,
- * but only a few kmem-limited. Or also, if we have, for instance, 200
- * memcgs, and none but the 200th is kmem-limited, we'd have to have a
- * 200 entry array for that.
- *
- * The current size of the caches array is stored in memcg_nr_cache_ids. It
- * will double each time we have to increase it.
- */
-static DEFINE_IDA(memcg_cache_ida);
-int memcg_nr_cache_ids;
-
-/* Protects memcg_nr_cache_ids */
-static DECLARE_RWSEM(memcg_cache_ids_sem);
-
-void memcg_get_cache_ids(void)
-{
- down_read(&memcg_cache_ids_sem);
-}
-
-void memcg_put_cache_ids(void)
-{
- up_read(&memcg_cache_ids_sem);
-}
-
-/*
- * MIN_SIZE is different than 1, because we would like to avoid going through
- * the alloc/free process all the time. In a small machine, 4 kmem-limited
- * cgroups is a reasonable guess. In the future, it could be a parameter or
- * tunable, but that is strictly not necessary.
- *
- * MAX_SIZE should be as large as the number of cgrp_ids. Ideally, we could get
- * this constant directly from cgroup, but it is understandable that this is
- * better kept as an internal representation in cgroup.c. In any case, the
- * cgrp_id space is not getting any smaller, and we don't have to necessarily
- * increase ours as well if it increases.
- */
-#define MEMCG_CACHES_MIN_SIZE 4
-#define MEMCG_CACHES_MAX_SIZE MEM_CGROUP_ID_MAX
-
-/*
* A lot of the calls to the cache allocation functions are expected to be
* inlined by the compiler. Since the calls to memcg_slab_pre_alloc_hook() are
 * conditional to this static branch, we'll have to allow modules that do
@@ -629,6 +588,35 @@ static DEFINE_SPINLOCK(stats_flush_lock);
static DEFINE_PER_CPU(unsigned int, stats_updates);
static atomic_t stats_flush_threshold = ATOMIC_INIT(0);
+/*
+ * Accessors to ensure that preemption is disabled on PREEMPT_RT, where it
+ * cannot be relied upon as a side effect of an acquired spinlock_t lock.
+ * These functions are never used in hardirq context on PREEMPT_RT and
+ * therefore disabling preemption is sufficient.
+ */
+static void memcg_stats_lock(void)
+{
+#ifdef CONFIG_PREEMPT_RT
+ preempt_disable();
+#else
+ VM_BUG_ON(!irqs_disabled());
+#endif
+}
+
+static void __memcg_stats_lock(void)
+{
+#ifdef CONFIG_PREEMPT_RT
+ preempt_disable();
+#endif
+}
+
+static void memcg_stats_unlock(void)
+{
+#ifdef CONFIG_PREEMPT_RT
+ preempt_enable();
+#endif
+}
+
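
A sketch of how these accessors are meant to bracket a per-CPU statistics update (the index is illustrative; the real callers follow in the hunks below):

	static void example_mod_stat(struct mem_cgroup *memcg, int idx, int val)
	{
		/* RT: disables preemption; !RT: asserts IRQs are already off */
		memcg_stats_lock();
		__this_cpu_add(memcg->vmstats_percpu->state[idx], val);
		memcg_stats_unlock();
	}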
static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
{
unsigned int x;
@@ -705,6 +693,27 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
memcg = pn->memcg;
+ /*
+ * Callers from rmap rely on disabled preemption because they never
+ * update their counters from in-interrupt context. For those
+ * counters we check that the update is never performed from an
+ * interrupt context, while all other callers need to have interrupts
+ * disabled.
+ */
+ __memcg_stats_lock();
+ if (IS_ENABLED(CONFIG_DEBUG_VM) && !IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ switch (idx) {
+ case NR_ANON_MAPPED:
+ case NR_FILE_MAPPED:
+ case NR_ANON_THPS:
+ case NR_SHMEM_PMDMAPPED:
+ case NR_FILE_PMDMAPPED:
+ WARN_ON_ONCE(!in_task());
+ break;
+ default:
+ WARN_ON_ONCE(!irqs_disabled());
+ }
+ }
+
/* Update memcg */
__this_cpu_add(memcg->vmstats_percpu->state[idx], val);
@@ -712,6 +721,7 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
__this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
memcg_rstat_updated(memcg, val);
+ memcg_stats_unlock();
}
/**
@@ -794,8 +804,10 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
if (mem_cgroup_disabled())
return;
+ memcg_stats_lock();
__this_cpu_add(memcg->vmstats_percpu->events[idx], count);
memcg_rstat_updated(memcg, count);
+ memcg_stats_unlock();
}
static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
@@ -858,6 +870,9 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
*/
static void memcg_check_events(struct mem_cgroup *memcg, int nid)
{
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ return;
+
/* threshold event is triggered in finer grain than soft limit */
if (unlikely(mem_cgroup_event_ratelimit(memcg,
MEM_CGROUP_TARGET_THRESH))) {
@@ -1257,8 +1272,7 @@ struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
* @nr_pages: positive when adding or negative when removing
*
* This function must be called under lru_lock, just before a page is added
- * to or just after a page is removed from an lru list (that ordering being
- * so as to allow it to check that lru_size 0 is consistent with list_empty).
+ * to or just after a page is removed from an lru list.
*/
void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
int zid, int nr_pages)
@@ -1371,6 +1385,7 @@ struct memory_stat {
static const struct memory_stat memory_stats[] = {
{ "anon", NR_ANON_MAPPED },
{ "file", NR_FILE_PAGES },
+ { "kernel", MEMCG_KMEM },
{ "kernel_stack", NR_KERNEL_STACK_KB },
{ "pagetables", NR_PAGETABLE },
{ "percpu", MEMCG_PERCPU_B },
@@ -1795,20 +1810,16 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
__wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg);
}
-enum oom_status {
- OOM_SUCCESS,
- OOM_FAILED,
- OOM_ASYNC,
- OOM_SKIPPED
-};
-
-static enum oom_status mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
+/*
+ * Returns true if successfully killed one or more processes. Though in some
+ * corner cases it can return true even without killing any process.
+ */
+static bool mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
{
- enum oom_status ret;
- bool locked;
+ bool locked, ret;
if (order > PAGE_ALLOC_COSTLY_ORDER)
- return OOM_SKIPPED;
+ return false;
memcg_memory_event(memcg, MEMCG_OOM);
@@ -1831,14 +1842,13 @@ static enum oom_status mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int
* victim and then we have to bail out from the charge path.
*/
if (memcg->oom_kill_disable) {
- if (!current->in_user_fault)
- return OOM_SKIPPED;
- css_get(&memcg->css);
- current->memcg_in_oom = memcg;
- current->memcg_oom_gfp_mask = mask;
- current->memcg_oom_order = order;
-
- return OOM_ASYNC;
+ if (current->in_user_fault) {
+ css_get(&memcg->css);
+ current->memcg_in_oom = memcg;
+ current->memcg_oom_gfp_mask = mask;
+ current->memcg_oom_order = order;
+ }
+ return false;
}
mem_cgroup_mark_under_oom(memcg);
@@ -1849,10 +1859,7 @@ static enum oom_status mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int
mem_cgroup_oom_notify(memcg);
mem_cgroup_unmark_under_oom(memcg);
- if (mem_cgroup_out_of_memory(memcg, mask, order))
- ret = OOM_SUCCESS;
- else
- ret = OOM_FAILED;
+ ret = mem_cgroup_out_of_memory(memcg, mask, order);
if (locked)
mem_cgroup_oom_unlock(memcg);
@@ -2085,45 +2092,47 @@ void unlock_page_memcg(struct page *page)
folio_memcg_unlock(page_folio(page));
}
-struct obj_stock {
+struct memcg_stock_pcp {
+ local_lock_t stock_lock;
+ struct mem_cgroup *cached; /* this is never the root cgroup */
+ unsigned int nr_pages;
+
#ifdef CONFIG_MEMCG_KMEM
struct obj_cgroup *cached_objcg;
struct pglist_data *cached_pgdat;
unsigned int nr_bytes;
int nr_slab_reclaimable_b;
int nr_slab_unreclaimable_b;
-#else
- int dummy[0];
#endif
-};
-
-struct memcg_stock_pcp {
- struct mem_cgroup *cached; /* this never be root cgroup */
- unsigned int nr_pages;
- struct obj_stock task_obj;
- struct obj_stock irq_obj;
struct work_struct work;
unsigned long flags;
#define FLUSHING_CACHED_CHARGE 0
};
-static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
+static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock) = {
+ .stock_lock = INIT_LOCAL_LOCK(stock_lock),
+};
static DEFINE_MUTEX(percpu_charge_mutex);
#ifdef CONFIG_MEMCG_KMEM
-static void drain_obj_stock(struct obj_stock *stock);
+static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock);
static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
struct mem_cgroup *root_memcg);
+static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages);
#else
-static inline void drain_obj_stock(struct obj_stock *stock)
+static inline struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
{
+ return NULL;
}
static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
struct mem_cgroup *root_memcg)
{
return false;
}
+static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages)
+{
+}
#endif
/**
@@ -2146,7 +2155,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
if (nr_pages > MEMCG_CHARGE_BATCH)
return ret;
- local_irq_save(flags);
+ local_lock_irqsave(&memcg_stock.stock_lock, flags);
stock = this_cpu_ptr(&memcg_stock);
if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
@@ -2154,7 +2163,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
ret = true;
}
- local_irq_restore(flags);
+ local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
return ret;
}
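
consume_stock() above is one instance of this patch's local_irq_save() to local_lock_irqsave() conversion. A minimal sketch of the pattern with an illustrative per-CPU structure — on !RT the local_lock behaves like IRQ disabling, on PREEMPT_RT it is a per-CPU sleeping lock:

	struct pcp_cache {
		local_lock_t lock;
		unsigned int count;
	};

	static DEFINE_PER_CPU(struct pcp_cache, pcp_cache) = {
		.lock = INIT_LOCAL_LOCK(lock),
	};

	static void pcp_cache_add(unsigned int n)
	{
		unsigned long flags;

		local_lock_irqsave(&pcp_cache.lock, flags);
		this_cpu_add(pcp_cache.count, n);
		local_unlock_irqrestore(&pcp_cache.lock, flags);
	}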
@@ -2183,6 +2192,7 @@ static void drain_stock(struct memcg_stock_pcp *stock)
static void drain_local_stock(struct work_struct *dummy)
{
struct memcg_stock_pcp *stock;
+ struct obj_cgroup *old = NULL;
unsigned long flags;
/*
@@ -2190,28 +2200,25 @@ static void drain_local_stock(struct work_struct *dummy)
* drain_stock races is that we always operate on local CPU stock
* here with IRQ disabled
*/
- local_irq_save(flags);
+ local_lock_irqsave(&memcg_stock.stock_lock, flags);
stock = this_cpu_ptr(&memcg_stock);
- drain_obj_stock(&stock->irq_obj);
- if (in_task())
- drain_obj_stock(&stock->task_obj);
+ old = drain_obj_stock(stock);
drain_stock(stock);
clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
- local_irq_restore(flags);
+ local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ if (old)
+ obj_cgroup_put(old);
}
/*
* Cache charges(val) to local per_cpu area.
* This will be consumed by consume_stock() function, later.
*/
-static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
{
struct memcg_stock_pcp *stock;
- unsigned long flags;
-
- local_irq_save(flags);
stock = this_cpu_ptr(&memcg_stock);
if (stock->cached != memcg) { /* reset if necessary */
@@ -2223,8 +2230,15 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
if (stock->nr_pages > MEMCG_CHARGE_BATCH)
drain_stock(stock);
+}
- local_irq_restore(flags);
+static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+{
+ unsigned long flags;
+
+ local_lock_irqsave(&memcg_stock.stock_lock, flags);
+ __refill_stock(memcg, nr_pages);
+ local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
}
/*
@@ -2244,7 +2258,8 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
* as well as workers from this path always operate on the local
* per-cpu data. CPU up doesn't touch memcg_stock at all.
*/
- curcpu = get_cpu();
+ migrate_disable();
+ curcpu = smp_processor_id();
for_each_online_cpu(cpu) {
struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
struct mem_cgroup *memcg;
@@ -2267,7 +2282,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
schedule_work_on(cpu, &stock->work);
}
}
- put_cpu();
+ migrate_enable();
mutex_unlock(&percpu_charge_mutex);
}
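
Side by side, the primitive swap in drain_all_stock() above — get_cpu() disables preemption for the whole CPU walk, which is too heavy on PREEMPT_RT, while migrate_disable() merely pins the task to its current CPU:

	/* before: preemption disabled across the loop */
	curcpu = get_cpu();
	/* ... for_each_online_cpu(), schedule_work_on() ... */
	put_cpu();

	/* after: task stays on this CPU but remains preemptible */
	migrate_disable();
	curcpu = smp_processor_id();
	/* ... same loop ... */
	migrate_enable();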
@@ -2541,7 +2556,6 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
int nr_retries = MAX_RECLAIM_RETRIES;
struct mem_cgroup *mem_over_limit;
struct page_counter *counter;
- enum oom_status oom_status;
unsigned long nr_reclaimed;
bool passed_oom = false;
bool may_swap = true;
@@ -2570,15 +2584,6 @@ retry:
}
/*
- * Memcg doesn't have a dedicated reserve for atomic
- * allocations. But like the global atomic pool, we need to
- * put the burden of reclaim on regular allocation requests
- * and let these go through as privileged allocations.
- */
- if (gfp_mask & __GFP_ATOMIC)
- goto force;
-
- /*
* Prevent unbounded recursion when reclaim operations need to
* allocate memory. This might exceed the limits temporarily,
* but we prefer facilitating memory reclaim and getting back
@@ -2644,15 +2649,20 @@ retry:
* a forward progress or bypass the charge if the oom killer
* couldn't make any progress.
*/
- oom_status = mem_cgroup_oom(mem_over_limit, gfp_mask,
- get_order(nr_pages * PAGE_SIZE));
- if (oom_status == OOM_SUCCESS) {
+ if (mem_cgroup_oom(mem_over_limit, gfp_mask,
+ get_order(nr_pages * PAGE_SIZE))) {
passed_oom = true;
nr_retries = MAX_RECLAIM_RETRIES;
goto retry;
}
nomem:
- if (!(gfp_mask & __GFP_NOFAIL))
+ /*
+ * Memcg doesn't have a dedicated reserve for atomic
+ * allocations. But like the global atomic pool, we need to
+ * put the burden of reclaim on regular allocation requests
+ * and let these go through as privileged allocations.
+ */
+ if (!(gfp_mask & (__GFP_NOFAIL | __GFP_HIGH)))
return -ENOMEM;
force:
/*
@@ -2688,7 +2698,7 @@ done_restock:
READ_ONCE(memcg->swap.high);
/* Don't bother a random interrupted task */
- if (in_interrupt()) {
+ if (!in_task()) {
if (mem_high) {
schedule_work(&memcg->high_work);
break;
@@ -2712,6 +2722,11 @@ done_restock:
}
} while ((memcg = parent_mem_cgroup(memcg)));
+ if (current->memcg_nr_pages_over_high > MEMCG_CHARGE_BATCH &&
+ !(current->flags & PF_MEMALLOC) &&
+ gfpflags_allow_blocking(gfp_mask)) {
+ mem_cgroup_handle_over_high();
+ }
return 0;
}
@@ -2748,20 +2763,6 @@ static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
folio->memcg_data = (unsigned long)memcg;
}
-static struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg)
-{
- struct mem_cgroup *memcg;
-
- rcu_read_lock();
-retry:
- memcg = obj_cgroup_memcg(objcg);
- if (unlikely(!css_tryget(&memcg->css)))
- goto retry;
- rcu_read_unlock();
-
- return memcg;
-}
-
#ifdef CONFIG_MEMCG_KMEM
/*
* The allocated objcg pointers array is not accounted directly.
@@ -2771,41 +2772,6 @@ retry:
#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT)
/*
- * Most kmem_cache_alloc() calls are from user context. The irq disable/enable
- * sequence used in this case to access content from object stock is slow.
- * To optimize for user context access, there are now two object stocks for
- * task context and interrupt context access respectively.
- *
- * The task context object stock can be accessed by disabling preemption only
- * which is cheap in non-preempt kernel. The interrupt context object stock
- * can only be accessed after disabling interrupt. User context code can
- * access interrupt object stock, but not vice versa.
- */
-static inline struct obj_stock *get_obj_stock(unsigned long *pflags)
-{
- struct memcg_stock_pcp *stock;
-
- if (likely(in_task())) {
- *pflags = 0UL;
- preempt_disable();
- stock = this_cpu_ptr(&memcg_stock);
- return &stock->task_obj;
- }
-
- local_irq_save(*pflags);
- stock = this_cpu_ptr(&memcg_stock);
- return &stock->irq_obj;
-}
-
-static inline void put_obj_stock(unsigned long flags)
-{
- if (likely(in_task()))
- preempt_enable();
- else
- local_irq_restore(flags);
-}
-
-/*
* mod_objcg_mlstate() may be called with irq enabled, so
* mod_memcg_lruvec_state() should be used.
*/
@@ -2936,48 +2902,17 @@ __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
return objcg;
}
-static int memcg_alloc_cache_id(void)
+static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages)
{
- int id, size;
- int err;
-
- id = ida_simple_get(&memcg_cache_ida,
- 0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
- if (id < 0)
- return id;
-
- if (id < memcg_nr_cache_ids)
- return id;
-
- /*
- * There's no space for the new id in memcg_caches arrays,
- * so we have to grow them.
- */
- down_write(&memcg_cache_ids_sem);
-
- size = 2 * (id + 1);
- if (size < MEMCG_CACHES_MIN_SIZE)
- size = MEMCG_CACHES_MIN_SIZE;
- else if (size > MEMCG_CACHES_MAX_SIZE)
- size = MEMCG_CACHES_MAX_SIZE;
-
- err = memcg_update_all_list_lrus(size);
- if (!err)
- memcg_nr_cache_ids = size;
-
- up_write(&memcg_cache_ids_sem);
-
- if (err) {
- ida_simple_remove(&memcg_cache_ida, id);
- return err;
+ mod_memcg_state(memcg, MEMCG_KMEM, nr_pages);
+ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
+ if (nr_pages > 0)
+ page_counter_charge(&memcg->kmem, nr_pages);
+ else
+ page_counter_uncharge(&memcg->kmem, -nr_pages);
}
- return id;
}
-static void memcg_free_cache_id(int id)
-{
- ida_simple_remove(&memcg_cache_ida, id);
-}
/*
* obj_cgroup_uncharge_pages: uncharge a number of kernel pages from a objcg
@@ -2991,8 +2926,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
memcg = get_mem_cgroup_from_objcg(objcg);
- if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
- page_counter_uncharge(&memcg->kmem, nr_pages);
+ memcg_account_kmem(memcg, -nr_pages);
refill_stock(memcg, nr_pages);
css_put(&memcg->css);
@@ -3018,8 +2952,7 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
if (ret)
goto out;
- if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
- page_counter_charge(&memcg->kmem, nr_pages);
+ memcg_account_kmem(memcg, nr_pages);
out:
css_put(&memcg->css);
@@ -3075,17 +3008,21 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
enum node_stat_item idx, int nr)
{
+ struct memcg_stock_pcp *stock;
+ struct obj_cgroup *old = NULL;
unsigned long flags;
- struct obj_stock *stock = get_obj_stock(&flags);
int *bytes;
+ local_lock_irqsave(&memcg_stock.stock_lock, flags);
+ stock = this_cpu_ptr(&memcg_stock);
+
/*
* Save vmstat data in stock and skip vmstat array update unless
* accumulating over a page of vmstat data or when pgdat or idx
* changes.
*/
if (stock->cached_objcg != objcg) {
- drain_obj_stock(stock);
+ old = drain_obj_stock(stock);
obj_cgroup_get(objcg);
stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
@@ -3129,38 +3066,51 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
if (nr)
mod_objcg_mlstate(objcg, pgdat, idx, nr);
- put_obj_stock(flags);
+ local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ if (old)
+ obj_cgroup_put(old);
}
static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
{
+ struct memcg_stock_pcp *stock;
unsigned long flags;
- struct obj_stock *stock = get_obj_stock(&flags);
bool ret = false;
+ local_lock_irqsave(&memcg_stock.stock_lock, flags);
+
+ stock = this_cpu_ptr(&memcg_stock);
if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
stock->nr_bytes -= nr_bytes;
ret = true;
}
- put_obj_stock(flags);
+ local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
return ret;
}
-static void drain_obj_stock(struct obj_stock *stock)
+static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
{
struct obj_cgroup *old = stock->cached_objcg;
if (!old)
- return;
+ return NULL;
if (stock->nr_bytes) {
unsigned int nr_pages = stock->nr_bytes >> PAGE_SHIFT;
unsigned int nr_bytes = stock->nr_bytes & (PAGE_SIZE - 1);
- if (nr_pages)
- obj_cgroup_uncharge_pages(old, nr_pages);
+ if (nr_pages) {
+ struct mem_cgroup *memcg;
+
+ memcg = get_mem_cgroup_from_objcg(old);
+
+ memcg_account_kmem(memcg, -nr_pages);
+ __refill_stock(memcg, nr_pages);
+
+ css_put(&memcg->css);
+ }
/*
* The leftover is flushed to the centralized per-memcg value.
@@ -3195,8 +3145,12 @@ static void drain_obj_stock(struct obj_stock *stock)
stock->cached_pgdat = NULL;
}
- obj_cgroup_put(old);
stock->cached_objcg = NULL;
+ /*
+ * The `old' object needs to be released by the caller via
+ * obj_cgroup_put() outside of memcg_stock_pcp::stock_lock.
+ */
+ return old;
}
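
drain_obj_stock() now only detaches the cached objcg and hands it back, since obj_cgroup_put() can end up taking objcg_lock and freeing memory, which should not happen under stock_lock. The resulting caller pattern, as the call sites converted in this patch use it:

	struct obj_cgroup *old;
	unsigned long flags;

	local_lock_irqsave(&memcg_stock.stock_lock, flags);
	old = drain_obj_stock(this_cpu_ptr(&memcg_stock));
	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);

	if (old)
		obj_cgroup_put(old);	/* dropped outside the lock */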
static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
@@ -3204,13 +3158,8 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
{
struct mem_cgroup *memcg;
- if (in_task() && stock->task_obj.cached_objcg) {
- memcg = obj_cgroup_memcg(stock->task_obj.cached_objcg);
- if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
- return true;
- }
- if (stock->irq_obj.cached_objcg) {
- memcg = obj_cgroup_memcg(stock->irq_obj.cached_objcg);
+ if (stock->cached_objcg) {
+ memcg = obj_cgroup_memcg(stock->cached_objcg);
if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
return true;
}
@@ -3221,12 +3170,16 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
bool allow_uncharge)
{
+ struct memcg_stock_pcp *stock;
+ struct obj_cgroup *old = NULL;
unsigned long flags;
- struct obj_stock *stock = get_obj_stock(&flags);
unsigned int nr_pages = 0;
+ local_lock_irqsave(&memcg_stock.stock_lock, flags);
+
+ stock = this_cpu_ptr(&memcg_stock);
if (stock->cached_objcg != objcg) { /* reset if necessary */
- drain_obj_stock(stock);
+ old = drain_obj_stock(stock);
obj_cgroup_get(objcg);
stock->cached_objcg = objcg;
stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
@@ -3240,7 +3193,9 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
stock->nr_bytes &= (PAGE_SIZE - 1);
}
- put_obj_stock(flags);
+ local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ if (old)
+ obj_cgroup_put(old);
if (nr_pages)
obj_cgroup_uncharge_pages(objcg, nr_pages);
@@ -3625,28 +3580,23 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
static int memcg_online_kmem(struct mem_cgroup *memcg)
{
struct obj_cgroup *objcg;
- int memcg_id;
if (cgroup_memory_nokmem)
return 0;
- BUG_ON(memcg->kmemcg_id >= 0);
-
- memcg_id = memcg_alloc_cache_id();
- if (memcg_id < 0)
- return memcg_id;
+ if (unlikely(mem_cgroup_is_root(memcg)))
+ return 0;
objcg = obj_cgroup_alloc();
- if (!objcg) {
- memcg_free_cache_id(memcg_id);
+ if (!objcg)
return -ENOMEM;
- }
+
objcg->memcg = memcg;
rcu_assign_pointer(memcg->objcg, objcg);
static_branch_enable(&memcg_kmem_enabled_key);
- memcg->kmemcg_id = memcg_id;
+ memcg->kmemcg_id = memcg->id.id;
return 0;
}
@@ -3654,9 +3604,11 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)
static void memcg_offline_kmem(struct mem_cgroup *memcg)
{
struct mem_cgroup *parent;
- int kmemcg_id;
- if (memcg->kmemcg_id == -1)
+ if (cgroup_memory_nokmem)
+ return;
+
+ if (unlikely(mem_cgroup_is_root(memcg)))
return;
parent = parent_mem_cgroup(memcg);
@@ -3665,19 +3617,13 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
memcg_reparent_objcgs(memcg, parent);
- kmemcg_id = memcg->kmemcg_id;
- BUG_ON(kmemcg_id < 0);
-
/*
* After we have finished memcg_reparent_objcgs(), all list_lrus
* corresponding to this cgroup are guaranteed to remain empty.
* The ordering is imposed by list_lru_node->lock taken by
- * memcg_drain_all_list_lrus().
+ * memcg_reparent_list_lrus().
*/
- memcg_drain_all_list_lrus(kmemcg_id, parent);
-
- memcg_free_cache_id(kmemcg_id);
- memcg->kmemcg_id = -1;
+ memcg_reparent_list_lrus(memcg, parent);
}
#else
static int memcg_online_kmem(struct mem_cgroup *memcg)
@@ -3763,8 +3709,12 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
}
break;
case RES_SOFT_LIMIT:
- memcg->soft_limit = nr_pages;
- ret = 0;
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ ret = -EOPNOTSUPP;
+ } else {
+ memcg->soft_limit = nr_pages;
+ ret = 0;
+ }
break;
}
return ret ?: nbytes;
@@ -4740,6 +4690,9 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
char *endp;
int ret;
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ return -EOPNOTSUPP;
+
buf = strstrip(buf);
efd = simple_strtoul(buf, &endp, 10);
@@ -5067,18 +5020,8 @@ struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
{
struct mem_cgroup_per_node *pn;
- int tmp = node;
- /*
- * This routine is called against possible nodes.
- * But it's BUG to call kmalloc() against offline node.
- *
- * TODO: this routine can waste much memory for nodes which will
- * never be onlined. It's better to use memory hotplug callback
- * function.
- */
- if (!node_state(node, N_NORMAL_MEMORY))
- tmp = -1;
- pn = kzalloc_node(sizeof(*pn), GFP_KERNEL, tmp);
+
+ pn = kzalloc_node(sizeof(*pn), GFP_KERNEL, node);
if (!pn)
return 1;
@@ -5090,8 +5033,6 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
}
lruvec_init(&pn->lruvec);
- pn->usage_in_excess = 0;
- pn->on_tree = false;
pn->memcg = memcg;
memcg->nodeinfo[node] = pn;
@@ -5137,8 +5078,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
return ERR_PTR(error);
memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL,
- 1, MEM_CGROUP_ID_MAX,
- GFP_KERNEL);
+ 1, MEM_CGROUP_ID_MAX + 1, GFP_KERNEL);
if (memcg->id.id < 0) {
error = memcg->id.id;
goto fail;
@@ -5192,7 +5132,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct mem_cgroup *parent = mem_cgroup_from_css(parent_css);
struct mem_cgroup *memcg, *old_memcg;
- long error = -ENOMEM;
old_memcg = set_active_memcg(parent);
memcg = mem_cgroup_alloc();
@@ -5221,34 +5160,26 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
return &memcg->css;
}
- /* The following stuff does not apply to the root */
- error = memcg_online_kmem(memcg);
- if (error)
- goto fail;
-
if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
static_branch_inc(&memcg_sockets_enabled_key);
return &memcg->css;
-fail:
- mem_cgroup_id_remove(memcg);
- mem_cgroup_free(memcg);
- return ERR_PTR(error);
}
static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+ if (memcg_online_kmem(memcg))
+ goto remove_id;
+
/*
* A memcg must be visible for expand_shrinker_info()
* by the time the maps are allocated. So, we allocate maps
* here, when for_each_mem_cgroup() can't skip it.
*/
- if (alloc_shrinker_info(memcg)) {
- mem_cgroup_id_remove(memcg);
- return -ENOMEM;
- }
+ if (alloc_shrinker_info(memcg))
+ goto offline_kmem;
/* Online state pins memcg ID, memcg ID pins CSS */
refcount_set(&memcg->id.ref, 1);
@@ -5258,6 +5189,11 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
2UL*HZ);
return 0;
+offline_kmem:
+ memcg_offline_kmem(memcg);
+remove_id:
+ mem_cgroup_id_remove(memcg);
+ return -ENOMEM;
}
static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
@@ -5315,9 +5251,6 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
cancel_work_sync(&memcg->high_work);
mem_cgroup_remove_from_trees(memcg);
free_shrinker_info(memcg);
-
- /* Need to offline kmem if online_css() fails */
- memcg_offline_kmem(memcg);
mem_cgroup_free(memcg);
}
@@ -5503,17 +5436,12 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
return NULL;
/*
- * Handle MEMORY_DEVICE_PRIVATE which are ZONE_DEVICE page belonging to
- * a device and because they are not accessible by CPU they are store
- * as special swap entry in the CPU page table.
+ * Handle device private pages that are not accessible by the CPU, but
+ * stored as special swap entries in the page table.
*/
if (is_device_private_entry(ent)) {
page = pfn_swap_entry_to_page(ent);
- /*
- * MEMORY_DEVICE_PRIVATE means ZONE_DEVICE page and which have
- * a refcount of 1 when free (unlike normal page)
- */
- if (!page_ref_add_unless(page, 1, 1))
+ if (!get_page_unless_zero(page))
return NULL;
return page;
}
@@ -6801,8 +6729,8 @@ static void uncharge_batch(const struct uncharge_gather *ug)
page_counter_uncharge(&ug->memcg->memory, ug->nr_memory);
if (do_memsw_account())
page_counter_uncharge(&ug->memcg->memsw, ug->nr_memory);
- if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && ug->nr_kmem)
- page_counter_uncharge(&ug->memcg->kmem, ug->nr_kmem);
+ if (ug->nr_kmem)
+ memcg_account_kmem(ug->memcg, -ug->nr_kmem);
memcg_oom_recover(ug->memcg);
}
@@ -6821,7 +6749,6 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
long nr_pages;
struct mem_cgroup *memcg;
struct obj_cgroup *objcg;
- bool use_objcg = folio_memcg_kmem(folio);
VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
@@ -6830,7 +6757,7 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
* folio memcg or objcg at this point, we have fully
* exclusive access to the folio.
*/
- if (use_objcg) {
+ if (folio_memcg_kmem(folio)) {
objcg = __folio_objcg(folio);
/*
* This get matches the put at the end of the function and
@@ -6858,7 +6785,7 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
nr_pages = folio_nr_pages(folio);
- if (use_objcg) {
+ if (folio_memcg_kmem(folio)) {
ug->nr_memory += nr_pages;
ug->nr_kmem += nr_pages;
@@ -6968,7 +6895,7 @@ void mem_cgroup_sk_alloc(struct sock *sk)
return;
/* Do not associate the sock with unrelated interrupted task's memcg. */
- if (in_interrupt())
+ if (!in_task())
return;
rcu_read_lock();
@@ -7053,7 +6980,7 @@ static int __init cgroup_memory(char *s)
if (!strcmp(token, "nokmem"))
cgroup_memory_nokmem = true;
}
- return 0;
+ return 1;
}
__setup("cgroup.memory=", cgroup_memory);
@@ -7121,19 +7048,19 @@ static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
/**
* mem_cgroup_swapout - transfer a memsw charge to swap
- * @page: page whose memsw charge to transfer
+ * @folio: folio whose memsw charge to transfer
* @entry: swap entry to move the charge to
*
- * Transfer the memsw charge of @page to @entry.
+ * Transfer the memsw charge of @folio to @entry.
*/
-void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
{
struct mem_cgroup *memcg, *swap_memcg;
unsigned int nr_entries;
unsigned short oldid;
- VM_BUG_ON_PAGE(PageLRU(page), page);
- VM_BUG_ON_PAGE(page_count(page), page);
+ VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
+ VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
if (mem_cgroup_disabled())
return;
@@ -7141,9 +7068,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
return;
- memcg = page_memcg(page);
+ memcg = folio_memcg(folio);
- VM_WARN_ON_ONCE_PAGE(!memcg, page);
+ VM_WARN_ON_ONCE_FOLIO(!memcg, folio);
if (!memcg)
return;
@@ -7153,16 +7080,16 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
* ancestor for the swap instead and transfer the memory+swap charge.
*/
swap_memcg = mem_cgroup_id_get_online(memcg);
- nr_entries = thp_nr_pages(page);
+ nr_entries = folio_nr_pages(folio);
/* Get references for the tail pages, too */
if (nr_entries > 1)
mem_cgroup_id_get_many(swap_memcg, nr_entries - 1);
oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg),
nr_entries);
- VM_BUG_ON_PAGE(oldid, page);
+ VM_BUG_ON_FOLIO(oldid, folio);
mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
- page->memcg_data = 0;
+ folio->memcg_data = 0;
if (!mem_cgroup_is_root(memcg))
page_counter_uncharge(&memcg->memory, nr_entries);
@@ -7179,9 +7106,10 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
* important here to have the interrupts disabled because it is the
* only synchronisation we have for updating the per-CPU variables.
*/
- VM_BUG_ON(!irqs_disabled());
+ memcg_stats_lock();
mem_cgroup_charge_statistics(memcg, -nr_entries);
- memcg_check_events(memcg, page_to_nid(page));
+ memcg_stats_unlock();
+ memcg_check_events(memcg, folio_nid(folio));
css_put(&memcg->css);
}
diff --git a/mm/memfd.c b/mm/memfd.c
index 9f80f162791a..08f5f8304746 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -31,20 +31,28 @@
static void memfd_tag_pins(struct xa_state *xas)
{
struct page *page;
- unsigned int tagged = 0;
+ int latency = 0;
+ int cache_count;
lru_add_drain();
xas_lock_irq(xas);
xas_for_each(xas, page, ULONG_MAX) {
- if (xa_is_value(page))
- continue;
- page = find_subpage(page, xas->xa_index);
- if (page_count(page) - page_mapcount(page) > 1)
+ cache_count = 1;
+ if (!xa_is_value(page) &&
+ PageTransHuge(page) && !PageHuge(page))
+ cache_count = HPAGE_PMD_NR;
+
+ if (!xa_is_value(page) &&
+ page_count(page) - total_mapcount(page) != cache_count)
xas_set_mark(xas, MEMFD_TAG_PINNED);
+ if (cache_count != 1)
+ xas_set(xas, page->index + cache_count);
- if (++tagged % XA_CHECK_SCHED)
+ latency += cache_count;
+ if (latency < XA_CHECK_SCHED)
continue;
+ latency = 0;
xas_pause(xas);
xas_unlock_irq(xas);
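
The walk above now charges HPAGE_PMD_NR against the scheduling budget for each THP, so the irq-off section is bounded by work performed rather than entries visited. The core of the pattern, sketched (the resume step assumes the usual xas_pause()/relock sequence):

	latency += cache_count;	/* 1 per small page, HPAGE_PMD_NR per THP */
	if (latency >= XA_CHECK_SCHED) {
		latency = 0;
		xas_pause(&xas);	/* remember position for resuming */
		xas_unlock_irq(&xas);
		cond_resched();
		xas_lock_irq(&xas);
	}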
@@ -73,7 +81,8 @@ static int memfd_wait_for_pins(struct address_space *mapping)
error = 0;
for (scan = 0; scan <= LAST_SCAN; scan++) {
- unsigned int tagged = 0;
+ int latency = 0;
+ int cache_count;
if (!xas_marked(&xas, MEMFD_TAG_PINNED))
break;
@@ -87,10 +96,14 @@ static int memfd_wait_for_pins(struct address_space *mapping)
xas_lock_irq(&xas);
xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
bool clear = true;
- if (xa_is_value(page))
- continue;
- page = find_subpage(page, xas.xa_index);
- if (page_count(page) - page_mapcount(page) != 1) {
+
+ cache_count = 1;
+ if (!xa_is_value(page) &&
+ PageTransHuge(page) && !PageHuge(page))
+ cache_count = HPAGE_PMD_NR;
+
+ if (!xa_is_value(page) && cache_count !=
+ page_count(page) - total_mapcount(page)) {
/*
* On the last scan, we clean up all those tags
* we inserted; but make a note that we still
@@ -103,8 +116,11 @@ static int memfd_wait_for_pins(struct address_space *mapping)
}
if (clear)
xas_clear_mark(&xas, MEMFD_TAG_PINNED);
- if (++tagged % XA_CHECK_SCHED)
+
+ latency += cache_count;
+ if (latency < XA_CHECK_SCHED)
continue;
+ latency = 0;
xas_pause(&xas);
xas_unlock_irq(&xas);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 14ae5c18e776..dcb6bb9cf731 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -130,12 +130,6 @@ static int hwpoison_filter_dev(struct page *p)
hwpoison_filter_dev_minor == ~0U)
return 0;
- /*
- * page_mapping() does not accept slab pages.
- */
- if (PageSlab(p))
- return -EINVAL;
-
mapping = page_mapping(p);
if (mapping == NULL || mapping->host == NULL)
return -EINVAL;
@@ -258,16 +252,13 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
pr_err("Memory failure: %#lx: Sending SIGBUS to %s:%d due to hardware memory corruption\n",
pfn, t->comm, t->pid);
- if (flags & MF_ACTION_REQUIRED) {
- if (t == current)
- ret = force_sig_mceerr(BUS_MCEERR_AR,
- (void __user *)tk->addr, addr_lsb);
- else
- /* Signal other processes sharing the page if they have PF_MCE_EARLY set. */
- ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)tk->addr,
- addr_lsb, t);
- } else {
+ if ((flags & MF_ACTION_REQUIRED) && (t == current))
+ ret = force_sig_mceerr(BUS_MCEERR_AR,
+ (void __user *)tk->addr, addr_lsb);
+ else
/*
+ * Signal other processes sharing the page if they have
+ * PF_MCE_EARLY set.
* Don't use force here, it's convenient if the signal
* can be temporarily blocked.
* This could cause a loop when the user sets SIGBUS
@@ -275,7 +266,6 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
*/
ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)tk->addr,
addr_lsb, t); /* synchronous? */
- }
if (ret < 0)
pr_info("Memory failure: Error sending signal to %s:%d: %d\n",
t->comm, t->pid, ret);
@@ -315,6 +305,7 @@ static unsigned long dev_pagemap_mapping_shift(struct page *page,
pmd_t *pmd;
pte_t *pte;
+ VM_BUG_ON_VMA(address == -EFAULT, vma);
pgd = pgd_offset(vma->vm_mm, address);
if (!pgd_present(*pgd))
return 0;
@@ -487,12 +478,13 @@ static struct task_struct *task_early_kill(struct task_struct *tsk,
static void collect_procs_anon(struct page *page, struct list_head *to_kill,
int force_early)
{
+ struct folio *folio = page_folio(page);
struct vm_area_struct *vma;
struct task_struct *tsk;
struct anon_vma *av;
pgoff_t pgoff;
- av = page_lock_anon_vma_read(page);
+ av = folio_lock_anon_vma_read(folio);
if (av == NULL) /* Not actually mapped anymore */
return;
@@ -707,8 +699,10 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
(void *)&priv);
if (ret == 1 && priv.tk.addr)
kill_proc(&priv.tk, pfn, flags);
+ else
+ ret = 0;
mmap_read_unlock(p->mm);
- return ret ? -EFAULT : -EHWPOISON;
+ return ret > 0 ? -EHWPOISON : -EFAULT;
}
static const char *action_name[] = {
@@ -739,6 +733,7 @@ static const char * const action_page_types[] = {
[MF_MSG_BUDDY] = "free buddy page",
[MF_MSG_DAX] = "dax page",
[MF_MSG_UNSPLIT_THP] = "unsplit thp",
+ [MF_MSG_DIFFERENT_PAGE_SIZE] = "different page size",
[MF_MSG_UNKNOWN] = "unknown page",
};
@@ -1182,12 +1177,18 @@ void ClearPageHWPoisonTakenOff(struct page *page)
* does not return true for hugetlb or device memory pages, so it's assumed
* to be called only in the context where we never have such pages.
*/
-static inline bool HWPoisonHandlable(struct page *page)
+static inline bool HWPoisonHandlable(struct page *page, unsigned long flags)
{
- return PageLRU(page) || __PageMovable(page) || is_free_buddy_page(page);
+ bool movable = false;
+
+ /* Soft offline could migrate non-LRU movable pages */
+ if ((flags & MF_SOFT_OFFLINE) && __PageMovable(page))
+ movable = true;
+
+ return movable || PageLRU(page) || is_free_buddy_page(page);
}
-static int __get_hwpoison_page(struct page *page)
+static int __get_hwpoison_page(struct page *page, unsigned long flags)
{
struct page *head = compound_head(page);
int ret = 0;
@@ -1202,7 +1203,7 @@ static int __get_hwpoison_page(struct page *page)
* for any unsupported type of page in order to reduce the risk of
* unexpected races caused by taking a page refcount.
*/
- if (!HWPoisonHandlable(head))
+ if (!HWPoisonHandlable(head, flags))
return -EBUSY;
if (get_page_unless_zero(head)) {
@@ -1227,7 +1228,7 @@ static int get_any_page(struct page *p, unsigned long flags)
try_again:
if (!count_increased) {
- ret = __get_hwpoison_page(p);
+ ret = __get_hwpoison_page(p, flags);
if (!ret) {
if (page_count(p)) {
/* We raced with an allocation, retry. */
@@ -1255,7 +1256,7 @@ try_again:
}
}
- if (PageHuge(p) || HWPoisonHandlable(p)) {
+ if (PageHuge(p) || HWPoisonHandlable(p, flags)) {
ret = 1;
} else {
/*
@@ -1347,6 +1348,7 @@ static int get_hwpoison_page(struct page *p, unsigned long flags)
static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
int flags, struct page *hpage)
{
+ struct folio *folio = page_folio(hpage);
enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC;
struct address_space *mapping;
LIST_HEAD(tokill);
@@ -1411,26 +1413,22 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
if (kill)
collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);
- if (!PageHuge(hpage)) {
- try_to_unmap(hpage, ttu);
+ if (PageHuge(hpage) && !PageAnon(hpage)) {
+ /*
+ * For hugetlb pages in shared mappings, try_to_unmap
+ * could potentially call huge_pmd_unshare. Because of
+ * this, take semaphore in write mode here and set
+ * TTU_RMAP_LOCKED to indicate we have taken the lock
+ * at this higher level.
+ */
+ mapping = hugetlb_page_mapping_lock_write(hpage);
+ if (mapping) {
+ try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
+ i_mmap_unlock_write(mapping);
+ } else
+ pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
} else {
- if (!PageAnon(hpage)) {
- /*
- * For hugetlb pages in shared mappings, try_to_unmap
- * could potentially call huge_pmd_unshare. Because of
- * this, take semaphore in write mode here and set
- * TTU_RMAP_LOCKED to indicate we have taken the lock
- * at this higher level.
- */
- mapping = hugetlb_page_mapping_lock_write(hpage);
- if (mapping) {
- try_to_unmap(hpage, ttu|TTU_RMAP_LOCKED);
- i_mmap_unlock_write(mapping);
- } else
- pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
- } else {
- try_to_unmap(hpage, ttu);
- }
+ try_to_unmap(folio, ttu);
}
unmap_success = !page_mapped(hpage);
@@ -1526,7 +1524,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
if (TestClearPageHWPoison(head))
num_poisoned_pages_dec();
unlock_page(head);
- return 0;
+ return -EOPNOTSUPP;
}
unlock_page(head);
res = MF_FAILED;
@@ -1543,8 +1541,27 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
}
lock_page(head);
+
+ /*
+ * The page could have changed compound pages due to a race window.
+ * If this happens, just bail out.
+ */
+ if (!PageHuge(p) || compound_head(p) != head) {
+ action_result(pfn, MF_MSG_DIFFERENT_PAGE_SIZE, MF_IGNORED);
+ res = -EBUSY;
+ goto out;
+ }
+
page_flags = head->flags;
+ if (hwpoison_filter(p)) {
+ if (TestClearPageHWPoison(head))
+ num_poisoned_pages_dec();
+ put_page(p);
+ res = -EOPNOTSUPP;
+ goto out;
+ }
+
/*
* TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
* simply disable it. In order to make it work properly, we need
@@ -1596,6 +1613,12 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
}
/*
+ * Pages instantiated by device-dax (not filesystem-dax)
+ * may be compound pages.
+ */
+ page = compound_head(page);
+
+ /*
* Prevent the inode from being freed while we are interrogating
* the address_space, typically this would be handled by
* lock_page(), but dax pages do not use the page lock. This
@@ -1607,7 +1630,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
goto out;
if (hwpoison_filter(page)) {
- rc = 0;
+ rc = -EOPNOTSUPP;
goto unlock;
}
@@ -1632,7 +1655,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
* SIGBUS (i.e. MF_MUST_KILL)
*/
flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
- collect_procs(page, &tokill, flags & MF_ACTION_REQUIRED);
+ collect_procs(page, &tokill, true);
list_for_each_entry(tk, &tokill, nd)
if (tk->size_shift)
@@ -1647,7 +1670,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
start = (page->index << PAGE_SHIFT) & ~(size - 1);
unmap_mapping_range(page->mapping, start, size, 0);
}
- kill_procs(&tokill, flags & MF_MUST_KILL, false, pfn, flags);
+ kill_procs(&tokill, true, false, pfn, flags);
rc = 0;
unlock:
dax_unlock_page(page, cookie);
@@ -1676,12 +1699,15 @@ static DEFINE_MUTEX(mf_mutex);
*
* Must run in process context (e.g. a work queue) with interrupts
* enabled and no spinlocks hold.
+ *
+ * Return: 0 if the memory error was handled successfully,
+ * -EOPNOTSUPP if hwpoison_filter() filtered the error event,
+ * any other negative value on failure.
*/
int memory_failure(unsigned long pfn, int flags)
{
struct page *p;
struct page *hpage;
- struct page *orig_head;
struct dev_pagemap *pgmap;
int res = 0;
unsigned long page_flags;
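
Given the return convention documented above, injection-style callers can treat a filtered event as handled; the madvise_inject_error() hunk earlier in this patch does exactly that:

	ret = memory_failure(pfn, MF_COUNT_INCREASED);
	if (ret == -EOPNOTSUPP)	/* event filtered, not a real failure */
		ret = 0;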
@@ -1727,7 +1753,7 @@ try_again:
goto unlock_mutex;
}
- orig_head = hpage = compound_head(p);
+ hpage = compound_head(p);
num_poisoned_pages_inc();
/*
@@ -1808,10 +1834,21 @@ try_again:
lock_page(p);
/*
- * The page could have changed compound pages during the locking.
- * If this happens just bail out.
+ * We only intend to deal with non-compound pages here. However,
+ * the page could have changed compound pages due to a race
+ * window. If this happens, we can try again in the hope of
+ * handling the page on the next round.
*/
- if (PageCompound(p) && compound_head(p) != orig_head) {
+ if (PageCompound(p)) {
+ if (retry) {
+ if (TestClearPageHWPoison(p))
+ num_poisoned_pages_dec();
+ unlock_page(p);
+ put_page(p);
+ flags &= ~MF_COUNT_INCREASED;
+ retry = false;
+ goto try_again;
+ }
action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
res = -EBUSY;
goto unlock_page;
@@ -1831,6 +1868,7 @@ try_again:
num_poisoned_pages_dec();
unlock_page(p);
put_page(p);
+ res = -EOPNOTSUPP;
goto unlock_mutex;
}
@@ -1839,7 +1877,7 @@ try_again:
* page_lock. We need wait writeback completion for this page or it
* may trigger vfs BUG while evict inode.
*/
- if (!PageTransTail(p) && !PageLRU(p) && !PageWriteback(p))
+ if (!PageLRU(p) && !PageWriteback(p))
goto identify_page_state;
/*
@@ -2133,7 +2171,7 @@ static bool isolate_page(struct page *page, struct list_head *pagelist)
*/
static int __soft_offline_page(struct page *page)
{
- int ret = 0;
+ long ret = 0;
unsigned long pfn = page_to_pfn(page);
struct page *hpage = compound_head(page);
char const *msg_page[] = {"page", "hugepage"};
@@ -2144,12 +2182,6 @@ static int __soft_offline_page(struct page *page)
.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
};
- /*
- * Check PageHWPoison again inside page lock because PageHWPoison
- * is set by memory_failure() outside page lock. Note that
- * memory_failure() also double-checks PageHWPoison inside page lock,
- * so there's no race between soft_offline_page() and memory_failure().
- */
lock_page(page);
if (!PageHuge(page))
wait_on_page_writeback(page);
@@ -2160,7 +2192,7 @@ static int __soft_offline_page(struct page *page)
return 0;
}
- if (!PageHuge(page))
+ if (!PageHuge(page) && PageLRU(page) && !PageSwapCache(page))
/*
* Try to invalidate first. This should work for
* non dirty unmapped page cache pages.
@@ -2168,10 +2200,6 @@ static int __soft_offline_page(struct page *page)
ret = invalidate_inode_page(page);
unlock_page(page);
- /*
- * RED-PEN would be better to keep it isolated here, but we
- * would need to fix isolation locking first.
- */
if (ret) {
pr_info("soft_offline: %#lx: invalidated\n", pfn);
page_handle_poison(page, false, true);
@@ -2190,7 +2218,7 @@ static int __soft_offline_page(struct page *page)
if (!list_empty(&pagelist))
putback_movable_pages(&pagelist);
- pr_info("soft offline: %#lx: %s migration failed %d, type %pGp\n",
+ pr_info("soft offline: %#lx: %s migration failed %ld, type %pGp\n",
pfn, msg_page[huge], ret, &page->flags);
if (ret > 0)
ret = -EBUSY;
@@ -2282,7 +2310,7 @@ int soft_offline_page(unsigned long pfn, int flags)
retry:
get_online_mems();
- ret = get_hwpoison_page(page, flags);
+ ret = get_hwpoison_page(page, flags | MF_SOFT_OFFLINE);
put_online_mems();
if (ret > 0) {
diff --git a/mm/memory.c b/mm/memory.c
index c125c4969913..7c40850b7124 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -735,9 +735,6 @@ static void restore_exclusive_pte(struct vm_area_struct *vma,
set_pte_at(vma->vm_mm, address, ptep, pte);
- if (vma->vm_flags & VM_LOCKED)
- mlock_vma_page(page);
-
/*
* No need to invalidate - it was non-present before. However
* secondary CPUs may have mappings that need invalidating.
@@ -1309,22 +1306,34 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
* Parameter block passed down to zap_pte_range in exceptional cases.
*/
struct zap_details {
- struct address_space *zap_mapping; /* Check page->mapping if set */
struct folio *single_folio; /* Locked folio to be unmapped */
+ bool even_cows; /* Zap COWed private pages too? */
};
-/*
- * We set details->zap_mapping when we want to unmap shared but keep private
- * pages. Return true if skip zapping this page, false otherwise.
- */
-static inline bool
-zap_skip_check_mapping(struct zap_details *details, struct page *page)
+/* Whether we should zap all COWed (private) pages too */
+static inline bool should_zap_cows(struct zap_details *details)
{
- if (!details || !page)
- return false;
+ /* By default, zap all pages */
+ if (!details)
+ return true;
+
+ /* Or, we zap COWed pages only if the caller wants to */
+ return details->even_cows;
+}
+
+/* Decides whether we should zap this page with the page pointer specified */
+static inline bool should_zap_page(struct zap_details *details, struct page *page)
+{
+ /* If we can make a decision without *page.. */
+ if (should_zap_cows(details))
+ return true;
- return details->zap_mapping &&
- (details->zap_mapping != page_rmapping(page));
+ /* E.g. the caller passes NULL for the case of a zero page */
+ if (!page)
+ return true;
+
+ /* Otherwise we should only zap non-anon pages */
+ return !PageAnon(page);
}
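
A self-contained restatement of the zap policy encoded by the two helpers above, handy as a desk check (plain C, illustrative types):

	#include <stdbool.h>

	struct zap_policy { bool even_cows; };

	/* NULL policy: zap everything. Otherwise anon (COWed private)
	 * pages survive unless even_cows is set. */
	static bool should_zap(const struct zap_policy *p, bool page_is_anon)
	{
		if (!p || p->even_cows)
			return true;
		return !page_is_anon;
	}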
static unsigned long zap_pte_range(struct mmu_gather *tlb,
@@ -1349,6 +1358,8 @@ again:
arch_enter_lazy_mmu_mode();
do {
pte_t ptent = *pte;
+ struct page *page;
+
if (pte_none(ptent))
continue;
@@ -1356,10 +1367,8 @@ again:
break;
if (pte_present(ptent)) {
- struct page *page;
-
page = vm_normal_page(vma, addr, ptent);
- if (unlikely(zap_skip_check_mapping(details, page)))
+ if (unlikely(!should_zap_page(details, page)))
continue;
ptent = ptep_get_and_clear_full(mm, addr, pte,
tlb->fullmm);
@@ -1377,7 +1386,7 @@ again:
mark_page_accessed(page);
}
rss[mm_counter(page)]--;
- page_remove_rmap(page, false);
+ page_remove_rmap(page, vma, false);
if (unlikely(page_mapcount(page) < 0))
print_bad_pte(vma, addr, ptent, page);
if (unlikely(__tlb_remove_page(tlb, page))) {
@@ -1391,34 +1400,32 @@ again:
entry = pte_to_swp_entry(ptent);
if (is_device_private_entry(entry) ||
is_device_exclusive_entry(entry)) {
- struct page *page = pfn_swap_entry_to_page(entry);
-
- if (unlikely(zap_skip_check_mapping(details, page)))
+ page = pfn_swap_entry_to_page(entry);
+ if (unlikely(!should_zap_page(details, page)))
continue;
- pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
rss[mm_counter(page)]--;
-
if (is_device_private_entry(entry))
- page_remove_rmap(page, false);
-
+ page_remove_rmap(page, vma, false);
put_page(page);
- continue;
- }
-
- /* If details->check_mapping, we leave swap entries. */
- if (unlikely(details))
- continue;
-
- if (!non_swap_entry(entry))
+ } else if (!non_swap_entry(entry)) {
+ /* Genuine swap entry, hence a private anon page */
+ if (!should_zap_cows(details))
+ continue;
rss[MM_SWAPENTS]--;
- else if (is_migration_entry(entry)) {
- struct page *page;
-
+ if (unlikely(!free_swap_and_cache(entry)))
+ print_bad_pte(vma, addr, ptent, NULL);
+ } else if (is_migration_entry(entry)) {
page = pfn_swap_entry_to_page(entry);
+ if (!should_zap_page(details, page))
+ continue;
rss[mm_counter(page)]--;
+ } else if (is_hwpoison_entry(entry)) {
+ if (!should_zap_cows(details))
+ continue;
+ } else {
+ /* We should have covered all the swap entry types */
+ WARN_ON_ONCE(1);
}
- if (unlikely(!free_swap_and_cache(entry)))
- print_bad_pte(vma, addr, ptent, NULL);
pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
} while (pte++, addr += PAGE_SIZE, addr != end);
@@ -1705,7 +1712,7 @@ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long addr
void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
unsigned long size)
{
- if (address < vma->vm_start || address + size > vma->vm_end ||
+ if (!range_in_vma(vma, address, address + size) ||
!(vma->vm_flags & VM_PFNMAP))
return;
@@ -1753,16 +1760,16 @@ static int validate_page_before_insert(struct page *page)
return 0;
}
-static int insert_page_into_pte_locked(struct mm_struct *mm, pte_t *pte,
+static int insert_page_into_pte_locked(struct vm_area_struct *vma, pte_t *pte,
unsigned long addr, struct page *page, pgprot_t prot)
{
if (!pte_none(*pte))
return -EBUSY;
/* Ok, finally just insert the thing.. */
get_page(page);
- inc_mm_counter_fast(mm, mm_counter_file(page));
- page_add_file_rmap(page, false);
- set_pte_at(mm, addr, pte, mk_pte(page, prot));
+ inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
+ page_add_file_rmap(page, vma, false);
+ set_pte_at(vma->vm_mm, addr, pte, mk_pte(page, prot));
return 0;
}
@@ -1776,7 +1783,6 @@ static int insert_page_into_pte_locked(struct mm_struct *mm, pte_t *pte,
static int insert_page(struct vm_area_struct *vma, unsigned long addr,
struct page *page, pgprot_t prot)
{
- struct mm_struct *mm = vma->vm_mm;
int retval;
pte_t *pte;
spinlock_t *ptl;
@@ -1785,17 +1791,17 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
if (retval)
goto out;
retval = -ENOMEM;
- pte = get_locked_pte(mm, addr, &ptl);
+ pte = get_locked_pte(vma->vm_mm, addr, &ptl);
if (!pte)
goto out;
- retval = insert_page_into_pte_locked(mm, pte, addr, page, prot);
+ retval = insert_page_into_pte_locked(vma, pte, addr, page, prot);
pte_unmap_unlock(pte, ptl);
out:
return retval;
}
#ifdef pte_index
-static int insert_page_in_batch_locked(struct mm_struct *mm, pte_t *pte,
+static int insert_page_in_batch_locked(struct vm_area_struct *vma, pte_t *pte,
unsigned long addr, struct page *page, pgprot_t prot)
{
int err;
@@ -1805,7 +1811,7 @@ static int insert_page_in_batch_locked(struct mm_struct *mm, pte_t *pte,
err = validate_page_before_insert(page);
if (err)
return err;
- return insert_page_into_pte_locked(mm, pte, addr, page, prot);
+ return insert_page_into_pte_locked(vma, pte, addr, page, prot);
}
/* insert_pages() amortizes the cost of spinlock operations
@@ -1842,7 +1848,7 @@ more:
start_pte = pte_offset_map_lock(mm, pmd, addr, &pte_lock);
for (pte = start_pte; pte_idx < batch_size; ++pte, ++pte_idx) {
- int err = insert_page_in_batch_locked(mm, pte,
+ int err = insert_page_in_batch_locked(vma, pte,
addr, pages[curr_page_idx], prot);
if (unlikely(err)) {
pte_unmap_unlock(start_pte, pte_lock);
@@ -3098,7 +3104,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
* mapcount is visible. So transitively, TLBs to
* old page will be flushed before it can be reused.
*/
- page_remove_rmap(old_page, false);
+ page_remove_rmap(old_page, vma, false);
}
/* Free the old page.. */
@@ -3118,16 +3124,6 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
*/
mmu_notifier_invalidate_range_only_end(&range);
if (old_page) {
- /*
- * Don't let another task, with possibly unlocked vma,
- * keep the mlocked page.
- */
- if (page_copied && (vma->vm_flags & VM_LOCKED)) {
- lock_page(old_page); /* LRU manipulation */
- if (PageMlocked(old_page))
- munlock_vma_page(old_page);
- unlock_page(old_page);
- }
if (page_copied)
free_swap_cache(old_page);
put_page(old_page);
@@ -3340,12 +3336,8 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
vma_interval_tree_foreach(vma, root, first_index, last_index) {
vba = vma->vm_pgoff;
vea = vba + vma_pages(vma) - 1;
- zba = first_index;
- if (zba < vba)
- zba = vba;
- zea = last_index;
- if (zea > vea)
- zea = vea;
+ zba = max(first_index, vba);
+ zea = min(last_index, vea);
unmap_mapping_range_vma(vma,
((zba - vba) << PAGE_SHIFT) + vma->vm_start,
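
Worked example of the clamp above: a VMA covering file pages 10..19 (vba = 10, vea = 19) asked to zap pages 0..14 gives

	zba = max(first_index, vba) = max(0, 10)  = 10
	zea = min(last_index, vea)  = min(14, 19) = 14

so five pages are unmapped starting at vma->vm_start (offset (zba - vba) << PAGE_SHIFT = 0).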
@@ -3377,7 +3369,7 @@ void unmap_mapping_folio(struct folio *folio)
first_index = folio->index;
last_index = folio->index + folio_nr_pages(folio) - 1;
- details.zap_mapping = mapping;
+ details.even_cows = false;
details.single_folio = folio;
i_mmap_lock_write(mapping);
@@ -3406,7 +3398,7 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
pgoff_t first_index = start;
pgoff_t last_index = start + nr - 1;
- details.zap_mapping = even_cows ? NULL : mapping;
+ details.even_cows = even_cows;
if (last_index < first_index)
last_index = ULONG_MAX;
@@ -3871,11 +3863,16 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
return ret;
if (unlikely(PageHWPoison(vmf->page))) {
- if (ret & VM_FAULT_LOCKED)
+ vm_fault_t poisonret = VM_FAULT_HWPOISON;
+ if (ret & VM_FAULT_LOCKED) {
+ /* Retry if a clean page was removed from the cache. */
+ if (invalidate_inode_page(vmf->page))
+ poisonret = 0;
unlock_page(vmf->page);
+ }
put_page(vmf->page);
vmf->page = NULL;
- return VM_FAULT_HWPOISON;
+ return poisonret;
}
if (unlikely(!(ret & VM_FAULT_LOCKED)))
@@ -3947,7 +3944,8 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR);
- page_add_file_rmap(page, true);
+ page_add_file_rmap(page, vma, true);
+
/*
* deposit and withdraw with pmd lock held
*/
@@ -3996,7 +3994,7 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
lru_cache_add_inactive_or_unevictable(page, vma);
} else {
inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
- page_add_file_rmap(page, false);
+ page_add_file_rmap(page, vma, false);
}
set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
}
@@ -4622,6 +4620,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
struct vm_fault vmf = {
.vma = vma,
.address = address & PAGE_MASK,
+ .real_address = address,
.flags = flags,
.pgoff = linear_page_index(vma, address),
.gfp_mask = __get_fault_gfp_mask(vma),
@@ -5444,6 +5443,8 @@ long copy_huge_page_from_user(struct page *dst_page,
if (rc)
break;
+ flush_dcache_page(subpage);
+
cond_resched();
}
return ret_val;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 2a9627dc784c..416b38ca8def 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -295,12 +295,6 @@ struct page *pfn_to_online_page(unsigned long pfn)
}
EXPORT_SYMBOL_GPL(pfn_to_online_page);
-/*
- * Reasonably generic function for adding memory. It is
- * expected that archs that support memory hotplug will
- * call this function after deciding the zone to which to
- * add the new pages.
- */
int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages,
struct mhp_params *params)
{
@@ -829,7 +823,7 @@ static struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn
struct pglist_data *pgdat = NODE_DATA(nid);
int zid;
- for (zid = 0; zid <= ZONE_NORMAL; zid++) {
+ for (zid = 0; zid < ZONE_NORMAL; zid++) {
struct zone *zone = &pgdat->node_zones[zid];
if (zone_intersects(zone, start_pfn, nr_pages))
@@ -1162,43 +1156,20 @@ static void reset_node_present_pages(pg_data_t *pgdat)
}
/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
-static pg_data_t __ref *hotadd_new_pgdat(int nid)
+static pg_data_t __ref *hotadd_init_pgdat(int nid)
{
struct pglist_data *pgdat;
+ /*
+ * NODE_DATA is preallocated (free_area_init) but its internal
+ * state is not allocated completely. Add missing pieces.
+ * Completely offline nodes stay around and they just need
+ * reinitialization.
+ */
pgdat = NODE_DATA(nid);
- if (!pgdat) {
- pgdat = arch_alloc_nodedata(nid);
- if (!pgdat)
- return NULL;
-
- pgdat->per_cpu_nodestats =
- alloc_percpu(struct per_cpu_nodestat);
- arch_refresh_nodedata(nid, pgdat);
- } else {
- int cpu;
- /*
- * Reset the nr_zones, order and highest_zoneidx before reuse.
- * Note that kswapd will init kswapd_highest_zoneidx properly
- * when it starts in the near future.
- */
- pgdat->nr_zones = 0;
- pgdat->kswapd_order = 0;
- pgdat->kswapd_highest_zoneidx = 0;
- for_each_online_cpu(cpu) {
- struct per_cpu_nodestat *p;
-
- p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
- memset(p, 0, sizeof(*p));
- }
- }
-
- /* we can use NODE_DATA(nid) from here */
- pgdat->node_id = nid;
- pgdat->node_start_pfn = 0;
/* init node's zones as empty zones, we don't have any present pages.*/
- free_area_init_core_hotplug(nid);
+ free_area_init_core_hotplug(pgdat);
/*
* The node we allocated has no zone fallback lists. For avoiding
@@ -1210,6 +1181,7 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid)
* When memory is hot-added, all the memory is in offline state. So
* clear all zones' present_pages because they will be updated in
* online_pages() and offline_pages().
+ * TODO: should be in free_area_init_core_hotplug?
*/
reset_node_managed_pages(pgdat);
reset_node_present_pages(pgdat);
@@ -1217,16 +1189,6 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid)
return pgdat;
}
-static void rollback_node_hotadd(int nid)
-{
- pg_data_t *pgdat = NODE_DATA(nid);
-
- arch_refresh_nodedata(nid, NULL);
- free_percpu(pgdat->per_cpu_nodestats);
- arch_free_nodedata(pgdat);
-}
-
-
/*
* __try_online_node - online a node if offlined
* @nid: the node ID
@@ -1246,7 +1208,7 @@ static int __try_online_node(int nid, bool set_node_online)
if (node_online(nid))
return 0;
- pgdat = hotadd_new_pgdat(nid);
+ pgdat = hotadd_init_pgdat(nid);
if (!pgdat) {
pr_err("Cannot online node %d due to NULL pgdat\n", nid);
ret = -ENOMEM;
@@ -1327,7 +1289,7 @@ bool mhp_supports_memmap_on_memory(unsigned long size)
* populate a single PMD.
*/
return memmap_on_memory &&
- !hugetlb_free_vmemmap_enabled &&
+ !hugetlb_free_vmemmap_enabled() &&
IS_ENABLED(CONFIG_MHP_MEMMAP_ON_MEMORY) &&
size == memory_block_size_bytes() &&
IS_ALIGNED(vmemmap_size, PMD_SIZE) &&
@@ -1421,9 +1383,9 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
BUG_ON(ret);
}
- /* link memory sections under this node.*/
- link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1),
- MEMINIT_HOTPLUG);
+ register_memory_blocks_under_node(nid, PFN_DOWN(start),
+ PFN_UP(start + size - 1),
+ MEMINIT_HOTPLUG);
/* create new memmap entry */
if (!strcmp(res->name, "System RAM"))
@@ -1445,9 +1407,6 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
return ret;
error:
- /* rollback pgdat allocation and others */
- if (new_node)
- rollback_node_hotadd(nid);
if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
memblock_remove(start, size);
error_mem_hotplug_end:
@@ -1590,38 +1549,6 @@ bool mhp_range_allowed(u64 start, u64 size, bool need_mapping)
#ifdef CONFIG_MEMORY_HOTREMOVE
/*
- * Confirm all pages in a range [start, end) belong to the same zone (skipping
- * memory holes). When true, return the zone.
- */
-struct zone *test_pages_in_a_zone(unsigned long start_pfn,
- unsigned long end_pfn)
-{
- unsigned long pfn, sec_end_pfn;
- struct zone *zone = NULL;
- struct page *page;
-
- for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1);
- pfn < end_pfn;
- pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) {
- /* Make sure the memory section is present first */
- if (!present_section_nr(pfn_to_section_nr(pfn)))
- continue;
- for (; pfn < sec_end_pfn && pfn < end_pfn;
- pfn += MAX_ORDER_NR_PAGES) {
- /* Check if we got outside of the zone */
- if (zone && !zone_spans_pfn(zone, pfn))
- return NULL;
- page = pfn_to_page(pfn);
- if (zone && page_zone(page) != zone)
- return NULL;
- zone = page_zone(page);
- }
- }
-
- return zone;
-}
-
-/*
* Scan pfn range [start,end) to find movable/migratable pages (LRU pages,
* non-lru movable pages and hugepages). Will skip over most unmovable
* pages (esp., pages that can be skipped when offlining), but bail out on
@@ -1690,10 +1617,13 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
DEFAULT_RATELIMIT_BURST);
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+ struct folio *folio;
+
if (!pfn_valid(pfn))
continue;
page = pfn_to_page(pfn);
- head = compound_head(page);
+ folio = page_folio(page);
+ head = &folio->page;
if (PageHuge(page)) {
pfn = page_to_pfn(head) + compound_nr(head) - 1;
@@ -1710,10 +1640,10 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
* the unmap as the catch all safety net).
*/
if (PageHWPoison(page)) {
- if (WARN_ON(PageLRU(page)))
- isolate_lru_page(page);
- if (page_mapped(page))
- try_to_unmap(page, TTU_IGNORE_MLOCK);
+ if (WARN_ON(folio_test_lru(folio)))
+ folio_isolate_lru(folio);
+ if (folio_mapped(folio))
+ try_to_unmap(folio, TTU_IGNORE_MLOCK);
continue;
}
@@ -1844,15 +1774,15 @@ static int count_system_ram_pages_cb(unsigned long start_pfn,
}
int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
- struct memory_group *group)
+ struct zone *zone, struct memory_group *group)
{
const unsigned long end_pfn = start_pfn + nr_pages;
unsigned long pfn, system_ram_pages = 0;
+ const int node = zone_to_nid(zone);
unsigned long flags;
- struct zone *zone;
struct memory_notify arg;
- int ret, node;
char *reason;
+ int ret;
/*
* {on,off}lining is constrained to full memory sections (or more
@@ -1884,15 +1814,17 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
goto failed_removal;
}
- /* This makes hotplug much easier...and readable.
- we assume this for now. .*/
- zone = test_pages_in_a_zone(start_pfn, end_pfn);
- if (!zone) {
+ /*
+ * We only support offlining of memory blocks managed by a single zone,
+ * checked by calling code. This is just a sanity check that we might
+ * want to remove in the future.
+ */
+ if (WARN_ON_ONCE(page_zone(pfn_to_page(start_pfn)) != zone ||
+ page_zone(pfn_to_page(end_pfn - 1)) != zone)) {
ret = -EINVAL;
reason = "multizone range";
goto failed_removal;
}
- node = zone_to_nid(zone);
/*
* Disable pcplists so that page isolation cannot race with freeing
@@ -2004,6 +1936,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
return 0;
failed_removal_isolated:
+ /* pushback to free area */
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
memory_notify(MEM_CANCEL_OFFLINE, &arg);
failed_removal_pcplists_disabled:
@@ -2014,7 +1947,6 @@ failed_removal:
(unsigned long long) start_pfn << PAGE_SHIFT,
((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
reason);
- /* pushback to free area */
mem_hotplug_done();
return ret;
}
@@ -2046,12 +1978,12 @@ static int get_nr_vmemmap_pages_cb(struct memory_block *mem, void *arg)
return mem->nr_vmemmap_pages;
}
-static int check_cpu_on_node(pg_data_t *pgdat)
+static int check_cpu_on_node(int nid)
{
int cpu;
for_each_present_cpu(cpu) {
- if (cpu_to_node(cpu) == pgdat->node_id)
+ if (cpu_to_node(cpu) == nid)
/*
* the cpu on this node isn't removed, and we can't
* offline this node.
@@ -2085,7 +2017,6 @@ static int check_no_memblock_for_node_cb(struct memory_block *mem, void *arg)
*/
void try_offline_node(int nid)
{
- pg_data_t *pgdat = NODE_DATA(nid);
int rc;
/*
@@ -2093,7 +2024,7 @@ void try_offline_node(int nid)
* offline it. A node spans memory after move_pfn_range_to_zone(),
* e.g., after the memory block was onlined.
*/
- if (pgdat->node_spanned_pages)
+ if (node_spanned_pages(nid))
return;
/*
@@ -2105,7 +2036,7 @@ void try_offline_node(int nid)
if (rc)
return;
- if (check_cpu_on_node(pgdat))
+ if (check_cpu_on_node(nid))
return;
/*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 028e8dd82b44..a2516d31db6c 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -786,7 +786,6 @@ static int vma_replace_policy(struct vm_area_struct *vma,
static int mbind_range(struct mm_struct *mm, unsigned long start,
unsigned long end, struct mempolicy *new_pol)
{
- struct vm_area_struct *next;
struct vm_area_struct *prev;
struct vm_area_struct *vma;
int err = 0;
@@ -801,8 +800,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
if (start > vma->vm_start)
prev = vma;
- for (; vma && vma->vm_start < end; prev = vma, vma = next) {
- next = vma->vm_next;
+ for (; vma && vma->vm_start < end; prev = vma, vma = vma->vm_next) {
vmstart = max(start, vma->vm_start);
vmend = min(end, vma->vm_end);
@@ -814,13 +812,9 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
vma->anon_vma, vma->vm_file, pgoff,
new_pol, vma->vm_userfaultfd_ctx,
- vma_anon_name(vma));
+ anon_vma_name(vma));
if (prev) {
vma = prev;
- next = vma->vm_next;
- if (mpol_equal(vma_policy(vma), new_pol))
- continue;
- /* vma_merge() joined vma && vma->next, case 8 */
goto replace;
}
if (vma->vm_start != vmstart) {
@@ -907,17 +901,14 @@ static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes)
static int lookup_node(struct mm_struct *mm, unsigned long addr)
{
struct page *p = NULL;
- int err;
+ int ret;
- int locked = 1;
- err = get_user_pages_locked(addr & PAGE_MASK, 1, 0, &p, &locked);
- if (err > 0) {
- err = page_to_nid(p);
+ ret = get_user_pages_fast(addr & PAGE_MASK, 1, 0, &p);
+ if (ret > 0) {
+ ret = page_to_nid(p);
put_page(p);
}
- if (locked)
- mmap_read_unlock(mm);
- return err;
+ return ret;
}
/* Retrieve NUMA policy */
@@ -968,14 +959,14 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
if (flags & MPOL_F_NODE) {
if (flags & MPOL_F_ADDR) {
/*
- * Take a refcount on the mpol, lookup_node()
- * will drop the mmap_lock, so after calling
- * lookup_node() only "pol" remains valid, "vma"
- * is stale.
+ * Take a refcount on the mpol, because we are about to
+ * drop the mmap_lock, after which only "pol" remains
+ * valid, "vma" is stale.
*/
pol_refcount = pol;
vma = NULL;
mpol_get(pol);
+ mmap_read_unlock(mm);
err = lookup_node(mm, addr);
if (err < 0)
goto out;
diff --git a/mm/memremap.c b/mm/memremap.c
index 6aa5f0c2d11f..c17eca4a48ca 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -4,7 +4,7 @@
#include <linux/io.h>
#include <linux/kasan.h>
#include <linux/memory_hotplug.h>
-#include <linux/mm.h>
+#include <linux/memremap.h>
#include <linux/pfn_t.h>
#include <linux/swap.h>
#include <linux/mmzone.h>
@@ -12,6 +12,7 @@
#include <linux/types.h>
#include <linux/wait_bit.h>
#include <linux/xarray.h>
+#include "internal.h"
static DEFINE_XARRAY(pgmap_array);
@@ -37,21 +38,19 @@ unsigned long memremap_compat_align(void)
EXPORT_SYMBOL_GPL(memremap_compat_align);
#endif
-#ifdef CONFIG_DEV_PAGEMAP_OPS
+#ifdef CONFIG_FS_DAX
DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
EXPORT_SYMBOL(devmap_managed_key);
static void devmap_managed_enable_put(struct dev_pagemap *pgmap)
{
- if (pgmap->type == MEMORY_DEVICE_PRIVATE ||
- pgmap->type == MEMORY_DEVICE_FS_DAX)
+ if (pgmap->type == MEMORY_DEVICE_FS_DAX)
static_branch_dec(&devmap_managed_key);
}
static void devmap_managed_enable_get(struct dev_pagemap *pgmap)
{
- if (pgmap->type == MEMORY_DEVICE_PRIVATE ||
- pgmap->type == MEMORY_DEVICE_FS_DAX)
+ if (pgmap->type == MEMORY_DEVICE_FS_DAX)
static_branch_inc(&devmap_managed_key);
}
#else
@@ -61,7 +60,7 @@ static void devmap_managed_enable_get(struct dev_pagemap *pgmap)
static void devmap_managed_enable_put(struct dev_pagemap *pgmap)
{
}
-#endif /* CONFIG_DEV_PAGEMAP_OPS */
+#endif /* CONFIG_FS_DAX */
static void pgmap_array_delete(struct range *range)
{
@@ -102,23 +101,12 @@ static unsigned long pfn_end(struct dev_pagemap *pgmap, int range_id)
return (range->start + range_len(range)) >> PAGE_SHIFT;
}
-static unsigned long pfn_next(struct dev_pagemap *pgmap, unsigned long pfn)
-{
- if (pfn % (1024 << pgmap->vmemmap_shift))
- cond_resched();
- return pfn + pgmap_vmemmap_nr(pgmap);
-}
-
static unsigned long pfn_len(struct dev_pagemap *pgmap, unsigned long range_id)
{
return (pfn_end(pgmap, range_id) -
pfn_first(pgmap, range_id)) >> pgmap->vmemmap_shift;
}
-#define for_each_device_pfn(pfn, map, i) \
- for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); \
- pfn = pfn_next(map, pfn))
-
static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
{
struct range *range = &pgmap->ranges[range_id];
@@ -147,13 +135,11 @@ static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
void memunmap_pages(struct dev_pagemap *pgmap)
{
- unsigned long pfn;
int i;
percpu_ref_kill(&pgmap->ref);
for (i = 0; i < pgmap->nr_range; i++)
- for_each_device_pfn(pfn, pgmap, i)
- put_page(pfn_to_page(pfn));
+ percpu_ref_put_many(&pgmap->ref, pfn_len(pgmap, i));
wait_for_completion(&pgmap->done);
percpu_ref_exit(&pgmap->ref);
@@ -282,7 +268,8 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
return 0;
err_add_memory:
- kasan_remove_zero_shadow(__va(range->start), range_len(range));
+ if (!is_private)
+ kasan_remove_zero_shadow(__va(range->start), range_len(range));
err_kasan:
untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range));
err_pfn_remap:
@@ -328,8 +315,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
}
break;
case MEMORY_DEVICE_FS_DAX:
- if (!IS_ENABLED(CONFIG_ZONE_DEVICE) ||
- IS_ENABLED(CONFIG_FS_DAX_LIMITED)) {
+ if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) {
WARN(1, "File system DAX not supported\n");
return ERR_PTR(-EINVAL);
}
@@ -465,21 +451,17 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
}
EXPORT_SYMBOL_GPL(get_dev_pagemap);
-#ifdef CONFIG_DEV_PAGEMAP_OPS
-void free_devmap_managed_page(struct page *page)
+void free_zone_device_page(struct page *page)
{
- /* notify page idle for dax */
- if (!is_device_private_page(page)) {
- wake_up_var(&page->_refcount);
+ if (WARN_ON_ONCE(!page->pgmap->ops || !page->pgmap->ops->page_free))
return;
- }
__ClearPageWaiters(page);
mem_cgroup_uncharge(page_folio(page));
/*
- * When a device_private page is freed, the page->mapping field
+ * When a device managed page is freed, the page->mapping field
* may still contain a (stale) mapping value. For example, the
* lower bits of page->mapping may still identify the page as an
* anonymous page. Ultimately, this entire field is just stale
@@ -501,5 +483,27 @@ void free_devmap_managed_page(struct page *page)
*/
page->mapping = NULL;
page->pgmap->ops->page_free(page);
+
+ /*
+ * Reset the page count to 1 to prepare for handing out the page again.
+ */
+ set_page_count(page, 1);
+}
+
+#ifdef CONFIG_FS_DAX
+bool __put_devmap_managed_page(struct page *page)
+{
+ if (page->pgmap->type != MEMORY_DEVICE_FS_DAX)
+ return false;
+
+ /*
+ * fsdax page refcounts are 1-based, rather than 0-based: if
+ * refcount is 1, then the page is free and the refcount is
+ * stable because nobody holds a reference on the page.
+ */
+ if (page_ref_dec_return(page) == 1)
+ wake_up_var(&page->_refcount);
+ return true;
}
-#endif /* CONFIG_DEV_PAGEMAP_OPS */
+EXPORT_SYMBOL(__put_devmap_managed_page);
+#endif /* CONFIG_FS_DAX */
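
As the comment above says, fsdax page refcounts are 1-based: a count of
1 means "free and idle", and __put_devmap_managed_page() reports when a
put brings the count back down to 1. A minimal userspace model of that
convention with C11 atomics (toy code, not the kernel implementation):

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

struct toy_page { atomic_int refcount; };

/* Drop one reference; report whether the page just became idle,
 * i.e. the count fell back to its base value of 1. */
static bool toy_put(struct toy_page *p)
{
	/* atomic_fetch_sub returns the old value: old 2 means new 1 */
	return atomic_fetch_sub(&p->refcount, 1) == 2;
}

int main(void)
{
	struct toy_page p = { .refcount = 3 };	/* base 1 plus two users */

	assert(!toy_put(&p));	/* 3 -> 2: still in use */
	assert(toy_put(&p));	/* 2 -> 1: idle, wake any waiters */
	return 0;
}
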
diff --git a/mm/migrate.c b/mm/migrate.c
index c7da064b4781..4f30ed37856f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -38,12 +38,10 @@
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/gfp.h>
-#include <linux/pagewalk.h>
#include <linux/pfn_t.h>
#include <linux/memremap.h>
#include <linux/userfaultfd_k.h>
#include <linux/balloon_compaction.h>
-#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/page_owner.h>
#include <linux/sched/mm.h>
@@ -51,6 +49,7 @@
#include <linux/oom.h>
#include <linux/memory.h>
#include <linux/random.h>
+#include <linux/sched/sysctl.h>
#include <asm/tlbflush.h>
@@ -107,7 +106,7 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode)
/* Driver shouldn't use PG_isolated bit of page->flags */
WARN_ON_ONCE(PageIsolated(page));
- __SetPageIsolated(page);
+ SetPageIsolated(page);
unlock_page(page);
return 0;
@@ -126,7 +125,7 @@ static void putback_movable_page(struct page *page)
mapping = page_mapping(page);
mapping->a_ops->putback_page(page);
- __ClearPageIsolated(page);
+ ClearPageIsolated(page);
}
/*
@@ -159,7 +158,7 @@ void putback_movable_pages(struct list_head *l)
if (PageMovable(page))
putback_movable_page(page);
else
- __ClearPageIsolated(page);
+ ClearPageIsolated(page);
unlock_page(page);
put_page(page);
} else {
@@ -173,37 +172,33 @@ void putback_movable_pages(struct list_head *l)
/*
* Restore a potential migration pte to a working pte entry
*/
-static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
- unsigned long addr, void *old)
+static bool remove_migration_pte(struct folio *folio,
+ struct vm_area_struct *vma, unsigned long addr, void *old)
{
- struct page_vma_mapped_walk pvmw = {
- .page = old,
- .vma = vma,
- .address = addr,
- .flags = PVMW_SYNC | PVMW_MIGRATION,
- };
- struct page *new;
- pte_t pte;
- swp_entry_t entry;
+ DEFINE_FOLIO_VMA_WALK(pvmw, old, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
- VM_BUG_ON_PAGE(PageTail(page), page);
while (page_vma_mapped_walk(&pvmw)) {
- if (PageKsm(page))
- new = page;
- else
- new = page - pvmw.page->index +
- linear_page_index(vma, pvmw.address);
+ pte_t pte;
+ swp_entry_t entry;
+ struct page *new;
+ unsigned long idx = 0;
+
+ /* pgoff is invalid for ksm pages, but they are never large */
+ if (folio_test_large(folio) && !folio_test_hugetlb(folio))
+ idx = linear_page_index(vma, pvmw.address) - pvmw.pgoff;
+ new = folio_page(folio, idx);
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
/* PMD-mapped THP migration entry */
if (!pvmw.pte) {
- VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
+ VM_BUG_ON_FOLIO(folio_test_hugetlb(folio) ||
+ !folio_test_pmd_mappable(folio), folio);
remove_migration_pmd(&pvmw, new);
continue;
}
#endif
- get_page(new);
+ folio_get(folio);
pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot)));
if (pte_swp_soft_dirty(*pvmw.pte))
pte = pte_mksoft_dirty(pte);
@@ -232,12 +227,12 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
}
#ifdef CONFIG_HUGETLB_PAGE
- if (PageHuge(new)) {
+ if (folio_test_hugetlb(folio)) {
unsigned int shift = huge_page_shift(hstate_vma(vma));
pte = pte_mkhuge(pte);
pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
- if (PageAnon(new))
+ if (folio_test_anon(folio))
hugepage_add_anon_rmap(new, vma, pvmw.address);
else
page_dup_rmap(new, true);
@@ -245,17 +240,14 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
} else
#endif
{
- if (PageAnon(new))
+ if (folio_test_anon(folio))
page_add_anon_rmap(new, vma, pvmw.address, false);
else
- page_add_file_rmap(new, false);
+ page_add_file_rmap(new, vma, false);
set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
}
- if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
- mlock_vma_page(new);
-
- if (PageTransHuge(page) && PageMlocked(page))
- clear_page_mlock(page);
+ if (vma->vm_flags & VM_LOCKED)
+ mlock_page_drain(smp_processor_id());
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, pvmw.address, pvmw.pte);
@@ -268,17 +260,17 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
* Get rid of all migration entries and replace them by
* references to the indicated page.
*/
-void remove_migration_ptes(struct page *old, struct page *new, bool locked)
+void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked)
{
struct rmap_walk_control rwc = {
.rmap_one = remove_migration_pte,
- .arg = old,
+ .arg = src,
};
if (locked)
- rmap_walk_locked(new, &rwc);
+ rmap_walk_locked(dst, &rwc);
else
- rmap_walk(new, &rwc);
+ rmap_walk(dst, &rwc);
}
/*
@@ -341,14 +333,8 @@ static int expected_page_refs(struct address_space *mapping, struct page *page)
{
int expected_count = 1;
- /*
- * Device private pages have an extra refcount as they are
- * ZONE_DEVICE pages.
- */
- expected_count += is_device_private_page(page);
if (mapping)
expected_count += compound_nr(page) + page_has_private(page);
-
return expected_count;
}
@@ -771,6 +757,7 @@ int buffer_migrate_page_norefs(struct address_space *mapping,
*/
static int writeout(struct address_space *mapping, struct page *page)
{
+ struct folio *folio = page_folio(page);
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
.nr_to_write = 1,
@@ -796,7 +783,7 @@ static int writeout(struct address_space *mapping, struct page *page)
* At this point we know that the migration attempt cannot
* be successful.
*/
- remove_migration_ptes(page, page, false);
+ remove_migration_ptes(folio, folio, false);
rc = mapping->a_ops->writepage(page, &wbc);
@@ -883,7 +870,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
VM_BUG_ON_PAGE(!PageIsolated(page), page);
if (!PageMovable(page)) {
rc = MIGRATEPAGE_SUCCESS;
- __ClearPageIsolated(page);
+ ClearPageIsolated(page);
goto out;
}
@@ -905,7 +892,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
* We clear PG_movable under page_lock so any compactor
* cannot try to migrate this page.
*/
- __ClearPageIsolated(page);
+ ClearPageIsolated(page);
}
/*
@@ -917,8 +904,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
page->mapping = NULL;
if (likely(!is_zone_device_page(newpage)))
- flush_dcache_page(newpage);
-
+ flush_dcache_folio(page_folio(newpage));
}
out:
return rc;
@@ -927,6 +913,8 @@ out:
static int __unmap_and_move(struct page *page, struct page *newpage,
int force, enum migrate_mode mode)
{
+ struct folio *folio = page_folio(page);
+ struct folio *dst = page_folio(newpage);
int rc = -EAGAIN;
bool page_was_mapped = false;
struct anon_vma *anon_vma = NULL;
@@ -1030,16 +1018,31 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
/* Establish migration ptes */
VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
page);
- try_to_migrate(page, 0);
+ try_to_migrate(folio, 0);
page_was_mapped = true;
}
if (!page_mapped(page))
rc = move_to_new_page(newpage, page, mode);
+ /*
+ * When successful, push newpage to LRU immediately: so that if it
+ * turns out to be an mlocked page, remove_migration_ptes() will
+ * automatically build up the correct newpage->mlock_count for it.
+ *
+ * We would like to do something similar for the old page, when
+ * unsuccessful, and other cases when a page has been temporarily
+ * isolated from the unevictable LRU: but this case is the easiest.
+ */
+ if (rc == MIGRATEPAGE_SUCCESS) {
+ lru_cache_add(newpage);
+ if (page_was_mapped)
+ lru_add_drain();
+ }
+
if (page_was_mapped)
- remove_migration_ptes(page,
- rc == MIGRATEPAGE_SUCCESS ? newpage : page, false);
+ remove_migration_ptes(folio,
+ rc == MIGRATEPAGE_SUCCESS ? dst : folio, false);
out_unlock_both:
unlock_page(newpage);
@@ -1050,20 +1053,12 @@ out_unlock:
unlock_page(page);
out:
/*
- * If migration is successful, decrease refcount of the newpage
+ * If migration is successful, decrease refcount of the newpage,
* which will not free the page because new page owner increased
- * refcounter. As well, if it is LRU page, add the page to LRU
- * list in here. Use the old state of the isolated source page to
- * determine if we migrated a LRU page. newpage was already unlocked
- * and possibly modified by its owner - don't rely on the page
- * state.
+ * the refcount.
*/
- if (rc == MIGRATEPAGE_SUCCESS) {
- if (unlikely(!is_lru))
- put_page(newpage);
- else
- putback_lru_page(newpage);
- }
+ if (rc == MIGRATEPAGE_SUCCESS)
+ put_page(newpage);
return rc;
}
@@ -1092,7 +1087,7 @@ static int unmap_and_move(new_page_t get_new_page,
if (unlikely(__PageMovable(page))) {
lock_page(page);
if (!PageMovable(page))
- __ClearPageIsolated(page);
+ ClearPageIsolated(page);
unlock_page(page);
}
goto out;
@@ -1173,6 +1168,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
enum migrate_mode mode, int reason,
struct list_head *ret)
{
+ struct folio *dst, *src = page_folio(hpage);
int rc = -EAGAIN;
int page_was_mapped = 0;
struct page *new_hpage;
@@ -1200,6 +1196,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
new_hpage = get_new_page(hpage, private);
if (!new_hpage)
return -ENOMEM;
+ dst = page_folio(new_hpage);
if (!trylock_page(hpage)) {
if (!force)
@@ -1249,7 +1246,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
ttu |= TTU_RMAP_LOCKED;
}
- try_to_migrate(hpage, ttu);
+ try_to_migrate(src, ttu);
page_was_mapped = 1;
if (mapping_locked)
@@ -1260,8 +1257,8 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
rc = move_to_new_page(new_hpage, hpage, mode);
if (page_was_mapped)
- remove_migration_ptes(hpage,
- rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false);
+ remove_migration_ptes(src,
+ rc == MIGRATEPAGE_SUCCESS ? dst : src, false);
unlock_put_anon:
unlock_page(new_hpage);
@@ -1351,7 +1348,6 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
bool is_thp = false;
struct page *page;
struct page *page2;
- int swapwrite = current->flags & PF_SWAPWRITE;
int rc, nr_subpages;
LIST_HEAD(ret_pages);
LIST_HEAD(thp_split_pages);
@@ -1360,9 +1356,6 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
trace_mm_migrate_pages_start(mode, reason);
- if (!swapwrite)
- current->flags |= PF_SWAPWRITE;
-
thp_subpage_migration:
for (pass = 0; pass < 10 && (retry || thp_retry); pass++) {
retry = 0;
@@ -1517,9 +1510,6 @@ out:
trace_mm_migrate_pages(nr_succeeded, nr_failed_pages, nr_thp_succeeded,
nr_thp_failed, nr_thp_split, mode, reason);
- if (!swapwrite)
- current->flags &= ~PF_SWAPWRITE;
-
if (ret_succeeded)
*ret_succeeded = nr_succeeded;
@@ -1612,7 +1602,6 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
{
struct vm_area_struct *vma;
struct page *page;
- unsigned int follflags;
int err;
mmap_read_lock(mm);
@@ -1622,8 +1611,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
goto out;
/* FOLL_DUMP to ignore special (like zero) pages */
- follflags = FOLL_GET | FOLL_DUMP;
- page = follow_page(vma, addr, follflags);
+ page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
err = PTR_ERR(page);
if (IS_ERR(page))
@@ -1762,6 +1750,13 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
}
/*
+ * The move_pages() man page does not have an -EEXIST choice, so
+ * use -EFAULT instead.
+ */
+ if (err == -EEXIST)
+ err = -EFAULT;
+
+ /*
* If the page is already on the target node (!err), store the
* node, otherwise, store the err.
*/
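
Userspace sees the result of this translation in the per-page status
array of move_pages(2), which reports either a node id or a negative
errno; the man page documents -EFAULT but not -EEXIST, hence the
mapping above. A small caller sketch (assumes libnuma's <numaif.h>,
link with -lnuma):

#include <numaif.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	long page_size = sysconf(_SC_PAGESIZE);
	void *page;
	int node = 0, status = -1;

	if (posix_memalign(&page, page_size, page_size))
		return 1;
	memset(page, 0, page_size);	/* fault the page in first */

	/* pid 0 means the calling process */
	if (move_pages(0, 1, &page, &node, &status, MPOL_MF_MOVE) == -1)
		perror("move_pages");
	else if (status < 0)
		fprintf(stderr, "page not moved: %s\n", strerror(-status));
	else
		printf("page now on node %d\n", status);

	free(page);
	return 0;
}
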
@@ -2034,16 +2029,27 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
{
int page_lru;
int nr_pages = thp_nr_pages(page);
+ int order = compound_order(page);
- VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
+ VM_BUG_ON_PAGE(order && !PageTransHuge(page), page);
/* Do not migrate THP mapped by multiple processes */
if (PageTransHuge(page) && total_mapcount(page) > 1)
return 0;
/* Avoid migrating to a node that is nearly full */
- if (!migrate_balanced_pgdat(pgdat, nr_pages))
+ if (!migrate_balanced_pgdat(pgdat, nr_pages)) {
+ int z;
+
+ if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING))
+ return 0;
+ for (z = pgdat->nr_zones - 1; z >= 0; z--) {
+ if (populated_zone(pgdat->node_zones + z))
+ break;
+ }
+ wakeup_kswapd(pgdat->node_zones + z, 0, order, ZONE_MOVABLE);
return 0;
+ }
if (isolate_lru_page(page))
return 0;
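
The branch added above scans the node's zones from the top for the
first populated one and wakes kswapd there, so a nearly-full promotion
target gets background reclaim instead of a plain refusal. The scan is
a top-down first-match search; a toy model with invented numbers:

#include <stdio.h>

int main(void)
{
	/* present pages per zone index, lowest to highest (made up) */
	unsigned long populated[] = { 4096, 2048, 0, 0 };
	int nr_zones = 4, z;

	for (z = nr_zones - 1; z >= 0; z--)
		if (populated[z])
			break;
	printf("highest populated zone index: %d\n", z);	/* prints 1 */
	return 0;
}
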
@@ -2072,6 +2078,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
pg_data_t *pgdat = NODE_DATA(node);
int isolated;
int nr_remaining;
+ unsigned int nr_succeeded;
LIST_HEAD(migratepages);
new_page_t *new;
bool compound;
@@ -2110,7 +2117,8 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
list_add(&page->lru, &migratepages);
nr_remaining = migrate_pages(&migratepages, *new, NULL, node,
- MIGRATE_ASYNC, MR_NUMA_MISPLACED, NULL);
+ MIGRATE_ASYNC, MR_NUMA_MISPLACED,
+ &nr_succeeded);
if (nr_remaining) {
if (!list_empty(&migratepages)) {
list_del(&page->lru);
@@ -2119,8 +2127,13 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
putback_lru_page(page);
}
isolated = 0;
- } else
- count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_pages);
+ }
+ if (nr_succeeded) {
+ count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_succeeded);
+ if (!node_is_toptier(page_to_nid(page)) && node_is_toptier(node))
+ mod_node_page_state(pgdat, PGPROMOTE_SUCCESS,
+ nr_succeeded);
+ }
BUG_ON(!list_empty(&migratepages));
return isolated;
@@ -2131,761 +2144,6 @@ out:
#endif /* CONFIG_NUMA_BALANCING */
#endif /* CONFIG_NUMA */
-#ifdef CONFIG_DEVICE_PRIVATE
-static int migrate_vma_collect_skip(unsigned long start,
- unsigned long end,
- struct mm_walk *walk)
-{
- struct migrate_vma *migrate = walk->private;
- unsigned long addr;
-
- for (addr = start; addr < end; addr += PAGE_SIZE) {
- migrate->dst[migrate->npages] = 0;
- migrate->src[migrate->npages++] = 0;
- }
-
- return 0;
-}
-
-static int migrate_vma_collect_hole(unsigned long start,
- unsigned long end,
- __always_unused int depth,
- struct mm_walk *walk)
-{
- struct migrate_vma *migrate = walk->private;
- unsigned long addr;
-
- /* Only allow populating anonymous memory. */
- if (!vma_is_anonymous(walk->vma))
- return migrate_vma_collect_skip(start, end, walk);
-
- for (addr = start; addr < end; addr += PAGE_SIZE) {
- migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
- migrate->dst[migrate->npages] = 0;
- migrate->npages++;
- migrate->cpages++;
- }
-
- return 0;
-}
-
-static int migrate_vma_collect_pmd(pmd_t *pmdp,
- unsigned long start,
- unsigned long end,
- struct mm_walk *walk)
-{
- struct migrate_vma *migrate = walk->private;
- struct vm_area_struct *vma = walk->vma;
- struct mm_struct *mm = vma->vm_mm;
- unsigned long addr = start, unmapped = 0;
- spinlock_t *ptl;
- pte_t *ptep;
-
-again:
- if (pmd_none(*pmdp))
- return migrate_vma_collect_hole(start, end, -1, walk);
-
- if (pmd_trans_huge(*pmdp)) {
- struct page *page;
-
- ptl = pmd_lock(mm, pmdp);
- if (unlikely(!pmd_trans_huge(*pmdp))) {
- spin_unlock(ptl);
- goto again;
- }
-
- page = pmd_page(*pmdp);
- if (is_huge_zero_page(page)) {
- spin_unlock(ptl);
- split_huge_pmd(vma, pmdp, addr);
- if (pmd_trans_unstable(pmdp))
- return migrate_vma_collect_skip(start, end,
- walk);
- } else {
- int ret;
-
- get_page(page);
- spin_unlock(ptl);
- if (unlikely(!trylock_page(page)))
- return migrate_vma_collect_skip(start, end,
- walk);
- ret = split_huge_page(page);
- unlock_page(page);
- put_page(page);
- if (ret)
- return migrate_vma_collect_skip(start, end,
- walk);
- if (pmd_none(*pmdp))
- return migrate_vma_collect_hole(start, end, -1,
- walk);
- }
- }
-
- if (unlikely(pmd_bad(*pmdp)))
- return migrate_vma_collect_skip(start, end, walk);
-
- ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
- arch_enter_lazy_mmu_mode();
-
- for (; addr < end; addr += PAGE_SIZE, ptep++) {
- unsigned long mpfn = 0, pfn;
- struct page *page;
- swp_entry_t entry;
- pte_t pte;
-
- pte = *ptep;
-
- if (pte_none(pte)) {
- if (vma_is_anonymous(vma)) {
- mpfn = MIGRATE_PFN_MIGRATE;
- migrate->cpages++;
- }
- goto next;
- }
-
- if (!pte_present(pte)) {
- /*
- * Only care about unaddressable device page special
- * page table entry. Other special swap entries are not
- * migratable, and we ignore regular swapped page.
- */
- entry = pte_to_swp_entry(pte);
- if (!is_device_private_entry(entry))
- goto next;
-
- page = pfn_swap_entry_to_page(entry);
- if (!(migrate->flags &
- MIGRATE_VMA_SELECT_DEVICE_PRIVATE) ||
- page->pgmap->owner != migrate->pgmap_owner)
- goto next;
-
- mpfn = migrate_pfn(page_to_pfn(page)) |
- MIGRATE_PFN_MIGRATE;
- if (is_writable_device_private_entry(entry))
- mpfn |= MIGRATE_PFN_WRITE;
- } else {
- if (!(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
- goto next;
- pfn = pte_pfn(pte);
- if (is_zero_pfn(pfn)) {
- mpfn = MIGRATE_PFN_MIGRATE;
- migrate->cpages++;
- goto next;
- }
- page = vm_normal_page(migrate->vma, addr, pte);
- mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
- mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
- }
-
- /* FIXME support THP */
- if (!page || !page->mapping || PageTransCompound(page)) {
- mpfn = 0;
- goto next;
- }
-
- /*
- * By getting a reference on the page we pin it and that blocks
- * any kind of migration. Side effect is that it "freezes" the
- * pte.
- *
- * We drop this reference after isolating the page from the lru
- * for non device page (device page are not on the lru and thus
- * can't be dropped from it).
- */
- get_page(page);
-
- /*
- * Optimize for the common case where page is only mapped once
- * in one process. If we can lock the page, then we can safely
- * set up a special migration page table entry now.
- */
- if (trylock_page(page)) {
- pte_t swp_pte;
-
- migrate->cpages++;
- ptep_get_and_clear(mm, addr, ptep);
-
- /* Setup special migration page table entry */
- if (mpfn & MIGRATE_PFN_WRITE)
- entry = make_writable_migration_entry(
- page_to_pfn(page));
- else
- entry = make_readable_migration_entry(
- page_to_pfn(page));
- swp_pte = swp_entry_to_pte(entry);
- if (pte_present(pte)) {
- if (pte_soft_dirty(pte))
- swp_pte = pte_swp_mksoft_dirty(swp_pte);
- if (pte_uffd_wp(pte))
- swp_pte = pte_swp_mkuffd_wp(swp_pte);
- } else {
- if (pte_swp_soft_dirty(pte))
- swp_pte = pte_swp_mksoft_dirty(swp_pte);
- if (pte_swp_uffd_wp(pte))
- swp_pte = pte_swp_mkuffd_wp(swp_pte);
- }
- set_pte_at(mm, addr, ptep, swp_pte);
-
- /*
- * This is like regular unmap: we remove the rmap and
- * drop page refcount. Page won't be freed, as we took
- * a reference just above.
- */
- page_remove_rmap(page, false);
- put_page(page);
-
- if (pte_present(pte))
- unmapped++;
- } else {
- put_page(page);
- mpfn = 0;
- }
-
-next:
- migrate->dst[migrate->npages] = 0;
- migrate->src[migrate->npages++] = mpfn;
- }
- arch_leave_lazy_mmu_mode();
- pte_unmap_unlock(ptep - 1, ptl);
-
- /* Only flush the TLB if we actually modified any entries */
- if (unmapped)
- flush_tlb_range(walk->vma, start, end);
-
- return 0;
-}
-
-static const struct mm_walk_ops migrate_vma_walk_ops = {
- .pmd_entry = migrate_vma_collect_pmd,
- .pte_hole = migrate_vma_collect_hole,
-};
-
-/*
- * migrate_vma_collect() - collect pages over a range of virtual addresses
- * @migrate: migrate struct containing all migration information
- *
- * This will walk the CPU page table. For each virtual address backed by a
- * valid page, it updates the src array and takes a reference on the page, in
- * order to pin the page until we lock it and unmap it.
- */
-static void migrate_vma_collect(struct migrate_vma *migrate)
-{
- struct mmu_notifier_range range;
-
- /*
- * Note that the pgmap_owner is passed to the mmu notifier callback so
- * that the registered device driver can skip invalidating device
- * private page mappings that won't be migrated.
- */
- mmu_notifier_range_init_owner(&range, MMU_NOTIFY_MIGRATE, 0,
- migrate->vma, migrate->vma->vm_mm, migrate->start, migrate->end,
- migrate->pgmap_owner);
- mmu_notifier_invalidate_range_start(&range);
-
- walk_page_range(migrate->vma->vm_mm, migrate->start, migrate->end,
- &migrate_vma_walk_ops, migrate);
-
- mmu_notifier_invalidate_range_end(&range);
- migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT);
-}
-
-/*
- * migrate_vma_check_page() - check if page is pinned or not
- * @page: struct page to check
- *
- * Pinned pages cannot be migrated. This is the same test as in
- * folio_migrate_mapping(), except that here we allow migration of a
- * ZONE_DEVICE page.
- */
-static bool migrate_vma_check_page(struct page *page)
-{
- /*
- * One extra ref because caller holds an extra reference, either from
- * isolate_lru_page() for a regular page, or migrate_vma_collect() for
- * a device page.
- */
- int extra = 1;
-
- /*
- * FIXME support THP (transparent huge page), it is bit more complex to
- * check them than regular pages, because they can be mapped with a pmd
- * or with a pte (split pte mapping).
- */
- if (PageCompound(page))
- return false;
-
- /* Page from ZONE_DEVICE have one extra reference */
- if (is_zone_device_page(page))
- extra++;
-
- /* For file back page */
- if (page_mapping(page))
- extra += 1 + page_has_private(page);
-
- if ((page_count(page) - extra) > page_mapcount(page))
- return false;
-
- return true;
-}
-
-/*
- * migrate_vma_unmap() - replace page mapping with special migration pte entry
- * @migrate: migrate struct containing all migration information
- *
- * Isolate pages from the LRU and replace mappings (CPU page table pte) with a
- * special migration pte entry and check if it has been pinned. Pinned pages are
- * restored because we cannot migrate them.
- *
- * This is the last step before we call the device driver callback to allocate
- * destination memory and copy contents of original page over to new page.
- */
-static void migrate_vma_unmap(struct migrate_vma *migrate)
-{
- const unsigned long npages = migrate->npages;
- unsigned long i, restore = 0;
- bool allow_drain = true;
-
- lru_add_drain();
-
- for (i = 0; i < npages; i++) {
- struct page *page = migrate_pfn_to_page(migrate->src[i]);
-
- if (!page)
- continue;
-
- /* ZONE_DEVICE pages are not on LRU */
- if (!is_zone_device_page(page)) {
- if (!PageLRU(page) && allow_drain) {
- /* Drain CPU's pagevec */
- lru_add_drain_all();
- allow_drain = false;
- }
-
- if (isolate_lru_page(page)) {
- migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
- migrate->cpages--;
- restore++;
- continue;
- }
-
- /* Drop the reference we took in collect */
- put_page(page);
- }
-
- if (page_mapped(page))
- try_to_migrate(page, 0);
-
- if (page_mapped(page) || !migrate_vma_check_page(page)) {
- if (!is_zone_device_page(page)) {
- get_page(page);
- putback_lru_page(page);
- }
-
- migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
- migrate->cpages--;
- restore++;
- continue;
- }
- }
-
- for (i = 0; i < npages && restore; i++) {
- struct page *page = migrate_pfn_to_page(migrate->src[i]);
-
- if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
- continue;
-
- remove_migration_ptes(page, page, false);
-
- migrate->src[i] = 0;
- unlock_page(page);
- put_page(page);
- restore--;
- }
-}
-
-/**
- * migrate_vma_setup() - prepare to migrate a range of memory
- * @args: contains the vma, start, and pfns arrays for the migration
- *
- * Returns: negative errno on failures, 0 when 0 or more pages were migrated
- * without an error.
- *
- * Prepare to migrate a range of memory virtual address range by collecting all
- * the pages backing each virtual address in the range, saving them inside the
- * src array. Then lock those pages and unmap them. Once the pages are locked
- * and unmapped, check whether each page is pinned or not. Pages that aren't
- * pinned have the MIGRATE_PFN_MIGRATE flag set (by this function) in the
- * corresponding src array entry. Then restores any pages that are pinned, by
- * remapping and unlocking those pages.
- *
- * The caller should then allocate destination memory and copy source memory to
- * it for all those entries (ie with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE
- * flag set). Once these are allocated and copied, the caller must update each
- * corresponding entry in the dst array with the pfn value of the destination
- * page and with MIGRATE_PFN_VALID. Destination pages must be locked via
- * lock_page().
- *
- * Note that the caller does not have to migrate all the pages that are marked
- * with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration from
- * device memory to system memory. If the caller cannot migrate a device page
- * back to system memory, then it must return VM_FAULT_SIGBUS, which has severe
- * consequences for the userspace process, so it must be avoided if at all
- * possible.
- *
- * For empty entries inside CPU page table (pte_none() or pmd_none() is true) we
- * do set MIGRATE_PFN_MIGRATE flag inside the corresponding source array thus
- * allowing the caller to allocate device memory for those unbacked virtual
- * addresses. For this the caller simply has to allocate device memory and
- * properly set the destination entry like for regular migration. Note that
- * this can still fail, and thus inside the device driver you must check if the
- * migration was successful for those entries after calling migrate_vma_pages(),
- * just like for regular migration.
- *
- * After that, the callers must call migrate_vma_pages() to go over each entry
- * in the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag
- * set. If the corresponding entry in dst array has MIGRATE_PFN_VALID flag set,
- * then migrate_vma_pages() to migrate struct page information from the source
- * struct page to the destination struct page. If it fails to migrate the
- * struct page information, then it clears the MIGRATE_PFN_MIGRATE flag in the
- * src array.
- *
- * At this point all successfully migrated pages have an entry in the src
- * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst
- * array entry with MIGRATE_PFN_VALID flag set.
- *
- * Once migrate_vma_pages() returns the caller may inspect which pages were
- * successfully migrated, and which were not. Successfully migrated pages will
- * have the MIGRATE_PFN_MIGRATE flag set for their src array entry.
- *
- * It is safe to update device page table after migrate_vma_pages() because
- * both destination and source page are still locked, and the mmap_lock is held
- * in read mode (hence no one can unmap the range being migrated).
- *
- * Once the caller is done cleaning up things and updating its page table (if it
- * chose to do so, this is not an obligation) it finally calls
- * migrate_vma_finalize() to update the CPU page table to point to new pages
- * for successfully migrated pages or otherwise restore the CPU page table to
- * point to the original source pages.
- */
-int migrate_vma_setup(struct migrate_vma *args)
-{
- long nr_pages = (args->end - args->start) >> PAGE_SHIFT;
-
- args->start &= PAGE_MASK;
- args->end &= PAGE_MASK;
- if (!args->vma || is_vm_hugetlb_page(args->vma) ||
- (args->vma->vm_flags & VM_SPECIAL) || vma_is_dax(args->vma))
- return -EINVAL;
- if (nr_pages <= 0)
- return -EINVAL;
- if (args->start < args->vma->vm_start ||
- args->start >= args->vma->vm_end)
- return -EINVAL;
- if (args->end <= args->vma->vm_start || args->end > args->vma->vm_end)
- return -EINVAL;
- if (!args->src || !args->dst)
- return -EINVAL;
-
- memset(args->src, 0, sizeof(*args->src) * nr_pages);
- args->cpages = 0;
- args->npages = 0;
-
- migrate_vma_collect(args);
-
- if (args->cpages)
- migrate_vma_unmap(args);
-
- /*
- * At this point pages are locked and unmapped, and thus they have
- * stable content and can safely be copied to destination memory that
- * is allocated by the drivers.
- */
- return 0;
-
-}
-EXPORT_SYMBOL(migrate_vma_setup);
-
-/*
- * This code closely matches the code in:
- * __handle_mm_fault()
- * handle_pte_fault()
- * do_anonymous_page()
- * to map in an anonymous zero page but the struct page will be a ZONE_DEVICE
- * private page.
- */
-static void migrate_vma_insert_page(struct migrate_vma *migrate,
- unsigned long addr,
- struct page *page,
- unsigned long *src)
-{
- struct vm_area_struct *vma = migrate->vma;
- struct mm_struct *mm = vma->vm_mm;
- bool flush = false;
- spinlock_t *ptl;
- pte_t entry;
- pgd_t *pgdp;
- p4d_t *p4dp;
- pud_t *pudp;
- pmd_t *pmdp;
- pte_t *ptep;
-
- /* Only allow populating anonymous memory */
- if (!vma_is_anonymous(vma))
- goto abort;
-
- pgdp = pgd_offset(mm, addr);
- p4dp = p4d_alloc(mm, pgdp, addr);
- if (!p4dp)
- goto abort;
- pudp = pud_alloc(mm, p4dp, addr);
- if (!pudp)
- goto abort;
- pmdp = pmd_alloc(mm, pudp, addr);
- if (!pmdp)
- goto abort;
-
- if (pmd_trans_huge(*pmdp) || pmd_devmap(*pmdp))
- goto abort;
-
- /*
- * Use pte_alloc() instead of pte_alloc_map(). We can't run
- * pte_offset_map() on pmds where a huge pmd might be created
- * from a different thread.
- *
- * pte_alloc_map() is safe to use under mmap_write_lock(mm) or when
- * parallel threads are excluded by other means.
- *
- * Here we only have mmap_read_lock(mm).
- */
- if (pte_alloc(mm, pmdp))
- goto abort;
-
- /* See the comment in pte_alloc_one_map() */
- if (unlikely(pmd_trans_unstable(pmdp)))
- goto abort;
-
- if (unlikely(anon_vma_prepare(vma)))
- goto abort;
- if (mem_cgroup_charge(page_folio(page), vma->vm_mm, GFP_KERNEL))
- goto abort;
-
- /*
- * The memory barrier inside __SetPageUptodate makes sure that
- * preceding stores to the page contents become visible before
- * the set_pte_at() write.
- */
- __SetPageUptodate(page);
-
- if (is_zone_device_page(page)) {
- if (is_device_private_page(page)) {
- swp_entry_t swp_entry;
-
- if (vma->vm_flags & VM_WRITE)
- swp_entry = make_writable_device_private_entry(
- page_to_pfn(page));
- else
- swp_entry = make_readable_device_private_entry(
- page_to_pfn(page));
- entry = swp_entry_to_pte(swp_entry);
- } else {
- /*
- * For now we only support migrating to un-addressable
- * device memory.
- */
- pr_warn_once("Unsupported ZONE_DEVICE page type.\n");
- goto abort;
- }
- } else {
- entry = mk_pte(page, vma->vm_page_prot);
- if (vma->vm_flags & VM_WRITE)
- entry = pte_mkwrite(pte_mkdirty(entry));
- }
-
- ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
-
- if (check_stable_address_space(mm))
- goto unlock_abort;
-
- if (pte_present(*ptep)) {
- unsigned long pfn = pte_pfn(*ptep);
-
- if (!is_zero_pfn(pfn))
- goto unlock_abort;
- flush = true;
- } else if (!pte_none(*ptep))
- goto unlock_abort;
-
- /*
- * Check for userfaultfd but do not deliver the fault. Instead,
- * just back off.
- */
- if (userfaultfd_missing(vma))
- goto unlock_abort;
-
- inc_mm_counter(mm, MM_ANONPAGES);
- page_add_new_anon_rmap(page, vma, addr, false);
- if (!is_zone_device_page(page))
- lru_cache_add_inactive_or_unevictable(page, vma);
- get_page(page);
-
- if (flush) {
- flush_cache_page(vma, addr, pte_pfn(*ptep));
- ptep_clear_flush_notify(vma, addr, ptep);
- set_pte_at_notify(mm, addr, ptep, entry);
- update_mmu_cache(vma, addr, ptep);
- } else {
- /* No need to invalidate - it was non-present before */
- set_pte_at(mm, addr, ptep, entry);
- update_mmu_cache(vma, addr, ptep);
- }
-
- pte_unmap_unlock(ptep, ptl);
- *src = MIGRATE_PFN_MIGRATE;
- return;
-
-unlock_abort:
- pte_unmap_unlock(ptep, ptl);
-abort:
- *src &= ~MIGRATE_PFN_MIGRATE;
-}
-
-/**
- * migrate_vma_pages() - migrate meta-data from src page to dst page
- * @migrate: migrate struct containing all migration information
- *
- * This migrates struct page meta-data from source struct page to destination
- * struct page. This effectively finishes the migration from source page to the
- * destination page.
- */
-void migrate_vma_pages(struct migrate_vma *migrate)
-{
- const unsigned long npages = migrate->npages;
- const unsigned long start = migrate->start;
- struct mmu_notifier_range range;
- unsigned long addr, i;
- bool notified = false;
-
- for (i = 0, addr = start; i < npages; addr += PAGE_SIZE, i++) {
- struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
- struct page *page = migrate_pfn_to_page(migrate->src[i]);
- struct address_space *mapping;
- int r;
-
- if (!newpage) {
- migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
- continue;
- }
-
- if (!page) {
- if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE))
- continue;
- if (!notified) {
- notified = true;
-
- mmu_notifier_range_init_owner(&range,
- MMU_NOTIFY_MIGRATE, 0, migrate->vma,
- migrate->vma->vm_mm, addr, migrate->end,
- migrate->pgmap_owner);
- mmu_notifier_invalidate_range_start(&range);
- }
- migrate_vma_insert_page(migrate, addr, newpage,
- &migrate->src[i]);
- continue;
- }
-
- mapping = page_mapping(page);
-
- if (is_zone_device_page(newpage)) {
- if (is_device_private_page(newpage)) {
- /*
- * For now only support private anonymous when
- * migrating to un-addressable device memory.
- */
- if (mapping) {
- migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
- continue;
- }
- } else {
- /*
- * Other types of ZONE_DEVICE page are not
- * supported.
- */
- migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
- continue;
- }
- }
-
- r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY);
- if (r != MIGRATEPAGE_SUCCESS)
- migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
- }
-
- /*
- * No need to double call mmu_notifier->invalidate_range() callback as
- * the above ptep_clear_flush_notify() inside migrate_vma_insert_page()
- * did already call it.
- */
- if (notified)
- mmu_notifier_invalidate_range_only_end(&range);
-}
-EXPORT_SYMBOL(migrate_vma_pages);
-
-/**
- * migrate_vma_finalize() - restore CPU page table entry
- * @migrate: migrate struct containing all migration information
- *
- * This replaces the special migration pte entry with either a mapping to the
- * new page if migration was successful for that page, or to the original page
- * otherwise.
- *
- * This also unlocks the pages and puts them back on the lru, or drops the extra
- * refcount, for device pages.
- */
-void migrate_vma_finalize(struct migrate_vma *migrate)
-{
- const unsigned long npages = migrate->npages;
- unsigned long i;
-
- for (i = 0; i < npages; i++) {
- struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
- struct page *page = migrate_pfn_to_page(migrate->src[i]);
-
- if (!page) {
- if (newpage) {
- unlock_page(newpage);
- put_page(newpage);
- }
- continue;
- }
-
- if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE) || !newpage) {
- if (newpage) {
- unlock_page(newpage);
- put_page(newpage);
- }
- newpage = page;
- }
-
- remove_migration_ptes(page, newpage, false);
- unlock_page(page);
-
- if (is_zone_device_page(page))
- put_page(page);
- else
- putback_lru_page(page);
-
- if (newpage != page) {
- unlock_page(newpage);
- if (is_zone_device_page(newpage))
- put_page(newpage);
- else
- putback_lru_page(newpage);
- }
- }
-}
-EXPORT_SYMBOL(migrate_vma_finalize);
-#endif /* CONFIG_DEVICE_PRIVATE */
-
/*
* node_demotion[] example:
*
@@ -3082,18 +2340,21 @@ static int establish_migrate_target(int node, nodemask_t *used,
if (best_distance != -1) {
val = node_distance(node, migration_target);
if (val > best_distance)
- return NUMA_NO_NODE;
+ goto out_clear;
}
index = nd->nr;
if (WARN_ONCE(index >= DEMOTION_TARGET_NODES,
"Exceeds maximum demotion target nodes\n"))
- return NUMA_NO_NODE;
+ goto out_clear;
nd->nodes[index] = migration_target;
nd->nr++;
return migration_target;
+out_clear:
+ node_clear(migration_target, *used);
+ return NUMA_NO_NODE;
}
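
The new out_clear label is the interesting part: the candidate node was
already marked in the used mask by the preceding search, so a rejection
(too distant, or the demotion table is full) must clear that mark again
or the node would wrongly stay unavailable to every source node handled
later. A self-contained toy model of that bookkeeping (invented
distances, not kernel code):

#include <assert.h>

#define NR_NODES 3

static const int distance[NR_NODES][NR_NODES] = {
	{ 10, 20, 30 },
	{ 20, 10, 30 },
	{ 30, 30, 10 },
};

/* The candidate arrives already marked in *used; on rejection the
 * mark must be undone -- the equivalent of out_clear above. */
static int establish_target(int src, int candidate, int best_dist,
			    unsigned *used)
{
	if (best_dist != -1 && distance[src][candidate] > best_dist) {
		*used &= ~(1u << candidate);	/* out_clear */
		return -1;
	}
	return candidate;
}

int main(void)
{
	unsigned used = 1u << 2;	/* the search proposed node 2 */

	/* node 2 is farther than the current best distance of 20 */
	assert(establish_target(0, 2, 20, &used) == -1);
	/* thanks to the clear, node 2 is available again */
	assert(!(used & (1u << 2)));
	return 0;
}
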
/*
@@ -3190,7 +2451,7 @@ again:
/*
* For callers that do not hold get_online_mems() already.
*/
-static void set_migration_target_nodes(void)
+void set_migration_target_nodes(void)
{
get_online_mems();
__set_migration_target_nodes();
@@ -3254,51 +2515,24 @@ static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
return notifier_from_errno(0);
}
-/*
- * React to hotplug events that might affect the migration targets
- * like events that online or offline NUMA nodes.
- *
- * The ordering is also currently dependent on which nodes have
- * CPUs. That means we need CPU on/offline notification too.
- */
-static int migration_online_cpu(unsigned int cpu)
-{
- set_migration_target_nodes();
- return 0;
-}
-
-static int migration_offline_cpu(unsigned int cpu)
+void __init migrate_on_reclaim_init(void)
{
- set_migration_target_nodes();
- return 0;
-}
-
-static int __init migrate_on_reclaim_init(void)
-{
- int ret;
-
node_demotion = kmalloc_array(nr_node_ids,
sizeof(struct demotion_nodes),
GFP_KERNEL);
WARN_ON(!node_demotion);
- ret = cpuhp_setup_state_nocalls(CPUHP_MM_DEMOTION_DEAD, "mm/demotion:offline",
- NULL, migration_offline_cpu);
+ hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
/*
- * In the unlikely case that this fails, the automatic
- * migration targets may become suboptimal for nodes
- * where N_CPU changes. With such a small impact in a
- * rare case, do not bother trying to do anything special.
+ * At this point, all NUMA nodes with memory/CPUs have their state
+ * properly set, so we can build the demotion order now.
+ * Hold the cpu_hotplug lock while doing so, as we could possibly
+ * get CPU hotplug events during boot.
*/
- WARN_ON(ret < 0);
- ret = cpuhp_setup_state(CPUHP_AP_MM_DEMOTION_ONLINE, "mm/demotion:online",
- migration_online_cpu, NULL);
- WARN_ON(ret < 0);
-
- hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
- return 0;
+ cpus_read_lock();
+ set_migration_target_nodes();
+ cpus_read_unlock();
}
-late_initcall(migrate_on_reclaim_init);
#endif /* CONFIG_HOTPLUG_CPU */
bool numa_demotion_enabled = false;
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
new file mode 100644
index 000000000000..70c7dc05bbfc
--- /dev/null
+++ b/mm/migrate_device.c
@@ -0,0 +1,773 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device Memory Migration functionality.
+ *
+ * Originally written by Jérôme Glisse.
+ */
+#include <linux/export.h>
+#include <linux/memremap.h>
+#include <linux/migrate.h>
+#include <linux/mm_inline.h>
+#include <linux/mmu_notifier.h>
+#include <linux/oom.h>
+#include <linux/pagewalk.h>
+#include <linux/rmap.h>
+#include <linux/swapops.h>
+#include <asm/tlbflush.h>
+#include "internal.h"
+
+static int migrate_vma_collect_skip(unsigned long start,
+ unsigned long end,
+ struct mm_walk *walk)
+{
+ struct migrate_vma *migrate = walk->private;
+ unsigned long addr;
+
+ for (addr = start; addr < end; addr += PAGE_SIZE) {
+ migrate->dst[migrate->npages] = 0;
+ migrate->src[migrate->npages++] = 0;
+ }
+
+ return 0;
+}
+
+static int migrate_vma_collect_hole(unsigned long start,
+ unsigned long end,
+ __always_unused int depth,
+ struct mm_walk *walk)
+{
+ struct migrate_vma *migrate = walk->private;
+ unsigned long addr;
+
+ /* Only allow populating anonymous memory. */
+ if (!vma_is_anonymous(walk->vma))
+ return migrate_vma_collect_skip(start, end, walk);
+
+ for (addr = start; addr < end; addr += PAGE_SIZE) {
+ migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
+ migrate->dst[migrate->npages] = 0;
+ migrate->npages++;
+ migrate->cpages++;
+ }
+
+ return 0;
+}
+
+static int migrate_vma_collect_pmd(pmd_t *pmdp,
+ unsigned long start,
+ unsigned long end,
+ struct mm_walk *walk)
+{
+ struct migrate_vma *migrate = walk->private;
+ struct vm_area_struct *vma = walk->vma;
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long addr = start, unmapped = 0;
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+again:
+ if (pmd_none(*pmdp))
+ return migrate_vma_collect_hole(start, end, -1, walk);
+
+ if (pmd_trans_huge(*pmdp)) {
+ struct page *page;
+
+ ptl = pmd_lock(mm, pmdp);
+ if (unlikely(!pmd_trans_huge(*pmdp))) {
+ spin_unlock(ptl);
+ goto again;
+ }
+
+ page = pmd_page(*pmdp);
+ if (is_huge_zero_page(page)) {
+ spin_unlock(ptl);
+ split_huge_pmd(vma, pmdp, addr);
+ if (pmd_trans_unstable(pmdp))
+ return migrate_vma_collect_skip(start, end,
+ walk);
+ } else {
+ int ret;
+
+ get_page(page);
+ spin_unlock(ptl);
+ if (unlikely(!trylock_page(page)))
+ return migrate_vma_collect_skip(start, end,
+ walk);
+ ret = split_huge_page(page);
+ unlock_page(page);
+ put_page(page);
+ if (ret)
+ return migrate_vma_collect_skip(start, end,
+ walk);
+ if (pmd_none(*pmdp))
+ return migrate_vma_collect_hole(start, end, -1,
+ walk);
+ }
+ }
+
+ if (unlikely(pmd_bad(*pmdp)))
+ return migrate_vma_collect_skip(start, end, walk);
+
+ ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+ arch_enter_lazy_mmu_mode();
+
+ for (; addr < end; addr += PAGE_SIZE, ptep++) {
+ unsigned long mpfn = 0, pfn;
+ struct page *page;
+ swp_entry_t entry;
+ pte_t pte;
+
+ pte = *ptep;
+
+ if (pte_none(pte)) {
+ if (vma_is_anonymous(vma)) {
+ mpfn = MIGRATE_PFN_MIGRATE;
+ migrate->cpages++;
+ }
+ goto next;
+ }
+
+ if (!pte_present(pte)) {
+ /*
+ * We only care about unaddressable device page special
+ * page table entries. Other special swap entries are not
+ * migratable, and we ignore regular swapped pages.
+ */
+ entry = pte_to_swp_entry(pte);
+ if (!is_device_private_entry(entry))
+ goto next;
+
+ page = pfn_swap_entry_to_page(entry);
+ if (!(migrate->flags &
+ MIGRATE_VMA_SELECT_DEVICE_PRIVATE) ||
+ page->pgmap->owner != migrate->pgmap_owner)
+ goto next;
+
+ mpfn = migrate_pfn(page_to_pfn(page)) |
+ MIGRATE_PFN_MIGRATE;
+ if (is_writable_device_private_entry(entry))
+ mpfn |= MIGRATE_PFN_WRITE;
+ } else {
+ if (!(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
+ goto next;
+ pfn = pte_pfn(pte);
+ if (is_zero_pfn(pfn)) {
+ mpfn = MIGRATE_PFN_MIGRATE;
+ migrate->cpages++;
+ goto next;
+ }
+ page = vm_normal_page(migrate->vma, addr, pte);
+ mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
+ mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
+ }
+
+ /* FIXME support THP */
+ if (!page || !page->mapping || PageTransCompound(page)) {
+ mpfn = 0;
+ goto next;
+ }
+
+ /*
+ * By getting a reference on the page we pin it and that blocks
+ * any kind of migration. Side effect is that it "freezes" the
+ * pte.
+ *
+ * We drop this reference after isolating the page from the lru
+ * for non-device pages (device pages are not on the lru and thus
+ * can't be dropped from it).
+ */
+ get_page(page);
+
+ /*
+ * Optimize for the common case where page is only mapped once
+ * in one process. If we can lock the page, then we can safely
+ * set up a special migration page table entry now.
+ */
+ if (trylock_page(page)) {
+ pte_t swp_pte;
+
+ migrate->cpages++;
+ ptep_get_and_clear(mm, addr, ptep);
+
+ /* Setup special migration page table entry */
+ if (mpfn & MIGRATE_PFN_WRITE)
+ entry = make_writable_migration_entry(
+ page_to_pfn(page));
+ else
+ entry = make_readable_migration_entry(
+ page_to_pfn(page));
+ swp_pte = swp_entry_to_pte(entry);
+ if (pte_present(pte)) {
+ if (pte_soft_dirty(pte))
+ swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_uffd_wp(pte))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
+ } else {
+ if (pte_swp_soft_dirty(pte))
+ swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_swp_uffd_wp(pte))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
+ }
+ set_pte_at(mm, addr, ptep, swp_pte);
+
+ /*
+ * This is like regular unmap: we remove the rmap and
+ * drop page refcount. Page won't be freed, as we took
+ * a reference just above.
+ */
+ page_remove_rmap(page, vma, false);
+ put_page(page);
+
+ if (pte_present(pte))
+ unmapped++;
+ } else {
+ put_page(page);
+ mpfn = 0;
+ }
+
+next:
+ migrate->dst[migrate->npages] = 0;
+ migrate->src[migrate->npages++] = mpfn;
+ }
+ arch_leave_lazy_mmu_mode();
+ pte_unmap_unlock(ptep - 1, ptl);
+
+ /* Only flush the TLB if we actually modified any entries */
+ if (unmapped)
+ flush_tlb_range(walk->vma, start, end);
+
+ return 0;
+}
+
+static const struct mm_walk_ops migrate_vma_walk_ops = {
+ .pmd_entry = migrate_vma_collect_pmd,
+ .pte_hole = migrate_vma_collect_hole,
+};
+
+/*
+ * migrate_vma_collect() - collect pages over a range of virtual addresses
+ * @migrate: migrate struct containing all migration information
+ *
+ * This will walk the CPU page table. For each virtual address backed by a
+ * valid page, it updates the src array and takes a reference on the page, in
+ * order to pin the page until we lock it and unmap it.
+ */
+static void migrate_vma_collect(struct migrate_vma *migrate)
+{
+ struct mmu_notifier_range range;
+
+ /*
+ * Note that the pgmap_owner is passed to the mmu notifier callback so
+ * that the registered device driver can skip invalidating device
+ * private page mappings that won't be migrated.
+ */
+ mmu_notifier_range_init_owner(&range, MMU_NOTIFY_MIGRATE, 0,
+ migrate->vma, migrate->vma->vm_mm, migrate->start, migrate->end,
+ migrate->pgmap_owner);
+ mmu_notifier_invalidate_range_start(&range);
+
+ walk_page_range(migrate->vma->vm_mm, migrate->start, migrate->end,
+ &migrate_vma_walk_ops, migrate);
+
+ mmu_notifier_invalidate_range_end(&range);
+ migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT);
+}
+
+/*
+ * migrate_vma_check_page() - check if page is pinned or not
+ * @page: struct page to check
+ *
+ * Pinned pages cannot be migrated. This is the same test as in
+ * folio_migrate_mapping(), except that here we allow migration of a
+ * ZONE_DEVICE page.
+ */
+static bool migrate_vma_check_page(struct page *page)
+{
+ /*
+ * One extra ref because caller holds an extra reference, either from
+ * isolate_lru_page() for a regular page, or migrate_vma_collect() for
+ * a device page.
+ */
+ int extra = 1;
+
+ /*
+ * FIXME support THP (transparent huge page), it is a bit more complex to
+ * check them than regular pages, because they can be mapped with a pmd
+ * or with a pte (split pte mapping).
+ */
+ if (PageCompound(page))
+ return false;
+
+ /* Pages from ZONE_DEVICE have one extra reference */
+ if (is_zone_device_page(page))
+ extra++;
+
+ /* For file-backed pages */
+ if (page_mapping(page))
+ extra += 1 + page_has_private(page);
+
+ if ((page_count(page) - extra) > page_mapcount(page))
+ return false;
+
+ return true;
+}
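
/*
 * Illustrative worked example, not part of this patch: at the point of
 * the check above, a fully unmapped and isolated anonymous page has
 * page_count == 1 (only the caller's reference), page_mapcount == 0 and
 * extra == 1, so (1 - 1) > 0 is false and the page is migratable. Any
 * foreign reference (e.g. a get_user_pages() pin) raises page_count
 * without raising page_mapcount, so the test correctly fails. Device
 * private pages carry one more base reference, hence the extra++ above.
 */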
+
+/*
+ * migrate_vma_unmap() - replace page mapping with special migration pte entry
+ * @migrate: migrate struct containing all migration information
+ *
+ * Isolate pages from the LRU and replace mappings (CPU page table pte) with a
+ * special migration pte entry, then check whether each page has been pinned.
+ * Pinned pages are restored because we cannot migrate them.
+ *
+ * This is the last step before we call the device driver callback to allocate
+ * destination memory and copy contents of original page over to new page.
+ */
+static void migrate_vma_unmap(struct migrate_vma *migrate)
+{
+ const unsigned long npages = migrate->npages;
+ unsigned long i, restore = 0;
+ bool allow_drain = true;
+
+ lru_add_drain();
+
+ for (i = 0; i < npages; i++) {
+ struct page *page = migrate_pfn_to_page(migrate->src[i]);
+ struct folio *folio;
+
+ if (!page)
+ continue;
+
+ /* ZONE_DEVICE pages are not on LRU */
+ if (!is_zone_device_page(page)) {
+ if (!PageLRU(page) && allow_drain) {
+ /* Drain CPU's pagevec */
+ lru_add_drain_all();
+ allow_drain = false;
+ }
+
+ if (isolate_lru_page(page)) {
+ migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+ migrate->cpages--;
+ restore++;
+ continue;
+ }
+
+ /* Drop the reference we took in collect */
+ put_page(page);
+ }
+
+ folio = page_folio(page);
+ if (folio_mapped(folio))
+ try_to_migrate(folio, 0);
+
+ if (page_mapped(page) || !migrate_vma_check_page(page)) {
+ if (!is_zone_device_page(page)) {
+ get_page(page);
+ putback_lru_page(page);
+ }
+
+ migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+ migrate->cpages--;
+ restore++;
+ continue;
+ }
+ }
+
+ for (i = 0; i < npages && restore; i++) {
+ struct page *page = migrate_pfn_to_page(migrate->src[i]);
+ struct folio *folio;
+
+ if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
+ continue;
+
+ folio = page_folio(page);
+ remove_migration_ptes(folio, folio, false);
+
+ migrate->src[i] = 0;
+ folio_unlock(folio);
+ folio_put(folio);
+ restore--;
+ }
+}
+
+/**
+ * migrate_vma_setup() - prepare to migrate a range of memory
+ * @args: contains the vma, start, and pfns arrays for the migration
+ *
+ * Returns: negative errno on failure, 0 when 0 or more pages were migrated
+ * without an error.
+ *
+ * Prepare to migrate a range of virtual addresses by collecting all the
+ * pages backing each address in the range, saving them inside the src array.
+ * Then lock those pages and unmap them. Once the pages are locked and
+ * unmapped, check whether each page is pinned or not. Pages that aren't
+ * pinned have the MIGRATE_PFN_MIGRATE flag set (by this function) in the
+ * corresponding src array entry. It then restores any pages that are pinned,
+ * by remapping and unlocking those pages.
+ *
+ * The caller should then allocate destination memory and copy source memory to
+ * it for all those entries (ie with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE
+ * flag set). Once these are allocated and copied, the caller must update each
+ * corresponding entry in the dst array with the pfn value of the destination
+ * page and with MIGRATE_PFN_VALID. Destination pages must be locked via
+ * lock_page().
+ *
+ * Note that the caller does not have to migrate all the pages that are marked
+ * with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration from
+ * device memory to system memory. If the caller cannot migrate a device page
+ * back to system memory, then it must return VM_FAULT_SIGBUS, which has severe
+ * consequences for the userspace process, so it must be avoided if at all
+ * possible.
+ *
+ * For empty entries inside the CPU page table (pte_none() or pmd_none() is
+ * true) we also set the MIGRATE_PFN_MIGRATE flag in the corresponding src
+ * array entry, thus allowing the caller to allocate device memory for those
+ * unbacked virtual addresses. For this the caller simply has to allocate
+ * device memory and properly set the destination entry, as for a regular
+ * migration. Note that this can still fail, so inside the device driver you
+ * must check whether the migration was successful for those entries after
+ * calling migrate_vma_pages(), just like for regular migration.
+ *
+ * After that, the caller must call migrate_vma_pages() to go over each entry
+ * in the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE
+ * flags set. If the corresponding entry in the dst array has the
+ * MIGRATE_PFN_VALID flag set, then migrate_vma_pages() migrates the struct
+ * page information from the source struct page to the destination struct
+ * page. If it fails to migrate the struct page information, it clears the
+ * MIGRATE_PFN_MIGRATE flag in the src array.
+ *
+ * At this point all successfully migrated pages have an entry in the src
+ * array with the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flags set and the
+ * dst array entry with the MIGRATE_PFN_VALID flag set.
+ *
+ * Once migrate_vma_pages() returns the caller may inspect which pages were
+ * successfully migrated, and which were not. Successfully migrated pages will
+ * have the MIGRATE_PFN_MIGRATE flag set for their src array entry.
+ *
+ * It is safe to update the device page table after migrate_vma_pages() because
+ * both destination and source page are still locked, and the mmap_lock is held
+ * in read mode (hence no one can unmap the range being migrated).
+ *
+ * Once the caller is done cleaning up things and updating its page table (if it
+ * chose to do so, this is not an obligation) it finally calls
+ * migrate_vma_finalize() to update the CPU page table to point to new pages
+ * for successfully migrated pages or otherwise restore the CPU page table to
+ * point to the original source pages.
+ */
+int migrate_vma_setup(struct migrate_vma *args)
+{
+ long nr_pages = (args->end - args->start) >> PAGE_SHIFT;
+
+ args->start &= PAGE_MASK;
+ args->end &= PAGE_MASK;
+ if (!args->vma || is_vm_hugetlb_page(args->vma) ||
+ (args->vma->vm_flags & VM_SPECIAL) || vma_is_dax(args->vma))
+ return -EINVAL;
+ if (nr_pages <= 0)
+ return -EINVAL;
+ if (args->start < args->vma->vm_start ||
+ args->start >= args->vma->vm_end)
+ return -EINVAL;
+ if (args->end <= args->vma->vm_start || args->end > args->vma->vm_end)
+ return -EINVAL;
+ if (!args->src || !args->dst)
+ return -EINVAL;
+
+ memset(args->src, 0, sizeof(*args->src) * nr_pages);
+ args->cpages = 0;
+ args->npages = 0;
+
+ migrate_vma_collect(args);
+
+ if (args->cpages)
+ migrate_vma_unmap(args);
+
+ /*
+ * At this point pages are locked and unmapped, and thus they have
+ * stable content and can safely be copied to destination memory that
+ * is allocated by the drivers.
+ */
+ return 0;
+}
+EXPORT_SYMBOL(migrate_vma_setup);
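
/*
 * Illustrative driver-side sketch of the sequence described above, not
 * part of this patch. alloc_device_page() and copy_to_device_page() are
 * hypothetical stand-ins for driver code, and the fixed-size pfn arrays
 * assume the range covers at most 64 pages.
 */
static int example_migrate_to_device(struct vm_area_struct *vma,
				     unsigned long start, unsigned long end,
				     void *owner)
{
	unsigned long src_pfns[64], dst_pfns[64];
	struct migrate_vma args = {
		.vma		= vma,
		.start		= start,
		.end		= end,
		.src		= src_pfns,
		.dst		= dst_pfns,
		.pgmap_owner	= owner,
		.flags		= MIGRATE_VMA_SELECT_SYSTEM,
	};
	unsigned long i;

	if (migrate_vma_setup(&args))
		return -EINVAL;

	for (i = 0; i < args.npages; i++) {
		struct page *spage, *dpage;

		if (!(args.src[i] & MIGRATE_PFN_MIGRATE))
			continue;
		dpage = alloc_device_page();	/* hypothetical helper */
		if (!dpage)
			continue;
		lock_page(dpage);
		/* A NULL source page means an empty or zero pte: zero-fill. */
		spage = migrate_pfn_to_page(args.src[i]);
		copy_to_device_page(dpage, spage);	/* hypothetical helper */
		args.dst[i] = migrate_pfn(page_to_pfn(dpage));
		if (args.src[i] & MIGRATE_PFN_WRITE)
			args.dst[i] |= MIGRATE_PFN_WRITE;
	}

	migrate_vma_pages(&args);
	/* Update the device's page table here: the pages are still locked. */
	migrate_vma_finalize(&args);
	return 0;
}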
+
+/*
+ * This code closely matches the code in:
+ * __handle_mm_fault()
+ * handle_pte_fault()
+ * do_anonymous_page()
+ * to map in an anonymous zero page but the struct page will be a ZONE_DEVICE
+ * private page.
+ */
+static void migrate_vma_insert_page(struct migrate_vma *migrate,
+ unsigned long addr,
+ struct page *page,
+ unsigned long *src)
+{
+ struct vm_area_struct *vma = migrate->vma;
+ struct mm_struct *mm = vma->vm_mm;
+ bool flush = false;
+ spinlock_t *ptl;
+ pte_t entry;
+ pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ /* Only allow populating anonymous memory */
+ if (!vma_is_anonymous(vma))
+ goto abort;
+
+ pgdp = pgd_offset(mm, addr);
+ p4dp = p4d_alloc(mm, pgdp, addr);
+ if (!p4dp)
+ goto abort;
+ pudp = pud_alloc(mm, p4dp, addr);
+ if (!pudp)
+ goto abort;
+ pmdp = pmd_alloc(mm, pudp, addr);
+ if (!pmdp)
+ goto abort;
+
+ if (pmd_trans_huge(*pmdp) || pmd_devmap(*pmdp))
+ goto abort;
+
+ /*
+ * Use pte_alloc() instead of pte_alloc_map(). We can't run
+ * pte_offset_map() on pmds where a huge pmd might be created
+ * from a different thread.
+ *
+ * pte_alloc_map() is safe to use under mmap_write_lock(mm) or when
+ * parallel threads are excluded by other means.
+ *
+ * Here we only have mmap_read_lock(mm).
+ */
+ if (pte_alloc(mm, pmdp))
+ goto abort;
+
+ /* See the comment in pte_alloc_one_map() */
+ if (unlikely(pmd_trans_unstable(pmdp)))
+ goto abort;
+
+ if (unlikely(anon_vma_prepare(vma)))
+ goto abort;
+ if (mem_cgroup_charge(page_folio(page), vma->vm_mm, GFP_KERNEL))
+ goto abort;
+
+ /*
+ * The memory barrier inside __SetPageUptodate makes sure that
+ * preceding stores to the page contents become visible before
+ * the set_pte_at() write.
+ */
+ __SetPageUptodate(page);
+
+ if (is_device_private_page(page)) {
+ swp_entry_t swp_entry;
+
+ if (vma->vm_flags & VM_WRITE)
+ swp_entry = make_writable_device_private_entry(
+ page_to_pfn(page));
+ else
+ swp_entry = make_readable_device_private_entry(
+ page_to_pfn(page));
+ entry = swp_entry_to_pte(swp_entry);
+ } else {
+ /*
+ * For now we only support migrating to un-addressable device
+ * memory.
+ */
+ if (is_zone_device_page(page)) {
+ pr_warn_once("Unsupported ZONE_DEVICE page type.\n");
+ goto abort;
+ }
+ entry = mk_pte(page, vma->vm_page_prot);
+ if (vma->vm_flags & VM_WRITE)
+ entry = pte_mkwrite(pte_mkdirty(entry));
+ }
+
+ ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+
+ if (check_stable_address_space(mm))
+ goto unlock_abort;
+
+ if (pte_present(*ptep)) {
+ unsigned long pfn = pte_pfn(*ptep);
+
+ if (!is_zero_pfn(pfn))
+ goto unlock_abort;
+ flush = true;
+ } else if (!pte_none(*ptep))
+ goto unlock_abort;
+
+ /*
+ * Check for userfaultfd but do not deliver the fault. Instead,
+ * just back off.
+ */
+ if (userfaultfd_missing(vma))
+ goto unlock_abort;
+
+ inc_mm_counter(mm, MM_ANONPAGES);
+ page_add_new_anon_rmap(page, vma, addr, false);
+ if (!is_zone_device_page(page))
+ lru_cache_add_inactive_or_unevictable(page, vma);
+ get_page(page);
+
+ if (flush) {
+ flush_cache_page(vma, addr, pte_pfn(*ptep));
+ ptep_clear_flush_notify(vma, addr, ptep);
+ set_pte_at_notify(mm, addr, ptep, entry);
+ update_mmu_cache(vma, addr, ptep);
+ } else {
+ /* No need to invalidate - it was non-present before */
+ set_pte_at(mm, addr, ptep, entry);
+ update_mmu_cache(vma, addr, ptep);
+ }
+
+ pte_unmap_unlock(ptep, ptl);
+ *src = MIGRATE_PFN_MIGRATE;
+ return;
+
+unlock_abort:
+ pte_unmap_unlock(ptep, ptl);
+abort:
+ *src &= ~MIGRATE_PFN_MIGRATE;
+}
+
+/**
+ * migrate_vma_pages() - migrate meta-data from src page to dst page
+ * @migrate: migrate struct containing all migration information
+ *
+ * This migrates struct page meta-data from source struct page to destination
+ * struct page. This effectively finishes the migration from source page to the
+ * destination page.
+ */
+void migrate_vma_pages(struct migrate_vma *migrate)
+{
+ const unsigned long npages = migrate->npages;
+ const unsigned long start = migrate->start;
+ struct mmu_notifier_range range;
+ unsigned long addr, i;
+ bool notified = false;
+
+ for (i = 0, addr = start; i < npages; addr += PAGE_SIZE, i++) {
+ struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
+ struct page *page = migrate_pfn_to_page(migrate->src[i]);
+ struct address_space *mapping;
+ int r;
+
+ if (!newpage) {
+ migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+ continue;
+ }
+
+ if (!page) {
+ if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE))
+ continue;
+ if (!notified) {
+ notified = true;
+
+ mmu_notifier_range_init_owner(&range,
+ MMU_NOTIFY_MIGRATE, 0, migrate->vma,
+ migrate->vma->vm_mm, addr, migrate->end,
+ migrate->pgmap_owner);
+ mmu_notifier_invalidate_range_start(&range);
+ }
+ migrate_vma_insert_page(migrate, addr, newpage,
+ &migrate->src[i]);
+ continue;
+ }
+
+ mapping = page_mapping(page);
+
+ if (is_device_private_page(newpage)) {
+ /*
+ * For now we only support private anonymous pages when
+ * migrating to un-addressable device memory.
+ */
+ if (mapping) {
+ migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+ continue;
+ }
+ } else if (is_zone_device_page(newpage)) {
+ /*
+ * Other types of ZONE_DEVICE pages are not supported.
+ */
+ migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+ continue;
+ }
+
+ r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY);
+ if (r != MIGRATEPAGE_SUCCESS)
+ migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+ }
+
+ /*
+ * No need to double call mmu_notifier->invalidate_range() callback as
+ * the above ptep_clear_flush_notify() inside migrate_vma_insert_page()
+ * already called it.
+ */
+ if (notified)
+ mmu_notifier_invalidate_range_only_end(&range);
+}
+EXPORT_SYMBOL(migrate_vma_pages);
+
+/**
+ * migrate_vma_finalize() - restore CPU page table entry
+ * @migrate: migrate struct containing all migration information
+ *
+ * This replaces the special migration pte entry with either a mapping to the
+ * new page if migration was successful for that page, or to the original page
+ * otherwise.
+ *
+ * This also unlocks the pages and puts them back on the lru, or drops the extra
+ * refcount, for device pages.
+ */
+void migrate_vma_finalize(struct migrate_vma *migrate)
+{
+ const unsigned long npages = migrate->npages;
+ unsigned long i;
+
+ for (i = 0; i < npages; i++) {
+ struct folio *dst, *src;
+ struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
+ struct page *page = migrate_pfn_to_page(migrate->src[i]);
+
+ if (!page) {
+ if (newpage) {
+ unlock_page(newpage);
+ put_page(newpage);
+ }
+ continue;
+ }
+
+ if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE) || !newpage) {
+ if (newpage) {
+ unlock_page(newpage);
+ put_page(newpage);
+ }
+ newpage = page;
+ }
+
+ src = page_folio(page);
+ dst = page_folio(newpage);
+ remove_migration_ptes(src, dst, false);
+ folio_unlock(src);
+
+ if (is_zone_device_page(page))
+ put_page(page);
+ else
+ putback_lru_page(page);
+
+ if (newpage != page) {
+ unlock_page(newpage);
+ if (is_zone_device_page(newpage))
+ put_page(newpage);
+ else
+ putback_lru_page(newpage);
+ }
+ }
+}
+EXPORT_SYMBOL(migrate_vma_finalize);
diff --git a/mm/mlock.c b/mm/mlock.c
index 8f584eddd305..efd2dd2943de 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -14,6 +14,7 @@
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
+#include <linux/pagewalk.h>
#include <linux/mempolicy.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
@@ -27,6 +28,8 @@
#include "internal.h"
+static DEFINE_PER_CPU(struct pagevec, mlock_pvec);
+
bool can_do_mlock(void)
{
if (rlimit(RLIMIT_MEMLOCK) != 0)
@@ -46,441 +49,320 @@ EXPORT_SYMBOL(can_do_mlock);
* be placed on the LRU "unevictable" list, rather than the [in]active lists.
* The unevictable list is an LRU sibling list to the [in]active lists.
* PageUnevictable is set to indicate the unevictable state.
- *
- * When lazy mlocking via vmscan, it is important to ensure that the
- * vma's VM_LOCKED status is not concurrently being modified, otherwise we
- * may have mlocked a page that is being munlocked. So lazy mlock must take
- * the mmap_lock for read, and verify that the vma really is locked
- * (see mm/rmap.c).
*/
-/*
- * LRU accounting for clear_page_mlock()
- */
-void clear_page_mlock(struct page *page)
+static struct lruvec *__mlock_page(struct page *page, struct lruvec *lruvec)
{
- int nr_pages;
+ /* There is nothing more we can do while it's off LRU */
+ if (!TestClearPageLRU(page))
+ return lruvec;
- if (!TestClearPageMlocked(page))
- return;
+ lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec);
- nr_pages = thp_nr_pages(page);
- mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
- count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages);
- /*
- * The previous TestClearPageMlocked() corresponds to the smp_mb()
- * in __pagevec_lru_add_fn().
- *
- * See __pagevec_lru_add_fn for more explanation.
- */
- if (!isolate_lru_page(page)) {
- putback_lru_page(page);
- } else {
+ if (unlikely(page_evictable(page))) {
/*
- * We lost the race. the page already moved to evictable list.
+ * This is a little surprising, but quite possible:
+ * PageMlocked must already have been cleared by another CPU.
+ * Could this page be on the Unevictable LRU? I'm not sure,
+ * but move it now if so.
*/
- if (PageUnevictable(page))
- count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
+ if (PageUnevictable(page)) {
+ del_page_from_lru_list(page, lruvec);
+ ClearPageUnevictable(page);
+ add_page_to_lru_list(page, lruvec);
+ __count_vm_events(UNEVICTABLE_PGRESCUED,
+ thp_nr_pages(page));
+ }
+ goto out;
}
+
+ if (PageUnevictable(page)) {
+ if (PageMlocked(page))
+ page->mlock_count++;
+ goto out;
+ }
+
+ del_page_from_lru_list(page, lruvec);
+ ClearPageActive(page);
+ SetPageUnevictable(page);
+ page->mlock_count = !!PageMlocked(page);
+ add_page_to_lru_list(page, lruvec);
+ __count_vm_events(UNEVICTABLE_PGCULLED, thp_nr_pages(page));
+out:
+ SetPageLRU(page);
+ return lruvec;
}
-/*
- * Mark page as mlocked if not already.
- * If page on LRU, isolate and putback to move to unevictable list.
- */
-void mlock_vma_page(struct page *page)
+static struct lruvec *__mlock_new_page(struct page *page, struct lruvec *lruvec)
{
- /* Serialize with page migration */
- BUG_ON(!PageLocked(page));
+ VM_BUG_ON_PAGE(PageLRU(page), page);
- VM_BUG_ON_PAGE(PageTail(page), page);
- VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
+ lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec);
- if (!TestSetPageMlocked(page)) {
- int nr_pages = thp_nr_pages(page);
+ /* As above, this is a little surprising, but possible */
+ if (unlikely(page_evictable(page)))
+ goto out;
- mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
- count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
- if (!isolate_lru_page(page))
- putback_lru_page(page);
- }
+ SetPageUnevictable(page);
+ page->mlock_count = !!PageMlocked(page);
+ __count_vm_events(UNEVICTABLE_PGCULLED, thp_nr_pages(page));
+out:
+ add_page_to_lru_list(page, lruvec);
+ SetPageLRU(page);
+ return lruvec;
}
-/*
- * Finish munlock after successful page isolation
- *
- * Page must be locked. This is a wrapper for page_mlock()
- * and putback_lru_page() with munlock accounting.
- */
-static void __munlock_isolated_page(struct page *page)
+static struct lruvec *__munlock_page(struct page *page, struct lruvec *lruvec)
{
- /*
- * Optimization: if the page was mapped just once, that's our mapping
- * and we don't need to check all the other vmas.
- */
- if (page_mapcount(page) > 1)
- page_mlock(page);
+ int nr_pages = thp_nr_pages(page);
+ bool isolated = false;
+
+ if (!TestClearPageLRU(page))
+ goto munlock;
- /* Did try_to_unlock() succeed or punt? */
- if (!PageMlocked(page))
- count_vm_events(UNEVICTABLE_PGMUNLOCKED, thp_nr_pages(page));
+ isolated = true;
+ lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec);
- putback_lru_page(page);
+ if (PageUnevictable(page)) {
+ /* Then mlock_count is maintained, but might undercount */
+ if (page->mlock_count)
+ page->mlock_count--;
+ if (page->mlock_count)
+ goto out;
+ }
+ /* else assume that was the last mlock: reclaim will fix it if not */
+
+munlock:
+ if (TestClearPageMlocked(page)) {
+ __mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
+ if (isolated || !PageUnevictable(page))
+ __count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages);
+ else
+ __count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
+ }
+
+ /* page_evictable() has to be checked *after* clearing Mlocked */
+ if (isolated && PageUnevictable(page) && page_evictable(page)) {
+ del_page_from_lru_list(page, lruvec);
+ ClearPageUnevictable(page);
+ add_page_to_lru_list(page, lruvec);
+ __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages);
+ }
+out:
+ if (isolated)
+ SetPageLRU(page);
+ return lruvec;
}
/*
- * Accounting for page isolation fail during munlock
- *
- * Performs accounting when page isolation fails in munlock. There is nothing
- * else to do because it means some other task has already removed the page
- * from the LRU. putback_lru_page() will take care of removing the page from
- * the unevictable list, if necessary. vmscan [page_referenced()] will move
- * the page back to the unevictable list if some other vma has it mlocked.
+ * Flags held in the low bits of a struct page pointer on the mlock_pvec.
*/
-static void __munlock_isolation_failed(struct page *page)
+#define LRU_PAGE 0x1
+#define NEW_PAGE 0x2
+static inline struct page *mlock_lru(struct page *page)
{
- int nr_pages = thp_nr_pages(page);
+ return (struct page *)((unsigned long)page + LRU_PAGE);
+}
- if (PageUnevictable(page))
- __count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
- else
- __count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages);
+static inline struct page *mlock_new(struct page *page)
+{
+ return (struct page *)((unsigned long)page + NEW_PAGE);
}
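
/*
 * Illustrative note, not part of this patch: the tagging above relies
 * on struct page pointers being at least 4-byte aligned, so the two low
 * bits are free to carry LRU_PAGE/NEW_PAGE. The round trip is plain
 * arithmetic:
 *
 *	tagged = (unsigned long)page | NEW_PAGE;
 *	flags  = tagged & (LRU_PAGE | NEW_PAGE);
 *	page   = (struct page *)(tagged - flags);
 *
 * mlock_pagevec() below decodes exactly this way; addition and
 * bitwise-or are interchangeable here because the low bits start out
 * zero and at most one flag is ever set.
 */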
-/**
- * munlock_vma_page - munlock a vma page
- * @page: page to be unlocked, either a normal page or THP page head
- *
- * returns the size of the page as a page mask (0 for normal page,
- * HPAGE_PMD_NR - 1 for THP head page)
- *
- * called from munlock()/munmap() path with page supposedly on the LRU.
- * When we munlock a page, because the vma where we found the page is being
- * munlock()ed or munmap()ed, we want to check whether other vmas hold the
- * page locked so that we can leave it on the unevictable lru list and not
- * bother vmscan with it. However, to walk the page's rmap list in
- * page_mlock() we must isolate the page from the LRU. If some other
- * task has removed the page from the LRU, we won't be able to do that.
- * So we clear the PageMlocked as we might not get another chance. If we
- * can't isolate the page, we leave it for putback_lru_page() and vmscan
- * [page_referenced()/try_to_unmap()] to deal with.
+/*
+ * mlock_pagevec() is derived from pagevec_lru_move_fn():
+ * perhaps that can make use of such page pointer flags in future,
+ * but for now just keep it for mlock. We could use three separate
+ * pagevecs instead, but one feels better (munlocking a full pagevec
+ * does not need to drain mlocking pagevecs first).
*/
-unsigned int munlock_vma_page(struct page *page)
+static void mlock_pagevec(struct pagevec *pvec)
{
- int nr_pages;
-
- /* For page_mlock() and to serialize with page migration */
- BUG_ON(!PageLocked(page));
- VM_BUG_ON_PAGE(PageTail(page), page);
+ struct lruvec *lruvec = NULL;
+ unsigned long mlock;
+ struct page *page;
+ int i;
- if (!TestClearPageMlocked(page)) {
- /* Potentially, PTE-mapped THP: do not skip the rest PTEs */
- return 0;
+ for (i = 0; i < pagevec_count(pvec); i++) {
+ page = pvec->pages[i];
+ mlock = (unsigned long)page & (LRU_PAGE | NEW_PAGE);
+ page = (struct page *)((unsigned long)page - mlock);
+ pvec->pages[i] = page;
+
+ if (mlock & LRU_PAGE)
+ lruvec = __mlock_page(page, lruvec);
+ else if (mlock & NEW_PAGE)
+ lruvec = __mlock_new_page(page, lruvec);
+ else
+ lruvec = __munlock_page(page, lruvec);
}
- nr_pages = thp_nr_pages(page);
- mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
+ if (lruvec)
+ unlock_page_lruvec_irq(lruvec);
+ release_pages(pvec->pages, pvec->nr);
+ pagevec_reinit(pvec);
+}
- if (!isolate_lru_page(page))
- __munlock_isolated_page(page);
- else
- __munlock_isolation_failed(page);
+void mlock_page_drain(int cpu)
+{
+ struct pagevec *pvec;
- return nr_pages - 1;
+ pvec = &per_cpu(mlock_pvec, cpu);
+ if (pagevec_count(pvec))
+ mlock_pagevec(pvec);
}
-/*
- * convert get_user_pages() return value to posix mlock() error
- */
-static int __mlock_posix_error_return(long retval)
+bool need_mlock_page_drain(int cpu)
{
- if (retval == -EFAULT)
- retval = -ENOMEM;
- else if (retval == -ENOMEM)
- retval = -EAGAIN;
- return retval;
+ return pagevec_count(&per_cpu(mlock_pvec, cpu));
}
-/*
- * Prepare page for fast batched LRU putback via putback_lru_evictable_pagevec()
- *
- * The fast path is available only for evictable pages with single mapping.
- * Then we can bypass the per-cpu pvec and get better performance.
- * when mapcount > 1 we need page_mlock() which can fail.
- * when !page_evictable(), we need the full redo logic of putback_lru_page to
- * avoid leaving evictable page in unevictable list.
- *
- * In case of success, @page is added to @pvec and @pgrescued is incremented
- * in case that the page was previously unevictable. @page is also unlocked.
+/**
+ * mlock_folio - mlock a folio already on (or temporarily off) LRU
+ * @folio: folio to be mlocked.
*/
-static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec,
- int *pgrescued)
+void mlock_folio(struct folio *folio)
{
- VM_BUG_ON_PAGE(PageLRU(page), page);
- VM_BUG_ON_PAGE(!PageLocked(page), page);
+ struct pagevec *pvec = &get_cpu_var(mlock_pvec);
- if (page_mapcount(page) <= 1 && page_evictable(page)) {
- pagevec_add(pvec, page);
- if (TestClearPageUnevictable(page))
- (*pgrescued)++;
- unlock_page(page);
- return true;
+ if (!folio_test_set_mlocked(folio)) {
+ int nr_pages = folio_nr_pages(folio);
+
+ zone_stat_mod_folio(folio, NR_MLOCK, nr_pages);
+ __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
}
- return false;
+ folio_get(folio);
+ if (!pagevec_add(pvec, mlock_lru(&folio->page)) ||
+ folio_test_large(folio) || lru_cache_disabled())
+ mlock_pagevec(pvec);
+ put_cpu_var(mlock_pvec);
}
-/*
- * Putback multiple evictable pages to the LRU
- *
- * Batched putback of evictable pages that bypasses the per-cpu pvec. Some of
- * the pages might have meanwhile become unevictable but that is OK.
+/**
+ * mlock_new_page - mlock a newly allocated page not yet on LRU
+ * @page: page to be mlocked, either a normal page or a THP head.
*/
-static void __putback_lru_fast(struct pagevec *pvec, int pgrescued)
+void mlock_new_page(struct page *page)
{
- count_vm_events(UNEVICTABLE_PGMUNLOCKED, pagevec_count(pvec));
- /*
- *__pagevec_lru_add() calls release_pages() so we don't call
- * put_page() explicitly
- */
- __pagevec_lru_add(pvec);
- count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
+ struct pagevec *pvec = &get_cpu_var(mlock_pvec);
+ int nr_pages = thp_nr_pages(page);
+
+ SetPageMlocked(page);
+ mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
+ __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
+
+ get_page(page);
+ if (!pagevec_add(pvec, mlock_new(page)) ||
+ PageHead(page) || lru_cache_disabled())
+ mlock_pagevec(pvec);
+ put_cpu_var(mlock_pvec);
}
-/*
- * Munlock a batch of pages from the same zone
- *
- * The work is split to two main phases. First phase clears the Mlocked flag
- * and attempts to isolate the pages, all under a single zone lru lock.
- * The second phase finishes the munlock only for pages where isolation
- * succeeded.
- *
- * Note that the pagevec may be modified during the process.
+/**
+ * munlock_page - munlock a page
+ * @page: page to be munlocked, either a normal page or a THP head.
*/
-static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
+void munlock_page(struct page *page)
{
- int i;
- int nr = pagevec_count(pvec);
- int delta_munlocked = -nr;
- struct pagevec pvec_putback;
- struct lruvec *lruvec = NULL;
- int pgrescued = 0;
-
- pagevec_init(&pvec_putback);
-
- /* Phase 1: page isolation */
- for (i = 0; i < nr; i++) {
- struct page *page = pvec->pages[i];
- struct folio *folio = page_folio(page);
-
- if (TestClearPageMlocked(page)) {
- /*
- * We already have pin from follow_page_mask()
- * so we can spare the get_page() here.
- */
- if (TestClearPageLRU(page)) {
- lruvec = folio_lruvec_relock_irq(folio, lruvec);
- del_page_from_lru_list(page, lruvec);
- continue;
- } else
- __munlock_isolation_failed(page);
- } else {
- delta_munlocked++;
- }
-
- /*
- * We won't be munlocking this page in the next phase
- * but we still need to release the follow_page_mask()
- * pin. We cannot do it under lru_lock however. If it's
- * the last pin, __page_cache_release() would deadlock.
- */
- pagevec_add(&pvec_putback, pvec->pages[i]);
- pvec->pages[i] = NULL;
- }
- if (lruvec) {
- __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
- unlock_page_lruvec_irq(lruvec);
- } else if (delta_munlocked) {
- mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
- }
-
- /* Now we can release pins of pages that we are not munlocking */
- pagevec_release(&pvec_putback);
-
- /* Phase 2: page munlock */
- for (i = 0; i < nr; i++) {
- struct page *page = pvec->pages[i];
-
- if (page) {
- lock_page(page);
- if (!__putback_lru_fast_prepare(page, &pvec_putback,
- &pgrescued)) {
- /*
- * Slow path. We don't want to lose the last
- * pin before unlock_page()
- */
- get_page(page); /* for putback_lru_page() */
- __munlock_isolated_page(page);
- unlock_page(page);
- put_page(page); /* from follow_page_mask() */
- }
- }
- }
+ struct pagevec *pvec = &get_cpu_var(mlock_pvec);
/*
- * Phase 3: page putback for pages that qualified for the fast path
- * This will also call put_page() to return pin from follow_page_mask()
+ * TestClearPageMlocked(page) must be left to __munlock_page(),
+ * which will check whether the page is multiply mlocked.
*/
- if (pagevec_count(&pvec_putback))
- __putback_lru_fast(&pvec_putback, pgrescued);
+
+ get_page(page);
+ if (!pagevec_add(pvec, page) ||
+ PageHead(page) || lru_cache_disabled())
+ mlock_pagevec(pvec);
+ put_cpu_var(mlock_pvec);
}
-/*
- * Fill up pagevec for __munlock_pagevec using pte walk
- *
- * The function expects that the struct page corresponding to @start address is
- * a non-TPH page already pinned and in the @pvec, and that it belongs to @zone.
- *
- * The rest of @pvec is filled by subsequent pages within the same pmd and same
- * zone, as long as the pte's are present and vm_normal_page() succeeds. These
- * pages also get pinned.
- *
- * Returns the address of the next page that should be scanned. This equals
- * @start + PAGE_SIZE when no page could be added by the pte walk.
- */
-static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
- struct vm_area_struct *vma, struct zone *zone,
- unsigned long start, unsigned long end)
+static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
{
- pte_t *pte;
+ struct vm_area_struct *vma = walk->vma;
spinlock_t *ptl;
+ pte_t *start_pte, *pte;
+ struct page *page;
- /*
- * Initialize pte walk starting at the already pinned page where we
- * are sure that there is a pte, as it was pinned under the same
- * mmap_lock write op.
- */
- pte = get_locked_pte(vma->vm_mm, start, &ptl);
- /* Make sure we do not cross the page table boundary */
- end = pgd_addr_end(start, end);
- end = p4d_addr_end(start, end);
- end = pud_addr_end(start, end);
- end = pmd_addr_end(start, end);
-
- /* The page next to the pinned page is the first we will try to get */
- start += PAGE_SIZE;
- while (start < end) {
- struct page *page = NULL;
- pte++;
- if (pte_present(*pte))
- page = vm_normal_page(vma, start, *pte);
- /*
- * Break if page could not be obtained or the page's node+zone does not
- * match
- */
- if (!page || page_zone(page) != zone)
- break;
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
+ if (!pmd_present(*pmd))
+ goto out;
+ if (is_huge_zero_pmd(*pmd))
+ goto out;
+ page = pmd_page(*pmd);
+ if (vma->vm_flags & VM_LOCKED)
+ mlock_folio(page_folio(page));
+ else
+ munlock_page(page);
+ goto out;
+ }
- /*
- * Do not use pagevec for PTE-mapped THP,
- * munlock_vma_pages_range() will handle them.
- */
+ start_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ for (pte = start_pte; addr != end; pte++, addr += PAGE_SIZE) {
+ if (!pte_present(*pte))
+ continue;
+ page = vm_normal_page(vma, addr, *pte);
+ if (!page)
+ continue;
if (PageTransCompound(page))
- break;
-
- get_page(page);
- /*
- * Increase the address that will be returned *before* the
- * eventual break due to pvec becoming full by adding the page
- */
- start += PAGE_SIZE;
- if (pagevec_add(pvec, page) == 0)
- break;
+ continue;
+ if (vma->vm_flags & VM_LOCKED)
+ mlock_folio(page_folio(page));
+ else
+ munlock_page(page);
}
- pte_unmap_unlock(pte, ptl);
- return start;
+ pte_unmap(start_pte);
+out:
+ spin_unlock(ptl);
+ cond_resched();
+ return 0;
}
/*
- * munlock_vma_pages_range() - munlock all pages in the vma range.'
- * @vma - vma containing range to be munlock()ed.
+ * mlock_vma_pages_range() - mlock any pages already in the range,
+ * or munlock all pages in the range.
+ * @vma - vma containing range to be mlock()ed or munlock()ed
* @start - start address in @vma of the range
- * @end - end of range in @vma.
+ * @end - end of range in @vma
+ * @newflags - the new set of flags for @vma.
*
- * For mremap(), munmap() and exit().
- *
- * Called with @vma VM_LOCKED.
- *
- * Returns with VM_LOCKED cleared. Callers must be prepared to
- * deal with this.
- *
- * We don't save and restore VM_LOCKED here because pages are
- * still on lru. In unmap path, pages might be scanned by reclaim
- * and re-mlocked by page_mlock/try_to_unmap before we unmap and
- * free them. This will result in freeing mlocked pages.
+ * Called for mlock(), mlock2() and mlockall(), to set @vma VM_LOCKED;
+ * called for munlock() and munlockall(), to clear VM_LOCKED from @vma.
*/
-void munlock_vma_pages_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
+static void mlock_vma_pages_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end, vm_flags_t newflags)
{
- vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
+ static const struct mm_walk_ops mlock_walk_ops = {
+ .pmd_entry = mlock_pte_range,
+ };
- while (start < end) {
- struct page *page;
- unsigned int page_mask = 0;
- unsigned long page_increm;
- struct pagevec pvec;
- struct zone *zone;
+ /*
+ * There is a slight chance that concurrent page migration,
+ * or page reclaim finding a page of this now-VM_LOCKED vma,
+ * will call mlock_vma_page() and raise page's mlock_count:
+ * double counting, leaving the page unevictable indefinitely.
+ * Communicate this danger to mlock_vma_page() with VM_IO,
+ * which is a VM_SPECIAL flag not allowed on VM_LOCKED vmas.
+ * mmap_lock is held in write mode here, so this weird
+ * combination should not be visible to other mmap_lock users;
+ * but WRITE_ONCE so rmap walkers must see VM_IO if VM_LOCKED.
+ */
+ if (newflags & VM_LOCKED)
+ newflags |= VM_IO;
+ WRITE_ONCE(vma->vm_flags, newflags);
- pagevec_init(&pvec);
- /*
- * Although FOLL_DUMP is intended for get_dump_page(),
- * it just so happens that its special treatment of the
- * ZERO_PAGE (returning an error instead of doing get_page)
- * suits munlock very well (and if somehow an abnormal page
- * has sneaked into the range, we won't oops here: great).
- */
- page = follow_page(vma, start, FOLL_GET | FOLL_DUMP);
-
- if (page && !IS_ERR(page)) {
- if (PageTransTail(page)) {
- VM_BUG_ON_PAGE(PageMlocked(page), page);
- put_page(page); /* follow_page_mask() */
- } else if (PageTransHuge(page)) {
- lock_page(page);
- /*
- * Any THP page found by follow_page_mask() may
- * have gotten split before reaching
- * munlock_vma_page(), so we need to compute
- * the page_mask here instead.
- */
- page_mask = munlock_vma_page(page);
- unlock_page(page);
- put_page(page); /* follow_page_mask() */
- } else {
- /*
- * Non-huge pages are handled in batches via
- * pagevec. The pin from follow_page_mask()
- * prevents them from collapsing by THP.
- */
- pagevec_add(&pvec, page);
- zone = page_zone(page);
-
- /*
- * Try to fill the rest of pagevec using fast
- * pte walk. This will also update start to
- * the next page to process. Then munlock the
- * pagevec.
- */
- start = __munlock_pagevec_fill(&pvec, vma,
- zone, start, end);
- __munlock_pagevec(&pvec, zone);
- goto next;
- }
- }
- page_increm = 1 + page_mask;
- start += page_increm * PAGE_SIZE;
-next:
- cond_resched();
+ lru_add_drain();
+ walk_page_range(vma->vm_mm, start, end, &mlock_walk_ops, NULL);
+ lru_add_drain();
+
+ if (newflags & VM_IO) {
+ newflags &= ~VM_IO;
+ WRITE_ONCE(vma->vm_flags, newflags);
}
}
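
/*
 * Illustrative sketch, not part of this patch: a consumer honouring the
 * VM_IO trick above must treat the transient VM_LOCKED|VM_IO
 * combination as "not really locked". The real test lives in the
 * mlock_vma_page() path elsewhere in this series and may differ in
 * detail:
 */
static inline bool example_vma_really_locked(struct vm_area_struct *vma)
{
	return (READ_ONCE(vma->vm_flags) & (VM_LOCKED | VM_IO)) == VM_LOCKED;
}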
@@ -500,10 +382,9 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
pgoff_t pgoff;
int nr_pages;
int ret = 0;
- int lock = !!(newflags & VM_LOCKED);
- vm_flags_t old_flags = vma->vm_flags;
+ vm_flags_t oldflags = vma->vm_flags;
- if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
+ if (newflags == oldflags || (oldflags & VM_SPECIAL) ||
is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) ||
vma_is_dax(vma) || vma_is_secretmem(vma))
/* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
@@ -512,7 +393,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
vma->vm_file, pgoff, vma_policy(vma),
- vma->vm_userfaultfd_ctx, vma_anon_name(vma));
+ vma->vm_userfaultfd_ctx, anon_vma_name(vma));
if (*prev) {
vma = *prev;
goto success;
@@ -535,9 +416,9 @@ success:
* Keep track of amount of locked VM.
*/
nr_pages = (end - start) >> PAGE_SHIFT;
- if (!lock)
+ if (!(newflags & VM_LOCKED))
nr_pages = -nr_pages;
- else if (old_flags & VM_LOCKED)
+ else if (oldflags & VM_LOCKED)
nr_pages = 0;
mm->locked_vm += nr_pages;
@@ -547,11 +428,12 @@ success:
* set VM_LOCKED, populate_vma_page_range will bring it back.
*/
- if (lock)
+ if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) {
+ /* No work to do, and mlocking twice would be wrong */
vma->vm_flags = newflags;
- else
- munlock_vma_pages_range(vma, start, end);
-
+ } else {
+ mlock_vma_pages_range(vma, start, end, newflags);
+ }
out:
*prev = vma;
return ret;
@@ -645,6 +527,18 @@ static unsigned long count_mm_mlocked_page_nr(struct mm_struct *mm,
return count >> PAGE_SHIFT;
}
+/*
+ * convert get_user_pages() return value to posix mlock() error
+ */
+static int __mlock_posix_error_return(long retval)
+{
+ if (retval == -EFAULT)
+ retval = -ENOMEM;
+ else if (retval == -ENOMEM)
+ retval = -EAGAIN;
+ return retval;
+}
+
static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags)
{
unsigned long locked;
@@ -839,6 +733,7 @@ int user_shm_lock(size_t size, struct ucounts *ucounts)
}
if (!get_ucounts(ucounts)) {
dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
+ allowed = 0;
goto out;
}
allowed = 1;
diff --git a/mm/mmap.c b/mm/mmap.c
index 1e8fdb0b51ed..3aa839f81e63 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1031,7 +1031,7 @@ again:
static inline int is_mergeable_vma(struct vm_area_struct *vma,
struct file *file, unsigned long vm_flags,
struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
- const char *anon_name)
+ struct anon_vma_name *anon_name)
{
/*
* VM_SOFTDIRTY should not prevent from VMA merging, if we
@@ -1049,7 +1049,7 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
return 0;
if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
return 0;
- if (!is_same_vma_anon_name(vma, anon_name))
+ if (!anon_vma_name_eq(anon_vma_name(vma), anon_name))
return 0;
return 1;
}
@@ -1084,7 +1084,7 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
struct anon_vma *anon_vma, struct file *file,
pgoff_t vm_pgoff,
struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
- const char *anon_name)
+ struct anon_vma_name *anon_name)
{
if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
@@ -1106,7 +1106,7 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
struct anon_vma *anon_vma, struct file *file,
pgoff_t vm_pgoff,
struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
- const char *anon_name)
+ struct anon_vma_name *anon_name)
{
if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
@@ -1167,7 +1167,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
struct anon_vma *anon_vma, struct file *file,
pgoff_t pgoff, struct mempolicy *policy,
struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
- const char *anon_name)
+ struct anon_vma_name *anon_name)
{
pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
struct vm_area_struct *area, *next;
@@ -1616,8 +1616,6 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
/*
* VM_NORESERVE is used because the reservations will be
* taken when vm_ops->mmap() is called
- * A dummy user value is used because we are not locking
- * memory so no accounting is necessary
*/
file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
VM_NORESERVE,
@@ -2557,7 +2555,7 @@ static int __init cmdline_parse_stack_guard_gap(char *p)
if (!*endptr)
stack_guard_gap = val << PAGE_SHIFT;
- return 0;
+ return 1;
}
__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
@@ -2674,6 +2672,8 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
vma->vm_prev = NULL;
do {
vma_rb_erase(vma, &mm->mm_rb);
+ if (vma->vm_flags & VM_LOCKED)
+ mm->locked_vm -= vma_pages(vma);
mm->map_count--;
tail_vma = vma;
vma = vma->vm_next;
@@ -2778,22 +2778,6 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
return __split_vma(mm, vma, addr, new_below);
}
-static inline void
-unlock_range(struct vm_area_struct *start, unsigned long limit)
-{
- struct mm_struct *mm = start->vm_mm;
- struct vm_area_struct *tmp = start;
-
- while (tmp && tmp->vm_start < limit) {
- if (tmp->vm_flags & VM_LOCKED) {
- mm->locked_vm -= vma_pages(tmp);
- munlock_vma_pages_all(tmp);
- }
-
- tmp = tmp->vm_next;
- }
-}
-
/* Munmap is split into 2 main parts -- this part which finds
* what needs doing, and the areas themselves, which do the
* work. This now handles partial unmappings.
@@ -2874,12 +2858,6 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
return error;
}
- /*
- * unlock any mlock()ed ranges before detaching vmas
- */
- if (mm->locked_vm)
- unlock_range(vma, end);
-
/* Detach vmas from rbtree */
if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
downgrade = false;
@@ -3147,20 +3125,12 @@ void exit_mmap(struct mm_struct *mm)
* Nothing can be holding mm->mmap_lock here and the above call
* to mmu_notifier_release(mm) ensures mmu notifier callbacks in
* __oom_reap_task_mm() will not block.
- *
- * This needs to be done before calling unlock_range(),
- * which clears VM_LOCKED, otherwise the oom reaper cannot
- * reliably test it.
*/
(void)__oom_reap_task_mm(mm);
-
set_bit(MMF_OOM_SKIP, &mm->flags);
}
mmap_write_lock(mm);
- if (mm->locked_vm)
- unlock_range(mm->mmap, ULONG_MAX);
-
arch_exit_mmap(mm);
vma = mm->mmap;
@@ -3186,6 +3156,7 @@ void exit_mmap(struct mm_struct *mm)
vma = remove_vma(vma);
cond_resched();
}
+ mm->mmap = NULL;
mmap_write_unlock(mm);
vm_unacct_memory(nr_accounted);
}
@@ -3255,7 +3226,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
return NULL; /* should never get here */
new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
- vma->vm_userfaultfd_ctx, vma_anon_name(vma));
+ vma->vm_userfaultfd_ctx, anon_vma_name(vma));
if (new_vma) {
/*
* Source vma may have been merged into new_vma
@@ -3447,6 +3418,7 @@ static struct vm_area_struct *__install_special_mapping(
vma->vm_end = addr + len;
vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
+ vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
vma->vm_ops = ops;
diff --git a/mm/mmzone.c b/mm/mmzone.c
index eb89d6e018e2..0ae7571e35ab 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -81,6 +81,13 @@ void lruvec_init(struct lruvec *lruvec)
for_each_lru(lru)
INIT_LIST_HEAD(&lruvec->lists[lru]);
+ /*
+ * The "Unevictable LRU" is imaginary: though its size is maintained,
+ * it is never scanned, and unevictable pages are not threaded on it
+ * (so that their lru fields can be reused to hold mlock_count).
+ * Poison its list head, so that any operations on it would crash.
+ */
+ list_del(&lruvec->lists[LRU_UNEVICTABLE]);
}
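
/*
 * Illustrative note, not part of this patch: list_del() leaves the
 * deleted entry's next/prev pointing at LIST_POISON1/LIST_POISON2, so
 * any later list_add() or list_move() onto lists[LRU_UNEVICTABLE]
 * faults immediately instead of silently threading pages onto a list
 * that is never scanned.
 */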
#if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS)
@@ -89,13 +96,14 @@ int page_cpupid_xchg_last(struct page *page, int cpupid)
unsigned long old_flags, flags;
int last_cpupid;
+ old_flags = READ_ONCE(page->flags);
do {
- old_flags = flags = page->flags;
- last_cpupid = page_cpupid_last(page);
+ flags = old_flags;
+ last_cpupid = (flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK;
flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT);
flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT;
- } while (unlikely(cmpxchg(&page->flags, old_flags, flags) != old_flags));
+ } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags)));
return last_cpupid;
}
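
/*
 * Illustrative sketch, not part of this patch: try_cmpxchg(ptr, &old,
 * new) returns false on failure and updates old with the value it
 * found, so the loop above avoids re-reading page->flags on every
 * retry. Its effect is equivalent to:
 */
static inline bool example_try_cmpxchg(unsigned long *ptr,
				       unsigned long *old, unsigned long new)
{
	unsigned long cur = cmpxchg(ptr, *old, new);

	if (cur == *old)
		return true;
	*old = cur;
	return false;
}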
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 0138dfcdb1d8..b69ce7a7b2b7 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -29,6 +29,7 @@
#include <linux/uaccess.h>
#include <linux/mm_inline.h>
#include <linux/pgtable.h>
+#include <linux/sched/sysctl.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
@@ -83,6 +84,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
*/
if (prot_numa) {
struct page *page;
+ int nid;
/* Avoid TLB flush if possible */
if (pte_protnone(oldpte))
@@ -94,7 +96,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
/* Also skip shared copy-on-write pages */
if (is_cow_mapping(vma->vm_flags) &&
- page_mapcount(page) != 1)
+ page_count(page) != 1)
continue;
/*
@@ -109,7 +111,16 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
* Don't mess with PTEs if page is already on the node
* a single-threaded process is running on.
*/
- if (target_node == page_to_nid(page))
+ nid = page_to_nid(page);
+ if (target_node == nid)
+ continue;
+
+ /*
+ * Skip scanning the top-tier node if normal NUMA
+ * balancing is disabled.
+ */
+ if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
+ node_is_toptier(nid))
continue;
}
@@ -464,7 +475,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
*pprev = vma_merge(mm, *pprev, start, end, newflags,
vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
- vma->vm_userfaultfd_ctx, vma_anon_name(vma));
+ vma->vm_userfaultfd_ctx, anon_vma_name(vma));
if (*pprev) {
vma = *pprev;
VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY);
diff --git a/mm/mremap.c b/mm/mremap.c
index 002eec83e91e..9d76da79594d 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -942,8 +942,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
if (mmap_write_lock_killable(current->mm))
return -EINTR;
- vma = find_vma(mm, addr);
- if (!vma || vma->vm_start > addr) {
+ vma = vma_lookup(mm, addr);
+ if (!vma) {
ret = EFAULT;
goto out;
}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 832fb330376e..7ec38194f8e1 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -93,9 +93,6 @@ static bool oom_cpuset_eligible(struct task_struct *start,
bool ret = false;
const nodemask_t *mask = oc->nodemask;
- if (is_memcg_oom(oc))
- return true;
-
rcu_read_lock();
for_each_thread(start, tsk) {
if (mask) {
@@ -526,7 +523,7 @@ bool __oom_reap_task_mm(struct mm_struct *mm)
set_bit(MMF_UNSTABLE, &mm->flags);
for (vma = mm->mmap ; vma; vma = vma->vm_next) {
- if (!can_madv_lru_vma(vma))
+ if (vma->vm_flags & (VM_HUGETLB|VM_PFNMAP))
continue;
/*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 91d163f8d36b..435c02630593 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -324,18 +324,6 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
}
/*
- * Unreclaimable memory (kernel memory or anonymous memory
- * without swap) can bring down the dirtyable pages below
- * the zone's dirty balance reserve and the above calculation
- * will underflow. However we still want to add in nodes
- * which are below threshold (negative values) to get a more
- * accurate calculation but make sure that the total never
- * underflows.
- */
- if ((long)x < 0)
- x = 0;
-
- /*
* Make sure that the number of highmem pages is never larger
* than the number of the total dirtyable memory. This can only
* occur in very strange VM situations but we want to make sure
@@ -2430,13 +2418,13 @@ EXPORT_SYMBOL(folio_write_one);
/*
* For address_spaces which do not use buffers nor write back.
*/
-int __set_page_dirty_no_writeback(struct page *page)
+bool noop_dirty_folio(struct address_space *mapping, struct folio *folio)
{
- if (!PageDirty(page))
- return !TestSetPageDirty(page);
- return 0;
+ if (!folio_test_dirty(folio))
+ return !folio_test_set_dirty(folio);
+ return false;
}
-EXPORT_SYMBOL(__set_page_dirty_no_writeback);
+EXPORT_SYMBOL(noop_dirty_folio);
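
/*
 * Illustrative sketch, not part of this patch: an address_space that
 * neither uses buffers nor writes back simply wires this helper into
 * its (newly renamed) aops field:
 */
static const struct address_space_operations example_aops = {
	.dirty_folio	= noop_dirty_folio,
	/* no writeback methods: nothing is ever written back */
};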
/*
* Helper function for set_page_dirty family.
@@ -2530,7 +2518,7 @@ void __folio_mark_dirty(struct folio *folio, struct address_space *mapping,
* This is also sometimes used by filesystems which use buffer_heads when
* a single buffer is being dirtied: we want to set the folio dirty in
* that case, but not all the buffers. This is a "bottom-up" dirtying,
- * whereas __set_page_dirty_buffers() is a "top-down" dirtying.
+ * whereas block_dirty_folio() is a "top-down" dirtying.
*
* The caller must ensure this doesn't race with truncation. Most will
* simply hold the folio lock, but e.g. zap_pte_range() calls with the
@@ -2616,7 +2604,7 @@ EXPORT_SYMBOL(folio_redirty_for_writepage);
* folio_mark_dirty - Mark a folio as being modified.
* @folio: The folio.
*
- * For folios with a mapping this should be done under the page lock
+ * For folios with a mapping this should be done with the folio lock held
* for the benefit of asynchronous memory errors that prefer a consistent
* dirty state. This rule can be broken in some special cases,
* but it is better not to.
@@ -2630,23 +2618,21 @@ bool folio_mark_dirty(struct folio *folio)
if (likely(mapping)) {
/*
* readahead/lru_deactivate_page could remain
- * PG_readahead/PG_reclaim due to race with end_page_writeback
- * About readahead, if the page is written, the flags would be
+ * PG_readahead/PG_reclaim due to race with folio_end_writeback.
+ * About readahead, if the folio is written, the flags would be
* reset. So no problem.
- * About lru_deactivate_page, if the page is redirty, the flag
- * will be reset. So no problem. but if the page is used by readahead
- * it will confuse readahead and make it restart the size rampup
- * process. But it's a trivial problem.
+ * About lru_deactivate_page, if the folio is redirtied,
+ * the flag will be reset. So no problem. But if the
+ * folio is used by readahead it will confuse readahead
+ * and make it restart the size rampup process. But it's
+ * a trivial problem.
*/
if (folio_test_reclaim(folio))
folio_clear_reclaim(folio);
- return mapping->a_ops->set_page_dirty(&folio->page);
+ return mapping->a_ops->dirty_folio(mapping, folio);
}
- if (!folio_test_dirty(folio)) {
- if (!folio_test_set_dirty(folio))
- return true;
- }
- return false;
+
+ return noop_dirty_folio(mapping, folio);
}
EXPORT_SYMBOL(folio_mark_dirty);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3589febc6d31..6e0b4596cde9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -128,7 +128,7 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
struct pagesets {
local_lock_t lock;
};
-static DEFINE_PER_CPU(struct pagesets, pagesets) = {
+static DEFINE_PER_CPU(struct pagesets, pagesets) __maybe_unused = {
.lock = INIT_LOCAL_LOCK(lock),
};
@@ -734,8 +734,7 @@ static void prep_compound_head(struct page *page, unsigned int order)
set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
set_compound_order(page, order);
atomic_set(compound_mapcount_ptr(page), -1);
- if (hpage_pincount_available(page))
- atomic_set(compound_pincount_ptr(page), 0);
+ atomic_set(compound_pincount_ptr(page), 0);
}
static void prep_compound_tail(struct page *head, int tail_idx)
@@ -1072,14 +1071,12 @@ static inline void __free_one_page(struct page *page,
int migratetype, fpi_t fpi_flags)
{
struct capture_control *capc = task_capc(zone);
+ unsigned int max_order = pageblock_order;
unsigned long buddy_pfn;
unsigned long combined_pfn;
- unsigned int max_order;
struct page *buddy;
bool to_tail;
- max_order = min_t(unsigned int, MAX_ORDER - 1, pageblock_order);
-
VM_BUG_ON(!zone_is_initialized(zone));
VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
@@ -1117,25 +1114,24 @@ continue_merging:
}
if (order < MAX_ORDER - 1) {
/* If we are here, it means order is >= pageblock_order.
- * We want to prevent merge between freepages on isolate
- * pageblock and normal pageblock. Without this, pageblock
- * isolation could cause incorrect freepage or CMA accounting.
+ * We want to prevent merge between freepages on pageblock
+ * without fallbacks and normal pageblock. Without this,
+ * pageblock isolation could cause incorrect freepage or CMA
+ * accounting or HIGHATOMIC accounting.
*
* We don't want to hit this code for the more frequent
* low-order merging.
*/
- if (unlikely(has_isolate_pageblock(zone))) {
- int buddy_mt;
+ int buddy_mt;
- buddy_pfn = __find_buddy_pfn(pfn, order);
- buddy = page + (buddy_pfn - pfn);
- buddy_mt = get_pageblock_migratetype(buddy);
+ buddy_pfn = __find_buddy_pfn(pfn, order);
+ buddy = page + (buddy_pfn - pfn);
+ buddy_mt = get_pageblock_migratetype(buddy);
- if (migratetype != buddy_mt
- && (is_migrate_isolate(migratetype) ||
- is_migrate_isolate(buddy_mt)))
- goto done_merging;
- }
+ if (migratetype != buddy_mt
+ && (!migratetype_is_mergeable(migratetype) ||
+ !migratetype_is_mergeable(buddy_mt)))
+ goto done_merging;
max_order = order + 1;
goto continue_merging;
}
@@ -1432,120 +1428,83 @@ static bool bulkfree_pcp_prepare(struct page *page)
}
#endif /* CONFIG_DEBUG_VM */
-static inline void prefetch_buddy(struct page *page)
-{
- unsigned long pfn = page_to_pfn(page);
- unsigned long buddy_pfn = __find_buddy_pfn(pfn, 0);
- struct page *buddy = page + (buddy_pfn - pfn);
-
- prefetch(buddy);
-}
-
/*
* Frees a number of pages from the PCP lists
* Assumes all pages on list are in same zone.
* count is the number of pages to free.
*/
static void free_pcppages_bulk(struct zone *zone, int count,
- struct per_cpu_pages *pcp)
+ struct per_cpu_pages *pcp,
+ int pindex)
{
- int pindex = 0;
- int batch_free = 0;
- int nr_freed = 0;
+ int min_pindex = 0;
+ int max_pindex = NR_PCP_LISTS - 1;
unsigned int order;
- int prefetch_nr = READ_ONCE(pcp->batch);
bool isolated_pageblocks;
- struct page *page, *tmp;
- LIST_HEAD(head);
+ struct page *page;
/*
* Ensure proper count is passed which otherwise would stuck in the
* below while (list_empty(list)) loop.
*/
count = min(pcp->count, count);
+
+ /* Ensure requested pindex is drained first. */
+ pindex = pindex - 1;
+
+ /*
+ * local_lock_irq held so equivalent to spin_lock_irqsave for
+ * both PREEMPT_RT and non-PREEMPT_RT configurations.
+ */
+ spin_lock(&zone->lock);
+ isolated_pageblocks = has_isolate_pageblock(zone);
+
while (count > 0) {
struct list_head *list;
+ int nr_pages;
- /*
- * Remove pages from lists in a round-robin fashion. A
- * batch_free count is maintained that is incremented when an
- * empty list is encountered. This is so more pages are freed
- * off fuller lists instead of spinning excessively around empty
- * lists
- */
+ /* Remove pages from lists in a round-robin fashion. */
do {
- batch_free++;
- if (++pindex == NR_PCP_LISTS)
- pindex = 0;
+ if (++pindex > max_pindex)
+ pindex = min_pindex;
list = &pcp->lists[pindex];
- } while (list_empty(list));
+ if (!list_empty(list))
+ break;
- /* This is the only non-empty list. Free them all. */
- if (batch_free == NR_PCP_LISTS)
- batch_free = count;
+ if (pindex == max_pindex)
+ max_pindex--;
+ if (pindex == min_pindex)
+ min_pindex++;
+ } while (1);
order = pindex_to_order(pindex);
+ nr_pages = 1 << order;
BUILD_BUG_ON(MAX_ORDER >= (1<<NR_PCP_ORDER_WIDTH));
do {
+ int mt;
+
page = list_last_entry(list, struct page, lru);
+ mt = get_pcppage_migratetype(page);
+
/* must delete to avoid corrupting pcp list */
list_del(&page->lru);
- nr_freed += 1 << order;
- count -= 1 << order;
+ count -= nr_pages;
+ pcp->count -= nr_pages;
if (bulkfree_pcp_prepare(page))
continue;
- /* Encode order with the migratetype */
- page->index <<= NR_PCP_ORDER_WIDTH;
- page->index |= order;
+ /* MIGRATE_ISOLATE page should not go to pcplists */
+ VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
+ /* Pageblock could have been isolated meanwhile */
+ if (unlikely(isolated_pageblocks))
+ mt = get_pageblock_migratetype(page);
- list_add_tail(&page->lru, &head);
-
- /*
- * We are going to put the page back to the global
- * pool, prefetch its buddy to speed up later access
- * under zone->lock. It is believed the overhead of
- * an additional test and calculating buddy_pfn here
- * can be offset by reduced memory latency later. To
- * avoid excessive prefetching due to large count, only
- * prefetch buddy for the first pcp->batch nr of pages.
- */
- if (prefetch_nr) {
- prefetch_buddy(page);
- prefetch_nr--;
- }
- } while (count > 0 && --batch_free && !list_empty(list));
+ __free_one_page(page, page_to_pfn(page), zone, order, mt, FPI_NONE);
+ trace_mm_page_pcpu_drain(page, order, mt);
+ } while (count > 0 && !list_empty(list));
}
- pcp->count -= nr_freed;
-
- /*
- * local_lock_irq held so equivalent to spin_lock_irqsave for
- * both PREEMPT_RT and non-PREEMPT_RT configurations.
- */
- spin_lock(&zone->lock);
- isolated_pageblocks = has_isolate_pageblock(zone);
-
- /*
- * Use safe version since after __free_one_page(),
- * page->lru.next will not point to original list.
- */
- list_for_each_entry_safe(page, tmp, &head, lru) {
- int mt = get_pcppage_migratetype(page);
-
- /* mt has been encoded with the order (see above) */
- order = mt & NR_PCP_ORDER_MASK;
- mt >>= NR_PCP_ORDER_WIDTH;
- /* MIGRATE_ISOLATE page should not go to pcplists */
- VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
- /* Pageblock could have been isolated meanwhile */
- if (unlikely(isolated_pageblocks))
- mt = get_pageblock_migratetype(page);
-
- __free_one_page(page, page_to_pfn(page), zone, order, mt, FPI_NONE);
- trace_mm_page_pcpu_drain(page, order, mt);
- }
spin_unlock(&zone->lock);
}
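[Annotation] The rewritten loop replaces the old batch_free counter with a shrinking [min_pindex, max_pindex] window, so a list found empty at either edge of the window is never re-examined. A standalone sketch of just that scan, detached from the kernel types (list emptiness modeled with an array of counts):

    #include <stdio.h>

    #define NR_LISTS	8

    /* Mirror of the pindex scan above: advance round-robin, shrink the
     * window when an empty list is seen at its edge. Returns -1 once all
     * lists are empty (the kernel loop cannot hit that case because
     * count is clamped to pcp->count beforehand). */
    static int next_nonempty(const int count[NR_LISTS], int *pindex,
    			 int *min_pindex, int *max_pindex)
    {
    	while (*min_pindex <= *max_pindex) {
    		if (++*pindex > *max_pindex)
    			*pindex = *min_pindex;
    		if (count[*pindex])
    			return *pindex;
    		if (*pindex == *max_pindex)
    			--*max_pindex;
    		if (*pindex == *min_pindex)
    			++*min_pindex;
    	}
    	return -1;
    }

    int main(void)
    {
    	int count[NR_LISTS] = { 0, 3, 0, 0, 1, 0, 0, 2 };
    	int pindex = -1, min_pindex = 0, max_pindex = NR_LISTS - 1;
    	int i;

    	while ((i = next_nonempty(count, &pindex, &min_pindex,
    				  &max_pindex)) >= 0) {
    		printf("drain one page from list %d\n", i);
    		count[i]--;
    	}
    	return 0;
    }
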
@@ -2260,19 +2219,8 @@ void __init init_cma_reserved_pageblock(struct page *page)
} while (++p, --i);
set_pageblock_migratetype(page, MIGRATE_CMA);
-
- if (pageblock_order >= MAX_ORDER) {
- i = pageblock_nr_pages;
- p = page;
- do {
- set_page_refcounted(p);
- __free_pages(p, MAX_ORDER - 1);
- p += MAX_ORDER_NR_PAGES;
- } while (i -= MAX_ORDER_NR_PAGES);
- } else {
- set_page_refcounted(page);
- __free_pages(page, pageblock_order);
- }
+ set_page_refcounted(page);
+ __free_pages(page, pageblock_order);
adjust_managed_page_count(page, pageblock_nr_pages);
page_zone(page)->cma_pages += pageblock_nr_pages;
@@ -2342,23 +2290,36 @@ static inline int check_new_page(struct page *page)
return 1;
}
+static bool check_new_pages(struct page *page, unsigned int order)
+{
+ int i;
+ for (i = 0; i < (1 << order); i++) {
+ struct page *p = page + i;
+
+ if (unlikely(check_new_page(p)))
+ return true;
+ }
+
+ return false;
+}
+
#ifdef CONFIG_DEBUG_VM
/*
* With DEBUG_VM enabled, order-0 pages are checked for expected state when
* being allocated from pcp lists. With debug_pagealloc also enabled, they are
* also checked when pcp lists are refilled from the free lists.
*/
-static inline bool check_pcp_refill(struct page *page)
+static inline bool check_pcp_refill(struct page *page, unsigned int order)
{
if (debug_pagealloc_enabled_static())
- return check_new_page(page);
+ return check_new_pages(page, order);
else
return false;
}
-static inline bool check_new_pcp(struct page *page)
+static inline bool check_new_pcp(struct page *page, unsigned int order)
{
- return check_new_page(page);
+ return check_new_pages(page, order);
}
#else
/*
@@ -2366,32 +2327,19 @@ static inline bool check_new_pcp(struct page *page)
* when pcp lists are being refilled from the free lists. With debug_pagealloc
* enabled, they are also checked when being allocated from the pcp lists.
*/
-static inline bool check_pcp_refill(struct page *page)
+static inline bool check_pcp_refill(struct page *page, unsigned int order)
{
- return check_new_page(page);
+ return check_new_pages(page, order);
}
-static inline bool check_new_pcp(struct page *page)
+static inline bool check_new_pcp(struct page *page, unsigned int order)
{
if (debug_pagealloc_enabled_static())
- return check_new_page(page);
+ return check_new_pages(page, order);
else
return false;
}
#endif /* CONFIG_DEBUG_VM */
-static bool check_new_pages(struct page *page, unsigned int order)
-{
- int i;
- for (i = 0; i < (1 << order); i++) {
- struct page *p = page + i;
-
- if (unlikely(check_new_page(p)))
- return true;
- }
-
- return false;
-}
-
inline void post_alloc_hook(struct page *page, unsigned int order,
gfp_t gfp_flags)
{
@@ -2479,17 +2427,13 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
/*
* This array describes the order lists are fallen back to when
* the free lists for the desirable migrate type are depleted
+ *
+ * The other migratetypes do not have fallbacks.
*/
static int fallbacks[MIGRATE_TYPES][3] = {
[MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_TYPES },
[MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES },
[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_TYPES },
-#ifdef CONFIG_CMA
- [MIGRATE_CMA] = { MIGRATE_TYPES }, /* Never used */
-#endif
-#ifdef CONFIG_MEMORY_ISOLATION
- [MIGRATE_ISOLATE] = { MIGRATE_TYPES }, /* Never used */
-#endif
};
#ifdef CONFIG_CMA
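[Annotation] migratetype_is_mergeable() itself is added to include/linux/mmzone.h earlier in this series; the fallback table above only ever contains the three types it accepts. Its expected definition is a simple bound check (sketch, relying on the fallbackable types sitting below MIGRATE_PCPTYPES in the enum):

    /*
     * MIGRATE_UNMOVABLE, MIGRATE_MOVABLE and MIGRATE_RECLAIMABLE sit below
     * MIGRATE_PCPTYPES; MIGRATE_HIGHATOMIC, MIGRATE_CMA and MIGRATE_ISOLATE
     * do not, and must never be merged with or converted to other types.
     */
    static inline bool migratetype_is_mergeable(int mt)
    {
    	return mt < MIGRATE_PCPTYPES;
    }
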
@@ -2795,8 +2739,8 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
/* Yoink! */
mt = get_pageblock_migratetype(page);
- if (!is_migrate_highatomic(mt) && !is_migrate_isolate(mt)
- && !is_migrate_cma(mt)) {
+ /* Only reserve normal pageblocks (i.e., they can merge with others) */
+ if (migratetype_is_mergeable(mt)) {
zone->nr_reserved_highatomic += pageblock_nr_pages;
set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
move_freepages_block(zone, page, MIGRATE_HIGHATOMIC, NULL);
@@ -3037,7 +2981,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
if (unlikely(page == NULL))
break;
- if (unlikely(check_pcp_refill(page)))
+ if (unlikely(check_pcp_refill(page, order)))
continue;
/*
@@ -3086,7 +3030,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
batch = READ_ONCE(pcp->batch);
to_drain = min(pcp->count, batch);
if (to_drain > 0)
- free_pcppages_bulk(zone, to_drain, pcp);
+ free_pcppages_bulk(zone, to_drain, pcp, 0);
local_unlock_irqrestore(&pagesets.lock, flags);
}
#endif
@@ -3107,7 +3051,7 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
if (pcp->count)
- free_pcppages_bulk(zone, pcp->count, pcp);
+ free_pcppages_bulk(zone, pcp->count, pcp, 0);
local_unlock_irqrestore(&pagesets.lock, flags);
}
@@ -3330,10 +3274,15 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn,
return true;
}
-static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch)
+static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch,
+ bool free_high)
{
int min_nr_free, max_nr_free;
+ /* Free everything if batch freeing high-order pages. */
+ if (unlikely(free_high))
+ return pcp->count;
+
/* Check for PCP disabled or boot pageset */
if (unlikely(high < batch))
return 1;
@@ -3354,11 +3303,12 @@ static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch)
return batch;
}
-static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone)
+static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone,
+ bool free_high)
{
int high = READ_ONCE(pcp->high);
- if (unlikely(!high))
+ if (unlikely(!high || free_high))
return 0;
if (!test_bit(ZONE_RECLAIM_ACTIVE, &zone->flags))
@@ -3371,24 +3321,34 @@ static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone)
return min(READ_ONCE(pcp->batch) << 2, high);
}
-static void free_unref_page_commit(struct page *page, unsigned long pfn,
- int migratetype, unsigned int order)
+static void free_unref_page_commit(struct page *page, int migratetype,
+ unsigned int order)
{
struct zone *zone = page_zone(page);
struct per_cpu_pages *pcp;
int high;
int pindex;
+ bool free_high;
__count_vm_event(PGFREE);
pcp = this_cpu_ptr(zone->per_cpu_pageset);
pindex = order_to_pindex(migratetype, order);
list_add(&page->lru, &pcp->lists[pindex]);
pcp->count += 1 << order;
- high = nr_pcp_high(pcp, zone);
+
+ /*
+ * As high-order pages other than THP's stored on PCP can contribute
+ * to fragmentation, limit the number stored when PCP is heavily
+ * freeing without allocation. The remainder after bulk freeing
+ * stops will be drained from vmstat refresh context.
+ */
+ free_high = (pcp->free_factor && order && order <= PAGE_ALLOC_COSTLY_ORDER);
+
+ high = nr_pcp_high(pcp, zone, free_high);
if (pcp->count >= high) {
int batch = READ_ONCE(pcp->batch);
- free_pcppages_bulk(zone, nr_pcp_free(pcp, high, batch), pcp);
+ free_pcppages_bulk(zone, nr_pcp_free(pcp, high, batch, free_high), pcp, pindex);
}
}
@@ -3421,7 +3381,7 @@ void free_unref_page(struct page *page, unsigned int order)
}
local_lock_irqsave(&pagesets.lock, flags);
- free_unref_page_commit(page, pfn, migratetype, order);
+ free_unref_page_commit(page, migratetype, order);
local_unlock_irqrestore(&pagesets.lock, flags);
}
@@ -3431,13 +3391,13 @@ void free_unref_page(struct page *page, unsigned int order)
void free_unref_page_list(struct list_head *list)
{
struct page *page, *next;
- unsigned long flags, pfn;
+ unsigned long flags;
int batch_count = 0;
int migratetype;
/* Prepare pages for freeing */
list_for_each_entry_safe(page, next, list, lru) {
- pfn = page_to_pfn(page);
+ unsigned long pfn = page_to_pfn(page);
if (!free_unref_page_prepare(page, pfn, 0)) {
list_del(&page->lru);
continue;
@@ -3453,15 +3413,10 @@ void free_unref_page_list(struct list_head *list)
free_one_page(page_zone(page), page, pfn, 0, migratetype, FPI_NONE);
continue;
}
-
- set_page_private(page, pfn);
}
local_lock_irqsave(&pagesets.lock, flags);
list_for_each_entry_safe(page, next, list, lru) {
- pfn = page_private(page);
- set_page_private(page, 0);
-
/*
* Non-isolated types over MIGRATE_PCPTYPES get added
* to the MIGRATE_MOVABLE pcp list.
@@ -3471,7 +3426,7 @@ void free_unref_page_list(struct list_head *list)
migratetype = MIGRATE_MOVABLE;
trace_mm_page_free_batched(page);
- free_unref_page_commit(page, pfn, migratetype, 0);
+ free_unref_page_commit(page, migratetype, 0);
/*
* Guard against excessive IRQ disabled times when we get
@@ -3545,8 +3500,11 @@ int __isolate_free_page(struct page *page, unsigned int order)
struct page *endpage = page + (1 << order) - 1;
for (; page < endpage; page += pageblock_nr_pages) {
int mt = get_pageblock_migratetype(page);
- if (!is_migrate_isolate(mt) && !is_migrate_cma(mt)
- && !is_migrate_highatomic(mt))
+ /*
+ * Only change normal pageblocks (i.e., they can merge
+ * with others)
+ */
+ if (migratetype_is_mergeable(mt))
set_pageblock_migratetype(page,
MIGRATE_MOVABLE);
}
@@ -3641,7 +3599,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
page = list_first_entry(list, struct page, lru);
list_del(&page->lru);
pcp->count -= 1 << order;
- } while (check_new_pcp(page));
+ } while (check_new_pcp(page, order));
return page;
}
@@ -3706,10 +3664,10 @@ struct page *rmqueue(struct zone *preferred_zone,
* allocate greater than order-1 page units with __GFP_NOFAIL.
*/
WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
- spin_lock_irqsave(&zone->lock, flags);
do {
page = NULL;
+ spin_lock_irqsave(&zone->lock, flags);
/*
* order-0 request can reach here when the pcplist is skipped
* due to non-CMA allocation context. HIGHATOMIC area is
@@ -3721,15 +3679,15 @@ struct page *rmqueue(struct zone *preferred_zone,
if (page)
trace_mm_page_alloc_zone_locked(page, order, migratetype);
}
- if (!page)
+ if (!page) {
page = __rmqueue(zone, order, migratetype, alloc_flags);
- } while (page && check_new_pages(page, order));
- if (!page)
- goto failed;
-
- __mod_zone_freepage_state(zone, -(1 << order),
- get_pcppage_migratetype(page));
- spin_unlock_irqrestore(&zone->lock, flags);
+ if (!page)
+ goto failed;
+ }
+ __mod_zone_freepage_state(zone, -(1 << order),
+ get_pcppage_migratetype(page));
+ spin_unlock_irqrestore(&zone->lock, flags);
+ } while (check_new_pages(page, order));
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
zone_statistics(preferred_zone, zone, 1);
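[Annotation] With this hunk, zone->lock is taken and dropped inside the retry loop, so check_new_pages() — which walks all 1 << order pages and can be expensive under DEBUG_VM — runs with the lock released. The shape of the loop, reduced (sketch; dequeue_one() is a hypothetical stand-in for the __rmqueue_smallest()/__rmqueue() pair):

    do {
    	spin_lock_irqsave(&zone->lock, flags);
    	page = dequeue_one(zone, order, migratetype);	/* hypothetical */
    	if (!page) {
    		spin_unlock_irqrestore(&zone->lock, flags);
    		goto failed;
    	}
    	__mod_zone_freepage_state(zone, -(1 << order),
    				  get_pcppage_migratetype(page));
    	spin_unlock_irqrestore(&zone->lock, flags);
    	/* A page failing the check has already been reported by
    	 * bad_page() and is simply not used; dequeue another one. */
    } while (check_new_pages(page, order));
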
@@ -4595,13 +4553,12 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
const struct alloc_context *ac)
{
unsigned int noreclaim_flag;
- unsigned long pflags, progress;
+ unsigned long progress;
cond_resched();
/* We now go into synchronous reclaim */
cpuset_memory_pressure_bump();
- psi_memstall_enter(&pflags);
fs_reclaim_acquire(gfp_mask);
noreclaim_flag = memalloc_noreclaim_save();
@@ -4610,7 +4567,6 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
memalloc_noreclaim_restore(noreclaim_flag);
fs_reclaim_release(gfp_mask);
- psi_memstall_leave(&pflags);
cond_resched();
@@ -4624,11 +4580,13 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
unsigned long *did_some_progress)
{
struct page *page = NULL;
+ unsigned long pflags;
bool drained = false;
+ psi_memstall_enter(&pflags);
*did_some_progress = __perform_reclaim(gfp_mask, order, ac);
if (unlikely(!(*did_some_progress)))
- return NULL;
+ goto out;
retry:
page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
@@ -4644,6 +4602,8 @@ retry:
drained = true;
goto retry;
}
+out:
+ psi_memstall_leave(&pflags);
return page;
}
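[Annotation] The effect of hoisting psi_memstall_enter()/leave() up a level is that one stall window now covers drain_all_pages() and the second get_page_from_freelist() attempt, not only the reclaim itself. Reduced control flow (sketch; the lower-case helpers are hypothetical stand-ins for the functions in this hunk):

    psi_memstall_enter(&pflags);
    if (!perform_reclaim(gfp_mask, order))	/* __perform_reclaim() */
    	goto out;
    retry:
    page = try_freelist(gfp_mask, order);	/* get_page_from_freelist() */
    if (!page && !drained) {
    	drain_pcp_lists();			/* drain_all_pages() */
    	drained = true;
    	goto retry;
    }
    out:
    psi_memstall_leave(&pflags);	/* reclaim + drain + retry all accounted */
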
@@ -6380,7 +6340,7 @@ static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonesta
#define BOOT_PAGESET_BATCH 1
static DEFINE_PER_CPU(struct per_cpu_pages, boot_pageset);
static DEFINE_PER_CPU(struct per_cpu_zonestat, boot_zonestats);
-static DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
+DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
static void __build_all_zonelists(void *data)
{
@@ -6402,7 +6362,11 @@ static void __build_all_zonelists(void *data)
if (self && !node_online(self->node_id)) {
build_zonelists(self);
} else {
- for_each_online_node(nid) {
+ /*
+ * All possible nodes have pgdat preallocated
+ * in free_area_init
+ */
+ for_each_node(nid) {
pg_data_t *pgdat = NODE_DATA(nid);
build_zonelists(pgdat);
@@ -7389,16 +7353,15 @@ static inline void setup_usemap(struct zone *zone) {}
/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
void __init set_pageblock_order(void)
{
- unsigned int order;
+ unsigned int order = MAX_ORDER - 1;
/* Check that pageblock_nr_pages has not already been setup */
if (pageblock_order)
return;
- if (HPAGE_SHIFT > PAGE_SHIFT)
+ /* Don't let pageblocks exceed the maximum allocation granularity. */
+ if (HPAGE_SHIFT > PAGE_SHIFT && HUGETLB_PAGE_ORDER < order)
order = HUGETLB_PAGE_ORDER;
- else
- order = MAX_ORDER - 1;
/*
* Assume the largest contiguous order of interest is a huge page.
@@ -7502,12 +7465,33 @@ static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx,
* NOTE: this function is only called during memory hotplug
*/
#ifdef CONFIG_MEMORY_HOTPLUG
-void __ref free_area_init_core_hotplug(int nid)
+void __ref free_area_init_core_hotplug(struct pglist_data *pgdat)
{
+ int nid = pgdat->node_id;
enum zone_type z;
- pg_data_t *pgdat = NODE_DATA(nid);
+ int cpu;
pgdat_init_internals(pgdat);
+
+ if (pgdat->per_cpu_nodestats == &boot_nodestats)
+ pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
+
+ /*
+ * Reset the nr_zones, order and highest_zoneidx before reuse.
+ * Note that kswapd will init kswapd_highest_zoneidx properly
+ * when it starts in the near future.
+ */
+ pgdat->nr_zones = 0;
+ pgdat->kswapd_order = 0;
+ pgdat->kswapd_highest_zoneidx = 0;
+ pgdat->node_start_pfn = 0;
+ for_each_online_cpu(cpu) {
+ struct per_cpu_nodestat *p;
+
+ p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
+ memset(p, 0, sizeof(*p));
+ }
+
for (z = 0; z < MAX_NR_ZONES; z++)
zone_init_internals(&pgdat->node_zones[z], z, nid, 0);
}
@@ -7657,9 +7641,14 @@ static void __init free_area_init_node(int nid)
pgdat->node_start_pfn = start_pfn;
pgdat->per_cpu_nodestats = NULL;
- pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
- (u64)start_pfn << PAGE_SHIFT,
- end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0);
+ if (start_pfn != end_pfn) {
+ pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
+ (u64)start_pfn << PAGE_SHIFT,
+ end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0);
+ } else {
+ pr_info("Initmem setup node %d as memoryless\n", nid);
+ }
+
calculate_node_totalpages(pgdat, start_pfn, end_pfn);
alloc_node_mem_map(pgdat);
@@ -7668,7 +7657,7 @@ static void __init free_area_init_node(int nid)
free_area_init_core(pgdat);
}
-void __init free_area_init_memoryless_node(int nid)
+static void __init free_area_init_memoryless_node(int nid)
{
free_area_init_node(nid);
}
@@ -7972,10 +7961,17 @@ restart:
out2:
/* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */
- for (nid = 0; nid < MAX_NUMNODES; nid++)
+ for (nid = 0; nid < MAX_NUMNODES; nid++) {
+ unsigned long start_pfn, end_pfn;
+
zone_movable_pfn[nid] =
roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
+ get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+ if (zone_movable_pfn[nid] >= end_pfn)
+ zone_movable_pfn[nid] = 0;
+ }
+
out:
/* restore the node_state */
node_states[N_MEMORY] = saved_node_state;
@@ -8096,8 +8092,36 @@ void __init free_area_init(unsigned long *max_zone_pfn)
/* Initialise every node */
mminit_verify_pageflags_layout();
setup_nr_node_ids();
- for_each_online_node(nid) {
- pg_data_t *pgdat = NODE_DATA(nid);
+ for_each_node(nid) {
+ pg_data_t *pgdat;
+
+ if (!node_online(nid)) {
+ pr_info("Initializing node %d as memoryless\n", nid);
+
+ /* Allocator not initialized yet */
+ pgdat = arch_alloc_nodedata(nid);
+ if (!pgdat) {
+ pr_err("Cannot allocate %zuB for node %d.\n",
+ sizeof(*pgdat), nid);
+ continue;
+ }
+ arch_refresh_nodedata(nid, pgdat);
+ free_area_init_memoryless_node(nid);
+
+ /*
+ * We do not want to confuse userspace by sysfs
+ * files/directories for node without any memory
+ * attached to it, so this node is not marked as
+ * N_MEMORY and not marked online so that no sysfs
+ * hierarchy will be created via register_one_node for
+ * it. The pgdat will get fully initialized by
+ * hotadd_init_pgdat() when memory is hotplugged into
+ * this node.
+ */
+ continue;
+ }
+
+ pgdat = NODE_DATA(nid);
free_area_init_node(nid);
/* Any memory on that node */
@@ -8474,7 +8498,8 @@ static void __setup_per_zone_wmarks(void)
zone->watermark_boost = 0;
zone->_watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp;
- zone->_watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2;
+ zone->_watermark[WMARK_HIGH] = low_wmark_pages(zone) + tmp;
+ zone->_watermark[WMARK_PROMO] = high_wmark_pages(zone) + tmp;
spin_unlock_irqrestore(&zone->lock, flags);
}
@@ -8986,14 +9011,12 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page,
#ifdef CONFIG_CONTIG_ALLOC
static unsigned long pfn_max_align_down(unsigned long pfn)
{
- return pfn & ~(max_t(unsigned long, MAX_ORDER_NR_PAGES,
- pageblock_nr_pages) - 1);
+ return ALIGN_DOWN(pfn, MAX_ORDER_NR_PAGES);
}
static unsigned long pfn_max_align_up(unsigned long pfn)
{
- return ALIGN(pfn, max_t(unsigned long, MAX_ORDER_NR_PAGES,
- pageblock_nr_pages));
+ return ALIGN(pfn, MAX_ORDER_NR_PAGES);
}
#if defined(CONFIG_DYNAMIC_DEBUG) || \
@@ -9452,6 +9475,7 @@ bool is_free_buddy_page(struct page *page)
return order < MAX_ORDER;
}
+EXPORT_SYMBOL(is_free_buddy_page);
#ifdef CONFIG_MEMORY_FAILURE
/*
diff --git a/mm/page_idle.c b/mm/page_idle.c
index edead6a8a5f9..fc0435abf909 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -13,6 +13,8 @@
#include <linux/page_ext.h>
#include <linux/page_idle.h>
+#include "internal.h"
+
#define BITMAP_CHUNK_SIZE sizeof(u64)
#define BITMAP_CHUNK_BITS (BITMAP_CHUNK_SIZE * BITS_PER_BYTE)
@@ -44,15 +46,11 @@ static struct page *page_idle_get_page(unsigned long pfn)
return page;
}
-static bool page_idle_clear_pte_refs_one(struct page *page,
+static bool page_idle_clear_pte_refs_one(struct folio *folio,
struct vm_area_struct *vma,
unsigned long addr, void *arg)
{
- struct page_vma_mapped_walk pvmw = {
- .page = page,
- .vma = vma,
- .address = addr,
- };
+ DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
bool referenced = false;
while (page_vma_mapped_walk(&pvmw)) {
@@ -74,41 +72,41 @@ static bool page_idle_clear_pte_refs_one(struct page *page,
}
if (referenced) {
- clear_page_idle(page);
+ folio_clear_idle(folio);
/*
* We cleared the referenced bit in a mapping to this page. To
* avoid interference with page reclaim, mark it young so that
- * page_referenced() will return > 0.
+ * folio_referenced() will return > 0.
*/
- set_page_young(page);
+ folio_set_young(folio);
}
return true;
}
static void page_idle_clear_pte_refs(struct page *page)
{
+ struct folio *folio = page_folio(page);
/*
* Since rwc.arg is unused, rwc is effectively immutable, so we
* can make it static const to save some cycles and stack.
*/
static const struct rmap_walk_control rwc = {
.rmap_one = page_idle_clear_pte_refs_one,
- .anon_lock = page_lock_anon_vma_read,
+ .anon_lock = folio_lock_anon_vma_read,
};
bool need_lock;
- if (!page_mapped(page) ||
- !page_rmapping(page))
+ if (!folio_mapped(folio) || !folio_raw_mapping(folio))
return;
- need_lock = !PageAnon(page) || PageKsm(page);
- if (need_lock && !trylock_page(page))
+ need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
+ if (need_lock && !folio_trylock(folio))
return;
- rmap_walk(page, (struct rmap_walk_control *)&rwc);
+ rmap_walk(folio, &rwc);
if (need_lock)
- unlock_page(page);
+ folio_unlock(folio);
}
static ssize_t page_idle_bitmap_read(struct file *file, struct kobject *kobj,
diff --git a/mm/page_io.c b/mm/page_io.c
index 0bf8e40f4e57..b417f000b49e 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -338,10 +338,10 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
return 0;
}
- bio = bio_alloc(GFP_NOIO, 1);
- bio_set_dev(bio, sis->bdev);
+ bio = bio_alloc(sis->bdev, 1,
+ REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc),
+ GFP_NOIO);
bio->bi_iter.bi_sector = swap_page_sector(page);
- bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
bio->bi_end_io = end_write_func;
bio_add_page(bio, page, thp_size(page), 0);
@@ -359,6 +359,7 @@ int swap_readpage(struct page *page, bool synchronous)
struct bio *bio;
int ret = 0;
struct swap_info_struct *sis = page_swap_info(page);
+ bool workingset = PageWorkingset(page);
unsigned long pflags;
VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page);
@@ -370,7 +371,8 @@ int swap_readpage(struct page *page, bool synchronous)
* or the submitting cgroup IO-throttled, submission can be a
* significant part of overall IO time.
*/
- psi_memstall_enter(&pflags);
+ if (workingset)
+ psi_memstall_enter(&pflags);
delayacct_swapin_start();
if (frontswap_load(page) == 0) {
@@ -403,9 +405,7 @@ int swap_readpage(struct page *page, bool synchronous)
}
ret = 0;
- bio = bio_alloc(GFP_KERNEL, 1);
- bio_set_dev(bio, sis->bdev);
- bio->bi_opf = REQ_OP_READ;
+ bio = bio_alloc(sis->bdev, 1, REQ_OP_READ, GFP_KERNEL);
bio->bi_iter.bi_sector = swap_page_sector(page);
bio->bi_end_io = end_swap_bio_read;
bio_add_page(bio, page, thp_size(page), 0);
@@ -433,21 +433,25 @@ int swap_readpage(struct page *page, bool synchronous)
bio_put(bio);
out:
- psi_memstall_leave(&pflags);
+ if (workingset)
+ psi_memstall_leave(&pflags);
delayacct_swapin_end();
return ret;
}
-int swap_set_page_dirty(struct page *page)
+bool swap_dirty_folio(struct address_space *mapping, struct folio *folio)
{
- struct swap_info_struct *sis = page_swap_info(page);
+ struct swap_info_struct *sis = swp_swap_info(folio_swap_entry(folio));
if (data_race(sis->flags & SWP_FS_OPS)) {
- struct address_space *mapping = sis->swap_file->f_mapping;
+ const struct address_space_operations *aops;
+
+ mapping = sis->swap_file->f_mapping;
+ aops = mapping->a_ops;
- VM_BUG_ON_PAGE(!PageSwapCache(page), page);
- return mapping->a_ops->set_page_dirty(page);
+ VM_BUG_ON_FOLIO(!folio_test_swapcache(folio), folio);
+ return aops->dirty_folio(mapping, folio);
} else {
- return __set_page_dirty_no_writeback(page);
+ return noop_dirty_folio(mapping, folio);
}
}
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 6a0ddda6b3c5..f67c4c70f17f 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -115,7 +115,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
* onlining - just onlined memory won't immediately be considered for
* allocation.
*/
- if (!isolated_page && PageBuddy(page)) {
+ if (!isolated_page) {
nr_pages = move_freepages_block(zone, page, migratetype, NULL);
__mod_zone_freepage_state(zone, nr_pages, migratetype);
}
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index 7504e7caa2a1..2458281bff89 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -23,15 +23,7 @@ EXPORT_SYMBOL(page_table_check_disabled);
static int __init early_page_table_check_param(char *buf)
{
- if (!buf)
- return -EINVAL;
-
- if (strcmp(buf, "on") == 0)
- __page_table_check_enabled = true;
- else if (strcmp(buf, "off") == 0)
- __page_table_check_enabled = false;
-
- return 0;
+ return strtobool(buf, &__page_table_check_enabled);
}
early_param("page_table_check", early_page_table_check_param);
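[Annotation] strtobool() is an alias of kstrtobool(), so besides deleting code this keeps the NULL/-EINVAL behaviour and, as a side effect, accepts the usual boolean spellings rather than only "on"/"off". On the kernel command line, all of the following should now enable the check (only the first worked before):

    page_table_check=on
    page_table_check=1
    page_table_check=y
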
@@ -86,8 +78,8 @@ static void page_table_check_clear(struct mm_struct *mm, unsigned long addr,
{
struct page_ext *page_ext;
struct page *page;
+ unsigned long i;
bool anon;
- int i;
if (!pfn_valid(pfn))
return;
@@ -121,8 +113,8 @@ static void page_table_check_set(struct mm_struct *mm, unsigned long addr,
{
struct page_ext *page_ext;
struct page *page;
+ unsigned long i;
bool anon;
- int i;
if (!pfn_valid(pfn))
return;
@@ -152,10 +144,10 @@ static void page_table_check_set(struct mm_struct *mm, unsigned long addr,
void __page_table_check_zero(struct page *page, unsigned int order)
{
struct page_ext *page_ext = lookup_page_ext(page);
- int i;
+ unsigned long i;
BUG_ON(!page_ext);
- for (i = 0; i < (1 << order); i++) {
+ for (i = 0; i < (1ul << order); i++) {
struct page_table_check *ptc = get_page_table_check(page_ext);
BUG_ON(atomic_read(&ptc->anon_map_count));
@@ -206,17 +198,10 @@ EXPORT_SYMBOL(__page_table_check_pud_clear);
void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
- pte_t old_pte;
-
if (&init_mm == mm)
return;
- old_pte = *ptep;
- if (pte_user_accessible_page(old_pte)) {
- page_table_check_clear(mm, addr, pte_pfn(old_pte),
- PAGE_SIZE >> PAGE_SHIFT);
- }
-
+ __page_table_check_pte_clear(mm, addr, *ptep);
if (pte_user_accessible_page(pte)) {
page_table_check_set(mm, addr, pte_pfn(pte),
PAGE_SIZE >> PAGE_SHIFT,
@@ -228,17 +213,10 @@ EXPORT_SYMBOL(__page_table_check_pte_set);
void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
- pmd_t old_pmd;
-
if (&init_mm == mm)
return;
- old_pmd = *pmdp;
- if (pmd_user_accessible_page(old_pmd)) {
- page_table_check_clear(mm, addr, pmd_pfn(old_pmd),
- PMD_PAGE_SIZE >> PAGE_SHIFT);
- }
-
+ __page_table_check_pmd_clear(mm, addr, *pmdp);
if (pmd_user_accessible_page(pmd)) {
page_table_check_set(mm, addr, pmd_pfn(pmd),
PMD_PAGE_SIZE >> PAGE_SHIFT,
@@ -250,17 +228,10 @@ EXPORT_SYMBOL(__page_table_check_pmd_set);
void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pud)
{
- pud_t old_pud;
-
if (&init_mm == mm)
return;
- old_pud = *pudp;
- if (pud_user_accessible_page(old_pud)) {
- page_table_check_clear(mm, addr, pud_pfn(old_pud),
- PUD_PAGE_SIZE >> PAGE_SHIFT);
- }
-
+ __page_table_check_pud_clear(mm, addr, *pudp);
if (pud_user_accessible_page(pud)) {
page_table_check_set(mm, addr, pud_pfn(pud),
PUD_PAGE_SIZE >> PAGE_SHIFT,
@@ -268,3 +239,23 @@ void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr,
}
}
EXPORT_SYMBOL(__page_table_check_pud_set);
+
+void __page_table_check_pte_clear_range(struct mm_struct *mm,
+ unsigned long addr,
+ pmd_t pmd)
+{
+ if (&init_mm == mm)
+ return;
+
+ if (!pmd_bad(pmd) && !pmd_leaf(pmd)) {
+ pte_t *ptep = pte_offset_map(&pmd, addr);
+ unsigned long i;
+
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ __page_table_check_pte_clear(mm, addr, *ptep);
+ addr += PAGE_SIZE;
+ ptep++;
+ }
+ pte_unmap(ptep - PTRS_PER_PTE);
+ }
+}
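[Annotation] The range variant exists for the case where a whole pte table disappears at once; the intended caller in this series is the khugepaged collapse path. The call site looks roughly like this (sketch; page_table_check_pte_clear_range() is the static-inline wrapper that compiles away when the feature is off):

    /* collapsing a pte table into one huge pmd */
    pmd_t pmd = pmdp_collapse_flush(vma, haddr, pmdp);

    /* every pte that lived under the old table is now unreachable */
    page_table_check_pte_clear_range(mm, haddr, pmd);
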
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index f7b331081791..1187f9c1ec5b 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -53,18 +53,6 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw)
return true;
}
-static inline bool pfn_is_match(struct page *page, unsigned long pfn)
-{
- unsigned long page_pfn = page_to_pfn(page);
-
- /* normal page and hugetlbfs page */
- if (!PageTransCompound(page) || PageHuge(page))
- return page_pfn == pfn;
-
- /* THP can be referenced by any subpage */
- return pfn >= page_pfn && pfn - page_pfn < thp_nr_pages(page);
-}
-
/**
* check_pte - check if @pvmw->page is mapped at the @pvmw->pte
* @pvmw: page_vma_mapped_walk struct, includes a pair pte and page for checking
@@ -116,7 +104,17 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
pfn = pte_pfn(*pvmw->pte);
}
- return pfn_is_match(pvmw->page, pfn);
+ return (pfn - pvmw->pfn) < pvmw->nr_pages;
+}
+
+/* Returns true if the two ranges overlap. Careful to not overflow. */
+static bool check_pmd(unsigned long pfn, struct page_vma_mapped_walk *pvmw)
+{
+ if ((pfn + HPAGE_PMD_NR - 1) < pvmw->pfn)
+ return false;
+ if (pfn > pvmw->pfn + pvmw->nr_pages - 1)
+ return false;
+ return true;
}
static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
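[Annotation] check_pmd() tests whether the PMD-mapped range [pfn, pfn + HPAGE_PMD_NR) overlaps the folio's range [pvmw->pfn, pvmw->pfn + nr_pages), phrased so that neither comparison can overflow. A worked example, assuming HPAGE_PMD_NR == 512 (x86-64 with 4kB pages):

    pvmw->pfn = 1000, pvmw->nr_pages = 16     folio covers pfns 1000..1015

    pfn =  600: 600 + 511 = 1111 >= 1000, and 600 <= 1015  -> true (overlap)
    pfn =  400: 400 + 511 =  911 <  1000                   -> false
    pfn = 1100: 1100 > 1015                                -> false
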
@@ -127,7 +125,7 @@ static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
}
/**
- * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
+ * page_vma_mapped_walk - check if @pvmw->pfn is mapped in @pvmw->vma at
* @pvmw->address
* @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags
* must be set. pmd, pte and ptl must be NULL.
@@ -152,8 +150,8 @@ static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
*/
bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
{
- struct mm_struct *mm = pvmw->vma->vm_mm;
- struct page *page = pvmw->page;
+ struct vm_area_struct *vma = pvmw->vma;
+ struct mm_struct *mm = vma->vm_mm;
unsigned long end;
pgd_t *pgd;
p4d_t *p4d;
@@ -164,32 +162,26 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
if (pvmw->pmd && !pvmw->pte)
return not_found(pvmw);
- if (unlikely(PageHuge(page))) {
+ if (unlikely(is_vm_hugetlb_page(vma))) {
+ unsigned long size = pvmw->nr_pages * PAGE_SIZE;
/* The only possible mapping was handled on last iteration */
if (pvmw->pte)
return not_found(pvmw);
/* when pud is not present, pte will be NULL */
- pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page));
+ pvmw->pte = huge_pte_offset(mm, pvmw->address, size);
if (!pvmw->pte)
return false;
- pvmw->ptl = huge_pte_lockptr(page_hstate(page), mm, pvmw->pte);
+ pvmw->ptl = huge_pte_lockptr(size_to_hstate(size), mm,
+ pvmw->pte);
spin_lock(pvmw->ptl);
if (!check_pte(pvmw))
return not_found(pvmw);
return true;
}
- /*
- * Seek to next pte only makes sense for THP.
- * But more important than that optimization, is to filter out
- * any PageKsm page: whose page->index misleads vma_address()
- * and vma_address_end() to disaster.
- */
- end = PageTransCompound(page) ?
- vma_address_end(page, pvmw->vma) :
- pvmw->address + PAGE_SIZE;
+ end = vma_address_end(pvmw);
if (pvmw->pte)
goto next_pte;
restart:
@@ -224,7 +216,7 @@ restart:
if (likely(pmd_trans_huge(pmde))) {
if (pvmw->flags & PVMW_MIGRATION)
return not_found(pvmw);
- if (pmd_page(pmde) != page)
+ if (!check_pmd(pmd_pfn(pmde), pvmw))
return not_found(pvmw);
return true;
}
@@ -236,7 +228,7 @@ restart:
return not_found(pvmw);
entry = pmd_to_swp_entry(pmde);
if (!is_migration_entry(entry) ||
- pfn_swap_entry_to_page(entry) != page)
+ !check_pmd(swp_offset(entry), pvmw))
return not_found(pvmw);
return true;
}
@@ -250,7 +242,8 @@ restart:
* cleared *pmd but not decremented compound_mapcount().
*/
if ((pvmw->flags & PVMW_SYNC) &&
- PageTransCompound(page)) {
+ transparent_hugepage_active(vma) &&
+ (pvmw->nr_pages >= HPAGE_PMD_NR)) {
spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
spin_unlock(ptl);
@@ -307,7 +300,8 @@ next_pte:
int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
{
struct page_vma_mapped_walk pvmw = {
- .page = page,
+ .pfn = page_to_pfn(page),
+ .nr_pages = 1,
.vma = vma,
.flags = PVMW_SYNC,
};
diff --git a/mm/ptdump.c b/mm/ptdump.c
index da751448d0e4..eea3d28d173c 100644
--- a/mm/ptdump.c
+++ b/mm/ptdump.c
@@ -40,8 +40,10 @@ static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr,
if (st->effective_prot)
st->effective_prot(st, 0, pgd_val(val));
- if (pgd_leaf(val))
+ if (pgd_leaf(val)) {
st->note_page(st, addr, 0, pgd_val(val));
+ walk->action = ACTION_CONTINUE;
+ }
return 0;
}
@@ -61,8 +63,10 @@ static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr,
if (st->effective_prot)
st->effective_prot(st, 1, p4d_val(val));
- if (p4d_leaf(val))
+ if (p4d_leaf(val)) {
st->note_page(st, addr, 1, p4d_val(val));
+ walk->action = ACTION_CONTINUE;
+ }
return 0;
}
@@ -82,8 +86,10 @@ static int ptdump_pud_entry(pud_t *pud, unsigned long addr,
if (st->effective_prot)
st->effective_prot(st, 2, pud_val(val));
- if (pud_leaf(val))
+ if (pud_leaf(val)) {
st->note_page(st, addr, 2, pud_val(val));
+ walk->action = ACTION_CONTINUE;
+ }
return 0;
}
@@ -101,8 +107,10 @@ static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr,
if (st->effective_prot)
st->effective_prot(st, 3, pmd_val(val));
- if (pmd_leaf(val))
+ if (pmd_leaf(val)) {
st->note_page(st, addr, 3, pmd_val(val));
+ walk->action = ACTION_CONTINUE;
+ }
return 0;
}
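[Annotation] Without ACTION_CONTINUE the generic walker would descend into what it takes for a lower-level table, even though the entry just noted is a leaf. Any pagewalk callback that fully handles a leaf should follow the same pattern (sketch against the mm_walk API; note_leaf() is hypothetical):

    static int example_pud_entry(pud_t *pud, unsigned long addr,
    			     unsigned long next, struct mm_walk *walk)
    {
    	pud_t val = READ_ONCE(*pud);

    	if (pud_leaf(val)) {
    		note_leaf(addr, pud_val(val));	/* hypothetical */
    		/* a leaf has no pmd level below it; don't walk into it */
    		walk->action = ACTION_CONTINUE;
    	}
    	return 0;
    }
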
diff --git a/mm/readahead.c b/mm/readahead.c
index cf0dcf89eb69..d3a47546d17d 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -8,6 +8,111 @@
* Initial version.
*/
+/**
+ * DOC: Readahead Overview
+ *
+ * Readahead is used to read content into the page cache before it is
+ * explicitly requested by the application. Readahead only ever
+ * attempts to read pages that are not yet in the page cache. If a
+ * page is present but not up-to-date, readahead will not try to read
+ * it. In that case a simple ->readpage() will be requested.
+ *
+ * Readahead is triggered when an application read request (whether a
+ * system call or a page fault) finds that the requested page is not in
+ * the page cache, or that it is in the page cache and has the
+ * %PG_readahead flag set. This flag indicates that the page was loaded
+ * as part of a previous read-ahead request and now that it has been
+ * accessed, it is time for the next read-ahead.
+ *
+ * Each readahead request is partly synchronous read, and partly async
+ * read-ahead. This is reflected in the struct file_ra_state which
+ * contains ->size being the total number of pages, and ->async_size
+ * which is the number of pages in the async section. The first page in
+ * this async section will have %PG_readahead set as a trigger for a
+ * subsequent read ahead. Once a series of sequential reads has been
+ * established, there should be no need for a synchronous component and
+ * all read ahead requests will be fully asynchronous.
+ *
+ * When either of the triggers causes a readahead, three numbers need to
+ * be determined: the start of the region, the size of the region, and
+ * the size of the async tail.
+ *
+ * The start of the region is simply the first page address at or after
+ * the accessed address, which is not currently populated in the page
+ * cache. This is found with a simple search in the page cache.
+ *
+ * The size of the async tail is determined by subtracting the size that
+ * was explicitly requested from the determined request size, unless
+ * this would be less than zero - then zero is used. NOTE THIS
+ * CALCULATION IS WRONG WHEN THE START OF THE REGION IS NOT THE ACCESSED
+ * PAGE.
+ *
+ * The size of the region is normally determined from the size of the
+ * previous readahead which loaded the preceding pages. This may be
+ * discovered from the struct file_ra_state for simple sequential reads,
+ * or from examining the state of the page cache when multiple
+ * sequential reads are interleaved. Specifically: where the readahead
+ * was triggered by the %PG_readahead flag, the size of the previous
+ * readahead is assumed to be the number of pages from the triggering
+ * page to the start of the new readahead. In these cases, the size of
+ * the previous readahead is scaled, often doubled, for the new
+ * readahead, though see get_next_ra_size() for details.
+ *
+ * If the size of the previous read cannot be determined, the number of
+ * preceding pages in the page cache is used to estimate the size of
+ * a previous read. This estimate could easily be misled by random
+ * reads being coincidentally adjacent, so it is ignored unless it is
+ * larger than the current request, and it is not scaled up, unless it
+ * is at the start of file.
+ *
+ * In general read ahead is accelerated at the start of the file, as
+ * reads from there are often sequential. There are other minor
+ * adjustments to the read ahead size in various special cases and these
+ * are best discovered by reading the code.
+ *
+ * The above calculation determines the readahead, to which any requested
+ * read size may be added.
+ *
+ * Readahead requests are sent to the filesystem using the ->readahead()
+ * address space operation, for which mpage_readahead() is a canonical
+ * implementation. ->readahead() should normally initiate reads on all
+ * pages, but may fail to read any or all pages without causing an IO
+ * error. The page cache reading code will issue a ->readpage() request
+ * for any page which ->readahead() does not provide, and only an error
+ * from this will be final.
+ *
+ * ->readahead() will generally call readahead_page() repeatedly to get
+ * each page from those prepared for read ahead. It may fail to read a
+ * page by:
+ *
+ * * not calling readahead_page() sufficiently many times, effectively
+ * ignoring some pages, as might be appropriate if the path to
+ * storage is congested.
+ *
+ * * failing to actually submit a read request for a given page,
+ * possibly due to insufficient resources, or
+ *
+ * * getting an error during subsequent processing of a request.
+ *
+ * In the last two cases, the page should be unlocked to indicate that
+ * the read attempt has failed. In the first case the page will be
+ * unlocked by the caller.
+ *
+ * Those pages not in the final ``async_size`` of the request should be
+ * considered to be important and ->readahead() should not fail them due
+ * to congestion or temporary resource unavailability, but should wait
+ * for necessary resources (e.g. memory or indexing information) to
+ * become available. Pages in the final ``async_size`` may be
+ * considered less urgent and failure to read them is more acceptable.
+ * In this case it is best to use delete_from_page_cache() to remove the
+ * pages from the page cache as is automatically done for pages that
+ * were not fetched with readahead_page(). This will allow a
+ * subsequent synchronous read ahead request to try them again. If they
+ * are left in the page cache, then they will be read individually using
+ * ->readpage().
+ *
+ */
+
#include <linux/kernel.h>
#include <linux/dax.h>
#include <linux/gfp.h>
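[Annotation] To make the ->size/->async_size split described above concrete, consider a sequential reader whose window has grown to 64 pages (values are illustrative only):

    ra->start = 128, ra->size = 64, ra->async_size = 48

    file pages 128..143: synchronous part; the reader is waiting on these
    file pages 144..191: async tail; page 144 is marked PG_readahead

    Touching page 144 later triggers page_cache_async_ra(), and the next
    window is scaled up from this one (see get_next_ra_size()).
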
@@ -51,7 +156,7 @@ static void read_cache_pages_invalidate_page(struct address_space *mapping,
if (!trylock_page(page))
BUG();
page->mapping = mapping;
- do_invalidatepage(page, 0, PAGE_SIZE);
+ folio_invalidate(page_folio(page), 0, PAGE_SIZE);
page->mapping = NULL;
unlock_page(page);
}
@@ -127,8 +232,17 @@ static void read_pages(struct readahead_control *rac, struct list_head *pages,
if (aops->readahead) {
aops->readahead(rac);
- /* Clean up the remaining pages */
+ /*
+ * Clean up the remaining pages. The sizes in ->ra
+ * may be used to size the next read-ahead, so make sure
+ * they accurately reflect what happened.
+ */
while ((page = readahead_page(rac))) {
+ rac->ra->size -= 1;
+ if (rac->ra->async_size > 0) {
+ rac->ra->async_size -= 1;
+ delete_from_page_cache(page);
+ }
unlock_page(page);
put_page(page);
}
@@ -148,7 +262,7 @@ static void read_pages(struct readahead_control *rac, struct list_head *pages,
blk_finish_plug(&plug);
- BUG_ON(!list_empty(pages));
+ BUG_ON(pages && !list_empty(pages));
BUG_ON(readahead_count(rac));
out:
@@ -247,7 +361,7 @@ EXPORT_SYMBOL_GPL(page_cache_ra_unbounded);
* behaviour which would occur if page allocations are causing VM writeback.
* We really don't want to intermingle reads and writes like that.
*/
-void do_page_cache_ra(struct readahead_control *ractl,
+static void do_page_cache_ra(struct readahead_control *ractl,
unsigned long nr_to_read, unsigned long lookahead_size)
{
struct inode *inode = ractl->mapping->host;
@@ -432,10 +546,102 @@ static int try_context_readahead(struct address_space *mapping,
}
/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size. I'm not aware of any
+ * assumptions about maximum order if THP are disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages)
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER 8
+#endif
+
+static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
+ pgoff_t mark, unsigned int order, gfp_t gfp)
+{
+ int err;
+ struct folio *folio = filemap_alloc_folio(gfp, order);
+
+ if (!folio)
+ return -ENOMEM;
+ if (mark - index < (1UL << order))
+ folio_set_readahead(folio);
+ err = filemap_add_folio(ractl->mapping, folio, index, gfp);
+ if (err)
+ folio_put(folio);
+ else
+ ractl->_nr_pages += 1UL << order;
+ return err;
+}
+
+void page_cache_ra_order(struct readahead_control *ractl,
+ struct file_ra_state *ra, unsigned int new_order)
+{
+ struct address_space *mapping = ractl->mapping;
+ pgoff_t index = readahead_index(ractl);
+ pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT;
+ pgoff_t mark = index + ra->size - ra->async_size;
+ int err = 0;
+ gfp_t gfp = readahead_gfp_mask(mapping);
+
+ if (!mapping_large_folio_support(mapping) || ra->size < 4)
+ goto fallback;
+
+ limit = min(limit, index + ra->size - 1);
+
+ if (new_order < MAX_PAGECACHE_ORDER) {
+ new_order += 2;
+ if (new_order > MAX_PAGECACHE_ORDER)
+ new_order = MAX_PAGECACHE_ORDER;
+ while ((1 << new_order) > ra->size)
+ new_order--;
+ }
+
+ while (index <= limit) {
+ unsigned int order = new_order;
+
+ /* Align with smaller pages if needed */
+ if (index & ((1UL << order) - 1)) {
+ order = __ffs(index);
+ if (order == 1)
+ order = 0;
+ }
+ /* Don't allocate pages past EOF */
+ while (index + (1UL << order) - 1 > limit) {
+ if (--order == 1)
+ order = 0;
+ }
+ err = ra_alloc_folio(ractl, index, mark, order, gfp);
+ if (err)
+ break;
+ index += 1UL << order;
+ }
+
+ if (index > limit) {
+ ra->size += index - limit - 1;
+ ra->async_size += index - limit - 1;
+ }
+
+ read_pages(ractl, NULL, false);
+
+ /*
+ * If there were already pages in the page cache, then we may have
+ * left some gaps. Let the regular readahead code take care of this
+ * situation.
+ */
+ if (!err)
+ return;
+fallback:
+ do_page_cache_ra(ractl, ra->size, ra->async_size);
+}
+
+/*
* A minimal readahead algorithm for trivial sequential/random reads.
*/
static void ondemand_readahead(struct readahead_control *ractl,
- bool hit_readahead_marker, unsigned long req_size)
+ struct folio *folio, unsigned long req_size)
{
struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
struct file_ra_state *ra = ractl->ra;
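[Annotation] The order selection in page_cache_ra_order() bumps the previous folio order by 2, caps it at MAX_PAGECACHE_ORDER, keeps it no larger than the window, then aligns each allocation to its own size and trims at EOF. A standalone demo of that arithmetic (plain C with hypothetical values; __ffs() modeled with __builtin_ctzl()):

    #include <stdio.h>

    #define MAX_PAGECACHE_ORDER 9	/* HPAGE_PMD_ORDER with 4kB pages; assumption */

    int main(void)
    {
    	unsigned long index = 100, limit = 163;	/* a 64-page window */
    	unsigned long ra_size = 64;
    	unsigned int new_order = 2;	/* order of the folio that hit the marker */

    	/* Same bump-and-cap as page_cache_ra_order() */
    	if (new_order < MAX_PAGECACHE_ORDER) {
    		new_order += 2;
    		if (new_order > MAX_PAGECACHE_ORDER)
    			new_order = MAX_PAGECACHE_ORDER;
    		while ((1UL << new_order) > ra_size)
    			new_order--;
    	}

    	while (index <= limit) {
    		unsigned int order = new_order;

    		/* Align with smaller pages if needed */
    		if (index & ((1UL << order) - 1)) {
    			order = __builtin_ctzl(index);	/* __ffs() */
    			if (order == 1)
    				order = 0;	/* no order-1 folios */
    		}
    		/* Don't allocate pages past EOF */
    		while (index + (1UL << order) - 1 > limit) {
    			if (--order == 1)
    				order = 0;
    		}
    		printf("folio: index %lu, order %u (%lu pages)\n",
    		       index, order, 1UL << order);
    		index += 1UL << order;
    	}
    	return 0;
    }

Running it shows the window being filled with order-2, order-3 and order-4 folios, the last one shrunk back to order-2 to stop exactly at the limit.
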
@@ -470,12 +676,12 @@ static void ondemand_readahead(struct readahead_control *ractl,
}
/*
- * Hit a marked page without valid readahead state.
+ * Hit a marked folio without valid readahead state.
* E.g. interleaved reads.
* Query the pagecache for async_size, which normally equals to
* readahead size. Ramp it up and use it as the new readahead size.
*/
- if (hit_readahead_marker) {
+ if (folio) {
pgoff_t start;
rcu_read_lock();
@@ -548,7 +754,7 @@ readit:
}
ractl->_index = ra->start;
- do_page_cache_ra(ractl, ra->size, ra->async_size);
+ page_cache_ra_order(ractl, ra, folio ? folio_order(folio) : 0);
}
void page_cache_sync_ra(struct readahead_control *ractl,
@@ -576,7 +782,7 @@ void page_cache_sync_ra(struct readahead_control *ractl,
}
/* do read-ahead */
- ondemand_readahead(ractl, false, req_count);
+ ondemand_readahead(ractl, NULL, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_sync_ra);
@@ -595,17 +801,11 @@ void page_cache_async_ra(struct readahead_control *ractl,
folio_clear_readahead(folio);
- /*
- * Defer asynchronous read-ahead on IO congestion.
- */
- if (inode_read_congested(ractl->mapping->host))
- return;
-
if (blk_cgroup_congested())
return;
/* do read-ahead */
- ondemand_readahead(ractl, true, req_count);
+ ondemand_readahead(ractl, folio, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_async_ra);
diff --git a/mm/rmap.c b/mm/rmap.c
index 6a1e8c7f6213..615b5d323ee2 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -31,8 +31,8 @@
* mm->page_table_lock or pte_lock
* swap_lock (in swap_duplicate, swap_info_get)
* mmlist_lock (in mmput, drain_mmlist and others)
- * mapping->private_lock (in __set_page_dirty_buffers)
- * lock_page_memcg move_lock (in __set_page_dirty_buffers)
+ * mapping->private_lock (in block_dirty_folio)
+ * folio_lock_memcg move_lock (in block_dirty_folio)
* i_pages lock (widely used)
* lruvec->lru_lock (in folio_lruvec_lock_irq)
* inode->i_lock (in set_page_dirty's __mark_inode_dirty)
@@ -107,15 +107,15 @@ static inline void anon_vma_free(struct anon_vma *anon_vma)
VM_BUG_ON(atomic_read(&anon_vma->refcount));
/*
- * Synchronize against page_lock_anon_vma_read() such that
+ * Synchronize against folio_lock_anon_vma_read() such that
* we can safely hold the lock without the anon_vma getting
* freed.
*
* Relies on the full mb implied by the atomic_dec_and_test() from
* put_anon_vma() against the acquire barrier implied by
- * down_read_trylock() from page_lock_anon_vma_read(). This orders:
+ * down_read_trylock() from folio_lock_anon_vma_read(). This orders:
*
- * page_lock_anon_vma_read() VS put_anon_vma()
+ * folio_lock_anon_vma_read() VS put_anon_vma()
* down_read_trylock() atomic_dec_and_test()
* LOCK MB
* atomic_read() rwsem_is_locked()
@@ -168,7 +168,7 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
* allocate a new one.
*
* Anon-vma allocations are very subtle, because we may have
- * optimistically looked up an anon_vma in page_lock_anon_vma_read()
+ * optimistically looked up an anon_vma in folio_lock_anon_vma_read()
* and that may actually touch the rwsem even in the newly
* allocated vma (it depends on RCU to make sure that the
* anon_vma isn't actually destroyed).
@@ -526,28 +526,28 @@ out:
* atomic op -- the trylock. If we fail the trylock, we fall back to getting a
* reference like with page_get_anon_vma() and then block on the mutex.
*/
-struct anon_vma *page_lock_anon_vma_read(struct page *page)
+struct anon_vma *folio_lock_anon_vma_read(struct folio *folio)
{
struct anon_vma *anon_vma = NULL;
struct anon_vma *root_anon_vma;
unsigned long anon_mapping;
rcu_read_lock();
- anon_mapping = (unsigned long)READ_ONCE(page->mapping);
+ anon_mapping = (unsigned long)READ_ONCE(folio->mapping);
if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
goto out;
- if (!page_mapped(page))
+ if (!folio_mapped(folio))
goto out;
anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
root_anon_vma = READ_ONCE(anon_vma->root);
if (down_read_trylock(&root_anon_vma->rwsem)) {
/*
- * If the page is still mapped, then this anon_vma is still
+ * If the folio is still mapped, then this anon_vma is still
* its anon_vma, and holding the mutex ensures that it will
* not go away, see anon_vma_free().
*/
- if (!page_mapped(page)) {
+ if (!folio_mapped(folio)) {
up_read(&root_anon_vma->rwsem);
anon_vma = NULL;
}
@@ -560,7 +560,7 @@ struct anon_vma *page_lock_anon_vma_read(struct page *page)
goto out;
}
- if (!page_mapped(page)) {
+ if (!folio_mapped(folio)) {
rcu_read_unlock();
put_anon_vma(anon_vma);
return NULL;
@@ -737,8 +737,9 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
*/
unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
{
- if (PageAnon(page)) {
- struct anon_vma *page__anon_vma = page_anon_vma(page);
+ struct folio *folio = page_folio(page);
+ if (folio_test_anon(folio)) {
+ struct anon_vma *page__anon_vma = folio_anon_vma(folio);
/*
* Note: swapoff's unuse_vma() is more efficient with this
* check, and needs it to match anon_vma when KSM is active.
@@ -748,7 +749,7 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
return -EFAULT;
} else if (!vma->vm_file) {
return -EFAULT;
- } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
+ } else if (vma->vm_file->f_mapping != folio->mapping) {
return -EFAULT;
}
@@ -789,30 +790,29 @@ out:
return pmd;
}
-struct page_referenced_arg {
+struct folio_referenced_arg {
int mapcount;
int referenced;
unsigned long vm_flags;
struct mem_cgroup *memcg;
};
/*
- * arg: page_referenced_arg will be passed
+ * arg: folio_referenced_arg will be passed
*/
-static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
- unsigned long address, void *arg)
+static bool folio_referenced_one(struct folio *folio,
+ struct vm_area_struct *vma, unsigned long address, void *arg)
{
- struct page_referenced_arg *pra = arg;
- struct page_vma_mapped_walk pvmw = {
- .page = page,
- .vma = vma,
- .address = address,
- };
+ struct folio_referenced_arg *pra = arg;
+ DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
int referenced = 0;
while (page_vma_mapped_walk(&pvmw)) {
address = pvmw.address;
- if (vma->vm_flags & VM_LOCKED) {
+ if ((vma->vm_flags & VM_LOCKED) &&
+ (!folio_test_large(folio) || !pvmw.pte)) {
+ /* Restore the mlock which got missed */
+ mlock_vma_folio(folio, vma, !pvmw.pte);
page_vma_mapped_walk_done(&pvmw);
pra->vm_flags |= VM_LOCKED;
return false; /* To break the loop */
@@ -824,10 +824,10 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
/*
* Don't treat a reference through
* a sequentially read mapping as such.
- * If the page has been used in another mapping,
+ * If the folio has been used in another mapping,
* we will catch it; if this other mapping is
* already gone, the unmap path will have set
- * PG_referenced or activated the page.
+ * the referenced flag or activated the folio.
*/
if (likely(!(vma->vm_flags & VM_SEQ_READ)))
referenced++;
@@ -837,7 +837,7 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
pvmw.pmd))
referenced++;
} else {
- /* unexpected pmd-mapped page? */
+ /* unexpected pmd-mapped folio? */
WARN_ON_ONCE(1);
}
@@ -845,13 +845,13 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
}
if (referenced)
- clear_page_idle(page);
- if (test_and_clear_page_young(page))
+ folio_clear_idle(folio);
+ if (folio_test_clear_young(folio))
referenced++;
if (referenced) {
pra->referenced++;
- pra->vm_flags |= vma->vm_flags;
+ pra->vm_flags |= vma->vm_flags & ~VM_LOCKED;
}
if (!pra->mapcount)
@@ -860,9 +860,9 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
return true;
}
-static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
+static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg)
{
- struct page_referenced_arg *pra = arg;
+ struct folio_referenced_arg *pra = arg;
struct mem_cgroup *memcg = pra->memcg;
if (!mm_match_cgroup(vma->vm_mm, memcg))
@@ -872,40 +872,39 @@ static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
}
/**
- * page_referenced - test if the page was referenced
- * @page: the page to test
- * @is_locked: caller holds lock on the page
+ * folio_referenced() - Test if the folio was referenced.
+ * @folio: The folio to test.
+ * @is_locked: Caller holds lock on the folio.
* @memcg: target memory cgroup
- * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
+ * @vm_flags: A combination of all the vma->vm_flags which referenced the folio.
*
- * Quick test_and_clear_referenced for all mappings to a page,
- * returns the number of ptes which referenced the page.
+ * Quick test_and_clear_referenced for all mappings of a folio.
+ *
+ * Return: The number of mappings which referenced the folio.
*/
-int page_referenced(struct page *page,
- int is_locked,
- struct mem_cgroup *memcg,
- unsigned long *vm_flags)
+int folio_referenced(struct folio *folio, int is_locked,
+ struct mem_cgroup *memcg, unsigned long *vm_flags)
{
int we_locked = 0;
- struct page_referenced_arg pra = {
- .mapcount = total_mapcount(page),
+ struct folio_referenced_arg pra = {
+ .mapcount = folio_mapcount(folio),
.memcg = memcg,
};
struct rmap_walk_control rwc = {
- .rmap_one = page_referenced_one,
+ .rmap_one = folio_referenced_one,
.arg = (void *)&pra,
- .anon_lock = page_lock_anon_vma_read,
+ .anon_lock = folio_lock_anon_vma_read,
};
*vm_flags = 0;
if (!pra.mapcount)
return 0;
- if (!page_rmapping(page))
+ if (!folio_raw_mapping(folio))
return 0;
- if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
- we_locked = trylock_page(page);
+ if (!is_locked && (!folio_test_anon(folio) || folio_test_ksm(folio))) {
+ we_locked = folio_trylock(folio);
if (!we_locked)
return 1;
}
@@ -916,37 +915,32 @@ int page_referenced(struct page *page,
* cgroups
*/
if (memcg) {
- rwc.invalid_vma = invalid_page_referenced_vma;
+ rwc.invalid_vma = invalid_folio_referenced_vma;
}
- rmap_walk(page, &rwc);
+ rmap_walk(folio, &rwc);
*vm_flags = pra.vm_flags;
if (we_locked)
- unlock_page(page);
+ folio_unlock(folio);
return pra.referenced;
}
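A hedged sketch of the new calling convention (the wrapper below is
illustrative, not part of this patch); note that after the change above,
an mlocked vma is reported through *vm_flags rather than OR-ed in blindly:

	/* Illustrative only: test references on a folio the caller has locked. */
	static int folio_refs(struct folio *folio, struct mem_cgroup *memcg)
	{
		unsigned long vm_flags;
		int refs;

		/* is_locked == 1: we hold the folio lock, so no trylock inside */
		refs = folio_referenced(folio, 1, memcg, &vm_flags);
		if (vm_flags & VM_LOCKED)
			return -1;	/* mlocked somewhere; not reclaimable */
		return refs;
	}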
-static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
+static bool page_mkclean_one(struct folio *folio, struct vm_area_struct *vma,
unsigned long address, void *arg)
{
- struct page_vma_mapped_walk pvmw = {
- .page = page,
- .vma = vma,
- .address = address,
- .flags = PVMW_SYNC,
- };
+ DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, PVMW_SYNC);
struct mmu_notifier_range range;
int *cleaned = arg;
/*
* We have to assume the worst case, i.e. pmd, for invalidation. Note that
- * the page can not be free from this function.
+ * the folio can not be freed from this function.
*/
mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
0, vma, vma->vm_mm, address,
- vma_address_end(page, vma));
+ vma_address_end(&pvmw));
mmu_notifier_invalidate_range_start(&range);
while (page_vma_mapped_walk(&pvmw)) {
@@ -974,14 +968,14 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
continue;
- flush_cache_page(vma, address, page_to_pfn(page));
+ flush_cache_page(vma, address, folio_pfn(folio));
entry = pmdp_invalidate(vma, address, pmd);
entry = pmd_wrprotect(entry);
entry = pmd_mkclean(entry);
set_pmd_at(vma->vm_mm, address, pmd, entry);
ret = 1;
#else
- /* unexpected pmd-mapped page? */
+ /* unexpected pmd-mapped folio? */
WARN_ON_ONCE(1);
#endif
}
@@ -1029,7 +1023,7 @@ int folio_mkclean(struct folio *folio)
if (!mapping)
return 0;
- rmap_walk(&folio->page, &rwc);
+ rmap_walk(folio, &rwc);
return cleaned;
}
@@ -1057,8 +1051,8 @@ void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
/*
* Ensure that anon_vma and the PAGE_MAPPING_ANON bit are written
- * simultaneously, so a concurrent reader (eg page_referenced()'s
- * PageAnon()) will not see one without the other.
+ * simultaneously, so a concurrent reader (eg folio_referenced()'s
+ * folio_test_anon()) will not see one without the other.
*/
WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
}
@@ -1108,6 +1102,7 @@ static void __page_set_anon_rmap(struct page *page,
static void __page_check_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address)
{
+ struct folio *folio = page_folio(page);
/*
* The page's anon-rmap details (mapping and index) are guaranteed to
* be set up correctly at this point.
@@ -1119,7 +1114,8 @@ static void __page_check_anon_rmap(struct page *page,
* are initially only visible via the pagetables, and the pte is locked
* over the call to page_add_new_anon_rmap.
*/
- VM_BUG_ON_PAGE(page_anon_vma(page)->root != vma->anon_vma->root, page);
+ VM_BUG_ON_FOLIO(folio_anon_vma(folio)->root != vma->anon_vma->root,
+ folio);
VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address),
page);
}
@@ -1181,17 +1177,17 @@ void do_page_add_anon_rmap(struct page *page,
__mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
}
- if (unlikely(PageKsm(page))) {
+ if (unlikely(PageKsm(page)))
unlock_page_memcg(page);
- return;
- }
/* address might be in next vma when migration races vma_adjust */
- if (first)
+ else if (first)
__page_set_anon_rmap(page, vma, address,
flags & RMAP_EXCLUSIVE);
else
__page_check_anon_rmap(page, vma, address);
+
+ mlock_vma_page(page, vma, compound);
}
/**
@@ -1216,8 +1212,7 @@ void page_add_new_anon_rmap(struct page *page,
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
/* increment count (starts at -1) */
atomic_set(compound_mapcount_ptr(page), 0);
- if (hpage_pincount_available(page))
- atomic_set(compound_pincount_ptr(page), 0);
+ atomic_set(compound_pincount_ptr(page), 0);
__mod_lruvec_page_state(page, NR_ANON_THPS, nr);
} else {
@@ -1232,12 +1227,14 @@ void page_add_new_anon_rmap(struct page *page,
/**
* page_add_file_rmap - add pte mapping to a file page
- * @page: the page to add the mapping to
- * @compound: charge the page as compound or small page
+ * @page: the page to add the mapping to
+ * @vma: the vm area in which the mapping is added
+ * @compound: charge the page as compound or small page
*
* The caller needs to hold the pte lock.
*/
-void page_add_file_rmap(struct page *page, bool compound)
+void page_add_file_rmap(struct page *page,
+ struct vm_area_struct *vma, bool compound)
{
int i, nr = 1;
@@ -1252,6 +1249,17 @@ void page_add_file_rmap(struct page *page, bool compound)
}
if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
goto out;
+
+ /*
+ * It is racy to ClearPageDoubleMap in page_remove_file_rmap();
+ * but page lock is held by all page_add_file_rmap() compound
+ * callers, and SetPageDoubleMap below warns if !PageLocked:
+ * so this is a place where DoubleMap can be safely cleared.
+ */
+ VM_WARN_ON_ONCE(!PageLocked(page));
+ if (nr == nr_pages && PageDoubleMap(page))
+ ClearPageDoubleMap(page);
+
if (PageSwapBacked(page))
__mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,
nr_pages);
@@ -1260,13 +1268,8 @@ void page_add_file_rmap(struct page *page, bool compound)
nr_pages);
} else {
if (PageTransCompound(page) && page_mapping(page)) {
- struct page *head = compound_head(page);
-
VM_WARN_ON_ONCE(!PageLocked(page));
-
- SetPageDoubleMap(head);
- if (PageMlocked(page))
- clear_page_mlock(head);
+ SetPageDoubleMap(compound_head(page));
}
if (!atomic_inc_and_test(&page->_mapcount))
goto out;
@@ -1274,6 +1277,8 @@ void page_add_file_rmap(struct page *page, bool compound)
__mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
out:
unlock_page_memcg(page);
+
+ mlock_vma_page(page, vma, compound);
}
static void page_remove_file_rmap(struct page *page, bool compound)
@@ -1316,9 +1321,6 @@ static void page_remove_file_rmap(struct page *page, bool compound)
* pte lock(a spinlock) is held, which implies preemption disabled.
*/
__mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
-
- if (unlikely(PageMlocked(page)))
- clear_page_mlock(page);
}
static void page_remove_anon_compound_rmap(struct page *page)
@@ -1358,9 +1360,6 @@ static void page_remove_anon_compound_rmap(struct page *page)
nr = thp_nr_pages(page);
}
- if (unlikely(PageMlocked(page)))
- clear_page_mlock(page);
-
if (nr)
__mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr);
}
@@ -1368,11 +1367,13 @@ static void page_remove_anon_compound_rmap(struct page *page)
/**
* page_remove_rmap - take down pte mapping from a page
* @page: page to remove mapping from
+ * @vma: the vm area from which the mapping is removed
* @compound: uncharge the page as compound or small page
*
* The caller needs to hold the pte lock.
*/
-void page_remove_rmap(struct page *page, bool compound)
+void page_remove_rmap(struct page *page,
+ struct vm_area_struct *vma, bool compound)
{
lock_page_memcg(page);
@@ -1397,9 +1398,6 @@ void page_remove_rmap(struct page *page, bool compound)
*/
__dec_lruvec_page_state(page, NR_ANON_MAPPED);
- if (unlikely(PageMlocked(page)))
- clear_page_mlock(page);
-
if (PageTransCompound(page))
deferred_split_huge_page(compound_head(page));
@@ -1414,20 +1412,18 @@ void page_remove_rmap(struct page *page, bool compound)
*/
out:
unlock_page_memcg(page);
+
+ munlock_vma_page(page, vma, compound);
}
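Call sites elsewhere in mm/ gain the vma argument in the same way; a
representative (not taken from this patch) before/after sketch:

	/* before */
	page_add_file_rmap(page, false);
	page_remove_rmap(page, false);

	/* after: the vma lets rmap maintain mlock state as mappings change */
	page_add_file_rmap(page, vma, false);
	page_remove_rmap(page, vma, false);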
/*
* @arg: enum ttu_flags will be passed to this argument
*/
-static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
unsigned long address, void *arg)
{
struct mm_struct *mm = vma->vm_mm;
- struct page_vma_mapped_walk pvmw = {
- .page = page,
- .vma = vma,
- .address = address,
- };
+ DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
pte_t pteval;
struct page *subpage;
bool ret = true;
@@ -1444,21 +1440,20 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
pvmw.flags = PVMW_SYNC;
if (flags & TTU_SPLIT_HUGE_PMD)
- split_huge_pmd_address(vma, address, false, page);
+ split_huge_pmd_address(vma, address, false, folio);
/*
* For THP, we have to assume the worst case, i.e. pmd, for invalidation.
* For hugetlb, it could be much worse if we need to do pud
* invalidation in the case of pmd sharing.
*
- * Note that the page can not be free in this function as call of
- * try_to_unmap() must hold a reference on the page.
+ * Note that the folio cannot be freed in this function, as the caller
+ * of try_to_unmap() must hold a reference on the folio.
*/
- range.end = PageKsm(page) ?
- address + PAGE_SIZE : vma_address_end(page, vma);
+ range.end = vma_address_end(&pvmw);
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
address, range.end);
- if (PageHuge(page)) {
+ if (folio_test_hugetlb(folio)) {
/*
* If sharing is possible, start and end will be adjusted
* accordingly.
@@ -1469,32 +1464,26 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
mmu_notifier_invalidate_range_start(&range);
while (page_vma_mapped_walk(&pvmw)) {
+ /* Unexpected PMD-mapped THP? */
+ VM_BUG_ON_FOLIO(!pvmw.pte, folio);
+
/*
- * If the page is mlock()d, we cannot swap it out.
+ * If the folio is in an mlock()d vma, we must not swap it out.
*/
if (!(flags & TTU_IGNORE_MLOCK) &&
(vma->vm_flags & VM_LOCKED)) {
- /*
- * PTE-mapped THP are never marked as mlocked: so do
- * not set it on a DoubleMap THP, nor on an Anon THP
- * (which may still be PTE-mapped after DoubleMap was
- * cleared). But stop unmapping even in those cases.
- */
- if (!PageTransCompound(page) || (PageHead(page) &&
- !PageDoubleMap(page) && !PageAnon(page)))
- mlock_vma_page(page);
+ /* Restore the mlock which got missed */
+ mlock_vma_folio(folio, vma, false);
page_vma_mapped_walk_done(&pvmw);
ret = false;
break;
}
- /* Unexpected PMD-mapped THP? */
- VM_BUG_ON_PAGE(!pvmw.pte, page);
-
- subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+ subpage = folio_page(folio,
+ pte_pfn(*pvmw.pte) - folio_pfn(folio));
address = pvmw.address;
- if (PageHuge(page) && !PageAnon(page)) {
+ if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) {
/*
* To call huge_pmd_unshare, i_mmap_rwsem must be
* held in write mode. Caller needs to explicitly
@@ -1533,7 +1522,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
if (should_defer_flush(mm, flags)) {
/*
* We clear the PTE but do not flush so potentially
- * a remote CPU could still be writing to the page.
+ * a remote CPU could still be writing to the folio.
* If the entry was previously clean then the
* architecture must guarantee that a clear->dirty
* transition on a cached TLB entry is written through
@@ -1546,22 +1535,22 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
pteval = ptep_clear_flush(vma, address, pvmw.pte);
}
- /* Move the dirty bit to the page. Now the pte is gone. */
+ /* Set the dirty flag on the folio now the pte is gone. */
if (pte_dirty(pteval))
- set_page_dirty(page);
+ folio_mark_dirty(folio);
/* Update high watermark before we lower rss */
update_hiwater_rss(mm);
- if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
+ if (PageHWPoison(subpage) && !(flags & TTU_IGNORE_HWPOISON)) {
pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
- if (PageHuge(page)) {
- hugetlb_count_sub(compound_nr(page), mm);
+ if (folio_test_hugetlb(folio)) {
+ hugetlb_count_sub(folio_nr_pages(folio), mm);
set_huge_swap_pte_at(mm, address,
pvmw.pte, pteval,
vma_mmu_pagesize(vma));
} else {
- dec_mm_counter(mm, mm_counter(page));
+ dec_mm_counter(mm, mm_counter(&folio->page));
set_pte_at(mm, address, pvmw.pte, pteval);
}
@@ -1576,18 +1565,19 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* migration) will not expect userfaults on already
* copied pages.
*/
- dec_mm_counter(mm, mm_counter(page));
+ dec_mm_counter(mm, mm_counter(&folio->page));
/* We have to invalidate as we cleared the pte */
mmu_notifier_invalidate_range(mm, address,
address + PAGE_SIZE);
- } else if (PageAnon(page)) {
+ } else if (folio_test_anon(folio)) {
swp_entry_t entry = { .val = page_private(subpage) };
pte_t swp_pte;
/*
* Store the swap location in the pte.
* See handle_pte_fault() ...
*/
- if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) {
+ if (unlikely(folio_test_swapbacked(folio) !=
+ folio_test_swapcache(folio))) {
WARN_ON_ONCE(1);
ret = false;
/* We have to invalidate as we cleared the pte */
@@ -1598,8 +1588,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
}
/* MADV_FREE page check */
- if (!PageSwapBacked(page)) {
- if (!PageDirty(page)) {
+ if (!folio_test_swapbacked(folio)) {
+ if (!folio_test_dirty(folio)) {
/* Invalidate as we cleared the pte */
mmu_notifier_invalidate_range(mm,
address, address + PAGE_SIZE);
@@ -1608,11 +1598,11 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
}
/*
- * If the page was redirtied, it cannot be
+ * If the folio was redirtied, it cannot be
* discarded. Remap the page into the page table.
*/
set_pte_at(mm, address, pvmw.pte, pteval);
- SetPageSwapBacked(page);
+ folio_set_swapbacked(folio);
ret = false;
page_vma_mapped_walk_done(&pvmw);
break;
@@ -1649,16 +1639,17 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
address + PAGE_SIZE);
} else {
/*
- * This is a locked file-backed page, thus it cannot
- * be removed from the page cache and replaced by a new
- * page before mmu_notifier_invalidate_range_end, so no
- * concurrent thread might update its page table to
- * point at new page while a device still is using this
- * page.
+ * This is a locked file-backed folio,
+ * so it cannot be removed from the page
+ * cache and replaced by a new folio before
+ * mmu_notifier_invalidate_range_end, so no
+ * concurrent thread might update its page table
+ * to point at a new folio while a device is
+ * still using this folio.
*
* See Documentation/vm/mmu_notifier.rst
*/
- dec_mm_counter(mm, mm_counter_file(page));
+ dec_mm_counter(mm, mm_counter_file(&folio->page));
}
discard:
/*
@@ -1668,8 +1659,10 @@ discard:
*
* See Documentation/vm/mmu_notifier.rst
*/
- page_remove_rmap(subpage, PageHuge(page));
- put_page(page);
+ page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));
+ if (vma->vm_flags & VM_LOCKED)
+ mlock_page_drain(smp_processor_id());
+ folio_put(folio);
}
mmu_notifier_invalidate_range_end(&range);
@@ -1682,35 +1675,35 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
return vma_is_temporary_stack(vma);
}
-static int page_not_mapped(struct page *page)
+static int page_not_mapped(struct folio *folio)
{
- return !page_mapped(page);
+ return !folio_mapped(folio);
}
/**
- * try_to_unmap - try to remove all page table mappings to a page
- * @page: the page to get unmapped
+ * try_to_unmap - Try to remove all page table mappings to a folio.
+ * @folio: The folio to unmap.
* @flags: action and flags
*
* Tries to remove all the page table entries which are mapping this
- * page, used in the pageout path. Caller must hold the page lock.
+ * folio. It is the caller's responsibility to check whether the folio
+ * is still mapped when needed (use TTU_SYNC to prevent accounting races).
*
- * It is the caller's responsibility to check if the page is still
- * mapped when needed (use TTU_SYNC to prevent accounting races).
+ * Context: Caller must hold the folio lock.
*/
-void try_to_unmap(struct page *page, enum ttu_flags flags)
+void try_to_unmap(struct folio *folio, enum ttu_flags flags)
{
struct rmap_walk_control rwc = {
.rmap_one = try_to_unmap_one,
.arg = (void *)flags,
.done = page_not_mapped,
- .anon_lock = page_lock_anon_vma_read,
+ .anon_lock = folio_lock_anon_vma_read,
};
if (flags & TTU_RMAP_LOCKED)
- rmap_walk_locked(page, &rwc);
+ rmap_walk_locked(folio, &rwc);
else
- rmap_walk(page, &rwc);
+ rmap_walk(folio, &rwc);
}
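A hedged sketch of the pageout-path pattern after this change (the label
and the flag choice are assumptions, in the style of shrink_page_list()):

	struct folio *folio = page_folio(page);

	/* Caller must hold the folio lock; try_to_unmap() returns void. */
	try_to_unmap(folio, TTU_BATCH_FLUSH);
	if (folio_mapped(folio))
		goto keep_locked;	/* still mapped: give up on this folio */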
/*
@@ -1719,15 +1712,11 @@ void try_to_unmap(struct page *page, enum ttu_flags flags)
* If TTU_SPLIT_HUGE_PMD is specified any PMD mappings will be split into PTEs
* containing migration entries.
*/
-static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
+static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
unsigned long address, void *arg)
{
struct mm_struct *mm = vma->vm_mm;
- struct page_vma_mapped_walk pvmw = {
- .page = page,
- .vma = vma,
- .address = address,
- };
+ DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
pte_t pteval;
struct page *subpage;
bool ret = true;
@@ -1748,7 +1737,7 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
* TTU_SPLIT_HUGE_PMD and it wants to freeze.
*/
if (flags & TTU_SPLIT_HUGE_PMD)
- split_huge_pmd_address(vma, address, true, page);
+ split_huge_pmd_address(vma, address, true, folio);
/*
* For THP, we have to assume the worst case, i.e. pmd, for invalidation.
@@ -1758,11 +1747,10 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
* Note that the page cannot be freed in this function, as the caller
* of try_to_unmap() must hold a reference on the page.
*/
- range.end = PageKsm(page) ?
- address + PAGE_SIZE : vma_address_end(page, vma);
+ range.end = vma_address_end(&pvmw);
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
address, range.end);
- if (PageHuge(page)) {
+ if (folio_test_hugetlb(folio)) {
/*
* If sharing is possible, start and end will be adjusted
* accordingly.
@@ -1776,21 +1764,24 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
/* PMD-mapped THP migration entry */
if (!pvmw.pte) {
- VM_BUG_ON_PAGE(PageHuge(page) ||
- !PageTransCompound(page), page);
+ subpage = folio_page(folio,
+ pmd_pfn(*pvmw.pmd) - folio_pfn(folio));
+ VM_BUG_ON_FOLIO(folio_test_hugetlb(folio) ||
+ !folio_test_pmd_mappable(folio), folio);
- set_pmd_migration_entry(&pvmw, page);
+ set_pmd_migration_entry(&pvmw, subpage);
continue;
}
#endif
/* Unexpected PMD-mapped THP? */
- VM_BUG_ON_PAGE(!pvmw.pte, page);
+ VM_BUG_ON_FOLIO(!pvmw.pte, folio);
- subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+ subpage = folio_page(folio,
+ pte_pfn(*pvmw.pte) - folio_pfn(folio));
address = pvmw.address;
- if (PageHuge(page) && !PageAnon(page)) {
+ if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) {
/*
* To call huge_pmd_unshare, i_mmap_rwsem must be
* held in write mode. Caller needs to explicitly
@@ -1828,15 +1819,15 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
pteval = ptep_clear_flush(vma, address, pvmw.pte);
- /* Move the dirty bit to the page. Now the pte is gone. */
+ /* Set the dirty flag on the folio now the pte is gone. */
if (pte_dirty(pteval))
- set_page_dirty(page);
+ folio_mark_dirty(folio);
/* Update high watermark before we lower rss */
update_hiwater_rss(mm);
- if (is_zone_device_page(page)) {
- unsigned long pfn = page_to_pfn(page);
+ if (folio_is_zone_device(folio)) {
+ unsigned long pfn = folio_pfn(folio);
swp_entry_t entry;
pte_t swp_pte;
@@ -1872,16 +1863,16 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
* changed when hugepage migrations to device private
* memory are supported.
*/
- subpage = page;
- } else if (PageHWPoison(page)) {
+ subpage = &folio->page;
+ } else if (PageHWPoison(subpage)) {
pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
- if (PageHuge(page)) {
- hugetlb_count_sub(compound_nr(page), mm);
+ if (folio_test_hugetlb(folio)) {
+ hugetlb_count_sub(folio_nr_pages(folio), mm);
set_huge_swap_pte_at(mm, address,
pvmw.pte, pteval,
vma_mmu_pagesize(vma));
} else {
- dec_mm_counter(mm, mm_counter(page));
+ dec_mm_counter(mm, mm_counter(&folio->page));
set_pte_at(mm, address, pvmw.pte, pteval);
}
@@ -1896,7 +1887,7 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
* migration) will not expect userfaults on already
* copied pages.
*/
- dec_mm_counter(mm, mm_counter(page));
+ dec_mm_counter(mm, mm_counter(&folio->page));
/* We have to invalidate as we cleared the pte */
mmu_notifier_invalidate_range(mm, address,
address + PAGE_SIZE);
@@ -1942,8 +1933,10 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
*
* See Documentation/vm/mmu_notifier.rst
*/
- page_remove_rmap(subpage, PageHuge(page));
- put_page(page);
+ page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));
+ if (vma->vm_flags & VM_LOCKED)
+ mlock_page_drain(smp_processor_id());
+ folio_put(folio);
}
mmu_notifier_invalidate_range_end(&range);
@@ -1953,19 +1946,19 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
/**
* try_to_migrate - try to replace all page table mappings with swap entries
- * @page: the page to replace page table entries for
+ * @folio: the folio to replace page table entries for
* @flags: action and flags
*
- * Tries to remove all the page table entries which are mapping this page and
- * replace them with special swap entries. Caller must hold the page lock.
+ * Tries to remove all the page table entries which are mapping this folio and
+ * replace them with special swap entries. Caller must hold the folio lock.
*/
-void try_to_migrate(struct page *page, enum ttu_flags flags)
+void try_to_migrate(struct folio *folio, enum ttu_flags flags)
{
struct rmap_walk_control rwc = {
.rmap_one = try_to_migrate_one,
.arg = (void *)flags,
.done = page_not_mapped,
- .anon_lock = page_lock_anon_vma_read,
+ .anon_lock = folio_lock_anon_vma_read,
};
/*
@@ -1976,7 +1969,7 @@ void try_to_migrate(struct page *page, enum ttu_flags flags)
TTU_SYNC)))
return;
- if (is_zone_device_page(page) && !is_device_private_page(page))
+ if (folio_is_zone_device(folio) && !folio_is_device_private(folio))
return;
/*
@@ -1987,83 +1980,13 @@ void try_to_migrate(struct page *page, enum ttu_flags flags)
* locking requirements of exec(), migration skips
* temporary VMAs until after exec() completes.
*/
- if (!PageKsm(page) && PageAnon(page))
+ if (!folio_test_ksm(folio) && folio_test_anon(folio))
rwc.invalid_vma = invalid_migration_vma;
if (flags & TTU_RMAP_LOCKED)
- rmap_walk_locked(page, &rwc);
+ rmap_walk_locked(folio, &rwc);
else
- rmap_walk(page, &rwc);
-}
-
-/*
- * Walks the vma's mapping a page and mlocks the page if any locked vma's are
- * found. Once one is found the page is locked and the scan can be terminated.
- */
-static bool page_mlock_one(struct page *page, struct vm_area_struct *vma,
- unsigned long address, void *unused)
-{
- struct page_vma_mapped_walk pvmw = {
- .page = page,
- .vma = vma,
- .address = address,
- };
-
- /* An un-locked vma doesn't have any pages to lock, continue the scan */
- if (!(vma->vm_flags & VM_LOCKED))
- return true;
-
- while (page_vma_mapped_walk(&pvmw)) {
- /*
- * Need to recheck under the ptl to serialise with
- * __munlock_pagevec_fill() after VM_LOCKED is cleared in
- * munlock_vma_pages_range().
- */
- if (vma->vm_flags & VM_LOCKED) {
- /*
- * PTE-mapped THP are never marked as mlocked; but
- * this function is never called on a DoubleMap THP,
- * nor on an Anon THP (which may still be PTE-mapped
- * after DoubleMap was cleared).
- */
- mlock_vma_page(page);
- /*
- * No need to scan further once the page is marked
- * as mlocked.
- */
- page_vma_mapped_walk_done(&pvmw);
- return false;
- }
- }
-
- return true;
-}
-
-/**
- * page_mlock - try to mlock a page
- * @page: the page to be mlocked
- *
- * Called from munlock code. Checks all of the VMAs mapping the page and mlocks
- * the page if any are found. The page will be returned with PG_mlocked cleared
- * if it is not mapped by any locked vmas.
- */
-void page_mlock(struct page *page)
-{
- struct rmap_walk_control rwc = {
- .rmap_one = page_mlock_one,
- .done = page_not_mapped,
- .anon_lock = page_lock_anon_vma_read,
-
- };
-
- VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);
- VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
-
- /* Anon THP are only marked as mlocked when singly mapped */
- if (PageTransCompound(page) && PageAnon(page))
- return;
-
- rmap_walk(page, &rwc);
+ rmap_walk(folio, &rwc);
}
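The corresponding hedged sketch for a migrate_pages()-style caller
(illustrative only):

	/* Replace every PTE mapping the folio with a migration entry. */
	try_to_migrate(src_folio, 0);
	if (folio_mapped(src_folio))
		return -EAGAIN;		/* raced with a new mapping; retry */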
#ifdef CONFIG_DEVICE_PRIVATE
@@ -2074,15 +1997,11 @@ struct make_exclusive_args {
bool valid;
};
-static bool page_make_device_exclusive_one(struct page *page,
+static bool page_make_device_exclusive_one(struct folio *folio,
struct vm_area_struct *vma, unsigned long address, void *priv)
{
struct mm_struct *mm = vma->vm_mm;
- struct page_vma_mapped_walk pvmw = {
- .page = page,
- .vma = vma,
- .address = address,
- };
+ DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
struct make_exclusive_args *args = priv;
pte_t pteval;
struct page *subpage;
@@ -2093,12 +2012,13 @@ static bool page_make_device_exclusive_one(struct page *page,
mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, vma,
vma->vm_mm, address, min(vma->vm_end,
- address + page_size(page)), args->owner);
+ address + folio_size(folio)),
+ args->owner);
mmu_notifier_invalidate_range_start(&range);
while (page_vma_mapped_walk(&pvmw)) {
/* Unexpected PMD-mapped THP? */
- VM_BUG_ON_PAGE(!pvmw.pte, page);
+ VM_BUG_ON_FOLIO(!pvmw.pte, folio);
if (!pte_present(*pvmw.pte)) {
ret = false;
@@ -2106,16 +2026,17 @@ static bool page_make_device_exclusive_one(struct page *page,
break;
}
- subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+ subpage = folio_page(folio,
+ pte_pfn(*pvmw.pte) - folio_pfn(folio));
address = pvmw.address;
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
pteval = ptep_clear_flush(vma, address, pvmw.pte);
- /* Move the dirty bit to the page. Now the pte is gone. */
+ /* Set the dirty flag on the folio now the pte is gone. */
if (pte_dirty(pteval))
- set_page_dirty(page);
+ folio_mark_dirty(folio);
/*
* Check that our target page is still mapped at the expected
@@ -2148,7 +2069,7 @@ static bool page_make_device_exclusive_one(struct page *page,
* There is a reference on the page for the swap entry which has
* been removed, so shouldn't take another.
*/
- page_remove_rmap(subpage, false);
+ page_remove_rmap(subpage, vma, false);
}
mmu_notifier_invalidate_range_end(&range);
@@ -2157,21 +2078,22 @@ static bool page_make_device_exclusive_one(struct page *page,
}
/**
- * page_make_device_exclusive - mark the page exclusively owned by a device
- * @page: the page to replace page table entries for
- * @mm: the mm_struct where the page is expected to be mapped
- * @address: address where the page is expected to be mapped
+ * folio_make_device_exclusive - Mark the folio exclusively owned by a device.
+ * @folio: The folio to replace page table entries for.
+ * @mm: The mm_struct where the folio is expected to be mapped.
+ * @address: Address where the folio is expected to be mapped.
* @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier callbacks
*
- * Tries to remove all the page table entries which are mapping this page and
- * replace them with special device exclusive swap entries to grant a device
- * exclusive access to the page. Caller must hold the page lock.
+ * Tries to remove all the page table entries which are mapping this
+ * folio and replace them with special device exclusive swap entries to
+ * grant a device exclusive access to the folio.
*
- * Returns false if the page is still mapped, or if it could not be unmapped
+ * Context: Caller must hold the folio lock.
+ * Return: false if the page is still mapped, or if it could not be unmapped
* from the expected address. Otherwise returns true (success).
*/
-static bool page_make_device_exclusive(struct page *page, struct mm_struct *mm,
- unsigned long address, void *owner)
+static bool folio_make_device_exclusive(struct folio *folio,
+ struct mm_struct *mm, unsigned long address, void *owner)
{
struct make_exclusive_args args = {
.mm = mm,
@@ -2182,21 +2104,20 @@ static bool page_make_device_exclusive(struct page *page, struct mm_struct *mm,
struct rmap_walk_control rwc = {
.rmap_one = page_make_device_exclusive_one,
.done = page_not_mapped,
- .anon_lock = page_lock_anon_vma_read,
+ .anon_lock = folio_lock_anon_vma_read,
.arg = &args,
};
/*
- * Restrict to anonymous pages for now to avoid potential writeback
- * issues. Also tail pages shouldn't be passed to rmap_walk so skip
- * those.
+ * Restrict to anonymous folios for now to avoid potential writeback
+ * issues.
*/
- if (!PageAnon(page) || PageTail(page))
+ if (!folio_test_anon(folio))
return false;
- rmap_walk(page, &rwc);
+ rmap_walk(folio, &rwc);
- return args.valid && !page_mapcount(page);
+ return args.valid && !folio_mapcount(folio);
}
/**
@@ -2234,15 +2155,16 @@ int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
return npages;
for (i = 0; i < npages; i++, start += PAGE_SIZE) {
- if (!trylock_page(pages[i])) {
- put_page(pages[i]);
+ struct folio *folio = page_folio(pages[i]);
+ if (PageTail(pages[i]) || !folio_trylock(folio)) {
+ folio_put(folio);
pages[i] = NULL;
continue;
}
- if (!page_make_device_exclusive(pages[i], mm, start, owner)) {
- unlock_page(pages[i]);
- put_page(pages[i]);
+ if (!folio_make_device_exclusive(folio, mm, start, owner)) {
+ folio_unlock(folio);
+ folio_put(folio);
pages[i] = NULL;
}
}
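A hedged sketch of driver-side use (the owner cookie and the single-page
range are assumptions; on success pages[i] comes back locked with a
reference held):

	struct page *pages[1];
	int npages;

	npages = make_device_exclusive_range(mm, addr, addr + PAGE_SIZE,
					     pages, driver_owner);
	if (npages != 1 || !pages[0])
		return -EBUSY;	/* could not get exclusive access */
	/* ... program the device mapping, then unlock_page()/put_page() ... */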
@@ -2261,21 +2183,21 @@ void __put_anon_vma(struct anon_vma *anon_vma)
anon_vma_free(root);
}
-static struct anon_vma *rmap_walk_anon_lock(struct page *page,
- struct rmap_walk_control *rwc)
+static struct anon_vma *rmap_walk_anon_lock(struct folio *folio,
+ const struct rmap_walk_control *rwc)
{
struct anon_vma *anon_vma;
if (rwc->anon_lock)
- return rwc->anon_lock(page);
+ return rwc->anon_lock(folio);
/*
- * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
+ * Note: remove_migration_ptes() cannot use folio_lock_anon_vma_read()
* because that depends on page_mapped(); but not all its usages
* are holding mmap_lock. Users without mmap_lock are required to
* take a reference count to prevent the anon_vma disappearing
*/
- anon_vma = page_anon_vma(page);
+ anon_vma = folio_anon_vma(folio);
if (!anon_vma)
return NULL;
@@ -2291,35 +2213,30 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page,
*
* Find all the mappings of a page using the mapping pointer and the vma chains
* contained in the anon_vma struct it points to.
- *
- * When called from page_mlock(), the mmap_lock of the mm containing the vma
- * where the page was found will be held for write. So, we won't recheck
- * vm_flags for that VMA. That should be OK, because that vma shouldn't be
- * LOCKED.
*/
-static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
- bool locked)
+static void rmap_walk_anon(struct folio *folio,
+ const struct rmap_walk_control *rwc, bool locked)
{
struct anon_vma *anon_vma;
pgoff_t pgoff_start, pgoff_end;
struct anon_vma_chain *avc;
if (locked) {
- anon_vma = page_anon_vma(page);
+ anon_vma = folio_anon_vma(folio);
/* anon_vma disappear under us? */
- VM_BUG_ON_PAGE(!anon_vma, page);
+ VM_BUG_ON_FOLIO(!anon_vma, folio);
} else {
- anon_vma = rmap_walk_anon_lock(page, rwc);
+ anon_vma = rmap_walk_anon_lock(folio, rwc);
}
if (!anon_vma)
return;
- pgoff_start = page_to_pgoff(page);
- pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
+ pgoff_start = folio_pgoff(folio);
+ pgoff_end = pgoff_start + folio_nr_pages(folio) - 1;
anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
pgoff_start, pgoff_end) {
struct vm_area_struct *vma = avc->vma;
- unsigned long address = vma_address(page, vma);
+ unsigned long address = vma_address(&folio->page, vma);
VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();
@@ -2327,9 +2244,9 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
continue;
- if (!rwc->rmap_one(page, vma, address, rwc->arg))
+ if (!rwc->rmap_one(folio, vma, address, rwc->arg))
break;
- if (rwc->done && rwc->done(page))
+ if (rwc->done && rwc->done(folio))
break;
}
@@ -2344,16 +2261,11 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
*
* Find all the mappings of a page using the mapping pointer and the vma chains
* contained in the address_space struct it points to.
- *
- * When called from page_mlock(), the mmap_lock of the mm containing the vma
- * where the page was found will be held for write. So, we won't recheck
- * vm_flags for that VMA. That should be OK, because that vma shouldn't be
- * LOCKED.
*/
-static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
- bool locked)
+static void rmap_walk_file(struct folio *folio,
+ const struct rmap_walk_control *rwc, bool locked)
{
- struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping = folio_mapping(folio);
pgoff_t pgoff_start, pgoff_end;
struct vm_area_struct *vma;
@@ -2363,18 +2275,18 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
* structure at mapping cannot be freed and reused yet,
* so we can safely take mapping->i_mmap_rwsem.
*/
- VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
if (!mapping)
return;
- pgoff_start = page_to_pgoff(page);
- pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
+ pgoff_start = folio_pgoff(folio);
+ pgoff_end = pgoff_start + folio_nr_pages(folio) - 1;
if (!locked)
i_mmap_lock_read(mapping);
vma_interval_tree_foreach(vma, &mapping->i_mmap,
pgoff_start, pgoff_end) {
- unsigned long address = vma_address(page, vma);
+ unsigned long address = vma_address(&folio->page, vma);
VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();
@@ -2382,9 +2294,9 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
continue;
- if (!rwc->rmap_one(page, vma, address, rwc->arg))
+ if (!rwc->rmap_one(folio, vma, address, rwc->arg))
goto done;
- if (rwc->done && rwc->done(page))
+ if (rwc->done && rwc->done(folio))
goto done;
}
@@ -2393,25 +2305,25 @@ done:
i_mmap_unlock_read(mapping);
}
-void rmap_walk(struct page *page, struct rmap_walk_control *rwc)
+void rmap_walk(struct folio *folio, const struct rmap_walk_control *rwc)
{
- if (unlikely(PageKsm(page)))
- rmap_walk_ksm(page, rwc);
- else if (PageAnon(page))
- rmap_walk_anon(page, rwc, false);
+ if (unlikely(folio_test_ksm(folio)))
+ rmap_walk_ksm(folio, rwc);
+ else if (folio_test_anon(folio))
+ rmap_walk_anon(folio, rwc, false);
else
- rmap_walk_file(page, rwc, false);
+ rmap_walk_file(folio, rwc, false);
}
/* Like rmap_walk, but caller holds relevant rmap lock */
-void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc)
+void rmap_walk_locked(struct folio *folio, const struct rmap_walk_control *rwc)
{
/* no ksm support for now */
- VM_BUG_ON_PAGE(PageKsm(page), page);
- if (PageAnon(page))
- rmap_walk_anon(page, rwc, true);
+ VM_BUG_ON_FOLIO(folio_test_ksm(folio), folio);
+ if (folio_test_anon(folio))
+ rmap_walk_anon(folio, rwc, true);
else
- rmap_walk_file(page, rwc, true);
+ rmap_walk_file(folio, rwc, true);
}
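A hedged sketch of an ad-hoc walk against the folio-based interface (the
counting callback is illustrative, not from this patch):

	static bool count_one(struct folio *folio, struct vm_area_struct *vma,
			      unsigned long address, void *arg)
	{
		(*(int *)arg)++;
		return true;	/* true: keep walking the remaining vmas */
	}

	static int count_mappings(struct folio *folio)
	{
		int count = 0;
		struct rmap_walk_control rwc = {
			.rmap_one = count_one,
			.arg = &count,
			.anon_lock = folio_lock_anon_vma_read,
		};

		rmap_walk(folio, &rwc);
		return count;
	}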
#ifdef CONFIG_HUGETLB_PAGE
@@ -2439,8 +2351,7 @@ void hugepage_add_new_anon_rmap(struct page *page,
{
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
atomic_set(compound_mapcount_ptr(page), 0);
- if (hpage_pincount_available(page))
- atomic_set(compound_pincount_ptr(page), 0);
+ atomic_set(compound_pincount_ptr(page), 0);
__page_set_anon_rmap(page, vma, address, 1);
}
diff --git a/mm/secretmem.c b/mm/secretmem.c
index 22b310adb53d..098638d3b8a4 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -152,7 +152,7 @@ static void secretmem_freepage(struct page *page)
}
const struct address_space_operations secretmem_aops = {
- .set_page_dirty = __set_page_dirty_no_writeback,
+ .dirty_folio = noop_dirty_folio,
.freepage = secretmem_freepage,
.migratepage = secretmem_migratepage,
.isolate_page = secretmem_isolate_page,
diff --git a/mm/shmem.c b/mm/shmem.c
index a09b29ec2b45..529c9ad3e926 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -476,6 +476,8 @@ bool shmem_is_huge(struct vm_area_struct *vma,
{
loff_t i_size;
+ if (!S_ISREG(inode->i_mode))
+ return false;
if (shmem_huge == SHMEM_HUGE_DENY)
return false;
if (vma && ((vma->vm_flags & VM_NOHUGEPAGE) ||
@@ -1061,6 +1063,12 @@ static int shmem_getattr(struct user_namespace *mnt_userns,
if (shmem_is_huge(NULL, inode, 0))
stat->blksize = HPAGE_PMD_SIZE;
+ if (request_mask & STATX_BTIME) {
+ stat->result_mask |= STATX_BTIME;
+ stat->btime.tv_sec = info->i_crtime.tv_sec;
+ stat->btime.tv_nsec = info->i_crtime.tv_nsec;
+ }
+
return 0;
}
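From userspace the new tmpfs birth time becomes visible through statx(2);
a hedged example (the path is an assumption, and the statx() wrapper
needs glibc >= 2.28):

	#include <stdio.h>
	#include <fcntl.h>	/* AT_FDCWD */
	#include <sys/stat.h>	/* statx(), struct statx */

	int main(void)
	{
		struct statx stx;

		if (statx(AT_FDCWD, "/dev/shm/example", 0, STATX_BTIME, &stx) == 0 &&
		    (stx.stx_mask & STATX_BTIME))
			printf("btime: %lld\n", (long long)stx.stx_btime.tv_sec);
		return 0;
	}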
@@ -1121,6 +1129,7 @@ static void shmem_evict_inode(struct inode *inode)
if (shmem_mapping(inode->i_mapping)) {
shmem_unacct_size(info->flags, inode->i_size);
inode->i_size = 0;
+ mapping_set_exiting(inode->i_mapping);
shmem_truncate_range(inode, 0, (loff_t)-1);
if (!list_empty(&info->shrinklist)) {
spin_lock(&sbinfo->shrinklist_lock);
@@ -1854,9 +1863,6 @@ repeat:
return 0;
}
- /* Never use a huge page for shmem_symlink() */
- if (S_ISLNK(inode->i_mode))
- goto alloc_nohuge;
if (!shmem_is_huge(vma, inode, index))
goto alloc_nohuge;
@@ -2265,6 +2271,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
atomic_set(&info->stop_eviction, 0);
info->seals = F_SEAL_SEAL;
info->flags = flags & VM_NORESERVE;
+ info->i_crtime = inode->i_mtime;
INIT_LIST_HEAD(&info->shrinklist);
INIT_LIST_HEAD(&info->swaplist);
simple_xattrs_init(&info->xattrs);
@@ -2357,8 +2364,10 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
/* don't free the page */
goto out_unacct_blocks;
}
+
+ flush_dcache_page(page);
} else { /* ZEROPAGE */
- clear_highpage(page);
+ clear_user_highpage(page, dst_addr);
}
} else {
page = *pagep;
@@ -2492,19 +2501,10 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct address_space *mapping = inode->i_mapping;
pgoff_t index;
unsigned long offset;
- enum sgp_type sgp = SGP_READ;
int error = 0;
ssize_t retval = 0;
loff_t *ppos = &iocb->ki_pos;
- /*
- * Might this read be for a stacking filesystem? Then when reading
- * holes of a sparse file, we actually need to allocate those pages,
- * and even mark them dirty, so it cannot exceed the max_blocks limit.
- */
- if (!iter_is_iovec(to))
- sgp = SGP_CACHE;
-
index = *ppos >> PAGE_SHIFT;
offset = *ppos & ~PAGE_MASK;
@@ -2513,6 +2513,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
pgoff_t end_index;
unsigned long nr, ret;
loff_t i_size = i_size_read(inode);
+ bool got_page;
end_index = i_size >> PAGE_SHIFT;
if (index > end_index)
@@ -2523,15 +2524,13 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
break;
}
- error = shmem_getpage(inode, index, &page, sgp);
+ error = shmem_getpage(inode, index, &page, SGP_READ);
if (error) {
if (error == -EINVAL)
error = 0;
break;
}
if (page) {
- if (sgp == SGP_CACHE)
- set_page_dirty(page);
unlock_page(page);
if (PageHWPoison(page)) {
@@ -2571,9 +2570,10 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
*/
if (!offset)
mark_page_accessed(page);
+ got_page = true;
} else {
page = ZERO_PAGE(0);
- get_page(page);
+ got_page = false;
}
/*
@@ -2586,7 +2586,8 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
index += offset >> PAGE_SHIFT;
offset &= ~PAGE_MASK;
- put_page(page);
+ if (got_page)
+ put_page(page);
if (!iov_iter_count(to))
break;
if (ret < nr) {
@@ -3196,6 +3197,7 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
#endif /* CONFIG_TMPFS_XATTR */
static const struct inode_operations shmem_short_symlink_operations = {
+ .getattr = shmem_getattr,
.get_link = simple_get_link,
#ifdef CONFIG_TMPFS_XATTR
.listxattr = shmem_listxattr,
@@ -3203,6 +3205,7 @@ static const struct inode_operations shmem_short_symlink_operations = {
};
static const struct inode_operations shmem_symlink_inode_operations = {
+ .getattr = shmem_getattr,
.get_link = shmem_get_link,
#ifdef CONFIG_TMPFS_XATTR
.listxattr = shmem_listxattr,
@@ -3707,7 +3710,7 @@ static struct kmem_cache *shmem_inode_cachep;
static struct inode *shmem_alloc_inode(struct super_block *sb)
{
struct shmem_inode_info *info;
- info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
+ info = alloc_inode_sb(sb, shmem_inode_cachep, GFP_KERNEL);
if (!info)
return NULL;
return &info->vfs_inode;
@@ -3753,7 +3756,7 @@ static int shmem_error_remove_page(struct address_space *mapping,
const struct address_space_operations shmem_aops = {
.writepage = shmem_writepage,
- .set_page_dirty = __set_page_dirty_no_writeback,
+ .dirty_folio = noop_dirty_folio,
#ifdef CONFIG_TMPFS
.write_begin = shmem_write_begin,
.write_end = shmem_write_end,
@@ -3790,6 +3793,7 @@ static const struct inode_operations shmem_inode_operations = {
static const struct inode_operations shmem_dir_inode_operations = {
#ifdef CONFIG_TMPFS
+ .getattr = shmem_getattr,
.create = shmem_create,
.lookup = simple_lookup,
.link = shmem_link,
@@ -3811,6 +3815,7 @@ static const struct inode_operations shmem_dir_inode_operations = {
};
static const struct inode_operations shmem_special_inode_operations = {
+ .getattr = shmem_getattr,
#ifdef CONFIG_TMPFS_XATTR
.listxattr = shmem_listxattr,
#endif
@@ -3962,8 +3967,7 @@ static ssize_t shmem_enabled_store(struct kobject *kobj,
return count;
}
-struct kobj_attribute shmem_enabled_attr =
- __ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
+struct kobj_attribute shmem_enabled_attr = __ATTR_RW(shmem_enabled);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */
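The __ATTR_RW() shorthand works here because the handlers already follow
the name##_show/name##_store convention it presumes; its definition in
sysfs.h is approximately:

	#define __ATTR_RW(_name) __ATTR(_name, 0644, _name##_show, _name##_store)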
#else /* !CONFIG_SHMEM */
diff --git a/mm/slab.c b/mm/slab.c
index ddf5737c63d9..d9dec7a8fd79 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3211,7 +3211,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, size_t orig_
bool init = false;
flags &= gfp_allowed_mask;
- cachep = slab_pre_alloc_hook(cachep, &objcg, 1, flags);
+ cachep = slab_pre_alloc_hook(cachep, NULL, &objcg, 1, flags);
if (unlikely(!cachep))
return NULL;
@@ -3287,7 +3287,8 @@ __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
#endif /* CONFIG_NUMA */
static __always_inline void *
-slab_alloc(struct kmem_cache *cachep, gfp_t flags, size_t orig_size, unsigned long caller)
+slab_alloc(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags,
+ size_t orig_size, unsigned long caller)
{
unsigned long save_flags;
void *objp;
@@ -3295,7 +3296,7 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, size_t orig_size, unsigned lo
bool init = false;
flags &= gfp_allowed_mask;
- cachep = slab_pre_alloc_hook(cachep, &objcg, 1, flags);
+ cachep = slab_pre_alloc_hook(cachep, lru, &objcg, 1, flags);
if (unlikely(!cachep))
return NULL;
@@ -3484,6 +3485,18 @@ void ___cache_free(struct kmem_cache *cachep, void *objp,
__free_one(ac, objp);
}
+static __always_inline
+void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
+ gfp_t flags)
+{
+ void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);
+
+ trace_kmem_cache_alloc(_RET_IP_, ret,
+ cachep->object_size, cachep->size, flags);
+
+ return ret;
+}
+
/**
* kmem_cache_alloc - Allocate an object
* @cachep: The cache to allocate from.
@@ -3496,15 +3509,17 @@ void ___cache_free(struct kmem_cache *cachep, void *objp,
*/
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
- void *ret = slab_alloc(cachep, flags, cachep->object_size, _RET_IP_);
-
- trace_kmem_cache_alloc(_RET_IP_, ret,
- cachep->object_size, cachep->size, flags);
-
- return ret;
+ return __kmem_cache_alloc_lru(cachep, NULL, flags);
}
EXPORT_SYMBOL(kmem_cache_alloc);
+void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
+ gfp_t flags)
+{
+ return __kmem_cache_alloc_lru(cachep, lru, flags);
+}
+EXPORT_SYMBOL(kmem_cache_alloc_lru);
+
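The intended consumer is filesystem inode allocation; the alloc_inode_sb()
helper used by the shmem hunk further down is approximately (definition
assumed from elsewhere in this series):

	static inline void *alloc_inode_sb(struct super_block *sb,
					   struct kmem_cache *cache, gfp_t gfp)
	{
		/* Charge the object and pre-size the per-memcg s_inode_lru. */
		return kmem_cache_alloc_lru(cache, &sb->s_inode_lru, gfp);
	}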
static __always_inline void
cache_alloc_debugcheck_after_bulk(struct kmem_cache *s, gfp_t flags,
size_t size, void **p, unsigned long caller)
@@ -3521,7 +3536,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
size_t i;
struct obj_cgroup *objcg = NULL;
- s = slab_pre_alloc_hook(s, &objcg, size, flags);
+ s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
if (!s)
return 0;
@@ -3562,7 +3577,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
{
void *ret;
- ret = slab_alloc(cachep, flags, size, _RET_IP_);
+ ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_);
ret = kasan_kmalloc(cachep, ret, size, flags);
trace_kmalloc(_RET_IP_, ret,
@@ -3689,7 +3704,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
cachep = kmalloc_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
- ret = slab_alloc(cachep, flags, size, caller);
+ ret = slab_alloc(cachep, NULL, flags, size, caller);
ret = kasan_kmalloc(cachep, ret, size, flags);
trace_kmalloc(caller, ret,
diff --git a/mm/slab.h b/mm/slab.h
index c7f2abc2b154..fd7ae2024897 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -231,6 +231,7 @@ struct kmem_cache {
#include <linux/kmemleak.h>
#include <linux/random.h>
#include <linux/sched/mm.h>
+#include <linux/list_lru.h>
/*
* State of the slab allocator.
@@ -472,6 +473,7 @@ static inline size_t obj_full_size(struct kmem_cache *s)
* Returns false if the allocation should fail.
*/
static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
+ struct list_lru *lru,
struct obj_cgroup **objcgp,
size_t objects, gfp_t flags)
{
@@ -487,13 +489,26 @@ static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
if (!objcg)
return true;
- if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s))) {
- obj_cgroup_put(objcg);
- return false;
+ if (lru) {
+ int ret;
+ struct mem_cgroup *memcg;
+
+ memcg = get_mem_cgroup_from_objcg(objcg);
+ ret = memcg_list_lru_alloc(memcg, lru, flags);
+ css_put(&memcg->css);
+
+ if (ret)
+ goto out;
}
+ if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
+ goto out;
+
*objcgp = objcg;
return true;
+out:
+ obj_cgroup_put(objcg);
+ return false;
}
static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
@@ -598,6 +613,7 @@ static inline void memcg_free_slab_cgroups(struct slab *slab)
}
static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
+ struct list_lru *lru,
struct obj_cgroup **objcgp,
size_t objects, gfp_t flags)
{
@@ -697,6 +713,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
}
static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
+ struct list_lru *lru,
struct obj_cgroup **objcgp,
size_t size, gfp_t flags)
{
@@ -707,7 +724,7 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
if (should_failslab(s, flags))
return NULL;
- if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags))
+ if (!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags))
return NULL;
return s;
diff --git a/mm/slob.c b/mm/slob.c
index 60c5842215f1..8a8795520361 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -635,6 +635,12 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
}
EXPORT_SYMBOL(kmem_cache_alloc);
+
+void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags)
+{
+ return slob_alloc_node(cachep, flags, NUMA_NO_NODE);
+}
+EXPORT_SYMBOL(kmem_cache_alloc_lru);
#ifdef CONFIG_NUMA
void *__kmalloc_node(size_t size, gfp_t gfp, int node)
{
diff --git a/mm/slub.c b/mm/slub.c
index 261474092e43..07cdd999c3fe 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3131,7 +3131,7 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
*
* Otherwise we can simply pick the next object from the lockless free list.
*/
-static __always_inline void *slab_alloc_node(struct kmem_cache *s,
+static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
{
void *object;
@@ -3141,7 +3141,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
struct obj_cgroup *objcg = NULL;
bool init = false;
- s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
+ s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
if (!s)
return NULL;
@@ -3232,27 +3232,41 @@ out:
return object;
}
-static __always_inline void *slab_alloc(struct kmem_cache *s,
+static __always_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags, unsigned long addr, size_t orig_size)
{
- return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr, orig_size);
+ return slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, addr, orig_size);
}
-void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
+static __always_inline
+void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
+ gfp_t gfpflags)
{
- void *ret = slab_alloc(s, gfpflags, _RET_IP_, s->object_size);
+ void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
s->size, gfpflags);
return ret;
}
+
+void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
+{
+ return __kmem_cache_alloc_lru(s, NULL, gfpflags);
+}
EXPORT_SYMBOL(kmem_cache_alloc);
+void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
+ gfp_t gfpflags)
+{
+ return __kmem_cache_alloc_lru(s, lru, gfpflags);
+}
+EXPORT_SYMBOL(kmem_cache_alloc_lru);
+
#ifdef CONFIG_TRACING
void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
- void *ret = slab_alloc(s, gfpflags, _RET_IP_, size);
+ void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
@@ -3263,7 +3277,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_trace);
#ifdef CONFIG_NUMA
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
{
- void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, s->object_size);
+ void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
trace_kmem_cache_alloc_node(_RET_IP_, ret,
s->object_size, s->size, gfpflags, node);
@@ -3277,7 +3291,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
gfp_t gfpflags,
int node, size_t size)
{
- void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, size);
+ void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
trace_kmalloc_node(_RET_IP_, ret,
size, s->size, gfpflags, node);
@@ -3667,7 +3681,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
struct obj_cgroup *objcg = NULL;
/* memcg and kmem_cache debug support */
- s = slab_pre_alloc_hook(s, &objcg, size, flags);
+ s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
if (unlikely(!s))
return false;
/*
@@ -4417,7 +4431,7 @@ void *__kmalloc(size_t size, gfp_t flags)
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- ret = slab_alloc(s, flags, _RET_IP_, size);
+ ret = slab_alloc(s, NULL, flags, _RET_IP_, size);
trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
@@ -4465,7 +4479,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- ret = slab_alloc_node(s, flags, node, _RET_IP_, size);
+ ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);
trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
@@ -4923,7 +4937,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- ret = slab_alloc(s, gfpflags, caller, size);
+ ret = slab_alloc(s, NULL, gfpflags, caller, size);
/* Honor the call site pointer we received. */
trace_kmalloc(caller, ret, size, s->size, gfpflags);
@@ -4954,7 +4968,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- ret = slab_alloc_node(s, gfpflags, node, caller, size);
+ ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);
/* Honor the call site pointer we received. */
trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index db6df27c852a..8aecd6b3896c 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -34,6 +34,7 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
+#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
/**
* struct vmemmap_remap_walk - walk vmemmap page table
*
@@ -53,8 +54,7 @@ struct vmemmap_remap_walk {
struct list_head *vmemmap_pages;
};
-static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start,
- struct vmemmap_remap_walk *walk)
+static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
{
pmd_t __pmd;
int i;
@@ -76,15 +76,34 @@ static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start,
set_pte_at(&init_mm, addr, pte, entry);
}
- /* Make pte visible before pmd. See comment in pmd_install(). */
- smp_wmb();
- pmd_populate_kernel(&init_mm, pmd, pgtable);
-
- flush_tlb_kernel_range(start, start + PMD_SIZE);
+ spin_lock(&init_mm.page_table_lock);
+ if (likely(pmd_leaf(*pmd))) {
+ /* Make pte visible before pmd. See comment in pmd_install(). */
+ smp_wmb();
+ pmd_populate_kernel(&init_mm, pmd, pgtable);
+ flush_tlb_kernel_range(start, start + PMD_SIZE);
+ } else {
+ pte_free_kernel(&init_mm, pgtable);
+ }
+ spin_unlock(&init_mm.page_table_lock);
return 0;
}
+static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
+{
+ int leaf;
+
+ spin_lock(&init_mm.page_table_lock);
+ leaf = pmd_leaf(*pmd);
+ spin_unlock(&init_mm.page_table_lock);
+
+ if (!leaf)
+ return 0;
+
+ return __split_vmemmap_huge_pmd(pmd, start);
+}
+
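Design note: the wrapper samples pmd_leaf() under init_mm.page_table_lock
only so callers can skip already-split PMDs cheaply; __split_vmemmap_huge_pmd()
re-checks pmd_leaf() after allocating the pgtable and frees it if another
splitter won the race, so the early check needs no stability guarantee.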
static void vmemmap_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end,
struct vmemmap_remap_walk *walk)
@@ -121,13 +140,12 @@ static int vmemmap_pmd_range(pud_t *pud, unsigned long addr,
pmd = pmd_offset(pud, addr);
do {
- if (pmd_leaf(*pmd)) {
- int ret;
+ int ret;
+
+ ret = split_vmemmap_huge_pmd(pmd, addr & PMD_MASK);
+ if (ret)
+ return ret;
- ret = split_vmemmap_huge_pmd(pmd, addr & PMD_MASK, walk);
- if (ret)
- return ret;
- }
next = pmd_addr_end(addr, end);
vmemmap_pte_range(pmd, addr, next, walk);
} while (pmd++, addr = next, addr != end);
@@ -245,6 +263,26 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
set_pte_at(&init_mm, addr, pte, entry);
}
+/*
+ * How many struct page structs need to be reset. When we reuse the head
+ * struct page, the special metadata (e.g. page->flags or page->mapping)
+ * cannot be copied to the tail struct page structs, and the stale
+ * values would be caught by free_tail_pages_check(). To avoid its
+ * "corrupted mapping in tail page" message, we need to reset at least
+ * 3 struct pages: the head struct page and the first two tail ones.
+ */
+#define NR_RESET_STRUCT_PAGE 3
+
+static inline void reset_struct_pages(struct page *start)
+{
+ int i;
+ struct page *from = start + NR_RESET_STRUCT_PAGE;
+
+ for (i = 0; i < NR_RESET_STRUCT_PAGE; i++)
+ memcpy(start + i, from, sizeof(*from));
+}
+
static void vmemmap_restore_pte(pte_t *pte, unsigned long addr,
struct vmemmap_remap_walk *walk)
{
@@ -258,6 +296,7 @@ static void vmemmap_restore_pte(pte_t *pte, unsigned long addr,
list_del(&page->lru);
to = page_to_virt(page);
copy_page(to, (void *)walk->reuse_addr);
+ reset_struct_pages(to);
set_pte_at(&init_mm, addr, pte, mk_pte(page, pgprot));
}
@@ -300,10 +339,8 @@ int vmemmap_remap_free(unsigned long start, unsigned long end,
*/
BUG_ON(start - reuse != PAGE_SIZE);
- mmap_write_lock(&init_mm);
+ mmap_read_lock(&init_mm);
ret = vmemmap_remap_range(reuse, end, &walk);
- mmap_write_downgrade(&init_mm);
-
if (ret && walk.nr_walked) {
end = reuse + walk.nr_walked * PAGE_SIZE;
/*
@@ -383,6 +420,7 @@ int vmemmap_remap_alloc(unsigned long start, unsigned long end,
return 0;
}
+#endif /* CONFIG_HUGETLB_PAGE_FREE_VMEMMAP */
/*
* Allocate a block of memory to be used to back the virtual memory map
diff --git a/mm/sparse.c b/mm/sparse.c
index d21c6e5910d0..952f06d8f373 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -126,7 +126,7 @@ static inline int sparse_early_nid(struct mem_section *section)
}
/* Validate the physical addressing limitations of the model */
-void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
+static void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
unsigned long *end_pfn)
{
unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
diff --git a/mm/swap.c b/mm/swap.c
index bcf3ac288b56..5b30045207e1 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -74,8 +74,8 @@ static DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = {
};
/*
- * This path almost never happens for VM activity - pages are normally
- * freed via pagevecs. But it gets used by networking.
+ * This path almost never happens for VM activity - pages are normally freed
+ * via pagevecs. But it gets used by networking - and for compound pages.
*/
static void __page_cache_release(struct page *page)
{
@@ -89,6 +89,14 @@ static void __page_cache_release(struct page *page)
__clear_page_lru_flags(page);
unlock_page_lruvec_irqrestore(lruvec, flags);
}
+ /* See comment on PageMlocked in release_pages() */
+ if (unlikely(PageMlocked(page))) {
+ int nr_pages = thp_nr_pages(page);
+
+ __ClearPageMlocked(page);
+ mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
+ count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages);
+ }
__ClearPageWaiters(page);
}
@@ -114,17 +122,9 @@ static void __put_compound_page(struct page *page)
void __put_page(struct page *page)
{
- if (is_zone_device_page(page)) {
- put_dev_pagemap(page->pgmap);
-
- /*
- * The page belongs to the device that created pgmap. Do
- * not return it to page allocator.
- */
- return;
- }
-
- if (unlikely(PageCompound(page)))
+ if (unlikely(is_zone_device_page(page)))
+ free_zone_device_page(page);
+ else if (unlikely(PageCompound(page)))
__put_compound_page(page);
else
__put_single_page(page);
@@ -425,7 +425,7 @@ void folio_mark_accessed(struct folio *folio)
/*
* Unevictable pages are on the "LRU_UNEVICTABLE" list. But,
* this list is never rotated or maintained, so marking an
- * evictable page accessed has no effect.
+ * unevictable page accessed has no effect.
*/
} else if (!folio_test_active(folio)) {
/*
@@ -482,22 +482,12 @@ EXPORT_SYMBOL(folio_add_lru);
void lru_cache_add_inactive_or_unevictable(struct page *page,
struct vm_area_struct *vma)
{
- bool unevictable;
-
VM_BUG_ON_PAGE(PageLRU(page), page);
- unevictable = (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED;
- if (unlikely(unevictable) && !TestSetPageMlocked(page)) {
- int nr_pages = thp_nr_pages(page);
- /*
- * We use the irq-unsafe __mod_zone_page_state because this
- * counter is not modified from interrupt context, and the pte
- * lock is held(spinlock), which implies preemption disabled.
- */
- __mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
- count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
- }
- lru_cache_add(page);
+ if (unlikely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED))
+ mlock_new_page(page);
+ else
+ lru_cache_add(page);
}
/*
@@ -636,35 +626,37 @@ void lru_add_drain_cpu(int cpu)
pagevec_lru_move_fn(pvec, lru_lazyfree_fn);
activate_page_drain(cpu);
+ mlock_page_drain(cpu);
}
/**
- * deactivate_file_page - forcefully deactivate a file page
- * @page: page to deactivate
+ * deactivate_file_folio() - Forcefully deactivate a file folio.
+ * @folio: Folio to deactivate.
*
- * This function hints the VM that @page is a good reclaim candidate,
- * for example if its invalidation fails due to the page being dirty
+ * This function hints to the VM that @folio is a good reclaim candidate,
+ * for example if its invalidation fails due to the folio being dirty
* or under writeback.
+ *
+ * Context: Caller holds a reference on the folio.
*/
-void deactivate_file_page(struct page *page)
+void deactivate_file_folio(struct folio *folio)
{
+ struct pagevec *pvec;
+
/*
- * In a workload with many unevictable page such as mprotect,
- * unevictable page deactivation for accelerating reclaim is pointless.
+ * In a workload with many unevictable pages such as mprotect,
+ * unevictable folio deactivation for accelerating reclaim is pointless.
*/
- if (PageUnevictable(page))
+ if (folio_test_unevictable(folio))
return;
- if (likely(get_page_unless_zero(page))) {
- struct pagevec *pvec;
-
- local_lock(&lru_pvecs.lock);
- pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate_file);
+ folio_get(folio);
+ local_lock(&lru_pvecs.lock);
+ pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate_file);
- if (pagevec_add_and_need_flush(pvec, page))
- pagevec_lru_move_fn(pvec, lru_deactivate_file_fn);
- local_unlock(&lru_pvecs.lock);
- }
+ if (pagevec_add_and_need_flush(pvec, &folio->page))
+ pagevec_lru_move_fn(pvec, lru_deactivate_file_fn);
+ local_unlock(&lru_pvecs.lock);
}
/*
@@ -831,13 +823,13 @@ inline void __lru_add_drain_all(bool force_all_cpus)
for_each_online_cpu(cpu) {
struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
- if (force_all_cpus ||
- pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) ||
+ if (pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) ||
data_race(pagevec_count(&per_cpu(lru_rotate.pvec, cpu))) ||
pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) ||
pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) ||
pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) ||
need_activate_page_drain(cpu) ||
+ need_mlock_page_drain(cpu) ||
has_bh_in_lru(cpu, NULL)) {
INIT_WORK(work, lru_add_drain_per_cpu);
queue_work_on(cpu, mm_percpu_wq, work);
@@ -876,15 +868,21 @@ atomic_t lru_disable_count = ATOMIC_INIT(0);
void lru_cache_disable(void)
{
atomic_inc(&lru_disable_count);
-#ifdef CONFIG_SMP
/*
- * lru_add_drain_all in the force mode will schedule draining on
- * all online CPUs so any calls of lru_cache_disabled wrapped by
- * local_lock or preemption disabled would be ordered by that.
- * The atomic operation doesn't need to have stronger ordering
- * requirements because that is enforced by the scheduling
- * guarantees.
+ * Readers of lru_disable_count are protected by either disabling
+ * preemption or rcu_read_lock:
+ *
+ * preempt_disable, local_irq_disable [bh_lru_lock()]
+ * rcu_read_lock [rt_spin_lock CONFIG_PREEMPT_RT]
+ * preempt_disable [local_lock !CONFIG_PREEMPT_RT]
+ *
+ * Since the v5.1 kernel, synchronize_rcu() is guaranteed to wait on
+ * preempt_disable() regions of code. So any CPU which sees
+ * lru_disable_count = 0 will have exited the critical
+ * section when synchronize_rcu() returns.
*/
+ synchronize_rcu();
+#ifdef CONFIG_SMP
__lru_add_drain_all(true);
#else
lru_add_and_bh_lrus_drain();
@@ -930,18 +928,10 @@ void release_pages(struct page **pages, int nr)
unlock_page_lruvec_irqrestore(lruvec, flags);
lruvec = NULL;
}
- /*
- * ZONE_DEVICE pages that return 'false' from
- * page_is_devmap_managed() do not require special
- * processing, and instead, expect a call to
- * put_page_testzero().
- */
- if (page_is_devmap_managed(page)) {
- put_devmap_managed_page(page);
+ if (put_devmap_managed_page(page))
continue;
- }
if (put_page_testzero(page))
- put_dev_pagemap(page->pgmap);
+ free_zone_device_page(page);
continue;
}
@@ -969,6 +959,18 @@ void release_pages(struct page **pages, int nr)
__clear_page_lru_flags(page);
}
+ /*
+ * In rare cases, when truncation or holepunching raced with
+ * munlock after VM_LOCKED was cleared, Mlocked may still be
+ * found set here. This does not indicate a problem, unless
+ * "unevictable_pgs_cleared" appears worryingly large.
+ */
+ if (unlikely(PageMlocked(page))) {
+ __ClearPageMlocked(page);
+ dec_zone_page_state(page, NR_MLOCK);
+ count_vm_event(UNEVICTABLE_PGCLEARED);
+ }
+
__ClearPageWaiters(page);
list_add(&page->lru, &pages_to_free);
@@ -1009,43 +1011,32 @@ static void __pagevec_lru_add_fn(struct folio *folio, struct lruvec *lruvec)
VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
+ folio_set_lru(folio);
/*
- * A folio becomes evictable in two ways:
- * 1) Within LRU lock [munlock_vma_page() and __munlock_pagevec()].
- * 2) Before acquiring LRU lock to put the folio on the correct LRU
- * and then
- * a) do PageLRU check with lock [check_move_unevictable_pages]
- * b) do PageLRU check before lock [clear_page_mlock]
- *
- * (1) & (2a) are ok as LRU lock will serialize them. For (2b), we need
- * following strict ordering:
- *
- * #0: __pagevec_lru_add_fn #1: clear_page_mlock
- *
- * folio_set_lru() folio_test_clear_mlocked()
- * smp_mb() // explicit ordering // above provides strict
- * // ordering
- * folio_test_mlocked() folio_test_lru()
- *
+ * Is an smp_mb__after_atomic() still required here, before
+ * folio_evictable() tests PageMlocked, to rule out the possibility
+ * of stranding an evictable folio on an unevictable LRU? I think
+ * not, because __munlock_page() only clears PageMlocked while the LRU
+ * lock is held.
*
- * if '#1' does not observe setting of PG_lru by '#0' and
- * fails isolation, the explicit barrier will make sure that
- * folio_evictable check will put the folio on the correct
- * LRU. Without smp_mb(), folio_set_lru() can be reordered
- * after folio_test_mlocked() check and can make '#1' fail the
- * isolation of the folio whose mlocked bit is cleared (#0 is
- * also looking at the same folio) and the evictable folio will
- * be stranded on an unevictable LRU.
+ * (That is not true of __page_cache_release(), and not necessarily
+ * true of release_pages(): but those only clear PageMlocked after
+ * put_page_testzero() has excluded any other users of the page.)
*/
- folio_set_lru(folio);
- smp_mb__after_atomic();
-
if (folio_evictable(folio)) {
if (was_unevictable)
__count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages);
} else {
folio_clear_active(folio);
folio_set_unevictable(folio);
+ /*
+ * folio->mlock_count = !!folio_test_mlocked(folio)?
+ * But that leaves __mlock_page() in doubt whether another
+ * actor has already counted the mlock or not. Err on the
+ * safe side, underestimate, let page reclaim fix it, rather
+ * than leaving a page on the unevictable LRU indefinitely.
+ */
+ folio->mlock_count = 0;
if (!was_unevictable)
__count_vm_events(UNEVICTABLE_PGCULLED, nr_pages);
}
@@ -1153,26 +1144,3 @@ void __init swap_setup(void)
* _really_ don't want to cluster much more
*/
}
-
-#ifdef CONFIG_DEV_PAGEMAP_OPS
-void put_devmap_managed_page(struct page *page)
-{
- int count;
-
- if (WARN_ON_ONCE(!page_is_devmap_managed(page)))
- return;
-
- count = page_ref_dec_return(page);
-
- /*
- * devmap page refcounts are 1-based, rather than 0-based: if
- * refcount is 1, then the page is free and the refcount is
- * stable because nobody holds a reference on the page.
- */
- if (count == 1)
- free_devmap_managed_page(page);
- else if (!count)
- __put_page(page);
-}
-EXPORT_SYMBOL(put_devmap_managed_page);
-#endif
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 8d4104242100..013856004825 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -30,7 +30,7 @@
*/
static const struct address_space_operations swap_aops = {
.writepage = swap_writepage,
- .set_page_dirty = swap_set_page_dirty,
+ .dirty_folio = swap_dirty_folio,
#ifdef CONFIG_MIGRATION
.migratepage = migrate_page,
#endif
@@ -478,7 +478,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
* __read_swap_cache_async(), which has set SWAP_HAS_CACHE
* in swap_map, but not yet added its page to swap cache.
*/
- cond_resched();
+ schedule_timeout_uninterruptible(1);
}
/*
diff --git a/mm/swapfile.c b/mm/swapfile.c
index bf0df7aa7158..33c7abb16610 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1951,6 +1951,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
struct vm_fault vmf = {
.vma = vma,
.address = addr,
+ .real_address = addr,
.pmd = pmd,
};
diff --git a/mm/truncate.c b/mm/truncate.c
index 9dbf0b75da5d..ab50d0d59a2a 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -19,8 +19,7 @@
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include <linux/task_io_accounting_ops.h>
-#include <linux/buffer_head.h> /* grr. try_to_release_page,
- do_invalidatepage */
+#include <linux/buffer_head.h> /* grr. try_to_release_page */
#include <linux/shmem_fs.h>
#include <linux/rmap.h>
#include "internal.h"
@@ -138,33 +137,28 @@ static int invalidate_exceptional_entry2(struct address_space *mapping,
}
/**
- * do_invalidatepage - invalidate part or all of a page
- * @page: the page which is affected
+ * folio_invalidate - Invalidate part or all of a folio.
+ * @folio: The folio which is affected.
* @offset: start of the range to invalidate
* @length: length of the range to invalidate
*
- * do_invalidatepage() is called when all or part of the page has become
+ * folio_invalidate() is called when all or part of the folio has become
* invalidated by a truncate operation.
*
- * do_invalidatepage() does not have to release all buffers, but it must
+ * folio_invalidate() does not have to release all buffers, but it must
* ensure that no dirty buffer is left outside @offset and that no I/O
* is underway against any of the blocks which are outside the truncation
* point. Because the caller is about to free (and possibly reuse) those
* blocks on-disk.
*/
-void do_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length)
+void folio_invalidate(struct folio *folio, size_t offset, size_t length)
{
- void (*invalidatepage)(struct page *, unsigned int, unsigned int);
-
- invalidatepage = page->mapping->a_ops->invalidatepage;
-#ifdef CONFIG_BLOCK
- if (!invalidatepage)
- invalidatepage = block_invalidatepage;
-#endif
- if (invalidatepage)
- (*invalidatepage)(page, offset, length);
+ const struct address_space_operations *aops = folio->mapping->a_ops;
+
+ if (aops->invalidate_folio)
+ aops->invalidate_folio(folio, offset, length);
}
+EXPORT_SYMBOL_GPL(folio_invalidate);
/*
* If truncate cannot remove the fs-private metadata from the page, the page
@@ -182,7 +176,7 @@ static void truncate_cleanup_folio(struct folio *folio)
unmap_mapping_folio(folio);
if (folio_has_private(folio))
- do_invalidatepage(&folio->page, 0, folio_size(folio));
+ folio_invalidate(folio, 0, folio_size(folio));
/*
* Some filesystems seem to re-dirty the page even after
@@ -193,27 +187,6 @@ static void truncate_cleanup_folio(struct folio *folio)
folio_clear_mappedtodisk(folio);
}
-/*
- * This is for invalidate_mapping_pages(). That function can be called at
- * any time, and is not supposed to throw away dirty pages. But pages can
- * be marked dirty at any time too, so use remove_mapping which safely
- * discards clean, unused pages.
- *
- * Returns non-zero if the page was successfully invalidated.
- */
-static int
-invalidate_complete_page(struct address_space *mapping, struct page *page)
-{
-
- if (page->mapping != mapping)
- return 0;
-
- if (page_has_private(page) && !try_to_release_page(page, 0))
- return 0;
-
- return remove_mapping(mapping, page);
-}
-
int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
{
if (folio->mapping != mapping)
@@ -264,7 +237,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
folio_zero_range(folio, offset, length);
if (folio_has_private(folio))
- do_invalidatepage(&folio->page, offset, length);
+ folio_invalidate(folio, offset, length);
if (!folio_test_large(folio))
return true;
if (split_huge_page(&folio->page) == 0)
@@ -294,22 +267,40 @@ int generic_error_remove_page(struct address_space *mapping, struct page *page)
}
EXPORT_SYMBOL(generic_error_remove_page);
-/*
+static long mapping_evict_folio(struct address_space *mapping,
+ struct folio *folio)
+{
+ if (folio_test_dirty(folio) || folio_test_writeback(folio))
+ return 0;
+ /* The refcount will be elevated if any page in the folio is mapped */
+ if (folio_ref_count(folio) >
+ folio_nr_pages(folio) + folio_has_private(folio) + 1)
+ return 0;
+ if (folio_has_private(folio) && !filemap_release_folio(folio, 0))
+ return 0;
+
+ return remove_mapping(mapping, folio);
+}
+
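
/*
 * For illustration: the expected-refcount arithmetic used by
 * mapping_evict_folio() above, as a stand-alone predicate. An
 * otherwise idle pagecache folio holds one reference per page for the
 * page cache, one more if private data (buffer heads) is attached,
 * plus the caller's own reference; anything above that implies a
 * concurrent user such as a mapping, so eviction is skipped. Sample
 * numbers are made up.
 */
#include <stdbool.h>
#include <stdio.h>

static bool may_evict(long ref_count, long nr_pages, bool has_private)
{
	return ref_count <= nr_pages + (has_private ? 1 : 0) + 1;
}

int main(void)
{
	printf("%d\n", may_evict(5, 4, false));	/* idle 4-page folio: 1 */
	printf("%d\n", may_evict(6, 4, false));	/* extra ref (mapped): 0 */
	return 0;
}
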
+/**
+ * invalidate_inode_page() - Remove an unused page from the pagecache.
+ * @page: The page to remove.
+ *
* Safely invalidate one page from its pagecache mapping.
- * It only drops clean, unused pages. The page must be locked.
+ * It only drops clean, unused pages.
*
- * Returns 1 if the page is successfully invalidated, otherwise 0.
+ * Context: Page must be locked.
+ * Return: The number of pages successfully removed.
*/
-int invalidate_inode_page(struct page *page)
+long invalidate_inode_page(struct page *page)
{
- struct address_space *mapping = page_mapping(page);
+ struct folio *folio = page_folio(page);
+ struct address_space *mapping = folio_mapping(folio);
+
+ /* The page may have been truncated before it was locked */
if (!mapping)
return 0;
- if (PageDirty(page) || PageWriteback(page))
- return 0;
- if (page_mapped(page))
- return 0;
- return invalidate_complete_page(mapping, page);
+ return mapping_evict_folio(mapping, folio);
}
/**
@@ -332,7 +323,7 @@ int invalidate_inode_page(struct page *page)
* mapping is large, it is probably the case that the final pages are the most
* recently touched, and freeing happens in ascending file offset order.
*
- * Note that since ->invalidatepage() accepts range to invalidate
+ * Note that since ->invalidate_folio() accepts a range to invalidate,
* truncate_inode_pages_range is able to handle cases where lend + 1 is not
* page aligned properly.
*/
@@ -497,7 +488,18 @@ void truncate_inode_pages_final(struct address_space *mapping)
}
EXPORT_SYMBOL(truncate_inode_pages_final);
-static unsigned long __invalidate_mapping_pages(struct address_space *mapping,
+/**
+ * invalidate_mapping_pagevec - Invalidate all the unlocked pages of one inode
+ * @mapping: the address_space which holds the pages to invalidate
+ * @start: the offset 'from' which to invalidate
+ * @end: the offset 'to' which to invalidate (inclusive)
+ * @nr_pagevec: number of pages that could not be invalidated, for the caller
+ *
+ * This helper is similar to invalidate_mapping_pages(), except that it accounts
+ * for pages that are likely on a pagevec and counts them in @nr_pagevec, which
+ * will be used by the caller.
+ */
+unsigned long invalidate_mapping_pagevec(struct address_space *mapping,
pgoff_t start, pgoff_t end, unsigned long *nr_pagevec)
{
pgoff_t indices[PAGEVEC_SIZE];
@@ -510,27 +512,27 @@ static unsigned long __invalidate_mapping_pages(struct address_space *mapping,
folio_batch_init(&fbatch);
while (find_lock_entries(mapping, index, end, &fbatch, indices)) {
for (i = 0; i < folio_batch_count(&fbatch); i++) {
- struct page *page = &fbatch.folios[i]->page;
+ struct folio *folio = fbatch.folios[i];
- /* We rely upon deletion not changing page->index */
+ /* We rely upon deletion not changing folio->index */
index = indices[i];
- if (xa_is_value(page)) {
+ if (xa_is_value(folio)) {
count += invalidate_exceptional_entry(mapping,
index,
- page);
+ folio);
continue;
}
- index += thp_nr_pages(page) - 1;
+ index += folio_nr_pages(folio) - 1;
- ret = invalidate_inode_page(page);
- unlock_page(page);
+ ret = mapping_evict_folio(mapping, folio);
+ folio_unlock(folio);
/*
- * Invalidation is a hint that the page is no longer
+ * Invalidation is a hint that the folio is no longer
* of interest and try to speed up its reclaim.
*/
if (!ret) {
- deactivate_file_page(page);
+ deactivate_file_folio(folio);
/* It is likely on the pagevec of a remote CPU */
if (nr_pagevec)
(*nr_pagevec)++;
@@ -562,29 +564,12 @@ static unsigned long __invalidate_mapping_pages(struct address_space *mapping,
unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end)
{
- return __invalidate_mapping_pages(mapping, start, end, NULL);
+ return invalidate_mapping_pagevec(mapping, start, end, NULL);
}
EXPORT_SYMBOL(invalidate_mapping_pages);
-/**
- * invalidate_mapping_pagevec - Invalidate all the unlocked pages of one inode
- * @mapping: the address_space which holds the pages to invalidate
- * @start: the offset 'from' which to invalidate
- * @end: the offset 'to' which to invalidate (inclusive)
- * @nr_pagevec: invalidate failed page number for caller
- *
- * This helper is similar to invalidate_mapping_pages(), except that it accounts
- * for pages that are likely on a pagevec and counts them in @nr_pagevec, which
- * will be used by the caller.
- */
-void invalidate_mapping_pagevec(struct address_space *mapping,
- pgoff_t start, pgoff_t end, unsigned long *nr_pagevec)
-{
- __invalidate_mapping_pages(mapping, start, end, nr_pagevec);
-}
-
/*
- * This is like invalidate_complete_page(), except it ignores the page's
+ * This is like invalidate_inode_page(), except it ignores the page's
* refcount. We do this because invalidate_inode_pages2() needs stronger
* invalidation guarantees, and cannot afford to leave pages behind because
* shrink_page_list() has a temp ref on them, or because they're transiently
@@ -620,13 +605,13 @@ failed:
return 0;
}
-static int do_launder_folio(struct address_space *mapping, struct folio *folio)
+static int folio_launder(struct address_space *mapping, struct folio *folio)
{
if (!folio_test_dirty(folio))
return 0;
- if (folio->mapping != mapping || mapping->a_ops->launder_page == NULL)
+ if (folio->mapping != mapping || mapping->a_ops->launder_folio == NULL)
return 0;
- return mapping->a_ops->launder_page(&folio->page);
+ return mapping->a_ops->launder_folio(folio);
}
/**
@@ -692,7 +677,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
unmap_mapping_folio(folio);
BUG_ON(folio_mapped(folio));
- ret2 = do_launder_folio(mapping, folio);
+ ret2 = folio_launder(mapping, folio);
if (ret2 == 0) {
if (!invalidate_complete_folio2(mapping, folio))
ret2 = -EBUSY;
diff --git a/mm/usercopy.c b/mm/usercopy.c
index d0d268135d96..2c235d5c2364 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -29,7 +29,7 @@
* Returns:
* NOT_STACK: not at all on the stack
* GOOD_FRAME: fully within a valid stack frame
- * GOOD_STACK: fully on the stack (when can't do frame-checking)
+ * GOOD_STACK: within the current stack (when can't frame-check exactly)
* BAD_STACK: error condition (invalid stack position or bad stack frame)
*/
static noinline int check_stack_object(const void *obj, unsigned long len)
@@ -55,6 +55,17 @@ static noinline int check_stack_object(const void *obj, unsigned long len)
if (ret)
return ret;
+ /* Finally, check stack depth if possible. */
+#ifdef CONFIG_ARCH_HAS_CURRENT_STACK_POINTER
+ if (IS_ENABLED(CONFIG_STACK_GROWSUP)) {
+ if ((void *)current_stack_pointer < obj + len)
+ return BAD_STACK;
+ } else {
+ if (obj < (void *)current_stack_pointer)
+ return BAD_STACK;
+ }
+#endif
+
return GOOD_STACK;
}
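
/*
 * For illustration: the new depth check in check_stack_object() as a
 * stand-alone predicate. On a downward-growing stack, everything below
 * the current stack pointer is dead space, so an object starting below
 * SP cannot be a live local; with CONFIG_STACK_GROWSUP the test is
 * mirrored. The addresses below are made-up test values.
 */
#include <stdbool.h>
#include <stdio.h>

static bool depth_ok(unsigned long sp, unsigned long obj, unsigned long len,
		     bool grows_up)
{
	if (grows_up)
		return sp >= obj + len;
	return obj >= sp;
}

int main(void)
{
	unsigned long sp = 0x7fff0000UL;

	printf("%d\n", depth_ok(sp, sp + 64, 16, false));	/* live: 1 */
	printf("%d\n", depth_ok(sp, sp - 64, 16, false));	/* dead: 0 */
	return 0;
}
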
@@ -70,17 +81,6 @@ static noinline int check_stack_object(const void *obj, unsigned long len)
* kmem_cache_create_usercopy() function to create the cache (and
* carefully audit the whitelist range).
*/
-void usercopy_warn(const char *name, const char *detail, bool to_user,
- unsigned long offset, unsigned long len)
-{
- WARN_ONCE(1, "Bad or missing usercopy whitelist? Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n",
- to_user ? "exposure" : "overwrite",
- to_user ? "from" : "to",
- name ? : "unknown?!",
- detail ? " '" : "", detail ? : "", detail ? "'" : "",
- offset, len);
-}
-
void __noreturn usercopy_abort(const char *name, const char *detail,
bool to_user, unsigned long offset,
unsigned long len)
@@ -280,7 +280,15 @@ void __check_object_size(const void *ptr, unsigned long n, bool to_user)
*/
return;
default:
- usercopy_abort("process stack", NULL, to_user, 0, n);
+ usercopy_abort("process stack", NULL, to_user,
+#ifdef CONFIG_ARCH_HAS_CURRENT_STACK_POINTER
+ IS_ENABLED(CONFIG_STACK_GROWSUP) ?
+ ptr - (void *)current_stack_pointer :
+ (void *)current_stack_pointer - ptr,
+#else
+ 0,
+#endif
+ n);
}
/* Check for bad heap object. */
@@ -295,7 +303,10 @@ static bool enable_checks __initdata = true;
static int __init parse_hardened_usercopy(char *str)
{
- return strtobool(str, &enable_checks);
+ if (strtobool(str, &enable_checks))
+ pr_warn("Invalid option string for hardened_usercopy: '%s'\n",
+ str);
+ return 1;
}
__setup("hardened_usercopy=", parse_hardened_usercopy);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 0780c2a57ff1..0cb8e5ef1713 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -95,10 +95,15 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
if (!pte_none(*dst_pte))
goto out_unlock;
- if (page_in_cache)
- page_add_file_rmap(page, false);
- else
+ if (page_in_cache) {
+ /* Usually, cache pages are already added to LRU */
+ if (newly_allocated)
+ lru_cache_add(page);
+ page_add_file_rmap(page, dst_vma, false);
+ } else {
page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
+ lru_cache_add_inactive_or_unevictable(page, dst_vma);
+ }
/*
* Must happen after rmap, as mm_counter() checks mapping (via
@@ -106,9 +111,6 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
*/
inc_mm_counter(dst_mm, mm_counter(page));
- if (newly_allocated)
- lru_cache_add_inactive_or_unevictable(page, dst_vma);
-
set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
/* No need to invalidate - it was non-present before */
@@ -150,6 +152,8 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
/* don't free the page */
goto out;
}
+
+ flush_dcache_page(page);
} else {
page = *pagep;
*pagep = NULL;
@@ -625,6 +629,7 @@ retry:
err = -EFAULT;
goto out;
}
+ flush_dcache_page(page);
goto retry;
} else
BUG_ON(page);
diff --git a/mm/util.c b/mm/util.c
index 7e43369064c8..1e2728736398 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -587,8 +587,10 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
return ret;
/* Don't even allow crazy sizes */
- if (WARN_ON_ONCE(size > INT_MAX))
+ if (unlikely(size > INT_MAX)) {
+ WARN_ON_ONCE(!(flags & __GFP_NOWARN));
return NULL;
+ }
return __vmalloc_node(size, 1, flags, node,
__builtin_return_address(0));
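
/*
 * For illustration: with the WARN now gated on !__GFP_NOWARN, a caller
 * that feeds an untrusted, user-controlled size into kvmalloc() can opt
 * out of the backtrace splat and simply handle the NULL. Kernel-style
 * sketch, not a stand-alone program; the function name is hypothetical.
 */
#include <linux/mm.h>

static void *alloc_user_sized_buffer(size_t user_len)
{
	/* user_len may be absurdly large; fail quietly on size > INT_MAX */
	return kvmalloc(user_len, GFP_KERNEL | __GFP_NOWARN);
}
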
@@ -679,9 +681,8 @@ bool folio_mapped(struct folio *folio)
}
EXPORT_SYMBOL(folio_mapped);
-struct anon_vma *page_anon_vma(struct page *page)
+struct anon_vma *folio_anon_vma(struct folio *folio)
{
- struct folio *folio = page_folio(page);
unsigned long mapping = (unsigned long)folio->mapping;
if ((mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
@@ -741,6 +742,39 @@ int __page_mapcount(struct page *page)
EXPORT_SYMBOL_GPL(__page_mapcount);
/**
+ * folio_mapcount() - Calculate the number of mappings of this folio.
+ * @folio: The folio.
+ *
+ * A large folio tracks both how many times the entire folio is mapped,
+ * and how many times each individual page in the folio is mapped.
+ * This function calculates the total number of times the folio is
+ * mapped.
+ *
+ * Return: The number of times this folio is mapped.
+ */
+int folio_mapcount(struct folio *folio)
+{
+ int i, compound, nr, ret;
+
+ if (likely(!folio_test_large(folio)))
+ return atomic_read(&folio->_mapcount) + 1;
+
+ compound = folio_entire_mapcount(folio);
+ nr = folio_nr_pages(folio);
+ if (folio_test_hugetlb(folio))
+ return compound;
+ ret = compound;
+ for (i = 0; i < nr; i++)
+ ret += atomic_read(&folio_page(folio, i)->_mapcount) + 1;
+ /* File pages have compound_mapcount included in _mapcount */
+ if (!folio_test_anon(folio))
+ return ret - compound * nr;
+ if (folio_test_double_map(folio))
+ ret -= nr;
+ return ret;
+}
+
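
/*
 * For illustration: the folio_mapcount() arithmetic above with the
 * atomic counters replaced by plain ints. The anon sample models a
 * 4-page folio that is PMD-mapped once (entire mapcount 1) and also
 * fully PTE-mapped, so PG_double_map is set and each page's raw
 * _mapcount reads 1 (one PTE map, zero-based, plus the DoubleMap
 * offset). This is my reading of the counters, shown with made-up
 * numbers; it is not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

static int folio_mapcount_demo(int compound, int nr, const int *raw_mapcount,
			       bool is_anon, bool double_map)
{
	int ret = compound;

	for (int i = 0; i < nr; i++)
		ret += raw_mapcount[i] + 1;	/* _mapcount is -1 based */
	if (!is_anon)			/* file: compound included per page */
		return ret - compound * nr;
	if (double_map)			/* anon PMD+PTE: offset adds nr */
		ret -= nr;
	return ret;
}

int main(void)
{
	int anon_raw[4] = { 1, 1, 1, 1 };
	int file_raw[4] = { 0, 0, 0, 0 };

	/* 1 PMD map + 4 PTE maps = 5 */
	printf("anon: %d\n", folio_mapcount_demo(1, 4, anon_raw, true, true));
	/* file folio mapped entirely once = 1 */
	printf("file: %d\n", folio_mapcount_demo(1, 4, file_raw, false, false));
	return 0;
}
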
+/**
* folio_copy - Copy the contents of one folio to another.
* @dst: Folio to copy to.
* @src: Folio to copy from.
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 4165304d3547..99e0f3e8d1a5 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -118,7 +118,6 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (size != PAGE_SIZE) {
pte_t entry = pfn_pte(pfn, prot);
- entry = pte_mkhuge(entry);
entry = arch_make_huge_pte(entry, ilog2(size), 0);
set_huge_pte_at(&init_mm, addr, pte, entry);
pfn += PFN_DOWN(size);
@@ -776,23 +775,13 @@ get_subtree_max_size(struct rb_node *node)
return va ? va->subtree_max_size : 0;
}
-/*
- * Gets called when remove the node and rotate.
- */
-static __always_inline unsigned long
-compute_subtree_max_size(struct vmap_area *va)
-{
- return max3(va_size(va),
- get_subtree_max_size(va->rb_node.rb_left),
- get_subtree_max_size(va->rb_node.rb_right));
-}
-
RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb,
struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size)
static void purge_vmap_area_lazy(void);
static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
-static unsigned long lazy_max_pages(void);
+static void drain_vmap_area_work(struct work_struct *work);
+static DECLARE_WORK(drain_vmap_work, drain_vmap_area_work);
static atomic_long_t nr_vmalloc_pages;
@@ -973,6 +962,17 @@ unlink_va(struct vmap_area *va, struct rb_root *root)
}
#if DEBUG_AUGMENT_PROPAGATE_CHECK
+/*
+ * Gets called when removing the node and rotating.
+ */
+static __always_inline unsigned long
+compute_subtree_max_size(struct vmap_area *va)
+{
+ return max3(va_size(va),
+ get_subtree_max_size(va->rb_node.rb_left),
+ get_subtree_max_size(va->rb_node.rb_right));
+}
+
static void
augment_tree_propagate_check(void)
{
@@ -1189,22 +1189,28 @@ is_within_this_va(struct vmap_area *va, unsigned long size,
/*
* Find the first free block(lowest start address) in the tree,
* that will accomplish the request corresponding to passing
- * parameters.
+ * parameters. Note that with an alignment bigger than PAGE_SIZE the
+ * search length is adjusted to account for the worst-case alignment
+ * overhead.
*/
static __always_inline struct vmap_area *
-find_vmap_lowest_match(unsigned long size,
- unsigned long align, unsigned long vstart)
+find_vmap_lowest_match(unsigned long size, unsigned long align,
+ unsigned long vstart, bool adjust_search_size)
{
struct vmap_area *va;
struct rb_node *node;
+ unsigned long length;
/* Start from the root. */
node = free_vmap_area_root.rb_node;
+ /* Adjust the search size for alignment overhead. */
+ length = adjust_search_size ? size + align - 1 : size;
+
while (node) {
va = rb_entry(node, struct vmap_area, rb_node);
- if (get_subtree_max_size(node->rb_left) >= size &&
+ if (get_subtree_max_size(node->rb_left) >= length &&
vstart < va->va_start) {
node = node->rb_left;
} else {
@@ -1214,9 +1220,9 @@ find_vmap_lowest_match(unsigned long size,
/*
* Does not make sense to go deeper towards the right
* sub-tree if it does not have a free block that is
- * equal or bigger to the requested search size.
+ * equal or bigger to the requested search length.
*/
- if (get_subtree_max_size(node->rb_right) >= size) {
+ if (get_subtree_max_size(node->rb_right) >= length) {
node = node->rb_right;
continue;
}
@@ -1232,7 +1238,7 @@ find_vmap_lowest_match(unsigned long size,
if (is_within_this_va(va, size, align, vstart))
return va;
- if (get_subtree_max_size(node->rb_right) >= size &&
+ if (get_subtree_max_size(node->rb_right) >= length &&
vstart <= va->va_start) {
/*
* Shift the vstart forward. Please note, we update it with
@@ -1280,7 +1286,7 @@ find_vmap_lowest_match_check(unsigned long size, unsigned long align)
get_random_bytes(&rnd, sizeof(rnd));
vstart = VMALLOC_START + rnd;
- va_1 = find_vmap_lowest_match(size, align, vstart);
+ va_1 = find_vmap_lowest_match(size, align, vstart, false);
va_2 = find_vmap_lowest_linear_match(size, align, vstart);
if (va_1 != va_2)
@@ -1431,12 +1437,25 @@ static __always_inline unsigned long
__alloc_vmap_area(unsigned long size, unsigned long align,
unsigned long vstart, unsigned long vend)
{
+ bool adjust_search_size = true;
unsigned long nva_start_addr;
struct vmap_area *va;
enum fit_type type;
int ret;
- va = find_vmap_lowest_match(size, align, vstart);
+ /*
+ * Do not adjust when:
+ * a) align <= PAGE_SIZE, because it does not make any sense:
+ *    all blocks (their start addresses) are at least PAGE_SIZE
+ *    aligned anyway;
+ * b) a short range where the requested size exactly fills the
+ *    specified [vstart:vend] interval and align > PAGE_SIZE:
+ *    with an adjusted search length the allocation would not succeed.
+ */
+ if (align <= PAGE_SIZE || (align > PAGE_SIZE && (vend - vstart) == size))
+ adjust_search_size = false;
+
+ va = find_vmap_lowest_match(size, align, vstart, adjust_search_size);
if (unlikely(!va))
return vend;
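
/*
 * For illustration: why the search length above is size + align - 1.
 * Free block start addresses are only guaranteed PAGE_SIZE alignment,
 * so aligning a block's start up to a bigger boundary can waste up to
 * align - 1 bytes; only a block padded by that much is guaranteed to
 * still fit the request. Stand-alone check with made-up numbers:
 */
#include <stdio.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long size = 8192, align = 16384;
	unsigned long block_start = 4096;	/* PAGE_SIZE aligned only */
	unsigned long aligned = ALIGN_UP(block_start, align);
	unsigned long needed = (aligned - block_start) + size;

	printf("padding %lu, needed %lu, searched-for %lu\n",
	       aligned - block_start, needed, size + align - 1);
	return 0;
}
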
@@ -1720,18 +1739,6 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
}
/*
- * Kick off a purge of the outstanding lazy areas. Don't bother if somebody
- * is already purging.
- */
-static void try_purge_vmap_area_lazy(void)
-{
- if (mutex_trylock(&vmap_purge_lock)) {
- __purge_vmap_area_lazy(ULONG_MAX, 0);
- mutex_unlock(&vmap_purge_lock);
- }
-}
-
-/*
* Kick off a purge of the outstanding lazy areas.
*/
static void purge_vmap_area_lazy(void)
@@ -1742,6 +1749,20 @@ static void purge_vmap_area_lazy(void)
mutex_unlock(&vmap_purge_lock);
}
+static void drain_vmap_area_work(struct work_struct *work)
+{
+ unsigned long nr_lazy;
+
+ do {
+ mutex_lock(&vmap_purge_lock);
+ __purge_vmap_area_lazy(ULONG_MAX, 0);
+ mutex_unlock(&vmap_purge_lock);
+
+ /* Recheck if further work is required. */
+ nr_lazy = atomic_long_read(&vmap_lazy_nr);
+ } while (nr_lazy > lazy_max_pages());
+}
+
/*
* Free a vmap area, caller ensuring that the area has been unmapped
* and flush_cache_vunmap had been called for the correct range
@@ -1768,7 +1789,7 @@ static void free_vmap_area_noflush(struct vmap_area *va)
/* After this point, we may free va at any time */
if (unlikely(nr_lazy > lazy_max_pages()))
- try_purge_vmap_area_lazy();
+ schedule_work(&drain_vmap_work);
}
/*
@@ -2925,7 +2946,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
int node)
{
const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
- const gfp_t orig_gfp_mask = gfp_mask;
bool nofail = gfp_mask & __GFP_NOFAIL;
unsigned long addr = (unsigned long)area->addr;
unsigned long size = get_vm_area_size(area);
@@ -2949,7 +2969,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
}
if (!area->pages) {
- warn_alloc(orig_gfp_mask, NULL,
+ warn_alloc(gfp_mask, NULL,
"vmalloc error: size %lu, failed to allocated page array size %lu",
nr_small_pages * PAGE_SIZE, array_size);
free_vm_area(area);
@@ -2959,8 +2979,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
set_vm_area_page_order(area, page_shift - PAGE_SHIFT);
page_order = vm_area_page_order(area);
- area->nr_pages = vm_area_alloc_pages(gfp_mask, node,
- page_order, nr_small_pages, area->pages);
+ area->nr_pages = vm_area_alloc_pages(gfp_mask | __GFP_NOWARN,
+ node, page_order, nr_small_pages, area->pages);
atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
if (gfp_mask & __GFP_ACCOUNT) {
@@ -2976,7 +2996,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
* allocation request, free them via __vfree() if any.
*/
if (area->nr_pages != nr_small_pages) {
- warn_alloc(orig_gfp_mask, NULL,
+ warn_alloc(gfp_mask, NULL,
"vmalloc error: size %lu, page order %u, failed to allocate pages",
area->nr_pages * PAGE_SIZE, page_order);
goto fail;
@@ -3004,7 +3024,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
memalloc_noio_restore(flags);
if (ret < 0) {
- warn_alloc(orig_gfp_mask, NULL,
+ warn_alloc(gfp_mask, NULL,
"vmalloc error: size %lu, failed to map pages",
area->nr_pages * PAGE_SIZE);
goto fail;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 090bfb605ecf..1678802e03e7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -56,6 +56,7 @@
#include <linux/swapops.h>
#include <linux/balloon_compaction.h>
+#include <linux/sched/sysctl.h>
#include "internal.h"
@@ -978,47 +979,36 @@ void drop_slab(void)
drop_slab_node(nid);
}
-static inline int is_page_cache_freeable(struct page *page)
+static inline int is_page_cache_freeable(struct folio *folio)
{
/*
* A freeable page cache page is referenced only by the caller
* that isolated the page, the page cache and optional buffer
* heads at page->private.
*/
- int page_cache_pins = thp_nr_pages(page);
- return page_count(page) - page_has_private(page) == 1 + page_cache_pins;
-}
-
-static int may_write_to_inode(struct inode *inode)
-{
- if (current->flags & PF_SWAPWRITE)
- return 1;
- if (!inode_write_congested(inode))
- return 1;
- if (inode_to_bdi(inode) == current->backing_dev_info)
- return 1;
- return 0;
+ return folio_ref_count(folio) - folio_test_private(folio) ==
+ 1 + folio_nr_pages(folio);
}
/*
- * We detected a synchronous write error writing a page out. Probably
+ * We detected a synchronous write error writing a folio out. Probably
* -ENOSPC. We need to propagate that into the address_space for a subsequent
* fsync(), msync() or close().
*
* The tricky part is that after writepage we cannot touch the mapping: nothing
- * prevents it from being freed up. But we have a ref on the page and once
- * that page is locked, the mapping is pinned.
+ * prevents it from being freed up. But we have a ref on the folio and once
+ * that folio is locked, the mapping is pinned.
*
- * We're allowed to run sleeping lock_page() here because we know the caller has
+ * We're allowed to run sleeping folio_lock() here because we know the caller has
* __GFP_FS.
*/
static void handle_write_error(struct address_space *mapping,
- struct page *page, int error)
+ struct folio *folio, int error)
{
- lock_page(page);
- if (page_mapping(page) == mapping)
+ folio_lock(folio);
+ if (folio_mapping(folio) == mapping)
mapping_set_error(mapping, error);
- unlock_page(page);
+ folio_unlock(folio);
}
static bool skip_throttle_noprogress(pg_data_t *pgdat)
@@ -1066,8 +1056,10 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
* forward progress (e.g. journalling workqueues or kthreads).
*/
if (!current_is_kswapd() &&
- current->flags & (PF_IO_WORKER|PF_KTHREAD))
+ current->flags & (PF_IO_WORKER|PF_KTHREAD)) {
+ cond_resched();
return;
+ }
/*
* These figures are pulled out of thin air.
@@ -1163,35 +1155,35 @@ typedef enum {
* pageout is called by shrink_page_list() for each dirty page.
* Calls ->writepage().
*/
-static pageout_t pageout(struct page *page, struct address_space *mapping)
+static pageout_t pageout(struct folio *folio, struct address_space *mapping)
{
/*
- * If the page is dirty, only perform writeback if that write
+ * If the folio is dirty, only perform writeback if that write
* will be non-blocking. To prevent this allocation from being
* stalled by pagecache activity. But note that there may be
* stalls if we need to run get_block(). We could test
* PagePrivate for that.
*
* If this process is currently in __generic_file_write_iter() against
- * this page's queue, we can perform writeback even if that
+ * this folio's queue, we can perform writeback even if that
* will block.
*
- * If the page is swapcache, write it back even if that would
+ * If the folio is swapcache, write it back even if that would
* block, for some throttling. This happens by accident, because
* swap_backing_dev_info is bust: it doesn't reflect the
* congestion state of the swapdevs. Easy to fix, if needed.
*/
- if (!is_page_cache_freeable(page))
+ if (!is_page_cache_freeable(folio))
return PAGE_KEEP;
if (!mapping) {
/*
- * Some data journaling orphaned pages can have
- * page->mapping == NULL while being dirty with clean buffers.
+ * Some data journaling orphaned folios can have
+ * folio->mapping == NULL while being dirty with clean buffers.
*/
- if (page_has_private(page)) {
- if (try_to_free_buffers(page)) {
- ClearPageDirty(page);
- pr_info("%s: orphaned page\n", __func__);
+ if (folio_test_private(folio)) {
+ if (try_to_free_buffers(&folio->page)) {
+ folio_clear_dirty(folio);
+ pr_info("%s: orphaned folio\n", __func__);
return PAGE_CLEAN;
}
}
@@ -1199,10 +1191,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
}
if (mapping->a_ops->writepage == NULL)
return PAGE_ACTIVATE;
- if (!may_write_to_inode(mapping->host))
- return PAGE_KEEP;
- if (clear_page_dirty_for_io(page)) {
+ if (folio_clear_dirty_for_io(folio)) {
int res;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
@@ -1212,21 +1202,21 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
.for_reclaim = 1,
};
- SetPageReclaim(page);
- res = mapping->a_ops->writepage(page, &wbc);
+ folio_set_reclaim(folio);
+ res = mapping->a_ops->writepage(&folio->page, &wbc);
if (res < 0)
- handle_write_error(mapping, page, res);
+ handle_write_error(mapping, folio, res);
if (res == AOP_WRITEPAGE_ACTIVATE) {
- ClearPageReclaim(page);
+ folio_clear_reclaim(folio);
return PAGE_ACTIVATE;
}
- if (!PageWriteback(page)) {
+ if (!folio_test_writeback(folio)) {
/* synchronous write or broken a_ops? */
- ClearPageReclaim(page);
+ folio_clear_reclaim(folio);
}
- trace_mm_vmscan_writepage(page);
- inc_node_page_state(page, NR_VMSCAN_WRITE);
+ trace_mm_vmscan_write_folio(folio);
+ node_stat_add_folio(folio, NR_VMSCAN_WRITE);
return PAGE_SUCCESS;
}
@@ -1237,16 +1227,16 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
* Same as remove_mapping, but if the page is removed from the mapping, it
* gets returned with a refcount of 0.
*/
-static int __remove_mapping(struct address_space *mapping, struct page *page,
+static int __remove_mapping(struct address_space *mapping, struct folio *folio,
bool reclaimed, struct mem_cgroup *target_memcg)
{
int refcount;
void *shadow = NULL;
- BUG_ON(!PageLocked(page));
- BUG_ON(mapping != page_mapping(page));
+ BUG_ON(!folio_test_locked(folio));
+ BUG_ON(mapping != folio_mapping(folio));
- if (!PageSwapCache(page))
+ if (!folio_test_swapcache(folio))
spin_lock(&mapping->host->i_lock);
xa_lock_irq(&mapping->i_pages);
/*
@@ -1274,23 +1264,23 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
* Note that if SetPageDirty is always performed via set_page_dirty,
* and thus under the i_pages lock, then this ordering is not required.
*/
- refcount = 1 + compound_nr(page);
- if (!page_ref_freeze(page, refcount))
+ refcount = 1 + folio_nr_pages(folio);
+ if (!folio_ref_freeze(folio, refcount))
goto cannot_free;
/* note: atomic_cmpxchg in page_ref_freeze provides the smp_rmb */
- if (unlikely(PageDirty(page))) {
- page_ref_unfreeze(page, refcount);
+ if (unlikely(folio_test_dirty(folio))) {
+ folio_ref_unfreeze(folio, refcount);
goto cannot_free;
}
- if (PageSwapCache(page)) {
- swp_entry_t swap = { .val = page_private(page) };
- mem_cgroup_swapout(page, swap);
+ if (folio_test_swapcache(folio)) {
+ swp_entry_t swap = folio_swap_entry(folio);
+ mem_cgroup_swapout(folio, swap);
if (reclaimed && !mapping_exiting(mapping))
- shadow = workingset_eviction(page, target_memcg);
- __delete_from_swap_cache(page, swap, shadow);
+ shadow = workingset_eviction(folio, target_memcg);
+ __delete_from_swap_cache(&folio->page, swap, shadow);
xa_unlock_irq(&mapping->i_pages);
- put_swap_page(page, swap);
+ put_swap_page(&folio->page, swap);
} else {
void (*freepage)(struct page *);
@@ -1311,61 +1301,67 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
* exceptional entries and shadow exceptional entries in the
* same address_space.
*/
- if (reclaimed && page_is_file_lru(page) &&
+ if (reclaimed && folio_is_file_lru(folio) &&
!mapping_exiting(mapping) && !dax_mapping(mapping))
- shadow = workingset_eviction(page, target_memcg);
- __delete_from_page_cache(page, shadow);
+ shadow = workingset_eviction(folio, target_memcg);
+ __filemap_remove_folio(folio, shadow);
xa_unlock_irq(&mapping->i_pages);
if (mapping_shrinkable(mapping))
inode_add_lru(mapping->host);
spin_unlock(&mapping->host->i_lock);
if (freepage != NULL)
- freepage(page);
+ freepage(&folio->page);
}
return 1;
cannot_free:
xa_unlock_irq(&mapping->i_pages);
- if (!PageSwapCache(page))
+ if (!folio_test_swapcache(folio))
spin_unlock(&mapping->host->i_lock);
return 0;
}
-/*
- * Attempt to detach a locked page from its ->mapping. If it is dirty or if
- * someone else has a ref on the page, abort and return 0. If it was
- * successfully detached, return 1. Assumes the caller has a single ref on
- * this page.
+/**
+ * remove_mapping() - Attempt to remove a folio from its mapping.
+ * @mapping: The address space.
+ * @folio: The folio to remove.
+ *
+ * If the folio is dirty, under writeback or if someone else has a ref
+ * on it, removal will fail.
+ * Return: The number of pages removed from the mapping. 0 if the folio
+ * could not be removed.
+ * Context: The caller should have a single refcount on the folio and
+ * hold its lock.
*/
-int remove_mapping(struct address_space *mapping, struct page *page)
+long remove_mapping(struct address_space *mapping, struct folio *folio)
{
- if (__remove_mapping(mapping, page, false, NULL)) {
+ if (__remove_mapping(mapping, folio, false, NULL)) {
/*
- * Unfreezing the refcount with 1 rather than 2 effectively
+ * Unfreezing the refcount with 1 effectively
* drops the pagecache ref for us without requiring another
* atomic operation.
*/
- page_ref_unfreeze(page, 1);
- return 1;
+ folio_ref_unfreeze(folio, 1);
+ return folio_nr_pages(folio);
}
return 0;
}
/**
- * putback_lru_page - put previously isolated page onto appropriate LRU list
- * @page: page to be put back to appropriate lru list
+ * folio_putback_lru - Put previously isolated folio onto appropriate LRU list.
+ * @folio: Folio to be returned to an LRU list.
*
- * Add previously isolated @page to appropriate LRU list.
- * Page may still be unevictable for other reasons.
+ * Add previously isolated @folio to appropriate LRU list.
+ * The folio may still be unevictable for other reasons.
*
- * lru_lock must not be held, interrupts must be enabled.
+ * Context: lru_lock must not be held, interrupts must be enabled.
*/
-void putback_lru_page(struct page *page)
+void folio_putback_lru(struct folio *folio)
{
- lru_cache_add(page);
- put_page(page); /* drop ref from isolate */
+ folio_add_lru(folio);
+ folio_put(folio); /* drop ref from isolate */
}
enum page_references {
@@ -1375,61 +1371,61 @@ enum page_references {
PAGEREF_ACTIVATE,
};
-static enum page_references page_check_references(struct page *page,
+static enum page_references folio_check_references(struct folio *folio,
struct scan_control *sc)
{
- int referenced_ptes, referenced_page;
+ int referenced_ptes, referenced_folio;
unsigned long vm_flags;
- referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup,
- &vm_flags);
- referenced_page = TestClearPageReferenced(page);
+ referenced_ptes = folio_referenced(folio, 1, sc->target_mem_cgroup,
+ &vm_flags);
+ referenced_folio = folio_test_clear_referenced(folio);
/*
- * Mlock lost the isolation race with us. Let try_to_unmap()
- * move the page to the unevictable list.
+ * The supposedly reclaimable folio was found to be in a VM_LOCKED vma.
+ * Let the folio, now marked Mlocked, be moved to the unevictable list.
*/
if (vm_flags & VM_LOCKED)
- return PAGEREF_RECLAIM;
+ return PAGEREF_ACTIVATE;
if (referenced_ptes) {
/*
- * All mapped pages start out with page table
+ * All mapped folios start out with page table
* references from the instantiating fault, so we need
- * to look twice if a mapped file page is used more
+ * to look twice if a mapped file/anon folio is used more
* than once.
*
* Mark it and spare it for another trip around the
* inactive list. Another page table reference will
* lead to its activation.
*
- * Note: the mark is set for activated pages as well
- * so that recently deactivated but used pages are
+ * Note: the mark is set for activated folios as well
+ * so that recently deactivated but used folios are
* quickly recovered.
*/
- SetPageReferenced(page);
+ folio_set_referenced(folio);
- if (referenced_page || referenced_ptes > 1)
+ if (referenced_folio || referenced_ptes > 1)
return PAGEREF_ACTIVATE;
/*
- * Activate file-backed executable pages after first usage.
+ * Activate file-backed executable folios after first usage.
*/
- if ((vm_flags & VM_EXEC) && !PageSwapBacked(page))
+ if ((vm_flags & VM_EXEC) && !folio_test_swapbacked(folio))
return PAGEREF_ACTIVATE;
return PAGEREF_KEEP;
}
- /* Reclaim if clean, defer dirty pages to writeback */
- if (referenced_page && !PageSwapBacked(page))
+ /* Reclaim if clean, defer dirty folios to writeback */
+ if (referenced_folio && !folio_test_swapbacked(folio))
return PAGEREF_RECLAIM_CLEAN;
return PAGEREF_RECLAIM;
}
/* Check if a page is dirty or under writeback */
-static void page_check_dirty_writeback(struct page *page,
+static void folio_check_dirty_writeback(struct folio *folio,
bool *dirty, bool *writeback)
{
struct address_space *mapping;
@@ -1438,24 +1434,24 @@ static void page_check_dirty_writeback(struct page *page,
* Anonymous pages are not handled by flushers and must be written
* from reclaim context. Do not stall reclaim based on them
*/
- if (!page_is_file_lru(page) ||
- (PageAnon(page) && !PageSwapBacked(page))) {
+ if (!folio_is_file_lru(folio) ||
+ (folio_test_anon(folio) && !folio_test_swapbacked(folio))) {
*dirty = false;
*writeback = false;
return;
}
- /* By default assume that the page flags are accurate */
- *dirty = PageDirty(page);
- *writeback = PageWriteback(page);
+ /* By default assume that the folio flags are accurate */
+ *dirty = folio_test_dirty(folio);
+ *writeback = folio_test_writeback(folio);
/* Verify dirty/writeback state if the filesystem supports it */
- if (!page_has_private(page))
+ if (!folio_test_private(folio))
return;
- mapping = page_mapping(page);
+ mapping = folio_mapping(folio);
if (mapping && mapping->a_ops->is_dirty_writeback)
- mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
+ mapping->a_ops->is_dirty_writeback(&folio->page, dirty, writeback);
}
static struct page *alloc_demote_page(struct page *page, unsigned long node)
@@ -1529,14 +1525,16 @@ retry:
while (!list_empty(page_list)) {
struct address_space *mapping;
struct page *page;
+ struct folio *folio;
enum page_references references = PAGEREF_RECLAIM;
bool dirty, writeback, may_enter_fs;
unsigned int nr_pages;
cond_resched();
- page = lru_to_page(page_list);
- list_del(&page->lru);
+ folio = lru_to_folio(page_list);
+ list_del(&folio->lru);
+ page = &folio->page;
if (!trylock_page(page))
goto keep;
@@ -1562,12 +1560,12 @@ retry:
* reclaim_congested. kswapd will stall and start writing
* pages if the tail of the LRU is all dirty unqueued pages.
*/
- page_check_dirty_writeback(page, &dirty, &writeback);
+ folio_check_dirty_writeback(folio, &dirty, &writeback);
if (dirty || writeback)
- stat->nr_dirty++;
+ stat->nr_dirty += nr_pages;
if (dirty && !writeback)
- stat->nr_unqueued_dirty++;
+ stat->nr_unqueued_dirty += nr_pages;
/*
* Treat this page as congested if the underlying BDI is or if
@@ -1576,10 +1574,8 @@ retry:
* end of the LRU a second time.
*/
mapping = page_mapping(page);
- if (((dirty || writeback) && mapping &&
- inode_write_congested(mapping->host)) ||
- (writeback && PageReclaim(page)))
- stat->nr_congested++;
+ if (writeback && PageReclaim(page))
+ stat->nr_congested += nr_pages;
/*
* If a page at the tail of the LRU is under writeback, there
@@ -1628,7 +1624,7 @@ retry:
if (current_is_kswapd() &&
PageReclaim(page) &&
test_bit(PGDAT_WRITEBACK, &pgdat->flags)) {
- stat->nr_immediate++;
+ stat->nr_immediate += nr_pages;
goto activate_locked;
/* Case 2 above */
@@ -1646,7 +1642,7 @@ retry:
* and it's also appropriate in global reclaim.
*/
SetPageReclaim(page);
- stat->nr_writeback++;
+ stat->nr_writeback += nr_pages;
goto activate_locked;
/* Case 3 above */
@@ -1660,7 +1656,7 @@ retry:
}
if (!ignore_references)
- references = page_check_references(page, sc);
+ references = folio_check_references(folio, sc);
switch (references) {
case PAGEREF_ACTIVATE:
@@ -1693,28 +1689,28 @@ retry:
if (!PageSwapCache(page)) {
if (!(sc->gfp_mask & __GFP_IO))
goto keep_locked;
- if (page_maybe_dma_pinned(page))
+ if (folio_maybe_dma_pinned(folio))
goto keep_locked;
if (PageTransHuge(page)) {
/* cannot split THP, skip it */
- if (!can_split_huge_page(page, NULL))
+ if (!can_split_folio(folio, NULL))
goto activate_locked;
/*
* Split pages without a PMD map right
* away. Chances are some or all of the
* tail pages can be freed without IO.
*/
- if (!compound_mapcount(page) &&
- split_huge_page_to_list(page,
- page_list))
+ if (!folio_entire_mapcount(folio) &&
+ split_folio_to_list(folio,
+ page_list))
goto activate_locked;
}
if (!add_to_swap(page)) {
if (!PageTransHuge(page))
goto activate_locked_split;
/* Fallback to swap normal pages */
- if (split_huge_page_to_list(page,
- page_list))
+ if (split_folio_to_list(folio,
+ page_list))
goto activate_locked;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
count_vm_event(THP_SWPOUT_FALLBACK);
@@ -1728,9 +1724,9 @@ retry:
/* Adding to swap updated mapping */
mapping = page_mapping(page);
}
- } else if (unlikely(PageTransHuge(page))) {
- /* Split file THP */
- if (split_huge_page_to_list(page, page_list))
+ } else if (PageSwapBacked(page) && PageTransHuge(page)) {
+ /* Split shmem THP */
+ if (split_folio_to_list(folio, page_list))
goto keep_locked;
}
@@ -1754,10 +1750,11 @@ retry:
enum ttu_flags flags = TTU_BATCH_FLUSH;
bool was_swapbacked = PageSwapBacked(page);
- if (unlikely(PageTransHuge(page)))
+ if (PageTransHuge(page) &&
+ thp_order(page) >= HPAGE_PMD_ORDER)
flags |= TTU_SPLIT_HUGE_PMD;
- try_to_unmap(page, flags);
+ try_to_unmap(folio, flags);
if (page_mapped(page)) {
stat->nr_unmap_fail += nr_pages;
if (!was_swapbacked && PageSwapBacked(page))
@@ -1805,13 +1802,13 @@ retry:
* starts and then write it out here.
*/
try_to_unmap_flush_dirty();
- switch (pageout(page, mapping)) {
+ switch (pageout(folio, mapping)) {
case PAGE_KEEP:
goto keep_locked;
case PAGE_ACTIVATE:
goto activate_locked;
case PAGE_SUCCESS:
- stat->nr_pageout += thp_nr_pages(page);
+ stat->nr_pageout += nr_pages;
if (PageWriteback(page))
goto keep;
@@ -1889,7 +1886,7 @@ retry:
*/
count_vm_event(PGLAZYFREED);
count_memcg_page_event(page, PGLAZYFREED);
- } else if (!mapping || !__remove_mapping(mapping, page, true,
+ } else if (!mapping || !__remove_mapping(mapping, folio, true,
sc->target_mem_cgroup))
goto keep_locked;
@@ -2012,69 +2009,6 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
}
/*
- * Attempt to remove the specified page from its LRU. Only take this page
- * if it is of the appropriate PageActive status. Pages which are being
- * freed elsewhere are also ignored.
- *
- * page: page to consider
- * mode: one of the LRU isolation modes defined above
- *
- * returns true on success, false on failure.
- */
-bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
-{
- /* Only take pages on the LRU. */
- if (!PageLRU(page))
- return false;
-
- /* Compaction should not handle unevictable pages but CMA can do so */
- if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
- return false;
-
- /*
- * To minimise LRU disruption, the caller can indicate that it only
- * wants to isolate pages it will be able to operate on without
- * blocking - clean pages for the most part.
- *
- * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages
- * that it is possible to migrate without blocking
- */
- if (mode & ISOLATE_ASYNC_MIGRATE) {
- /* All the caller can do on PageWriteback is block */
- if (PageWriteback(page))
- return false;
-
- if (PageDirty(page)) {
- struct address_space *mapping;
- bool migrate_dirty;
-
- /*
- * Only pages without mappings or that have a
- * ->migratepage callback are possible to migrate
- * without blocking. However, we can be racing with
- * truncation so it's necessary to lock the page
- * to stabilise the mapping as truncation holds
- * the page lock until after the page is removed
- * from the page cache.
- */
- if (!trylock_page(page))
- return false;
-
- mapping = page_mapping(page);
- migrate_dirty = !mapping || mapping->a_ops->migratepage;
- unlock_page(page);
- if (!migrate_dirty)
- return false;
- }
- }
-
- if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
- return false;
-
- return true;
-}
-
-/*
* Update LRU sizes after isolating pages. The LRU size updates must
* be complete before mem_cgroup_update_lru_size due to a sanity check.
*/
@@ -2125,11 +2059,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
unsigned long skipped = 0;
unsigned long scan, total_scan, nr_pages;
LIST_HEAD(pages_skipped);
- isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED);
total_scan = 0;
scan = 0;
while (scan < nr_to_scan && !list_empty(src)) {
+ struct list_head *move_to = src;
struct page *page;
page = lru_to_page(src);
@@ -2139,9 +2073,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
total_scan += nr_pages;
if (page_zonenum(page) > sc->reclaim_idx) {
- list_move(&page->lru, &pages_skipped);
nr_skipped[page_zonenum(page)] += nr_pages;
- continue;
+ move_to = &pages_skipped;
+ goto move;
}
/*
@@ -2149,37 +2083,34 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
* return with no isolated pages if the LRU mostly contains
* ineligible pages. This causes the VM to not reclaim any
* pages, triggering a premature OOM.
- *
- * Account all tail pages of THP. This would not cause
- * premature OOM since __isolate_lru_page() returns -EBUSY
- * only when the page is being freed somewhere else.
+ * Account all tail pages of THP.
*/
scan += nr_pages;
- if (!__isolate_lru_page_prepare(page, mode)) {
- /* It is being freed elsewhere */
- list_move(&page->lru, src);
- continue;
- }
+
+ if (!PageLRU(page))
+ goto move;
+ if (!sc->may_unmap && page_mapped(page))
+ goto move;
+
/*
* Be careful not to clear PageLRU until after we're
* sure the page is not being freed elsewhere -- the
* page release code relies on it.
*/
- if (unlikely(!get_page_unless_zero(page))) {
- list_move(&page->lru, src);
- continue;
- }
+ if (unlikely(!get_page_unless_zero(page)))
+ goto move;
if (!TestClearPageLRU(page)) {
/* Another thread is already isolating this page */
put_page(page);
- list_move(&page->lru, src);
- continue;
+ goto move;
}
nr_taken += nr_pages;
nr_zone_taken[page_zonenum(page)] += nr_pages;
- list_move(&page->lru, dst);
+ move_to = dst;
+move:
+ list_move(&page->lru, move_to);
}
/*
@@ -2203,51 +2134,47 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
}
*nr_scanned = total_scan;
trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
- total_scan, skipped, nr_taken, mode, lru);
+ total_scan, skipped, nr_taken,
+ sc->may_unmap ? 0 : ISOLATE_UNMAPPED, lru);
update_lru_sizes(lruvec, lru, nr_zone_taken);
return nr_taken;
}
/**
- * isolate_lru_page - tries to isolate a page from its LRU list
- * @page: page to isolate from its LRU list
- *
- * Isolates a @page from an LRU list, clears PageLRU and adjusts the
- * vmstat statistic corresponding to whatever LRU list the page was on.
+ * folio_isolate_lru() - Try to isolate a folio from its LRU list.
+ * @folio: Folio to isolate from its LRU list.
*
- * Returns 0 if the page was removed from an LRU list.
- * Returns -EBUSY if the page was not on an LRU list.
+ * Isolate a @folio from an LRU list and adjust the vmstat statistic
+ * corresponding to whatever LRU list the folio was on.
*
- * The returned page will have PageLRU() cleared. If it was found on
- * the active list, it will have PageActive set. If it was found on
- * the unevictable list, it will have the PageUnevictable bit set. That flag
+ * The folio will have its LRU flag cleared. If it was found on the
+ * active list, it will have the Active flag set. If it was found on the
+ * unevictable list, it will have the Unevictable flag set. These flags
* may need to be cleared by the caller before letting the page go.
*
- * The vmstat statistic corresponding to the list on which the page was
- * found will be decremented.
- *
- * Restrictions:
+ * Context:
*
* (1) Must be called with an elevated refcount on the page. This is a
- * fundamental difference from isolate_lru_pages (which is called
+ * fundamental difference from isolate_lru_pages() (which is called
* without a stable reference).
- * (2) the lru_lock must not be held.
- * (3) interrupts must be enabled.
+ * (2) The lru_lock must not be held.
+ * (3) Interrupts must be enabled.
+ *
+ * Return: 0 if the folio was removed from an LRU list.
+ * -EBUSY if the folio was not on an LRU list.
*/
-int isolate_lru_page(struct page *page)
+int folio_isolate_lru(struct folio *folio)
{
- struct folio *folio = page_folio(page);
int ret = -EBUSY;
- VM_BUG_ON_PAGE(!page_count(page), page);
- WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");
+ VM_BUG_ON_FOLIO(!folio_ref_count(folio), folio);
- if (TestClearPageLRU(page)) {
+ if (folio_test_clear_lru(folio)) {
struct lruvec *lruvec;
- get_page(page);
+ folio_get(folio);
lruvec = folio_lruvec_lock_irq(folio);
- del_page_from_lru_list(page, lruvec);
+ lruvec_del_folio(lruvec, folio);
unlock_page_lruvec_irq(lruvec);
ret = 0;
}
@@ -2377,9 +2304,7 @@ static unsigned int move_pages_to_lru(struct lruvec *lruvec,
*/
static int current_may_throttle(void)
{
- return !(current->flags & PF_LOCAL_THROTTLE) ||
- current->backing_dev_info == NULL ||
- bdi_write_congested(current->backing_dev_info);
+ return !(current->flags & PF_LOCAL_THROTTLE);
}
/*
@@ -2485,7 +2410,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
*
* If the pages are mostly unmapped, the processing is fast and it is
* appropriate to hold lru_lock across the whole operation. But if
- * the pages are mapped, the processing is slow (page_referenced()), so
+ * the pages are mapped, the processing is slow (folio_referenced()), so
* we should drop lru_lock around each page. It's impossible to balance
* this, so instead we remove the pages from the LRU while processing them.
* It is safe to rely on PG_active against the non-LRU pages in here because
@@ -2505,7 +2430,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
LIST_HEAD(l_hold); /* The pages which were snipped off */
LIST_HEAD(l_active);
LIST_HEAD(l_inactive);
- struct page *page;
unsigned nr_deactivate, nr_activate;
unsigned nr_rotated = 0;
int file = is_file_lru(lru);
@@ -2527,9 +2451,13 @@ static void shrink_active_list(unsigned long nr_to_scan,
spin_unlock_irq(&lruvec->lru_lock);
while (!list_empty(&l_hold)) {
+ struct folio *folio;
+ struct page *page;
+
cond_resched();
- page = lru_to_page(&l_hold);
- list_del(&page->lru);
+ folio = lru_to_folio(&l_hold);
+ list_del(&folio->lru);
+ page = &folio->page;
if (unlikely(!page_evictable(page))) {
putback_lru_page(page);
@@ -2544,8 +2472,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
}
}
- if (page_referenced(page, 0, sc->target_mem_cgroup,
- &vm_flags)) {
+ if (folio_referenced(folio, 0, sc->target_mem_cgroup,
+ &vm_flags)) {
/*
* Identify referenced, file-backed active pages and
* give them one more trip around the active list. So
@@ -3975,7 +3903,10 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx)
if (!managed_zone(zone))
continue;
- mark = high_wmark_pages(zone);
+ if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)
+ mark = wmark_pages(zone, WMARK_PROMO);
+ else
+ mark = high_wmark_pages(zone);
if (zone_watermark_ok_safe(zone, order, mark, highest_zoneidx))
return true;
}
@@ -4472,7 +4403,7 @@ static int kswapd(void *p)
* us from recursively trying to free more memory as we're
* trying to free the first piece of memory in the first place).
*/
- tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
+ tsk->flags |= PF_MEMALLOC | PF_KSWAPD;
set_freezable();
WRITE_ONCE(pgdat->kswapd_order, 0);
@@ -4523,7 +4454,7 @@ kswapd_try_sleep:
goto kswapd_try_sleep;
}
- tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
+ tsk->flags &= ~(PF_MEMALLOC | PF_KSWAPD);
return 0;
}
@@ -4764,11 +4695,8 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
fs_reclaim_acquire(sc.gfp_mask);
/*
* We need to be able to allocate from the reserves for RECLAIM_UNMAP
- * and we also need to be able to write out pages for RECLAIM_WRITE
- * and RECLAIM_UNMAP.
*/
noreclaim_flag = memalloc_noreclaim_save();
- p->flags |= PF_SWAPWRITE;
set_task_reclaim_state(p, &sc.reclaim_state);
if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) {
@@ -4782,7 +4710,6 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
}
set_task_reclaim_state(p, NULL);
- current->flags &= ~PF_SWAPWRITE;
memalloc_noreclaim_restore(noreclaim_flag);
fs_reclaim_release(sc.gfp_mask);
psi_memstall_leave(&pflags);
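
The isolate_lru_pages() hunks above collapse three separate list_move(&page->lru, src); continue; failure paths into one exit label whose destination defaults back to the source list. A minimal user-space sketch of that control-flow shape follows; the toy list and the names isolate_one()/list_move_node() are illustrative, not kernel API:

#include <stdbool.h>
#include <stdio.h>

struct list_head { struct list_head *prev, *next; };

static void list_init(struct list_head *h)
{
	h->prev = h->next = h;
}

/* unlink @n and append it to @dst (toy version of list_move_tail()) */
static void list_move_node(struct list_head *n, struct list_head *dst)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	n->prev = dst->prev;
	n->next = dst;
	dst->prev->next = n;
	dst->prev = n;
}

struct page { struct list_head lru; bool on_lru, mapped; };

/* Every skip path sets its destination and jumps to one list_move(),
 * defaulting to "put it back on src" -- the shape of the kernel change. */
static int isolate_one(struct page *page, struct list_head *src,
		       struct list_head *dst, bool may_unmap)
{
	struct list_head *move_to = src;
	int taken = 0;

	if (!page->on_lru)
		goto move;
	if (!may_unmap && page->mapped)
		goto move;

	page->on_lru = false;
	taken = 1;
	move_to = dst;
move:
	list_move_node(&page->lru, move_to);
	return taken;
}

int main(void)
{
	struct list_head src, dst;
	struct page p = { .on_lru = true };

	list_init(&src);
	list_init(&dst);
	list_init(&p.lru);
	list_move_node(&p.lru, &src);

	printf("taken=%d\n", isolate_one(&p, &src, &dst, true));
	return 0;
}
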
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 4057372745d0..b75b1a64b54c 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -28,6 +28,7 @@
#include <linux/mm_inline.h>
#include <linux/page_ext.h>
#include <linux/page_owner.h>
+#include <linux/migrate.h>
#include "internal.h"
@@ -1242,6 +1243,9 @@ const char * const vmstat_text[] = {
#ifdef CONFIG_SWAP
"nr_swapcached",
#endif
+#ifdef CONFIG_NUMA_BALANCING
+ "pgpromote_success",
+#endif
/* enum writeback_stat_item counters */
"nr_dirty_threshold",
@@ -1385,6 +1389,9 @@ const char * const vmstat_text[] = {
#ifdef CONFIG_SWAP
"swap_ra",
"swap_ra_hit",
+#ifdef CONFIG_KSM
+ "ksm_swpin_copy",
+#endif
#endif
#ifdef CONFIG_X86
"direct_map_level2_splits",
@@ -2043,7 +2050,12 @@ static void __init init_cpu_node_state(void)
static int vmstat_cpu_online(unsigned int cpu)
{
refresh_zone_stat_thresholds();
- node_set_state(cpu_to_node(cpu), N_CPU);
+
+ if (!node_state(cpu_to_node(cpu), N_CPU)) {
+ node_set_state(cpu_to_node(cpu), N_CPU);
+ set_migration_target_nodes();
+ }
+
return 0;
}
@@ -2066,6 +2078,8 @@ static int vmstat_cpu_dead(unsigned int cpu)
return 0;
node_clear_state(node, N_CPU);
+ set_migration_target_nodes();
+
return 0;
}
@@ -2097,6 +2111,9 @@ void __init init_mm_internals(void)
start_shepherd_timer();
#endif
+#if defined(CONFIG_MIGRATION) && defined(CONFIG_HOTPLUG_CPU)
+ migrate_on_reclaim_init();
+#endif
#ifdef CONFIG_PROC_FS
proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
diff --git a/mm/workingset.c b/mm/workingset.c
index 8c03afe1d67c..8a3828acc0bf 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -245,31 +245,32 @@ void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages)
}
/**
- * workingset_eviction - note the eviction of a page from memory
+ * workingset_eviction - note the eviction of a folio from memory
* @target_memcg: the cgroup that is causing the reclaim
- * @page: the page being evicted
+ * @folio: the folio being evicted
*
- * Return: a shadow entry to be stored in @page->mapping->i_pages in place
- * of the evicted @page so that a later refault can be detected.
+ * Return: a shadow entry to be stored in @folio->mapping->i_pages in place
+ * of the evicted @folio so that a later refault can be detected.
*/
-void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
+void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg)
{
- struct pglist_data *pgdat = page_pgdat(page);
+ struct pglist_data *pgdat = folio_pgdat(folio);
unsigned long eviction;
struct lruvec *lruvec;
int memcgid;
- /* Page is fully exclusive and pins page's memory cgroup pointer */
- VM_BUG_ON_PAGE(PageLRU(page), page);
- VM_BUG_ON_PAGE(page_count(page), page);
- VM_BUG_ON_PAGE(!PageLocked(page), page);
+ /* Folio is fully exclusive and pins folio's memory cgroup pointer */
+ VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
+ VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
/* XXX: target_memcg can be NULL, go through lruvec */
memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
eviction = atomic_long_read(&lruvec->nonresident_age);
- workingset_age_nonresident(lruvec, thp_nr_pages(page));
- return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
+ workingset_age_nonresident(lruvec, folio_nr_pages(folio));
+ return pack_shadow(memcgid, pgdat, eviction,
+ folio_test_workingset(folio));
}
/**
@@ -429,10 +430,12 @@ out:
* point where they would still be useful.
*/
-static struct list_lru shadow_nodes;
+struct list_lru shadow_nodes;
void workingset_update_node(struct xa_node *node)
{
+ struct address_space *mapping;
+
/*
* Track non-empty nodes that contain only shadow entries;
* unlink those that contain pages or are being freed.
@@ -441,7 +444,8 @@ void workingset_update_node(struct xa_node *node)
* already where they should be. The list_empty() test is safe
* as node->private_list is protected by the i_pages lock.
*/
- VM_WARN_ON_ONCE(!irqs_disabled()); /* For __inc_lruvec_page_state */
+ mapping = container_of(node->array, struct address_space, i_pages);
+ lockdep_assert_held(&mapping->i_pages.xa_lock);
if (node->count && node->count == node->nr_values) {
if (list_empty(&node->private_list)) {
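
workingset_update_node() now recovers the address_space from the xarray node with container_of() and asserts the i_pages lock directly instead of warning about IRQ state. The recovery step is plain offset arithmetic; a self-contained sketch of the same container_of() idiom (stand-in structs, not the kernel types):

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct xarray { int lock; };		/* stand-in for the xa_lock */

struct address_space {
	long host;
	struct xarray i_pages;
};

int main(void)
{
	struct address_space mapping = { .host = 42 };
	struct xarray *array = &mapping.i_pages;

	/* recover the enclosing mapping from the embedded member,
	 * as the patch does with node->array */
	struct address_space *m =
		container_of(array, struct address_space, i_pages);

	printf("host=%ld (same object: %d)\n", m->host, m == &mapping);
	return 0;
}
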
diff --git a/mm/zswap.c b/mm/zswap.c
index cdf6950fcb2e..3efd8cae315e 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -120,11 +120,19 @@ static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
uint, 0644);
-/* Enable/disable handling same-value filled pages (enabled by default) */
+/*
+ * Enable/disable handling same-value filled pages (enabled by default).
+ * If disabled, every page is considered non-same-value filled.
+ */
static bool zswap_same_filled_pages_enabled = true;
module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
bool, 0644);
+/* Enable/disable handling non-same-value filled pages (enabled by default) */
+static bool zswap_non_same_filled_pages_enabled = true;
+module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
+ bool, 0644);
+
/*********************************
* data structures
**********************************/
@@ -1147,6 +1155,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
kunmap_atomic(src);
}
+ if (!zswap_non_same_filled_pages_enabled) {
+ ret = -EINVAL;
+ goto freepage;
+ }
+
/* if entry is successfully added, it keeps the reference */
entry->pool = zswap_pool_current_get();
if (!entry->pool) {
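
Together the two knobs let a store attempt be rejected at either stage: same-filled detection can be switched off, and the compressed path for ordinary pages can be refused with -EINVAL, as the new hunk does. A sketch of the resulting decision ladder; store_page() and the detection loop are illustrative only, and the real code also manages pool references and unwinds partial state:

#include <stdbool.h>
#include <stdio.h>
#include <errno.h>

static bool same_filled_pages_enabled = true;
static bool non_same_filled_pages_enabled = true;

/* true if every word of the page repeats the first word */
static bool page_same_filled(const unsigned long *page, size_t nwords)
{
	for (size_t i = 1; i < nwords; i++)
		if (page[i] != page[0])
			return false;
	return true;
}

static int store_page(const unsigned long *page, size_t nwords)
{
	if (same_filled_pages_enabled && page_same_filled(page, nwords))
		return 0;	/* stored as a single value, no compression */

	if (!non_same_filled_pages_enabled)
		return -EINVAL;	/* refuse the compressed path entirely */

	return 0;		/* would compress and store here */
}

int main(void)
{
	unsigned long page[512] = { 0 };

	non_same_filled_pages_enabled = false;
	printf("zero page: %d\n", store_page(page, 512));	/* 0 */
	page[7] = 1;
	printf("mixed page: %d\n", store_page(page, 512));	/* -22 */
	return 0;
}
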
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 1a705a4ef7fa..5eaf38875554 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -129,6 +129,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev,
u32 skb_prio, u16 vlan_prio);
int vlan_dev_set_egress_priority(const struct net_device *dev,
u32 skb_prio, u16 vlan_prio);
+void vlan_dev_free_egress_priority(const struct net_device *dev);
int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
void vlan_dev_get_realdev_name(const struct net_device *dev, char *result,
size_t size);
@@ -139,7 +140,6 @@ int vlan_check_real_dev(struct net_device *real_dev,
void vlan_setup(struct net_device *dev);
int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack);
void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
-void vlan_dev_uninit(struct net_device *dev);
bool vlan_dev_inherit_address(struct net_device *dev,
struct net_device *real_dev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 26d031a43cc1..d1902828a18a 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -622,7 +622,7 @@ static int vlan_dev_init(struct net_device *dev)
}
/* Note: this function might be called multiple times for the same device. */
-void vlan_dev_uninit(struct net_device *dev)
+void vlan_dev_free_egress_priority(const struct net_device *dev)
{
struct vlan_priority_tci_mapping *pm;
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
@@ -636,6 +636,16 @@ void vlan_dev_uninit(struct net_device *dev)
}
}
+static void vlan_dev_uninit(struct net_device *dev)
+{
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+
+ vlan_dev_free_egress_priority(dev);
+
+ /* Get rid of the vlan's reference to real_dev */
+ dev_put_track(vlan->real_dev, &vlan->dev_tracker);
+}
+
static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
netdev_features_t features)
{
@@ -846,9 +856,6 @@ static void vlan_dev_free(struct net_device *dev)
free_percpu(vlan->vlan_pcpu_stats);
vlan->vlan_pcpu_stats = NULL;
-
- /* Get rid of the vlan's reference to real_dev */
- dev_put_track(vlan->real_dev, &vlan->dev_tracker);
}
void vlan_setup(struct net_device *dev)
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 0db85aeb119b..53b1955b027f 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -183,10 +183,11 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
return -EINVAL;
err = vlan_changelink(dev, tb, data, extack);
- if (!err)
- err = register_vlan_dev(dev, extack);
if (err)
- vlan_dev_uninit(dev);
+ return err;
+ err = register_vlan_dev(dev, extack);
+ if (err)
+ vlan_dev_free_egress_priority(dev);
return err;
}
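
The vlan_newlink() rework returns straight away on a changelink failure, when nothing needs undoing, and frees only the egress priority mappings when registration fails afterwards. A sketch of that unwind-only-what-was-set-up shape (hypothetical step names):

#include <stdio.h>

static int changelink(void)   { return 0; }	/* step 1 succeeds */
static int register_dev(void) { return -1; }	/* step 2 fails */

static void free_egress_priority(void)
{
	printf("undoing only what step 1 set up\n");
}

static int newlink(void)
{
	int err = changelink();
	if (err)
		return err;		/* nothing to undo yet */

	err = register_dev();
	if (err)
		free_egress_priority();	/* undo step 1 only */
	return err;
}

int main(void)
{
	printf("newlink: %d\n", newlink());
	return 0;
}
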
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index eb9fb55280ef..01f8067994d6 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -281,9 +281,9 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
ref = priv->rings[i].intf->ref[j];
gnttab_end_foreign_access(ref, 0, 0);
}
- free_pages((unsigned long)priv->rings[i].data.in,
- priv->rings[i].intf->ring_order -
- (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ free_pages_exact(priv->rings[i].data.in,
+ 1UL << (priv->rings[i].intf->ring_order +
+ XEN_PAGE_SHIFT));
}
gnttab_end_foreign_access(priv->rings[i].ref, 0, 0);
free_page((unsigned long)priv->rings[i].intf);
@@ -322,8 +322,8 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
if (ret < 0)
goto out;
ring->ref = ret;
- bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- order - (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ bytes = alloc_pages_exact(1UL << (order + XEN_PAGE_SHIFT),
+ GFP_KERNEL | __GFP_ZERO);
if (!bytes) {
ret = -ENOMEM;
goto out;
@@ -354,9 +354,7 @@ out:
if (bytes) {
for (i--; i >= 0; i--)
gnttab_end_foreign_access(ring->intf->ref[i], 0, 0);
- free_pages((unsigned long)bytes,
- ring->intf->ring_order -
- (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ free_pages_exact(bytes, 1UL << (order + XEN_PAGE_SHIFT));
}
gnttab_end_foreign_access(ring->ref, 0, 0);
free_page((unsigned long)ring->intf);
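
The *_exact() conversion also changes how the size is expressed: the old calls passed a kernel-page order of ring_order - (PAGE_SHIFT - XEN_PAGE_SHIFT), the new ones pass the byte count 1UL << (order + XEN_PAGE_SHIFT). The sketch below compares the two forms under assumed shift values; with 64K kernel pages the old order expression can go negative for small rings, which is part of what the byte-exact form avoids:

#include <stdio.h>

#define XEN_PAGE_SHIFT 12	/* assumption: 4K Xen grant pages */
#define PAGE_SHIFT     16	/* assumption: 64K kernel pages (e.g. arm64) */

int main(void)
{
	for (unsigned int order = 0; order <= 5; order++) {
		unsigned long bytes = 1UL << (order + XEN_PAGE_SHIFT);
		/* the old form allocated 2^kp_order kernel pages */
		long kp_order = (long)order - (PAGE_SHIFT - XEN_PAGE_SHIFT);
		unsigned long old_bytes =
			kp_order >= 0 ? (1UL << kp_order) << PAGE_SHIFT : 0;

		printf("order=%u exact=%lu old=%lu%s\n", order, bytes,
		       old_bytes,
		       kp_order < 0 ? " (old order went negative!)" : "");
	}
	return 0;
}
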
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 02f43f3e2c56..6bd097180772 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -77,6 +77,7 @@ static void ax25_kill_by_device(struct net_device *dev)
{
ax25_dev *ax25_dev;
ax25_cb *s;
+ struct sock *sk;
if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
return;
@@ -85,13 +86,24 @@ static void ax25_kill_by_device(struct net_device *dev)
again:
ax25_for_each(s, &ax25_list) {
if (s->ax25_dev == ax25_dev) {
+ sk = s->sk;
+ if (!sk) {
+ spin_unlock_bh(&ax25_list_lock);
+ s->ax25_dev = NULL;
+ ax25_disconnect(s, ENETUNREACH);
+ spin_lock_bh(&ax25_list_lock);
+ goto again;
+ }
+ sock_hold(sk);
spin_unlock_bh(&ax25_list_lock);
- lock_sock(s->sk);
+ lock_sock(sk);
s->ax25_dev = NULL;
- release_sock(s->sk);
+ dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker);
+ ax25_dev_put(ax25_dev);
ax25_disconnect(s, ENETUNREACH);
+ release_sock(sk);
spin_lock_bh(&ax25_list_lock);
-
+ sock_put(sk);
/* The entry could have been deleted from the
* list meanwhile and thus the next pointer is
* no longer valid. Play it safe and restart
@@ -355,21 +367,25 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl)))
return -EFAULT;
- if ((ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr)) == NULL)
- return -ENODEV;
-
if (ax25_ctl.digi_count > AX25_MAX_DIGIS)
return -EINVAL;
if (ax25_ctl.arg > ULONG_MAX / HZ && ax25_ctl.cmd != AX25_KILL)
return -EINVAL;
+ ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr);
+ if (!ax25_dev)
+ return -ENODEV;
+
digi.ndigi = ax25_ctl.digi_count;
for (k = 0; k < digi.ndigi; k++)
digi.calls[k] = ax25_ctl.digi_addr[k];
- if ((ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev)) == NULL)
+ ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev);
+ if (!ax25) {
+ ax25_dev_put(ax25_dev);
return -ENOTCONN;
+ }
switch (ax25_ctl.cmd) {
case AX25_KILL:
@@ -436,6 +452,7 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
}
out_put:
+ ax25_dev_put(ax25_dev);
ax25_cb_put(ax25);
return ret;
@@ -1107,8 +1124,10 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
}
}
- if (ax25_dev != NULL)
+ if (ax25_dev) {
ax25_fillin_cb(ax25, ax25_dev);
+ dev_hold_track(ax25_dev->dev, &ax25_dev->dev_tracker, GFP_ATOMIC);
+ }
done:
ax25_cb_add(ax25);
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 256fadb94df3..d2a244e1c260 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -37,6 +37,7 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr)
for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next)
if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) {
res = ax25_dev;
+ ax25_dev_hold(ax25_dev);
}
spin_unlock_bh(&ax25_dev_lock);
@@ -56,6 +57,7 @@ void ax25_dev_device_up(struct net_device *dev)
return;
}
+ refcount_set(&ax25_dev->refcount, 1);
dev->ax25_ptr = ax25_dev;
ax25_dev->dev = dev;
dev_hold_track(dev, &ax25_dev->dev_tracker, GFP_ATOMIC);
@@ -84,6 +86,7 @@ void ax25_dev_device_up(struct net_device *dev)
ax25_dev->next = ax25_dev_list;
ax25_dev_list = ax25_dev;
spin_unlock_bh(&ax25_dev_lock);
+ ax25_dev_hold(ax25_dev);
ax25_register_dev_sysctl(ax25_dev);
}
@@ -113,9 +116,10 @@ void ax25_dev_device_down(struct net_device *dev)
if ((s = ax25_dev_list) == ax25_dev) {
ax25_dev_list = s->next;
spin_unlock_bh(&ax25_dev_lock);
+ ax25_dev_put(ax25_dev);
dev->ax25_ptr = NULL;
dev_put_track(dev, &ax25_dev->dev_tracker);
- kfree(ax25_dev);
+ ax25_dev_put(ax25_dev);
return;
}
@@ -123,9 +127,10 @@ void ax25_dev_device_down(struct net_device *dev)
if (s->next == ax25_dev) {
s->next = ax25_dev->next;
spin_unlock_bh(&ax25_dev_lock);
+ ax25_dev_put(ax25_dev);
dev->ax25_ptr = NULL;
dev_put_track(dev, &ax25_dev->dev_tracker);
- kfree(ax25_dev);
+ ax25_dev_put(ax25_dev);
return;
}
@@ -133,6 +138,7 @@ void ax25_dev_device_down(struct net_device *dev)
}
spin_unlock_bh(&ax25_dev_lock);
dev->ax25_ptr = NULL;
+ ax25_dev_put(ax25_dev);
}
int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd)
@@ -144,20 +150,32 @@ int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd)
switch (cmd) {
case SIOCAX25ADDFWD:
- if ((fwd_dev = ax25_addr_ax25dev(&fwd->port_to)) == NULL)
+ fwd_dev = ax25_addr_ax25dev(&fwd->port_to);
+ if (!fwd_dev) {
+ ax25_dev_put(ax25_dev);
return -EINVAL;
- if (ax25_dev->forward != NULL)
+ }
+ if (ax25_dev->forward) {
+ ax25_dev_put(fwd_dev);
+ ax25_dev_put(ax25_dev);
return -EINVAL;
+ }
ax25_dev->forward = fwd_dev->dev;
+ ax25_dev_put(fwd_dev);
+ ax25_dev_put(ax25_dev);
break;
case SIOCAX25DELFWD:
- if (ax25_dev->forward == NULL)
+ if (!ax25_dev->forward) {
+ ax25_dev_put(ax25_dev);
return -EINVAL;
+ }
ax25_dev->forward = NULL;
+ ax25_dev_put(ax25_dev);
break;
default:
+ ax25_dev_put(ax25_dev);
return -EINVAL;
}
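
ax25_addr_ax25dev() now returns its result with a reference held, so every caller, including each early error return, must balance it with ax25_dev_put(). A user-space sketch of that lookup-returns-a-reference contract using C11 atomics (hypothetical names, not the kernel's refcount_t API):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct dev {
	atomic_int refcount;
	int addr;
};

static struct dev *registry;	/* single-slot "device list" */

static struct dev *lookup(int addr)
{
	struct dev *d = registry;

	if (d && d->addr == addr) {
		atomic_fetch_add(&d->refcount, 1);	/* hold for caller */
		return d;
	}
	return NULL;
}

static void dev_put(struct dev *d)
{
	if (atomic_fetch_sub(&d->refcount, 1) == 1) {
		printf("freeing dev %d\n", d->addr);
		free(d);
	}
}

static int do_ioctl(int addr, int bad_arg)
{
	struct dev *d = lookup(addr);

	if (!d)
		return -1;
	if (bad_arg) {
		dev_put(d);	/* every error path drops the reference */
		return -1;
	}
	/* ... use d ... */
	dev_put(d);
	return 0;
}

int main(void)
{
	registry = malloc(sizeof(*registry));
	atomic_init(&registry->refcount, 1);	/* the list's own reference */
	registry->addr = 7;

	do_ioctl(7, 1);		/* error path still balances the count */
	dev_put(registry);	/* device removal drops the last reference */
	return 0;
}
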
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index d0b2e094bd55..9751207f7757 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -75,11 +75,13 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
ax25_dev *ax25_dev;
int i;
- if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL)
- return -EINVAL;
if (route->digi_count > AX25_MAX_DIGIS)
return -EINVAL;
+ ax25_dev = ax25_addr_ax25dev(&route->port_addr);
+ if (!ax25_dev)
+ return -EINVAL;
+
write_lock_bh(&ax25_route_lock);
ax25_rt = ax25_route_list;
@@ -91,6 +93,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
if (route->digi_count != 0) {
if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return -ENOMEM;
}
ax25_rt->digipeat->lastrepeat = -1;
@@ -101,6 +104,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
}
}
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return 0;
}
ax25_rt = ax25_rt->next;
@@ -108,6 +112,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
if ((ax25_rt = kmalloc(sizeof(ax25_route), GFP_ATOMIC)) == NULL) {
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return -ENOMEM;
}
@@ -120,6 +125,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
write_unlock_bh(&ax25_route_lock);
kfree(ax25_rt);
+ ax25_dev_put(ax25_dev);
return -ENOMEM;
}
ax25_rt->digipeat->lastrepeat = -1;
@@ -132,6 +138,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
ax25_rt->next = ax25_route_list;
ax25_route_list = ax25_rt;
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return 0;
}
@@ -173,6 +180,7 @@ static int ax25_rt_del(struct ax25_routes_struct *route)
}
}
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return 0;
}
@@ -215,6 +223,7 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option)
out:
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return err;
}
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 8a2b78f9c4b2..35fadb924849 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -149,22 +149,25 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
struct net *net = dev_net(net_dev);
struct net_device *parent_dev;
struct net *parent_net;
+ int iflink;
bool ret;
/* check if this is a batman-adv mesh interface */
if (batadv_softif_is_valid(net_dev))
return true;
- /* no more parents..stop recursion */
- if (dev_get_iflink(net_dev) == 0 ||
- dev_get_iflink(net_dev) == net_dev->ifindex)
+ iflink = dev_get_iflink(net_dev);
+ if (iflink == 0)
return false;
parent_net = batadv_getlink_net(net_dev, net);
+ /* iflink to itself, most likely physical device */
+ if (net == parent_net && iflink == net_dev->ifindex)
+ return false;
+
/* recurse over the parent device */
- parent_dev = __dev_get_by_index((struct net *)parent_net,
- dev_get_iflink(net_dev));
+ parent_dev = __dev_get_by_index((struct net *)parent_net, iflink);
/* if we got a NULL parent_dev there is something broken.. */
if (!parent_dev) {
pr_err("Cannot find parent device\n");
@@ -214,14 +217,15 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
struct net_device *real_netdev = NULL;
struct net *real_net;
struct net *net;
- int ifindex;
+ int iflink;
ASSERT_RTNL();
if (!netdev)
return NULL;
- if (netdev->ifindex == dev_get_iflink(netdev)) {
+ iflink = dev_get_iflink(netdev);
+ if (iflink == 0) {
dev_hold(netdev);
return netdev;
}
@@ -231,9 +235,16 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
goto out;
net = dev_net(hard_iface->soft_iface);
- ifindex = dev_get_iflink(netdev);
real_net = batadv_getlink_net(netdev, net);
- real_netdev = dev_get_by_index(real_net, ifindex);
+
+ /* iflink to itself, most likely physical device */
+ if (net == real_net && netdev->ifindex == iflink) {
+ real_netdev = netdev;
+ dev_hold(real_netdev);
+ goto out;
+ }
+
+ real_netdev = dev_get_by_index(real_net, iflink);
out:
batadv_hardif_put(hard_iface);
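
Both batman-adv helpers now check iflink == 0 ("no parent") first and treat iflink == ifindex as a physical device only when the link namespace is the device's own, since an ifindex is meaningless across namespaces. A sketch of that decision order with stand-in types:

#include <stdbool.h>
#include <stdio.h>

struct netns { int id; };

struct net_device {
	int ifindex;
	int iflink;		/* 0 means: no parent link */
	struct netns *ns;	/* namespace of the device itself */
	struct netns *link_ns;	/* namespace the iflink refers to */
};

static bool is_physical(const struct net_device *dev)
{
	if (dev->iflink == 0)
		return false;	/* no parent at all; stop recursion */

	/* iflink pointing at itself only means "physical" when the
	 * index is interpreted in the device's own namespace */
	return dev->ns == dev->link_ns && dev->iflink == dev->ifindex;
}

int main(void)
{
	struct netns a = { 1 }, b = { 2 };
	struct net_device eth  = { 3, 3, &a, &a };
	struct net_device xnet = { 3, 3, &a, &b };

	printf("same ns: %d, cross ns: %d\n",
	       is_physical(&eth), is_physical(&xnet));
	return 0;
}
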
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 2b7bd3655b07..2882bc7d79d7 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2738,6 +2738,7 @@ void hci_release_dev(struct hci_dev *hdev)
hci_dev_unlock(hdev);
ida_simple_remove(&hci_index_ida, hdev->id);
+ kfree_skb(hdev->sent_cmd);
kfree(hdev);
}
EXPORT_SYMBOL(hci_release_dev);
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 0feb68f12545..ab9aa700b6b3 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -276,40 +276,37 @@ EXPORT_SYMBOL(__hci_cmd_sync_status);
static void hci_cmd_sync_work(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_sync_work);
- struct hci_cmd_sync_work_entry *entry;
- hci_cmd_sync_work_func_t func;
- hci_cmd_sync_work_destroy_t destroy;
- void *data;
bt_dev_dbg(hdev, "");
- mutex_lock(&hdev->cmd_sync_work_lock);
- entry = list_first_entry(&hdev->cmd_sync_work_list,
- struct hci_cmd_sync_work_entry, list);
- if (entry) {
- list_del(&entry->list);
- func = entry->func;
- data = entry->data;
- destroy = entry->destroy;
- kfree(entry);
- } else {
- func = NULL;
- data = NULL;
- destroy = NULL;
- }
- mutex_unlock(&hdev->cmd_sync_work_lock);
+ /* Dequeue all entries and run them */
+ while (1) {
+ struct hci_cmd_sync_work_entry *entry;
- if (func) {
- int err;
+ mutex_lock(&hdev->cmd_sync_work_lock);
+ entry = list_first_entry_or_null(&hdev->cmd_sync_work_list,
+ struct hci_cmd_sync_work_entry,
+ list);
+ if (entry)
+ list_del(&entry->list);
+ mutex_unlock(&hdev->cmd_sync_work_lock);
- hci_req_sync_lock(hdev);
+ if (!entry)
+ break;
- err = func(hdev, data);
+ bt_dev_dbg(hdev, "entry %p", entry);
- if (destroy)
- destroy(hdev, data, err);
+ if (entry->func) {
+ int err;
- hci_req_sync_unlock(hdev);
+ hci_req_sync_lock(hdev);
+ err = entry->func(hdev, entry->data);
+ if (entry->destroy)
+ entry->destroy(hdev, entry->data, err);
+ hci_req_sync_unlock(hdev);
+ }
+
+ kfree(entry);
}
}
@@ -1841,6 +1838,7 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
struct bdaddr_list *b, *t;
u8 num_entries = 0;
bool pend_conn, pend_report;
+ u8 filter_policy;
int err;
/* Pause advertising if resolving list can be used as controllers are
@@ -1927,6 +1925,8 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
err = -EINVAL;
done:
+ filter_policy = err ? 0x00 : 0x01;
+
/* Enable address resolution when LL Privacy is enabled. */
err = hci_le_set_addr_resolution_enable_sync(hdev, 0x01);
if (err)
@@ -1937,7 +1937,7 @@ done:
hci_resume_advertising_sync(hdev);
/* Select filter policy to use accept list */
- return err ? 0x00 : 0x01;
+ return filter_policy;
}
/* Returns true if an le connection is in the scanning state */
@@ -3262,10 +3262,10 @@ static int hci_le_set_event_mask_sync(struct hci_dev *hdev)
if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT)
events[0] |= 0x40; /* LE Data Length Change */
- /* If the controller supports LL Privacy feature, enable
- * the corresponding event.
+ /* If the controller supports LL Privacy feature or LE Extended Adv,
+ * enable the corresponding event.
*/
- if (hdev->le_features[0] & HCI_LE_LL_PRIVACY)
+ if (use_enhanced_conn_complete(hdev))
events[1] |= 0x02; /* LE Enhanced Connection Complete */
/* If the controller supports Extended Scanner Filter
@@ -4106,9 +4106,9 @@ int hci_dev_close_sync(struct hci_dev *hdev)
hci_inquiry_cache_flush(hdev);
hci_pend_le_actions_clear(hdev);
hci_conn_hash_flush(hdev);
- hci_dev_unlock(hdev);
-
+ /* Prevent data races on hdev->smp_data or hdev->smp_bredr_data */
smp_unregister(hdev);
+ hci_dev_unlock(hdev);
hci_sock_dev_event(hdev, HCI_DEV_DOWN);
@@ -5185,7 +5185,7 @@ int hci_le_ext_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_EXT_CREATE_CONN,
plen, data,
HCI_EV_LE_ENHANCED_CONN_COMPLETE,
- HCI_CMD_TIMEOUT, NULL);
+ conn->conn_timeout, NULL);
}
int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn)
@@ -5270,9 +5270,18 @@ int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn)
cp.min_ce_len = cpu_to_le16(0x0000);
cp.max_ce_len = cpu_to_le16(0x0000);
+ /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 2261:
+ *
+ * If this event is unmasked and the HCI_LE_Connection_Complete event
+ * is unmasked, only the HCI_LE_Enhanced_Connection_Complete event is
+ * sent when a new connection has been created.
+ */
err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CREATE_CONN,
- sizeof(cp), &cp, HCI_EV_LE_CONN_COMPLETE,
- HCI_CMD_TIMEOUT, NULL);
+ sizeof(cp), &cp,
+ use_enhanced_conn_complete(hdev) ?
+ HCI_EV_LE_ENHANCED_CONN_COMPLETE :
+ HCI_EV_LE_CONN_COMPLETE,
+ conn->conn_timeout, NULL);
done:
/* Re-enable advertising after the connection attempt is finished. */
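
hci_cmd_sync_work() previously popped one entry per invocation, and called list_first_entry() even on an empty list; the rewrite drains the whole queue through list_first_entry_or_null() in a loop. A pthread sketch of the drain pattern (toy LIFO queue, hypothetical names):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	struct entry *next;
	int id;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry *head;

static void queue_entry(int id)
{
	struct entry *e = malloc(sizeof(*e));

	e->id = id;
	pthread_mutex_lock(&lock);
	e->next = head;
	head = e;
	pthread_mutex_unlock(&lock);
}

static void sync_work(void)
{
	/* dequeue all entries and run them -- one work run, whole queue */
	for (;;) {
		struct entry *e;

		pthread_mutex_lock(&lock);
		e = head;		/* NULL-safe "first entry or null" */
		if (e)
			head = e->next;
		pthread_mutex_unlock(&lock);

		if (!e)
			break;
		printf("running entry %d\n", e->id);
		free(e);
	}
}

int main(void)
{
	queue_entry(1);
	queue_entry(2);
	sync_work();	/* both entries run in a single invocation */
	return 0;
}
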
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 37087cf7dc5a..230a7a8196c0 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1218,7 +1218,13 @@ static int new_settings(struct hci_dev *hdev, struct sock *skip)
static void mgmt_set_powered_complete(struct hci_dev *hdev, void *data, int err)
{
struct mgmt_pending_cmd *cmd = data;
- struct mgmt_mode *cp = cmd->param;
+ struct mgmt_mode *cp;
+
+ /* Make sure cmd still outstanding. */
+ if (cmd != pending_find(MGMT_OP_SET_POWERED, hdev))
+ return;
+
+ cp = cmd->param;
bt_dev_dbg(hdev, "err %d", err);
@@ -1242,7 +1248,7 @@ static void mgmt_set_powered_complete(struct hci_dev *hdev, void *data, int err)
mgmt_status(err));
}
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
}
static int set_powered_sync(struct hci_dev *hdev, void *data)
@@ -1281,7 +1287,7 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_SET_POWERED, hdev, data, len);
+ cmd = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto failed;
@@ -1290,6 +1296,9 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data,
err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd,
mgmt_set_powered_complete);
+ if (err < 0)
+ mgmt_pending_remove(cmd);
+
failed:
hci_dev_unlock(hdev);
return err;
@@ -1383,6 +1392,10 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "err %d", err);
+ /* Make sure cmd still outstanding. */
+ if (cmd != pending_find(MGMT_OP_SET_DISCOVERABLE, hdev))
+ return;
+
hci_dev_lock(hdev);
if (err) {
@@ -1402,7 +1415,7 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data,
new_settings(hdev, cmd->sk);
done:
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
hci_dev_unlock(hdev);
}
@@ -1511,7 +1524,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_SET_DISCOVERABLE, hdev, data, len);
+ cmd = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto failed;
@@ -1538,6 +1551,9 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
err = hci_cmd_sync_queue(hdev, set_discoverable_sync, cmd,
mgmt_set_discoverable_complete);
+ if (err < 0)
+ mgmt_pending_remove(cmd);
+
failed:
hci_dev_unlock(hdev);
return err;
@@ -1550,6 +1566,10 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "err %d", err);
+ /* Make sure cmd still outstanding. */
+ if (cmd != pending_find(MGMT_OP_SET_CONNECTABLE, hdev))
+ return;
+
hci_dev_lock(hdev);
if (err) {
@@ -1562,7 +1582,9 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data,
new_settings(hdev, cmd->sk);
done:
- mgmt_pending_free(cmd);
+ if (cmd)
+ mgmt_pending_remove(cmd);
+
hci_dev_unlock(hdev);
}
@@ -1634,7 +1656,7 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_SET_CONNECTABLE, hdev, data, len);
+ cmd = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto failed;
@@ -1654,6 +1676,9 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
err = hci_cmd_sync_queue(hdev, set_connectable_sync, cmd,
mgmt_set_connectable_complete);
+ if (err < 0)
+ mgmt_pending_remove(cmd);
+
failed:
hci_dev_unlock(hdev);
return err;
@@ -1774,6 +1799,10 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err)
u8 enable = cp->val;
bool changed;
+ /* Make sure cmd still outstanding. */
+ if (cmd != pending_find(MGMT_OP_SET_SSP, hdev))
+ return;
+
if (err) {
u8 mgmt_err = mgmt_status(err);
@@ -3321,6 +3350,9 @@ static void set_name_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
+ if (cmd != pending_find(MGMT_OP_SET_LOCAL_NAME, hdev))
+ return;
+
if (status) {
mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME,
status);
@@ -3493,6 +3525,9 @@ static void set_default_phy_complete(struct hci_dev *hdev, void *data, int err)
struct sk_buff *skb = cmd->skb;
u8 status = mgmt_status(err);
+ if (cmd != pending_find(MGMT_OP_SET_PHY_CONFIGURATION, hdev))
+ return;
+
if (!status) {
if (!skb)
status = MGMT_STATUS_FAILED;
@@ -3759,13 +3794,6 @@ static int set_wideband_speech(struct sock *sk, struct hci_dev *hdev,
hci_dev_lock(hdev);
- if (pending_find(MGMT_OP_SET_WIDEBAND_SPEECH, hdev)) {
- err = mgmt_cmd_status(sk, hdev->id,
- MGMT_OP_SET_WIDEBAND_SPEECH,
- MGMT_STATUS_BUSY);
- goto unlock;
- }
-
if (hdev_is_powered(hdev) &&
!!cp->val != hci_dev_test_flag(hdev,
HCI_WIDEBAND_SPEECH_ENABLED)) {
@@ -4513,9 +4541,9 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
}
}
-done:
hci_dev_unlock(hdev);
+done:
if (status == MGMT_STATUS_SUCCESS)
device_flags_changed(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
supported_flags, current_flags);
@@ -5036,12 +5064,6 @@ static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- if (pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev)) {
- err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA,
- MGMT_STATUS_BUSY);
- goto unlock;
- }
-
cmd = mgmt_pending_new(sk, MGMT_OP_READ_LOCAL_OOB_DATA, hdev, NULL, 0);
if (!cmd)
err = -ENOMEM;
@@ -5261,11 +5283,16 @@ static void start_discovery_complete(struct hci_dev *hdev, void *data, int err)
{
struct mgmt_pending_cmd *cmd = data;
+ if (cmd != pending_find(MGMT_OP_START_DISCOVERY, hdev) &&
+ cmd != pending_find(MGMT_OP_START_LIMITED_DISCOVERY, hdev) &&
+ cmd != pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev))
+ return;
+
bt_dev_dbg(hdev, "err %d", err);
mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err),
cmd->param, 1);
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
hci_discovery_set_state(hdev, err ? DISCOVERY_STOPPED:
DISCOVERY_FINDING);
@@ -5327,7 +5354,7 @@ static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev,
else
hdev->discovery.limited = false;
- cmd = mgmt_pending_new(sk, op, hdev, data, len);
+ cmd = mgmt_pending_add(sk, op, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto failed;
@@ -5336,7 +5363,7 @@ static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev,
err = hci_cmd_sync_queue(hdev, start_discovery_sync, cmd,
start_discovery_complete);
if (err < 0) {
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
goto failed;
}
@@ -5430,7 +5457,7 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
goto failed;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_START_SERVICE_DISCOVERY,
+ cmd = mgmt_pending_add(sk, MGMT_OP_START_SERVICE_DISCOVERY,
hdev, data, len);
if (!cmd) {
err = -ENOMEM;
@@ -5463,7 +5490,7 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
err = hci_cmd_sync_queue(hdev, start_discovery_sync, cmd,
start_discovery_complete);
if (err < 0) {
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
goto failed;
}
@@ -5495,11 +5522,14 @@ static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err)
{
struct mgmt_pending_cmd *cmd = data;
+ if (cmd != pending_find(MGMT_OP_STOP_DISCOVERY, hdev))
+ return;
+
bt_dev_dbg(hdev, "err %d", err);
mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err),
cmd->param, 1);
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
if (!err)
hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
@@ -5535,7 +5565,7 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
goto unlock;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_STOP_DISCOVERY, hdev, data, len);
+ cmd = mgmt_pending_add(sk, MGMT_OP_STOP_DISCOVERY, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto unlock;
@@ -5544,7 +5574,7 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
err = hci_cmd_sync_queue(hdev, stop_discovery_sync, cmd,
stop_discovery_complete);
if (err < 0) {
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
goto unlock;
}
@@ -7474,6 +7504,9 @@ static void read_local_oob_ext_data_complete(struct hci_dev *hdev, void *data,
u8 status = mgmt_status(err);
u16 eir_len;
+ if (cmd != pending_find(MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev))
+ return;
+
if (!status) {
if (!skb)
status = MGMT_STATUS_FAILED;
@@ -7969,11 +8002,7 @@ static bool requested_adv_flags_are_valid(struct hci_dev *hdev, u32 adv_flags)
static bool adv_busy(struct hci_dev *hdev)
{
- return (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) ||
- pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) ||
- pending_find(MGMT_OP_SET_LE, hdev) ||
- pending_find(MGMT_OP_ADD_EXT_ADV_PARAMS, hdev) ||
- pending_find(MGMT_OP_ADD_EXT_ADV_DATA, hdev));
+ return pending_find(MGMT_OP_SET_LE, hdev);
}
static void add_adv_complete(struct hci_dev *hdev, struct sock *sk, u8 instance,
@@ -8563,9 +8592,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- if (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) ||
- pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) ||
- pending_find(MGMT_OP_SET_LE, hdev)) {
+ if (pending_find(MGMT_OP_SET_LE, hdev)) {
err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING,
MGMT_STATUS_BUSY);
goto unlock;
diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c
index edee60bbc7b4..37eef2ce55ae 100644
--- a/net/bluetooth/mgmt_util.c
+++ b/net/bluetooth/mgmt_util.c
@@ -77,11 +77,12 @@ int mgmt_send_event_skb(unsigned short channel, struct sk_buff *skb, int flag,
{
struct hci_dev *hdev;
struct mgmt_hdr *hdr;
- int len = skb->len;
+ int len;
if (!skb)
return -EINVAL;
+ len = skb->len;
hdev = bt_cb(skb)->mgmt.hdev;
/* Time stamp */
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index de2409889489..db4f2641d1cd 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -82,6 +82,9 @@ static void br_multicast_find_del_pg(struct net_bridge *br,
struct net_bridge_port_group *pg);
static void __br_multicast_stop(struct net_bridge_mcast *brmctx);
+static int br_mc_disabled_update(struct net_device *dev, bool value,
+ struct netlink_ext_ack *extack);
+
static struct net_bridge_port_group *
br_sg_port_find(struct net_bridge *br,
struct net_bridge_port_group_sg_key *sg_p)
@@ -1156,6 +1159,7 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
return mp;
if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) {
+ br_mc_disabled_update(br->dev, false, NULL);
br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
return ERR_PTR(-E2BIG);
}
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 84ba456a78cc..1402d5ca242d 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -560,10 +560,10 @@ static bool __allowed_ingress(const struct net_bridge *br,
!br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
if (*state == BR_STATE_FORWARDING) {
*state = br_vlan_get_pvid_state(vg);
- return br_vlan_state_allowed(*state, true);
- } else {
- return true;
+ if (!br_vlan_state_allowed(*state, true))
+ goto drop;
}
+ return true;
}
}
v = br_vlan_find(vg, *vid);
@@ -2020,7 +2020,8 @@ static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb)
goto out_err;
}
err = br_vlan_dump_dev(dev, skb, cb, dump_flags);
- if (err && err != -EMSGSIZE)
+ /* if the dump completed without an error, we return 0 here */
+ if (err != -EMSGSIZE)
goto out_err;
} else {
for_each_netdev_rcu(net, dev) {
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index eba0efe64d05..fbf858ddec35 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -49,7 +49,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct net *net,
{
struct sk_buff *nskb;
- nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, dev, hook);
+ nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, NULL, hook);
if (!nskb)
return;
@@ -65,7 +65,7 @@ static void nft_reject_br_send_v4_unreach(struct net *net,
{
struct sk_buff *nskb;
- nskb = nf_reject_skb_v4_unreach(net, oldskb, dev, hook, code);
+ nskb = nf_reject_skb_v4_unreach(net, oldskb, NULL, hook, code);
if (!nskb)
return;
@@ -81,7 +81,7 @@ static void nft_reject_br_send_v6_tcp_reset(struct net *net,
{
struct sk_buff *nskb;
- nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, dev, hook);
+ nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, NULL, hook);
if (!nskb)
return;
@@ -98,7 +98,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net,
{
struct sk_buff *nskb;
- nskb = nf_reject_skb_v6_unreach(net, oldskb, dev, hook, code);
+ nskb = nf_reject_skb_v6_unreach(net, oldskb, NULL, hook, code);
if (!nskb)
return;
diff --git a/net/can/isotp.c b/net/can/isotp.c
index 02cbcb2ecf0d..d2a430b6a13b 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -56,6 +56,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/interrupt.h>
+#include <linux/spinlock.h>
#include <linux/hrtimer.h>
#include <linux/wait.h>
#include <linux/uio.h>
@@ -145,6 +146,7 @@ struct isotp_sock {
struct tpcon rx, tx;
struct list_head notifier;
wait_queue_head_t wait;
+ spinlock_t rx_lock; /* protect single thread state machine */
};
static LIST_HEAD(isotp_notifier_list);
@@ -615,11 +617,17 @@ static void isotp_rcv(struct sk_buff *skb, void *data)
n_pci_type = cf->data[ae] & 0xF0;
+ /* Make sure the state changes and data structures stay consistent at
+ * CAN frame reception time. This locking is not needed in real-world
+ * use cases but the inconsistency can be triggered with syzkaller.
+ */
+ spin_lock(&so->rx_lock);
+
if (so->opt.flags & CAN_ISOTP_HALF_DUPLEX) {
/* check rx/tx path half duplex expectations */
if ((so->tx.state != ISOTP_IDLE && n_pci_type != N_PCI_FC) ||
(so->rx.state != ISOTP_IDLE && n_pci_type == N_PCI_FC))
- return;
+ goto out_unlock;
}
switch (n_pci_type) {
@@ -668,6 +676,9 @@ static void isotp_rcv(struct sk_buff *skb, void *data)
isotp_rcv_cf(sk, cf, ae, skb);
break;
}
+
+out_unlock:
+ spin_unlock(&so->rx_lock);
}
static void isotp_fill_dataframe(struct canfd_frame *cf, struct isotp_sock *so,
@@ -876,7 +887,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (!size || size > MAX_MSG_LENGTH) {
err = -EINVAL;
- goto err_out;
+ goto err_out_drop;
}
/* take care of a potential SF_DL ESC offset for TX_DL > 8 */
@@ -886,24 +897,24 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if ((so->opt.flags & CAN_ISOTP_SF_BROADCAST) &&
(size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) {
err = -EINVAL;
- goto err_out;
+ goto err_out_drop;
}
err = memcpy_from_msg(so->tx.buf, msg, size);
if (err < 0)
- goto err_out;
+ goto err_out_drop;
dev = dev_get_by_index(sock_net(sk), so->ifindex);
if (!dev) {
err = -ENXIO;
- goto err_out;
+ goto err_out_drop;
}
skb = sock_alloc_send_skb(sk, so->ll.mtu + sizeof(struct can_skb_priv),
msg->msg_flags & MSG_DONTWAIT, &err);
if (!skb) {
dev_put(dev);
- goto err_out;
+ goto err_out_drop;
}
can_skb_reserve(skb);
@@ -965,7 +976,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (err) {
pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
__func__, ERR_PTR(err));
- goto err_out;
+ goto err_out_drop;
}
if (wait_tx_done) {
@@ -978,6 +989,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
return size;
+err_out_drop:
+ /* drop this PDU and unlock a potential wait queue */
+ old_state = ISOTP_IDLE;
err_out:
so->tx.state = old_state;
if (so->tx.state == ISOTP_IDLE)
@@ -1444,6 +1458,7 @@ static int isotp_init(struct sock *sk)
so->txtimer.function = isotp_tx_timer_handler;
init_waitqueue_head(&so->wait);
+ spin_lock_init(&so->rx_lock);
spin_lock(&isotp_notifier_lock);
list_add_tail(&so->notifier, &isotp_notifier_list);
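
isotp_rcv() now runs the whole frame-reception state machine under a per-socket spinlock, and the early bail-out leaves through a single unlock label. A pthread spinlock sketch of that shape (made-up state values):

#include <pthread.h>
#include <stdio.h>

enum state { IDLE, RECEIVING };

static pthread_spinlock_t rx_lock;
static enum state rx_state = IDLE;

static void rcv_frame(int is_flow_control)
{
	pthread_spin_lock(&rx_lock);

	/* half-duplex expectation check: bail out via the label,
	 * so the lock is always released */
	if (rx_state != IDLE && !is_flow_control)
		goto out_unlock;

	rx_state = RECEIVING;	/* state transition under the lock */
	printf("frame accepted\n");

out_unlock:
	pthread_spin_unlock(&rx_lock);
}

int main(void)
{
	pthread_spin_init(&rx_lock, PTHREAD_PROCESS_PRIVATE);
	rcv_frame(0);	/* accepted, state -> RECEIVING */
	rcv_frame(0);	/* rejected: not idle and not a flow-control frame */
	pthread_spin_destroy(&rx_lock);
	return 0;
}
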
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index a271688780a2..307ee1174a6e 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -2006,7 +2006,7 @@ struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
/* set the end-packet for broadcast */
session->pkt.last = session->pkt.total;
- skcb->tskey = session->sk->sk_tskey++;
+ skcb->tskey = atomic_inc_return(&session->sk->sk_tskey) - 1;
session->tskey = skcb->tskey;
return session;
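
A bare sk_tskey++ races when several senders share the socket; atomic_inc_return(...) - 1 is an atomic post-increment yielding the same value sequence. The equivalent in C11 atomics:

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint sk_tskey;

static unsigned int next_tskey(void)
{
	/* atomic post-increment: returns the old value, like sk_tskey++,
	 * but safe against concurrent senders */
	return atomic_fetch_add(&sk_tskey, 1);
}

int main(void)
{
	for (int i = 0; i < 3; i++)
		printf("tskey %u\n", next_tskey());
	return 0;
}
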
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index ecc400a0b7bb..4c6441536d55 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -246,6 +246,7 @@ enum {
Opt_cephx_sign_messages,
Opt_tcp_nodelay,
Opt_abort_on_full,
+ Opt_rxbounce,
};
enum {
@@ -295,6 +296,7 @@ static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout),
fsparam_enum ("read_from_replica", Opt_read_from_replica,
ceph_param_read_from_replica),
+ fsparam_flag ("rxbounce", Opt_rxbounce),
fsparam_enum ("ms_mode", Opt_ms_mode,
ceph_param_ms_mode),
fsparam_string ("secret", Opt_secret),
@@ -584,6 +586,9 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
case Opt_abort_on_full:
opt->flags |= CEPH_OPT_ABORT_ON_FULL;
break;
+ case Opt_rxbounce:
+ opt->flags |= CEPH_OPT_RXBOUNCE;
+ break;
default:
BUG();
@@ -660,6 +665,8 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
seq_puts(m, "notcp_nodelay,");
if (show_all && (opt->flags & CEPH_OPT_ABORT_ON_FULL))
seq_puts(m, "abort_on_full,");
+ if (opt->flags & CEPH_OPT_RXBOUNCE)
+ seq_puts(m, "rxbounce,");
if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
seq_printf(m, "mount_timeout=%d,",
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 45eba2dcb67a..d3bb656308b4 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -515,6 +515,10 @@ static void ceph_con_reset_protocol(struct ceph_connection *con)
ceph_msg_put(con->out_msg);
con->out_msg = NULL;
}
+ if (con->bounce_page) {
+ __free_page(con->bounce_page);
+ con->bounce_page = NULL;
+ }
if (ceph_msgr2(from_msgr(con->msgr)))
ceph_con_v2_reset_protocol(con);
diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c
index 2cb5ffdf071a..6b014eca3a13 100644
--- a/net/ceph/messenger_v1.c
+++ b/net/ceph/messenger_v1.c
@@ -992,8 +992,7 @@ static int read_partial_message_section(struct ceph_connection *con,
static int read_partial_msg_data(struct ceph_connection *con)
{
- struct ceph_msg *msg = con->in_msg;
- struct ceph_msg_data_cursor *cursor = &msg->cursor;
+ struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
struct page *page;
size_t page_offset;
@@ -1001,9 +1000,6 @@ static int read_partial_msg_data(struct ceph_connection *con)
u32 crc = 0;
int ret;
- if (!msg->num_data_items)
- return -EIO;
-
if (do_datacrc)
crc = con->in_data_crc;
while (cursor->total_resid) {
@@ -1031,6 +1027,46 @@ static int read_partial_msg_data(struct ceph_connection *con)
return 1; /* must return > 0 to indicate success */
}
+static int read_partial_msg_data_bounce(struct ceph_connection *con)
+{
+ struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
+ struct page *page;
+ size_t off, len;
+ u32 crc;
+ int ret;
+
+ if (unlikely(!con->bounce_page)) {
+ con->bounce_page = alloc_page(GFP_NOIO);
+ if (!con->bounce_page) {
+ pr_err("failed to allocate bounce page\n");
+ return -ENOMEM;
+ }
+ }
+
+ crc = con->in_data_crc;
+ while (cursor->total_resid) {
+ if (!cursor->resid) {
+ ceph_msg_data_advance(cursor, 0);
+ continue;
+ }
+
+ page = ceph_msg_data_next(cursor, &off, &len, NULL);
+ ret = ceph_tcp_recvpage(con->sock, con->bounce_page, 0, len);
+ if (ret <= 0) {
+ con->in_data_crc = crc;
+ return ret;
+ }
+
+ crc = crc32c(crc, page_address(con->bounce_page), ret);
+ memcpy_to_page(page, off, page_address(con->bounce_page), ret);
+
+ ceph_msg_data_advance(cursor, ret);
+ }
+ con->in_data_crc = crc;
+
+ return 1; /* must return > 0 to indicate success */
+}
+
/*
* read (part of) a message.
*/
@@ -1141,7 +1177,13 @@ static int read_partial_message(struct ceph_connection *con)
/* (page) data */
if (data_len) {
- ret = read_partial_msg_data(con);
+ if (!m->num_data_items)
+ return -EIO;
+
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
+ ret = read_partial_msg_data_bounce(con);
+ else
+ ret = read_partial_msg_data(con);
if (ret <= 0)
return ret;
}
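
read_partial_msg_data_bounce() receives each chunk into a private bounce page, checksums that private copy, and only then copies it to the destination, so memory the receiver does not own exclusively cannot change between CRC and delivery. A sketch of receive-into-bounce-then-copy with a toy checksum (recv_chunk() stands in for ceph_tcp_recvpage(); names are illustrative):

#include <stdio.h>
#include <string.h>

#define BOUNCE_SIZE 4096

static unsigned int toy_crc(unsigned int crc, const unsigned char *p, size_t n)
{
	while (n--)
		crc = (crc << 5) + crc + *p++;	/* stand-in for crc32c */
	return crc;
}

/* stand-in for ceph_tcp_recvpage(): "receive" from a memory source */
static size_t recv_chunk(const unsigned char **src, size_t *resid,
			 unsigned char *bounce, size_t max)
{
	size_t n = *resid < max ? *resid : max;

	memcpy(bounce, *src, n);
	*src += n;
	*resid -= n;
	return n;
}

int main(void)
{
	unsigned char wire[10000], dst[10000], bounce[BOUNCE_SIZE];
	const unsigned char *src = wire;
	size_t resid = sizeof(wire), off = 0;
	unsigned int crc = 0;

	memset(wire, 0xab, sizeof(wire));

	while (resid) {
		size_t n = recv_chunk(&src, &resid, bounce, BOUNCE_SIZE);

		/* CRC the private copy before it is visible anywhere else */
		crc = toy_crc(crc, bounce, n);
		memcpy(dst + off, bounce, n);
		off += n;
	}
	printf("received %zu bytes, crc=%u\n", off, crc);
	return 0;
}
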
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index c4099b641b38..c81379f93ad5 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -57,8 +57,9 @@
#define IN_S_HANDLE_CONTROL_REMAINDER 3
#define IN_S_PREPARE_READ_DATA 4
#define IN_S_PREPARE_READ_DATA_CONT 5
-#define IN_S_HANDLE_EPILOGUE 6
-#define IN_S_FINISH_SKIP 7
+#define IN_S_PREPARE_READ_ENC_PAGE 6
+#define IN_S_HANDLE_EPILOGUE 7
+#define IN_S_FINISH_SKIP 8
#define OUT_S_QUEUE_DATA 1
#define OUT_S_QUEUE_DATA_CONT 2
@@ -1032,22 +1033,41 @@ static int decrypt_control_remainder(struct ceph_connection *con)
padded_len(rem_len) + CEPH_GCM_TAG_LEN);
}
-static int decrypt_message(struct ceph_connection *con)
+static int decrypt_tail(struct ceph_connection *con)
{
+ struct sg_table enc_sgt = {};
struct sg_table sgt = {};
+ int tail_len;
int ret;
+ tail_len = tail_onwire_len(con->in_msg, true);
+ ret = sg_alloc_table_from_pages(&enc_sgt, con->v2.in_enc_pages,
+ con->v2.in_enc_page_cnt, 0, tail_len,
+ GFP_NOIO);
+ if (ret)
+ goto out;
+
ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf),
MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
con->v2.in_buf, true);
if (ret)
goto out;
- ret = gcm_crypt(con, false, sgt.sgl, sgt.sgl,
- tail_onwire_len(con->in_msg, true));
+ dout("%s con %p msg %p enc_page_cnt %d sg_cnt %d\n", __func__, con,
+ con->in_msg, con->v2.in_enc_page_cnt, sgt.orig_nents);
+ ret = gcm_crypt(con, false, enc_sgt.sgl, sgt.sgl, tail_len);
+ if (ret)
+ goto out;
+
+ WARN_ON(!con->v2.in_enc_page_cnt);
+ ceph_release_page_vector(con->v2.in_enc_pages,
+ con->v2.in_enc_page_cnt);
+ con->v2.in_enc_pages = NULL;
+ con->v2.in_enc_page_cnt = 0;
out:
sg_free_table(&sgt);
+ sg_free_table(&enc_sgt);
return ret;
}
@@ -1733,54 +1753,157 @@ static int prepare_read_control_remainder(struct ceph_connection *con)
return 0;
}
-static void prepare_read_data(struct ceph_connection *con)
+static int prepare_read_data(struct ceph_connection *con)
{
struct bio_vec bv;
- if (!con_secure(con))
- con->in_data_crc = -1;
+ con->in_data_crc = -1;
ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg,
data_len(con->in_msg));
get_bvec_at(&con->v2.in_cursor, &bv);
- set_in_bvec(con, &bv);
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ if (unlikely(!con->bounce_page)) {
+ con->bounce_page = alloc_page(GFP_NOIO);
+ if (!con->bounce_page) {
+ pr_err("failed to allocate bounce page\n");
+ return -ENOMEM;
+ }
+ }
+
+ bv.bv_page = con->bounce_page;
+ bv.bv_offset = 0;
+ set_in_bvec(con, &bv);
+ } else {
+ set_in_bvec(con, &bv);
+ }
con->v2.in_state = IN_S_PREPARE_READ_DATA_CONT;
+ return 0;
}
static void prepare_read_data_cont(struct ceph_connection *con)
{
struct bio_vec bv;
- if (!con_secure(con))
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ con->in_data_crc = crc32c(con->in_data_crc,
+ page_address(con->bounce_page),
+ con->v2.in_bvec.bv_len);
+
+ get_bvec_at(&con->v2.in_cursor, &bv);
+ memcpy_to_page(bv.bv_page, bv.bv_offset,
+ page_address(con->bounce_page),
+ con->v2.in_bvec.bv_len);
+ } else {
con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
con->v2.in_bvec.bv_page,
con->v2.in_bvec.bv_offset,
con->v2.in_bvec.bv_len);
+ }
ceph_msg_data_advance(&con->v2.in_cursor, con->v2.in_bvec.bv_len);
if (con->v2.in_cursor.total_resid) {
get_bvec_at(&con->v2.in_cursor, &bv);
- set_in_bvec(con, &bv);
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ bv.bv_page = con->bounce_page;
+ bv.bv_offset = 0;
+ set_in_bvec(con, &bv);
+ } else {
+ set_in_bvec(con, &bv);
+ }
WARN_ON(con->v2.in_state != IN_S_PREPARE_READ_DATA_CONT);
return;
}
/*
- * We've read all data. Prepare to read data padding (if any)
- * and epilogue.
+ * We've read all data. Prepare to read epilogue.
*/
reset_in_kvecs(con);
- if (con_secure(con)) {
- if (need_padding(data_len(con->in_msg)))
- add_in_kvec(con, DATA_PAD(con->v2.in_buf),
- padding_len(data_len(con->in_msg)));
- add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_SECURE_LEN);
+ add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
+ con->v2.in_state = IN_S_HANDLE_EPILOGUE;
+}
+
+static int prepare_read_tail_plain(struct ceph_connection *con)
+{
+ struct ceph_msg *msg = con->in_msg;
+
+ if (!front_len(msg) && !middle_len(msg)) {
+ WARN_ON(!data_len(msg));
+ return prepare_read_data(con);
+ }
+
+ reset_in_kvecs(con);
+ if (front_len(msg)) {
+ add_in_kvec(con, msg->front.iov_base, front_len(msg));
+ WARN_ON(msg->front.iov_len != front_len(msg));
+ }
+ if (middle_len(msg)) {
+ add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
+ WARN_ON(msg->middle->vec.iov_len != middle_len(msg));
+ }
+
+ if (data_len(msg)) {
+ con->v2.in_state = IN_S_PREPARE_READ_DATA;
} else {
add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
+ con->v2.in_state = IN_S_HANDLE_EPILOGUE;
}
+ return 0;
+}
+
+static void prepare_read_enc_page(struct ceph_connection *con)
+{
+ struct bio_vec bv;
+
+ dout("%s con %p i %d resid %d\n", __func__, con, con->v2.in_enc_i,
+ con->v2.in_enc_resid);
+ WARN_ON(!con->v2.in_enc_resid);
+
+ bv.bv_page = con->v2.in_enc_pages[con->v2.in_enc_i];
+ bv.bv_offset = 0;
+ bv.bv_len = min(con->v2.in_enc_resid, (int)PAGE_SIZE);
+
+ set_in_bvec(con, &bv);
+ con->v2.in_enc_i++;
+ con->v2.in_enc_resid -= bv.bv_len;
+
+ if (con->v2.in_enc_resid) {
+ con->v2.in_state = IN_S_PREPARE_READ_ENC_PAGE;
+ return;
+ }
+
+ /*
+ * We are set to read the last piece of ciphertext (ending
+ * with epilogue) + auth tag.
+ */
+ WARN_ON(con->v2.in_enc_i != con->v2.in_enc_page_cnt);
con->v2.in_state = IN_S_HANDLE_EPILOGUE;
}
+static int prepare_read_tail_secure(struct ceph_connection *con)
+{
+ struct page **enc_pages;
+ int enc_page_cnt;
+ int tail_len;
+
+ tail_len = tail_onwire_len(con->in_msg, true);
+ WARN_ON(!tail_len);
+
+ enc_page_cnt = calc_pages_for(0, tail_len);
+ enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO);
+ if (IS_ERR(enc_pages))
+ return PTR_ERR(enc_pages);
+
+ WARN_ON(con->v2.in_enc_pages || con->v2.in_enc_page_cnt);
+ con->v2.in_enc_pages = enc_pages;
+ con->v2.in_enc_page_cnt = enc_page_cnt;
+ con->v2.in_enc_resid = tail_len;
+ con->v2.in_enc_i = 0;
+
+ prepare_read_enc_page(con);
+ return 0;
+}
+
static void __finish_skip(struct ceph_connection *con)
{
con->in_seq++;
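
The RXBOUNCE branch above lands every data piece in a private bounce page, checksums that private copy, and only then copies it into the destination page, so a destination that is concurrently mapped (for example by userspace) cannot mutate the bytes between CRC and delivery. A minimal sketch of that receive step, assuming a one-page bounce buffer and an in-kernel destination bvec:

#include <linux/bvec.h>
#include <linux/crc32c.h>
#include <linux/highmem.h>

/* CRC the private bounce copy first, then publish it to the
 * real destination page.
 */
static void rx_bounce_piece(struct page *bounce, const struct bio_vec *dst,
                            u32 *crc, unsigned int len)
{
        *crc = crc32c(*crc, page_address(bounce), len);
        memcpy_to_page(dst->bv_page, dst->bv_offset,
                       page_address(bounce), len);
}
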
@@ -2589,47 +2712,26 @@ static int __handle_control(struct ceph_connection *con, void *p)
}
msg = con->in_msg; /* set in process_message_header() */
- if (!front_len(msg) && !middle_len(msg)) {
- if (!data_len(msg))
- return process_message(con);
-
- prepare_read_data(con);
- return 0;
- }
-
- reset_in_kvecs(con);
if (front_len(msg)) {
WARN_ON(front_len(msg) > msg->front_alloc_len);
- add_in_kvec(con, msg->front.iov_base, front_len(msg));
msg->front.iov_len = front_len(msg);
-
- if (con_secure(con) && need_padding(front_len(msg)))
- add_in_kvec(con, FRONT_PAD(con->v2.in_buf),
- padding_len(front_len(msg)));
} else {
msg->front.iov_len = 0;
}
if (middle_len(msg)) {
WARN_ON(middle_len(msg) > msg->middle->alloc_len);
- add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
msg->middle->vec.iov_len = middle_len(msg);
-
- if (con_secure(con) && need_padding(middle_len(msg)))
- add_in_kvec(con, MIDDLE_PAD(con->v2.in_buf),
- padding_len(middle_len(msg)));
} else if (msg->middle) {
msg->middle->vec.iov_len = 0;
}
- if (data_len(msg)) {
- con->v2.in_state = IN_S_PREPARE_READ_DATA;
- } else {
- add_in_kvec(con, con->v2.in_buf,
- con_secure(con) ? CEPH_EPILOGUE_SECURE_LEN :
- CEPH_EPILOGUE_PLAIN_LEN);
- con->v2.in_state = IN_S_HANDLE_EPILOGUE;
- }
- return 0;
+ if (!front_len(msg) && !middle_len(msg) && !data_len(msg))
+ return process_message(con);
+
+ if (con_secure(con))
+ return prepare_read_tail_secure(con);
+
+ return prepare_read_tail_plain(con);
}
static int handle_preamble(struct ceph_connection *con)
@@ -2717,7 +2819,7 @@ static int handle_epilogue(struct ceph_connection *con)
int ret;
if (con_secure(con)) {
- ret = decrypt_message(con);
+ ret = decrypt_tail(con);
if (ret) {
if (ret == -EBADMSG)
con->error_msg = "integrity error, bad epilogue auth tag";
@@ -2785,13 +2887,16 @@ static int populate_in_iter(struct ceph_connection *con)
ret = handle_control_remainder(con);
break;
case IN_S_PREPARE_READ_DATA:
- prepare_read_data(con);
- ret = 0;
+ ret = prepare_read_data(con);
break;
case IN_S_PREPARE_READ_DATA_CONT:
prepare_read_data_cont(con);
ret = 0;
break;
+ case IN_S_PREPARE_READ_ENC_PAGE:
+ prepare_read_enc_page(con);
+ ret = 0;
+ break;
case IN_S_HANDLE_EPILOGUE:
ret = handle_epilogue(con);
break;
@@ -3326,20 +3431,16 @@ void ceph_con_v2_revoke(struct ceph_connection *con)
static void revoke_at_prepare_read_data(struct ceph_connection *con)
{
- int remaining; /* data + [data padding] + epilogue */
+ int remaining;
int resid;
+ WARN_ON(con_secure(con));
WARN_ON(!data_len(con->in_msg));
WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
resid = iov_iter_count(&con->v2.in_iter);
WARN_ON(!resid);
- if (con_secure(con))
- remaining = padded_len(data_len(con->in_msg)) +
- CEPH_EPILOGUE_SECURE_LEN;
- else
- remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN;
-
+ remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN;
dout("%s con %p resid %d remaining %d\n", __func__, con, resid,
remaining);
con->v2.in_iter.count -= resid;
@@ -3350,8 +3451,9 @@ static void revoke_at_prepare_read_data(struct ceph_connection *con)
static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
{
int recved, resid; /* current piece of data */
- int remaining; /* [data padding] + epilogue */
+ int remaining;
+ WARN_ON(con_secure(con));
WARN_ON(!data_len(con->in_msg));
WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
resid = iov_iter_count(&con->v2.in_iter);
@@ -3363,12 +3465,7 @@ static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
ceph_msg_data_advance(&con->v2.in_cursor, recved);
WARN_ON(resid > con->v2.in_cursor.total_resid);
- if (con_secure(con))
- remaining = padding_len(data_len(con->in_msg)) +
- CEPH_EPILOGUE_SECURE_LEN;
- else
- remaining = CEPH_EPILOGUE_PLAIN_LEN;
-
+ remaining = CEPH_EPILOGUE_PLAIN_LEN;
dout("%s con %p total_resid %zu remaining %d\n", __func__, con,
con->v2.in_cursor.total_resid, remaining);
con->v2.in_iter.count -= resid;
@@ -3376,11 +3473,26 @@ static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
con->v2.in_state = IN_S_FINISH_SKIP;
}
+static void revoke_at_prepare_read_enc_page(struct ceph_connection *con)
+{
+ int resid; /* current enc page (not necessarily data) */
+
+ WARN_ON(!con_secure(con));
+ WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
+ resid = iov_iter_count(&con->v2.in_iter);
+ WARN_ON(!resid || resid > con->v2.in_bvec.bv_len);
+
+ dout("%s con %p resid %d enc_resid %d\n", __func__, con, resid,
+ con->v2.in_enc_resid);
+ con->v2.in_iter.count -= resid;
+ set_in_skip(con, resid + con->v2.in_enc_resid);
+ con->v2.in_state = IN_S_FINISH_SKIP;
+}
+
static void revoke_at_handle_epilogue(struct ceph_connection *con)
{
int resid;
- WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
resid = iov_iter_count(&con->v2.in_iter);
WARN_ON(!resid);
@@ -3399,6 +3511,9 @@ void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
case IN_S_PREPARE_READ_DATA_CONT:
revoke_at_prepare_read_data_cont(con);
break;
+ case IN_S_PREPARE_READ_ENC_PAGE:
+ revoke_at_prepare_read_enc_page(con);
+ break;
case IN_S_HANDLE_EPILOGUE:
revoke_at_handle_epilogue(con);
break;
@@ -3432,6 +3547,13 @@ void ceph_con_v2_reset_protocol(struct ceph_connection *con)
clear_out_sign_kvecs(con);
free_conn_bufs(con);
+ if (con->v2.in_enc_pages) {
+ WARN_ON(!con->v2.in_enc_page_cnt);
+ ceph_release_page_vector(con->v2.in_enc_pages,
+ con->v2.in_enc_page_cnt);
+ con->v2.in_enc_pages = NULL;
+ con->v2.in_enc_page_cnt = 0;
+ }
if (con->v2.out_enc_pages) {
WARN_ON(!con->v2.out_enc_page_cnt);
ceph_release_page_vector(con->v2.out_enc_pages,
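
prepare_read_tail_secure() sizes one page vector for the entire on-wire tail (padded front, middle, data, epilogue, plus auth tag) and prepare_read_enc_page() then walks it page by page; ceph_con_v2_reset_protocol() above releases the same vector on teardown. A sketch of just the sizing step, using the existing libceph helpers:

#include <linux/ceph/libceph.h>

/* A minimal sketch: size and allocate a page vector for an N-byte
 * ciphertext tail.  calc_pages_for(0, len) rounds len up to whole
 * pages, e.g. 4097 bytes -> 2 pages.
 */
static struct page **alloc_tail_pages(int tail_len, int *page_cnt)
{
        *page_cnt = calc_pages_for(0, tail_len);
        return ceph_alloc_page_vector(*page_cnt, GFP_NOIO); /* ERR_PTR on failure */
}
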
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 7b288a121a41..d5dc6be2522c 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -283,13 +283,17 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
rcu_read_lock();
list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
+ struct net_device *dev;
+
/*
* only add a note to our monitor buffer if:
* 1) this is the dev we received on
* 2) it's after the last_rx delta
* 3) our rx_dropped count has gone up
*/
- if ((new_stat->dev == napi->dev) &&
+ /* Paired with WRITE_ONCE() in dropmon_net_event() */
+ dev = READ_ONCE(new_stat->dev);
+ if ((dev == napi->dev) &&
(time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
(napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
trace_drop_common(NULL, NULL);
@@ -1576,7 +1580,10 @@ static int dropmon_net_event(struct notifier_block *ev_block,
mutex_lock(&net_dm_mutex);
list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
if (new_stat->dev == dev) {
- new_stat->dev = NULL;
+
+ /* Paired with READ_ONCE() in trace_napi_poll_hit() */
+ WRITE_ONCE(new_stat->dev, NULL);
+
if (trace_state == TRACE_OFF) {
list_del_rcu(&new_stat->list);
kfree_rcu(new_stat, rcu);
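
The drop_monitor fix is the standard annotation pair for a pointer that is written under a lock but read locklessly under RCU: WRITE_ONCE() on the update side and READ_ONCE() on the reader, so the compiler can neither tear the store nor reload the pointer mid-check. The pattern in isolation, with hypothetical names:

#include <linux/rcupdate.h>
#include <linux/netdevice.h>

struct hw_stat {
        struct net_device *dev;
};

/* Writer side: clear the pointer under the update-side lock. */
static void stat_detach_dev(struct hw_stat *stat)
{
        WRITE_ONCE(stat->dev, NULL);    /* paired with READ_ONCE() below */
}

/* Reader side: runs under rcu_read_lock(); one load, then reuse. */
static bool stat_matches(struct hw_stat *stat, struct net_device *target)
{
        struct net_device *dev = READ_ONCE(stat->dev);

        return dev == target;           /* NULL simply never matches */
}
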
diff --git a/net/core/filter.c b/net/core/filter.c
index 4603b7cd3cd1..9eb785842258 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2710,6 +2710,9 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
if (unlikely(flags))
return -EINVAL;
+ if (unlikely(len == 0))
+ return 0;
+
/* First find the starting scatterlist element */
i = msg->sg.start;
do {
diff --git a/net/core/gro.c b/net/core/gro.c
index a11b286d1495..b7d2b0dc59a2 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -93,6 +93,31 @@ void dev_remove_offload(struct packet_offload *po)
EXPORT_SYMBOL(dev_remove_offload);
/**
+ * skb_eth_gso_segment - segmentation handler for ethernet protocols.
+ * @skb: buffer to segment
+ * @features: features for the output path (see dev->features)
+ * @type: Ethernet Protocol ID
+ */
+struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb,
+ netdev_features_t features, __be16 type)
+{
+ struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+ struct packet_offload *ptype;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, &offload_base, list) {
+ if (ptype->type == type && ptype->callbacks.gso_segment) {
+ segs = ptype->callbacks.gso_segment(skb, features);
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return segs;
+}
+EXPORT_SYMBOL(skb_eth_gso_segment);
+
+/**
* skb_mac_gso_segment - mac layer segmentation handler.
* @skb: buffer to segment
* @features: features for the output path (see dev->features)
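
skb_eth_gso_segment() gives protocol code a way to segment an inner frame by ethertype without pushing a MAC header first; the esp4_offload and esp6_offload hunks later in this diff switch to it. A hypothetical caller for an inner IPv4 payload:

#include <linux/netdevice.h>
#include <linux/if_ether.h>

/* Segment an skb whose payload is a plain IPv4 packet.  Returns
 * ERR_PTR(-EPROTONOSUPPORT) if no offload is registered for the type.
 */
static struct sk_buff *segment_inner_ipv4(struct sk_buff *skb,
                                          netdev_features_t features)
{
        return skb_eth_gso_segment(skb, features, htons(ETH_P_IP));
}
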
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6c2016f7f3d1..ec0bf737b076 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1133,7 +1133,8 @@ out:
neigh_release(neigh);
}
-int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
+ const bool immediate_ok)
{
int rc;
bool immediate_probe = false;
@@ -1154,12 +1155,17 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
atomic_set(&neigh->probes,
NEIGH_VAR(neigh->parms, UCAST_PROBES));
neigh_del_timer(neigh);
- neigh->nud_state = NUD_INCOMPLETE;
+ neigh->nud_state = NUD_INCOMPLETE;
neigh->updated = now;
- next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
- HZ/100);
+ if (!immediate_ok) {
+ next = now + 1;
+ } else {
+ immediate_probe = true;
+ next = now + max(NEIGH_VAR(neigh->parms,
+ RETRANS_TIME),
+ HZ / 100);
+ }
neigh_add_timer(neigh, next);
- immediate_probe = true;
} else {
neigh->nud_state = NUD_FAILED;
neigh->updated = jiffies;
@@ -1571,7 +1577,7 @@ static void neigh_managed_work(struct work_struct *work)
write_lock_bh(&tbl->lock);
list_for_each_entry(neigh, &tbl->managed_list, managed_list)
- neigh_event_send(neigh, NULL);
+ neigh_event_send_probe(neigh, NULL, false);
queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME));
write_unlock_bh(&tbl->lock);
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index d8b9dbabd4a4..88cc0ad7d386 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -190,12 +190,23 @@ static const struct seq_operations softnet_seq_ops = {
.show = softnet_seq_show,
};
-static void *ptype_get_idx(loff_t pos)
+static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
{
+ struct list_head *ptype_list = NULL;
struct packet_type *pt = NULL;
+ struct net_device *dev;
loff_t i = 0;
int t;
+ for_each_netdev_rcu(seq_file_net(seq), dev) {
+ ptype_list = &dev->ptype_all;
+ list_for_each_entry_rcu(pt, ptype_list, list) {
+ if (i == pos)
+ return pt;
+ ++i;
+ }
+ }
+
list_for_each_entry_rcu(pt, &ptype_all, list) {
if (i == pos)
return pt;
@@ -216,22 +227,40 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
rcu_read_lock();
- return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
+ return *pos ? ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net_device *dev;
struct packet_type *pt;
struct list_head *nxt;
int hash;
++*pos;
if (v == SEQ_START_TOKEN)
- return ptype_get_idx(0);
+ return ptype_get_idx(seq, 0);
pt = v;
nxt = pt->list.next;
+ if (pt->dev) {
+ if (nxt != &pt->dev->ptype_all)
+ goto found;
+
+ dev = pt->dev;
+ for_each_netdev_continue_rcu(seq_file_net(seq), dev) {
+ if (!list_empty(&dev->ptype_all)) {
+ nxt = dev->ptype_all.next;
+ goto found;
+ }
+ }
+
+ nxt = ptype_all.next;
+ goto ptype_all;
+ }
+
if (pt->type == htons(ETH_P_ALL)) {
+ptype_all:
if (nxt != &ptype_all)
goto found;
hash = 0;
@@ -260,7 +289,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
if (v == SEQ_START_TOKEN)
seq_puts(seq, "Type Device Function\n");
- else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
+ else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) &&
+ (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) {
if (pt->type == htons(ETH_P_ALL))
seq_puts(seq, "ALL ");
else
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 53ea262ecafd..9cbc1c8289bc 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -213,7 +213,7 @@ static ssize_t speed_show(struct device *dev,
if (!rtnl_trylock())
return restart_syscall();
- if (netif_running(netdev)) {
+ if (netif_running(netdev) && netif_device_present(netdev)) {
struct ethtool_link_ksettings cmd;
if (!__ethtool_get_link_ksettings(netdev, &cmd))
@@ -823,7 +823,7 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
{
struct rps_map *old_map, *map;
cpumask_var_t mask;
- int err, cpu, i, hk_flags;
+ int err, cpu, i;
static DEFINE_MUTEX(rps_map_mutex);
if (!capable(CAP_NET_ADMIN))
@@ -839,8 +839,8 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
}
if (!cpumask_empty(mask)) {
- hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
- cpumask_and(mask, mask, housekeeping_cpumask(hk_flags));
+ cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_DOMAIN));
+ cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_WQ));
if (cpumask_empty(mask)) {
free_cpumask_var(mask);
return -EINVAL;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e476403231f0..2fb8eb6791e8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1699,6 +1699,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
+ struct Qdisc *qdisc;
ASSERT_RTNL();
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -1716,6 +1717,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid))
goto nla_put_failure;
+ qdisc = rtnl_dereference(dev->qdisc);
if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
nla_put_u8(skb, IFLA_OPERSTATE,
@@ -1735,8 +1737,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
#endif
put_master_ifindex(skb, dev) ||
nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
- (dev->qdisc &&
- nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
+ (qdisc &&
+ nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) ||
nla_put_ifalias(skb, dev) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
atomic_read(&dev->carrier_up_count) +
@@ -3275,8 +3277,8 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
unsigned char name_assign_type = NET_NAME_USER;
struct nlattr *linkinfo[IFLA_INFO_MAX + 1];
- const struct rtnl_link_ops *m_ops = NULL;
- struct net_device *master_dev = NULL;
+ const struct rtnl_link_ops *m_ops;
+ struct net_device *master_dev;
struct net *net = sock_net(skb->sk);
const struct rtnl_link_ops *ops;
struct nlattr *tb[IFLA_MAX + 1];
@@ -3314,6 +3316,8 @@ replay:
else
dev = NULL;
+ master_dev = NULL;
+ m_ops = NULL;
if (dev) {
master_dev = netdev_master_upper_dev_get(dev);
if (master_dev)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0118f0afaa4f..ea51e23e9247 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -681,7 +681,7 @@ exit:
* while trying to recycle fragments on __skb_frag_unref() we need
* to make one SKB responsible for triggering the recycle path.
* So disable the recycling bit if an SKB is cloned and we have
- * additional references to to the fragmented part of the SKB.
+ * additional references to the fragmented part of the SKB.
* Eventually the last SKB will have the recycling bit set and it's
* dataref set to 0, which will trigger the recycling
*/
@@ -2276,7 +2276,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
/* Free pulled out fragments. */
while ((list = skb_shinfo(skb)->frag_list) != insp) {
skb_shinfo(skb)->frag_list = list->next;
- kfree_skb(list);
+ consume_skb(list);
}
/* And insert new clone at head. */
if (clone) {
@@ -3876,6 +3876,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
list_skb = list_skb->next;
err = 0;
+ delta_truesize += nskb->truesize;
if (skb_shared(nskb)) {
tmp = skb_clone(nskb, GFP_ATOMIC);
if (tmp) {
@@ -3900,7 +3901,6 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
tail = nskb;
delta_len += nskb->len;
- delta_truesize += nskb->truesize;
skb_push(nskb, -skb_network_offset(nskb) + offset);
@@ -4730,7 +4730,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
if (sk_is_tcp(sk))
- serr->ee.ee_data -= sk->sk_tskey;
+ serr->ee.ee_data -= atomic_read(&sk->sk_tskey);
}
err = sock_queue_err_skb(sk, skb);
@@ -6105,7 +6105,7 @@ static int pskb_carve_frag_list(struct sk_buff *skb,
/* Free pulled out fragments. */
while ((list = shinfo->frag_list) != insp) {
shinfo->frag_list = list->next;
- kfree_skb(list);
+ consume_skb(list);
}
/* And insert new clone at head. */
if (clone) {
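
The two skbuff.c hunks that flip kfree_skb() to consume_skb() matter for drop accounting: both free the buffer, but kfree_skb() is traced as a packet drop while consume_skb() marks a normal, intentional free. Sketch of the rule applied to a frag list:

#include <linux/skbuff.h>

/* Freeing skbs after a successful pull/carve is not a drop. */
static void free_fraglist(struct sk_buff *list)
{
        struct sk_buff *next;

        while (list) {
                next = list->next;
                consume_skb(list);      /* intentional free: no drop event */
                list = next;
        }
}
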
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 8eb671c827f9..929a2b096b04 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1153,7 +1153,7 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
struct sk_psock *psock;
struct bpf_prog *prog;
int ret = __SK_DROP;
- int len = skb->len;
+ int len = orig_len;
/* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */
skb = skb_clone(skb, GFP_ATOMIC);
diff --git a/net/core/sock.c b/net/core/sock.c
index 4ff806d71921..6eb174805bf0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -879,9 +879,9 @@ int sock_set_timestamping(struct sock *sk, int optname,
if ((1 << sk->sk_state) &
(TCPF_CLOSE | TCPF_LISTEN))
return -EINVAL;
- sk->sk_tskey = tcp_sk(sk)->snd_una;
+ atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una);
} else {
- sk->sk_tskey = 0;
+ atomic_set(&sk->sk_tskey, 0);
}
}
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 7aba35504986..73fae16264e1 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -357,7 +357,8 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
if (IS_ERR(xdp_alloc))
return PTR_ERR(xdp_alloc);
- trace_mem_connect(xdp_alloc, xdp_rxq);
+ if (trace_mem_connect_enabled() && xdp_alloc)
+ trace_mem_connect(xdp_alloc, xdp_rxq);
return 0;
}
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index b441ab330fd3..dc4fb699b56c 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -2073,8 +2073,52 @@ u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev)
}
EXPORT_SYMBOL(dcb_ieee_getapp_default_prio_mask);
+static void dcbnl_flush_dev(struct net_device *dev)
+{
+ struct dcb_app_type *itr, *tmp;
+
+ spin_lock_bh(&dcb_lock);
+
+ list_for_each_entry_safe(itr, tmp, &dcb_app_list, list) {
+ if (itr->ifindex == dev->ifindex) {
+ list_del(&itr->list);
+ kfree(itr);
+ }
+ }
+
+ spin_unlock_bh(&dcb_lock);
+}
+
+static int dcbnl_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ if (!dev->dcbnl_ops)
+ return NOTIFY_DONE;
+
+ dcbnl_flush_dev(dev);
+
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static struct notifier_block dcbnl_nb __read_mostly = {
+ .notifier_call = dcbnl_netdevice_event,
+};
+
static int __init dcbnl_init(void)
{
+ int err;
+
+ err = register_netdevice_notifier(&dcbnl_nb);
+ if (err)
+ return err;
+
rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0);
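
dcbnl now uses the usual recipe for per-device state owned by a subsystem: register a netdevice notifier at init and purge matching entries on NETDEV_UNREGISTER. The generic skeleton (hypothetical names; the dcbnl specifics are in the hunk above):

#include <linux/netdevice.h>
#include <linux/notifier.h>

static int my_netdev_event(struct notifier_block *nb,
                           unsigned long event, void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);

        if (event != NETDEV_UNREGISTER)
                return NOTIFY_DONE;

        /* drop any per-device state keyed by dev->ifindex here */
        return NOTIFY_OK;
}

static struct notifier_block my_nb = {
        .notifier_call = my_netdev_event,
};

static int __init my_init(void)
{
        return register_netdevice_notifier(&my_nb);
}
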
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index d9d0d227092c..c43f7446a75d 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -349,6 +349,7 @@ void dsa_flush_workqueue(void)
{
flush_workqueue(dsa_owq);
}
+EXPORT_SYMBOL_GPL(dsa_flush_workqueue);
int dsa_devlink_param_get(struct devlink *dl, u32 id,
struct devlink_param_gset_ctx *ctx)
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 3d21521453fe..88e2808019b4 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -1058,7 +1058,7 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
{
struct dsa_port *dp;
- int err;
+ int err = 0;
rtnl_lock();
@@ -1066,13 +1066,13 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
if (dsa_port_is_cpu(dp)) {
err = dsa_master_setup(dp->master, dp);
if (err)
- return err;
+ break;
}
}
rtnl_unlock();
- return 0;
+ return err;
}
static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
@@ -1261,7 +1261,7 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
info.tag_ops = tag_ops;
err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
if (err)
- return err;
+ goto out_unwind_tagger;
err = dsa_tree_bind_tag_proto(dst, tag_ops);
if (err)
@@ -1436,6 +1436,7 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
const char *user_protocol;
master = of_find_net_device_by_node(ethernet);
+ of_node_put(ethernet);
if (!master)
return -EPROBE_DEFER;
@@ -1718,7 +1719,6 @@ EXPORT_SYMBOL_GPL(dsa_unregister_switch);
void dsa_switch_shutdown(struct dsa_switch *ds)
{
struct net_device *master, *slave_dev;
- LIST_HEAD(unregister_list);
struct dsa_port *dp;
mutex_lock(&dsa2_mutex);
@@ -1729,25 +1729,13 @@ void dsa_switch_shutdown(struct dsa_switch *ds)
slave_dev = dp->slave;
netdev_upper_dev_unlink(master, slave_dev);
- /* Just unlinking ourselves as uppers of the master is not
- * sufficient. When the master net device unregisters, that will
- * also call dev_close, which we will catch as NETDEV_GOING_DOWN
- * and trigger a dev_close on our own devices (dsa_slave_close).
- * In turn, that will call dev_mc_unsync on the master's net
- * device. If the master is also a DSA switch port, this will
- * trigger dsa_slave_set_rx_mode which will call dev_mc_sync on
- * its own master. Lockdep will complain about the fact that
- * all cascaded masters have the same dsa_master_addr_list_lock_key,
- * which it normally would not do if the cascaded masters would
- * be in a proper upper/lower relationship, which we've just
- * destroyed.
- * To suppress the lockdep warnings, let's actually unregister
- * the DSA slave interfaces too, to avoid the nonsensical
- * multicast address list synchronization on shutdown.
- */
- unregister_netdevice_queue(slave_dev, &unregister_list);
}
- unregister_netdevice_many(&unregister_list);
+
+ /* Disconnect from further netdevice notifiers on the master,
+ * since netdev_uses_dsa() will now return false.
+ */
+ dsa_switch_for_each_cpu_port(dp, ds)
+ dp->master->dsa_ptr = NULL;
rtnl_unlock();
mutex_unlock(&dsa2_mutex);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 760306f0012f..23c79e91ac67 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -147,7 +147,6 @@ void dsa_tag_driver_put(const struct dsa_device_ops *ops);
const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf);
bool dsa_schedule_work(struct work_struct *work);
-void dsa_flush_workqueue(void);
const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops);
static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops)
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 2199104ca7df..880f910b23a9 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -260,11 +260,16 @@ static void dsa_netdev_ops_set(struct net_device *dev,
dev->dsa_ptr->netdev_ops = ops;
}
+/* Keep the master always promiscuous if the tagging protocol requires that
+ * (garbles MAC DA) or if it doesn't support unicast filtering, case in which
+ * it would revert to promiscuous mode as soon as we call dev_uc_add() on it
+ * anyway.
+ */
static void dsa_master_set_promiscuity(struct net_device *dev, int inc)
{
const struct dsa_device_ops *ops = dev->dsa_ptr->tag_ops;
- if (!ops->promisc_on_master)
+ if ((dev->priv_flags & IFF_UNICAST_FLT) && !ops->promisc_on_master)
return;
ASSERT_RTNL();
diff --git a/net/dsa/port.c b/net/dsa/port.c
index bd78192e0e47..1a40c52f5a42 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -395,10 +395,17 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
.tree_index = dp->ds->dst->index,
.sw_index = dp->ds->index,
.port = dp->index,
- .bridge = *dp->bridge,
};
int err;
+ /* If the port could not be offloaded to begin with, then
+ * there is nothing to do.
+ */
+ if (!dp->bridge)
+ return;
+
+ info.bridge = *dp->bridge;
+
/* Here the port is already unbridged. Reflect the current configuration
* so that drivers can program their chips accordingly.
*/
@@ -781,9 +788,15 @@ int dsa_port_host_fdb_add(struct dsa_port *dp, const unsigned char *addr,
struct dsa_port *cpu_dp = dp->cpu_dp;
int err;
- err = dev_uc_add(cpu_dp->master, addr);
- if (err)
- return err;
+ /* Avoid a call to __dev_set_promiscuity() on the master, which
+ * requires rtnl_lock(), since we can't guarantee that is held here,
+ * and we can't take it either.
+ */
+ if (cpu_dp->master->priv_flags & IFF_UNICAST_FLT) {
+ err = dev_uc_add(cpu_dp->master, addr);
+ if (err)
+ return err;
+ }
return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_ADD, &info);
}
@@ -800,9 +813,11 @@ int dsa_port_host_fdb_del(struct dsa_port *dp, const unsigned char *addr,
struct dsa_port *cpu_dp = dp->cpu_dp;
int err;
- err = dev_uc_del(cpu_dp->master, addr);
- if (err)
- return err;
+ if (cpu_dp->master->priv_flags & IFF_UNICAST_FLT) {
+ err = dev_uc_del(cpu_dp->master, addr);
+ if (err)
+ return err;
+ }
return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_DEL, &info);
}
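
The IFF_UNICAST_FLT guard works together with the master.c change above: masters without unicast filtering are kept permanently promiscuous, so skipping dev_uc_add()/dev_uc_del() on them loses no traffic and avoids needing rtnl_lock in this path. The guard as a standalone helper (hypothetical wrapper):

#include <linux/netdevice.h>

/* Only sync a unicast address to devices that actually filter
 * unicast; others are already promiscuous and see everything.
 */
static int host_uc_add(struct net_device *master, const unsigned char *addr)
{
        if (!(master->priv_flags & IFF_UNICAST_FLT))
                return 0;
        return dev_uc_add(master, addr);
}
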
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index cb548188f813..98d7d7120bab 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -77,7 +77,6 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
{
- __be16 *lan9303_tag;
u16 lan9303_tag1;
unsigned int source_port;
@@ -87,14 +86,15 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
return NULL;
}
- lan9303_tag = dsa_etype_header_pos_rx(skb);
-
- if (lan9303_tag[0] != htons(ETH_P_8021Q)) {
- dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid VLAN marker\n");
- return NULL;
+ if (skb_vlan_tag_present(skb)) {
+ lan9303_tag1 = skb_vlan_tag_get(skb);
+ __vlan_hwaccel_clear_tag(skb);
+ } else {
+ skb_push_rcsum(skb, ETH_HLEN);
+ __skb_vlan_pop(skb, &lan9303_tag1);
+ skb_pull_rcsum(skb, ETH_HLEN);
}
- lan9303_tag1 = ntohs(lan9303_tag[1]);
source_port = lan9303_tag1 & 0x3;
skb->dev = dsa_master_find_slave(dev, 0, source_port);
@@ -103,13 +103,6 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
return NULL;
}
- /* remove the special VLAN tag between the MAC addresses
- * and the current ethertype field.
- */
- skb_pull_rcsum(skb, 2 + 2);
-
- dsa_strip_etype_header(skb, LAN9303_TAG_LEN);
-
if (!(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU))
dsa_default_offload_fwd_mark(skb);
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 277124f206e0..e0b072aecf0f 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -1441,7 +1441,7 @@ static int nl802154_send_key(struct sk_buff *msg, u32 cmd, u32 portid,
hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
- return -1;
+ return -ENOBUFS;
if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
@@ -1634,7 +1634,7 @@ static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid,
hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
- return -1;
+ return -ENOBUFS;
if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
@@ -1812,7 +1812,7 @@ static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid,
hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
- return -1;
+ return -ENOBUFS;
if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
@@ -1988,7 +1988,7 @@ static int nl802154_send_seclevel(struct sk_buff *msg, u32 cmd, u32 portid,
hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
- return -1;
+ return -ENOBUFS;
if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9c465bac1eb0..72fde2888ad2 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1376,8 +1376,11 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
}
ops = rcu_dereference(inet_offloads[proto]);
- if (likely(ops && ops->callbacks.gso_segment))
+ if (likely(ops && ops->callbacks.gso_segment)) {
segs = ops->callbacks.gso_segment(skb, features);
+ if (!segs)
+ skb->network_header = skb_mac_header(skb) + nhoff - skb->head;
+ }
if (IS_ERR_OR_NULL(segs))
goto out;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 851f542928a3..70e6c87fbe3d 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -446,6 +446,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
struct page *page;
struct sk_buff *trailer;
int tailen = esp->tailen;
+ unsigned int allocsz;
/* this is non-NULL only with TCP/UDP Encapsulation */
if (x->encap) {
@@ -455,6 +456,10 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
return err;
}
+ allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES);
+ if (allocsz > ESP_SKB_FRAG_MAXSIZE)
+ goto cow;
+
if (!skb_cloned(skb)) {
if (tailen <= skb_tailroom(skb)) {
nfrags = 1;
@@ -671,7 +676,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
u32 padto;
- padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached));
+ padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
if (skb->len < padto)
esp.tfclen = padto - skb->len;
}
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index d87f02a6e934..935026f4c807 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -110,8 +110,7 @@ static struct sk_buff *xfrm4_tunnel_gso_segment(struct xfrm_state *x,
struct sk_buff *skb,
netdev_features_t features)
{
- __skb_push(skb, skb->mac_len);
- return skb_mac_gso_segment(skb, features);
+ return skb_eth_gso_segment(skb, features, htons(ETH_P_IP));
}
static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x,
@@ -160,6 +159,9 @@ static struct sk_buff *xfrm4_beet_gso_segment(struct xfrm_state *x,
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
}
+ if (proto == IPPROTO_IPV6)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4;
+
__skb_pull(skb, skb_transport_offset(skb));
ops = rcu_dereference(inet_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment))
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4d61ddd8a0ec..85117b45216d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -436,6 +436,9 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
if (net->ipv4.fib_has_custom_local_routes ||
fib4_has_custom_rules(net))
goto full_check;
+ /* A packet looped back within the same container is regarded as a
+ * martian source; packets between different containers on the same
+ * host are not.
+ */
if (inet_lookup_ifaddr_rcu(net, src))
return -EINVAL;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index e184bcb19943..78e40ea42e58 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -16,10 +16,9 @@ struct fib_alias {
u8 fa_slen;
u32 tb_id;
s16 fa_default;
- u8 offload:1,
- trap:1,
- offload_failed:1,
- unused:5;
+ u8 offload;
+ u8 trap;
+ u8 offload_failed;
struct rcu_head rcu;
};
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index b4589861b84c..2dd375f7407b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -525,9 +525,9 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
fri.dst_len = dst_len;
fri.tos = fa->fa_tos;
fri.type = fa->fa_type;
- fri.offload = fa->offload;
- fri.trap = fa->trap;
- fri.offload_failed = fa->offload_failed;
+ fri.offload = READ_ONCE(fa->offload);
+ fri.trap = READ_ONCE(fa->trap);
+ fri.offload_failed = READ_ONCE(fa->offload_failed);
err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 8060524f4256..f7f74d5c14da 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1047,19 +1047,23 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
if (!fa_match)
goto out;
- if (fa_match->offload == fri->offload && fa_match->trap == fri->trap &&
- fa_match->offload_failed == fri->offload_failed)
+ /* These are paired with the WRITE_ONCE() happening in this function.
+ * The reason is that we are only protected by RCU at this point.
+ */
+ if (READ_ONCE(fa_match->offload) == fri->offload &&
+ READ_ONCE(fa_match->trap) == fri->trap &&
+ READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
goto out;
- fa_match->offload = fri->offload;
- fa_match->trap = fri->trap;
+ WRITE_ONCE(fa_match->offload, fri->offload);
+ WRITE_ONCE(fa_match->trap, fri->trap);
/* 2 means send notifications only if offload_failed was changed. */
if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 &&
- fa_match->offload_failed == fri->offload_failed)
+ READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
goto out;
- fa_match->offload_failed = fri->offload_failed;
+ WRITE_ONCE(fa_match->offload_failed, fri->offload_failed);
if (!net->ipv4.sysctl_fib_notify_on_flag_change)
goto out;
@@ -2297,9 +2301,9 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
fri.dst_len = KEYLENGTH - fa->fa_slen;
fri.tos = fa->fa_tos;
fri.type = fa->fa_type;
- fri.offload = fa->offload;
- fri.trap = fa->trap;
- fri.offload_failed = fa->offload_failed;
+ fri.offload = READ_ONCE(fa->offload);
+ fri.trap = READ_ONCE(fa->trap);
+ fri.offload_failed = READ_ONCE(fa->offload_failed);
err = fib_dump_info(skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 57c1d8431386..7911916a480b 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -162,12 +162,19 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
- if (ip_dont_fragment(sk, &rt->dst)) {
+ /* Do not bother generating IPID for small packets (eg SYNACK) */
+ if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) {
iph->frag_off = htons(IP_DF);
iph->id = 0;
} else {
iph->frag_off = 0;
- __ip_select_ident(net, iph, 1);
+ /* TCP packets here are SYNACK with fat IPv4/TCP options.
+ * Avoid using the hashed IP ident generator.
+ */
+ if (sk->sk_protocol == IPPROTO_TCP)
+ iph->id = (__force __be16)prandom_u32();
+ else
+ __ip_select_ident(net, iph, 1);
}
if (opt && opt->opt.optlen) {
@@ -825,15 +832,24 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
/* Everything is OK. Generate! */
ip_fraglist_init(skb, iph, hlen, &iter);
- if (iter.frag)
- ip_options_fragment(iter.frag);
-
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
if (iter.frag) {
+ bool first_frag = (iter.offset == 0);
+
IPCB(iter.frag)->flags = IPCB(skb)->flags;
ip_fraglist_prepare(skb, &iter);
+ if (first_frag && IPCB(skb)->opt.optlen) {
+ /* ipcb->opt is not populated for frags
+ * coming from __ip_make_skb(),
+ * ip_options_fragment() needs optlen
+ */
+ IPCB(iter.frag)->opt.optlen =
+ IPCB(skb)->opt.optlen;
+ ip_options_fragment(iter.frag);
+ ip_send_check(iter.iph);
+ }
}
skb->tstamp = tstamp;
@@ -975,7 +991,7 @@ static int __ip_append_data(struct sock *sk,
if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
- tskey = sk->sk_tskey++;
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
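
Converting sk_tskey to atomic_t closes a race where two senders corking data concurrently could both read the same key before either incremented it; atomic_inc_return() makes increment-and-fetch one indivisible step, and subtracting one recovers the pre-increment key the old code handed out. Sketch:

#include <linux/atomic.h>

/* Equivalent of the old "tskey = sk->sk_tskey++", made race-free:
 * each caller gets a unique old value.
 */
static u32 next_tskey(atomic_t *tskey)
{
        return atomic_inc_return(tskey) - 1;
}
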
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 07274619b9ea..29bbe2b08ae9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -256,7 +256,9 @@ static int __net_init ipmr_rules_init(struct net *net)
return 0;
err2:
+ rtnl_lock();
ipmr_free_table(mrt);
+ rtnl_unlock();
err1:
fib_rules_unregister(ops);
return err;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 67087f95579f..aab384126f61 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -58,10 +58,6 @@ config NF_TABLES_ARP
endif # NF_TABLES
-config NF_FLOW_TABLE_IPV4
- tristate
- select NF_FLOW_TABLE_INET
-
config NF_DUP_IPV4
tristate "Netfilter IPv4 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 0e56df3a45e2..3ee947557b88 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -172,16 +172,22 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
struct sock *sk = NULL;
struct inet_sock *isk;
struct hlist_nulls_node *hnode;
- int dif = skb->dev->ifindex;
+ int dif, sdif;
if (skb->protocol == htons(ETH_P_IP)) {
+ dif = inet_iif(skb);
+ sdif = inet_sdif(skb);
pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
(int)ident, &ip_hdr(skb)->daddr, dif);
#if IS_ENABLED(CONFIG_IPV6)
} else if (skb->protocol == htons(ETH_P_IPV6)) {
+ dif = inet6_iif(skb);
+ sdif = inet6_sdif(skb);
pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n",
(int)ident, &ipv6_hdr(skb)->daddr, dif);
#endif
+ } else {
+ return NULL;
}
read_lock_bh(&ping_table.lock);
@@ -220,7 +226,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
continue;
}
- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
+ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
+ sk->sk_bound_dev_if != sdif)
continue;
sock_hold(sk);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 9eb5fc247868..9f97b9cbf7b3 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -722,6 +722,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
int ret = -EINVAL;
int chk_addr_ret;
+ lock_sock(sk);
if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
goto out;
@@ -741,7 +742,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_saddr = 0; /* Use device */
sk_dst_reset(sk);
ret = 0;
-out: return ret;
+out:
+ release_sock(sk);
+ return ret;
}
/*
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ff6f91cdb6c4..f33ad1f383b6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3395,8 +3395,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fa->fa_tos == fri.tos &&
fa->fa_info == res.fi &&
fa->fa_type == fri.type) {
- fri.offload = fa->offload;
- fri.trap = fa->trap;
+ fri.offload = READ_ONCE(fa->offload);
+ fri.trap = READ_ONCE(fa->trap);
break;
}
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3b75836db19b..28ff2a820f7c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -842,6 +842,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
}
release_sock(sk);
+ sk_defer_free_flush(sk);
if (spliced)
return spliced;
@@ -936,6 +937,22 @@ void tcp_remove_empty_skb(struct sock *sk)
}
}
+/* skb changing from pure zc to mixed, must charge zc */
+static int tcp_downgrade_zcopy_pure(struct sock *sk, struct sk_buff *skb)
+{
+ if (unlikely(skb_zcopy_pure(skb))) {
+ u32 extra = skb->truesize -
+ SKB_TRUESIZE(skb_end_offset(skb));
+
+ if (!sk_wmem_schedule(sk, extra))
+ return -ENOMEM;
+
+ sk_mem_charge(sk, extra);
+ skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
+ }
+ return 0;
+}
+
static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
struct page *page, int offset, size_t *size)
{
@@ -971,7 +988,7 @@ new_segment:
tcp_mark_push(tp, skb);
goto new_segment;
}
- if (!sk_wmem_schedule(sk, copy))
+ if (tcp_downgrade_zcopy_pure(sk, skb) || !sk_wmem_schedule(sk, copy))
return NULL;
if (can_coalesce) {
@@ -1319,16 +1336,8 @@ new_segment:
copy = min_t(int, copy, pfrag->size - pfrag->offset);
- /* skb changing from pure zc to mixed, must charge zc */
- if (unlikely(skb_zcopy_pure(skb))) {
- if (!sk_wmem_schedule(sk, skb->data_len))
- goto wait_for_space;
-
- sk_mem_charge(sk, skb->data_len);
- skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
- }
-
- if (!sk_wmem_schedule(sk, copy))
+ if (tcp_downgrade_zcopy_pure(sk, skb) ||
+ !sk_wmem_schedule(sk, copy))
goto wait_for_space;
err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
@@ -1675,11 +1684,13 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
if (!copied)
copied = used;
break;
- } else if (used <= len) {
- seq += used;
- copied += used;
- offset += used;
}
+ if (WARN_ON_ONCE(used > len))
+ used = len;
+ seq += used;
+ copied += used;
+ offset += used;
+
/* If recv_actor drops the lock (e.g. TCP splice
* receive) the skb pointer might be invalid when
* getting here: tcp_collapse might have deleted it
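
tcp_downgrade_zcopy_pure() charges the truesize a pure-zerocopy skb was never billed for before clearing SKBFL_PURE_ZEROCOPY; unlike the old inline code, it uses the full truesize delta rather than data_len. A paraphrase of that computation, under the assumption that a pure zc skb was only charged SKB_TRUESIZE() of its linear buffer:

#include <linux/skbuff.h>

/* Bytes a pure-zerocopy skb still owes the socket's memory
 * accounting: everything beyond its own linear buffer.
 */
static u32 zcopy_uncharged_bytes(const struct sk_buff *skb)
{
        return skb->truesize - SKB_TRUESIZE(skb_end_offset(skb));
}
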
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index dc49a3d551eb..bfe4112e000c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1660,6 +1660,8 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
(mss != tcp_skb_seglen(skb)))
goto out;
+ if (!tcp_skb_can_collapse(prev, skb))
+ goto out;
len = skb->len;
pcount = tcp_skb_pcount(skb);
if (tcp_skb_shift(prev, skb, pcount, len))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b53476e78c84..fec656f5a39e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2095,7 +2095,7 @@ process:
nf_reset_ct(skb);
if (tcp_filter(sk, skb)) {
- drop_reason = SKB_DROP_REASON_TCP_FILTER;
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
goto discard_and_relse;
}
th = (const struct tcphdr *)skb->data;
diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index b91003538d87..bc3a043a5d5c 100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -846,7 +846,7 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
list_for_each_entry(node, &info->shared->devices, list)
if (node->dev == dev)
break;
- if (node->dev != dev)
+ if (list_entry_is_head(node, &info->shared->devices, list))
return;
list_del(&node->list);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3eee17790a82..f908e2fd30b2 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1839,8 +1839,8 @@ out:
}
EXPORT_SYMBOL(ipv6_dev_get_saddr);
-int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
- u32 banned_flags)
+static int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
+ u32 banned_flags)
{
struct inet6_ifaddr *ifp;
int err = -EADDRNOTAVAIL;
@@ -2589,7 +2589,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
__u32 valid_lft, u32 prefered_lft)
{
struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, addr, dev, 1);
- int create = 0;
+ int create = 0, update_lft = 0;
if (!ifp && valid_lft) {
int max_addresses = in6_dev->cnf.max_addresses;
@@ -2633,19 +2633,32 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
unsigned long now;
u32 stored_lft;
- /* Update lifetime (RFC4862 5.5.3 e)
- * We deviate from RFC4862 by honoring all Valid Lifetimes to
- * improve the reaction of SLAAC to renumbering events
- * (draft-gont-6man-slaac-renum-06, Section 4.2)
- */
+ /* update lifetime (RFC2462 5.5.3 e) */
spin_lock_bh(&ifp->lock);
now = jiffies;
if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
else
stored_lft = 0;
-
if (!create && stored_lft) {
+ const u32 minimum_lft = min_t(u32,
+ stored_lft, MIN_VALID_LIFETIME);
+ valid_lft = max(valid_lft, minimum_lft);
+
+ /* RFC4862 Section 5.5.3e:
+ * "Note that the preferred lifetime of the
+ * corresponding address is always reset to
+ * the Preferred Lifetime in the received
+ * Prefix Information option, regardless of
+ * whether the valid lifetime is also reset or
+ * ignored."
+ *
+ * So we should always update prefered_lft here.
+ */
+ update_lft = 1;
+ }
+
+ if (update_lft) {
ifp->valid_lft = valid_lft;
ifp->prefered_lft = prefered_lft;
ifp->tstamp = now;
@@ -3719,6 +3732,7 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
struct inet6_dev *idev;
struct inet6_ifaddr *ifa, *tmp;
bool keep_addr = false;
+ bool was_ready;
int state, i;
ASSERT_RTNL();
@@ -3784,7 +3798,10 @@ restart:
addrconf_del_rs_timer(idev);
- /* Step 2: clear flags for stateless addrconf */
+ /* Step 2: clear flags for stateless addrconf, repeated down
+ * detection
+ */
+ was_ready = idev->if_flags & IF_READY;
if (!unregister)
idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
@@ -3858,7 +3875,7 @@ restart:
if (unregister) {
ipv6_ac_destroy_dev(idev);
ipv6_mc_destroy_dev(idev);
- } else {
+ } else if (was_ready) {
ipv6_mc_down(idev);
}
@@ -4985,6 +5002,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
goto error;
+ spin_lock_bh(&ifa->lock);
if (!((ifa->flags&IFA_F_PERMANENT) &&
(ifa->prefered_lft == INFINITY_LIFE_TIME))) {
preferred = ifa->prefered_lft;
@@ -5006,6 +5024,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
preferred = INFINITY_LIFE_TIME;
valid = INFINITY_LIFE_TIME;
}
+ spin_unlock_bh(&ifa->lock);
if (!ipv6_addr_any(&ifa->peer_addr)) {
if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 ||
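
The addrconf revert restores the RFC 4862 5.5.3(e) clamp: a router advertisement can always lower a stored valid lifetime, but never below min(stored, 2 hours), which bounds how quickly an off-path attacker can expire an address. Worked sketch of the arithmetic (MIN_VALID_LIFETIME is 2*3600 in addrconf.c):

#include <linux/minmax.h>

/* stored_lft = 86400 (1 day), advertised valid_lft = 60:
 *   minimum_lft = min(86400, 7200) = 7200
 *   result      = max(60, 7200)    = 7200, i.e. clamped to 2h
 */
static u32 clamp_valid_lft(u32 valid_lft, u32 stored_lft)
{
        u32 minimum_lft = min_t(u32, stored_lft, 7200);

        return max(valid_lft, minimum_lft);
}
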
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 8bb2c407b46b..55d604c9b3b3 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -482,6 +482,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
struct page *page;
struct sk_buff *trailer;
int tailen = esp->tailen;
+ unsigned int allocsz;
if (x->encap) {
int err = esp6_output_encap(x, skb, esp);
@@ -490,6 +491,10 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
return err;
}
+ allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES);
+ if (allocsz > ESP_SKB_FRAG_MAXSIZE)
+ goto cow;
+
if (!skb_cloned(skb)) {
if (tailen <= skb_tailroom(skb)) {
nfrags = 1;
@@ -707,7 +712,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
u32 padto;
- padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached));
+ padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
if (skb->len < padto)
esp.tfclen = padto - skb->len;
}
@@ -807,8 +812,7 @@ int esp6_input_done2(struct sk_buff *skb, int err)
struct tcphdr *th;
offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off);
-
- if (offset < 0) {
+ if (offset == -1) {
err = -EINVAL;
goto out;
}
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index ba5e81cd569c..3a293838a91d 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -145,8 +145,7 @@ static struct sk_buff *xfrm6_tunnel_gso_segment(struct xfrm_state *x,
struct sk_buff *skb,
netdev_features_t features)
{
- __skb_push(skb, skb->mac_len);
- return skb_mac_gso_segment(skb, features);
+ return skb_eth_gso_segment(skb, features, htons(ETH_P_IPV6));
}
static struct sk_buff *xfrm6_transport_gso_segment(struct xfrm_state *x,
@@ -199,6 +198,9 @@ static struct sk_buff *xfrm6_beet_gso_segment(struct xfrm_state *x,
ipv6_skip_exthdr(skb, 0, &proto, &frag);
}
+ if (proto == IPPROTO_IPIP)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP6;
+
__skb_pull(skb, skb_transport_offset(skb));
ops = rcu_dereference(inet6_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment))
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 463c37dea449..413f66781e50 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -112,7 +112,7 @@ void fib6_update_sernum(struct net *net, struct fib6_info *f6i)
fn = rcu_dereference_protected(f6i->fib6_node,
lockdep_is_held(&f6i->fib6_table->tb6_lock));
if (fn)
- fn->fn_sernum = fib6_new_sernum(net);
+ WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net));
}
/*
@@ -590,12 +590,13 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
spin_unlock_bh(&table->tb6_lock);
if (res > 0) {
cb->args[4] = 1;
- cb->args[5] = w->root->fn_sernum;
+ cb->args[5] = READ_ONCE(w->root->fn_sernum);
}
} else {
- if (cb->args[5] != w->root->fn_sernum) {
+ int sernum = READ_ONCE(w->root->fn_sernum);
+ if (cb->args[5] != sernum) {
/* Begin at the root if the tree changed */
- cb->args[5] = w->root->fn_sernum;
+ cb->args[5] = sernum;
w->state = FWS_INIT;
w->node = w->root;
w->skip = w->count;
@@ -1345,7 +1346,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
/* paired with smp_rmb() in fib6_get_cookie_safe() */
smp_wmb();
while (fn) {
- fn->fn_sernum = sernum;
+ WRITE_ONCE(fn->fn_sernum, sernum);
fn = rcu_dereference_protected(fn->parent,
lockdep_is_held(&rt->fib6_table->tb6_lock));
}
@@ -2174,8 +2175,8 @@ static int fib6_clean_node(struct fib6_walker *w)
};
if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
- w->node->fn_sernum != c->sernum)
- w->node->fn_sernum = c->sernum;
+ READ_ONCE(w->node->fn_sernum) != c->sernum)
+ WRITE_ONCE(w->node->fn_sernum, c->sernum);
if (!c->func) {
WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE);
@@ -2543,7 +2544,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
iter->w.state = FWS_INIT;
iter->w.node = iter->w.root;
iter->w.args = iter;
- iter->sernum = iter->w.root->fn_sernum;
+ iter->sernum = READ_ONCE(iter->w.root->fn_sernum);
INIT_LIST_HEAD(&iter->w.lh);
fib6_walker_link(net, &iter->w);
}
@@ -2571,8 +2572,10 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
{
- if (iter->sernum != iter->w.root->fn_sernum) {
- iter->sernum = iter->w.root->fn_sernum;
+ int sernum = READ_ONCE(iter->w.root->fn_sernum);
+
+ if (iter->sernum != sernum) {
+ iter->sernum = sernum;
iter->w.state = FWS_INIT;
iter->w.node = iter->w.root;
WARN_ON(iter->w.skip);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index aa673a6a7e43..ceb85c67ce39 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -450,8 +450,10 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
err = -EINVAL;
goto done;
}
- if (fl_shared_exclusive(fl) || fl->opt)
+ if (fl_shared_exclusive(fl) || fl->opt) {
+ WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1);
static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
+ }
return fl;
done:
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index b29e9ba5e113..5f577e21459b 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -114,6 +114,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
if (likely(ops && ops->callbacks.gso_segment)) {
skb_reset_transport_header(skb);
segs = ops->callbacks.gso_segment(skb, features);
+ if (!segs)
+ skb->network_header = skb_mac_header(skb) + nhoff - skb->head;
}
if (IS_ERR_OR_NULL(segs))
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2995f8d89e7e..194832663d85 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1408,8 +1408,6 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
if (np->frag_size)
mtu = np->frag_size;
}
- if (mtu < IPV6_MIN_MTU)
- return -EINVAL;
cork->base.fragsize = mtu;
cork->base.gso_size = ipc6->gso_size;
cork->base.tx_flags = 0;
@@ -1465,14 +1463,12 @@ static int __ip6_append_data(struct sock *sk,
if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
- tskey = sk->sk_tskey++;
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
(opt ? opt->opt_nflen : 0);
- maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
- sizeof(struct frag_hdr);
headersize = sizeof(struct ipv6hdr) +
(opt ? opt->opt_flen + opt->opt_nflen : 0) +
@@ -1480,6 +1476,13 @@ static int __ip6_append_data(struct sock *sk,
sizeof(struct frag_hdr) : 0) +
rt->rt6i_nfheader_len;
+ if (mtu <= fragheaderlen ||
+ ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
+ goto emsgsize;
+
+ maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
+ sizeof(struct frag_hdr);
+
/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
* the first fragment
*/
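
Dropping the IPV6_MIN_MTU check from ip6_setup_cork() means __ip6_append_data() can now see an mtu smaller than its own headers, so maxfraglen is only computed after an explicit geometry check that would otherwise underflow. The guard as a standalone predicate:

#include <net/ipv6.h>

/* False when the cork mtu cannot hold even one 8-byte-aligned
 * fragment; computing maxfraglen first would wrap the unsigned
 * subtraction.
 */
static bool ip6_frag_geometry_ok(unsigned int mtu, unsigned int fragheaderlen)
{
        if (mtu <= fragheaderlen)
                return false;
        return ((mtu - fragheaderlen) & ~7) + fragheaderlen >
               sizeof(struct frag_hdr);
}
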
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index fe786df4f849..97ade833f58c 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1036,14 +1036,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
0, IFA_F_TENTATIVE)))
- pr_warn("%s xmit: Local address not yet configured!\n",
- p->name);
+ pr_warn_ratelimited("%s xmit: Local address not yet configured!\n",
+ p->name);
else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
!ipv6_addr_is_multicast(raddr) &&
unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
true, 0, IFA_F_TENTATIVE)))
- pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
- p->name);
+ pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n",
+ p->name);
else
ret = 1;
rcu_read_unlock();
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7cf73e60e619..8a2db926b5eb 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -243,7 +243,9 @@ static int __net_init ip6mr_rules_init(struct net *net)
return 0;
err2:
+ rtnl_lock();
ip6mr_free_table(mrt);
+ rtnl_unlock();
err1:
fib_rules_unregister(ops);
return err;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index bed8155508c8..909f937befd7 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1371,27 +1371,23 @@ static void mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
}
/* called with rcu_read_lock() */
-int igmp6_event_query(struct sk_buff *skb)
+void igmp6_event_query(struct sk_buff *skb)
{
struct inet6_dev *idev = __in6_dev_get(skb->dev);
- if (!idev)
- return -EINVAL;
-
- if (idev->dead) {
- kfree_skb(skb);
- return -ENODEV;
- }
+ if (!idev || idev->dead)
+ goto out;
spin_lock_bh(&idev->mc_query_lock);
if (skb_queue_len(&idev->mc_query_queue) < MLD_MAX_SKBS) {
__skb_queue_tail(&idev->mc_query_queue, skb);
if (!mod_delayed_work(mld_wq, &idev->mc_query_work, 0))
in6_dev_hold(idev);
+ skb = NULL;
}
spin_unlock_bh(&idev->mc_query_lock);
-
- return 0;
+out:
+ kfree_skb(skb);
}
static void __mld_query_work(struct sk_buff *skb)
@@ -1542,27 +1538,23 @@ static void mld_query_work(struct work_struct *work)
}
/* called with rcu_read_lock() */
-int igmp6_event_report(struct sk_buff *skb)
+void igmp6_event_report(struct sk_buff *skb)
{
struct inet6_dev *idev = __in6_dev_get(skb->dev);
- if (!idev)
- return -EINVAL;
-
- if (idev->dead) {
- kfree_skb(skb);
- return -ENODEV;
- }
+ if (!idev || idev->dead)
+ goto out;
spin_lock_bh(&idev->mc_report_lock);
if (skb_queue_len(&idev->mc_report_queue) < MLD_MAX_SKBS) {
__skb_queue_tail(&idev->mc_report_queue, skb);
if (!mod_delayed_work(mld_wq, &idev->mc_report_work, 0))
in6_dev_hold(idev);
+ skb = NULL;
}
spin_unlock_bh(&idev->mc_report_lock);
-
- return 0;
+out:
+ kfree_skb(skb);
}
static void __mld_report_work(struct sk_buff *skb)
@@ -1759,7 +1751,7 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
skb_reserve(skb, hlen);
skb_tailroom_reserve(skb, mtu, tlen);
- if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
+ if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
* use unspecified address as the source address
* when a valid link-local address is not available.
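
Both MLD handlers now follow a single-exit ownership pattern: once the skb is queued the local pointer is NULLed, so the kfree_skb() at the shared out: label only frees on the paths that never took ownership (kfree_skb(NULL) is a no-op). The pattern with hypothetical names:

#include <linux/skbuff.h>
#include <linux/spinlock.h>

struct my_dev {                                 /* hypothetical device state */
        bool dead;
        spinlock_t lock;
        struct sk_buff_head queue;
};

static void queue_or_free(struct my_dev *d, struct sk_buff *skb)
{
        if (!d || d->dead)
                goto out;

        spin_lock_bh(&d->lock);
        if (skb_queue_len(&d->queue) < 64) {    /* cf. MLD_MAX_SKBS */
                __skb_queue_tail(&d->queue, skb);
                skb = NULL;                     /* queue owns it now */
        }
        spin_unlock_bh(&d->lock);
out:
        kfree_skb(skb);                         /* no-op when skb == NULL */
}
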
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 97d3d1b36dbc..0ba62f4868f9 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -47,10 +47,6 @@ config NFT_FIB_IPV6
endif # NF_TABLES_IPV6
endif # NF_TABLES
-config NF_FLOW_TABLE_IPV6
- tristate
- select NF_FLOW_TABLE_INET
-
config NF_DUP_IPV6
tristate "Netfilter IPv6 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index b85383606df7..b8d6dc9aeeb6 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -28,9 +28,6 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
-# flow table support
-obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o
-
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ /dev/null
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e6de94203c13..ea1cf414a92e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2802,7 +2802,7 @@ static void ip6_link_failure(struct sk_buff *skb)
if (from) {
fn = rcu_dereference(from->fib6_node);
if (fn && (rt->rt6i_flags & RTF_DEFAULT))
- fn->fn_sernum = -1;
+ WRITE_ONCE(fn->fn_sernum, -1);
}
}
rcu_read_unlock();
@@ -5753,11 +5753,11 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
}
if (!dst) {
- if (rt->offload)
+ if (READ_ONCE(rt->offload))
rtm->rtm_flags |= RTM_F_OFFLOAD;
- if (rt->trap)
+ if (READ_ONCE(rt->trap))
rtm->rtm_flags |= RTM_F_TRAP;
- if (rt->offload_failed)
+ if (READ_ONCE(rt->offload_failed))
rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED;
}
@@ -6215,19 +6215,20 @@ void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
struct sk_buff *skb;
int err;
- if (f6i->offload == offload && f6i->trap == trap &&
- f6i->offload_failed == offload_failed)
+ if (READ_ONCE(f6i->offload) == offload &&
+ READ_ONCE(f6i->trap) == trap &&
+ READ_ONCE(f6i->offload_failed) == offload_failed)
return;
- f6i->offload = offload;
- f6i->trap = trap;
+ WRITE_ONCE(f6i->offload, offload);
+ WRITE_ONCE(f6i->trap, trap);
/* 2 means send notifications only if offload_failed was changed. */
if (net->ipv6.sysctl.fib_notify_on_flag_change == 2 &&
- f6i->offload_failed == offload_failed)
+ READ_ONCE(f6i->offload_failed) == offload_failed)
return;
- f6i->offload_failed = offload_failed;
+ WRITE_ONCE(f6i->offload_failed, offload_failed);
if (!rcu_access_pointer(f6i->fib6_node))
/* The route was removed from the tree, do not send
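
The route.c hunks above annotate lockless accesses to fn_sernum and the offload/trap flags. A rough userspace stand-in (GNU C typeof; the real macros live in <asm-generic/rwonce.h> and also feed KCSAN) for what READ_ONCE()/WRITE_ONCE() guarantee:

#include <stdio.h>

/* Simplified stand-in: the volatile cast forces exactly one load or
 * store, so a concurrent reader such as rt6_fill_node() cannot see a
 * torn or re-fetched value while fib6_info_hw_flags_set() updates the
 * flags without the writer's lock.
 */
#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)       (*(volatile __typeof__(x) *)&(x))

static unsigned int offload_flags;

int main(void)
{
	WRITE_ONCE(offload_flags, 0x1);		    /* writer side */
	printf("0x%x\n", READ_ONCE(offload_flags)); /* lockless reader */
	return 0;
}
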
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index d0d280077721..ad07904642ca 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -45,6 +45,19 @@ static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
return xfrm_output(sk, skb);
}
+static int xfrm6_noneed_fragment(struct sk_buff *skb)
+{
+ struct frag_hdr *fh;
+ u8 prevhdr = ipv6_hdr(skb)->nexthdr;
+
+ if (prevhdr != NEXTHDR_FRAGMENT)
+ return 0;
+ fh = (struct frag_hdr *)(skb->data + sizeof(struct ipv6hdr));
+ if (fh->nexthdr == NEXTHDR_ESP || fh->nexthdr == NEXTHDR_AUTH)
+ return 1;
+ return 0;
+}
+
static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -73,6 +86,9 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
xfrm6_local_rxpmtu(skb, mtu);
kfree_skb(skb);
return -EMSGSIZE;
+ } else if (toobig && xfrm6_noneed_fragment(skb)) {
+ skb->ignore_df = 1;
+ goto skip_frag;
} else if (!skb->ignore_df && toobig && skb->sk) {
xfrm_local_error(skb, mtu);
kfree_skb(skb);
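
The new xfrm6_noneed_fragment() helper above checks whether an oversized packet already carries a fragment header protecting ESP/AH payload, in which case refragmentation is skipped. A toy model of that parse (the structs here are simplified 8-byte layouts, not the uapi headers; the constants mirror IPPROTO_FRAGMENT/ESP/AH):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NEXTHDR_FRAGMENT 44
#define NEXTHDR_ESP      50
#define NEXTHDR_AUTH     51

struct ipv6hdr_min { uint8_t vcf[4]; uint8_t plen[2]; uint8_t nexthdr; uint8_t hlim; };
struct frag_hdr_min { uint8_t nexthdr; uint8_t rsv; uint8_t off[2]; uint8_t id[4]; };

/* An already-fragmented ESP/AH packet carries a fragment header right
 * after the IPv6 header, and the fragment header's nexthdr names the
 * protected payload - that is the whole test.
 */
static int noneed_fragment(const uint8_t *pkt)
{
	const struct ipv6hdr_min *ip6 = (const void *)pkt;
	const struct frag_hdr_min *fh;

	if (ip6->nexthdr != NEXTHDR_FRAGMENT)
		return 0;
	fh = (const void *)(pkt + sizeof(*ip6));
	return fh->nexthdr == NEXTHDR_ESP || fh->nexthdr == NEXTHDR_AUTH;
}

int main(void)
{
	uint8_t pkt[sizeof(struct ipv6hdr_min) + sizeof(struct frag_hdr_min)];
	struct ipv6hdr_min *ip6 = (void *)pkt;
	struct frag_hdr_min *fh = (void *)(pkt + sizeof(*ip6));

	memset(pkt, 0, sizeof(pkt));
	ip6->nexthdr = NEXTHDR_FRAGMENT;
	fh->nexthdr = NEXTHDR_ESP;
	printf("skip refragmentation: %d\n", noneed_fragment(pkt));
	return 0;
}
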
diff --git a/net/key/af_key.c b/net/key/af_key.c
index de24a7d474df..fd51db3be91c 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1699,7 +1699,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr)
xfrm_probe_algs();
- supp_skb = compose_sadb_supported(hdr, GFP_KERNEL);
+ supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO);
if (!supp_skb) {
if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC)
pfk->registered &= ~(1<<hdr->sadb_msg_satype);
@@ -2623,7 +2623,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
}
return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i,
- kma ? &k : NULL, net, NULL);
+ kma ? &k : NULL, net, NULL, 0);
out:
return err;
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 74a878f213d3..1deb3d874a4b 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
* Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2021 Intel Corporation
+ * Copyright (C) 2018 - 2022 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -626,6 +626,14 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
return -EINVAL;
}
+ if (test_sta_flag(sta, WLAN_STA_MFP) &&
+ !test_sta_flag(sta, WLAN_STA_AUTHORIZED)) {
+ ht_dbg(sdata,
+ "MFP STA not authorized - deny BA session request %pM tid %d\n",
+ sta->sta.addr, tid);
+ return -EINVAL;
+ }
+
/*
* 802.11n-2009 11.5.1.1: If the initiating STA is an HT STA, is a
* member of an IBSS, and has no other existing Block Ack agreement
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 330ea62231fa..e87bccaab561 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -376,7 +376,7 @@ struct ieee80211_mgd_auth_data {
u8 key[WLAN_KEY_LEN_WEP104];
u8 key_len, key_idx;
- bool done;
+ bool done, waiting;
bool peer_confirmed;
bool timeout_started;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 1eeabdf10052..744842c4513b 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -37,6 +37,7 @@
#define IEEE80211_AUTH_TIMEOUT_SAE (HZ * 2)
#define IEEE80211_AUTH_MAX_TRIES 3
#define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5)
+#define IEEE80211_AUTH_WAIT_SAE_RETRY (HZ * 2)
#define IEEE80211_ASSOC_TIMEOUT (HZ / 5)
#define IEEE80211_ASSOC_TIMEOUT_LONG (HZ / 2)
#define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10)
@@ -666,7 +667,7 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata,
ieee80211_ie_build_he_6ghz_cap(sdata, skb);
}
-static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
+static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -686,6 +687,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
const struct ieee80211_sband_iftype_data *iftd;
struct ieee80211_prep_tx_info info = {};
+ int ret;
/* we know it's writable, cast away the const */
if (assoc_data->ie_len)
@@ -699,7 +701,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
if (WARN_ON(!chanctx_conf)) {
rcu_read_unlock();
- return;
+ return -EINVAL;
}
chan = chanctx_conf->def.chan;
rcu_read_unlock();
@@ -750,7 +752,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
(iftd ? iftd->vendor_elems.len : 0),
GFP_KERNEL);
if (!skb)
- return;
+ return -ENOMEM;
skb_reserve(skb, local->hw.extra_tx_headroom);
@@ -1031,15 +1033,22 @@ skip_rates:
skb_put_data(skb, assoc_data->ie + offset, noffset - offset);
}
- if (assoc_data->fils_kek_len &&
- fils_encrypt_assoc_req(skb, assoc_data) < 0) {
- dev_kfree_skb(skb);
- return;
+ if (assoc_data->fils_kek_len) {
+ ret = fils_encrypt_assoc_req(skb, assoc_data);
+ if (ret < 0) {
+ dev_kfree_skb(skb);
+ return ret;
+ }
}
pos = skb_tail_pointer(skb);
kfree(ifmgd->assoc_req_ies);
ifmgd->assoc_req_ies = kmemdup(ie_start, pos - ie_start, GFP_ATOMIC);
+ if (!ifmgd->assoc_req_ies) {
+ dev_kfree_skb(skb);
+ return -ENOMEM;
+ }
+
ifmgd->assoc_req_ies_len = pos - ie_start;
drv_mgd_prepare_tx(local, sdata, &info);
@@ -1049,6 +1058,8 @@ skip_rates:
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS |
IEEE80211_TX_INTFL_MLME_CONN_TX;
ieee80211_tx_skb(sdata, skb);
+
+ return 0;
}
void ieee80211_send_pspoll(struct ieee80211_local *local,
@@ -3001,8 +3012,15 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
(status_code == WLAN_STATUS_ANTI_CLOG_REQUIRED ||
(auth_transaction == 1 &&
(status_code == WLAN_STATUS_SAE_HASH_TO_ELEMENT ||
- status_code == WLAN_STATUS_SAE_PK))))
+ status_code == WLAN_STATUS_SAE_PK)))) {
+ /* waiting for userspace now */
+ ifmgd->auth_data->waiting = true;
+ ifmgd->auth_data->timeout =
+ jiffies + IEEE80211_AUTH_WAIT_SAE_RETRY;
+ ifmgd->auth_data->timeout_started = true;
+ run_again(sdata, ifmgd->auth_data->timeout);
goto notify_driver;
+ }
sdata_info(sdata, "%pM denied authentication (status %d)\n",
mgmt->sa, status_code);
@@ -4497,6 +4515,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data;
struct ieee80211_local *local = sdata->local;
+ int ret;
sdata_assert_lock(sdata);
@@ -4517,7 +4536,9 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata)
sdata_info(sdata, "associate with %pM (try %d/%d)\n",
assoc_data->bss->bssid, assoc_data->tries,
IEEE80211_ASSOC_MAX_TRIES);
- ieee80211_send_assoc(sdata);
+ ret = ieee80211_send_assoc(sdata);
+ if (ret)
+ return ret;
if (!ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT;
@@ -4590,10 +4611,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
if (ifmgd->auth_data && ifmgd->auth_data->timeout_started &&
time_after(jiffies, ifmgd->auth_data->timeout)) {
- if (ifmgd->auth_data->done) {
+ if (ifmgd->auth_data->done || ifmgd->auth_data->waiting) {
/*
- * ok ... we waited for assoc but userspace didn't,
- * so let's just kill the auth data
+ * ok ... we waited for assoc or continuation but
+ * userspace didn't do it, so kill the auth data
*/
ieee80211_destroy_auth_data(sdata, false);
} else if (ieee80211_auth(sdata)) {
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 93680af62c47..48d9553dafe3 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2607,7 +2607,8 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
* address, so that the authenticator (e.g. hostapd) will see
* the frame, but bridge won't forward it anywhere else. Note
* that due to earlier filtering, the only other address can
- * be the PAE group address.
+ * be the PAE group address, unless the hardware allowed them
+ * through in 802.3 offloaded mode.
*/
if (unlikely(skb->protocol == sdata->control_port_protocol &&
!ether_addr_equal(ehdr->h_dest, sdata->vif.addr)))
@@ -2922,13 +2923,13 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
ether_addr_equal(sdata->vif.addr, hdr->addr3))
return RX_CONTINUE;
- ac = ieee80211_select_queue_80211(sdata, skb, hdr);
+ ac = ieee802_1d_to_ac[skb->priority];
q = sdata->vif.hw_queue[ac];
if (ieee80211_queue_stopped(&local->hw, q)) {
IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion);
return RX_DROP_MONITOR;
}
- skb_set_queue_mapping(skb, q);
+ skb_set_queue_mapping(skb, ac);
if (!--mesh_hdr->ttl) {
if (!is_multicast_ether_addr(hdr->addr1))
@@ -4514,12 +4515,7 @@ static void ieee80211_rx_8023(struct ieee80211_rx_data *rx,
/* deliver to local stack */
skb->protocol = eth_type_trans(skb, fast_rx->dev);
- memset(skb->cb, 0, sizeof(skb->cb));
- if (rx->list)
- list_add_tail(&skb->list, rx->list);
- else
- netif_receive_skb(skb);
-
+ ieee80211_deliver_skb_to_local_stack(skb, rx);
}
static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 8d9f4ff3e285..e52cef750500 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -412,13 +412,14 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* this function.
*/
rc = mctp_key_add(key, msk);
- if (rc)
+ if (rc) {
kfree(key);
+ } else {
+ trace_mctp_key_acquire(key);
- trace_mctp_key_acquire(key);
-
- /* we don't need to release key->lock on exit */
- mctp_key_unref(key);
+ /* we don't need to release key->lock on exit */
+ mctp_key_unref(key);
+ }
key = NULL;
} else {
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 48f75a56f4ae..d6fdc5782d33 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1607,6 +1607,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct mpls_dev *mdev;
unsigned int flags;
+ int err;
if (event == NETDEV_REGISTER) {
mdev = mpls_add_dev(dev);
@@ -1621,7 +1622,6 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
return NOTIFY_OK;
switch (event) {
- int err;
case NETDEV_DOWN:
err = mpls_ifdown(dev, event);
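
The af_mpls.c change above hoists err out of the switch body. A declaration placed between switch (...) { and the first case label is legal C, but any initializer on it would never execute (GCC diagnoses that case with -Wswitch-unreachable), which is why the hoisted form is cleaner; a compilable illustration:

int before(int event)
{
	switch (event) {
		int err;	/* legal, but an "= 0" here would never run */
	case 1:
		err = -1;
		return err;
	default:
		return 0;
	}
}

int after(int event)
{
	int err;		/* hoisted: scope and lifetime are obvious */

	switch (event) {
	case 1:
		err = -1;
		return err;
	default:
		return 0;
	}
}

int main(void)
{
	return before(1) == after(1) ? 0 : 1;
}
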
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 3240b72271a7..7558802a1435 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -35,12 +35,14 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR),
SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD),
SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD),
+ SNMP_MIB_ITEM("AddAddrDrop", MPTCP_MIB_ADDADDRDROP),
SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX),
SNMP_MIB_ITEM("MPJoinPortSynAckRx", MPTCP_MIB_JOINPORTSYNACKRX),
SNMP_MIB_ITEM("MPJoinPortAckRx", MPTCP_MIB_JOINPORTACKRX),
SNMP_MIB_ITEM("MismatchPortSynRx", MPTCP_MIB_MISMATCHPORTSYNRX),
SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX),
SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR),
+ SNMP_MIB_ITEM("RmAddrDrop", MPTCP_MIB_RMADDRDROP),
SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index ecd3d8b117e0..2966fcb6548b 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -28,12 +28,14 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_ADDADDR, /* Received ADD_ADDR with echo-flag=0 */
MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */
MPTCP_MIB_PORTADD, /* Received ADD_ADDR with a port-number */
+ MPTCP_MIB_ADDADDRDROP, /* Dropped incoming ADD_ADDR */
MPTCP_MIB_JOINPORTSYNRX, /* Received a SYN MP_JOIN with a different port-number */
MPTCP_MIB_JOINPORTSYNACKRX, /* Received a SYNACK MP_JOIN with a different port-number */
MPTCP_MIB_JOINPORTACKRX, /* Received an ACK MP_JOIN with a different port-number */
MPTCP_MIB_MISMATCHPORTSYNRX, /* Received a SYN MP_JOIN with a mismatched port-number */
MPTCP_MIB_MISMATCHPORTACKRX, /* Received an ACK MP_JOIN with a mismatched port-number */
MPTCP_MIB_RMADDR, /* Received RM_ADDR */
+ MPTCP_MIB_RMADDRDROP, /* Dropped incoming RM_ADDR */
MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */
MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 696b2c4613a7..7bea318ac5f2 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -213,6 +213,8 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
mptcp_pm_add_addr_send_ack(msk);
} else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
pm->remote = *addr;
+ } else {
+ __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
}
spin_unlock_bh(&pm->lock);
@@ -253,8 +255,10 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
mptcp_event_addr_removed(msk, rm_list->ids[i]);
spin_lock_bh(&pm->lock);
- mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED);
- pm->rm_list_rx = *rm_list;
+ if (mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED))
+ pm->rm_list_rx = *rm_list;
+ else
+ __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_RMADDRDROP);
spin_unlock_bh(&pm->lock);
}
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 75af1f701e1d..4b5d795383cd 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -478,6 +478,20 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
return NULL;
}
+static struct mptcp_pm_addr_entry *
+__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info,
+ bool lookup_by_id)
+{
+ struct mptcp_pm_addr_entry *entry;
+
+ list_for_each_entry(entry, &pernet->local_addr_list, list) {
+ if ((!lookup_by_id && addresses_equal(&entry->addr, info, true)) ||
+ (lookup_by_id && entry->addr.id == info->id))
+ return entry;
+ }
+ return NULL;
+}
+
static int
lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr)
{
@@ -532,6 +546,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
if (msk->pm.add_addr_signaled < add_addr_signal_max) {
local = select_signal_address(pernet, msk);
+ /* due to racing events on both ends we can reach here while a
+ * previous add address is still running: if we invoked
+ * mptcp_pm_announce_addr() now, it would fail and the
+ * corresponding id would be marked as used.
+ * Instead, let the PM machinery reschedule us when the
+ * current address announcement has completed.
+ */
+ if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL))
+ return;
+
if (local) {
if (mptcp_pm_alloc_anno_list(msk, local)) {
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
@@ -636,6 +660,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
unsigned int add_addr_accept_max;
struct mptcp_addr_info remote;
unsigned int subflows_max;
+ bool reset_port = false;
int i, nr;
add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk);
@@ -645,15 +670,19 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
msk->pm.add_addr_accepted, add_addr_accept_max,
msk->pm.remote.family);
- if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote))
+ remote = msk->pm.remote;
+ if (lookup_subflow_by_daddr(&msk->conn_list, &remote))
goto add_addr_echo;
+ /* if the announced address carries no port, use the id 0 subflow's port */
+ if (!remote.port) {
+ reset_port = true;
+ remote.port = sk->sk_dport;
+ }
+
/* connect to the specified remote address, using whatever
* local address the routing configuration will pick.
*/
- remote = msk->pm.remote;
- if (!remote.port)
- remote.port = sk->sk_dport;
nr = fill_local_addresses_vec(msk, addrs);
msk->pm.add_addr_accepted++;
@@ -666,8 +695,12 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
__mptcp_subflow_connect(sk, &addrs[i], &remote);
spin_lock_bh(&msk->pm.lock);
+ /* be sure to echo exactly the received address */
+ if (reset_port)
+ remote.port = 0;
+
add_addr_echo:
- mptcp_pm_announce_addr(msk, &msk->pm.remote, true);
+ mptcp_pm_announce_addr(msk, &remote, true);
mptcp_pm_nl_addr_send_ack(msk);
}
@@ -777,7 +810,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
removed = true;
__MPTCP_INC_STATS(sock_net(sk), rm_type);
}
- __set_bit(rm_list->ids[1], msk->pm.id_avail_bitmap);
+ __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap);
if (!removed)
continue;
@@ -911,6 +944,7 @@ out:
static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
struct mptcp_pm_addr_entry *entry)
{
+ int addrlen = sizeof(struct sockaddr_in);
struct sockaddr_storage addr;
struct mptcp_sock *msk;
struct socket *ssock;
@@ -935,8 +969,11 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
}
mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
- err = kernel_bind(ssock, (struct sockaddr *)&addr,
- sizeof(struct sockaddr_in));
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (entry->addr.family == AF_INET6)
+ addrlen = sizeof(struct sockaddr_in6);
+#endif
+ err = kernel_bind(ssock, (struct sockaddr *)&addr, addrlen);
if (err) {
pr_warn("kernel_bind error, err=%d", err);
goto out;
@@ -1763,18 +1800,21 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
return -EOPNOTSUPP;
}
- list_for_each_entry(entry, &pernet->local_addr_list, list) {
- if ((!lookup_by_id && addresses_equal(&entry->addr, &addr.addr, true)) ||
- (lookup_by_id && entry->addr.id == addr.addr.id)) {
- mptcp_nl_addr_backup(net, &entry->addr, bkup);
-
- if (bkup)
- entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
- else
- entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
- }
+ spin_lock_bh(&pernet->lock);
+ entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
+ if (!entry) {
+ spin_unlock_bh(&pernet->lock);
+ return -EINVAL;
}
+ if (bkup)
+ entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ else
+ entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
+ addr = *entry;
+ spin_unlock_bh(&pernet->lock);
+
+ mptcp_nl_addr_backup(net, &addr.addr, bkup);
return 0;
}
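
The mptcp_nl_cmd_set_flags() rework above replaces an unlocked list walk with a locked lookup plus a copy-out, so the netlink side effect runs without the spinlock held. A sketch of that lock/copy/unlock shape with illustrative names (the real side effect is mptcp_nl_addr_backup()):

#include <pthread.h>

struct entry {
	int id;
	unsigned int flags;
	struct entry *next;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry *list_head;

static void slow_side_effect(const struct entry *snap) { (void)snap; }

/* Find and update the entry under the list lock, snapshot it, drop
 * the lock, and only then call out to code that may sleep or take
 * other locks.
 */
static int set_backup_flag(int id, int bkup)
{
	struct entry snap, *e;

	pthread_mutex_lock(&list_lock);
	for (e = list_head; e; e = e->next)
		if (e->id == id)
			break;
	if (!e) {
		pthread_mutex_unlock(&list_lock);
		return -1;		/* -EINVAL in the real code */
	}
	if (bkup)
		e->flags |= 0x1;
	else
		e->flags &= ~0x1u;
	snap = *e;			/* copy out under the lock */
	pthread_mutex_unlock(&list_lock);

	slow_side_effect(&snap);	/* safe: no lock held */
	return 0;
}

int main(void)
{
	struct entry e0 = { .id = 0, .flags = 0, .next = 0 };

	list_head = &e0;
	return set_backup_flag(0, 1);
}
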
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index f60f01b14fac..1c72f25f083e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -466,9 +466,12 @@ static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
static void mptcp_set_datafin_timeout(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
+ u32 retransmits;
- mptcp_sk(sk)->timer_ival = min(TCP_RTO_MAX,
- TCP_RTO_MIN << icsk->icsk_retransmits);
+ retransmits = min_t(u32, icsk->icsk_retransmits,
+ ilog2(TCP_RTO_MAX / TCP_RTO_MIN));
+
+ mptcp_sk(sk)->timer_ival = TCP_RTO_MIN << retransmits;
}
static void __mptcp_set_timeout(struct sock *sk, long tout)
@@ -3294,6 +3297,17 @@ static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
return 0;
delta = msk->write_seq - v;
+ if (__mptcp_check_fallback(msk) && msk->first) {
+ struct tcp_sock *tp = tcp_sk(msk->first);
+
+ /* the first subflow is disconnected after close - see
+ * __mptcp_close_ssk(). tcp_disconnect() moves the write_seq,
+ * so ignore that status, too.
+ */
+ if (!((1 << msk->first->sk_state) &
+ (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE)))
+ delta += READ_ONCE(tp->write_seq) - tp->snd_una;
+ }
if (delta > INT_MAX)
delta = INT_MAX;
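
The mptcp_set_datafin_timeout() change above clamps the shift count before shifting, instead of shifting first and capping with min(). A small arithmetic sketch of why that ordering matters (RTO_MIN/RTO_MAX are illustrative values, and ilog2_u32() is a local stand-in for the kernel's ilog2()):

#include <stdio.h>

#define RTO_MIN 200
#define RTO_MAX 120000

static unsigned int ilog2_u32(unsigned int v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

/* The old form min(RTO_MAX, RTO_MIN << n) evaluates the shift before
 * min() can cap it, so a large retransmit count shifts past the type
 * width - undefined behaviour.  Clamping the shift count first keeps
 * every step defined and still saturates near RTO_MAX.
 */
static unsigned int datafin_timeout(unsigned int retransmits)
{
	unsigned int max_shift = ilog2_u32(RTO_MAX / RTO_MIN);
	unsigned int shift = retransmits < max_shift ? retransmits
						     : max_shift;

	return RTO_MIN << shift;
}

int main(void)
{
	printf("%u %u\n", datafin_timeout(2), datafin_timeout(64));
	return 0;
}
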
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0e6b42c76ea0..85317ce38e3f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -408,7 +408,7 @@ DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
struct mptcp_subflow_context {
struct list_head node;/* conn_list of subflows */
- char reset_start[0];
+ struct_group(reset,
unsigned long avg_pacing_rate; /* protected by msk socket lock */
u64 local_key;
@@ -458,7 +458,7 @@ struct mptcp_subflow_context {
long delegated_status;
- char reset_end[0];
+ );
struct list_head delegated_node; /* link into delegated_action, protected by local BH */
@@ -494,7 +494,7 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
static inline void
mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
{
- memset(subflow->reset_start, 0, subflow->reset_end - subflow->reset_start);
+ memset(&subflow->reset, 0, sizeof(subflow->reset));
subflow->request_mptcp = 1;
}
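
The protocol.h hunks above replace the reset_start[0]/reset_end[0] zero-length-array markers with struct_group(), so the memset gets a real object with a real size. A simplified userspace stand-in for the kernel macro from <linux/stddef.h> (the real one also takes tag/attribute variants):

#include <stdio.h>
#include <string.h>

/* Members stay directly addressable through the anonymous struct,
 * while the named mirror gives memset() a sizeof()-able object -
 * unlike the old pointer-difference trick, which FORTIFY_SOURCE
 * cannot check.
 */
#define struct_group(NAME, ...)		\
	union {				\
		struct { __VA_ARGS__ };	\
		struct { __VA_ARGS__ } NAME; \
	}

struct subflow {
	int node;		/* untouched by a reset */
	struct_group(reset,
		unsigned long avg_pacing_rate;
		unsigned long long local_key;
	);
};

int main(void)
{
	struct subflow s;

	s.node = 1;
	s.avg_pacing_rate = 2;
	s.local_key = 3;
	memset(&s.reset, 0, sizeof(s.reset));	/* clears the group only */
	printf("%d %lu %llu\n", s.node, s.avg_pacing_rate, s.local_key);
	return 0;
}
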
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 354cb472f386..8a77a3fd69bc 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -428,14 +428,15 @@ static int __nf_register_net_hook(struct net *net, int pf,
p = nf_entry_dereference(*pp);
new_hooks = nf_hook_entries_grow(p, reg);
- if (!IS_ERR(new_hooks))
+ if (!IS_ERR(new_hooks)) {
+ hooks_validate(new_hooks);
rcu_assign_pointer(*pp, new_hooks);
+ }
mutex_unlock(&nf_hook_mutex);
if (IS_ERR(new_hooks))
return PTR_ERR(new_hooks);
- hooks_validate(new_hooks);
#ifdef CONFIG_NETFILTER_INGRESS
if (nf_ingress_hook(reg, pf))
net_inc_ingress_queue();
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 894a325d39f2..bf1e17c678f1 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1748,9 +1748,6 @@ resolve_normal_ct(struct nf_conn *tmpl,
return 0;
if (IS_ERR(h))
return PTR_ERR(h);
-
- ct = nf_ct_tuplehash_to_ctrack(h);
- ct->local_origin = state->hook == NF_INET_LOCAL_OUT;
}
ct = nf_ct_tuplehash_to_ctrack(h);
@@ -1924,15 +1921,17 @@ repeat:
pr_debug("nf_conntrack_in: Can't track with proto module\n");
nf_ct_put(ct);
skb->_nfct = 0;
- NF_CT_STAT_INC_ATOMIC(state->net, invalid);
- if (ret == -NF_DROP)
- NF_CT_STAT_INC_ATOMIC(state->net, drop);
/* Special case: TCP tracker reports an attempt to reopen a
* closed/aborted connection. We have to go back and create a
* fresh conntrack.
*/
if (ret == -NF_REPEAT)
goto repeat;
+
+ NF_CT_STAT_INC_ATOMIC(state->net, invalid);
+ if (ret == -NF_DROP)
+ NF_CT_STAT_INC_ATOMIC(state->net, drop);
+
ret = -ret;
goto out;
}
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 7f19ee259609..55415f011943 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -20,13 +20,14 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
+#define HELPER_NAME "netbios-ns"
#define NMBD_PORT 137
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ip_conntrack_netbios_ns");
-MODULE_ALIAS_NFCT_HELPER("netbios_ns");
+MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
static unsigned int timeout __read_mostly = 3;
module_param(timeout, uint, 0400);
@@ -44,7 +45,7 @@ static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff,
}
static struct nf_conntrack_helper helper __read_mostly = {
- .name = "netbios-ns",
+ .name = HELPER_NAME,
.tuple.src.l3num = NFPROTO_IPV4,
.tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT),
.tuple.dst.protonum = IPPROTO_UDP,
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index ac438370f94a..7032402ffd33 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2311,7 +2311,8 @@ ctnetlink_create_conntrack(struct net *net,
if (helper->from_nlattr)
helper->from_nlattr(helpinfo, ct);
- /* not in hash table yet so not strictly necessary */
+ /* disable helper auto-assignment for this entry */
+ ct->status |= IPS_HELPER;
RCU_INIT_POINTER(help->helper, helper);
}
} else {
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 2394238d01c9..5a936334b517 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -489,6 +489,15 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
pr_debug("Setting vtag %x for dir %d\n",
ih->init_tag, !dir);
ct->proto.sctp.vtag[!dir] = ih->init_tag;
+
+ /* don't renew the timeout on an INIT retransmit, so that
+ * port reuse by a client or a NAT middlebox cannot keep the
+ * entry (including its NAT info) alive indefinitely.
+ */
+ if (new_state == SCTP_CONNTRACK_CLOSED &&
+ old_state == SCTP_CONNTRACK_CLOSED &&
+ nf_ct_is_confirmed(ct))
+ ignore = true;
}
ct->proto.sctp.state = new_state;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index af5115e127cf..d1582b888c0d 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -446,6 +446,32 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
}
}
+static void tcp_init_sender(struct ip_ct_tcp_state *sender,
+ struct ip_ct_tcp_state *receiver,
+ const struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct tcphdr *tcph,
+ u32 end, u32 win)
+{
+ /* SYN-ACK in reply to a SYN
+ * or SYN from reply direction in simultaneous open.
+ */
+ sender->td_end =
+ sender->td_maxend = end;
+ sender->td_maxwin = (win == 0 ? 1 : win);
+
+ tcp_options(skb, dataoff, tcph, sender);
+ /* RFC 1323:
+ * Both sides must send the Window Scale option
+ * to enable window scaling in either direction.
+ */
+ if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
+ receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) {
+ sender->td_scale = 0;
+ receiver->td_scale = 0;
+ }
+}
+
static bool tcp_in_window(struct nf_conn *ct,
enum ip_conntrack_dir dir,
unsigned int index,
@@ -499,24 +525,9 @@ static bool tcp_in_window(struct nf_conn *ct,
* Initialize sender data.
*/
if (tcph->syn) {
- /*
- * SYN-ACK in reply to a SYN
- * or SYN from reply direction in simultaneous open.
- */
- sender->td_end =
- sender->td_maxend = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
-
- tcp_options(skb, dataoff, tcph, sender);
- /*
- * RFC 1323:
- * Both sides must send the Window Scale option
- * to enable window scaling in either direction.
- */
- if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
- && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
- sender->td_scale =
- receiver->td_scale = 0;
+ tcp_init_sender(sender, receiver,
+ skb, dataoff, tcph,
+ end, win);
if (!tcph->ack)
/* Simultaneous open */
return true;
@@ -560,6 +571,18 @@ static bool tcp_in_window(struct nf_conn *ct,
sender->td_maxwin = (win == 0 ? 1 : win);
tcp_options(skb, dataoff, tcph, sender);
+ } else if (tcph->syn && dir == IP_CT_DIR_REPLY &&
+ state->state == TCP_CONNTRACK_SYN_SENT) {
+ /* Retransmitted syn-ack, or syn (simultaneous open).
+ *
+ * Re-init state for this direction, just like for the first
+ * syn(-ack) reply, it might differ in seq, ack or tcp options.
+ */
+ tcp_init_sender(sender, receiver,
+ skb, dataoff, tcph,
+ end, win);
+ if (!tcph->ack)
+ return true;
}
if (!(tcph->ack)) {
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index b561e0a44a45..fc4265acd9c4 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -110,7 +110,11 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
nf_flow_rule_lwt_match(match, tun_info);
}
- key->meta.ingress_ifindex = tuple->iifidx;
+ if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_TC)
+ key->meta.ingress_ifindex = tuple->tc.iifidx;
+ else
+ key->meta.ingress_ifindex = tuple->iifidx;
+
mask->meta.ingress_ifindex = 0xffffffff;
if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 2d06a66899b2..ffcf6529afc3 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -494,38 +494,6 @@ another_round:
goto another_round;
}
-static bool tuple_force_port_remap(const struct nf_conntrack_tuple *tuple)
-{
- u16 sp, dp;
-
- switch (tuple->dst.protonum) {
- case IPPROTO_TCP:
- sp = ntohs(tuple->src.u.tcp.port);
- dp = ntohs(tuple->dst.u.tcp.port);
- break;
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- sp = ntohs(tuple->src.u.udp.port);
- dp = ntohs(tuple->dst.u.udp.port);
- break;
- default:
- return false;
- }
-
- /* IANA: System port range: 1-1023,
- * user port range: 1024-49151,
- * private port range: 49152-65535.
- *
- * Linux default ephemeral port range is 32768-60999.
- *
- * Enforce port remapping if sport is significantly lower
- * than dport to prevent NAT port shadowing, i.e.
- * accidental match of 'new' inbound connection vs.
- * existing outbound one.
- */
- return sp < 16384 && dp >= 32768;
-}
-
/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
* we change the source to map into the range. For NF_INET_PRE_ROUTING
* and NF_INET_LOCAL_OUT, we change the destination to map into the
@@ -539,17 +507,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
{
- bool random_port = range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL;
const struct nf_conntrack_zone *zone;
struct net *net = nf_ct_net(ct);
zone = nf_ct_zone(ct);
- if (maniptype == NF_NAT_MANIP_SRC &&
- !random_port &&
- !ct->local_origin)
- random_port = tuple_force_port_remap(orig_tuple);
-
/* 1) If this srcip/proto/src-proto-part is currently mapped,
* and that same mapping gives a unique tuple within the given
* range, use that.
@@ -558,7 +520,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
* So far, we don't do local source mappings, so multiple
* manips not an issue.
*/
- if (maniptype == NF_NAT_MANIP_SRC && !random_port) {
+ if (maniptype == NF_NAT_MANIP_SRC &&
+ !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
/* try the original tuple first */
if (in_range(orig_tuple, range)) {
if (!nf_nat_used_tuple(orig_tuple, ct)) {
@@ -582,7 +545,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
*/
/* Only bother mapping if it's not already in range and unique */
- if (!random_port) {
+ if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
l4proto_in_range(tuple, maniptype,
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 6d12afabfe8a..63d1516816b1 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -46,6 +46,15 @@ void nf_unregister_queue_handler(void)
}
EXPORT_SYMBOL(nf_unregister_queue_handler);
+static void nf_queue_sock_put(struct sock *sk)
+{
+#ifdef CONFIG_INET
+ sock_gen_put(sk);
+#else
+ sock_put(sk);
+#endif
+}
+
static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
@@ -54,7 +63,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
dev_put(state->in);
dev_put(state->out);
if (state->sk)
- sock_put(state->sk);
+ nf_queue_sock_put(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
dev_put(entry->physin);
@@ -87,19 +96,21 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry)
}
/* Bump dev refs so they don't vanish while packet is out */
-void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
+ if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt))
+ return false;
+
dev_hold(state->in);
dev_hold(state->out);
- if (state->sk)
- sock_hold(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
dev_hold(entry->physin);
dev_hold(entry->physout);
#endif
+ return true;
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
@@ -169,6 +180,18 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
break;
}
+ if (skb_sk_is_prefetched(skb)) {
+ struct sock *sk = skb->sk;
+
+ if (!sk_is_refcounted(sk)) {
+ if (!refcount_inc_not_zero(&sk->sk_refcnt))
+ return -ENOTCONN;
+
+ /* drop refcount on skb_orphan */
+ skb->destructor = sock_edemux;
+ }
+ }
+
entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC);
if (!entry)
return -ENOMEM;
@@ -187,7 +210,10 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
__nf_queue_entry_init_physdevs(entry);
- nf_queue_entry_get_refs(entry);
+ if (!nf_queue_entry_get_refs(entry)) {
+ kfree(entry);
+ return -ENOTCONN;
+ }
switch (entry->state.pf) {
case AF_INET:
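
The nf_queue changes above make taking references fallible: refcount_inc_not_zero() only succeeds while the socket is still live, so a queue entry can no longer resurrect a socket whose last reference is already gone. A userspace model of that try-get semantic with C11 atomics:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Take a reference only while the count is still non-zero; fail
 * cleanly otherwise, which is exactly how the reworked
 * nf_queue_entry_get_refs() reports -ENOTCONN upward.
 */
static bool ref_get_unless_zero(atomic_int *ref)
{
	int old = atomic_load(ref);

	while (old != 0)
		if (atomic_compare_exchange_weak(ref, &old, old + 1))
			return true;	/* reference taken */
	return false;			/* object already dying */
}

int main(void)
{
	atomic_int live = 1, dead = 0;

	printf("live=%d dead=%d\n",
	       ref_get_unless_zero(&live), ref_get_unless_zero(&dead));
	return 0;
}
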
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 77938b1042f3..d71a33ae39b3 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2011,7 +2011,6 @@ static void nft_last_rule(struct nft_rule_blob *blob, const void *ptr)
prule = (struct nft_rule_dp *)ptr;
prule->is_last = 1;
- ptr += offsetof(struct nft_rule_dp, data);
/* blob size does not include the trailer rule */
}
@@ -4503,7 +4502,7 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
list_del_rcu(&catchall->list);
nft_set_elem_destroy(set, catchall->elem, true);
- kfree_rcu(catchall);
+ kfree_rcu(catchall, rcu);
}
}
@@ -5670,7 +5669,7 @@ static void nft_setelem_catchall_remove(const struct net *net,
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
if (catchall->elem == elem->priv) {
list_del_rcu(&catchall->list);
- kfree_rcu(catchall);
+ kfree_rcu(catchall, rcu);
break;
}
}
@@ -6552,12 +6551,15 @@ static int nf_tables_updobj(const struct nft_ctx *ctx,
{
struct nft_object *newobj;
struct nft_trans *trans;
- int err;
+ int err = -ENOMEM;
+
+ if (!try_module_get(type->owner))
+ return -ENOENT;
trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ,
sizeof(struct nft_trans_obj));
if (!trans)
- return -ENOMEM;
+ goto err_trans;
newobj = nft_obj_init(ctx, type, attr);
if (IS_ERR(newobj)) {
@@ -6574,6 +6576,8 @@ static int nf_tables_updobj(const struct nft_ctx *ctx,
err_free_trans:
kfree(trans);
+err_trans:
+ module_put(type->owner);
return err;
}
@@ -8186,7 +8190,7 @@ static void nft_obj_commit_update(struct nft_trans *trans)
if (obj->ops->update)
obj->ops->update(obj, newobj);
- kfree(newobj);
+ nft_obj_destroy(&trans->ctx, newobj);
}
static void nft_commit_release(struct nft_trans *trans)
@@ -8256,6 +8260,12 @@ void nf_tables_trans_destroy_flush_work(void)
}
EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
+static bool nft_expr_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ return false;
+}
+
static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
{
const struct nft_expr *expr, *last;
@@ -8264,14 +8274,12 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
void *data, *data_boundary;
struct nft_rule_dp *prule;
struct nft_rule *rule;
- int i;
/* already handled or inactive chain? */
if (chain->blob_next || !nft_is_active_next(net, chain))
return 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
- i = 0;
data_size = 0;
list_for_each_entry_continue(rule, &chain->rules, list) {
@@ -8301,12 +8309,11 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
return -ENOMEM;
size = 0;
- track.last = last;
+ track.last = nft_expr_last(rule);
nft_rule_for_each_expr(expr, last, rule) {
track.cur = expr;
- if (expr->ops->reduce &&
- expr->ops->reduce(&track, expr)) {
+ if (nft_expr_reduce(&track, expr)) {
expr = track.cur;
continue;
}
@@ -8979,7 +8986,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_NEWOBJ:
if (nft_trans_obj_update(trans)) {
- kfree(nft_trans_obj_newobj(trans));
+ nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans));
nft_trans_destroy(trans);
} else {
trans->ctx.table->use--;
@@ -9639,10 +9646,13 @@ EXPORT_SYMBOL_GPL(__nft_release_basechain);
static void __nft_release_hook(struct net *net, struct nft_table *table)
{
+ struct nft_flowtable *flowtable;
struct nft_chain *chain;
list_for_each_entry(chain, &table->chains, list)
nf_tables_unregister_hook(net, table, chain);
+ list_for_each_entry(flowtable, &table->flowtables, list)
+ nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list);
}
static void __nft_release_hooks(struct net *net)
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 9656c1646222..2d36952b1392 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -94,7 +94,8 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net,
expr = nft_expr_first(rule);
while (nft_expr_more(rule, expr)) {
- if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION)
+ if (expr->ops->offload_action &&
+ expr->ops->offload_action(expr))
num_actions++;
expr = nft_expr_next(expr);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index ea2d9c2a44cf..64a6acb6aeae 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -710,9 +710,15 @@ static struct nf_queue_entry *
nf_queue_entry_dup(struct nf_queue_entry *e)
{
struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
- if (entry)
- nf_queue_entry_get_refs(entry);
- return entry;
+
+ if (!entry)
+ return NULL;
+
+ if (nf_queue_entry_get_refs(entry))
+ return entry;
+
+ kfree(entry);
+ return NULL;
}
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 9d5947ab8d4e..e646e9ee4a98 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -167,12 +167,24 @@ nla_put_failure:
return -1;
}
+static bool nft_byteorder_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ struct nft_byteorder *priv = nft_expr_priv(expr);
+
+ track->regs[priv->dreg].selector = NULL;
+ track->regs[priv->dreg].bitwise = NULL;
+
+ return false;
+}
+
static const struct nft_expr_ops nft_byteorder_ops = {
.type = &nft_byteorder_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)),
.eval = nft_byteorder_eval,
.init = nft_byteorder_init,
.dump = nft_byteorder_dump,
+ .reduce = nft_byteorder_reduce,
};
struct nft_expr_type nft_byteorder_type __read_mostly = {
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index 7d00a1452b1d..3362417ebfdb 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -62,6 +62,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
{
bool invert = false;
u32 flags, limit;
+ int err;
if (!tb[NFTA_CONNLIMIT_COUNT])
return -EINVAL;
@@ -84,7 +85,15 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
priv->limit = limit;
priv->invert = invert;
- return nf_ct_netns_get(ctx->net, ctx->family);
+ err = nf_ct_netns_get(ctx->net, ctx->family);
+ if (err < 0)
+ goto err_netns;
+
+ return 0;
+err_netns:
+ kfree(priv->list);
+
+ return err;
}
static void nft_connlimit_do_destroy(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 518d96c8c247..5adf8bb628a8 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -260,9 +260,12 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
ct = this_cpu_read(nft_ct_pcpu_template);
if (likely(refcount_read(&ct->ct_general.use) == 1)) {
+ refcount_inc(&ct->ct_general.use);
nf_ct_zone_add(ct, &zone);
} else {
- /* previous skb got queued to userspace */
+ /* the previous skb got queued to userspace; allocate a
+ * temporary template until the percpu one can be reused.
+ */
ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC);
if (!ct) {
regs->verdict.code = NF_DROP;
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
index bbf3fcba3df4..5b5c607fbf83 100644
--- a/net/netfilter/nft_dup_netdev.c
+++ b/net/netfilter/nft_dup_netdev.c
@@ -67,6 +67,11 @@ static int nft_dup_netdev_offload(struct nft_offload_ctx *ctx,
return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_MIRRED, oif);
}
+static bool nft_dup_netdev_offload_action(const struct nft_expr *expr)
+{
+ return true;
+}
+
static struct nft_expr_type nft_dup_netdev_type;
static const struct nft_expr_ops nft_dup_netdev_ops = {
.type = &nft_dup_netdev_type,
@@ -75,6 +80,7 @@ static const struct nft_expr_ops nft_dup_netdev_ops = {
.init = nft_dup_netdev_init,
.dump = nft_dup_netdev_dump,
.offload = nft_dup_netdev_offload,
+ .offload_action = nft_dup_netdev_offload_action,
};
static struct nft_expr_type nft_dup_netdev_type __read_mostly = {
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index dbe1f2e7dd9e..9e927ab4df15 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -167,7 +167,7 @@ nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
{
struct tcphdr *tcph;
- if (pkt->tprot != IPPROTO_TCP)
+ if (pkt->tprot != IPPROTO_TCP || pkt->fragoff)
return NULL;
tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer);
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index fa9301ca6033..619e394a91de 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -79,6 +79,11 @@ static int nft_fwd_netdev_offload(struct nft_offload_ctx *ctx,
return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_REDIRECT, oif);
}
+static bool nft_fwd_netdev_offload_action(const struct nft_expr *expr)
+{
+ return true;
+}
+
struct nft_fwd_neigh {
u8 sreg_dev;
u8 sreg_addr;
@@ -222,6 +227,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = {
.dump = nft_fwd_netdev_dump,
.validate = nft_fwd_validate,
.offload = nft_fwd_netdev_offload,
+ .offload_action = nft_fwd_netdev_offload_action,
};
static const struct nft_expr_ops *
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 90c64d27ae53..d0f67d325bdf 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -213,6 +213,16 @@ static int nft_immediate_offload(struct nft_offload_ctx *ctx,
return 0;
}
+static bool nft_immediate_offload_action(const struct nft_expr *expr)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+ if (priv->dreg == NFT_REG_VERDICT)
+ return true;
+
+ return false;
+}
+
static const struct nft_expr_ops nft_imm_ops = {
.type = &nft_imm_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
@@ -224,7 +234,7 @@ static const struct nft_expr_ops nft_imm_ops = {
.dump = nft_immediate_dump,
.validate = nft_immediate_validate,
.offload = nft_immediate_offload,
- .offload_flags = NFT_OFFLOAD_F_ACTION,
+ .offload_action = nft_immediate_offload_action,
};
struct nft_expr_type nft_imm_type __read_mostly = {
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index c4f308460dd1..a726b623963d 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -340,11 +340,20 @@ static int nft_limit_obj_pkts_dump(struct sk_buff *skb,
return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
}
+static void nft_limit_obj_pkts_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ struct nft_limit_priv_pkts *priv = nft_obj_data(obj);
+
+ nft_limit_destroy(ctx, &priv->limit);
+}
+
static struct nft_object_type nft_limit_obj_type;
static const struct nft_object_ops nft_limit_obj_pkts_ops = {
.type = &nft_limit_obj_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)),
.init = nft_limit_obj_pkts_init,
+ .destroy = nft_limit_obj_pkts_destroy,
.eval = nft_limit_obj_pkts_eval,
.dump = nft_limit_obj_pkts_dump,
};
@@ -378,11 +387,20 @@ static int nft_limit_obj_bytes_dump(struct sk_buff *skb,
return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
}
+static void nft_limit_obj_bytes_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ struct nft_limit_priv *priv = nft_obj_data(obj);
+
+ nft_limit_destroy(ctx, priv);
+}
+
static struct nft_object_type nft_limit_obj_type;
static const struct nft_object_ops nft_limit_obj_bytes_ops = {
.type = &nft_limit_obj_type,
.size = sizeof(struct nft_limit_priv),
.init = nft_limit_obj_bytes_init,
+ .destroy = nft_limit_obj_bytes_destroy,
.eval = nft_limit_obj_bytes_eval,
.dump = nft_limit_obj_bytes_dump,
};
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 940fed9a760b..5cc06aef4345 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -83,7 +83,7 @@ static int __nft_payload_inner_offset(struct nft_pktinfo *pkt)
{
unsigned int thoff = nft_thoff(pkt);
- if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
return -1;
switch (pkt->tprot) {
@@ -147,7 +147,7 @@ void nft_payload_eval(const struct nft_expr *expr,
offset = skb_network_offset(skb);
break;
case NFT_PAYLOAD_TRANSPORT_HEADER:
- if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
goto err;
offset = nft_thoff(pkt);
break;
@@ -688,7 +688,7 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
offset = skb_network_offset(skb);
break;
case NFT_PAYLOAD_TRANSPORT_HEADER:
- if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
goto err;
offset = nft_thoff(pkt);
break;
@@ -728,7 +728,8 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
if (priv->csum_type == NFT_PAYLOAD_CSUM_SCTP &&
pkt->tprot == IPPROTO_SCTP &&
skb->ip_summed != CHECKSUM_PARTIAL) {
- if (nft_payload_csum_sctp(skb, nft_thoff(pkt)))
+ if (pkt->fragoff == 0 &&
+ nft_payload_csum_sctp(skb, nft_thoff(pkt)))
goto err;
}
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index a0109fa1e92d..1133e06f3c40 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -191,8 +191,10 @@ static int nft_synproxy_do_init(const struct nft_ctx *ctx,
if (err)
goto nf_ct_failure;
err = nf_synproxy_ipv6_init(snet, ctx->net);
- if (err)
+ if (err) {
+ nf_synproxy_ipv4_fini(snet, ctx->net);
goto nf_ct_failure;
+ }
break;
}
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 5e6459e11605..7013f55f05d1 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -220,8 +220,10 @@ static void socket_mt_destroy(const struct xt_mtdtor_param *par)
{
if (par->family == NFPROTO_IPV4)
nf_defrag_ipv4_disable(par->net);
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
else if (par->family == NFPROTO_IPV6)
- nf_defrag_ipv4_disable(par->net);
+ nf_defrag_ipv6_disable(par->net);
+#endif
}
static struct xt_match socket_mt_reg[] __read_mostly = {
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 076774034bb9..780d9e2246f3 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -423,12 +423,43 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
memcpy(addr, new_addr, sizeof(__be32[4]));
}
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
+static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 ipv6_tclass, u8 mask)
{
+ u8 old_ipv6_tclass = ipv6_get_dsfield(nh);
+
+ ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask);
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ csum_replace(&skb->csum, (__force __wsum)(old_ipv6_tclass << 12),
+ (__force __wsum)(ipv6_tclass << 12));
+
+ ipv6_change_dsfield(nh, ~mask, ipv6_tclass);
+}
+
+static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 mask)
+{
+ u32 ofl;
+
+ ofl = nh->flow_lbl[0] << 16 | nh->flow_lbl[1] << 8 | nh->flow_lbl[2];
+ fl = OVS_MASKED(ofl, fl, mask);
+
/* Bits 21-24 are always unmasked, so this retains their values. */
- OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
- OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
- OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
+ nh->flow_lbl[0] = (u8)(fl >> 16);
+ nh->flow_lbl[1] = (u8)(fl >> 8);
+ nh->flow_lbl[2] = (u8)fl;
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ csum_replace(&skb->csum, (__force __wsum)htonl(ofl), (__force __wsum)htonl(fl));
+}
+
+static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, u8 mask)
+{
+ new_ttl = OVS_MASKED(nh->hop_limit, new_ttl, mask);
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ csum_replace(&skb->csum, (__force __wsum)(nh->hop_limit << 8),
+ (__force __wsum)(new_ttl << 8));
+ nh->hop_limit = new_ttl;
}
static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
@@ -546,18 +577,17 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
}
}
if (mask->ipv6_tclass) {
- ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
+ set_ipv6_dsfield(skb, nh, key->ipv6_tclass, mask->ipv6_tclass);
flow_key->ip.tos = ipv6_get_dsfield(nh);
}
if (mask->ipv6_label) {
- set_ipv6_fl(nh, ntohl(key->ipv6_label),
+ set_ipv6_fl(skb, nh, ntohl(key->ipv6_label),
ntohl(mask->ipv6_label));
flow_key->ipv6.label =
*(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
}
if (mask->ipv6_hlimit) {
- OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit,
- mask->ipv6_hlimit);
+ set_ipv6_ttl(skb, nh, key->ipv6_hlimit, mask->ipv6_hlimit);
flow_key->ip.ttl = nh->hop_limit;
}
return 0;
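
The three new OVS helpers above all pair a header rewrite with a csum_replace() fix-up of skb->csum. The underlying arithmetic is RFC 1624's incremental checksum update: when a 16-bit field changes from old to new, HC' = ~(~HC + ~old + new), so the packet need not be re-summed. A self-contained demonstration of that identity:

#include <stdint.h>
#include <stdio.h>

static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/* RFC 1624: HC' = ~(~HC + ~old + new) */
static uint16_t csum_update(uint16_t check, uint16_t old, uint16_t new)
{
	uint32_t sum = (uint16_t)~check;

	sum += (uint16_t)~old;
	sum += new;
	return (uint16_t)~csum_fold(sum);
}

int main(void)
{
	/* toy packet of two 16-bit words plus its complemented sum */
	uint16_t w0 = 0x1234, w1 = 0xabcd;
	uint16_t check = (uint16_t)~csum_fold((uint32_t)w0 + w1);

	w1 = 0xbeef;				/* rewrite one field... */
	check = csum_update(check, 0xabcd, w1);	/* ...and patch the sum */

	printf("match=%d\n",
	       check == (uint16_t)~csum_fold((uint32_t)w0 + w1));
	return 0;
}
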
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 5bd409ab4cc2..a7273af2d900 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1774,6 +1774,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
match->prot_hook.dev = po->prot_hook.dev;
match->prot_hook.func = packet_rcv_fanout;
match->prot_hook.af_packet_priv = match;
+ match->prot_hook.af_packet_net = read_pnet(&match->net);
match->prot_hook.id_match = match_fanout_group;
match->max_num_members = args->max_num_members;
list_add(&match->list, &fanout_list);
@@ -1788,7 +1789,10 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
err = -ENOSPC;
if (refcount_read(&match->sk_ref) < match->max_num_members) {
__dev_remove_pack(&po->prot_hook);
- po->fanout = match;
+
+ /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
+ WRITE_ONCE(po->fanout, match);
+
po->rollover = rollover;
rollover = NULL;
refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
@@ -2313,8 +2317,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
copy_skb = skb_get(skb);
skb_head = skb->data;
}
- if (copy_skb)
+ if (copy_skb) {
+ memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0,
+ sizeof(PACKET_SKB_CB(copy_skb)->sa.ll));
skb_set_owner_r(copy_skb, sk);
+ }
}
snaplen = po->rx_ring.frame_size - macoff;
if ((int)snaplen < 0) {
@@ -3353,6 +3360,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
po->prot_hook.func = packet_rcv_spkt;
po->prot_hook.af_packet_priv = sk;
+ po->prot_hook.af_packet_net = sock_net(sk);
if (proto) {
po->prot_hook.type = proto;
@@ -3457,6 +3465,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
sock_recv_ts_and_drops(msg, sk, skb);
if (msg->msg_name) {
+ const size_t max_len = min(sizeof(skb->cb),
+ sizeof(struct sockaddr_storage));
int copy_len;
/* If the address length field is there to be filled
@@ -3479,6 +3489,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
msg->msg_namelen = sizeof(struct sockaddr_ll);
}
}
+ if (WARN_ON_ONCE(copy_len > max_len)) {
+ copy_len = max_len;
+ msg->msg_namelen = copy_len;
+ }
memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
}
@@ -3932,7 +3946,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
}
case PACKET_FANOUT_DATA:
{
- if (!po->fanout)
+ /* Paired with the WRITE_ONCE() in fanout_add() */
+ if (!READ_ONCE(po->fanout))
return -EINVAL;
return fanout_set_data(po, optval, optlen);
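
The packet_recvmsg() hardening above bounds copy_len by both the skb control buffer and sockaddr_storage before the memcpy into msg_name. A minimal sketch of that clamp (the 24-byte cb is an illustrative stand-in for PACKET_SKB_CB(skb)->sa, not the real layout):

#include <stdio.h>
#include <string.h>

struct cb { char sa[24]; };

/* Whatever length the address-filling code computed, never copy more
 * than fits in both the source buffer and the destination.
 */
static size_t bounded_copy(void *dst, size_t dst_len,
			   const struct cb *src, size_t copy_len)
{
	const size_t max_len = sizeof(src->sa) < dst_len ?
			       sizeof(src->sa) : dst_len;

	if (copy_len > max_len)	/* WARN_ON_ONCE() in the real code */
		copy_len = max_len;
	memcpy(dst, src->sa, copy_len);
	return copy_len;
}

int main(void)
{
	struct cb src = { "example" };
	char name[128];

	printf("copied %zu bytes\n",
	       bounded_copy(name, sizeof(name), &src, 64));
	return 0;
}
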
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 6be2672a65ea..df864e692267 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -157,7 +157,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
{
struct sk_buff *skb;
- unsigned long resend_at, rto_j;
+ unsigned long resend_at;
rxrpc_seq_t cursor, seq, top;
ktime_t now, max_age, oldest, ack_ts;
int ix;
@@ -165,10 +165,8 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
_enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
- rto_j = call->peer->rto_j;
-
now = ktime_get_real();
- max_age = ktime_sub(now, jiffies_to_usecs(rto_j));
+ max_age = ktime_sub(now, jiffies_to_usecs(call->peer->rto_j));
spin_lock_bh(&call->lock);
@@ -213,7 +211,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
}
resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
- resend_at += jiffies + rto_j;
+ resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, retrans);
WRITE_ONCE(call->resend_at, resend_at);
if (unacked)
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 10f2bf2e9068..a45c83f22236 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -468,7 +468,7 @@ done:
if (call->peer->rtt_count > 1) {
unsigned long nowj = jiffies, ack_lost_at;
- ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans);
+ ack_lost_at = rxrpc_get_rto_backoff(call->peer, false);
ack_lost_at += nowj;
WRITE_ONCE(call->ack_lost_at, ack_lost_at);
rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 32563cef85bf..ca03e7284254 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -274,7 +274,7 @@ static int tcf_action_offload_add_ex(struct tc_action *action,
err = tc_setup_action(&fl_action->action, actions);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
- "Failed to setup tc actions for offload\n");
+ "Failed to setup tc actions for offload");
goto fl_err;
}
@@ -1037,6 +1037,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
restart_act_graph:
for (i = 0; i < nr_actions; i++) {
const struct tc_action *a = actions[i];
+ int repeat_ttl;
if (jmp_prgcnt > 0) {
jmp_prgcnt -= 1;
@@ -1045,11 +1046,17 @@ restart_act_graph:
if (tc_act_skip_sw(a->tcfa_flags))
continue;
+
+ repeat_ttl = 32;
repeat:
ret = a->ops->act(skb, a, res);
- if (ret == TC_ACT_REPEAT)
- goto repeat; /* we need a ttl - JHS */
-
+ if (unlikely(ret == TC_ACT_REPEAT)) {
+ if (--repeat_ttl != 0)
+ goto repeat;
+ /* suspicious opcode, stop pipeline */
+ net_warn_ratelimited("TC_ACT_REPEAT abuse?\n");
+ return TC_ACT_OK;
+ }
if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) {
jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK;
if (!jmp_prgcnt || (jmp_prgcnt > nr_actions)) {
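
The act_api.c change above gives the legitimate TC_ACT_REPEAT verdict a retry budget, so one misbehaving action can no longer livelock the pipeline with an unbounded "goto repeat". A compact model of that bounded-retry loop, mirroring the repeat_ttl = 32 budget in the patch:

#include <stdio.h>

enum { ACT_OK, ACT_REPEAT };

/* A misbehaving action that always asks to be re-run. */
static int flaky_action(int *calls)
{
	++*calls;
	return ACT_REPEAT;
}

static int run_action(void)
{
	int repeat_ttl = 32, calls = 0, ret;

repeat:
	ret = flaky_action(&calls);
	if (ret == ACT_REPEAT) {
		if (--repeat_ttl != 0)
			goto repeat;
		fprintf(stderr, "repeat budget spent after %d calls\n",
			calls);
		return ACT_OK;	/* stop the pipeline, don't spin */
	}
	return ret;
}

int main(void)
{
	return run_action();
}
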
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f99247fc6468..ec19f625863a 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -361,6 +361,13 @@ static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
}
}
+static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry,
+ struct nf_conn_act_ct_ext *act_ct_ext, u8 dir)
+{
+ entry->tuplehash[dir].tuple.xmit_type = FLOW_OFFLOAD_XMIT_TC;
+ entry->tuplehash[dir].tuple.tc.iifidx = act_ct_ext->ifindex[dir];
+}
+
static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
struct nf_conn *ct,
bool tcp)
@@ -385,10 +392,8 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
act_ct_ext = nf_conn_act_ct_ext_find(ct);
if (act_ct_ext) {
- entry->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
- act_ct_ext->ifindex[IP_CT_DIR_ORIGINAL];
- entry->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
- act_ct_ext->ifindex[IP_CT_DIR_REPLY];
+ tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_ORIGINAL);
+ tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_REPLY);
}
err = flow_offload_add(&ct_ft->nf_ft, entry);
@@ -533,11 +538,6 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
struct nf_conn *ct;
u8 dir;
- /* Previously seen or loopback */
- ct = nf_ct_get(skb, &ctinfo);
- if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
- return false;
-
switch (family) {
case NFPROTO_IPV4:
if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph))
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index d4e27c679123..5ce1208a6ea3 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1044,7 +1044,7 @@ static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
/* Find qdisc */
if (!*parent) {
- *q = dev->qdisc;
+ *q = rcu_dereference(dev->qdisc);
*parent = (*q)->handle;
} else {
*q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
@@ -1945,9 +1945,9 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
bool prio_allocate;
u32 parent;
u32 chain_index;
- struct Qdisc *q = NULL;
+ struct Qdisc *q;
struct tcf_chain_info chain_info;
- struct tcf_chain *chain = NULL;
+ struct tcf_chain *chain;
struct tcf_block *block;
struct tcf_proto *tp;
unsigned long cl;
@@ -1976,6 +1976,8 @@ replay:
tp = NULL;
cl = 0;
block = NULL;
+ q = NULL;
+ chain = NULL;
flags = 0;
if (prio == 0) {
@@ -2585,7 +2587,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
parent = tcm->tcm_parent;
if (!parent)
- q = dev->qdisc;
+ q = rtnl_dereference(dev->qdisc);
else
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
if (!q)
@@ -2798,8 +2800,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
struct tcmsg *t;
u32 parent;
u32 chain_index;
- struct Qdisc *q = NULL;
- struct tcf_chain *chain = NULL;
+ struct Qdisc *q;
+ struct tcf_chain *chain;
struct tcf_block *block;
unsigned long cl;
int err;
@@ -2809,6 +2811,7 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
return -EPERM;
replay:
+ q = NULL;
err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
rtm_tca_policy, extack);
if (err < 0)
@@ -2959,7 +2962,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
if (!tcm->tcm_parent)
- q = dev->qdisc;
+ q = rtnl_dereference(dev->qdisc);
else
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 2cb496c84878..e3c0e8ea2dbb 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -301,7 +301,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
if (!handle)
return NULL;
- q = qdisc_match_from_root(dev->qdisc, handle);
+ q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
if (q)
goto out;
@@ -320,7 +320,7 @@ struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
if (!handle)
return NULL;
- q = qdisc_match_from_root(dev->qdisc, handle);
+ q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
if (q)
goto out;
@@ -1082,10 +1082,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
skip:
if (!ingress) {
notify_and_destroy(net, skb, n, classid,
- dev->qdisc, new);
+ rtnl_dereference(dev->qdisc), new);
if (new && !new->ops->attach)
qdisc_refcount_inc(new);
- dev->qdisc = new ? : &noop_qdisc;
+ rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
if (new && new->ops->attach)
new->ops->attach(new);
@@ -1204,7 +1204,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
err = -ENOENT;
if (!ops) {
- NL_SET_ERR_MSG(extack, "Specified qdisc not found");
+ NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
goto err_out;
}
@@ -1451,7 +1451,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
q = dev_ingress_queue(dev)->qdisc_sleeping;
}
} else {
- q = dev->qdisc;
+ q = rtnl_dereference(dev->qdisc);
}
if (!q) {
NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
@@ -1540,7 +1540,7 @@ replay:
q = dev_ingress_queue(dev)->qdisc_sleeping;
}
} else {
- q = dev->qdisc;
+ q = rtnl_dereference(dev->qdisc);
}
/* It may be default qdisc, ignore it */
@@ -1762,7 +1762,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
s_q_idx = 0;
q_idx = 0;
- if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
+ if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
+ skb, cb, &q_idx, s_q_idx,
true, tca[TCA_DUMP_INVISIBLE]) < 0)
goto done;
@@ -2033,7 +2034,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
} else if (qid1) {
qid = qid1;
} else if (qid == 0)
- qid = dev->qdisc->handle;
+ qid = rtnl_dereference(dev->qdisc)->handle;
/* Now qid is genuine qdisc handle consistent
* both with parent and child.
@@ -2044,7 +2045,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
portid = TC_H_MAKE(qid, portid);
} else {
if (qid == 0)
- qid = dev->qdisc->handle;
+ qid = rtnl_dereference(dev->qdisc)->handle;
}
/* OK. Locate qdisc */
@@ -2205,7 +2206,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0];
t = 0;
- if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0)
+ if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
+ skb, tcm, cb, &t, s_t, true) < 0)
goto done;
dev_queue = dev_ingress_queue(dev);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index f893d9a81b01..5bab9f8b8f45 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1164,30 +1164,33 @@ static void attach_default_qdiscs(struct net_device *dev)
if (!netif_is_multiqueue(dev) ||
dev->priv_flags & IFF_NO_QUEUE) {
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
- dev->qdisc = txq->qdisc_sleeping;
- qdisc_refcount_inc(dev->qdisc);
+ qdisc = txq->qdisc_sleeping;
+ rcu_assign_pointer(dev->qdisc, qdisc);
+ qdisc_refcount_inc(qdisc);
} else {
qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL);
if (qdisc) {
- dev->qdisc = qdisc;
+ rcu_assign_pointer(dev->qdisc, qdisc);
qdisc->ops->attach(qdisc);
}
}
+ qdisc = rtnl_dereference(dev->qdisc);
/* Detect default qdisc setup/init failed and fallback to "noqueue" */
- if (dev->qdisc == &noop_qdisc) {
+ if (qdisc == &noop_qdisc) {
netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n",
default_qdisc_ops->id, noqueue_qdisc_ops.id);
dev->priv_flags |= IFF_NO_QUEUE;
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
- dev->qdisc = txq->qdisc_sleeping;
- qdisc_refcount_inc(dev->qdisc);
+ qdisc = txq->qdisc_sleeping;
+ rcu_assign_pointer(dev->qdisc, qdisc);
+ qdisc_refcount_inc(qdisc);
dev->priv_flags ^= IFF_NO_QUEUE;
}
#ifdef CONFIG_NET_SCHED
- if (dev->qdisc != &noop_qdisc)
- qdisc_hash_add(dev->qdisc, false);
+ if (qdisc != &noop_qdisc)
+ qdisc_hash_add(qdisc, false);
#endif
}
@@ -1217,7 +1220,7 @@ void dev_activate(struct net_device *dev)
* and noqueue_qdisc for virtual interfaces
*/
- if (dev->qdisc == &noop_qdisc)
+ if (rtnl_dereference(dev->qdisc) == &noop_qdisc)
attach_default_qdiscs(dev);
if (!netif_carrier_ok(dev))
@@ -1383,7 +1386,7 @@ static int qdisc_change_tx_queue_len(struct net_device *dev,
void dev_qdisc_change_real_num_tx(struct net_device *dev,
unsigned int new_real_tx)
{
- struct Qdisc *qdisc = dev->qdisc;
+ struct Qdisc *qdisc = rtnl_dereference(dev->qdisc);
if (qdisc->ops->change_real_num_tx)
qdisc->ops->change_real_num_tx(qdisc, new_real_tx);
@@ -1447,7 +1450,7 @@ static void dev_init_scheduler_queue(struct net_device *dev,
void dev_init_scheduler(struct net_device *dev)
{
- dev->qdisc = &noop_qdisc;
+ rcu_assign_pointer(dev->qdisc, &noop_qdisc);
netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
if (dev_ingress_queue(dev))
dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
@@ -1475,8 +1478,8 @@ void dev_shutdown(struct net_device *dev)
netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
if (dev_ingress_queue(dev))
shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
- qdisc_put(dev->qdisc);
- dev->qdisc = &noop_qdisc;
+ qdisc_put(rtnl_dereference(dev->qdisc));
+ rcu_assign_pointer(dev->qdisc, &noop_qdisc);
WARN_ON(timer_pending(&dev->watchdog_timer));
}
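The cls_api.c, sch_api.c and sch_generic.c hunks above convert every access to dev->qdisc to rcu_assign_pointer()/rtnl_dereference()/rcu_dereference(). Below is a userspace C11 analogy for the publish/read discipline those accessors enforce; release/acquire atomics stand in for the RCU primitives, and RCU's deferred freeing is omitted:

/* The writer initialises the object fully, then publishes it with a
 * release store; readers load with an acquire load and are guaranteed
 * to observe the initialised fields.
 */
#include <stdatomic.h>
#include <stdio.h>

struct qdisc {
        int handle;
};

static _Atomic(struct qdisc *) root_qdisc;

static void publish(struct qdisc *q, int handle)
{
        q->handle = handle;                         /* init first */
        atomic_store_explicit(&root_qdisc, q,
                              memory_order_release); /* then publish */
}

static struct qdisc *lookup(void)
{
        return atomic_load_explicit(&root_qdisc, memory_order_acquire);
}

int main(void)
{
        static struct qdisc q;

        publish(&q, 0x8001);
        printf("root handle: %#x\n", lookup()->handle);
        return 0;
}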
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 9267922ea9c3..23a9d6242429 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1810,6 +1810,26 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (!hopt->rate.rate || !hopt->ceil.rate)
goto failure;
+ if (q->offload) {
+ /* Options not supported by the offload. */
+ if (hopt->rate.overhead || hopt->ceil.overhead) {
+ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
+ goto failure;
+ }
+ if (hopt->rate.mpu || hopt->ceil.mpu) {
+ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
+ goto failure;
+ }
+ if (hopt->quantum) {
+ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
+ goto failure;
+ }
+ if (hopt->prio) {
+ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter");
+ goto failure;
+ }
+ }
+
/* Keeping backward compatible with rate_table based iproute2 tc */
if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
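The HTB hunk rejects each option the offload cannot honour with a specific extack message before any state is touched. A sketch of that fail-fast validation shape; the option struct and the messages are illustrative, not the HTB netlink layout:

/* Check each option the backend cannot honour and report a specific
 * message for the first offender, leaving state untouched on failure.
 */
#include <stdio.h>

struct opts {
        unsigned int overhead, mpu, quantum, prio;
};

static int validate_offload(const struct opts *o, const char **msg)
{
        if (o->overhead) {
                *msg = "offload doesn't support the overhead parameter";
                return -1;
        }
        if (o->mpu) {
                *msg = "offload doesn't support the mpu parameter";
                return -1;
        }
        if (o->quantum) {
                *msg = "offload doesn't support the quantum parameter";
                return -1;
        }
        if (o->prio) {
                *msg = "offload doesn't support the prio parameter";
                return -1;
        }
        return 0;
}

int main(void)
{
        struct opts o = { .quantum = 1514 };
        const char *msg = NULL;

        if (validate_offload(&o, &msg))
                fprintf(stderr, "rejected: %s\n", msg);
        return 0;
}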
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 034e2c74497d..d9c6d8f30f09 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -61,10 +61,6 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r,
r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
r->idiag_retrans = asoc->rtx_data_chunks;
r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies);
- } else {
- r->idiag_timer = 0;
- r->idiag_retrans = 0;
- r->idiag_expires = 0;
}
}
@@ -144,13 +140,14 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
r = nlmsg_data(nlh);
BUG_ON(!sk_fullsock(sk));
+ r->idiag_timer = 0;
+ r->idiag_retrans = 0;
+ r->idiag_expires = 0;
if (asoc) {
inet_diag_msg_sctpasoc_fill(r, sk, asoc);
} else {
inet_diag_msg_common_fill(r, sk);
r->idiag_state = sk->sk_state;
- r->idiag_timer = 0;
- r->idiag_retrans = 0;
}
if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index cc544a97c4af..7f342bc12735 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -930,6 +930,11 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ /* Set peer label for connection. */
+ if (security_sctp_assoc_established((struct sctp_association *)asoc,
+ chunk->skb))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* Verify that the chunk length for the COOKIE-ACK is OK.
* If we don't do this, any bundled chunks may be junked.
*/
@@ -945,9 +950,6 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
*/
sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL());
- /* Set peer label for connection. */
- security_inet_conn_established(ep->base.sk, chunk->skb);
-
/* RFC 2960 5.1 Normal Establishment of an Association
*
* E) Upon reception of the COOKIE ACK, endpoint "A" will move
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 961854e56736..284befa90967 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -183,7 +183,7 @@ static int smc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
struct smc_sock *smc;
- int rc = 0;
+ int old_state, rc = 0;
if (!sk)
goto out;
@@ -191,8 +191,10 @@ static int smc_release(struct socket *sock)
sock_hold(sk); /* sock_put below */
smc = smc_sk(sk);
+ old_state = sk->sk_state;
+
/* cleanup for a dangling non-blocking connect */
- if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
+ if (smc->connect_nonblock && old_state == SMC_INIT)
tcp_abort(smc->clcsock->sk, ECONNABORTED);
if (cancel_work_sync(&smc->connect_work))
@@ -206,6 +208,10 @@ static int smc_release(struct socket *sock)
else
lock_sock(sk);
+ if (old_state == SMC_INIT && sk->sk_state == SMC_ACTIVE &&
+ !smc->use_fallback)
+ smc_close_active_abort(smc);
+
rc = __smc_release(smc);
/* detach socket */
@@ -566,12 +572,118 @@ static void smc_stat_fallback(struct smc_sock *smc)
mutex_unlock(&net->smc.mutex_fback_rsn);
}
-static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
+/* must be called under rcu read lock */
+static void smc_fback_wakeup_waitqueue(struct smc_sock *smc, void *key)
{
- wait_queue_head_t *smc_wait = sk_sleep(&smc->sk);
- wait_queue_head_t *clc_wait = sk_sleep(smc->clcsock->sk);
- unsigned long flags;
+ struct socket_wq *wq;
+ __poll_t flags;
+
+ wq = rcu_dereference(smc->sk.sk_wq);
+ if (!skwq_has_sleeper(wq))
+ return;
+ /* wake up smc sk->sk_wq */
+ if (!key) {
+ /* sk_state_change */
+ wake_up_interruptible_all(&wq->wait);
+ } else {
+ flags = key_to_poll(key);
+ if (flags & (EPOLLIN | EPOLLOUT))
+ /* sk_data_ready or sk_write_space */
+ wake_up_interruptible_sync_poll(&wq->wait, flags);
+ else if (flags & EPOLLERR)
+ /* sk_error_report */
+ wake_up_interruptible_poll(&wq->wait, flags);
+ }
+}
+
+static int smc_fback_mark_woken(wait_queue_entry_t *wait,
+ unsigned int mode, int sync, void *key)
+{
+ struct smc_mark_woken *mark =
+ container_of(wait, struct smc_mark_woken, wait_entry);
+
+ mark->woken = true;
+ mark->key = key;
+ return 0;
+}
+
+static void smc_fback_forward_wakeup(struct smc_sock *smc, struct sock *clcsk,
+ void (*clcsock_callback)(struct sock *sk))
+{
+ struct smc_mark_woken mark = { .woken = false };
+ struct socket_wq *wq;
+
+ init_waitqueue_func_entry(&mark.wait_entry,
+ smc_fback_mark_woken);
+ rcu_read_lock();
+ wq = rcu_dereference(clcsk->sk_wq);
+ if (!wq)
+ goto out;
+ add_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
+ clcsock_callback(clcsk);
+ remove_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
+
+ if (mark.woken)
+ smc_fback_wakeup_waitqueue(smc, mark.key);
+out:
+ rcu_read_unlock();
+}
+
+static void smc_fback_state_change(struct sock *clcsk)
+{
+ struct smc_sock *smc =
+ smc_clcsock_user_data(clcsk);
+
+ if (!smc)
+ return;
+ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_state_change);
+}
+
+static void smc_fback_data_ready(struct sock *clcsk)
+{
+ struct smc_sock *smc =
+ smc_clcsock_user_data(clcsk);
+
+ if (!smc)
+ return;
+ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_data_ready);
+}
+
+static void smc_fback_write_space(struct sock *clcsk)
+{
+ struct smc_sock *smc =
+ smc_clcsock_user_data(clcsk);
+
+ if (!smc)
+ return;
+ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_write_space);
+}
+
+static void smc_fback_error_report(struct sock *clcsk)
+{
+ struct smc_sock *smc =
+ smc_clcsock_user_data(clcsk);
+
+ if (!smc)
+ return;
+ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report);
+}
+
+static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
+{
+ struct sock *clcsk;
+ int rc = 0;
+
+ mutex_lock(&smc->clcsock_release_lock);
+ if (!smc->clcsock) {
+ rc = -EBADF;
+ goto out;
+ }
+ clcsk = smc->clcsock->sk;
+
+ if (smc->use_fallback)
+ goto out;
smc->use_fallback = true;
smc->fallback_rsn = reason_code;
smc_stat_fallback(smc);
@@ -582,22 +694,41 @@ static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
smc->clcsock->wq.fasync_list =
smc->sk.sk_socket->wq.fasync_list;
- /* There may be some entries remaining in
- * smc socket->wq, which should be removed
- * to clcsocket->wq during the fallback.
+ /* There might be some wait entries remaining
+ * in smc sk->sk_wq and they should be woken up
+ * whenever clcsock's wait queue is woken up.
*/
- spin_lock_irqsave(&smc_wait->lock, flags);
- spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING);
- list_splice_init(&smc_wait->head, &clc_wait->head);
- spin_unlock(&clc_wait->lock);
- spin_unlock_irqrestore(&smc_wait->lock, flags);
+ smc->clcsk_state_change = clcsk->sk_state_change;
+ smc->clcsk_data_ready = clcsk->sk_data_ready;
+ smc->clcsk_write_space = clcsk->sk_write_space;
+ smc->clcsk_error_report = clcsk->sk_error_report;
+
+ clcsk->sk_state_change = smc_fback_state_change;
+ clcsk->sk_data_ready = smc_fback_data_ready;
+ clcsk->sk_write_space = smc_fback_write_space;
+ clcsk->sk_error_report = smc_fback_error_report;
+
+ smc->clcsock->sk->sk_user_data =
+ (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
}
+out:
+ mutex_unlock(&smc->clcsock_release_lock);
+ return rc;
}
/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
- smc_switch_to_fallback(smc, reason_code);
+ struct net *net = sock_net(&smc->sk);
+ int rc = 0;
+
+ rc = smc_switch_to_fallback(smc, reason_code);
+ if (rc) { /* fallback fails */
+ this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
+ if (smc->sk.sk_state == SMC_INIT)
+ sock_put(&smc->sk); /* passive closing */
+ return rc;
+ }
smc_copy_sock_settings_to_clc(smc);
smc->connect_nonblock = 0;
if (smc->sk.sk_state == SMC_INIT)
@@ -1518,11 +1649,12 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
{
/* RDMA setup failed, switch back to TCP */
smc_conn_abort(new_smc, local_first);
- if (reason_code < 0) { /* error, no fallback possible */
+ if (reason_code < 0 ||
+ smc_switch_to_fallback(new_smc, reason_code)) {
+ /* error, no fallback possible */
smc_listen_out_err(new_smc);
return;
}
- smc_switch_to_fallback(new_smc, reason_code);
if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
if (smc_clc_send_decline(new_smc, reason_code, version) < 0) {
smc_listen_out_err(new_smc);
@@ -1964,8 +2096,11 @@ static void smc_listen_work(struct work_struct *work)
/* check if peer is smc capable */
if (!tcp_sk(newclcsock->sk)->syn_smc) {
- smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
- smc_listen_out_connected(new_smc);
+ rc = smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
+ if (rc)
+ smc_listen_out_err(new_smc);
+ else
+ smc_listen_out_connected(new_smc);
return;
}
@@ -2094,10 +2229,9 @@ out:
static void smc_clcsock_data_ready(struct sock *listen_clcsock)
{
- struct smc_sock *lsmc;
+ struct smc_sock *lsmc =
+ smc_clcsock_user_data(listen_clcsock);
- lsmc = (struct smc_sock *)
- ((uintptr_t)listen_clcsock->sk_user_data & ~SK_USER_DATA_NOCOPY);
if (!lsmc)
return;
lsmc->clcsk_data_ready(listen_clcsock);
@@ -2254,7 +2388,9 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (msg->msg_flags & MSG_FASTOPEN) {
if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
- smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
+ rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
+ if (rc)
+ goto out;
} else {
rc = -EINVAL;
goto out;
@@ -2447,6 +2583,11 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
/* generic setsockopts reaching us here always apply to the
* CLC socket
*/
+ mutex_lock(&smc->clcsock_release_lock);
+ if (!smc->clcsock) {
+ mutex_unlock(&smc->clcsock_release_lock);
+ return -EBADF;
+ }
if (unlikely(!smc->clcsock->ops->setsockopt))
rc = -EOPNOTSUPP;
else
@@ -2456,6 +2597,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
sk->sk_err = smc->clcsock->sk->sk_err;
sk_error_report(sk);
}
+ mutex_unlock(&smc->clcsock_release_lock);
if (optlen < sizeof(int))
return -EINVAL;
@@ -2472,7 +2614,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
case TCP_FASTOPEN_NO_COOKIE:
/* option not supported by SMC */
if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
- smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
+ rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
} else {
rc = -EINVAL;
}
@@ -2515,13 +2657,23 @@ static int smc_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
struct smc_sock *smc;
+ int rc;
smc = smc_sk(sock->sk);
+ mutex_lock(&smc->clcsock_release_lock);
+ if (!smc->clcsock) {
+ mutex_unlock(&smc->clcsock_release_lock);
+ return -EBADF;
+ }
/* socket options apply to the CLC socket */
- if (unlikely(!smc->clcsock->ops->getsockopt))
+ if (unlikely(!smc->clcsock->ops->getsockopt)) {
+ mutex_unlock(&smc->clcsock_release_lock);
return -EOPNOTSUPP;
- return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
- optval, optlen);
+ }
+ rc = smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
+ optval, optlen);
+ mutex_unlock(&smc->clcsock_release_lock);
+ return rc;
}
static int smc_ioctl(struct socket *sock, unsigned int cmd,
@@ -2935,12 +3087,14 @@ static int __init smc_init(void)
rc = tcp_register_ulp(&smc_ulp_ops);
if (rc) {
pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc);
- goto out_sock;
+ goto out_ib;
}
static_branch_enable(&tcp_have_smc);
return 0;
+out_ib:
+ smc_ib_unregister_client();
out_sock:
sock_unregister(PF_SMC);
out_proto6:
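The fallback rework above parks a special wait entry on the clcsock's wait queue whose wake function only records that a wakeup fired, and with which key, so it can be replayed on the smc socket afterwards. A toy version of that mark-woken trick; the single-entry queue and all names are invented:

/* A wake function that does not wake anything: it marks that a wakeup
 * fired and stores the key, letting the caller forward the event to a
 * different wait queue, as smc_fback_forward_wakeup() does above.
 */
#include <stdbool.h>
#include <stdio.h>

struct wait_entry {
        int (*func)(struct wait_entry *we, void *key);
};

struct mark_woken {
        struct wait_entry wait;   /* must stay first for the cast below */
        bool woken;
        void *key;
};

static int mark_woken_fn(struct wait_entry *we, void *key)
{
        struct mark_woken *m = (struct mark_woken *)we;

        m->woken = true;
        m->key = key;
        return 0;
}

/* stand-in for __wake_up(): invoke whatever entry is registered */
static void wake_up_queue(struct wait_entry *we, void *key)
{
        we->func(we, key);
}

int main(void)
{
        struct mark_woken m = { .wait.func = mark_woken_fn };

        wake_up_queue(&m.wait, (void *)0x301 /* e.g. an EPOLLIN-ish key */);
        if (m.woken)
                printf("forwarding wakeup, key=%p\n", m.key);
        return 0;
}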
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 3d0b8e300deb..37b2001a0255 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -139,6 +139,12 @@ enum smc_urg_state {
SMC_URG_READ = 3, /* data was already read */
};
+struct smc_mark_woken {
+ bool woken;
+ void *key;
+ wait_queue_entry_t wait_entry;
+};
+
struct smc_connection {
struct rb_node alert_node;
struct smc_link_group *lgr; /* link group of connection */
@@ -228,8 +234,14 @@ struct smc_connection {
struct smc_sock { /* smc sock container */
struct sock sk;
struct socket *clcsock; /* internal tcp socket */
+ void (*clcsk_state_change)(struct sock *sk);
+ /* original state_change fct. */
void (*clcsk_data_ready)(struct sock *sk);
- /* original data_ready fct. **/
+ /* original data_ready fct. */
+ void (*clcsk_write_space)(struct sock *sk);
+ /* original write_space fct. */
+ void (*clcsk_error_report)(struct sock *sk);
+ /* original error_report fct. */
struct smc_connection conn; /* smc connection */
struct smc_sock *listen_smc; /* listen parent */
struct work_struct connect_work; /* handle non-blocking connect*/
@@ -264,6 +276,12 @@ static inline struct smc_sock *smc_sk(const struct sock *sk)
return (struct smc_sock *)sk;
}
+static inline struct smc_sock *smc_clcsock_user_data(struct sock *clcsk)
+{
+ return (struct smc_sock *)
+ ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY);
+}
+
extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */
extern struct workqueue_struct *smc_close_wq; /* wq for close work */
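The new smc_clcsock_user_data() helper masks SK_USER_DATA_NOCOPY out of sk_user_data before treating it as a pointer. A self-contained sketch of that low-bit tagged-pointer scheme; NOCOPY_FLAG is illustrative, and the trick assumes the pointee is at least 2-byte aligned so bit 0 is free:

/* Store an owner pointer with a flag OR'd into its low bit; readers
 * must mask the flag off before dereferencing, exactly as the helper
 * above does.
 */
#include <stdint.h>
#include <stdio.h>

#define NOCOPY_FLAG 0x1UL

struct owner {
        int id;
};

static void *store_tagged(struct owner *o)
{
        return (void *)((uintptr_t)o | NOCOPY_FLAG);
}

static struct owner *load_owner(void *user_data)
{
        return (struct owner *)((uintptr_t)user_data & ~NOCOPY_FLAG);
}

int main(void)
{
        static struct owner o = { .id = 42 };
        void *user_data = store_tagged(&o);

        printf("owner id: %d\n", load_owner(user_data)->id);
        return 0;
}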
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 29525d03b253..be7d704976ff 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1161,8 +1161,8 @@ void smc_conn_free(struct smc_connection *conn)
cancel_work_sync(&conn->abort_work);
}
if (!list_empty(&lgr->list)) {
- smc_lgr_unregister_conn(conn);
smc_buf_unuse(conn, lgr); /* allow buffer reuse */
+ smc_lgr_unregister_conn(conn);
}
if (!lgr->conns_num)
@@ -1864,7 +1864,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
(ini->smcd_version == SMC_V2 ||
lgr->vlan_id == ini->vlan_id) &&
(role == SMC_CLNT || ini->is_smcd ||
- lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
+ (lgr->conns_num < SMC_RMBS_PER_LGR_MAX &&
+ !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
/* link group found */
ini->first_contact_local = 0;
conn->lgr = lgr;
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index b8898c787d23..1fca2f90a9c7 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -146,13 +146,11 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
(req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) &&
!list_empty(&smc->conn.lgr->list)) {
struct smc_link *link = smc->conn.lnk;
- struct net *net = read_pnet(&link->smcibdev->ibdev->coredev.rdma_net);
struct smc_diag_lgrinfo linfo = {
.role = smc->conn.lgr->role,
.lnk[0].ibport = link->ibport,
.lnk[0].link_id = link->link_id,
- .lnk[0].net_cookie = net->net_cookie,
};
memcpy(linfo.lnk[0].ibname,
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 291f1484a1b7..29f0a559d884 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -113,7 +113,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
pnettable = &sn->pnettable;
/* remove table entry */
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist,
list) {
if (!pnet_name ||
@@ -131,7 +131,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
rc = 0;
}
}
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
/* if this is not the initial namespace, stop here */
if (net != &init_net)
@@ -192,7 +192,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev)
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev &&
!strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) {
@@ -206,7 +206,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev)
break;
}
}
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
@@ -224,7 +224,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) {
dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker);
@@ -237,7 +237,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
break;
}
}
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
@@ -368,12 +368,9 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
new_pe->type = SMC_PNET_ETH;
memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
strncpy(new_pe->eth_name, eth_name, IFNAMSIZ);
- new_pe->ndev = ndev;
- if (ndev)
- netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_KERNEL);
rc = -EEXIST;
new_netdev = true;
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
if (tmp_pe->type == SMC_PNET_ETH &&
!strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) {
@@ -382,10 +379,15 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
}
}
if (new_netdev) {
+ if (ndev) {
+ new_pe->ndev = ndev;
+ netdev_tracker_alloc(ndev, &new_pe->dev_tracker,
+ GFP_ATOMIC);
+ }
list_add_tail(&new_pe->list, &pnettable->pnetlist);
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
} else {
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
kfree(new_pe);
goto out_put;
}
@@ -446,7 +448,7 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
new_pe->ib_port = ib_port;
new_ibdev = true;
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
if (tmp_pe->type == SMC_PNET_IB &&
!strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
@@ -456,9 +458,9 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
}
if (new_ibdev) {
list_add_tail(&new_pe->list, &pnettable->pnetlist);
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
} else {
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
kfree(new_pe);
}
return (new_ibdev) ? 0 : -EEXIST;
@@ -603,7 +605,7 @@ static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
pnettable = &sn->pnettable;
/* dump pnettable entries */
- read_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid))
continue;
@@ -618,7 +620,7 @@ static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
break;
}
}
- read_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return idx;
}
@@ -862,7 +864,7 @@ int smc_pnet_net_init(struct net *net)
struct smc_pnetids_ndev *pnetids_ndev = &sn->pnetids_ndev;
INIT_LIST_HEAD(&pnettable->pnetlist);
- rwlock_init(&pnettable->lock);
+ mutex_init(&pnettable->lock);
INIT_LIST_HEAD(&pnetids_ndev->list);
rwlock_init(&pnetids_ndev->lock);
@@ -942,7 +944,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
- read_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) {
/* get pnetid of netdev device */
@@ -951,7 +953,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
break;
}
}
- read_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
@@ -1154,7 +1156,7 @@ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
sn = net_generic(&init_net, smc_net_id);
pnettable = &sn->pnettable;
- read_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
if (tmp_pe->type == SMC_PNET_IB &&
!strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) &&
@@ -1164,7 +1166,7 @@ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
break;
}
}
- read_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
@@ -1183,7 +1185,7 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
sn = net_generic(&init_net, smc_net_id);
pnettable = &sn->pnettable;
- read_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
if (tmp_pe->type == SMC_PNET_IB &&
!strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
@@ -1192,7 +1194,7 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
break;
}
}
- read_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index 14039272f7e4..80a88eea4949 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -29,7 +29,7 @@ struct smc_link_group;
* @pnetlist: List of PNETIDs
*/
struct smc_pnettable {
- rwlock_t lock;
+ struct mutex lock;
struct list_head pnetlist;
};
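With pnettable->lock now a mutex, the add path in smc_pnet_add_eth() allocates the new entry first, searches for a duplicate under the lock, and then either inserts it or drops the lock and frees it. A pthread sketch of that allocate-outside/insert-under-lock shape; all names are invented and pthread_mutex_t stands in for the pnettable mutex:

/* Allocate before locking so the critical section stays short, then
 * resolve the duplicate race under the lock.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct entry {
        char name[16];
        struct entry *next;
};

static struct entry *table;
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

static int table_add(const char *name)
{
        struct entry *e, *new_e = calloc(1, sizeof(*new_e));

        if (!new_e)
                return -1;
        snprintf(new_e->name, sizeof(new_e->name), "%s", name);

        pthread_mutex_lock(&table_lock);
        for (e = table; e; e = e->next) {
                if (!strcmp(e->name, name)) {
                        pthread_mutex_unlock(&table_lock);
                        free(new_e);            /* duplicate: discard ours */
                        return -2;
                }
        }
        new_e->next = table;
        table = new_e;
        pthread_mutex_unlock(&table_lock);
        return 0;
}

int main(void)
{
        printf("first add: %d\n", table_add("eth0"));
        printf("second add: %d\n", table_add("eth0"));
        return 0;
}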
diff --git a/net/socket.c b/net/socket.c
index 50cf75730fd7..6887840682bb 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -301,7 +301,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
{
struct socket_alloc *ei;
- ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
+ ei = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
init_waitqueue_head(&ei->socket.wq.wait);
@@ -3448,7 +3448,7 @@ EXPORT_SYMBOL(kernel_connect);
* @addr: address holder
*
* Fills the @addr pointer with the address which the socket is bound.
- * Returns 0 or an error code.
+ * Returns the length of the address in bytes or an error code.
*/
int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
@@ -3463,7 +3463,7 @@ EXPORT_SYMBOL(kernel_getsockname);
* @addr: address holder
*
* Fills the @addr pointer with the address which the socket is connected.
- * Returns 0 or an error code.
+ * Returns the length of the address in bytes or an error code.
*/
int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
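The kernel-doc fixes above clarify that kernel_getsockname()/kernel_getpeername() return the address length, not 0. The userspace getsockname(2) analogue below makes the same point, assuming a loopback UDP socket; the useful result is how many bytes of the sockaddr were filled:

#include <arpa/inet.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        struct sockaddr_in sin = {
                .sin_family = AF_INET,
                .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
        };
        struct sockaddr_storage ss;
        socklen_t len = sizeof(ss);
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        if (fd < 0 || bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0)
                return 1;
        if (getsockname(fd, (struct sockaddr *)&ss, &len) < 0)
                return 1;
        printf("bound address occupies %u bytes\n", (unsigned)len);
        close(fd);
        return 0;
}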
diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c
index fe97f3106536..4a4082bb22ad 100644
--- a/net/sunrpc/auth_gss/gss_generic_token.c
+++ b/net/sunrpc/auth_gss/gss_generic_token.c
@@ -222,10 +222,8 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size,
if (ret)
return ret;
- if (!ret) {
- *buf_in = buf;
- *body_size = toksize;
- }
+ *buf_in = buf;
+ *body_size = toksize;
return ret;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index a312ea2bc440..c83fe618767c 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2900,7 +2900,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
unsigned long connect_timeout;
unsigned long reconnect_timeout;
unsigned char resvport, reuseport;
- int ret = 0;
+ int ret = 0, ident;
rcu_read_lock();
xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
@@ -2914,8 +2914,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
reuseport = xprt->reuseport;
connect_timeout = xprt->connect_timeout;
reconnect_timeout = xprt->max_reconnect_timeout;
+ ident = xprt->xprt_class->ident;
rcu_read_unlock();
+ if (!xprtargs->ident)
+ xprtargs->ident = ident;
xprt = xprt_create_transport(xprtargs);
if (IS_ERR(xprt)) {
ret = PTR_ERR(xprt);
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index ee5336d73fdd..0b6034fab9ab 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -197,7 +197,7 @@ static struct inode *
rpc_alloc_inode(struct super_block *sb)
{
struct rpc_inode *rpci;
- rpci = kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL);
+ rpci = alloc_inode_sb(sb, rpc_inode_cachep, GFP_KERNEL);
if (!rpci)
return NULL;
return &rpci->vfs_inode;
@@ -600,9 +600,9 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry)
dget(dentry);
ret = simple_rmdir(dir, dentry);
+ d_drop(dentry);
if (!ret)
fsnotify_rmdir(dir, dentry);
- d_delete(dentry);
dput(dentry);
return ret;
}
@@ -613,9 +613,9 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry)
dget(dentry);
ret = simple_unlink(dir, dentry);
+ d_drop(dentry);
if (!ret)
fsnotify_unlink(dir, dentry);
- d_delete(dentry);
dput(dentry);
return ret;
}
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 2aabec2b4bec..557004017548 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -448,7 +448,7 @@ __svc_init_bc(struct svc_serv *serv)
*/
static struct svc_serv *
__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
- const struct svc_serv_ops *ops)
+ int (*threadfn)(void *data))
{
struct svc_serv *serv;
unsigned int vers;
@@ -465,7 +465,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
bufsize = RPCSVC_MAXPAYLOAD;
serv->sv_max_payload = bufsize? bufsize : 4096;
serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE);
- serv->sv_ops = ops;
+ serv->sv_threadfn = threadfn;
xdrsize = 0;
while (prog) {
prog->pg_lovers = prog->pg_nvers-1;
@@ -511,22 +511,37 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
return serv;
}
-struct svc_serv *
-svc_create(struct svc_program *prog, unsigned int bufsize,
- const struct svc_serv_ops *ops)
+/**
+ * svc_create - Create an RPC service
+ * @prog: the RPC program the new service will handle
+ * @bufsize: maximum message size for @prog
+ * @threadfn: a function to service RPC requests for @prog
+ *
+ * Returns an instantiated struct svc_serv object or NULL.
+ */
+struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize,
+ int (*threadfn)(void *data))
{
- return __svc_create(prog, bufsize, /*npools*/1, ops);
+ return __svc_create(prog, bufsize, 1, threadfn);
}
EXPORT_SYMBOL_GPL(svc_create);
-struct svc_serv *
-svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
- const struct svc_serv_ops *ops)
+/**
+ * svc_create_pooled - Create an RPC service with pooled threads
+ * @prog: the RPC program the new service will handle
+ * @bufsize: maximum message size for @prog
+ * @threadfn: a function to service RPC requests for @prog
+ *
+ * Returns an instantiated struct svc_serv object or NULL.
+ */
+struct svc_serv *svc_create_pooled(struct svc_program *prog,
+ unsigned int bufsize,
+ int (*threadfn)(void *data))
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
- serv = __svc_create(prog, bufsize, npools, ops);
+ serv = __svc_create(prog, bufsize, npools, threadfn);
if (!serv)
goto out_err;
return serv;
@@ -536,15 +551,6 @@ out_err:
}
EXPORT_SYMBOL_GPL(svc_create_pooled);
-void svc_shutdown_net(struct svc_serv *serv, struct net *net)
-{
- svc_close_net(serv, net);
-
- if (serv->sv_ops->svo_shutdown)
- serv->sv_ops->svo_shutdown(serv, net);
-}
-EXPORT_SYMBOL_GPL(svc_shutdown_net);
-
/*
* Destroy an RPC service. Should be called with appropriate locking to
* protect sv_permsocks and sv_tempsocks.
@@ -745,11 +751,9 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
if (IS_ERR(rqstp))
return PTR_ERR(rqstp);
- __module_get(serv->sv_ops->svo_module);
- task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp,
+ task = kthread_create_on_node(serv->sv_threadfn, rqstp,
node, "%s", serv->sv_name);
if (IS_ERR(task)) {
- module_put(serv->sv_ops->svo_module);
svc_exit_thread(rqstp);
return PTR_ERR(task);
}
@@ -1355,7 +1359,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
svc_authorise(rqstp);
close_xprt:
if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
- svc_close_xprt(rqstp->rq_xprt);
+ svc_xprt_close(rqstp->rq_xprt);
dprintk("svc: svc_process close\n");
return 0;
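Replacing svc_serv_ops with a bare threadfn means callers now hand the service exactly the one hook it still needs. A userspace sketch of the simplified shape; pthread_create stands in for kthread_create_on_node, and struct serv is invented:

/* The service stores a plain thread function instead of an ops table
 * and spawns workers with it directly.
 */
#include <pthread.h>
#include <stdio.h>

struct serv {
        const char *name;
        int (*threadfn)(void *data);
};

static void *thread_trampoline(void *arg)
{
        struct serv *serv = arg;

        serv->threadfn(serv);
        return NULL;
}

static int my_threadfn(void *data)
{
        struct serv *serv = data;

        printf("%s: worker running\n", serv->name);
        return 0;
}

int main(void)
{
        struct serv serv = { .name = "demo-svc", .threadfn = my_threadfn };
        pthread_t t;

        pthread_create(&t, NULL, thread_trampoline, &serv);
        pthread_join(t, NULL);
        return 0;
}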
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index b21ad7994147..0c117d3bfda8 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -266,12 +266,12 @@ void svc_xprt_received(struct svc_xprt *xprt)
}
/* As soon as we clear busy, the xprt could be closed and
- * 'put', so we need a reference to call svc_enqueue_xprt with:
+ * 'put', so we need a reference to call svc_xprt_enqueue with:
*/
svc_xprt_get(xprt);
smp_mb__before_atomic();
clear_bit(XPT_BUSY, &xprt->xpt_flags);
- xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt);
+ svc_xprt_enqueue(xprt);
svc_xprt_put(xprt);
}
EXPORT_SYMBOL_GPL(svc_xprt_received);
@@ -285,7 +285,7 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
svc_xprt_received(new);
}
-static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
struct net *net, const int family,
const unsigned short port, int flags,
const struct cred *cred)
@@ -321,21 +321,35 @@ static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
return -EPROTONOSUPPORT;
}
-int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+/**
+ * svc_xprt_create - Add a new listener to @serv
+ * @serv: target RPC service
+ * @xprt_name: transport class name
+ * @net: network namespace
+ * @family: network address family
+ * @port: listener port
+ * @flags: SVC_SOCK flags
+ * @cred: credential to bind to this transport
+ *
+ * Return values:
+ * %0: New listener added successfully
+ * %-EPROTONOSUPPORT: Requested transport type not supported
+ */
+int svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
struct net *net, const int family,
const unsigned short port, int flags,
const struct cred *cred)
{
int err;
- err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
+ err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
if (err == -EPROTONOSUPPORT) {
request_module("svc%s", xprt_name);
- err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
+ err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
}
return err;
}
-EXPORT_SYMBOL_GPL(svc_create_xprt);
+EXPORT_SYMBOL_GPL(svc_xprt_create);
/*
* Copy the local and remote xprt addresses to the rqstp structure
@@ -411,6 +425,8 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
smp_rmb();
xpt_flags = READ_ONCE(xprt->xpt_flags);
+ if (xpt_flags & BIT(XPT_BUSY))
+ return false;
if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
return true;
if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
@@ -423,7 +439,12 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
return false;
}
-void svc_xprt_do_enqueue(struct svc_xprt *xprt)
+/**
+ * svc_xprt_enqueue - Queue a transport on an idle nfsd thread
+ * @xprt: transport with data pending
+ *
+ */
+void svc_xprt_enqueue(struct svc_xprt *xprt)
{
struct svc_pool *pool;
struct svc_rqst *rqstp = NULL;
@@ -467,19 +488,6 @@ out_unlock:
put_cpu();
trace_svc_xprt_enqueue(xprt, rqstp);
}
-EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);
-
-/*
- * Queue up a transport with data pending. If there are idle nfsd
- * processes, wake 'em up.
- *
- */
-void svc_xprt_enqueue(struct svc_xprt *xprt)
-{
- if (test_bit(XPT_BUSY, &xprt->xpt_flags))
- return;
- xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt);
-}
EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
/*
@@ -1060,7 +1068,12 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
svc_xprt_put(xprt);
}
-void svc_close_xprt(struct svc_xprt *xprt)
+/**
+ * svc_xprt_close - Close a client connection
+ * @xprt: transport to disconnect
+ *
+ */
+void svc_xprt_close(struct svc_xprt *xprt)
{
trace_svc_xprt_close(xprt);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
@@ -1075,7 +1088,7 @@ void svc_close_xprt(struct svc_xprt *xprt)
*/
svc_delete_xprt(xprt);
}
-EXPORT_SYMBOL_GPL(svc_close_xprt);
+EXPORT_SYMBOL_GPL(svc_xprt_close);
static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net)
{
@@ -1127,7 +1140,11 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
}
}
-/*
+/**
+ * svc_xprt_destroy_all - Destroy transports associated with @serv
+ * @serv: RPC service to be shut down
+ * @net: target network namespace
+ *
* Server threads may still be running (especially in the case where the
* service is still running in other network namespaces).
*
@@ -1139,7 +1156,7 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
* threads, we may need to wait a little while and then check again to
* see if they're done.
*/
-void svc_close_net(struct svc_serv *serv, struct net *net)
+void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net)
{
int delay = 0;
@@ -1150,6 +1167,7 @@ void svc_close_net(struct svc_serv *serv, struct net *net)
msleep(delay++);
}
}
+EXPORT_SYMBOL_GPL(svc_xprt_destroy_all);
/*
* Handle defer and revisit of requests
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 5a8b8e03fdd4..e72ba2f13f6c 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -31,10 +31,12 @@
*/
extern struct auth_ops svcauth_null;
extern struct auth_ops svcauth_unix;
+extern struct auth_ops svcauth_tls;
static struct auth_ops __rcu *authtab[RPC_AUTH_MAXFLAVOR] = {
[RPC_AUTH_NULL] = (struct auth_ops __force __rcu *)&svcauth_null,
[RPC_AUTH_UNIX] = (struct auth_ops __force __rcu *)&svcauth_unix,
+ [RPC_AUTH_TLS] = (struct auth_ops __force __rcu *)&svcauth_tls,
};
static struct auth_ops *
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index d7ed7d49115a..b1efc34db6ed 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -37,6 +37,7 @@ struct unix_domain {
extern struct auth_ops svcauth_null;
extern struct auth_ops svcauth_unix;
+extern struct auth_ops svcauth_tls;
static void svcauth_unix_domain_release_rcu(struct rcu_head *head)
{
@@ -789,6 +790,65 @@ struct auth_ops svcauth_null = {
static int
+svcauth_tls_accept(struct svc_rqst *rqstp)
+{
+ struct svc_cred *cred = &rqstp->rq_cred;
+ struct kvec *argv = rqstp->rq_arg.head;
+ struct kvec *resv = rqstp->rq_res.head;
+
+ if (argv->iov_len < XDR_UNIT * 3)
+ return SVC_GARBAGE;
+
+ /* Call's cred length */
+ if (svc_getu32(argv) != xdr_zero) {
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
+ return SVC_DENIED;
+ }
+
+ /* Call's verifier flavor and its length */
+ if (svc_getu32(argv) != rpc_auth_null ||
+ svc_getu32(argv) != xdr_zero) {
+ rqstp->rq_auth_stat = rpc_autherr_badverf;
+ return SVC_DENIED;
+ }
+
+ /* AUTH_TLS is not valid on non-NULL procedures */
+ if (rqstp->rq_proc != 0) {
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
+ return SVC_DENIED;
+ }
+
+ /* Mapping to nobody uid/gid is required */
+ cred->cr_uid = INVALID_UID;
+ cred->cr_gid = INVALID_GID;
+ cred->cr_group_info = groups_alloc(0);
+ if (cred->cr_group_info == NULL)
+ return SVC_CLOSE; /* kmalloc failure - client must retry */
+
+ /* Reply's verifier */
+ svc_putnl(resv, RPC_AUTH_NULL);
+ if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) {
+ svc_putnl(resv, 8);
+ memcpy(resv->iov_base + resv->iov_len, "STARTTLS", 8);
+ resv->iov_len += 8;
+ } else
+ svc_putnl(resv, 0);
+
+ rqstp->rq_cred.cr_flavor = RPC_AUTH_TLS;
+ return SVC_OK;
+}
+
+struct auth_ops svcauth_tls = {
+ .name = "tls",
+ .owner = THIS_MODULE,
+ .flavour = RPC_AUTH_TLS,
+ .accept = svcauth_tls_accept,
+ .release = svcauth_null_release,
+ .set_client = svcauth_unix_set_client,
+};
+
+static int
svcauth_unix_accept(struct svc_rqst *rqstp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
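A sketch of the credential checks svcauth_tls_accept() performs above: empty cred body, an AUTH_NULL verifier with an empty body, and the NULL procedure only. The XDR words are read as raw big-endian u32s here; the constants and return codes are illustrative, not the SVC_* values:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define XDR_UNIT 4
#define AUTH_NULL 0

static uint32_t get_u32(const uint8_t **p)
{
        uint32_t v;

        memcpy(&v, *p, XDR_UNIT);
        *p += XDR_UNIT;
        return ntohl(v);
}

static int tls_accept(const uint8_t *buf, size_t len, uint32_t proc)
{
        const uint8_t *p = buf;

        if (len < XDR_UNIT * 3)
                return -1;              /* garbage: too short to parse */
        if (get_u32(&p) != 0)
                return -2;              /* bad cred: body not empty */
        if (get_u32(&p) != AUTH_NULL || get_u32(&p) != 0)
                return -3;              /* bad verifier */
        if (proc != 0)
                return -4;              /* only the NULL procedure allowed */
        return 0;
}

int main(void)
{
        uint8_t ok[12] = { 0 };         /* three zero XDR words */

        printf("accept: %d\n", tls_accept(ok, sizeof(ok), 0));
        printf("wrong proc: %d\n", tls_accept(ok, sizeof(ok), 1));
        return 0;
}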
diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
index 2766dd21935b..05c758da6a92 100644
--- a/net/sunrpc/sysfs.c
+++ b/net/sunrpc/sysfs.c
@@ -115,11 +115,14 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj,
}
sock = container_of(xprt, struct sock_xprt, xprt);
- if (kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0)
+ mutex_lock(&sock->recv_mutex);
+ if (sock->sock == NULL ||
+ kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0)
goto out;
ret = sprintf(buf, "%pISc\n", &saddr);
out:
+ mutex_unlock(&sock->recv_mutex);
xprt_put(xprt);
return ret + 1;
}
@@ -295,8 +298,10 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
online = 1;
else if (!strncmp(buf, "remove", 6))
remove = 1;
- else
- return -EINVAL;
+ else {
+ count = -EINVAL;
+ goto out_put;
+ }
if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) {
count = -EINTR;
@@ -307,25 +312,28 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
goto release_tasks;
}
if (offline) {
- set_bit(XPRT_OFFLINE, &xprt->state);
- spin_lock(&xps->xps_lock);
- xps->xps_nactive--;
- spin_unlock(&xps->xps_lock);
+ if (!test_and_set_bit(XPRT_OFFLINE, &xprt->state)) {
+ spin_lock(&xps->xps_lock);
+ xps->xps_nactive--;
+ spin_unlock(&xps->xps_lock);
+ }
} else if (online) {
- clear_bit(XPRT_OFFLINE, &xprt->state);
- spin_lock(&xps->xps_lock);
- xps->xps_nactive++;
- spin_unlock(&xps->xps_lock);
+ if (test_and_clear_bit(XPRT_OFFLINE, &xprt->state)) {
+ spin_lock(&xps->xps_lock);
+ xps->xps_nactive++;
+ spin_unlock(&xps->xps_lock);
+ }
} else if (remove) {
if (test_bit(XPRT_OFFLINE, &xprt->state)) {
- set_bit(XPRT_REMOVE, &xprt->state);
- xprt_force_disconnect(xprt);
- if (test_bit(XPRT_CONNECTED, &xprt->state)) {
- if (!xprt->sending.qlen &&
- !xprt->pending.qlen &&
- !xprt->backlog.qlen &&
- !atomic_long_read(&xprt->queuelen))
- rpc_xprt_switch_remove_xprt(xps, xprt);
+ if (!test_and_set_bit(XPRT_REMOVE, &xprt->state)) {
+ xprt_force_disconnect(xprt);
+ if (test_bit(XPRT_CONNECTED, &xprt->state)) {
+ if (!xprt->sending.qlen &&
+ !xprt->pending.qlen &&
+ !xprt->backlog.qlen &&
+ !atomic_long_read(&xprt->queuelen))
+ rpc_xprt_switch_remove_xprt(xps, xprt);
+ }
}
} else {
count = -EINVAL;
@@ -422,6 +430,7 @@ static struct attribute *rpc_sysfs_xprt_attrs[] = {
&rpc_sysfs_xprt_change_state.attr,
NULL,
};
+ATTRIBUTE_GROUPS(rpc_sysfs_xprt);
static struct kobj_attribute rpc_sysfs_xprt_switch_info =
__ATTR(xprt_switch_info, 0444, rpc_sysfs_xprt_switch_info_show, NULL);
@@ -430,6 +439,7 @@ static struct attribute *rpc_sysfs_xprt_switch_attrs[] = {
&rpc_sysfs_xprt_switch_info.attr,
NULL,
};
+ATTRIBUTE_GROUPS(rpc_sysfs_xprt_switch);
static struct kobj_type rpc_sysfs_client_type = {
.release = rpc_sysfs_client_release,
@@ -439,14 +449,14 @@ static struct kobj_type rpc_sysfs_client_type = {
static struct kobj_type rpc_sysfs_xprt_switch_type = {
.release = rpc_sysfs_xprt_switch_release,
- .default_attrs = rpc_sysfs_xprt_switch_attrs,
+ .default_groups = rpc_sysfs_xprt_switch_groups,
.sysfs_ops = &kobj_sysfs_ops,
.namespace = rpc_sysfs_xprt_switch_namespace,
};
static struct kobj_type rpc_sysfs_xprt_type = {
.release = rpc_sysfs_xprt_release,
- .default_attrs = rpc_sysfs_xprt_attrs,
+ .default_groups = rpc_sysfs_xprt_groups,
.sysfs_ops = &kobj_sysfs_ops,
.namespace = rpc_sysfs_xprt_namespace,
};
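The sysfs hunk above guards the nactive counter with test_and_set_bit()/test_and_clear_bit() so writing the same state twice cannot skew it. A C11 sketch of that idempotent-transition shape; atomic_fetch_or on a flags word plays the part of test_and_set_bit:

/* Only the caller that actually flips the bit adjusts the counter;
 * repeated writes of the same state become no-ops.
 */
#include <stdatomic.h>
#include <stdio.h>

#define FLAG_OFFLINE 0x1u

static atomic_uint state;
static int nactive = 1;

static void set_offline(void)
{
        /* fetch_or returns the *old* flags: first caller sees bit clear */
        if (!(atomic_fetch_or(&state, FLAG_OFFLINE) & FLAG_OFFLINE))
                nactive--;
}

int main(void)
{
        set_offline();
        set_offline();                        /* second write is a no-op */
        printf("nactive = %d\n", nactive);    /* 0, not -1 */
        return 0;
}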
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 17f174d6ea3b..faba7136dd9a 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -13,10 +13,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
#undef RPCRDMA_BACKCHANNEL_DEBUG
/**
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index ff699307e820..515dd7a66a04 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -45,10 +45,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
static void frwr_cid_init(struct rpcrdma_ep *ep,
struct rpcrdma_mr *mr)
{
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 8035a983c8ce..281ddb87ac8d 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -54,10 +54,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
/* Returns size of largest RPC-over-RDMA header in a Call message
*
* The largest Call header contains a full-size Read list and a
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 16897fcb659c..85c8cdda98b1 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -198,7 +198,7 @@ static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
ret = rpcrdma_bc_send_request(rdma, rqst);
if (ret == -ENOTCONN)
- svc_close_xprt(sxprt);
+ svc_xprt_close(sxprt);
return ret;
}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 16e5696314a4..42e375dbdadb 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -60,10 +60,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
/*
* tunables
*/
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3d3673ba9e1e..7b5fce2faa10 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -63,17 +63,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-/*
- * Globals/Macros
- */
-
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
-/*
- * internal functions
- */
static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt,
@@ -274,8 +263,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
ep->re_connect_status = -ENETUNREACH;
goto wake_connect_worker;
case RDMA_CM_EVENT_REJECTED:
- dprintk("rpcrdma: connection to %pISpc rejected: %s\n",
- sap, rdma_reject_msg(id, event->status));
ep->re_connect_status = -ECONNREFUSED;
if (event->status == IB_CM_REJ_STALE_CONN)
ep->re_connect_status = -ENOTCONN;
@@ -291,8 +278,6 @@ disconnected:
break;
}
- dprintk("RPC: %s: %pISpc on %s/frwr: %s\n", __func__, sap,
- ep->re_id->device->name, rdma_event_msg(event->event));
return 0;
}
@@ -419,14 +404,6 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
ep->re_attr.qp_type = IB_QPT_RC;
ep->re_attr.port_num = ~0;
- dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
- "iovs: send %d recv %d\n",
- __func__,
- ep->re_attr.cap.max_send_wr,
- ep->re_attr.cap.max_recv_wr,
- ep->re_attr.cap.max_send_sge,
- ep->re_attr.cap.max_recv_sge);
-
ep->re_send_batch = ep->re_max_requests >> 3;
ep->re_send_count = ep->re_send_batch;
init_waitqueue_head(&ep->re_connect_wait);
@@ -436,6 +413,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
IB_POLL_WORKQUEUE);
if (IS_ERR(ep->re_attr.send_cq)) {
rc = PTR_ERR(ep->re_attr.send_cq);
+ ep->re_attr.send_cq = NULL;
goto out_destroy;
}
@@ -444,6 +422,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
IB_POLL_WORKQUEUE);
if (IS_ERR(ep->re_attr.recv_cq)) {
rc = PTR_ERR(ep->re_attr.recv_cq);
+ ep->re_attr.recv_cq = NULL;
goto out_destroy;
}
ep->re_receive_count = 0;
@@ -482,6 +461,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
ep->re_pd = ib_alloc_pd(device, 0);
if (IS_ERR(ep->re_pd)) {
rc = PTR_ERR(ep->re_pd);
+ ep->re_pd = NULL;
goto out_destroy;
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d8ee06a9650a..0f39e08ee580 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1641,7 +1641,12 @@ static int xs_get_srcport(struct sock_xprt *transport)
unsigned short get_srcport(struct rpc_xprt *xprt)
{
struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt);
- return xs_sock_getport(sock->sock);
+ unsigned short ret = 0;
+ mutex_lock(&sock->recv_mutex);
+ if (sock->sock)
+ ret = xs_sock_getport(sock->sock);
+ mutex_unlock(&sock->recv_mutex);
+ return ret;
}
EXPORT_SYMBOL(get_srcport);
@@ -1910,7 +1915,7 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
int ret;
- if (RPC_IS_ASYNC(task)) {
+ if (RPC_IS_ASYNC(task)) {
/*
* We want the AF_LOCAL connect to be resolved in the
* filesystem namespace of the process making the rpc
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 473a790f5894..a2f9c9640716 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -352,16 +352,18 @@ static int tipc_enable_bearer(struct net *net, const char *name,
goto rejected;
}
- test_and_set_bit_lock(0, &b->up);
- rcu_assign_pointer(tn->bearer_list[bearer_id], b);
- if (skb)
- tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
-
+ /* Create monitoring data before accepting activate messages */
if (tipc_mon_create(net, bearer_id)) {
bearer_disable(net, b);
+ kfree_skb(skb);
return -ENOMEM;
}
+ test_and_set_bit_lock(0, &b->up);
+ rcu_assign_pointer(tn->bearer_list[bearer_id], b);
+ if (skb)
+ tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
+
pr_info("Enabled bearer <%s>, priority %u\n", name, prio);
return res;
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index 9325479295b8..f09316a9035f 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -2276,7 +2276,7 @@ static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr)
struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx;
struct tipc_aead_key *skey = NULL;
u16 key_gen = msg_key_gen(hdr);
- u16 size = msg_data_sz(hdr);
+ u32 size = msg_data_sz(hdr);
u8 *data = msg_data(hdr);
unsigned int keylen;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 8d9e09f48f4c..e260c0d557f5 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2200,7 +2200,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
struct tipc_msg *hdr = buf_msg(skb);
struct tipc_gap_ack_blks *ga = NULL;
bool reply = msg_probe(hdr), retransmitted = false;
- u16 dlen = msg_data_sz(hdr), glen = 0;
+ u32 dlen = msg_data_sz(hdr), glen = 0;
u16 peers_snd_nxt = msg_next_sent(hdr);
u16 peers_tol = msg_link_tolerance(hdr);
u16 peers_prio = msg_linkprio(hdr);
@@ -2214,6 +2214,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
void *data;
trace_tipc_proto_rcv(skb, false, l->name);
+
+ if (dlen > U16_MAX)
+ goto exit;
+
if (tipc_link_is_blocked(l) || !xmitq)
goto exit;
@@ -2282,6 +2286,11 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
break;
case STATE_MSG:
+ /* Validate Gap ACK blocks, drop if invalid */
+ glen = tipc_get_gap_ack_blks(&ga, l, hdr, true);
+ if (glen > dlen)
+ break;
+
l->rcv_nxt_state = msg_seqno(hdr) + 1;
/* Update own tolerance if peer indicates a non-zero value */
@@ -2307,9 +2316,6 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
break;
}
- /* Receive Gap ACK blocks from peer if any */
- glen = tipc_get_gap_ack_blks(&ga, l, hdr, true);
-
tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr,
&l->mon_state, l->bearer_id);
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 407619697292..2f4d23238a7e 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -496,6 +496,8 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
state->probing = false;
/* Sanity check received domain record */
+ if (new_member_cnt > MAX_MON_DOMAIN)
+ return;
if (dlen < dom_rec_len(arrv_dom, 0))
return;
if (dlen != dom_rec_len(arrv_dom, new_member_cnt))
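The tipc hunks above widen dlen to u32 so an oversized header field cannot wrap, reject it against the protocol limit, and validate glen before computing dlen - glen. A standalone sketch of those checks; the limit and return codes are illustrative:

/* Read length fields at full width, bound them against the protocol
 * maximum, and verify the sub-record length before subtracting.
 */
#include <stdint.h>
#include <stdio.h>

#define PROTO_MAX UINT16_MAX

static int parse(uint32_t dlen, uint32_t glen)
{
        if (dlen > PROTO_MAX)           /* would have wrapped in a u16 */
                return -1;
        if (glen > dlen)                /* dlen - glen would underflow */
                return -2;
        printf("payload after gap-ack blocks: %u bytes\n", dlen - glen);
        return 0;
}

int main(void)
{
        parse(100, 24);                 /* fine */
        printf("oversized: %d\n", parse(70000, 0));
        printf("bad glen:  %d\n", parse(16, 64));
        return 0;
}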
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index bda902caa814..8267b751a526 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -313,7 +313,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
pr_warn_ratelimited("Failed to remove binding %u,%u from %u\n",
ua.sr.type, ua.sr.lower, node);
} else {
- pr_warn("Unrecognized name table message received\n");
+ pr_warn_ratelimited("Unknown name table message received\n");
}
return false;
}
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 01396dd1c899..1d8ba233d047 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -967,7 +967,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
list_for_each_entry(p, &sr->all_publ, all_publ)
if (p->key == *last_key)
break;
- if (p->key != *last_key)
+ if (list_entry_is_head(p, &sr->all_publ, all_publ))
return -EPIPE;
} else {
p = list_first_entry(&sr->all_publ,
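list_for_each_entry() never yields NULL: when the loop runs off the end, the cursor points at the list head reinterpreted as an entry, so the old p->key comparison read memory that is not a publication at all. list_entry_is_head() is the correct terminal test; the same conversion is applied to net/tipc/socket.c below. The failure mode in plain-list terms (a sketch; the kernel's intrusive lists leave a head sentinel where this sketch leaves NULL):

        #include <stddef.h>

        struct publ {
                unsigned int key;
                struct publ *next;
        };

        static struct publ *find_publ(struct publ *head, unsigned int last_key)
        {
                struct publ *p;

                for (p = head; p; p = p->next)
                        if (p->key == last_key)
                                break;

                /* Test the cursor itself; dereferencing p->key here would
                 * be invalid when the search failed (p == NULL here).
                 */
                return p;
        }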
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9947b7dfe1d2..6ef95ce565bd 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -403,7 +403,7 @@ static void tipc_node_write_unlock(struct tipc_node *n)
u32 flags = n->action_flags;
struct list_head *publ_list;
struct tipc_uaddr ua;
- u32 bearer_id;
+ u32 bearer_id, node;
if (likely(!flags)) {
write_unlock_bh(&n->lock);
@@ -413,7 +413,8 @@ static void tipc_node_write_unlock(struct tipc_node *n)
tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE,
TIPC_LINK_STATE, n->addr, n->addr);
sk.ref = n->link_id;
- sk.node = n->addr;
+ sk.node = tipc_own_addr(net);
+ node = n->addr;
bearer_id = n->link_id & 0xffff;
publ_list = &n->publ_list;
@@ -423,17 +424,17 @@ static void tipc_node_write_unlock(struct tipc_node *n)
write_unlock_bh(&n->lock);
if (flags & TIPC_NOTIFY_NODE_DOWN)
- tipc_publ_notify(net, publ_list, sk.node, n->capabilities);
+ tipc_publ_notify(net, publ_list, node, n->capabilities);
if (flags & TIPC_NOTIFY_NODE_UP)
- tipc_named_node_up(net, sk.node, n->capabilities);
+ tipc_named_node_up(net, node, n->capabilities);
if (flags & TIPC_NOTIFY_LINK_UP) {
- tipc_mon_peer_up(net, sk.node, bearer_id);
+ tipc_mon_peer_up(net, node, bearer_id);
tipc_nametbl_publish(net, &ua, &sk, sk.ref);
}
if (flags & TIPC_NOTIFY_LINK_DOWN) {
- tipc_mon_peer_down(net, sk.node, bearer_id);
+ tipc_mon_peer_down(net, node, bearer_id);
tipc_nametbl_withdraw(net, &ua, &sk, sk.ref);
}
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3e63c83e641c..7545321c3440 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3749,7 +3749,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
if (p->key == *last_publ)
break;
}
- if (p->key != *last_publ) {
+ if (list_entry_is_head(p, &tsk->publications, binding_sock)) {
/* We never set seq or call nl_dump_check_consistent()
* this means that setting prev_seq here will cause the
	 * consistency check to fail in the netlink callback
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 3235261f138d..f04abf662ec6 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -334,7 +334,8 @@ void vsock_remove_sock(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(vsock_remove_sock);
-void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
+void vsock_for_each_connected_socket(struct vsock_transport *transport,
+ void (*fn)(struct sock *sk))
{
int i;
@@ -343,8 +344,12 @@ void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) {
struct vsock_sock *vsk;
list_for_each_entry(vsk, &vsock_connected_table[i],
- connected_table)
+ connected_table) {
+ if (vsk->transport != transport)
+ continue;
+
fn(sk_vsock(vsk));
+ }
}
spin_unlock_bh(&vsock_table_lock);
@@ -1401,6 +1406,7 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE;
sock->state = SS_UNCONNECTED;
vsock_transport_cancel_pkt(vsk);
+ vsock_remove_connected(vsk);
goto out_wait;
} else if (timeout == 0) {
err = -ETIMEDOUT;
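Two independent fixes in af_vsock.c: vsock_for_each_connected_socket() now takes the transport raising the event, so a reset from one transport no longer tears down sockets owned by another, and an interrupted connect removes the socket from the connected table before returning. The filtered walk, reduced to its essentials (types are simplified stand-ins, not the kernel's):

        struct transport;

        struct conn_entry {
                struct transport *transport;
                struct conn_entry *next;
        };

        static void for_each_connected(struct conn_entry *head,
                                       struct transport *transport,
                                       void (*fn)(struct conn_entry *))
        {
                struct conn_entry *e;

                for (e = head; e; e = e->next) {
                        if (e->transport != transport)
                                continue;       /* owned by another transport */
                        fn(e);
                }
        }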
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index fb3302fff627..5afc194a58bb 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -24,6 +24,7 @@
static struct workqueue_struct *virtio_vsock_workqueue;
static struct virtio_vsock __rcu *the_virtio_vsock;
static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */
+static struct virtio_transport virtio_transport; /* forward declaration */
struct virtio_vsock {
struct virtio_device *vdev;
@@ -384,7 +385,8 @@ static void virtio_vsock_event_handle(struct virtio_vsock *vsock,
switch (le32_to_cpu(event->id)) {
case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET:
virtio_vsock_update_guest_cid(vsock);
- vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+ vsock_for_each_connected_socket(&virtio_transport.transport,
+ virtio_vsock_reset_sock);
break;
}
}
@@ -662,7 +664,8 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
synchronize_rcu();
/* Reset all connected sockets when the device disappear */
- vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+ vsock_for_each_connected_socket(&virtio_transport.transport,
+ virtio_vsock_reset_sock);
/* Stop all work handlers to make sure no one is accessing the device,
* so we can safely call virtio_reset_device().
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 7aef34e32bdf..b17dc9745188 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -75,6 +75,8 @@ static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
static int PROTOCOL_OVERRIDE = -1;
+static struct vsock_transport vmci_transport; /* forward declaration */
+
/* Helper function to convert from a VMCI error code to a VSock error code. */
static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
@@ -882,7 +884,8 @@ static void vmci_transport_qp_resumed_cb(u32 sub_id,
const struct vmci_event_data *e_data,
void *client_data)
{
- vsock_for_each_connected_socket(vmci_transport_handle_detach);
+ vsock_for_each_connected_socket(&vmci_transport,
+ vmci_transport_handle_detach);
}
static void vmci_transport_recv_pkt_work(struct work_struct *work)
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 1e9be50469ce..527ae669f6f7 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -33,7 +33,7 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
) > $@
-$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDI) \
+$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR) \
$(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR)/*.x509)
@$(kecho) " GEN $@"
$(Q)(set -e; \
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 3a54c8e6b6c6..f08d4b3bb148 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -332,29 +332,20 @@ static void cfg80211_event_work(struct work_struct *work)
void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev)
{
struct wireless_dev *wdev, *tmp;
- bool found = false;
ASSERT_RTNL();
- list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
+ list_for_each_entry_safe(wdev, tmp, &rdev->wiphy.wdev_list, list) {
if (wdev->nl_owner_dead) {
if (wdev->netdev)
dev_close(wdev->netdev);
- found = true;
- }
- }
-
- if (!found)
- return;
- wiphy_lock(&rdev->wiphy);
- list_for_each_entry_safe(wdev, tmp, &rdev->wiphy.wdev_list, list) {
- if (wdev->nl_owner_dead) {
+ wiphy_lock(&rdev->wiphy);
cfg80211_leave(rdev, wdev);
rdev_del_virtual_intf(rdev, wdev);
+ wiphy_unlock(&rdev->wiphy);
}
}
- wiphy_unlock(&rdev->wiphy);
}
static void cfg80211_destroy_iface_wk(struct work_struct *work)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 578bff9c378b..c01fbcc848e8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -13411,6 +13411,9 @@ static int handle_nan_filter(struct nlattr *attr_filter,
i = 0;
nla_for_each_nested(attr, attr_filter, rem) {
filter[i].filter = nla_memdup(attr, GFP_KERNEL);
+ if (!filter[i].filter)
+ goto err;
+
filter[i].len = nla_len(attr);
i++;
}
@@ -13423,6 +13426,15 @@ static int handle_nan_filter(struct nlattr *attr_filter,
}
return 0;
+
+err:
+ i = 0;
+ nla_for_each_nested(attr, attr_filter, rem) {
+ kfree(filter[i].filter);
+ i++;
+ }
+ kfree(filter);
+ return -ENOMEM;
}
static int nl80211_nan_add_func(struct sk_buff *skb,
@@ -17816,7 +17828,8 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
wdev->chandef = *chandef;
wdev->preset_chandef = *chandef;
- if (wdev->iftype == NL80211_IFTYPE_STATION &&
+ if ((wdev->iftype == NL80211_IFTYPE_STATION ||
+ wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) &&
!WARN_ON(!wdev->current_bss))
cfg80211_update_assoc_bss_entry(wdev, chandef->chan);
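The handle_nan_filter() change is the interesting one here (the last hunk simply extends the channel-switch BSS update to P2P clients): nla_memdup() can fail, and the old loop carried on with a NULL slot while leaking the duplicates already made. The new err path re-walks the attribute list and frees every slot populated so far; kfree(NULL) being a no-op makes the rollback loop safe even for the slot that failed. The same shape in portable C (a sketch of the pattern, not the nl80211 API):

        #include <stdlib.h>
        #include <string.h>

        static char **dup_all(const char *const *src, size_t n)
        {
                char **out = calloc(n, sizeof(*out));
                size_t i;

                if (!out)
                        return NULL;

                for (i = 0; i < n; i++) {
                        out[i] = strdup(src[i]);
                        if (!out[i])
                                goto err;       /* partial allocation */
                }
                return out;

        err:
                for (i = 0; i < n; i++)
                        free(out[i]);           /* free(NULL) is a no-op */
                free(out);
                return NULL;
        }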
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 3fa066419d37..39bce5d764de 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -223,6 +223,9 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
if (x->encap || x->tfcpad)
return -EINVAL;
+ if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND))
+ return -EINVAL;
+
dev = dev_get_by_index(net, xuo->ifindex);
if (!dev) {
if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) {
@@ -262,7 +265,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
netdev_tracker_alloc(dev, &xso->dev_tracker, GFP_ATOMIC);
xso->real_dev = dev;
xso->num_exthdrs = 1;
- xso->flags = xuo->flags;
+ /* Don't forward bit that is not implemented */
+ xso->flags = xuo->flags & ~XFRM_OFFLOAD_IPV6;
err = dev->xfrmdev_ops->xdo_dev_state_add(x);
if (err) {
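Rejecting undefined xuo->flags bits up front means a future userspace flag can never be silently ignored by an older kernel, and masking XFRM_OFFLOAD_IPV6 out of xso->flags keeps a bit no driver implements from being forwarded. The validation idiom on its own, with illustrative flag values (not the real UAPI constants):

        #include <errno.h>
        #include <stdint.h>

        #define OFFLOAD_INBOUND 0x1u
        #define OFFLOAD_IPV6    0x2u    /* accepted, but not forwarded on */

        static int validate_offload_flags(uint32_t flags)
        {
                /* Any bit outside the known set is an error, not a no-op. */
                if (flags & ~(OFFLOAD_INBOUND | OFFLOAD_IPV6))
                        return -EINVAL;
                return 0;
        }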
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 57448fc519fc..1e8b26eecb3f 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -304,7 +304,10 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ if (skb->len > 1280)
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ else
+ goto xmit;
} else {
if (!(ip_hdr(skb)->frag_off & htons(IP_DF)))
goto xmit;
@@ -673,12 +676,12 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
struct net *net = xi->net;
struct xfrm_if_parms p = {};
+ xfrmi_netlink_parms(data, &p);
if (!p.if_id) {
NL_SET_ERR_MSG(extack, "if_id must be non zero");
return -EINVAL;
}
- xfrmi_netlink_parms(data, &p);
xi = xfrmi_locate(net, &p);
if (!xi) {
xi = netdev_priv(dev);
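The changelink fix is pure ordering: p is zero-initialised, so testing p.if_id before xfrmi_netlink_parms() had filled it in always tested 0 and the validation was meaningless. Reduced to a sketch (the parser is a stand-in for xfrmi_netlink_parms()):

        struct if_parms {
                unsigned int if_id;
        };

        static void parse_parms(struct if_parms *p)
        {
                p->if_id = 42;          /* stand-in: fill from netlink attrs */
        }

        static int changelink(void)
        {
                struct if_parms p = {0};

                parse_parms(&p);        /* must run first ... */
                if (!p.if_id)           /* ... or this always sees 0 */
                        return -1;
                return 0;
        }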
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 04d1ce9b510f..882526159d3a 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -4256,7 +4256,7 @@ static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
}
static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
- u8 dir, u8 type, struct net *net)
+ u8 dir, u8 type, struct net *net, u32 if_id)
{
struct xfrm_policy *pol, *ret = NULL;
struct hlist_head *chain;
@@ -4265,7 +4265,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
hlist_for_each_entry(pol, chain, bydst) {
- if (xfrm_migrate_selector_match(sel, &pol->selector) &&
+ if ((if_id == 0 || pol->if_id == if_id) &&
+ xfrm_migrate_selector_match(sel, &pol->selector) &&
pol->type == type) {
ret = pol;
priority = ret->priority;
@@ -4277,7 +4278,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
if ((pol->priority >= priority) && ret)
break;
- if (xfrm_migrate_selector_match(sel, &pol->selector) &&
+ if ((if_id == 0 || pol->if_id == if_id) &&
+ xfrm_migrate_selector_match(sel, &pol->selector) &&
pol->type == type) {
ret = pol;
break;
@@ -4393,7 +4395,7 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
struct xfrm_migrate *m, int num_migrate,
struct xfrm_kmaddress *k, struct net *net,
- struct xfrm_encap_tmpl *encap)
+ struct xfrm_encap_tmpl *encap, u32 if_id)
{
int i, err, nx_cur = 0, nx_new = 0;
struct xfrm_policy *pol = NULL;
@@ -4412,14 +4414,14 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
}
/* Stage 1 - find policy */
- if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {
+ if ((pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id)) == NULL) {
err = -ENOENT;
goto out;
}
/* Stage 2 - find and update state(s) */
for (i = 0, mp = m; i < num_migrate; i++, mp++) {
- if ((x = xfrm_migrate_state_find(mp, net))) {
+ if ((x = xfrm_migrate_state_find(mp, net, if_id))) {
x_cur[nx_cur] = x;
nx_cur++;
xc = xfrm_state_migrate(x, mp, encap);
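Threading if_id through xfrm_migrate() lets a host with several xfrm interfaces migrate only the policies and states bound to the interface named in the request; if_id == 0 preserves the old any-interface behaviour for callers that pass no XFRMA_IF_ID. The wildcard predicate used in both lookup loops, in isolation (selector matching elided):

        struct policy {
                unsigned int if_id;
                int type;
        };

        static int policy_matches(const struct policy *pol, int type,
                                  unsigned int if_id)
        {
                /* 0 is a wildcard; anything else must match exactly. */
                if (if_id != 0 && pol->if_id != if_id)
                        return 0;
                return pol->type == type;
        }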
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index ca6bee18346d..b749935152ba 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1579,9 +1579,6 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
memcpy(&x->mark, &orig->mark, sizeof(x->mark));
memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark));
- if (xfrm_init_state(x) < 0)
- goto error;
-
x->props.flags = orig->props.flags;
x->props.extra_flags = orig->props.extra_flags;
@@ -1606,7 +1603,8 @@ out:
return NULL;
}
-struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net)
+struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net,
+ u32 if_id)
{
unsigned int h;
struct xfrm_state *x = NULL;
@@ -1622,6 +1620,8 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
continue;
if (m->reqid && x->props.reqid != m->reqid)
continue;
+ if (if_id != 0 && x->if_id != if_id)
+ continue;
if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
m->old_family) ||
!xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
@@ -1637,6 +1637,8 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
if (x->props.mode != m->mode ||
x->id.proto != m->proto)
continue;
+ if (if_id != 0 && x->if_id != if_id)
+ continue;
if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
m->old_family) ||
!xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
@@ -1663,6 +1665,11 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
if (!xc)
return NULL;
+ xc->props.family = m->new_family;
+
+ if (xfrm_init_state(xc) < 0)
+ goto error;
+
memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
@@ -2572,7 +2579,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
-u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu)
+u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
const struct xfrm_type *type = READ_ONCE(x->type);
struct crypto_aead *aead;
@@ -2603,17 +2610,7 @@ u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu)
return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
net_adj) & ~(blksize - 1)) + net_adj - 2;
}
-EXPORT_SYMBOL_GPL(__xfrm_state_mtu);
-
-u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
-{
- mtu = __xfrm_state_mtu(x, mtu);
-
- if (x->props.family == AF_INET6 && mtu < IPV6_MIN_MTU)
- return IPV6_MIN_MTU;
-
- return mtu;
-}
+EXPORT_SYMBOL_GPL(xfrm_state_mtu);
int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
{
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 8cd6c8129004..72b2f173aac8 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -630,13 +630,8 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
xfrm_smark_init(attrs, &x->props.smark);
- if (attrs[XFRMA_IF_ID]) {
+ if (attrs[XFRMA_IF_ID])
x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
- if (!x->if_id) {
- err = -EINVAL;
- goto error;
- }
- }
err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);
if (err)
@@ -1432,13 +1427,8 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
mark = xfrm_mark_get(attrs, &m);
- if (attrs[XFRMA_IF_ID]) {
+ if (attrs[XFRMA_IF_ID])
if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
- if (!if_id) {
- err = -EINVAL;
- goto out_noput;
- }
- }
if (p->info.seq) {
x = xfrm_find_acq_byseq(net, mark, p->info.seq);
@@ -1751,13 +1741,8 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us
xfrm_mark_get(attrs, &xp->mark);
- if (attrs[XFRMA_IF_ID]) {
+ if (attrs[XFRMA_IF_ID])
xp->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
- if (!xp->if_id) {
- err = -EINVAL;
- goto error;
- }
- }
return xp;
error:
@@ -2608,6 +2593,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
int n = 0;
struct net *net = sock_net(skb->sk);
struct xfrm_encap_tmpl *encap = NULL;
+ u32 if_id = 0;
if (attrs[XFRMA_MIGRATE] == NULL)
return -EINVAL;
@@ -2632,7 +2618,10 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
return -ENOMEM;
}
- err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap);
+ if (attrs[XFRMA_IF_ID])
+ if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+
+ err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap, if_id);
kfree(encap);
diff --git a/samples/seccomp/dropper.c b/samples/seccomp/dropper.c
index cc0648eb389e..4bca4b70f665 100644
--- a/samples/seccomp/dropper.c
+++ b/samples/seccomp/dropper.c
@@ -25,7 +25,7 @@
#include <sys/prctl.h>
#include <unistd.h>
-static int install_filter(int nr, int arch, int error)
+static int install_filter(int arch, int nr, int error)
{
struct sock_filter filter[] = {
BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
@@ -42,6 +42,10 @@ static int install_filter(int nr, int arch, int error)
.len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
.filter = filter,
};
+ if (error == -1) {
+ struct sock_filter kill = BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL);
+ filter[4] = kill;
+ }
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
perror("prctl(NO_NEW_PRIVS)");
return 1;
@@ -57,9 +61,10 @@ int main(int argc, char **argv)
{
if (argc < 5) {
fprintf(stderr, "Usage:\n"
- "dropper <syscall_nr> <arch> <errno> <prog> [<args>]\n"
+ "dropper <arch> <syscall_nr> <errno> <prog> [<args>]\n"
"Hint: AUDIT_ARCH_I386: 0x%X\n"
" AUDIT_ARCH_X86_64: 0x%X\n"
+ " errno == -1 means SECCOMP_RET_KILL\n"
"\n", AUDIT_ARCH_I386, AUDIT_ARCH_X86_64);
return 1;
}
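With the reordered arguments the sample reads more naturally, since the architecture now qualifies the syscall number that follows it. A hypothetical run on x86-64 (assuming AUDIT_ARCH_X86_64 is 0xC000003E and __NR_execve is 59 on that ABI) would be ./dropper 0xC000003E 59 -1 /bin/true, exercising the new branch: the filter returns SECCOMP_RET_KILL instead of an errno, so the exec of /bin/true is killed outright rather than failing politely.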
diff --git a/samples/trace_events/trace_custom_sched.h b/samples/trace_events/trace_custom_sched.h
index a3d14de6a2e5..9fdd8e7c2a45 100644
--- a/samples/trace_events/trace_custom_sched.h
+++ b/samples/trace_events/trace_custom_sched.h
@@ -25,10 +25,11 @@ TRACE_CUSTOM_EVENT(sched_switch,
* that the custom event is using.
*/
TP_PROTO(bool preempt,
+ unsigned int prev_state,
struct task_struct *prev,
struct task_struct *next),
- TP_ARGS(preempt, prev, next),
+ TP_ARGS(preempt, prev_state, prev, next),
/*
* The next fields are where the customization happens.
diff --git a/scripts/Makefile.clang b/scripts/Makefile.clang
index 51fc23e2e9e5..87285b76adb2 100644
--- a/scripts/Makefile.clang
+++ b/scripts/Makefile.clang
@@ -10,6 +10,7 @@ CLANG_TARGET_FLAGS_powerpc := powerpc64le-linux-gnu
CLANG_TARGET_FLAGS_riscv := riscv64-linux-gnu
CLANG_TARGET_FLAGS_s390 := s390x-linux-gnu
CLANG_TARGET_FLAGS_x86 := x86_64-linux-gnu
+CLANG_TARGET_FLAGS_um := $(CLANG_TARGET_FLAGS_$(SUBARCH))
CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(SRCARCH))
ifeq ($(CROSS_COMPILE),)
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index d53825503874..8be892887d71 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -51,6 +51,7 @@ KBUILD_CFLAGS += -Wno-sign-compare
KBUILD_CFLAGS += -Wno-format-zero-length
KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast)
KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare
+KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access)
endif
endif
diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins
index 1d16ca1b78c9..f67153b260c0 100644
--- a/scripts/Makefile.gcc-plugins
+++ b/scripts/Makefile.gcc-plugins
@@ -37,6 +37,8 @@ gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK) \
+= -fplugin-arg-stackleak_plugin-track-min-size=$(CONFIG_STACKLEAK_TRACK_MIN_SIZE)
gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK) \
+= -fplugin-arg-stackleak_plugin-arch=$(SRCARCH)
+gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK_VERBOSE) \
+ += -fplugin-arg-stackleak_plugin-verbose
ifdef CONFIG_GCC_PLUGIN_STACKLEAK
DISABLE_STACKLEAK_PLUGIN += -fplugin-arg-stackleak_plugin-disable
endif
diff --git a/scripts/atomic/fallbacks/read_acquire b/scripts/atomic/fallbacks/read_acquire
index 803ba7561076..a0ea1d26e6b2 100755
--- a/scripts/atomic/fallbacks/read_acquire
+++ b/scripts/atomic/fallbacks/read_acquire
@@ -2,6 +2,15 @@ cat <<EOF
static __always_inline ${ret}
arch_${atomic}_read_acquire(const ${atomic}_t *v)
{
- return smp_load_acquire(&(v)->counter);
+ ${int} ret;
+
+ if (__native_word(${atomic}_t)) {
+ ret = smp_load_acquire(&(v)->counter);
+ } else {
+ ret = arch_${atomic}_read(v);
+ __atomic_acquire_fence();
+ }
+
+ return ret;
}
EOF
diff --git a/scripts/atomic/fallbacks/set_release b/scripts/atomic/fallbacks/set_release
index 86ede759f24e..05cdb7f42477 100755
--- a/scripts/atomic/fallbacks/set_release
+++ b/scripts/atomic/fallbacks/set_release
@@ -2,6 +2,11 @@ cat <<EOF
static __always_inline void
arch_${atomic}_set_release(${atomic}_t *v, ${int} i)
{
- smp_store_release(&(v)->counter, i);
+ if (__native_word(${atomic}_t)) {
+ smp_store_release(&(v)->counter, i);
+ } else {
+ __atomic_release_fence();
+ arch_${atomic}_set(v, i);
+ }
}
EOF
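Both templates now refuse to feed a non-native-word atomic to smp_load_acquire()/smp_store_release(), which require single-copy-atomic, word-sized accesses; for something like atomic64_t on a 32-bit machine they fall back to a plain arch read/set bracketed by an explicit fence. Roughly what the read_acquire template generates for that case (a sketch of the expansion, with ${ret} and ${int} both s64; the real generated header adds instrumentation wrappers):

        static __always_inline s64
        arch_atomic64_read_acquire(const atomic64_t *v)
        {
                s64 ret;

                if (__native_word(atomic64_t)) {
                        ret = smp_load_acquire(&(v)->counter);
                } else {
                        ret = arch_atomic64_read(v);
                        __atomic_acquire_fence();
                }

                return ret;
        }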
diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c
index e9db7dcb3e5f..42f0252ee2a4 100644
--- a/scripts/gcc-plugins/stackleak_plugin.c
+++ b/scripts/gcc-plugins/stackleak_plugin.c
@@ -429,6 +429,23 @@ static unsigned int stackleak_cleanup_execute(void)
return 0;
}
+/*
+ * STRING_CST may or may not be NUL terminated:
+ * https://gcc.gnu.org/onlinedocs/gccint/Constant-expressions.html
+ */
+static inline bool string_equal(tree node, const char *string, int length)
+{
+ if (TREE_STRING_LENGTH(node) < length)
+ return false;
+ if (TREE_STRING_LENGTH(node) > length + 1)
+ return false;
+ if (TREE_STRING_LENGTH(node) == length + 1 &&
+ TREE_STRING_POINTER(node)[length] != '\0')
+ return false;
+ return !memcmp(TREE_STRING_POINTER(node), string, length);
+}
+#define STRING_EQUAL(node, str) string_equal(node, str, strlen(str))
+
static bool stackleak_gate(void)
{
tree section;
@@ -438,13 +455,17 @@ static bool stackleak_gate(void)
if (section && TREE_VALUE(section)) {
section = TREE_VALUE(TREE_VALUE(section));
- if (!strncmp(TREE_STRING_POINTER(section), ".init.text", 10))
+ if (STRING_EQUAL(section, ".init.text"))
+ return false;
+ if (STRING_EQUAL(section, ".devinit.text"))
+ return false;
+ if (STRING_EQUAL(section, ".cpuinit.text"))
return false;
- if (!strncmp(TREE_STRING_POINTER(section), ".devinit.text", 13))
+ if (STRING_EQUAL(section, ".meminit.text"))
return false;
- if (!strncmp(TREE_STRING_POINTER(section), ".cpuinit.text", 13))
+ if (STRING_EQUAL(section, ".noinstr.text"))
return false;
- if (!strncmp(TREE_STRING_POINTER(section), ".meminit.text", 13))
+ if (STRING_EQUAL(section, ".entry.text"))
return false;
}
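strncmp(ptr, ".init.text", 10) is a prefix test, so it also matched any section whose name merely starts with ".init.text", and it quietly assumed the STRING_CST bytes could be read like a C string. string_equal() instead compares TREE_STRING_LENGTH against the literal's length, accepting both the NUL-terminated and unterminated encodings GCC permits. The prefix pitfall in two lines (the longer section name is hypothetical):

        #include <stdio.h>
        #include <string.h>

        int main(void)
        {
                const char *sec = ".init.text.unlikely";

                /* 0: the prefix test "matches", though this is not ".init.text" */
                printf("%d\n", strncmp(sec, ".init.text", 10));

                /* 0: an exact, length-aware compare correctly rejects it */
                printf("%d\n", strlen(sec) == strlen(".init.text") &&
                               !memcmp(sec, ".init.text", strlen(".init.text")));
                return 0;
        }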
diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index 59717be31210..d3c3a61308ad 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -979,10 +979,10 @@ static int conf_write_autoconf_cmd(const char *autoconf_name)
fprintf(out, "\n$(deps_config): ;\n");
- if (ferror(out)) /* error check for all fprintf() calls */
- return -1;
-
+ ret = ferror(out); /* error check for all fprintf() calls */
fclose(out);
+ if (ret)
+ return -1;
if (rename(tmp, name)) {
perror("rename");
@@ -994,14 +994,19 @@ static int conf_write_autoconf_cmd(const char *autoconf_name)
static int conf_touch_deps(void)
{
- const char *name;
+ const char *name, *tmp;
struct symbol *sym;
int res, i;
- strcpy(depfile_path, "include/config/");
- depfile_prefix_len = strlen(depfile_path);
-
name = conf_get_autoconfig_name();
+ tmp = strrchr(name, '/');
+ depfile_prefix_len = tmp ? tmp - name + 1 : 0;
+ if (depfile_prefix_len + 1 > sizeof(depfile_path))
+ return -1;
+
+ strncpy(depfile_path, name, depfile_prefix_len);
+ depfile_path[depfile_prefix_len] = 0;
+
conf_read_simple(name, S_DEF_AUTO);
sym_calc_value(modules_sym);
@@ -1093,10 +1098,10 @@ static int __conf_write_autoconf(const char *filename,
print_symbol(file, sym);
/* check possible errors in conf_write_heading() and print_symbol() */
- if (ferror(file))
- return -1;
-
+ ret = ferror(file);
fclose(file);
+ if (ret)
+ return -1;
if (rename(tmp, filename)) {
perror("rename");
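The ferror()/fclose() reordering matters because the stream's error indicator is gone once fclose() returns, and the old early return skipped the fclose() entirely, leaking the FILE and leaving buffered output unflushed before the rename. (The conf_touch_deps() hunk separately derives the depfile prefix from the real autoconfig path instead of a hard-coded include/config/.) The write-check-commit pattern on its own (a sketch):

        #include <stdio.h>

        static int write_and_commit(const char *tmp, const char *final)
        {
                FILE *out = fopen(tmp, "w");
                int ret;

                if (!out)
                        return -1;

                fprintf(out, "generated file\n");

                ret = ferror(out);      /* capture while the stream is alive */
                fclose(out);            /* always close (and flush) */
                if (ret)
                        return -1;

                return rename(tmp, final);
        }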
diff --git a/scripts/kconfig/preprocess.c b/scripts/kconfig/preprocess.c
index 0590f86df6e4..748da578b418 100644
--- a/scripts/kconfig/preprocess.c
+++ b/scripts/kconfig/preprocess.c
@@ -141,7 +141,7 @@ static char *do_lineno(int argc, char *argv[])
static char *do_shell(int argc, char *argv[])
{
FILE *p;
- char buf[256];
+ char buf[4096];
char *cmd;
size_t nread;
int i;
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 3106b7536b89..9c084a2ba3b0 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -12,202 +12,46 @@ use strict;
## ##
## #define enhancements by Armin Kuster <akuster@mvista.com> ##
## Copyright (c) 2000 MontaVista Software, Inc. ##
-## ##
-## This software falls under the GNU General Public License. ##
-## Please read the COPYING file for more information ##
+#
+# Copyright (C) 2022 Tomasz Warniełło (POD)
-# 18/01/2001 - Cleanups
-# Functions prototyped as foo(void) same as foo()
-# Stop eval'ing where we don't need to.
-# -- huggie@earth.li
+use Pod::Usage qw/pod2usage/;
-# 27/06/2001 - Allowed whitespace after initial "/**" and
-# allowed comments before function declarations.
-# -- Christian Kreibich <ck@whoop.org>
+=head1 NAME
-# Still to do:
-# - add perldoc documentation
-# - Look more closely at some of the scarier bits :)
+kernel-doc - Print formatted kernel documentation to stdout
-# 26/05/2001 - Support for separate source and object trees.
-# Return error code.
-# Keith Owens <kaos@ocs.com.au>
+=head1 SYNOPSIS
-# 23/09/2001 - Added support for typedefs, structs, enums and unions
-# Support for Context section; can be terminated using empty line
-# Small fixes (like spaces vs. \s in regex)
-# -- Tim Jansen <tim@tjansen.de>
+ kernel-doc [-h] [-v] [-Werror]
+ [ -man |
+ -rst [-sphinx-version VERSION] [-enable-lineno] |
+ -none
+ ]
+ [
+ -export |
+ -internal |
+ [-function NAME] ... |
+ [-nosymbol NAME] ...
+ ]
+ [-no-doc-sections]
+ [-export-file FILE] ...
+ FILE ...
-# 25/07/2012 - Added support for HTML5
-# -- Dan Luedtke <mail@danrl.de>
+Run `kernel-doc -h` for details.
-sub usage {
- my $message = <<"EOF";
-Usage: $0 [OPTION ...] FILE ...
+=head1 DESCRIPTION
Read C language source or header FILEs, extract embedded documentation comments,
and print formatted documentation to standard output.
-The documentation comments are identified by "/**" opening comment mark. See
-Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax.
-
-Output format selection (mutually exclusive):
- -man Output troff manual page format. This is the default.
- -rst Output reStructuredText format.
- -none Do not output documentation, only warnings.
-
-Output format selection modifier (affects only ReST output):
-
- -sphinx-version Use the ReST C domain dialect compatible with an
- specific Sphinx Version.
- If not specified, kernel-doc will auto-detect using
- the sphinx-build version found on PATH.
-
-Output selection (mutually exclusive):
- -export Only output documentation for symbols that have been
- exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL()
- in any input FILE or -export-file FILE.
- -internal Only output documentation for symbols that have NOT been
- exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL()
- in any input FILE or -export-file FILE.
- -function NAME Only output documentation for the given function(s)
- or DOC: section title(s). All other functions and DOC:
- sections are ignored. May be specified multiple times.
- -nosymbol NAME Exclude the specified symbols from the output
- documentation. May be specified multiple times.
-
-Output selection modifiers:
- -no-doc-sections Do not output DOC: sections.
- -enable-lineno Enable output of #define LINENO lines. Only works with
- reStructuredText format.
- -export-file FILE Specify an additional FILE in which to look for
- EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL(). To be used with
- -export or -internal. May be specified multiple times.
-
-Other parameters:
- -v Verbose output, more warnings and other information.
- -h Print this help.
- -Werror Treat warnings as errors.
-
-EOF
- print $message;
- exit 1;
-}
+The documentation comments are identified by the "/**" opening comment mark.
-#
-# format of comments.
-# In the following table, (...)? signifies optional structure.
-# (...)* signifies 0 or more structure elements
-# /**
-# * function_name(:)? (- short description)?
-# (* @parameterx: (description of parameter x)?)*
-# (* a blank line)?
-# * (Description:)? (Description of function)?
-# * (section header: (section description)? )*
-# (*)?*/
-#
-# So .. the trivial example would be:
-#
-# /**
-# * my_function
-# */
-#
-# If the Description: header tag is omitted, then there must be a blank line
-# after the last parameter specification.
-# e.g.
-# /**
-# * my_function - does my stuff
-# * @my_arg: its mine damnit
-# *
-# * Does my stuff explained.
-# */
-#
-# or, could also use:
-# /**
-# * my_function - does my stuff
-# * @my_arg: its mine damnit
-# * Description: Does my stuff explained.
-# */
-# etc.
-#
-# Besides functions you can also write documentation for structs, unions,
-# enums and typedefs. Instead of the function name you must write the name
-# of the declaration; the struct/union/enum/typedef must always precede
-# the name. Nesting of declarations is not supported.
-# Use the argument mechanism to document members or constants.
-# e.g.
-# /**
-# * struct my_struct - short description
-# * @a: first member
-# * @b: second member
-# *
-# * Longer description
-# */
-# struct my_struct {
-# int a;
-# int b;
-# /* private: */
-# int c;
-# };
-#
-# All descriptions can be multiline, except the short function description.
-#
-# For really longs structs, you can also describe arguments inside the
-# body of the struct.
-# eg.
-# /**
-# * struct my_struct - short description
-# * @a: first member
-# * @b: second member
-# *
-# * Longer description
-# */
-# struct my_struct {
-# int a;
-# int b;
-# /**
-# * @c: This is longer description of C
-# *
-# * You can use paragraphs to describe arguments
-# * using this method.
-# */
-# int c;
-# };
-#
-# This should be use only for struct/enum members.
-#
-# You can also add additional sections. When documenting kernel functions you
-# should document the "Context:" of the function, e.g. whether the functions
-# can be called form interrupts. Unlike other sections you can end it with an
-# empty line.
-# A non-void function should have a "Return:" section describing the return
-# value(s).
-# Example-sections should contain the string EXAMPLE so that they are marked
-# appropriately in DocBook.
-#
-# Example:
-# /**
-# * user_function - function that can only be called in user context
-# * @a: some argument
-# * Context: !in_interrupt()
-# *
-# * Some description
-# * Example:
-# * user_function(22);
-# */
-# ...
-#
-#
-# All descriptive text is further processed, scanning for the following special
-# patterns, which are highlighted appropriately.
-#
-# 'funcname()' - function
-# '$ENVVAR' - environmental variable
-# '&struct_name' - name of a structure (up to two words including 'struct')
-# '&struct_name.member' - name of a structure member
-# '@parameter' - name of a parameter
-# '%CONST' - name of a constant.
-# '``LITERAL``' - literal string without any spaces on it.
+See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax.
+
+=cut
+
+# more perldoc at the end of the file
## init lots of data
@@ -273,7 +117,13 @@ my $blankline_rst = "\n";
# read arguments
if ($#ARGV == -1) {
- usage();
+ pod2usage(
+ -message => "No arguments!\n",
+ -exitval => 1,
+ -verbose => 99,
+ -sections => 'SYNOPSIS',
+ -output => \*STDERR,
+ );
}
my $kernelversion;
@@ -468,7 +318,7 @@ while ($ARGV[0] =~ m/^--?(.*)/) {
} elsif ($cmd eq "Werror") {
$Werror = 1;
} elsif (($cmd eq "h") || ($cmd eq "help")) {
- usage();
+ pod2usage(-exitval => 0, -verbose => 2);
} elsif ($cmd eq 'no-doc-sections') {
$no_doc_sections = 1;
} elsif ($cmd eq 'enable-lineno') {
@@ -494,7 +344,22 @@ while ($ARGV[0] =~ m/^--?(.*)/) {
}
} else {
# Unknown argument
- usage();
+ pod2usage(
+ -message => "Argument unknown!\n",
+ -exitval => 1,
+ -verbose => 99,
+ -sections => 'SYNOPSIS',
+ -output => \*STDERR,
+ );
+ }
+ if ($#ARGV < 0){
+ pod2usage(
+ -message => "FILE argument missing\n",
+ -exitval => 1,
+ -verbose => 99,
+ -sections => 'SYNOPSIS',
+ -output => \*STDERR,
+ );
}
}
@@ -2521,3 +2386,118 @@ if ($Werror && $warnings) {
} else {
exit($output_mode eq "none" ? 0 : $errors)
}
+
+__END__
+
+=head1 OPTIONS
+
+=head2 Output format selection (mutually exclusive):
+
+=over 8
+
+=item -man
+
+Output troff manual page format.
+
+=item -rst
+
+Output reStructuredText format. This is the default.
+
+=item -none
+
+Do not output documentation, only warnings.
+
+=back
+
+=head2 Output format modifiers
+
+=head3 reStructuredText only
+
+=over 8
+
+=item -sphinx-version VERSION
+
+Use the ReST C domain dialect compatible with a specific Sphinx Version.
+
+If not specified, kernel-doc will auto-detect using the sphinx-build version
+found on PATH.
+
+=back
+
+=head2 Output selection (mutually exclusive):
+
+=over 8
+
+=item -export
+
+Only output documentation for the symbols that have been exported using
+EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() in any input FILE or -export-file FILE.
+
+=item -internal
+
+Only output documentation for the symbols that have NOT been exported using
+EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() in any input FILE or -export-file FILE.
+
+=item -function NAME
+
+Only output documentation for the given function or DOC: section title.
+All other functions and DOC: sections are ignored.
+
+May be specified multiple times.
+
+=item -nosymbol NAME
+
+Exclude the specified symbol from the output documentation.
+
+May be specified multiple times.
+
+=back
+
+=head2 Output selection modifiers:
+
+=over 8
+
+=item -no-doc-sections
+
+Do not output DOC: sections.
+
+=item -export-file FILE
+
+Specify an additional FILE in which to look for EXPORT_SYMBOL() and
+EXPORT_SYMBOL_GPL().
+
+To be used with -export or -internal.
+
+May be specified multiple times.
+
+=back
+
+=head3 reStructuredText only
+
+=over 8
+
+=item -enable-lineno
+
+Enable output of #define LINENO lines.
+
+=back
+
+=head2 Other parameters:
+
+=over 8
+
+=item -h, -help
+
+Print this help.
+
+=item -v
+
+Verbose output, more warnings and other information.
+
+=item -Werror
+
+Treat warnings as errors.
+
+=back
+
+=cut
diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h
index 6a2a04d92f42..ca40a5258c87 100755
--- a/scripts/mkcompile_h
+++ b/scripts/mkcompile_h
@@ -5,9 +5,10 @@ TARGET=$1
ARCH=$2
SMP=$3
PREEMPT=$4
-PREEMPT_RT=$5
-CC_VERSION="$6"
-LD=$7
+PREEMPT_DYNAMIC=$5
+PREEMPT_RT=$6
+CC_VERSION="$7"
+LD=$8
# Do not expand names
set -f
@@ -41,8 +42,14 @@ fi
UTS_VERSION="#$VERSION"
CONFIG_FLAGS=""
if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
-if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
-if [ -n "$PREEMPT_RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT_RT"; fi
+
+if [ -n "$PREEMPT_RT" ] ; then
+ CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT_RT"
+elif [ -n "$PREEMPT_DYNAMIC" ] ; then
+ CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT_DYNAMIC"
+elif [ -n "$PREEMPT" ] ; then
+ CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"
+fi
# Truncate to maximum length
UTS_LEN=64
diff --git a/scripts/spelling.txt b/scripts/spelling.txt
index 0c8b79cfb1bb..8435b99452b6 100644
--- a/scripts/spelling.txt
+++ b/scripts/spelling.txt
@@ -180,6 +180,7 @@ asuming||assuming
asycronous||asynchronous
asychronous||asynchronous
asynchnous||asynchronous
+asynchronus||asynchronous
asynchromous||asynchronous
asymetric||asymmetric
asymmeric||asymmetric
@@ -231,6 +232,7 @@ baloons||balloons
bandwith||bandwidth
banlance||balance
batery||battery
+battey||battery
beacuse||because
becasue||because
becomming||becoming
@@ -333,6 +335,7 @@ commoditiy||commodity
comsume||consume
comsumer||consumer
comsuming||consuming
+comaptible||compatible
compability||compatibility
compaibility||compatibility
comparsion||comparison
@@ -353,7 +356,9 @@ compoment||component
comppatible||compatible
compres||compress
compresion||compression
+compresser||compressor
comression||compression
+comsumed||consumed
comunicate||communicate
comunication||communication
conbination||combination
@@ -530,6 +535,7 @@ dissconect||disconnect
distiction||distinction
divisable||divisible
divsiors||divisors
+dsiabled||disabled
docuentation||documentation
documantation||documentation
documentaion||documentation
@@ -677,6 +683,7 @@ frequence||frequency
frequncy||frequency
frequancy||frequency
frome||from
+fronend||frontend
fucntion||function
fuction||function
fuctions||functions
@@ -761,6 +768,7 @@ implmentation||implementation
implmenting||implementing
incative||inactive
incomming||incoming
+incompaitiblity||incompatibility
incompatabilities||incompatibilities
incompatable||incompatible
incompatble||incompatible
@@ -942,6 +950,7 @@ metdata||metadata
micropone||microphone
microprocesspr||microprocessor
migrateable||migratable
+millenium||millennium
milliseonds||milliseconds
minium||minimum
minimam||minimum
@@ -1007,6 +1016,7 @@ notity||notify
nubmer||number
numebr||number
numner||number
+nunber||number
obtaion||obtain
obusing||abusing
occassionally||occasionally
@@ -1136,6 +1146,7 @@ preprare||prepare
pressre||pressure
presuambly||presumably
previosuly||previously
+previsously||previously
primative||primitive
princliple||principle
priorty||priority
@@ -1297,6 +1308,7 @@ routins||routines
rquest||request
runing||running
runned||ran
+runnnig||running
runnning||running
runtine||runtime
sacrifying||sacrificing
@@ -1353,6 +1365,7 @@ similiar||similar
simlar||similar
simliar||similar
simpified||simplified
+simultanous||simultaneous
singaled||signaled
singal||signal
singed||signed
@@ -1461,6 +1474,7 @@ syste||system
sytem||system
sythesis||synthesis
taht||that
+tained||tainted
tansmit||transmit
targetted||targeted
targetting||targeting
@@ -1489,6 +1503,7 @@ timout||timeout
tmis||this
toogle||toggle
torerable||tolerable
+torlence||tolerance
traget||target
traking||tracking
tramsmitted||transmitted
@@ -1503,6 +1518,7 @@ transferd||transferred
transfered||transferred
transfering||transferring
transision||transition
+transistioned||transitioned
transmittd||transmitted
transormed||transformed
trasfer||transfer
diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index d051f8ceefdd..ded4d7c0d132 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -174,6 +174,16 @@ config GCC_PLUGIN_STACKLEAK
* https://grsecurity.net/
* https://pax.grsecurity.net/
+config GCC_PLUGIN_STACKLEAK_VERBOSE
+ bool "Report stack depth analysis instrumentation" if EXPERT
+ depends on GCC_PLUGIN_STACKLEAK
+ depends on !COMPILE_TEST # too noisy
+ help
+ This option will cause a warning to be printed each time the
+ stackleak plugin finds a function it thinks needs to be
+ instrumented. This is useful for comparing coverage between
+ builds.
+
config STACKLEAK_TRACK_MIN_SIZE
int "Minimum stack frame size of functions tracked by STACKLEAK"
default 100
diff --git a/security/integrity/Kconfig b/security/integrity/Kconfig
index 71f0177e8716..599429f99f99 100644
--- a/security/integrity/Kconfig
+++ b/security/integrity/Kconfig
@@ -62,6 +62,19 @@ config INTEGRITY_PLATFORM_KEYRING
	  provided by the platform for verifying the kexec'ed kernel image
and, possibly, the initramfs signature.
+config INTEGRITY_MACHINE_KEYRING
+ bool "Provide a keyring to which Machine Owner Keys may be added"
+ depends on SECONDARY_TRUSTED_KEYRING
+ depends on INTEGRITY_ASYMMETRIC_KEYS
+ depends on SYSTEM_BLACKLIST_KEYRING
+ depends on LOAD_UEFI_KEYS
+ depends on !IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY
+ help
+ If set, provide a keyring to which Machine Owner Keys (MOK) may
+ be added. This keyring shall contain just MOK keys. Unlike keys
+ in the platform keyring, keys contained in the .machine keyring will
+ be trusted within the kernel.
+
config LOAD_UEFI_KEYS
depends on INTEGRITY_PLATFORM_KEYRING
depends on EFI
diff --git a/security/integrity/Makefile b/security/integrity/Makefile
index 7ee39d66cf16..d0ffe37dc1d6 100644
--- a/security/integrity/Makefile
+++ b/security/integrity/Makefile
@@ -10,6 +10,7 @@ integrity-$(CONFIG_INTEGRITY_AUDIT) += integrity_audit.o
integrity-$(CONFIG_INTEGRITY_SIGNATURE) += digsig.o
integrity-$(CONFIG_INTEGRITY_ASYMMETRIC_KEYS) += digsig_asymmetric.o
integrity-$(CONFIG_INTEGRITY_PLATFORM_KEYRING) += platform_certs/platform_keyring.o
+integrity-$(CONFIG_INTEGRITY_MACHINE_KEYRING) += platform_certs/machine_keyring.o
integrity-$(CONFIG_LOAD_UEFI_KEYS) += platform_certs/efi_parser.o \
platform_certs/load_uefi.o \
platform_certs/keyring_handler.o
diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
index 3b06a01bd0fd..c8c8a4a4e7a0 100644
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -30,6 +30,7 @@ static const char * const keyring_name[INTEGRITY_KEYRING_MAX] = {
".ima",
#endif
".platform",
+ ".machine",
};
#ifdef CONFIG_IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY
@@ -111,6 +112,8 @@ static int __init __integrity_init_keyring(const unsigned int id,
} else {
if (id == INTEGRITY_KEYRING_PLATFORM)
set_platform_trusted_keys(keyring[id]);
+ if (id == INTEGRITY_KEYRING_MACHINE && trust_moklist())
+ set_machine_trusted_keys(keyring[id]);
if (id == INTEGRITY_KEYRING_IMA)
load_module_cert(keyring[id]);
}
@@ -126,7 +129,8 @@ int __init integrity_init_keyring(const unsigned int id)
perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW
| KEY_USR_READ | KEY_USR_SEARCH;
- if (id == INTEGRITY_KEYRING_PLATFORM) {
+ if (id == INTEGRITY_KEYRING_PLATFORM ||
+ id == INTEGRITY_KEYRING_MACHINE) {
restriction = NULL;
goto out;
}
@@ -139,7 +143,14 @@ int __init integrity_init_keyring(const unsigned int id)
return -ENOMEM;
restriction->check = restrict_link_to_ima;
- perm |= KEY_USR_WRITE;
+
+ /*
+ * MOK keys can only be added through a read-only runtime services
+ * UEFI variable during boot. No additional keys shall be allowed to
+ * load into the machine keyring following init from userspace.
+ */
+ if (id != INTEGRITY_KEYRING_MACHINE)
+ perm |= KEY_USR_WRITE;
out:
return __integrity_init_keyring(id, perm, restriction);
diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c
index 23240d793b07..895f4b9ce8c6 100644
--- a/security/integrity/digsig_asymmetric.c
+++ b/security/integrity/digsig_asymmetric.c
@@ -109,22 +109,25 @@ int asymmetric_verify(struct key *keyring, const char *sig,
pk = asymmetric_key_public_key(key);
pks.pkey_algo = pk->pkey_algo;
- if (!strcmp(pk->pkey_algo, "rsa"))
+ if (!strcmp(pk->pkey_algo, "rsa")) {
pks.encoding = "pkcs1";
- else if (!strncmp(pk->pkey_algo, "ecdsa-", 6))
+ } else if (!strncmp(pk->pkey_algo, "ecdsa-", 6)) {
		/* ecdsa-nist-p192 etc. */
pks.encoding = "x962";
- else if (!strcmp(pk->pkey_algo, "ecrdsa") ||
- !strcmp(pk->pkey_algo, "sm2"))
+ } else if (!strcmp(pk->pkey_algo, "ecrdsa") ||
+ !strcmp(pk->pkey_algo, "sm2")) {
pks.encoding = "raw";
- else
- return -ENOPKG;
+ } else {
+ ret = -ENOPKG;
+ goto out;
+ }
pks.digest = (u8 *)data;
pks.digest_size = datalen;
pks.s = hdr->sig;
pks.s_size = siglen;
ret = verify_signature(key, &pks);
+out:
key_put(key);
pr_debug("%s() = %d\n", __func__, ret);
return ret;
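The reflowed branches also fix a reference leak: returning -ENOPKG directly skipped the key_put() at the bottom, dropping a key reference on the floor every time an unsupported algorithm was seen; routing the error through out: guarantees the put. A userspace analogue of the discipline (free() standing in for key_put()):

        #include <errno.h>
        #include <stdlib.h>

        struct key {
                int algo;               /* < 0 models "unsupported" */
        };

        static int verify_with_key(struct key *(*lookup)(void))
        {
                struct key *key = lookup();
                int ret;

                if (!key)
                        return -ENOKEY;

                if (key->algo < 0) {
                        ret = -ENOPKG;
                        goto out;       /* must still drop the reference */
                }

                ret = 0;                /* actual verification elided */
        out:
                free(key);              /* key_put() analogue */
                return ret;
        }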
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 08f907382c61..7d87772f0ce6 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -86,7 +86,7 @@ static int __init evm_set_fixmode(char *str)
else
pr_err("invalid \"%s\" mode", str);
- return 0;
+ return 1;
}
__setup("evm=", evm_set_fixmode);
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index a64fb0130b01..c6805af46211 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -217,14 +217,11 @@ int ima_collect_measurement(struct integrity_iint_cache *iint,
const char *audit_cause = "failed";
struct inode *inode = file_inode(file);
const char *filename = file->f_path.dentry->d_name.name;
+ struct ima_max_digest_data hash;
int result = 0;
int length;
void *tmpbuf;
u64 i_version;
- struct {
- struct ima_digest_data hdr;
- char digest[IMA_MAX_DIGEST_SIZE];
- } hash;
/*
* Always collect the modsig, because IMA might have already collected
@@ -238,9 +235,10 @@ int ima_collect_measurement(struct integrity_iint_cache *iint,
goto out;
/*
- * Dectecting file change is based on i_version. On filesystems
- * which do not support i_version, support is limited to an initial
- * measurement/appraisal/audit.
+ * Detecting file change is based on i_version. On filesystems
+ * which do not support i_version, support was originally limited
+ * to an initial measurement/appraisal/audit, but was modified to
+ * assume the file changed.
*/
i_version = inode_query_iversion(inode);
hash.hdr.algo = algo;
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 3d8e9d5db5aa..cd1683dad3bf 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -452,56 +452,71 @@ static const struct file_operations ima_measure_policy_ops = {
int __init ima_fs_init(void)
{
+ int ret;
+
ima_dir = securityfs_create_dir("ima", integrity_dir);
if (IS_ERR(ima_dir))
- return -1;
+ return PTR_ERR(ima_dir);
ima_symlink = securityfs_create_symlink("ima", NULL, "integrity/ima",
NULL);
- if (IS_ERR(ima_symlink))
+ if (IS_ERR(ima_symlink)) {
+ ret = PTR_ERR(ima_symlink);
goto out;
+ }
binary_runtime_measurements =
securityfs_create_file("binary_runtime_measurements",
S_IRUSR | S_IRGRP, ima_dir, NULL,
&ima_measurements_ops);
- if (IS_ERR(binary_runtime_measurements))
+ if (IS_ERR(binary_runtime_measurements)) {
+ ret = PTR_ERR(binary_runtime_measurements);
goto out;
+ }
ascii_runtime_measurements =
securityfs_create_file("ascii_runtime_measurements",
S_IRUSR | S_IRGRP, ima_dir, NULL,
&ima_ascii_measurements_ops);
- if (IS_ERR(ascii_runtime_measurements))
+ if (IS_ERR(ascii_runtime_measurements)) {
+ ret = PTR_ERR(ascii_runtime_measurements);
goto out;
+ }
runtime_measurements_count =
securityfs_create_file("runtime_measurements_count",
S_IRUSR | S_IRGRP, ima_dir, NULL,
&ima_measurements_count_ops);
- if (IS_ERR(runtime_measurements_count))
+ if (IS_ERR(runtime_measurements_count)) {
+ ret = PTR_ERR(runtime_measurements_count);
goto out;
+ }
violations =
securityfs_create_file("violations", S_IRUSR | S_IRGRP,
ima_dir, NULL, &ima_htable_violations_ops);
- if (IS_ERR(violations))
+ if (IS_ERR(violations)) {
+ ret = PTR_ERR(violations);
goto out;
+ }
ima_policy = securityfs_create_file("policy", POLICY_FILE_FLAGS,
ima_dir, NULL,
&ima_measure_policy_ops);
- if (IS_ERR(ima_policy))
+ if (IS_ERR(ima_policy)) {
+ ret = PTR_ERR(ima_policy);
goto out;
+ }
return 0;
out:
+ securityfs_remove(ima_policy);
securityfs_remove(violations);
securityfs_remove(runtime_measurements_count);
securityfs_remove(ascii_runtime_measurements);
securityfs_remove(binary_runtime_measurements);
securityfs_remove(ima_symlink);
securityfs_remove(ima_dir);
- securityfs_remove(ima_policy);
- return -1;
+
+ return ret;
}
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c
index b26fa67476b4..63979aefc95f 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -47,12 +47,9 @@ static int __init ima_add_boot_aggregate(void)
struct integrity_iint_cache tmp_iint, *iint = &tmp_iint;
struct ima_event_data event_data = { .iint = iint,
.filename = boot_aggregate_name };
+ struct ima_max_digest_data hash;
int result = -ENOMEM;
int violation = 0;
- struct {
- struct ima_digest_data hdr;
- char digest[TPM_MAX_DIGEST_SIZE];
- } hash;
memset(iint, 0, sizeof(*iint));
memset(&hash, 0, sizeof(hash));
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 8c6e4514d494..c6412dec3810 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -263,7 +263,7 @@ static int process_measurement(struct file *file, const struct cred *cred,
/* reset appraisal flags if ima_inode_post_setattr was called */
iint->flags &= ~(IMA_APPRAISE | IMA_APPRAISED |
IMA_APPRAISE_SUBMASK | IMA_APPRAISED_SUBMASK |
- IMA_ACTION_FLAGS);
+ IMA_NONACTION_FLAGS);
/*
	 * Re-evaluate the file if either the xattr has changed or the
@@ -764,7 +764,7 @@ int ima_post_read_file(struct file *file, void *buf, loff_t size,
* call to ima_post_load_data().
*
* Callers of this LSM hook can not measure, appraise, or audit the
- * data provided by userspace. Enforce policy rules requring a file
+ * data provided by userspace. Enforce policy rules requiring a file
* signature (eg. kexec'ed kernel image).
*
* For permission return 0, otherwise return -EACCES.
@@ -874,10 +874,7 @@ int process_buffer_measurement(struct user_namespace *mnt_userns,
.buf = buf,
.buf_len = size};
struct ima_template_desc *template;
- struct {
- struct ima_digest_data hdr;
- char digest[IMA_MAX_DIGEST_SIZE];
- } hash = {};
+ struct ima_max_digest_data hash;
char digest_hash[IMA_MAX_DIGEST_SIZE];
int digest_hash_len = hash_digest_size[ima_hash_algo];
int violation = 0;
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 320ca80aacab..eea6e92500b8 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -16,7 +16,6 @@
#include <linux/parser.h>
#include <linux/slab.h>
#include <linux/rculist.h>
-#include <linux/genhd.h>
#include <linux/seq_file.h>
#include <linux/ima.h>
@@ -429,7 +428,7 @@ static int ima_lsm_update_rule(struct ima_rule_entry *entry)
/*
* ima_lsm_copy_rule() shallow copied all references, except for the
* LSM references, from entry to nentry so we only want to free the LSM
- * references and the entry itself. All other memory refrences will now
+ * references and the entry itself. All other memory references will now
* be owned by nentry.
*/
ima_lsm_free_rule(entry);
@@ -712,7 +711,7 @@ int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode,
func, mask, func_data))
continue;
- action |= entry->flags & IMA_ACTION_FLAGS;
+ action |= entry->flags & IMA_NONACTION_FLAGS;
action |= entry->action & IMA_DO_MASK;
if (entry->action & IMA_APPRAISE) {
@@ -1967,6 +1966,14 @@ int ima_policy_show(struct seq_file *m, void *v)
rcu_read_lock();
+ /* Do not print rules with inactive LSM labels */
+ for (i = 0; i < MAX_LSM_RULES; i++) {
+ if (entry->lsm[i].args_p && !entry->lsm[i].rule) {
+ rcu_read_unlock();
+ return 0;
+ }
+ }
+
if (entry->action & MEASURE)
seq_puts(m, pt(Opt_measure));
if (entry->action & DONT_MEASURE)
diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c
index 694560396be0..db1ad6d7a57f 100644
--- a/security/integrity/ima/ima_template.c
+++ b/security/integrity/ima/ima_template.c
@@ -29,6 +29,7 @@ static struct ima_template_desc builtin_templates[] = {
static LIST_HEAD(defined_templates);
static DEFINE_SPINLOCK(template_list);
+static int template_setup_done;
static const struct ima_template_field supported_fields[] = {
{.field_id = "d", .field_init = ima_eventdigest_init,
@@ -101,10 +102,11 @@ static int __init ima_template_setup(char *str)
struct ima_template_desc *template_desc;
int template_len = strlen(str);
- if (ima_template)
+ if (template_setup_done)
return 1;
- ima_init_template_list();
+ if (!ima_template)
+ ima_init_template_list();
/*
* Verify that a template with the supplied name exists.
@@ -128,6 +130,7 @@ static int __init ima_template_setup(char *str)
}
ima_template = template_desc;
+ template_setup_done = 1;
return 1;
}
__setup("ima_template=", ima_template_setup);
@@ -136,7 +139,7 @@ static int __init ima_template_fmt_setup(char *str)
{
int num_templates = ARRAY_SIZE(builtin_templates);
- if (ima_template)
+ if (template_setup_done)
return 1;
if (template_desc_init_fields(str, NULL, NULL) < 0) {
@@ -147,6 +150,7 @@ static int __init ima_template_fmt_setup(char *str)
builtin_templates[num_templates - 1].fmt = str;
ima_template = builtin_templates + num_templates - 1;
+ template_setup_done = 1;
return 1;
}
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
index ca017cae73eb..7155d17a3b75 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -272,7 +272,7 @@ static int ima_eventdigest_init_common(const u8 *digest, u32 digestsize,
* digest formats:
* - DATA_FMT_DIGEST: digest
* - DATA_FMT_DIGEST_WITH_ALGO: [<hash algo>] + ':' + '\0' + digest,
- * where <hash algo> is provided if the hash algoritm is not
+ * where <hash algo> is provided if the hash algorithm is not
* SHA1 or MD5
*/
u8 buffer[CRYPTO_MAX_ALG_NAME + 2 + IMA_MAX_DIGEST_SIZE] = { 0 };
@@ -307,10 +307,7 @@ static int ima_eventdigest_init_common(const u8 *digest, u32 digestsize,
int ima_eventdigest_init(struct ima_event_data *event_data,
struct ima_field_data *field_data)
{
- struct {
- struct ima_digest_data hdr;
- char digest[IMA_MAX_DIGEST_SIZE];
- } hash;
+ struct ima_max_digest_data hash;
u8 *cur_digest = NULL;
u32 cur_digestsize = 0;
struct inode *inode;
diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h
index 547425c20e11..3510e413ea17 100644
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h
@@ -15,6 +15,7 @@
#include <linux/types.h>
#include <linux/integrity.h>
#include <crypto/sha1.h>
+#include <crypto/hash.h>
#include <linux/key.h>
#include <linux/audit.h>
@@ -30,8 +31,8 @@
#define IMA_HASH 0x00000100
#define IMA_HASHED 0x00000200
-/* iint cache flags */
-#define IMA_ACTION_FLAGS 0xff000000
+/* iint policy rule cache flags */
+#define IMA_NONACTION_FLAGS 0xff000000
#define IMA_DIGSIG_REQUIRED 0x01000000
#define IMA_PERMIT_DIRECTIO 0x02000000
#define IMA_NEW_FILE 0x04000000
@@ -111,6 +112,15 @@ struct ima_digest_data {
} __packed;
/*
+ * Instead of wrapping the ima_digest_data struct inside a local structure
+ * with the maximum hash size, define ima_max_digest_data struct.
+ */
+struct ima_max_digest_data {
+ struct ima_digest_data hdr;
+ u8 digest[HASH_MAX_DIGESTSIZE];
+} __packed;
+
+/*
* signature format v2 - for using with asymmetric keys
*/
struct signature_v2_hdr {
@@ -151,7 +161,8 @@ int integrity_kernel_read(struct file *file, loff_t offset,
#define INTEGRITY_KEYRING_EVM 0
#define INTEGRITY_KEYRING_IMA 1
#define INTEGRITY_KEYRING_PLATFORM 2
-#define INTEGRITY_KEYRING_MAX 3
+#define INTEGRITY_KEYRING_MACHINE 3
+#define INTEGRITY_KEYRING_MAX 4
extern struct dentry *integrity_dir;
@@ -283,3 +294,17 @@ static inline void __init add_to_platform_keyring(const char *source,
{
}
#endif
+
+#ifdef CONFIG_INTEGRITY_MACHINE_KEYRING
+void __init add_to_machine_keyring(const char *source, const void *data, size_t len);
+bool __init trust_moklist(void);
+#else
+static inline void __init add_to_machine_keyring(const char *source,
+ const void *data, size_t len)
+{
+}
+static inline bool __init trust_moklist(void)
+{
+ return false;
+}
+#endif
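ima_max_digest_data gives every caller that previously rolled its own { hdr; digest[] } wrapper the same worst-case container, sized by HASH_MAX_DIGESTSIZE (64 bytes, for SHA-512, in current kernels) rather than by whichever constant each site happened to pick. The shape of the pattern, detached from IMA (constants and types are illustrative):

        #include <string.h>

        #define HASH_MAX_DIGESTSIZE 64          /* worst case across algorithms */

        struct digest_hdr {
                int algo;
                unsigned int length;
        };

        struct max_digest_data {
                struct digest_hdr hdr;
                unsigned char digest[HASH_MAX_DIGESTSIZE];
        };

        static void store_digest(struct max_digest_data *h,
                                 const unsigned char *d, unsigned int len)
        {
                h->hdr.length = len;
                memcpy(h->digest, d, len);      /* always fits: len <= 64 */
        }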
diff --git a/security/integrity/integrity_audit.c b/security/integrity/integrity_audit.c
index 29220056207f..0ec5e4c22cb2 100644
--- a/security/integrity/integrity_audit.c
+++ b/security/integrity/integrity_audit.c
@@ -45,6 +45,8 @@ void integrity_audit_message(int audit_msgno, struct inode *inode,
return;
ab = audit_log_start(audit_context(), GFP_KERNEL, audit_msgno);
+ if (!ab)
+ return;
audit_log_format(ab, "pid=%d uid=%u auid=%u ses=%u",
task_pid_nr(current),
from_kuid(&init_user_ns, current_uid()),
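audit_log_start() returns NULL when auditing is disabled or the record cannot be allocated, and every audit_log_format() call after it assumes a live buffer, so the single early check covers the whole function. The guard pattern generically (stdio standing in for the audit buffer):

        #include <stdarg.h>
        #include <stdio.h>

        struct audit_buf {
                FILE *sink;
        };

        static void audit_log(struct audit_buf *ab, const char *fmt, ...)
        {
                va_list ap;

                va_start(ap, fmt);
                vfprintf(ab->sink, fmt, ap);    /* would crash if ab were NULL */
                va_end(ap);
        }

        static void report(struct audit_buf *(*start)(void))
        {
                struct audit_buf *ab = start();

                if (!ab)
                        return;                 /* auditing off or OOM: bail early */
                audit_log(ab, "pid=%d\n", 0);
        }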
diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c
index 5604bd57c990..1db4d3b4356d 100644
--- a/security/integrity/platform_certs/keyring_handler.c
+++ b/security/integrity/platform_certs/keyring_handler.c
@@ -9,6 +9,7 @@
#include <keys/asymmetric-type.h>
#include <keys/system_keyring.h>
#include "../integrity.h"
+#include "keyring_handler.h"
static efi_guid_t efi_cert_x509_guid __initdata = EFI_CERT_X509_GUID;
static efi_guid_t efi_cert_x509_sha256_guid __initdata =
@@ -66,7 +67,7 @@ static __init void uefi_revocation_list_x509(const char *source,
/*
* Return the appropriate handler for particular signature list types found in
- * the UEFI db and MokListRT tables.
+ * the UEFI db tables.
*/
__init efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type)
{
@@ -77,6 +78,21 @@ __init efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type)
/*
* Return the appropriate handler for particular signature list types found in
+ * the MokListRT tables.
+ */
+__init efi_element_handler_t get_handler_for_mok(const efi_guid_t *sig_type)
+{
+ if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0) {
+ if (IS_ENABLED(CONFIG_INTEGRITY_MACHINE_KEYRING) && trust_moklist())
+ return add_to_machine_keyring;
+ else
+ return add_to_platform_keyring;
+ }
+ return 0;
+}
+
+/*
+ * Return the appropriate handler for particular signature list types found in
* the UEFI dbx and MokListXRT tables.
*/
__init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type)
diff --git a/security/integrity/platform_certs/keyring_handler.h b/security/integrity/platform_certs/keyring_handler.h
index 2462bfa08fe3..284558f30411 100644
--- a/security/integrity/platform_certs/keyring_handler.h
+++ b/security/integrity/platform_certs/keyring_handler.h
@@ -25,6 +25,11 @@ void blacklist_binary(const char *source, const void *data, size_t len);
efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type);
/*
+ * Return the handler for particular signature list types found in the mok.
+ */
+efi_element_handler_t get_handler_for_mok(const efi_guid_t *sig_type);
+
+/*
* Return the handler for particular signature list types found in the dbx.
*/
efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type);
diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c
index 08b6d12f99b4..5f45c3c07dbd 100644
--- a/security/integrity/platform_certs/load_uefi.c
+++ b/security/integrity/platform_certs/load_uefi.c
@@ -95,7 +95,7 @@ static int __init load_moklist_certs(void)
rc = parse_efi_signature_list("UEFI:MokListRT (MOKvar table)",
mokvar_entry->data,
mokvar_entry->data_size,
- get_handler_for_db);
+ get_handler_for_mok);
/* All done if that worked. */
if (!rc)
return rc;
@@ -110,7 +110,7 @@ static int __init load_moklist_certs(void)
mok = get_cert_list(L"MokListRT", &mok_var, &moksize, &status);
if (mok) {
rc = parse_efi_signature_list("UEFI:MokListRT",
- mok, moksize, get_handler_for_db);
+ mok, moksize, get_handler_for_mok);
kfree(mok);
if (rc)
pr_err("Couldn't parse MokListRT signatures: %d\n", rc);
diff --git a/security/integrity/platform_certs/machine_keyring.c b/security/integrity/platform_certs/machine_keyring.c
new file mode 100644
index 000000000000..7aaed7950b6e
--- /dev/null
+++ b/security/integrity/platform_certs/machine_keyring.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Machine keyring routines.
+ *
+ * Copyright (c) 2021, Oracle and/or its affiliates.
+ */
+
+#include <linux/efi.h>
+#include "../integrity.h"
+
+static bool trust_mok;
+
+static __init int machine_keyring_init(void)
+{
+ int rc;
+
+ rc = integrity_init_keyring(INTEGRITY_KEYRING_MACHINE);
+ if (rc)
+ return rc;
+
+ pr_notice("Machine keyring initialized\n");
+ return 0;
+}
+device_initcall(machine_keyring_init);
+
+void __init add_to_machine_keyring(const char *source, const void *data, size_t len)
+{
+ key_perm_t perm;
+ int rc;
+
+ perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW;
+ rc = integrity_load_cert(INTEGRITY_KEYRING_MACHINE, source, data, len, perm);
+
+ /*
+ * Some MOKList keys may not pass the machine keyring restrictions.
+ * If the restriction check does not pass and the platform keyring
+ * is configured, try to add it into that keyring instead.
+ */
+ if (rc && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING))
+ rc = integrity_load_cert(INTEGRITY_KEYRING_PLATFORM, source,
+ data, len, perm);
+
+ if (rc)
+ pr_info("Error adding keys to machine keyring %s\n", source);
+}
+
+/*
+ * Try to load the MokListTrustedRT MOK variable to see if we should trust
+ * the MOK keys within the kernel. It is not an error if this variable
+ * does not exist. If it does not exist, MOK keys should not be trusted
+ * within the machine keyring.
+ */
+static __init bool uefi_check_trust_mok_keys(void)
+{
+ struct efi_mokvar_table_entry *mokvar_entry;
+
+ mokvar_entry = efi_mokvar_entry_find("MokListTrustedRT");
+
+ if (mokvar_entry)
+ return true;
+
+ return false;
+}
+
+bool __init trust_moklist(void)
+{
+ static bool initialized;
+
+ if (!initialized) {
+ initialized = true;
+
+ if (uefi_check_trust_mok_keys())
+ trust_mok = true;
+ }
+
+ return trust_mok;
+}
diff --git a/security/keys/Kconfig b/security/keys/Kconfig
index 969122c7b92f..0e30b361e1c1 100644
--- a/security/keys/Kconfig
+++ b/security/keys/Kconfig
@@ -98,10 +98,21 @@ config ENCRYPTED_KEYS
select CRYPTO_RNG
help
This option provides support for create/encrypting/decrypting keys
- in the kernel. Encrypted keys are kernel generated random numbers,
- which are encrypted/decrypted with a 'master' symmetric key. The
- 'master' key can be either a trusted-key or user-key type.
- Userspace only ever sees/stores encrypted blobs.
+ in the kernel. Encrypted keys are instantiated using kernel
+ generated random numbers or provided decrypted data, and are
+ encrypted/decrypted with a 'master' symmetric key. The 'master'
+ key can be either a trusted-key or user-key type. Only encrypted
+ blobs are ever output to Userspace.
+
+ If you are unsure as to whether this is required, answer N.
+
+config USER_DECRYPTED_DATA
+ bool "Allow encrypted keys with user decrypted data"
+ depends on ENCRYPTED_KEYS
+ help
+ This option provides support for instantiating encrypted keys using
+ user-provided decrypted data. The decrypted data must be hex-ascii
+ encoded.
If you are unsure as to whether this is required, answer N.
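The datablob format documented further down gains an optional trailing <decrypted data> field. A hypothetical userspace sketch, not part of the patch: it assumes keyutils, CONFIG_USER_DECRYPTED_DATA=y, and an already-loaded 'user:kmk' master key; the description "evm" and the hex payload are placeholders. Per the strlen() check in encrypted_key_alloc() below, the hex-ascii data must be exactly <decrypted data length> characters long.

#include <keyutils.h>
#include <string.h>

int example_instantiate(void)
{
	/* "new <master-key> <datalen> <hex data>": 32 hex chars for len 32 */
	static const char payload[] =
		"new user:kmk 32 00112233445566778899aabbccddeeff";
	key_serial_t k;

	k = add_key("encrypted", "evm", payload, strlen(payload),
		    KEY_SPEC_USER_KEYRING);
	return k < 0 ? -1 : 0;
}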
diff --git a/security/keys/dh.c b/security/keys/dh.c
index 4573fc15617d..b339760a31dd 100644
--- a/security/keys/dh.c
+++ b/security/keys/dh.c
@@ -15,7 +15,7 @@
#include <keys/user-type.h>
#include "internal.h"
-static ssize_t dh_data_from_key(key_serial_t keyid, void **data)
+static ssize_t dh_data_from_key(key_serial_t keyid, const void **data)
{
struct key *key;
key_ref_t key_ref;
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 87432b35d771..e05cfc2e49ae 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -78,6 +78,11 @@ static const match_table_t key_tokens = {
{Opt_err, NULL}
};
+static bool user_decrypted_data = IS_ENABLED(CONFIG_USER_DECRYPTED_DATA);
+module_param(user_decrypted_data, bool, 0);
+MODULE_PARM_DESC(user_decrypted_data,
+ "Allow instantiation of encrypted keys using provided decrypted data");
+
static int aes_get_sizes(void)
{
struct crypto_skcipher *tfm;
@@ -158,7 +163,7 @@ static int valid_master_desc(const char *new_desc, const char *orig_desc)
* datablob_parse - parse the keyctl data
*
* datablob format:
- * new [<format>] <master-key name> <decrypted data length>
+ * new [<format>] <master-key name> <decrypted data length> [<decrypted data>]
* load [<format>] <master-key name> <decrypted data length>
* <encrypted iv + data>
* update <new-master-key name>
@@ -170,7 +175,7 @@ static int valid_master_desc(const char *new_desc, const char *orig_desc)
*/
static int datablob_parse(char *datablob, const char **format,
char **master_desc, char **decrypted_datalen,
- char **hex_encoded_iv)
+ char **hex_encoded_iv, char **decrypted_data)
{
substring_t args[MAX_OPT_ARGS];
int ret = -EINVAL;
@@ -231,6 +236,7 @@ static int datablob_parse(char *datablob, const char **format,
"when called from .update method\n", keyword);
break;
}
+ *decrypted_data = strsep(&datablob, " \t");
ret = 0;
break;
case Opt_load:
@@ -595,7 +601,8 @@ out:
static struct encrypted_key_payload *encrypted_key_alloc(struct key *key,
const char *format,
const char *master_desc,
- const char *datalen)
+ const char *datalen,
+ const char *decrypted_data)
{
struct encrypted_key_payload *epayload = NULL;
unsigned short datablob_len;
@@ -604,6 +611,7 @@ static struct encrypted_key_payload *encrypted_key_alloc(struct key *key,
unsigned int encrypted_datalen;
unsigned int format_len;
long dlen;
+ int i;
int ret;
ret = kstrtol(datalen, 10, &dlen);
@@ -613,6 +621,24 @@ static struct encrypted_key_payload *encrypted_key_alloc(struct key *key,
format_len = (!format) ? strlen(key_format_default) : strlen(format);
decrypted_datalen = dlen;
payload_datalen = decrypted_datalen;
+
+ if (decrypted_data) {
+ if (!user_decrypted_data) {
+ pr_err("encrypted key: instantiation of keys using provided decrypted data is disabled since CONFIG_USER_DECRYPTED_DATA is set to false\n");
+ return ERR_PTR(-EINVAL);
+ }
+ if (strlen(decrypted_data) != decrypted_datalen) {
+ pr_err("encrypted key: decrypted data provided does not match decrypted data length provided\n");
+ return ERR_PTR(-EINVAL);
+ }
+ for (i = 0; i < strlen(decrypted_data); i++) {
+ if (!isxdigit(decrypted_data[i])) {
+ pr_err("encrypted key: decrypted data provided must contain only hexadecimal characters\n");
+ return ERR_PTR(-EINVAL);
+ }
+ }
+ }
+
if (format) {
if (!strcmp(format, key_format_ecryptfs)) {
if (dlen != ECRYPTFS_MAX_KEY_BYTES) {
@@ -740,13 +766,14 @@ static void __ekey_init(struct encrypted_key_payload *epayload,
/*
* encrypted_init - initialize an encrypted key
*
- * For a new key, use a random number for both the iv and data
- * itself. For an old key, decrypt the hex encoded data.
+ * For a new key, use the user-provided decrypted data when supplied,
+ * otherwise generate a random number. A random number is always used
+ * for the iv. For an old key, decrypt the hex encoded data.
(replacement below)
*/
static int encrypted_init(struct encrypted_key_payload *epayload,
const char *key_desc, const char *format,
const char *master_desc, const char *datalen,
- const char *hex_encoded_iv)
+ const char *hex_encoded_iv, const char *decrypted_data)
{
int ret = 0;
@@ -760,21 +787,26 @@ static int encrypted_init(struct encrypted_key_payload *epayload,
}
__ekey_init(epayload, format, master_desc, datalen);
- if (!hex_encoded_iv) {
- get_random_bytes(epayload->iv, ivsize);
-
- get_random_bytes(epayload->decrypted_data,
- epayload->decrypted_datalen);
- } else
+ if (hex_encoded_iv) {
ret = encrypted_key_decrypt(epayload, format, hex_encoded_iv);
+ } else if (decrypted_data) {
+ get_random_bytes(epayload->iv, ivsize);
+ memcpy(epayload->decrypted_data, decrypted_data,
+ epayload->decrypted_datalen);
+ } else {
+ get_random_bytes(epayload->iv, ivsize);
+ get_random_bytes(epayload->decrypted_data, epayload->decrypted_datalen);
+ }
return ret;
}
/*
* encrypted_instantiate - instantiate an encrypted key
*
- * Decrypt an existing encrypted datablob or create a new encrypted key
- * based on a kernel random number.
+ * Instantiates the key:
+ * - by decrypting an existing encrypted datablob, or
+ * - by creating a new encrypted key based on a kernel random number, or
+ * - using provided decrypted data.
*
* On success, return 0. Otherwise return errno.
*/
@@ -787,6 +819,7 @@ static int encrypted_instantiate(struct key *key,
char *master_desc = NULL;
char *decrypted_datalen = NULL;
char *hex_encoded_iv = NULL;
+ char *decrypted_data = NULL;
size_t datalen = prep->datalen;
int ret;
@@ -799,18 +832,18 @@ static int encrypted_instantiate(struct key *key,
datablob[datalen] = 0;
memcpy(datablob, prep->data, datalen);
ret = datablob_parse(datablob, &format, &master_desc,
- &decrypted_datalen, &hex_encoded_iv);
+ &decrypted_datalen, &hex_encoded_iv, &decrypted_data);
if (ret < 0)
goto out;
epayload = encrypted_key_alloc(key, format, master_desc,
- decrypted_datalen);
+ decrypted_datalen, decrypted_data);
if (IS_ERR(epayload)) {
ret = PTR_ERR(epayload);
goto out;
}
ret = encrypted_init(epayload, key->description, format, master_desc,
- decrypted_datalen, hex_encoded_iv);
+ decrypted_datalen, hex_encoded_iv, decrypted_data);
if (ret < 0) {
kfree_sensitive(epayload);
goto out;
@@ -860,7 +893,7 @@ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep)
buf[datalen] = 0;
memcpy(buf, prep->data, datalen);
- ret = datablob_parse(buf, &format, &new_master_desc, NULL, NULL);
+ ret = datablob_parse(buf, &format, &new_master_desc, NULL, NULL, NULL);
if (ret < 0)
goto out;
@@ -869,7 +902,7 @@ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep)
goto out;
new_epayload = encrypted_key_alloc(key, epayload->format,
- new_master_desc, epayload->datalen);
+ new_master_desc, epayload->datalen, NULL);
if (IS_ERR(new_epayload)) {
ret = PTR_ERR(new_epayload);
goto out;
diff --git a/security/keys/keyctl_pkey.c b/security/keys/keyctl_pkey.c
index 5de0d599a274..97bc27bbf079 100644
--- a/security/keys/keyctl_pkey.c
+++ b/security/keys/keyctl_pkey.c
@@ -135,15 +135,23 @@ static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_par
switch (op) {
case KEYCTL_PKEY_ENCRYPT:
+ if (uparams.in_len > info.max_dec_size ||
+ uparams.out_len > info.max_enc_size)
+ return -EINVAL;
+ break;
case KEYCTL_PKEY_DECRYPT:
if (uparams.in_len > info.max_enc_size ||
uparams.out_len > info.max_dec_size)
return -EINVAL;
break;
case KEYCTL_PKEY_SIGN:
+ if (uparams.in_len > info.max_data_size ||
+ uparams.out_len > info.max_sig_size)
+ return -EINVAL;
+ break;
case KEYCTL_PKEY_VERIFY:
- if (uparams.in_len > info.max_sig_size ||
- uparams.out_len > info.max_data_size)
+ if (uparams.in_len > info.max_data_size ||
+ uparams.in2_len > info.max_sig_size)
return -EINVAL;
break;
default:
@@ -151,7 +159,7 @@ static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_par
}
params->in_len = uparams.in_len;
- params->out_len = uparams.out_len;
+ params->out_len = uparams.out_len; /* Note: same as in2_len */
return 0;
}
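The corrected KEYCTL_PKEY_VERIFY case bounds in_len by max_data_size and in2_len by max_sig_size, matching how the two buffers are actually used: the data being checked travels in the first input and the signature in the second. A hedged userspace sketch (assumes keyutils; the info string and buffers are placeholders):

#include <keyutils.h>

long example_verify(key_serial_t key, const void *data, size_t data_len,
		    const void *sig, size_t sig_len)
{
	/* data -> in/in_len, signature -> in2/in2_len */
	return keyctl_pkey_verify(key, "enc=pkcs1 hash=sha256",
				  data, data_len, sig, sig_len);
}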
diff --git a/security/keys/trusted-keys/trusted_core.c b/security/keys/trusted-keys/trusted_core.c
index d5c891d8d353..9b9d3ef79cbe 100644
--- a/security/keys/trusted-keys/trusted_core.c
+++ b/security/keys/trusted-keys/trusted_core.c
@@ -27,10 +27,10 @@ module_param_named(source, trusted_key_source, charp, 0);
MODULE_PARM_DESC(source, "Select trusted keys source (tpm or tee)");
static const struct trusted_key_source trusted_key_sources[] = {
-#if defined(CONFIG_TCG_TPM)
+#if IS_REACHABLE(CONFIG_TCG_TPM)
{ "tpm", &trusted_key_tpm_ops },
#endif
-#if defined(CONFIG_TEE)
+#if IS_REACHABLE(CONFIG_TEE)
{ "tee", &trusted_key_tee_ops },
#endif
};
@@ -351,7 +351,7 @@ static int __init init_trusted(void)
static void __exit cleanup_trusted(void)
{
- static_call(trusted_key_exit)();
+ static_call_cond(trusted_key_exit)();
}
late_initcall(init_trusted);
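The defined() to IS_REACHABLE() switch matters for modular builds. A sketch of the semantics (per include/linux/kconfig.h), not part of the patch:

/*
 *   IS_REACHABLE(CONFIG_X) := IS_BUILTIN(CONFIG_X) ||
 *                             (IS_MODULE(CONFIG_X) && defined(MODULE))
 *
 * A tristate option set to 'm' defines CONFIG_TCG_TPM_MODULE rather
 * than CONFIG_TCG_TPM, so plain defined(CONFIG_TCG_TPM) is false when
 * trusted.ko and the TPM core are both modules, and the backend would
 * silently vanish from trusted_key_sources[]. IS_REACHABLE() accepts
 * that case while still excluding the unlinkable combination of
 * built-in code referencing a modular backend.
 */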
diff --git a/security/security.c b/security/security.c
index 3d4eb474f35b..b7cf5cbfdc67 100644
--- a/security/security.c
+++ b/security/security.c
@@ -884,9 +884,22 @@ int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
return call_int_hook(fs_context_dup, 0, fc, src_fc);
}
-int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param)
+int security_fs_context_parse_param(struct fs_context *fc,
+ struct fs_parameter *param)
{
- return call_int_hook(fs_context_parse_param, -ENOPARAM, fc, param);
+ struct security_hook_list *hp;
+ int trc;
+ int rc = -ENOPARAM;
+
+ hlist_for_each_entry(hp, &security_hook_heads.fs_context_parse_param,
+ list) {
+ trc = hp->hook.fs_context_parse_param(fc, param);
+ if (trc == 0)
+ rc = 0;
+ else if (trc != -ENOPARAM)
+ return trc;
+ }
+ return rc;
}
int security_sb_alloc(struct super_block *sb)
@@ -1048,8 +1061,19 @@ int security_dentry_init_security(struct dentry *dentry, int mode,
const char **xattr_name, void **ctx,
u32 *ctxlen)
{
- return call_int_hook(dentry_init_security, -EOPNOTSUPP, dentry, mode,
- name, xattr_name, ctx, ctxlen);
+ struct security_hook_list *hp;
+ int rc;
+
+ /*
+ * Only one module will provide a security context.
+ */
+ hlist_for_each_entry(hp, &security_hook_heads.dentry_init_security, list) {
+ rc = hp->hook.dentry_init_security(dentry, mode, name,
+ xattr_name, ctx, ctxlen);
+ if (rc != LSM_RET_DEFAULT(dentry_init_security))
+ return rc;
+ }
+ return LSM_RET_DEFAULT(dentry_init_security);
}
EXPORT_SYMBOL(security_dentry_init_security);
@@ -2380,6 +2404,13 @@ void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
}
EXPORT_SYMBOL(security_sctp_sk_clone);
+int security_sctp_assoc_established(struct sctp_association *asoc,
+ struct sk_buff *skb)
+{
+ return call_int_hook(sctp_assoc_established, 0, asoc, skb);
+}
+EXPORT_SYMBOL(security_sctp_assoc_established);
+
#endif /* CONFIG_SECURITY_NETWORK */
#ifdef CONFIG_SECURITY_INFINIBAND
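The two open-coded loops above exist because the generic call_int_hook() helper cannot express their semantics. For reference, a sketch of its shape (see the macro in security/security.c):

/*
 *   RC = IRC;
 *   hlist_for_each_entry(P, &security_hook_heads.FUNC, list) {
 *           RC = P->hook.FUNC(__VA_ARGS__);
 *           if (RC != 0)
 *                   break;
 *   }
 *
 * Stopping at the first nonzero return cannot model "any hook may
 * claim this mount parameter, -ENOPARAM means keep asking" nor "the
 * first module to provide a security context wins".
 */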
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 5b6895e4fc29..b12e14b2797b 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -211,10 +211,9 @@ static int selinux_lsm_notifier_avc_callback(u32 event)
*/
static void cred_init_security(void)
{
- struct cred *cred = (struct cred *) current->real_cred;
struct task_security_struct *tsec;
- tsec = selinux_cred(cred);
+ tsec = selinux_cred(unrcu_pointer(current->real_cred));
tsec->osid = tsec->sid = SECINITSID_KERNEL;
}
@@ -341,17 +340,15 @@ static void inode_free_security(struct inode *inode)
}
struct selinux_mnt_opts {
- const char *fscontext, *context, *rootcontext, *defcontext;
+ u32 fscontext_sid;
+ u32 context_sid;
+ u32 rootcontext_sid;
+ u32 defcontext_sid;
};
static void selinux_free_mnt_opts(void *mnt_opts)
{
- struct selinux_mnt_opts *opts = mnt_opts;
- kfree(opts->fscontext);
- kfree(opts->context);
- kfree(opts->rootcontext);
- kfree(opts->defcontext);
- kfree(opts);
+ kfree(mnt_opts);
}
enum {
@@ -479,7 +476,7 @@ static int selinux_is_sblabel_mnt(struct super_block *sb)
static int sb_check_xattr_support(struct super_block *sb)
{
- struct superblock_security_struct *sbsec = sb->s_security;
+ struct superblock_security_struct *sbsec = selinux_superblock(sb);
struct dentry *root = sb->s_root;
struct inode *root_inode = d_backing_inode(root);
u32 sid;
@@ -598,18 +595,6 @@ static int bad_option(struct superblock_security_struct *sbsec, char flag,
return 0;
}
-static int parse_sid(struct super_block *sb, const char *s, u32 *sid,
- gfp_t gfp)
-{
- int rc = security_context_str_to_sid(&selinux_state, s,
- sid, gfp);
- if (rc)
- pr_warn("SELinux: security_context_str_to_sid"
- "(%s) failed for (dev %s, type %s) errno=%d\n",
- s, sb->s_id, sb->s_type->name, rc);
- return rc;
-}
-
/*
* Allow filesystems with binary mount data to explicitly set mount point
* labeling information.
@@ -672,41 +657,29 @@ static int selinux_set_mnt_opts(struct super_block *sb,
* than once with different security options.
*/
if (opts) {
- if (opts->fscontext) {
- rc = parse_sid(sb, opts->fscontext, &fscontext_sid,
- GFP_KERNEL);
- if (rc)
- goto out;
+ if (opts->fscontext_sid) {
+ fscontext_sid = opts->fscontext_sid;
if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid,
fscontext_sid))
goto out_double_mount;
sbsec->flags |= FSCONTEXT_MNT;
}
- if (opts->context) {
- rc = parse_sid(sb, opts->context, &context_sid,
- GFP_KERNEL);
- if (rc)
- goto out;
+ if (opts->context_sid) {
+ context_sid = opts->context_sid;
if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid,
context_sid))
goto out_double_mount;
sbsec->flags |= CONTEXT_MNT;
}
- if (opts->rootcontext) {
- rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid,
- GFP_KERNEL);
- if (rc)
- goto out;
+ if (opts->rootcontext_sid) {
+ rootcontext_sid = opts->rootcontext_sid;
if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid,
rootcontext_sid))
goto out_double_mount;
sbsec->flags |= ROOTCONTEXT_MNT;
}
- if (opts->defcontext) {
- rc = parse_sid(sb, opts->defcontext, &defcontext_sid,
- GFP_KERNEL);
- if (rc)
- goto out;
+ if (opts->defcontext_sid) {
+ defcontext_sid = opts->defcontext_sid;
if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid,
defcontext_sid))
goto out_double_mount;
@@ -976,6 +949,8 @@ static int selinux_add_opt(int token, const char *s, void **mnt_opts)
{
struct selinux_mnt_opts *opts = *mnt_opts;
bool is_alloc_opts = false;
+ u32 *dst_sid;
+ int rc;
if (token == Opt_seclabel)
/* eaten and completely ignored */
@@ -983,6 +958,11 @@ static int selinux_add_opt(int token, const char *s, void **mnt_opts)
if (!s)
return -ENOMEM;
+ if (!selinux_initialized(&selinux_state)) {
+ pr_warn("SELinux: Unable to set superblock options before the security server is initialized\n");
+ return -EINVAL;
+ }
+
if (!opts) {
opts = kzalloc(sizeof(*opts), GFP_KERNEL);
if (!opts)
@@ -993,28 +973,34 @@ static int selinux_add_opt(int token, const char *s, void **mnt_opts)
switch (token) {
case Opt_context:
- if (opts->context || opts->defcontext)
+ if (opts->context_sid || opts->defcontext_sid)
goto err;
- opts->context = s;
+ dst_sid = &opts->context_sid;
break;
case Opt_fscontext:
- if (opts->fscontext)
+ if (opts->fscontext_sid)
goto err;
- opts->fscontext = s;
+ dst_sid = &opts->fscontext_sid;
break;
case Opt_rootcontext:
- if (opts->rootcontext)
+ if (opts->rootcontext_sid)
goto err;
- opts->rootcontext = s;
+ dst_sid = &opts->rootcontext_sid;
break;
case Opt_defcontext:
- if (opts->context || opts->defcontext)
+ if (opts->context_sid || opts->defcontext_sid)
goto err;
- opts->defcontext = s;
+ dst_sid = &opts->defcontext_sid;
break;
+ default:
+ WARN_ON(1);
+ return -EINVAL;
}
-
- return 0;
+ rc = security_context_str_to_sid(&selinux_state, s, dst_sid, GFP_KERNEL);
+ if (rc)
+ pr_warn("SELinux: security_context_str_to_sid (%s) failed with errno=%d\n",
+ s, rc);
+ return rc;
err:
if (is_alloc_opts) {
@@ -2535,7 +2521,7 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm)
if (rc) {
clear_itimer();
- spin_lock_irq(&current->sighand->siglock);
+ spin_lock_irq(&unrcu_pointer(current->sighand)->siglock);
if (!fatal_signal_pending(current)) {
flush_sigqueue(&current->pending);
flush_sigqueue(&current->signal->shared_pending);
@@ -2543,13 +2529,13 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm)
sigemptyset(&current->blocked);
recalc_sigpending();
}
- spin_unlock_irq(&current->sighand->siglock);
+ spin_unlock_irq(&unrcu_pointer(current->sighand)->siglock);
}
/* Wake up the parent if it is waiting so that it can recheck
* wait permission to the new task SID. */
read_lock(&tasklist_lock);
- __wake_up_parent(current, current->real_parent);
+ __wake_up_parent(current, unrcu_pointer(current->real_parent));
read_unlock(&tasklist_lock);
}
@@ -2647,9 +2633,7 @@ free_opt:
static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts)
{
struct selinux_mnt_opts *opts = mnt_opts;
- struct superblock_security_struct *sbsec = sb->s_security;
- u32 sid;
- int rc;
+ struct superblock_security_struct *sbsec = selinux_superblock(sb);
/*
* Superblock not initialized (i.e. no options) - reject if any
@@ -2665,35 +2649,27 @@ static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts)
if (!opts)
return (sbsec->flags & SE_MNTMASK) ? 1 : 0;
- if (opts->fscontext) {
- rc = parse_sid(sb, opts->fscontext, &sid, GFP_NOWAIT);
- if (rc)
- return 1;
- if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid))
+ if (opts->fscontext_sid) {
+ if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid,
+ opts->fscontext_sid))
return 1;
}
- if (opts->context) {
- rc = parse_sid(sb, opts->context, &sid, GFP_NOWAIT);
- if (rc)
- return 1;
- if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid))
+ if (opts->context_sid) {
+ if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid,
+ opts->context_sid))
return 1;
}
- if (opts->rootcontext) {
+ if (opts->rootcontext_sid) {
struct inode_security_struct *root_isec;
root_isec = backing_inode_security(sb->s_root);
- rc = parse_sid(sb, opts->rootcontext, &sid, GFP_NOWAIT);
- if (rc)
- return 1;
- if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid))
+ if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid,
+ opts->rootcontext_sid))
return 1;
}
- if (opts->defcontext) {
- rc = parse_sid(sb, opts->defcontext, &sid, GFP_NOWAIT);
- if (rc)
- return 1;
- if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid))
+ if (opts->defcontext_sid) {
+ if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid,
+ opts->defcontext_sid))
return 1;
}
return 0;
@@ -2703,8 +2679,6 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts)
{
struct selinux_mnt_opts *opts = mnt_opts;
struct superblock_security_struct *sbsec = selinux_superblock(sb);
- u32 sid;
- int rc;
if (!(sbsec->flags & SE_SBINITIALIZED))
return 0;
@@ -2712,34 +2686,26 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts)
if (!opts)
return 0;
- if (opts->fscontext) {
- rc = parse_sid(sb, opts->fscontext, &sid, GFP_KERNEL);
- if (rc)
- return rc;
- if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid))
+ if (opts->fscontext_sid) {
+ if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid,
+ opts->fscontext_sid))
goto out_bad_option;
}
- if (opts->context) {
- rc = parse_sid(sb, opts->context, &sid, GFP_KERNEL);
- if (rc)
- return rc;
- if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid))
+ if (opts->context_sid) {
+ if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid,
+ opts->context_sid))
goto out_bad_option;
}
- if (opts->rootcontext) {
+ if (opts->rootcontext_sid) {
struct inode_security_struct *root_isec;
root_isec = backing_inode_security(sb->s_root);
- rc = parse_sid(sb, opts->rootcontext, &sid, GFP_KERNEL);
- if (rc)
- return rc;
- if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid))
+ if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid,
+ opts->rootcontext_sid))
goto out_bad_option;
}
- if (opts->defcontext) {
- rc = parse_sid(sb, opts->defcontext, &sid, GFP_KERNEL);
- if (rc)
- return rc;
- if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid))
+ if (opts->defcontext_sid) {
+ if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid,
+ opts->defcontext_sid))
goto out_bad_option;
}
return 0;
@@ -2806,38 +2772,12 @@ static int selinux_fs_context_dup(struct fs_context *fc,
struct fs_context *src_fc)
{
const struct selinux_mnt_opts *src = src_fc->security;
- struct selinux_mnt_opts *opts;
if (!src)
return 0;
- fc->security = kzalloc(sizeof(struct selinux_mnt_opts), GFP_KERNEL);
- if (!fc->security)
- return -ENOMEM;
-
- opts = fc->security;
-
- if (src->fscontext) {
- opts->fscontext = kstrdup(src->fscontext, GFP_KERNEL);
- if (!opts->fscontext)
- return -ENOMEM;
- }
- if (src->context) {
- opts->context = kstrdup(src->context, GFP_KERNEL);
- if (!opts->context)
- return -ENOMEM;
- }
- if (src->rootcontext) {
- opts->rootcontext = kstrdup(src->rootcontext, GFP_KERNEL);
- if (!opts->rootcontext)
- return -ENOMEM;
- }
- if (src->defcontext) {
- opts->defcontext = kstrdup(src->defcontext, GFP_KERNEL);
- if (!opts->defcontext)
- return -ENOMEM;
- }
- return 0;
+ fc->security = kmemdup(src, sizeof(*src), GFP_KERNEL);
+ return fc->security ? 0 : -ENOMEM;
}
static const struct fs_parameter_spec selinux_fs_parameters[] = {
@@ -2860,10 +2800,9 @@ static int selinux_fs_context_parse_param(struct fs_context *fc,
return opt;
rc = selinux_add_opt(opt, param->string, &fc->security);
- if (!rc) {
+ if (!rc)
param->string = NULL;
- rc = 1;
- }
+
return rc;
}
@@ -3345,8 +3284,6 @@ static void selinux_inode_post_setxattr(struct dentry *dentry, const char *name,
isec->sid = newsid;
isec->initialized = LABEL_INITIALIZED;
spin_unlock(&isec->lock);
-
- return;
}
static int selinux_inode_getxattr(struct dentry *dentry, const char *name)
@@ -3745,6 +3682,12 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd,
CAP_OPT_NONE, true);
break;
+ case FIOCLEX:
+ case FIONCLEX:
+ if (!selinux_policycap_ioctl_skip_cloexec())
+ error = ioctl_has_perm(cred, file, FILE__IOCTL, (u16) cmd);
+ break;
+
/* default case assumes that the command will go
* to the file's ioctl() function.
*/
@@ -5299,37 +5242,38 @@ static void selinux_sock_graft(struct sock *sk, struct socket *parent)
sksec->sclass = isec->sclass;
}
-/* Called whenever SCTP receives an INIT chunk. This happens when an incoming
- * connect(2), sctp_connectx(3) or sctp_sendmsg(3) (with no association
- * already present).
+/*
+ * Determines peer_secid for the asoc and updates socket's peer label
+ * if it's the first association on the socket.
*/
-static int selinux_sctp_assoc_request(struct sctp_association *asoc,
- struct sk_buff *skb)
+static int selinux_sctp_process_new_assoc(struct sctp_association *asoc,
+ struct sk_buff *skb)
{
- struct sk_security_struct *sksec = asoc->base.sk->sk_security;
+ struct sock *sk = asoc->base.sk;
+ u16 family = sk->sk_family;
+ struct sk_security_struct *sksec = sk->sk_security;
struct common_audit_data ad;
struct lsm_network_audit net = {0,};
- u8 peerlbl_active;
- u32 peer_sid = SECINITSID_UNLABELED;
- u32 conn_sid;
- int err = 0;
+ int err;
- if (!selinux_policycap_extsockclass())
- return 0;
+ /* handle mapped IPv4 packets arriving via IPv6 sockets */
+ if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP))
+ family = PF_INET;
- peerlbl_active = selinux_peerlbl_enabled();
+ if (selinux_peerlbl_enabled()) {
+ asoc->peer_secid = SECSID_NULL;
- if (peerlbl_active) {
/* This will return peer_sid = SECSID_NULL if there are
* no peer labels, see security_net_peersid_resolve().
*/
- err = selinux_skb_peerlbl_sid(skb, asoc->base.sk->sk_family,
- &peer_sid);
+ err = selinux_skb_peerlbl_sid(skb, family, &asoc->peer_secid);
if (err)
return err;
- if (peer_sid == SECSID_NULL)
- peer_sid = SECINITSID_UNLABELED;
+ if (asoc->peer_secid == SECSID_NULL)
+ asoc->peer_secid = SECINITSID_UNLABELED;
+ } else {
+ asoc->peer_secid = SECINITSID_UNLABELED;
}
if (sksec->sctp_assoc_state == SCTP_ASSOC_UNSET) {
@@ -5340,8 +5284,8 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc,
* then it is approved by policy and used as the primary
* peer SID for getpeercon(3).
*/
- sksec->peer_sid = peer_sid;
- } else if (sksec->peer_sid != peer_sid) {
+ sksec->peer_sid = asoc->peer_secid;
+ } else if (sksec->peer_sid != asoc->peer_secid) {
/* Other association peer SIDs are checked to enforce
* consistency among the peer SIDs.
*/
@@ -5349,11 +5293,32 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc,
ad.u.net = &net;
ad.u.net->sk = asoc->base.sk;
err = avc_has_perm(&selinux_state,
- sksec->peer_sid, peer_sid, sksec->sclass,
- SCTP_SOCKET__ASSOCIATION, &ad);
+ sksec->peer_sid, asoc->peer_secid,
+ sksec->sclass, SCTP_SOCKET__ASSOCIATION,
+ &ad);
if (err)
return err;
}
+ return 0;
+}
+
+/* Called whenever SCTP receives an INIT or COOKIE ECHO chunk. This
+ * happens on an incoming connect(2), sctp_connectx(3) or
+ * sctp_sendmsg(3) (with no association already present).
+ */
+static int selinux_sctp_assoc_request(struct sctp_association *asoc,
+ struct sk_buff *skb)
+{
+ struct sk_security_struct *sksec = asoc->base.sk->sk_security;
+ u32 conn_sid;
+ int err;
+
+ if (!selinux_policycap_extsockclass())
+ return 0;
+
+ err = selinux_sctp_process_new_assoc(asoc, skb);
+ if (err)
+ return err;
/* Compute the MLS component for the connection and store
* the information in asoc. This will be used by SCTP TCP type
@@ -5361,17 +5326,36 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc,
* socket to be generated. selinux_sctp_sk_clone() will then
* plug this into the new socket.
*/
- err = selinux_conn_sid(sksec->sid, peer_sid, &conn_sid);
+ err = selinux_conn_sid(sksec->sid, asoc->peer_secid, &conn_sid);
if (err)
return err;
asoc->secid = conn_sid;
- asoc->peer_secid = peer_sid;
/* Set any NetLabel labels including CIPSO/CALIPSO options. */
return selinux_netlbl_sctp_assoc_request(asoc, skb);
}
+/* Called when SCTP receives a COOKIE ACK chunk as the final
+ * response to an association request (initited by us).
+ */
+static int selinux_sctp_assoc_established(struct sctp_association *asoc,
+ struct sk_buff *skb)
+{
+ struct sk_security_struct *sksec = asoc->base.sk->sk_security;
+
+ if (!selinux_policycap_extsockclass())
+ return 0;
+
+ /* Inherit secid from the parent socket - this will be picked up
+ * by selinux_sctp_sk_clone() if the association gets peeled off
+ * into a new socket.
+ */
+ asoc->secid = sksec->sid;
+
+ return selinux_sctp_process_new_assoc(asoc, skb);
+}
+
/* Check if sctp IPv4/IPv6 addresses are valid for binding or connecting
* based on their @optname.
*/
@@ -7192,6 +7176,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
LSM_HOOK_INIT(sctp_assoc_request, selinux_sctp_assoc_request),
LSM_HOOK_INIT(sctp_sk_clone, selinux_sctp_sk_clone),
LSM_HOOK_INIT(sctp_bind_connect, selinux_sctp_bind_connect),
+ LSM_HOOK_INIT(sctp_assoc_established, selinux_sctp_assoc_established),
LSM_HOOK_INIT(inet_conn_request, selinux_inet_conn_request),
LSM_HOOK_INIT(inet_csk_clone, selinux_inet_csk_clone),
LSM_HOOK_INIT(inet_conn_established, selinux_inet_conn_established),
diff --git a/security/selinux/ibpkey.c b/security/selinux/ibpkey.c
index 20b3b2243820..5839ca7bb9c7 100644
--- a/security/selinux/ibpkey.c
+++ b/security/selinux/ibpkey.c
@@ -104,7 +104,7 @@ static void sel_ib_pkey_insert(struct sel_ib_pkey *pkey)
tail = list_entry(
rcu_dereference_protected(
- sel_ib_pkey_hash[idx].list.prev,
+ list_tail_rcu(&sel_ib_pkey_hash[idx].list),
lockdep_is_held(&sel_ib_pkey_lock)),
struct sel_ib_pkey, list);
list_del_rcu(&tail->list);
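The same bucket-eviction fix recurs in netnode.c and netport.c below. For reference (include/linux/rculist.h), not part of the patch:

/*
 *   #define list_tail_rcu(head) \
 *           (*((struct list_head __rcu **)(&(head)->prev)))
 *
 * rcu_dereference_protected() wants an __rcu-annotated pointer; the
 * plain ->prev field works at runtime but trips sparse address-space
 * checks, which the cast above satisfies.
 */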
diff --git a/security/selinux/ima.c b/security/selinux/ima.c
index 727c4e43219d..a915b89d55b0 100644
--- a/security/selinux/ima.c
+++ b/security/selinux/ima.c
@@ -29,7 +29,7 @@ static char *selinux_ima_collect_state(struct selinux_state *state)
buf_len = strlen("initialized=0;enforcing=0;checkreqprot=0;") + 1;
len = strlen(on);
- for (i = 0; i < __POLICYDB_CAPABILITY_MAX; i++)
+ for (i = 0; i < __POLICYDB_CAP_MAX; i++)
buf_len += strlen(selinux_policycap_names[i]) + len;
buf = kzalloc(buf_len, GFP_KERNEL);
@@ -54,7 +54,7 @@ static char *selinux_ima_collect_state(struct selinux_state *state)
rc = strlcat(buf, checkreqprot_get(state) ? on : off, buf_len);
WARN_ON(rc >= buf_len);
- for (i = 0; i < __POLICYDB_CAPABILITY_MAX; i++) {
+ for (i = 0; i < __POLICYDB_CAP_MAX; i++) {
rc = strlcat(buf, selinux_policycap_names[i], buf_len);
WARN_ON(rc >= buf_len);
@@ -77,7 +77,7 @@ void selinux_ima_measure_state_locked(struct selinux_state *state)
size_t policy_len;
int rc = 0;
- WARN_ON(!mutex_is_locked(&state->policy_mutex));
+ lockdep_assert_held(&state->policy_mutex);
state_str = selinux_ima_collect_state(state);
if (!state_str) {
@@ -117,7 +117,7 @@ void selinux_ima_measure_state_locked(struct selinux_state *state)
*/
void selinux_ima_measure_state(struct selinux_state *state)
{
- WARN_ON(mutex_is_locked(&state->policy_mutex));
+ lockdep_assert_not_held(&state->policy_mutex);
mutex_lock(&state->policy_mutex);
selinux_ima_measure_state_locked(state);
diff --git a/security/selinux/include/policycap.h b/security/selinux/include/policycap.h
index 2ec038efbb03..2680aa21205c 100644
--- a/security/selinux/include/policycap.h
+++ b/security/selinux/include/policycap.h
@@ -4,17 +4,18 @@
/* Policy capabilities */
enum {
- POLICYDB_CAPABILITY_NETPEER,
- POLICYDB_CAPABILITY_OPENPERM,
- POLICYDB_CAPABILITY_EXTSOCKCLASS,
- POLICYDB_CAPABILITY_ALWAYSNETWORK,
- POLICYDB_CAPABILITY_CGROUPSECLABEL,
- POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION,
- POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS,
- __POLICYDB_CAPABILITY_MAX
+ POLICYDB_CAP_NETPEER,
+ POLICYDB_CAP_OPENPERM,
+ POLICYDB_CAP_EXTSOCKCLASS,
+ POLICYDB_CAP_ALWAYSNETWORK,
+ POLICYDB_CAP_CGROUPSECLABEL,
+ POLICYDB_CAP_NNP_NOSUID_TRANSITION,
+ POLICYDB_CAP_GENFS_SECLABEL_SYMLINKS,
+ POLICYDB_CAP_IOCTL_SKIP_CLOEXEC,
+ __POLICYDB_CAP_MAX
};
-#define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1)
+#define POLICYDB_CAP_MAX (__POLICYDB_CAP_MAX - 1)
-extern const char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX];
+extern const char *selinux_policycap_names[__POLICYDB_CAP_MAX];
#endif /* _SELINUX_POLICYCAP_H_ */
diff --git a/security/selinux/include/policycap_names.h b/security/selinux/include/policycap_names.h
index b89289f092c9..100da7d043db 100644
--- a/security/selinux/include/policycap_names.h
+++ b/security/selinux/include/policycap_names.h
@@ -5,14 +5,15 @@
#include "policycap.h"
/* Policy capability names */
-const char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX] = {
+const char *selinux_policycap_names[__POLICYDB_CAP_MAX] = {
"network_peer_controls",
"open_perms",
"extended_socket_class",
"always_check_network",
"cgroup_seclabel",
"nnp_nosuid_transition",
- "genfs_seclabel_symlinks"
+ "genfs_seclabel_symlinks",
+ "ioctl_skip_cloexec"
};
#endif /* _SELINUX_POLICYCAP_NAMES_H_ */
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index ac0ece01305a..ace4bd13e808 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -96,7 +96,7 @@ struct selinux_state {
#endif
bool checkreqprot;
bool initialized;
- bool policycap[__POLICYDB_CAPABILITY_MAX];
+ bool policycap[__POLICYDB_CAP_MAX];
struct page *status_page;
struct mutex status_lock;
@@ -174,49 +174,56 @@ static inline bool selinux_policycap_netpeer(void)
{
struct selinux_state *state = &selinux_state;
- return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_NETPEER]);
+ return READ_ONCE(state->policycap[POLICYDB_CAP_NETPEER]);
}
static inline bool selinux_policycap_openperm(void)
{
struct selinux_state *state = &selinux_state;
- return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_OPENPERM]);
+ return READ_ONCE(state->policycap[POLICYDB_CAP_OPENPERM]);
}
static inline bool selinux_policycap_extsockclass(void)
{
struct selinux_state *state = &selinux_state;
- return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_EXTSOCKCLASS]);
+ return READ_ONCE(state->policycap[POLICYDB_CAP_EXTSOCKCLASS]);
}
static inline bool selinux_policycap_alwaysnetwork(void)
{
struct selinux_state *state = &selinux_state;
- return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_ALWAYSNETWORK]);
+ return READ_ONCE(state->policycap[POLICYDB_CAP_ALWAYSNETWORK]);
}
static inline bool selinux_policycap_cgroupseclabel(void)
{
struct selinux_state *state = &selinux_state;
- return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_CGROUPSECLABEL]);
+ return READ_ONCE(state->policycap[POLICYDB_CAP_CGROUPSECLABEL]);
}
static inline bool selinux_policycap_nnp_nosuid_transition(void)
{
struct selinux_state *state = &selinux_state;
- return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION]);
+ return READ_ONCE(state->policycap[POLICYDB_CAP_NNP_NOSUID_TRANSITION]);
}
static inline bool selinux_policycap_genfs_seclabel_symlinks(void)
{
struct selinux_state *state = &selinux_state;
- return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS]);
+ return READ_ONCE(state->policycap[POLICYDB_CAP_GENFS_SECLABEL_SYMLINKS]);
+}
+
+static inline bool selinux_policycap_ioctl_skip_cloexec(void)
+{
+ struct selinux_state *state = &selinux_state;
+
+ return READ_ONCE(state->policycap[POLICYDB_CAP_IOCTL_SKIP_CLOEXEC]);
}
struct selinux_policy_convert_data;
@@ -254,8 +261,8 @@ struct av_decision {
#define XPERMS_AUDITALLOW 2
#define XPERMS_DONTAUDIT 4
-#define security_xperm_set(perms, x) (perms[x >> 5] |= 1 << (x & 0x1f))
-#define security_xperm_test(perms, x) (1 & (perms[x >> 5] >> (x & 0x1f)))
+#define security_xperm_set(perms, x) ((perms)[(x) >> 5] |= 1 << ((x) & 0x1f))
+#define security_xperm_test(perms, x) (1 & ((perms)[(x) >> 5] >> ((x) & 0x1f)))
struct extended_perms_data {
u32 p[8];
};
@@ -386,11 +393,11 @@ int security_get_allow_unknown(struct selinux_state *state);
int security_fs_use(struct selinux_state *state, struct super_block *sb);
int security_genfs_sid(struct selinux_state *state,
- const char *fstype, char *name, u16 sclass,
+ const char *fstype, const char *path, u16 sclass,
u32 *sid);
int selinux_policy_genfs_sid(struct selinux_policy *policy,
- const char *fstype, char *name, u16 sclass,
+ const char *fstype, const char *path, u16 sclass,
u32 *sid);
#ifdef CONFIG_NETLABEL
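Parenthesizing the macro parameters in security_xperm_set()/security_xperm_test() (and likewise in the sidtab and ebitmap macros below) guards against precedence surprises when callers pass compound expressions. A contrived illustration of the failure mode, not taken from real callers:

/*
 * With the old definition, security_xperm_test(p, lo | hi) expanded to
 *
 *         (1 & (p[lo | hi >> 5] >> (lo | hi & 0x1f)))
 *
 * i.e. p[lo | (hi >> 5)] and (lo | (hi & 0x1f)), because '>>' and '&'
 * bind tighter than '|'. The parenthesized form yields the intended
 * ((p)[(lo | hi) >> 5]) and ((lo | hi) & 0x1f).
 */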
diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c
index 4a7d2ab5b960..0ac7df9a9367 100644
--- a/security/selinux/netnode.c
+++ b/security/selinux/netnode.c
@@ -107,7 +107,7 @@ static struct sel_netnode *sel_netnode_find(const void *addr, u16 family)
switch (family) {
case PF_INET:
- idx = sel_netnode_hashfn_ipv4(*(__be32 *)addr);
+ idx = sel_netnode_hashfn_ipv4(*(const __be32 *)addr);
break;
case PF_INET6:
idx = sel_netnode_hashfn_ipv6(addr);
@@ -121,7 +121,7 @@ static struct sel_netnode *sel_netnode_find(const void *addr, u16 family)
if (node->nsec.family == family)
switch (family) {
case PF_INET:
- if (node->nsec.addr.ipv4 == *(__be32 *)addr)
+ if (node->nsec.addr.ipv4 == *(const __be32 *)addr)
return node;
break;
case PF_INET6:
@@ -164,8 +164,9 @@ static void sel_netnode_insert(struct sel_netnode *node)
if (sel_netnode_hash[idx].size == SEL_NETNODE_HASH_BKT_LIMIT) {
struct sel_netnode *tail;
tail = list_entry(
- rcu_dereference_protected(sel_netnode_hash[idx].list.prev,
- lockdep_is_held(&sel_netnode_lock)),
+ rcu_dereference_protected(
+ list_tail_rcu(&sel_netnode_hash[idx].list),
+ lockdep_is_held(&sel_netnode_lock)),
struct sel_netnode, list);
list_del_rcu(&tail->list);
kfree_rcu(tail, rcu);
diff --git a/security/selinux/netport.c b/security/selinux/netport.c
index 9ba09d11c0f5..8eec6347cf01 100644
--- a/security/selinux/netport.c
+++ b/security/selinux/netport.c
@@ -113,7 +113,7 @@ static void sel_netport_insert(struct sel_netport *port)
struct sel_netport *tail;
tail = list_entry(
rcu_dereference_protected(
- sel_netport_hash[idx].list.prev,
+ list_tail_rcu(&sel_netport_hash[idx].list),
lockdep_is_held(&sel_netport_lock)),
struct sel_netport, list);
list_del_rcu(&tail->list);
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index e4cd7cb856f3..097c6d866ec4 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -1983,7 +1983,7 @@ static int sel_make_policycap(struct selinux_fs_info *fsi)
struct dentry *dentry = NULL;
struct inode *inode = NULL;
- for (iter = 0; iter <= POLICYDB_CAPABILITY_MAX; iter++) {
+ for (iter = 0; iter <= POLICYDB_CAP_MAX; iter++) {
if (iter < ARRAY_SIZE(selinux_policycap_names))
dentry = d_alloc_name(fsi->policycap_dir,
selinux_policycap_names[iter]);
@@ -2127,6 +2127,8 @@ static int sel_fill_super(struct super_block *sb, struct fs_context *fc)
}
ret = sel_make_avc_files(dentry);
+ if (ret)
+ goto err;
dentry = sel_make_dir(sb->s_root, "ss", &fsi->last_ino);
if (IS_ERR(dentry)) {
diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c
index c97695ae508f..cfdae20792e1 100644
--- a/security/selinux/ss/avtab.c
+++ b/security/selinux/ss/avtab.c
@@ -67,7 +67,7 @@ static inline int avtab_hash(const struct avtab_key *keyp, u32 mask)
static struct avtab_node*
avtab_insert_node(struct avtab *h, int hvalue,
- struct avtab_node *prev, struct avtab_node *cur,
+ struct avtab_node *prev,
const struct avtab_key *key, const struct avtab_datum *datum)
{
struct avtab_node *newnode;
@@ -137,7 +137,7 @@ static int avtab_insert(struct avtab *h, const struct avtab_key *key,
break;
}
- newnode = avtab_insert_node(h, hvalue, prev, cur, key, datum);
+ newnode = avtab_insert_node(h, hvalue, prev, key, datum);
if (!newnode)
return -ENOMEM;
@@ -177,7 +177,7 @@ struct avtab_node *avtab_insert_nonunique(struct avtab *h,
key->target_class < cur->key.target_class)
break;
}
- return avtab_insert_node(h, hvalue, prev, cur, key, datum);
+ return avtab_insert_node(h, hvalue, prev, key, datum);
}
struct avtab_datum *avtab_search(struct avtab *h, const struct avtab_key *key)
diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c
index 2ec6e5cd25d9..e11219fdf9f7 100644
--- a/security/selinux/ss/conditional.c
+++ b/security/selinux/ss/conditional.c
@@ -152,6 +152,8 @@ static void cond_list_destroy(struct policydb *p)
for (i = 0; i < p->cond_list_len; i++)
cond_node_destroy(&p->cond_list[i]);
kfree(p->cond_list);
+ p->cond_list = NULL;
+ p->cond_list_len = 0;
}
void cond_policydb_destroy(struct policydb *p)
@@ -441,7 +443,6 @@ int cond_read_list(struct policydb *p, void *fp)
return 0;
err:
cond_list_destroy(p);
- p->cond_list = NULL;
return rc;
}
@@ -566,8 +567,6 @@ void cond_compute_xperms(struct avtab *ctab, struct avtab_key *key,
if (node->key.specified & AVTAB_ENABLED)
services_compute_xperms_decision(xpermd, node);
}
- return;
-
}
/* Determine whether additional permissions are granted by the conditional
* av table, and if so, add them to the result
diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c
index 61fcbb8d0f88..abde349c8321 100644
--- a/security/selinux/ss/ebitmap.c
+++ b/security/selinux/ss/ebitmap.c
@@ -359,7 +359,6 @@ void ebitmap_destroy(struct ebitmap *e)
e->highbit = 0;
e->node = NULL;
- return;
}
int ebitmap_read(struct ebitmap *e, void *fp)
diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h
index 9eb2d0af2805..58eb822f11ee 100644
--- a/security/selinux/ss/ebitmap.h
+++ b/security/selinux/ss/ebitmap.h
@@ -118,9 +118,9 @@ static inline void ebitmap_node_clr_bit(struct ebitmap_node *n,
}
#define ebitmap_for_each_positive_bit(e, n, bit) \
- for (bit = ebitmap_start_positive(e, &n); \
- bit < ebitmap_length(e); \
- bit = ebitmap_next_positive(e, &n, bit)) \
+ for ((bit) = ebitmap_start_positive(e, &(n)); \
+ (bit) < ebitmap_length(e); \
+ (bit) = ebitmap_next_positive(e, &(n), bit)) \
int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2);
int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src);
diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c
index 3f5fd124342c..99571b19d4a9 100644
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c
@@ -156,7 +156,6 @@ void mls_sid_to_context(struct policydb *p,
}
*scontext = scontextp;
- return;
}
int mls_level_isvalid(struct policydb *p, struct mls_level *l)
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index 0ae1b718194a..d036e1238e77 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -41,8 +41,6 @@
#include "mls.h"
#include "services.h"
-#define _DEBUG_HASHES
-
#ifdef DEBUG_HASHES
static const char *symtab_name[SYM_NUM] = {
"common prefixes",
@@ -704,7 +702,7 @@ static void symtab_hash_eval(struct symtab *s)
}
#else
-static inline void hash_eval(struct hashtab *h, char *hash_name)
+static inline void hash_eval(struct hashtab *h, const char *hash_name)
{
}
#endif
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 8e92af7dd284..6901dc07680d 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -529,8 +529,6 @@ out:
/* release scontext/tcontext */
kfree(tcontext_name);
kfree(scontext_name);
-
- return;
}
/*
@@ -1452,7 +1450,7 @@ static int string_to_context_struct(struct policydb *pol,
/* Parse the security context. */
rc = -EINVAL;
- scontextp = (char *) scontext;
+ scontextp = scontext;
/* Extract the user. */
p = scontextp;
@@ -2875,7 +2873,7 @@ out_unlock:
*/
static inline int __security_genfs_sid(struct selinux_policy *policy,
const char *fstype,
- char *path,
+ const char *path,
u16 orig_sclass,
u32 *sid)
{
@@ -2928,7 +2926,7 @@ static inline int __security_genfs_sid(struct selinux_policy *policy,
*/
int security_genfs_sid(struct selinux_state *state,
const char *fstype,
- char *path,
+ const char *path,
u16 orig_sclass,
u32 *sid)
{
@@ -2952,7 +2950,7 @@ int security_genfs_sid(struct selinux_state *state,
int selinux_policy_genfs_sid(struct selinux_policy *policy,
const char *fstype,
- char *path,
+ const char *path,
u16 orig_sclass,
u32 *sid)
{
diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c
index 293ec048af08..a54b8652bfb5 100644
--- a/security/selinux/ss/sidtab.c
+++ b/security/selinux/ss/sidtab.c
@@ -27,8 +27,8 @@ struct sidtab_str_cache {
char str[];
};
-#define index_to_sid(index) (index + SECINITSID_NUM + 1)
-#define sid_to_index(sid) (sid - (SECINITSID_NUM + 1))
+#define index_to_sid(index) ((index) + SECINITSID_NUM + 1)
+#define sid_to_index(sid) ((sid) - (SECINITSID_NUM + 1))
int sidtab_init(struct sidtab *s)
{
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index 90697317895f..c576832febc6 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -347,7 +347,7 @@ int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x,
int rc;
struct xfrm_sec_ctx *ctx;
char *ctx_str = NULL;
- int str_len;
+ u32 str_len;
if (!polsec)
return 0;
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 14b279cc75c9..6207762dbdb1 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2510,7 +2510,7 @@ static int smk_ipv6_check(struct smack_known *subject,
#ifdef CONFIG_AUDIT
smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net);
ad.a.u.net->family = PF_INET6;
- ad.a.u.net->dport = ntohs(address->sin6_port);
+ ad.a.u.net->dport = address->sin6_port;
if (act == SMK_RECEIVING)
ad.a.u.net->v6info.saddr = address->sin6_addr;
else
diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c
index d1fcd1d5adae..6fd763d4d15b 100644
--- a/sound/core/memalloc.c
+++ b/sound/core/memalloc.c
@@ -511,7 +511,8 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size)
DEFAULT_GFP, 0);
if (!sgt)
return NULL;
- dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, dmab->dev.dir);
+ dmab->dev.need_sync = dma_need_sync(dmab->dev.dev,
+ sg_dma_address(sgt->sgl));
p = dma_vmap_noncontiguous(dmab->dev.dev, size, sgt);
if (p)
dmab->private_data = sgt;
@@ -540,9 +541,9 @@ static void snd_dma_noncontig_sync(struct snd_dma_buffer *dmab,
if (mode == SNDRV_DMA_SYNC_CPU) {
if (dmab->dev.dir == DMA_TO_DEVICE)
return;
+ invalidate_kernel_vmap_range(dmab->area, dmab->bytes);
dma_sync_sgtable_for_cpu(dmab->dev.dev, dmab->private_data,
dmab->dev.dir);
- invalidate_kernel_vmap_range(dmab->area, dmab->bytes);
} else {
if (dmab->dev.dir == DMA_FROM_DEVICE)
return;
@@ -671,9 +672,13 @@ static const struct snd_malloc_ops snd_dma_sg_wc_ops = {
*/
static void *snd_dma_noncoherent_alloc(struct snd_dma_buffer *dmab, size_t size)
{
- dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, dmab->dev.dir);
- return dma_alloc_noncoherent(dmab->dev.dev, size, &dmab->addr,
- dmab->dev.dir, DEFAULT_GFP);
+ void *p;
+
+ p = dma_alloc_noncoherent(dmab->dev.dev, size, &dmab->addr,
+ dmab->dev.dir, DEFAULT_GFP);
+ if (p)
+ dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, dmab->addr);
+ return p;
}
static void snd_dma_noncoherent_free(struct snd_dma_buffer *dmab)
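Both hunks above correct the dma_need_sync() call to its real signature. For reference (include/linux/dma-mapping.h):

/*
 *   bool dma_need_sync(struct device *dev, dma_addr_t dma_addr);
 *
 * Passing dmab->dev.dir (an enum dma_data_direction) compiled because
 * both are integral types, but it answered the wrong question: whether
 * explicit syncs are needed depends on the buffer's DMA address (for
 * example, whether it lands in a SWIOTLB bounce region).
 */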
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 621883e71194..a056b3ef3c84 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -172,6 +172,19 @@ unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream)
}
EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_irqsave);
+unsigned long _snd_pcm_stream_lock_irqsave_nested(struct snd_pcm_substream *substream)
+{
+ unsigned long flags = 0;
+ if (substream->pcm->nonatomic)
+ mutex_lock_nested(&substream->self_group.mutex,
+ SINGLE_DEPTH_NESTING);
+ else
+ spin_lock_irqsave_nested(&substream->self_group.lock, flags,
+ SINGLE_DEPTH_NESTING);
+ return flags;
+}
+EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_irqsave_nested);
+
/**
* snd_pcm_stream_unlock_irqrestore - Unlock the PCM stream
* @substream: PCM substream
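The new export mirrors _snd_pcm_stream_lock_irqsave() but tells lockdep the acquisition is nested. A hedged sketch of the intended call pattern when two substreams must be held at once; the snd_pcm_stream_lock_irqsave_nested() wrapper macro is assumed to exist alongside the non-nested one:

static void example_lock_pair(struct snd_pcm_substream *a,
			      struct snd_pcm_substream *b)
{
	unsigned long fa, fb;

	snd_pcm_stream_lock_irqsave(a, fa);
	/* inner lock of the same class: use the _nested variant */
	snd_pcm_stream_lock_irqsave_nested(b, fb);
	/* ... operate on the linked pair ... */
	snd_pcm_stream_unlock_irqrestore(b, fb);
	snd_pcm_stream_unlock_irqrestore(a, fa);
}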
diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c
index b7758dbe2371..5cb92f7ccbca 100644
--- a/sound/hda/intel-sdw-acpi.c
+++ b/sound/hda/intel-sdw-acpi.c
@@ -50,11 +50,11 @@ static bool is_link_enabled(struct fwnode_handle *fw_node, int i)
static int
sdw_intel_scan_controller(struct sdw_intel_acpi_info *info)
{
- struct acpi_device *adev;
+ struct acpi_device *adev = acpi_fetch_acpi_dev(info->handle);
int ret, i;
u8 count;
- if (acpi_bus_get_device(info->handle, &adev))
+ if (!adev)
return -EINVAL;
/* Found controller, find links supported */
@@ -119,7 +119,6 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level,
void *cdata, void **return_value)
{
struct sdw_intel_acpi_info *info = cdata;
- struct acpi_device *adev;
acpi_status status;
u64 adr;
@@ -127,7 +126,7 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level,
if (ACPI_FAILURE(status))
return AE_OK; /* keep going */
- if (acpi_bus_get_device(handle, &adev)) {
+ if (!acpi_fetch_acpi_dev(handle)) {
pr_err("%s: Couldn't find ACPI handle\n", __func__);
return AE_NOT_FOUND;
}
diff --git a/sound/pci/hda/cs35l41_hda_spi.c b/sound/pci/hda/cs35l41_hda_spi.c
index 9f8123893cc8..50eb6c0e6658 100644
--- a/sound/pci/hda/cs35l41_hda_spi.c
+++ b/sound/pci/hda/cs35l41_hda_spi.c
@@ -28,11 +28,9 @@ static int cs35l41_hda_spi_probe(struct spi_device *spi)
devm_regmap_init_spi(spi, &cs35l41_regmap_spi));
}
-static int cs35l41_hda_spi_remove(struct spi_device *spi)
+static void cs35l41_hda_spi_remove(struct spi_device *spi)
{
cs35l41_hda_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id cs35l41_hda_spi_id[] = {
diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c
index 82c492b05667..cd1db943b7e0 100644
--- a/sound/pci/hda/hda_auto_parser.c
+++ b/sound/pci/hda/hda_auto_parser.c
@@ -981,7 +981,7 @@ void snd_hda_pick_fixup(struct hda_codec *codec,
int id = HDA_FIXUP_ID_NOT_SET;
const char *name = NULL;
const char *type = NULL;
- int vendor, device;
+ unsigned int vendor, device;
if (codec->fixup_id != HDA_FIXUP_ID_NOT_SET)
return;
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 7016b48227bf..f552785d301e 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -3000,6 +3000,10 @@ void snd_hda_codec_shutdown(struct hda_codec *codec)
{
struct hda_pcm *cpcm;
+ /* Skip the shutdown if codec is not registered */
+ if (!codec->registered)
+ return;
+
list_for_each_entry(cpcm, &codec->pcm_list_head, list)
snd_pcm_suspend_all(cpcm->pcm);
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 3bf5e3410703..fc114e522480 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -91,6 +91,12 @@ static void snd_hda_gen_spec_free(struct hda_gen_spec *spec)
free_kctls(spec);
snd_array_free(&spec->paths);
snd_array_free(&spec->loopback_list);
+#ifdef CONFIG_SND_HDA_GENERIC_LEDS
+ if (spec->led_cdevs[LED_AUDIO_MUTE])
+ led_classdev_unregister(spec->led_cdevs[LED_AUDIO_MUTE]);
+ if (spec->led_cdevs[LED_AUDIO_MICMUTE])
+ led_classdev_unregister(spec->led_cdevs[LED_AUDIO_MICMUTE]);
+#endif
}
/*
@@ -3922,7 +3928,10 @@ static int create_mute_led_cdev(struct hda_codec *codec,
enum led_brightness),
bool micmute)
{
+ struct hda_gen_spec *spec = codec->spec;
struct led_classdev *cdev;
+ int idx = micmute ? LED_AUDIO_MICMUTE : LED_AUDIO_MUTE;
+ int err;
cdev = devm_kzalloc(&codec->core.dev, sizeof(*cdev), GFP_KERNEL);
if (!cdev)
@@ -3932,10 +3941,14 @@ static int create_mute_led_cdev(struct hda_codec *codec,
cdev->max_brightness = 1;
cdev->default_trigger = micmute ? "audio-micmute" : "audio-mute";
cdev->brightness_set_blocking = callback;
- cdev->brightness = ledtrig_audio_get(micmute ? LED_AUDIO_MICMUTE : LED_AUDIO_MUTE);
+ cdev->brightness = ledtrig_audio_get(idx);
cdev->flags = LED_CORE_SUSPENDRESUME;
- return devm_led_classdev_register(&codec->core.dev, cdev);
+ err = led_classdev_register(&codec->core.dev, cdev);
+ if (err < 0)
+ return err;
+ spec->led_cdevs[idx] = cdev;
+ return 0;
}
/**
diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h
index 8e1bc8ea74fc..34eba40cc6e6 100644
--- a/sound/pci/hda/hda_generic.h
+++ b/sound/pci/hda/hda_generic.h
@@ -294,6 +294,9 @@ struct hda_gen_spec {
struct hda_jack_callback *cb);
void (*mic_autoswitch_hook)(struct hda_codec *codec,
struct hda_jack_callback *cb);
+
+ /* leds */
+ struct led_classdev *led_cdevs[NUM_AUDIO_LEDS];
};
/* values for add_stereo_mix_input flag */
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 4b0338c4c543..572ff0d1fafe 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -1615,6 +1615,7 @@ static const struct snd_pci_quirk probe_mask_list[] = {
/* forced codec slots */
SND_PCI_QUIRK(0x1043, 0x1262, "ASUS W5Fm", 0x103),
SND_PCI_QUIRK(0x1046, 0x1262, "ASUS W5F", 0x103),
+ SND_PCI_QUIRK(0x1558, 0x0351, "Schenker Dock 15", 0x105),
/* WinFast VP200 H (Teradici) user reported broken communication */
SND_PCI_QUIRK(0x3a21, 0x040d, "WinFast VP200 H", 0x101),
{}
@@ -1798,8 +1799,6 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci,
assign_position_fix(chip, check_position_fix(chip, position_fix[dev]));
- check_probe_mask(chip, dev);
-
if (single_cmd < 0) /* allow fallback to single_cmd at errors */
chip->fallback_to_single_cmd = 1;
else /* explicitly set to single_cmd or not */
@@ -1825,6 +1824,8 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci,
chip->bus.core.needs_damn_long_delay = 1;
}
+ check_probe_mask(chip, dev);
+
err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
if (err < 0) {
dev_err(card->dev, "Error creating device [card]!\n");
@@ -1940,6 +1941,7 @@ static int azx_first_init(struct azx *chip)
dma_bits = 32;
if (dma_set_mask_and_coherent(&pci->dev, DMA_BIT_MASK(dma_bits)))
dma_set_mask_and_coherent(&pci->dev, DMA_BIT_MASK(32));
+ dma_set_max_seg_size(&pci->dev, UINT_MAX);
/* read number of streams from GCAP register instead of using
* hardcoded value
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 668274e52674..3a42457984e9 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -98,6 +98,7 @@ struct alc_spec {
unsigned int gpio_mic_led_mask;
struct alc_coef_led mute_led_coef;
struct alc_coef_led mic_led_coef;
+ struct mutex coef_mutex;
hda_nid_t headset_mic_pin;
hda_nid_t headphone_mic_pin;
@@ -137,8 +138,24 @@ struct alc_spec {
* COEF access helper functions
*/
-static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
- unsigned int coef_idx)
+static void coef_mutex_lock(struct hda_codec *codec)
+{
+ struct alc_spec *spec = codec->spec;
+
+ snd_hda_power_up_pm(codec);
+ mutex_lock(&spec->coef_mutex);
+}
+
+static void coef_mutex_unlock(struct hda_codec *codec)
+{
+ struct alc_spec *spec = codec->spec;
+
+ mutex_unlock(&spec->coef_mutex);
+ snd_hda_power_down_pm(codec);
+}
+
+static int __alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+ unsigned int coef_idx)
{
unsigned int val;
@@ -147,28 +164,56 @@ static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
return val;
}
+static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+ unsigned int coef_idx)
+{
+ unsigned int val;
+
+ coef_mutex_lock(codec);
+ val = __alc_read_coefex_idx(codec, nid, coef_idx);
+ coef_mutex_unlock(codec);
+ return val;
+}
+
#define alc_read_coef_idx(codec, coef_idx) \
alc_read_coefex_idx(codec, 0x20, coef_idx)
-static void alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
- unsigned int coef_idx, unsigned int coef_val)
+static void __alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+ unsigned int coef_idx, unsigned int coef_val)
{
snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_COEF_INDEX, coef_idx);
snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PROC_COEF, coef_val);
}
+static void alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+ unsigned int coef_idx, unsigned int coef_val)
+{
+ coef_mutex_lock(codec);
+ __alc_write_coefex_idx(codec, nid, coef_idx, coef_val);
+ coef_mutex_unlock(codec);
+}
+
#define alc_write_coef_idx(codec, coef_idx, coef_val) \
alc_write_coefex_idx(codec, 0x20, coef_idx, coef_val)
+static void __alc_update_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+ unsigned int coef_idx, unsigned int mask,
+ unsigned int bits_set)
+{
+ unsigned int val = __alc_read_coefex_idx(codec, nid, coef_idx);
+
+ if (val != -1)
+ __alc_write_coefex_idx(codec, nid, coef_idx,
+ (val & ~mask) | bits_set);
+}
+
static void alc_update_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
unsigned int coef_idx, unsigned int mask,
unsigned int bits_set)
{
- unsigned int val = alc_read_coefex_idx(codec, nid, coef_idx);
-
- if (val != -1)
- alc_write_coefex_idx(codec, nid, coef_idx,
- (val & ~mask) | bits_set);
+ coef_mutex_lock(codec);
+ __alc_update_coefex_idx(codec, nid, coef_idx, mask, bits_set);
+ coef_mutex_unlock(codec);
}
#define alc_update_coef_idx(codec, coef_idx, mask, bits_set) \
@@ -201,13 +246,15 @@ struct coef_fw {
static void alc_process_coef_fw(struct hda_codec *codec,
const struct coef_fw *fw)
{
+ coef_mutex_lock(codec);
for (; fw->nid; fw++) {
if (fw->mask == (unsigned short)-1)
- alc_write_coefex_idx(codec, fw->nid, fw->idx, fw->val);
+ __alc_write_coefex_idx(codec, fw->nid, fw->idx, fw->val);
else
- alc_update_coefex_idx(codec, fw->nid, fw->idx,
- fw->mask, fw->val);
+ __alc_update_coefex_idx(codec, fw->nid, fw->idx,
+ fw->mask, fw->val);
}
+ coef_mutex_unlock(codec);
}
/*
@@ -1153,6 +1200,7 @@ static int alc_alloc_spec(struct hda_codec *codec, hda_nid_t mixer_nid)
codec->spdif_status_reset = 1;
codec->forced_resume = 1;
codec->patch_ops = alc_patch_ops;
+ mutex_init(&spec->coef_mutex);
err = alc_codec_rename_from_preset(codec);
if (err < 0) {
@@ -2125,6 +2173,7 @@ static void alc1220_fixup_gb_x570(struct hda_codec *codec,
{
static const hda_nid_t conn1[] = { 0x0c };
static const struct coef_fw gb_x570_coefs[] = {
+ WRITE_COEF(0x07, 0x03c0),
WRITE_COEF(0x1a, 0x01c1),
WRITE_COEF(0x1b, 0x0202),
WRITE_COEF(0x43, 0x3005),
@@ -2551,7 +2600,8 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE),
SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
SND_PCI_QUIRK(0x1458, 0xa0cd, "Gigabyte X570 Aorus Master", ALC1220_FIXUP_GB_X570),
- SND_PCI_QUIRK(0x1458, 0xa0ce, "Gigabyte X570 Aorus Xtreme", ALC1220_FIXUP_CLEVO_P950),
+ SND_PCI_QUIRK(0x1458, 0xa0ce, "Gigabyte X570 Aorus Xtreme", ALC1220_FIXUP_GB_X570),
+ SND_PCI_QUIRK(0x1458, 0xa0d5, "Gigabyte X570S Aorus Master", ALC1220_FIXUP_GB_X570),
SND_PCI_QUIRK(0x1462, 0x11f7, "MSI-GE63", ALC1220_FIXUP_CLEVO_P950),
SND_PCI_QUIRK(0x1462, 0x1228, "MSI-GP63", ALC1220_FIXUP_CLEVO_P950),
SND_PCI_QUIRK(0x1462, 0x1229, "MSI-GP73", ALC1220_FIXUP_CLEVO_P950),
@@ -2626,6 +2676,7 @@ static const struct hda_model_fixup alc882_fixup_models[] = {
{.id = ALC882_FIXUP_NO_PRIMARY_HP, .name = "no-primary-hp"},
{.id = ALC887_FIXUP_ASUS_BASS, .name = "asus-bass"},
{.id = ALC1220_FIXUP_GB_DUAL_CODECS, .name = "dual-codecs"},
+ {.id = ALC1220_FIXUP_GB_X570, .name = "gb-x570"},
{.id = ALC1220_FIXUP_CLEVO_P950, .name = "clevo-p950"},
{}
};
@@ -8969,6 +9020,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS),
SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401),
SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401),
+ SND_PCI_QUIRK(0x1043, 0x16b2, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2),
SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC),
SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC),
@@ -9127,6 +9179,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x3824, "Legion Y9000X 2020", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS),
SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF),
SND_PCI_QUIRK(0x17aa, 0x3834, "Lenovo IdeaPad Slim 9i 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
+ SND_PCI_QUIRK(0x17aa, 0x383d, "Legion Y9000X 2019", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS),
SND_PCI_QUIRK(0x17aa, 0x3843, "Yoga 9i", ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP),
SND_PCI_QUIRK(0x17aa, 0x3847, "Legion 7 16ACHG6", ALC287_FIXUP_LEGION_16ACHG6),
SND_PCI_QUIRK(0x17aa, 0x384a, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
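
The patch_realtek.c hunks above serialize all COEF access behind spec->coef_mutex: the COEF registers are reached through an index/data verb pair, so an unserialized read-modify-write can interleave with another thread's index write. The double-underscore variants stay lock-free so alc_process_coef_fw() can take the mutex once around a whole batch. A generic sketch of the pattern, with illustrative names rather than the driver's API:

#include <linux/mutex.h>

struct idxreg {
	struct mutex lock;
	void (*write_index)(struct idxreg *r, unsigned int idx);
	unsigned int (*read_data)(struct idxreg *r);
	void (*write_data)(struct idxreg *r, unsigned int val);
};

/* Unlocked primitives, for callers that already hold r->lock. */
static unsigned int __idxreg_read(struct idxreg *r, unsigned int idx)
{
	r->write_index(r, idx);
	return r->read_data(r);
}

static void __idxreg_update(struct idxreg *r, unsigned int idx,
			    unsigned int mask, unsigned int bits)
{
	unsigned int val = __idxreg_read(r, idx);

	r->write_index(r, idx);
	r->write_data(r, (val & ~mask) | bits);
}

/* Locked wrapper: the whole read-modify-write is one critical
 * section, so two updaters can no longer interleave between the
 * read and the write. */
static void idxreg_update(struct idxreg *r, unsigned int idx,
			  unsigned int mask, unsigned int bits)
{
	mutex_lock(&r->lock);
	__idxreg_update(r, idx, mask, bits);
	mutex_unlock(&r->lock);
}
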
diff --git a/sound/soc/amd/acp/acp-mach-common.c b/sound/soc/amd/acp/acp-mach-common.c
index c9caade5cb74..cd05ee2802c9 100644
--- a/sound/soc/amd/acp/acp-mach-common.c
+++ b/sound/soc/amd/acp/acp-mach-common.c
@@ -303,11 +303,11 @@ static const struct snd_soc_dapm_route rt1019_map_lr[] = {
static struct snd_soc_codec_conf rt1019_conf[] = {
{
- .dlc = COMP_CODEC_CONF("i2c-10EC1019:00"),
+ .dlc = COMP_CODEC_CONF("i2c-10EC1019:01"),
.name_prefix = "Left",
},
{
- .dlc = COMP_CODEC_CONF("i2c-10EC1019:01"),
+ .dlc = COMP_CODEC_CONF("i2c-10EC1019:00"),
.name_prefix = "Right",
},
};
diff --git a/sound/soc/amd/acp/acp-mach.h b/sound/soc/amd/acp/acp-mach.h
index fd6299844ebe..c855f50d6b34 100644
--- a/sound/soc/amd/acp/acp-mach.h
+++ b/sound/soc/amd/acp/acp-mach.h
@@ -21,7 +21,6 @@
#include <linux/gpio/consumer.h>
#define EN_SPKR_GPIO_GB 0x11F
-#define EN_SPKR_GPIO_NK 0x146
#define EN_SPKR_GPIO_NONE -EINVAL
enum be_id {
diff --git a/sound/soc/amd/acp/acp-sof-mach.c b/sound/soc/amd/acp/acp-sof-mach.c
index 07de46142655..4cc431e54fe1 100644
--- a/sound/soc/amd/acp/acp-sof-mach.c
+++ b/sound/soc/amd/acp/acp-sof-mach.c
@@ -37,7 +37,7 @@ static struct acp_card_drvdata sof_rt5682_max_data = {
.hs_codec_id = RT5682,
.amp_codec_id = MAX98360A,
.dmic_codec_id = DMIC,
- .gpio_spkr_en = EN_SPKR_GPIO_NK,
+ .gpio_spkr_en = EN_SPKR_GPIO_NONE,
};
static struct acp_card_drvdata sof_rt5682s_max_data = {
@@ -47,7 +47,7 @@ static struct acp_card_drvdata sof_rt5682s_max_data = {
.hs_codec_id = RT5682S,
.amp_codec_id = MAX98360A,
.dmic_codec_id = DMIC,
- .gpio_spkr_en = EN_SPKR_GPIO_NK,
+ .gpio_spkr_en = EN_SPKR_GPIO_NONE,
};
static const struct snd_kcontrol_new acp_controls[] = {
diff --git a/sound/soc/codecs/adau1761-spi.c b/sound/soc/codecs/adau1761-spi.c
index 655689c9778a..7c9242c2ff94 100644
--- a/sound/soc/codecs/adau1761-spi.c
+++ b/sound/soc/codecs/adau1761-spi.c
@@ -45,10 +45,9 @@ static int adau1761_spi_probe(struct spi_device *spi)
id->driver_data, adau1761_spi_switch_mode);
}
-static int adau1761_spi_remove(struct spi_device *spi)
+static void adau1761_spi_remove(struct spi_device *spi)
{
adau17x1_remove(&spi->dev);
- return 0;
}
static const struct spi_device_id adau1761_spi_id[] = {
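
This adau1761 hunk is the first of many identical conversions in this section (adau1781, cs35l41, pcm3168a, pcm512x, tlv320aic32x4, tlv320aic3x, wm0010, wm8804, at73c213): spi_driver::remove now returns void, since the SPI core ignored the returned value anyway. A minimal sketch of the new shape, with hypothetical driver names:

#include <linux/module.h>
#include <linux/spi/spi.h>

static int my_codec_spi_probe(struct spi_device *spi)
{
	return 0;	/* a real driver registers its component here */
}

static void my_codec_spi_remove(struct spi_device *spi)
{
	/* Teardown only: since remove returns void, errors must be
	 * handled (or logged) here, not propagated. */
}

static struct spi_driver my_codec_spi_driver = {
	.driver = { .name = "my-codec" },	/* hypothetical name */
	.probe  = my_codec_spi_probe,
	.remove = my_codec_spi_remove,
};
module_spi_driver(my_codec_spi_driver);
MODULE_LICENSE("GPL");
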
diff --git a/sound/soc/codecs/adau1781-spi.c b/sound/soc/codecs/adau1781-spi.c
index bb5613574786..1a09633d5a88 100644
--- a/sound/soc/codecs/adau1781-spi.c
+++ b/sound/soc/codecs/adau1781-spi.c
@@ -45,10 +45,9 @@ static int adau1781_spi_probe(struct spi_device *spi)
id->driver_data, adau1781_spi_switch_mode);
}
-static int adau1781_spi_remove(struct spi_device *spi)
+static void adau1781_spi_remove(struct spi_device *spi)
{
adau17x1_remove(&spi->dev);
- return 0;
}
static const struct spi_device_id adau1781_spi_id[] = {
diff --git a/sound/soc/codecs/cpcap.c b/sound/soc/codecs/cpcap.c
index 598e09024e23..ffdf8b615efa 100644
--- a/sound/soc/codecs/cpcap.c
+++ b/sound/soc/codecs/cpcap.c
@@ -1667,6 +1667,8 @@ static int cpcap_codec_probe(struct platform_device *pdev)
{
struct device_node *codec_node =
of_get_child_by_name(pdev->dev.parent->of_node, "audio-codec");
+ if (!codec_node)
+ return -ENODEV;
pdev->dev.of_node = codec_node;
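
The cpcap.c fix above guards against a missing "audio-codec" DT child: of_get_child_by_name() returns NULL when the node is absent, and the probe previously assigned that NULL straight to pdev->dev.of_node. A short sketch of the check (hypothetical helper; a full driver would also of_node_put() the node when finished with it):

#include <linux/err.h>
#include <linux/of.h>

static struct device_node *my_get_codec_node(struct device_node *parent)
{
	struct device_node *np = of_get_child_by_name(parent, "audio-codec");

	/* The returned node carries a reference; callers should
	 * of_node_put() it when done. */
	return np ? np : ERR_PTR(-ENODEV);
}
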
diff --git a/sound/soc/codecs/cs35l41-spi.c b/sound/soc/codecs/cs35l41-spi.c
index 6dfd5459aa20..169221a5b09f 100644
--- a/sound/soc/codecs/cs35l41-spi.c
+++ b/sound/soc/codecs/cs35l41-spi.c
@@ -55,13 +55,11 @@ static int cs35l41_spi_probe(struct spi_device *spi)
return cs35l41_probe(cs35l41, pdata);
}
-static int cs35l41_spi_remove(struct spi_device *spi)
+static void cs35l41_spi_remove(struct spi_device *spi)
{
struct cs35l41_private *cs35l41 = spi_get_drvdata(spi);
cs35l41_remove(cs35l41);
-
- return 0;
}
#ifdef CONFIG_OF
diff --git a/sound/soc/codecs/cs4265.c b/sound/soc/codecs/cs4265.c
index 4aaee1873a11..4415fb364d4d 100644
--- a/sound/soc/codecs/cs4265.c
+++ b/sound/soc/codecs/cs4265.c
@@ -150,7 +150,6 @@ static const struct snd_kcontrol_new cs4265_snd_controls[] = {
SOC_SINGLE("E to F Buffer Disable Switch", CS4265_SPDIF_CTL1,
6, 1, 0),
SOC_ENUM("C Data Access", cam_mode_enum),
- SOC_SINGLE("SPDIF Switch", CS4265_SPDIF_CTL2, 5, 1, 1),
SOC_SINGLE("Validity Bit Control Switch", CS4265_SPDIF_CTL2,
3, 1, 0),
SOC_ENUM("SPDIF Mono/Stereo", spdif_mono_stereo_enum),
@@ -186,7 +185,7 @@ static const struct snd_soc_dapm_widget cs4265_dapm_widgets[] = {
SND_SOC_DAPM_SWITCH("Loopback", SND_SOC_NOPM, 0, 0,
&loopback_ctl),
- SND_SOC_DAPM_SWITCH("SPDIF", SND_SOC_NOPM, 0, 0,
+ SND_SOC_DAPM_SWITCH("SPDIF", CS4265_SPDIF_CTL2, 5, 1,
&spdif_switch),
SND_SOC_DAPM_SWITCH("DAC", CS4265_PWRCTL, 1, 1,
&dac_switch),
diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c
index b61f980cabdc..b07607a9ecea 100644
--- a/sound/soc/codecs/hdmi-codec.c
+++ b/sound/soc/codecs/hdmi-codec.c
@@ -277,7 +277,7 @@ struct hdmi_codec_priv {
bool busy;
struct snd_soc_jack *jack;
unsigned int jack_status;
- u8 iec_status[5];
+ u8 iec_status[AES_IEC958_STATUS_SIZE];
};
static const struct snd_soc_dapm_widget hdmi_widgets[] = {
diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c
index aec5127260fd..6ffe88345de5 100644
--- a/sound/soc/codecs/lpass-rx-macro.c
+++ b/sound/soc/codecs/lpass-rx-macro.c
@@ -2688,8 +2688,8 @@ static uint32_t get_iir_band_coeff(struct snd_soc_component *component,
int reg, b2_reg;
/* Address does not automatically update if reading */
- reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B1_CTL + 16 * iir_idx;
- b2_reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B2_CTL + 16 * iir_idx;
+ reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B1_CTL + 0x80 * iir_idx;
+ b2_reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B2_CTL + 0x80 * iir_idx;
snd_soc_component_write(component, reg,
((band_idx * BAND_MAX + coeff_idx) *
@@ -2718,7 +2718,7 @@ static uint32_t get_iir_band_coeff(struct snd_soc_component *component,
static void set_iir_band_coeff(struct snd_soc_component *component,
int iir_idx, int band_idx, uint32_t value)
{
- int reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B2_CTL + 16 * iir_idx;
+ int reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B2_CTL + 0x80 * iir_idx;
snd_soc_component_write(component, reg, (value & 0xFF));
snd_soc_component_write(component, reg, (value >> 8) & 0xFF);
@@ -2739,7 +2739,7 @@ static int rx_macro_put_iir_band_audio_mixer(
int iir_idx = ctl->iir_idx;
int band_idx = ctl->band_idx;
u32 coeff[BAND_MAX];
- int reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B1_CTL + 16 * iir_idx;
+ int reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B1_CTL + 0x80 * iir_idx;
memcpy(&coeff[0], ucontrol->value.bytes.data, params->max);
diff --git a/sound/soc/codecs/max9759.c b/sound/soc/codecs/max9759.c
index d75fd61b9032..bc57d7687f16 100644
--- a/sound/soc/codecs/max9759.c
+++ b/sound/soc/codecs/max9759.c
@@ -64,7 +64,8 @@ static int speaker_gain_control_put(struct snd_kcontrol *kcontrol,
struct snd_soc_component *c = snd_soc_kcontrol_component(kcontrol);
struct max9759 *priv = snd_soc_component_get_drvdata(c);
- if (ucontrol->value.integer.value[0] > 3)
+ if (ucontrol->value.integer.value[0] < 0 ||
+ ucontrol->value.integer.value[0] > 3)
return -EINVAL;
priv->gain = ucontrol->value.integer.value[0];
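
As with the soc-ops changes later in this section, the max9759 fix rejects negative input as well: ucontrol->value.integer.value[] is a long supplied by userspace, so checking only the upper bound lets negative values through to the gain setting. Sketch of the symmetric check (hypothetical helper for a 0..3 gain control):

#include <linux/errno.h>

static int my_gain_put(long val, unsigned int *gain)
{
	if (val < 0 || val > 3)
		return -EINVAL;	/* reject, do not truncate */
	if (*gain == (unsigned int)val)
		return 0;	/* no change */
	*gain = val;
	return 1;		/* changed */
}
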
diff --git a/sound/soc/codecs/pcm3168a-spi.c b/sound/soc/codecs/pcm3168a-spi.c
index ecd379f308e6..b5b08046f545 100644
--- a/sound/soc/codecs/pcm3168a-spi.c
+++ b/sound/soc/codecs/pcm3168a-spi.c
@@ -26,11 +26,9 @@ static int pcm3168a_spi_probe(struct spi_device *spi)
return pcm3168a_probe(&spi->dev, regmap);
}
-static int pcm3168a_spi_remove(struct spi_device *spi)
+static void pcm3168a_spi_remove(struct spi_device *spi)
{
pcm3168a_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id pcm3168a_spi_id[] = {
diff --git a/sound/soc/codecs/pcm512x-spi.c b/sound/soc/codecs/pcm512x-spi.c
index 7cf559b47e1c..4d29e7196380 100644
--- a/sound/soc/codecs/pcm512x-spi.c
+++ b/sound/soc/codecs/pcm512x-spi.c
@@ -26,10 +26,9 @@ static int pcm512x_spi_probe(struct spi_device *spi)
return pcm512x_probe(&spi->dev, regmap);
}
-static int pcm512x_spi_remove(struct spi_device *spi)
+static void pcm512x_spi_remove(struct spi_device *spi)
{
pcm512x_remove(&spi->dev);
- return 0;
}
static const struct spi_device_id pcm512x_spi_id[] = {
diff --git a/sound/soc/codecs/rt5668.c b/sound/soc/codecs/rt5668.c
index fb09715bf932..5b12cbf2ba21 100644
--- a/sound/soc/codecs/rt5668.c
+++ b/sound/soc/codecs/rt5668.c
@@ -1022,11 +1022,13 @@ static void rt5668_jack_detect_handler(struct work_struct *work)
container_of(work, struct rt5668_priv, jack_detect_work.work);
int val, btn_type;
- while (!rt5668->component)
- usleep_range(10000, 15000);
-
- while (!rt5668->component->card->instantiated)
- usleep_range(10000, 15000);
+ if (!rt5668->component || !rt5668->component->card ||
+ !rt5668->component->card->instantiated) {
+ /* card not yet ready, try later */
+ mod_delayed_work(system_power_efficient_wq,
+ &rt5668->jack_detect_work, msecs_to_jiffies(15));
+ return;
+ }
mutex_lock(&rt5668->calibrate_mutex);
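
The rt5668 hunk above (mirrored for rt5682 and rt5682s below) replaces busy-wait loops inside the jack-detect work item, which could pin a worker thread indefinitely while the card instantiates, with an early return that re-arms the delayed work. A sketch of the reschedule-instead-of-spin pattern, with a hypothetical context struct and a readiness flag standing in for the component/card checks:

#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>

struct my_ctx {
	struct delayed_work detect_work;
	bool ready;	/* stands in for component/card->instantiated */
};

static void my_detect_work(struct work_struct *work)
{
	struct my_ctx *ctx = container_of(work, struct my_ctx,
					  detect_work.work);

	if (!ctx->ready) {
		/* Not ready yet: give the worker back and retry
		 * shortly, rather than sleeping in the work item. */
		mod_delayed_work(system_power_efficient_wq,
				 &ctx->detect_work, msecs_to_jiffies(15));
		return;
	}

	/* ... actual jack detection would run here ... */
}
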
diff --git a/sound/soc/codecs/rt5682-i2c.c b/sound/soc/codecs/rt5682-i2c.c
index 20e0f90ea498..20fc0f3766de 100644
--- a/sound/soc/codecs/rt5682-i2c.c
+++ b/sound/soc/codecs/rt5682-i2c.c
@@ -59,18 +59,12 @@ static void rt5682_jd_check_handler(struct work_struct *work)
struct rt5682_priv *rt5682 = container_of(work, struct rt5682_priv,
jd_check_work.work);
- if (snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL)
- & RT5682_JDH_RS_MASK) {
+ if (snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL) & RT5682_JDH_RS_MASK)
/* jack out */
- rt5682->jack_type = rt5682_headset_detect(rt5682->component, 0);
-
- snd_soc_jack_report(rt5682->hs_jack, rt5682->jack_type,
- SND_JACK_HEADSET |
- SND_JACK_BTN_0 | SND_JACK_BTN_1 |
- SND_JACK_BTN_2 | SND_JACK_BTN_3);
- } else {
+ mod_delayed_work(system_power_efficient_wq,
+ &rt5682->jack_detect_work, 0);
+ else
schedule_delayed_work(&rt5682->jd_check_work, 500);
- }
}
static irqreturn_t rt5682_irq(int irq, void *data)
@@ -198,7 +192,6 @@ static int rt5682_i2c_probe(struct i2c_client *i2c,
}
mutex_init(&rt5682->calibrate_mutex);
- mutex_init(&rt5682->jdet_mutex);
rt5682_calibrate(rt5682);
rt5682_apply_patch_list(rt5682, &i2c->dev);
diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c
index 415ec564c82e..be68d573a490 100644
--- a/sound/soc/codecs/rt5682.c
+++ b/sound/soc/codecs/rt5682.c
@@ -922,15 +922,13 @@ static void rt5682_enable_push_button_irq(struct snd_soc_component *component,
*
* Returns detect status.
*/
-int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert)
+static int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert)
{
struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
struct snd_soc_dapm_context *dapm = &component->dapm;
unsigned int val, count;
if (jack_insert) {
- snd_soc_dapm_mutex_lock(dapm);
-
snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1,
RT5682_PWR_VREF2 | RT5682_PWR_MB,
RT5682_PWR_VREF2 | RT5682_PWR_MB);
@@ -981,8 +979,6 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert)
snd_soc_component_update_bits(component, RT5682_MICBIAS_2,
RT5682_PWR_CLK25M_MASK | RT5682_PWR_CLK1M_MASK,
RT5682_PWR_CLK25M_PU | RT5682_PWR_CLK1M_PU);
-
- snd_soc_dapm_mutex_unlock(dapm);
} else {
rt5682_enable_push_button_irq(component, false);
snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
@@ -1011,7 +1007,6 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert)
dev_dbg(component->dev, "jack_type = %d\n", rt5682->jack_type);
return rt5682->jack_type;
}
-EXPORT_SYMBOL_GPL(rt5682_headset_detect);
static int rt5682_set_jack_detect(struct snd_soc_component *component,
struct snd_soc_jack *hs_jack, void *data)
@@ -1094,15 +1089,20 @@ void rt5682_jack_detect_handler(struct work_struct *work)
{
struct rt5682_priv *rt5682 =
container_of(work, struct rt5682_priv, jack_detect_work.work);
+ struct snd_soc_dapm_context *dapm;
int val, btn_type;
- while (!rt5682->component)
- usleep_range(10000, 15000);
+ if (!rt5682->component || !rt5682->component->card ||
+ !rt5682->component->card->instantiated) {
+ /* card not yet ready, try later */
+ mod_delayed_work(system_power_efficient_wq,
+ &rt5682->jack_detect_work, msecs_to_jiffies(15));
+ return;
+ }
- while (!rt5682->component->card->instantiated)
- usleep_range(10000, 15000);
+ dapm = snd_soc_component_get_dapm(rt5682->component);
- mutex_lock(&rt5682->jdet_mutex);
+ snd_soc_dapm_mutex_lock(dapm);
mutex_lock(&rt5682->calibrate_mutex);
val = snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL)
@@ -1162,6 +1162,9 @@ void rt5682_jack_detect_handler(struct work_struct *work)
rt5682->irq_work_delay_time = 50;
}
+ mutex_unlock(&rt5682->calibrate_mutex);
+ snd_soc_dapm_mutex_unlock(dapm);
+
snd_soc_jack_report(rt5682->hs_jack, rt5682->jack_type,
SND_JACK_HEADSET |
SND_JACK_BTN_0 | SND_JACK_BTN_1 |
@@ -1174,9 +1177,6 @@ void rt5682_jack_detect_handler(struct work_struct *work)
else
cancel_delayed_work_sync(&rt5682->jd_check_work);
}
-
- mutex_unlock(&rt5682->calibrate_mutex);
- mutex_unlock(&rt5682->jdet_mutex);
}
EXPORT_SYMBOL_GPL(rt5682_jack_detect_handler);
@@ -1526,7 +1526,6 @@ static int rt5682_hp_event(struct snd_soc_dapm_widget *w,
{
struct snd_soc_component *component =
snd_soc_dapm_to_component(w->dapm);
- struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
switch (event) {
case SND_SOC_DAPM_PRE_PMU:
@@ -1538,17 +1537,12 @@ static int rt5682_hp_event(struct snd_soc_dapm_widget *w,
RT5682_DEPOP_1, 0x60, 0x60);
snd_soc_component_update_bits(component,
RT5682_DAC_ADC_DIG_VOL1, 0x00c0, 0x0080);
-
- mutex_lock(&rt5682->jdet_mutex);
-
snd_soc_component_update_bits(component, RT5682_HP_CTRL_2,
RT5682_HP_C2_DAC_L_EN | RT5682_HP_C2_DAC_R_EN,
RT5682_HP_C2_DAC_L_EN | RT5682_HP_C2_DAC_R_EN);
usleep_range(5000, 10000);
snd_soc_component_update_bits(component, RT5682_CHARGE_PUMP_1,
RT5682_CP_SW_SIZE_MASK, RT5682_CP_SW_SIZE_L);
-
- mutex_unlock(&rt5682->jdet_mutex);
break;
case SND_SOC_DAPM_POST_PMD:
diff --git a/sound/soc/codecs/rt5682.h b/sound/soc/codecs/rt5682.h
index c917c76200ea..52ff0d9c36c5 100644
--- a/sound/soc/codecs/rt5682.h
+++ b/sound/soc/codecs/rt5682.h
@@ -1463,7 +1463,6 @@ struct rt5682_priv {
int jack_type;
int irq_work_delay_time;
- struct mutex jdet_mutex;
};
extern const char *rt5682_supply_names[RT5682_NUM_SUPPLIES];
@@ -1473,7 +1472,6 @@ int rt5682_sel_asrc_clk_src(struct snd_soc_component *component,
void rt5682_apply_patch_list(struct rt5682_priv *rt5682, struct device *dev);
-int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert);
void rt5682_jack_detect_handler(struct work_struct *work);
bool rt5682_volatile_register(struct device *dev, unsigned int reg);
diff --git a/sound/soc/codecs/rt5682s.c b/sound/soc/codecs/rt5682s.c
index efa1016831dd..1e662d1be2b3 100644
--- a/sound/soc/codecs/rt5682s.c
+++ b/sound/soc/codecs/rt5682s.c
@@ -824,11 +824,13 @@ static void rt5682s_jack_detect_handler(struct work_struct *work)
container_of(work, struct rt5682s_priv, jack_detect_work.work);
int val, btn_type;
- while (!rt5682s->component)
- usleep_range(10000, 15000);
-
- while (!rt5682s->component->card->instantiated)
- usleep_range(10000, 15000);
+ if (!rt5682s->component || !rt5682s->component->card ||
+ !rt5682s->component->card->instantiated) {
+ /* card not yet ready, try later */
+ mod_delayed_work(system_power_efficient_wq,
+ &rt5682s->jack_detect_work, msecs_to_jiffies(15));
+ return;
+ }
mutex_lock(&rt5682s->jdet_mutex);
mutex_lock(&rt5682s->calibrate_mutex);
diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c
index 6549e7fef3e3..c5ea3b115966 100644
--- a/sound/soc/codecs/tas2770.c
+++ b/sound/soc/codecs/tas2770.c
@@ -38,10 +38,12 @@ static void tas2770_reset(struct tas2770_priv *tas2770)
gpiod_set_value_cansleep(tas2770->reset_gpio, 0);
msleep(20);
gpiod_set_value_cansleep(tas2770->reset_gpio, 1);
+ usleep_range(1000, 2000);
}
snd_soc_component_write(tas2770->component, TAS2770_SW_RST,
TAS2770_RST);
+ usleep_range(1000, 2000);
}
static int tas2770_set_bias_level(struct snd_soc_component *component,
@@ -110,6 +112,7 @@ static int tas2770_codec_resume(struct snd_soc_component *component)
if (tas2770->sdz_gpio) {
gpiod_set_value_cansleep(tas2770->sdz_gpio, 1);
+ usleep_range(1000, 2000);
} else {
ret = snd_soc_component_update_bits(component, TAS2770_PWR_CTRL,
TAS2770_PWR_CTRL_MASK,
@@ -510,8 +513,10 @@ static int tas2770_codec_probe(struct snd_soc_component *component)
tas2770->component = component;
- if (tas2770->sdz_gpio)
+ if (tas2770->sdz_gpio) {
gpiod_set_value_cansleep(tas2770->sdz_gpio, 1);
+ usleep_range(1000, 2000);
+ }
tas2770_reset(tas2770);
diff --git a/sound/soc/codecs/tlv320aic32x4-spi.c b/sound/soc/codecs/tlv320aic32x4-spi.c
index a8958cd1c692..03cce8d6404f 100644
--- a/sound/soc/codecs/tlv320aic32x4-spi.c
+++ b/sound/soc/codecs/tlv320aic32x4-spi.c
@@ -46,11 +46,9 @@ static int aic32x4_spi_probe(struct spi_device *spi)
return aic32x4_probe(&spi->dev, regmap);
}
-static int aic32x4_spi_remove(struct spi_device *spi)
+static void aic32x4_spi_remove(struct spi_device *spi)
{
aic32x4_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id aic32x4_spi_id[] = {
diff --git a/sound/soc/codecs/tlv320aic3x-spi.c b/sound/soc/codecs/tlv320aic3x-spi.c
index 494e84402232..deed6ec7e081 100644
--- a/sound/soc/codecs/tlv320aic3x-spi.c
+++ b/sound/soc/codecs/tlv320aic3x-spi.c
@@ -35,11 +35,9 @@ static int aic3x_spi_probe(struct spi_device *spi)
return aic3x_probe(&spi->dev, regmap, id->driver_data);
}
-static int aic3x_spi_remove(struct spi_device *spi)
+static void aic3x_spi_remove(struct spi_device *spi)
{
aic3x_remove(&spi->dev);
-
- return 0;
}
static const struct spi_device_id aic3x_spi_id[] = {
diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c
index eff200a07d9f..36cbc66914f9 100644
--- a/sound/soc/codecs/wcd938x.c
+++ b/sound/soc/codecs/wcd938x.c
@@ -1432,14 +1432,10 @@ static int wcd938x_sdw_connect_port(struct wcd938x_sdw_ch_info *ch_info,
return 0;
}
-static int wcd938x_connect_port(struct wcd938x_sdw_priv *wcd, u8 ch_id, u8 enable)
+static int wcd938x_connect_port(struct wcd938x_sdw_priv *wcd, u8 port_num, u8 ch_id, u8 enable)
{
- u8 port_num;
-
- port_num = wcd->ch_info[ch_id].port_num;
-
return wcd938x_sdw_connect_port(&wcd->ch_info[ch_id],
- &wcd->port_config[port_num],
+ &wcd->port_config[port_num - 1],
enable);
}
@@ -2563,7 +2559,7 @@ static int wcd938x_ear_pa_put_gain(struct snd_kcontrol *kcontrol,
WCD938X_EAR_GAIN_MASK,
ucontrol->value.integer.value[0]);
- return 0;
+ return 1;
}
static int wcd938x_get_compander(struct snd_kcontrol *kcontrol,
@@ -2593,6 +2589,7 @@ static int wcd938x_set_compander(struct snd_kcontrol *kcontrol,
struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
struct wcd938x_sdw_priv *wcd;
int value = ucontrol->value.integer.value[0];
+ int portidx;
struct soc_mixer_control *mc;
bool hphr;
@@ -2606,12 +2603,14 @@ static int wcd938x_set_compander(struct snd_kcontrol *kcontrol,
else
wcd938x->comp1_enable = value;
+ portidx = wcd->ch_info[mc->reg].port_num;
+
if (value)
- wcd938x_connect_port(wcd, mc->reg, true);
+ wcd938x_connect_port(wcd, portidx, mc->reg, true);
else
- wcd938x_connect_port(wcd, mc->reg, false);
+ wcd938x_connect_port(wcd, portidx, mc->reg, false);
- return 0;
+ return 1;
}
static int wcd938x_ldoh_get(struct snd_kcontrol *kcontrol,
@@ -2882,9 +2881,11 @@ static int wcd938x_get_swr_port(struct snd_kcontrol *kcontrol,
struct wcd938x_sdw_priv *wcd;
struct soc_mixer_control *mixer = (struct soc_mixer_control *)kcontrol->private_value;
int dai_id = mixer->shift;
- int portidx = mixer->reg;
+ int portidx, ch_idx = mixer->reg;
+
wcd = wcd938x->sdw_priv[dai_id];
+ portidx = wcd->ch_info[ch_idx].port_num;
ucontrol->value.integer.value[0] = wcd->port_enable[portidx];
@@ -2899,12 +2900,14 @@ static int wcd938x_set_swr_port(struct snd_kcontrol *kcontrol,
struct wcd938x_sdw_priv *wcd;
struct soc_mixer_control *mixer =
(struct soc_mixer_control *)kcontrol->private_value;
- int portidx = mixer->reg;
+ int ch_idx = mixer->reg;
+ int portidx;
int dai_id = mixer->shift;
bool enable;
wcd = wcd938x->sdw_priv[dai_id];
+ portidx = wcd->ch_info[ch_idx].port_num;
if (ucontrol->value.integer.value[0])
enable = true;
else
@@ -2912,9 +2915,9 @@ static int wcd938x_set_swr_port(struct snd_kcontrol *kcontrol,
wcd->port_enable[portidx] = enable;
- wcd938x_connect_port(wcd, portidx, enable);
+ wcd938x_connect_port(wcd, portidx, ch_idx, enable);
- return 0;
+ return 1;
}
diff --git a/sound/soc/codecs/wm0010.c b/sound/soc/codecs/wm0010.c
index 28b4656c4e14..1bef1c500c8e 100644
--- a/sound/soc/codecs/wm0010.c
+++ b/sound/soc/codecs/wm0010.c
@@ -969,7 +969,7 @@ static int wm0010_spi_probe(struct spi_device *spi)
return 0;
}
-static int wm0010_spi_remove(struct spi_device *spi)
+static void wm0010_spi_remove(struct spi_device *spi)
{
struct wm0010_priv *wm0010 = spi_get_drvdata(spi);
@@ -980,8 +980,6 @@ static int wm0010_spi_remove(struct spi_device *spi)
if (wm0010->irq)
free_irq(wm0010->irq, wm0010);
-
- return 0;
}
static struct spi_driver wm0010_spi_driver = {
diff --git a/sound/soc/codecs/wm8804-spi.c b/sound/soc/codecs/wm8804-spi.c
index 9a8da1511c34..628568724c20 100644
--- a/sound/soc/codecs/wm8804-spi.c
+++ b/sound/soc/codecs/wm8804-spi.c
@@ -24,10 +24,9 @@ static int wm8804_spi_probe(struct spi_device *spi)
return wm8804_probe(&spi->dev, regmap);
}
-static int wm8804_spi_remove(struct spi_device *spi)
+static void wm8804_spi_remove(struct spi_device *spi)
{
wm8804_remove(&spi->dev);
- return 0;
}
static const struct of_device_id wm8804_of_match[] = {
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index f3672e3d1703..0582585236a2 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -1441,7 +1441,8 @@ static int wm_adsp_buffer_parse_coeff(struct cs_dsp_coeff_ctl *cs_ctl)
int ret, i;
for (i = 0; i < 5; ++i) {
- ret = cs_dsp_coeff_read_ctrl(cs_ctl, 0, &coeff_v1, sizeof(coeff_v1));
+ ret = cs_dsp_coeff_read_ctrl(cs_ctl, 0, &coeff_v1,
+ min(cs_ctl->len, sizeof(coeff_v1)));
if (ret < 0)
return ret;
diff --git a/sound/soc/fsl/pcm030-audio-fabric.c b/sound/soc/fsl/pcm030-audio-fabric.c
index af3c3b90c0ac..83b4a22bf15a 100644
--- a/sound/soc/fsl/pcm030-audio-fabric.c
+++ b/sound/soc/fsl/pcm030-audio-fabric.c
@@ -93,16 +93,21 @@ static int pcm030_fabric_probe(struct platform_device *op)
dev_err(&op->dev, "platform_device_alloc() failed\n");
ret = platform_device_add(pdata->codec_device);
- if (ret)
+ if (ret) {
dev_err(&op->dev, "platform_device_add() failed: %d\n", ret);
+ platform_device_put(pdata->codec_device);
+ }
ret = snd_soc_register_card(card);
- if (ret)
+ if (ret) {
dev_err(&op->dev, "snd_soc_register_card() failed: %d\n", ret);
+ platform_device_del(pdata->codec_device);
+ platform_device_put(pdata->codec_device);
+ }
platform_set_drvdata(op, pdata);
-
return ret;
+
}
static int pcm030_fabric_remove(struct platform_device *op)
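
The pcm030 fix above follows the platform-device refcount rules: if platform_device_add() fails, the caller still owns the initial reference and must platform_device_put(); once add has succeeded, unwinding a later failure takes platform_device_del() plus platform_device_put(). A sketch with a hypothetical child device and setup callback:

#include <linux/platform_device.h>

static int my_register_child(struct platform_device *child,
			     int (*later_setup)(void))
{
	int ret;

	ret = platform_device_add(child);
	if (ret) {
		/* add failed: drop the reference we still own */
		platform_device_put(child);
		return ret;
	}

	ret = later_setup();
	if (ret) {
		/* add succeeded earlier, so unwind both steps */
		platform_device_del(child);
		platform_device_put(child);
	}

	return ret;
}
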
diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c
index a89d1cfdda32..78419e18717d 100644
--- a/sound/soc/generic/simple-card.c
+++ b/sound/soc/generic/simple-card.c
@@ -28,6 +28,30 @@ static const struct snd_soc_ops simple_ops = {
.hw_params = asoc_simple_hw_params,
};
+static int asoc_simple_parse_platform(struct device_node *node,
+ struct snd_soc_dai_link_component *dlc)
+{
+ struct of_phandle_args args;
+ int ret;
+
+ if (!node)
+ return 0;
+
+ /*
+ * Get node via "sound-dai = <&phandle port>"
+ * it will be used as xxx_of_node on soc_bind_dai_link()
+ */
+ ret = of_parse_phandle_with_args(node, DAI, CELL, 0, &args);
+ if (ret)
+ return ret;
+
+ /* dai_name is not required and may not exist for plat component */
+
+ dlc->of_node = args.np;
+
+ return 0;
+}
+
static int asoc_simple_parse_dai(struct device_node *node,
struct snd_soc_dai_link_component *dlc,
int *is_single_link)
@@ -289,7 +313,7 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv,
if (ret < 0)
goto dai_link_of_err;
- ret = asoc_simple_parse_dai(plat, platforms, NULL);
+ ret = asoc_simple_parse_platform(plat, platforms);
if (ret < 0)
goto dai_link_of_err;
diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
index 148ddf4cace0..aeca58246fc7 100644
--- a/sound/soc/intel/skylake/skl.c
+++ b/sound/soc/intel/skylake/skl.c
@@ -952,6 +952,7 @@ static int skl_first_init(struct hdac_bus *bus)
/* allow 64bit DMA address if supported by H/W */
if (dma_set_mask_and_coherent(bus->dev, DMA_BIT_MASK(64)))
dma_set_mask_and_coherent(bus->dev, DMA_BIT_MASK(32));
+ dma_set_max_seg_size(bus->dev, UINT_MAX);
/* initialize streams */
snd_hdac_ext_stream_init_all
diff --git a/sound/soc/mediatek/Kconfig b/sound/soc/mediatek/Kconfig
index 9306b7ca2644..0d154350f180 100644
--- a/sound/soc/mediatek/Kconfig
+++ b/sound/soc/mediatek/Kconfig
@@ -216,7 +216,7 @@ config SND_SOC_MT8195_MT6359_RT1019_RT5682
config SND_SOC_MT8195_MT6359_RT1011_RT5682
tristate "ASoC Audio driver for MT8195 with MT6359 RT1011 RT5682 codec"
- depends on I2C
+ depends on I2C && GPIOLIB
depends on SND_SOC_MT8195 && MTK_PMIC_WRAP
select SND_SOC_MT6359
select SND_SOC_RT1011
diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c
index a59e9d20cb46..4b1773c1fb95 100644
--- a/sound/soc/qcom/lpass-platform.c
+++ b/sound/soc/qcom/lpass-platform.c
@@ -524,7 +524,7 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component,
return -EINVAL;
}
- ret = regmap_update_bits(map, reg_irqclr, val_irqclr, val_irqclr);
+ ret = regmap_write_bits(map, reg_irqclr, val_irqclr, val_irqclr);
if (ret) {
dev_err(soc_runtime->dev, "error writing to irqclear reg: %d\n", ret);
return ret;
@@ -665,7 +665,7 @@ static irqreturn_t lpass_dma_interrupt_handler(
return -EINVAL;
}
if (interrupts & LPAIF_IRQ_PER(chan)) {
- rv = regmap_update_bits(map, reg, mask, (LPAIF_IRQ_PER(chan) | val));
+ rv = regmap_write_bits(map, reg, mask, (LPAIF_IRQ_PER(chan) | val));
if (rv) {
dev_err(soc_runtime->dev,
"error writing to irqclear reg: %d\n", rv);
@@ -676,7 +676,7 @@ static irqreturn_t lpass_dma_interrupt_handler(
}
if (interrupts & LPAIF_IRQ_XRUN(chan)) {
- rv = regmap_update_bits(map, reg, mask, (LPAIF_IRQ_XRUN(chan) | val));
+ rv = regmap_write_bits(map, reg, mask, (LPAIF_IRQ_XRUN(chan) | val));
if (rv) {
dev_err(soc_runtime->dev,
"error writing to irqclear reg: %d\n", rv);
@@ -688,7 +688,7 @@ static irqreturn_t lpass_dma_interrupt_handler(
}
if (interrupts & LPAIF_IRQ_ERR(chan)) {
- rv = regmap_update_bits(map, reg, mask, (LPAIF_IRQ_ERR(chan) | val));
+ rv = regmap_write_bits(map, reg, mask, (LPAIF_IRQ_ERR(chan) | val));
if (rv) {
dev_err(soc_runtime->dev,
"error writing to irqclear reg: %d\n", rv);
diff --git a/sound/soc/qcom/qdsp6/q6apm-dai.c b/sound/soc/qcom/qdsp6/q6apm-dai.c
index eb1c3aec479b..19c4a90ec1ea 100644
--- a/sound/soc/qcom/qdsp6/q6apm-dai.c
+++ b/sound/soc/qcom/qdsp6/q6apm-dai.c
@@ -308,8 +308,11 @@ static int q6apm_dai_close(struct snd_soc_component *component,
struct snd_pcm_runtime *runtime = substream->runtime;
struct q6apm_dai_rtd *prtd = runtime->private_data;
- q6apm_graph_stop(prtd->graph);
- q6apm_unmap_memory_regions(prtd->graph, substream->stream);
+ if (prtd->state) { /* only stop graph that is started */
+ q6apm_graph_stop(prtd->graph);
+ q6apm_unmap_memory_regions(prtd->graph, substream->stream);
+ }
+
q6apm_graph_close(prtd->graph);
prtd->graph = NULL;
kfree(prtd);
diff --git a/sound/soc/soc-acpi.c b/sound/soc/soc-acpi.c
index cbd7ea48837b..142476f1396f 100644
--- a/sound/soc/soc-acpi.c
+++ b/sound/soc/soc-acpi.c
@@ -55,16 +55,13 @@ EXPORT_SYMBOL_GPL(snd_soc_acpi_find_machine);
static acpi_status snd_soc_acpi_find_package(acpi_handle handle, u32 level,
void *context, void **ret)
{
- struct acpi_device *adev;
+ struct acpi_device *adev = acpi_fetch_acpi_dev(handle);
acpi_status status;
struct snd_soc_acpi_package_context *pkg_ctx = context;
pkg_ctx->data_valid = false;
- if (acpi_bus_get_device(handle, &adev))
- return AE_OK;
-
- if (adev->status.present && adev->status.functional) {
+ if (adev && adev->status.present && adev->status.functional) {
struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
union acpi_object *myobj = NULL;
diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c
index 08eaa9ddf191..a0ca58ba1627 100644
--- a/sound/soc/soc-ops.c
+++ b/sound/soc/soc-ops.c
@@ -308,7 +308,7 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol,
unsigned int sign_bit = mc->sign_bit;
unsigned int mask = (1 << fls(max)) - 1;
unsigned int invert = mc->invert;
- int err;
+ int err, ret;
bool type_2r = false;
unsigned int val2 = 0;
unsigned int val, val_mask;
@@ -316,13 +316,27 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol,
if (sign_bit)
mask = BIT(sign_bit + 1) - 1;
- val = ((ucontrol->value.integer.value[0] + min) & mask);
+ if (ucontrol->value.integer.value[0] < 0)
+ return -EINVAL;
+ val = ucontrol->value.integer.value[0];
+ if (mc->platform_max && ((int)val + min) > mc->platform_max)
+ return -EINVAL;
+ if (val > max - min)
+ return -EINVAL;
+ val = (val + min) & mask;
if (invert)
val = max - val;
val_mask = mask << shift;
val = val << shift;
if (snd_soc_volsw_is_stereo(mc)) {
- val2 = ((ucontrol->value.integer.value[1] + min) & mask);
+ if (ucontrol->value.integer.value[1] < 0)
+ return -EINVAL;
+ val2 = ucontrol->value.integer.value[1];
+ if (mc->platform_max && ((int)val2 + min) > mc->platform_max)
+ return -EINVAL;
+ if (val2 > max - min)
+ return -EINVAL;
+ val2 = (val2 + min) & mask;
if (invert)
val2 = max - val2;
if (reg == reg2) {
@@ -336,12 +350,18 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol,
err = snd_soc_component_update_bits(component, reg, val_mask, val);
if (err < 0)
return err;
+ ret = err;
- if (type_2r)
+ if (type_2r) {
err = snd_soc_component_update_bits(component, reg2, val_mask,
- val2);
+ val2);
+ /* Don't discard any error code or drop change flag */
+ if (ret == 0 || err < 0) {
+ ret = err;
+ }
+ }
- return err;
+ return ret;
}
EXPORT_SYMBOL_GPL(snd_soc_put_volsw);
@@ -407,15 +427,24 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol,
int min = mc->min;
unsigned int mask = (1U << (fls(min + max) - 1)) - 1;
int err = 0;
+ int ret;
unsigned int val, val_mask;
+ if (ucontrol->value.integer.value[0] < 0)
+ return -EINVAL;
+ val = ucontrol->value.integer.value[0];
+ if (mc->platform_max && val > mc->platform_max)
+ return -EINVAL;
+ if (val > max - min)
+ return -EINVAL;
val_mask = mask << shift;
- val = (ucontrol->value.integer.value[0] + min) & mask;
+ val = (val + min) & mask;
val = val << shift;
err = snd_soc_component_update_bits(component, reg, val_mask, val);
if (err < 0)
return err;
+ ret = err;
if (snd_soc_volsw_is_stereo(mc)) {
unsigned int val2;
@@ -426,6 +455,11 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol,
err = snd_soc_component_update_bits(component, reg2, val_mask,
val2);
+
+ /* Don't discard any error code or drop change flag */
+ if (ret == 0 || err < 0) {
+ ret = err;
+ }
}
return err;
}
@@ -485,7 +519,7 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol,
unsigned int mask = (1 << fls(max)) - 1;
unsigned int invert = mc->invert;
unsigned int val, val_mask;
- int ret;
+ int err, ret;
if (invert)
val = (max - ucontrol->value.integer.value[0]) & mask;
@@ -494,9 +528,10 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol,
val_mask = mask << shift;
val = val << shift;
- ret = snd_soc_component_update_bits(component, reg, val_mask, val);
- if (ret < 0)
- return ret;
+ err = snd_soc_component_update_bits(component, reg, val_mask, val);
+ if (err < 0)
+ return err;
+ ret = err;
if (snd_soc_volsw_is_stereo(mc)) {
if (invert)
@@ -506,8 +541,12 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol,
val_mask = mask << shift;
val = val << shift;
- ret = snd_soc_component_update_bits(component, rreg, val_mask,
+ err = snd_soc_component_update_bits(component, rreg, val_mask,
val);
+ /* Don't discard any error code or drop change flag */
+ if (ret == 0 || err < 0) {
+ ret = err;
+ }
}
return ret;
@@ -856,8 +895,11 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol,
unsigned long mask = (1UL<<mc->nbits)-1;
long max = mc->max;
long val = ucontrol->value.integer.value[0];
+ int ret = 0;
unsigned int i;
+ if (val < mc->min || val > mc->max)
+ return -EINVAL;
if (invert)
val = max - val;
val &= mask;
@@ -868,9 +910,11 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol,
regmask, regval);
if (err < 0)
return err;
+ if (err > 0)
+ ret = err;
}
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(snd_soc_put_xr_sx);
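
The soc-ops.c changes above tighten the put-callback contract: validate userspace values before touching hardware (including negatives, since value[] is a long), return a negative errno on failure, and return 1 rather than 0 when a value actually changed so the core emits a control notification; for stereo controls the second write must not discard the first write's change flag or error. A condensed sketch of that contract, with a hypothetical update callback that returns 1/0/negative like snd_soc_component_update_bits():

#include <linux/errno.h>

static int my_put_volsw(long in_l, long in_r, long min, long max,
			int (*update)(int channel, unsigned int val))
{
	int err, ret;

	if (in_l < 0 || in_l > max - min ||
	    in_r < 0 || in_r > max - min)
		return -EINVAL;		/* validate before writing */

	err = update(0, in_l + min);
	if (err < 0)
		return err;
	ret = err;			/* 1 = changed, 0 = unchanged */

	err = update(1, in_r + min);
	/* Keep any error, and keep the change flag if either leg
	 * changed; the second write must not mask the first. */
	if (ret == 0 || err < 0)
		ret = err;

	return ret;
}
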
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 7abfc48b26ca..9a954680d492 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -46,8 +46,8 @@ static inline void snd_soc_dpcm_stream_lock_irq(struct snd_soc_pcm_runtime *rtd,
snd_pcm_stream_lock_irq(snd_soc_dpcm_get_substream(rtd, stream));
}
-#define snd_soc_dpcm_stream_lock_irqsave(rtd, stream, flags) \
- snd_pcm_stream_lock_irqsave(snd_soc_dpcm_get_substream(rtd, stream), flags)
+#define snd_soc_dpcm_stream_lock_irqsave_nested(rtd, stream, flags) \
+ snd_pcm_stream_lock_irqsave_nested(snd_soc_dpcm_get_substream(rtd, stream), flags)
static inline void snd_soc_dpcm_stream_unlock_irq(struct snd_soc_pcm_runtime *rtd,
int stream)
@@ -1268,6 +1268,7 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe,
void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream)
{
struct snd_soc_dpcm *dpcm, *d;
+ LIST_HEAD(deleted_dpcms);
snd_soc_dpcm_mutex_assert_held(fe);
@@ -1287,13 +1288,18 @@ void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream)
/* BEs still alive need new FE */
dpcm_be_reparent(fe, dpcm->be, stream);
- dpcm_remove_debugfs_state(dpcm);
-
list_del(&dpcm->list_be);
+ list_move(&dpcm->list_fe, &deleted_dpcms);
+ }
+ snd_soc_dpcm_stream_unlock_irq(fe, stream);
+
+ while (!list_empty(&deleted_dpcms)) {
+ dpcm = list_first_entry(&deleted_dpcms, struct snd_soc_dpcm,
+ list_fe);
list_del(&dpcm->list_fe);
+ dpcm_remove_debugfs_state(dpcm);
kfree(dpcm);
}
- snd_soc_dpcm_stream_unlock_irq(fe, stream);
}
/* get BE for DAI widget and stream */
@@ -2094,7 +2100,7 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
be = dpcm->be;
be_substream = snd_soc_dpcm_get_substream(be, stream);
- snd_soc_dpcm_stream_lock_irqsave(be, stream, flags);
+ snd_soc_dpcm_stream_lock_irqsave_nested(be, stream, flags);
/* is this op for this BE ? */
if (!snd_soc_dpcm_be_can_update(fe, be, stream))
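
The dpcm_be_disconnect() rework above is the classic unlink-now, free-later pattern: entries are moved onto a private list while the stream lock is held, then debugfs teardown and kfree() run after the lock is dropped, so no heavyweight work happens inside the spinlock. A self-contained sketch:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct item { struct list_head node; };

static void purge(struct list_head *live, spinlock_t *lock)
{
	struct item *it, *tmp;
	LIST_HEAD(doomed);

	spin_lock(lock);
	list_for_each_entry_safe(it, tmp, live, node)
		list_move(&it->node, &doomed);	/* unlink under lock */
	spin_unlock(lock);

	list_for_each_entry_safe(it, tmp, &doomed, node) {
		list_del(&it->node);
		kfree(it);	/* safe: no spinlock held here */
	}
}
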
diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c
index c8fb082209ce..1385695d7745 100644
--- a/sound/soc/sof/intel/hda.c
+++ b/sound/soc/sof/intel/hda.c
@@ -956,6 +956,7 @@ int hda_dsp_probe(struct snd_sof_dev *sdev)
dev_dbg(sdev->dev, "DMA mask is 32 bit\n");
dma_set_mask_and_coherent(&pci->dev, DMA_BIT_MASK(32));
}
+ dma_set_max_seg_size(&pci->dev, UINT_MAX);
/* init streams */
ret = hda_dsp_stream_init(sdev);
diff --git a/sound/soc/xilinx/xlnx_formatter_pcm.c b/sound/soc/xilinx/xlnx_formatter_pcm.c
index 91afea9d5de6..ce19a6058b27 100644
--- a/sound/soc/xilinx/xlnx_formatter_pcm.c
+++ b/sound/soc/xilinx/xlnx_formatter_pcm.c
@@ -37,6 +37,7 @@
#define XLNX_AUD_XFER_COUNT 0x28
#define XLNX_AUD_CH_STS_START 0x2C
#define XLNX_BYTES_PER_CH 0x44
+#define XLNX_AUD_ALIGN_BYTES 64
#define AUD_STS_IOC_IRQ_MASK BIT(31)
#define AUD_STS_CH_STS_MASK BIT(29)
@@ -368,12 +369,32 @@ static int xlnx_formatter_pcm_open(struct snd_soc_component *component,
snd_soc_set_runtime_hwparams(substream, &xlnx_pcm_hardware);
runtime->private_data = stream_data;
- /* Resize the period size divisible by 64 */
+ /* Resize the period bytes as divisible by 64 */
err = snd_pcm_hw_constraint_step(runtime, 0,
- SNDRV_PCM_HW_PARAM_PERIOD_BYTES, 64);
+ SNDRV_PCM_HW_PARAM_PERIOD_BYTES,
+ XLNX_AUD_ALIGN_BYTES);
if (err) {
dev_err(component->dev,
- "unable to set constraint on period bytes\n");
+ "Unable to set constraint on period bytes\n");
+ return err;
+ }
+
+ /* Resize the buffer bytes as divisible by 64 */
+ err = snd_pcm_hw_constraint_step(runtime, 0,
+ SNDRV_PCM_HW_PARAM_BUFFER_BYTES,
+ XLNX_AUD_ALIGN_BYTES);
+ if (err) {
+ dev_err(component->dev,
+ "Unable to set constraint on buffer bytes\n");
+ return err;
+ }
+
+ /* Set periods as integer multiple */
+ err = snd_pcm_hw_constraint_integer(runtime,
+ SNDRV_PCM_HW_PARAM_PERIODS);
+ if (err < 0) {
+ dev_err(component->dev,
+ "Unable to set constraint on periods to be integer\n");
return err;
}
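
The xlnx_formatter constraints above keep every DMA transfer 64-byte aligned from all sides: period bytes and buffer bytes are both stepped by 64, and the period count is forced to an integer so buffer_bytes stays an exact multiple of period_bytes. A sketch of the trio, assuming a substream runtime:

#include <sound/pcm.h>

static int my_apply_alignment(struct snd_pcm_runtime *runtime)
{
	int err;

	err = snd_pcm_hw_constraint_step(runtime, 0,
					 SNDRV_PCM_HW_PARAM_PERIOD_BYTES, 64);
	if (err)
		return err;

	err = snd_pcm_hw_constraint_step(runtime, 0,
					 SNDRV_PCM_HW_PARAM_BUFFER_BYTES, 64);
	if (err)
		return err;

	/* An integer period count keeps buffer_bytes an exact
	 * multiple of period_bytes. */
	err = snd_pcm_hw_constraint_integer(runtime,
					    SNDRV_PCM_HW_PARAM_PERIODS);
	return err < 0 ? err : 0;
}
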
diff --git a/sound/spi/at73c213.c b/sound/spi/at73c213.c
index 76c0e37a838c..56d2c712e257 100644
--- a/sound/spi/at73c213.c
+++ b/sound/spi/at73c213.c
@@ -1001,7 +1001,7 @@ out:
return retval;
}
-static int snd_at73c213_remove(struct spi_device *spi)
+static void snd_at73c213_remove(struct spi_device *spi)
{
struct snd_card *card = dev_get_drvdata(&spi->dev);
struct snd_at73c213 *chip = card->private_data;
@@ -1066,8 +1066,6 @@ out:
ssc_free(chip->ssc);
snd_card_free(card);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c
index 70319c822c10..2d444ec74202 100644
--- a/sound/usb/implicit.c
+++ b/sound/usb/implicit.c
@@ -47,13 +47,13 @@ struct snd_usb_implicit_fb_match {
static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = {
/* Generic matching */
IMPLICIT_FB_GENERIC_DEV(0x0499, 0x1509), /* Steinberg UR22 */
- IMPLICIT_FB_GENERIC_DEV(0x0763, 0x2080), /* M-Audio FastTrack Ultra */
- IMPLICIT_FB_GENERIC_DEV(0x0763, 0x2081), /* M-Audio FastTrack Ultra */
IMPLICIT_FB_GENERIC_DEV(0x0763, 0x2030), /* M-Audio Fast Track C400 */
IMPLICIT_FB_GENERIC_DEV(0x0763, 0x2031), /* M-Audio Fast Track C600 */
/* Fixed EP */
/* FIXME: check the availability of generic matching */
+ IMPLICIT_FB_FIXED_DEV(0x0763, 0x2080, 0x81, 2), /* M-Audio FastTrack Ultra */
+ IMPLICIT_FB_FIXED_DEV(0x0763, 0x2081, 0x81, 2), /* M-Audio FastTrack Ultra */
IMPLICIT_FB_FIXED_DEV(0x2466, 0x8010, 0x81, 2), /* Fractal Audio Axe-Fx III */
IMPLICIT_FB_FIXED_DEV(0x31e9, 0x0001, 0x81, 2), /* Solid State Logic SSL2 */
IMPLICIT_FB_FIXED_DEV(0x31e9, 0x0002, 0x81, 2), /* Solid State Logic SSL2+ */
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index e8f3f8d622ec..a5641956ef10 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -1527,6 +1527,10 @@ error:
usb_audio_err(chip,
"cannot get connectors status: req = %#x, wValue = %#x, wIndex = %#x, type = %d\n",
UAC_GET_CUR, validx, idx, cval->val_type);
+
+ if (val)
+ *val = 0;
+
return filter_error(cval, ret);
}
@@ -3674,17 +3678,14 @@ static int restore_mixer_value(struct usb_mixer_elem_list *list)
err = snd_usb_set_cur_mix_value(cval, c + 1, idx,
cval->cache_val[idx]);
if (err < 0)
- return err;
+ break;
}
idx++;
}
} else {
/* master */
- if (cval->cached) {
- err = snd_usb_set_cur_mix_value(cval, 0, 0, *cval->cache_val);
- if (err < 0)
- return err;
- }
+ if (cval->cached)
+ snd_usb_set_cur_mix_value(cval, 0, 0, *cval->cache_val);
}
return 0;
diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index b1522e43173e..0ea39565e623 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -84,7 +84,7 @@
* combination.
*/
{
- USB_DEVICE(0x041e, 0x4095),
+ USB_AUDIO_DEVICE(0x041e, 0x4095),
.driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
.ifnum = QUIRK_ANY_INTERFACE,
.type = QUIRK_COMPOSITE,
diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c
index 1c94eaff1931..4a3ff6468aa7 100644
--- a/sound/x86/intel_hdmi_audio.c
+++ b/sound/x86/intel_hdmi_audio.c
@@ -1261,7 +1261,7 @@ static int had_pcm_mmap(struct snd_pcm_substream *substream,
{
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
return remap_pfn_range(vma, vma->vm_start,
- substream->dma_buffer.addr >> PAGE_SHIFT,
+ substream->runtime->dma_addr >> PAGE_SHIFT,
vma->vm_end - vma->vm_start, vma->vm_page_prot);
}
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index b3edde68bc3e..323e251ed37b 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -281,6 +281,11 @@ struct kvm_arm_copy_mte_tags {
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 KVM_REG_ARM_FW_REG(3)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL 0
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL 1
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED 2
+
/* SVE registers */
#define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT)
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 18de5f76f198..65d147974f8d 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -204,7 +204,7 @@
/* FREE! ( 7*32+10) */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
@@ -299,7 +299,9 @@
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
+#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
+#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h
index dc632b41f135..65c0d9ce1e29 100644
--- a/tools/arch/x86/include/asm/insn.h
+++ b/tools/arch/x86/include/asm/insn.h
@@ -124,7 +124,7 @@ struct insn {
#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */
#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */
/* VEX bit fields */
-#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */
+#define X86_EVEX_M(vex) ((vex) & 0x07) /* EVEX Byte1 */
#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */
#define X86_VEX2_M 1 /* VEX2.M always 1 */
#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 3faf0f97edb1..a4a39c3e0f19 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -476,6 +476,7 @@
#define MSR_AMD64_ICIBSEXTDCTL 0xc001103c
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b
#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e
#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 2da3316bb559..bf6e96011dfe 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -452,6 +452,9 @@ struct kvm_sync_regs {
#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
+/* attributes for system fd (group 0) */
+#define KVM_X86_XCOMP_GUEST_SUPP 0
+
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 59cf2343f3d9..d0d7b9bc6cad 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -27,8 +27,7 @@
* Output:
* rax original destination
*/
-SYM_FUNC_START_ALIAS(__memcpy)
-SYM_FUNC_START_WEAK(memcpy)
+SYM_FUNC_START(__memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memcpy_erms", X86_FEATURE_ERMS
@@ -40,11 +39,12 @@ SYM_FUNC_START_WEAK(memcpy)
movl %edx, %ecx
rep movsb
RET
-SYM_FUNC_END(memcpy)
-SYM_FUNC_END_ALIAS(__memcpy)
-EXPORT_SYMBOL(memcpy)
+SYM_FUNC_END(__memcpy)
EXPORT_SYMBOL(__memcpy)
+SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
+EXPORT_SYMBOL(memcpy)
+
/*
* memcpy_erms() - enhanced fast string memcpy. This is faster and
* simpler than memcpy. Use memcpy_erms when possible.
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
index d624f2bc42f1..fc9ffd3ff3b2 100644
--- a/tools/arch/x86/lib/memset_64.S
+++ b/tools/arch/x86/lib/memset_64.S
@@ -17,7 +17,6 @@
*
* rax original destination
*/
-SYM_FUNC_START_WEAK(memset)
SYM_FUNC_START(__memset)
/*
* Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
@@ -42,10 +41,11 @@ SYM_FUNC_START(__memset)
movq %r9,%rax
RET
SYM_FUNC_END(__memset)
-SYM_FUNC_END_ALIAS(memset)
-EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)
+SYM_FUNC_ALIAS_WEAK(memset, __memset)
+EXPORT_SYMBOL(memset)
+
/*
* ISO C memset - set a memory block to a byte value. This function uses
* enhanced rep stosb to override the fast string function.
diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt
index ec31f5b60323..d12d1358f96d 100644
--- a/tools/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/arch/x86/lib/x86-opcode-map.txt
@@ -690,7 +690,10 @@ AVXcode: 2
45: vpsrlvd/q Vx,Hx,Wx (66),(v)
46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo)
47: vpsllvd/q Vx,Hx,Wx (66),(v)
-# Skip 0x48-0x4b
+# Skip 0x48
+49: TILERELEASE (v1),(000),(11B) | LDTILECFG Mtc (v1)(000) | STTILECFG Mtc (66),(v1),(000) | TILEZERO Vt (F2),(v1),(11B)
+# Skip 0x4a
+4b: TILELOADD Vt,Wsm (F2),(v1) | TILELOADDT1 Vt,Wsm (66),(v1) | TILESTORED Wsm,Vt (F3),(v)
4c: vrcp14ps/d Vpd,Wpd (66),(ev)
4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
@@ -705,7 +708,10 @@ AVXcode: 2
59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
-# Skip 0x5c-0x61
+5c: TDPBF16PS Vt,Wt,Ht (F3),(v1)
+# Skip 0x5d
+5e: TDPBSSD Vt,Wt,Ht (F2),(v1) | TDPBSUD Vt,Wt,Ht (F3),(v1) | TDPBUSD Vt,Wt,Ht (66),(v1) | TDPBUUD Vt,Wt,Ht (v1)
+# Skip 0x5f-0x61
62: vpexpandb/w Vx,Wx (66),(ev)
63: vpcompressb/w Wx,Vx (66),(ev)
64: vpblendmd/q Vx,Hx,Wx (66),(ev)
@@ -822,9 +828,9 @@ AVXcode: 3
05: vpermilpd Vx,Wx,Ib (66),(v)
06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
07:
-08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo)
+08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) | vrndscaleph Vx,Wx,Ib (evo)
09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo)
-0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo)
+0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) | vrndscalesh Vx,Hx,Wx,Ib (evo)
0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo)
0c: vblendps Vx,Hx,Wx,Ib (66)
0d: vblendpd Vx,Hx,Wx,Ib (66)
@@ -846,8 +852,8 @@ AVXcode: 3
22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev)
-26: vgetmantps/d Vx,Wx,Ib (66),(ev)
-27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev)
+26: vgetmantps/d Vx,Wx,Ib (66),(ev) | vgetmantph Vx,Wx,Ib (ev)
+27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) | vgetmantsh Vx,Hx,Wx,Ib (ev)
30: kshiftrb/w Vk,Uk,Ib (66),(v)
31: kshiftrd/q Vk,Uk,Ib (66),(v)
32: kshiftlb/w Vk,Uk,Ib (66),(v)
@@ -871,23 +877,102 @@ AVXcode: 3
51: vrangess/d Vx,Hx,Wx,Ib (66),(ev)
54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev)
55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev)
-56: vreduceps/d Vx,Wx,Ib (66),(ev)
-57: vreducess/d Vx,Hx,Wx,Ib (66),(ev)
+56: vreduceps/d Vx,Wx,Ib (66),(ev) | vreduceph Vx,Wx,Ib (ev)
+57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) | vreducesh Vx,Hx,Wx,Ib (ev)
60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
-66: vfpclassps/d Vk,Wx,Ib (66),(ev)
-67: vfpclassss/d Vk,Wx,Ib (66),(ev)
+66: vfpclassps/d Vk,Wx,Ib (66),(ev) | vfpclassph Vx,Wx,Ib (ev)
+67: vfpclassss/d Vk,Wx,Ib (66),(ev) | vfpclasssh Vx,Wx,Ib (ev)
70: vpshldw Vx,Hx,Wx,Ib (66),(ev)
71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev)
72: vpshrdw Vx,Hx,Wx,Ib (66),(ev)
73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev)
+c2: vcmpph Vx,Hx,Wx,Ib (ev) | vcmpsh Vx,Hx,Wx,Ib (F3),(ev)
cc: sha1rnds4 Vdq,Wdq,Ib
ce: vgf2p8affineqb Vx,Wx,Ib (66)
cf: vgf2p8affineinvqb Vx,Wx,Ib (66)
df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
-f0: RORX Gy,Ey,Ib (F2),(v)
+f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B)
+EndTable
+
+Table: EVEX map 5
+Referrer:
+AVXcode: 5
+10: vmovsh Vx,Hx,Wx (F3),(ev) | vmovsh Vx,Wx (F3),(ev)
+11: vmovsh Wx,Hx,Vx (F3),(ev) | vmovsh Wx,Vx (F3),(ev)
+1d: vcvtps2phx Vx,Wx (66),(ev) | vcvtss2sh Vx,Hx,Wx (ev)
+2a: vcvtsi2sh Vx,Hx,Wx (F3),(ev)
+2c: vcvttsh2si Vx,Wx (F3),(ev)
+2d: vcvtsh2si Vx,Wx (F3),(ev)
+2e: vucomish Vx,Wx (ev)
+2f: vcomish Vx,Wx (ev)
+51: vsqrtph Vx,Wx (ev) | vsqrtsh Vx,Hx,Wx (F3),(ev)
+58: vaddph Vx,Hx,Wx (ev) | vaddsh Vx,Hx,Wx (F3),(ev)
+59: vmulph Vx,Hx,Wx (ev) | vmulsh Vx,Hx,Wx (F3),(ev)
+5a: vcvtpd2ph Vx,Wx (66),(ev) | vcvtph2pd Vx,Wx (ev) | vcvtsd2sh Vx,Hx,Wx (F2),(ev) | vcvtsh2sd Vx,Hx,Wx (F3),(ev)
+5b: vcvtdq2ph Vx,Wx (ev) | vcvtph2dq Vx,Wx (66),(ev) | vcvtqq2ph Vx,Wx (ev) | vcvttph2dq Vx,Wx (F3),(ev)
+5c: vsubph Vx,Hx,Wx (ev) | vsubsh Vx,Hx,Wx (F3),(ev)
+5d: vminph Vx,Hx,Wx (ev) | vminsh Vx,Hx,Wx (F3),(ev)
+5e: vdivph Vx,Hx,Wx (ev) | vdivsh Vx,Hx,Wx (F3),(ev)
+5f: vmaxph Vx,Hx,Wx (ev) | vmaxsh Vx,Hx,Wx (F3),(ev)
+6e: vmovw Vx,Wx (66),(ev)
+78: vcvttph2udq Vx,Wx (ev) | vcvttph2uqq Vx,Wx (66),(ev) | vcvttsh2usi Vx,Wx (F3),(ev)
+79: vcvtph2udq Vx,Wx (ev) | vcvtph2uqq Vx,Wx (66),(ev) | vcvtsh2usi Vx,Wx (F3),(ev)
+7a: vcvttph2qq Vx,Wx (66),(ev) | vcvtudq2ph Vx,Wx (F2),(ev) | vcvtuqq2ph Vx,Wx (F2),(ev)
+7b: vcvtph2qq Vx,Wx (66),(ev) | vcvtusi2sh Vx,Hx,Wx (F3),(ev)
+7c: vcvttph2uw Vx,Wx (ev) | vcvttph2w Vx,Wx (66),(ev)
+7d: vcvtph2uw Vx,Wx (ev) | vcvtph2w Vx,Wx (66),(ev) | vcvtuw2ph Vx,Wx (F2),(ev) | vcvtw2ph Vx,Wx (F3),(ev)
+7e: vmovw Wx,Vx (66),(ev)
+EndTable
+
+Table: EVEX map 6
+Referrer:
+AVXcode: 6
+13: vcvtph2psx Vx,Wx (66),(ev) | vcvtsh2ss Vx,Hx,Wx (ev)
+2c: vscalefph Vx,Hx,Wx (66),(ev)
+2d: vscalefsh Vx,Hx,Wx (66),(ev)
+42: vgetexpph Vx,Wx (66),(ev)
+43: vgetexpsh Vx,Hx,Wx (66),(ev)
+4c: vrcpph Vx,Wx (66),(ev)
+4d: vrcpsh Vx,Hx,Wx (66),(ev)
+4e: vrsqrtph Vx,Wx (66),(ev)
+4f: vrsqrtsh Vx,Hx,Wx (66),(ev)
+56: vfcmaddcph Vx,Hx,Wx (F2),(ev) | vfmaddcph Vx,Hx,Wx (F3),(ev)
+57: vfcmaddcsh Vx,Hx,Wx (F2),(ev) | vfmaddcsh Vx,Hx,Wx (F3),(ev)
+96: vfmaddsub132ph Vx,Hx,Wx (66),(ev)
+97: vfmsubadd132ph Vx,Hx,Wx (66),(ev)
+98: vfmadd132ph Vx,Hx,Wx (66),(ev)
+99: vfmadd132sh Vx,Hx,Wx (66),(ev)
+9a: vfmsub132ph Vx,Hx,Wx (66),(ev)
+9b: vfmsub132sh Vx,Hx,Wx (66),(ev)
+9c: vfnmadd132ph Vx,Hx,Wx (66),(ev)
+9d: vfnmadd132sh Vx,Hx,Wx (66),(ev)
+9e: vfnmsub132ph Vx,Hx,Wx (66),(ev)
+9f: vfnmsub132sh Vx,Hx,Wx (66),(ev)
+a6: vfmaddsub213ph Vx,Hx,Wx (66),(ev)
+a7: vfmsubadd213ph Vx,Hx,Wx (66),(ev)
+a8: vfmadd213ph Vx,Hx,Wx (66),(ev)
+a9: vfmadd213sh Vx,Hx,Wx (66),(ev)
+aa: vfmsub213ph Vx,Hx,Wx (66),(ev)
+ab: vfmsub213sh Vx,Hx,Wx (66),(ev)
+ac: vfnmadd213ph Vx,Hx,Wx (66),(ev)
+ad: vfnmadd213sh Vx,Hx,Wx (66),(ev)
+ae: vfnmsub213ph Vx,Hx,Wx (66),(ev)
+af: vfnmsub213sh Vx,Hx,Wx (66),(ev)
+b6: vfmaddsub231ph Vx,Hx,Wx (66),(ev)
+b7: vfmsubadd231ph Vx,Hx,Wx (66),(ev)
+b8: vfmadd231ph Vx,Hx,Wx (66),(ev)
+b9: vfmadd231sh Vx,Hx,Wx (66),(ev)
+ba: vfmsub231ph Vx,Hx,Wx (66),(ev)
+bb: vfmsub231sh Vx,Hx,Wx (66),(ev)
+bc: vfnmadd231ph Vx,Hx,Wx (66),(ev)
+bd: vfnmadd231sh Vx,Hx,Wx (66),(ev)
+be: vfnmsub231ph Vx,Hx,Wx (66),(ev)
+bf: vfnmsub231sh Vx,Hx,Wx (66),(ev)
+d6: vfcmulcph Vx,Hx,Wx (F2),(ev) | vfmulcph Vx,Hx,Wx (F3),(ev)
+d7: vfcmulcsh Vx,Hx,Wx (F2),(ev) | vfmulcsh Vx,Hx,Wx (F3),(ev)
EndTable
GrpTable: Grp1
@@ -970,7 +1055,7 @@ GrpTable: Grp7
2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
3: LIDT Ms
4: SMSW Mw/Rv
-5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B)
+5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) | CLUI (F3),(110),(11B) | SERIALIZE (000),(11B) | STUI (F3),(111),(11B) | TESTUI (F3),(101),(11B) | UIRET (F3),(100),(11B) | XRESLDTRK (F2),(000),(11B) | XSUSLDTRK (F2),(001),(11B)
6: LMSW Ew
7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
EndTable
@@ -987,7 +1072,7 @@ GrpTable: Grp9
3: xrstors
4: xsavec
5: xsaves
-6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) | SENDUIPI Gq (F3)
7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
EndTable
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index 9ddeca947635..320a88ac28c9 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -9,7 +9,11 @@ ifeq ($(V),1)
msg =
else
Q = @
- msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))";
+ ifeq ($(silent),1)
+ msg =
+ else
+ msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))";
+ endif
MAKEFLAGS=--no-print-directory
endif
diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c
index ab9353f2fd46..9a5c1f008fe6 100644
--- a/tools/bpf/runqslower/runqslower.bpf.c
+++ b/tools/bpf/runqslower/runqslower.bpf.c
@@ -68,7 +68,7 @@ int handle__sched_switch(u64 *ctx)
*/
struct task_struct *prev = (struct task_struct *)ctx[1];
struct task_struct *next = (struct task_struct *)ctx[2];
- struct event event = {};
+ struct runq_event event = {};
u64 *tsp, delta_us;
long state;
u32 pid;
diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c
index 2414cc764461..d78f4148597f 100644
--- a/tools/bpf/runqslower/runqslower.c
+++ b/tools/bpf/runqslower/runqslower.c
@@ -100,7 +100,7 @@ static int bump_memlock_rlimit(void)
void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
{
- const struct event *e = data;
+ const struct runq_event *e = data;
struct tm *tm;
char ts[32];
time_t t;
diff --git a/tools/bpf/runqslower/runqslower.h b/tools/bpf/runqslower/runqslower.h
index 9db225425e5f..4f70f07200c2 100644
--- a/tools/bpf/runqslower/runqslower.h
+++ b/tools/bpf/runqslower/runqslower.h
@@ -4,7 +4,7 @@
#define TASK_COMM_LEN 16
-struct event {
+struct runq_event {
char task[TASK_COMM_LEN];
__u64 delta_us;
pid_t pid;
diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py
index 1600b17dbb8a..1d3a90d93fe2 100644
--- a/tools/cgroup/memcg_slabinfo.py
+++ b/tools/cgroup/memcg_slabinfo.py
@@ -11,7 +11,7 @@ from drgn.helpers.linux import list_for_each_entry, list_empty
from drgn.helpers.linux import for_each_page
from drgn.helpers.linux.cpumask import for_each_online_cpu
from drgn.helpers.linux.percpu import per_cpu_ptr
-from drgn import container_of, FaultError, Object
+from drgn import container_of, FaultError, Object, cast
DESC = """
@@ -69,15 +69,15 @@ def oo_objects(s):
def count_partial(n, fn):
- nr_pages = 0
- for page in list_for_each_entry('struct page', n.partial.address_of_(),
- 'lru'):
- nr_pages += fn(page)
- return nr_pages
+ nr_objs = 0
+ for slab in list_for_each_entry('struct slab', n.partial.address_of_(),
+ 'slab_list'):
+ nr_objs += fn(slab)
+ return nr_objs
-def count_free(page):
- return page.objects - page.inuse
+def count_free(slab):
+ return slab.objects - slab.inuse
def slub_get_slabinfo(s, cfg):
@@ -145,14 +145,14 @@ def detect_kernel_config():
return cfg
-def for_each_slab_page(prog):
+def for_each_slab(prog):
PGSlab = 1 << prog.constant('PG_slab')
PGHead = 1 << prog.constant('PG_head')
for page in for_each_page(prog):
try:
if page.flags.value_() & PGSlab:
- yield page
+ yield cast('struct slab *', page)
except FaultError:
pass
@@ -190,13 +190,13 @@ def main():
'list'):
obj_cgroups.add(ptr.value_())
- # look over all slab pages, belonging to non-root memcgs
- # and look for objects belonging to the given memory cgroup
- for page in for_each_slab_page(prog):
- objcg_vec_raw = page.memcg_data.value_()
+ # look over all slab folios and look for objects belonging
+ # to the given memory cgroup
+ for slab in for_each_slab(prog):
+ objcg_vec_raw = slab.memcg_data.value_()
if objcg_vec_raw == 0:
continue
- cache = page.slab_cache
+ cache = slab.slab_cache
if not cache:
continue
addr = cache.value_()
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 9563d294f181..507ee1f2aa96 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1133,6 +1133,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
#define KVM_CAP_VM_GPA_BITS 207
#define KVM_CAP_XSAVE2 208
+#define KVM_CAP_SYS_ATTRIBUTES 209
+#define KVM_CAP_PPC_AIL_MODE_3 210
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1623,9 +1625,6 @@ struct kvm_enc_region {
#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
-/* Available with KVM_CAP_XSAVE2 */
-#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
-
struct kvm_s390_pv_sec_parm {
__u64 origin;
__u64 length;
@@ -2047,4 +2046,7 @@ struct kvm_stats_desc {
#define KVM_GET_STATS_FD _IO(KVMIO, 0xce)
+/* Available with KVM_CAP_XSAVE2 */
+#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
+
#endif /* __LINUX_KVM_H */
diff --git a/tools/include/uapi/linux/lirc.h b/tools/include/uapi/linux/lirc.h
deleted file mode 100644
index 45fcbf99d72e..000000000000
--- a/tools/include/uapi/linux/lirc.h
+++ /dev/null
@@ -1,229 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * lirc.h - linux infrared remote control header file
- * last modified 2010/07/13 by Jarod Wilson
- */
-
-#ifndef _LINUX_LIRC_H
-#define _LINUX_LIRC_H
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-
-#define PULSE_BIT 0x01000000
-#define PULSE_MASK 0x00FFFFFF
-
-#define LIRC_MODE2_SPACE 0x00000000
-#define LIRC_MODE2_PULSE 0x01000000
-#define LIRC_MODE2_FREQUENCY 0x02000000
-#define LIRC_MODE2_TIMEOUT 0x03000000
-
-#define LIRC_VALUE_MASK 0x00FFFFFF
-#define LIRC_MODE2_MASK 0xFF000000
-
-#define LIRC_SPACE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_SPACE)
-#define LIRC_PULSE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_PULSE)
-#define LIRC_FREQUENCY(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY)
-#define LIRC_TIMEOUT(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT)
-
-#define LIRC_VALUE(val) ((val)&LIRC_VALUE_MASK)
-#define LIRC_MODE2(val) ((val)&LIRC_MODE2_MASK)
-
-#define LIRC_IS_SPACE(val) (LIRC_MODE2(val) == LIRC_MODE2_SPACE)
-#define LIRC_IS_PULSE(val) (LIRC_MODE2(val) == LIRC_MODE2_PULSE)
-#define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY)
-#define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT)
-
-/* used heavily by lirc userspace */
-#define lirc_t int
-
-/*** lirc compatible hardware features ***/
-
-#define LIRC_MODE2SEND(x) (x)
-#define LIRC_SEND2MODE(x) (x)
-#define LIRC_MODE2REC(x) ((x) << 16)
-#define LIRC_REC2MODE(x) ((x) >> 16)
-
-#define LIRC_MODE_RAW 0x00000001
-#define LIRC_MODE_PULSE 0x00000002
-#define LIRC_MODE_MODE2 0x00000004
-#define LIRC_MODE_SCANCODE 0x00000008
-#define LIRC_MODE_LIRCCODE 0x00000010
-
-
-#define LIRC_CAN_SEND_RAW LIRC_MODE2SEND(LIRC_MODE_RAW)
-#define LIRC_CAN_SEND_PULSE LIRC_MODE2SEND(LIRC_MODE_PULSE)
-#define LIRC_CAN_SEND_MODE2 LIRC_MODE2SEND(LIRC_MODE_MODE2)
-#define LIRC_CAN_SEND_LIRCCODE LIRC_MODE2SEND(LIRC_MODE_LIRCCODE)
-
-#define LIRC_CAN_SEND_MASK 0x0000003f
-
-#define LIRC_CAN_SET_SEND_CARRIER 0x00000100
-#define LIRC_CAN_SET_SEND_DUTY_CYCLE 0x00000200
-#define LIRC_CAN_SET_TRANSMITTER_MASK 0x00000400
-
-#define LIRC_CAN_REC_RAW LIRC_MODE2REC(LIRC_MODE_RAW)
-#define LIRC_CAN_REC_PULSE LIRC_MODE2REC(LIRC_MODE_PULSE)
-#define LIRC_CAN_REC_MODE2 LIRC_MODE2REC(LIRC_MODE_MODE2)
-#define LIRC_CAN_REC_SCANCODE LIRC_MODE2REC(LIRC_MODE_SCANCODE)
-#define LIRC_CAN_REC_LIRCCODE LIRC_MODE2REC(LIRC_MODE_LIRCCODE)
-
-#define LIRC_CAN_REC_MASK LIRC_MODE2REC(LIRC_CAN_SEND_MASK)
-
-#define LIRC_CAN_SET_REC_CARRIER (LIRC_CAN_SET_SEND_CARRIER << 16)
-#define LIRC_CAN_SET_REC_DUTY_CYCLE (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16)
-
-#define LIRC_CAN_SET_REC_DUTY_CYCLE_RANGE 0x40000000
-#define LIRC_CAN_SET_REC_CARRIER_RANGE 0x80000000
-#define LIRC_CAN_GET_REC_RESOLUTION 0x20000000
-#define LIRC_CAN_SET_REC_TIMEOUT 0x10000000
-#define LIRC_CAN_SET_REC_FILTER 0x08000000
-
-#define LIRC_CAN_MEASURE_CARRIER 0x02000000
-#define LIRC_CAN_USE_WIDEBAND_RECEIVER 0x04000000
-
-#define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK)
-#define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK)
-
-#define LIRC_CAN_NOTIFY_DECODE 0x01000000
-
-/*** IOCTL commands for lirc driver ***/
-
-#define LIRC_GET_FEATURES _IOR('i', 0x00000000, __u32)
-
-#define LIRC_GET_SEND_MODE _IOR('i', 0x00000001, __u32)
-#define LIRC_GET_REC_MODE _IOR('i', 0x00000002, __u32)
-#define LIRC_GET_REC_RESOLUTION _IOR('i', 0x00000007, __u32)
-
-#define LIRC_GET_MIN_TIMEOUT _IOR('i', 0x00000008, __u32)
-#define LIRC_GET_MAX_TIMEOUT _IOR('i', 0x00000009, __u32)
-
-/* code length in bits, currently only for LIRC_MODE_LIRCCODE */
-#define LIRC_GET_LENGTH _IOR('i', 0x0000000f, __u32)
-
-#define LIRC_SET_SEND_MODE _IOW('i', 0x00000011, __u32)
-#define LIRC_SET_REC_MODE _IOW('i', 0x00000012, __u32)
-/* Note: these can reset the according pulse_width */
-#define LIRC_SET_SEND_CARRIER _IOW('i', 0x00000013, __u32)
-#define LIRC_SET_REC_CARRIER _IOW('i', 0x00000014, __u32)
-#define LIRC_SET_SEND_DUTY_CYCLE _IOW('i', 0x00000015, __u32)
-#define LIRC_SET_TRANSMITTER_MASK _IOW('i', 0x00000017, __u32)
-
-/*
- * when a timeout != 0 is set the driver will send a
- * LIRC_MODE2_TIMEOUT data packet, otherwise LIRC_MODE2_TIMEOUT is
- * never sent, timeout is disabled by default
- */
-#define LIRC_SET_REC_TIMEOUT _IOW('i', 0x00000018, __u32)
-
-/* 1 enables, 0 disables timeout reports in MODE2 */
-#define LIRC_SET_REC_TIMEOUT_REPORTS _IOW('i', 0x00000019, __u32)
-
-/*
- * if enabled from the next key press on the driver will send
- * LIRC_MODE2_FREQUENCY packets
- */
-#define LIRC_SET_MEASURE_CARRIER_MODE _IOW('i', 0x0000001d, __u32)
-
-/*
- * to set a range use LIRC_SET_REC_CARRIER_RANGE with the
- * lower bound first and later LIRC_SET_REC_CARRIER with the upper bound
- */
-#define LIRC_SET_REC_CARRIER_RANGE _IOW('i', 0x0000001f, __u32)
-
-#define LIRC_SET_WIDEBAND_RECEIVER _IOW('i', 0x00000023, __u32)
-
-/*
- * Return the recording timeout, which is either set by
- * the ioctl LIRC_SET_REC_TIMEOUT or by the kernel after setting the protocols.
- */
-#define LIRC_GET_REC_TIMEOUT _IOR('i', 0x00000024, __u32)
-
-/*
- * struct lirc_scancode - decoded scancode with protocol for use with
- * LIRC_MODE_SCANCODE
- *
- * @timestamp: Timestamp in nanoseconds using CLOCK_MONOTONIC when IR
- * was decoded.
- * @flags: should be 0 for transmit. When receiving scancodes,
- * LIRC_SCANCODE_FLAG_TOGGLE or LIRC_SCANCODE_FLAG_REPEAT can be set
- * depending on the protocol
- * @rc_proto: see enum rc_proto
- * @keycode: the translated keycode. Set to 0 for transmit.
- * @scancode: the scancode received or to be sent
- */
-struct lirc_scancode {
- __u64 timestamp;
- __u16 flags;
- __u16 rc_proto;
- __u32 keycode;
- __u64 scancode;
-};
-
-/* Set if the toggle bit of rc-5 or rc-6 is enabled */
-#define LIRC_SCANCODE_FLAG_TOGGLE 1
-/* Set if this is a nec or sanyo repeat */
-#define LIRC_SCANCODE_FLAG_REPEAT 2
-
-/**
- * enum rc_proto - the Remote Controller protocol
- *
- * @RC_PROTO_UNKNOWN: Protocol not known
- * @RC_PROTO_OTHER: Protocol known but proprietary
- * @RC_PROTO_RC5: Philips RC5 protocol
- * @RC_PROTO_RC5X_20: Philips RC5x 20 bit protocol
- * @RC_PROTO_RC5_SZ: StreamZap variant of RC5
- * @RC_PROTO_JVC: JVC protocol
- * @RC_PROTO_SONY12: Sony 12 bit protocol
- * @RC_PROTO_SONY15: Sony 15 bit protocol
- * @RC_PROTO_SONY20: Sony 20 bit protocol
- * @RC_PROTO_NEC: NEC protocol
- * @RC_PROTO_NECX: Extended NEC protocol
- * @RC_PROTO_NEC32: NEC 32 bit protocol
- * @RC_PROTO_SANYO: Sanyo protocol
- * @RC_PROTO_MCIR2_KBD: RC6-ish MCE keyboard
- * @RC_PROTO_MCIR2_MSE: RC6-ish MCE mouse
- * @RC_PROTO_RC6_0: Philips RC6-0-16 protocol
- * @RC_PROTO_RC6_6A_20: Philips RC6-6A-20 protocol
- * @RC_PROTO_RC6_6A_24: Philips RC6-6A-24 protocol
- * @RC_PROTO_RC6_6A_32: Philips RC6-6A-32 protocol
- * @RC_PROTO_RC6_MCE: MCE (Philips RC6-6A-32 subtype) protocol
- * @RC_PROTO_SHARP: Sharp protocol
- * @RC_PROTO_XMP: XMP protocol
- * @RC_PROTO_CEC: CEC protocol
- * @RC_PROTO_IMON: iMon Pad protocol
- * @RC_PROTO_RCMM12: RC-MM protocol 12 bits
- * @RC_PROTO_RCMM24: RC-MM protocol 24 bits
- * @RC_PROTO_RCMM32: RC-MM protocol 32 bits
- */
-enum rc_proto {
- RC_PROTO_UNKNOWN = 0,
- RC_PROTO_OTHER = 1,
- RC_PROTO_RC5 = 2,
- RC_PROTO_RC5X_20 = 3,
- RC_PROTO_RC5_SZ = 4,
- RC_PROTO_JVC = 5,
- RC_PROTO_SONY12 = 6,
- RC_PROTO_SONY15 = 7,
- RC_PROTO_SONY20 = 8,
- RC_PROTO_NEC = 9,
- RC_PROTO_NECX = 10,
- RC_PROTO_NEC32 = 11,
- RC_PROTO_SANYO = 12,
- RC_PROTO_MCIR2_KBD = 13,
- RC_PROTO_MCIR2_MSE = 14,
- RC_PROTO_RC6_0 = 15,
- RC_PROTO_RC6_6A_20 = 16,
- RC_PROTO_RC6_6A_24 = 17,
- RC_PROTO_RC6_6A_32 = 18,
- RC_PROTO_RC6_MCE = 19,
- RC_PROTO_SHARP = 20,
- RC_PROTO_XMP = 21,
- RC_PROTO_CEC = 22,
- RC_PROTO_IMON = 23,
- RC_PROTO_RCMM12 = 24,
- RC_PROTO_RCMM24 = 25,
- RC_PROTO_RCMM32 = 26,
-};
-
-#endif
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 4cd39aaccbe7..d37629dbad72 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -251,6 +251,8 @@ enum {
PERF_BR_SYSRET = 8, /* syscall return */
PERF_BR_COND_CALL = 9, /* conditional function call */
PERF_BR_COND_RET = 10, /* conditional function return */
+ PERF_BR_ERET = 11, /* exception return */
+ PERF_BR_IRQ = 12, /* irq */
PERF_BR_MAX,
};
@@ -465,6 +467,8 @@ struct perf_event_attr {
/*
* User provided data if sigtrap=1, passed back to user via
* siginfo_t::si_perf_data, e.g. to permit user to identify the event.
+	 * Note that siginfo_t::si_perf_data is long-sized, so sig_data will
+	 * be truncated accordingly on 32-bit architectures.
*/
__u64 sig_data;
};
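
A hedged sketch of the consuming side (illustrative only, not part of this
patch): with attr.sigtrap = 1, the value placed in sig_data comes back to the
signal handler via siginfo_t::si_perf_data, subject to the truncation noted
in the comment above:

	#include <signal.h>

	static void sigtrap_handler(int sig, siginfo_t *info, void *ucontext)
	{
		/* long-sized, hence truncated on 32-bit architectures */
		unsigned long data = info->si_perf_data;

		(void)data;	/* e.g. use it to identify the event */
	}
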
@@ -1332,9 +1336,9 @@ union perf_mem_data_src {
/* hop level */
#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */
-#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */
-#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */
-#define PERF_MEM_HOPS_3 0x04 /* remote board */
+#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */
+#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */
+#define PERF_MEM_HOPS_3 0x04 /* remote board */
/* 5-7 available */
#define PERF_MEM_HOPS_SHIFT 43
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index bb73e9a0b24f..e998764f0262 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -272,4 +272,7 @@ struct prctl_mm_map {
# define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1
# define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2
+#define PR_SET_VMA 0x53564d41
+# define PR_SET_VMA_ANON_NAME 0
+
#endif /* _LINUX_PRCTL_H */
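
A usage sketch (the mapping and its name below are hypothetical): PR_SET_VMA
with PR_SET_VMA_ANON_NAME attaches a label to an anonymous mapping, which then
shows up in /proc/<pid>/maps when the kernel supports anonymous VMA naming:

	#include <sys/mman.h>
	#include <sys/prctl.h>

	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	/* label the region; requires CONFIG_ANON_VMA_NAME */
	prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
	      (unsigned long)p, 4096, (unsigned long)"example-heap");
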
diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
index 5fbb79e30819..2d3e5df39a59 100644
--- a/tools/include/uapi/sound/asound.h
+++ b/tools/include/uapi/sound/asound.h
@@ -56,8 +56,10 @@
* *
****************************************************************************/
+#define AES_IEC958_STATUS_SIZE 24
+
struct snd_aes_iec958 {
- unsigned char status[24]; /* AES/IEC958 channel status bits */
+ unsigned char status[AES_IEC958_STATUS_SIZE]; /* AES/IEC958 channel status bits */
unsigned char subcode[147]; /* AES/IEC958 subcode bits */
unsigned char pad; /* nothing */
unsigned char dig_subframe[4]; /* AES/IEC958 subframe bits */
@@ -202,6 +204,11 @@ typedef int __bitwise snd_pcm_format_t;
#define SNDRV_PCM_FORMAT_S24_BE ((__force snd_pcm_format_t) 7) /* low three bytes */
#define SNDRV_PCM_FORMAT_U24_LE ((__force snd_pcm_format_t) 8) /* low three bytes */
#define SNDRV_PCM_FORMAT_U24_BE ((__force snd_pcm_format_t) 9) /* low three bytes */
+/*
+ * For S32/U32 formats, the 'msbits' hardware parameter is often used to report the number of
+ * significant bits, counted from the most significant bit. This covers so-called 'left-justified'
+ * or 'right-padded' samples which carry fewer than 32 valid bits.
+ */
#define SNDRV_PCM_FORMAT_S32_LE ((__force snd_pcm_format_t) 10)
#define SNDRV_PCM_FORMAT_S32_BE ((__force snd_pcm_format_t) 11)
#define SNDRV_PCM_FORMAT_U32_LE ((__force snd_pcm_format_t) 12)
@@ -300,7 +307,7 @@ typedef int __bitwise snd_pcm_subformat_t;
#define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME 0x04000000 /* report estimated link audio time */
#define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000 /* report synchronized audio/system time */
#define SNDRV_PCM_INFO_EXPLICIT_SYNC 0x10000000 /* needs explicit sync of pointers and data */
-
+#define SNDRV_PCM_INFO_NO_REWINDS 0x20000000 /* hardware can only support monotonic changes of appl_ptr */
#define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */
#define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */
diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h
index 581f9ffb4237..1973a18c096b 100644
--- a/tools/lib/perf/include/internal/cpumap.h
+++ b/tools/lib/perf/include/internal/cpumap.h
@@ -3,11 +3,7 @@
#define __LIBPERF_INTERNAL_CPUMAP_H
#include <linux/refcount.h>
-
-/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
-struct perf_cpu {
- int cpu;
-};
+#include <perf/cpumap.h>
/**
* A sized, reference counted, sorted array of integers representing CPU
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 15b8faafd615..4a2edbdb5e2b 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -7,6 +7,11 @@
#include <stdio.h>
#include <stdbool.h>
+/** A wrapper around a CPU to avoid confusing CPU values with indices into the perf_cpu_map's map. */
+struct perf_cpu {
+ int cpu;
+};
+
LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 93696affda2e..6fa0d651576b 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -2,6 +2,7 @@ LIBPERF_0.0.1 {
global:
libperf_init;
perf_cpu_map__dummy_new;
+ perf_cpu_map__default_new;
perf_cpu_map__get;
perf_cpu_map__put;
perf_cpu_map__new;
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index f7ee07cb5818..0d1634cedf44 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -13,6 +13,7 @@
#include <internal/lib.h>
#include <linux/kernel.h>
#include <linux/math64.h>
+#include <linux/stringify.h>
#include "internal.h"
void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
@@ -294,6 +295,103 @@ static u64 read_timestamp(void)
return low | ((u64)high) << 32;
}
+#elif defined(__aarch64__)
+#define read_sysreg(r) ({ \
+ u64 __val; \
+ asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \
+ __val; \
+})
+
+static u64 read_pmccntr(void)
+{
+ return read_sysreg(pmccntr_el0);
+}
+
+#define PMEVCNTR_READ(idx) \
+ static u64 read_pmevcntr_##idx(void) { \
+ return read_sysreg(pmevcntr##idx##_el0); \
+ }
+
+PMEVCNTR_READ(0);
+PMEVCNTR_READ(1);
+PMEVCNTR_READ(2);
+PMEVCNTR_READ(3);
+PMEVCNTR_READ(4);
+PMEVCNTR_READ(5);
+PMEVCNTR_READ(6);
+PMEVCNTR_READ(7);
+PMEVCNTR_READ(8);
+PMEVCNTR_READ(9);
+PMEVCNTR_READ(10);
+PMEVCNTR_READ(11);
+PMEVCNTR_READ(12);
+PMEVCNTR_READ(13);
+PMEVCNTR_READ(14);
+PMEVCNTR_READ(15);
+PMEVCNTR_READ(16);
+PMEVCNTR_READ(17);
+PMEVCNTR_READ(18);
+PMEVCNTR_READ(19);
+PMEVCNTR_READ(20);
+PMEVCNTR_READ(21);
+PMEVCNTR_READ(22);
+PMEVCNTR_READ(23);
+PMEVCNTR_READ(24);
+PMEVCNTR_READ(25);
+PMEVCNTR_READ(26);
+PMEVCNTR_READ(27);
+PMEVCNTR_READ(28);
+PMEVCNTR_READ(29);
+PMEVCNTR_READ(30);
+
+/*
+ * Read a value directly from PMEVCNTR<idx>
+ */
+static u64 read_perf_counter(unsigned int counter)
+{
+ static u64 (* const read_f[])(void) = {
+ read_pmevcntr_0,
+ read_pmevcntr_1,
+ read_pmevcntr_2,
+ read_pmevcntr_3,
+ read_pmevcntr_4,
+ read_pmevcntr_5,
+ read_pmevcntr_6,
+ read_pmevcntr_7,
+ read_pmevcntr_8,
+ read_pmevcntr_9,
+ read_pmevcntr_10,
+ read_pmevcntr_11,
+		read_pmevcntr_12,
+		read_pmevcntr_13,
+ read_pmevcntr_14,
+ read_pmevcntr_15,
+ read_pmevcntr_16,
+ read_pmevcntr_17,
+ read_pmevcntr_18,
+ read_pmevcntr_19,
+ read_pmevcntr_20,
+ read_pmevcntr_21,
+ read_pmevcntr_22,
+ read_pmevcntr_23,
+ read_pmevcntr_24,
+ read_pmevcntr_25,
+ read_pmevcntr_26,
+ read_pmevcntr_27,
+ read_pmevcntr_28,
+ read_pmevcntr_29,
+ read_pmevcntr_30,
+ read_pmccntr
+ };
+
+ if (counter < ARRAY_SIZE(read_f))
+ return (read_f[counter])();
+
+ return 0;
+}
+
+static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); }
+
#else
static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
static u64 read_timestamp(void) { return 0; }
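
A minimal sketch of the consumer side (not part of this patch; it assumes a
struct perf_event_mmap_page *pc obtained from a self-monitoring mmap, and
follows the seqlock-style read loop documented in perf_event.h):

	static u64 read_self_counter(struct perf_event_mmap_page *pc)
	{
		u32 seq, idx;
		u64 count;

		do {
			seq = pc->lock;
			barrier();			/* compiler barrier */
			idx = pc->index;		/* zero means unusable */
			count = pc->offset;
			if (pc->cap_user_rdpmc && idx)
				count += read_perf_counter(idx - 1);
			barrier();
		} while (pc->lock != seq);		/* retry on a torn read */

		/* sign extension by pc->pmc_width is omitted for brevity */
		return count;
	}
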
diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c
index d39378eaf897..87b0510a556f 100644
--- a/tools/lib/perf/tests/test-cpumap.c
+++ b/tools/lib/perf/tests/test-cpumap.c
@@ -14,6 +14,8 @@ static int libperf_print(enum libperf_print_level level,
int test_cpumap(int argc, char **argv)
{
struct perf_cpu_map *cpus;
+ struct perf_cpu cpu;
+ int idx;
__T_START;
@@ -27,6 +29,15 @@ int test_cpumap(int argc, char **argv)
perf_cpu_map__put(cpus);
perf_cpu_map__put(cpus);
+ cpus = perf_cpu_map__default_new();
+ if (!cpus)
+ return -1;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus)
+ __T("wrong cpu number", cpu.cpu != -1);
+
+ perf_cpu_map__put(cpus);
+
__T_END;
return tests_failed == 0 ? 0 : -1;
}
diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c
index b3479dfa9a1c..fa854c83b7e7 100644
--- a/tools/lib/perf/tests/test-evlist.c
+++ b/tools/lib/perf/tests/test-evlist.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE // needed for sched.h to get sched_[gs]etaffinity and CPU_(ZERO,SET)
+#include <inttypes.h>
#include <sched.h>
#include <stdio.h>
#include <stdarg.h>
@@ -526,12 +527,12 @@ static int test_stat_multiplexing(void)
min = counts[0].val;
for (i = 0; i < EVENT_NUM; i++) {
- __T_VERBOSE("Event %2d -- Raw count = %lu, run = %lu, enable = %lu\n",
+ __T_VERBOSE("Event %2d -- Raw count = %" PRIu64 ", run = %" PRIu64 ", enable = %" PRIu64 "\n",
i, counts[i].val, counts[i].run, counts[i].ena);
perf_counts_values__scale(&counts[i], true, &scaled);
if (scaled == 1) {
- __T_VERBOSE("\t Scaled count = %lu (%.2lf%%, %lu/%lu)\n",
+ __T_VERBOSE("\t Scaled count = %" PRIu64 " (%.2lf%%, %" PRIu64 "/%" PRIu64 ")\n",
counts[i].val,
(double)counts[i].run / (double)counts[i].ena * 100.0,
counts[i].run, counts[i].ena);
diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c
index 33ae9334861a..89be89afb24d 100644
--- a/tools/lib/perf/tests/test-evsel.c
+++ b/tools/lib/perf/tests/test-evsel.c
@@ -130,6 +130,9 @@ static int test_stat_user_read(int event)
struct perf_event_attr attr = {
.type = PERF_TYPE_HARDWARE,
.config = event,
+#ifdef __aarch64__
+ .config1 = 0x2, /* Request user access */
+#endif
};
int err, i;
@@ -150,7 +153,7 @@ static int test_stat_user_read(int event)
pc = perf_evsel__mmap_base(evsel, 0, 0);
__T("failed to get mmapped address", pc);
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
__T("userspace counter access not supported", pc->cap_user_rdpmc);
__T("userspace counter access not enabled", pc->index);
__T("userspace counter width not set", pc->pmc_width >= 32);
diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h
index 794a375dad36..b2aec04fce8f 100644
--- a/tools/lib/subcmd/subcmd-util.h
+++ b/tools/lib/subcmd/subcmd-util.h
@@ -50,15 +50,8 @@ static NORETURN inline void die(const char *err, ...)
static inline void *xrealloc(void *ptr, size_t size)
{
void *ret = realloc(ptr, size);
- if (!ret && !size)
- ret = realloc(ptr, 1);
- if (!ret) {
- ret = realloc(ptr, size);
- if (!ret && !size)
- ret = realloc(ptr, 1);
- if (!ret)
- die("Out of memory, realloc failed");
- }
+ if (!ret)
+ die("Out of memory, realloc failed");
return ret;
}
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index 394ee57d58f2..ee819a402b69 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -485,6 +485,57 @@ have R ->po X. It wouldn't make sense for a computation to depend
somehow on a value that doesn't get loaded from shared memory until
later in the code!
+Here's a trick question: When is a dependency not a dependency? Answer:
+When it is purely syntactic rather than semantic. We say a dependency
+between two accesses is purely syntactic if the second access doesn't
+actually depend on the result of the first. Here is a trivial example:
+
+ r1 = READ_ONCE(x);
+ WRITE_ONCE(y, r1 * 0);
+
+There appears to be a data dependency from the load of x to the store
+of y, since the value to be stored is computed from the value that was
+loaded. But in fact, the value stored does not really depend on
+anything since it will always be 0. Thus the data dependency is only
+syntactic (it appears to exist in the code) but not semantic (the
+second access will always be the same, regardless of the value of the
+first access). Given code like this, a compiler could simply discard
+the value returned by the load from x, which would certainly destroy
+any dependency. (The compiler is not permitted to eliminate entirely
+the load generated for a READ_ONCE() -- that's one of the nice
+properties of READ_ONCE() -- but it is allowed to ignore the load's
+value.)
+
+It's natural to object that no one in their right mind would write
+code like the above. However, macro expansions can easily give rise
+to this sort of thing, in ways that often are not apparent to the
+programmer.
+
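+For instance, a sketch (hypothetical, not taken from any kernel source)
+of a scaling macro that hides the zero multiplier from its caller:
+
+	#define SCALE(v, f)	((v) * (f))
+
+	r1 = READ_ONCE(x);
+	WRITE_ONCE(y, SCALE(r1, 0));	/* expands to r1 * 0 */
+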
+Another mechanism that can lead to purely syntactic dependencies is
+related to the notion of "undefined behavior". Certain program
+behaviors are called "undefined" in the C language specification,
+which means that when they occur there are no guarantees at all about
+the outcome. Consider the following example:
+
+ int a[1];
+ int i;
+
+ r1 = READ_ONCE(i);
+ r2 = READ_ONCE(a[r1]);
+
+Access beyond the end or before the beginning of an array is one kind
+of undefined behavior. Therefore the compiler doesn't have to worry
+about what will happen if r1 is nonzero, and it can assume that r1
+will always be zero regardless of the value actually loaded from i.
+(If the assumption turns out to be wrong the resulting behavior will
+be undefined anyway, so the compiler doesn't care!) Thus the value
+from the load can be discarded, breaking the address dependency.
+
+The LKMM is unaware that purely syntactic dependencies are different
+from semantic dependencies and therefore mistakenly predicts that the
+accesses in the two examples above will be ordered. This is another
+example of how the compiler can undermine the memory model. Be warned.
+
THE READS-FROM RELATION: rf, rfi, and rfe
-----------------------------------------
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index c10ef78df050..479e769ca324 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -112,7 +112,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
const struct elf *elf = file->elf;
struct insn insn;
int x86_64, ret;
- unsigned char op1, op2,
+ unsigned char op1, op2, op3,
rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0;
@@ -139,6 +139,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
op1 = insn.opcode.bytes[0];
op2 = insn.opcode.bytes[1];
+ op3 = insn.opcode.bytes[2];
if (insn.rex_prefix.nbytes) {
rex = insn.rex_prefix.bytes[0];
@@ -491,6 +492,14 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
/* nopl/nopw */
*type = INSN_NOP;
+ } else if (op2 == 0x38 && op3 == 0xf8) {
+ if (insn.prefixes.nbytes == 1 &&
+ insn.prefixes.bytes[0] == 0xf2) {
+ /* ENQCMD cannot be used in the kernel. */
+ WARN("ENQCMD instruction at %s:%lx", sec->name,
+ offset);
+ }
+
} else if (op2 == 0xa0 || op2 == 0xa8) {
/* push fs/gs */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index c2d2ab9a2861..7c33ec67c4a9 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -2854,7 +2854,7 @@ static inline bool func_uaccess_safe(struct symbol *func)
static inline const char *call_dest_name(struct instruction *insn)
{
- static char pvname[16];
+ static char pvname[19];
struct reloc *rel;
int idx;
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
index 9708ae892061..ba429cadb18f 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
@@ -2197,6 +2197,924 @@
"3e f2 ff 25 78 56 34 12 \tnotrack bnd jmp *0x12345678",},
{{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
"3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmp *0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 58 cb \tvaddph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 58 cb \tvaddph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 58 cb \tvaddph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 58 cb \tvaddsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 58 8c c8 78 56 34 12 \tvaddsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 48 c2 eb 12 \tvcmple_oqph %zmm3,%zmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 48 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%zmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 c2 eb 12 \tvcmple_oqph %xmm3,%xmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%xmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 28 c2 eb 12 \tvcmple_oqph %ymm3,%ymm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 28 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%ymm2,%k5",},
+{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6e 08 c2 eb 12 \tvcmple_oqsh %xmm3,%xmm2,%k5",},
+{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6e 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqsh 0x12345678(%eax,%ecx,8),%xmm2,%k5",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 2f ca \tvcomish %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 2f 8c c8 78 56 34 12 \tvcomish 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 5b ca \tvcvtdq2ph %zmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 5b 8c c8 78 56 34 12 \tvcvtdq2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 5b ca \tvcvtdq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 5b ca \tvcvtdq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xfd, 0x48, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 fd 48 5a ca \tvcvtpd2ph %zmm2,%xmm1",},
+{{0x62, 0xf5, 0xfd, 0x08, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 fd 08 5a ca \tvcvtpd2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0xfd, 0x28, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 fd 28 5a ca \tvcvtpd2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 5b ca \tvcvtph2dq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 5b ca \tvcvtph2dq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 5b ca \tvcvtph2dq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 5a ca \tvcvtph2pd %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 5a ca \tvcvtph2pd %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 5a ca \tvcvtph2pd %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "",
+"62 f2 7d 48 13 ca \tvcvtph2ps %ymm2,%zmm1",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f2 7d 48 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 79 13 ca \tvcvtph2ps %xmm2,%xmm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 7d 13 ca \tvcvtph2ps %xmm2,%ymm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 79 13 ca \tvcvtph2ps %xmm2,%xmm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 7d 13 ca \tvcvtph2ps %xmm2,%ymm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 13 ca \tvcvtph2psx %ymm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 13 ca \tvcvtph2psx %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 13 ca \tvcvtph2psx %xmm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7b ca \tvcvtph2qq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7b ca \tvcvtph2qq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7b ca \tvcvtph2qq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 79 ca \tvcvtph2udq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 79 ca \tvcvtph2udq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 79 ca \tvcvtph2udq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 79 ca \tvcvtph2uqq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 79 ca \tvcvtph2uqq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 79 ca \tvcvtph2uqq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 7d ca \tvcvtph2uw %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 7d ca \tvcvtph2uw %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 7d ca \tvcvtph2uw %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7d ca \tvcvtph2w %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7d ca \tvcvtph2w %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7d ca \tvcvtph2w %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7d 48 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%zmm1,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0xd1, 0x12, }, 7, 0, "", "",
+"62 f3 7d 48 1d d1 12 \tvcvtps2ph $0x12,%zmm2,%ymm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 7d 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm1,0x12345678(%eax,%ecx,8)",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 79 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm1,0x12345678(%eax,%ecx,8)",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 79 1d d1 12 \tvcvtps2ph $0x12,%xmm2,%xmm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 7d 1d d1 12 \tvcvtps2ph $0x12,%ymm2,%xmm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 7d 1d d1 12 \tvcvtps2ph $0x12,%ymm2,%xmm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 7d 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm2,0x12345678(%eax,%ecx,8)",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 79 1d d1 12 \tvcvtps2ph $0x12,%xmm2,%xmm1",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 79 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm2,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 1d ca \tvcvtps2phx %zmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 1d 8c c8 78 56 34 12 \tvcvtps2phx 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x1d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 1d ca \tvcvtps2phx %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x1d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 1d ca \tvcvtps2phx %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xfc, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 fc 48 5b ca \tvcvtqq2ph %zmm2,%xmm1",},
+{{0x62, 0xf5, 0xfc, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 fc 08 5b ca \tvcvtqq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0xfc, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 fc 28 5b ca \tvcvtqq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xef, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 ef 08 5a 8c c8 78 56 34 12 \tvcvtsd2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5a 8c c8 78 56 34 12 \tvcvtsh2sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x2d, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 2d 84 c8 78 56 34 12 \tvcvtsh2si 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf6, 0x6c, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6c 08 13 8c c8 78 56 34 12 \tvcvtsh2ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0xc1, }, 6, 0, "", "",
+"62 f5 7e 08 79 c1 \tvcvtsh2usi %xmm1,%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0xc8, }, 6, 0, "", "",
+"62 f5 6e 08 2a c8 \tvcvtsi2sh %eax,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 1d cb \tvcvtss2sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 1d 8c c8 78 56 34 12 \tvcvtss2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7e 48 5b ca \tvcvttph2dq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 48 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7e 08 5b ca \tvcvttph2dq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7e 28 5b ca \tvcvttph2dq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 28 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7a ca \tvcvttph2qq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7a ca \tvcvttph2qq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7a ca \tvcvttph2qq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 78 ca \tvcvttph2udq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 78 ca \tvcvttph2udq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 78 ca \tvcvttph2udq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 78 ca \tvcvttph2uqq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 78 ca \tvcvttph2uqq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 78 ca \tvcvttph2uqq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 7c ca \tvcvttph2uw %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 7c ca \tvcvttph2uw %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 7c ca \tvcvttph2uw %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7c ca \tvcvttph2w %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7c ca \tvcvttph2w %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7c ca \tvcvttph2w %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0xc1, }, 6, 0, "", "",
+"62 f5 7e 08 2c c1 \tvcvttsh2si %xmm1,%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0xc1, }, 6, 0, "", "",
+"62 f5 7e 08 78 c1 \tvcvttsh2usi %xmm1,%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7f 48 7a ca \tvcvtudq2ph %zmm2,%ymm1",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 48 7a 8c c8 78 56 34 12 \tvcvtudq2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7f, 0x08, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7f 08 7a ca \tvcvtudq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x28, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7f 28 7a ca \tvcvtudq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xff, 0x48, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 ff 48 7a ca \tvcvtuqq2ph %zmm2,%xmm1",},
+{{0x62, 0xf5, 0xff, 0x08, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 ff 08 7a ca \tvcvtuqq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0xff, 0x28, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 ff 28 7a ca \tvcvtuqq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0xc8, }, 6, 0, "", "",
+"62 f5 6e 08 7b c8 \tvcvtusi2sh %eax,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7f 48 7d ca \tvcvtuw2ph %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 48 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7f 08 7d ca \tvcvtuw2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 08 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7f 28 7d ca \tvcvtuw2ph %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 28 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7e 48 7d ca \tvcvtw2ph %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 48 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7e 08 7d ca \tvcvtw2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7e 28 7d ca \tvcvtw2ph %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 28 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5e cb \tvdivph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5e cb \tvdivph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5e cb \tvdivph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5e cb \tvdivsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5e 8c c8 78 56 34 12 \tvdivsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 48 56 cb \tvfcmaddcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 48 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 56 cb \tvfcmaddcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 28 56 cb \tvfcmaddcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 28 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 57 cb \tvfcmaddcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 57 8c c8 78 56 34 12 \tvfcmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 48 d6 cb \tvfcmulcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 48 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 d6 cb \tvfcmulcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 28 d6 cb \tvfcmulcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 28 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 d7 cb \tvfcmulcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 d7 8c c8 78 56 34 12 \tvfcmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 98 cb \tvfmadd132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 98 cb \tvfmadd132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 98 cb \tvfmadd132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 99 cb \tvfmadd132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 99 8c c8 78 56 34 12 \tvfmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 a8 cb \tvfmadd213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a8 cb \tvfmadd213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 a8 cb \tvfmadd213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a9 cb \tvfmadd213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a9 8c c8 78 56 34 12 \tvfmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 b8 cb \tvfmadd231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b8 cb \tvfmadd231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 b8 cb \tvfmadd231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b9 cb \tvfmadd231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b9 8c c8 78 56 34 12 \tvfmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 48 56 cb \tvfmaddcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 48 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 56 cb \tvfmaddcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 28 56 cb \tvfmaddcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 28 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 57 cb \tvfmaddcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 57 8c c8 78 56 34 12 \tvfmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 96 cb \tvfmaddsub132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 96 cb \tvfmaddsub132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 96 cb \tvfmaddsub132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 a6 cb \tvfmaddsub213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a6 cb \tvfmaddsub213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 a6 cb \tvfmaddsub213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 b6 cb \tvfmaddsub231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b6 cb \tvfmaddsub231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 b6 cb \tvfmaddsub231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 9a cb \tvfmsub132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9a cb \tvfmsub132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 9a cb \tvfmsub132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9b cb \tvfmsub132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9b 8c c8 78 56 34 12 \tvfmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 aa cb \tvfmsub213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 aa cb \tvfmsub213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 aa cb \tvfmsub213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ab cb \tvfmsub213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ab 8c c8 78 56 34 12 \tvfmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 ba cb \tvfmsub231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ba cb \tvfmsub231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 ba cb \tvfmsub231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bb cb \tvfmsub231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bb 8c c8 78 56 34 12 \tvfmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 97 cb \tvfmsubadd132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 97 cb \tvfmsubadd132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 97 cb \tvfmsubadd132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 a7 cb \tvfmsubadd213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a7 cb \tvfmsubadd213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 a7 cb \tvfmsubadd213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 b7 cb \tvfmsubadd231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b7 cb \tvfmsubadd231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 b7 cb \tvfmsubadd231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 48 d6 cb \tvfmulcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 48 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 d6 cb \tvfmulcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 28 d6 cb \tvfmulcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 28 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 d7 cb \tvfmulcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 d7 8c c8 78 56 34 12 \tvfmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 9c cb \tvfnmadd132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9c cb \tvfnmadd132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 9c cb \tvfnmadd132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9d cb \tvfnmadd132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9d 8c c8 78 56 34 12 \tvfnmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 ac cb \tvfnmadd213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ac cb \tvfnmadd213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 ac cb \tvfnmadd213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ad cb \tvfnmadd213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ad 8c c8 78 56 34 12 \tvfnmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 bc cb \tvfnmadd231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bc cb \tvfnmadd231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 bc cb \tvfnmadd231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bd cb \tvfnmadd231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bd 8c c8 78 56 34 12 \tvfnmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 9e cb \tvfnmsub132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9e cb \tvfnmsub132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 9e cb \tvfnmsub132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9f cb \tvfnmsub132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9f 8c c8 78 56 34 12 \tvfnmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 ae cb \tvfnmsub213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ae cb \tvfnmsub213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 ae cb \tvfnmsub213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 af cb \tvfnmsub213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 af 8c c8 78 56 34 12 \tvfnmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 be cb \tvfnmsub231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 be cb \tvfnmsub231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 be cb \tvfnmsub231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bf cb \tvfnmsub231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bf 8c c8 78 56 34 12 \tvfnmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x66, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 66 e9 12 \tvfpclassph $0x12,%zmm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x66, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 66 e9 12 \tvfpclassph $0x12,%xmm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x66, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 66 e9 12 \tvfpclassph $0x12,%ymm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 67 e9 12 \tvfpclasssh $0x12,%xmm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 67 ac c8 78 56 34 12 12 \tvfpclasssh $0x12,0x12345678(%eax,%ecx,8),%k5",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 42 ca \tvgetexpph %zmm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 42 ca \tvgetexpph %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 42 ca \tvgetexpph %ymm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 43 cb \tvgetexpsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 43 8c c8 78 56 34 12 \tvgetexpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 26 ca 12 \tvgetmantph $0x12,%zmm2,%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 48 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 26 ca 12 \tvgetmantph $0x12,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 26 ca 12 \tvgetmantph $0x12,%ymm2,%ymm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 28 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0xcb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 27 cb 12 \tvgetmantsh $0x12,%xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 27 8c c8 78 56 34 12 12 \tvgetmantsh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5f cb \tvmaxph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5f cb \tvmaxph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5f cb \tvmaxph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5f cb \tvmaxsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5f 8c c8 78 56 34 12 \tvmaxsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5d cb \tvminph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5d cb \tvminph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5d cb \tvminph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5d cb \tvminsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5d 8c c8 78 56 34 12 \tvminsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x11, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 11 8c c8 78 56 34 12 \tvmovsh %xmm1,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x10, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 10 8c c8 78 56 34 12 \tvmovsh 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x10, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 10 cb \tvmovsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0xc8, }, 6, 0, "", "",
+"62 f5 7d 08 7e c8 \tvmovw %xmm1,%eax",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7e 8c c8 78 56 34 12 \tvmovw %xmm1,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0xc8, }, 6, 0, "", "",
+"62 f5 7d 08 6e c8 \tvmovw %eax,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 6e 8c c8 78 56 34 12 \tvmovw 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 59 cb \tvmulph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 59 cb \tvmulph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 59 cb \tvmulph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 59 cb \tvmulsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 59 8c c8 78 56 34 12 \tvmulsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 4c ca \tvrcpph %zmm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 4c ca \tvrcpph %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 4c ca \tvrcpph %ymm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 4d cb \tvrcpsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 4d 8c c8 78 56 34 12 \tvrcpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 56 ca 12 \tvreduceph $0x12,%zmm2,%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 48 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 56 ca 12 \tvreduceph $0x12,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 56 ca 12 \tvreduceph $0x12,%ymm2,%ymm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 28 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0xcb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 57 cb 12 \tvreducesh $0x12,%xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 57 8c c8 78 56 34 12 12 \tvreducesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 08 ca 12 \tvrndscaleph $0x12,%zmm2,%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 48 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 08 ca 12 \tvrndscaleph $0x12,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 08 ca 12 \tvrndscaleph $0x12,%ymm2,%ymm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 28 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0xcb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 0a cb 12 \tvrndscalesh $0x12,%xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 0a 8c c8 78 56 34 12 12 \tvrndscalesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 4e ca \tvrsqrtph %zmm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 4e ca \tvrsqrtph %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 4e ca \tvrsqrtph %ymm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 4f cb \tvrsqrtsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 4f 8c c8 78 56 34 12 \tvrsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 2c cb \tvscalefph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 2c cb \tvscalefph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 2c cb \tvscalefph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 2d cb \tvscalefsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 2d 8c c8 78 56 34 12 \tvscalefsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 51 ca \tvsqrtph %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 51 ca \tvsqrtph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 51 ca \tvsqrtph %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 51 cb \tvsqrtsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 51 8c c8 78 56 34 12 \tvsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5c cb \tvsubph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5c cb \tvsubph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5c cb \tvsubph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5c cb \tvsubsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5c 8c c8 78 56 34 12 \tvsubsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 2e ca \tvucomish %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 2e 8c c8 78 56 34 12 \tvucomish 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00, }, 6, 0, "", "",
+"f3 0f 3a f0 c0 00 \threset $0x0",},
+{{0x0f, 0x01, 0xe8, }, 3, 0, "", "",
+"0f 01 e8 \tserialize ",},
+{{0xf2, 0x0f, 0x01, 0xe9, }, 4, 0, "", "",
+"f2 0f 01 e9 \txresldtrk ",},
+{{0xf2, 0x0f, 0x01, 0xe8, }, 4, 0, "", "",
+"f2 0f 01 e8 \txsusldtrk ",},
{{0x0f, 0x01, 0xcf, }, 3, 0, "", "",
"0f 01 cf \tencls ",},
{{0x0f, 0x01, 0xd7, }, 3, 0, "", "",
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
index 5da17d41d302..3a47e98fec33 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
@@ -2459,6 +2459,1432 @@
"3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%rax,%rcx,8)",},
{{0x3e, 0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "jmp", "indirect",
"3e f2 41 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%r8,%rcx,8)",},
+{{0xc4, 0xe2, 0x78, 0x49, 0x04, 0xc8, }, 6, 0, "", "",
+"c4 e2 78 49 04 c8 \tldtilecfg (%rax,%rcx,8)",},
+{{0xc4, 0xc2, 0x78, 0x49, 0x04, 0xc8, }, 6, 0, "", "",
+"c4 c2 78 49 04 c8 \tldtilecfg (%r8,%rcx,8)",},
+{{0xc4, 0xe2, 0x79, 0x49, 0x04, 0xc8, }, 6, 0, "", "",
+"c4 e2 79 49 04 c8 \tsttilecfg (%rax,%rcx,8)",},
+{{0xc4, 0xc2, 0x79, 0x49, 0x04, 0xc8, }, 6, 0, "", "",
+"c4 c2 79 49 04 c8 \tsttilecfg (%r8,%rcx,8)",},
+{{0xc4, 0xe2, 0x7a, 0x5c, 0xd1, }, 5, 0, "", "",
+"c4 e2 7a 5c d1 \ttdpbf16ps %tmm0,%tmm1,%tmm2",},
+{{0xc4, 0xe2, 0x7b, 0x5e, 0xd1, }, 5, 0, "", "",
+"c4 e2 7b 5e d1 \ttdpbssd %tmm0,%tmm1,%tmm2",},
+{{0xc4, 0xe2, 0x7a, 0x5e, 0xd1, }, 5, 0, "", "",
+"c4 e2 7a 5e d1 \ttdpbsud %tmm0,%tmm1,%tmm2",},
+{{0xc4, 0xe2, 0x79, 0x5e, 0xd1, }, 5, 0, "", "",
+"c4 e2 79 5e d1 \ttdpbusd %tmm0,%tmm1,%tmm2",},
+{{0xc4, 0xe2, 0x78, 0x5e, 0xd1, }, 5, 0, "", "",
+"c4 e2 78 5e d1 \ttdpbuud %tmm0,%tmm1,%tmm2",},
+{{0xc4, 0xe2, 0x7b, 0x4b, 0x0c, 0xc8, }, 6, 0, "", "",
+"c4 e2 7b 4b 0c c8 \ttileloadd (%rax,%rcx,8),%tmm1",},
+{{0xc4, 0xc2, 0x7b, 0x4b, 0x14, 0xc8, }, 6, 0, "", "",
+"c4 c2 7b 4b 14 c8 \ttileloadd (%r8,%rcx,8),%tmm2",},
+{{0xc4, 0xe2, 0x79, 0x4b, 0x0c, 0xc8, }, 6, 0, "", "",
+"c4 e2 79 4b 0c c8 \ttileloaddt1 (%rax,%rcx,8),%tmm1",},
+{{0xc4, 0xc2, 0x79, 0x4b, 0x14, 0xc8, }, 6, 0, "", "",
+"c4 c2 79 4b 14 c8 \ttileloaddt1 (%r8,%rcx,8),%tmm2",},
+{{0xc4, 0xe2, 0x78, 0x49, 0xc0, }, 5, 0, "", "",
+"c4 e2 78 49 c0 \ttilerelease ",},
+{{0xc4, 0xe2, 0x7a, 0x4b, 0x0c, 0xc8, }, 6, 0, "", "",
+"c4 e2 7a 4b 0c c8 \ttilestored %tmm1,(%rax,%rcx,8)",},
+{{0xc4, 0xc2, 0x7a, 0x4b, 0x14, 0xc8, }, 6, 0, "", "",
+"c4 c2 7a 4b 14 c8 \ttilestored %tmm2,(%r8,%rcx,8)",},
+{{0xc4, 0xe2, 0x7b, 0x49, 0xc0, }, 5, 0, "", "",
+"c4 e2 7b 49 c0 \ttilezero %tmm0",},
+{{0xc4, 0xe2, 0x7b, 0x49, 0xf8, }, 5, 0, "", "",
+"c4 e2 7b 49 f8 \ttilezero %tmm7",},
+{{0xf3, 0x0f, 0x01, 0xee, }, 4, 0, "", "",
+"f3 0f 01 ee \tclui ",},
+{{0xf3, 0x0f, 0xc7, 0xf0, }, 4, 0, "", "",
+"f3 0f c7 f0 \tsenduipi %rax",},
+{{0xf3, 0x41, 0x0f, 0xc7, 0xf0, }, 5, 0, "", "",
+"f3 41 0f c7 f0 \tsenduipi %r8",},
+{{0xf3, 0x0f, 0x01, 0xef, }, 4, 0, "", "",
+"f3 0f 01 ef \tstui ",},
+{{0xf3, 0x0f, 0x01, 0xed, }, 4, 0, "", "",
+"f3 0f 01 ed \ttestui ",},
+{{0xf3, 0x0f, 0x01, 0xec, }, 4, 0, "", "",
+"f3 0f 01 ec \tuiret ",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 58 cb \tvaddph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 48 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 58 cb \tvaddph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 58 cb \tvaddph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 28 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 58 cb \tvaddsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 58 8c c8 78 56 34 12 \tvaddsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 58 8c c8 78 56 34 12 \tvaddsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 48 c2 eb 12 \tvcmple_oqph %zmm3,%zmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 48 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%rax,%rcx,8),%zmm2,%k5",},
+{{0x67, 0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6c 48 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%zmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 c2 eb 12 \tvcmple_oqph %xmm3,%xmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%rax,%rcx,8),%xmm2,%k5",},
+{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6c 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%xmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 28 c2 eb 12 \tvcmple_oqph %ymm3,%ymm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 28 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%rax,%rcx,8),%ymm2,%k5",},
+{{0x67, 0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6c 28 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%ymm2,%k5",},
+{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6e 08 c2 eb 12 \tvcmple_oqsh %xmm3,%xmm2,%k5",},
+{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6e 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqsh 0x12345678(%rax,%rcx,8),%xmm2,%k5",},
+{{0x67, 0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6e 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqsh 0x12345678(%eax,%ecx,8),%xmm2,%k5",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 2f ca \tvcomish %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 2f 8c c8 78 56 34 12 \tvcomish 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 2f 8c c8 78 56 34 12 \tvcomish 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 5b ca \tvcvtdq2ph %zmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 5b 8c c8 78 56 34 12 \tvcvtdq2ph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 5b 8c c8 78 56 34 12 \tvcvtdq2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 5b ca \tvcvtdq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 5b ca \tvcvtdq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xfd, 0x48, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 fd 48 5a ca \tvcvtpd2ph %zmm2,%xmm1",},
+{{0x62, 0xf5, 0xfd, 0x08, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 fd 08 5a ca \tvcvtpd2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0xfd, 0x28, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 fd 28 5a ca \tvcvtpd2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 5b ca \tvcvtph2dq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 5b ca \tvcvtph2dq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 5b ca \tvcvtph2dq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 5a ca \tvcvtph2pd %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 5a ca \tvcvtph2pd %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 5a ca \tvcvtph2pd %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 28 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "",
+"62 f2 7d 48 13 ca \tvcvtph2ps %ymm2,%zmm1",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f2 7d 48 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf2, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f2 7d 48 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 79 13 ca \tvcvtph2ps %xmm2,%xmm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"67 c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 7d 13 ca \tvcvtph2ps %xmm2,%ymm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"67 c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 79 13 ca \tvcvtph2ps %xmm2,%xmm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"67 c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 7d 13 ca \tvcvtph2ps %xmm2,%ymm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"67 c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 13 ca \tvcvtph2psx %ymm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 48 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 13 ca \tvcvtph2psx %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 08 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 13 ca \tvcvtph2psx %xmm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 28 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7b ca \tvcvtph2qq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7b ca \tvcvtph2qq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7b ca \tvcvtph2qq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 79 ca \tvcvtph2udq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 79 ca \tvcvtph2udq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 79 ca \tvcvtph2udq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 28 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 79 ca \tvcvtph2uqq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 79 ca \tvcvtph2uqq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 79 ca \tvcvtph2uqq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 7d ca \tvcvtph2uw %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 7d ca \tvcvtph2uw %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 7d ca \tvcvtph2uw %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 28 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7d ca \tvcvtph2w %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7d ca \tvcvtph2w %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7d ca \tvcvtph2w %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7d 48 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%zmm1,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0x62, 0xf3, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7d 48 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%zmm1,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0xd1, 0x12, }, 7, 0, "", "",
+"62 f3 7d 48 1d d1 12 \tvcvtps2ph $0x12,%zmm2,%ymm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 7d 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm1,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0xc4, 0xe3, 0x7d, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"67 c4 e3 7d 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm1,0x12345678(%eax,%ecx,8)",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 79 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm1,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0xc4, 0xe3, 0x79, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"67 c4 e3 79 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm1,0x12345678(%eax,%ecx,8)",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 79 1d d1 12 \tvcvtps2ph $0x12,%xmm2,%xmm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 7d 1d d1 12 \tvcvtps2ph $0x12,%ymm2,%xmm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 7d 1d d1 12 \tvcvtps2ph $0x12,%ymm2,%xmm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 7d 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm2,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0xc4, 0xe3, 0x7d, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"67 c4 e3 7d 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm2,0x12345678(%eax,%ecx,8)",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 79 1d d1 12 \tvcvtps2ph $0x12,%xmm2,%xmm1",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 79 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm2,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0xc4, 0xe3, 0x79, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"67 c4 e3 79 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm2,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 1d ca \tvcvtps2phx %zmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 1d 8c c8 78 56 34 12 \tvcvtps2phx 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 1d 8c c8 78 56 34 12 \tvcvtps2phx 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x1d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 1d ca \tvcvtps2phx %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x1d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 1d ca \tvcvtps2phx %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xfc, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 fc 48 5b ca \tvcvtqq2ph %zmm2,%xmm1",},
+{{0x62, 0xf5, 0xfc, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 fc 08 5b ca \tvcvtqq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0xfc, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 fc 28 5b ca \tvcvtqq2ph %ymm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0xef, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 ef 08 5a 8c c8 78 56 34 12 \tvcvtsd2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 5a 8c c8 78 56 34 12 \tvcvtsh2sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x2d, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 2d 84 c8 78 56 34 12 \tvcvtsh2si 0x12345678(%eax,%ecx,8),%eax",},
+{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x2d, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 fe 08 2d 84 c8 78 56 34 12 \tvcvtsh2si 0x12345678(%eax,%ecx,8),%rax",},
+{{0x67, 0x62, 0xf6, 0x6c, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6c 08 13 8c c8 78 56 34 12 \tvcvtsh2ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0xc1, }, 6, 0, "", "",
+"62 f5 7e 08 79 c1 \tvcvtsh2usi %xmm1,%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%rax,%rcx,8),%eax",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf5, 0xfe, 0x08, 0x79, 0xc1, }, 6, 0, "", "",
+"62 f5 fe 08 79 c1 \tvcvtsh2usi %xmm1,%rax",},
+{{0x62, 0xf5, 0xfe, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 fe 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%rax,%rcx,8),%rax",},
+{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 fe 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%eax,%ecx,8),%rax",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0xc8, }, 6, 0, "", "",
+"62 f5 6e 08 2a c8 \tvcvtsi2sh %eax,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0xee, 0x08, 0x2a, 0xc8, }, 6, 0, "", "",
+"62 f5 ee 08 2a c8 \tvcvtsi2sh %rax,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 1d cb \tvcvtss2sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 1d 8c c8 78 56 34 12 \tvcvtss2sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 1d 8c c8 78 56 34 12 \tvcvtss2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7e 48 5b ca \tvcvttph2dq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 48 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 48 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7e 08 5b ca \tvcvttph2dq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7e 28 5b ca \tvcvttph2dq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 28 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 28 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7a ca \tvcvttph2qq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7a ca \tvcvttph2qq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7a ca \tvcvttph2qq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 78 ca \tvcvttph2udq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 78 ca \tvcvttph2udq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 78 ca \tvcvttph2udq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 28 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 78 ca \tvcvttph2uqq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 78 ca \tvcvttph2uqq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 78 ca \tvcvttph2uqq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 7c ca \tvcvttph2uw %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 7c ca \tvcvttph2uw %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 7c ca \tvcvttph2uw %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 28 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7c ca \tvcvttph2w %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7c ca \tvcvttph2w %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7c ca \tvcvttph2w %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0xc1, }, 6, 0, "", "",
+"62 f5 7e 08 2c c1 \tvcvttsh2si %xmm1,%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%rax,%rcx,8),%eax",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf5, 0xfe, 0x08, 0x2c, 0xc1, }, 6, 0, "", "",
+"62 f5 fe 08 2c c1 \tvcvttsh2si %xmm1,%rax",},
+{{0x62, 0xf5, 0xfe, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 fe 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%rax,%rcx,8),%rax",},
+{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 fe 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%eax,%ecx,8),%rax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0xc1, }, 6, 0, "", "",
+"62 f5 7e 08 78 c1 \tvcvttsh2usi %xmm1,%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%rax,%rcx,8),%eax",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf5, 0xfe, 0x08, 0x78, 0xc1, }, 6, 0, "", "",
+"62 f5 fe 08 78 c1 \tvcvttsh2usi %xmm1,%rax",},
+{{0x62, 0xf5, 0xfe, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 fe 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%rax,%rcx,8),%rax",},
+{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 fe 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%eax,%ecx,8),%rax",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7f 48 7a ca \tvcvtudq2ph %zmm2,%ymm1",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 48 7a 8c c8 78 56 34 12 \tvcvtudq2ph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7f 48 7a 8c c8 78 56 34 12 \tvcvtudq2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7f, 0x08, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7f 08 7a ca \tvcvtudq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x28, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7f 28 7a ca \tvcvtudq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xff, 0x48, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 ff 48 7a ca \tvcvtuqq2ph %zmm2,%xmm1",},
+{{0x62, 0xf5, 0xff, 0x08, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 ff 08 7a ca \tvcvtuqq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0xff, 0x28, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 ff 28 7a ca \tvcvtuqq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0xc8, }, 6, 0, "", "",
+"62 f5 6e 08 7b c8 \tvcvtusi2sh %eax,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0xee, 0x08, 0x7b, 0xc8, }, 6, 0, "", "",
+"62 f5 ee 08 7b c8 \tvcvtusi2sh %rax,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7f 48 7d ca \tvcvtuw2ph %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 48 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7f 48 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7f 08 7d ca \tvcvtuw2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 08 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7f 08 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7f 28 7d ca \tvcvtuw2ph %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 28 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7f 28 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7e 48 7d ca \tvcvtw2ph %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 48 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 48 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7e 08 7d ca \tvcvtw2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7e 28 7d ca \tvcvtw2ph %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 28 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 28 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5e cb \tvdivph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 48 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5e cb \tvdivph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5e cb \tvdivph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 28 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5e cb \tvdivsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5e 8c c8 78 56 34 12 \tvdivsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 5e 8c c8 78 56 34 12 \tvdivsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 48 56 cb \tvfcmaddcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 48 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 48 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 56 cb \tvfcmaddcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 08 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 28 56 cb \tvfcmaddcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 28 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 28 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 57 cb \tvfcmaddcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 57 8c c8 78 56 34 12 \tvfcmaddcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 08 57 8c c8 78 56 34 12 \tvfcmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 48 d6 cb \tvfcmulcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 48 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 48 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 d6 cb \tvfcmulcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 08 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 28 d6 cb \tvfcmulcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 28 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 28 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 d7 cb \tvfcmulcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 d7 8c c8 78 56 34 12 \tvfcmulcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 08 d7 8c c8 78 56 34 12 \tvfcmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 98 cb \tvfmadd132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 98 cb \tvfmadd132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 98 cb \tvfmadd132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 99 cb \tvfmadd132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 99 8c c8 78 56 34 12 \tvfmadd132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x99, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 99 8c c8 78 56 34 12 \tvfmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 a8 cb \tvfmadd213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a8 cb \tvfmadd213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 a8 cb \tvfmadd213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a9 cb \tvfmadd213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a9 8c c8 78 56 34 12 \tvfmadd213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 a9 8c c8 78 56 34 12 \tvfmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 b8 cb \tvfmadd231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b8 cb \tvfmadd231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 b8 cb \tvfmadd231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b9 cb \tvfmadd231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b9 8c c8 78 56 34 12 \tvfmadd231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 b9 8c c8 78 56 34 12 \tvfmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 48 56 cb \tvfmaddcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 48 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 48 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 56 cb \tvfmaddcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 08 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 28 56 cb \tvfmaddcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 28 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 28 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 57 cb \tvfmaddcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 57 8c c8 78 56 34 12 \tvfmaddcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 08 57 8c c8 78 56 34 12 \tvfmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 96 cb \tvfmaddsub132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 96 cb \tvfmaddsub132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 96 cb \tvfmaddsub132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 a6 cb \tvfmaddsub213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a6 cb \tvfmaddsub213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 a6 cb \tvfmaddsub213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 b6 cb \tvfmaddsub231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b6 cb \tvfmaddsub231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 b6 cb \tvfmaddsub231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 9a cb \tvfmsub132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9a cb \tvfmsub132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 9a cb \tvfmsub132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9b cb \tvfmsub132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9b 8c c8 78 56 34 12 \tvfmsub132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 9b 8c c8 78 56 34 12 \tvfmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 aa cb \tvfmsub213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 aa cb \tvfmsub213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 aa cb \tvfmsub213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ab cb \tvfmsub213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ab 8c c8 78 56 34 12 \tvfmsub213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xab, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 ab 8c c8 78 56 34 12 \tvfmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 ba cb \tvfmsub231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ba cb \tvfmsub231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 ba cb \tvfmsub231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bb cb \tvfmsub231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bb 8c c8 78 56 34 12 \tvfmsub231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 bb 8c c8 78 56 34 12 \tvfmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 97 cb \tvfmsubadd132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 97 cb \tvfmsubadd132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 97 cb \tvfmsubadd132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 a7 cb \tvfmsubadd213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a7 cb \tvfmsubadd213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 a7 cb \tvfmsubadd213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 b7 cb \tvfmsubadd231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b7 cb \tvfmsubadd231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 b7 cb \tvfmsubadd231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 48 d6 cb \tvfmulcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 48 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 48 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 d6 cb \tvfmulcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 08 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 28 d6 cb \tvfmulcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 28 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 28 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 d7 cb \tvfmulcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 d7 8c c8 78 56 34 12 \tvfmulcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 08 d7 8c c8 78 56 34 12 \tvfmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 9c cb \tvfnmadd132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9c cb \tvfnmadd132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 9c cb \tvfnmadd132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9d cb \tvfnmadd132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9d 8c c8 78 56 34 12 \tvfnmadd132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 9d 8c c8 78 56 34 12 \tvfnmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 ac cb \tvfnmadd213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ac cb \tvfnmadd213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 ac cb \tvfnmadd213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ad cb \tvfnmadd213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ad 8c c8 78 56 34 12 \tvfnmadd213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xad, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 ad 8c c8 78 56 34 12 \tvfnmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 bc cb \tvfnmadd231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bc cb \tvfnmadd231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 bc cb \tvfnmadd231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bd cb \tvfnmadd231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bd 8c c8 78 56 34 12 \tvfnmadd231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbd, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 bd 8c c8 78 56 34 12 \tvfnmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 9e cb \tvfnmsub132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9e cb \tvfnmsub132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 9e cb \tvfnmsub132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9f cb \tvfnmsub132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9f 8c c8 78 56 34 12 \tvfnmsub132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 9f 8c c8 78 56 34 12 \tvfnmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 ae cb \tvfnmsub213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ae cb \tvfnmsub213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 ae cb \tvfnmsub213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 af cb \tvfnmsub213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 af 8c c8 78 56 34 12 \tvfnmsub213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xaf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 af 8c c8 78 56 34 12 \tvfnmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 be cb \tvfnmsub231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 be cb \tvfnmsub231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 be cb \tvfnmsub231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bf cb \tvfnmsub231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bf 8c c8 78 56 34 12 \tvfnmsub231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 bf 8c c8 78 56 34 12 \tvfnmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x66, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 66 e9 12 \tvfpclassph $0x12,%zmm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x66, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 66 e9 12 \tvfpclassph $0x12,%xmm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x66, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 66 e9 12 \tvfpclassph $0x12,%ymm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 67 e9 12 \tvfpclasssh $0x12,%xmm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 67 ac c8 78 56 34 12 12 \tvfpclasssh $0x12,0x12345678(%rax,%rcx,8),%k5",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x67, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 08 67 ac c8 78 56 34 12 12 \tvfpclasssh $0x12,0x12345678(%eax,%ecx,8),%k5",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 42 ca \tvgetexpph %zmm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 48 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 42 ca \tvgetexpph %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 08 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 42 ca \tvgetexpph %ymm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 28 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 43 cb \tvgetexpsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 43 8c c8 78 56 34 12 \tvgetexpsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x43, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 43 8c c8 78 56 34 12 \tvgetexpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 26 ca 12 \tvgetmantph $0x12,%zmm2,%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 48 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x48, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 48 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 26 ca 12 \tvgetmantph $0x12,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 08 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 26 ca 12 \tvgetmantph $0x12,%ymm2,%ymm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 28 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x28, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 28 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0xcb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 27 cb 12 \tvgetmantsh $0x12,%xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 27 8c c8 78 56 34 12 12 \tvgetmantsh $0x12,0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0x27, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6c 08 27 8c c8 78 56 34 12 12 \tvgetmantsh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5f cb \tvmaxph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 48 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5f cb \tvmaxph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5f cb \tvmaxph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 28 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5f cb \tvmaxsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5f 8c c8 78 56 34 12 \tvmaxsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 5f 8c c8 78 56 34 12 \tvmaxsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5d cb \tvminph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 48 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5d cb \tvminph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5d cb \tvminph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 28 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5d cb \tvminsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5d 8c c8 78 56 34 12 \tvminsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 5d 8c c8 78 56 34 12 \tvminsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x11, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 11 8c c8 78 56 34 12 \tvmovsh %xmm1,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x11, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 11 8c c8 78 56 34 12 \tvmovsh %xmm1,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x10, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 10 8c c8 78 56 34 12 \tvmovsh 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x10, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 10 8c c8 78 56 34 12 \tvmovsh 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x10, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 10 cb \tvmovsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0xc8, }, 6, 0, "", "",
+"62 f5 7d 08 7e c8 \tvmovw %xmm1,%eax",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7e 8c c8 78 56 34 12 \tvmovw %xmm1,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 7e 8c c8 78 56 34 12 \tvmovw %xmm1,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0xc8, }, 6, 0, "", "",
+"62 f5 7d 08 6e c8 \tvmovw %eax,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 6e 8c c8 78 56 34 12 \tvmovw 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 6e 8c c8 78 56 34 12 \tvmovw 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 59 cb \tvmulph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 48 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 59 cb \tvmulph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 59 cb \tvmulph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 28 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 59 cb \tvmulsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 59 8c c8 78 56 34 12 \tvmulsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 59 8c c8 78 56 34 12 \tvmulsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 4c ca \tvrcpph %zmm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 48 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 4c ca \tvrcpph %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 08 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 4c ca \tvrcpph %ymm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 28 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 4d cb \tvrcpsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 4d 8c c8 78 56 34 12 \tvrcpsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x4d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 4d 8c c8 78 56 34 12 \tvrcpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 56 ca 12 \tvreduceph $0x12,%zmm2,%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 48 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 48 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 56 ca 12 \tvreduceph $0x12,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 08 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 56 ca 12 \tvreduceph $0x12,%ymm2,%ymm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 28 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 28 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0xcb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 57 cb 12 \tvreducesh $0x12,%xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 57 8c c8 78 56 34 12 12 \tvreducesh $0x12,0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6c 08 57 8c c8 78 56 34 12 12 \tvreducesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 08 ca 12 \tvrndscaleph $0x12,%zmm2,%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 48 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x48, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 48 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 08 ca 12 \tvrndscaleph $0x12,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 08 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 08 ca 12 \tvrndscaleph $0x12,%ymm2,%ymm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 28 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x28, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 28 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0xcb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 0a cb 12 \tvrndscalesh $0x12,%xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 0a 8c c8 78 56 34 12 12 \tvrndscalesh $0x12,0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0x0a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6c 08 0a 8c c8 78 56 34 12 12 \tvrndscalesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 4e ca \tvrsqrtph %zmm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 48 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 4e ca \tvrsqrtph %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 08 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 4e ca \tvrsqrtph %ymm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 28 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 4f cb \tvrsqrtsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 4f 8c c8 78 56 34 12 \tvrsqrtsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x4f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 4f 8c c8 78 56 34 12 \tvrsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 2c cb \tvscalefph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 2c cb \tvscalefph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 2c cb \tvscalefph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 2d cb \tvscalefsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 2d 8c c8 78 56 34 12 \tvscalefsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x2d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 2d 8c c8 78 56 34 12 \tvscalefsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 51 ca \tvsqrtph %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 51 ca \tvsqrtph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 51 ca \tvsqrtph %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 28 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 51 cb \tvsqrtsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 51 8c c8 78 56 34 12 \tvsqrtsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 51 8c c8 78 56 34 12 \tvsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5c cb \tvsubph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 48 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5c cb \tvsubph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5c cb \tvsubph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 28 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5c cb \tvsubsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5c 8c c8 78 56 34 12 \tvsubsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 5c 8c c8 78 56 34 12 \tvsubsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 2e ca \tvucomish %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 2e 8c c8 78 56 34 12 \tvucomish 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 2e 8c c8 78 56 34 12 \tvucomish 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00, }, 6, 0, "", "",
+"f3 0f 3a f0 c0 00 \threset $0x0",},
+{{0x0f, 0x01, 0xe8, }, 3, 0, "", "",
+"0f 01 e8 \tserialize ",},
+{{0xf2, 0x0f, 0x01, 0xe9, }, 4, 0, "", "",
+"f2 0f 01 e9 \txresldtrk ",},
+{{0xf2, 0x0f, 0x01, 0xe8, }, 4, 0, "", "",
+"f2 0f 01 e8 \txsusldtrk ",},
{{0x0f, 0x01, 0xcf, }, 3, 0, "", "",
"0f 01 cf \tencls ",},
{{0x0f, 0x01, 0xd7, }, 3, 0, "", "",
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
index c3808e94c46e..a391464c8dee 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
@@ -1910,6 +1910,724 @@ int main(void)
asm volatile("notrack bnd jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */
asm volatile("notrack bnd jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */
+ /* AMX */
+
+ asm volatile("ldtilecfg (%rax,%rcx,8)");
+ asm volatile("ldtilecfg (%r8,%rcx,8)");
+ asm volatile("sttilecfg (%rax,%rcx,8)");
+ asm volatile("sttilecfg (%r8,%rcx,8)");
+ asm volatile("tdpbf16ps %tmm0, %tmm1, %tmm2");
+ asm volatile("tdpbssd %tmm0, %tmm1, %tmm2");
+ asm volatile("tdpbsud %tmm0, %tmm1, %tmm2");
+ asm volatile("tdpbusd %tmm0, %tmm1, %tmm2");
+ asm volatile("tdpbuud %tmm0, %tmm1, %tmm2");
+ asm volatile("tileloadd (%rax,%rcx,8), %tmm1");
+ asm volatile("tileloadd (%r8,%rcx,8), %tmm2");
+ asm volatile("tileloaddt1 (%rax,%rcx,8), %tmm1");
+ asm volatile("tileloaddt1 (%r8,%rcx,8), %tmm2");
+ asm volatile("tilerelease");
+ asm volatile("tilestored %tmm1, (%rax,%rcx,8)");
+ asm volatile("tilestored %tmm2, (%r8,%rcx,8)");
+ asm volatile("tilezero %tmm0");
+ asm volatile("tilezero %tmm7");
+
+ /* User Interrupt */
+
+ asm volatile("clui");
+ asm volatile("senduipi %rax");
+ asm volatile("senduipi %r8");
+ asm volatile("stui");
+ asm volatile("testui");
+ asm volatile("uiret");
+
+ /* AVX512-FP16 */
+
+ asm volatile("vaddph %zmm3, %zmm2, %zmm1");
+ asm volatile("vaddph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vaddph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vaddph %xmm3, %xmm2, %xmm1");
+ asm volatile("vaddph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vaddph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vaddph %ymm3, %ymm2, %ymm1");
+ asm volatile("vaddph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vaddph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vaddsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vaddsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vaddsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vcmpph $0x12, %zmm3, %zmm2, %k5");
+ asm volatile("vcmpph $0x12, 0x12345678(%rax,%rcx,8), %zmm2, %k5");
+ asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %zmm2, %k5");
+ asm volatile("vcmpph $0x12, %xmm3, %xmm2, %k5");
+ asm volatile("vcmpph $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %k5");
+ asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5");
+ asm volatile("vcmpph $0x12, %ymm3, %ymm2, %k5");
+ asm volatile("vcmpph $0x12, 0x12345678(%rax,%rcx,8), %ymm2, %k5");
+ asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %ymm2, %k5");
+ asm volatile("vcmpsh $0x12, %xmm3, %xmm2, %k5");
+ asm volatile("vcmpsh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %k5");
+ asm volatile("vcmpsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5");
+ asm volatile("vcomish %xmm2, %xmm1");
+ asm volatile("vcomish 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcomish 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtdq2ph %zmm2, %ymm1");
+ asm volatile("vcvtdq2ph 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvtdq2ph 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtdq2ph %xmm2, %xmm1");
+ asm volatile("vcvtdq2ph %ymm2, %xmm1");
+ asm volatile("vcvtpd2ph %zmm2, %xmm1");
+ asm volatile("vcvtpd2ph %xmm2, %xmm1");
+ asm volatile("vcvtpd2ph %ymm2, %xmm1");
+ asm volatile("vcvtph2dq %ymm2, %zmm1");
+ asm volatile("vcvtph2dq 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtph2dq %xmm2, %xmm1");
+ asm volatile("vcvtph2dq 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2dq %xmm2, %ymm1");
+ asm volatile("vcvtph2dq 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtph2pd %xmm2, %zmm1");
+ asm volatile("vcvtph2pd 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtph2pd %xmm2, %xmm1");
+ asm volatile("vcvtph2pd 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2pd %xmm2, %ymm1");
+ asm volatile("vcvtph2pd 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtph2ps %ymm2, %zmm1");
+ asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtph2ps %xmm2, %xmm1");
+ asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2ps %xmm2, %ymm1");
+ asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtph2ps %xmm2, %xmm1");
+ asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2ps %xmm2, %ymm1");
+ asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtph2psx %ymm2, %zmm1");
+ asm volatile("vcvtph2psx 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtph2psx %xmm2, %xmm1");
+ asm volatile("vcvtph2psx 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2psx %xmm2, %ymm1");
+ asm volatile("vcvtph2psx 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtph2qq %xmm2, %zmm1");
+ asm volatile("vcvtph2qq 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtph2qq %xmm2, %xmm1");
+ asm volatile("vcvtph2qq 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2qq %xmm2, %ymm1");
+ asm volatile("vcvtph2qq 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtph2udq %ymm2, %zmm1");
+ asm volatile("vcvtph2udq 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtph2udq %xmm2, %xmm1");
+ asm volatile("vcvtph2udq 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2udq %xmm2, %ymm1");
+ asm volatile("vcvtph2udq 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtph2uqq %xmm2, %zmm1");
+ asm volatile("vcvtph2uqq 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtph2uqq %xmm2, %xmm1");
+ asm volatile("vcvtph2uqq 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2uqq %xmm2, %ymm1");
+ asm volatile("vcvtph2uqq 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtph2uw %zmm2, %zmm1");
+ asm volatile("vcvtph2uw 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtph2uw %xmm2, %xmm1");
+ asm volatile("vcvtph2uw 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2uw %ymm2, %ymm1");
+ asm volatile("vcvtph2uw 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtph2w %zmm2, %zmm1");
+ asm volatile("vcvtph2w 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtph2w %xmm2, %xmm1");
+ asm volatile("vcvtph2w 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2w %ymm2, %ymm1");
+ asm volatile("vcvtph2w 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtps2ph $0x12, %zmm1, 0x12345678(%rax,%rcx,8)");
+ asm volatile("vcvtps2ph $0x12, %zmm1, 0x12345678(%eax,%ecx,8)");
+ asm volatile("vcvtps2ph $0x12, %zmm2, %ymm1");
+ asm volatile("vcvtps2ph $0x12, %ymm1, 0x12345678(%rax,%rcx,8)");
+ asm volatile("vcvtps2ph $0x12, %ymm1, 0x12345678(%eax,%ecx,8)");
+ asm volatile("vcvtps2ph $0x12, %xmm1, 0x12345678(%rax,%rcx,8)");
+ asm volatile("vcvtps2ph $0x12, %xmm1, 0x12345678(%eax,%ecx,8)");
+ asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1");
+ asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1");
+ asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1");
+ asm volatile("vcvtps2ph $0x12, %ymm2, 0x12345678(%rax,%rcx,8)");
+ asm volatile("vcvtps2ph $0x12, %ymm2, 0x12345678(%eax,%ecx,8)");
+ asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1");
+ asm volatile("vcvtps2ph $0x12, %xmm2, 0x12345678(%rax,%rcx,8)");
+ asm volatile("vcvtps2ph $0x12, %xmm2, 0x12345678(%eax,%ecx,8)");
+ asm volatile("vcvtps2phx %zmm2, %ymm1");
+ asm volatile("vcvtps2phx 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvtps2phx 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtps2phx %xmm2, %xmm1");
+ asm volatile("vcvtps2phx %ymm2, %xmm1");
+ asm volatile("vcvtqq2ph %zmm2, %xmm1");
+ asm volatile("vcvtqq2ph %xmm2, %xmm1");
+ asm volatile("vcvtqq2ph %ymm2, %xmm1");
+ asm volatile("vcvtsd2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vcvtsh2sd 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vcvtsh2si 0x12345678(%eax,%ecx,8), %eax");
+ asm volatile("vcvtsh2si 0x12345678(%eax,%ecx,8), %rax");
+ asm volatile("vcvtsh2ss 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vcvtsh2usi %xmm1, %eax");
+ asm volatile("vcvtsh2usi 0x12345678(%rax,%rcx,8), %eax");
+ asm volatile("vcvtsh2usi 0x12345678(%eax,%ecx,8), %eax");
+ asm volatile("vcvtsh2usi %xmm1, %rax");
+ asm volatile("vcvtsh2usi 0x12345678(%rax,%rcx,8), %rax");
+ asm volatile("vcvtsh2usi 0x12345678(%eax,%ecx,8), %rax");
+ asm volatile("vcvtsi2sh %eax, %xmm2, %xmm1");
+ asm volatile("vcvtsi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vcvtsi2sh %rax, %xmm2, %xmm1");
+ asm volatile("vcvtsi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vcvtss2sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vcvtss2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vcvtss2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vcvttph2dq %ymm2, %zmm1");
+ asm volatile("vcvttph2dq 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvttph2dq %xmm2, %xmm1");
+ asm volatile("vcvttph2dq 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvttph2dq %xmm2, %ymm1");
+ asm volatile("vcvttph2dq 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvttph2qq %xmm2, %zmm1");
+ asm volatile("vcvttph2qq 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvttph2qq %xmm2, %xmm1");
+ asm volatile("vcvttph2qq 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvttph2qq %xmm2, %ymm1");
+ asm volatile("vcvttph2qq 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvttph2udq %ymm2, %zmm1");
+ asm volatile("vcvttph2udq 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvttph2udq %xmm2, %xmm1");
+ asm volatile("vcvttph2udq 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvttph2udq %xmm2, %ymm1");
+ asm volatile("vcvttph2udq 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvttph2uqq %xmm2, %zmm1");
+ asm volatile("vcvttph2uqq 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvttph2uqq %xmm2, %xmm1");
+ asm volatile("vcvttph2uqq 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvttph2uqq %xmm2, %ymm1");
+ asm volatile("vcvttph2uqq 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvttph2uw %zmm2, %zmm1");
+ asm volatile("vcvttph2uw 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvttph2uw %xmm2, %xmm1");
+ asm volatile("vcvttph2uw 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvttph2uw %ymm2, %ymm1");
+ asm volatile("vcvttph2uw 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvttph2w %zmm2, %zmm1");
+ asm volatile("vcvttph2w 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvttph2w %xmm2, %xmm1");
+ asm volatile("vcvttph2w 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvttph2w %ymm2, %ymm1");
+ asm volatile("vcvttph2w 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvttsh2si %xmm1, %eax");
+ asm volatile("vcvttsh2si 0x12345678(%rax,%rcx,8), %eax");
+ asm volatile("vcvttsh2si 0x12345678(%eax,%ecx,8), %eax");
+ asm volatile("vcvttsh2si %xmm1, %rax");
+ asm volatile("vcvttsh2si 0x12345678(%rax,%rcx,8), %rax");
+ asm volatile("vcvttsh2si 0x12345678(%eax,%ecx,8), %rax");
+ asm volatile("vcvttsh2usi %xmm1, %eax");
+ asm volatile("vcvttsh2usi 0x12345678(%rax,%rcx,8), %eax");
+ asm volatile("vcvttsh2usi 0x12345678(%eax,%ecx,8), %eax");
+ asm volatile("vcvttsh2usi %xmm1, %rax");
+ asm volatile("vcvttsh2usi 0x12345678(%rax,%rcx,8), %rax");
+ asm volatile("vcvttsh2usi 0x12345678(%eax,%ecx,8), %rax");
+ asm volatile("vcvtudq2ph %zmm2, %ymm1");
+ asm volatile("vcvtudq2ph 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvtudq2ph 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtudq2ph %xmm2, %xmm1");
+ asm volatile("vcvtudq2ph %ymm2, %xmm1");
+ asm volatile("vcvtuqq2ph %zmm2, %xmm1");
+ asm volatile("vcvtuqq2ph %xmm2, %xmm1");
+ asm volatile("vcvtuqq2ph %ymm2, %xmm1");
+ asm volatile("vcvtusi2sh %eax, %xmm2, %xmm1");
+ asm volatile("vcvtusi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vcvtusi2sh %rax, %xmm2, %xmm1");
+ asm volatile("vcvtusi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vcvtuw2ph %zmm2, %zmm1");
+ asm volatile("vcvtuw2ph 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtuw2ph %xmm2, %xmm1");
+ asm volatile("vcvtuw2ph 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtuw2ph %ymm2, %ymm1");
+ asm volatile("vcvtuw2ph 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtw2ph %zmm2, %zmm1");
+ asm volatile("vcvtw2ph 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtw2ph %xmm2, %xmm1");
+ asm volatile("vcvtw2ph 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtw2ph %ymm2, %ymm1");
+ asm volatile("vcvtw2ph 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vdivph %zmm3, %zmm2, %zmm1");
+ asm volatile("vdivph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vdivph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vdivph %xmm3, %xmm2, %xmm1");
+ asm volatile("vdivph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vdivph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vdivph %ymm3, %ymm2, %ymm1");
+ asm volatile("vdivph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vdivph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vdivsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vdivsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vdivsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfcmaddcph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfcmaddcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfcmaddcph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfcmaddcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfcmaddcph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfcmaddcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfcmaddcsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfcmaddcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfcmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfcmulcph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfcmulcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfcmulcph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfcmulcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfcmulcph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfcmulcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfcmulcsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfcmulcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfcmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmadd132ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmadd132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmadd132ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmadd132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmadd132ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmadd132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmadd132sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmadd132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmadd213ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmadd213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmadd213ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmadd213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmadd213ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmadd213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmadd213sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmadd213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmadd231ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmadd231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmadd231ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmadd231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmadd231ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmadd231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmadd231sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmadd231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmaddcph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmaddcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmaddcph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmaddcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmaddcph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmaddcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmaddcsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmaddcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmaddsub132ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmaddsub132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmaddsub132ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmaddsub132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmaddsub132ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmaddsub132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmaddsub213ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmaddsub213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmaddsub213ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmaddsub213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmaddsub213ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmaddsub213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmaddsub231ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmaddsub231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmaddsub231ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmaddsub231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmaddsub231ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmaddsub231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmsub132ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmsub132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmsub132ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmsub132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmsub132ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmsub132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmsub132sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmsub132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmsub213ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmsub213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmsub213ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmsub213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmsub213ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmsub213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmsub213sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmsub213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmsub231ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmsub231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmsub231ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmsub231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmsub231ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmsub231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmsub231sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmsub231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmsubadd132ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmsubadd132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmsubadd132ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmsubadd132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmsubadd132ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmsubadd132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmsubadd213ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmsubadd213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmsubadd213ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmsubadd213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmsubadd213ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmsubadd213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmsubadd231ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmsubadd231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmsubadd231ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmsubadd231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmsubadd231ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmsubadd231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmulcph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmulcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmulcph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmulcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmulcph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmulcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmulcsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmulcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmadd132ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfnmadd132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfnmadd132ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmadd132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmadd132ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfnmadd132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfnmadd132sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmadd132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfnmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmadd213ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfnmadd213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfnmadd213ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmadd213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmadd213ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfnmadd213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfnmadd213sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmadd213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfnmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmadd231ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfnmadd231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfnmadd231ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmadd231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmadd231ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfnmadd231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfnmadd231sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmadd231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfnmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmsub132ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfnmsub132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfnmsub132ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmsub132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmsub132ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfnmsub132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfnmsub132sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmsub132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfnmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmsub213ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfnmsub213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfnmsub213ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmsub213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmsub213ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfnmsub213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfnmsub213sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmsub213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfnmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmsub231ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfnmsub231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfnmsub231ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmsub231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmsub231ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfnmsub231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfnmsub231sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmsub231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vfnmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfpclassph $0x12, %zmm1, %k5");
+ asm volatile("vfpclassph $0x12, %xmm1, %k5");
+ asm volatile("vfpclassph $0x12, %ymm1, %k5");
+ asm volatile("vfpclasssh $0x12, %xmm1, %k5");
+ asm volatile("vfpclasssh $0x12, 0x12345678(%rax,%rcx,8), %k5");
+ asm volatile("vfpclasssh $0x12, 0x12345678(%eax,%ecx,8), %k5");
+ asm volatile("vgetexpph %zmm2, %zmm1");
+ asm volatile("vgetexpph 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vgetexpph %xmm2, %xmm1");
+ asm volatile("vgetexpph 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vgetexpph %ymm2, %ymm1");
+ asm volatile("vgetexpph 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vgetexpsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vgetexpsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vgetexpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vgetmantph $0x12, %zmm2, %zmm1");
+ asm volatile("vgetmantph $0x12, 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vgetmantph $0x12, %xmm2, %xmm1");
+ asm volatile("vgetmantph $0x12, 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vgetmantph $0x12, %ymm2, %ymm1");
+ asm volatile("vgetmantph $0x12, 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vgetmantsh $0x12, %xmm3, %xmm2, %xmm1");
+ asm volatile("vgetmantsh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vgetmantsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vmaxph %zmm3, %zmm2, %zmm1");
+ asm volatile("vmaxph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vmaxph %xmm3, %xmm2, %xmm1");
+ asm volatile("vmaxph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vmaxph %ymm3, %ymm2, %ymm1");
+ asm volatile("vmaxph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vmaxsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vmaxsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vmaxsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vminph %zmm3, %zmm2, %zmm1");
+ asm volatile("vminph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vminph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vminph %xmm3, %xmm2, %xmm1");
+ asm volatile("vminph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vminph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vminph %ymm3, %ymm2, %ymm1");
+ asm volatile("vminph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vminph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vminsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vminsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vminsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vmovsh %xmm1, 0x12345678(%rax,%rcx,8)");
+ asm volatile("vmovsh %xmm1, 0x12345678(%eax,%ecx,8)");
+ asm volatile("vmovsh 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vmovsh 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vmovsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vmovw %xmm1, %eax");
+ asm volatile("vmovw %xmm1, 0x12345678(%rax,%rcx,8)");
+ asm volatile("vmovw %xmm1, 0x12345678(%eax,%ecx,8)");
+ asm volatile("vmovw %eax, %xmm1");
+ asm volatile("vmovw 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vmovw 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vmulph %zmm3, %zmm2, %zmm1");
+ asm volatile("vmulph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vmulph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vmulph %xmm3, %xmm2, %xmm1");
+ asm volatile("vmulph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vmulph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vmulph %ymm3, %ymm2, %ymm1");
+ asm volatile("vmulph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vmulph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vmulsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vmulsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vmulsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vrcpph %zmm2, %zmm1");
+ asm volatile("vrcpph 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vrcpph %xmm2, %xmm1");
+ asm volatile("vrcpph 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vrcpph %ymm2, %ymm1");
+ asm volatile("vrcpph 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vrcpsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vrcpsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vrcpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vreduceph $0x12, %zmm2, %zmm1");
+ asm volatile("vreduceph $0x12, 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vreduceph $0x12, %xmm2, %xmm1");
+ asm volatile("vreduceph $0x12, 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vreduceph $0x12, %ymm2, %ymm1");
+ asm volatile("vreduceph $0x12, 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vreducesh $0x12, %xmm3, %xmm2, %xmm1");
+ asm volatile("vreducesh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vreducesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vrndscaleph $0x12, %zmm2, %zmm1");
+ asm volatile("vrndscaleph $0x12, 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vrndscaleph $0x12, %xmm2, %xmm1");
+ asm volatile("vrndscaleph $0x12, 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vrndscaleph $0x12, %ymm2, %ymm1");
+ asm volatile("vrndscaleph $0x12, 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vrndscalesh $0x12, %xmm3, %xmm2, %xmm1");
+ asm volatile("vrndscalesh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vrndscalesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vrsqrtph %zmm2, %zmm1");
+ asm volatile("vrsqrtph 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vrsqrtph %xmm2, %xmm1");
+ asm volatile("vrsqrtph 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vrsqrtph %ymm2, %ymm1");
+ asm volatile("vrsqrtph 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vrsqrtsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vrsqrtsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vrsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vscalefph %zmm3, %zmm2, %zmm1");
+ asm volatile("vscalefph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vscalefph %xmm3, %xmm2, %xmm1");
+ asm volatile("vscalefph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vscalefph %ymm3, %ymm2, %ymm1");
+ asm volatile("vscalefph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vscalefsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vscalefsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vscalefsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vsqrtph %zmm2, %zmm1");
+ asm volatile("vsqrtph 0x12345678(%rax,%rcx,8), %zmm1");
+ asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vsqrtph %xmm2, %xmm1");
+ asm volatile("vsqrtph 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vsqrtph %ymm2, %ymm1");
+ asm volatile("vsqrtph 0x12345678(%rax,%rcx,8), %ymm1");
+ asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vsqrtsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vsqrtsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vsubph %zmm3, %zmm2, %zmm1");
+ asm volatile("vsubph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+ asm volatile("vsubph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vsubph %xmm3, %xmm2, %xmm1");
+ asm volatile("vsubph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vsubph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vsubph %ymm3, %ymm2, %ymm1");
+ asm volatile("vsubph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+ asm volatile("vsubph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vsubsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vsubsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+ asm volatile("vsubsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vucomish %xmm2, %xmm1");
+ asm volatile("vucomish 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("vucomish 0x12345678(%eax,%ecx,8), %xmm1");
+
#else /* #ifdef __x86_64__ */
/* bound r32, mem (same op code as EVEX prefix) */
@@ -3670,8 +4388,479 @@ int main(void)
asm volatile("notrack bnd jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */
asm volatile("notrack bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */
+ /* AVX512-FP16 */
+
+ asm volatile("vaddph %zmm3, %zmm2, %zmm1");
+ asm volatile("vaddph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vaddph %xmm3, %xmm2, %xmm1");
+ asm volatile("vaddph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vaddph %ymm3, %ymm2, %ymm1");
+ asm volatile("vaddph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vaddsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vaddsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vcmpph $0x12, %zmm3, %zmm2, %k5");
+ asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %zmm2, %k5");
+ asm volatile("vcmpph $0x12, %xmm3, %xmm2, %k5");
+ asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5");
+ asm volatile("vcmpph $0x12, %ymm3, %ymm2, %k5");
+ asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %ymm2, %k5");
+ asm volatile("vcmpsh $0x12, %xmm3, %xmm2, %k5");
+ asm volatile("vcmpsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5");
+ asm volatile("vcomish %xmm2, %xmm1");
+ asm volatile("vcomish 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtdq2ph %zmm2, %ymm1");
+ asm volatile("vcvtdq2ph 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtdq2ph %xmm2, %xmm1");
+ asm volatile("vcvtdq2ph %ymm2, %xmm1");
+ asm volatile("vcvtpd2ph %zmm2, %xmm1");
+ asm volatile("vcvtpd2ph %xmm2, %xmm1");
+ asm volatile("vcvtpd2ph %ymm2, %xmm1");
+ asm volatile("vcvtph2dq %ymm2, %zmm1");
+ asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtph2dq %xmm2, %xmm1");
+ asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2dq %xmm2, %ymm1");
+ asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtph2pd %xmm2, %zmm1");
+ asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtph2pd %xmm2, %xmm1");
+ asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2pd %xmm2, %ymm1");
+ asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtph2ps %ymm2, %zmm1");
+ asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtph2ps %xmm2, %xmm1");
+ asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2ps %xmm2, %ymm1");
+ asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtph2ps %xmm2, %xmm1");
+ asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2ps %xmm2, %ymm1");
+ asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtph2psx %ymm2, %zmm1");
+ asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtph2psx %xmm2, %xmm1");
+ asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2psx %xmm2, %ymm1");
+ asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtph2qq %xmm2, %zmm1");
+ asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtph2qq %xmm2, %xmm1");
+ asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2qq %xmm2, %ymm1");
+ asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtph2udq %ymm2, %zmm1");
+ asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtph2udq %xmm2, %xmm1");
+ asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2udq %xmm2, %ymm1");
+ asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtph2uqq %xmm2, %zmm1");
+ asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtph2uqq %xmm2, %xmm1");
+ asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2uqq %xmm2, %ymm1");
+ asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtph2uw %zmm2, %zmm1");
+ asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtph2uw %xmm2, %xmm1");
+ asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2uw %ymm2, %ymm1");
+ asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtph2w %zmm2, %zmm1");
+ asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtph2w %xmm2, %xmm1");
+ asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtph2w %ymm2, %ymm1");
+ asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtps2ph $0x12, %zmm1, 0x12345678(%eax,%ecx,8)");
+ asm volatile("vcvtps2ph $0x12, %zmm2, %ymm1");
+ asm volatile("vcvtps2ph $0x12, %ymm1, 0x12345678(%eax,%ecx,8)");
+ asm volatile("vcvtps2ph $0x12, %xmm1, 0x12345678(%eax,%ecx,8)");
+ asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1");
+ asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1");
+ asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1");
+ asm volatile("vcvtps2ph $0x12, %ymm2, 0x12345678(%eax,%ecx,8)");
+ asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1");
+ asm volatile("vcvtps2ph $0x12, %xmm2, 0x12345678(%eax,%ecx,8)");
+ asm volatile("vcvtps2phx %zmm2, %ymm1");
+ asm volatile("vcvtps2phx 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtps2phx %xmm2, %xmm1");
+ asm volatile("vcvtps2phx %ymm2, %xmm1");
+ asm volatile("vcvtqq2ph %zmm2, %xmm1");
+ asm volatile("vcvtqq2ph %xmm2, %xmm1");
+ asm volatile("vcvtqq2ph %ymm2, %xmm1");
+ asm volatile("vcvtsd2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vcvtsh2sd 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vcvtsh2si 0x12345678(%eax,%ecx,8), %eax");
+ asm volatile("vcvtsh2ss 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vcvtsh2usi %xmm1, %eax");
+ asm volatile("vcvtsh2usi 0x12345678(%eax,%ecx,8), %eax");
+ asm volatile("vcvtsi2sh %eax, %xmm2, %xmm1");
+ asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vcvtss2sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vcvtss2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vcvttph2dq %ymm2, %zmm1");
+ asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvttph2dq %xmm2, %xmm1");
+ asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvttph2dq %xmm2, %ymm1");
+ asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvttph2qq %xmm2, %zmm1");
+ asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvttph2qq %xmm2, %xmm1");
+ asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvttph2qq %xmm2, %ymm1");
+ asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvttph2udq %ymm2, %zmm1");
+ asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvttph2udq %xmm2, %xmm1");
+ asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvttph2udq %xmm2, %ymm1");
+ asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvttph2uqq %xmm2, %zmm1");
+ asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvttph2uqq %xmm2, %xmm1");
+ asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvttph2uqq %xmm2, %ymm1");
+ asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvttph2uw %zmm2, %zmm1");
+ asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvttph2uw %xmm2, %xmm1");
+ asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvttph2uw %ymm2, %ymm1");
+ asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvttph2w %zmm2, %zmm1");
+ asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvttph2w %xmm2, %xmm1");
+ asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvttph2w %ymm2, %ymm1");
+ asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvttsh2si %xmm1, %eax");
+ asm volatile("vcvttsh2si 0x12345678(%eax,%ecx,8), %eax");
+ asm volatile("vcvttsh2usi %xmm1, %eax");
+ asm volatile("vcvttsh2usi 0x12345678(%eax,%ecx,8), %eax");
+ asm volatile("vcvtudq2ph %zmm2, %ymm1");
+ asm volatile("vcvtudq2ph 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtudq2ph %xmm2, %xmm1");
+ asm volatile("vcvtudq2ph %ymm2, %xmm1");
+ asm volatile("vcvtuqq2ph %zmm2, %xmm1");
+ asm volatile("vcvtuqq2ph %xmm2, %xmm1");
+ asm volatile("vcvtuqq2ph %ymm2, %xmm1");
+ asm volatile("vcvtusi2sh %eax, %xmm2, %xmm1");
+ asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vcvtuw2ph %zmm2, %zmm1");
+ asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtuw2ph %xmm2, %xmm1");
+ asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtuw2ph %ymm2, %ymm1");
+ asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vcvtw2ph %zmm2, %zmm1");
+ asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vcvtw2ph %xmm2, %xmm1");
+ asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vcvtw2ph %ymm2, %ymm1");
+ asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vdivph %zmm3, %zmm2, %zmm1");
+ asm volatile("vdivph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vdivph %xmm3, %xmm2, %xmm1");
+ asm volatile("vdivph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vdivph %ymm3, %ymm2, %ymm1");
+ asm volatile("vdivph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vdivsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vdivsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfcmaddcph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfcmaddcph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfcmaddcph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfcmaddcsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfcmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfcmulcph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfcmulcph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfcmulcph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfcmulcsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfcmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmadd132ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmadd132ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmadd132ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmadd132sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmadd213ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmadd213ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmadd213ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmadd213sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmadd231ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmadd231ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmadd231ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmadd231sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmaddcph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmaddcph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmaddcph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmaddcsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmaddsub132ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmaddsub132ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmaddsub132ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmaddsub213ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmaddsub213ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmaddsub213ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmaddsub231ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmaddsub231ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmaddsub231ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmsub132ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmsub132ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmsub132ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmsub132sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmsub213ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmsub213ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmsub213ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmsub213sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmsub231ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmsub231ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmsub231ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmsub231sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmsubadd132ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmsubadd132ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmsubadd132ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmsubadd213ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmsubadd213ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmsubadd213ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmsubadd231ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmsubadd231ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmsubadd231ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmulcph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfmulcph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfmulcph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfmulcsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmadd132ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfnmadd132ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmadd132ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfnmadd132sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmadd213ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfnmadd213ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmadd213ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfnmadd213sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmadd231ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfnmadd231ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmadd231ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfnmadd231sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmsub132ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfnmsub132ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmsub132ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfnmsub132sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmsub213ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfnmsub213ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmsub213ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfnmsub213sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmsub231ph %zmm3, %zmm2, %zmm1");
+ asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vfnmsub231ph %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfnmsub231ph %ymm3, %ymm2, %ymm1");
+ asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vfnmsub231sh %xmm3, %xmm2, %xmm1");
+ asm volatile("vfnmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vfpclassph $0x12, %zmm1, %k5");
+ asm volatile("vfpclassph $0x12, %xmm1, %k5");
+ asm volatile("vfpclassph $0x12, %ymm1, %k5");
+ asm volatile("vfpclasssh $0x12, %xmm1, %k5");
+ asm volatile("vfpclasssh $0x12, 0x12345678(%eax,%ecx,8), %k5");
+ asm volatile("vgetexpph %zmm2, %zmm1");
+ asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vgetexpph %xmm2, %xmm1");
+ asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vgetexpph %ymm2, %ymm1");
+ asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vgetexpsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vgetexpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vgetmantph $0x12, %zmm2, %zmm1");
+ asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vgetmantph $0x12, %xmm2, %xmm1");
+ asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vgetmantph $0x12, %ymm2, %ymm1");
+ asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vgetmantsh $0x12, %xmm3, %xmm2, %xmm1");
+ asm volatile("vgetmantsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vmaxph %zmm3, %zmm2, %zmm1");
+ asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vmaxph %xmm3, %xmm2, %xmm1");
+ asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vmaxph %ymm3, %ymm2, %ymm1");
+ asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vmaxsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vmaxsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vminph %zmm3, %zmm2, %zmm1");
+ asm volatile("vminph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vminph %xmm3, %xmm2, %xmm1");
+ asm volatile("vminph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vminph %ymm3, %ymm2, %ymm1");
+ asm volatile("vminph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vminsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vminsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vmovsh %xmm1, 0x12345678(%eax,%ecx,8)");
+ asm volatile("vmovsh 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vmovsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vmovw %xmm1, %eax");
+ asm volatile("vmovw %xmm1, 0x12345678(%eax,%ecx,8)");
+ asm volatile("vmovw %eax, %xmm1");
+ asm volatile("vmovw 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vmulph %zmm3, %zmm2, %zmm1");
+ asm volatile("vmulph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vmulph %xmm3, %xmm2, %xmm1");
+ asm volatile("vmulph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vmulph %ymm3, %ymm2, %ymm1");
+ asm volatile("vmulph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vmulsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vmulsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vrcpph %zmm2, %zmm1");
+ asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vrcpph %xmm2, %xmm1");
+ asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vrcpph %ymm2, %ymm1");
+ asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vrcpsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vrcpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vreduceph $0x12, %zmm2, %zmm1");
+ asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vreduceph $0x12, %xmm2, %xmm1");
+ asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vreduceph $0x12, %ymm2, %ymm1");
+ asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vreducesh $0x12, %xmm3, %xmm2, %xmm1");
+ asm volatile("vreducesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vrndscaleph $0x12, %zmm2, %zmm1");
+ asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vrndscaleph $0x12, %xmm2, %xmm1");
+ asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vrndscaleph $0x12, %ymm2, %ymm1");
+ asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vrndscalesh $0x12, %xmm3, %xmm2, %xmm1");
+ asm volatile("vrndscalesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vrsqrtph %zmm2, %zmm1");
+ asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vrsqrtph %xmm2, %xmm1");
+ asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vrsqrtph %ymm2, %ymm1");
+ asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vrsqrtsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vrsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vscalefph %zmm3, %zmm2, %zmm1");
+ asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vscalefph %xmm3, %xmm2, %xmm1");
+ asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vscalefph %ymm3, %ymm2, %ymm1");
+ asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vscalefsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vscalefsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vsqrtph %zmm2, %zmm1");
+ asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %zmm1");
+ asm volatile("vsqrtph %xmm2, %xmm1");
+ asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %xmm1");
+ asm volatile("vsqrtph %ymm2, %ymm1");
+ asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %ymm1");
+ asm volatile("vsqrtsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vsubph %zmm3, %zmm2, %zmm1");
+ asm volatile("vsubph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+ asm volatile("vsubph %xmm3, %xmm2, %xmm1");
+ asm volatile("vsubph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vsubph %ymm3, %ymm2, %ymm1");
+ asm volatile("vsubph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+ asm volatile("vsubsh %xmm3, %xmm2, %xmm1");
+ asm volatile("vsubsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+ asm volatile("vucomish %xmm2, %xmm1");
+ asm volatile("vucomish 0x12345678(%eax,%ecx,8), %xmm1");
+
#endif /* #ifndef __x86_64__ */
+ /* Prediction history reset */
+
+ asm volatile("hreset $0");
+
+ /* Serialize instruction execution */
+
+ asm volatile("serialize");
+
+ /* TSX suspend load address tracking */
+
+ asm volatile("xresldtrk");
+ asm volatile("xsusldtrk");
+
/* SGX */
asm volatile("encls");
diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index f924246eff78..8d9b55959256 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -29,7 +29,7 @@ struct evsel *arch_evlist__leader(struct list_head *list)
__evlist__for_each_entry(list, evsel) {
if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") &&
- evsel->name && strstr(evsel->name, "slots"))
+ evsel->name && strcasestr(evsel->name, "slots"))
return evsel;
}
return first;
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index 740ae764537e..134612bde0cb 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -106,7 +106,7 @@ static void nest_epollfd(void)
printinfo("Nesting level(s): %d\n", nested);
epollfdp = calloc(nested, sizeof(int));
- if (!epollfd)
+ if (!epollfdp)
err(EXIT_FAILURE, "calloc");
for (i = 0; i < nested; i++) {
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index dec24dc0e767..a8785dec5ca6 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -1115,6 +1115,7 @@ enum perf_ftrace_subcommand {
int cmd_ftrace(int argc, const char **argv)
{
int ret;
+ int (*cmd_func)(struct perf_ftrace *) = NULL;
struct perf_ftrace ftrace = {
.tracer = DEFAULT_TRACER,
.target = { .uid = UINT_MAX, },
@@ -1221,6 +1222,28 @@ int cmd_ftrace(int argc, const char **argv)
goto out_delete_filters;
}
+ switch (subcmd) {
+ case PERF_FTRACE_TRACE:
+ if (!argc && target__none(&ftrace.target))
+ ftrace.target.system_wide = true;
+ cmd_func = __cmd_ftrace;
+ break;
+ case PERF_FTRACE_LATENCY:
+ if (list_empty(&ftrace.filters)) {
+ pr_err("Should provide a function to measure\n");
+ parse_options_usage(ftrace_usage, options, "T", 1);
+ ret = -EINVAL;
+ goto out_delete_filters;
+ }
+ cmd_func = __cmd_latency;
+ break;
+ case PERF_FTRACE_NONE:
+ default:
+ pr_err("Invalid subcommand\n");
+ ret = -EINVAL;
+ goto out_delete_filters;
+ }
+
ret = target__validate(&ftrace.target);
if (ret) {
char errbuf[512];
@@ -1248,27 +1271,7 @@ int cmd_ftrace(int argc, const char **argv)
goto out_delete_evlist;
}
- switch (subcmd) {
- case PERF_FTRACE_TRACE:
- if (!argc && target__none(&ftrace.target))
- ftrace.target.system_wide = true;
- ret = __cmd_ftrace(&ftrace);
- break;
- case PERF_FTRACE_LATENCY:
- if (list_empty(&ftrace.filters)) {
- pr_err("Should provide a function to measure\n");
- parse_options_usage(ftrace_usage, options, "T", 1);
- ret = -EINVAL;
- goto out_delete_evlist;
- }
- ret = __cmd_latency(&ftrace);
- break;
- case PERF_FTRACE_NONE:
- default:
- pr_err("Invalid subcommand\n");
- ret = -EINVAL;
- break;
- }
+ ret = cmd_func(&ftrace);
out_delete_evlist:
evlist__delete(ftrace.evlist);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index abae8184e171..fa478ddcd18a 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -463,7 +463,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
return -EINVAL;
if (PRINT_FIELD(WEIGHT) &&
- evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_TYPE, "WEIGHT", PERF_OUTPUT_WEIGHT))
+ evsel__do_check_stype(evsel, PERF_SAMPLE_WEIGHT_TYPE, "WEIGHT", PERF_OUTPUT_WEIGHT, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(SYM) &&
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 32844d8a0ea5..52b137a184a6 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1536,13 +1536,20 @@ static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
return fprintf(fp, " ? ");
}
+static pid_t workload_pid = -1;
static bool done = false;
static bool interrupted = false;
-static void sig_handler(int sig)
+static void sighandler_interrupt(int sig __maybe_unused)
{
- done = true;
- interrupted = sig == SIGINT;
+ done = interrupted = true;
+}
+
+static void sighandler_chld(int sig __maybe_unused, siginfo_t *info,
+ void *context __maybe_unused)
+{
+ if (info->si_pid == workload_pid)
+ done = true;
}
static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp)
@@ -3938,7 +3945,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
bool draining = false;
trace->live = true;
- signal(SIGCHLD, sig_handler);
if (!trace->raw_augmented_syscalls) {
if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
@@ -4018,6 +4024,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
fprintf(trace->output, "Couldn't run the workload!\n");
goto out_delete_evlist;
}
+ workload_pid = evlist->workload.pid;
}
err = evlist__open(evlist);
@@ -4887,10 +4894,16 @@ int cmd_trace(int argc, const char **argv)
const char * const trace_subcommands[] = { "record", NULL };
int err = -1;
char bf[BUFSIZ];
+ struct sigaction sigchld_act;
signal(SIGSEGV, sighandler_dump_stack);
signal(SIGFPE, sighandler_dump_stack);
- signal(SIGINT, sig_handler);
+ signal(SIGINT, sighandler_interrupt);
+
+ memset(&sigchld_act, 0, sizeof(sigchld_act));
+ sigchld_act.sa_flags = SA_SIGINFO;
+ sigchld_act.sa_sigaction = sighandler_chld;
+ sigaction(SIGCHLD, &sigchld_act, NULL);
trace.evlist = evlist__new();
trace.sctbl = syscalltbl__new();
diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/attr/README
index a36f49fb4dbe..1116fc6bf2ac 100644
--- a/tools/perf/tests/attr/README
+++ b/tools/perf/tests/attr/README
@@ -45,8 +45,10 @@ Following tests are defined (with perf commands):
perf record -d kill (test-record-data)
perf record -F 100 kill (test-record-freq)
perf record -g kill (test-record-graph-default)
+ perf record -g kill (test-record-graph-default-aarch64)
perf record --call-graph dwarf kill (test-record-graph-dwarf)
perf record --call-graph fp kill (test-record-graph-fp)
+ perf record --call-graph fp kill (test-record-graph-fp-aarch64)
perf record --group -e cycles,instructions kill (test-record-group)
perf record -e '{cycles,instructions}' kill (test-record-group1)
perf record -e '{cycles/period=1/,instructions/period=2/}:S' kill (test-record-group2)
diff --git a/tools/perf/tests/attr/test-record-graph-default b/tools/perf/tests/attr/test-record-graph-default
index 5d8234d50845..f0a18b4ea4f5 100644
--- a/tools/perf/tests/attr/test-record-graph-default
+++ b/tools/perf/tests/attr/test-record-graph-default
@@ -2,6 +2,8 @@
command = record
args = --no-bpf-event -g kill >/dev/null 2>&1
ret = 1
+# arm64 enables registers in the default mode (fp)
+arch = !aarch64
[event:base-record]
sample_type=295
diff --git a/tools/perf/tests/attr/test-record-graph-default-aarch64 b/tools/perf/tests/attr/test-record-graph-default-aarch64
new file mode 100644
index 000000000000..e98d62efb6f7
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-graph-default-aarch64
@@ -0,0 +1,9 @@
+[config]
+command = record
+args = --no-bpf-event -g kill >/dev/null 2>&1
+ret = 1
+arch = aarch64
+
+[event:base-record]
+sample_type=4391
+sample_regs_user=1073741824
diff --git a/tools/perf/tests/attr/test-record-graph-fp b/tools/perf/tests/attr/test-record-graph-fp
index 5630521c0b0f..a6e60e839205 100644
--- a/tools/perf/tests/attr/test-record-graph-fp
+++ b/tools/perf/tests/attr/test-record-graph-fp
@@ -2,6 +2,8 @@
command = record
args = --no-bpf-event --call-graph fp kill >/dev/null 2>&1
ret = 1
+# arm64 enables registers in fp mode
+arch = !aarch64
[event:base-record]
sample_type=295
diff --git a/tools/perf/tests/attr/test-record-graph-fp-aarch64 b/tools/perf/tests/attr/test-record-graph-fp-aarch64
new file mode 100644
index 000000000000..cbeea9971285
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-graph-fp-aarch64
@@ -0,0 +1,9 @@
+[config]
+command = record
+args = --no-bpf-event --call-graph fp kill >/dev/null 2>&1
+ret = 1
+arch = aarch64
+
+[event:base-record]
+sample_type=4391
+sample_regs_user=1073741824
diff --git a/tools/perf/tests/sigtrap.c b/tools/perf/tests/sigtrap.c
index 1f147fe6595f..e32ece90e164 100644
--- a/tools/perf/tests/sigtrap.c
+++ b/tools/perf/tests/sigtrap.c
@@ -22,19 +22,6 @@
#include "tests.h"
#include "../perf-sys.h"
-/*
- * PowerPC and S390 do not support creation of instruction breakpoints using the
- * perf_event interface.
- *
- * Just disable the test for these architectures until these issues are
- * resolved.
- */
-#if defined(__powerpc__) || defined(__s390x__)
-#define BP_ACCOUNT_IS_SUPPORTED 0
-#else
-#define BP_ACCOUNT_IS_SUPPORTED 1
-#endif
-
#define NUM_THREADS 5
static struct {
@@ -135,7 +122,7 @@ static int test__sigtrap(struct test_suite *test __maybe_unused, int subtest __m
char sbuf[STRERR_BUFSIZE];
int i, fd, ret = TEST_FAIL;
- if (!BP_ACCOUNT_IS_SUPPORTED) {
+ if (!BP_SIGNAL_IS_SUPPORTED) {
pr_debug("Test not supported on this architecture");
return TEST_SKIP;
}
diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh
index 3109d7b05e11..3d278785fe57 100755
--- a/tools/perf/trace/beauty/prctl_option.sh
+++ b/tools/perf/trace/beauty/prctl_option.sh
@@ -4,7 +4,7 @@
[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
printf "static const char *prctl_options[] = {\n"
-regex='^#define[[:space:]]+PR_(\w+)[[:space:]]*([[:xdigit:]]+).*'
+regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*\/.*)?$'
egrep $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \
sed -r "s/$regex/\2 \1/g" | \
sort -n | xargs printf "\t[%s] = \"%s\",\n"
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 01900689dc00..8190a124b99d 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -2036,6 +2036,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
memset(&objdump_process, 0, sizeof(objdump_process));
objdump_process.argv = objdump_argv;
objdump_process.out = -1;
+ objdump_process.err = -1;
if (start_command(&objdump_process)) {
pr_err("Failure starting to run %s\n", command);
err = -1;
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 7ecfaac7536a..16ec605a9fe4 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -1220,9 +1220,10 @@ bpf__obj_config_map(struct bpf_object *obj,
pr_debug("ERROR: Invalid map config option '%s'\n", map_opt);
err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT;
out:
- free(map_name);
if (!err)
*key_scan_pos += strlen(map_opt);
+
+ free(map_name);
return err;
}
diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c
index 631e34a0b66f..ac60c08e8e2a 100644
--- a/tools/perf/util/bpf_counter_cgroup.c
+++ b/tools/perf/util/bpf_counter_cgroup.c
@@ -266,7 +266,7 @@ static int bperf_cgrp__read(struct evsel *evsel)
idx = evsel->core.idx;
err = bpf_map_lookup_elem(reading_map_fd, &idx, values);
if (err) {
- pr_err("bpf map lookup falied: idx=%u, event=%s, cgrp=%s\n",
+ pr_err("bpf map lookup failed: idx=%u, event=%s, cgrp=%s\n",
idx, evsel__name(evsel), evsel->cgrp->name);
goto out;
}
diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c
index 2285b1eb3128..a9a909db8cc7 100644
--- a/tools/perf/util/branch.c
+++ b/tools/perf/util/branch.c
@@ -49,7 +49,9 @@ const char *branch_type_name(int type)
"SYSCALL",
"SYSRET",
"COND_CALL",
- "COND_RET"
+ "COND_RET",
+ "ERET",
+ "IRQ"
};
if (type >= 0 && type < PERF_BR_MAX)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 4f672f7d008c..8b95fb3c4d7b 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -50,8 +50,6 @@ struct cs_etm_auxtrace {
u8 timeless_decoding;
u8 snapshot_mode;
u8 data_queued;
- u8 sample_branches;
- u8 sample_instructions;
int num_cpu;
u64 latest_kernel_timestamp;
@@ -410,8 +408,8 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
{
struct cs_etm_packet *tmp;
- if (etm->sample_branches || etm->synth_opts.last_branch ||
- etm->sample_instructions) {
+ if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
+ etm->synth_opts.instructions) {
/*
* Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
* the next incoming packet.
@@ -1365,7 +1363,6 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
err = cs_etm__synth_event(session, &attr, id);
if (err)
return err;
- etm->sample_branches = true;
etm->branches_sample_type = attr.sample_type;
etm->branches_id = id;
id += 1;
@@ -1389,7 +1386,6 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
err = cs_etm__synth_event(session, &attr, id);
if (err)
return err;
- etm->sample_instructions = true;
etm->instructions_sample_type = attr.sample_type;
etm->instructions_id = id;
id += 1;
@@ -1420,7 +1416,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
tidq->prev_packet->last_instr_taken_branch)
cs_etm__update_last_branch_rb(etmq, tidq);
- if (etm->sample_instructions &&
+ if (etm->synth_opts.instructions &&
tidq->period_instructions >= etm->instructions_sample_period) {
/*
* Emit instruction sample periodically
@@ -1503,7 +1499,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
}
}
- if (etm->sample_branches) {
+ if (etm->synth_opts.branches) {
bool generate_sample = false;
/* Generate sample for tracing on packet */
@@ -1557,6 +1553,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
goto swap_packet;
if (etmq->etm->synth_opts.last_branch &&
+ etmq->etm->synth_opts.instructions &&
tidq->prev_packet->sample_type == CS_ETM_RANGE) {
u64 addr;
@@ -1582,7 +1579,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
}
- if (etm->sample_branches &&
+ if (etm->synth_opts.branches &&
tidq->prev_packet->sample_type == CS_ETM_RANGE) {
err = cs_etm__synth_branch_sample(etmq, tidq);
if (err)
@@ -1614,6 +1611,7 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq,
* the trace.
*/
if (etmq->etm->synth_opts.last_branch &&
+ etmq->etm->synth_opts.instructions &&
tidq->prev_packet->sample_type == CS_ETM_RANGE) {
u64 addr;
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index f5d260b1df4d..15a4547d608e 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -44,10 +44,6 @@ int perf_data__create_dir(struct perf_data *data, int nr)
if (!files)
return -ENOMEM;
- data->dir.version = PERF_DIR_VERSION;
- data->dir.files = files;
- data->dir.nr = nr;
-
for (i = 0; i < nr; i++) {
struct perf_data_file *file = &files[i];
@@ -62,6 +58,9 @@ int perf_data__create_dir(struct perf_data *data, int nr)
file->fd = ret;
}
+ data->dir.version = PERF_DIR_VERSION;
+ data->dir.files = files;
+ data->dir.nr = nr;
return 0;
out_err:
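Moving the three assignments after the loop is a commit-on-success pattern: caller-visible fields are populated only once every per-file open has succeeded, so the error path leaves the struct untouched. A generic sketch with hypothetical names:

struct demo_dir { int nr; int *fds; };

static int demo_open(int i) { return i; } /* stand-in for open(2) */

static int demo_create_dir(struct demo_dir *out, int *fds, int nr)
{
	for (int i = 0; i < nr; i++) {
		int fd = demo_open(i);

		if (fd < 0)
			return -1;	/* error path: *out never touched */
		fds[i] = fd;
	}
	/* Commit caller-visible state only on full success. */
	out->fds = fds;
	out->nr = nr;
	return 0;
}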
diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c
index 7f234215147d..57f02beef023 100644
--- a/tools/perf/util/evlist-hybrid.c
+++ b/tools/perf/util/evlist-hybrid.c
@@ -154,8 +154,8 @@ int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list)
perf_cpu_map__put(matched_cpus);
perf_cpu_map__put(unmatched_cpus);
}
-
- ret = (unmatched_count == events_nr) ? -1 : 0;
+ if (events_nr)
+ ret = (unmatched_count == events_nr) ? -1 : 0;
out:
perf_cpu_map__put(cpus);
return ret;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index eaad04e1672a..41a66a48cbdf 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -346,7 +346,7 @@ struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affin
{
struct evlist_cpu_iterator itr = {
.container = evlist,
- .evsel = evlist__first(evlist),
+ .evsel = NULL,
.cpu_map_idx = 0,
.evlist_cpu_map_idx = 0,
.evlist_cpu_map_nr = perf_cpu_map__nr(evlist->core.all_cpus),
@@ -354,16 +354,22 @@ struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affin
.affinity = affinity,
};
- if (itr.affinity) {
- itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0);
- affinity__set(itr.affinity, itr.cpu.cpu);
- itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu);
- /*
- * If this CPU isn't in the evsel's cpu map then advance through
- * the list.
- */
- if (itr.cpu_map_idx == -1)
- evlist_cpu_iterator__next(&itr);
+ if (evlist__empty(evlist)) {
+ /* Ensure the empty list doesn't iterate. */
+ itr.evlist_cpu_map_idx = itr.evlist_cpu_map_nr;
+ } else {
+ itr.evsel = evlist__first(evlist);
+ if (itr.affinity) {
+ itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0);
+ affinity__set(itr.affinity, itr.cpu.cpu);
+ itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu);
+ /*
+ * If this CPU isn't in the evsel's cpu map then advance
+ * through the list.
+ */
+ if (itr.cpu_map_idx == -1)
+ evlist_cpu_iterator__next(&itr);
+ }
}
return itr;
}
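The rework only calls evlist__first() once the list is known to be non-empty; previously an empty evlist handed the iterator a bogus first element. The shape of the guard, with hypothetical names:

struct demo_iter { int idx, end; void *cur; };

static struct demo_iter demo_begin(void **items, int n)
{
	struct demo_iter it = { .idx = 0, .end = n, .cur = NULL };

	if (n == 0)
		it.idx = it.end;	/* begin == end: loop body never runs */
	else
		it.cur = items[0];	/* safe: at least one element exists */
	return it;
}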
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
index 5acf053fca7d..aa0c5179836d 100644
--- a/tools/perf/util/include/linux/linkage.h
+++ b/tools/perf/util/include/linux/linkage.h
@@ -50,41 +50,32 @@
#ifndef SYM_END
#define SYM_END(name, sym_type) \
.type name sym_type ASM_NL \
+ .set .L__sym_size_##name, .-name ASM_NL \
.size name, .-name
#endif
-/*
- * SYM_FUNC_START_ALIAS -- use where there are two global names for one
- * function
- */
-#ifndef SYM_FUNC_START_ALIAS
-#define SYM_FUNC_START_ALIAS(name) \
- SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
+/* SYM_ALIAS -- use only if you have to */
+#ifndef SYM_ALIAS
+#define SYM_ALIAS(alias, name, sym_type, linkage) \
+ linkage(alias) ASM_NL \
+ .set alias, name ASM_NL \
+ .type alias sym_type ASM_NL \
+ .set .L__sym_size_##alias, .L__sym_size_##name ASM_NL \
+ .size alias, .L__sym_size_##alias
#endif
/* SYM_FUNC_START -- use for global functions */
#ifndef SYM_FUNC_START
-/*
- * The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two
- * later.
- */
#define SYM_FUNC_START(name) \
SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
#endif
/* SYM_FUNC_START_LOCAL -- use for local functions */
#ifndef SYM_FUNC_START_LOCAL
-/* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */
#define SYM_FUNC_START_LOCAL(name) \
SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)
#endif
-/* SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function */
-#ifndef SYM_FUNC_END_ALIAS
-#define SYM_FUNC_END_ALIAS(name) \
- SYM_END(name, SYM_T_FUNC)
-#endif
-
/* SYM_FUNC_START_WEAK -- use for weak functions */
#ifndef SYM_FUNC_START_WEAK
#define SYM_FUNC_START_WEAK(name) \
@@ -96,9 +87,32 @@
* SYM_FUNC_START_WEAK, ...
*/
#ifndef SYM_FUNC_END
-/* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */
#define SYM_FUNC_END(name) \
SYM_END(name, SYM_T_FUNC)
#endif
+/*
+ * SYM_FUNC_ALIAS -- define a global alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS
+#define SYM_FUNC_ALIAS(alias, name) \
+ SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_GLOBAL)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS_LOCAL -- define a local alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS_LOCAL
+#define SYM_FUNC_ALIAS_LOCAL(alias, name) \
+ SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_LOCAL)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS_WEAK -- define a weak global alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS_WEAK
+#define SYM_FUNC_ALIAS_WEAK(alias, name) \
+ SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK)
+#endif
+
#endif /* PERF_LINUX_LINKAGE_H_ */
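These macros are consumed from assembly sources; a hedged, x86-flavoured usage sketch (names hypothetical). The implementation keeps one canonical name, and aliases attached after SYM_FUNC_END inherit the size recorded by SYM_END:

SYM_FUNC_START(__demo_memcpy)
	/* ... implementation ... */
	ret
SYM_FUNC_END(__demo_memcpy)

/* A strong and a weak alias sharing __demo_memcpy's code and size. */
SYM_FUNC_ALIAS(demo_memcpy, __demo_memcpy)
SYM_FUNC_ALIAS_WEAK(memcpy, __demo_memcpy)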
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index f70ba56912d4..394550003693 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2073,6 +2073,7 @@ static void ip__resolve_ams(struct thread *thread,
ams->addr = ip;
ams->al_addr = al.addr;
+ ams->al_level = al.level;
ams->ms.maps = al.maps;
ams->ms.sym = al.sym;
ams->ms.map = al.map;
@@ -2092,6 +2093,7 @@ static void ip__resolve_data(struct thread *thread,
ams->addr = addr;
ams->al_addr = al.addr;
+ ams->al_level = al.level;
ams->ms.maps = al.maps;
ams->ms.sym = al.sym;
ams->ms.map = al.map;
diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h
index 7d22ade082c8..e08817b0c30f 100644
--- a/tools/perf/util/map_symbol.h
+++ b/tools/perf/util/map_symbol.h
@@ -18,6 +18,7 @@ struct addr_map_symbol {
struct map_symbol ms;
u64 addr;
u64 al_addr;
+ char al_level;
u64 phys_addr;
u64 data_page_size;
};
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 9739b05b999e..24997925ae00 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1648,6 +1648,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
{
struct parse_events_term *term;
struct list_head *list = NULL;
+ struct list_head *orig_head = NULL;
struct perf_pmu *pmu = NULL;
int ok = 0;
char *config;
@@ -1674,7 +1675,6 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
}
list_add_tail(&term->list, head);
-
/* Add it for all PMUs that support the alias */
list = malloc(sizeof(struct list_head));
if (!list)
@@ -1687,13 +1687,15 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
list_for_each_entry(alias, &pmu->aliases, list) {
if (!strcasecmp(alias->name, str)) {
+ parse_events_copy_term_list(head, &orig_head);
if (!parse_events_add_pmu(parse_state, list,
- pmu->name, head,
+ pmu->name, orig_head,
true, true)) {
pr_debug("%s -> %s/%s/\n", str,
pmu->name, alias->str);
ok++;
}
+ parse_events_terms__delete(orig_head);
}
}
}
@@ -2193,7 +2195,7 @@ int perf_pmu__test_parse_init(void)
for (i = 0; i < ARRAY_SIZE(symbols); i++, tmp++) {
tmp->type = symbols[i].type;
tmp->symbol = strdup(symbols[i].symbol);
- if (!list->symbol)
+ if (!tmp->symbol)
goto err_free;
}
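The copy/delete pair added above suggests parse_events_add_pmu() modifies the term list it is handed, so each matching PMU must receive a private copy rather than sharing head. The ownership pattern, sketched with hypothetical helpers:

list_for_each_entry(pmu, &pmus, list) {
	struct list_head *copy = NULL;

	copy_term_list(head, &copy);	/* fresh copy for this PMU */
	if (!add_for_pmu(pmu, copy))
		ok++;
	delete_term_list(copy);		/* free our private copy */
}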
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 47b7531f51da..98af3fa4ea35 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -52,7 +52,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
- bit_name(HW_INDEX),
+ bit_name(TYPE_SAVE), bit_name(HW_INDEX),
{ .name = NULL, }
};
#undef bit_name
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 2c0d30f08e78..498b05708db5 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1503,11 +1503,12 @@ static int machines__deliver_event(struct machines *machines,
++evlist->stats.nr_unknown_id;
return 0;
}
- dump_sample(evsel, event, sample, perf_env__arch(machine->env));
if (machine == NULL) {
++evlist->stats.nr_unprocessable_samples;
+ dump_sample(evsel, event, sample, perf_env__arch(NULL));
return 0;
}
+ dump_sample(evsel, event, sample, perf_env__arch(machine->env));
return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
case PERF_RECORD_MMAP:
return tool->mmap(tool, event, sample, machine);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index cfba8c337783..2da081ef532b 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -915,7 +915,7 @@ static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf,
struct addr_map_symbol *from = &he->branch_info->from;
return _hist_entry__sym_snprintf(&from->ms, from->al_addr,
- he->level, bf, size, width);
+ from->al_level, bf, size, width);
}
return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
@@ -928,7 +928,7 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf,
struct addr_map_symbol *to = &he->branch_info->to;
return _hist_entry__sym_snprintf(&to->ms, to->al_addr,
- he->level, bf, size, width);
+ to->al_level, bf, size, width);
}
return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 5db83e51ceef..9cbe351b141f 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -585,15 +585,16 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c
alias = list_prepare_entry(counter, &(evlist->core.entries), core.node);
list_for_each_entry_continue (alias, &evlist->core.entries, core.node) {
- if (strcmp(evsel__name(alias), evsel__name(counter)) ||
- alias->scale != counter->scale ||
- alias->cgrp != counter->cgrp ||
- strcmp(alias->unit, counter->unit) ||
- evsel__is_clock(alias) != evsel__is_clock(counter) ||
- !strcmp(alias->pmu_name, counter->pmu_name))
- break;
- alias->merged_stat = true;
- cb(config, alias, data, false);
+ /* Merge events with the same name, etc. but on different PMUs. */
+ if (!strcmp(evsel__name(alias), evsel__name(counter)) &&
+ alias->scale == counter->scale &&
+ alias->cgrp == counter->cgrp &&
+ !strcmp(alias->unit, counter->unit) &&
+ evsel__is_clock(alias) == evsel__is_clock(counter) &&
+ strcmp(alias->pmu_name, counter->pmu_name)) {
+ alias->merged_stat = true;
+ cb(config, alias, data, false);
+ }
}
}
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b2ed3140a1fa..dfde9eada224 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -231,7 +231,7 @@ void symbols__fixup_end(struct rb_root_cached *symbols)
prev = curr;
curr = rb_entry(nd, struct symbol, rb_node);
- if (prev->end == prev->start && prev->end != curr->start)
+ if (prev->end == prev->start || prev->end != curr->start)
arch__symbols__fixup_end(prev, curr);
}
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 70f095624a0b..b654de0841f8 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1784,6 +1784,25 @@ int __machine__synthesize_threads(struct machine *machine, struct perf_tool *too
perf_event__handler_t process, bool needs_mmap,
bool data_mmap, unsigned int nr_threads_synthesize)
{
+ /*
+ * When perf runs in a non-root PID namespace and the namespace's proc
+ * FS is not mounted, nsinfo__is_in_root_namespace() returns false.
+ * In this case, the proc FS comes from the parent namespace, so the
+ * perf tool would wrongly gather process info from its parent PID
+ * namespace.
+ *
+ * To avoid the confusion of running in a child PID namespace while
+ * synthesizing thread info from the parent PID namespace, return
+ * failure with a warning.
+ */
+ if (!nsinfo__is_in_root_namespace()) {
+ pr_err("Perf runs in non-root PID namespace but it tries to ");
+ pr_err("gather process info from its parent PID namespace.\n");
+ pr_err("Please mount the proc file system properly, e.g. ");
+ pr_err("add the option '--mount-proc' for unshare command.\n");
+ return -EPERM;
+ }
+
if (target__has_task(target))
return perf_event__synthesize_thread_map(tool, threads, process, machine,
needs_mmap, data_mmap);
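nsinfo__is_in_root_namespace()'s implementation is not part of this hunk; semantically it asks whether the mounted proc agrees with the tool's own PID namespace. A hypothetical standalone equivalent compares the PID-namespace inode of self against that of proc's init, which is why 'unshare --fork --pid --mount-proc' satisfies the check:

#include <stdbool.h>
#include <sys/stat.h>

static bool proc_matches_our_pid_ns(void)
{
	struct stat self, init;

	if (stat("/proc/self/ns/pid", &self) || stat("/proc/1/ns/pid", &init))
		return false;
	/* Same namespace object => proc reflects our PID namespace. */
	return self.st_ino == init.st_ino && self.st_dev == init.st_dev;
}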
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index 3b1594447f29..e9b6de314654 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -143,9 +143,9 @@ UTIL_HEADERS = utils/helpers/helpers.h utils/idle_monitor/cpupower-monitor.h \
utils/helpers/bitmask.h \
utils/idle_monitor/idle_monitors.h utils/idle_monitor/idle_monitors.def
-LIB_HEADERS = lib/cpufreq.h lib/cpupower.h lib/cpuidle.h
-LIB_SRC = lib/cpufreq.c lib/cpupower.c lib/cpuidle.c
-LIB_OBJS = lib/cpufreq.o lib/cpupower.o lib/cpuidle.o
+LIB_HEADERS = lib/cpufreq.h lib/cpupower.h lib/cpuidle.h lib/acpi_cppc.h
+LIB_SRC = lib/cpufreq.c lib/cpupower.c lib/cpuidle.c lib/acpi_cppc.c
+LIB_OBJS = lib/cpufreq.o lib/cpupower.o lib/cpuidle.o lib/acpi_cppc.o
LIB_OBJS := $(addprefix $(OUTPUT),$(LIB_OBJS))
override CFLAGS += -pipe
diff --git a/tools/power/cpupower/ToDo b/tools/power/cpupower/TODO
index b196a139a3e4..b196a139a3e4 100644
--- a/tools/power/cpupower/ToDo
+++ b/tools/power/cpupower/TODO
diff --git a/tools/power/cpupower/lib/acpi_cppc.c b/tools/power/cpupower/lib/acpi_cppc.c
new file mode 100644
index 000000000000..c401ac331e9f
--- /dev/null
+++ b/tools/power/cpupower/lib/acpi_cppc.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "cpupower_intern.h"
+#include "acpi_cppc.h"
+
+/* ACPI CPPC sysfs access ***********************************************/
+
+static int acpi_cppc_read_file(unsigned int cpu, const char *fname,
+ char *buf, size_t buflen)
+{
+ char path[SYSFS_PATH_MAX];
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/acpi_cppc/%s",
+ cpu, fname);
+ return cpupower_read_sysfs(path, buf, buflen);
+}
+
+static const char * const acpi_cppc_value_files[] = {
+ [HIGHEST_PERF] = "highest_perf",
+ [LOWEST_PERF] = "lowest_perf",
+ [NOMINAL_PERF] = "nominal_perf",
+ [LOWEST_NONLINEAR_PERF] = "lowest_nonlinear_perf",
+ [LOWEST_FREQ] = "lowest_freq",
+ [NOMINAL_FREQ] = "nominal_freq",
+ [REFERENCE_PERF] = "reference_perf",
+ [WRAPAROUND_TIME] = "wraparound_time"
+};
+
+unsigned long acpi_cppc_get_data(unsigned int cpu, enum acpi_cppc_value which)
+{
+ unsigned long long value;
+ unsigned int len;
+ char linebuf[MAX_LINE_LEN];
+ char *endp;
+
+ if (which >= MAX_CPPC_VALUE_FILES)
+ return 0;
+
+ len = acpi_cppc_read_file(cpu, acpi_cppc_value_files[which],
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return 0;
+
+ value = strtoull(linebuf, &endp, 0);
+
+ if (endp == linebuf || errno == ERANGE)
+ return 0;
+
+ return value;
+}
diff --git a/tools/power/cpupower/lib/acpi_cppc.h b/tools/power/cpupower/lib/acpi_cppc.h
new file mode 100644
index 000000000000..85ca83080316
--- /dev/null
+++ b/tools/power/cpupower/lib/acpi_cppc.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ACPI_CPPC_H__
+#define __ACPI_CPPC_H__
+
+enum acpi_cppc_value {
+ HIGHEST_PERF,
+ LOWEST_PERF,
+ NOMINAL_PERF,
+ LOWEST_NONLINEAR_PERF,
+ LOWEST_FREQ,
+ NOMINAL_FREQ,
+ REFERENCE_PERF,
+ WRAPAROUND_TIME,
+ MAX_CPPC_VALUE_FILES
+};
+
+unsigned long acpi_cppc_get_data(unsigned int cpu,
+ enum acpi_cppc_value which);
+
+#endif /* __ACPI_CPPC_H__ */
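A minimal caller sketch for the new helper; note that 0 doubles as the error sentinel for a missing or unreadable sysfs file:

#include <stdio.h>
#include "acpi_cppc.h"

static void show_nominal_perf(unsigned int cpu)
{
	unsigned long perf = acpi_cppc_get_data(cpu, NOMINAL_PERF);

	if (!perf)	/* 0: file absent, unreadable, or out of range */
		printf("cpu%u: no CPPC data\n", cpu);
	else
		printf("cpu%u: nominal_perf=%lu\n", cpu, perf);
}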
diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c
index c3b56db8b921..1516d23c17c9 100644
--- a/tools/power/cpupower/lib/cpufreq.c
+++ b/tools/power/cpupower/lib/cpufreq.c
@@ -83,20 +83,21 @@ static const char *cpufreq_value_files[MAX_CPUFREQ_VALUE_READ_FILES] = {
[STATS_NUM_TRANSITIONS] = "stats/total_trans"
};
-
-static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu,
- enum cpufreq_value which)
+unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu,
+ const char **table,
+ unsigned int index,
+ unsigned int size)
{
unsigned long value;
unsigned int len;
char linebuf[MAX_LINE_LEN];
char *endp;
- if (which >= MAX_CPUFREQ_VALUE_READ_FILES)
+ if (!table || index >= size || !table[index])
return 0;
- len = sysfs_cpufreq_read_file(cpu, cpufreq_value_files[which],
- linebuf, sizeof(linebuf));
+ len = sysfs_cpufreq_read_file(cpu, table[index], linebuf,
+ sizeof(linebuf));
if (len == 0)
return 0;
@@ -109,6 +110,14 @@ static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu,
return value;
}
+static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu,
+ enum cpufreq_value which)
+{
+ return cpufreq_get_sysfs_value_from_table(cpu, cpufreq_value_files,
+ which,
+ MAX_CPUFREQ_VALUE_READ_FILES);
+}
+
/* read access to files which contain one string */
enum cpufreq_string {
@@ -124,7 +133,7 @@ static const char *cpufreq_string_files[MAX_CPUFREQ_STRING_FILES] = {
static char *sysfs_cpufreq_get_one_string(unsigned int cpu,
- enum cpufreq_string which)
+ enum cpufreq_string which)
{
char linebuf[MAX_LINE_LEN];
char *result;
diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h
index 95f4fd9e2656..2f3c84035806 100644
--- a/tools/power/cpupower/lib/cpufreq.h
+++ b/tools/power/cpupower/lib/cpufreq.h
@@ -203,6 +203,18 @@ int cpufreq_modify_policy_governor(unsigned int cpu, char *governor);
int cpufreq_set_frequency(unsigned int cpu,
unsigned long target_frequency);
+/*
+ * get the sysfs value from a specific table
+ *
+ * Read the value from the sysfs file named in the given table. This
+ * only works if the cpufreq driver exposes the corresponding sysfs
+ * interfaces.
+ */
+
+unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu,
+ const char **table,
+ unsigned int index,
+ unsigned int size);
+
#ifdef __cplusplus
}
#endif
diff --git a/tools/power/cpupower/man/cpupower-frequency-info.1 b/tools/power/cpupower/man/cpupower-frequency-info.1
index 6aa8d239dff9..dd545b499480 100644
--- a/tools/power/cpupower/man/cpupower-frequency-info.1
+++ b/tools/power/cpupower/man/cpupower-frequency-info.1
@@ -53,6 +53,9 @@ human\-readable output for the \-f, \-w, \-s and \-y parameters.
\fB\-n\fR \fB\-\-no-rounding\fR
Output frequencies and latencies without rounding off values.
.TP
+\fB\-c\fR \fB\-\-perf\fR
+Get the performance and frequency capabilities of CPPC, read directly from the hardware (only available on hardware with CPPC support).
+.TP
.SH "REMARKS"
.LP
By default only values of core zero are displayed. How to display settings of
diff --git a/tools/power/cpupower/man/cpupower-idle-set.1 b/tools/power/cpupower/man/cpupower-idle-set.1
index 21916cff7516..8cef3c71e19e 100644
--- a/tools/power/cpupower/man/cpupower-idle-set.1
+++ b/tools/power/cpupower/man/cpupower-idle-set.1
@@ -4,7 +4,7 @@
cpupower\-idle\-set \- Utility to set cpu idle state specific kernel options
.SH "SYNTAX"
.LP
-cpupower [ \-c cpulist ] idle\-info [\fIoptions\fP]
+cpupower [ \-c cpulist ] idle\-set [\fIoptions\fP]
.SH "DESCRIPTION"
.LP
The cpupower idle\-set subcommand allows to set cpu idle, also called cpu
diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c
index f9895e31ff5a..0646f615fe2d 100644
--- a/tools/power/cpupower/utils/cpufreq-info.c
+++ b/tools/power/cpupower/utils/cpufreq-info.c
@@ -84,43 +84,6 @@ static void proc_cpufreq_output(void)
}
static int no_rounding;
-static void print_speed(unsigned long speed)
-{
- unsigned long tmp;
-
- if (no_rounding) {
- if (speed > 1000000)
- printf("%u.%06u GHz", ((unsigned int) speed/1000000),
- ((unsigned int) speed%1000000));
- else if (speed > 1000)
- printf("%u.%03u MHz", ((unsigned int) speed/1000),
- (unsigned int) (speed%1000));
- else
- printf("%lu kHz", speed);
- } else {
- if (speed > 1000000) {
- tmp = speed%10000;
- if (tmp >= 5000)
- speed += 10000;
- printf("%u.%02u GHz", ((unsigned int) speed/1000000),
- ((unsigned int) (speed%1000000)/10000));
- } else if (speed > 100000) {
- tmp = speed%1000;
- if (tmp >= 500)
- speed += 1000;
- printf("%u MHz", ((unsigned int) speed/1000));
- } else if (speed > 1000) {
- tmp = speed%100;
- if (tmp >= 50)
- speed += 100;
- printf("%u.%01u MHz", ((unsigned int) speed/1000),
- ((unsigned int) (speed%1000)/100));
- }
- }
-
- return;
-}
-
static void print_duration(unsigned long duration)
{
unsigned long tmp;
@@ -183,9 +146,12 @@ static int get_boost_mode_x86(unsigned int cpu)
printf(_(" Supported: %s\n"), support ? _("yes") : _("no"));
printf(_(" Active: %s\n"), active ? _("yes") : _("no"));
- if ((cpupower_cpu_info.vendor == X86_VENDOR_AMD &&
- cpupower_cpu_info.family >= 0x10) ||
- cpupower_cpu_info.vendor == X86_VENDOR_HYGON) {
+ if (cpupower_cpu_info.vendor == X86_VENDOR_AMD &&
+ cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE) {
+ return 0;
+ } else if ((cpupower_cpu_info.vendor == X86_VENDOR_AMD &&
+ cpupower_cpu_info.family >= 0x10) ||
+ cpupower_cpu_info.vendor == X86_VENDOR_HYGON) {
ret = decode_pstates(cpu, b_states, pstates, &pstate_no);
if (ret)
return ret;
@@ -254,11 +220,11 @@ static int get_boost_mode(unsigned int cpu)
if (freqs) {
printf(_(" boost frequency steps: "));
while (freqs->next) {
- print_speed(freqs->frequency);
+ print_speed(freqs->frequency, no_rounding);
printf(", ");
freqs = freqs->next;
}
- print_speed(freqs->frequency);
+ print_speed(freqs->frequency, no_rounding);
printf("\n");
cpufreq_put_available_frequencies(freqs);
}
@@ -277,7 +243,7 @@ static int get_freq_kernel(unsigned int cpu, unsigned int human)
return -EINVAL;
}
if (human) {
- print_speed(freq);
+ print_speed(freq, no_rounding);
} else
printf("%lu", freq);
printf(_(" (asserted by call to kernel)\n"));
@@ -296,7 +262,7 @@ static int get_freq_hardware(unsigned int cpu, unsigned int human)
return -EINVAL;
}
if (human) {
- print_speed(freq);
+ print_speed(freq, no_rounding);
} else
printf("%lu", freq);
printf(_(" (asserted by call to hardware)\n"));
@@ -316,9 +282,9 @@ static int get_hardware_limits(unsigned int cpu, unsigned int human)
if (human) {
printf(_(" hardware limits: "));
- print_speed(min);
+ print_speed(min, no_rounding);
printf(" - ");
- print_speed(max);
+ print_speed(max, no_rounding);
printf("\n");
} else {
printf("%lu %lu\n", min, max);
@@ -350,9 +316,9 @@ static int get_policy(unsigned int cpu)
return -EINVAL;
}
printf(_(" current policy: frequency should be within "));
- print_speed(policy->min);
+ print_speed(policy->min, no_rounding);
printf(_(" and "));
- print_speed(policy->max);
+ print_speed(policy->max, no_rounding);
printf(".\n ");
printf(_("The governor \"%s\" may decide which speed to use\n"
@@ -436,7 +402,7 @@ static int get_freq_stats(unsigned int cpu, unsigned int human)
struct cpufreq_stats *stats = cpufreq_get_stats(cpu, &total_time);
while (stats) {
if (human) {
- print_speed(stats->frequency);
+ print_speed(stats->frequency, no_rounding);
printf(":%.2f%%",
(100.0 * stats->time_in_state) / total_time);
} else
@@ -472,6 +438,17 @@ static int get_latency(unsigned int cpu, unsigned int human)
return 0;
}
+/* --performance / -c */
+
+static int get_perf_cap(unsigned int cpu)
+{
+ if (cpupower_cpu_info.vendor == X86_VENDOR_AMD &&
+ cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE)
+ amd_pstate_show_perf_and_freq(cpu, no_rounding);
+
+ return 0;
+}
+
static void debug_output_one(unsigned int cpu)
{
struct cpufreq_available_frequencies *freqs;
@@ -486,11 +463,11 @@ static void debug_output_one(unsigned int cpu)
if (freqs) {
printf(_(" available frequency steps: "));
while (freqs->next) {
- print_speed(freqs->frequency);
+ print_speed(freqs->frequency, no_rounding);
printf(", ");
freqs = freqs->next;
}
- print_speed(freqs->frequency);
+ print_speed(freqs->frequency, no_rounding);
printf("\n");
cpufreq_put_available_frequencies(freqs);
}
@@ -500,6 +477,7 @@ static void debug_output_one(unsigned int cpu)
if (get_freq_hardware(cpu, 1) < 0)
get_freq_kernel(cpu, 1);
get_boost_mode(cpu);
+ get_perf_cap(cpu);
}
static struct option info_opts[] = {
@@ -518,6 +496,7 @@ static struct option info_opts[] = {
{"proc", no_argument, NULL, 'o'},
{"human", no_argument, NULL, 'm'},
{"no-rounding", no_argument, NULL, 'n'},
+ {"performance", no_argument, NULL, 'c'},
{ },
};
@@ -531,7 +510,7 @@ int cmd_freq_info(int argc, char **argv)
int output_param = 0;
do {
- ret = getopt_long(argc, argv, "oefwldpgrasmybn", info_opts,
+ ret = getopt_long(argc, argv, "oefwldpgrasmybnc", info_opts,
NULL);
switch (ret) {
case '?':
@@ -554,6 +533,7 @@ int cmd_freq_info(int argc, char **argv)
case 'e':
case 's':
case 'y':
+ case 'c':
if (output_param) {
output_param = -1;
cont = 0;
@@ -660,6 +640,9 @@ int cmd_freq_info(int argc, char **argv)
case 'y':
ret = get_latency(cpu, human);
break;
+ case 'c':
+ ret = get_perf_cap(cpu);
+ break;
}
if (ret)
return ret;
diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c
index 97f2c857048e..c519cc89c97f 100644
--- a/tools/power/cpupower/utils/helpers/amd.c
+++ b/tools/power/cpupower/utils/helpers/amd.c
@@ -8,7 +8,10 @@
#include <pci/pci.h>
#include "helpers/helpers.h"
+#include "cpufreq.h"
+#include "acpi_cppc.h"
+/* ACPI P-States Helper Functions for AMD Processors ***************/
#define MSR_AMD_PSTATE_STATUS 0xc0010063
#define MSR_AMD_PSTATE 0xc0010064
#define MSR_AMD_PSTATE_LIMIT 0xc0010061
@@ -146,4 +149,78 @@ int amd_pci_get_num_boost_states(int *active, int *states)
pci_cleanup(pci_acc);
return 0;
}
+
+/* ACPI P-States Helper Functions for AMD Processors ***************/
+
+/* AMD P-State Helper Functions ************************************/
+enum amd_pstate_value {
+ AMD_PSTATE_HIGHEST_PERF,
+ AMD_PSTATE_MAX_FREQ,
+ AMD_PSTATE_LOWEST_NONLINEAR_FREQ,
+ MAX_AMD_PSTATE_VALUE_READ_FILES,
+};
+
+static const char *amd_pstate_value_files[MAX_AMD_PSTATE_VALUE_READ_FILES] = {
+ [AMD_PSTATE_HIGHEST_PERF] = "amd_pstate_highest_perf",
+ [AMD_PSTATE_MAX_FREQ] = "amd_pstate_max_freq",
+ [AMD_PSTATE_LOWEST_NONLINEAR_FREQ] = "amd_pstate_lowest_nonlinear_freq",
+};
+
+static unsigned long amd_pstate_get_data(unsigned int cpu,
+ enum amd_pstate_value value)
+{
+ return cpufreq_get_sysfs_value_from_table(cpu,
+ amd_pstate_value_files,
+ value,
+ MAX_AMD_PSTATE_VALUE_READ_FILES);
+}
+
+void amd_pstate_boost_init(unsigned int cpu, int *support, int *active)
+{
+ unsigned long highest_perf, nominal_perf, cpuinfo_min,
+ cpuinfo_max, amd_pstate_max;
+
+ highest_perf = amd_pstate_get_data(cpu, AMD_PSTATE_HIGHEST_PERF);
+ nominal_perf = acpi_cppc_get_data(cpu, NOMINAL_PERF);
+
+ *support = highest_perf > nominal_perf ? 1 : 0;
+ if (!(*support))
+ return;
+
+ cpufreq_get_hardware_limits(cpu, &cpuinfo_min, &cpuinfo_max);
+ amd_pstate_max = amd_pstate_get_data(cpu, AMD_PSTATE_MAX_FREQ);
+
+ *active = cpuinfo_max == amd_pstate_max ? 1 : 0;
+}
+
+void amd_pstate_show_perf_and_freq(unsigned int cpu, int no_rounding)
+{
+ printf(_(" AMD PSTATE Highest Performance: %lu. Maximum Frequency: "),
+ amd_pstate_get_data(cpu, AMD_PSTATE_HIGHEST_PERF));
+ /*
+ * If boost isn't active, cpuinfo_max doesn't reflect the real maximum
+ * frequency, so read it back from the amd-pstate sysfs entry.
+ */
+ print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_MAX_FREQ), no_rounding);
+ printf(".\n");
+
+ printf(_(" AMD PSTATE Nominal Performance: %lu. Nominal Frequency: "),
+ acpi_cppc_get_data(cpu, NOMINAL_PERF));
+ print_speed(acpi_cppc_get_data(cpu, NOMINAL_FREQ) * 1000,
+ no_rounding);
+ printf(".\n");
+
+ printf(_(" AMD PSTATE Lowest Non-linear Performance: %lu. Lowest Non-linear Frequency: "),
+ acpi_cppc_get_data(cpu, LOWEST_NONLINEAR_PERF));
+ print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_LOWEST_NONLINEAR_FREQ),
+ no_rounding);
+ printf(".\n");
+
+ printf(_(" AMD PSTATE Lowest Performance: %lu. Lowest Frequency: "),
+ acpi_cppc_get_data(cpu, LOWEST_PERF));
+ print_speed(acpi_cppc_get_data(cpu, LOWEST_FREQ) * 1000, no_rounding);
+ printf(".\n");
+}
+
+/* AMD P-State Helper Functions ************************************/
#endif /* defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c
index 72eb43593180..eae91f11d187 100644
--- a/tools/power/cpupower/utils/helpers/cpuid.c
+++ b/tools/power/cpupower/utils/helpers/cpuid.c
@@ -149,6 +149,19 @@ out:
if (ext_cpuid_level >= 0x80000008 &&
cpuid_ebx(0x80000008) & (1 << 4))
cpu_info->caps |= CPUPOWER_CAP_AMD_RDPRU;
+
+ if (cpupower_amd_pstate_enabled()) {
+ cpu_info->caps |= CPUPOWER_CAP_AMD_PSTATE;
+
+ /*
+ * If AMD P-State is enabled, the firmware treats the AMD
+ * P-State interface as the high-priority one, so clear the
+ * legacy P-state capability bits below.
+ */
+ cpu_info->caps &= ~CPUPOWER_CAP_AMD_CPB;
+ cpu_info->caps &= ~CPUPOWER_CAP_AMD_CPB_MSR;
+ cpu_info->caps &= ~CPUPOWER_CAP_AMD_HW_PSTATE;
+ cpu_info->caps &= ~CPUPOWER_CAP_AMD_PSTATEDEF;
+ }
}
if (cpu_info->vendor == X86_VENDOR_INTEL) {
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index 33ffacee7fcb..96e4bede078b 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -11,6 +11,7 @@
#include <libintl.h>
#include <locale.h>
+#include <stdbool.h>
#include "helpers/bitmask.h"
#include <cpupower.h>
@@ -73,6 +74,7 @@ enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL,
#define CPUPOWER_CAP_AMD_HW_PSTATE 0x00000100
#define CPUPOWER_CAP_AMD_PSTATEDEF 0x00000200
#define CPUPOWER_CAP_AMD_CPB_MSR 0x00000400
+#define CPUPOWER_CAP_AMD_PSTATE 0x00000800
#define CPUPOWER_AMD_CPBDIS 0x02000000
@@ -135,6 +137,16 @@ extern int decode_pstates(unsigned int cpu, int boost_states,
extern int cpufreq_has_boost_support(unsigned int cpu, int *support,
int *active, int * states);
+
+/* AMD P-State stuff **************************/
+bool cpupower_amd_pstate_enabled(void);
+void amd_pstate_boost_init(unsigned int cpu,
+ int *support, int *active);
+void amd_pstate_show_perf_and_freq(unsigned int cpu,
+ int no_rounding);
+
+/* AMD P-State stuff **************************/
+
/*
* CPUID functions returning a single datum
*/
@@ -167,6 +179,15 @@ static inline int cpufreq_has_boost_support(unsigned int cpu, int *support,
int *active, int * states)
{ return -1; }
+static inline bool cpupower_amd_pstate_enabled(void)
+{ return false; }
+static inline void amd_pstate_boost_init(unsigned int cpu, int *support,
+ int *active)
+{}
+static inline void amd_pstate_show_perf_and_freq(unsigned int cpu,
+ int no_rounding)
+{}
+
/* cpuid and cpuinfo helpers **************************/
static inline unsigned int cpuid_eax(unsigned int op) { return 0; };
@@ -184,5 +205,6 @@ extern struct bitmask *offline_cpus;
void get_cpustate(void);
void print_online_cpus(void);
void print_offline_cpus(void);
+void print_speed(unsigned long speed, int no_rounding);
#endif /* __CPUPOWERUTILS_HELPERS__ */
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index fc6e34511721..9547b29254a7 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -3,9 +3,11 @@
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
+#include <string.h>
#include "helpers/helpers.h"
#include "helpers/sysfs.h"
+#include "cpufreq.h"
#if defined(__i386__) || defined(__x86_64__)
@@ -39,6 +41,8 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
if (ret)
return ret;
}
+ } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE) {
+ amd_pstate_boost_init(cpu, support, active);
} else if (cpupower_cpu_info.caps & CPUPOWER_CAP_INTEL_IDA)
*support = *active = 1;
return 0;
@@ -83,6 +87,22 @@ int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val)
return 0;
}
+bool cpupower_amd_pstate_enabled(void)
+{
+ char *driver = cpufreq_get_driver(0);
+ bool ret = false;
+
+ if (!driver)
+ return ret;
+
+ if (!strcmp(driver, "amd-pstate"))
+ ret = true;
+
+ cpufreq_put_driver(driver);
+
+ return ret;
+}
+
#endif /* #if defined(__i386__) || defined(__x86_64__) */
/* get_cpustate
@@ -144,3 +164,43 @@ void print_offline_cpus(void)
printf(_("cpupower set operation was not performed on them\n"));
}
}
+
+/*
+ * print_speed
+ *
+ * Print the exact CPU frequency with appropriate unit
+ */
+void print_speed(unsigned long speed, int no_rounding)
+{
+ unsigned long tmp;
+
+ if (no_rounding) {
+ if (speed > 1000000)
+ printf("%u.%06u GHz", ((unsigned int)speed / 1000000),
+ ((unsigned int)speed % 1000000));
+ else if (speed > 1000)
+ printf("%u.%03u MHz", ((unsigned int)speed / 1000),
+ (unsigned int)(speed % 1000));
+ else
+ printf("%lu kHz", speed);
+ } else {
+ if (speed > 1000000) {
+ tmp = speed % 10000;
+ if (tmp >= 5000)
+ speed += 10000;
+ printf("%u.%02u GHz", ((unsigned int)speed / 1000000),
+ ((unsigned int)(speed % 1000000) / 10000));
+ } else if (speed > 100000) {
+ tmp = speed % 1000;
+ if (tmp >= 500)
+ speed += 1000;
+ printf("%u MHz", ((unsigned int)speed / 1000));
+ } else if (speed > 1000) {
+ tmp = speed % 100;
+ if (tmp >= 50)
+ speed += 100;
+ printf("%u.%01u MHz", ((unsigned int)speed / 1000),
+ ((unsigned int)(speed % 1000) / 100));
+ }
+ }
+}
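Worked examples of the two modes, derived from the branches above (hypothetical calls):

print_speed(2456789, 0);	/* "2.46 GHz": rounded to two decimals */
print_speed(2456789, 1);	/* "2.456789 GHz": exact */
print_speed(1800, 1);		/* "1.800 MHz" */
print_speed(800, 0);		/* prints nothing: rounded mode has no kHz branch */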
diff --git a/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
new file mode 100755
index 000000000000..2dea4032ac56
--- /dev/null
+++ b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
@@ -0,0 +1,354 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+# -*- coding: utf-8 -*-
+#
+""" This utility can be used to debug and tune the performance of the
+AMD P-State driver. It imports intel_pstate_tracer to analyze AMD P-State
+trace events.
+
+Prerequisites:
+ Python version 2.7.x or higher
+ gnuplot 5.0 or higher
+ gnuplot-py 1.8 or higher
+ (Most of the distributions have these required packages. They may be called
+ gnuplot-py, python-gnuplot or python3-gnuplot, gnuplot-nox, ... )
+
+ Kernel tracing must be enabled in the kernel config
+
+ see print_help(): for Usage and Output details
+
+"""
+from __future__ import print_function
+from datetime import datetime
+import subprocess
+import os
+import time
+import re
+import signal
+import sys
+import getopt
+import Gnuplot
+from numpy import *
+from decimal import *
+sys.path.append('../intel_pstate_tracer')
+#import intel_pstate_tracer
+import intel_pstate_tracer as ipt
+
+__license__ = "GPL version 2"
+
+MAX_CPUS = 256
+# Define the csv file columns
+C_COMM = 15
+C_ELAPSED = 14
+C_SAMPLE = 13
+C_DURATION = 12
+C_LOAD = 11
+C_TSC = 10
+C_APERF = 9
+C_MPERF = 8
+C_FREQ = 7
+C_MAX_PERF = 6
+C_DES_PERF = 5
+C_MIN_PERF = 4
+C_USEC = 3
+C_SEC = 2
+C_CPU = 1
+
+global sample_num, last_sec_cpu, last_usec_cpu, start_time, test_name, trace_file
+
+getcontext().prec = 11
+
+sample_num = 0
+last_sec_cpu = [0] * MAX_CPUS
+last_usec_cpu = [0] * MAX_CPUS
+
+def plot_per_cpu_freq(cpu_index):
+ """ Plot per cpu frequency """
+
+ file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+ if os.path.exists(file_name):
+ output_png = "cpu%03d_frequency.png" % cpu_index
+ g_plot = ipt.common_gnuplot_settings()
+ g_plot('set output "' + output_png + '"')
+ g_plot('set yrange [0:7]')
+ g_plot('set ytics 0, 1')
+ g_plot('set ylabel "CPU Frequency (GHz)"')
+ g_plot('set title "{} : frequency : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now()))
+ g_plot('set ylabel "CPU frequency"')
+ g_plot('set key off')
+ ipt.set_4_plot_linestyles(g_plot)
+ g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_FREQ))
+
+def plot_per_cpu_des_perf(cpu_index):
+ """ Plot per cpu desired perf """
+
+ file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+ if os.path.exists(file_name):
+ output_png = "cpu%03d_des_perf.png" % cpu_index
+ g_plot = ipt.common_gnuplot_settings()
+ g_plot('set output "' + output_png + '"')
+ g_plot('set yrange [0:255]')
+ g_plot('set ylabel "des perf"')
+ g_plot('set title "{} : cpu des perf : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now()))
+ g_plot('set key off')
+ ipt.set_4_plot_linestyles(g_plot)
+ g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_DES_PERF))
+
+def plot_per_cpu_load(cpu_index):
+ """ Plot per cpu load """
+
+ file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+ if os.path.exists(file_name):
+ output_png = "cpu%03d_load.png" % cpu_index
+ g_plot = ipt.common_gnuplot_settings()
+ g_plot('set output "' + output_png + '"')
+ g_plot('set yrange [0:100]')
+ g_plot('set ytics 0, 10')
+ g_plot('set ylabel "CPU load (percent)"')
+ g_plot('set title "{} : cpu load : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now()))
+ g_plot('set key off')
+ ipt.set_4_plot_linestyles(g_plot)
+ g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_LOAD))
+
+def plot_all_cpu_frequency():
+ """ Plot all cpu frequencies """
+
+ output_png = 'all_cpu_frequencies.png'
+ g_plot = ipt.common_gnuplot_settings()
+ g_plot('set output "' + output_png + '"')
+ g_plot('set ylabel "CPU Frequency (GHz)"')
+ g_plot('set title "{} : cpu frequencies : {:%F %H:%M}"'.format(test_name, datetime.now()))
+
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_FREQ)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def plot_all_cpu_des_perf():
+ """ Plot all cpu desired perf """
+
+ output_png = 'all_cpu_des_perf.png'
+ g_plot = ipt.common_gnuplot_settings()
+ g_plot('set output "' + output_png + '"')
+ g_plot('set ylabel "des perf"')
+ g_plot('set title "{} : cpu des perf : {:%F %H:%M}"'.format(test_name, datetime.now()))
+
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 255 ps 1 title i".format(C_ELAPSED, C_DES_PERF)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def plot_all_cpu_load():
+ """ Plot all cpu load """
+
+ output_png = 'all_cpu_load.png'
+ g_plot = ipt.common_gnuplot_settings()
+ g_plot('set output "' + output_png + '"')
+ g_plot('set yrange [0:100]')
+ g_plot('set ylabel "CPU load (percent)"')
+ g_plot('set title "{} : cpu load : {:%F %H:%M}"'.format(test_name, datetime.now()))
+
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 255 ps 1 title i".format(C_ELAPSED, C_LOAD)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def store_csv(cpu_int, time_pre_dec, time_post_dec, min_perf, des_perf, max_perf, freq_ghz, mperf, aperf, tsc, common_comm, load, duration_ms, sample_num, elapsed_time, cpu_mask):
+ """ Store master csv file information """
+
+ global graph_data_present
+
+ if cpu_mask[cpu_int] == 0:
+ return
+
+ try:
+ f_handle = open('cpu.csv', 'a')
+ string_buffer = "CPU_%03u, %05u, %06u, %u, %u, %u, %.4f, %u, %u, %u, %.2f, %.3f, %u, %.3f, %s\n" % (cpu_int, int(time_pre_dec), int(time_post_dec), int(min_perf), int(des_perf), int(max_perf), freq_ghz, int(mperf), int(aperf), int(tsc), load, duration_ms, sample_num, elapsed_time, common_comm)
+ f_handle.write(string_buffer)
+ f_handle.close()
+ except OSError:
+ print('IO error cpu.csv')
+ return
+
+ graph_data_present = True
+
+
+def cleanup_data_files():
+ """ clean up existing data files """
+
+ if os.path.exists('cpu.csv'):
+ os.remove('cpu.csv')
+ f_handle = open('cpu.csv', 'a')
+ f_handle.write('common_cpu, common_secs, common_usecs, min_perf, des_perf, max_perf, freq, mperf, aperf, tsc, load, duration_ms, sample_num, elapsed_time, common_comm')
+ f_handle.write('\n')
+ f_handle.close()
+
+def read_trace_data(file_name, cpu_mask):
+ """ Read and parse trace data """
+
+ global current_max_cpu
+ global sample_num, last_sec_cpu, last_usec_cpu, start_time
+
+ try:
+ data = open(file_name, 'r').read()
+ except OSError:
+ print('Error opening', file_name)
+ sys.exit(2)
+
+ for line in data.splitlines():
+ search_obj = \
+ re.search(r'(^(.*?)\[)((\d+)[^\]])(.*?)(\d+)([.])(\d+)(.*?amd_min_perf=)(\d+)(.*?amd_des_perf=)(\d+)(.*?amd_max_perf=)(\d+)(.*?freq=)(\d+)(.*?mperf=)(\d+)(.*?aperf=)(\d+)(.*?tsc=)(\d+)'
+ , line)
+
+ if search_obj:
+ cpu = search_obj.group(3)
+ cpu_int = int(cpu)
+ cpu = str(cpu_int)
+
+ time_pre_dec = search_obj.group(6)
+ time_post_dec = search_obj.group(8)
+ min_perf = search_obj.group(10)
+ des_perf = search_obj.group(12)
+ max_perf = search_obj.group(14)
+ freq = search_obj.group(16)
+ mperf = search_obj.group(18)
+ aperf = search_obj.group(20)
+ tsc = search_obj.group(22)
+
+ common_comm = search_obj.group(2).replace(' ', '')
+
+ if sample_num == 0:
+ start_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000)
+ sample_num += 1
+
+ if last_sec_cpu[cpu_int] == 0:
+ last_sec_cpu[cpu_int] = time_pre_dec
+ last_usec_cpu[cpu_int] = time_post_dec
+ else:
+ duration_us = (int(time_pre_dec) - int(last_sec_cpu[cpu_int])) * 1000000 + (int(time_post_dec) - int(last_usec_cpu[cpu_int]))
+ duration_ms = Decimal(duration_us) / Decimal(1000)
+ last_sec_cpu[cpu_int] = time_pre_dec
+ last_usec_cpu[cpu_int] = time_post_dec
+ elapsed_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000) - start_time
+ load = Decimal(int(mperf)*100)/ Decimal(tsc)
+ freq_ghz = Decimal(freq)/Decimal(1000000)
+ store_csv(cpu_int, time_pre_dec, time_post_dec, min_perf, des_perf, max_perf, freq_ghz, mperf, aperf, tsc, common_comm, load, duration_ms, sample_num, elapsed_time, cpu_mask)
+
+ if cpu_int > current_max_cpu:
+ current_max_cpu = cpu_int
+# Now separate the main overall csv file into per CPU csv files.
+ ipt.split_csv(current_max_cpu, cpu_mask)
+
+
+def signal_handler(signal, frame):
+ print(' SIGINT: Forcing cleanup before exit.')
+ if interval:
+ ipt.disable_trace(trace_file)
+ ipt.clear_trace_file()
+ ipt.free_trace_buffer()
+ sys.exit(0)
+
+trace_file = "/sys/kernel/debug/tracing/events/amd_cpu/enable"
+signal.signal(signal.SIGINT, signal_handler)
+
+interval = ""
+file_name = ""
+cpu_list = ""
+test_name = ""
+memory = "10240"
+graph_data_present = False
+
+valid1 = False
+valid2 = False
+
+cpu_mask = zeros((MAX_CPUS,), dtype=int)
+
+
+try:
+ opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
+except getopt.GetoptError:
+ ipt.print_help('amd_pstate')
+ sys.exit(2)
+for opt, arg in opts:
+ if opt == '-h':
+ print()
+ sys.exit()
+ elif opt in ("-t", "--trace_file"):
+ valid1 = True
+ location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
+ file_name = os.path.join(location, arg)
+ elif opt in ("-i", "--interval"):
+ valid1 = True
+ interval = arg
+ elif opt in ("-c", "--cpu"):
+ cpu_list = arg
+ elif opt in ("-n", "--name"):
+ valid2 = True
+ test_name = arg
+ elif opt in ("-m", "--memory"):
+ memory = arg
+
+if not (valid1 and valid2):
+ ipt.print_help('amd_pstate')
+ sys.exit()
+
+if cpu_list:
+ for p in re.split("[,]", cpu_list):
+ if int(p) < MAX_CPUS :
+ cpu_mask[int(p)] = 1
+else:
+ for i in range (0, MAX_CPUS):
+ cpu_mask[i] = 1
+
+if not os.path.exists('results'):
+ os.mkdir('results')
+ ipt.fix_ownership('results')
+
+os.chdir('results')
+if os.path.exists(test_name):
+ print('The test name directory already exists. Please provide a unique test name. Re-running a test is not supported yet.')
+ sys.exit()
+os.mkdir(test_name)
+ipt.fix_ownership(test_name)
+os.chdir(test_name)
+
+cur_version = sys.version_info
+print('python version (should be >= 2.7):')
+print(cur_version)
+
+cleanup_data_files()
+
+if interval:
+ file_name = "/sys/kernel/debug/tracing/trace"
+ ipt.clear_trace_file()
+ ipt.set_trace_buffer_size(memory)
+ ipt.enable_trace(trace_file)
+ time.sleep(int(interval))
+ ipt.disable_trace(trace_file)
+
+current_max_cpu = 0
+
+read_trace_data(file_name, cpu_mask)
+
+if interval:
+ ipt.clear_trace_file()
+ ipt.free_trace_buffer()
+
+if not graph_data_present:
+ print('No valid data to plot')
+ sys.exit(2)
+
+for cpu_no in range(0, current_max_cpu + 1):
+ plot_per_cpu_freq(cpu_no)
+ plot_per_cpu_des_perf(cpu_no)
+ plot_per_cpu_load(cpu_no)
+
+plot_all_cpu_des_perf()
+plot_all_cpu_frequency()
+plot_all_cpu_load()
+
+for root, dirs, files in os.walk('.'):
+ for f in files:
+ ipt.fix_ownership(f)
+
+os.chdir('../../')
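read_trace_data() expects amd_cpu trace lines of roughly the following shape (an illustrative sample, not captured output); each numbered regex group feeds one csv column, the CPU index comes from the bracketed field, and load is computed as mperf * 100 / tsc:

  <idle>-0     [003]  1234.567890: amd_pstate_perf: amd_min_perf=85 amd_des_perf=170 amd_max_perf=255 freq=2800000 mperf=12345 aperf=23456 tsc=34567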
diff --git a/tools/power/x86/intel-speed-select/Build b/tools/power/x86/intel-speed-select/Build
index b61456d75190..81e36bd578b1 100644
--- a/tools/power/x86/intel-speed-select/Build
+++ b/tools/power/x86/intel-speed-select/Build
@@ -1 +1 @@
-intel-speed-select-y += isst-config.o isst-core.o isst-display.o
+intel-speed-select-y += isst-config.o isst-core.o isst-display.o isst-daemon.o hfi-events.o
diff --git a/tools/power/x86/intel-speed-select/Makefile b/tools/power/x86/intel-speed-select/Makefile
index 12c6939dca2a..d2fba1297d96 100644
--- a/tools/power/x86/intel-speed-select/Makefile
+++ b/tools/power/x86/intel-speed-select/Makefile
@@ -13,8 +13,8 @@ endif
# Do not use make's built-in rules
# (this improves performance and avoids hard-to-debug behaviour);
MAKEFLAGS += -r
-
-override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
+override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I/usr/include/libnl3
+override LDFLAGS += -lnl-genl-3 -lnl-3
ALL_TARGETS := intel-speed-select
ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
@@ -31,7 +31,11 @@ $(OUTPUT)include/linux/isst_if.h: ../../../../include/uapi/linux/isst_if.h
mkdir -p $(OUTPUT)include/linux 2>&1 || true
ln -sf $(CURDIR)/../../../../include/uapi/linux/isst_if.h $@
-prepare: $(OUTPUT)include/linux/isst_if.h
+$(OUTPUT)include/linux/thermal.h: ../../../../include/uapi/linux/thermal.h
+ mkdir -p $(OUTPUT)include/linux 2>&1 || true
+ ln -sf $(CURDIR)/../../../../include/uapi/linux/thermal.h $@
+
+prepare: $(OUTPUT)include/linux/isst_if.h $(OUTPUT)include/linux/thermal.h
ISST_IN := $(OUTPUT)intel-speed-select-in.o
diff --git a/tools/power/x86/intel-speed-select/hfi-events.c b/tools/power/x86/intel-speed-select/hfi-events.c
new file mode 100644
index 000000000000..e85676711372
--- /dev/null
+++ b/tools/power/x86/intel-speed-select/hfi-events.c
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Speed Select -- Read HFI events for OOB
+ * Copyright (c) 2022 Intel Corporation.
+ */
+
+/*
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+
+ * WPA Supplicant - driver interaction with Linux nl80211/cfg80211
+ * Copyright (c) 2003-2008, Jouni Malinen <j@w1.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Alternatively, this software may be distributed under the terms of
+ * BSD license.
+ *
+ * Requires
+ * libnl-genl-3-dev
+ *
+ * For Fedora/CentOS
+ * dnf install libnl3-devel
+ * For Ubuntu
+ * apt install libnl-3-dev libnl-genl-3-dev
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <netlink/genl/genl.h>
+#include <netlink/genl/family.h>
+#include <netlink/genl/ctrl.h>
+
+#include <linux/thermal.h>
+#include "isst.h"
+
+struct hfi_event_data {
+ struct nl_sock *nl_handle;
+ struct nl_cb *nl_cb;
+};
+
+struct hfi_event_data drv;
+
+static int ack_handler(struct nl_msg *msg, void *arg)
+{
+ int *err = arg;
+ *err = 0;
+ return NL_STOP;
+}
+
+static int finish_handler(struct nl_msg *msg, void *arg)
+{
+ int *ret = arg;
+ *ret = 0;
+ return NL_SKIP;
+}
+
+static int error_handler(struct sockaddr_nl *nla, struct nlmsgerr *err,
+ void *arg)
+{
+ int *ret = arg;
+ *ret = err->error;
+ return NL_SKIP;
+}
+
+static int seq_check_handler(struct nl_msg *msg, void *arg)
+{
+ return NL_OK;
+}
+
+static int send_and_recv_msgs(struct hfi_event_data *drv,
+ struct nl_msg *msg,
+ int (*valid_handler)(struct nl_msg *, void *),
+ void *valid_data)
+{
+ struct nl_cb *cb;
+ int err = -ENOMEM;
+
+ cb = nl_cb_clone(drv->nl_cb);
+ if (!cb)
+ goto out;
+
+ err = nl_send_auto_complete(drv->nl_handle, msg);
+ if (err < 0)
+ goto out;
+
+ err = 1;
+
+ nl_cb_err(cb, NL_CB_CUSTOM, error_handler, &err);
+ nl_cb_set(cb, NL_CB_FINISH, NL_CB_CUSTOM, finish_handler, &err);
+ nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, ack_handler, &err);
+
+ if (valid_handler)
+ nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM,
+ valid_handler, valid_data);
+
+ while (err > 0)
+ nl_recvmsgs(drv->nl_handle, cb);
+ out:
+ nl_cb_put(cb);
+ nlmsg_free(msg);
+ return err;
+}
+
+struct family_data {
+ const char *group;
+ int id;
+};
+
+static int family_handler(struct nl_msg *msg, void *arg)
+{
+ struct family_data *res = arg;
+ struct nlattr *tb[CTRL_ATTR_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *mcgrp;
+ int i;
+
+ nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+ if (!tb[CTRL_ATTR_MCAST_GROUPS])
+ return NL_SKIP;
+
+ nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], i) {
+ struct nlattr *tb2[CTRL_ATTR_MCAST_GRP_MAX + 1];
+ nla_parse(tb2, CTRL_ATTR_MCAST_GRP_MAX, nla_data(mcgrp),
+ nla_len(mcgrp), NULL);
+ if (!tb2[CTRL_ATTR_MCAST_GRP_NAME] ||
+ !tb2[CTRL_ATTR_MCAST_GRP_ID] ||
+ strncmp(nla_data(tb2[CTRL_ATTR_MCAST_GRP_NAME]),
+ res->group,
+ nla_len(tb2[CTRL_ATTR_MCAST_GRP_NAME])) != 0)
+ continue;
+ res->id = nla_get_u32(tb2[CTRL_ATTR_MCAST_GRP_ID]);
+ break;
+ }
+
+ return 0;
+}
+
+static int nl_get_multicast_id(struct hfi_event_data *drv,
+ const char *family, const char *group)
+{
+ struct nl_msg *msg;
+ int ret = -1;
+ struct family_data res = { group, -ENOENT };
+
+ msg = nlmsg_alloc();
+ if (!msg)
+ return -ENOMEM;
+ genlmsg_put(msg, 0, 0, genl_ctrl_resolve(drv->nl_handle, "nlctrl"),
+ 0, 0, CTRL_CMD_GETFAMILY, 0);
+ NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family);
+
+ ret = send_and_recv_msgs(drv, msg, family_handler, &res);
+ msg = NULL;
+ if (ret == 0)
+ ret = res.id;
+
+nla_put_failure:
+ nlmsg_free(msg);
+ return ret;
+}
+
+struct perf_cap {
+ int cpu;
+ int perf;
+ int eff;
+};
+
+static void process_hfi_event(struct perf_cap *perf_cap)
+{
+ process_level_change(perf_cap->cpu);
+}
+
+static int handle_event(struct nl_msg *n, void *arg)
+{
+ struct nlmsghdr *nlh = nlmsg_hdr(n);
+ struct genlmsghdr *genlhdr = genlmsg_hdr(nlh);
+ struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1];
+ int ret;
+ struct perf_cap perf_cap;
+
+ ret = genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL);
+
+ debug_printf("Received event %d parse_rer:%d\n", genlhdr->cmd, ret);
+ if (genlhdr->cmd == THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE) {
+ struct nlattr *cap;
+ int j, index = 0;
+
+ debug_printf("THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE\n");
+ nla_for_each_nested(cap, attrs[THERMAL_GENL_ATTR_CPU_CAPABILITY], j) {
+ switch (index) {
+ case 0:
+ perf_cap.cpu = nla_get_u32(cap);
+ break;
+ case 1:
+ perf_cap.perf = nla_get_u32(cap);
+ break;
+ case 2:
+ perf_cap.eff = nla_get_u32(cap);
+ break;
+ default:
+ break;
+ }
+ ++index;
+ if (index == 3) {
+ index = 0;
+ process_hfi_event(&perf_cap);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int _hfi_exit;
+
+static int check_hfi_support(void)
+{
+ unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
+
+ __cpuid(6, eax, ebx, ecx, edx);
+ if (eax & BIT(19))
+ return 1;
+
+ return 0;
+}
+
+int hfi_main(void)
+{
+ struct nl_sock *sock;
+ struct nl_cb *cb;
+ int err = 0;
+ int mcast_id;
+ int no_block = 0;
+
+ if (!check_hfi_support()) {
+ fprintf(stderr, "CPU doesn't support HFI\n");
+ return -1;
+ }
+
+ sock = nl_socket_alloc();
+ if (!sock) {
+ fprintf(stderr, "nl_socket_alloc failed\n");
+ return -1;
+ }
+
+ if (genl_connect(sock)) {
+ fprintf(stderr, "genl_connect(sk_event) failed\n");
+ goto free_sock;
+ }
+
+ drv.nl_handle = sock;
+ drv.nl_cb = cb = nl_cb_alloc(NL_CB_DEFAULT);
+ if (drv.nl_cb == NULL) {
+ printf("Failed to allocate netlink callbacks");
+ goto free_sock;
+ }
+
+ mcast_id = nl_get_multicast_id(&drv, THERMAL_GENL_FAMILY_NAME,
+ THERMAL_GENL_EVENT_GROUP_NAME);
+ if (mcast_id < 0) {
+ fprintf(stderr, "nl_get_multicast_id failed\n");
+ goto free_sock;
+ }
+
+ if (nl_socket_add_membership(sock, mcast_id)) {
+ fprintf(stderr, "nl_socket_add_membership failed");
+ goto free_sock;
+ }
+
+ nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, seq_check_handler, 0);
+ nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, handle_event, NULL);
+
+ if (no_block)
+ nl_socket_set_nonblocking(sock);
+
+ debug_printf("hfi is initialized\n");
+
+ while (!_hfi_exit && !err) {
+ err = nl_recvmsgs(sock, cb);
+ debug_printf("nl_recv_message err:%d\n", err);
+ }
+
+ return 0;
+
+ /* Netlink library doesn't have calls to dealloc cb or disconnect */
+free_sock:
+ nl_socket_free(sock);
+
+ return -1;
+}
+
+void hfi_exit(void)
+{
+ _hfi_exit = 1;
+}
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index efe72fa48224..060390e88e37 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,8 @@ struct process_cmd_struct {
int arg;
};
-static const char *version_str = "v1.11";
+static const char *version_str = "v1.12";
+
static const int supported_api_ver = 1;
static struct isst_if_platform_info isst_platform_info;
static char *progname;
@@ -368,7 +369,7 @@ int get_topo_max_cpus(void)
return topo_max_cpus;
}
-static void set_cpu_online_offline(int cpu, int state)
+void set_cpu_online_offline(int cpu, int state)
{
char buffer[128];
int fd, ret;
@@ -409,12 +410,10 @@ static void force_all_cpus_online(void)
unlink("/var/run/isst_cpu_topology.dat");
}
-#define MAX_PACKAGE_COUNT 8
-#define MAX_DIE_PER_PACKAGE 2
-static void for_each_online_package_in_set(void (*callback)(int, void *, void *,
- void *, void *),
- void *arg1, void *arg2, void *arg3,
- void *arg4)
+void for_each_online_package_in_set(void (*callback)(int, void *, void *,
+ void *, void *),
+ void *arg1, void *arg2, void *arg3,
+ void *arg4)
{
int max_packages[MAX_PACKAGE_COUNT * MAX_PACKAGE_COUNT];
int pkg_index = 0, i;
@@ -2803,7 +2802,9 @@ static void usage(void)
printf("\t[-p|--pause] : Delay between two mail box commands in milliseconds\n");
printf("\t[-r|--retry] : Retry count for mail box commands on failure, default 3\n");
printf("\t[-v|--version] : Print version\n");
-
+ printf("\t[-b|--oob : Start a daemon to process HFI events for perf profile change from Out of Band agent.\n");
+ printf("\t[-n|--no-daemon : Don't run as daemon. By default --oob will turn on daemon mode\n");
+ printf("\t[-w|--delay : Delay for reading config level state change in OOB poll mode.\n");
printf("\nResult format\n");
printf("\tResult display uses a common format for each command:\n");
printf("\tResults are formatted in text/JSON with\n");
@@ -2837,6 +2838,9 @@ static void cmdline(int argc, char **argv)
int opt, force_cpus_online = 0;
int option_index = 0;
int ret;
+ int oob_mode = 0;
+ int poll_interval = -1;
+ int no_daemon = 0;
static struct option long_options[] = {
{ "all-cpus-online", no_argument, 0, 'a' },
@@ -2849,6 +2853,9 @@ static void cmdline(int argc, char **argv)
{ "out", required_argument, 0, 'o' },
{ "retry", required_argument, 0, 'r' },
{ "version", no_argument, 0, 'v' },
+ { "oob", no_argument, 0, 'b' },
+ { "no-daemon", no_argument, 0, 'n' },
+ { "poll-interval", required_argument, 0, 'w' },
{ 0, 0, 0, 0 }
};
@@ -2875,7 +2882,7 @@ static void cmdline(int argc, char **argv)
}
progname = argv[0];
- while ((opt = getopt_long_only(argc, argv, "+c:df:hio:va", long_options,
+ while ((opt = getopt_long_only(argc, argv, "+c:df:hio:vabw:n", long_options,
&option_index)) != -1) {
switch (opt) {
case 'a':
@@ -2920,12 +2927,26 @@ static void cmdline(int argc, char **argv)
case 'v':
print_version();
break;
+ case 'b':
+ oob_mode = 1;
+ break;
+ case 'n':
+ no_daemon = 1;
+ break;
+ case 'w':
+ ret = strtol(optarg, &ptr, 10);
+ if (!ret) {
+ fprintf(stderr, "Invalid poll interval count\n");
+ exit(0);
+ }
+ poll_interval = ret;
+ break;
default:
usage();
}
}
- if (optind > (argc - 2)) {
+ if (optind > (argc - 2) && !oob_mode) {
usage();
exit(0);
}
@@ -2936,6 +2957,17 @@ static void cmdline(int argc, char **argv)
set_cpu_present_cpu_mask();
set_cpu_target_cpu_mask();
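+ /* OOB mode: hand control to the HFI event daemon and skip normal command processing */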
+ if (oob_mode) {
+ create_cpu_map();
+ if (debug_flag)
+ fprintf(stderr, "OOB mode is enabled in debug mode\n");
+
+ ret = isst_daemon(debug_flag, poll_interval, no_daemon);
+ if (ret)
+ fprintf(stderr, "OOB mode enable failed\n");
+ goto out;
+ }
+
if (!is_clx_n_platform()) {
ret = isst_fill_platform_info();
if (ret)
diff --git a/tools/power/x86/intel-speed-select/isst-daemon.c b/tools/power/x86/intel-speed-select/isst-daemon.c
new file mode 100644
index 000000000000..dd372924bc82
--- /dev/null
+++ b/tools/power/x86/intel-speed-select/isst-daemon.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Speed Select -- Allow speed select to daemonize
+ * Copyright (c) 2022 Intel Corporation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <time.h>
+
+#include "isst.h"
+
+static int per_package_levels_info[MAX_PACKAGE_COUNT][MAX_DIE_PER_PACKAGE];
+static time_t per_package_levels_tm[MAX_PACKAGE_COUNT][MAX_DIE_PER_PACKAGE];
+
+static void init_levels(void)
+{
+ int i, j;
+
+ for (i = 0; i < MAX_PACKAGE_COUNT; ++i)
+ for (j = 0; j < MAX_DIE_PER_PACKAGE; ++j)
+ per_package_levels_info[i][j] = -1;
+}
+
+void process_level_change(int cpu)
+{
+ struct isst_pkg_ctdp_level_info ctdp_level;
+ int pkg_id = get_physical_package_id(cpu);
+ int die_id = get_physical_die_id(cpu);
+ struct isst_pkg_ctdp pkg_dev;
+ time_t tm;
+ int ret;
+
+ if (pkg_id >= MAX_PACKAGE_COUNT || die_id >= MAX_DIE_PER_PACKAGE) {
+ debug_printf("Invalid package/die info for cpu:%d\n", cpu);
+ return;
+ }
+
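+ /* Rate-limit: ignore events for this package/die arriving within 2 seconds of the last one */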
+ tm = time(NULL);
+ if (tm - per_package_levels_tm[pkg_id][die_id] < 2)
+ return;
+
+ per_package_levels_tm[pkg_id][die_id] = tm;
+
+ ret = isst_get_ctdp_levels(cpu, &pkg_dev);
+ if (ret) {
+ debug_printf("Can't get tdp levels for cpu:%d\n", cpu);
+ return;
+ }
+
+ debug_printf("Get Config level %d pkg:%d die:%d current_level:%d \n", cpu,
+ pkg_id, die_id, pkg_dev.current_level);
+
+ if (pkg_dev.locked) {
+ debug_printf("config TDP s locked \n");
+ return;
+ }
+
+ if (per_package_levels_info[pkg_id][die_id] == pkg_dev.current_level)
+ return;
+
+ debug_printf("**Config level change for cpu:%d pkg:%d die:%d from %d to %d\n",
+ cpu, pkg_id, die_id, per_package_levels_info[pkg_id][die_id],
+ pkg_dev.current_level);
+
+ per_package_levels_info[pkg_id][die_id] = pkg_dev.current_level;
+
+ ctdp_level.core_cpumask_size =
+ alloc_cpu_set(&ctdp_level.core_cpumask);
+ ret = isst_get_coremask_info(cpu, pkg_dev.current_level, &ctdp_level);
+ if (ret) {
+ free_cpu_set(ctdp_level.core_cpumask);
+ debug_printf("Can't get core_mask:%d\n", cpu);
+ return;
+ }
+
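+ /* Online the CPUs present in the new level's core mask and offline the rest in this package/die */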
+ if (ctdp_level.cpu_count) {
+ int i, max_cpus = get_topo_max_cpus();
+ for (i = 0; i < max_cpus; ++i) {
+ if (pkg_id != get_physical_package_id(i) || die_id != get_physical_die_id(i))
+ continue;
+ if (CPU_ISSET_S(i, ctdp_level.core_cpumask_size, ctdp_level.core_cpumask)) {
+ fprintf(stderr, "online cpu %d\n", i);
+ set_cpu_online_offline(i, 1);
+ } else {
+ fprintf(stderr, "offline cpu %d\n", i);
+ set_cpu_online_offline(i, 0);
+ }
+ }
+ }
+
+ free_cpu_set(ctdp_level.core_cpumask);
+}
+
+static void _poll_for_config_change(int cpu, void *arg1, void *arg2,
+ void *arg3, void *arg4)
+{
+ process_level_change(cpu);
+}
+
+static void poll_for_config_change(void)
+{
+ for_each_online_package_in_set(_poll_for_config_change, NULL, NULL,
+ NULL, NULL);
+}
+
+static int done = 0;
+static int pid_file_handle;
+
+static void signal_handler(int sig)
+{
+ switch (sig) {
+ case SIGINT:
+ case SIGTERM:
+ done = 1;
+ hfi_exit();
+ exit(0);
+ break;
+ default:
+ break;
+ }
+}
+
+static void daemonize(char *rundir, char *pidfile)
+{
+ int pid, sid, i;
+ char str[10];
+ struct sigaction sig_actions;
+ sigset_t sig_set;
+ int ret;
+
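+ /* If the parent is init, the process is already a daemon */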
+ if (getppid() == 1)
+ return;
+
+ sigemptyset(&sig_set);
+ sigaddset(&sig_set, SIGCHLD);
+ sigaddset(&sig_set, SIGTSTP);
+ sigaddset(&sig_set, SIGTTOU);
+ sigaddset(&sig_set, SIGTTIN);
+ sigprocmask(SIG_BLOCK, &sig_set, NULL);
+
+ sig_actions.sa_handler = signal_handler;
+ sigemptyset(&sig_actions.sa_mask);
+ sig_actions.sa_flags = 0;
+
+ sigaction(SIGHUP, &sig_actions, NULL);
+ sigaction(SIGTERM, &sig_actions, NULL);
+ sigaction(SIGINT, &sig_actions, NULL);
+
+ pid = fork();
+ if (pid < 0) {
+ /* Could not fork */
+ exit(EXIT_FAILURE);
+ }
+ if (pid > 0)
+ exit(EXIT_SUCCESS);
+
+ umask(027);
+
+ sid = setsid();
+ if (sid < 0)
+ exit(EXIT_FAILURE);
+
+ /* close all descriptors */
+ for (i = getdtablesize(); i >= 0; --i)
+ close(i);
+
+ i = open("/dev/null", O_RDWR);
+ ret = dup(i);
+ if (ret == -1)
+ exit(EXIT_FAILURE);
+
+ ret = dup(i);
+ if (ret == -1)
+ exit(EXIT_FAILURE);
+
+ ret = chdir(rundir);
+ if (ret == -1)
+ exit(EXIT_FAILURE);
+
+ pid_file_handle = open(pidfile, O_RDWR | O_CREAT, 0600);
+ if (pid_file_handle == -1) {
+ /* Couldn't open lock file */
+ exit(1);
+ }
+ /* Try to lock file */
+#ifdef LOCKF_SUPPORT
+ if (lockf(pid_file_handle, F_TLOCK, 0) == -1) {
+#else
+ if (flock(pid_file_handle, LOCK_EX|LOCK_NB) < 0) {
+#endif
+ /* Couldn't get lock on lock file */
+ fprintf(stderr, "Couldn't get lock file %d\n", getpid());
+ exit(1);
+ }
+ snprintf(str, sizeof(str), "%d\n", getpid());
+ ret = write(pid_file_handle, str, strlen(str));
+ if (ret == -1)
+ exit(EXIT_FAILURE);
+
+ close(i);
+}
+
+int isst_daemon(int debug_mode, int poll_interval, int no_daemon)
+{
+ int ret;
+
+ if (!no_daemon && poll_interval < 0 && !debug_mode) {
+ fprintf(stderr, "OOB mode is enabled and will run as daemon\n");
+ daemonize((char *) "/tmp/",
+ (char *)"/tmp/hfi-events.pid");
+ } else {
+ signal(SIGINT, signal_handler);
+ }
+
+ init_levels();
+
+ if (poll_interval < 0) {
+ ret = hfi_main();
+ if (ret) {
+ fprintf(stderr, "HFI initialization failed\n");
+ fprintf(stderr, "Must specify poll-interval when HFI events are not available\n");
+ }
+ return ret;
+ }
+
+ debug_printf("Starting loop\n");
+ while (!done) {
+ sleep(poll_interval);
+ poll_for_config_change();
+ }
+
+ return 0;
+}
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 1aa15d5ea57c..0796d8c6a882 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -76,6 +76,9 @@
#define DISP_FREQ_MULTIPLIER 100
+#define MAX_PACKAGE_COUNT 8
+#define MAX_DIE_PER_PACKAGE 2
+
struct isst_clos_config {
int pkg_id;
int die_id;
@@ -260,4 +263,14 @@ extern int is_skx_based_platform(void);
extern int is_spr_platform(void);
extern int is_icx_platform(void);
extern void isst_trl_display_information(int cpu, FILE *outf, unsigned long long trl);
+
+extern void set_cpu_online_offline(int cpu, int state);
+extern void for_each_online_package_in_set(void (*callback)(int, void *, void *,
+ void *, void *),
+ void *arg1, void *arg2, void *arg3,
+ void *arg4);
+extern int isst_daemon(int debug_mode, int poll_interval, int no_daemon);
+extern void process_level_change(int cpu);
+extern int hfi_main(void);
+extern void hfi_exit(void);
#endif
diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
index e15e20696d17..b46e9eb8f5aa 100755
--- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
+++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
@@ -63,7 +63,7 @@ C_USEC = 3
C_SEC = 2
C_CPU = 1
-global sample_num, last_sec_cpu, last_usec_cpu, start_time, testname
+global sample_num, last_sec_cpu, last_usec_cpu, start_time, testname, trace_file
# 11 digits covers uptime to 115 days
getcontext().prec = 11
@@ -72,17 +72,17 @@ sample_num =0
last_sec_cpu = [0] * MAX_CPUS
last_usec_cpu = [0] * MAX_CPUS
-def print_help():
- print('intel_pstate_tracer.py:')
+def print_help(driver_name):
+ print('%s_tracer.py:'%driver_name)
print(' Usage:')
print(' If the trace file is available, then to simply parse and plot, use (sudo not required):')
- print(' ./intel_pstate_tracer.py [-c cpus] -t <trace_file> -n <test_name>')
+ print(' ./%s_tracer.py [-c cpus] -t <trace_file> -n <test_name>'%driver_name)
print(' Or')
- print(' ./intel_pstate_tracer.py [--cpu cpus] ---trace_file <trace_file> --name <test_name>')
+ print(' ./%s_tracer.py [--cpu cpus] --trace_file <trace_file> --name <test_name>'%driver_name)
print(' To generate trace file, parse and plot, use (sudo required):')
- print(' sudo ./intel_pstate_tracer.py [-c cpus] -i <interval> -n <test_name> -m <kbytes>')
+ print(' sudo ./%s_tracer.py [-c cpus] -i <interval> -n <test_name> -m <kbytes>'%driver_name)
print(' Or')
- print(' sudo ./intel_pstate_tracer.py [--cpu cpus] --interval <interval> --name <test_name> --memory <kbytes>')
+ print(' sudo ./%s_tracer.py [--cpu cpus] --interval <interval> --name <test_name> --memory <kbytes>'%driver_name)
print(' Optional argument:')
print(' cpus: comma separated list of CPUs')
print(' kbytes: Kilo bytes of memory per CPU to allocate to the trace buffer. Default: 10240')
@@ -323,7 +323,7 @@ def set_4_plot_linestyles(g_plot):
g_plot('set style line 3 linetype 1 linecolor rgb "purple" pointtype -1')
g_plot('set style line 4 linetype 1 linecolor rgb "blue" pointtype -1')
-def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz):
+def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz, cpu_mask):
""" Store master csv file information """
global graph_data_present
@@ -342,11 +342,9 @@ def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _t
graph_data_present = True;
-def split_csv():
+def split_csv(current_max_cpu, cpu_mask):
""" seperate the all csv file into per CPU csv files. """
- global current_max_cpu
-
if os.path.exists('cpu.csv'):
for index in range(0, current_max_cpu + 1):
if cpu_mask[int(index)] != 0:
@@ -381,27 +379,25 @@ def clear_trace_file():
print('IO error clearing trace file ')
sys.exit(2)
-def enable_trace():
+def enable_trace(trace_file):
""" Enable trace """
try:
- open('/sys/kernel/debug/tracing/events/power/pstate_sample/enable'
- , 'w').write("1")
+ open(trace_file,'w').write("1")
except:
print('IO error enabling trace ')
sys.exit(2)
-def disable_trace():
+def disable_trace(trace_file):
""" Disable trace """
try:
- open('/sys/kernel/debug/tracing/events/power/pstate_sample/enable'
- , 'w').write("0")
+ open(trace_file, 'w').write("0")
except:
print('IO error disabling trace ')
sys.exit(2)
-def set_trace_buffer_size():
+def set_trace_buffer_size(memory):
""" Set trace buffer size """
try:
@@ -421,7 +417,7 @@ def free_trace_buffer():
print('IO error freeing trace buffer ')
sys.exit(2)
-def read_trace_data(filename):
+def read_trace_data(filename, cpu_mask):
""" Read and parse trace data """
global current_max_cpu
@@ -481,135 +477,137 @@ def read_trace_data(filename):
tsc_ghz = Decimal(0)
if duration_ms != Decimal(0) :
tsc_ghz = Decimal(tsc)/duration_ms/Decimal(1000000)
- store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz)
+ store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz, cpu_mask)
if cpu_int > current_max_cpu:
current_max_cpu = cpu_int
# End of for each trace line loop
# Now separate the main overall csv file into per CPU csv files.
- split_csv()
+ split_csv(current_max_cpu, cpu_mask)
def signal_handler(signal, frame):
print(' SIGINT: Forcing cleanup before exit.')
if interval:
- disable_trace()
+ disable_trace(trace_file)
clear_trace_file()
# Free the memory
free_trace_buffer()
sys.exit(0)
-signal.signal(signal.SIGINT, signal_handler)
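+# Run the top-level logic only when executed directly, so the helpers above can be imported by other tracer scripts.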
+if __name__ == "__main__":
+ trace_file = "/sys/kernel/debug/tracing/events/power/pstate_sample/enable"
+ signal.signal(signal.SIGINT, signal_handler)
-interval = ""
-filename = ""
-cpu_list = ""
-testname = ""
-memory = "10240"
-graph_data_present = False;
+ interval = ""
+ filename = ""
+ cpu_list = ""
+ testname = ""
+ memory = "10240"
+ graph_data_present = False
-valid1 = False
-valid2 = False
+ valid1 = False
+ valid2 = False
-cpu_mask = zeros((MAX_CPUS,), dtype=int)
+ cpu_mask = zeros((MAX_CPUS,), dtype=int)
-try:
- opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
-except getopt.GetoptError:
- print_help()
- sys.exit(2)
-for opt, arg in opts:
- if opt == '-h':
- print()
+ try:
+ opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
+ except getopt.GetoptError:
+ print_help('intel_pstate')
+ sys.exit(2)
+ for opt, arg in opts:
+ if opt == '-h':
+ print_help('intel_pstate')
+ sys.exit()
+ elif opt in ("-t", "--trace_file"):
+ valid1 = True
+ location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
+ filename = os.path.join(location, arg)
+ elif opt in ("-i", "--interval"):
+ valid1 = True
+ interval = arg
+ elif opt in ("-c", "--cpu"):
+ cpu_list = arg
+ elif opt in ("-n", "--name"):
+ valid2 = True
+ testname = arg
+ elif opt in ("-m", "--memory"):
+ memory = arg
+
+ if not (valid1 and valid2):
+ print_help('intel_pstate')
sys.exit()
- elif opt in ("-t", "--trace_file"):
- valid1 = True
- location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
- filename = os.path.join(location, arg)
- elif opt in ("-i", "--interval"):
- valid1 = True
- interval = arg
- elif opt in ("-c", "--cpu"):
- cpu_list = arg
- elif opt in ("-n", "--name"):
- valid2 = True
- testname = arg
- elif opt in ("-m", "--memory"):
- memory = arg
-
-if not (valid1 and valid2):
- print_help()
- sys.exit()
-
-if cpu_list:
- for p in re.split("[,]", cpu_list):
- if int(p) < MAX_CPUS :
- cpu_mask[int(p)] = 1
-else:
- for i in range (0, MAX_CPUS):
- cpu_mask[i] = 1
-
-if not os.path.exists('results'):
- os.mkdir('results')
+
+ if cpu_list:
+ for p in re.split("[,]", cpu_list):
+ if int(p) < MAX_CPUS :
+ cpu_mask[int(p)] = 1
+ else:
+ for i in range (0, MAX_CPUS):
+ cpu_mask[i] = 1
+
+ if not os.path.exists('results'):
+ os.mkdir('results')
+ # The regular user needs to own the directory, not root.
+ fix_ownership('results')
+
+ os.chdir('results')
+ if os.path.exists(testname):
+ print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.')
+ sys.exit()
+ os.mkdir(testname)
# The regular user needs to own the directory, not root.
- fix_ownership('results')
-
-os.chdir('results')
-if os.path.exists(testname):
- print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.')
- sys.exit()
-os.mkdir(testname)
-# The regular user needs to own the directory, not root.
-fix_ownership(testname)
-os.chdir(testname)
-
-# Temporary (or perhaps not)
-cur_version = sys.version_info
-print('python version (should be >= 2.7):')
-print(cur_version)
-
-# Left as "cleanup" for potential future re-run ability.
-cleanup_data_files()
-
-if interval:
- filename = "/sys/kernel/debug/tracing/trace"
- clear_trace_file()
- set_trace_buffer_size()
- enable_trace()
- print('Sleeping for ', interval, 'seconds')
- time.sleep(int(interval))
- disable_trace()
-
-current_max_cpu = 0
-
-read_trace_data(filename)
-
-if interval:
- clear_trace_file()
- # Free the memory
- free_trace_buffer()
-
-if graph_data_present == False:
- print('No valid data to plot')
- sys.exit(2)
-
-for cpu_no in range(0, current_max_cpu + 1):
- plot_perf_busy_with_sample(cpu_no)
- plot_perf_busy(cpu_no)
- plot_durations(cpu_no)
- plot_loads(cpu_no)
-
-plot_pstate_cpu_with_sample()
-plot_pstate_cpu()
-plot_load_cpu()
-plot_frequency_cpu()
-plot_duration_cpu()
-plot_scaled_cpu()
-plot_boost_cpu()
-plot_ghz_cpu()
-
-# It is preferrable, but not necessary, that the regular user owns the files, not root.
-for root, dirs, files in os.walk('.'):
- for f in files:
- fix_ownership(f)
-
-os.chdir('../../')
+ fix_ownership(testname)
+ os.chdir(testname)
+
+ # Temporary (or perhaps not)
+ cur_version = sys.version_info
+ print('python version (should be >= 2.7):')
+ print(cur_version)
+
+ # Left as "cleanup" for potential future re-run ability.
+ cleanup_data_files()
+
+ if interval:
+ filename = "/sys/kernel/debug/tracing/trace"
+ clear_trace_file()
+ set_trace_buffer_size(memory)
+ enable_trace(trace_file)
+ print('Sleeping for ', interval, 'seconds')
+ time.sleep(int(interval))
+ disable_trace(trace_file)
+
+ current_max_cpu = 0
+
+ read_trace_data(filename, cpu_mask)
+
+ if interval:
+ clear_trace_file()
+ # Free the memory
+ free_trace_buffer()
+
+ if not graph_data_present:
+ print('No valid data to plot')
+ sys.exit(2)
+
+ for cpu_no in range(0, current_max_cpu + 1):
+ plot_perf_busy_with_sample(cpu_no)
+ plot_perf_busy(cpu_no)
+ plot_durations(cpu_no)
+ plot_loads(cpu_no)
+
+ plot_pstate_cpu_with_sample()
+ plot_pstate_cpu()
+ plot_load_cpu()
+ plot_frequency_cpu()
+ plot_duration_cpu()
+ plot_scaled_cpu()
+ plot_boost_cpu()
+ plot_ghz_cpu()
+
+ # It is preferable, but not necessary, that the regular user owns the files, not root.
+ for root, dirs, files in os.walk('.'):
+ for f in files:
+ fix_ownership(f)
+
+ os.chdir('../../')
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 47d3ba895d6d..bc5ae0872fed 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2323,7 +2323,7 @@ int skx_pkg_cstate_limits[16] =
};
int icx_pkg_cstate_limits[16] =
- { PCL__0, PCL__2, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+ { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
PCLRSV, PCLRSV
};
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index b0be5f40a3f1..79d102304470 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -90,7 +90,7 @@ EXTRA_WARNINGS += -Wstrict-aliasing=3
else ifneq ($(CROSS_COMPILE),)
CLANG_CROSS_FLAGS := --target=$(notdir $(CROSS_COMPILE:%-=%))
-GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)gcc))
+GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)gcc 2>/dev/null))
ifneq ($(GCC_TOOLCHAIN_DIR),)
CLANG_CROSS_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE))
CLANG_CROSS_FLAGS += --sysroot=$(shell $(CROSS_COMPILE)gcc -print-sysroot)
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 44bbe54f25f1..3c4196cef3ed 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -6,6 +6,7 @@
# Author: Felix Guo <felixguoxiuping@gmail.com>
# Author: Brendan Higgins <brendanhiggins@google.com>
+import importlib.abc
import importlib.util
import logging
import subprocess
diff --git a/tools/testing/kunit/run_checks.py b/tools/testing/kunit/run_checks.py
index 4f32133ed77c..13d854afca9d 100755
--- a/tools/testing/kunit/run_checks.py
+++ b/tools/testing/kunit/run_checks.py
@@ -61,7 +61,7 @@ def main(argv: Sequence[str]) -> None:
elif isinstance(ex, subprocess.CalledProcessError):
print(f'{name}: FAILED')
else:
- print('{name}: unexpected exception: {ex}')
+ print(f'{name}: unexpected exception: {ex}')
continue
output = ex.output
diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h
index 16ec895bbe5f..5bd9e6e80625 100644
--- a/tools/testing/scatterlist/linux/mm.h
+++ b/tools/testing/scatterlist/linux/mm.h
@@ -74,7 +74,7 @@ static inline unsigned long page_to_phys(struct page *page)
__UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \
x, y)
-#define preemptible() (1)
+#define pagefault_disabled() (0)
static inline void *kmap(struct page *page)
{
@@ -127,6 +127,7 @@ kmalloc_array(unsigned int n, unsigned int size, unsigned int flags)
#define kmemleak_free(a)
#define PageSlab(p) (0)
+#define flush_dcache_page(p)
#define MAX_ERRNO 4095
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c
index d8eeeafb50dc..1e13b7523918 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi.c
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.c
@@ -18,7 +18,6 @@
#include "../../kselftest.h"
-#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
#define NUM_VL ((SVE_VQ_MAX - SVE_VQ_MIN) + 1)
extern void do_syscall(int sve_vl);
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index af798b9d232c..4c418b2021e0 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -21,8 +21,6 @@
#include "../../kselftest.h"
-#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
-
/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
#ifndef NT_ARM_SVE
#define NT_ARM_SVE 0x405
@@ -261,7 +259,7 @@ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type)
}
ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD,
- "Set FPSIMD registers via %s\n", type->name);
+ "Got FPSIMD registers via %s\n", type->name);
if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD)
goto out;
@@ -489,6 +487,8 @@ static int do_parent(pid_t child)
unsigned int vq, vl;
bool vl_supported;
+ ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
+
/* Attach to the child */
while (1) {
int sig;
@@ -557,7 +557,14 @@ static int do_parent(pid_t child)
}
/* prctl() flags */
- ptrace_set_get_inherit(child, &vec_types[i]);
+ if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) {
+ ptrace_set_get_inherit(child, &vec_types[i]);
+ } else {
+ ksft_test_result_skip("%s SVE_PT_VL_INHERIT set\n",
+ vec_types[i].name);
+ ksft_test_result_skip("%s SVE_PT_VL_INHERIT cleared\n",
+ vec_types[i].name);
+ }
/* Step through every possible VQ */
for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) {
diff --git a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
index a876db1f096a..325bca0de0f6 100644
--- a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
+++ b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
@@ -19,17 +19,6 @@
#include "kselftest.h"
#include "mte_common_util.h"
-#define PR_SET_TAGGED_ADDR_CTRL 55
-#define PR_GET_TAGGED_ADDR_CTRL 56
-# define PR_TAGGED_ADDR_ENABLE (1UL << 0)
-# define PR_MTE_TCF_SHIFT 1
-# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TAG_SHIFT 3
-# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT)
-
#include "mte_def.h"
#define NUM_ITERATIONS 1024
diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index 1de7a0abd0ae..f4ae5f87a3b7 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -3,6 +3,7 @@
#define _GNU_SOURCE
+#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
@@ -11,6 +12,7 @@
#include <string.h>
#include <ucontext.h>
#include <unistd.h>
+#include <sys/uio.h>
#include <sys/mman.h>
#include "kselftest.h"
@@ -19,14 +21,28 @@
static size_t page_sz;
-static int check_usermem_access_fault(int mem_type, int mode, int mapping)
+#define TEST_NAME_MAX 100
+
+enum test_type {
+ READ_TEST,
+ WRITE_TEST,
+ READV_TEST,
+ WRITEV_TEST,
+ LAST_TEST,
+};
+
+static int check_usermem_access_fault(int mem_type, int mode, int mapping,
+ int tag_offset, int tag_len,
+ enum test_type test_type)
{
int fd, i, err;
char val = 'A';
- size_t len, read_len;
+ ssize_t len, syscall_len;
void *ptr, *ptr_next;
+ int fileoff, ptroff, size;
+ int sizes[] = {1, 2, 3, 8, 16, 32, 4096, page_sz};
- err = KSFT_FAIL;
+ err = KSFT_PASS;
len = 2 * page_sz;
mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
fd = create_temp_file();
@@ -43,9 +59,9 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping)
}
mte_initialize_current_context(mode, (uintptr_t)ptr, len);
/* Copy from file into buffer with valid tag */
- read_len = read(fd, ptr, len);
+ syscall_len = read(fd, ptr, len);
mte_wait_after_trig();
- if (cur_mte_cxt.fault_valid || read_len < len)
+ if (cur_mte_cxt.fault_valid || syscall_len < len)
goto usermem_acc_err;
/* Verify same pattern is read */
for (i = 0; i < len; i++)
@@ -54,36 +70,136 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping)
if (i < len)
goto usermem_acc_err;
- /* Tag the next half of memory with different value */
- ptr_next = (void *)((unsigned long)ptr + page_sz);
+ if (!tag_len)
+ tag_len = len - tag_offset;
+ /* Tag a part of memory with different value */
+ ptr_next = (void *)((unsigned long)ptr + tag_offset);
ptr_next = mte_insert_new_tag(ptr_next);
- mte_set_tag_address_range(ptr_next, page_sz);
+ mte_set_tag_address_range(ptr_next, tag_len);
- lseek(fd, 0, 0);
- /* Copy from file into buffer with invalid tag */
- read_len = read(fd, ptr, len);
- mte_wait_after_trig();
- /*
- * Accessing user memory in kernel with invalid tag should fail in sync
- * mode without fault but may not fail in async mode as per the
- * implemented MTE userspace support in Arm64 kernel.
- */
- if (mode == MTE_SYNC_ERR &&
- !cur_mte_cxt.fault_valid && read_len < len) {
- err = KSFT_PASS;
- } else if (mode == MTE_ASYNC_ERR &&
- !cur_mte_cxt.fault_valid && read_len == len) {
- err = KSFT_PASS;
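+ /* Sweep small file offsets, buffer offsets and a range of sizes to cover different tag-granule alignments */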
+ for (fileoff = 0; fileoff < 16; fileoff++) {
+ for (ptroff = 0; ptroff < 16; ptroff++) {
+ for (i = 0; i < ARRAY_SIZE(sizes); i++) {
+ size = sizes[i];
+ lseek(fd, 0, 0);
+
+ /* perform file operation on buffer with invalid tag */
+ switch (test_type) {
+ case READ_TEST:
+ syscall_len = read(fd, ptr + ptroff, size);
+ break;
+ case WRITE_TEST:
+ syscall_len = write(fd, ptr + ptroff, size);
+ break;
+ case READV_TEST: {
+ struct iovec iov[1];
+ iov[0].iov_base = ptr + ptroff;
+ iov[0].iov_len = size;
+ syscall_len = readv(fd, iov, 1);
+ break;
+ }
+ case WRITEV_TEST: {
+ struct iovec iov[1];
+ iov[0].iov_base = ptr + ptroff;
+ iov[0].iov_len = size;
+ syscall_len = writev(fd, iov, 1);
+ break;
+ }
+ case LAST_TEST:
+ goto usermem_acc_err;
+ }
+
+ mte_wait_after_trig();
+ /*
+ * Accessing user memory in kernel with invalid tag should fail in sync
+ * mode without fault but may not fail in async mode as per the
+ * implemented MTE userspace support in Arm64 kernel.
+ */
+ if (cur_mte_cxt.fault_valid) {
+ goto usermem_acc_err;
+ }
+ if (mode == MTE_SYNC_ERR && syscall_len < len) {
+ /* test passed */
+ } else if (mode == MTE_ASYNC_ERR && syscall_len == size) {
+ /* test passed */
+ } else {
+ goto usermem_acc_err;
+ }
+ }
+ }
}
+
+ goto exit;
+
usermem_acc_err:
+ err = KSFT_FAIL;
+exit:
mte_free_memory((void *)ptr, len, mem_type, true);
close(fd);
return err;
}
+void format_test_name(char *name, int name_len, int type, int sync, int map,
+ int len, int offset)
+{
+ const char* test_type;
+ const char* mte_type;
+ const char* map_type;
+
+ switch (type) {
+ case READ_TEST:
+ test_type = "read";
+ break;
+ case WRITE_TEST:
+ test_type = "write";
+ break;
+ case READV_TEST:
+ test_type = "readv";
+ break;
+ case WRITEV_TEST:
+ test_type = "writev";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (sync) {
+ case MTE_SYNC_ERR:
+ mte_type = "MTE_SYNC_ERR";
+ break;
+ case MTE_ASYNC_ERR:
+ mte_type = "MTE_ASYNC_ERR";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (map) {
+ case MAP_SHARED:
+ map_type = "MAP_SHARED";
+ break;
+ case MAP_PRIVATE:
+ map_type = "MAP_PRIVATE";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ snprintf(name, name_len,
+ "test type: %s, %s, %s, tag len: %d, tag offset: %d\n",
+ test_type, mte_type, map_type, len, offset);
+}
+
int main(int argc, char *argv[])
{
int err;
+ int t, s, m, l, o;
+ int mte_sync[] = {MTE_SYNC_ERR, MTE_ASYNC_ERR};
+ int maps[] = {MAP_SHARED, MAP_PRIVATE};
+ int tag_lens[] = {0, MT_GRANULE_SIZE};
+ int tag_offsets[] = {page_sz, MT_GRANULE_SIZE};
+ char test_name[TEST_NAME_MAX];
page_sz = getpagesize();
if (!page_sz) {
@@ -98,17 +214,28 @@ int main(int argc, char *argv[])
mte_register_signal(SIGSEGV, mte_default_handler);
/* Set test plan */
- ksft_set_plan(4);
+ ksft_set_plan(64);
- evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
- "Check memory access from kernel in sync mode, private mapping and mmap memory\n");
- evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
- "Check memory access from kernel in sync mode, shared mapping and mmap memory\n");
-
- evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
- "Check memory access from kernel in async mode, private mapping and mmap memory\n");
- evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
- "Check memory access from kernel in async mode, shared mapping and mmap memory\n");
+ for (t = 0; t < LAST_TEST; t++) {
+ for (s = 0; s < ARRAY_SIZE(mte_sync); s++) {
+ for (m = 0; m < ARRAY_SIZE(maps); m++) {
+ for (l = 0; l < ARRAY_SIZE(tag_lens); l++) {
+ for (o = 0; o < ARRAY_SIZE(tag_offsets); o++) {
+ int sync = mte_sync[s];
+ int map = maps[m];
+ int offset = tag_offsets[o];
+ int tag_len = tag_lens[l];
+ int res = check_usermem_access_fault(USE_MMAP, sync,
+ map, offset,
+ tag_len, t);
+ format_test_name(test_name, TEST_NAME_MAX,
+ t, sync, map, tag_len, offset);
+ evaluate_test(res, test_name);
+ }
+ }
+ }
+ }
+ }
mte_restore_setup();
ksft_print_cnts();
diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h
index ebe8694dbef0..f909b70d9e98 100644
--- a/tools/testing/selftests/arm64/signal/test_signals.h
+++ b/tools/testing/selftests/arm64/signal/test_signals.h
@@ -53,6 +53,7 @@ struct tdescr {
char *name;
char *descr;
unsigned long feats_required;
+ unsigned long feats_incompatible;
/* bitmask of effectively supported feats: populated at run-time */
unsigned long feats_supported;
bool initialized;
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 2f8c23af3b5e..5743897984b0 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -36,6 +36,8 @@ static inline char *feats_to_string(unsigned long feats)
{
size_t flen = MAX_FEATS_SZ - 1;
+ feats_string[0] = '\0';
+
for (int i = 0; i < FMAX_END; i++) {
if (feats & (1UL << i)) {
size_t tlen = strlen(feats_names[i]);
@@ -256,7 +258,7 @@ int test_init(struct tdescr *td)
td->minsigstksz = MINSIGSTKSZ;
fprintf(stderr, "Detected MINSTKSIGSZ:%d\n", td->minsigstksz);
- if (td->feats_required) {
+ if (td->feats_required || td->feats_incompatible) {
td->feats_supported = 0;
/*
* Checking for CPU required features using both the
@@ -267,15 +269,29 @@ int test_init(struct tdescr *td)
if (getauxval(AT_HWCAP) & HWCAP_SVE)
td->feats_supported |= FEAT_SVE;
if (feats_ok(td)) {
- fprintf(stderr,
- "Required Features: [%s] supported\n",
- feats_to_string(td->feats_required &
- td->feats_supported));
+ if (td->feats_required & td->feats_supported)
+ fprintf(stderr,
+ "Required Features: [%s] supported\n",
+ feats_to_string(td->feats_required &
+ td->feats_supported));
+ if (!(td->feats_incompatible & td->feats_supported))
+ fprintf(stderr,
+ "Incompatible Features: [%s] absent\n",
+ feats_to_string(td->feats_incompatible));
} else {
- fprintf(stderr,
- "Required Features: [%s] NOT supported\n",
- feats_to_string(td->feats_required &
- ~td->feats_supported));
+ if ((td->feats_required & td->feats_supported) !=
+ td->feats_required)
+ fprintf(stderr,
+ "Required Features: [%s] NOT supported\n",
+ feats_to_string(td->feats_required &
+ ~td->feats_supported));
+ if (td->feats_incompatible & td->feats_supported)
+ fprintf(stderr,
+ "Incompatible Features: [%s] supported\n",
+ feats_to_string(td->feats_incompatible &
+ td->feats_supported));
+
td->result = KSFT_SKIP;
return 0;
}
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h
index 6772b5c8d274..f3aa99ba67bb 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.h
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h
@@ -18,6 +18,8 @@ void test_result(struct tdescr *td);
static inline bool feats_ok(struct tdescr *td)
{
+ if (td->feats_incompatible & td->feats_supported)
+ return false;
return (td->feats_required & td->feats_supported) == td->feats_required;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_crash.c b/tools/testing/selftests/bpf/prog_tests/timer_crash.c
new file mode 100644
index 000000000000..f74b82305da8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_crash.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "timer_crash.skel.h"
+
+enum {
+ MODE_ARRAY,
+ MODE_HASH,
+};
+
+static void test_timer_crash_mode(int mode)
+{
+ struct timer_crash *skel;
+
+ skel = timer_crash__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "timer_crash__open_and_load"))
+ return;
+ skel->bss->pid = getpid();
+ skel->bss->crash_map = mode;
+ if (!ASSERT_OK(timer_crash__attach(skel), "timer_crash__attach"))
+ goto end;
+ usleep(1);
+end:
+ timer_crash__destroy(skel);
+}
+
+void test_timer_crash(void)
+{
+ if (test__start_subtest("array"))
+ test_timer_crash_mode(MODE_ARRAY);
+ if (test__start_subtest("hash"))
+ test_timer_crash_mode(MODE_HASH);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index 2966564b8497..6c85b00f27b2 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -235,7 +235,7 @@ SEC("sk_msg1")
int bpf_prog4(struct sk_msg_md *msg)
{
int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
- int *start, *end, *start_push, *end_push, *start_pop, *pop;
+ int *start, *end, *start_push, *end_push, *start_pop, *pop, err = 0;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
if (bytes)
@@ -249,8 +249,11 @@ int bpf_prog4(struct sk_msg_md *msg)
bpf_msg_pull_data(msg, *start, *end, 0);
start_push = bpf_map_lookup_elem(&sock_bytes, &two);
end_push = bpf_map_lookup_elem(&sock_bytes, &three);
- if (start_push && end_push)
- bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (start_push && end_push) {
+ err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (err)
+ return SK_DROP;
+ }
start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
pop = bpf_map_lookup_elem(&sock_bytes, &five);
if (start_pop && pop)
@@ -263,6 +266,7 @@ int bpf_prog6(struct sk_msg_md *msg)
{
int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0;
int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f;
+ int err = 0;
__u64 flags = 0;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
@@ -279,8 +283,11 @@ int bpf_prog6(struct sk_msg_md *msg)
start_push = bpf_map_lookup_elem(&sock_bytes, &two);
end_push = bpf_map_lookup_elem(&sock_bytes, &three);
- if (start_push && end_push)
- bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (start_push && end_push) {
+ err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (err)
+ return SK_DROP;
+ }
start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
pop = bpf_map_lookup_elem(&sock_bytes, &five);
@@ -338,7 +345,7 @@ SEC("sk_msg5")
int bpf_prog10(struct sk_msg_md *msg)
{
int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop;
- int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
+ int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, err = 0;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
if (bytes)
@@ -352,8 +359,11 @@ int bpf_prog10(struct sk_msg_md *msg)
bpf_msg_pull_data(msg, *start, *end, 0);
start_push = bpf_map_lookup_elem(&sock_bytes, &two);
end_push = bpf_map_lookup_elem(&sock_bytes, &three);
- if (start_push && end_push)
- bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (start_push && end_push) {
+ err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (err)
+ return SK_PASS;
+ }
start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
pop = bpf_map_lookup_elem(&sock_bytes, &five);
if (start_pop && pop)
diff --git a/tools/testing/selftests/bpf/progs/timer_crash.c b/tools/testing/selftests/bpf/progs/timer_crash.c
new file mode 100644
index 000000000000..f8f7944e70da
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_crash.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct map_elem {
+ struct bpf_timer timer;
+ struct bpf_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct map_elem);
+} amap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct map_elem);
+} hmap SEC(".maps");
+
+int pid = 0;
+int crash_map = 0; /* 0 for amap, 1 for hmap */
+
+SEC("fentry/do_nanosleep")
+int sys_enter(void *ctx)
+{
+ struct map_elem *e, value = {};
+ void *map = crash_map ? (void *)&hmap : (void *)&amap;
+
+ if (bpf_get_current_task_btf()->tgid != pid)
+ return 0;
+
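+ /* Scribble a bogus pointer over the start of the value, where the map's bpf_timer lives */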
+ *(void **)&value = (void *)0xdeadcaf3;
+
+ bpf_map_update_elem(map, &(int){0}, &value, 0);
+ /* For array map, doing bpf_map_update_elem will do a
+ * check_and_free_timer_in_array, which will trigger the crash if timer
+ * pointer was overwritten, for hmap we need to use bpf_timer_cancel.
+ */
+ if (crash_map == 1) {
+ e = bpf_map_lookup_elem(map, &(int){0});
+ if (!e)
+ return 0;
+ bpf_timer_cancel(&e->timer);
+ }
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
index ebf68dce5504..2893e9f2f1e0 100644
--- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
@@ -28,7 +28,6 @@
// 5. We can read keycode from same /dev/lirc device
#include <linux/bpf.h>
-#include <linux/lirc.h>
#include <linux/input.h>
#include <errno.h>
#include <stdio.h>
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 0cf7e90c0052..dbaa7aabbb4a 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -583,7 +583,7 @@ int clone_into_cgroup_run_wait(const char *cgroup)
return 0;
}
-int cg_prepare_for_wait(const char *cgroup)
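+/* Open an inotify fd watching the given cgroup control file for modification */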
+static int __prepare_for_wait(const char *cgroup, const char *filename)
{
int fd, ret = -1;
@@ -591,8 +591,7 @@ int cg_prepare_for_wait(const char *cgroup)
if (fd == -1)
return fd;
- ret = inotify_add_watch(fd, cg_control(cgroup, "cgroup.events"),
- IN_MODIFY);
+ ret = inotify_add_watch(fd, cg_control(cgroup, filename), IN_MODIFY);
if (ret == -1) {
close(fd);
fd = -1;
@@ -601,6 +600,16 @@ int cg_prepare_for_wait(const char *cgroup)
return fd;
}
+int cg_prepare_for_wait(const char *cgroup)
+{
+ return __prepare_for_wait(cgroup, "cgroup.events");
+}
+
+int memcg_prepare_for_wait(const char *cgroup)
+{
+ return __prepare_for_wait(cgroup, "memory.events");
+}
+
int cg_wait_for(int fd)
{
int ret = -1;
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 4f66d10626d2..628738532ac9 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -55,4 +55,5 @@ extern int clone_reap(pid_t pid, int options);
extern int clone_into_cgroup_run_wait(const char *cgroup);
extern int dirfd_open_opath(const char *dir);
extern int cg_prepare_for_wait(const char *cgroup);
+extern int memcg_prepare_for_wait(const char *cgroup);
extern int cg_wait_for(int fd);
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index c19a97dd02d4..36ccf2322e21 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -16,6 +16,7 @@
#include <netinet/in.h>
#include <netdb.h>
#include <errno.h>
+#include <sys/mman.h>
#include "../kselftest.h"
#include "cgroup_util.h"
@@ -628,6 +629,82 @@ cleanup:
return ret;
}
+static int alloc_anon_mlock(const char *cgroup, void *arg)
+{
+ size_t size = (size_t)arg;
+ void *buf;
+
+ buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
+ -1, 0);
+ if (buf == MAP_FAILED)
+ return -1;
+
+ mlock(buf, size);
+ munmap(buf, size);
+ return 0;
+}
+
+/*
+ * This test checks that memory.high is able to throttle big single shot
+ * allocation i.e. large allocation within one kernel entry.
+ */
+static int test_memcg_high_sync(const char *root)
+{
+ int ret = KSFT_FAIL, pid, fd = -1;
+ char *memcg;
+ long pre_high, pre_max;
+ long post_high, post_max;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ pre_high = cg_read_key_long(memcg, "memory.events", "high ");
+ pre_max = cg_read_key_long(memcg, "memory.events", "max ");
+ if (pre_high < 0 || pre_max < 0)
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.high", "30M"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.max", "140M"))
+ goto cleanup;
+
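+ /* Watch memory.events so cg_wait_for() returns once the high event is recorded */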
+ fd = memcg_prepare_for_wait(memcg);
+ if (fd < 0)
+ goto cleanup;
+
+ pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
+ if (pid < 0)
+ goto cleanup;
+
+ cg_wait_for(fd);
+
+ post_high = cg_read_key_long(memcg, "memory.events", "high ");
+ post_max = cg_read_key_long(memcg, "memory.events", "max ");
+ if (post_high < 0 || post_max < 0)
+ goto cleanup;
+
+ if (pre_high == post_high || pre_max != post_max)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (fd >= 0)
+ close(fd);
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
/*
* This test checks that memory.max limits the amount of
* memory which can be consumed by either anonymous memory
@@ -1180,6 +1257,7 @@ struct memcg_test {
T(test_memcg_min),
T(test_memcg_low),
T(test_memcg_high),
+ T(test_memcg_high_sync),
T(test_memcg_max),
T(test_memcg_oom_events),
T(test_memcg_swap_max),
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
index 076cf4325f78..cd4582129c7d 100644
--- a/tools/testing/selftests/clone3/clone3.c
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -126,8 +126,6 @@ static void test_clone3(uint64_t flags, size_t size, int expected,
int main(int argc, char *argv[])
{
- pid_t pid;
-
uid_t uid = getuid();
ksft_print_header();
diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh
index 31f8c9a76c5f..60ce18ed0666 100755
--- a/tools/testing/selftests/cpufreq/main.sh
+++ b/tools/testing/selftests/cpufreq/main.sh
@@ -194,5 +194,5 @@ prerequisite
# Run requested functions
clear_dumps $OUTFILE
-do_test >> $OUTFILE.txt
+do_test | tee -a $OUTFILE.txt
dmesg_dumps $OUTFILE
diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
index 937d36ae9a69..0470c5f3e690 100644
--- a/tools/testing/selftests/damon/Makefile
+++ b/tools/testing/selftests/damon/Makefile
@@ -6,5 +6,6 @@ TEST_GEN_FILES += huge_count_read_write
TEST_FILES = _chk_dependency.sh _debugfs_common.sh
TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh
TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh
+TEST_PROGS += sysfs.sh
include ../lib.mk
diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh
new file mode 100644
index 000000000000..2e3ae77cb6db
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs.sh
@@ -0,0 +1,306 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+ensure_write_succ()
+{
+ file=$1
+ content=$2
+ reason=$3
+
+ if ! echo "$content" > "$file"
+ then
+ echo "writing $content to $file failed"
+ echo "expected success because $reason"
+ exit 1
+ fi
+}
+
+ensure_write_fail()
+{
+ file=$1
+ content=$2
+ reason=$3
+
+ if echo "$content" > "$file"
+ then
+ echo "writing $content to $file succeed ($fail_reason)"
+ echo "expected failure because $reason"
+ exit 1
+ fi
+}
+
+ensure_dir()
+{
+ dir=$1
+ to_ensure=$2
+ if [ "$to_ensure" = "exist" ] && [ ! -d "$dir" ]
+ then
+ echo "$dir dir is expected but not found"
+ exit 1
+ elif [ "$to_ensure" = "not_exist" ] && [ -d "$dir" ]
+ then
+ echo "$dir dir is not expected but found"
+ exit 1
+ fi
+}
+
+ensure_file()
+{
+ file=$1
+ to_ensure=$2
+ permission=$3
+ if [ "$to_ensure" = "exist" ]
+ then
+ if [ ! -f "$file" ]
+ then
+ echo "$file is expected but not found"
+ exit 1
+ fi
+ perm=$(stat -c "%a" "$file")
+ if [ ! "$perm" = "$permission" ]
+ then
+ echo "$file permission: expected $permission but $perm"
+ exit 1
+ fi
+ elif [ "$to_ensure" = "not_exist" ] && [ -f "$dir" ]
+ then
+ echo "$file is not expected but found"
+ exit 1
+ fi
+}
+
+test_range()
+{
+ range_dir=$1
+ ensure_dir "$range_dir" "exist"
+ ensure_file "$range_dir/min" "exist" 600
+ ensure_file "$range_dir/max" "exist" 600
+}
+
+test_stats()
+{
+ stats_dir=$1
+ ensure_dir "$stats_dir" "exist"
+ for f in nr_tried sz_tried nr_applied sz_applied qt_exceeds
+ do
+ ensure_file "$stats_dir/$f" "exist" "400"
+ done
+}
+
+test_watermarks()
+{
+ watermarks_dir=$1
+ ensure_dir "$watermarks_dir" "exist"
+ ensure_file "$watermarks_dir/metric" "exist" "600"
+ ensure_file "$watermarks_dir/interval_us" "exist" "600"
+ ensure_file "$watermarks_dir/high" "exist" "600"
+ ensure_file "$watermarks_dir/mid" "exist" "600"
+ ensure_file "$watermarks_dir/low" "exist" "600"
+}
+
+test_weights()
+{
+ weights_dir=$1
+ ensure_dir "$weights_dir" "exist"
+ ensure_file "$weights_dir/sz_permil" "exist" "600"
+ ensure_file "$weights_dir/nr_accesses_permil" "exist" "600"
+ ensure_file "$weights_dir/age_permil" "exist" "600"
+}
+
+test_quotas()
+{
+ quotas_dir=$1
+ ensure_dir "$quotas_dir" "exist"
+ ensure_file "$quotas_dir/ms" "exist" 600
+ ensure_file "$quotas_dir/bytes" "exist" 600
+ ensure_file "$quotas_dir/reset_interval_ms" "exist" 600
+ test_weights "$quotas_dir/weights"
+}
+
+test_access_pattern()
+{
+ access_pattern_dir=$1
+ ensure_dir "$access_pattern_dir" "exist"
+ test_range "$access_pattern_dir/age"
+ test_range "$access_pattern_dir/nr_accesses"
+ test_range "$access_pattern_dir/sz"
+}
+
+test_scheme()
+{
+ scheme_dir=$1
+ ensure_dir "$scheme_dir" "exist"
+ ensure_file "$scheme_dir/action" "exist" "600"
+ test_access_pattern "$scheme_dir/access_pattern"
+ test_quotas "$scheme_dir/quotas"
+ test_watermarks "$scheme_dir/watermarks"
+ test_stats "$scheme_dir/stats"
+}
+
+test_schemes()
+{
+ schemes_dir=$1
+ ensure_dir "$schemes_dir" "exist"
+ ensure_file "$schemes_dir/nr_schemes" "exist" 600
+
+ ensure_write_succ "$schemes_dir/nr_schemes" "1" "valid input"
+ test_scheme "$schemes_dir/0"
+
+ ensure_write_succ "$schemes_dir/nr_schemes" "2" "valid input"
+ test_scheme "$schemes_dir/0"
+ test_scheme "$schemes_dir/1"
+
+ ensure_write_succ "$schemes_dir/nr_schemes" "0" "valid input"
+ ensure_dir "$schemes_dir/0" "not_exist"
+ ensure_dir "$schemes_dir/1" "not_exist"
+}
+
+test_region()
+{
+ region_dir=$1
+ ensure_dir "$region_dir" "exist"
+ ensure_file "$region_dir/start" "exist" 600
+ ensure_file "$region_dir/end" "exist" 600
+}
+
+test_regions()
+{
+ regions_dir=$1
+ ensure_dir "$regions_dir" "exist"
+ ensure_file "$regions_dir/nr_regions" "exist" 600
+
+ ensure_write_succ "$regions_dir/nr_regions" "1" "valid input"
+ test_region "$regions_dir/0"
+
+ ensure_write_succ "$regions_dir/nr_regions" "2" "valid input"
+ test_region "$regions_dir/0"
+ test_region "$regions_dir/1"
+
+ ensure_write_succ "$regions_dir/nr_regions" "0" "valid input"
+ ensure_dir "$regions_dir/0" "not_exist"
+ ensure_dir "$regions_dir/1" "not_exist"
+}
+
+test_target()
+{
+ target_dir=$1
+ ensure_dir "$target_dir" "exist"
+ ensure_file "$target_dir/pid_target" "exist" "600"
+ test_regions "$target_dir/regions"
+}
+
+test_targets()
+{
+ targets_dir=$1
+ ensure_dir "$targets_dir" "exist"
+ ensure_file "$targets_dir/nr_targets" "exist" 600
+
+ ensure_write_succ "$targets_dir/nr_targets" "1" "valid input"
+ test_target "$targets_dir/0"
+
+ ensure_write_succ "$targets_dir/nr_targets" "2" "valid input"
+ test_target "$targets_dir/0"
+ test_target "$targets_dir/1"
+
+ ensure_write_succ "$targets_dir/nr_targets" "0" "valid input"
+ ensure_dir "$targets_dir/0" "not_exist"
+ ensure_dir "$targets_dir/1" "not_exist"
+}
+
+test_intervals()
+{
+ intervals_dir=$1
+ ensure_dir "$intervals_dir" "exist"
+ ensure_file "$intervals_dir/aggr_us" "exist" "600"
+ ensure_file "$intervals_dir/sample_us" "exist" "600"
+ ensure_file "$intervals_dir/update_us" "exist" "600"
+}
+
+test_monitoring_attrs()
+{
+ monitoring_attrs_dir=$1
+ ensure_dir "$monitoring_attrs_dir" "exist"
+ test_intervals "$monitoring_attrs_dir/intervals"
+ test_range "$monitoring_attrs_dir/nr_regions"
+}
+
+test_context()
+{
+ context_dir=$1
+ ensure_dir "$context_dir" "exist"
+ ensure_file "$context_dir/operations" "exist" 600
+ test_monitoring_attrs "$context_dir/monitoring_attrs"
+ test_targets "$context_dir/targets"
+ test_schemes "$context_dir/schemes"
+}
+
+test_contexts()
+{
+ contexts_dir=$1
+ ensure_dir "$contexts_dir" "exist"
+ ensure_file "$contexts_dir/nr_contexts" "exist" 600
+
+ ensure_write_succ "$contexts_dir/nr_contexts" "1" "valid input"
+ test_context "$contexts_dir/0"
+
+ ensure_write_fail "$contexts_dir/nr_contexts" "2" "only 0/1 are supported"
+ test_context "$contexts_dir/0"
+
+ ensure_write_succ "$contexts_dir/nr_contexts" "0" "valid input"
+ ensure_dir "$contexts_dir/0" "not_exist"
+}
+
+test_kdamond()
+{
+ kdamond_dir=$1
+ ensure_dir "$kdamond_dir" "exist"
+ ensure_file "$kdamond_dir/state" "exist" "600"
+ ensure_file "$kdamond_dir/pid" "exist" 400
+ test_contexts "$kdamond_dir/contexts"
+}
+
+test_kdamonds()
+{
+ kdamonds_dir=$1
+ ensure_dir "$kdamonds_dir" "exist"
+
+ ensure_file "$kdamonds_dir/nr_kdamonds" "exist" "600"
+
+ ensure_write_succ "$kdamonds_dir/nr_kdamonds" "1" "valid input"
+ test_kdamond "$kdamonds_dir/0"
+
+ ensure_write_succ "$kdamonds_dir/nr_kdamonds" "2" "valid input"
+ test_kdamond "$kdamonds_dir/0"
+ test_kdamond "$kdamonds_dir/1"
+
+ ensure_write_succ "$kdamonds_dir/nr_kdamonds" "0" "valid input"
+ ensure_dir "$kdamonds_dir/0" "not_exist"
+ ensure_dir "$kdamonds_dir/1" "not_exist"
+}
+
+test_damon_sysfs()
+{
+ damon_sysfs=$1
+ if [ ! -d "$damon_sysfs" ]
+ then
+ echo "$damon_sysfs not found"
+ exit $ksft_skip
+ fi
+
+ test_kdamonds "$damon_sysfs/kdamonds"
+}
+
+check_dependencies()
+{
+ if [ $EUID -ne 0 ]
+ then
+ echo "Run as root"
+ exit $ksft_skip
+ fi
+}
+
+check_dependencies
+test_damon_sysfs "/sys/kernel/mm/damon/admin"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
index bcb110e830ce..dea33dc93790 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -50,8 +50,8 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
else
log_test "'$current_test' [$profile] overflow $target"
fi
+ RET_FIN=$(( RET_FIN || RET ))
done
- RET_FIN=$(( RET_FIN || RET ))
done
done
current_test=""
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
index 3e3e06ea5703..86e787895f78 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
@@ -60,7 +60,8 @@ __tc_police_test()
tc_police_rules_create $count $should_fail
- offload_count=$(tc filter show dev $swp1 ingress | grep in_hw | wc -l)
+ offload_count=$(tc -j filter show dev $swp1 ingress |
+ jq "[.[] | select(.options.in_hw == true)] | length")
((offload_count == count))
check_err_fail $should_fail $? "tc police offload count"
}
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index dd61118df66e..a89ba6de7987 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -3,13 +3,14 @@ CFLAGS = -Wall
CFLAGS += -Wno-nonnull
CFLAGS += -D_GNU_SOURCE
-TEST_PROGS := binfmt_script non-regular
-TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216
-TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe
+TEST_PROGS := binfmt_script
+TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 non-regular
+TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
# Makefile is a run-time dependency, since it's accessed by the execveat test
TEST_FILES := Makefile
TEST_GEN_PROGS += recursion-depth
+TEST_GEN_PROGS += null-argv
EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx* \
$(OUTPUT)/S_I*.test
diff --git a/tools/testing/selftests/exec/null-argv.c b/tools/testing/selftests/exec/null-argv.c
new file mode 100644
index 000000000000..c19726e710d1
--- /dev/null
+++ b/tools/testing/selftests/exec/null-argv.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test that empty argvs are swapped out for a single empty string. */
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
+
+#define FORK(exec) \
+do { \
+ pid = fork(); \
+ if (pid == 0) { \
+ /* Child */ \
+ exec; /* Some kind of exec */ \
+ perror("# " #exec); \
+ return 1; \
+ } \
+ check_result(pid, #exec); \
+} while (0)
+
+void check_result(pid_t pid, const char *msg)
+{
+ int wstatus;
+
+ if (pid == (pid_t)-1) {
+ perror("# fork");
+ ksft_test_result_fail("fork failed: %s\n", msg);
+ return;
+ }
+ if (waitpid(pid, &wstatus, 0) < 0) {
+ perror("# waitpid");
+ ksft_test_result_fail("waitpid failed: %s\n", msg);
+ return;
+ }
+ if (!WIFEXITED(wstatus)) {
+ ksft_test_result_fail("child did not exit: %s\n", msg);
+ return;
+ }
+ if (WEXITSTATUS(wstatus) != 0) {
+ ksft_test_result_fail("non-zero exit: %s\n", msg);
+ return;
+ }
+ ksft_test_result_pass("%s\n", msg);
+}
+
+int main(int argc, char *argv[], char *envp[])
+{
+ pid_t pid;
+ static char * const args[] = { NULL };
+ static char * const str[] = { "", NULL };
+
+ /* argc counting checks */
+ if (argc < 1) {
+ fprintf(stderr, "# FAIL: saw argc == 0 (old kernel?)\n");
+ return 1;
+ }
+ if (argc != 1) {
+ fprintf(stderr, "# FAIL: unknown argc (%d)\n", argc);
+ return 1;
+ }
+ if (argv[0][0] == '\0') {
+ /* Good, we found a NULL terminated string at argv[0]! */
+ return 0;
+ }
+
+ /* Test runner. */
+ ksft_print_header();
+ ksft_set_plan(5);
+
+ FORK(execve(argv[0], str, NULL));
+ FORK(execve(argv[0], NULL, NULL));
+ FORK(execve(argv[0], NULL, envp));
+ FORK(execve(argv[0], args, NULL));
+ FORK(execve(argv[0], args, envp));
+
+ ksft_exit(ksft_cnt.ksft_pass == ksft_plan);
+}
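
The test above depends on the kernel rewriting an empty argv into { "", NULL } at execve() time, so the exec'd program sees argc == 1 and an empty argv[0] rather than a NULL pointer. As context, a minimal standalone sketch of what userspace observes (not part of the patch; assumes /proc/self/exe is usable):

/* Hedged illustration of the argv fixup the selftest above checks:
 * re-exec with argv == NULL; a fixed kernel injects { "", NULL }. */
#include <stdio.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
	if (argc == 0) {
		/* Old kernel: argv really was left empty. */
		fprintf(stderr, "argc == 0 (no fixup)\n");
		return 1;
	}
	if (argv[0][0] == '\0') {
		/* Second pass: the kernel-injected empty string. */
		printf("got injected empty argv[0]\n");
		return 0;
	}
	/* First pass: re-exec ourselves with a NULL argv. */
	execve("/proc/self/exe", NULL, NULL);
	perror("execve");
	return 1;
}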
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
index e96e279e0533..25432b8cd5bd 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
@@ -19,7 +19,7 @@ fail() { # mesg
FILTER=set_ftrace_filter
FUNC1="schedule"
-FUNC2="do_softirq"
+FUNC2="scheduler_tick"
ALL_FUNCS="#### all functions enabled ####"
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index 12631f0076a1..11e157d7533b 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -11,7 +11,7 @@ all:
@for DIR in $(SUBDIRS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
mkdir $$BUILD_TARGET -p; \
- make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
if [ -e $$DIR/$(TEST_PROGS) ]; then \
rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \
fi \
@@ -32,6 +32,6 @@ override define CLEAN
@for DIR in $(SUBDIRS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
mkdir $$BUILD_TARGET -p; \
- make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
done
endef
diff --git a/tools/testing/selftests/ir/ir_loopback.c b/tools/testing/selftests/ir/ir_loopback.c
index 06256c96df12..f4a15cbdd5ea 100644
--- a/tools/testing/selftests/ir/ir_loopback.c
+++ b/tools/testing/selftests/ir/ir_loopback.c
@@ -29,6 +29,16 @@
#define SYSFS_PATH_MAX 256
#define DNAME_PATH_MAX 256
+/*
+ * Support ancient lirc.h which does not have these values. Can be removed
+ * once RHEL 8 is no longer a relevant testing platform.
+ */
+#if RC_PROTO_MAX < 26
+#define RC_PROTO_RCMM12 24
+#define RC_PROTO_RCMM24 25
+#define RC_PROTO_RCMM32 26
+#endif
+
static const struct {
enum rc_proto proto;
const char *name;
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 471eaa7b3a3f..11779405dc80 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -877,7 +877,8 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext)
}
t->timed_out = true;
- kill(t->pid, SIGKILL);
+ // signal process group
+ kill(-(t->pid), SIGKILL);
}
void __wait_for_test(struct __test_metadata *t)
@@ -987,6 +988,7 @@ void __run_test(struct __fixture_metadata *f,
ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
t->passed = 0;
} else if (t->pid == 0) {
+ setpgrp();
t->fn(t, variant);
if (t->skip)
_exit(255);
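
The two hunks above work together: the test child makes itself a process-group leader via setpgrp(), and the timeout handler signals the negative pid so any grandchildren the test spawned die with it. A minimal sketch of that pattern (standalone example under those assumptions, not the harness code itself):

/* Hedged sketch of the process-group timeout pattern. */
#include <signal.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {
		/* Child: lead a new process group so the parent can
		 * signal every descendant at once. */
		setpgrp();
		/* A grandchild inherits the group and is killed too. */
		if (fork() == 0)
			pause();
		pause();
		_exit(0);
	}

	sleep(1);	/* crude: give the child time to call setpgrp() */
	/* Negative pid: deliver SIGKILL to the whole group. */
	kill(-pid, SIGKILL);
	waitpid(pid, NULL, 0);
	printf("group %d killed\n", pid);
	return 0;
}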
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 81ebf99d6ff0..17c3f0749f05 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -82,9 +82,9 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test
TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_pi_mmio_test
TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
TEST_GEN_PROGS_x86_64 += x86_64/amx_test
+TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index 9ad38bd360a4..b08d30bf71c5 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -366,6 +366,7 @@ static struct kvm_vm *test_vm_create(void)
{
struct kvm_vm *vm;
unsigned int i;
+ int ret;
int nr_vcpus = test_args.nr_vcpus;
vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL);
@@ -382,7 +383,11 @@ static struct kvm_vm *test_vm_create(void)
ucall_init(vm, NULL);
test_init_timer_irq(vm);
- vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
+ ret = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
+ if (ret < 0) {
+ print_skip("Failed to create vgic-v3");
+ exit(KSFT_SKIP);
+ }
/* Make all the test's cmdline args visible to the guest */
sync_global_to_guest(vm, test_args);
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
index e6c7d7f8fbd1..7eca97799917 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -761,6 +761,10 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
gic_fd = vgic_v3_setup(vm, 1, nr_irqs,
GICD_BASE_GPA, GICR_BASE_GPA);
+ if (gic_fd < 0) {
+ print_skip("Failed to create vgic-v3, skipping");
+ exit(KSFT_SKIP);
+ }
vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
guest_irq_handlers[args.eoi_split][args.level_sensitive]);
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 66775de26952..4ed6aa049a91 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -345,7 +345,6 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
* guest_code - The vCPU's entry point
*/
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
-void vm_xsave_req_perm(void);
bool vm_is_unrestricted_guest(struct kvm_vm *vm);
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 423d8a61bd2e..8a470da7b71a 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -458,6 +458,7 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
+void vm_xsave_req_perm(int bit);
enum x86_page_size {
X86_PAGE_SIZE_4K = 0,
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
index b3a0fca0d780..f5cd0c536d85 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
@@ -52,7 +52,9 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
nr_vcpus, nr_vcpus_created);
/* Distributor setup */
- gic_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3, false);
+ if (_kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3,
+ false, &gic_fd) != 0)
+ return -1;
kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
0, &nr_irqs, true);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 8c53f96ab7fe..d8cf851ab119 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -393,13 +393,6 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
struct kvm_vm *vm;
int i;
-#ifdef __x86_64__
- /*
- * Permission needs to be requested before KVM_SET_CPUID2.
- */
- vm_xsave_req_perm();
-#endif
-
/* Force slot0 memory size to be no smaller than DEFAULT_GUEST_PHY_PAGES */
if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES)
slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 5f9d7e91dc69..9f000dfb5594 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -665,16 +665,31 @@ static bool is_xfd_supported(void)
return !!(eax & CPUID_XFD_BIT);
}
-void vm_xsave_req_perm(void)
+void vm_xsave_req_perm(int bit)
{
- unsigned long bitmask;
+ int kvm_fd;
+ u64 bitmask;
long rc;
+ struct kvm_device_attr attr = {
+ .group = 0,
+ .attr = KVM_X86_XCOMP_GUEST_SUPP,
+ .addr = (unsigned long) &bitmask
+ };
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ rc = ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
+ close(kvm_fd);
+ if (rc == -1 && (errno == ENXIO || errno == EINVAL))
+ exit(KSFT_SKIP);
+ TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
+ if (!(bitmask & (1ULL << bit)))
+ exit(KSFT_SKIP);
if (!is_xfd_supported())
- return;
+ exit(KSFT_SKIP);
+
+ rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit);
- rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
- XSTATE_XTILE_DATA_BIT);
/*
* Older kernels (<5.15) don't support
* ARCH_REQ_XCOMP_GUEST_PERM and return directly.
@@ -684,7 +699,7 @@ void vm_xsave_req_perm(void)
rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
- TEST_ASSERT(bitmask & XFEATURE_XTILE_MASK,
+ TEST_ASSERT(bitmask & (1ULL << bit),
"prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx",
bitmask);
}
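
With the feature bit now passed in by the caller, the arch_prctl flow reduces to request-then-verify. A hedged standalone sketch (x86_64 only; the ARCH_* values match the upstream uapi and are guarded in case asm/prctl.h already defines them, and XSTATE_XTILE_DATA_BIT = 18 is the AMX tile-data bit):

/* Hedged sketch of the ARCH_REQ_XCOMP_GUEST_PERM flow above. */
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef ARCH_GET_XCOMP_GUEST_PERM
#define ARCH_GET_XCOMP_GUEST_PERM	0x1024
#define ARCH_REQ_XCOMP_GUEST_PERM	0x1025
#endif
#define XSTATE_XTILE_DATA_BIT		18

int main(void)
{
	unsigned long bitmask = 0;

	/* Ask for guest AMX state; fails on pre-5.15 kernels. */
	if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
		    XSTATE_XTILE_DATA_BIT))
		perror("ARCH_REQ_XCOMP_GUEST_PERM");

	/* Read back and verify the bit actually stuck. */
	if (syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask))
		perror("ARCH_GET_XCOMP_GUEST_PERM");

	printf("guest xcomp permission bitmask: 0x%lx\n", bitmask);
	return !(bitmask & (1UL << XSTATE_XTILE_DATA_BIT));
}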
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
index 523c1e99ed64..52a3ef6629e8 100644
--- a/tools/testing/selftests/kvm/x86_64/amx_test.c
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -329,6 +329,8 @@ int main(int argc, char *argv[])
u32 amx_offset;
int stage, ret;
+ vm_xsave_req_perm(XSTATE_XTILE_DATA_BIT);
+
/* Create VM */
vm = vm_create_default(VCPU_ID, 0, guest_code);
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index 2da8eb8e2d96..a626d40fdb48 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -105,7 +105,6 @@ static void guest_code(void *arg)
if (cpu_has_svm()) {
run_guest(svm->vmcb, svm->vmcb_gpa);
- svm->vmcb->save.rip += 3;
run_guest(svm->vmcb, svm->vmcb_gpa);
} else {
vmlaunch();
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 192a2899bae8..94df2692e6e4 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -455,6 +455,7 @@ static void mfd_fail_write(int fd)
printf("mmap()+mprotect() didn't fail as expected\n");
abort();
}
+ munmap(p, mfd_def_size);
}
/* verify PUNCH_HOLE fails */
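
mfd_fail_write() maps the write-sealed memfd read-only and checks that upgrading to PROT_WRITE is refused; the added munmap() releases that probe mapping so later checks in the test see a clean address space. The sealing behaviour in isolation, as a hedged sketch:

/* Hedged sketch: after F_SEAL_WRITE, an mprotect() to PROT_WRITE on a
 * shared mapping of the memfd must fail. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = memfd_create("probe", MFD_ALLOW_SEALING);
	void *p;

	if (fd < 0 || ftruncate(fd, 4096) < 0)
		return 1;
	if (fcntl(fd, F_ADD_SEALS, F_SEAL_WRITE) < 0)
		return 1;

	p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	if (mprotect(p, 4096, PROT_READ | PROT_WRITE) == 0) {
		fprintf(stderr, "seal not enforced?\n");
		return 1;
	}
	munmap(p, 4096);	/* the cleanup the fix above adds */
	close(fd);
	return 0;
}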
diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c
index e54106643337..4c88238fc8f0 100644
--- a/tools/testing/selftests/mincore/mincore_selftest.c
+++ b/tools/testing/selftests/mincore/mincore_selftest.c
@@ -207,15 +207,21 @@ TEST(check_file_mmap)
errno = 0;
fd = open(".", O_TMPFILE | O_RDWR, 0600);
- ASSERT_NE(-1, fd) {
- TH_LOG("Can't create temporary file: %s",
- strerror(errno));
+ if (fd < 0) {
+ ASSERT_EQ(errno, EOPNOTSUPP) {
+ TH_LOG("Can't create temporary file: %s",
+ strerror(errno));
+ }
+ SKIP(goto out_free, "O_TMPFILE not supported by filesystem.");
}
errno = 0;
retval = fallocate(fd, 0, 0, FILE_SIZE);
- ASSERT_EQ(0, retval) {
- TH_LOG("Error allocating space for the temporary file: %s",
- strerror(errno));
+ if (retval) {
+ ASSERT_EQ(errno, EOPNOTSUPP) {
+ TH_LOG("Error allocating space for the temporary file: %s",
+ strerror(errno));
+ }
+ SKIP(goto out_close, "fallocate not supported by filesystem.");
}
/*
@@ -271,7 +277,9 @@ TEST(check_file_mmap)
}
munmap(addr, FILE_SIZE);
+out_close:
close(fd);
+out_free:
free(vec);
}
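
Both hunks turn "filesystem lacks the feature" into a skip: EOPNOTSUPP from O_TMPFILE or fallocate() is tolerated, while any other errno still fails the test. The probe pattern on its own, as a hedged sketch (exit code 4 is the kselftest SKIP convention; assumes a writable current directory):

/* Hedged sketch of the EOPNOTSUPP skip pattern used above. */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open(".", O_TMPFILE | O_RDWR, 0600);

	if (fd < 0) {
		if (errno == EOPNOTSUPP) {
			fprintf(stderr, "skip: O_TMPFILE unsupported\n");
			return 4;	/* kselftest SKIP code */
		}
		perror("open");
		return 1;
	}
	if (fallocate(fd, 0, 0, 4096) && errno == EOPNOTSUPP) {
		fprintf(stderr, "skip: fallocate unsupported\n");
		close(fd);
		return 4;
	}
	close(fd);
	return 0;
}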
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index f31205f04ee0..8c5fea68ae67 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -1236,7 +1236,7 @@ static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long
}
/**
- * Validate that an attached mount in our mount namespace can be idmapped.
+ * Validate that an attached mount in our mount namespace cannot be idmapped.
* (The kernel enforces that the mount's mount namespace and the caller's mount
* namespace match.)
*/
@@ -1259,7 +1259,7 @@ TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
attr.userns_fd = get_userns_fd(0, 10000, 10000);
ASSERT_GE(attr.userns_fd, 0);
- ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
ASSERT_EQ(close(attr.userns_fd), 0);
ASSERT_EQ(close(open_tree_fd), 0);
}
diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c
index 8f6997d35816..d9d1d4190126 100644
--- a/tools/testing/selftests/net/ioam6_parser.c
+++ b/tools/testing/selftests/net/ioam6_parser.c
@@ -240,11 +240,8 @@ static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h,
*p += sizeof(__u32);
}
- if (ioam6h->type.bit6) {
- if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
- return 1;
+ if (ioam6h->type.bit6)
*p += sizeof(__u32);
- }
if (ioam6h->type.bit7) {
if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 2674ba20d524..ff821025d309 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -71,6 +71,36 @@ chk_msk_remote_key_nr()
__chk_nr "grep -c remote_key" $*
}
+# $1: ns, $2: port
+wait_local_port_listen()
+{
+ local listener_ns="${1}"
+ local port="${2}"
+
+ local port_hex i
+
+ port_hex="$(printf "%04X" "${port}")"
+ for i in $(seq 10); do
+ ip netns exec "${listener_ns}" cat /proc/net/tcp | \
+ awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
+ break
+ sleep 0.1
+ done
+}
+
+wait_connected()
+{
+ local listener_ns="${1}"
+ local port="${2}"
+
+ local port_hex i
+
+ port_hex="$(printf "%04X" "${port}")"
+ for i in $(seq 10); do
+ ip netns exec ${listener_ns} grep -q " 0100007F:${port_hex} " /proc/net/tcp && break
+ sleep 0.1
+ done
+}
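
wait_local_port_listen() keys off /proc/net/tcp: the local address column carries the port in hex, and state 0A is TCP_LISTEN. The same check done from C, as a hedged standalone sketch (not part of the patch):

/* Hedged C equivalent of wait_local_port_listen(): scan /proc/net/tcp
 * for a local address ending in ":<port-in-hex>" in state 0A. */
#include <stdio.h>
#include <string.h>

static int port_is_listening(unsigned short port)
{
	char line[256], needle[16];
	FILE *f = fopen("/proc/net/tcp", "r");
	int found = 0;

	if (!f)
		return -1;
	snprintf(needle, sizeof(needle), ":%04X", port);
	while (fgets(line, sizeof(line), f)) {
		char local[64], rem[64], state[8];

		/* layout: sl local_address rem_address st ... */
		if (sscanf(line, "%*s %63s %63s %7s",
			   local, rem, state) != 3)
			continue;
		if (strstr(local, needle) && !strcmp(state, "0A")) {
			found = 1;
			break;
		}
	}
	fclose(f);
	return found;
}

int main(void)
{
	printf("port 10000 listening: %d\n", port_is_listening(10000));
	return 0;
}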
trap cleanup EXIT
ip netns add $ns
@@ -81,15 +111,15 @@ echo "a" | \
ip netns exec $ns \
./mptcp_connect -p 10000 -l -t ${timeout_poll} \
0.0.0.0 >/dev/null &
-sleep 0.1
+wait_local_port_listen $ns 10000
chk_msk_nr 0 "no msk on netns creation"
echo "b" | \
timeout ${timeout_test} \
ip netns exec $ns \
- ./mptcp_connect -p 10000 -j -t ${timeout_poll} \
+ ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} \
127.0.0.1 >/dev/null &
-sleep 0.1
+wait_connected $ns 10000
chk_msk_nr 2 "after MPC handshake "
chk_msk_remote_key_nr 2 "....chk remote_key"
chk_msk_fallback_nr 0 "....chk no fallback"
@@ -101,13 +131,13 @@ echo "a" | \
ip netns exec $ns \
./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} \
0.0.0.0 >/dev/null &
-sleep 0.1
+wait_local_port_listen $ns 10001
echo "b" | \
timeout ${timeout_test} \
ip netns exec $ns \
- ./mptcp_connect -p 10001 -j -t ${timeout_poll} \
+ ./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} \
127.0.0.1 >/dev/null &
-sleep 0.1
+wait_connected $ns 10001
chk_msk_fallback_nr 1 "check fallback"
flush_pids
@@ -119,7 +149,7 @@ for I in `seq 1 $NR_CLIENTS`; do
./mptcp_connect -p $((I+10001)) -l -w 10 \
-t ${timeout_poll} 0.0.0.0 >/dev/null &
done
-sleep 0.1
+wait_local_port_listen $ns $((NR_CLIENTS + 10001))
for I in `seq 1 $NR_CLIENTS`; do
echo "b" | \
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index cb5809b89081..f0f4ab96b8f3 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -763,8 +763,8 @@ run_tests_disconnect()
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-I 3 -i $old_cin"
# restore previous status
- cout=$old_cout
- cout_disconnect="$cout".disconnect
+ sin=$old_sin
+ sin_disconnect="$sin".disconnect
cin=$old_cin
cin_disconnect="$cin".disconnect
connect_per_transfer=1
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 27d0eb9afdca..0c8a2a20b96c 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -75,6 +75,7 @@ init()
# let $ns2 reach any $ns1 address from any interface
ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
+ ip -net "$ns2" route add default via dead:beef:$i::1 dev ns2eth$i metric 10$i
done
}
@@ -659,6 +660,7 @@ chk_join_nr()
local ack_nr=$4
local count
local dump_stats
+ local with_cookie
printf "%02u %-36s %s" "$TEST_COUNT" "$msg" "syn"
count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'`
@@ -672,12 +674,20 @@ chk_join_nr()
fi
echo -n " - synack"
+ with_cookie=`ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies`
count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}'`
[ -z "$count" ] && count=0
if [ "$count" != "$syn_ack_nr" ]; then
- echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr"
- ret=1
- dump_stats=1
+ # with syncookies enabled, simultaneous connections exceeding the limit
+ # can get as far as synack validation, as the connection limit can be
+ # enforced reliably only after subflow creation
+ if [ "$with_cookie" = 2 ] && [ "$count" -gt "$syn_ack_nr" ] && [ "$count" -le "$syn_nr" ]; then
+ echo -n "[ ok ]"
+ else
+ echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr"
+ ret=1
+ dump_stats=1
+ fi
else
echo -n "[ ok ]"
fi
@@ -751,11 +761,17 @@ chk_add_nr()
local mis_ack_nr=${8:-0}
local count
local dump_stats
+ local timeout
+
+ timeout=`ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout`
printf "%-39s %s" " " "add"
- count=`ip netns exec $ns2 nstat -as | grep MPTcpExtAddAddr | awk '{print $2}'`
+ count=`ip netns exec $ns2 nstat -as MPTcpExtAddAddr | grep MPTcpExtAddAddr | awk '{print $2}'`
[ -z "$count" ] && count=0
- if [ "$count" != "$add_nr" ]; then
+
+ # if the test configured a short timeout, tolerate more than the expected
+ # number of ADD_ADDR options, due to retransmissions
+ if [ "$count" != "$add_nr" ] && [ "$timeout" -gt 1 -o "$count" -lt "$add_nr" ]; then
echo "[fail] got $count ADD_ADDR[s] expected $add_nr"
ret=1
dump_stats=1
@@ -960,7 +976,7 @@ wait_for_tw()
local ns=$1
while [ $time -lt $timeout_ms ]; do
- local cnt=$(ip netns exec $ns ss -t state time-wait |wc -l)
+ local cnt=$(ip netns exec $ns nstat -as TcpAttemptFails | grep TcpAttemptFails | awk '{print $2}')
[ "$cnt" = 1 ] && return 1
time=$((time + 100))
@@ -1157,7 +1173,11 @@ signal_address_tests()
ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags signal
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal
ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal
- run_tests $ns1 $ns2 10.0.1.1
+
+ # the peer could possibly miss some addr notification, allow retransmission
+ ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+ chk_join_nr "signal addresses race test" 3 3 3
# the server will not signal the address terminating
# the MPC subflow
@@ -1476,7 +1496,7 @@ ipv6_tests()
reset
ip netns exec $ns1 ./pm_nl_ctl limits 0 1
ip netns exec $ns2 ./pm_nl_ctl limits 0 1
- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow
run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
chk_join_nr "single subflow IPv6" 1 1 1
@@ -1511,7 +1531,7 @@ ipv6_tests()
ip netns exec $ns1 ./pm_nl_ctl limits 0 2
ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
ip netns exec $ns2 ./pm_nl_ctl limits 1 2
- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow
run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow
chk_join_nr "remove subflow and signal IPv6" 2 2 2
chk_add_nr 1 1
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 543ad7513a8e..694732e4b344 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -374,6 +374,16 @@ run_cmd() {
return $rc
}
+run_cmd_bg() {
+ cmd="$*"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf " COMMAND: %s &\n" "${cmd}"
+ fi
+
+ $cmd 2>&1 &
+}
+
# Find the auto-generated name for this namespace
nsname() {
eval echo \$NS_$1
@@ -670,10 +680,10 @@ setup_nettest_xfrm() {
[ ${1} -eq 6 ] && proto="-6" || proto=""
port=${2}
- run_cmd ${ns_a} nettest ${proto} -q -D -s -x -p ${port} -t 5 &
+ run_cmd_bg "${ns_a}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5
nettest_pids="${nettest_pids} $!"
- run_cmd ${ns_b} nettest ${proto} -q -D -s -x -p ${port} -t 5 &
+ run_cmd_bg "${ns_b}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5
nettest_pids="${nettest_pids} $!"
}
@@ -865,7 +875,6 @@ setup_ovs_bridge() {
setup() {
[ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip
- cleanup
for arg do
eval setup_${arg} || { echo " ${arg} not supported"; return 1; }
done
@@ -876,7 +885,7 @@ trace() {
for arg do
[ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
- ${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
+ ${ns_cmd} tcpdump --immediate-mode -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
tcpdump_pids="${tcpdump_pids} $!"
ns_cmd=
done
@@ -1836,6 +1845,10 @@ run_test() {
unset IFS
+ # Since cleanup() relies on variables modified by this subshell, it
+ # has to run in this context.
+ trap cleanup EXIT
+
if [ "$VERBOSE" = "1" ]; then
printf "\n##########################################################################\n\n"
fi
diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore
index 8448f74adfec..4cb887b57413 100644
--- a/tools/testing/selftests/netfilter/.gitignore
+++ b/tools/testing/selftests/netfilter/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
nf-queue
+connect_close
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index ffca314897c4..7e81c9a7fff9 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -6,9 +6,9 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
nft_concat_range.sh nft_conntrack_helper.sh \
nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
- conntrack_vrf.sh
+ conntrack_vrf.sh nft_synproxy.sh
LDLIBS = -lmnl
-TEST_GEN_FILES = nf-queue
+TEST_GEN_FILES = nf-queue connect_close
include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/connect_close.c b/tools/testing/selftests/netfilter/connect_close.c
new file mode 100644
index 000000000000..1c3b0add54c4
--- /dev/null
+++ b/tools/testing/selftests/netfilter/connect_close.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+
+#define PORT 12345
+#define RUNTIME 10
+
+static struct {
+ unsigned int timeout;
+ unsigned int port;
+} opts = {
+ .timeout = RUNTIME,
+ .port = PORT,
+};
+
+static void handler(int sig)
+{
+ _exit(sig == SIGALRM ? 0 : 1);
+}
+
+static void set_timeout(void)
+{
+ struct sigaction action = {
+ .sa_handler = handler,
+ };
+
+ sigaction(SIGALRM, &action, NULL);
+
+ alarm(opts.timeout);
+}
+
+static void do_connect(const struct sockaddr_in *dst)
+{
+ int s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+
+ if (s >= 0)
+ fcntl(s, F_SETFL, O_NONBLOCK);
+
+ connect(s, (struct sockaddr *)dst, sizeof(*dst));
+ close(s);
+}
+
+static void do_accept(const struct sockaddr_in *src)
+{
+ int c, one = 1, s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+
+ if (s < 0)
+ return;
+
+ setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
+ setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+
+ bind(s, (struct sockaddr *)src, sizeof(*src));
+
+ listen(s, 16);
+
+ c = accept(s, NULL, NULL);
+ if (c >= 0)
+ close(c);
+
+ close(s);
+}
+
+static int accept_loop(void)
+{
+ struct sockaddr_in src = {
+ .sin_family = AF_INET,
+ .sin_port = htons(opts.port),
+ };
+
+ inet_pton(AF_INET, "127.0.0.1", &src.sin_addr);
+
+ set_timeout();
+
+ for (;;)
+ do_accept(&src);
+
+ return 1;
+}
+
+static int connect_loop(void)
+{
+ struct sockaddr_in dst = {
+ .sin_family = AF_INET,
+ .sin_port = htons(opts.port),
+ };
+
+ inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);
+
+ set_timeout();
+
+ for (;;)
+ do_connect(&dst);
+
+ return 1;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "t:p:")) != -1) {
+ switch (c) {
+ case 't':
+ opts.timeout = atoi(optarg);
+ break;
+ case 'p':
+ opts.port = atoi(optarg);
+ break;
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ pid_t p;
+
+ parse_opts(argc, argv);
+
+ p = fork();
+ if (p < 0)
+ return 111;
+
+ if (p > 0)
+ return accept_loop();
+
+ return connect_loop();
+}
diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh
index ed61f6cab60f..b35010cc7f6a 100755
--- a/tools/testing/selftests/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/netfilter/nft_concat_range.sh
@@ -27,7 +27,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below
-BUGS="flush_remove_add"
+BUGS="flush_remove_add reload"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
@@ -354,6 +354,23 @@ TYPE_flush_remove_add="
display Add two elements, flush, re-add
"
+TYPE_reload="
+display net,mac with reload
+type_spec ipv4_addr . ether_addr
+chain_spec ip daddr . ether saddr
+dst addr4
+src mac
+start 1
+count 1
+src_delta 2000
+tools sendip nc bash
+proto udp
+
+race_repeat 0
+
+perf_duration 0
+"
+
# Set template for all tests, types and rules are filled in depending on test
set_template='
flush ruleset
@@ -1473,6 +1490,59 @@ test_bug_flush_remove_add() {
nft flush ruleset
}
+# - add ranged element, check that packets match it
+# - reload the set, check packets still match
+test_bug_reload() {
+ setup veth send_"${proto}" set || return ${KSELFTEST_SKIP}
+ rstart=${start}
+
+ range_size=1
+ for i in $(seq "${start}" $((start + count))); do
+ end=$((start + range_size))
+
+ # Avoid negative or zero-sized port ranges
+ if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+ start=${end}
+ end=$((end + 1))
+ fi
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" || return 1
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ # check that the kernel allocates the pcpu scratch map
+ # for a reload with no element add/delete
+ ( echo flush set inet filter test ;
+ nft list set inet filter test ) | nft -f -
+
+ start=${rstart}
+ range_size=1
+
+ for i in $(seq "${start}" $((start + count))); do
+ end=$((start + range_size))
+
+ # Avoid negative or zero-sized port ranges
+ if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+ start=${end}
+ end=$((end + 1))
+ fi
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do
+ send_match "${j}" $((j + src_delta)) || return 1
+ done
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ nft flush ruleset
+}
+
test_reported_issues() {
eval test_bug_"${subtest}"
}
@@ -1531,4 +1601,4 @@ for name in ${TESTS}; do
done
done
-[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP}
+[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP} || exit 0
diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh
index 6caf6ac8c285..695a1958723f 100755
--- a/tools/testing/selftests/netfilter/nft_fib.sh
+++ b/tools/testing/selftests/netfilter/nft_fib.sh
@@ -174,6 +174,7 @@ test_ping() {
ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
sleep 3
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index 349a319a9e51..eb8543b9a5c4 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -880,9 +880,8 @@ EOF
return $ksft_skip
fi
- # test default behaviour. Packet from ns1 to ns0 is not redirected
- # due to automatic port translation.
- test_port_shadow "default" "ROUTER"
+ # test default behaviour. Packet from ns1 to ns0 is redirected to ns2.
+ test_port_shadow "default" "CLIENT"
# test packet filter based mitigation: prevent forwarding of
# packets claiming to come from the service port.
@@ -899,6 +898,144 @@ EOF
ip netns exec "$ns0" nft delete table $family nat
}
+test_stateless_nat_ip()
+{
+ local lret=0
+
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules"
+ return 1
+ fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table ip stateless {
+ map xlate_in {
+ typeof meta iifname . ip saddr . ip daddr : ip daddr
+ elements = {
+ "veth1" . 10.0.2.99 . 10.0.1.99 : 10.0.2.2,
+ }
+ }
+ map xlate_out {
+ typeof meta iifname . ip saddr . ip daddr : ip daddr
+ elements = {
+ "veth0" . 10.0.1.99 . 10.0.2.2 : 10.0.2.99
+ }
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority -400; policy accept;
+ ip saddr set meta iifname . ip saddr . ip daddr map @xlate_in
+ ip daddr set meta iifname . ip saddr . ip daddr map @xlate_out
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add ip statless rules"
+ return $ksft_skip
+ fi
+
+ reset_counters
+
+ ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules"
+ lret=1
+ fi
+
+ # ns1 should have seen packets from .2.2, due to stateless rewrite.
+ expect="packets 1 bytes 84"
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter "$ns1" ns0insl "$expect" "test_stateless 1"
+ lret=1
+ fi
+
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2"
+ lret=1
+ fi
+ done
+
+ # ns1 should not have seen packets from ns2, due to masquerade
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter "$ns1" ns0$dir "$expect" "test_stateless 3"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+ socat -h > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run stateless nat frag test without socat tool"
+ if [ $lret -eq 0 ]; then
+ return $ksft_skip
+ fi
+
+ ip netns exec "$ns0" nft delete table ip stateless
+ return $lret
+ fi
+
+ local tmpfile=$(mktemp)
+ dd if=/dev/urandom of=$tmpfile bs=4096 count=1 2>/dev/null
+
+ local outfile=$(mktemp)
+ ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:$outfile < /dev/null &
+ sc_r=$!
+
+ sleep 1
+ # re-do with a large udp transfer -> ip fragmentation
+ ip netns exec "$ns2" timeout 3 socat - UDP4-SENDTO:"10.0.1.99:4233" < "$tmpfile" > /dev/null
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2
+ lret=1
+ fi
+
+ wait
+
+ cmp "$tmpfile" "$outfile"
+ if [ $? -ne 0 ]; then
+ ls -l "$tmpfile" "$outfile"
+ echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2
+ lret=1
+ fi
+
+ rm -f "$tmpfile" "$outfile"
+
+ # ns1 should have seen packets from 2.2, due to stateless rewrite.
+ expect="packets 3 bytes 4164"
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter "$ns1" ns0insl "$expect" "test_stateless 5"
+ lret=1
+ fi
+
+ ip netns exec "$ns0" nft delete table ip stateless
+ if [ $? -ne 0 ]; then
+ echo "ERROR: Could not delete table ip stateless" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: IP statless for $ns2"
+
+ return $lret
+}
+
# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
for i in 0 1 2; do
ip netns exec ns$i-$sfx nft -f /dev/stdin <<EOF
@@ -965,6 +1102,19 @@ table inet filter {
EOF
done
+# special case for the stateless nat check: the counter needs to be
+# evaluated before (input) ip defragmentation
+ip netns exec ns1-$sfx nft -f /dev/stdin <<EOF
+table inet filter {
+ counter ns0insl {}
+
+ chain pre {
+ type filter hook prerouting priority -400; policy accept;
+ ip saddr 10.0.2.2 counter name "ns0insl"
+ }
+}
+EOF
+
sleep 3
# test basic connectivity
for i in 1 2; do
@@ -1019,6 +1169,7 @@ $test_inet_nat && test_redirect inet
$test_inet_nat && test_redirect6 inet
test_port_shadowing
+test_stateless_nat_ip
if [ $ret -ne 0 ];then
echo -n "FAIL: "
diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh
index 7d27f1f3bc01..e12729753351 100755
--- a/tools/testing/selftests/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/netfilter/nft_queue.sh
@@ -113,6 +113,7 @@ table inet $name {
chain output {
type filter hook output priority $prio; policy accept;
tcp dport 12345 queue num 3
+ tcp sport 23456 queue num 3
jump nfq
}
chain post {
@@ -296,6 +297,23 @@ test_tcp_localhost()
wait 2>/dev/null
}
+test_tcp_localhost_connectclose()
+{
+ tmpfile=$(mktemp) || exit 1
+
+ ip netns exec ${nsrouter} ./connect_close -p 23456 -t $timeout &
+
+ ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
+ local nfqpid=$!
+
+ sleep 1
+ rm -f "$tmpfile"
+
+ wait $nfqpid
+ [ $? -eq 0 ] && echo "PASS: tcp via loopback with connect/close"
+ wait 2>/dev/null
+}
+
test_tcp_localhost_requeue()
{
ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
@@ -424,6 +442,7 @@ test_queue 20
test_tcp_forward
test_tcp_localhost
+test_tcp_localhost_connectclose
test_tcp_localhost_requeue
test_icmp_vrf
diff --git a/tools/testing/selftests/netfilter/nft_synproxy.sh b/tools/testing/selftests/netfilter/nft_synproxy.sh
new file mode 100755
index 000000000000..b62933b680d6
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_synproxy.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+rnd=$(mktemp -u XXXXXXXX)
+nsr="nsr-$rnd" # synproxy machine
+ns1="ns1-$rnd" # iperf client
+ns2="ns2-$rnd" # iperf server
+
+checktool (){
+ if ! $1 > /dev/null 2>&1; then
+ echo "SKIP: Could not $2"
+ exit $ksft_skip
+ fi
+}
+
+checktool "nft --version" "run test without nft tool"
+checktool "ip -Version" "run test without ip tool"
+checktool "iperf3 --version" "run test without iperf3"
+checktool "ip netns add $nsr" "create net namespace"
+
+modprobe -q nf_conntrack
+
+ip netns add $ns1
+ip netns add $ns2
+
+cleanup() {
+ ip netns pids $ns1 | xargs kill 2>/dev/null
+ ip netns pids $ns2 | xargs kill 2>/dev/null
+ ip netns del $ns1
+ ip netns del $ns2
+
+ ip netns del $nsr
+}
+
+trap cleanup EXIT
+
+ip link add veth0 netns $nsr type veth peer name eth0 netns $ns1
+ip link add veth1 netns $nsr type veth peer name eth0 netns $ns2
+
+for dev in lo veth0 veth1; do
+ip -net $nsr link set $dev up
+done
+
+ip -net $nsr addr add 10.0.1.1/24 dev veth0
+ip -net $nsr addr add 10.0.2.1/24 dev veth1
+
+ip netns exec $nsr sysctl -q net.ipv4.conf.veth0.forwarding=1
+ip netns exec $nsr sysctl -q net.ipv4.conf.veth1.forwarding=1
+ip netns exec $nsr sysctl -q net.netfilter.nf_conntrack_tcp_loose=0
+
+for n in $ns1 $ns2; do
+ ip -net $n link set lo up
+ ip -net $n link set eth0 up
+done
+ip -net $ns1 addr add 10.0.1.99/24 dev eth0
+ip -net $ns2 addr add 10.0.2.99/24 dev eth0
+ip -net $ns1 route add default via 10.0.1.1
+ip -net $ns2 route add default via 10.0.2.1
+
+# test basic connectivity
+if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
+ echo "ERROR: $ns1 cannot reach $ns2" 1>&2
+ exit 1
+fi
+
+if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
+ echo "ERROR: $ns2 cannot reach $ns1" 1>&2
+ exit 1
+fi
+
+ip netns exec $ns2 iperf3 -s > /dev/null 2>&1 &
+# ip netns exec $nsr tcpdump -vvv -n -i veth1 tcp | head -n 10 &
+
+sleep 1
+
+ip netns exec $nsr nft -f - <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority -300; policy accept;
+ meta iif veth0 tcp flags syn counter notrack
+ }
+
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+
+ ct state new,established counter accept
+
+ meta iif veth0 meta l4proto tcp ct state untracked,invalid synproxy mss 1460 sack-perm timestamp
+
+ ct state invalid counter drop
+
+ # make ns2 unreachable without tcp synproxy
+ tcp flags syn counter drop
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "SKIP: Cannot add nft synproxy"
+ exit $ksft_skip
+fi
+
+ip netns exec $ns1 timeout 5 iperf3 -c 10.0.2.99 -n $((1 * 1024 * 1024)) > /dev/null
+ret=$?
+
+if [ $ret -ne 0 ]; then
+ echo "FAIL: iperf3 returned an error" 1>&2
+ ip netns exec $nsr nft list ruleset
+else
+ echo "PASS: synproxy connection successful"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh
index 04633119b29a..5a8db0b48928 100755
--- a/tools/testing/selftests/netfilter/nft_zones_many.sh
+++ b/tools/testing/selftests/netfilter/nft_zones_many.sh
@@ -9,7 +9,7 @@ ns="ns-$sfx"
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
-zones=20000
+zones=2000
have_ct_tool=0
ret=0
@@ -75,10 +75,10 @@ EOF
while [ $i -lt $max_zones ]; do
local start=$(date +%s%3N)
- i=$((i + 10000))
+ i=$((i + 1000))
j=$((j + 1))
# nft rule in output places each packet in a different zone.
- dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345
+ dd if=/dev/zero of=/dev/stdout bs=8k count=1000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345
if [ $? -ne 0 ] ;then
ret=1
break
@@ -86,7 +86,7 @@ EOF
stop=$(date +%s%3N)
local duration=$((stop-start))
- echo "PASS: added 10000 entries in $duration ms (now $i total, loop $j)"
+ echo "PASS: added 1000 entries in $duration ms (now $i total, loop $j)"
done
if [ $have_ct_tool -eq 1 ]; then
@@ -128,11 +128,11 @@ test_conntrack_tool() {
break
fi
- if [ $((i%10000)) -eq 0 ];then
+ if [ $((i%1000)) -eq 0 ];then
stop=$(date +%s%3N)
local duration=$((stop-start))
- echo "PASS: added 10000 entries in $duration ms (now $i total)"
+ echo "PASS: added 1000 entries in $duration ms (now $i total)"
start=$stop
fi
done
diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile
index 4b93b1417b86..843ba56d8e49 100644
--- a/tools/testing/selftests/openat2/Makefile
+++ b/tools/testing/selftests/openat2/Makefile
@@ -5,4 +5,4 @@ TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test
include ../lib.mk
-$(TEST_GEN_PROGS): helpers.c
+$(TEST_GEN_PROGS): helpers.c helpers.h
diff --git a/tools/testing/selftests/openat2/helpers.h b/tools/testing/selftests/openat2/helpers.h
index a6ea27344db2..7056340b9339 100644
--- a/tools/testing/selftests/openat2/helpers.h
+++ b/tools/testing/selftests/openat2/helpers.h
@@ -9,6 +9,7 @@
#define _GNU_SOURCE
#include <stdint.h>
+#include <stdbool.h>
#include <errno.h>
#include <linux/types.h>
#include "../kselftest.h"
@@ -62,11 +63,12 @@ bool needs_openat2(const struct open_how *how);
(similar to chroot(2)). */
#endif /* RESOLVE_IN_ROOT */
-#define E_func(func, ...) \
- do { \
- if (func(__VA_ARGS__) < 0) \
- ksft_exit_fail_msg("%s:%d %s failed\n", \
- __FILE__, __LINE__, #func);\
+#define E_func(func, ...) \
+ do { \
+ errno = 0; \
+ if (func(__VA_ARGS__) < 0) \
+ ksft_exit_fail_msg("%s:%d %s failed - errno:%d\n", \
+ __FILE__, __LINE__, #func, errno); \
} while (0)
#define E_asprintf(...) E_func(asprintf, __VA_ARGS__)
diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
index 1bddbe934204..7fb902099de4 100644
--- a/tools/testing/selftests/openat2/openat2_test.c
+++ b/tools/testing/selftests/openat2/openat2_test.c
@@ -259,6 +259,16 @@ void test_openat2_flags(void)
unlink(path);
fd = sys_openat2(AT_FDCWD, path, &test->how);
+ if (fd < 0 && fd == -EOPNOTSUPP) {
+ /*
+ * Skip the testcase if it failed because not supported
+ * by FS. (e.g. a valid O_TMPFILE combination on NFS)
+ */
+ ksft_test_result_skip("openat2 with %s fails with %d (%s)\n",
+ test->name, fd, strerror(-fd));
+ goto next;
+ }
+
if (test->err >= 0)
failed = (fd < 0);
else
@@ -303,7 +313,7 @@ skip:
else
resultfn("openat2 with %s fails with %d (%s)\n",
test->name, test->err, strerror(-test->err));
-
+next:
free(fdpath);
fflush(stdout);
}
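
The new `next:` path skips instead of failing when openat2() itself returns -EOPNOTSUPP, e.g. for an otherwise valid O_TMPFILE request on NFS. A hedged raw-syscall probe for the same condition (struct open_how and SYS_openat2 come from 5.6+ kernel headers; this is a standalone sketch, not the test's helper):

/* Hedged sketch: probe whether the cwd's filesystem supports an
 * openat2() O_TMPFILE request, treating EOPNOTSUPP as "skip". */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <linux/openat2.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct open_how how = {
		.flags = O_TMPFILE | O_RDWR,
		.mode = 0600,
	};
	int fd = syscall(SYS_openat2, AT_FDCWD, ".", &how, sizeof(how));

	if (fd < 0) {
		if (errno == EOPNOTSUPP)
			fprintf(stderr, "skip: fs lacks O_TMPFILE\n");
		else
			perror("openat2");
		return 1;
	}
	close(fd);
	return 0;
}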
diff --git a/tools/testing/selftests/perf_events/sigtrap_threads.c b/tools/testing/selftests/perf_events/sigtrap_threads.c
index 8e83cf91513a..6d849dc2bee0 100644
--- a/tools/testing/selftests/perf_events/sigtrap_threads.c
+++ b/tools/testing/selftests/perf_events/sigtrap_threads.c
@@ -44,9 +44,10 @@ static struct {
} ctx;
/* Unique value to check si_perf_data is correctly set from perf_event_attr::sig_data. */
-#define TEST_SIG_DATA(addr) (~(unsigned long)(addr))
+#define TEST_SIG_DATA(addr, id) (~(unsigned long)(addr) + id)
-static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr)
+static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr,
+ unsigned long id)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_BREAKPOINT,
@@ -60,7 +61,7 @@ static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr)
.inherit_thread = 1, /* ... but only cloned with CLONE_THREAD. */
.remove_on_exec = 1, /* Required by sigtrap. */
.sigtrap = 1, /* Request synchronous SIGTRAP on event. */
- .sig_data = TEST_SIG_DATA(addr),
+ .sig_data = TEST_SIG_DATA(addr, id),
};
return attr;
}
@@ -110,7 +111,7 @@ FIXTURE(sigtrap_threads)
FIXTURE_SETUP(sigtrap_threads)
{
- struct perf_event_attr attr = make_event_attr(false, &ctx.iterate_on);
+ struct perf_event_attr attr = make_event_attr(false, &ctx.iterate_on, 0);
struct sigaction action = {};
int i;
@@ -165,7 +166,7 @@ TEST_F(sigtrap_threads, enable_event)
EXPECT_EQ(ctx.tids_want_signal, 0);
EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
- EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on));
+ EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on, 0));
/* Check enabled for parent. */
ctx.iterate_on = 0;
@@ -175,7 +176,7 @@ TEST_F(sigtrap_threads, enable_event)
/* Test that modification propagates to all inherited events. */
TEST_F(sigtrap_threads, modify_and_enable_event)
{
- struct perf_event_attr new_attr = make_event_attr(true, &ctx.iterate_on);
+ struct perf_event_attr new_attr = make_event_attr(true, &ctx.iterate_on, 42);
EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &new_attr), 0);
run_test_threads(_metadata, self);
@@ -184,7 +185,7 @@ TEST_F(sigtrap_threads, modify_and_enable_event)
EXPECT_EQ(ctx.tids_want_signal, 0);
EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
- EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on));
+ EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on, 42));
/* Check enabled for parent. */
ctx.iterate_on = 0;
@@ -204,7 +205,7 @@ TEST_F(sigtrap_threads, signal_stress)
EXPECT_EQ(ctx.tids_want_signal, 0);
EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
- EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on));
+ EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on, 0));
}
TEST_HARNESS_MAIN
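
Folding an id into sig_data means a SIGTRAP handler can tell which event fired after PERF_EVENT_IOC_MODIFY_ATTRIBUTES: whatever was placed in perf_event_attr::sig_data comes back as si_perf_data. Creating one such event, as a hedged sketch (assumes a recent kernel and headers that carry the sigtrap/sig_data/remove_on_exec fields):

/* Hedged sketch: a breakpoint perf event that would deliver a
 * synchronous SIGTRAP carrying a caller-chosen si_perf_data. */
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static volatile int watched;

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_BREAKPOINT;
	attr.size = sizeof(attr);
	attr.bp_type = HW_BREAKPOINT_RW;
	attr.bp_addr = (unsigned long)&watched;
	attr.bp_len = HW_BREAKPOINT_LEN_1;
	attr.disabled = 1;	/* enable later with PERF_EVENT_IOC_ENABLE */
	attr.remove_on_exec = 1;	/* required by sigtrap */
	attr.sigtrap = 1;	/* synchronous SIGTRAP on event */
	attr.sig_data = 42;	/* surfaces as si_perf_data */

	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		perror("perf_event_open");
	else
		printf("event fd %d armed on %p\n", fd, (void *)&watched);
	return 0;
}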
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index 01f8d3c0cf2c..6922d6417e1c 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -68,7 +68,7 @@
#define PIDFD_SKIP 3
#define PIDFD_XFAIL 4
-int wait_for_pid(pid_t pid)
+static inline int wait_for_pid(pid_t pid)
{
int status, ret;
@@ -78,13 +78,20 @@ again:
if (errno == EINTR)
goto again;
+ ksft_print_msg("waitpid returned -1, errno=%d\n", errno);
return -1;
}
- if (!WIFEXITED(status))
+ if (!WIFEXITED(status)) {
+ ksft_print_msg(
+ "waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n",
+ WIFSIGNALED(status), WTERMSIG(status));
return -1;
+ }
- return WEXITSTATUS(status);
+ ret = WEXITSTATUS(status);
+ ksft_print_msg("waitpid WEXITSTATUS=%d\n", ret);
+ return ret;
}
static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
index 22558524f71c..3fd8e903118f 100644
--- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
@@ -12,6 +12,7 @@
#include <string.h>
#include <syscall.h>
#include <sys/wait.h>
+#include <sys/mman.h>
#include "pidfd.h"
#include "../kselftest.h"
@@ -80,7 +81,10 @@ static inline int error_check(struct error *err, const char *test_name)
return err->code;
}
+#define CHILD_STACK_SIZE 8192
+
struct child {
+ char *stack;
pid_t pid;
int fd;
};
@@ -89,17 +93,22 @@ static struct child clone_newns(int (*fn)(void *), void *args,
struct error *err)
{
static int flags = CLONE_PIDFD | CLONE_NEWPID | CLONE_NEWNS | SIGCHLD;
- size_t stack_size = 1024;
- char *stack[1024] = { 0 };
struct child ret;
if (!(flags & CLONE_NEWUSER) && geteuid() != 0)
flags |= CLONE_NEWUSER;
+ ret.stack = mmap(NULL, CHILD_STACK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ if (ret.stack == MAP_FAILED) {
+ error_set(err, -1, "mmap of stack failed (errno %d)", errno);
+ return ret;
+ }
+
#ifdef __ia64__
- ret.pid = __clone2(fn, stack, stack_size, flags, args, &ret.fd);
+ ret.pid = __clone2(fn, ret.stack, CHILD_STACK_SIZE, flags, args, &ret.fd);
#else
- ret.pid = clone(fn, stack + stack_size, flags, args, &ret.fd);
+ ret.pid = clone(fn, ret.stack + CHILD_STACK_SIZE, flags, args, &ret.fd);
#endif
if (ret.pid < 0) {
@@ -129,6 +138,11 @@ static inline int child_join(struct child *child, struct error *err)
else if (r > 0)
error_set(err, r, "child %d reported: %d", child->pid, r);
+ if (munmap(child->stack, CHILD_STACK_SIZE)) {
+ error_set(err, -1, "munmap of child stack failed (errno %d)", errno);
+ r = -1;
+ }
+
return r;
}
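
The fix replaces a 1 KiB on-stack array, which doubled as both the stack pointer and a wrongly sized stack, with a dedicated 8 KiB MAP_STACK mapping that child_join() later munmaps. The allocation pattern in isolation, as a hedged sketch (non-ia64; stacks grow down, so the top of the region is passed to clone()):

/* Hedged sketch of the mmap'd clone-stack pattern adopted above. */
#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

#define STACK_SIZE 8192

static int child_fn(void *arg)
{
	(void)arg;
	return 0;
}

int main(void)
{
	char *stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
	pid_t pid;

	if (stack == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	/* Pass the high end of the region as the initial stack pointer. */
	pid = clone(child_fn, stack + STACK_SIZE, SIGCHLD, NULL);
	if (pid < 0)
		perror("clone");
	else
		waitpid(pid, NULL, 0);
	munmap(stack, STACK_SIZE);
	return pid < 0;
}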
diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
index 529eb700ac26..9a2d64901d59 100644
--- a/tools/testing/selftests/pidfd/pidfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_test.c
@@ -441,7 +441,6 @@ static void test_pidfd_poll_exec(int use_waitpid)
{
int pid, pidfd = 0;
int status, ret;
- pthread_t t1;
time_t prog_start = time(NULL);
const char *test_name = "pidfd_poll check for premature notification on child thread exec";
@@ -500,13 +499,14 @@ static int child_poll_leader_exit_test(void *args)
*/
*child_exit_secs = time(NULL);
syscall(SYS_exit, 0);
+ /* Never reached, but appeases the compiler's missing-return warning. */
+ exit(0);
}
static void test_pidfd_poll_leader_exit(int use_waitpid)
{
int pid, pidfd = 0;
- int status, ret;
- time_t prog_start = time(NULL);
+ int status, ret = 0;
const char *test_name = "pidfd_poll check for premature notification on non-empty"
"group leader exit";
diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c
index be2943f072f6..17999e082aa7 100644
--- a/tools/testing/selftests/pidfd/pidfd_wait.c
+++ b/tools/testing/selftests/pidfd/pidfd_wait.c
@@ -39,7 +39,7 @@ static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options,
TEST(wait_simple)
{
- int pidfd = -1, status = 0;
+ int pidfd = -1;
pid_t parent_tid = -1;
struct clone_args args = {
.parent_tid = ptr_to_u64(&parent_tid),
@@ -47,7 +47,6 @@ TEST(wait_simple)
.flags = CLONE_PIDFD | CLONE_PARENT_SETTID,
.exit_signal = SIGCHLD,
};
- int ret;
pid_t pid;
siginfo_t info = {
.si_signo = 0,
@@ -88,7 +87,7 @@ TEST(wait_simple)
TEST(wait_states)
{
- int pidfd = -1, status = 0;
+ int pidfd = -1;
pid_t parent_tid = -1;
struct clone_args args = {
.parent_tid = ptr_to_u64(&parent_tid),
diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh
index e6a132df6172..69f8a5958cef 100755
--- a/tools/testing/selftests/rcutorture/bin/console-badness.sh
+++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh
@@ -10,7 +10,7 @@
#
# Authors: Paul E. McKenney <paulmck@kernel.org>
-egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
+egrep 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
grep -v 'ODEBUG: ' |
grep -v 'This means that this is a DEBUG kernel and it is' |
grep -v 'Warning: unable to open an initial console' |
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
index 5a0023d183da..0941f1ddab65 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
@@ -47,8 +47,8 @@ else
exit 1
fi
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
. functions.sh
dryrun=
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
index 370406bbfeed..f17000a2ccf1 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
@@ -49,8 +49,8 @@ fi
mkdir $resdir/$ds
echo Results directory: $resdir/$ds
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
. functions.sh
echo Using all `identify_qemu_vcpus` CPUs.
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
index e4a00779b8c6..ee886b40a5d2 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
@@ -22,8 +22,8 @@ T=${TMPDIR-/tmp}/kvm-end-run-stats.sh.$$
trap 'rm -rf $T' 0
mkdir $T
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
. functions.sh
default_starttime="`get_starttime`"
starttime="${2-default_starttime}"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
index 2e9e9e2eedb6..5f682fc892dd 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -30,10 +30,16 @@ editor=${EDITOR-vi}
files=
for i in ${rundir}/*/Make.out
do
+ scenariodir="`dirname $i`"
+ scenariobasedir="`echo ${scenariodir} | sed -e 's/\.[0-9]*$//'`"
if egrep -q "error:|warning:|^ld: .*undefined reference to" < $i
then
egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags
files="$files $i.diags $i"
+ elif ! test -f ${scenariobasedir}/vmlinux
+ then
+ echo No ${scenariobasedir}/vmlinux file > $i.diags
+ files="$files $i.diags $i"
fi
done
if test -n "$files"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index 1c4c2c727dad..43e1387234d1 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -25,7 +25,7 @@ stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
tr -d '\012\015'`"
-fwdprog="`grep 'rcu_torture_fwd_prog n_max_cbs: ' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k3nr | head -1 | awk '{ print $2 " " $3 }'`"
+fwdprog="`grep 'rcu_torture_fwd_prog n_max_cbs: ' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k3nr | head -1 | awk '{ print $2 " " $3 }' | tr -d '\015'`"
if test -z "$ngps"
then
echo "$configfile ------- " $stopstate
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index e09b1bc78708..8c4c1e4792d0 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -19,8 +19,8 @@ then
exit 1
fi
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
. functions.sh
starttime="`get_starttime`"
@@ -108,8 +108,8 @@ else
cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
exit 2
fi
- cp -a "$rundir" "$KVM/res/"
- oldrun="$KVM/res/$ds"
+ cp -a "$rundir" "$RCUTORTURE/res/"
+ oldrun="$RCUTORTURE/res/$ds"
fi
echo | tee -a "$oldrun/remote-log"
echo " ----" kvm-again.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
@@ -155,18 +155,23 @@ do
echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
ret=$?
- if test "$ret" -ne 0
- then
- echo Unable to download $T/binres.tgz to system $i, waiting and then retrying. | tee -a "$oldrun/remote-log"
+ tries=0
+ while test "$ret" -ne 0
+ do
+ echo Unable to download $T/binres.tgz to system $i, waiting and then retrying. $tries prior retries. | tee -a "$oldrun/remote-log"
sleep 60
cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
ret=$?
if test "$ret" -ne 0
then
- echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
- exit 10
+ if test "$tries" > 5
+ then
+ echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
+ exit 10
+ fi
fi
- fi
+ tries=$((tries+1))
+ done
done
# Function to check for presence of a file on the specified system.
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 6de0c183db5b..55b2c1533282 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -25,15 +25,15 @@ LANG=en_US.UTF-8; export LANG
dur=$((30*60))
dryrun=""
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
. functions.sh
TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`"
TORTURE_DEFCONFIG=defconfig
TORTURE_BOOT_IMAGE=""
TORTURE_BUILDONLY=
-TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
+TORTURE_INITRD="$RCUTORTURE/initrd"; export TORTURE_INITRD
TORTURE_KCONFIG_ARG=""
TORTURE_KCONFIG_GDB_ARG=""
TORTURE_BOOT_GDB_ARG=""
@@ -262,7 +262,7 @@ else
exit 1
fi
-CONFIGFRAG=${KVM}/configs/${TORTURE_SUITE}; export CONFIGFRAG
+CONFIGFRAG=${RCUTORTURE}/configs/${TORTURE_SUITE}; export CONFIGFRAG
defaultconfigs="`tr '\012' ' ' < $CONFIGFRAG/CFLIST`"
if test -z "$configs"
@@ -272,7 +272,7 @@ fi
if test -z "$resdir"
then
- resdir=$KVM/res
+ resdir=$RCUTORTURE/res
fi
# Create a file of test-name/#cpus pairs, sorted by decreasing #cpus.
@@ -280,7 +280,7 @@ configs_derep=
for CF in $configs
do
case $CF in
- [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**)
+ [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**|[0-9][0-9][0-9][0-9]\**)
config_reps=`echo $CF | sed -e 's/\*.*$//'`
CF1=`echo $CF | sed -e 's/^[^*]*\*//'`
;;
@@ -386,7 +386,7 @@ END {
# Generate a script to execute the tests in appropriate batches.
cat << ___EOF___ > $T/script
CONFIGFRAG="$CONFIGFRAG"; export CONFIGFRAG
-KVM="$KVM"; export KVM
+RCUTORTURE="$RCUTORTURE"; export RCUTORTURE
PATH="$PATH"; export PATH
TORTURE_ALLOTED_CPUS="$TORTURE_ALLOTED_CPUS"; export TORTURE_ALLOTED_CPUS
TORTURE_BOOT_IMAGE="$TORTURE_BOOT_IMAGE"; export TORTURE_BOOT_IMAGE
@@ -569,7 +569,7 @@ ___EOF___
awk < $T/cfgcpu.pack \
-v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \
-v CONFIGDIR="$CONFIGFRAG/" \
- -v KVM="$KVM" \
+ -v RCUTORTURE="$RCUTORTURE" \
-v ncpus=$cpus \
-v jitter="$jitter" \
-v rd=$resdir/$ds/ \
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 9f624bd53c27..822eb037a057 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -138,6 +138,16 @@ then
then
summary="$summary Bugs: $n_bugs"
fi
+ n_kcsan=`egrep -c 'BUG: KCSAN: ' $file`
+ if test "$n_kcsan" -ne 0
+ then
+ if test "$n_bugs" = "$n_kcsan"
+ then
+ summary="$summary (all bugs kcsan)"
+ else
+ summary="$summary KCSAN: $n_kcsan"
+ fi
+ fi
n_calltrace=`grep -c 'Call Trace:' $file`
if test "$n_calltrace" -ne 0
then
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index eae88aacca2a..bfe09e2829c8 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -13,8 +13,8 @@
scriptname=$0
args="$*"
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
. functions.sh
TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`"
@@ -37,7 +37,7 @@ configs_scftorture=
kcsan_kmake_args=
# Default compression, duration, and apportionment.
-compress_kasan_vmlinux="`identify_qemu_vcpus`"
+compress_concurrency="`identify_qemu_vcpus`"
duration_base=10
duration_rcutorture_frac=7
duration_locktorture_frac=1
@@ -67,12 +67,12 @@ function doyesno () {
usage () {
echo "Usage: $scriptname optional arguments:"
- echo " --compress-kasan-vmlinux concurrency"
+ echo " --compress-concurrency concurrency"
echo " --configs-rcutorture \"config-file list w/ repeat factor (3*TINY01)\""
echo " --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\""
echo " --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\""
- echo " --doall"
- echo " --doallmodconfig / --do-no-allmodconfig"
+ echo " --do-all"
+ echo " --do-allmodconfig / --do-no-allmodconfig"
echo " --do-clocksourcewd / --do-no-clocksourcewd"
echo " --do-kasan / --do-no-kasan"
echo " --do-kcsan / --do-no-kcsan"
@@ -91,9 +91,9 @@ usage () {
while test $# -gt 0
do
case "$1" in
- --compress-kasan-vmlinux)
- checkarg --compress-kasan-vmlinux "(concurrency level)" $# "$2" '^[0-9][0-9]*$' '^error'
- compress_kasan_vmlinux=$2
+ --compress-concurrency)
+ checkarg --compress-concurrency "(concurrency level)" $# "$2" '^[0-9][0-9]*$' '^error'
+ compress_concurrency=$2
shift
;;
--config-rcutorture|--configs-rcutorture)
@@ -414,8 +414,14 @@ nfailures=0
echo FAILURES: | tee -a $T/log
if test -s "$T/failures"
then
- cat "$T/failures" | tee -a $T/log
+ awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "sed -e " sq "1,/^ --- .* Test summary:$/d" sq " " $2 "/log | grep Summary: | sed -e " sq "s/^[^S]*/ /" sq; }' | sh | tee -a $T/log | tee "$T/failuresum"
nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`"
+ grep "^ Summary: " "$T/failuresum" |
+ grep -v '^ Summary: Bugs: [0-9]* (all bugs kcsan)$' > "$T/nonkcsan"
+ if test -s "$T/nonkcsan"
+ then
+ nonkcsanbug="yes"
+ fi
ret=2
fi
if test "$do_kcsan" = "yes"
@@ -424,12 +430,16 @@ then
fi
echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log
echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log
+if test -z "$nonkcsanbug" && test -s "$T/failuresum"
+then
+ echo " All bugs were KCSAN failures."
+fi
tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`"
-if test -n "$tdir" && test $compress_kasan_vmlinux -gt 0
+if test -n "$tdir" && test $compress_concurrency -gt 0
then
# KASAN vmlinux files can approach 1GB in size, so compress them.
- echo Looking for KASAN files to compress: `date` > "$tdir/log-xz" 2>&1
- find "$tdir" -type d -name '*-kasan' -print > $T/xz-todo
+ echo Looking for K[AC]SAN files to compress: `date` > "$tdir/log-xz" 2>&1
+ find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo
ncompresses=0
batchno=1
if test -s $T/xz-todo
@@ -447,7 +457,7 @@ then
do
xz "$j" >> "$tdir/log-xz" 2>&1 &
ncompresses=$((ncompresses+1))
- if test $ncompresses -ge $compress_kasan_vmlinux
+ if test $ncompresses -ge $compress_concurrency
then
echo Waiting for batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log
wait
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
index 3ca112444ce7..7093422050f6 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=4
+CONFIG_NR_CPUS=3
CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
index 238bfe3bd0cc..ce0694fd9b92 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
@@ -1 +1,2 @@
rcutorture.torture_type=srcu
+rcutorture.fwd_progress=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
index ce48c7b82673..2db39f298d18 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
@@ -1,2 +1,4 @@
rcutorture.torture_type=srcud
rcupdate.rcu_self_test=1
+rcutorture.fwd_progress=3
+srcutree.big_cpu_lim=5
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
index 34c8ff5a12f2..e4d74e5fc1d0 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=4
+CONFIG_NR_CPUS=5
CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
index 2af9d39a9716..215e1067f037 100644
--- a/tools/testing/selftests/rseq/Makefile
+++ b/tools/testing/selftests/rseq/Makefile
@@ -6,7 +6,7 @@ endif
CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L$(OUTPUT) -Wl,-rpath=./ \
$(CLANG_FLAGS)
-LDLIBS += -lpthread
+LDLIBS += -lpthread -ldl
# Own dependencies because we only want to build against 1st prerequisite, but
# still track changes to header files and depend on shared object.
diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
index b953a52ff706..517756afc2a4 100644
--- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c
+++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
@@ -167,7 +167,7 @@ struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
for (;;) {
struct percpu_list_node *head;
intptr_t *targetptr, expectnot, *load;
- off_t offset;
+ long offset;
int ret, cpu;
cpu = rseq_cpu_start();
diff --git a/tools/testing/selftests/rseq/compiler.h b/tools/testing/selftests/rseq/compiler.h
new file mode 100644
index 000000000000..876eb6a7f75b
--- /dev/null
+++ b/tools/testing/selftests/rseq/compiler.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
+/*
+ * rseq/compiler.h
+ *
+ * Work-around asm goto compiler bugs.
+ *
+ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef RSEQ_COMPILER_H
+#define RSEQ_COMPILER_H
+
+/*
+ * gcc prior to 4.8.2 miscompiles asm goto.
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
+ *
+ * gcc prior to 8.1.0 miscompiles asm goto at O1.
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103908
+ *
+ * clang prior to version 13.0.1 miscompiles asm goto at O2.
+ * https://github.com/llvm/llvm-project/issues/52735
+ *
+ * Work around these issues by adding a volatile inline asm with
+ * memory clobber in the fallthrough after the asm goto and at each
+ * label target. Emit this for all compilers in case other similar
+ * issues are found in the future.
+ */
+#define rseq_after_asm_goto() asm volatile ("" : : : "memory")
+
+#endif /* RSEQ_COMPILER_H */
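A minimal stand-alone sketch of the pattern this header prescribes (the asm body and the label below are illustrative only, not taken from this patch):

	/* Place rseq_after_asm_goto() on the fallthrough path and at each
	 * label target of an asm goto, per the comment above. */
	static inline int asm_goto_sketch(int *v)
	{
		asm volatile goto ("" /* critical section would go here */
				   : /* no outputs allowed with asm goto */
				   : [v] "m" (*v)
				   : "memory"
				   : abort);
		rseq_after_asm_goto();	/* fallthrough barrier */
		return 0;
	abort:
		rseq_after_asm_goto();	/* label-target barrier */
		return -1;
	}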
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index 699ad5f93c34..da23c22d5882 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -161,7 +161,7 @@ unsigned int yield_mod_cnt, nr_abort;
" cbnz " INJECT_ASM_REG ", 222b\n" \
"333:\n"
-#elif __PPC__
+#elif defined(__PPC__)
#define RSEQ_INJECT_INPUT \
, [loop_cnt_1]"m"(loop_cnt[1]) \
@@ -368,9 +368,7 @@ void *test_percpu_spinlock_thread(void *arg)
abort();
reps = thread_data->reps;
for (i = 0; i < reps; i++) {
- int cpu = rseq_cpu_start();
-
- cpu = rseq_this_cpu_lock(&data->lock);
+ int cpu = rseq_this_cpu_lock(&data->lock);
data->c[cpu].count++;
rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
@@ -551,7 +549,7 @@ struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
for (;;) {
struct percpu_list_node *head;
intptr_t *targetptr, expectnot, *load;
- off_t offset;
+ long offset;
int ret;
cpu = rseq_cpu_start();
diff --git a/tools/testing/selftests/rseq/rseq-abi.h b/tools/testing/selftests/rseq/rseq-abi.h
new file mode 100644
index 000000000000..a8c44d9af71f
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-abi.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _RSEQ_ABI_H
+#define _RSEQ_ABI_H
+
+/*
+ * rseq-abi.h
+ *
+ * Restartable sequences system call API
+ *
+ * Copyright (c) 2015-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+enum rseq_abi_cpu_id_state {
+ RSEQ_ABI_CPU_ID_UNINITIALIZED = -1,
+ RSEQ_ABI_CPU_ID_REGISTRATION_FAILED = -2,
+};
+
+enum rseq_abi_flags {
+ RSEQ_ABI_FLAG_UNREGISTER = (1 << 0),
+};
+
+enum rseq_abi_cs_flags_bit {
+ RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0,
+ RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1,
+ RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2,
+};
+
+enum rseq_abi_cs_flags {
+ RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT =
+ (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT),
+ RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL =
+ (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT),
+ RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE =
+ (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
+};
+
+/*
+ * struct rseq_abi_cs is aligned on 4 * 8 bytes to ensure it is always
+ * contained within a single cache-line. It is usually declared as
+ * link-time constant data.
+ */
+struct rseq_abi_cs {
+ /* Version of this structure. */
+ __u32 version;
+ /* enum rseq_abi_cs_flags */
+ __u32 flags;
+ __u64 start_ip;
+ /* Offset from start_ip. */
+ __u64 post_commit_offset;
+ __u64 abort_ip;
+} __attribute__((aligned(4 * sizeof(__u64))));
+
+/*
+ * struct rseq_abi is aligned on 4 * 8 bytes to ensure it is always
+ * contained within a single cache-line.
+ *
+ * A single struct rseq_abi per thread is allowed.
+ */
+struct rseq_abi {
+ /*
+ * Restartable sequences cpu_id_start field. Updated by the
+ * kernel. Read by user-space with single-copy atomicity
+ * semantics. This field should only be read by the thread which
+ * registered this data structure. Aligned on 32-bit. Always
+ * contains a value in the range of possible CPUs, although the
+ * value may not be the actual current CPU (e.g. if rseq is not
+ * initialized). This CPU number value should always be compared
+ * against the value of the cpu_id field before performing a rseq
+ * commit or returning a value read from a data structure indexed
+ * using the cpu_id_start value.
+ */
+ __u32 cpu_id_start;
+ /*
+ * Restartable sequences cpu_id field. Updated by the kernel.
+ * Read by user-space with single-copy atomicity semantics. This
+ * field should only be read by the thread which registered this
+ * data structure. Aligned on 32-bit. Values
+ * RSEQ_ABI_CPU_ID_UNINITIALIZED and RSEQ_ABI_CPU_ID_REGISTRATION_FAILED
+ * have special semantics: the former means "rseq uninitialized",
+ * and the latter means "rseq initialization failed". This value is
+ * meant to be read within rseq critical sections and compared
+ * with the cpu_id_start value previously read, before performing
+ * the commit instruction, or read and compared with the
+ * cpu_id_start value before returning a value loaded from a data
+ * structure indexed using the cpu_id_start value.
+ */
+ __u32 cpu_id;
+ /*
+ * Restartable sequences rseq_cs field.
+ *
+ * Contains NULL when no critical section is active for the current
+ * thread, or holds a pointer to the currently active struct rseq_cs.
+ *
+ * Updated by user-space, which sets the address of the currently
+ * active rseq_cs at the beginning of each assembly instruction sequence
+ * block, and is set to NULL by the kernel when it restarts an assembly
+ * instruction sequence block, as well as when the kernel detects that
+ * it is preempting or delivering a signal outside of the range
+ * targeted by the rseq_cs. Also needs to be set to NULL by user-space
+ * before reclaiming memory that contains the targeted struct rseq_cs.
+ *
+ * Read and set by the kernel. Set by user-space with single-copy
+ * atomicity semantics. This field should only be updated by the
+ * thread which registered this data structure. Aligned on 64-bit.
+ */
+ union {
+ __u64 ptr64;
+
+ /*
+ * The "arch" field provides architecture accessor for
+ * the ptr field based on architecture pointer size and
+ * endianness.
+ */
+ struct {
+#ifdef __LP64__
+ __u64 ptr;
+#elif defined(__BYTE_ORDER) ? (__BYTE_ORDER == __BIG_ENDIAN) : defined(__BIG_ENDIAN)
+ __u32 padding; /* Initialized to zero. */
+ __u32 ptr;
+#else
+ __u32 ptr;
+ __u32 padding; /* Initialized to zero. */
+#endif
+ } arch;
+ } rseq_cs;
+
+ /*
+ * Restartable sequences flags field.
+ *
+ * This field should only be updated by the thread which
+ * registered this data structure. Read by the kernel.
+ * Mainly used for single-stepping through rseq critical sections
+ * with debuggers.
+ *
+ * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT
+ * Inhibit instruction sequence block restart on preemption
+ * for this thread.
+ * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL
+ * Inhibit instruction sequence block restart on signal
+ * delivery for this thread.
+ * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE
+ * Inhibit instruction sequence block restart on migration for
+ * this thread.
+ */
+ __u32 flags;
+} __attribute__((aligned(4 * sizeof(__u64))));
+
+#endif /* _RSEQ_ABI_H */
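A sketch of the cpu_id_start/cpu_id protocol documented above, in plain C for readability only; a real user must perform the final comparison and commit inside the restartable assembly sequence, which plain C cannot express:

	/* Illustrative ordering of accesses; not usable as-is. */
	static int percpu_increment_sketch(struct rseq_abi *rs, long *counters)
	{
		__u32 cpu = rs->cpu_id_start;	/* always a valid CPU index */

		if (rs->cpu_id != cpu)		/* unregistered or migrated */
			return -1;		/* caller retries or falls back */
		counters[cpu]++;		/* the "commit" step */
		return 0;
	}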
diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h
index 5943c816c07c..893a11eca9d5 100644
--- a/tools/testing/selftests/rseq/rseq-arm.h
+++ b/tools/testing/selftests/rseq/rseq-arm.h
@@ -147,14 +147,11 @@ do { \
teardown \
"b %l[" __rseq_str(cmpfail_label) "]\n\t"
-#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("")
-
static inline __attribute__((always_inline))
int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
{
RSEQ_INJECT_C(9)
- rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -185,8 +182,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
"5:\n\t"
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "m" (*v),
[expect] "r" (expect),
[newv] "r" (newv)
@@ -198,30 +195,31 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
, error1, error2
#endif
);
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
return 0;
abort:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
static inline __attribute__((always_inline))
int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
- off_t voffp, intptr_t *load, int cpu)
+ long voffp, intptr_t *load, int cpu)
{
RSEQ_INJECT_C(9)
- rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -255,8 +253,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
"5:\n\t"
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* final store input */
[v] "m" (*v),
[expectnot] "r" (expectnot),
@@ -270,19 +268,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
, error1, error2
#endif
);
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
return 0;
abort:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -292,7 +292,6 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
{
RSEQ_INJECT_C(9)
- rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
#ifdef RSEQ_COMPARE_TWICE
@@ -316,8 +315,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
"5:\n\t"
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "m" (*v),
[count] "Ir" (count)
RSEQ_INJECT_INPUT
@@ -328,14 +327,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
, error1
#endif
);
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
return 0;
abort:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
#endif
}
@@ -347,7 +347,6 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
{
RSEQ_INJECT_C(9)
- rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -381,8 +380,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
"5:\n\t"
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* try store input */
[v2] "m" (*v2),
[newv2] "r" (newv2),
@@ -398,19 +397,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
return 0;
abort:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -422,7 +423,6 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
{
RSEQ_INJECT_C(9)
- rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -457,8 +457,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
"5:\n\t"
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* try store input */
[v2] "m" (*v2),
[newv2] "r" (newv2),
@@ -474,19 +474,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
return 0;
abort:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -498,7 +500,6 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
{
RSEQ_INJECT_C(9)
- rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -537,8 +538,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
"5:\n\t"
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* cmp2 input */
[v2] "m" (*v2),
[expect2] "r" (expect2),
@@ -554,21 +555,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
, error1, error2, error3
#endif
);
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
return 0;
abort:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("1st expected value comparison failed");
error3:
+ rseq_after_asm_goto();
rseq_bug("2nd expected value comparison failed");
#endif
}
@@ -582,7 +586,6 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
RSEQ_INJECT_C(9)
- rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -657,8 +660,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
"8:\n\t"
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* final store input */
[v] "m" (*v),
[expect] "r" (expect),
@@ -678,21 +681,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
return 0;
abort:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -706,7 +709,6 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
RSEQ_INJECT_C(9)
- rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -782,8 +784,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
"8:\n\t"
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* final store input */
[v] "m" (*v),
[expect] "r" (expect),
@@ -803,21 +805,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
return 0;
abort:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
- rseq_workaround_gcc_asm_size_guess();
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h
index 200dae9e4208..cbe190a4d005 100644
--- a/tools/testing/selftests/rseq/rseq-arm64.h
+++ b/tools/testing/selftests/rseq/rseq-arm64.h
@@ -230,8 +230,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "Qo" (*v),
[expect] "r" (expect),
[newv] "r" (newv)
@@ -242,24 +242,28 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
, error1, error2
#endif
);
-
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
static inline __attribute__((always_inline))
int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
- off_t voffp, intptr_t *load, int cpu)
+ long voffp, intptr_t *load, int cpu)
{
RSEQ_INJECT_C(9)
@@ -287,8 +291,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "Qo" (*v),
[expectnot] "r" (expectnot),
[load] "Qo" (*load),
@@ -300,16 +304,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -337,8 +346,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "Qo" (*v),
[count] "r" (count)
RSEQ_INJECT_INPUT
@@ -348,12 +357,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
, error1
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
#endif
}
@@ -388,8 +400,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[expect] "r" (expect),
[v] "Qo" (*v),
[newv] "r" (newv),
@@ -402,17 +414,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
-
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -447,8 +463,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[expect] "r" (expect),
[v] "Qo" (*v),
[newv] "r" (newv),
@@ -461,17 +477,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
-
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -508,8 +528,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "Qo" (*v),
[expect] "r" (expect),
[v2] "Qo" (*v2),
@@ -522,19 +542,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
, error1, error2, error3
#endif
);
-
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
error3:
+ rseq_after_asm_goto();
rseq_bug("2nd expected value comparison failed");
#endif
}
@@ -569,8 +594,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[expect] "r" (expect),
[v] "Qo" (*v),
[newv] "r" (newv),
@@ -584,17 +609,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
-
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -629,8 +658,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[expect] "r" (expect),
[v] "Qo" (*v),
[newv] "r" (newv),
@@ -644,17 +673,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
-
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
diff --git a/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h b/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h
new file mode 100644
index 000000000000..38c584661571
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
+/*
+ * rseq-generic-thread-pointer.h
+ *
+ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef _RSEQ_GENERIC_THREAD_POINTER
+#define _RSEQ_GENERIC_THREAD_POINTER
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Use gcc builtin thread pointer. */
+static inline void *rseq_thread_pointer(void)
+{
+ return __builtin_thread_pointer();
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
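These thread-pointer helpers let rseq_get_abi(), used throughout the per-architecture headers in this series, locate the thread's struct rseq_abi at a fixed offset from the TLS base. A sketch of that glue, where rseq_offset stands in for the per-process offset the selftest library is assumed to compute at registration time:

	#include <stddef.h>
	#include <stdint.h>

	extern ptrdiff_t rseq_offset;	/* assumed: set during rseq registration */

	static inline struct rseq_abi *rseq_get_abi(void)
	{
		return (struct rseq_abi *)((uintptr_t)rseq_thread_pointer()
					   + rseq_offset);
	}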
diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h
index e989e7c14b09..878739fae2fd 100644
--- a/tools/testing/selftests/rseq/rseq-mips.h
+++ b/tools/testing/selftests/rseq/rseq-mips.h
@@ -154,14 +154,11 @@ do { \
teardown \
"b %l[" __rseq_str(cmpfail_label) "]\n\t"
-#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("")
-
static inline __attribute__((always_inline))
int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
{
RSEQ_INJECT_C(9)
- rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -190,8 +187,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
"5:\n\t"
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "m" (*v),
[expect] "r" (expect),
[newv] "r" (newv)
@@ -203,14 +200,11 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
, error1, error2
#endif
);
- rseq_workaround_gcc_asm_size_guess();
return 0;
abort:
- rseq_workaround_gcc_asm_size_guess();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
- rseq_workaround_gcc_asm_size_guess();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
@@ -222,11 +216,10 @@ error2:
static inline __attribute__((always_inline))
int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
- off_t voffp, intptr_t *load, int cpu)
+ long voffp, intptr_t *load, int cpu)
{
RSEQ_INJECT_C(9)
- rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -258,8 +251,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
"5:\n\t"
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* final store input */
[v] "m" (*v),
[expectnot] "r" (expectnot),
@@ -273,14 +266,11 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
, error1, error2
#endif
);
- rseq_workaround_gcc_asm_size_guess();
return 0;
abort:
- rseq_workaround_gcc_asm_size_guess();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
- rseq_workaround_gcc_asm_size_guess();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
@@ -295,7 +285,6 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
{
RSEQ_INJECT_C(9)
- rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
#ifdef RSEQ_COMPARE_TWICE
@@ -319,8 +308,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
"5:\n\t"
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "m" (*v),
[count] "Ir" (count)
RSEQ_INJECT_INPUT
@@ -331,10 +320,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
, error1
#endif
);
- rseq_workaround_gcc_asm_size_guess();
return 0;
abort:
- rseq_workaround_gcc_asm_size_guess();
RSEQ_INJECT_FAILED
return -1;
#ifdef RSEQ_COMPARE_TWICE
@@ -350,7 +337,6 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
{
RSEQ_INJECT_C(9)
- rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -382,8 +368,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
"5:\n\t"
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* try store input */
[v2] "m" (*v2),
[newv2] "r" (newv2),
@@ -399,14 +385,11 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
- rseq_workaround_gcc_asm_size_guess();
return 0;
abort:
- rseq_workaround_gcc_asm_size_guess();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
- rseq_workaround_gcc_asm_size_guess();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
@@ -423,7 +406,6 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
{
RSEQ_INJECT_C(9)
- rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -456,8 +438,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
"5:\n\t"
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* try store input */
[v2] "m" (*v2),
[newv2] "r" (newv2),
@@ -473,14 +455,11 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
- rseq_workaround_gcc_asm_size_guess();
return 0;
abort:
- rseq_workaround_gcc_asm_size_guess();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
- rseq_workaround_gcc_asm_size_guess();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
@@ -497,7 +476,6 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
{
RSEQ_INJECT_C(9)
- rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -532,8 +510,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
"5:\n\t"
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* cmp2 input */
[v2] "m" (*v2),
[expect2] "r" (expect2),
@@ -549,14 +527,11 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
, error1, error2, error3
#endif
);
- rseq_workaround_gcc_asm_size_guess();
return 0;
abort:
- rseq_workaround_gcc_asm_size_guess();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
- rseq_workaround_gcc_asm_size_guess();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
@@ -577,7 +552,6 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
RSEQ_INJECT_C(9)
- rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -649,8 +623,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
"8:\n\t"
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* final store input */
[v] "m" (*v),
[expect] "r" (expect),
@@ -670,21 +644,16 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
- rseq_workaround_gcc_asm_size_guess();
return 0;
abort:
- rseq_workaround_gcc_asm_size_guess();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
- rseq_workaround_gcc_asm_size_guess();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
- rseq_workaround_gcc_asm_size_guess();
rseq_bug("cpu_id comparison failed");
error2:
- rseq_workaround_gcc_asm_size_guess();
rseq_bug("expected value comparison failed");
#endif
}
@@ -698,7 +667,6 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
RSEQ_INJECT_C(9)
- rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -771,8 +739,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
"8:\n\t"
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* final store input */
[v] "m" (*v),
[expect] "r" (expect),
@@ -792,21 +760,16 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
- rseq_workaround_gcc_asm_size_guess();
return 0;
abort:
- rseq_workaround_gcc_asm_size_guess();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
- rseq_workaround_gcc_asm_size_guess();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
- rseq_workaround_gcc_asm_size_guess();
rseq_bug("cpu_id comparison failed");
error2:
- rseq_workaround_gcc_asm_size_guess();
rseq_bug("expected value comparison failed");
#endif
}
diff --git a/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h b/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h
new file mode 100644
index 000000000000..263eee84fb76
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
+/*
+ * rseq-ppc-thread-pointer.h
+ *
+ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef _RSEQ_PPC_THREAD_POINTER
+#define _RSEQ_PPC_THREAD_POINTER
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline void *rseq_thread_pointer(void)
+{
+#ifdef __powerpc64__
+ register void *__result asm ("r13");
+#else
+ register void *__result asm ("r2");
+#endif
+ asm ("" : "=r" (__result));
+ return __result;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
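The PowerPC ABI dedicates r13 (64-bit) or r2 (32-bit) to the thread pointer, so the helper above simply names that register; the empty asm forces the compiler to read the register's current contents rather than assume anything about it. A hypothetical sanity check, not part of the patch:

	#include <assert.h>

	static void thread_pointer_sanity_sketch(void)
	{
		void *tp1 = rseq_thread_pointer();
		void *tp2 = rseq_thread_pointer();

		/* The thread pointer must be stable within one thread. */
		assert(tp1 == tp2);
	}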
diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h
index 76be90196fe4..bab8e0b9fb11 100644
--- a/tools/testing/selftests/rseq/rseq-ppc.h
+++ b/tools/testing/selftests/rseq/rseq-ppc.h
@@ -47,10 +47,13 @@ do { \
#ifdef __PPC64__
-#define STORE_WORD "std "
-#define LOAD_WORD "ld "
-#define LOADX_WORD "ldx "
-#define CMP_WORD "cmpd "
+#define RSEQ_STORE_LONG(arg) "std%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */
+#define RSEQ_STORE_INT(arg) "stw%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */
+#define RSEQ_LOAD_LONG(arg) "ld%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */
+#define RSEQ_LOAD_INT(arg) "lwz%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */
+#define RSEQ_LOADX_LONG "ldx " /* From base register ("b" constraint) */
+#define RSEQ_CMP_LONG "cmpd "
+#define RSEQ_CMP_LONG_INT "cmpdi "
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
start_ip, post_commit_offset, abort_ip) \
@@ -89,10 +92,13 @@ do { \
#else /* #ifdef __PPC64__ */
-#define STORE_WORD "stw "
-#define LOAD_WORD "lwz "
-#define LOADX_WORD "lwzx "
-#define CMP_WORD "cmpw "
+#define RSEQ_STORE_LONG(arg) "stw%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */
+#define RSEQ_STORE_INT(arg) RSEQ_STORE_LONG(arg) /* To memory ("m" constraint) */
+#define RSEQ_LOAD_LONG(arg) "lwz%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */
+#define RSEQ_LOAD_INT(arg) RSEQ_LOAD_LONG(arg) /* From memory ("m" constraint) */
+#define RSEQ_LOADX_LONG "lwzx " /* From base register ("b" constraint) */
+#define RSEQ_CMP_LONG "cmpw "
+#define RSEQ_CMP_LONG_INT "cmpwi "
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
start_ip, post_commit_offset, abort_ip) \
@@ -125,7 +131,7 @@ do { \
RSEQ_INJECT_ASM(1) \
"lis %%r17, (" __rseq_str(cs_label) ")@ha\n\t" \
"addi %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t" \
- "stw %%r17, %[" __rseq_str(rseq_cs) "]\n\t" \
+ RSEQ_STORE_INT(rseq_cs) "%%r17, %[" __rseq_str(rseq_cs) "]\n\t" \
__rseq_str(label) ":\n\t"
#endif /* #ifdef __PPC64__ */
@@ -136,7 +142,7 @@ do { \
#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
RSEQ_INJECT_ASM(2) \
- "lwz %%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \
+ RSEQ_LOAD_INT(current_cpu_id) "%%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \
"cmpw cr7, %[" __rseq_str(cpu_id) "], %%r17\n\t" \
"bne- cr7, " __rseq_str(label) "\n\t"
@@ -153,25 +159,25 @@ do { \
* RSEQ_ASM_OP_* (else): does not use hard-coded registers (other than cr7)
*/
#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \
- LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \
- CMP_WORD "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \
+ RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \
+ RSEQ_CMP_LONG "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \
"bne- cr7, " __rseq_str(label) "\n\t"
#define RSEQ_ASM_OP_CMPNE(var, expectnot, label) \
- LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \
- CMP_WORD "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t" \
+ RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \
+ RSEQ_CMP_LONG "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t" \
"beq- cr7, " __rseq_str(label) "\n\t"
#define RSEQ_ASM_OP_STORE(value, var) \
- STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t"
+ RSEQ_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t"
/* Load @var to r17 */
#define RSEQ_ASM_OP_R_LOAD(var) \
- LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t"
+ RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t"
/* Store r17 to @var */
#define RSEQ_ASM_OP_R_STORE(var) \
- STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t"
+ RSEQ_STORE_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t"
/* Add @count to r17 */
#define RSEQ_ASM_OP_R_ADD(count) \
@@ -179,11 +185,11 @@ do { \
/* Load (r17 + voffp) to r17 */
#define RSEQ_ASM_OP_R_LOADX(voffp) \
- LOADX_WORD "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t"
+ RSEQ_LOADX_LONG "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t"
/* TODO: implement a faster memcpy. */
#define RSEQ_ASM_OP_R_MEMCPY() \
- "cmpdi %%r19, 0\n\t" \
+ RSEQ_CMP_LONG_INT "%%r19, 0\n\t" \
"beq 333f\n\t" \
"addi %%r20, %%r20, -1\n\t" \
"addi %%r21, %%r21, -1\n\t" \
@@ -191,16 +197,16 @@ do { \
"lbzu %%r18, 1(%%r20)\n\t" \
"stbu %%r18, 1(%%r21)\n\t" \
"addi %%r19, %%r19, -1\n\t" \
- "cmpdi %%r19, 0\n\t" \
+ RSEQ_CMP_LONG_INT "%%r19, 0\n\t" \
"bne 222b\n\t" \
"333:\n\t" \
#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \
- STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \
+ RSEQ_STORE_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \
__rseq_str(post_commit_label) ":\n\t"
#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \
- STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \
+ RSEQ_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \
__rseq_str(post_commit_label) ":\n\t"
static inline __attribute__((always_inline))
@@ -235,8 +241,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "m" (*v),
[expect] "r" (expect),
[newv] "r" (newv)
@@ -248,23 +254,28 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
static inline __attribute__((always_inline))
int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
- off_t voffp, intptr_t *load, int cpu)
+ long voffp, intptr_t *load, int cpu)
{
RSEQ_INJECT_C(9)
@@ -301,8 +312,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* final store input */
[v] "m" (*v),
[expectnot] "r" (expectnot),
@@ -316,16 +327,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -359,8 +375,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* final store input */
[v] "m" (*v),
[count] "r" (count)
@@ -372,12 +388,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
, error1
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
#endif
}
@@ -419,8 +438,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* try store input */
[v2] "m" (*v2),
[newv2] "r" (newv2),
@@ -436,16 +455,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -489,8 +513,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* try store input */
[v2] "m" (*v2),
[newv2] "r" (newv2),
@@ -506,16 +530,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -560,8 +589,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* cmp2 input */
[v2] "m" (*v2),
[expect2] "r" (expect2),
@@ -577,18 +606,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
, error1, error2, error3
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("1st expected value comparison failed");
error3:
+ rseq_after_asm_goto();
rseq_bug("2nd expected value comparison failed");
#endif
}
@@ -635,8 +670,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* final store input */
[v] "m" (*v),
[expect] "r" (expect),
@@ -653,16 +688,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -711,8 +751,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* final store input */
[v] "m" (*v),
[expect] "r" (expect),
@@ -729,23 +769,23 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
-#undef STORE_WORD
-#undef LOAD_WORD
-#undef LOADX_WORD
-#undef CMP_WORD
-
#endif /* !RSEQ_SKIP_FASTPATH */
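The RSEQ_STORE_*/RSEQ_LOAD_* macros introduced above embed gcc's PowerPC operand modifiers %U (append the update-form suffix) and %X (append the indexed-form suffix), so the emitted mnemonic matches whatever addressing mode the "m" constraint resolves to. The same idiom in isolation, a sketch assuming __powerpc64__:

	/* gcc may emit ld, ldu, ldx or ldux here, depending on how it
	 * chooses to address the "m" operand; %U1/%X1 adapt the mnemonic. */
	static inline long ppc64_load_sketch(const long *p)
	{
		long val;

		asm ("ld%U1%X1 %0, %1" : "=r" (val) : "m" (*p));
		return val;
	}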
diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h
index 8ef94ad1cbb4..4e6dc5f0cb42 100644
--- a/tools/testing/selftests/rseq/rseq-s390.h
+++ b/tools/testing/selftests/rseq/rseq-s390.h
@@ -165,8 +165,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "m" (*v),
[expect] "r" (expect),
[newv] "r" (newv)
@@ -178,16 +178,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -198,7 +203,7 @@ error2:
*/
static inline __attribute__((always_inline))
int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
- off_t voffp, intptr_t *load, int cpu)
+ long voffp, intptr_t *load, int cpu)
{
RSEQ_INJECT_C(9)
@@ -233,8 +238,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* final store input */
[v] "m" (*v),
[expectnot] "r" (expectnot),
@@ -248,16 +253,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -288,8 +298,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* final store input */
[v] "m" (*v),
[count] "r" (count)
@@ -301,12 +311,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
, error1
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
#endif
}
@@ -347,8 +360,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* try store input */
[v2] "m" (*v2),
[newv2] "r" (newv2),
@@ -364,16 +377,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -426,8 +444,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* cmp2 input */
[v2] "m" (*v2),
[expect2] "r" (expect2),
@@ -443,18 +461,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
, error1, error2, error3
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("1st expected value comparison failed");
error3:
+ rseq_after_asm_goto();
rseq_bug("2nd expected value comparison failed");
#endif
}
@@ -534,8 +558,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
#endif
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
/* final store input */
[v] "m" (*v),
[expect] "r" (expect),
@@ -555,16 +579,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
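
Note the off_t -> long change for voffp, repeated in each per-architecture header and in rseq-skip.h below: voffp is a pointer offset, not a file offset, and with -D_FILE_OFFSET_BITS=64 off_t grows to 64 bits even on 32-bit targets, where it no longer fits the single machine-word register the inline assembly loads it into. A standalone check of that assumption:

    #include <stdio.h>
    #include <sys/types.h>

    int main(void)
    {
        /* On ILP32 built with -D_FILE_OFFSET_BITS=64, this prints
         * sizeof(long)=4 sizeof(off_t)=8. */
        printf("sizeof(long)=%zu sizeof(off_t)=%zu\n",
               sizeof(long), sizeof(off_t));
        return 0;
    }
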
diff --git a/tools/testing/selftests/rseq/rseq-skip.h b/tools/testing/selftests/rseq/rseq-skip.h
index 72750b5905a9..7b53dac1fcdd 100644
--- a/tools/testing/selftests/rseq/rseq-skip.h
+++ b/tools/testing/selftests/rseq/rseq-skip.h
@@ -13,7 +13,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
static inline __attribute__((always_inline))
int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
- off_t voffp, intptr_t *load, int cpu)
+ long voffp, intptr_t *load, int cpu)
{
return -1;
}
diff --git a/tools/testing/selftests/rseq/rseq-thread-pointer.h b/tools/testing/selftests/rseq/rseq-thread-pointer.h
new file mode 100644
index 000000000000..977c25d758b2
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-thread-pointer.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
+/*
+ * rseq-thread-pointer.h
+ *
+ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef _RSEQ_THREAD_POINTER
+#define _RSEQ_THREAD_POINTER
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "rseq-x86-thread-pointer.h"
+#elif defined(__PPC__)
+#include "rseq-ppc-thread-pointer.h"
+#else
+#include "rseq-generic-thread-pointer.h"
+#endif
+
+#endif
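
Only the x86 variant of these headers appears in this section; the ppc and generic fallbacks the dispatcher names are not shown. The generic one presumably just wraps the compiler builtin, along these lines (a sketch, not the file's verbatim contents):

    /* rseq-generic-thread-pointer.h, sketched */
    static inline void *rseq_thread_pointer(void)
    {
        return __builtin_thread_pointer();
    }
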
diff --git a/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h
new file mode 100644
index 000000000000..d3133587d996
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
+/*
+ * rseq-x86-thread-pointer.h
+ *
+ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef _RSEQ_X86_THREAD_POINTER
+#define _RSEQ_X86_THREAD_POINTER
+
+#include <features.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if __GNUC_PREREQ (11, 1)
+static inline void *rseq_thread_pointer(void)
+{
+ return __builtin_thread_pointer();
+}
+#else
+static inline void *rseq_thread_pointer(void)
+{
+ void *__result;
+
+# ifdef __x86_64__
+ __asm__ ("mov %%fs:0, %0" : "=r" (__result));
+# else
+ __asm__ ("mov %%gs:0, %0" : "=r" (__result));
+# endif
+ return __result;
+}
+#endif /* !GCC 11 */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
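
On x86, offset 0 from the TLS segment register (%fs on x86-64, %gs on i386) holds the thread pointer itself, which is why the pre-GCC-11 fallback above can read it with a single mov. Combined with an rseq offset, that is enough to locate the current thread's rseq area from plain C; a minimal sketch, assuming a glibc (2.35 or later) that exports __rseq_offset:

    #include <stdio.h>
    #include <stddef.h>

    extern const ptrdiff_t __rseq_offset;  /* glibc >= 2.35 */

    static inline void *thread_pointer(void)
    {
        void *result;

    #ifdef __x86_64__
        __asm__ ("mov %%fs:0, %0" : "=r" (result));
    #else
        __asm__ ("mov %%gs:0, %0" : "=r" (result));
    #endif
        return result;
    }

    int main(void)
    {
        void *tp = thread_pointer();

        printf("thread pointer: %p, rseq area: %p\n",
               tp, (void *)((char *)tp + __rseq_offset));
        return 0;
    }
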
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index 640411518e46..bd01dc41ca13 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -28,6 +28,8 @@
#ifdef __x86_64__
+#define RSEQ_ASM_TP_SEGMENT %%fs
+
#define rseq_smp_mb() \
__asm__ __volatile__ ("lock; addl $0,-128(%%rsp)" ::: "memory", "cc")
#define rseq_smp_rmb() rseq_barrier()
@@ -123,14 +125,14 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
RSEQ_INJECT_ASM(3)
"cmpq %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
"cmpq %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
#endif
@@ -141,7 +143,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
+ [rseq_offset] "r" (rseq_offset),
[v] "m" (*v),
[expect] "r" (expect),
[newv] "r" (newv)
@@ -152,16 +154,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -172,7 +179,7 @@ error2:
*/
static inline __attribute__((always_inline))
int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
- off_t voffp, intptr_t *load, int cpu)
+ long voffp, intptr_t *load, int cpu)
{
RSEQ_INJECT_C(9)
@@ -184,15 +191,15 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
RSEQ_INJECT_ASM(3)
"movq %[v], %%rbx\n\t"
"cmpq %%rbx, %[expectnot]\n\t"
"je %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
"movq %[v], %%rbx\n\t"
"cmpq %%rbx, %[expectnot]\n\t"
"je %l[error2]\n\t"
@@ -207,7 +214,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
+ [rseq_offset] "r" (rseq_offset),
/* final store input */
[v] "m" (*v),
[expectnot] "r" (expectnot),
@@ -220,16 +227,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -245,11 +257,11 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
RSEQ_INJECT_ASM(3)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
#endif
/* final store */
"addq %[count], %[v]\n\t"
@@ -258,7 +270,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
+ [rseq_offset] "r" (rseq_offset),
/* final store input */
[v] "m" (*v),
[count] "er" (count)
@@ -269,12 +281,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
, error1
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
#endif
}
@@ -286,7 +301,7 @@ error1:
* *pval += inc;
*/
static inline __attribute__((always_inline))
-int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
+int rseq_offset_deref_addv(intptr_t *ptr, long off, intptr_t inc, int cpu)
{
RSEQ_INJECT_C(9)
@@ -296,11 +311,11 @@ int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
RSEQ_INJECT_ASM(3)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
#endif
/* get p+v */
"movq %[ptr], %%rbx\n\t"
@@ -314,7 +329,7 @@ int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
+ [rseq_offset] "r" (rseq_offset),
/* final store input */
[ptr] "m" (*ptr),
[off] "er" (off),
@@ -351,14 +366,14 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
RSEQ_INJECT_ASM(3)
"cmpq %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
"cmpq %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
#endif
@@ -372,7 +387,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
+ [rseq_offset] "r" (rseq_offset),
/* try store input */
[v2] "m" (*v2),
[newv2] "r" (newv2),
@@ -387,16 +402,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -426,8 +446,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
RSEQ_INJECT_ASM(3)
"cmpq %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
@@ -436,7 +456,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(5)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
"cmpq %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
"cmpq %[v2], %[expect2]\n\t"
@@ -449,7 +469,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
+ [rseq_offset] "r" (rseq_offset),
/* cmp2 input */
[v2] "m" (*v2),
[expect2] "r" (expect2),
@@ -464,18 +484,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
, error1, error2, error3
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("1st expected value comparison failed");
error3:
+ rseq_after_asm_goto();
rseq_bug("2nd expected value comparison failed");
#endif
}
@@ -500,14 +526,14 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
"movq %[dst], %[rseq_scratch1]\n\t"
"movq %[len], %[rseq_scratch2]\n\t"
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
RSEQ_INJECT_ASM(3)
"cmpq %[v], %[expect]\n\t"
"jnz 5f\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f)
"cmpq %[v], %[expect]\n\t"
"jnz 7f\n\t"
#endif
@@ -555,7 +581,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
#endif
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
+ [rseq_offset] "r" (rseq_offset),
/* final store input */
[v] "m" (*v),
[expect] "r" (expect),
@@ -574,16 +600,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -600,7 +631,9 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
#endif /* !RSEQ_SKIP_FASTPATH */
-#elif __i386__
+#elif defined(__i386__)
+
+#define RSEQ_ASM_TP_SEGMENT %%gs
#define rseq_smp_mb() \
__asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
@@ -701,14 +734,14 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
RSEQ_INJECT_ASM(3)
"cmpl %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
"cmpl %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
#endif
@@ -719,7 +752,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
+ [rseq_offset] "r" (rseq_offset),
[v] "m" (*v),
[expect] "r" (expect),
[newv] "r" (newv)
@@ -730,16 +763,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -750,7 +788,7 @@ error2:
*/
static inline __attribute__((always_inline))
int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
- off_t voffp, intptr_t *load, int cpu)
+ long voffp, intptr_t *load, int cpu)
{
RSEQ_INJECT_C(9)
@@ -762,15 +800,15 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
RSEQ_INJECT_ASM(3)
"movl %[v], %%ebx\n\t"
"cmpl %%ebx, %[expectnot]\n\t"
"je %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
"movl %[v], %%ebx\n\t"
"cmpl %%ebx, %[expectnot]\n\t"
"je %l[error2]\n\t"
@@ -785,7 +823,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
+ [rseq_offset] "r" (rseq_offset),
/* final store input */
[v] "m" (*v),
[expectnot] "r" (expectnot),
@@ -798,16 +836,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -823,11 +866,11 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
RSEQ_INJECT_ASM(3)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
#endif
/* final store */
"addl %[count], %[v]\n\t"
@@ -836,7 +879,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
+ [rseq_offset] "r" (rseq_offset),
/* final store input */
[v] "m" (*v),
[count] "ir" (count)
@@ -847,12 +890,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
, error1
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
#endif
}
@@ -872,14 +918,14 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
RSEQ_INJECT_ASM(3)
"cmpl %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
"cmpl %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
#endif
@@ -894,7 +940,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
+ [rseq_offset] "r" (rseq_offset),
/* try store input */
[v2] "m" (*v2),
[newv2] "m" (newv2),
@@ -909,16 +955,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -938,15 +989,15 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
RSEQ_INJECT_ASM(3)
"movl %[expect], %%eax\n\t"
"cmpl %[v], %%eax\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
"movl %[expect], %%eax\n\t"
"cmpl %[v], %%eax\n\t"
"jnz %l[error2]\n\t"
@@ -962,7 +1013,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
+ [rseq_offset] "r" (rseq_offset),
/* try store input */
[v2] "m" (*v2),
[newv2] "r" (newv2),
@@ -977,16 +1028,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
@@ -1008,8 +1064,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
RSEQ_INJECT_ASM(3)
"cmpl %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
@@ -1018,7 +1074,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(5)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
"cmpl %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
"cmpl %[expect2], %[v2]\n\t"
@@ -1032,7 +1088,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
+ [rseq_offset] "r" (rseq_offset),
/* cmp2 input */
[v2] "m" (*v2),
[expect2] "r" (expect2),
@@ -1047,18 +1103,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
, error1, error2, error3
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("1st expected value comparison failed");
error3:
+ rseq_after_asm_goto();
rseq_bug("2nd expected value comparison failed");
#endif
}
@@ -1084,15 +1146,15 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
"movl %[dst], %[rseq_scratch1]\n\t"
"movl %[len], %[rseq_scratch2]\n\t"
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
RSEQ_INJECT_ASM(3)
"movl %[expect], %%eax\n\t"
"cmpl %%eax, %[v]\n\t"
"jnz 5f\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f)
"movl %[expect], %%eax\n\t"
"cmpl %%eax, %[v]\n\t"
"jnz 7f\n\t"
@@ -1142,7 +1204,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
#endif
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
+ [rseq_offset] "r" (rseq_offset),
/* final store input */
[v] "m" (*v),
[expect] "m" (expect),
@@ -1161,16 +1223,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
@@ -1196,15 +1263,15 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
"movl %[dst], %[rseq_scratch1]\n\t"
"movl %[len], %[rseq_scratch2]\n\t"
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
RSEQ_INJECT_ASM(3)
"movl %[expect], %%eax\n\t"
"cmpl %%eax, %[v]\n\t"
"jnz 5f\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f)
"movl %[expect], %%eax\n\t"
"cmpl %%eax, %[v]\n\t"
"jnz 7f\n\t"
@@ -1255,7 +1322,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
#endif
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
+ [rseq_offset] "r" (rseq_offset),
/* final store input */
[v] "m" (*v),
[expect] "m" (expect),
@@ -1274,16 +1341,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
, error1, error2
#endif
);
+ rseq_after_asm_goto();
return 0;
abort:
+ rseq_after_asm_goto();
RSEQ_INJECT_FAILED
return -1;
cmpfail:
+ rseq_after_asm_goto();
return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
+ rseq_after_asm_goto();
rseq_bug("cpu_id comparison failed");
error2:
+ rseq_after_asm_goto();
rseq_bug("expected value comparison failed");
#endif
}
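
The key change in this file: instead of passing &__rseq_abi (the address of one TLS variable owned by this library), the asm now takes the rseq_offset scalar and addresses the rseq area relative to the thread pointer, i.e. RSEQ_ASM_TP_SEGMENT:offset(%[rseq_offset]) expands to an %fs- or %gs-prefixed memory operand. That works no matter which library performed the registration. The addressing mode in isolation, reading the cpu_id field (at offset 4 in struct rseq_abi), as an x86-64 sketch:

    #include <stdint.h>
    #include <stddef.h>

    extern ptrdiff_t rseq_offset;   /* thread pointer -> rseq area */

    static inline uint32_t read_cpu_id(void)
    {
        uint32_t cpu;

        /* %fs:4(%reg) dereferences thread_pointer + rseq_offset + 4 */
        __asm__ ("movl %%fs:4(%1), %0" : "=r" (cpu) : "r" (rseq_offset));
        return cpu;
    }
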
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index fb440dfca158..986b9458efb2 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -26,130 +26,124 @@
#include <assert.h>
#include <signal.h>
#include <limits.h>
+#include <dlfcn.h>
+#include <stddef.h>
#include "../kselftest.h"
#include "rseq.h"
-__thread volatile struct rseq __rseq_abi = {
- .cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
-};
-
-/*
- * Shared with other libraries. This library may take rseq ownership if it is
- * still 0 when executing the library constructor. Set to 1 by library
- * constructor when handling rseq. Set to 0 in destructor if handling rseq.
- */
-int __rseq_handled;
-
-/* Whether this library have ownership of rseq registration. */
-static int rseq_ownership;
+static const ptrdiff_t *libc_rseq_offset_p;
+static const unsigned int *libc_rseq_size_p;
+static const unsigned int *libc_rseq_flags_p;
-static __thread volatile uint32_t __rseq_refcount;
+/* Offset from the thread pointer to the rseq area. */
+ptrdiff_t rseq_offset;
-static void signal_off_save(sigset_t *oldset)
-{
- sigset_t set;
- int ret;
+/* Size of the registered rseq area. 0 if the registration was
+ unsuccessful. */
+unsigned int rseq_size = -1U;
- sigfillset(&set);
- ret = pthread_sigmask(SIG_BLOCK, &set, oldset);
- if (ret)
- abort();
-}
+/* Flags used during rseq registration. */
+unsigned int rseq_flags;
-static void signal_restore(sigset_t oldset)
-{
- int ret;
+static int rseq_ownership;
- ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
- if (ret)
- abort();
-}
+static
+__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"))) = {
+ .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
+};
-static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len,
+static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
int flags, uint32_t sig)
{
return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
}
-int rseq_register_current_thread(void)
+int rseq_available(void)
{
- int rc, ret = 0;
- sigset_t oldset;
+ int rc;
- if (!rseq_ownership)
+ rc = sys_rseq(NULL, 0, 0, 0);
+ if (rc != -1)
+ abort();
+ switch (errno) {
+ case ENOSYS:
return 0;
- signal_off_save(&oldset);
- if (__rseq_refcount == UINT_MAX) {
- ret = -1;
- goto end;
- }
- if (__rseq_refcount++)
- goto end;
- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
- if (!rc) {
- assert(rseq_current_cpu_raw() >= 0);
- goto end;
+ case EINVAL:
+ return 1;
+ default:
+ abort();
}
- if (errno != EBUSY)
- __rseq_abi.cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED;
- ret = -1;
- __rseq_refcount--;
-end:
- signal_restore(oldset);
- return ret;
}
-int rseq_unregister_current_thread(void)
+int rseq_register_current_thread(void)
{
- int rc, ret = 0;
- sigset_t oldset;
+ int rc;
- if (!rseq_ownership)
+ if (!rseq_ownership) {
+ /* Treat libc's ownership as a successful registration. */
return 0;
- signal_off_save(&oldset);
- if (!__rseq_refcount) {
- ret = -1;
- goto end;
}
- if (--__rseq_refcount)
- goto end;
- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq),
- RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
- if (!rc)
- goto end;
- __rseq_refcount = 1;
- ret = -1;
-end:
- signal_restore(oldset);
- return ret;
+ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), 0, RSEQ_SIG);
+ if (rc)
+ return -1;
+ assert(rseq_current_cpu_raw() >= 0);
+ return 0;
}
-int32_t rseq_fallback_current_cpu(void)
+int rseq_unregister_current_thread(void)
{
- int32_t cpu;
+ int rc;
- cpu = sched_getcpu();
- if (cpu < 0) {
- perror("sched_getcpu()");
- abort();
+ if (!rseq_ownership) {
+ /* Treat libc's ownership as a successful unregistration. */
+ return 0;
}
- return cpu;
+ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
+ if (rc)
+ return -1;
+ return 0;
}
-void __attribute__((constructor)) rseq_init(void)
+static __attribute__((constructor))
+void rseq_init(void)
{
- /* Check whether rseq is handled by another library. */
- if (__rseq_handled)
+ libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
+ libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
+ libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
+ if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p) {
+ /* rseq registration owned by glibc */
+ rseq_offset = *libc_rseq_offset_p;
+ rseq_size = *libc_rseq_size_p;
+ rseq_flags = *libc_rseq_flags_p;
+ return;
+ }
+ if (!rseq_available())
return;
- __rseq_handled = 1;
rseq_ownership = 1;
+ rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer();
+ rseq_size = sizeof(struct rseq_abi);
+ rseq_flags = 0;
}
-void __attribute__((destructor)) rseq_fini(void)
+static __attribute__((destructor))
+void rseq_exit(void)
{
if (!rseq_ownership)
return;
- __rseq_handled = 0;
+ rseq_offset = 0;
+ rseq_size = -1U;
rseq_ownership = 0;
}
+
+int32_t rseq_fallback_current_cpu(void)
+{
+ int32_t cpu;
+
+ cpu = sched_getcpu();
+ if (cpu < 0) {
+ perror("sched_getcpu()");
+ abort();
+ }
+ return cpu;
+}
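
The rewritten registration logic has two probes. The constructor first asks the next object in the link chain for glibc's rseq symbols (__rseq_offset, __rseq_size, __rseq_flags, exported since glibc 2.35); if all three resolve, glibc owns the registration and the selftests merely mirror its values. Otherwise rseq_available() checks for kernel support by calling the syscall with deliberately invalid arguments: EINVAL means sys_rseq exists, ENOSYS that it does not. The detection idiom on its own, as a sketch:

    #define _GNU_SOURCE     /* for RTLD_NEXT */
    #include <dlfcn.h>
    #include <stddef.h>
    #include <stdio.h>

    int main(void)
    {
        const ptrdiff_t *offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
        const unsigned int *size_p = dlsym(RTLD_NEXT, "__rseq_size");

        if (offset_p && size_p)
            printf("libc owns rseq: offset=%td size=%u\n",
                   *offset_p, *size_p);
        else
            printf("no libc rseq registration visible\n");
        return 0;
    }

(Link with -ldl on glibc older than 2.34, where the dl* functions still live in a separate library.)
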
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index 3f63eb362b92..9d850b290c2e 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -16,7 +16,9 @@
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
-#include <linux/rseq.h>
+#include <stddef.h>
+#include "rseq-abi.h"
+#include "compiler.h"
/*
* Empty code injection macros, override when testing.
@@ -43,8 +45,20 @@
#define RSEQ_INJECT_FAILED
#endif
-extern __thread volatile struct rseq __rseq_abi;
-extern int __rseq_handled;
+#include "rseq-thread-pointer.h"
+
+/* Offset from the thread pointer to the rseq area. */
+extern ptrdiff_t rseq_offset;
+/* Size of the registered rseq area. 0 if the registration was
+ unsuccessful. */
+extern unsigned int rseq_size;
+/* Flags used during rseq registration. */
+extern unsigned int rseq_flags;
+
+static inline struct rseq_abi *rseq_get_abi(void)
+{
+ return (struct rseq_abi *) ((uintptr_t) rseq_thread_pointer() + rseq_offset);
+}
#define rseq_likely(x) __builtin_expect(!!(x), 1)
#define rseq_unlikely(x) __builtin_expect(!!(x), 0)
@@ -108,7 +122,7 @@ int32_t rseq_fallback_current_cpu(void);
*/
static inline int32_t rseq_current_cpu_raw(void)
{
- return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id);
+ return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id);
}
/*
@@ -124,7 +138,7 @@ static inline int32_t rseq_current_cpu_raw(void)
*/
static inline uint32_t rseq_cpu_start(void)
{
- return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start);
+ return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id_start);
}
static inline uint32_t rseq_current_cpu(void)
@@ -139,11 +153,7 @@ static inline uint32_t rseq_current_cpu(void)
static inline void rseq_clear_rseq_cs(void)
{
-#ifdef __LP64__
- __rseq_abi.rseq_cs.ptr = 0;
-#else
- __rseq_abi.rseq_cs.ptr.ptr32 = 0;
-#endif
+ RSEQ_WRITE_ONCE(rseq_get_abi()->rseq_cs.arch.ptr, 0);
}
/*
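
With rseq_get_abi() in place, every helper shown earlier follows the same pattern: read the current CPU, run the critical section pinned to it, retry on abort. A hypothetical caller using the selftest API (NR_CPUS and per_cpu_count are illustrative names, not part of the headers):

    #include <stdint.h>
    #include <stdio.h>
    #include "rseq.h"

    #define NR_CPUS 512                     /* illustrative upper bound */
    static intptr_t per_cpu_count[NR_CPUS];

    int main(void)
    {
        int cpu;

        if (rseq_register_current_thread())
            return 1;
        do {
            cpu = rseq_cpu_start();
        } while (rseq_addv(&per_cpu_count[cpu], 1, cpu) != 0);
        printf("incremented counter of cpu %d\n", cpu);
        if (rseq_unregister_current_thread())
            return 1;
        return 0;
    }
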
diff --git a/tools/testing/selftests/rtc/settings b/tools/testing/selftests/rtc/settings
index ba4d85f74cd6..a953c96aa16e 100644
--- a/tools/testing/selftests/rtc/settings
+++ b/tools/testing/selftests/rtc/settings
@@ -1 +1 @@
-timeout=90
+timeout=180
diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile
index 0ebfe8b0e147..585f7a0c10cb 100644
--- a/tools/testing/selftests/seccomp/Makefile
+++ b/tools/testing/selftests/seccomp/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -Wl,-no-as-needed -Wall
+CFLAGS += -Wl,-no-as-needed -Wall -isystem ../../../../usr/include/
LDFLAGS += -lpthread
TEST_GEN_PROGS := seccomp_bpf seccomp_benchmark
diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile
index 2956584e1e37..75af864e07b6 100644
--- a/tools/testing/selftests/sgx/Makefile
+++ b/tools/testing/selftests/sgx/Makefile
@@ -4,7 +4,7 @@ include ../lib.mk
.PHONY: all clean
-CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh $(CC) \
+CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh "$(CC)" \
../x86/trivial_64bit_program.c)
ifndef OBJCOPY
diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
index 9d4322c946e2..006b464c8fc9 100644
--- a/tools/testing/selftests/sgx/load.c
+++ b/tools/testing/selftests/sgx/load.c
@@ -21,7 +21,7 @@
void encl_delete(struct encl *encl)
{
- struct encl_segment *heap_seg = &encl->segment_tbl[encl->nr_segments - 1];
+ struct encl_segment *heap_seg;
if (encl->encl_base)
munmap((void *)encl->encl_base, encl->encl_size);
@@ -32,10 +32,11 @@ void encl_delete(struct encl *encl)
if (encl->fd)
close(encl->fd);
- munmap(heap_seg->src, heap_seg->size);
-
- if (encl->segment_tbl)
+ if (encl->segment_tbl) {
+ heap_seg = &encl->segment_tbl[encl->nr_segments - 1];
+ munmap(heap_seg->src, heap_seg->size);
free(encl->segment_tbl);
+ }
memset(encl, 0, sizeof(*encl));
}
diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
index 370c4995f7c4..dd74fa42302e 100644
--- a/tools/testing/selftests/sgx/main.c
+++ b/tools/testing/selftests/sgx/main.c
@@ -146,7 +146,8 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl,
if (!encl_load("test_encl.elf", encl, heap_size)) {
encl_delete(encl);
- TH_LOG("Failed to load the test enclave.\n");
+ TH_LOG("Failed to load the test enclave.");
+ return false;
}
if (!encl_measure(encl))
@@ -185,8 +186,6 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl,
return true;
err:
- encl_delete(encl);
-
for (i = 0; i < encl->nr_segments; i++) {
seg = &encl->segment_tbl[i];
@@ -205,7 +204,9 @@ err:
fclose(maps_file);
}
- TH_LOG("Failed to initialize the test enclave.\n");
+ TH_LOG("Failed to initialize the test enclave.");
+
+ encl_delete(encl);
return false;
}
diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py
index f34486cd7342..057a4f49c79d 100644
--- a/tools/testing/selftests/tpm2/tpm2.py
+++ b/tools/testing/selftests/tpm2/tpm2.py
@@ -56,6 +56,7 @@ TSS2_RESMGR_TPM_RC_LAYER = (11 << TSS2_RC_LAYER_SHIFT)
TPM2_CAP_HANDLES = 0x00000001
TPM2_CAP_COMMANDS = 0x00000002
+TPM2_CAP_PCRS = 0x00000005
TPM2_CAP_TPM_PROPERTIES = 0x00000006
TPM2_PT_FIXED = 0x100
@@ -712,3 +713,33 @@ class Client:
pt += 1
return handles
+
+ def get_cap_pcrs(self):
+ pcr_banks = {}
+
+ fmt = '>HII III'
+
+ cmd = struct.pack(fmt,
+ TPM2_ST_NO_SESSIONS,
+ struct.calcsize(fmt),
+ TPM2_CC_GET_CAPABILITY,
+ TPM2_CAP_PCRS, 0, 1)
+ rsp = self.send_cmd(cmd)[10:]
+ _, _, cnt = struct.unpack('>BII', rsp[:9])
+ rsp = rsp[9:]
+
+ # items are TPMS_PCR_SELECTION's
+ for i in range(0, cnt):
+ hash, sizeOfSelect = struct.unpack('>HB', rsp[:3])
+ rsp = rsp[3:]
+
+ pcrSelect = 0
+ if sizeOfSelect > 0:
+ pcrSelect, = struct.unpack('%ds' % sizeOfSelect,
+ rsp[:sizeOfSelect])
+ rsp = rsp[sizeOfSelect:]
+ pcrSelect = int.from_bytes(pcrSelect, byteorder='big')
+
+ pcr_banks[hash] = pcrSelect
+
+ return pcr_banks
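
get_cap_pcrs() walks the TPML_PCR_SELECTION returned for TPM2_CAP_PCRS: a 32-bit count followed by that many TPMS_PCR_SELECTION entries, each naming a hash bank and a PCR bitmap. The wire layout being unpacked above (big-endian, per TPM 2.0 Part 2), expressed as a C sketch for reference:

    #include <stdint.h>

    /* One TPMS_PCR_SELECTION entry, as parsed by get_cap_pcrs(). */
    struct tpms_pcr_selection {
        uint16_t hash;          /* TPM_ALG_ID of the bank, e.g. 0x000B = SHA-256 */
        uint8_t sizeof_select;  /* octets in the select bitmap, typically 3 */
        /* followed by sizeof_select octets of pcrSelect: PCR n is in the
         * bank when bit (n % 8) of octet (n / 8) is set */
    };
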
diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py
index 9d764306887b..ffe98b5c8d22 100644
--- a/tools/testing/selftests/tpm2/tpm2_tests.py
+++ b/tools/testing/selftests/tpm2/tpm2_tests.py
@@ -27,7 +27,17 @@ class SmokeTest(unittest.TestCase):
result = self.client.unseal(self.root_key, blob, auth, None)
self.assertEqual(data, result)
+ def determine_bank_alg(self, mask):
+ pcr_banks = self.client.get_cap_pcrs()
+ for bank_alg, pcrSelection in pcr_banks.items():
+ if pcrSelection & mask == mask:
+ return bank_alg
+ return None
+
def test_seal_with_policy(self):
+ bank_alg = self.determine_bank_alg(1 << 16)
+ self.assertIsNotNone(bank_alg)
+
handle = self.client.start_auth_session(tpm2.TPM2_SE_TRIAL)
data = ('X' * 64).encode()
@@ -35,7 +45,7 @@ class SmokeTest(unittest.TestCase):
pcrs = [16]
try:
- self.client.policy_pcr(handle, pcrs)
+ self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
self.client.policy_password(handle)
policy_dig = self.client.get_policy_digest(handle)
@@ -47,7 +57,7 @@ class SmokeTest(unittest.TestCase):
handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY)
try:
- self.client.policy_pcr(handle, pcrs)
+ self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
self.client.policy_password(handle)
result = self.client.unseal(self.root_key, blob, auth, handle)
@@ -72,6 +82,9 @@ class SmokeTest(unittest.TestCase):
self.assertEqual(rc, tpm2.TPM2_RC_AUTH_FAIL)
def test_unseal_with_wrong_policy(self):
+ bank_alg = self.determine_bank_alg(1 << 16 | 1 << 1)
+ self.assertIsNotNone(bank_alg)
+
handle = self.client.start_auth_session(tpm2.TPM2_SE_TRIAL)
data = ('X' * 64).encode()
@@ -79,7 +92,7 @@ class SmokeTest(unittest.TestCase):
pcrs = [16]
try:
- self.client.policy_pcr(handle, pcrs)
+ self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
self.client.policy_password(handle)
policy_dig = self.client.get_policy_digest(handle)
@@ -91,13 +104,13 @@ class SmokeTest(unittest.TestCase):
# Extend first a PCR that is not part of the policy and try to unseal.
# This should succeed.
- ds = tpm2.get_digest_size(tpm2.TPM2_ALG_SHA1)
- self.client.extend_pcr(1, ('X' * ds).encode())
+ ds = tpm2.get_digest_size(bank_alg)
+ self.client.extend_pcr(1, ('X' * ds).encode(), bank_alg=bank_alg)
handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY)
try:
- self.client.policy_pcr(handle, pcrs)
+ self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
self.client.policy_password(handle)
result = self.client.unseal(self.root_key, blob, auth, handle)
@@ -109,14 +122,14 @@ class SmokeTest(unittest.TestCase):
# Then, extend a PCR that is part of the policy and try to unseal.
# This should fail.
- self.client.extend_pcr(16, ('X' * ds).encode())
+ self.client.extend_pcr(16, ('X' * ds).encode(), bank_alg=bank_alg)
handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY)
rc = 0
try:
- self.client.policy_pcr(handle, pcrs)
+ self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
self.client.policy_password(handle)
result = self.client.unseal(self.root_key, blob, auth, handle)
@@ -302,3 +315,19 @@ class AsyncTest(unittest.TestCase):
log.debug("Calling get_cap in a NON_BLOCKING mode")
async_client.get_cap(tpm2.TPM2_CAP_HANDLES, tpm2.HR_LOADED_SESSION)
async_client.close()
+
+ def test_flush_invalid_context(self):
+ log = logging.getLogger(__name__)
+ log.debug(sys._getframe().f_code.co_name)
+
+ async_client = tpm2.Client(tpm2.Client.FLAG_SPACE | tpm2.Client.FLAG_NONBLOCK)
+ log.debug("Calling flush_context passing in an invalid handle ")
+ handle = 0x80123456
+ rc = 0
+ try:
+ async_client.flush_context(handle)
+ except OSError as e:
+ rc = e.errno
+
+ self.assertEqual(rc, 22)
+ async_client.close()
diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c
index 3d603f1394af..883ca85424bc 100644
--- a/tools/testing/selftests/vDSO/vdso_test_abi.c
+++ b/tools/testing/selftests/vDSO/vdso_test_abi.c
@@ -33,110 +33,114 @@ typedef long (*vdso_clock_gettime_t)(clockid_t clk_id, struct timespec *ts);
typedef long (*vdso_clock_getres_t)(clockid_t clk_id, struct timespec *ts);
typedef time_t (*vdso_time_t)(time_t *t);
-static int vdso_test_gettimeofday(void)
+#define VDSO_TEST_PASS_MSG() "\n%s(): PASS\n", __func__
+#define VDSO_TEST_FAIL_MSG(x) "\n%s(): %s FAIL\n", __func__, x
+#define VDSO_TEST_SKIP_MSG(x) "\n%s(): SKIP: Could not find %s\n", __func__, x
+
+static void vdso_test_gettimeofday(void)
{
/* Find gettimeofday. */
vdso_gettimeofday_t vdso_gettimeofday =
(vdso_gettimeofday_t)vdso_sym(version, name[0]);
if (!vdso_gettimeofday) {
- printf("Could not find %s\n", name[0]);
- return KSFT_SKIP;
+ ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[0]));
+ return;
}
struct timeval tv;
long ret = vdso_gettimeofday(&tv, 0);
if (ret == 0) {
- printf("The time is %lld.%06lld\n",
- (long long)tv.tv_sec, (long long)tv.tv_usec);
+ ksft_print_msg("The time is %lld.%06lld\n",
+ (long long)tv.tv_sec, (long long)tv.tv_usec);
+ ksft_test_result_pass(VDSO_TEST_PASS_MSG());
} else {
- printf("%s failed\n", name[0]);
- return KSFT_FAIL;
+ ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[0]));
}
-
- return KSFT_PASS;
}
-static int vdso_test_clock_gettime(clockid_t clk_id)
+static void vdso_test_clock_gettime(clockid_t clk_id)
{
/* Find clock_gettime. */
vdso_clock_gettime_t vdso_clock_gettime =
(vdso_clock_gettime_t)vdso_sym(version, name[1]);
if (!vdso_clock_gettime) {
- printf("Could not find %s\n", name[1]);
- return KSFT_SKIP;
+ ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[1]));
+ return;
}
struct timespec ts;
long ret = vdso_clock_gettime(clk_id, &ts);
if (ret == 0) {
- printf("The time is %lld.%06lld\n",
- (long long)ts.tv_sec, (long long)ts.tv_nsec);
+ ksft_print_msg("The time is %lld.%06lld\n",
+ (long long)ts.tv_sec, (long long)ts.tv_nsec);
+ ksft_test_result_pass(VDSO_TEST_PASS_MSG());
} else {
- printf("%s failed\n", name[1]);
- return KSFT_FAIL;
+ ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[1]));
}
-
- return KSFT_PASS;
}
-static int vdso_test_time(void)
+static void vdso_test_time(void)
{
/* Find time. */
vdso_time_t vdso_time =
(vdso_time_t)vdso_sym(version, name[2]);
if (!vdso_time) {
- printf("Could not find %s\n", name[2]);
- return KSFT_SKIP;
+ ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[2]));
+ return;
}
long ret = vdso_time(NULL);
if (ret > 0) {
- printf("The time in hours since January 1, 1970 is %lld\n",
+ ksft_print_msg("The time in hours since January 1, 1970 is %lld\n",
(long long)(ret / 3600));
+ ksft_test_result_pass(VDSO_TEST_PASS_MSG());
} else {
- printf("%s failed\n", name[2]);
- return KSFT_FAIL;
+ ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[2]));
}
-
- return KSFT_PASS;
}
-static int vdso_test_clock_getres(clockid_t clk_id)
+static void vdso_test_clock_getres(clockid_t clk_id)
{
+ int clock_getres_fail = 0;
+
/* Find clock_getres. */
vdso_clock_getres_t vdso_clock_getres =
(vdso_clock_getres_t)vdso_sym(version, name[3]);
if (!vdso_clock_getres) {
- printf("Could not find %s\n", name[3]);
- return KSFT_SKIP;
+ ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[3]));
+ return;
}
struct timespec ts, sys_ts;
long ret = vdso_clock_getres(clk_id, &ts);
if (ret == 0) {
- printf("The resolution is %lld %lld\n",
- (long long)ts.tv_sec, (long long)ts.tv_nsec);
+ ksft_print_msg("The vdso resolution is %lld %lld\n",
+ (long long)ts.tv_sec, (long long)ts.tv_nsec);
} else {
- printf("%s failed\n", name[3]);
- return KSFT_FAIL;
+ clock_getres_fail++;
}
ret = syscall(SYS_clock_getres, clk_id, &sys_ts);
- if ((sys_ts.tv_sec != ts.tv_sec) || (sys_ts.tv_nsec != ts.tv_nsec)) {
- printf("%s failed\n", name[3]);
- return KSFT_FAIL;
- }
+ ksft_print_msg("The syscall resolution is %lld %lld\n",
+ (long long)sys_ts.tv_sec, (long long)sys_ts.tv_nsec);
- return KSFT_PASS;
+ if ((sys_ts.tv_sec != ts.tv_sec) || (sys_ts.tv_nsec != ts.tv_nsec))
+ clock_getres_fail++;
+
+ if (clock_getres_fail > 0) {
+ ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[3]));
+ } else {
+ ksft_test_result_pass(VDSO_TEST_PASS_MSG());
+ }
}
const char *vdso_clock_name[12] = {
@@ -158,36 +162,23 @@ const char *vdso_clock_name[12] = {
* This function calls vdso_test_clock_gettime and vdso_test_clock_getres
* with different values for clock_id.
*/
-static inline int vdso_test_clock(clockid_t clock_id)
+static inline void vdso_test_clock(clockid_t clock_id)
{
- int ret0, ret1;
-
- ret0 = vdso_test_clock_gettime(clock_id);
- /* A skipped test is considered passed */
- if (ret0 == KSFT_SKIP)
- ret0 = KSFT_PASS;
-
- ret1 = vdso_test_clock_getres(clock_id);
- /* A skipped test is considered passed */
- if (ret1 == KSFT_SKIP)
- ret1 = KSFT_PASS;
+ ksft_print_msg("\nclock_id: %s\n", vdso_clock_name[clock_id]);
- ret0 += ret1;
+ vdso_test_clock_gettime(clock_id);
- printf("clock_id: %s", vdso_clock_name[clock_id]);
-
- if (ret0 > 0)
- printf(" [FAIL]\n");
- else
- printf(" [PASS]\n");
-
- return ret0;
+ vdso_test_clock_getres(clock_id);
}
+#define VDSO_TEST_PLAN 16
+
int main(int argc, char **argv)
{
unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
- int ret;
+
+ ksft_print_header();
+ ksft_set_plan(VDSO_TEST_PLAN);
if (!sysinfo_ehdr) {
printf("AT_SYSINFO_EHDR is not present!\n");
@@ -201,44 +192,42 @@ int main(int argc, char **argv)
vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
- ret = vdso_test_gettimeofday();
+ vdso_test_gettimeofday();
#if _POSIX_TIMERS > 0
#ifdef CLOCK_REALTIME
- ret += vdso_test_clock(CLOCK_REALTIME);
+ vdso_test_clock(CLOCK_REALTIME);
#endif
#ifdef CLOCK_BOOTTIME
- ret += vdso_test_clock(CLOCK_BOOTTIME);
+ vdso_test_clock(CLOCK_BOOTTIME);
#endif
#ifdef CLOCK_TAI
- ret += vdso_test_clock(CLOCK_TAI);
+ vdso_test_clock(CLOCK_TAI);
#endif
#ifdef CLOCK_REALTIME_COARSE
- ret += vdso_test_clock(CLOCK_REALTIME_COARSE);
+ vdso_test_clock(CLOCK_REALTIME_COARSE);
#endif
#ifdef CLOCK_MONOTONIC
- ret += vdso_test_clock(CLOCK_MONOTONIC);
+ vdso_test_clock(CLOCK_MONOTONIC);
#endif
#ifdef CLOCK_MONOTONIC_RAW
- ret += vdso_test_clock(CLOCK_MONOTONIC_RAW);
+ vdso_test_clock(CLOCK_MONOTONIC_RAW);
#endif
#ifdef CLOCK_MONOTONIC_COARSE
- ret += vdso_test_clock(CLOCK_MONOTONIC_COARSE);
+ vdso_test_clock(CLOCK_MONOTONIC_COARSE);
#endif
#endif
- ret += vdso_test_time();
-
- if (ret > 0)
- return KSFT_FAIL;
+ vdso_test_time();
- return KSFT_PASS;
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
}
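
The conversion replaces hand-rolled pass/fail arithmetic with the kselftest TAP helpers: declare a plan once, report every check as pass/fail/skip (a skip now counts as a reported result instead of being silently folded into a pass), and derive the exit code from the failure count. The skeleton in isolation, as a minimal sketch against the in-tree ../kselftest.h:

    #include "../kselftest.h"

    int main(void)
    {
        ksft_print_header();
        ksft_set_plan(2);       /* number of results this run will emit */

        ksft_test_result_pass("first check\n");
        ksft_test_result_skip("second check: symbol not found\n");

        ksft_print_cnts();
        return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
    }
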
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 2e7e86e85282..3b5faec3c04f 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -2,6 +2,7 @@
hugepage-mmap
hugepage-mremap
hugepage-shm
+hugepage-vmemmap
khugepaged
map_hugetlb
map_populate
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 1607322a112c..81040f01711f 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for vm selftests
+LOCAL_HDRS += $(selfdir)/vm/local_config.h $(top_srcdir)/mm/gup_test.h
+
include local_config.mk
uname_M := $(shell uname -m 2>/dev/null || echo not)
@@ -31,6 +33,7 @@ TEST_GEN_FILES += hmm-tests
TEST_GEN_FILES += hugepage-mmap
TEST_GEN_FILES += hugepage-mremap
TEST_GEN_FILES += hugepage-shm
+TEST_GEN_FILES += hugepage-vmemmap
TEST_GEN_FILES += khugepaged
TEST_GEN_FILES += madv_populate
TEST_GEN_FILES += map_fixed_noreplace
@@ -49,9 +52,9 @@ TEST_GEN_FILES += split_huge_page_test
TEST_GEN_FILES += ksm_tests
ifeq ($(MACHINE),x86_64)
-CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
-CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c)
-CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie)
+CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie)
TARGETS := protection_keys
BINARIES_32 := $(TARGETS:%=%_32)
@@ -140,10 +143,6 @@ endif
$(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap
-$(OUTPUT)/gup_test: ../../../../mm/gup_test.h
-
-$(OUTPUT)/hmm-tests: local_config.h
-
# HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
$(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS)
diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c
index 2a7c33631a29..1d689084a54b 100644
--- a/tools/testing/selftests/vm/hugepage-mremap.c
+++ b/tools/testing/selftests/vm/hugepage-mremap.c
@@ -3,9 +3,10 @@
* hugepage-mremap:
*
* Example of remapping huge page memory in a user application using the
- * mremap system call. Code assumes a hugetlbfs filesystem is mounted
- * at './huge'. The amount of memory used by this test is decided by a command
- * line argument in MBs. If missing, the default amount is 10MB.
+ * mremap system call. The path to a file in a hugetlbfs filesystem must
+ * be passed as the last argument to this test. The amount of memory used
+ * by this test in MBs can optionally be passed as an argument. If no memory
+ * amount is passed, the default amount is 10MB.
*
* To make sure the test triggers pmd sharing and goes through the 'unshare'
* path in the mremap code use 1GB (1024) or more.
@@ -25,7 +26,6 @@
#define DEFAULT_LENGTH_MB 10UL
#define MB_TO_BYTES(x) (x * 1024 * 1024)
-#define FILE_NAME "huge/hugepagefile"
#define PROTECTION (PROT_READ | PROT_WRITE | PROT_EXEC)
#define FLAGS (MAP_SHARED | MAP_ANONYMOUS)
@@ -107,17 +107,26 @@ static void register_region_with_uffd(char *addr, size_t len)
int main(int argc, char *argv[])
{
+	size_t length = 0;
+
+ if (argc != 2 && argc != 3) {
+ printf("Usage: %s [length_in_MB] <hugetlb_file>\n", argv[0]);
+ exit(1);
+ }
+
/* Read memory length as the first arg if valid, otherwise fallback to
- * the default length. Any additional args are ignored.
+ * the default length.
*/
- size_t length = argc > 1 ? (size_t)atoi(argv[1]) : 0UL;
+ if (argc == 3)
+		length = (size_t)atoi(argv[1]);
length = length > 0 ? length : DEFAULT_LENGTH_MB;
length = MB_TO_BYTES(length);
int ret = 0;
- int fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755);
+ /* last arg is the hugetlb file name */
+ int fd = open(argv[argc-1], O_CREAT | O_RDWR, 0755);
if (fd < 0) {
perror("Open failed");
@@ -169,5 +178,8 @@ int main(int argc, char *argv[])
munmap(addr, length);
+ close(fd);
+ unlink(argv[argc-1]);
+
return ret;
}
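For context, the core flow the reworked test now follows is small: open the hugetlbfs file passed on the command line, map it shared, and move the mapping with mremap(). A minimal sketch, assuming a hugetlbfs file path such as /mnt/huge/file (the mount point is an illustration, not part of the patch):

#define _GNU_SOURCE
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 10UL * 1024 * 1024;	/* the test's 10MB default */
	int fd = open("/mnt/huge/file", O_CREAT | O_RDWR, 0755);
	void *old_addr, *new_addr;

	if (fd < 0) {
		perror("open");
		return 1;
	}

	old_addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (old_addr == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Let the kernel move the hugetlb mapping to a new address. */
	new_addr = mremap(old_addr, len, len, MREMAP_MAYMOVE);
	if (new_addr == MAP_FAILED) {
		perror("mremap");
		return 1;
	}

	munmap(new_addr, len);
	close(fd);
	unlink("/mnt/huge/file");
	return 0;
}

With 1024 MB or more, as the comment above notes, the unmap of the old range goes through the PMD-unshare path that the test is designed to exercise.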
diff --git a/tools/testing/selftests/vm/hugepage-vmemmap.c b/tools/testing/selftests/vm/hugepage-vmemmap.c
new file mode 100644
index 000000000000..557bdbd4f87e
--- /dev/null
+++ b/tools/testing/selftests/vm/hugepage-vmemmap.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A test case of using hugepage memory in a user application via the
+ * mmap system call with the MAP_HUGETLB flag. Before running this program,
+ * make sure the administrator has allocated enough default-sized huge
+ * pages to cover the 2 MB allocation.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+#define MAP_LENGTH (2UL * 1024 * 1024)
+
+#ifndef MAP_HUGETLB
+#define MAP_HUGETLB 0x40000 /* arch specific */
+#endif
+
+#define PAGE_SIZE 4096
+
+#define PAGE_COMPOUND_HEAD (1UL << 15)
+#define PAGE_COMPOUND_TAIL (1UL << 16)
+#define PAGE_HUGE (1UL << 17)
+
+#define HEAD_PAGE_FLAGS (PAGE_COMPOUND_HEAD | PAGE_HUGE)
+#define TAIL_PAGE_FLAGS (PAGE_COMPOUND_TAIL | PAGE_HUGE)
+
+#define PM_PFRAME_BITS 55
+#define PM_PFRAME_MASK ((1UL << PM_PFRAME_BITS) - 1)
+
+/*
+ * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages.
+ * That means the addresses starting with 0x800000... will need to be
+ * specified. Specifying a fixed address is not required on ppc64, i386
+ * or x86_64.
+ */
+#ifdef __ia64__
+#define MAP_ADDR (void *)(0x8000000000000000UL)
+#define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED)
+#else
+#define MAP_ADDR NULL
+#define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
+#endif
+
+static void write_bytes(char *addr, size_t length)
+{
+ unsigned long i;
+
+ for (i = 0; i < length; i++)
+ *(addr + i) = (char)i;
+}
+
+static unsigned long virt_to_pfn(void *addr)
+{
+ int fd;
+ unsigned long pagemap;
+
+ fd = open("/proc/self/pagemap", O_RDONLY);
+ if (fd < 0)
+ return -1UL;
+
+ lseek(fd, (unsigned long)addr / PAGE_SIZE * sizeof(pagemap), SEEK_SET);
+ read(fd, &pagemap, sizeof(pagemap));
+ close(fd);
+
+ return pagemap & PM_PFRAME_MASK;
+}
+
+static int check_page_flags(unsigned long pfn)
+{
+ int fd, i;
+ unsigned long pageflags;
+
+ fd = open("/proc/kpageflags", O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ lseek(fd, pfn * sizeof(pageflags), SEEK_SET);
+
+ read(fd, &pageflags, sizeof(pageflags));
+ if ((pageflags & HEAD_PAGE_FLAGS) != HEAD_PAGE_FLAGS) {
+ close(fd);
+ printf("Head page flags (%lx) is invalid\n", pageflags);
+ return -1;
+ }
+
+ /*
+ * Pages other than the first page must be tail pages and must not be
+ * head pages; this also verifies that the kernel has correctly set the
+ * fake page_head to tail while hugetlb_free_vmemmap is enabled.
+ */
+ for (i = 1; i < MAP_LENGTH / PAGE_SIZE; i++) {
+ read(fd, &pageflags, sizeof(pageflags));
+ if ((pageflags & TAIL_PAGE_FLAGS) != TAIL_PAGE_FLAGS ||
+ (pageflags & HEAD_PAGE_FLAGS) == HEAD_PAGE_FLAGS) {
+ close(fd);
+ printf("Tail page flags (%lx) is invalid\n", pageflags);
+ return -1;
+ }
+ }
+
+ close(fd);
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ void *addr;
+ unsigned long pfn;
+
+ addr = mmap(MAP_ADDR, MAP_LENGTH, PROT_READ | PROT_WRITE, MAP_FLAGS, -1, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* Trigger allocation of HugeTLB page. */
+ write_bytes(addr, MAP_LENGTH);
+
+ pfn = virt_to_pfn(addr);
+ if (pfn == -1UL) {
+ munmap(addr, MAP_LENGTH);
+ perror("virt_to_pfn");
+ exit(1);
+ }
+
+ printf("Returned address is %p whose pfn is %lx\n", addr, pfn);
+
+ if (check_page_flags(pfn) < 0) {
+ munmap(addr, MAP_LENGTH);
+ perror("check_page_flags");
+ exit(1);
+ }
+
+ /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
+ if (munmap(addr, MAP_LENGTH)) {
+ perror("munmap");
+ exit(1);
+ }
+
+ return 0;
+}
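A note on the pagemap format the test relies on: each /proc/self/pagemap entry is 64 bits, with the PFN in bits 0-54 and a "page present" flag in bit 63. A hedged sketch that adds the present-bit check the test above omits; pagemap_pfn() is illustrative, not part of the patch:

#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

#define PFN_MASK	((1UL << 55) - 1)	/* bits 0-54 hold the PFN */
#define PM_PRESENT	(1UL << 63)		/* page is present in RAM */

static int64_t pagemap_pfn(void *addr)
{
	uint64_t ent;
	off_t off = (uintptr_t)addr / sysconf(_SC_PAGESIZE) * sizeof(ent);
	int fd = open("/proc/self/pagemap", O_RDONLY);

	if (fd < 0)
		return -1;
	if (pread(fd, &ent, sizeof(ent), off) != sizeof(ent)) {
		close(fd);
		return -1;
	}
	close(fd);

	if (!(ent & PM_PRESENT))	/* not faulted in yet */
		return -1;
	return ent & PFN_MASK;
}

The test can skip the present-bit check because write_bytes() faults the whole range in before the lookup.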
diff --git a/tools/testing/selftests/vm/map_fixed_noreplace.c b/tools/testing/selftests/vm/map_fixed_noreplace.c
index d91bde511268..eed44322d1a6 100644
--- a/tools/testing/selftests/vm/map_fixed_noreplace.c
+++ b/tools/testing/selftests/vm/map_fixed_noreplace.c
@@ -17,9 +17,6 @@
#define MAP_FIXED_NOREPLACE 0x100000
#endif
-#define BASE_ADDRESS (256ul * 1024 * 1024)
-
-
static void dump_maps(void)
{
char cmd[32];
@@ -28,18 +25,46 @@ static void dump_maps(void)
system(cmd);
}
+static unsigned long find_base_addr(unsigned long size)
+{
+ void *addr;
+ unsigned long flags;
+
+ flags = MAP_PRIVATE | MAP_ANONYMOUS;
+ addr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
+ if (addr == MAP_FAILED) {
+ printf("Error: couldn't map the space we need for the test\n");
+ return 0;
+ }
+
+ if (munmap(addr, size) != 0) {
+ printf("Error: couldn't map the space we need for the test\n");
+ return 0;
+ }
+ return (unsigned long)addr;
+}
+
int main(void)
{
+ unsigned long base_addr;
unsigned long flags, addr, size, page_size;
char *p;
page_size = sysconf(_SC_PAGE_SIZE);
+ // Let's find a base addr that is free before we start the tests.
+ size = 5 * page_size;
+ base_addr = find_base_addr(size);
+ if (!base_addr) {
+ printf("Error: couldn't map the space we need for the test\n");
+ return 1;
+ }
+
flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
// Check we can map all the areas we need below
errno = 0;
- addr = BASE_ADDRESS;
+ addr = base_addr;
size = 5 * page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
@@ -60,7 +85,7 @@ int main(void)
printf("unmap() successful\n");
errno = 0;
- addr = BASE_ADDRESS + page_size;
+ addr = base_addr + page_size;
size = 3 * page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
@@ -80,7 +105,7 @@ int main(void)
* +4 | free | new
*/
errno = 0;
- addr = BASE_ADDRESS;
+ addr = base_addr;
size = 5 * page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
@@ -101,7 +126,7 @@ int main(void)
* +4 | free |
*/
errno = 0;
- addr = BASE_ADDRESS + (2 * page_size);
+ addr = base_addr + (2 * page_size);
size = page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
@@ -121,7 +146,7 @@ int main(void)
* +4 | free | new
*/
errno = 0;
- addr = BASE_ADDRESS + (3 * page_size);
+ addr = base_addr + (3 * page_size);
size = 2 * page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
@@ -141,7 +166,7 @@ int main(void)
* +4 | free |
*/
errno = 0;
- addr = BASE_ADDRESS;
+ addr = base_addr;
size = 2 * page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
@@ -161,7 +186,7 @@ int main(void)
* +4 | free |
*/
errno = 0;
- addr = BASE_ADDRESS;
+ addr = base_addr;
size = page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
@@ -181,7 +206,7 @@ int main(void)
* +4 | free | new
*/
errno = 0;
- addr = BASE_ADDRESS + (4 * page_size);
+ addr = base_addr + (4 * page_size);
size = page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
@@ -192,7 +217,7 @@ int main(void)
return 1;
}
- addr = BASE_ADDRESS;
+ addr = base_addr;
size = 5 * page_size;
if (munmap((void *)addr, size) != 0) {
dump_maps();
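The semantics being probed here: MAP_FIXED_NOREPLACE (kernel >= 4.17) treats the address as a hard requirement like MAP_FIXED, but fails with EEXIST instead of silently unmapping whatever already lives there, which is why the test can now probe an arbitrary dynamically-found base address. A minimal sketch of the failure case:

#include <errno.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#ifndef MAP_FIXED_NOREPLACE
#define MAP_FIXED_NOREPLACE 0x100000
#endif

int main(void)
{
	long page = sysconf(_SC_PAGE_SIZE);
	void *a = mmap(NULL, page, PROT_NONE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (a == MAP_FAILED)
		return 1;

	/* Second mapping at the same address must not clobber the first. */
	void *b = mmap(a, page, PROT_NONE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
	if (b == MAP_FAILED && errno == EEXIST)
		printf("range already occupied, as expected\n");

	munmap(a, page);
	return 0;
}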
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index 75d401741394..e10d50e0b8e8 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -111,7 +111,19 @@ fi
echo "-----------------------"
echo "running hugepage-mremap"
echo "-----------------------"
-./hugepage-mremap 256
+./hugepage-mremap $mnt/huge_mremap
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+rm -f $mnt/huge_mremap
+
+echo "------------------------"
+echo "running hugepage-vmemmap"
+echo "------------------------"
+./hugepage-vmemmap
if [ $? -ne 0 ]; then
echo "[FAIL]"
exitcode=1
diff --git a/tools/testing/selftests/vm/transhuge-stress.c b/tools/testing/selftests/vm/transhuge-stress.c
index 5e4c036f6ad3..a03cb3fce1f6 100644
--- a/tools/testing/selftests/vm/transhuge-stress.c
+++ b/tools/testing/selftests/vm/transhuge-stress.c
@@ -26,15 +26,17 @@
#define PAGEMAP_PFN(ent) ((ent) & ((1ull << 55) - 1))
int pagemap_fd;
+int backing_fd = -1;
+int mmap_flags = MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE;
+#define PROT_RW (PROT_READ | PROT_WRITE)
int64_t allocate_transhuge(void *ptr)
{
uint64_t ent[2];
/* drop pmd */
- if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANONYMOUS |
- MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
+ if (mmap(ptr, HPAGE_SIZE, PROT_RW, MAP_FIXED | mmap_flags,
+ backing_fd, 0) != ptr)
errx(2, "mmap transhuge");
if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
@@ -60,6 +62,8 @@ int main(int argc, char **argv)
size_t ram, len;
void *ptr, *p;
struct timespec a, b;
+ int i = 0;
+ char *name = NULL;
double s;
uint8_t *map;
size_t map_len;
@@ -69,13 +73,23 @@ int main(int argc, char **argv)
ram = SIZE_MAX / 4;
else
ram *= sysconf(_SC_PAGESIZE);
+ len = ram;
+
+ while (++i < argc) {
+ if (!strcmp(argv[i], "-h"))
+ errx(1, "usage: %s [size in MiB]", argv[0]);
+ else if (!strcmp(argv[i], "-f"))
+ name = argv[++i];
+ else
+ len = atoll(argv[i]) << 20;
+ }
- if (argc == 1)
- len = ram;
- else if (!strcmp(argv[1], "-h"))
- errx(1, "usage: %s [size in MiB]", argv[0]);
- else
- len = atoll(argv[1]) << 20;
+ if (name) {
+ backing_fd = open(name, O_RDWR);
+ if (backing_fd == -1)
+ errx(2, "open %s", name);
+ mmap_flags = MAP_SHARED;
+ }
warnx("allocate %zd transhuge pages, using %zd MiB virtual memory"
" and %zd MiB of ram", len >> HPAGE_SHIFT, len >> 20,
@@ -86,8 +100,7 @@ int main(int argc, char **argv)
err(2, "open pagemap");
len -= len % HPAGE_SIZE;
- ptr = mmap(NULL, len + HPAGE_SIZE, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0);
+ ptr = mmap(NULL, len + HPAGE_SIZE, PROT_RW, mmap_flags, backing_fd, 0);
if (ptr == MAP_FAILED)
err(2, "initial mmap");
ptr += HPAGE_SIZE - (uintptr_t)ptr % HPAGE_SIZE;
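With -f, the stress test exercises file-backed THP instead of anonymous memory: the file is mapped MAP_SHARED and madvised for huge pages. A reduced sketch; /mnt/tmpfs/file stands in for a file on a filesystem with huge page support (e.g. tmpfs mounted with huge=madvise) and is not named by the patch:

#include <err.h>
#include <fcntl.h>
#include <sys/mman.h>

#define HPAGE_SIZE (2UL * 1024 * 1024)

int main(void)
{
	/* Assumes the file already exists and is at least HPAGE_SIZE long. */
	int fd = open("/mnt/tmpfs/file", O_RDWR);
	void *p;

	if (fd < 0)
		err(2, "open");

	p = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		err(2, "mmap");

	/* Hint that the range should be backed by transparent huge pages. */
	if (madvise(p, HPAGE_SIZE, MADV_HUGEPAGE))
		err(2, "madvise");

	return 0;
}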
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index d3fd24f9fae8..29cc6f247a9e 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -46,6 +46,7 @@
#include <signal.h>
#include <poll.h>
#include <string.h>
+#include <linux/mman.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
@@ -539,7 +540,7 @@ static void continue_range(int ufd, __u64 start, __u64 len)
static void *locking_thread(void *arg)
{
unsigned long cpu = (unsigned long) arg;
- unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */
+ unsigned long page_nr;
unsigned long long count;
if (!(bounces & BOUNCE_RANDOM)) {
@@ -1417,6 +1418,7 @@ static void userfaultfd_pagemap_test(unsigned int test_pgsize)
static int userfaultfd_stress(void)
{
void *area;
+ char *tmp_area;
unsigned long nr;
struct uffdio_register uffdio_register;
struct uffd_stats uffd_stats[nr_cpus];
@@ -1527,9 +1529,13 @@ static int userfaultfd_stress(void)
count_verify[nr], nr);
/* prepare next bounce */
- swap(area_src, area_dst);
+ tmp_area = area_src;
+ area_src = area_dst;
+ area_dst = tmp_area;
- swap(area_src_alias, area_dst_alias);
+ tmp_area = area_src_alias;
+ area_src_alias = area_dst_alias;
+ area_dst_alias = tmp_area;
uffd_stats_report(uffd_stats, nr_cpus);
}
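For readers new to the API this selftest drives, the basic userfaultfd setup is: create the fd via the userfaultfd() syscall, negotiate the API with UFFDIO_API, then register a range whose missing-page faults should be delivered to userspace. A minimal hedged sketch, with error handling collapsed:

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	size_t len = 16 * 4096;
	void *area = mmap(NULL, len, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	struct uffdio_api api = { .api = UFFD_API };
	struct uffdio_register reg = {
		.range = { .start = (unsigned long)area, .len = len },
		.mode = UFFDIO_REGISTER_MODE_MISSING,
	};
	long uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

	if (area == MAP_FAILED || uffd < 0 ||
	    ioctl(uffd, UFFDIO_API, &api) ||
	    ioctl(uffd, UFFDIO_REGISTER, &reg)) {
		perror("userfaultfd setup");
		return 1;
	}

	/* Missing-page faults in [area, area+len) now queue on uffd, where a
	 * handler thread reads struct uffd_msg and resolves them (e.g. with
	 * UFFDIO_COPY). */
	return 0;
}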
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 8a1f62ab3c8e..53df7d3893d3 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -6,9 +6,9 @@ include ../lib.mk
.PHONY: all all_32 all_64 warn_32bit_failure clean
UNAME_M := $(shell uname -m)
-CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
-CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
-CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
+CAN_BUILD_I386 := $(shell ./check_cc.sh "$(CC)" trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./check_cc.sh "$(CC)" trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie)
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl ioperm \
diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh
index 3e2089c8cf54..8c669c0d662e 100755
--- a/tools/testing/selftests/x86/check_cc.sh
+++ b/tools/testing/selftests/x86/check_cc.sh
@@ -7,7 +7,7 @@ CC="$1"
TESTPROG="$2"
shift 2
-if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
+if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
echo 1
else
echo 0
diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh
index 232e958ec454..b0b91d9b0dc2 100755
--- a/tools/testing/selftests/zram/zram.sh
+++ b/tools/testing/selftests/zram/zram.sh
@@ -2,9 +2,6 @@
# SPDX-License-Identifier: GPL-2.0
TCID="zram.sh"
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
. ./zram_lib.sh
run_zram () {
@@ -18,14 +15,4 @@ echo ""
check_prereqs
-# check zram module exists
-MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko
-if [ -f $MODULE_PATH ]; then
- run_zram
-elif [ -b /dev/zram0 ]; then
- run_zram
-else
- echo "$TCID : No zram.ko module or /dev/zram0 device file not found"
- echo "$TCID : CONFIG_ZRAM is not set"
- exit $ksft_skip
-fi
+run_zram
diff --git a/tools/testing/selftests/zram/zram01.sh b/tools/testing/selftests/zram/zram01.sh
index 114863d9fb87..8f4affe34f3e 100755
--- a/tools/testing/selftests/zram/zram01.sh
+++ b/tools/testing/selftests/zram/zram01.sh
@@ -33,9 +33,7 @@ zram_algs="lzo"
zram_fill_fs()
{
- local mem_free0=$(free -m | awk 'NR==2 {print $4}')
-
- for i in $(seq 0 $(($dev_num - 1))); do
+ for i in $(seq $dev_start $dev_end); do
echo "fill zram$i..."
local b=0
while [ true ]; do
@@ -45,29 +43,17 @@ zram_fill_fs()
b=$(($b + 1))
done
echo "zram$i can be filled with '$b' KB"
- done
- local mem_free1=$(free -m | awk 'NR==2 {print $4}')
- local used_mem=$(($mem_free0 - $mem_free1))
+ local mem_used_total=`awk '{print $3}' "/sys/block/zram$i/mm_stat"`
+ local v=$((100 * 1024 * $b / $mem_used_total))
+ if [ "$v" -lt 100 ]; then
+ echo "FAIL compression ratio: 0.$v:1"
+ ERR_CODE=-1
+ return
+ fi
- local total_size=0
- for sm in $zram_sizes; do
- local s=$(echo $sm | sed 's/M//')
- total_size=$(($total_size + $s))
+ echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK"
done
-
- echo "zram used ${used_mem}M, zram disk sizes ${total_size}M"
-
- local v=$((100 * $total_size / $used_mem))
-
- if [ "$v" -lt 100 ]; then
- echo "FAIL compression ratio: 0.$v:1"
- ERR_CODE=-1
- zram_cleanup
- return
- fi
-
- echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK"
}
check_prereqs
@@ -81,7 +67,6 @@ zram_mount
zram_fill_fs
zram_cleanup
-zram_unload
if [ $ERR_CODE -ne 0 ]; then
echo "$TCID : [FAIL]"
diff --git a/tools/testing/selftests/zram/zram02.sh b/tools/testing/selftests/zram/zram02.sh
index e83b404807c0..2418b0c4ed13 100755
--- a/tools/testing/selftests/zram/zram02.sh
+++ b/tools/testing/selftests/zram/zram02.sh
@@ -36,7 +36,6 @@ zram_set_memlimit
zram_makeswap
zram_swapoff
zram_cleanup
-zram_unload
if [ $ERR_CODE -ne 0 ]; then
echo "$TCID : [FAIL]"
diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh
index 6f872f266fd1..21ec1966de76 100755
--- a/tools/testing/selftests/zram/zram_lib.sh
+++ b/tools/testing/selftests/zram/zram_lib.sh
@@ -5,12 +5,17 @@
# Author: Alexey Kodanev <alexey.kodanev@oracle.com>
# Modified: Naresh Kamboju <naresh.kamboju@linaro.org>
-MODULE=0
dev_makeswap=-1
dev_mounted=-1
-
+dev_start=0
+dev_end=-1
+module_load=-1
+sys_control=-1
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
+kernel_version=`uname -r | cut -d'.' -f1,2`
+kernel_major=${kernel_version%.*}
+kernel_minor=${kernel_version#*.}
trap INT
@@ -25,68 +30,104 @@ check_prereqs()
fi
}
+kernel_gte()
+{
+ major=${1%.*}
+ minor=${1#*.}
+
+ if [ $kernel_major -gt $major ]; then
+ return 0
+ elif [ $kernel_major -eq $major ] && [ $kernel_minor -ge $minor ]; then
+ return 0
+ fi
+
+ return 1
+}
+
zram_cleanup()
{
echo "zram cleanup"
local i=
- for i in $(seq 0 $dev_makeswap); do
+ for i in $(seq $dev_start $dev_makeswap); do
swapoff /dev/zram$i
done
- for i in $(seq 0 $dev_mounted); do
+ for i in $(seq $dev_start $dev_mounted); do
umount /dev/zram$i
done
- for i in $(seq 0 $(($dev_num - 1))); do
+ for i in $(seq $dev_start $dev_end); do
echo 1 > /sys/block/zram${i}/reset
rm -rf zram$i
done
-}
+ if [ $sys_control -eq 1 ]; then
+ for i in $(seq $dev_start $dev_end); do
+ echo $i > /sys/class/zram-control/hot_remove
+ done
+ fi
-zram_unload()
-{
- if [ $MODULE -ne 0 ] ; then
- echo "zram rmmod zram"
+ if [ $module_load -eq 1 ]; then
rmmod zram > /dev/null 2>&1
fi
}
zram_load()
{
- # check zram module exists
- MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko
- if [ -f $MODULE_PATH ]; then
- MODULE=1
- echo "create '$dev_num' zram device(s)"
- modprobe zram num_devices=$dev_num
- if [ $? -ne 0 ]; then
- echo "failed to insert zram module"
- exit 1
- fi
-
- dev_num_created=$(ls /dev/zram* | wc -w)
+ echo "create '$dev_num' zram device(s)"
+
+ # zram module loaded, new kernel
+ if [ -d "/sys/class/zram-control" ]; then
+ echo "zram modules already loaded, kernel supports" \
+ "zram-control interface"
+ dev_start=$(ls /dev/zram* | wc -w)
+ dev_end=$(($dev_start + $dev_num - 1))
+ sys_control=1
+
+ for i in $(seq $dev_start $dev_end); do
+ cat /sys/class/zram-control/hot_add > /dev/null
+ done
+
+ echo "all zram devices (/dev/zram$dev_start~$dev_end" \
+ "successfully created"
+ return 0
+ fi
- if [ "$dev_num_created" -ne "$dev_num" ]; then
- echo "unexpected num of devices: $dev_num_created"
- ERR_CODE=-1
+ # detect old kernel or built-in
+ modprobe zram num_devices=$dev_num
+ if [ ! -d "/sys/class/zram-control" ]; then
+ if grep -q '^zram' /proc/modules; then
+ rmmod zram > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ echo "zram module is being used on old kernel" \
+ "without zram-control interface"
+ exit $ksft_skip
+ fi
else
- echo "zram load module successful"
+ echo "test needs CONFIG_ZRAM=m on old kernel without" \
+ "zram-control interface"
+ exit $ksft_skip
fi
- elif [ -b /dev/zram0 ]; then
- echo "/dev/zram0 device file found: OK"
- else
- echo "ERROR: No zram.ko module or no /dev/zram0 device found"
- echo "$TCID : CONFIG_ZRAM is not set"
- exit 1
+ modprobe zram num_devices=$dev_num
fi
+
+ module_load=1
+ dev_end=$(($dev_num - 1))
+ echo "all zram devices (/dev/zram0~$dev_end) successfully created"
}
zram_max_streams()
{
echo "set max_comp_streams to zram device(s)"
- local i=0
+ kernel_gte 4.7
+ if [ $? -eq 0 ]; then
+ echo "The device attribute max_comp_streams was"\
+ "deprecated in 4.7"
+ return 0
+ fi
+
+ local i=$dev_start
for max_s in $zram_max_streams; do
local sys_path="/sys/block/zram${i}/max_comp_streams"
echo $max_s > $sys_path || \
@@ -98,7 +139,7 @@ zram_max_streams()
echo "FAIL can't set max_streams '$max_s', get $max_stream"
i=$(($i + 1))
- echo "$sys_path = '$max_streams' ($i/$dev_num)"
+ echo "$sys_path = '$max_streams'"
done
echo "zram max streams: OK"
@@ -108,15 +149,16 @@ zram_compress_alg()
{
echo "test that we can set compression algorithm"
- local algs=$(cat /sys/block/zram0/comp_algorithm)
+ local i=$dev_start
+ local algs=$(cat /sys/block/zram${i}/comp_algorithm)
echo "supported algs: $algs"
- local i=0
+
for alg in $zram_algs; do
local sys_path="/sys/block/zram${i}/comp_algorithm"
echo "$alg" > $sys_path || \
echo "FAIL can't set '$alg' to $sys_path"
i=$(($i + 1))
- echo "$sys_path = '$alg' ($i/$dev_num)"
+ echo "$sys_path = '$alg'"
done
echo "zram set compression algorithm: OK"
@@ -125,14 +167,14 @@ zram_compress_alg()
zram_set_disksizes()
{
echo "set disk size to zram device(s)"
- local i=0
+ local i=$dev_start
for ds in $zram_sizes; do
local sys_path="/sys/block/zram${i}/disksize"
echo "$ds" > $sys_path || \
echo "FAIL can't set '$ds' to $sys_path"
i=$(($i + 1))
- echo "$sys_path = '$ds' ($i/$dev_num)"
+ echo "$sys_path = '$ds'"
done
echo "zram set disksizes: OK"
@@ -142,14 +184,14 @@ zram_set_memlimit()
{
echo "set memory limit to zram device(s)"
- local i=0
+ local i=$dev_start
for ds in $zram_mem_limits; do
local sys_path="/sys/block/zram${i}/mem_limit"
echo "$ds" > $sys_path || \
echo "FAIL can't set '$ds' to $sys_path"
i=$(($i + 1))
- echo "$sys_path = '$ds' ($i/$dev_num)"
+ echo "$sys_path = '$ds'"
done
echo "zram set memory limit: OK"
@@ -158,8 +200,8 @@ zram_set_memlimit()
zram_makeswap()
{
echo "make swap with zram device(s)"
- local i=0
- for i in $(seq 0 $(($dev_num - 1))); do
+ local i=$dev_start
+ for i in $(seq $dev_start $dev_end); do
mkswap /dev/zram$i > err.log 2>&1
if [ $? -ne 0 ]; then
cat err.log
@@ -182,7 +224,7 @@ zram_makeswap()
zram_swapoff()
{
local i=
- for i in $(seq 0 $dev_makeswap); do
+ for i in $(seq $dev_start $dev_end); do
swapoff /dev/zram$i > err.log 2>&1
if [ $? -ne 0 ]; then
cat err.log
@@ -196,7 +238,7 @@ zram_swapoff()
zram_makefs()
{
- local i=0
+ local i=$dev_start
for fs in $zram_filesystems; do
# if requested fs not supported default it to ext2
which mkfs.$fs > /dev/null 2>&1 || fs=ext2
@@ -215,7 +257,7 @@ zram_makefs()
zram_mount()
{
local i=0
- for i in $(seq 0 $(($dev_num - 1))); do
+ for i in $(seq $dev_start $dev_end); do
echo "mount /dev/zram$i"
mkdir zram$i
mount /dev/zram$i zram$i > /dev/null || \
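The zram-control interface used above is the documented sysfs ABI: a read of /sys/class/zram-control/hot_add allocates a new device and returns its id, and writing that id to hot_remove destroys it. The same round trip, sketched in C (requires root, like the script):

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	char buf[16] = {0};
	int id, fd = open("/sys/class/zram-control/hot_add", O_RDONLY);

	/* The read itself creates the device and yields its id. */
	if (fd < 0 || read(fd, buf, sizeof(buf) - 1) <= 0) {
		perror("hot_add");
		return 1;
	}
	close(fd);
	id = atoi(buf);
	printf("created /dev/zram%d\n", id);

	fd = open("/sys/class/zram-control/hot_remove", O_WRONLY);
	if (fd < 0 || dprintf(fd, "%d", id) < 0) {
		perror("hot_remove");
		return 1;
	}
	close(fd);
	return 0;
}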
diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c
index 5648f9252e58..b8ec6c15bccb 100644
--- a/tools/tracing/rtla/src/osnoise.c
+++ b/tools/tracing/rtla/src/osnoise.c
@@ -656,6 +656,85 @@ void osnoise_put_print_stack(struct osnoise_context *context)
}
/*
+ * osnoise_get_tracing_thresh - read and save the original "tracing_thresh"
+ */
+static long long
+osnoise_get_tracing_thresh(struct osnoise_context *context)
+{
+ long long tracing_thresh;
+
+ if (context->tracing_thresh != OSNOISE_OPTION_INIT_VAL)
+ return context->tracing_thresh;
+
+ if (context->orig_tracing_thresh != OSNOISE_OPTION_INIT_VAL)
+ return context->orig_tracing_thresh;
+
+ tracing_thresh = osnoise_read_ll_config("tracing_thresh");
+ if (tracing_thresh < 0)
+ goto out_err;
+
+ context->orig_tracing_thresh = tracing_thresh;
+ return tracing_thresh;
+
+out_err:
+ return OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_set_tracing_thresh - set "tracing_thresh"
+ */
+int osnoise_set_tracing_thresh(struct osnoise_context *context, long long tracing_thresh)
+{
+ long long curr_tracing_thresh = osnoise_get_tracing_thresh(context);
+ int retval;
+
+ if (curr_tracing_thresh == OSNOISE_OPTION_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("tracing_thresh", tracing_thresh);
+ if (retval < 0)
+ return -1;
+
+ context->tracing_thresh = tracing_thresh;
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_tracing_thresh - restore the original "tracing_thresh"
+ */
+void osnoise_restore_tracing_thresh(struct osnoise_context *context)
+{
+ int retval;
+
+ if (context->orig_tracing_thresh == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ if (context->orig_tracing_thresh == context->tracing_thresh)
+ goto out_done;
+
+ retval = osnoise_write_ll_config("tracing_thresh", context->orig_tracing_thresh);
+ if (retval < 0)
+ err_msg("Could not restore original tracing_thresh\n");
+
+out_done:
+ context->tracing_thresh = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_put_tracing_thresh - restore original values and cleanup data
+ */
+void osnoise_put_tracing_thresh(struct osnoise_context *context)
+{
+ osnoise_restore_tracing_thresh(context);
+
+ if (context->orig_tracing_thresh == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ context->orig_tracing_thresh = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
* enable_osnoise - enable osnoise tracer in the trace_instance
*/
int enable_osnoise(struct trace_instance *trace)
@@ -716,6 +795,9 @@ struct osnoise_context *osnoise_context_alloc(void)
context->orig_print_stack = OSNOISE_OPTION_INIT_VAL;
context->print_stack = OSNOISE_OPTION_INIT_VAL;
+ context->orig_tracing_thresh = OSNOISE_OPTION_INIT_VAL;
+ context->tracing_thresh = OSNOISE_OPTION_INIT_VAL;
+
osnoise_get_context(context);
return context;
@@ -741,6 +823,7 @@ void osnoise_put_context(struct osnoise_context *context)
osnoise_put_stop_total_us(context);
osnoise_put_timerlat_period_us(context);
osnoise_put_print_stack(context);
+ osnoise_put_tracing_thresh(context);
free(context);
}
@@ -810,7 +893,7 @@ struct osnoise_tool *osnoise_init_trace_tool(char *tracer)
retval = enable_tracer_by_name(trace->trace.inst, tracer);
if (retval) {
- err_msg("Could not enable osnoiser tracer for tracing\n");
+ err_msg("Could not enable %s tracer for tracing\n", tracer);
goto out_err;
}
diff --git a/tools/tracing/rtla/src/osnoise.h b/tools/tracing/rtla/src/osnoise.h
index 9e4b2e2a4559..04a4384cc544 100644
--- a/tools/tracing/rtla/src/osnoise.h
+++ b/tools/tracing/rtla/src/osnoise.h
@@ -23,6 +23,10 @@ struct osnoise_context {
long long orig_timerlat_period_us;
long long timerlat_period_us;
+ /* 0 as init value */
+ long long orig_tracing_thresh;
+ long long tracing_thresh;
+
/* -1 as init value because 0 is disabled */
long long orig_stop_us;
long long stop_us;
@@ -67,6 +71,10 @@ int osnoise_set_timerlat_period_us(struct osnoise_context *context,
long long timerlat_period_us);
void osnoise_restore_timerlat_period_us(struct osnoise_context *context);
+int osnoise_set_tracing_thresh(struct osnoise_context *context,
+ long long tracing_thresh);
+void osnoise_restore_tracing_thresh(struct osnoise_context *context);
+
void osnoise_restore_print_stack(struct osnoise_context *context);
int osnoise_set_print_stack(struct osnoise_context *context,
long long print_stack);
diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c
index 1f0b7fce55cf..b4380d45cacd 100644
--- a/tools/tracing/rtla/src/osnoise_hist.c
+++ b/tools/tracing/rtla/src/osnoise_hist.c
@@ -21,6 +21,7 @@ struct osnoise_hist_params {
char *trace_output;
unsigned long long runtime;
unsigned long long period;
+ long long threshold;
long long stop_us;
long long stop_total_us;
int sleep_time;
@@ -28,6 +29,7 @@ struct osnoise_hist_params {
int set_sched;
int output_divisor;
struct sched_attr sched_param;
+ struct trace_events *events;
char no_header;
char no_summary;
@@ -425,21 +427,27 @@ static void osnoise_hist_usage(char *usage)
static const char * const msg[] = {
"",
- " usage: rtla osnoise hist [-h] [-D] [-d s] [-p us] [-r us] [-s us] [-S us] [-t[=file]] \\",
- " [-c cpu-list] [-P priority] [-b N] [-e N] [--no-header] [--no-summary] \\",
- " [--no-index] [--with-zeros]",
+ " usage: rtla osnoise hist [-h] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\",
+ " [-T us] [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\",
+ " [-c cpu-list] [-P priority] [-b N] [-E N] [--no-header] [--no-summary] [--no-index] \\",
+ " [--with-zeros]",
"",
" -h/--help: print this menu",
+ " -a/--auto: set automatic trace mode, stopping the session if argument in us sample is hit",
" -p/--period us: osnoise period in us",
" -r/--runtime us: osnoise runtime in us",
" -s/--stop us: stop trace if a single sample is higher than the argument in us",
" -S/--stop-total us: stop trace if the total sample is higher than the argument in us",
+ " -T/--threshold us: the minimum delta to be considered a noise",
" -c/--cpus cpu-list: list of cpus to run osnoise threads",
" -d/--duration time[s|m|h|d]: duration of the session",
" -D/--debug: print debug info",
" -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]",
+ " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed",
+ " --filter <filter>: enable a trace event filter to the previous -e event",
+ " --trigger <trigger>: enable a trace event trigger to the previous -e event",
" -b/--bucket-size N: set the histogram bucket size (default 1)",
- " -e/--entries N: set the number of entries of the histogram (default 256)",
+ " -E/--entries N: set the number of entries of the histogram (default 256)",
" --no-header: do not print header",
" --no-summary: do not print summary",
" --no-index: do not print index",
@@ -471,6 +479,7 @@ static struct osnoise_hist_params
*osnoise_hist_parse_args(int argc, char *argv[])
{
struct osnoise_hist_params *params;
+ struct trace_events *tevent;
int retval;
int c;
@@ -485,8 +494,9 @@ static struct osnoise_hist_params
while (1) {
static struct option long_options[] = {
+ {"auto", required_argument, 0, 'a'},
{"bucket-size", required_argument, 0, 'b'},
- {"entries", required_argument, 0, 'e'},
+ {"entries", required_argument, 0, 'E'},
{"cpus", required_argument, 0, 'c'},
{"debug", no_argument, 0, 'D'},
{"duration", required_argument, 0, 'd'},
@@ -497,17 +507,21 @@ static struct osnoise_hist_params
{"stop", required_argument, 0, 's'},
{"stop-total", required_argument, 0, 'S'},
{"trace", optional_argument, 0, 't'},
+ {"event", required_argument, 0, 'e'},
+ {"threshold", required_argument, 0, 'T'},
{"no-header", no_argument, 0, '0'},
{"no-summary", no_argument, 0, '1'},
{"no-index", no_argument, 0, '2'},
{"with-zeros", no_argument, 0, '3'},
+ {"trigger", required_argument, 0, '4'},
+ {"filter", required_argument, 0, '5'},
{0, 0, 0, 0}
};
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long(argc, argv, "c:b:d:e:Dhp:P:r:s:S:t::0123",
+ c = getopt_long(argc, argv, "a:c:b:d:e:E:Dhp:P:r:s:S:t::T:01234:5:",
long_options, &option_index);
/* detect the end of the options. */
@@ -515,6 +529,17 @@ static struct osnoise_hist_params
break;
switch (c) {
+ case 'a':
+ /* set sample stop to auto_thresh */
+ params->stop_us = get_llong_from_str(optarg);
+
+ /* set sample threshold to 1 */
+ params->threshold = 1;
+
+ /* set trace */
+ params->trace_output = "osnoise_trace.txt";
+
+ break;
case 'b':
params->bucket_size = get_llong_from_str(optarg);
if ((params->bucket_size == 0) || (params->bucket_size >= 1000000))
@@ -535,6 +560,18 @@ static struct osnoise_hist_params
osnoise_hist_usage("Invalid -D duration\n");
break;
case 'e':
+ tevent = trace_event_alloc(optarg);
+ if (!tevent) {
+ err_msg("Error alloc trace event");
+ exit(EXIT_FAILURE);
+ }
+
+ if (params->events)
+ tevent->next = params->events;
+
+ params->events = tevent;
+ break;
+ case 'E':
params->entries = get_llong_from_str(optarg);
if ((params->entries < 10) || (params->entries > 9999999))
osnoise_hist_usage("Entries must be > 10 and < 9999999\n");
@@ -565,6 +602,9 @@ static struct osnoise_hist_params
case 'S':
params->stop_total_us = get_llong_from_str(optarg);
break;
+ case 'T':
+ params->threshold = get_llong_from_str(optarg);
+ break;
case 't':
if (optarg)
/* skip = */
@@ -584,6 +624,28 @@ static struct osnoise_hist_params
case '3': /* with zeros */
params->with_zeros = 1;
break;
+ case '4': /* trigger */
+ if (params->events) {
+ retval = trace_event_add_trigger(params->events, optarg);
+ if (retval) {
+ err_msg("Error adding trigger %s\n", optarg);
+ exit(EXIT_FAILURE);
+ }
+ } else {
+ osnoise_hist_usage("--trigger requires a previous -e\n");
+ }
+ break;
+ case '5': /* filter */
+ if (params->events) {
+ retval = trace_event_add_filter(params->events, optarg);
+ if (retval) {
+ err_msg("Error adding filter %s\n", optarg);
+ exit(EXIT_FAILURE);
+ }
+ } else {
+ osnoise_hist_usage("--filter requires a previous -e\n");
+ }
+ break;
default:
osnoise_hist_usage("Invalid option");
}
@@ -645,6 +707,14 @@ osnoise_hist_apply_config(struct osnoise_tool *tool, struct osnoise_hist_params
}
}
+ if (params->threshold) {
+ retval = osnoise_set_tracing_thresh(tool->context, params->threshold);
+ if (retval) {
+ err_msg("Failed to set tracing_thresh\n");
+ goto out_err;
+ }
+ }
+
return 0;
out_err:
@@ -751,6 +821,13 @@ int osnoise_hist_main(int argc, char *argv[])
err_msg("Failed to enable the trace instance\n");
goto out_hist;
}
+
+ if (params->events) {
+ retval = trace_events_enable(&record->trace, params->events);
+ if (retval)
+ goto out_hist;
+ }
+
trace_instance_start(&record->trace);
}
@@ -771,9 +848,9 @@ int osnoise_hist_main(int argc, char *argv[])
goto out_hist;
}
- if (!tracefs_trace_is_on(trace->inst))
+ if (trace_is_off(&tool->trace, &record->trace))
break;
- };
+ }
osnoise_read_trace_hist(tool);
@@ -781,8 +858,8 @@ int osnoise_hist_main(int argc, char *argv[])
return_value = 0;
- if (!tracefs_trace_is_on(trace->inst)) {
- printf("rtla timelat hit stop tracing\n");
+ if (trace_is_off(&tool->trace, &record->trace)) {
+ printf("rtla osnoise hit stop tracing\n");
if (params->trace_output) {
printf(" Saving trace to %s\n", params->trace_output);
save_trace_to_file(record->trace.inst, params->trace_output);
@@ -790,6 +867,8 @@ int osnoise_hist_main(int argc, char *argv[])
}
out_hist:
+ trace_events_destroy(&record->trace, params->events);
+ params->events = NULL;
osnoise_free_histogram(tool->data);
out_destroy:
osnoise_destroy_tool(record);
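How the new options compose at the API level: each -e allocates a trace_events node that is pushed onto params->events, and --filter/--trigger attach strings to the most recent node, which is why both demand a previous -e. Sketched with the helpers this series adds in trace.c; the header names are assumed, and cleanup on failure is omitted:

#include "trace.h"
#include "utils.h"

/* Equivalent of: -e osnoise:irq_noise --filter "duration > 10000" */
static struct trace_events *build_events_example(void)
{
	struct trace_events *tevent = trace_event_alloc("osnoise:irq_noise");

	if (!tevent)
		return NULL;

	/* Attach the filter string; it is applied when the event is enabled. */
	if (trace_event_add_filter(tevent, "duration > 10000")) {
		err_msg("Error adding filter\n");
		return NULL;
	}

	return tevent;
}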
diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c
index c67dc28ef716..72c2fd6ce005 100644
--- a/tools/tracing/rtla/src/osnoise_top.c
+++ b/tools/tracing/rtla/src/osnoise_top.c
@@ -23,6 +23,7 @@ struct osnoise_top_params {
char *trace_output;
unsigned long long runtime;
unsigned long long period;
+ long long threshold;
long long stop_us;
long long stop_total_us;
int sleep_time;
@@ -30,6 +31,7 @@ struct osnoise_top_params {
int quiet;
int set_sched;
struct sched_attr sched_param;
+ struct trace_events *events;
};
struct osnoise_top_cpu {
@@ -244,18 +246,24 @@ void osnoise_top_usage(char *usage)
int i;
static const char * const msg[] = {
- " usage: rtla osnoise [top] [-h] [-q] [-D] [-d s] [-p us] [-r us] [-s us] [-S us] [-t[=file]] \\",
+ " usage: rtla osnoise [top] [-h] [-q] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\",
+ " [-T us] [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\",
" [-c cpu-list] [-P priority]",
"",
" -h/--help: print this menu",
+ " -a/--auto: set automatic trace mode, stopping the session if argument in us sample is hit",
" -p/--period us: osnoise period in us",
" -r/--runtime us: osnoise runtime in us",
" -s/--stop us: stop trace if a single sample is higher than the argument in us",
" -S/--stop-total us: stop trace if the total sample is higher than the argument in us",
+ " -T/--threshold us: the minimum delta to be considered a noise",
" -c/--cpus cpu-list: list of cpus to run osnoise threads",
" -d/--duration time[s|m|h|d]: duration of the session",
" -D/--debug: print debug info",
" -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]",
+ " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed",
+ " --filter <filter>: enable a trace event filter to the previous -e event",
+ " --trigger <trigger>: enable a trace event trigger to the previous -e event",
" -q/--quiet print only a summary at the end",
" -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters",
" o:prio - use SCHED_OTHER with prio",
@@ -283,6 +291,7 @@ void osnoise_top_usage(char *usage)
struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
{
struct osnoise_top_params *params;
+ struct trace_events *tevent;
int retval;
int c;
@@ -292,9 +301,11 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
while (1) {
static struct option long_options[] = {
+ {"auto", required_argument, 0, 'a'},
{"cpus", required_argument, 0, 'c'},
{"debug", no_argument, 0, 'D'},
{"duration", required_argument, 0, 'd'},
+ {"event", required_argument, 0, 'e'},
{"help", no_argument, 0, 'h'},
{"period", required_argument, 0, 'p'},
{"priority", required_argument, 0, 'P'},
@@ -302,14 +313,17 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
{"runtime", required_argument, 0, 'r'},
{"stop", required_argument, 0, 's'},
{"stop-total", required_argument, 0, 'S'},
+ {"threshold", required_argument, 0, 'T'},
{"trace", optional_argument, 0, 't'},
+ {"trigger", required_argument, 0, '0'},
+ {"filter", required_argument, 0, '1'},
{0, 0, 0, 0}
};
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long(argc, argv, "c:d:Dhp:P:qr:s:S:t::",
+ c = getopt_long(argc, argv, "a:c:d:De:hp:P:qr:s:S:t::T:0:1:",
long_options, &option_index);
/* Detect the end of the options. */
@@ -317,6 +331,17 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
break;
switch (c) {
+ case 'a':
+ /* set sample stop to auto_thresh */
+ params->stop_us = get_llong_from_str(optarg);
+
+ /* set sample threshold to 1 */
+ params->threshold = 1;
+
+ /* set trace */
+ params->trace_output = "osnoise_trace.txt";
+
+ break;
case 'c':
retval = parse_cpu_list(optarg, &params->monitored_cpus);
if (retval)
@@ -331,6 +356,18 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
if (!params->duration)
osnoise_top_usage("Invalid -D duration\n");
break;
+ case 'e':
+ tevent = trace_event_alloc(optarg);
+ if (!tevent) {
+ err_msg("Error alloc trace event");
+ exit(EXIT_FAILURE);
+ }
+
+ if (params->events)
+ tevent->next = params->events;
+ params->events = tevent;
+
+ break;
case 'h':
case '?':
osnoise_top_usage(NULL);
@@ -367,6 +404,31 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
else
params->trace_output = "osnoise_trace.txt";
break;
+ case 'T':
+ params->threshold = get_llong_from_str(optarg);
+ break;
+ case '0': /* trigger */
+ if (params->events) {
+ retval = trace_event_add_trigger(params->events, optarg);
+ if (retval) {
+ err_msg("Error adding trigger %s\n", optarg);
+ exit(EXIT_FAILURE);
+ }
+ } else {
+ osnoise_top_usage("--trigger requires a previous -e\n");
+ }
+ break;
+ case '1': /* filter */
+ if (params->events) {
+ retval = trace_event_add_filter(params->events, optarg);
+ if (retval) {
+ err_msg("Error adding filter %s\n", optarg);
+ exit(EXIT_FAILURE);
+ }
+ } else {
+ osnoise_top_usage("--filter requires a previous -e\n");
+ }
+ break;
default:
osnoise_top_usage("Invalid option");
}
@@ -425,6 +487,14 @@ osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *p
}
}
+ if (params->threshold) {
+ retval = osnoise_set_tracing_thresh(tool->context, params->threshold);
+ if (retval) {
+ err_msg("Failed to set tracing_thresh\n");
+ goto out_err;
+ }
+ }
+
return 0;
out_err:
@@ -529,13 +599,20 @@ int osnoise_top_main(int argc, char **argv)
err_msg("Failed to enable the trace instance\n");
goto out_top;
}
+
+ if (params->events) {
+ retval = trace_events_enable(&record->trace, params->events);
+ if (retval)
+ goto out_top;
+ }
+
trace_instance_start(&record->trace);
}
tool->start_time = time(NULL);
osnoise_top_set_signals(params);
- do {
+ while (!stop_tracing) {
sleep(params->sleep_time);
retval = tracefs_iterate_raw_events(trace->tep,
@@ -552,16 +629,16 @@ int osnoise_top_main(int argc, char **argv)
if (!params->quiet)
osnoise_print_stats(params, tool);
- if (!tracefs_trace_is_on(trace->inst))
+ if (trace_is_off(&tool->trace, &record->trace))
break;
- } while (!stop_tracing);
+ }
osnoise_print_stats(params, tool);
return_value = 0;
- if (!tracefs_trace_is_on(trace->inst)) {
+ if (trace_is_off(&tool->trace, &record->trace)) {
printf("osnoise hit stop tracing\n");
if (params->trace_output) {
printf(" Saving trace to %s\n", params->trace_output);
@@ -570,9 +647,12 @@ int osnoise_top_main(int argc, char **argv)
}
out_top:
+ trace_events_destroy(&record->trace, params->events);
+ params->events = NULL;
osnoise_free_top(tool->data);
osnoise_destroy_tool(record);
osnoise_destroy_tool(tool);
+ free(params);
out_exit:
exit(return_value);
}
diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
index 436a799f9adf..dc908126c610 100644
--- a/tools/tracing/rtla/src/timerlat_hist.c
+++ b/tools/tracing/rtla/src/timerlat_hist.c
@@ -28,7 +28,9 @@ struct timerlat_hist_params {
int output_divisor;
int duration;
int set_sched;
+ int dma_latency;
struct sched_attr sched_param;
+ struct trace_events *events;
char no_irq;
char no_thread;
@@ -428,11 +430,13 @@ static void timerlat_hist_usage(char *usage)
char *msg[] = {
"",
- " usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] [-t[=file]] \\",
- " [-c cpu-list] [-P priority] [-e N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\",
- " [--no-index] [--with-zeros]",
+ " usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-a us] [-p us] [-i us] [-T us] [-s us] \\",
+ " [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] \\",
+ " [-P priority] [-E N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\",
+ " [--no-index] [--with-zeros] [--dma-latency us]",
"",
" -h/--help: print this menu",
+ " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit",
" -p/--period us: timerlat period in us",
" -i/--irq us: stop trace if the irq latency is higher than the argument in us",
" -T/--thread us: stop trace if the thread latency is higher than the argument in us",
@@ -440,16 +444,20 @@ static void timerlat_hist_usage(char *usage)
" -c/--cpus cpus: run the tracer only on the given cpus",
" -d/--duration time[m|h|d]: duration of the session in seconds",
" -D/--debug: print debug info",
- " -T/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]",
+ " -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]",
+ " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed",
+ " --filter <filter>: enable a trace event filter to the previous -e event",
+ " --trigger <trigger>: enable a trace event trigger to the previous -e event",
" -n/--nano: display data in nanoseconds",
" -b/--bucket-size N: set the histogram bucket size (default 1)",
- " -e/--entries N: set the number of entries of the histogram (default 256)",
+ " -E/--entries N: set the number of entries of the histogram (default 256)",
" --no-irq: ignore IRQ latencies",
" --no-thread: ignore thread latencies",
" --no-header: do not print header",
" --no-summary: do not print summary",
" --no-index: do not print index",
" --with-zeros: print zero only entries",
+ " --dma-latency us: set /dev/cpu_dma_latency latency <us> to reduce exit from idle latency",
" -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters",
" o:prio - use SCHED_OTHER with prio",
" r:prio - use SCHED_RR with prio",
@@ -477,6 +485,8 @@ static struct timerlat_hist_params
*timerlat_hist_parse_args(int argc, char *argv[])
{
struct timerlat_hist_params *params;
+ struct trace_events *tevent;
+ long long auto_thresh;
int retval;
int c;
@@ -484,6 +494,9 @@ static struct timerlat_hist_params
if (!params)
exit(1);
+ /* disabled by default */
+ params->dma_latency = -1;
+
/* display data in microseconds */
params->output_divisor = 1000;
params->bucket_size = 1;
@@ -491,10 +504,11 @@ static struct timerlat_hist_params
while (1) {
static struct option long_options[] = {
+ {"auto", required_argument, 0, 'a'},
{"cpus", required_argument, 0, 'c'},
{"bucket-size", required_argument, 0, 'b'},
{"debug", no_argument, 0, 'D'},
- {"entries", required_argument, 0, 'e'},
+ {"entries", required_argument, 0, 'E'},
{"duration", required_argument, 0, 'd'},
{"help", no_argument, 0, 'h'},
{"irq", required_argument, 0, 'i'},
@@ -504,19 +518,23 @@ static struct timerlat_hist_params
{"stack", required_argument, 0, 's'},
{"thread", required_argument, 0, 'T'},
{"trace", optional_argument, 0, 't'},
+ {"event", required_argument, 0, 'e'},
{"no-irq", no_argument, 0, '0'},
{"no-thread", no_argument, 0, '1'},
{"no-header", no_argument, 0, '2'},
{"no-summary", no_argument, 0, '3'},
{"no-index", no_argument, 0, '4'},
{"with-zeros", no_argument, 0, '5'},
+ {"trigger", required_argument, 0, '6'},
+ {"filter", required_argument, 0, '7'},
+ {"dma-latency", required_argument, 0, '8'},
{0, 0, 0, 0}
};
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long(argc, argv, "c:b:d:e:Dhi:np:P:s:t::T:012345",
+ c = getopt_long(argc, argv, "a:c:b:d:e:E:Dhi:np:P:s:t::T:0123456:7:8:",
long_options, &option_index);
/* detect the end of the options. */
@@ -524,6 +542,19 @@ static struct timerlat_hist_params
break;
switch (c) {
+ case 'a':
+ auto_thresh = get_llong_from_str(optarg);
+
+ /* set thread stop to auto_thresh */
+ params->stop_total_us = auto_thresh;
+
+ /* get stack trace */
+ params->print_stack = auto_thresh;
+
+ /* set trace */
+ params->trace_output = "timerlat_trace.txt";
+
+ break;
case 'c':
retval = parse_cpu_list(optarg, &params->monitored_cpus);
if (retval)
@@ -544,6 +575,18 @@ static struct timerlat_hist_params
timerlat_hist_usage("Invalid -D duration\n");
break;
case 'e':
+ tevent = trace_event_alloc(optarg);
+ if (!tevent) {
+ err_msg("Error alloc trace event");
+ exit(EXIT_FAILURE);
+ }
+
+ if (params->events)
+ tevent->next = params->events;
+
+ params->events = tevent;
+ break;
+ case 'E':
params->entries = get_llong_from_str(optarg);
if ((params->entries < 10) || (params->entries > 9999999))
timerlat_hist_usage("Entries must be > 10 and < 9999999\n");
@@ -600,6 +643,35 @@ static struct timerlat_hist_params
case '5': /* with zeros */
params->with_zeros = 1;
break;
+ case '6': /* trigger */
+ if (params->events) {
+ retval = trace_event_add_trigger(params->events, optarg);
+ if (retval) {
+ err_msg("Error adding trigger %s\n", optarg);
+ exit(EXIT_FAILURE);
+ }
+ } else {
+ timerlat_hist_usage("--trigger requires a previous -e\n");
+ }
+ break;
+ case '7': /* filter */
+ if (params->events) {
+ retval = trace_event_add_filter(params->events, optarg);
+ if (retval) {
+ err_msg("Error adding filter %s\n", optarg);
+ exit(EXIT_FAILURE);
+ }
+ } else {
+ timerlat_hist_usage("--filter requires a previous -e\n");
+ }
+ break;
+ case '8':
+ params->dma_latency = get_llong_from_str(optarg);
+ if (params->dma_latency < 0 || params->dma_latency > 10000) {
+ err_msg("--dma-latency needs to be >= 0 and < 10000");
+ exit(EXIT_FAILURE);
+ }
+ break;
default:
timerlat_hist_usage("Invalid option");
}
@@ -732,6 +804,7 @@ int timerlat_hist_main(int argc, char *argv[])
struct osnoise_tool *record = NULL;
struct osnoise_tool *tool = NULL;
struct trace_instance *trace;
+ int dma_latency_fd = -1;
int return_value = 1;
int retval;
@@ -767,6 +840,14 @@ int timerlat_hist_main(int argc, char *argv[])
}
}
+ if (params->dma_latency >= 0) {
+ dma_latency_fd = set_cpu_dma_latency(params->dma_latency);
+ if (dma_latency_fd < 0) {
+ err_msg("Could not set /dev/cpu_dma_latency.\n");
+ goto out_hist;
+ }
+ }
+
trace_instance_start(trace);
if (params->trace_output) {
@@ -775,6 +856,13 @@ int timerlat_hist_main(int argc, char *argv[])
err_msg("Failed to enable the trace instance\n");
goto out_hist;
}
+
+ if (params->events) {
+ retval = trace_events_enable(&record->trace, params->events);
+ if (retval)
+ goto out_hist;
+ }
+
trace_instance_start(&record->trace);
}
@@ -795,15 +883,15 @@ int timerlat_hist_main(int argc, char *argv[])
goto out_hist;
}
- if (!tracefs_trace_is_on(trace->inst))
+ if (trace_is_off(&tool->trace, &record->trace))
break;
- };
+ }
timerlat_print_stats(params, tool);
return_value = 0;
- if (!tracefs_trace_is_on(trace->inst)) {
+ if (trace_is_off(&tool->trace, &record->trace)) {
printf("rtla timelat hit stop tracing\n");
if (params->trace_output) {
printf(" Saving trace to %s\n", params->trace_output);
@@ -812,6 +900,10 @@ int timerlat_hist_main(int argc, char *argv[])
}
out_hist:
+ if (dma_latency_fd >= 0)
+ close(dma_latency_fd);
+ trace_events_destroy(&record->trace, params->events);
+ params->events = NULL;
timerlat_free_histogram(tool->data);
osnoise_destroy_tool(record);
osnoise_destroy_tool(tool);
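What --dma-latency relies on: writing a 32-bit microsecond value to /dev/cpu_dma_latency adds a PM QoS request that is held for as long as the file descriptor stays open, which is why the tool keeps dma_latency_fd around and only closes it on the exit path. A hedged sketch of that mechanism; the _sketch suffix marks it as illustrative, not the tool's set_cpu_dma_latency():

#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

int set_cpu_dma_latency_sketch(int32_t latency_us)
{
	int fd = open("/dev/cpu_dma_latency", O_RDWR);

	if (fd < 0)
		return -1;

	/* The kernel expects the raw 32-bit value, not ASCII. */
	if (write(fd, &latency_us, sizeof(latency_us)) != sizeof(latency_us)) {
		close(fd);
		return -1;
	}

	/* Caller keeps fd open; closing it drops the QoS request. */
	return fd;
}

A latency of 0 keeps CPUs out of deep idle states entirely, trading power for the reduced exit-from-idle latency the usage text describes.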
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
index d4187f6534ed..1f754c3df53f 100644
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -29,7 +29,9 @@ struct timerlat_top_params {
int duration;
int quiet;
int set_sched;
+ int dma_latency;
struct sched_attr sched_param;
+ struct trace_events *events;
};
struct timerlat_top_cpu {
@@ -266,10 +268,12 @@ static void timerlat_top_usage(char *usage)
static const char *const msg[] = {
"",
- " usage: rtla timerlat [top] [-h] [-q] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] [-t[=file]] \\",
- " [-c cpu-list] [-P priority]",
+ " usage: rtla timerlat [top] [-h] [-q] [-a us] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] \\",
+ " [[-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] \\",
+ " [-P priority] [--dma-latency us]",
"",
" -h/--help: print this menu",
+ " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit",
" -p/--period us: timerlat period in us",
" -i/--irq us: stop trace if the irq latency is higher than the argument in us",
" -T/--thread us: stop trace if the thread latency is higher than the argument in us",
@@ -278,8 +282,12 @@ static void timerlat_top_usage(char *usage)
" -d/--duration time[m|h|d]: duration of the session in seconds",
" -D/--debug: print debug info",
" -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]",
+ " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed",
+ " --filter <command>: enable a trace event filter to the previous -e event",
+ " --trigger <command>: enable a trace event trigger to the previous -e event",
" -n/--nano: display data in nanoseconds",
" -q/--quiet print only a summary at the end",
+ " --dma-latency us: set /dev/cpu_dma_latency latency <us> to reduce exit from idle latency",
" -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters",
" o:prio - use SCHED_OTHER with prio",
" r:prio - use SCHED_RR with prio",
@@ -307,6 +315,8 @@ static struct timerlat_top_params
*timerlat_top_parse_args(int argc, char **argv)
{
struct timerlat_top_params *params;
+ struct trace_events *tevent;
+ long long auto_thresh;
int retval;
int c;
@@ -314,14 +324,19 @@ static struct timerlat_top_params
if (!params)
exit(1);
+ /* disabled by default */
+ params->dma_latency = -1;
+
/* display data in microseconds */
params->output_divisor = 1000;
while (1) {
static struct option long_options[] = {
+ {"auto", required_argument, 0, 'a'},
{"cpus", required_argument, 0, 'c'},
{"debug", no_argument, 0, 'D'},
{"duration", required_argument, 0, 'd'},
+ {"event", required_argument, 0, 'e'},
{"help", no_argument, 0, 'h'},
{"irq", required_argument, 0, 'i'},
{"nano", no_argument, 0, 'n'},
@@ -331,13 +346,16 @@ static struct timerlat_top_params
{"stack", required_argument, 0, 's'},
{"thread", required_argument, 0, 'T'},
{"trace", optional_argument, 0, 't'},
+ {"trigger", required_argument, 0, '0'},
+ {"filter", required_argument, 0, '1'},
+ {"dma-latency", required_argument, 0, '2'},
{0, 0, 0, 0}
};
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long(argc, argv, "c:d:Dhi:np:P:qs:t::T:",
+ c = getopt_long(argc, argv, "a:c:d:De:hi:np:P:qs:t::T:0:1:2:",
long_options, &option_index);
/* detect the end of the options. */
@@ -345,6 +363,19 @@ static struct timerlat_top_params
break;
switch (c) {
+ case 'a':
+ auto_thresh = get_llong_from_str(optarg);
+
+ /* set thread stop to auto_thresh */
+ params->stop_total_us = auto_thresh;
+
+ /* get stack trace */
+ params->print_stack = auto_thresh;
+
+ /* set trace */
+ params->trace_output = "timerlat_trace.txt";
+
+ break;
case 'c':
retval = parse_cpu_list(optarg, &params->monitored_cpus);
if (retval)
@@ -359,6 +390,17 @@ static struct timerlat_top_params
if (!params->duration)
timerlat_top_usage("Invalid -D duration\n");
break;
+ case 'e':
+ tevent = trace_event_alloc(optarg);
+ if (!tevent) {
+ err_msg("Error alloc trace event");
+ exit(EXIT_FAILURE);
+ }
+
+ if (params->events)
+ tevent->next = params->events;
+ params->events = tevent;
+ break;
case 'h':
case '?':
timerlat_top_usage(NULL);
@@ -396,6 +438,35 @@ static struct timerlat_top_params
else
params->trace_output = "timerlat_trace.txt";
break;
+ case '0': /* trigger */
+ if (params->events) {
+ retval = trace_event_add_trigger(params->events, optarg);
+ if (retval) {
+ err_msg("Error adding trigger %s\n", optarg);
+ exit(EXIT_FAILURE);
+ }
+ } else {
+ timerlat_top_usage("--trigger requires a previous -e\n");
+ }
+ break;
+ case '1': /* filter */
+ if (params->events) {
+ retval = trace_event_add_filter(params->events, optarg);
+ if (retval) {
+ err_msg("Error adding filter %s\n", optarg);
+ exit(EXIT_FAILURE);
+ }
+ } else {
+ timerlat_top_usage("--filter requires a previous -e\n");
+ }
+ break;
+ case '2': /* dma-latency */
+ params->dma_latency = get_llong_from_str(optarg);
+ if (params->dma_latency < 0 || params->dma_latency > 10000) {
+ err_msg("--dma-latency needs to be >= 0 and < 10000");
+ exit(EXIT_FAILURE);
+ }
+ break;
default:
timerlat_top_usage("Invalid option");
}
@@ -524,6 +595,7 @@ int timerlat_top_main(int argc, char *argv[])
struct osnoise_tool *record = NULL;
struct osnoise_tool *top = NULL;
struct trace_instance *trace;
+ int dma_latency_fd = -1;
int return_value = 1;
int retval;
@@ -559,6 +631,14 @@ int timerlat_top_main(int argc, char *argv[])
}
}
+ if (params->dma_latency >= 0) {
+ dma_latency_fd = set_cpu_dma_latency(params->dma_latency);
+ if (dma_latency_fd < 0) {
+ err_msg("Could not set /dev/cpu_dma_latency.\n");
+ goto out_top;
+ }
+ }
+
trace_instance_start(trace);
if (params->trace_output) {
@@ -567,6 +647,13 @@ int timerlat_top_main(int argc, char *argv[])
err_msg("Failed to enable the trace instance\n");
goto out_top;
}
+
+ if (params->events) {
+ retval = trace_events_enable(&record->trace, params->events);
+ if (retval)
+ goto out_top;
+ }
+
trace_instance_start(&record->trace);
}
@@ -590,16 +677,16 @@ int timerlat_top_main(int argc, char *argv[])
if (!params->quiet)
timerlat_print_stats(params, top);
- if (!tracefs_trace_is_on(trace->inst))
+ if (trace_is_off(&top->trace, &record->trace))
break;
- };
+ }
timerlat_print_stats(params, top);
return_value = 0;
- if (!tracefs_trace_is_on(trace->inst)) {
+ if (trace_is_off(&top->trace, &record->trace)) {
printf("rtla timelat hit stop tracing\n");
if (params->trace_output) {
printf(" Saving trace to %s\n", params->trace_output);
@@ -608,6 +695,10 @@ int timerlat_top_main(int argc, char *argv[])
}
out_top:
+ if (dma_latency_fd >= 0)
+ close(dma_latency_fd);
+ trace_events_destroy(&record->trace, params->events);
+ params->events = NULL;
timerlat_free_top(top->data);
osnoise_destroy_tool(record);
osnoise_destroy_tool(top);
diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c
index 83de259abcc1..5784c9f9e570 100644
--- a/tools/tracing/rtla/src/trace.c
+++ b/tools/tracing/rtla/src/trace.c
@@ -190,3 +190,348 @@ int trace_instance_start(struct trace_instance *trace)
{
return tracefs_trace_on(trace->inst);
}
+
+/*
+ * trace_events_free - free a list of trace events
+ */
+static void trace_events_free(struct trace_events *events)
+{
+ struct trace_events *tevent = events;
+ struct trace_events *free_event;
+
+ while (tevent) {
+ free_event = tevent;
+
+ tevent = tevent->next;
+
+ if (free_event->filter)
+ free(free_event->filter);
+ if (free_event->trigger)
+ free(free_event->trigger);
+ free(free_event->system);
+ free(free_event);
+ }
+}
+
+/*
+ * trace_event_alloc - alloc and parse a single trace event
+ */
+struct trace_events *trace_event_alloc(const char *event_string)
+{
+ struct trace_events *tevent;
+
+ tevent = calloc(1, sizeof(*tevent));
+ if (!tevent)
+ return NULL;
+
+ tevent->system = strdup(event_string);
+ if (!tevent->system) {
+ free(tevent);
+ return NULL;
+ }
+
+ tevent->event = strstr(tevent->system, ":");
+ if (tevent->event) {
+ *tevent->event = '\0';
+ tevent->event = &tevent->event[1];
+ }
+
+ return tevent;
+}
+
+/*
+ * trace_event_add_filter - record an event filter
+ */
+int trace_event_add_filter(struct trace_events *event, char *filter)
+{
+ if (event->filter)
+ free(event->filter);
+
+ event->filter = strdup(filter);
+ if (!event->filter)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * trace_event_add_trigger - record an event trigger action
+ */
+int trace_event_add_trigger(struct trace_events *event, char *trigger)
+{
+ if (event->trigger)
+ free(event->trigger);
+
+ event->trigger = strdup(trigger);
+ if (!event->trigger)
+ return 1;
+
+ return 0;
+}
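A sketch of attaching a filter and a trigger to an event (illustrative only; the osnoise:irq_noise event and its duration field are plausible examples, and both helpers strdup() their argument, so the caller keeps ownership of the passed string):

	struct trace_events *tevent = trace_event_alloc("osnoise:irq_noise");

	if (!tevent)
		return 1;
	if (trace_event_add_filter(tevent, "duration > 10000"))
		return 1;
	if (trace_event_add_trigger(tevent, "hist:key=duration:sort=duration"))
		return 1;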
+
+/*
+ * trace_event_disable_filter - disable an event filter
+ */
+static void trace_event_disable_filter(struct trace_instance *instance,
+ struct trace_events *tevent)
+{
+ char filter[1024];
+ int retval;
+
+ if (!tevent->filter)
+ return;
+
+ if (!tevent->filter_enabled)
+ return;
+
+ debug_msg("Disabling %s:%s filter %s\n", tevent->system,
+ tevent->event ? : "*", tevent->filter);
+
+ snprintf(filter, 1024, "!%s\n", tevent->filter);
+
+ retval = tracefs_event_file_write(instance->inst, tevent->system,
+ tevent->event, "filter", filter);
+ if (retval < 0)
+ err_msg("Error disabling %s:%s filter %s\n", tevent->system,
+ tevent->event ? : "*", tevent->filter);
+}
+
+/*
+ * trace_event_save_hist - save the content of an event hist
+ *
+ * If the trigger is a hist: trigger, save the content of the hist file.
+ */
+static void trace_event_save_hist(struct trace_instance *instance,
+ struct trace_events *tevent)
+{
+ int retval, index, out_fd;
+ mode_t mode = 0644;
+ char path[1024];
+ char *hist;
+
+ if (!tevent)
+ return;
+
+ /* trigger enables hist */
+ if (!tevent->trigger)
+ return;
+
+ /* is this a hist: trigger? */
+ retval = strncmp(tevent->trigger, "hist:", strlen("hist:"));
+ if (retval)
+ return;
+
+ snprintf(path, 1024, "%s_%s_hist.txt", tevent->system, tevent->event);
+
+ printf(" Saving event %s:%s hist to %s\n", tevent->system, tevent->event, path);
+
+ out_fd = creat(path, mode);
+ if (out_fd < 0) {
+ err_msg(" Failed to create %s output file\n", path);
+ return;
+ }
+
+ hist = tracefs_event_file_read(instance->inst, tevent->system, tevent->event, "hist", 0);
+ if (!hist) {
+ err_msg(" Failed to read %s:%s hist file\n", tevent->system, tevent->event);
+ goto out_close;
+ }
+
+ index = 0;
+ do {
+ retval = write(out_fd, &hist[index], strlen(hist) - index);
+ /* bail out on a write error instead of looping forever */
+ if (retval < 0)
+ break;
+ index += retval;
+ } while (index < strlen(hist));
+
+ free(hist);
+out_close:
+ close(out_fd);
+}
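For example, with a hist: trigger on osnoise:irq_noise, the snapshot above lands in osnoise_irq_noise_hist.txt in the current working directory, captured right before the trigger is removed.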
+
+/*
+ * trace_event_disable_trigger - disable an event trigger
+ */
+static void trace_event_disable_trigger(struct trace_instance *instance,
+ struct trace_events *tevent)
+{
+ char trigger[1024];
+ int retval;
+
+ if (!tevent->trigger)
+ return;
+
+ if (!tevent->trigger_enabled)
+ return;
+
+ debug_msg("Disabling %s:%s trigger %s\n", tevent->system,
+ tevent->event ? : "*", tevent->trigger);
+
+ trace_event_save_hist(instance, tevent);
+
+ snprintf(trigger, 1024, "!%s\n", tevent->trigger);
+
+ retval = tracefs_event_file_write(instance->inst, tevent->system,
+ tevent->event, "trigger", trigger);
+ if (retval < 0)
+ err_msg("Error disabling %s:%s trigger %s\n", tevent->system,
+ tevent->event ? : "*", tevent->trigger);
+}
+
+/*
+ * trace_events_disable - disable all trace events
+ */
+void trace_events_disable(struct trace_instance *instance,
+ struct trace_events *events)
+{
+ struct trace_events *tevent = events;
+
+ if (!events)
+ return;
+
+ while (tevent) {
+ debug_msg("Disabling event %s:%s\n", tevent->system, tevent->event ? : "*");
+ if (tevent->enabled) {
+ trace_event_disable_filter(instance, tevent);
+ trace_event_disable_trigger(instance, tevent);
+ tracefs_event_disable(instance->inst, tevent->system, tevent->event);
+ }
+
+ tevent->enabled = 0;
+ tevent = tevent->next;
+ }
+}
+
+/*
+ * trace_event_enable_filter - enable an event filter associated with an event
+ */
+static int trace_event_enable_filter(struct trace_instance *instance,
+ struct trace_events *tevent)
+{
+ char filter[1024];
+ int retval;
+
+ if (!tevent->filter)
+ return 0;
+
+ if (!tevent->event) {
+ err_msg("Filter %s applies only for single events, not for all %s:* events\n",
+ tevent->filter, tevent->system);
+ return 1;
+ }
+
+ snprintf(filter, 1024, "%s\n", tevent->filter);
+
+ debug_msg("Enabling %s:%s filter %s\n", tevent->system,
+ tevent->event ? : "*", tevent->filter);
+
+ retval = tracefs_event_file_write(instance->inst, tevent->system,
+ tevent->event, "filter", filter);
+ if (retval < 0) {
+ err_msg("Error enabling %s:%s filter %s\n", tevent->system,
+ tevent->event ? : "*", tevent->filter);
+ return 1;
+ }
+
+ tevent->filter_enabled = 1;
+ return 0;
+}
+
+/*
+ * trace_event_enable_trigger - enable an event trigger associated with an event
+ */
+static int trace_event_enable_trigger(struct trace_instance *instance,
+ struct trace_events *tevent)
+{
+ char trigger[1024];
+ int retval;
+
+ if (!tevent->trigger)
+ return 0;
+
+ if (!tevent->event) {
+ err_msg("Trigger %s applies only for single events, not for all %s:* events\n",
+ tevent->trigger, tevent->system);
+ return 1;
+ }
+
+ snprintf(trigger, 1024, "%s\n", tevent->trigger);
+
+ debug_msg("Enabling %s:%s trigger %s\n", tevent->system,
+ tevent->event ? : "*", tevent->trigger);
+
+ retval = tracefs_event_file_write(instance->inst, tevent->system,
+ tevent->event, "trigger", trigger);
+ if (retval < 0) {
+ err_msg("Error enabling %s:%s trigger %s\n", tevent->system,
+ tevent->event ? : "*", tevent->trigger);
+ return 1;
+ }
+
+ tevent->trigger_enabled = 1;
+
+ return 0;
+}
+
+/*
+ * trace_events_enable - enable all events
+ */
+int trace_events_enable(struct trace_instance *instance,
+ struct trace_events *events)
+{
+ struct trace_events *tevent = events;
+ int retval;
+
+ while (tevent) {
+ debug_msg("Enabling event %s:%s\n", tevent->system, tevent->event ? : "*");
+ retval = tracefs_event_enable(instance->inst, tevent->system, tevent->event);
+ if (retval < 0) {
+ err_msg("Error enabling event %s:%s\n", tevent->system,
+ tevent->event ? : "*");
+ return 1;
+ }
+
+ retval = trace_event_enable_filter(instance, tevent);
+ if (retval)
+ return 1;
+
+ retval = trace_event_enable_trigger(instance, tevent);
+ if (retval)
+ return 1;
+
+ tevent->enabled = 1;
+ tevent = tevent->next;
+ }
+
+ return 0;
+}
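Putting the pieces together, the intended lifecycle looks roughly like this (a sketch with an illustrative trace_instance pointer inst; error handling elided):

	struct trace_events *events = trace_event_alloc("sched:sched_switch");

	trace_events_enable(inst, events);	/* enable + filter + trigger */
	/* ... run the measurement ... */
	trace_events_destroy(inst, events);	/* disable everything, then free */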
+
+/*
+ * trace_events_destroy - disable and free all trace events
+ */
+void trace_events_destroy(struct trace_instance *instance,
+ struct trace_events *events)
+{
+ if (!events)
+ return;
+
+ trace_events_disable(instance, events);
+ trace_events_free(events);
+}
+
+int trace_is_off(struct trace_instance *tool, struct trace_instance *trace)
+{
+ /*
+ * The tool instance is always present; it is the one used to
+ * collect the data.
+ */
+ if (!tracefs_trace_is_on(tool->inst))
+ return 1;
+
+ /*
+ * The trace instance is only enabled when -t is set. IOW, when the system
+ * is tracing.
+ */
+ if (trace && !tracefs_trace_is_on(trace->inst))
+ return 1;
+
+ return 0;
+}
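This matches the call sites in the timerlat_top_main() hunk above: the tool's own instance is checked unconditionally, and the -t recording instance only when it exists. A sketch of the resulting main-loop test (names illustrative):

	while (!stop) {
		sleep(1);
		/* record is only set up when -t was given */
		if (trace_is_off(&top->trace,
				 params->trace_output ? &record->trace : NULL))
			break;
	}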
diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h
index 0ea1df0ad9a7..2e9a89a25615 100644
--- a/tools/tracing/rtla/src/trace.h
+++ b/tools/tracing/rtla/src/trace.h
@@ -2,6 +2,17 @@
#include <tracefs.h>
#include <stddef.h>
+struct trace_events {
+ struct trace_events *next;
+ char *system;
+ char *event;
+ char *filter;
+ char *trigger;
+ char enabled;
+ char filter_enabled;
+ char trigger_enabled;
+};
+
struct trace_instance {
struct tracefs_instance *inst;
struct tep_handle *tep;
@@ -25,3 +36,15 @@ void destroy_instance(struct tracefs_instance *inst);
int save_trace_to_file(struct tracefs_instance *inst, const char *filename);
int collect_registered_events(struct tep_event *tep, struct tep_record *record,
int cpu, void *context);
+
+struct trace_events *trace_event_alloc(const char *event_string);
+void trace_events_disable(struct trace_instance *instance,
+ struct trace_events *events);
+void trace_events_destroy(struct trace_instance *instance,
+ struct trace_events *events);
+int trace_events_enable(struct trace_instance *instance,
+ struct trace_events *events);
+
+int trace_event_add_filter(struct trace_events *event, char *filter);
+int trace_event_add_trigger(struct trace_events *event, char *trigger);
+int trace_is_off(struct trace_instance *tool, struct trace_instance *trace);
diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c
index ffaf8ec84001..da2b590edaed 100644
--- a/tools/tracing/rtla/src/utils.c
+++ b/tools/tracing/rtla/src/utils.c
@@ -10,6 +10,7 @@
#include <unistd.h>
#include <ctype.h>
#include <errno.h>
+#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
@@ -431,3 +432,35 @@ int parse_prio(char *arg, struct sched_attr *sched_param)
}
return 0;
}
+
+/*
+ * set_cpu_dma_latency - set the /dev/cpu_dma_latency value
+ *
+ * This is used to reduce the exit-from-idle latency. The value
+ * is reset once the file descriptor of /dev/cpu_dma_latency is
+ * closed.
+ *
+ * Return: the /dev/cpu_dma_latency file descriptor
+ */
+int set_cpu_dma_latency(int32_t latency)
+{
+ int retval;
+ int fd;
+
+ fd = open("/dev/cpu_dma_latency", O_RDWR);
+ if (fd < 0) {
+ err_msg("Error opening /dev/cpu_dma_latency\n");
+ return -1;
+ }
+
+ retval = write(fd, &latency, 4);
+ if (retval < 1) {
+ err_msg("Error setting /dev/cpu_dma_latency\n");
+ close(fd);
+ return -1;
+ }
+
+ debug_msg("Set /dev/cpu_dma_latency to %d\n", latency);
+
+ return fd;
+}
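Usage sketch, mirroring the timerlat_top_main() hunk above: the PM QoS request made via /dev/cpu_dma_latency stays in force only while the file descriptor is open, so it must be kept open for the duration of the run:

	int dma_latency_fd = set_cpu_dma_latency(0);	/* forbid deep idle states */

	if (dma_latency_fd < 0)
		return 1;
	/* ... the measurement runs with the latency constraint applied ... */
	close(dma_latency_fd);	/* the kernel drops the QoS request here */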
diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h
index 9aa962319ca2..fa08e374870a 100644
--- a/tools/tracing/rtla/src/utils.h
+++ b/tools/tracing/rtla/src/utils.h
@@ -54,3 +54,4 @@ struct sched_attr {
int parse_prio(char *arg, struct sched_attr *sched_param);
int set_comm_sched_attr(const char *comm, struct sched_attr *attr);
+int set_cpu_dma_latency(int32_t latency);
diff --git a/tools/virtio/linux/mm_types.h b/tools/virtio/linux/mm_types.h
new file mode 100644
index 000000000000..356ba4d496f6
--- /dev/null
+++ b/tools/virtio/linux/mm_types.h
@@ -0,0 +1,3 @@
+struct folio {
+ struct page page;
+};
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index cb3f29c09aff..23f142af544a 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -130,6 +130,7 @@ static void vdev_info_init(struct vdev_info* dev, unsigned long long features)
memset(dev, 0, sizeof *dev);
dev->vdev.features = features;
INIT_LIST_HEAD(&dev->vdev.vqs);
+ spin_lock_init(&dev->vdev.vqs_list_lock);
dev->buf_size = 1024;
dev->buf = malloc(dev->buf_size);
assert(dev->buf);
diff --git a/usr/include/Makefile b/usr/include/Makefile
index 7be7468c177b..83822c33e9e7 100644
--- a/usr/include/Makefile
+++ b/usr/include/Makefile
@@ -28,6 +28,7 @@ no-header-test += linux/am437x-vpfe.h
no-header-test += linux/android/binder.h
no-header-test += linux/android/binderfs.h
no-header-test += linux/coda.h
+no-header-test += linux/cyclades.h
no-header-test += linux/errqueue.h
no-header-test += linux/fsmap.h
no-header-test += linux/hdlc/ioctl.h
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 2ad013b8bde9..59b1dd4a549e 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -463,8 +463,8 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
idx = srcu_read_lock(&kvm->irq_srcu);
gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
if (gsi != -1)
- hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
- link)
+ hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list,
+ link, srcu_read_lock_held(&kvm->irq_srcu))
if (kian->gsi == gsi) {
srcu_read_unlock(&kvm->irq_srcu, idx);
return true;
@@ -480,8 +480,8 @@ void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
{
struct kvm_irq_ack_notifier *kian;
- hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
- link)
+ hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list,
+ link, srcu_read_lock_held(&kvm->irq_srcu))
if (kian->gsi == gsi)
kian->irq_acked(kian);
}
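For context, hlist_for_each_entry_srcu() differs from the _rcu variant by taking a fourth argument, a lockdep expression documenting which lock justifies the traversal; under CONFIG_PROVE_RCU the _rcu variant would complain here, since only kvm->irq_srcu, not plain RCU, is held. The general pattern, with illustrative names:

	int idx = srcu_read_lock(&ss);

	hlist_for_each_entry_srcu(pos, &head, node, srcu_read_lock_held(&ss))
		visit(pos);

	srcu_read_unlock(&ss, idx);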
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9a20f2299386..0afc016cc54d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2248,7 +2248,6 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
return NULL;
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_memslot);
bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
@@ -2463,9 +2462,8 @@ static int kvm_try_get_pfn(kvm_pfn_t pfn)
}
static int hva_to_pfn_remapped(struct vm_area_struct *vma,
- unsigned long addr, bool *async,
- bool write_fault, bool *writable,
- kvm_pfn_t *p_pfn)
+ unsigned long addr, bool write_fault,
+ bool *writable, kvm_pfn_t *p_pfn)
{
kvm_pfn_t pfn;
pte_t *ptep;
@@ -2575,7 +2573,7 @@ retry:
if (vma == NULL)
pfn = KVM_PFN_ERR_FAULT;
else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) {
- r = hva_to_pfn_remapped(vma, addr, async, write_fault, writable, &pfn);
+ r = hva_to_pfn_remapped(vma, addr, write_fault, writable, &pfn);
if (r == -EAGAIN)
goto retry;
if (r < 0)
@@ -5530,9 +5528,7 @@ static int kvm_suspend(void)
static void kvm_resume(void)
{
if (kvm_usage_count) {
-#ifdef CONFIG_LOCKDEP
- WARN_ON(lockdep_is_held(&kvm_count_lock));
-#endif
+ lockdep_assert_not_held(&kvm_count_lock);
hardware_enable_nolock(NULL);
}
}